From 26f8b501e77b7bd9a73028637e82de2f2605dd3a Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Tue, 3 Aug 2021 13:46:56 -0700 Subject: [PATCH 1/3] Made CleanStringParser more robust Now it can handle [...] at beginning of string --- Emby.Naming/Common/NamingOptions.cs | 7 ++- Emby.Naming/Video/CleanStringParser.cs | 44 +++++++++++++++---- .../Video/CleanStringTests.cs | 8 +++- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/Emby.Naming/Common/NamingOptions.cs b/Emby.Naming/Common/NamingOptions.cs index 5f125eb4f1..eb95c9b9c3 100644 --- a/Emby.Naming/Common/NamingOptions.cs +++ b/Emby.Naming/Common/NamingOptions.cs @@ -137,8 +137,11 @@ namespace Emby.Naming.Common CleanStrings = new[] { - @"[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)", - @"(\[.*\])" + @"^\s*(?<cleaned>.+?)[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)", + @"^(?<cleaned>.+?)(\[.*\])", + @"^\s*(?<cleaned>.+?)\WE\d+(-|~)E?\d+(\W|$)", + @"^\s*\[[^\]]+\](?!\.\w+$)\s*(?<cleaned>.+)", + @"^\s*(?<cleaned>.+?)\s+-\s+\d+\s*$" }; SubtitleFileExtensions = new[] diff 
--git a/Emby.Naming/Video/CleanStringParser.cs b/Emby.Naming/Video/CleanStringParser.cs index 4eef3ebc5e..0518095705 100644 --- a/Emby.Naming/Video/CleanStringParser.cs +++ b/Emby.Naming/Video/CleanStringParser.cs @@ -25,26 +25,54 @@ namespace Emby.Naming.Video return false; } - var len = expressions.Count; - for (int i = 0; i < len; i++) + // Iteratively remove extra cruft until we're left with the string + // we want. + newName = ReadOnlySpan<char>.Empty; + const int maxTries = 100; // This is just a precautionary + // measure. Should not be neccesary. + var loopCounter = 0; + for (; loopCounter < maxTries; loopCounter++) { - if (TryClean(name, expressions[i], out newName)) + bool cleaned = false; + var len = expressions.Count; + for (int i = 0; i < len; i++) + { + if (TryClean(name, expressions[i], out newName)) + { + cleaned = true; + name = newName.ToString(); + break; + } + } + + if (!cleaned) { - return true; + break; } } - newName = ReadOnlySpan<char>.Empty; - return false; + if (loopCounter > 0) + { + newName = name.AsSpan(); + } + + return newName != ReadOnlySpan<char>.Empty; } private static bool TryClean(string name, Regex expression, out ReadOnlySpan<char> newName) { var match = expression.Match(name); int index = match.Index; - if (match.Success && index != 0) + if (match.Success) { - newName = name.AsSpan().Slice(0, match.Index); + var found = match.Groups.TryGetValue("cleaned", out var cleaned); + if (!found || cleaned == null) + { + newName = ReadOnlySpan<char>.Empty; + return false; + } + + newName = name.AsSpan().Slice(cleaned.Index, cleaned.Length); return true; } diff --git a/tests/Jellyfin.Naming.Tests/Video/CleanStringTests.cs b/tests/Jellyfin.Naming.Tests/Video/CleanStringTests.cs index fb050cf5a7..1d51e7ca59 100644 --- a/tests/Jellyfin.Naming.Tests/Video/CleanStringTests.cs +++ b/tests/Jellyfin.Naming.Tests/Video/CleanStringTests.cs @@ -1,4 +1,4 @@ -using System; +using System; using Emby.Naming.Common; using Emby.Naming.Video; using Xunit; @@ -23,6 +23,12 @@ namespace 
Jellyfin.Naming.Tests.Video [InlineData("Crouching.Tiger.Hidden.Dragon.BDrip.mkv", "Crouching.Tiger.Hidden.Dragon")] [InlineData("Crouching.Tiger.Hidden.Dragon.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")] [InlineData("Crouching.Tiger.Hidden.Dragon.4K.UltraHD.HDR.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")] + [InlineData("[HorribleSubs] Made in Abyss - 13 [720p].mkv", "Made in Abyss")] + [InlineData("[Tsundere] Kore wa Zombie Desu ka of the Dead [BDRip h264 1920x1080 FLAC]", "Kore wa Zombie Desu ka of the Dead")] + [InlineData("[Erai-raws] Jujutsu Kaisen - 03 [720p][Multiple Subtitle].mkv", "Jujutsu Kaisen")] + [InlineData("[OCN] 애타는 로맨스 720p-NEXT", "애타는 로맨스")] + [InlineData("[tvN] 혼술남녀.E01-E16.720p-NEXT", "혼술남녀")] + [InlineData("[tvN] 연애말고 결혼 E01~E16 END HDTV.H264.720p-WITH", "연애말고 결혼")] // FIXME: [InlineData("After The Sunset - [0004].mkv", "After The Sunset")] public void CleanStringTest_NeedsCleaning_Success(string input, string expectedName) { From b2a10609af703b1c513570d6c397ff22e0214bb0 Mon Sep 17 00:00:00 2001 From: sushilicious <*> Date: Wed, 4 Aug 2021 23:25:54 -0700 Subject: [PATCH 2/3] Made CleanStringParser go through regexps only once --- Emby.Naming/Common/NamingOptions.cs | 4 ++-- Emby.Naming/Video/CleanStringParser.cs | 33 ++++++-------------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/Emby.Naming/Common/NamingOptions.cs b/Emby.Naming/Common/NamingOptions.cs index eb95c9b9c3..a745532c4f 100644 --- a/Emby.Naming/Common/NamingOptions.cs +++ b/Emby.Naming/Common/NamingOptions.cs @@ -139,9 +139,9 @@ namespace Emby.Naming.Common { @"^\s*(?<cleaned>.+?)[ 
_\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)", @"^(?<cleaned>.+?)(\[.*\])", - @"^\s*(?<cleaned>.+?)\WE\d+(-|~)E?\d+(\W|$)", + @"^\s*(?<cleaned>.+?)\WE[0-9]+(-|~)E?[0-9]+(\W|$)", @"^\s*\[[^\]]+\](?!\.\w+$)\s*(?<cleaned>.+)", - @"^\s*(?<cleaned>.+?)\s+-\s+\d+\s*$" + @"^\s*(?<cleaned>.+?)\s+-\s+[0-9]+\s*$" }; SubtitleFileExtensions = new[] diff --git a/Emby.Naming/Video/CleanStringParser.cs b/Emby.Naming/Video/CleanStringParser.cs index 0518095705..63a5953578 100644 --- a/Emby.Naming/Video/CleanStringParser.cs +++ b/Emby.Naming/Video/CleanStringParser.cs @@ -25,37 +25,18 @@ namespace Emby.Naming.Video return false; } - // Iteratively remove extra cruft until we're left with the string - // we want. - newName = ReadOnlySpan<char>.Empty; - const int maxTries = 100; // This is just a precautionary - // measure. Should not be neccesary. - var loopCounter = 0; - for (; loopCounter < maxTries; loopCounter++) + // Iteratively apply the regexps to clean the string. + bool cleaned = false; + for (int i = 0; i < expressions.Count; i++) { - bool cleaned = false; - var len = expressions.Count; - for (int i = 0; i < len; i++) - { - if (TryClean(name, expressions[i], out newName)) - { - cleaned = true; - name = newName.ToString(); - break; - } - } - - if (!cleaned) + if (TryClean(name, expressions[i], out newName)) { - break; + cleaned = true; + name = newName.ToString(); } } - if (loopCounter > 0) - { - newName = name.AsSpan(); - } - + newName = cleaned ? 
name.AsSpan() : ReadOnlySpan<char>.Empty; return newName != ReadOnlySpan<char>.Empty; } From 6bc7d78f6fbef9e47731a4991ca72d3ba4c23bce Mon Sep 17 00:00:00 2001 From: sushilicious <40350682+sushilicious@users.noreply.github.com> Date: Thu, 5 Aug 2021 13:09:42 -0700 Subject: [PATCH 3/3] Update Emby.Naming/Video/CleanStringParser.cs Cleaned up code a bit Co-authored-by: Claus Vium --- Emby.Naming/Video/CleanStringParser.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Emby.Naming/Video/CleanStringParser.cs b/Emby.Naming/Video/CleanStringParser.cs index 63a5953578..99cb289a25 100644 --- a/Emby.Naming/Video/CleanStringParser.cs +++ b/Emby.Naming/Video/CleanStringParser.cs @@ -37,7 +37,7 @@ namespace Emby.Naming.Video } newName = cleaned ? name.AsSpan() : ReadOnlySpan<char>.Empty; - return newName != ReadOnlySpan<char>.Empty; + return cleaned; } private static bool TryClean(string name, Regex expression, out ReadOnlySpan<char> newName)