From 716eadc5511165ed8a14704785ffed72df3a8690 Mon Sep 17 00:00:00 2001 From: Qstick Date: Mon, 20 Dec 2021 18:01:00 -0600 Subject: [PATCH] Add Multiple Languages Closes #6385 Closes #6564 Closes #6694 Closes #6463 Co-Authored-By: siankatabg Co-Authored-By: tandy1000 <24867509+tandy-1000@users.noreply.github.com> Co-Authored-By: Kristof Mattei <864376+kristof-mattei@users.noreply.github.com> Co-Authored-By: Oleksandr Hulyi <4095184+pamidur@users.noreply.github.com> --- .../Languages/LanguageFixture.cs | 10 +++- .../ParserTests/LanguageParserFixture.cs | 35 +++++++++++++ src/NzbDrone.Core/Languages/Language.cs | 3 ++ src/NzbDrone.Core/Parser/IsoLanguages.cs | 5 +- src/NzbDrone.Core/Parser/LanguageParser.cs | 51 +++++++++++++++++-- 5 files changed, 96 insertions(+), 8 deletions(-) diff --git a/src/NzbDrone.Core.Test/Languages/LanguageFixture.cs b/src/NzbDrone.Core.Test/Languages/LanguageFixture.cs index 783823b4c..d4e387260 100644 --- a/src/NzbDrone.Core.Test/Languages/LanguageFixture.cs +++ b/src/NzbDrone.Core.Test/Languages/LanguageFixture.cs @@ -43,7 +43,10 @@ namespace NzbDrone.Core.Test.Languages new object[] { 28, Language.Thai }, new object[] { 29, Language.Bulgarian }, new object[] { 30, Language.PortugueseBR }, - new object[] { 31, Language.Arabic } + new object[] { 31, Language.Arabic }, + new object[] { 32, Language.Ukrainian }, + new object[] { 33, Language.Persian }, + new object[] { 34, Language.Bengali }, }; public static object[] ToIntCases = @@ -81,7 +84,10 @@ namespace NzbDrone.Core.Test.Languages new object[] { Language.Thai, 28 }, new object[] { Language.Bulgarian, 29 }, new object[] { Language.PortugueseBR, 30 }, - new object[] { Language.Arabic, 31 } + new object[] { Language.Arabic, 31 }, + new object[] { Language.Ukrainian, 32 }, + new object[] { Language.Persian, 33 }, + new object[] { Language.Bengali, 34 }, }; [Test] diff --git a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs 
b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs index 061e9f69e..00a44b5e4 100644 --- a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs @@ -153,6 +153,7 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("Movie.Title.1994.Bulgarian.1080p.XviD-LOL")] [TestCase("Movie.Title.1994.BGAUDIO.1080p.XviD-LOL")] + [TestCase("Movie.Title.1994.BG.AUDIO.1080p.XviD-LOL")] public void should_parse_language_bulgarian(string postTitle) { var result = Parser.Parser.ParseMovieTitle(postTitle, true); @@ -306,6 +307,40 @@ namespace NzbDrone.Core.Test.ParserTests result.Languages.Should().BeEquivalentTo(Language.Arabic); } + [TestCase("Movie.Title [1989, BDRip] MVO + DVO + UKR (MVO) + Sub")] + [TestCase("Movie.Title (2006) BDRemux 1080p 2xUkr | Sub Ukr")] + [TestCase("Movie.Title [1984, BDRip 720p] MVO + MVO + Dub + AVO + 3xUkr")] + [TestCase("Movie.Title.2019.UKRAINIAN.WEBRip.x264-VXT")] + public void should_parse_language_ukrainian(string postTitle) + { + var result = Parser.Parser.ParseMovieTitle(postTitle, true); + + result.Languages.Should().BeEquivalentTo(Language.Ukrainian); + } + + [TestCase("Movie.Title [1937, BDRip 1080p] Dub UKR/Eng + Sub rus")] + [TestCase("Movie.Title.[2003.BDRemux.1080p].Dub.MVO.(2xUkr/Fra).Sub.(Rus/Fra)")] + public void should_parse_language_ukrainian_multi(string postTitle) + { + var result = Parser.Parser.ParseMovieTitle(postTitle, true); + + result.Languages.Should().Contain(Language.Ukrainian); + } + + [TestCase("Movie.Title.2019.PERSIAN.WEBRip.x264-VXT")] + public void should_parse_language_persian(string postTitle) + { + var result = Parser.Parser.ParseMovieTitle(postTitle); + result.Languages.Should().BeEquivalentTo(Language.Persian); + } + + [TestCase("Movie.Title.2019.BENGALI.WEBRip.x264-VXT")] + public void should_parse_language_bengali(string postTitle) + { + var result = Parser.Parser.ParseMovieTitle(postTitle); + 
result.Languages.Should().BeEquivalentTo(Language.Bengali); + } + [TestCase("Movie.Title.en.sub")] [TestCase("Movie Title.eng.sub")] [TestCase("Movie.Title.eng.forced.sub")] diff --git a/src/NzbDrone.Core/Languages/Language.cs b/src/NzbDrone.Core/Languages/Language.cs index 1a634e6d6..c4eb036a1 100644 --- a/src/NzbDrone.Core/Languages/Language.cs +++ b/src/NzbDrone.Core/Languages/Language.cs @@ -102,6 +102,9 @@ namespace NzbDrone.Core.Languages public static Language Bulgarian => new Language(29, "Bulgarian"); public static Language PortugueseBR => new Language(30, "Portuguese (Brazil)"); public static Language Arabic => new Language(31, "Arabic"); + public static Language Ukrainian => new Language(32, "Ukrainian"); + public static Language Persian => new Language(33, "Persian"); + public static Language Bengali => new Language(34, "Bengali"); public static Language Any => new Language(-1, "Any"); public static Language Original => new Language(-2, "Original"); diff --git a/src/NzbDrone.Core/Parser/IsoLanguages.cs b/src/NzbDrone.Core/Parser/IsoLanguages.cs index f12c024ae..f02909ad3 100644 --- a/src/NzbDrone.Core/Parser/IsoLanguages.cs +++ b/src/NzbDrone.Core/Parser/IsoLanguages.cs @@ -39,7 +39,10 @@ namespace NzbDrone.Core.Parser new IsoLanguage("bg", "", "bul", "Bulgarian", Language.Bulgarian), new IsoLanguage("ro", "", "ron", "Romanian", Language.Romanian), new IsoLanguage("pt", "br", "", "Portuguese (Brazil)", Language.PortugueseBR), - new IsoLanguage("ar", "", "ara", "Arabic", Language.Arabic) + new IsoLanguage("ar", "", "ara", "Arabic", Language.Arabic), + new IsoLanguage("uk", "", "ukr", "Ukrainian", Language.Ukrainian), + new IsoLanguage("fa", "", "fas", "Persian", Language.Persian), + new IsoLanguage("bn", "", "ben", "Bengali", Language.Bengali) }; public static IsoLanguage Find(string isoCode) diff --git a/src/NzbDrone.Core/Parser/LanguageParser.cs b/src/NzbDrone.Core/Parser/LanguageParser.cs index 90a707a5d..9dbc1387d 100644 --- 
a/src/NzbDrone.Core/Parser/LanguageParser.cs +++ b/src/NzbDrone.Core/Parser/LanguageParser.cs @@ -14,11 +14,27 @@ namespace NzbDrone.Core.Parser { private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser)); - private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_|^)(?\b(?:ita|italian)\b)|(?german\b|videomann|ger[. ]dub)|(?flemish)|(?bgaudio)|(?dublado)|(?greek)|(?\b(?:FR|VO|VFF|VFQ|VFI|VF2|TRUEFRENCH|FRE|FRA)\b)|(?\brus\b)|(?\beng\b)|(?\b(?:HUNDUB|HUN)\b)|(?\bHebDub\b)|(?\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)", - RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?\bLT\b)|(?\bCZ\b)|(?\bPL\b))(?:(?i)(?![\W|_|^]SUB))", - RegexOptions.Compiled); + private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_|^)(?\b(?:ita|italian)\b)| + (?german\b|videomann|ger[. ]dub)| + (?flemish)| + (?bgaudio)| + (?dublado)| + (?greek)| + (?\b(?:FR|VO|VFF|VFQ|VFI|VF2|TRUEFRENCH|FRE|FRA)\b)| + (?\brus\b)| + (?\beng\b)| + (?\b(?:HUNDUB|HUN)\b)| + (?\bHebDub\b)| + (?\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)| + (?\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)| + (?(?:(?:\dx)?UKR))", + RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace); + + private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?\bLT\b)| + (?\bCZ\b)| + (?\bPL\b)| + (?\bBG\b))(?:(?i)(?![\W|_|^]SUB))", + RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace); private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?[a-z]{2,3})(?:[-_. 
]forced)?$", RegexOptions.Compiled | RegexOptions.IgnoreCase); @@ -147,6 +163,21 @@ namespace NzbDrone.Core.Parser languages.Add(Language.Hebrew); } + if (lowerTitle.Contains("ukrainian")) + { + languages.Add(Language.Ukrainian); + } + + if (lowerTitle.Contains("persian")) + { + languages.Add(Language.Persian); + } + + if (lowerTitle.Contains("bengali")) + { + languages.Add(Language.Bengali); + } + // Case sensitive var caseSensitiveMatch = CaseSensitiveLanguageRegex.Match(title); @@ -165,6 +196,11 @@ namespace NzbDrone.Core.Parser languages.Add(Language.Polish); } + if (caseSensitiveMatch.Groups["bulgarian"].Success) + { + languages.Add(Language.Bulgarian); + } + var matches = LanguageRegex.Matches(title); foreach (Match match in matches) @@ -238,6 +274,11 @@ namespace NzbDrone.Core.Parser { languages.Add(Language.Chinese); } + + if (match.Groups["ukrainian"].Success) + { + languages.Add(Language.Ukrainian); + } } if (title.ToLower().Contains("multi"))