From 4bac44e893444911821f533872e60871db3c232e Mon Sep 17 00:00:00 2001 From: tenshiak <75363841+tenshiak@users.noreply.github.com> Date: Thu, 31 Dec 2020 04:50:45 +0100 Subject: [PATCH] New: Better Polish language Release Parsing (#5592) * Parse Polish language * Tests for parsing Polish language --- .../ParserTests/LanguageParserFixture.cs | 8 ++++++++ src/NzbDrone.Core/Parser/LanguageParser.cs | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs index eb744e341..6cb167293 100644 --- a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs @@ -158,6 +158,14 @@ namespace NzbDrone.Core.Test.ParserTests } [TestCase("Pulp.Fiction.1994.Polish.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.PL.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.PLDUB.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.DUBPL.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.PL-DUB.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.DUB-PL.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.PLLEK.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.LEKPL.1080p.XviD-LOL")] + [TestCase("Pulp.Fiction.1994.PL-LEK.1080p.XviD-LOL")] public void should_parse_language_polish(string postTitle) { var result = Parser.Parser.ParseMovieTitle(postTitle, true); diff --git a/src/NzbDrone.Core/Parser/LanguageParser.cs b/src/NzbDrone.Core/Parser/LanguageParser.cs index 792ce8cf6..fb09661e6 100644 --- a/src/NzbDrone.Core/Parser/LanguageParser.cs +++ b/src/NzbDrone.Core/Parser/LanguageParser.cs @@ -14,10 +14,10 @@ namespace NzbDrone.Core.Parser { private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser)); - private static readonly Regex LanguageRegex = new 
Regex(@"(?:\W|_|^)(?<italian>\b(?:ita|italian)\b)|(?<german>\b(?:german|videomann|ger)\b)|(?<flemish>flemish)|(?<bulgarian>bgaudio)|(?<brazilian>dublado)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VO|VFF|VFQ|VFI|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<english>\beng\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)", + private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_|^)(?<italian>\b(?:ita|italian)\b)|(?<german>\b(?:german|videomann|ger)\b)|(?<flemish>flemish)|(?<bulgarian>bgaudio)|(?<brazilian>dublado)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VO|VFF|VFQ|VFI|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<english>\beng\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)", RegexOptions.IgnoreCase | RegexOptions.Compiled); - private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)", + private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)", RegexOptions.Compiled); private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})(?:[-_. ]forced)?$", RegexOptions.Compiled | RegexOptions.IgnoreCase); @@ -155,6 +155,11 @@ namespace NzbDrone.Core.Parser languages.Add(Language.Czech); } + if (caseSensitiveMatch.Groups["polish"].Captures.Cast<Capture>().Any()) + { + languages.Add(Language.Polish); + } + var matches = LanguageRegex.Matches(title); foreach (Match match in matches) @@ -219,6 +224,11 @@ namespace NzbDrone.Core.Parser languages.Add(Language.Hebrew); } + if (match.Groups["polish"].Success) + { + languages.Add(Language.Polish); + } + if (match.Groups["chinese"].Success) { languages.Add(Language.Chinese);