From 73eba0f95d6689092a310f09d0d5ea54e8551128 Mon Sep 17 00:00:00 2001 From: Gabriel Patzleiner Date: Wed, 1 Apr 2020 22:47:51 +0200 Subject: [PATCH] Fixed: some Parser problems (Improved editions and German releases) - Moved the ReportEditionRegex up because it is now used in two other regexes (so the same regex is no longer repeated three times). Also added an optional bracket at the beginning. - Added Recut to the edition regex. - The regex for German and French tracker formats (ReportMovieTitleLenientRegexBefore) has been updated to support the same editions as the English versions, but the regex is only used if ParsingLeniency is set to Lenient. Should resolve a lot of cases for German releases where the movie title wasn't parsed correctly before. - Updated the acronym method. Fixed the wrongly added dots for "World War Z.", "World War Z. 2", but it still supports "R.I.P.D.", "V.H.S. 2", "G.I. Joe" and "2 Tage in L.A." - Added a lot of tests for these changes (there is a new test for German releases that works without lenient parsing, and another one that only works if lenient parsing is activated). --- .../ParserTests/ParserFixture.cs | 48 +++++++++++++++++++ src/NzbDrone.Core/Parser/Parser.cs | 22 +++++---- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs index 02589c01c..6d9e70c2f 100644 --- a/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs @@ -1,8 +1,10 @@ using System.Linq; using FluentAssertions; +using FluentAssertions.Execution; using NUnit.Framework; using NzbDrone.Core.Languages; using NzbDrone.Core.Parser; +using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Test.Framework; namespace NzbDrone.Core.Test.ParserTests @@ -56,12 +58,58 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("Leaving Jeruselem by Railway (1897) [DVD].mp4", "Leaving Jeruselem by Railway")] 
[TestCase("Climax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Climax")] [TestCase("Movie.Title.Imax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Movie Title")] + [TestCase("World.War.Z.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z")] + [TestCase("World.War.Z.2.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z 2")] + [TestCase("G.I.Joe.Retaliation.2013.THEATRiCAL.COMPLETE.BLURAY-GLiMMER", "G.I. Joe Retaliation")] [TestCase("www.Torrenting.org - Revenge.2008.720p.X264-DIMENSION", "Revenge")] public void should_parse_movie_title(string postTitle, string title) { Parser.Parser.ParseMovieTitle(postTitle, true).MovieTitle.Should().Be(title); } + [TestCase("Avatar.Aufbruch.nach.Pandora.Extended.2009.German.DTS.720p.BluRay.x264-SoW", "Avatar Aufbruch nach Pandora", "Extended", 2009)] + [TestCase("Drop.Zone.1994.German.AC3D.DL.720p.BluRay.x264-KLASSiGERHD", "Drop Zone", "", 1994)] + [TestCase("Kick.Ass.2.2013.German.DTS.DL.720p.BluRay.x264-Pate", "Kick Ass 2", "", 2013)] + [TestCase("Paradise.Hills.2019.German.DL.AC3.Dubbed.1080p.BluRay.x264-muhHD", "Paradise Hills", "", 2019)] + [TestCase("96.Hours.Taken.3.EXTENDED.2014.German.DL.1080p.BluRay.x264-ENCOUNTERS", "96 Hours Taken 3", "EXTENDED", 2014)] + [TestCase("World.War.Z.EXTENDED.CUT.2013.German.DL.1080p.BluRay.x264-HQX", "World War Z", "EXTENDED CUT", 2013)] + [TestCase("Sin.City.2005.RECUT.EXTENDED.German.DL.1080p.BluRay.x264-DETAiLS", "Sin City", "RECUT EXTENDED", 2005)] + [TestCase("Die.Klasse.von.1999.1990.German.720p.HDTV.x264-NORETAiL", "Die Klasse von 1999", "", 1990)] //year in the title + [TestCase("2.Tage.in.L.A.1996.GERMAN.DL.720p.WEB.H264-SOV", "2 Tage in L.A.", "", 1996)] + [TestCase("8.2019.GERMAN.720p.BluRay.x264-UNiVERSUM", "8", "", 2019)] + [TestCase("Life.Partners.2014.German.DL.PAL.DVDR-ETM", "Life Partners", "", 2014)] + [TestCase("Joe.Dreck.2.EXTENDED.EDITION.2015.German.DL.PAL.DVDR-ETM", "Joe Dreck 2", "EXTENDED EDITION", 2015)] + 
[TestCase("Rango.EXTENDED.2011.HDRip.AC3.German.XviD-POE", "Rango", "EXTENDED", 2011)] + [TestCase("Suicide.Squad.2016.EXTENDED.German.DL.AC3.BDRip.x264-hqc", "Suicide Squad", "EXTENDED", 2016)] //edition after year + public void should_parse_german_movie(string postTitle, string title, string edition, int year) + { + ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, false); + using (new AssertionScope()) + { + movie.MovieTitle.Should().Be(title); + movie.Edition.Should().Be(edition); + movie.Year.Should().Be(year); + } + } + + [TestCase("Der.Hobbit.Eine.Unerwartete.Reise.Extended.German.720p.BluRay.x264-EXQUiSiTE", "Der Hobbit Eine Unerwartete Reise", "Extended", 0)] //no year + [TestCase("Die.Unfassbaren.Now.You.See.Me.EXTENDED.German.DTS.720p.BluRay.x264-RHD", "Die Unfassbaren Now You See Me", "EXTENDED", 0)] //no year + [TestCase("Der.Soldat.James.German.Bluray.FuckYou.Pso.Why.cant.you.follow.scene.rules.1998", "Der Soldat James", "", 1998)] + [TestCase("Passengers.German.DL.AC3.Dubbed..BluRay.x264-PsO", "Passengers", "", 0)] //no year + [TestCase("Das.A.Team.Der.Film.Extended.Cut.German.720p.BluRay.x264-ANCIENT", "Das A Team Der Film", "Extended Cut", 0)] //no year + [TestCase("Cars.2.German.DL.720p.BluRay.x264-EmpireHD", "Cars 2", "", 0)] //no year + [TestCase("Der.Film.deines.Lebens.German.2011.PAL.DVDR-ETM", "Der Film deines Lebens", "", 2011)] //year at wrong position + public void should_parse_german_movie_lenient(string postTitle, string title, string edition, int year) + { + ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, true); + using (new AssertionScope()) + { + movie.MovieTitle.Should().Be(title); + movie.Edition.Should().Be(edition); + movie.Year.Should().Be(year); + } + } + [TestCase("(1995) Ghost in the Shell", "Ghost in the Shell")] public void should_parse_movie_folder_name(string postTitle, string title) { diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs index 
b2e3fee8f..f01b99fba 100644 --- a/src/NzbDrone.Core/Parser/Parser.cs +++ b/src/NzbDrone.Core/Parser/Parser.cs @@ -16,10 +16,14 @@ namespace NzbDrone.Core.Parser { private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser)); + private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled); + + private static readonly Regex ReportEditionRegex = new Regex(@"\(?\b(?<edition>(((Recut.|Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex[] ReportMovieTitleRegex = new[] { //Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011 - new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*\(?\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?.{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", + new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*" + ReportEditionRegex + @".{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.2011.Special.Edition //TODO: Seems to slow down parsing heavily! 
@@ -48,7 +52,7 @@ namespace NzbDrone.Core.Parser private static readonly Regex[] ReportMovieTitleLenientRegexBefore = new[] { //Some german or french tracker formats - new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(" + ReportEditionRegex + @".{1,3})?(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; private static readonly Regex[] ReportMovieTitleLenientRegexAfter = new Regex[] @@ -135,10 +139,6 @@ namespace NzbDrone.Core.Parser private static readonly Regex RequestInfoRegex = new Regex(@"^(?:\[.+?\])+", RegexOptions.Compiled); - private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled); - - private static readonly Regex ReportEditionRegex = new Regex(@"\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\)?\b", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" }; private static Dictionary<string, string> _umlautMappings = new Dictionary<string, string> { @@ -488,13 +488,19 @@ namespace NzbDrone.Core.Parser { nextPart = parts[n + 1]; } + else + { + nextPart = ""; + } - if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n)) + if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _) && + 
(previousAcronym || n < parts.Length - 1) && + (previousAcronym || nextPart.Length != 1 || !int.TryParse(nextPart, out _))) { movieName += part + "."; previousAcronym = true; } - else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1)) + else if (part.ToLower() == "a" && (previousAcronym || nextPart.Length == 1)) { movieName += part + "."; previousAcronym = true;