Fixed: some Parser problems (Improved editions and German releases)

- Moved the ReportEditionRegex up because we use it in another 2 regexes (so it's not 3 times the same regex). Also added an optional bracket at the beginning.
- Added Recut to the edition regex
- The Regex for german and french tracker formats (ReportMovieTitleLenientRegexBefore) has been updated to support the same editions as the english versions, but the regex is only used if ParsingLeniency is set to Lenient. Should resolve a lot of cases for german releases where the movietitle wasn't parsed correctly before.
- Updated acronym method. Fixed wrong dots for "World War Z.", "World War Z. 2", but still supports "R.I.P.D.", "V.H.S. 2", "G.I. Joe" and "2 Tage in L.A."
- Added a lot of tests for this changes (there is a new test for german releases that works without lenient parsing, and another one that only works if lenient parsing is activated)
pull/4503/head
Gabriel Patzleiner 5 years ago committed by Qstick
parent 1ef31e8094
commit 73eba0f95d

@ -1,8 +1,10 @@
using System.Linq;
using FluentAssertions;
using FluentAssertions.Execution;
using NUnit.Framework;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.Test.Framework;
namespace NzbDrone.Core.Test.ParserTests
@ -56,12 +58,58 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("Leaving Jeruselem by Railway (1897) [DVD].mp4", "Leaving Jeruselem by Railway")]
[TestCase("Climax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Climax")]
[TestCase("Movie.Title.Imax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Movie Title")]
[TestCase("World.War.Z.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z")]
[TestCase("World.War.Z.2.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z 2")]
[TestCase("G.I.Joe.Retaliation.2013.THEATRiCAL.COMPLETE.BLURAY-GLiMMER", "G.I. Joe Retaliation")]
[TestCase("www.Torrenting.org - Revenge.2008.720p.X264-DIMENSION", "Revenge")]
public void should_parse_movie_title(string postTitle, string title)
{
Parser.Parser.ParseMovieTitle(postTitle, true).MovieTitle.Should().Be(title);
}
[TestCase("Avatar.Aufbruch.nach.Pandora.Extended.2009.German.DTS.720p.BluRay.x264-SoW", "Avatar Aufbruch nach Pandora", "Extended", 2009)]
[TestCase("Drop.Zone.1994.German.AC3D.DL.720p.BluRay.x264-KLASSiGERHD", "Drop Zone", "", 1994)]
[TestCase("Kick.Ass.2.2013.German.DTS.DL.720p.BluRay.x264-Pate", "Kick Ass 2", "", 2013)]
[TestCase("Paradise.Hills.2019.German.DL.AC3.Dubbed.1080p.BluRay.x264-muhHD", "Paradise Hills", "", 2019)]
[TestCase("96.Hours.Taken.3.EXTENDED.2014.German.DL.1080p.BluRay.x264-ENCOUNTERS", "96 Hours Taken 3", "EXTENDED", 2014)]
[TestCase("World.War.Z.EXTENDED.CUT.2013.German.DL.1080p.BluRay.x264-HQX", "World War Z", "EXTENDED CUT", 2013)]
[TestCase("Sin.City.2005.RECUT.EXTENDED.German.DL.1080p.BluRay.x264-DETAiLS", "Sin City", "RECUT EXTENDED", 2005)]
[TestCase("Die.Klasse.von.1999.1990.German.720p.HDTV.x264-NORETAiL", "Die Klasse von 1999", "", 1990)] //year in the title
[TestCase("2.Tage.in.L.A.1996.GERMAN.DL.720p.WEB.H264-SOV", "2 Tage in L.A.", "", 1996)]
[TestCase("8.2019.GERMAN.720p.BluRay.x264-UNiVERSUM", "8", "", 2019)]
[TestCase("Life.Partners.2014.German.DL.PAL.DVDR-ETM", "Life Partners", "", 2014)]
[TestCase("Joe.Dreck.2.EXTENDED.EDITION.2015.German.DL.PAL.DVDR-ETM", "Joe Dreck 2", "EXTENDED EDITION", 2015)]
[TestCase("Rango.EXTENDED.2011.HDRip.AC3.German.XviD-POE", "Rango", "EXTENDED", 2011)]
[TestCase("Suicide.Squad.2016.EXTENDED.German.DL.AC3.BDRip.x264-hqc", "Suicide Squad", "EXTENDED", 2016)] //edition after year
public void should_parse_german_movie(string postTitle, string title, string edition, int year)
{
ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, false);
using (new AssertionScope())
{
movie.MovieTitle.Should().Be(title);
movie.Edition.Should().Be(edition);
movie.Year.Should().Be(year);
}
}
[TestCase("Der.Hobbit.Eine.Unerwartete.Reise.Extended.German.720p.BluRay.x264-EXQUiSiTE", "Der Hobbit Eine Unerwartete Reise", "Extended", 0)] //no year
[TestCase("Die.Unfassbaren.Now.You.See.Me.EXTENDED.German.DTS.720p.BluRay.x264-RHD", "Die Unfassbaren Now You See Me", "EXTENDED", 0)] //no year
[TestCase("Der.Soldat.James.German.Bluray.FuckYou.Pso.Why.cant.you.follow.scene.rules.1998", "Der Soldat James", "", 1998)]
[TestCase("Passengers.German.DL.AC3.Dubbed..BluRay.x264-PsO", "Passengers", "", 0)] //no year
[TestCase("Das.A.Team.Der.Film.Extended.Cut.German.720p.BluRay.x264-ANCIENT", "Das A Team Der Film", "Extended Cut", 0)] //no year
[TestCase("Cars.2.German.DL.720p.BluRay.x264-EmpireHD", "Cars 2", "", 0)] //no year
[TestCase("Der.Film.deines.Lebens.German.2011.PAL.DVDR-ETM", "Der Film deines Lebens", "", 2011)] //year at wrong position
public void should_parse_german_movie_lenient(string postTitle, string title, string edition, int year)
{
ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, true);
using (new AssertionScope())
{
movie.MovieTitle.Should().Be(title);
movie.Edition.Should().Be(edition);
movie.Year.Should().Be(year);
}
}
[TestCase("(1995) Ghost in the Shell", "Ghost in the Shell")]
public void should_parse_movie_folder_name(string postTitle, string title)
{

@ -16,10 +16,14 @@ namespace NzbDrone.Core.Parser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));
private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled);
private static readonly Regex ReportEditionRegex = new Regex(@"\(?\b(?<edition>(((Recut.|Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex[] ReportMovieTitleRegex = new[]
{
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011
new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*\(?\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?.{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*" + ReportEditionRegex + @".{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.2011.Special.Edition //TODO: Seems to slow down parsing heavily!
@ -48,7 +52,7 @@ namespace NzbDrone.Core.Parser
private static readonly Regex[] ReportMovieTitleLenientRegexBefore = new[]
{
//Some german or french tracker formats
new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(" + ReportEditionRegex + @".{1,3})?(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReportMovieTitleLenientRegexAfter = new Regex[]
@ -135,10 +139,6 @@ namespace NzbDrone.Core.Parser
private static readonly Regex RequestInfoRegex = new Regex(@"^(?:\[.+?\])+", RegexOptions.Compiled);
private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled);
private static readonly Regex ReportEditionRegex = new Regex(@"\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\)?\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
private static Dictionary<string, string> _umlautMappings = new Dictionary<string, string>
{
@ -488,13 +488,19 @@ namespace NzbDrone.Core.Parser
{
nextPart = parts[n + 1];
}
else
{
nextPart = "";
}
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n))
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _) &&
(previousAcronym || n < parts.Length - 1) &&
(previousAcronym || nextPart.Length != 1 || !int.TryParse(nextPart, out _)))
{
movieName += part + ".";
previousAcronym = true;
}
else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1))
else if (part.ToLower() == "a" && (previousAcronym || nextPart.Length == 1))
{
movieName += part + ".";
previousAcronym = true;

Loading…
Cancel
Save