From e273f16c3905e0c2451f43cf98b9b7ad1cbdc777 Mon Sep 17 00:00:00 2001
From: Mark McDowall
Date: Fri, 26 May 2023 19:36:50 -0700
Subject: [PATCH] Fixed: Strip additional domains from release names

---
Review notes (text between the "---" marker above and the first "diff --git"
line is commentary only and does not become part of the commit):

* SingleEpisodeParserFixture.cs: adds one case expecting
  "Series Title - Temporada 2 [HDTV 720p][Cap.408]" to parse as title
  "Series Title", season 4, episode 8.
* UrlFixture.cs (new file): 15 cases asserting that ParseSeriesName(...)
  yields the series title without a leading domain, and 15 cases asserting
  that ParseReleaseGroup(...) returns the expected group (or null) when a
  bracketed domain trails the release name.
* Parser.cs, WebsitePrefixRegex: anchored at the start of the input; accepts
  an optional "[", an optional "www.", a host label of letters, digits and
  hyphens, then a suffix that is either two dotted labels of 2-6 letters
  (co.uk style), an "xn--" label, or any run of two or more letters. The
  domain must be followed by "]" or by at least two space/hyphen characters.
* Parser.cs, WebsitePostfixRegex: same optional "[" / "www." and host label,
  a single suffix label ("xn--..." or 2-6 letters), and a mandatory closing
  "]" at the end of the input. The opening "[" stays optional.
* NOTE(review): the last suffix alternative of the prefix pattern is any 2+
  letters, so a dotted title followed by " - " (for example
  "Mr.Robot - S01E01") also matches it. Confirm callers tolerate this.
* NOTE(review): leading whitespace of the C# lines below (in particular the
  alignment of the "string.Empty," continuation lines) and the position of
  the blank context lines are assumed. Verify against the target tree, or
  apply with --ignore-whitespace.
* NOTE(review): the From: header carries no e-mail address in this copy.

 .../ParserTests/SingleEpisodeParserFixture.cs |  1 +
 .../ParserTests/UrlFixture.cs                 | 54 +++++++++++++++++++
 src/NzbDrone.Core/Parser/Parser.cs            |  6 ++-
 3 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs

diff --git a/src/NzbDrone.Core.Test/ParserTests/SingleEpisodeParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/SingleEpisodeParserFixture.cs
index 1075631b9..a15ef0f79 100644
--- a/src/NzbDrone.Core.Test/ParserTests/SingleEpisodeParserFixture.cs
+++ b/src/NzbDrone.Core.Test/ParserTests/SingleEpisodeParserFixture.cs
@@ -160,6 +160,7 @@ namespace NzbDrone.Core.Test.ParserTests
         [TestCase("SeriesTitle-S16E08-10426008-0.mkv", "SeriesTitle", 16, 8)]
         [TestCase("Series-S07E12-31st_Century_Fox-[Bluray-1080p].mkv", "Series", 7, 12)]
         [TestCase("TheTitle-S12E13-3_Acts_of_God.mkv", "TheTitle", 12, 13)]
+        [TestCase("Series Title - Temporada 2 [HDTV 720p][Cap.408]", "Series Title", 4, 8)]
 
         // [TestCase("", "", 0, 0)]
         public void should_parse_single_episode(string postTitle, string title, int seasonNumber, int episodeNumber)
diff --git a/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs b/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs
new file mode 100644
index 000000000..ebc0e70bd
--- /dev/null
+++ b/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs
@@ -0,0 +1,54 @@
+using FluentAssertions;
+using NUnit.Framework;
+using NzbDrone.Core.Parser;
+using NzbDrone.Core.Test.Framework;
+
+namespace NzbDrone.Core.Test.ParserTests
+{
+    [TestFixture]
+    public class UrlFixture : CoreTest
+    {
+        [TestCase("[www.test.com] - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("test.net - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("[www.test-hyphen.com] - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("www.test123.org - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("[test.co.uk] - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("www.test-hyphen.net.au - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("[www.test123.co.nz] - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("test-hyphen123.org.au - Series.S03E14.720p.HDTV.X264-DIMENSION", "Series")]
+        [TestCase("[www.test123.de] - Mad Series - Season 1 [Bluray720p]", "Mad Series")]
+        [TestCase("www.test-hyphen.de - Mad Series - Season 1 [Bluray1080p]", "Mad Series")]
+        [TestCase("[test-hyphen123.co.za] - The Daily Series - 2023-05-26", "The Daily Series")]
+        [TestCase("www.test123.co.za - The Series Bros. (2006) - S01E01", "The Series Bros. (2006)")]
+        [TestCase("[www.test-hyphen.ca] - Series (2011) S01", "Series (2011)")]
+        [TestCase("test123.ca - Series Time S02 720p HDTV x264 CRON", "Series Time")]
+        [TestCase("[www.test-hyphen123.co.za] - Series Title S01E01", "Series Title")]
+
+        public void should_not_parse_url_in_name(string postTitle, string title)
+        {
+            var result = Parser.Parser.ParseSeriesName(postTitle).CleanSeriesTitle();
+            result.Should().Be(title.CleanSeriesTitle());
+        }
+
+        [TestCase("Series.2009.S01E14.English.HDTV.XviD-LOL[www.abb.com]", "LOL")]
+        [TestCase("Series 2009 S01E14 English HDTV XviD LOL[www.academy.org]", null)]
+        [TestCase("Series Now S05 EXTRAS DVDRip XviD RUNNER[www.aetna.net]", null)]
+        [TestCase("Series.Title.S01.EXTRAS.DVDRip.XviD-RUNNER[www.alfaromeo.io]", "RUNNER")]
+        [TestCase("2020.Series.2011.12.02.PDTV.XviD-C4TV[rarbg.to]", "C4TV")]
+        [TestCase("Series.Title.S01E14.English.HDTV.XviD-LOL[www.abbott.gov]", "LOL")]
+        [TestCase("Series 2020 S01E14 English HDTV XviD LOL[www.actor.org]", null)]
+        [TestCase("Series Live S05 EXTRAS DVDRip XviD RUNNER[www.agency.net]", null)]
+        [TestCase("Series.Title.S02.EXTRAS.DVDRip.XviD-RUNNER[www.airbus.io]", "RUNNER")]
+        [TestCase("2021.Series.2012.12.02.PDTV.XviD-C4TV[rarbg.to]", "C4TV")]
+        [TestCase("Series.2020.S01E14.English.HDTV.XviD-LOL[www.afl.com]", "LOL")]
+        [TestCase("Series 2021 S01E14 English HDTV XviD LOL[www.adult.org]", null)]
+        [TestCase("Series Future S05 EXTRAS DVDRip XviD RUNNER[www.allstate.net]", null)]
+        [TestCase("Series.Title.S03.EXTRAS.DVDRip.XviD-RUNNER[www.ally.io]", "RUNNER")]
+        [TestCase("2022.Series.2013.12.02.PDTV.XviD-C4TV[rarbg.to]", "C4TV")]
+
+        public void should_not_parse_url_in_group(string title, string expected)
+        {
+            Parser.Parser.ParseReleaseGroup(title).Should().Be(expected);
+        }
+    }
+}
diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs
index 3a7b1530f..49f25a2eb 100644
--- a/src/NzbDrone.Core/Parser/Parser.cs
+++ b/src/NzbDrone.Core/Parser/Parser.cs
@@ -465,11 +465,13 @@ namespace NzbDrone.Core.Parser
                                                                 string.Empty,
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 
-        private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^\[\s*[-a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net|org)[ -]*",
+        // Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt
+
+        private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*\]|[ -]{2,})[ -]*",
                                                                 string.Empty,
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 
-        private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"\[\s*[-a-z]+(\.[a-z0-9]+)+\s*\]$",
+        private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:xn--[a-z0-9-]{4,}|[a-z]{2,6})\b(?:\s*\])$",
                                                                 string.Empty,
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 