New: Remove websites in parentheses before parsing

(cherry picked from commit ea4fe392a0cc4774bb28c969fb3903db264c8d6c)

Closes #10114
pull/10115/head
Mark McDowall 7 months ago committed by Bogdan
parent 085b1db77f
commit bb4e185644

@@ -22,6 +22,7 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("[www.test-hyphen.ca] - Movie Title (2023)", "Movie Title")]
[TestCase("test123.ca - Movie Time 2023 720p HDTV x264 CRON", "Movie Time")]
[TestCase("[www.test-hyphen123.co.za] - Movie Title 2023", "Movie Title")]
+[TestCase("(movieawake.com) Movie Title 2023 [720p] [English Subbed]", "Movie Title")]
public void should_not_parse_url_in_name(string postTitle, string title)
{
var result = Parser.Parser.ParseMovieTitle(postTitle).MovieTitle.CleanMovieTitle();

@@ -121,7 +121,7 @@ namespace NzbDrone.Core.Parser
private static readonly Regex SimpleReleaseTitleRegex = new Regex(@"\s*(?:[<>?*|])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt
-private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?<!Naruto-Kun\.)(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*\]|[ -]{2,})[ -]*",
+private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:(?:\[|\()\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?<!Naruto-Kun\.)(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*(?:\]|\))|[ -]{2,})[ -]*",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);

Loading…
Cancel
Save