Fixed: Mis-classification of releases as being Czech (#3378)

After the changes from PR #2948, the regex was too eager to match any
substring contain "SK", such as in "MASK". Fix by requiring word
separation around it, as was already the case with the "CZ" token.
pull/2/head
Václav Slavík 6 years ago committed by Leonardo Galli
parent 53f49f3b07
commit 264629cfa5

@ -49,6 +49,7 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("The Danish Girl 2015", Language.English)]
[TestCase("Nocturnal Animals (2016) MULTi VFQ English [1080p] BluRay x264-PopHD", Language.English, Language.French)]
[TestCase("Wonder.Woman.2017.720p.BluRay.DD5.1.x264-TayTO.CZ-FTU", Language.Czech)]
[TestCase("Fantastic.Beasts.The.Crimes.Of.Grindelwald.2018.2160p.WEBRip.x265.10bit.HDR.DD5.1-GASMASK", Language.English)]
public void should_parse_language(string postTitle, params Language[] languages)
{
var movieInfo = Parser.Parser.ParseMovieTitle(postTitle, true);

@ -13,7 +13,7 @@ namespace NzbDrone.Core.Parser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser));
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<czech>\bCZ|SK\b)",
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<czech>\b(?:CZ|SK)\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})$", RegexOptions.Compiled | RegexOptions.IgnoreCase);

Loading…
Cancel
Save