using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text.RegularExpressions; using NLog; using NzbDrone.Common.Extensions; using NzbDrone.Common.Instrumentation; using NzbDrone.Core.MediaFiles.MediaInfo; using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Languages; namespace NzbDrone.Core.Parser { public static class Parser { private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser)); private static readonly Regex[] ReportMusicTitleRegex = new[] { // Track with artist (01 - artist - trackName) new Regex(@"(?\d*){0,1}([-| ]{0,1})(?[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without artist (01 - trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without trackNumber or artist(trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without trackNumber and with artist(artist - trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track with artist and starting title (01 - artist - trackName) new Regex(@"(?\d*){0,1}[-| ]{0,1}(?[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; private static readonly Regex[] ReportAlbumTitleRegex = new[] { //Artist - Album (Year) Strict new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album (Year) new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album Year new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(\d{4}|\d{3})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist Discography new Regex(@"^(?.+?)\W*(?Discograghy|Discografia).+(?\d{4}).+(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album (Year) Strict new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album (Year) new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Artist - Album Year new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(\d{4}|\d{3})", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; private static readonly Regex[] ReportTitleRegex = new[] { //Anime - Absolute Episode Number + Title + Season+Episode //Todo: This currently breaks series that start with numbers // new Regex(@"^(?:(?\d{2,3})(?:_|-|\s|\.)+)+(?.+?)(?:\W|_)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)", // RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-Part episodes without a title (S01E05.S01E06) new Regex(@"^(?:\W*S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc) new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title Episode Absolute Episode Number ([SubGroup] Series Title Episode 01) new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title Absolute Episode Number + Season+Episode new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>\d{2,3}))+(?:_|-|\s|\.)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+).*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title Season+Episode + Absolute Episode Number new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title Season+Episode new Regex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:\s|\.).*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title with trailing number Absolute Episode Number new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title - Absolute Episode Number new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[. ]-[. ](?<absoluteepisode>\d{2,3}(?!\d+|[-])))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - [SubGroup] Title Absolute Episode Number new Regex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+\(?(?:[-_. ]?(?<absoluteepisode>\d{2,3}(?!\d+)))+\)?(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Season EpisodeNumber + Absolute Episode Number [SubGroup] new Regex(@"^(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))+).+?(?:[-_. ]?(?<absoluteepisode>(?<!\d+)\d{3}(?!\d+)))+.+?\[(?<subgroup>.+?)\](?:$|\.mkv)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Absolute Episode Number [SubGroup] new Regex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{3}(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Absolute Episode Number [Hash] new Regex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{2,3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?[-_. ]+.*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with airdate AND season/episode number, capture season/epsiode only new Regex(@"^(?<title>.+?)?\W*(?<airdate>\d{4}\W+[0-1][0-9]\W+[0-3][0-9])(?!\W+[0-3][0-9])[-_. ](?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with airdate AND season/episode number new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-episode Repeated (S01E05 - S01E06, 1x05 - 1x06, etc) new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc) new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title, 4 digit season number, Single episodes (S2016E05, etc) & Multi-episode (S2016E05E06, S2016E05-06, S2016E05 E06, etc) new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title, 4 digit season number, Single episodes (2016x05, etc) & Multi-episode (2016x05x06, 2016x05-06, 2016x05 x06, etc) new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:x|\Wx|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|x|\Wx|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Mini-Series with year in title, treated as season 1, episodes are labelled as Part01, Part 01, Part.1 new Regex(@"^(?<title>.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1 new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Mini-Series, treated as season 1, episodes are labelled as Part One/Two/Three/...Nine, Part.One, Part_One new Regex(@"^(?<title>.+?)(?:\W+(?:Part[-._ ](?<episode>One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(?>[-._ ])))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Mini-Series, treated as season 1, episodes are labelled as XofY new Regex(@"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports Season 01 Episode 03 new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:[-_\W](?<![()\[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-episode release with no space between series title and season (S01E11E12) new Regex(@"(?:.*(?:^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{2}(?!\d+))(?:E(?<episode>(?<!\d+)\d{2}(?!\d+)))+", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-episode with single episode numbers (S6.E1-E2, S6.E1E2, S6E1E2, etc) new Regex(@"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Single episode season or episode S1E1 or S1-E1 new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //3 digit season S010E05 new Regex(@"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{3}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //5 digit episode number with a title new Regex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //5 digit multi-episode with a title new Regex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:[-_. ]{1,3}ep){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))+", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Separated season and episode numbers S01 - E01 new Regex(@"^(?<title>.+?)(?:_|-|\s|\.)+S(?<season>\d{2}(?!\d+))(\W-\W)E(?<episode>(?<!\d+)\d{2}(?!\d+))(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Season only releases new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //4 digit season only releases new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{4}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title and season/episode in square brackets new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+\[S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+|i|p)))+\])\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports 103/113 naming new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with airdate new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9])", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports 1103/1113 naming new Regex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<season>(?<!\d+|\(|\[|e|x)\d{2})(?<episode>(?<!e|x)\d{2}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //4 digit episode number //Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc) new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //4 digit episode number //Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc) new Regex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with single digit episode number (S01E1, S01E5E6, etc) new Regex(@"^(?<title>.*?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //iTunes Season 1\05 Title (Quality).ext new Regex(@"^(?:Season(?:_|-|\s|\.)(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:_|-|\s|\.)(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Absolute Episode Number (e66) new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,3}))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Episode Absolute Episode Number (Series Title Episode 01) new Regex(@"^(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title Absolute Episode Number new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Anime - Title {Absolute Episode Number} new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Extant, terrible multi-episode naming (extant.10708.hdtv-lol.mp4) new Regex(@"^(?<title>.+?)[-_. ](?<season>[0]?\d?)(?:(?<episode>\d{2}){2}(?!\d+))[-_. ]", RegexOptions.IgnoreCase | RegexOptions.Compiled) }; private static readonly Regex[] RejectHashedReleasesRegex = new Regex[] { // Generic match for md5 and mixed-case hashes. new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled), // Generic match for shorter lower-case hashes. new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled), // Format seen on some NZBGeek releases // Be very strict with these coz they are very close to the valid 101 ep numbering. new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled), new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled), //Backup filename (Unknown origins) new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled), //123 - Started appearing December 2014 new Regex(@"^123$", RegexOptions.Compiled), //abc - Started appearing January 2015 new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase), //b00bs - Started appearing January 2015 new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase) }; //Regex to detect whether the title was reversed. private static readonly Regex ReversedTitleRegex = new Regex(@"[-._ ](p027|p0801|\d{2}E\d{2}S)[-._ ]", RegexOptions.Compiled); private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SimpleTitleRegex = new Regex(@"(?:(480|720|1080|2160|320)[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080|3840x2160|4096x2160|(8|10)b(it)?)\s*", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex WebsitePrefixRegex = new Regex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex AirDateRegex = new Regex(@"^(.*?)(?<!\d)((?<airyear>\d{4})[_.-](?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])|(?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])[_.-](?<airyear>\d{4}))(?!\d)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SixDigitAirDateRegex = new Regex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex CleanReleaseGroupRegex = new Regex(@"^(.*?[-._ ](S\d+E\d+)[-._ ])|(-(RP|1|NZBGeek|Obfuscated|Scrambled|sample))+$", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex CleanTorrentSuffixRegex = new Regex(@"\[(?:ettv|rartv|rarbg|cttv)\]$", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?<releasegroup>[a-z0-9]+)(?<!WEB-DL|480p|720p|1080p|2160p)(?:\b|[-._ ])", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex AnimeReleaseGroupRegex = new Regex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<spanish>\b(?:espaƱol|castellano)\b)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled); private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled); private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled); private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled); private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" }; public static ParsedTrackInfo ParseMusicPath(string path) { var fileInfo = new FileInfo(path); var file = TagLib.File.Create(path); var trackNumber = file.Tag.Track; var trackTitle = file.Tag.Title; var artist = file.Tag.FirstAlbumArtist; if (artist.IsNullOrWhiteSpace()) { artist = file.Tag.FirstPerformer; } var artistTitleInfo = new ArtistTitleInfo { Title = artist, Year = (int)file.Tag.Year }; var temp = new int[1]; temp[0] = (int)trackNumber; var result = new ParsedTrackInfo { Language = Language.English, //TODO Parse from Tag/Mediainfo AlbumTitle = file.Tag.Album, ArtistTitle = artist, ArtistMBId = file.Tag.MusicBrainzArtistId, AlbumMBId = file.Tag.MusicBrainzReleaseId, TrackMBId = file.Tag.MusicBrainzReleaseType, TrackNumbers = temp, ArtistTitleInfo = artistTitleInfo, Title = trackTitle }; foreach (TagLib.ICodec codec in file.Properties.Codecs) { TagLib.IAudioCodec acodec = codec as TagLib.IAudioCodec; TagLib.IVideoCodec vcodec = codec as TagLib.IVideoCodec; if (acodec != null && (acodec.MediaTypes & TagLib.MediaTypes.Audio) != TagLib.MediaTypes.None) { Logger.Debug("Audio Properties : " + acodec.Description); Logger.Debug("Bitrate: " + acodec.AudioBitrate); Logger.Debug("SampleRate: " + acodec.AudioSampleRate); Logger.Debug("Channels: " + acodec.AudioChannels + "\n"); result.Quality = QualityParser.ParseQuality(acodec.Description, acodec.AudioBitrate, acodec.AudioSampleRate); Logger.Debug("Quality parsed: {0}", result.Quality); } } // TODO: Check if it is common that we might need to fallback to parser to gather details //var result = ParseMusicTitle(fileInfo.Name); if (result == null) { Logger.Debug("Attempting to parse track info using directory and file names. {0}", fileInfo.Directory.Name); result = ParseMusicTitle(fileInfo.Directory.Name + " " + fileInfo.Name); } if (result == null) { Logger.Debug("Attempting to parse track info using directory name. {0}", fileInfo.Directory.Name); result = ParseMusicTitle(fileInfo.Directory.Name + fileInfo.Extension); } return result; } public static ParsedTrackInfo ParseMusicTitle(string title) { try { if (!ValidateBeforeParsing(title)) return null; Logger.Debug("Parsing string '{0}'", title); if (ReversedTitleRegex.IsMatch(title)) { var titleWithoutExtension = RemoveFileExtension(title).ToCharArray(); Array.Reverse(titleWithoutExtension); title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length); Logger.Debug("Reversed name detected. Converted to '{0}'", title); } var releaseTitle = RemoveFileExtension(title); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle, string.Empty); // TODO: Quick fix stripping [url] - prefixes. simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty); simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty); var airDateMatch = AirDateRegex.Match(simpleTitle); if (airDateMatch.Success) { simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value; } var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle); if (sixDigitAirDateMatch.Success) { var airYear = sixDigitAirDateMatch.Groups["airyear"].Value; var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value; var airDay = sixDigitAirDateMatch.Groups["airday"].Value; if (airMonth != "00" || airDay != "00") { var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay); simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate); } } foreach (var regex in ReportMusicTitleRegex) { var match = regex.Matches(simpleTitle); if (match.Count != 0) { Logger.Trace(regex); try { var result = ParseMatchMusicCollection(match); if (result != null) { //if (result.FullSeason && title.ContainsIgnoreCase("Special")) //{ // result.FullSeason = false; // result.Special = true; //} //result.Language = LanguageParser.ParseLanguage(title); //Logger.Debug("Language parsed: {0}", result.Language); result.Quality = QualityParser.ParseQuality(title); Logger.Debug("Quality parsed: {0}", result.Quality); // Majora: We don't currently need Release Group for Music. //result.ReleaseGroup = ParseReleaseGroup(title); //var subGroup = GetSubGroup(match); //if (!subGroup.IsNullOrWhiteSpace()) //{ // result.ReleaseGroup = subGroup; //} //Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup); //result.ReleaseHash = GetReleaseHash(match); //if (!result.ReleaseHash.IsNullOrWhiteSpace()) //{ // Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash); //} return result; } } catch (InvalidDateException ex) { Logger.Debug(ex, ex.Message); break; } } } } catch (Exception e) { if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc")) Logger.Error(e, "An error has occurred while trying to parse {0}", title); } Logger.Debug("Unable to parse {0}", title); return null; } public static ParsedAlbumInfo ParseAlbumTitle(string title) { try { if (!ValidateBeforeParsing(title)) return null; Logger.Debug("Parsing string '{0}'", title); if (ReversedTitleRegex.IsMatch(title)) { var titleWithoutExtension = RemoveFileExtension(title).ToCharArray(); Array.Reverse(titleWithoutExtension); title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length); Logger.Debug("Reversed name detected. Converted to '{0}'", title); } var releaseTitle = RemoveFileExtension(title); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle, string.Empty); // TODO: Quick fix stripping [url] - prefixes. simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty); simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty); var airDateMatch = AirDateRegex.Match(simpleTitle); if (airDateMatch.Success) { simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value; } var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle); if (sixDigitAirDateMatch.Success) { var airYear = sixDigitAirDateMatch.Groups["airyear"].Value; var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value; var airDay = sixDigitAirDateMatch.Groups["airday"].Value; if (airMonth != "00" || airDay != "00") { var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay); simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate); } } foreach (var regex in ReportAlbumTitleRegex) { var match = regex.Matches(simpleTitle); if (match.Count != 0) { Logger.Trace(regex); try { var result = ParseAlbumMatchCollection(match); if (result != null) { result.Language = LanguageParser.ParseLanguage(releaseTitle); Logger.Debug("Language parsed: {0}", result.Language); result.Quality = QualityParser.ParseQuality(title); Logger.Debug("Quality parsed: {0}", result.Quality); result.ReleaseGroup = ParseReleaseGroup(releaseTitle); var subGroup = GetSubGroup(match); if (!subGroup.IsNullOrWhiteSpace()) { result.ReleaseGroup = subGroup; } Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup); result.ReleaseHash = GetReleaseHash(match); if (!result.ReleaseHash.IsNullOrWhiteSpace()) { Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash); } return result; } } catch (InvalidDateException ex) { Logger.Debug(ex, ex.Message); break; } } } } catch (Exception e) { if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc")) Logger.Error(e, "An error has occurred while trying to parse {0}", title); } Logger.Debug("Unable to parse {0}", title); return null; } public static string CleanSeriesTitle(this string title) { long number = 0; //If Title only contains numbers return it as is. if (long.TryParse(title, out number)) return title; return NormalizeRegex.Replace(title, string.Empty).ToLower().RemoveAccent(); } public static string CleanArtistName(this string name) { long number = 0; //If Title only contains numbers return it as is. if (long.TryParse(name, out number)) return name; return NormalizeRegex.Replace(name, string.Empty).ToLower().RemoveAccent(); } public static string NormalizeEpisodeTitle(string title) { title = SpecialEpisodeWordRegex.Replace(title, string.Empty); title = PunctuationRegex.Replace(title, " "); title = DuplicateSpacesRegex.Replace(title, " "); return title.Trim() .ToLower(); } public static string NormalizeTitle(string title) { title = WordDelimiterRegex.Replace(title, " "); title = PunctuationRegex.Replace(title, string.Empty); title = CommonWordRegex.Replace(title, string.Empty); title = DuplicateSpacesRegex.Replace(title, " "); return title.Trim().ToLower(); } public static string ParseReleaseGroup(string title) { title = title.Trim(); title = RemoveFileExtension(title); title = WebsitePrefixRegex.Replace(title, ""); var animeMatch = AnimeReleaseGroupRegex.Match(title); if (animeMatch.Success) { return animeMatch.Groups["subgroup"].Value; } title = CleanReleaseGroupRegex.Replace(title, ""); var matches = ReleaseGroupRegex.Matches(title); if (matches.Count != 0) { var group = matches.OfType<Match>().Last().Groups["releasegroup"].Value; int groupIsNumeric; if (int.TryParse(group, out groupIsNumeric)) { return null; } return group; } return null; } public static string RemoveFileExtension(string title) { title = FileExtensionRegex.Replace(title, m => { var extension = m.Value.ToLower(); if (MediaFiles.MediaFileExtensions.Extensions.Contains(extension) || new[] { ".par2", ".nzb" }.Contains(extension)) { return string.Empty; } return m.Value; }); return title; } public static Language ParseLanguage(string title) { var lowerTitle = title.ToLower(); if (lowerTitle.Contains("english")) return Language.English; if (lowerTitle.Contains("french")) return Language.French; if (lowerTitle.Contains("spanish")) return Language.Spanish; if (lowerTitle.Contains("danish")) return Language.Danish; if (lowerTitle.Contains("dutch")) return Language.Dutch; if (lowerTitle.Contains("japanese")) return Language.Japanese; if (lowerTitle.Contains("cantonese")) return Language.Cantonese; if (lowerTitle.Contains("mandarin")) return Language.Mandarin; if (lowerTitle.Contains("korean")) return Language.Korean; if (lowerTitle.Contains("russian")) return Language.Russian; if (lowerTitle.Contains("polish")) return Language.Polish; if (lowerTitle.Contains("vietnamese")) return Language.Vietnamese; if (lowerTitle.Contains("swedish")) return Language.Swedish; if (lowerTitle.Contains("norwegian")) return Language.Norwegian; if (lowerTitle.Contains("nordic")) return Language.Norwegian; if (lowerTitle.Contains("finnish")) return Language.Finnish; if (lowerTitle.Contains("turkish")) return Language.Turkish; if (lowerTitle.Contains("portuguese")) return Language.Portuguese; if (lowerTitle.Contains("hungarian")) return Language.Hungarian; var match = LanguageRegex.Match(title); if (match.Groups["italian"].Captures.Cast<Capture>().Any()) return Language.Italian; if (match.Groups["german"].Captures.Cast<Capture>().Any()) return Language.German; if (match.Groups["flemish"].Captures.Cast<Capture>().Any()) return Language.Flemish; if (match.Groups["greek"].Captures.Cast<Capture>().Any()) return Language.Greek; if (match.Groups["spanish"].Captures.Cast<Capture>().Any()) return Language.Spanish; if (match.Groups["french"].Success) return Language.French; if (match.Groups["russian"].Success) return Language.Russian; if (match.Groups["dutch"].Success) return Language.Dutch; if (match.Groups["hungarian"].Success) return Language.Hungarian; return Language.English; } private static ParsedTrackInfo ParseMatchMusicCollection(MatchCollection matchCollection) { var artistName = matchCollection[0].Groups["artist"].Value./*Removed for cases like Will.I.Am Replace('.', ' ').*/Replace('_', ' '); artistName = RequestInfoRegex.Replace(artistName, "").Trim(' '); // Coppied from Radarr (https://github.com/Radarr/Radarr/blob/develop/src/NzbDrone.Core/Parser/Parser.cs) // TODO: Split into separate method and write unit tests for. var parts = artistName.Split('.'); artistName = ""; int n = 0; bool previousAcronym = false; string nextPart = ""; foreach (var part in parts) { if (parts.Length >= n + 2) { nextPart = parts[n + 1]; } if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n)) { artistName += part + "."; previousAcronym = true; } else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1)) { artistName += part + "."; previousAcronym = true; } else { if (previousAcronym) { artistName += " "; previousAcronym = false; } artistName += part + " "; } n++; } artistName = artistName.Trim(' '); int trackNumber; int.TryParse(matchCollection[0].Groups["trackNumber"].Value, out trackNumber); ParsedTrackInfo result = new ParsedTrackInfo(); result.ArtistTitle = artistName; result.ArtistTitleInfo = GetArtistTitleInfo(result.ArtistTitle); Logger.Debug("Episode Parsed. {0}", result); return result; } private static ArtistTitleInfo GetArtistTitleInfo(string title) { var artistTitleInfo = new ArtistTitleInfo(); artistTitleInfo.Title = title; //var match = YearInTitleRegex.Match(title); //if (!match.Success) //{ // artistTitleInfo.TitleWithoutYear = title; //} //else //{ // artistTitleInfo.TitleWithoutYear = match.Groups["title"].Value; // artistTitleInfo.Year = Convert.ToInt32(match.Groups["year"].Value); //} return artistTitleInfo; } public static string ParseArtistName(string title) { Logger.Debug("Parsing string '{0}'", title); var parseResult = ParseAlbumTitle(title); if (parseResult == null) { return CleanSeriesTitle(title); } return parseResult.ArtistName; } private static ParsedAlbumInfo ParseAlbumMatchCollection(MatchCollection matchCollection) { var artistName = matchCollection[0].Groups["artist"].Value.Replace('.', ' ').Replace('_', ' '); var albumTitle = matchCollection[0].Groups["album"].Value.Replace('.', ' ').Replace('_', ' '); artistName = RequestInfoRegex.Replace(artistName, "").Trim(' '); albumTitle = RequestInfoRegex.Replace(albumTitle, "").Trim(' '); int airYear; int.TryParse(matchCollection[0].Groups["year"].Value, out airYear); ParsedAlbumInfo result; result = new ParsedAlbumInfo(); result.ArtistName = artistName; result.AlbumTitle = albumTitle; result.ArtistTitleInfo = GetArtistTitleInfo(result.ArtistName); Logger.Debug("Album Parsed. {0}", result); return result; } private static bool ValidateBeforeParsing(string title) { if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc")) { Logger.Debug(""); return false; } if (!title.Any(char.IsLetterOrDigit)) { return false; } var titleWithoutExtension = RemoveFileExtension(title); if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension))) { Logger.Debug("Rejected Hashed Release Title: " + title); return false; } return true; } private static string GetSubGroup(MatchCollection matchCollection) { var subGroup = matchCollection[0].Groups["subgroup"]; if (subGroup.Success) { return subGroup.Value; } return string.Empty; } private static string GetReleaseHash(MatchCollection matchCollection) { var hash = matchCollection[0].Groups["hash"]; if (hash.Success) { var hashValue = hash.Value.Trim('[', ']'); if (hashValue.Equals("1280x720")) { return string.Empty; } return hashValue; } return string.Empty; } private static int ParseNumber(string value) { int number; if (int.TryParse(value, out number)) { return number; } number = Array.IndexOf(Numbers, value.ToLower()); if (number != -1) { return number; } throw new FormatException(string.Format("{0} isn't a number", value)); } } }