You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readarr/src/NzbDrone.Core/Parser/Parser.cs

906 lines
36 KiB

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Books;
using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.Parser
{
public static class Parser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));
private static readonly Regex[] ReportMusicTitleRegex = new[]
{
// Track with author (01 - author - trackName)
new Regex(@"(?<trackNumber>\d*){0,1}([-| ]{0,1})(?<author>[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?<trackName>[a-zA-Z0-9, ().&_]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Track without author (01 - trackName)
new Regex(@"(?<trackNumber>\d*)[-| .]{0,1}(?<trackName>[a-zA-Z0-9, ().&_]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Track without trackNumber or author(trackName)
new Regex(@"(?<trackNumber>\d*)[-| .]{0,1}(?<trackName>[a-zA-Z0-9, ().&_]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Track without trackNumber and with author(author - trackName)
new Regex(@"(?<trackNumber>\d*)[-| .]{0,1}(?<trackName>[a-zA-Z0-9, ().&_]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Track with author and starting title (01 - author - trackName)
new Regex(@"(?<trackNumber>\d*){0,1}[-| ]{0,1}(?<author>[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?<trackName>[a-zA-Z0-9, ().&_]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReportBookTitleRegex = new[]
{
//ruTracker - (Genre) [Source]? Author - Discography
new Regex(@"^(?:\(.+?\))(?:\W*(?:\[(?<source>.+?)\]))?\W*(?<author>.+?)(?: - )(?<discography>Discography|Discografia).+?(?<startyear>\d{4}).+?(?<endyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Discography with two years
new Regex(@"^(?<author>.+?)(?: - )(?:.+?)?(?<discography>Discography|Discografia).+?(?<startyear>\d{4}).+?(?<endyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Discography with end year
new Regex(@"^(?<author>.+?)(?: - )(?:.+?)?(?<discography>Discography|Discografia).+?(?<endyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author Discography with two years
new Regex(@"^(?<author>.+?)\W*(?<discography>Discography|Discografia).+?(?<startyear>\d{4}).+?(?<endyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author Discography with end year
new Regex(@"^(?<author>.+?)\W*(?<discography>Discography|Discografia).+?(?<endyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author Discography
new Regex(@"^(?<author>.+?)\W*(?<discography>Discography|Discografia)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//MyAnonaMouse - Title by Author [lang / pdf]
new Regex(@"^(?<book>.+)\bby\b(?<author>.+?)(?:\[|\()",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//ruTracker - (Genre) [Source]? Author - Book - Year
new Regex(@"^(?:\(.+?\))(?:\W*(?:\[(?<source>.+?)\]))?\W*(?<author>.+?)(?: - )(?<book>.+?)(?: - )(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book-Version-Source-Year
//ex. Imagine Dragons-Smoke And Mirrors-Deluxe Edition-2CD-FLAC-2015-JLM
new Regex(@"^(?<author>.+?)[-](?<book>.+?)[-](?:[\(|\[]?)(?<version>.+?(?:Edition)?)(?:[\)|\]]?)[-](?<source>\d?CD|WEB).+?(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book-Source-Year
//ex. Dani_Sbert-Togheter-WEB-2017-FURY
new Regex(@"^(?<author>.+?)[-](?<book>.+?)[-](?<source>\d?CD|WEB).+?(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Book (Year) Strict
new Regex(@"^(?:(?<author>.+?)(?: - )+)(?<book>.+?)\W*(?:\(|\[).+?(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Book (Year)
new Regex(@"^(?:(?<author>.+?)(?: - )+)(?<book>.+?)\W*(?:\(|\[)(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Book - Year [something]
new Regex(@"^(?:(?<author>.+?)(?: - )+)(?<book>.+?)\W*(?: - )(?<releaseyear>\d{4})\W*(?:\(|\[)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Book [something] or Author - Book (something)
new Regex(@"^(?:(?<author>.+?)(?: - )+)(?<book>.+?)\W*(?:\(|\[)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Book Year
new Regex(@"^(?:(?<author>.+?)(?: - )+)(?<book>.+?)\W*(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book (Year) Strict
//Hyphen no space between author and book
new Regex(@"^(?:(?<author>.+?)(?:-)+)(?<book>.+?)\W*(?:\(|\[).+?(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book (Year)
//Hyphen no space between author and book
new Regex(@"^(?:(?<author>.+?)(?:-)+)(?<book>.+?)\W*(?:\(|\[)(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book [something] or Author-Book (something)
//Hyphen no space between author and book
new Regex(@"^(?:(?<author>.+?)(?:-)+)(?<book>.+?)\W*(?:\(|\[)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book-something-Year
new Regex(@"^(?:(?<author>.+?)(?:-)+)(?<book>.+?)(?:-.+?)(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author-Book Year
//Hyphen no space between author and book
new Regex(@"^(?:(?<author>.+?)(?:-)+)(?:(?<book>.+?)(?:-)+)(?<releaseyear>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Author - Year - Book
// Hypen with no or more spaces between author/book/year
new Regex(@"^(?:(?<author>.+?)(?:-))(?<releaseyear>\d{4})(?:-)(?<book>[^-]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
{
// Generic match for md5 and mixed-case hashes.
new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
// Generic match for shorter lower-case hashes.
new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled),
// Format seen on some NZBGeek releases
// Be very strict with these coz they are very close to the valid 101 ep numbering.
new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled),
new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled),
//Backup filename (Unknown origins)
new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled),
//123 - Started appearing December 2014
new Regex(@"^123$", RegexOptions.Compiled),
//abc - Started appearing January 2015
new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase),
//b00bs - Started appearing January 2015
new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase)
};
private static readonly RegexReplace NormalizeRegex = new RegexReplace(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex PercentRegex = new Regex(@"(?<=\b\d+)%", RegexOptions.Compiled);
private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
//TODO Rework this Regex for Music
private static readonly RegexReplace SimpleTitleRegex = new RegexReplace(@"(?:(480|720|1080|2160|320)[ip]|[xh][\W_]?26[45]|DD\W?5\W1|848x480|1280x720|1920x1080|3840x2160|4096x2160|(8|10)b(it)?)\s*",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt
private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*\]|[ -]{2,})[ -]*",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:xn--[a-z0-9-]{4,}|[a-z]{2,6})\b(?:\s*\])$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex AirDateRegex = new Regex(@"^(.*?)(?<!\d)((?<airyear>\d{4})[_.-](?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])|(?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])[_.-](?<airyear>\d{4}))(?!\d)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SixDigitAirDateRegex = new Regex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace CleanReleaseGroupRegex = new RegexReplace(@"^(.*?[-._ ])|(-(RP|1|NZBGeek|Obfuscated|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet))+$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace CleanTorrentSuffixRegex = new RegexReplace(@"\[(?:ettv|rartv|rarbg|cttv)\]$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?<releasegroup>[a-z0-9]+)(?<!MP3|ALAC|FLAC|WEB)(?:\b|[-._ ])",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex AnimeReleaseGroupRegex = new Regex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly HashSet<char> WordDelimiters = new HashSet<char>(" .,_-=()[]|\"`'");
4 years ago
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\(|\)|\[|\]|\|)+", RegexOptions.Compiled);
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled);
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
private static readonly Regex[] CommonTagRegex = new Regex[]
{
new Regex(@"(\[|\()*\b((featuring|feat.|feat|ft|ft.)\s{1}){1}\s*.*(\]|\))*", RegexOptions.IgnoreCase | RegexOptions.Compiled),
new Regex(@"(?:\(|\[)(?:[^\(\[]*)(?:version|limited|deluxe|single|clean|book|special|bonus|promo|remastered)(?:[^\)\]]*)(?:\)|\])", RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
private static readonly Regex[] BracketRegex = new Regex[]
{
new Regex(@"\(.*\)", RegexOptions.Compiled),
new Regex(@"\[.*\]", RegexOptions.Compiled)
};
private static readonly Regex AfterDashRegex = new Regex(@"[-:].*", RegexOptions.Compiled);
public static ParsedTrackInfo ParseMusicPath(string path)
{
var fileInfo = new FileInfo(path);
ParsedTrackInfo result = null;
Logger.Debug("Attempting to parse book info using directory and file names. {0}", fileInfo.Directory.Name);
result = ParseTitle(fileInfo.Directory.Name + " " + fileInfo.Name);
if (result == null)
{
Logger.Debug("Attempting to parse book info using directory name. {0}", fileInfo.Directory.Name);
result = ParseTitle(fileInfo.Directory.Name + fileInfo.Extension);
}
return result;
}
public static ParsedTrackInfo ParseTitle(string title)
{
try
{
if (!ValidateBeforeParsing(title))
{
return null;
}
Logger.Debug("Parsing string '{0}'", title);
var releaseTitle = RemoveFileExtension(title);
releaseTitle = releaseTitle.Replace("【", "[").Replace("】", "]");
var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
// TODO: Quick fix stripping [url] - prefixes.
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle);
simpleTitle = WebsitePostfixRegex.Replace(simpleTitle);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle);
var airDateMatch = AirDateRegex.Match(simpleTitle);
if (airDateMatch.Success)
{
simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value;
}
var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
if (sixDigitAirDateMatch.Success)
{
var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
var airDay = sixDigitAirDateMatch.Groups["airday"].Value;
if (airMonth != "00" || airDay != "00")
{
var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay);
simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
}
}
foreach (var regex in ReportMusicTitleRegex)
{
var match = regex.Matches(simpleTitle);
if (match.Count != 0)
{
Logger.Trace(regex);
try
{
var result = ParseMatchMusicCollection(match);
if (result != null)
{
4 years ago
result.Quality = QualityParser.ParseQuality(title);
Logger.Debug("Quality parsed: {0}", result.Quality);
return result;
}
}
catch (InvalidDateException ex)
{
Logger.Debug(ex, ex.Message);
break;
}
}
}
}
catch (Exception e)
{
if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc"))
{
Logger.Error(e, "An error has occurred while trying to parse {0}", title);
}
}
Logger.Debug("Unable to parse {0}", title);
return null;
}
public static ParsedBookInfo ParseBookTitleWithSearchCriteria(string title, Author author, List<Book> books)
{
try
{
if (!ValidateBeforeParsing(title))
{
return null;
}
var authorName = author.Name == "Various Authors" ? "VA" : author.Name.RemoveAccent();
Logger.Debug("Parsing string '{0}' using search criteria author: '{1}' books: '{2}'",
title,
authorName.RemoveAccent(),
string.Join(", ", books.Select(a => a.Title.RemoveAccent())));
var releaseTitle = RemoveFileExtension(title);
var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle);
simpleTitle = WebsitePostfixRegex.Replace(simpleTitle);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle);
var bestBook = books
.OrderByDescending(x => simpleTitle.FuzzyMatch(x.Editions.Value.Single(x => x.Monitored).Title, wordDelimiters: WordDelimiters))
.First()
.Editions.Value
.Single(x => x.Monitored);
var foundAuthor = GetTitleFuzzy(simpleTitle, authorName, out var remainder);
if (foundAuthor == null)
{
foundAuthor = GetTitleFuzzy(simpleTitle, authorName.ToLastFirst(), out remainder);
}
var foundBook = GetTitleFuzzy(remainder, bestBook.Title, out _);
if (foundBook == null)
{
foundBook = GetTitleFuzzy(remainder, bestBook.Title.SplitBookTitle(authorName).Item1, out _);
}
Logger.Trace($"Found {foundAuthor} - {foundBook} with fuzzy parser");
if (foundAuthor == null || foundBook == null)
{
4 years ago
return null;
}
var result = new ParsedBookInfo
4 years ago
{
AuthorName = foundAuthor,
AuthorTitleInfo = GetAuthorTitleInfo(foundAuthor),
BookTitle = foundBook
4 years ago
};
4 years ago
try
{
result.Quality = QualityParser.ParseQuality(title);
Logger.Debug("Quality parsed: {0}", result.Quality);
4 years ago
result.ReleaseGroup = ParseReleaseGroup(releaseTitle);
4 years ago
Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);
4 years ago
return result;
}
catch (InvalidDateException ex)
{
Logger.Debug(ex, ex.Message);
}
}
catch (Exception e)
{
if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc"))
{
Logger.Error(e, "An error has occurred while trying to parse {0}", title);
}
}
Logger.Debug("Unable to parse {0}", title);
return null;
}
public static string GetTitleFuzzy(string report, string name, out string remainder)
4 years ago
{
remainder = report;
Logger.Trace($"Finding '{name}' in '{report}'");
var (locStart, matchLength, score) = report.ToLowerInvariant().FuzzyMatch(name.ToLowerInvariant(), 0.6, WordDelimiters);
if (locStart == -1)
4 years ago
{
return null;
}
var found = report.Substring(locStart, matchLength);
if (score >= 0.8)
4 years ago
{
remainder = report.Remove(locStart, matchLength);
4 years ago
return found.Replace('.', ' ').Replace('_', ' ');
}
return null;
}
public static ParsedBookInfo ParseBookTitle(string title)
{
try
{
if (!ValidateBeforeParsing(title))
{
return null;
}
Logger.Debug("Parsing string '{0}'", title);
var releaseTitle = RemoveFileExtension(title);
var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
// TODO: Quick fix stripping [url] - prefixes.
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle);
simpleTitle = WebsitePostfixRegex.Replace(simpleTitle);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle);
var airDateMatch = AirDateRegex.Match(simpleTitle);
if (airDateMatch.Success)
{
simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value;
}
var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
if (sixDigitAirDateMatch.Success)
{
var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
var airDay = sixDigitAirDateMatch.Groups["airday"].Value;
if (airMonth != "00" || airDay != "00")
{
var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay);
simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
}
}
foreach (var regex in ReportBookTitleRegex)
{
var match = regex.Matches(simpleTitle);
if (match.Count != 0)
{
Logger.Trace(regex);
try
{
var result = ParseBookMatchCollection(match, releaseTitle);
if (result != null)
{
4 years ago
result.Quality = QualityParser.ParseQuality(title);
Logger.Debug("Quality parsed: {0}", result.Quality);
result.ReleaseGroup = ParseReleaseGroup(releaseTitle);
var subGroup = GetSubGroup(match);
if (!subGroup.IsNullOrWhiteSpace())
{
result.ReleaseGroup = subGroup;
}
Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);
result.ReleaseHash = GetReleaseHash(match);
if (!result.ReleaseHash.IsNullOrWhiteSpace())
{
Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
}
return result;
}
}
catch (InvalidDateException ex)
{
Logger.Debug(ex, ex.Message);
break;
}
}
}
}
catch (Exception e)
{
if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc"))
{
Logger.Error(e, "An error has occurred while trying to parse {0}", title);
}
}
Logger.Debug("Unable to parse {0}", title);
return null;
}
public static (string, string) SplitBookTitle(this string book, string author)
{
// Strip author from title, eg Tom Clancy: Ghost Protocol
if (book.StartsWith($"{author}:"))
{
book = book.Split(':', 2)[1].Trim();
}
var parenthesis = book.IndexOf('(');
var colon = book.IndexOf(':');
string[] parts = null;
if (parenthesis > -1)
{
var endParenthesis = book.IndexOf(')', parenthesis);
if (endParenthesis == -1 || !book.Substring(parenthesis + 1, endParenthesis - parenthesis).Contains(' '))
{
parenthesis = -1;
}
}
if (colon > -1 && parenthesis > -1)
{
if (colon < parenthesis)
{
parts = book.Split(':', 2);
}
else
{
parts = book.Split('(', 2);
parts[1] = parts[1].TrimEnd(')');
}
}
else if (colon > -1)
{
parts = book.Split(':', 2);
}
else if (parenthesis > -1)
{
parts = book.Split('(');
parts[1] = parts[1].TrimEnd(')');
}
if (parts != null)
{
return (parts[0].Trim(), parts[1].TrimEnd(':').Trim());
}
return (book, string.Empty);
}
public static string CleanAuthorName(this string name)
{
if (name.IsNullOrWhiteSpace())
{
return string.Empty;
}
// If Title only contains numbers return it as is.
if (long.TryParse(name, out _))
{
return name;
}
name = PercentRegex.Replace(name, "percent");
return NormalizeRegex.Replace(name).ToLower().RemoveAccent();
}
public static string NormalizeTrackTitle(this string title)
{
title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
title = PunctuationRegex.Replace(title, " ");
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim().ToLower();
}
public static string NormalizeTitle(string title)
{
title = WordDelimiterRegex.Replace(title, " ");
title = PunctuationRegex.Replace(title, string.Empty);
title = CommonWordRegex.Replace(title, string.Empty);
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim().ToLower();
}
public static string ParseReleaseGroup(string title)
{
title = title.Trim();
title = RemoveFileExtension(title);
title = WebsitePrefixRegex.Replace(title);
var animeMatch = AnimeReleaseGroupRegex.Match(title);
if (animeMatch.Success)
{
return animeMatch.Groups["subgroup"].Value;
}
title = CleanReleaseGroupRegex.Replace(title);
var matches = ReleaseGroupRegex.Matches(title);
if (matches.Count != 0)
{
var group = matches.OfType<Match>().Last().Groups["releasegroup"].Value;
if (int.TryParse(group, out _))
{
return null;
}
return group;
}
return null;
}
public static string RemoveFileExtension(string title)
{
title = FileExtensionRegex.Replace(title, m =>
{
var extension = m.Value.ToLower();
4 years ago
if (MediaFiles.MediaFileExtensions.AllExtensions.Contains(extension) || new[] { ".par2", ".nzb" }.Contains(extension))
{
return string.Empty;
}
return m.Value;
});
return title;
}
public static string CleanBookTitle(this string book)
{
return CommonTagRegex[1].Replace(book, string.Empty).Trim();
}
public static string RemoveBracketsAndContents(this string book)
{
var intermediate = book;
foreach (var regex in BracketRegex)
{
intermediate = regex.Replace(intermediate, string.Empty).Trim();
}
return intermediate;
}
public static string RemoveAfterDash(this string text)
{
return AfterDashRegex.Replace(text, string.Empty).Trim();
}
Whole album matching and fingerprinting (#592) * Cache result of GetAllArtists * Fixed: Manual import not respecting album import notifications * Fixed: partial album imports stay in queue, prompting manual import * Fixed: Allow release if tracks are missing * Fixed: Be tolerant of missing/extra "The" at start of artist name * Improve manual import UI * Omit video tracks from DB entirely * Revert "faster test packaging in build.sh" This reverts commit 2723e2a7b86bcbff9051fd2aced07dd807b4bcb7. -u and -T are not supported on macOS * Fix tests on linux and macOS * Actually lint on linux On linux yarn runs scripts with sh not bash so ** doesn't recursively glob * Match whole albums * Option to disable fingerprinting * Rip out MediaInfo * Don't split up things that have the same album selected in manual import * Try to speed up IndentificationService * More speedups * Some fixes and increase power of recording id * Fix NRE when no tags * Fix NRE when some (but not all) files in a directory have missing tags * Bump taglib, tidy up tag parsing * Add a health check * Remove media info setting * Tags -> audioTags * Add some tests where tags are null * Rename history events * Add missing method to interface * Reinstate MediaInfo tags and update info with artist scan Also adds migration to remove old format media info * This file no longer exists * Don't penalise year if missing from tags * Formatting improvements * Use correct system newline * Switch to the netstandard2.0 library to support net 461 * TagLib.File is IDisposable so should be in a using * Improve filename matching and add tests * Neater logging of parsed tags * Fix disk scan tests for new media info update * Fix quality detection source * Fix Inexact Artist/Album match * Add button to clear track mapping * Fix warning * Pacify eslint * Use \ not / * Fix UI updates * Fix media covers Prevent localizing URL propaging back to the metadata object * Reduce database overhead broadcasting UI updates * Relax timings a bit to make test pass * Remove irrelevant tests * Test framework for identification service * Fix PreferMissingToBadMatch test case * Make fingerprinting more robust * More logging * Penalize unknown media format and country * Prefer USA to UK * Allow Data CD * Fix exception if fingerprinting fails for all files * Fix tests * Fix NRE * Allow apostrophes and remove accents in filename aggregation * Address codacy issues * Cope with old versions of fpcalc and suggest upgrade * fpcalc health check passes if fingerprinting disabled * Get the Artist meta with the artist * Fix the mapper so that lazy loaded lists will be populated on Join And therefore we can join TrackFiles on Tracks by default and avoid an extra query * Rename subtitle -> lyric * Tidy up MediaInfoFormatter
5 years ago
public static string CleanTrackTitle(this string title)
{
var intermediateTitle = title;
foreach (var regex in CommonTagRegex)
{
intermediateTitle = regex.Replace(intermediateTitle, string.Empty).Trim();
}
return intermediateTitle;
}
private static ParsedTrackInfo ParseMatchMusicCollection(MatchCollection matchCollection)
{
var authorName = matchCollection[0].Groups["author"].Value./*Removed for cases like Will.I.Am Replace('.', ' ').*/Replace('_', ' ');
authorName = RequestInfoRegex.Replace(authorName, "").Trim(' ');
// Coppied from Radarr (https://github.com/Radarr/Radarr/blob/develop/src/NzbDrone.Core/Parser/Parser.cs)
// TODO: Split into separate method and write unit tests for.
var parts = authorName.Split('.');
authorName = "";
var n = 0;
var previousAcronym = false;
var nextPart = "";
foreach (var part in parts)
{
if (parts.Length >= n + 2)
{
nextPart = parts[n + 1];
}
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n))
{
authorName += part + ".";
previousAcronym = true;
}
else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1))
{
authorName += part + ".";
previousAcronym = true;
}
else
{
if (previousAcronym)
{
authorName += " ";
previousAcronym = false;
}
authorName += part + " ";
}
n++;
}
authorName = authorName.Trim(' ');
var result = new ParsedTrackInfo();
result.Authors = new List<string> { authorName };
Logger.Debug("Track Parsed. {0}", result);
return result;
}
private static AuthorTitleInfo GetAuthorTitleInfo(string title)
{
var authorTitleInfo = new AuthorTitleInfo();
authorTitleInfo.Title = title;
return authorTitleInfo;
}
public static string ParseAuthorName(string title)
{
Logger.Debug("Parsing string '{0}'", title);
var parseResult = ParseBookTitle(title);
if (parseResult == null)
{
return CleanAuthorName(title);
}
return parseResult.AuthorName;
}
private static ParsedBookInfo ParseBookMatchCollection(MatchCollection matchCollection, string releaseTitle)
{
var authorName = matchCollection[0].Groups["author"].Value.Replace('.', ' ').Replace('_', ' ');
var bookTitle = matchCollection[0].Groups["book"].Value.Replace('.', ' ').Replace('_', ' ');
var releaseVersion = matchCollection[0].Groups["version"].Value.Replace('.', ' ').Replace('_', ' ');
authorName = RequestInfoRegex.Replace(authorName, "").Trim(' ');
bookTitle = RequestInfoRegex.Replace(bookTitle, "").Trim(' ');
releaseVersion = RequestInfoRegex.Replace(releaseVersion, "").Trim(' ');
int.TryParse(matchCollection[0].Groups["releaseyear"].Value, out var releaseYear);
ParsedBookInfo result;
result = new ParsedBookInfo
{
ReleaseTitle = releaseTitle
};
result.AuthorName = authorName;
result.BookTitle = bookTitle;
result.AuthorTitleInfo = GetAuthorTitleInfo(result.AuthorName);
result.ReleaseDate = releaseYear.ToString();
result.ReleaseVersion = releaseVersion;
if (matchCollection[0].Groups["discography"].Success)
{
int.TryParse(matchCollection[0].Groups["startyear"].Value, out var discStart);
int.TryParse(matchCollection[0].Groups["endyear"].Value, out var discEnd);
result.Discography = true;
if (discStart > 0 && discEnd > 0)
{
result.DiscographyStart = discStart;
result.DiscographyEnd = discEnd;
}
else if (discEnd > 0)
{
result.DiscographyEnd = discEnd;
}
result.BookTitle = "Discography";
}
Logger.Debug("Book Parsed. {0}", result);
return result;
}
private static bool ValidateBeforeParsing(string title)
{
if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc"))
{
Logger.Debug("");
return false;
}
if (!title.Any(char.IsLetterOrDigit))
{
return false;
}
var titleWithoutExtension = RemoveFileExtension(title);
if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
{
Logger.Debug("Rejected Hashed Release Title: " + title);
return false;
}
return true;
}
private static string GetSubGroup(MatchCollection matchCollection)
{
var subGroup = matchCollection[0].Groups["subgroup"];
if (subGroup.Success)
{
return subGroup.Value;
}
return string.Empty;
}
private static string GetReleaseHash(MatchCollection matchCollection)
{
var hash = matchCollection[0].Groups["hash"];
if (hash.Success)
{
var hashValue = hash.Value.Trim('[', ']');
if (hashValue.Equals("1280x720"))
{
return string.Empty;
}
return hashValue;
}
return string.Empty;
}
private static int ParseNumber(string value)
{
if (int.TryParse(value, out var number))
{
return number;
}
number = Array.IndexOf(Numbers, value.ToLower());
if (number != -1)
{
return number;
}
throw new FormatException(string.Format("{0} isn't a number", value));
}
}
}