New: Release parser improvements

pull/834/head
ta264 4 years ago
parent ad2b3e5cc5
commit 98611c7d02

@ -101,6 +101,7 @@ namespace NzbDrone.Core.Books
{ {
tc((a, t) => a.CleanName.FuzzyMatch(t), cleanTitle), tc((a, t) => a.CleanName.FuzzyMatch(t), cleanTitle),
tc((a, t) => a.Name.FuzzyMatch(t), title), tc((a, t) => a.Name.FuzzyMatch(t), title),
tc((a, t) => a.Name.ToSortName().FuzzyMatch(t), title),
tc((a, t) => a.Metadata.Value.Aliases.Concat(new List<string> { a.Name }).Max(x => x.CleanAuthorName().FuzzyMatch(t)), cleanTitle), tc((a, t) => a.Metadata.Value.Aliases.Concat(new List<string> { a.Name }).Max(x => x.CleanAuthorName().FuzzyMatch(t)), cleanTitle),
}; };
@ -151,7 +152,8 @@ namespace NzbDrone.Core.Books
var scoringFunctions = new List<Tuple<Func<Author, string, double>, string>> var scoringFunctions = new List<Tuple<Func<Author, string, double>, string>>
{ {
tc((a, t) => t.FuzzyContains(a.CleanName), cleanReportTitle), tc((a, t) => t.FuzzyContains(a.CleanName), cleanReportTitle),
tc((a, t) => t.FuzzyContains(a.Metadata.Value.Name), reportTitle) tc((a, t) => t.FuzzyContains(a.Metadata.Value.Name), reportTitle),
tc((a, t) => t.FuzzyContains(a.Metadata.Value.Name.ToSortName()), reportTitle)
}; };
return scoringFunctions; return scoringFunctions;
@ -162,7 +164,7 @@ namespace NzbDrone.Core.Books
var authors = GetAllAuthors(); var authors = GetAllAuthors();
var output = new List<Author>(); var output = new List<Author>();
foreach (var func in AuthorScoringFunctions(reportTitle, reportTitle.CleanAuthorName())) foreach (var func in ReportAuthorScoringFunctions(reportTitle, reportTitle.CleanAuthorName()))
{ {
output.AddRange(FindByStringInexact(authors, func.Item1, func.Item2)); output.AddRange(FindByStringInexact(authors, func.Item1, func.Item2));
} }

@ -30,12 +30,6 @@ namespace NzbDrone.Core.DecisionEngine.Specifications.Search
return Decision.Accept(); return Decision.Accept();
} }
if (Parser.Parser.CleanAuthorName(singleBookSpec.BookTitle) != Parser.Parser.CleanAuthorName(remoteBook.ParsedBookInfo.BookTitle))
{
_logger.Debug("Book does not match searched book title, skipping.");
return Decision.Reject("Wrong book");
}
if (!remoteBook.ParsedBookInfo.BookTitle.Any()) if (!remoteBook.ParsedBookInfo.BookTitle.Any())
{ {
_logger.Debug("Full discography result during single book search, skipping."); _logger.Debug("Full discography result during single book search, skipping.");

@ -356,6 +356,12 @@ namespace NzbDrone.Core.Parser
var bestBook = books.OrderByDescending(x => simpleTitle.FuzzyContains(x.Title)).First(); var bestBook = books.OrderByDescending(x => simpleTitle.FuzzyContains(x.Title)).First();
var foundAuthor = GetTitleFuzzy(simpleTitle, authorName, out var remainder); var foundAuthor = GetTitleFuzzy(simpleTitle, authorName, out var remainder);
if (foundAuthor == null)
{
foundAuthor = GetTitleFuzzy(simpleTitle, authorName.ToSortName(), out remainder);
}
var foundBook = GetTitleFuzzy(remainder, bestBook.Title, out _); var foundBook = GetTitleFuzzy(remainder, bestBook.Title, out _);
Logger.Trace($"Found {foundAuthor} - {foundBook} with fuzzy parser"); Logger.Trace($"Found {foundAuthor} - {foundBook} with fuzzy parser");
@ -405,14 +411,21 @@ namespace NzbDrone.Core.Parser
remainder = report; remainder = report;
Logger.Trace($"Finding '{name}' in '{report}'"); Logger.Trace($"Finding '{name}' in '{report}'");
var loc = report.ToLowerInvariant().FuzzyFind(name.ToLowerInvariant(), 0.6);
if (loc == -1) var (locStart, score) = report.ToLowerInvariant().FuzzyMatch(name.ToLowerInvariant(), 0.6);
if (locStart == -1)
{ {
return null; return null;
} }
Logger.Trace($"start '{loc}'"); var diff = (int)Math.Round((1.0 - score) * name.Length, 0);
var length = Math.Min(name.Length + diff, report.Length - locStart);
var reportReversed = new string(report.Substring(locStart, length).ToLowerInvariant().Reverse().ToArray());
var nameReversed = new string(name.ToLowerInvariant().Reverse().ToArray());
var locEnd = locStart + reportReversed.Length - reportReversed.FuzzyFind(nameReversed, 0.6);
var boundaries = WordDelimiterRegex.Matches(report); var boundaries = WordDelimiterRegex.Matches(report);
@ -454,11 +467,8 @@ namespace NzbDrone.Core.Parser
finishes.Add(report.Length - 1); finishes.Add(report.Length - 1);
} }
Logger.Trace(starts.ConcatToString(x => x.ToString())); var wordStart = starts.OrderBy(x => Math.Abs(x - locStart)).First();
Logger.Trace(finishes.ConcatToString(x => x.ToString())); var wordEnd = finishes.OrderBy(x => Math.Abs(x - locEnd)).First();
var wordStart = starts.OrderBy(x => Math.Abs(x - loc)).First();
var wordEnd = finishes.OrderBy(x => Math.Abs(x - (loc + name.Length))).First();
var found = report.Substring(wordStart, wordEnd - wordStart + 1); var found = report.Substring(wordStart, wordEnd - wordStart + 1);
@ -577,6 +587,11 @@ namespace NzbDrone.Core.Parser
return null; return null;
} }
/// <summary>
/// Converts a display name such as "Stephen King" into its sort form "King, Stephen".
/// </summary>
/// <param name="name">Author name in "Given Names Surname" order.</param>
/// <returns>
/// The name rearranged as "Surname, Given Names". Null, empty or whitespace-only input is
/// returned unchanged, and a single-word name is returned trimmed with no comma added.
/// </returns>
public static string ToSortName(this string name)
{
    // Nothing to rearrange; hand the input back rather than throwing on null.
    if (string.IsNullOrWhiteSpace(name))
    {
        return name;
    }

    // Trim first so stray leading/trailing spaces cannot produce results like ", Plato".
    var trimmed = name.Trim();

    // Split on the LAST space so every leading token stays with the given names:
    // "J. R. R. Tolkien" -> "Tolkien, J. R. R." (splitting on the first space
    // would yield "R. R. Tolkien, J.").
    var lastSpace = trimmed.LastIndexOf(' ');
    if (lastSpace < 0)
    {
        return trimmed;
    }

    var surname = trimmed.Substring(lastSpace + 1);

    // TrimEnd absorbs runs of spaces between the given names and the surname.
    var givenNames = trimmed.Substring(0, lastSpace).TrimEnd();

    // NOTE(review): generational suffixes ("Jr.", "III") are treated as the surname here;
    // confirm whether callers need special handling for them.
    return $"{surname}, {givenNames}";
}
public static string CleanAuthorName(this string name) public static string CleanAuthorName(this string name)
{ {
// If Title only contains numbers return it as is. // If Title only contains numbers return it as is.

@ -150,7 +150,8 @@ namespace NzbDrone.Core.Parser
if (searchCriteria != null) if (searchCriteria != null)
{ {
bookInfo = searchCriteria.Books.ExclusiveOrDefault(e => e.Title == bookTitle); var cleanTitle = Parser.CleanAuthorName(parsedBookInfo.BookTitle);
bookInfo = searchCriteria.Books.ExclusiveOrDefault(e => e.Title == bookTitle || e.CleanTitle == cleanTitle);
} }
if (bookInfo == null) if (bookInfo == null)

Loading…
Cancel
Save