From 4541d3d3b0de6e684f07330cd9e321a6ba6635eb Mon Sep 17 00:00:00 2001
From: ta264
Date: Thu, 29 Jul 2021 21:28:04 +0100
Subject: [PATCH] Fixed: Parse search results using edition titles also

Fixes #1154
---
Notes (text between the three-dash line and the diffstat is ignored by git am):
 * Edition lookups only consider monitored editions: EditionRepository.FindByTitle
   filters on Monitored, and EditionService always calls
   FindByAuthorMetadataId(authorMetadataId, true).
 * Parser.cs picks the monitored edition with Single(...), which throws
   InvalidOperationException unless a book has exactly one monitored edition.
 * ParsingService: the edition loop records bestScore when a candidate wins.
   The declaration of bestScore is outside the hunks below and is assumed to be
   an assignable local.

 .../DownloadDecisionMakerFixture.cs           | 12 ++-
 .../ParserTests/ParserFixture.cs              |  7 +-
 .../Books/Repositories/EditionRepository.cs   | 24 ++++++
 .../Books/Services/EditionService.cs          | 75 +++++++++++++++++++
 src/NzbDrone.Core/Parser/Parser.cs            |  6 +-
 src/NzbDrone.Core/Parser/ParsingService.cs    | 33 ++++++++
 6 files changed, 153 insertions(+), 4 deletions(-)

diff --git a/src/NzbDrone.Core.Test/DecisionEngineTests/DownloadDecisionMakerFixture.cs b/src/NzbDrone.Core.Test/DecisionEngineTests/DownloadDecisionMakerFixture.cs
index 12f34e453..6f4b1e37e 100644
--- a/src/NzbDrone.Core.Test/DecisionEngineTests/DownloadDecisionMakerFixture.cs
+++ b/src/NzbDrone.Core.Test/DecisionEngineTests/DownloadDecisionMakerFixture.cs
@@ -187,7 +187,17 @@ namespace NzbDrone.Core.Test.DecisionEngineTests
             _reports[0].Title = "1937 - Snow White and the Seven Dwarves";
 
             var author = new Author { Name = "Some Author" };
-            var books = new List<Book> { new Book { Title = "Some Book" } };
+            var books = new List<Book>
+            {
+                new Book
+                {
+                    Title = "Some Book",
+                    Editions = new List<Edition>
+                    {
+                        new Edition { Title = "Some Edition Title" }
+                    }
+                }
+            };
 
             Subject.GetSearchDecision(_reports, new BookSearchCriteria { Author = author, Books = books }).ToList();
 
diff --git a/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs
index 77bf8c4af..44f05f161 100644
--- a/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs
+++ b/src/NzbDrone.Core.Test/ParserTests/ParserFixture.cs
@@ -29,8 +29,11 @@ namespace NzbDrone.Core.Test.ParserTests
         private void GivenSearchCriteria(string authorName, string bookTitle)
         {
             _author.Name = authorName;
-            var a = new Book();
-            a.Title = bookTitle;
+            var a = new Book
+            {
+                Title = bookTitle,
+                Editions = new List<Edition> { new Edition { Title = bookTitle, Monitored = true } }
+            };
             _books.Add(a);
         }
 
diff --git a/src/NzbDrone.Core/Books/Repositories/EditionRepository.cs b/src/NzbDrone.Core/Books/Repositories/EditionRepository.cs
index ba01413da..b7d767c64 100644
--- a/src/NzbDrone.Core/Books/Repositories/EditionRepository.cs
+++ b/src/NzbDrone.Core/Books/Repositories/EditionRepository.cs
@@ -12,6 +12,8 @@ namespace NzbDrone.Core.Books
         Edition FindByForeignEditionId(string foreignEditionId);
         List<Edition> FindByBook(int id);
         List<Edition> FindByAuthor(int id);
+        List<Edition> FindByAuthorMetadataId(int id, bool onlyMonitored);
+        Edition FindByTitle(int authorMetadataId, string title);
         List<Edition> GetEditionsForRefresh(int bookId, IEnumerable<string> foreignEditionIds);
         List<Edition> SetMonitored(Edition edition);
     }
@@ -63,6 +65,28 @@ namespace NzbDrone.Core.Books
                 .Where<Author>(a => a.Id == id));
         }
 
+        public List<Edition> FindByAuthorMetadataId(int authorMetadataId, bool onlyMonitored)
+        {
+            var builder = Builder().Join<Edition, Book>((e, b) => e.BookId == b.Id)
+                .Where<Book>(b => b.AuthorMetadataId == authorMetadataId);
+
+            if (onlyMonitored)
+            {
+                builder = builder.Where<Edition>(e => e.Monitored == true);
+            }
+
+            return Query(builder);
+        }
+
+        public Edition FindByTitle(int authorMetadataId, string title)
+        {
+            return Query(Builder().Join<Edition, Book>((e, b) => e.BookId == b.Id)
+                .Where<Book>(b => b.AuthorMetadataId == authorMetadataId)
+                .Where<Edition>(e => e.Monitored == true)
+                .Where<Edition>(e => e.Title == title))
+                .FirstOrDefault();
+        }
+
         public List<Edition> SetMonitored(Edition edition)
         {
             var allEditions = FindByBook(edition.BookId);
diff --git a/src/NzbDrone.Core/Books/Services/EditionService.cs b/src/NzbDrone.Core/Books/Services/EditionService.cs
index 6e2048736..81fd1563b 100644
--- a/src/NzbDrone.Core/Books/Services/EditionService.cs
+++ b/src/NzbDrone.Core/Books/Services/EditionService.cs
@@ -1,7 +1,10 @@
+using System;
 using System.Collections.Generic;
 using System.Linq;
+using NzbDrone.Common.Extensions;
 using NzbDrone.Core.Books.Events;
 using NzbDrone.Core.Messaging.Events;
+using NzbDrone.Core.Parser;
 
 namespace NzbDrone.Core.Books
 {
@@ -16,6 +19,9 @@ namespace NzbDrone.Core.Books
         List<Edition> GetEditionsForRefresh(int bookId, IEnumerable<string> foreignEditionIds);
         List<Edition> GetEditionsByBook(int bookId);
         List<Edition> GetEditionsByAuthor(int authorId);
+        Edition FindByTitle(int authorMetadataId, string title);
+        Edition FindByTitleInexact(int authorMetadataId, string title);
+        List<Edition> GetCandidates(int authorMetadataId, string title);
         List<Edition> SetMonitored(Edition edition);
     }
 
@@ -81,6 +87,40 @@ namespace NzbDrone.Core.Books
             return _editionRepository.FindByAuthor(authorId);
         }
 
+        public Edition FindByTitle(int authorMetadataId, string title)
+        {
+            return _editionRepository.FindByTitle(authorMetadataId, title);
+        }
+
+        public Edition FindByTitleInexact(int authorMetadataId, string title)
+        {
+            var books = _editionRepository.FindByAuthorMetadataId(authorMetadataId, true);
+
+            foreach (var func in EditionScoringFunctions(title))
+            {
+                var results = FindByStringInexact(books, func.Item1, func.Item2);
+                if (results.Count == 1)
+                {
+                    return results[0];
+                }
+            }
+
+            return null;
+        }
+
+        public List<Edition> GetCandidates(int authorMetadataId, string title)
+        {
+            var books = _editionRepository.FindByAuthorMetadataId(authorMetadataId, true);
+            var output = new List<Edition>();
+
+            foreach (var func in EditionScoringFunctions(title))
+            {
+                output.AddRange(FindByStringInexact(books, func.Item1, func.Item2));
+            }
+
+            return output.DistinctBy(x => x.Id).ToList();
+        }
+
         public List<Edition> SetMonitored(Edition edition)
         {
             return _editionRepository.SetMonitored(edition);
@@ -91,5 +131,40 @@ namespace NzbDrone.Core.Books
             var editions = GetEditionsByBook(message.Book.Id);
             DeleteMany(editions);
         }
+
+        private List<Tuple<Func<Edition, string, double>, string>> EditionScoringFunctions(string title)
+        {
+            Func<Func<Edition, string, double>, string, Tuple<Func<Edition, string, double>, string>> tc = Tuple.Create;
+            var scoringFunctions = new List<Tuple<Func<Edition, string, double>, string>>
+            {
+                tc((a, t) => a.Title.FuzzyMatch(t), title),
+                tc((a, t) => a.Title.FuzzyMatch(t), title.RemoveBracketsAndContents().CleanAuthorName()),
+                tc((a, t) => a.Title.FuzzyMatch(t), title.RemoveAfterDash().CleanAuthorName()),
+                tc((a, t) => a.Title.FuzzyMatch(t), title.RemoveBracketsAndContents().RemoveAfterDash().CleanAuthorName()),
+                tc((a, t) => t.FuzzyContains(a.Title), title)
+            };
+
+            return scoringFunctions;
+        }
+
+        private List<Edition> FindByStringInexact(List<Edition> editions, Func<Edition, string, double> scoreFunction, string title)
+        {
+            const double fuzzThreshold = 0.7;
+            const double fuzzGap = 0.4;
+
+            var sortedEditions = editions.Select(s => new
+                {
+                    MatchProb = scoreFunction(s, title),
+                    Edition = s
+                })
+                .ToList()
+                .OrderByDescending(s => s.MatchProb)
+                .ToList();
+
+            return sortedEditions.TakeWhile((x, i) => i == 0 || sortedEditions[i - 1].MatchProb - x.MatchProb < fuzzGap)
+                .TakeWhile((x, i) => x.MatchProb > fuzzThreshold || (i > 0 && sortedEditions[i - 1].MatchProb > fuzzThreshold))
+                .Select(x => x.Edition)
+                .ToList();
+        }
     }
 }
diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs
index 36d847453..18b3df508 100644
--- a/src/NzbDrone.Core/Parser/Parser.cs
+++ b/src/NzbDrone.Core/Parser/Parser.cs
@@ -351,7 +351,11 @@ namespace NzbDrone.Core.Parser
 
             simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle);
 
-            var bestBook = books.OrderByDescending(x => simpleTitle.FuzzyContains(x.Title)).First();
+            var bestBook = books
+                .OrderByDescending(x => simpleTitle.FuzzyContains(x.Editions.Value.Single(e => e.Monitored).Title))
+                .First()
+                .Editions.Value
+                .Single(e => e.Monitored);
 
             var foundAuthor = GetTitleFuzzy(simpleTitle, authorName, out var remainder);
 
diff --git a/src/NzbDrone.Core/Parser/ParsingService.cs b/src/NzbDrone.Core/Parser/ParsingService.cs
index 4907a209e..dd100fc2b 100644
--- a/src/NzbDrone.Core/Parser/ParsingService.cs
+++ b/src/NzbDrone.Core/Parser/ParsingService.cs
@@ -28,15 +28,18 @@ namespace NzbDrone.Core.Parser
     {
         private readonly IAuthorService _authorService;
         private readonly IBookService _bookService;
+        private readonly IEditionService _editionService;
         private readonly IMediaFileService _mediaFileService;
         private readonly Logger _logger;
 
         public ParsingService(IAuthorService authorService,
                               IBookService bookService,
+                              IEditionService editionService,
                               IMediaFileService mediaFileService,
                               Logger logger)
         {
             _bookService = bookService;
+            _editionService = editionService;
             _authorService = authorService;
             _mediaFileService = mediaFileService;
             _logger = logger;
@@ -127,12 +130,25 @@ namespace NzbDrone.Core.Parser
                     bookInfo = _bookService.FindByTitle(author.AuthorMetadataId, parsedBookInfo.BookTitle);
                 }
 
+                if (bookInfo == null)
+                {
+                    var edition = _editionService.FindByTitle(author.AuthorMetadataId, parsedBookInfo.BookTitle);
+                    bookInfo = edition?.Book.Value;
+                }
+
                 if (bookInfo == null)
                 {
                     _logger.Debug("Trying inexact book match for {0}", parsedBookInfo.BookTitle);
                     bookInfo = _bookService.FindByTitleInexact(author.AuthorMetadataId, parsedBookInfo.BookTitle);
                 }
 
+                if (bookInfo == null)
+                {
+                    _logger.Debug("Trying inexact edition match for {0}", parsedBookInfo.BookTitle);
+                    var edition = _editionService.FindByTitleInexact(author.AuthorMetadataId, parsedBookInfo.BookTitle);
+                    bookInfo = edition?.Book.Value;
+                }
+
                 if (bookInfo != null)
                 {
                     result.Add(bookInfo);
@@ -213,6 +229,23 @@ namespace NzbDrone.Core.Parser
                         bestBook = book;
                     }
                 }
+
+                var possibleEditions = _editionService.GetCandidates(author.AuthorMetadataId, title);
+                foreach (var edition in possibleEditions)
+                {
+                    var editionMatch = title.FuzzyMatch(edition.Title, 0.5);
+                    var score = (authorMatch.Item2 + editionMatch.Item2) / 2;
+
+                    _logger.Trace($"Edition {edition} has score {score}");
+
+                    if (score > bestScore)
+                    {
+                        // Remember the winning score so a later, weaker candidate cannot displace this match.
+                        bestScore = score;
+                        bestAuthor = author;
+                        bestBook = edition.Book.Value;
+                    }
+                }
             }
 
             _logger.Trace($"Best match: {bestAuthor} {bestBook}");