New: Get more candidates and include ISBN/ASIN in distance calculation

pull/965/head
ta264 4 years ago
parent d078dacaab
commit 9f37b1c484

@ -1,3 +1,4 @@
using System;
using System.Collections.Generic;
using System.Linq;
using NLog;
@ -12,7 +13,7 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
// Supplies candidate editions that a local edition can be matched against.
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so
// GetRemoteCandidates is listed twice (List<...> return, then IEnumerable<...> return).
// Presumably the IEnumerable<...> form is the post-change declaration — confirm against the repository.
public interface ICandidateService
{
// Candidates derived from the local edition's tags; the implementation body is not fully visible in this view.
List<CandidateEdition> GetDbCandidatesFromTags(LocalEdition localEdition, IdentificationOverrides idOverrides, bool includeExisting);
// Candidates obtained from the remote metadata search (see CandidateService.GetRemoteCandidates).
List<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition);
IEnumerable<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition);
}
public class CandidateService : ICandidateService
@ -183,116 +184,194 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
return candidateReleases;
}
// Produces candidate editions for a local edition by querying _bookSearchService.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so removed and
// added lines are interleaved (both the List<...> and IEnumerable<...> signatures appear,
// some statements are duplicated, and parts of the old single try/catch body remain).
//
// Reading the added lines, the flow is:
//   1. Search by ISBN, then by ASIN, then by Goodreads ID, yielding candidates from each.
//   2. If any of those produced a candidate, stop.
//   3. Otherwise search by author + title; if that produced a candidate, stop.
//   4. Otherwise search by title only, then by each author tag only.
// A GoodreadsException from any single search is logged at Info level and treated as an
// empty result, so the remaining searches still run.
public List<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition)
public IEnumerable<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition)
{
// Gets candidate book releases from the metadata server.
// Will eventually need adding locally if we find a match
var watch = System.Diagnostics.Stopwatch.StartNew();
List<Book> remoteBooks = null;
var candidates = new List<CandidateEdition>();
List<Book> remoteBooks;
// Edition IDs already handed out; ToCandidates consults and updates this set so an
// edition returned by more than one search is only yielded once.
var seenCandidates = new HashSet<string>();
// Distinct identifier values across all files in the edition. An identifier search is
// only attempted below when the files agree on exactly one value (Count == 1).
var isbns = localEdition.LocalBooks.Select(x => x.FileTrackInfo.Isbn).Distinct().ToList();
var asins = localEdition.LocalBooks.Select(x => x.FileTrackInfo.Asin).Distinct().ToList();
var goodreads = localEdition.LocalBooks.Select(x => x.FileTrackInfo.GoodreadsId).Distinct().ToList();
try
// grab possibilities for all the IDs present
if (isbns.Count == 1 && isbns[0].IsNotNullOrWhiteSpace())
{
if (isbns.Count == 1 && isbns[0].IsNotNullOrWhiteSpace())
{
_logger.Trace($"Searching by isbn {isbns[0]}");
_logger.Trace($"Searching by isbn {isbns[0]}");
try
{
remoteBooks = _bookSearchService.SearchByIsbn(isbns[0]);
}
catch (GoodreadsException e)
{
_logger.Info(e, "Skipping ISBN search due to Goodreads Error");
remoteBooks = new List<Book>();
}
// Calibre puts junk asins into books it creates so check for sensible length
if ((remoteBooks == null || !remoteBooks.Any()) &&
asins.Count == 1 &&
asins[0].IsNotNullOrWhiteSpace() &&
asins[0].Length == 10)
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
_logger.Trace($"Searching by asin {asins[0]}");
yield return candidate;
}
}
// ASIN search: only values of exactly 10 characters are tried (see the Calibre note above).
if (asins.Count == 1 &&
asins[0].IsNotNullOrWhiteSpace() &&
asins[0].Length == 10)
{
_logger.Trace($"Searching by asin {asins[0]}");
try
{
remoteBooks = _bookSearchService.SearchByAsin(asins[0]);
}
// if we don't have an independent ID, try a goodreads ID, but may have been matched to the wrong edition by calibre
if ((remoteBooks == null || !remoteBooks.Any()) &&
goodreads.Count == 1 &&
goodreads[0].IsNotNullOrWhiteSpace())
catch (GoodreadsException e)
{
if (int.TryParse(goodreads[0], out var id))
{
_logger.Trace($"Searching by goodreads id {id}");
_logger.Info(e, "Skipping ASIN search due to Goodreads Error");
remoteBooks = new List<Book>();
}
remoteBooks = _bookSearchService.SearchByGoodreadsId(id);
}
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
yield return candidate;
}
}
// if no asin/isbn or no result, fall back to text search
if (remoteBooks == null || !remoteBooks.Any())
if (goodreads.Count == 1 &&
goodreads[0].IsNotNullOrWhiteSpace())
{
// The Goodreads ID tag is a string; the search is skipped when it does not parse as an int.
if (int.TryParse(goodreads[0], out var id))
{
// fall back to author / book name search
List<string> authorTags = new List<string>();
_logger.Trace($"Searching by goodreads id {id}");
if (TrackGroupingService.IsVariousAuthors(localEdition.LocalBooks))
try
{
authorTags.Add("Various Authors");
remoteBooks = _bookSearchService.SearchByGoodreadsId(id);
}
else
catch (GoodreadsException e)
{
authorTags.AddRange(localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.Authors));
_logger.Info(e, "Skipping Goodreads ID search due to Goodreads Error");
remoteBooks = new List<Book>();
}
var bookTag = localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.BookTitle) ?? "";
if (!authorTags.Any() || bookTag.IsNullOrWhiteSpace())
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
return candidates;
yield return candidate;
}
}
}
foreach (var authorTag in authorTags)
{
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, authorTag);
if (remoteBooks.Any())
{
break;
}
}
// If we got an id result, stop
if (seenCandidates.Any())
{
yield break;
}
if (!remoteBooks.Any())
{
var bookSearch = _bookSearchService.SearchForNewBook(bookTag, null);
var authorSearch = authorTags.SelectMany(a => _bookSearchService.SearchForNewBook(a, null));
// fall back to author / book name search
var authorTags = new List<string>();
remoteBooks = bookSearch.Concat(authorSearch).DistinctBy(x => x.ForeignBookId).ToList();
}
if (TrackGroupingService.IsVariousAuthors(localEdition.LocalBooks))
{
authorTags.Add("Various Authors");
}
else
{
authorTags.AddRange(localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.Authors));
}
var bookTag = localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.BookTitle) ?? "";
// If no valid author or book tags, stop
if (!authorTags.Any() || bookTag.IsNullOrWhiteSpace())
{
yield break;
}
// Search by author+book
foreach (var authorTag in authorTags)
{
try
{
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, authorTag);
}
catch (GoodreadsException e)
{
_logger.Info(e, "Skipping author/title search due to Goodreads Error");
remoteBooks = new List<Book>();
}
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
yield return candidate;
}
}
// If we got an author/book search result, stop
if (seenCandidates.Any())
{
yield break;
}
// Search by just book title
try
{
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, null);
}
catch (GoodreadsException e)
{
_logger.Info(e, "Skipping book due to Goodreads error");
_logger.Info(e, "Skipping book title search due to Goodreads Error");
remoteBooks = new List<Book>();
}
foreach (var book in remoteBooks)
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
yield return candidate;
}
// Search by just author
// The author tag is passed as the first argument with a null second argument, the same
// call shape as the title-only search above.
foreach (var a in authorTags)
{
try
{
remoteBooks = _bookSearchService.SearchForNewBook(a, null);
}
catch (GoodreadsException e)
{
_logger.Info(e, "Skipping author search due to Goodreads Error");
remoteBooks = new List<Book>();
}
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
{
yield return candidate;
}
}
}
// Flattens the editions of the given books into CandidateEdition entries.
// An edition whose ForeignEditionId is already in seenCandidates is skipped; otherwise its ID
// is added to seenCandidates, edition.Book is set to the owning book, and a candidate with an
// empty ExistingFiles list is appended to the result.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped. The unguarded
// `edition.Book = book; candidates.Add(...)` lines and the `watch` / `localEdition` lines near
// the end use names that are not declared in this method — presumably they are removed lines
// from the old GetRemoteCandidates body; confirm against the repository.
private List<CandidateEdition> ToCandidates(IEnumerable<Book> books, HashSet<string> seenCandidates)
{
var candidates = new List<CandidateEdition>();
foreach (var book in books)
{
// We have to make sure various bits and pieces are populated that are normally handled
// by a database lazy load
foreach (var edition in book.Editions.Value)
{
edition.Book = book;
candidates.Add(new CandidateEdition
// Only emit editions that no earlier search has already produced.
if (!seenCandidates.Contains(edition.ForeignEditionId))
{
Edition = edition,
ExistingFiles = new List<BookFile>()
});
seenCandidates.Add(edition.ForeignEditionId);
edition.Book = book;
candidates.Add(new CandidateEdition
{
Edition = edition,
ExistingFiles = new List<BookFile>()
});
}
}
}
watch.Stop();
_logger.Debug($"Getting {candidates.Count} remote candidates from tags for {localEdition.LocalBooks.Count} tracks took {watch.ElapsedMilliseconds}ms");
return candidates;
}
}

@ -13,6 +13,8 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
{ "source", 2.0 },
{ "author", 3.0 },
{ "book", 3.0 },
{ "isbn", 10.0 },
{ "asin", 10.0 },
{ "media_count", 1.0 },
{ "media_format", 1.0 },
{ "year", 1.0 },

@ -64,6 +64,20 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
dist.AddString("book", title, titleOptions);
Logger.Trace("book: '{0}' vs '{1}'; {2}", title, titleOptions.ConcatToString("' or '"), dist.NormalizedDistance());
var isbn = localTracks.MostCommon(x => x.FileTrackInfo.Isbn);
if (isbn.IsNotNullOrWhiteSpace() && edition.Isbn13.IsNotNullOrWhiteSpace())
{
dist.AddBool("isbn", isbn != edition.Isbn13);
Logger.Trace("isbn: '{0}' vs '{1}'; {2}", isbn, edition.Isbn13, dist.NormalizedDistance());
}
var asin = localTracks.MostCommon(x => x.FileTrackInfo.Asin);
if (asin.IsNotNullOrWhiteSpace() && edition.Asin.IsNotNullOrWhiteSpace())
{
dist.AddBool("asin", asin != edition.Asin);
Logger.Trace("asin: '{0}' vs '{1}'; {2}", asin, edition.Asin, dist.NormalizedDistance());
}
// Year
var localYear = localTracks.MostCommon(x => x.FileTrackInfo.Year);
if (localYear > 0 && edition.ReleaseDate.HasValue)

@ -116,14 +116,22 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
{
var watch = System.Diagnostics.Stopwatch.StartNew();
var candidateReleases = _candidateService.GetDbCandidatesFromTags(localBookRelease, idOverrides, config.IncludeExisting);
IEnumerable<CandidateEdition> candidateReleases = _candidateService.GetDbCandidatesFromTags(localBookRelease, idOverrides, config.IncludeExisting);
if (candidateReleases.Count == 0 && config.AddNewAuthors)
// convert all the TrackFiles that represent extra files to List<LocalTrack>
// local candidates are actually a list so this is fine to enumerate
var allLocalTracks = ToLocalTrack(candidateReleases
.SelectMany(x => x.ExistingFiles)
.DistinctBy(x => x.Path), localBookRelease);
_logger.Debug($"Retrieved {allLocalTracks.Count} possible tracks in {watch.ElapsedMilliseconds}ms");
if (!candidateReleases.Any() && config.AddNewAuthors)
{
candidateReleases = _candidateService.GetRemoteCandidates(localBookRelease);
}
if (candidateReleases.Count == 0)
if (!candidateReleases.Any())
{
// can't find any candidates even after fingerprinting
// populate the overrides and return
@ -137,15 +145,6 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
return;
}
_logger.Debug($"Got {candidateReleases.Count} candidates for {localBookRelease.LocalBooks.Count} tracks in {watch.ElapsedMilliseconds}ms");
// convert all the TrackFiles that represent extra files to List<LocalTrack>
var allLocalTracks = ToLocalTrack(candidateReleases
.SelectMany(x => x.ExistingFiles)
.DistinctBy(x => x.Path), localBookRelease);
_logger.Debug($"Retrieved {allLocalTracks.Count} possible tracks in {watch.ElapsedMilliseconds}ms");
GetBestRelease(localBookRelease, candidateReleases, allLocalTracks);
_logger.Debug($"Best release found in {watch.ElapsedMilliseconds}ms");
@ -155,11 +154,11 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
_logger.Debug($"IdentifyRelease done in {watch.ElapsedMilliseconds}ms");
}
private void GetBestRelease(LocalEdition localBookRelease, List<CandidateEdition> candidateReleases, List<LocalBook> extraTracksOnDisk)
private void GetBestRelease(LocalEdition localBookRelease, IEnumerable<CandidateEdition> candidateReleases, List<LocalBook> extraTracksOnDisk)
{
var watch = System.Diagnostics.Stopwatch.StartNew();
_logger.Debug("Matching {0} track files against {1} candidates", localBookRelease.TrackCount, candidateReleases.Count);
_logger.Debug("Matching {0} track files against candidates", localBookRelease.TrackCount);
_logger.Trace("Processing files:\n{0}", string.Join("\n", localBookRelease.LocalBooks.Select(x => x.Path)));
double bestDistance = 1.0;

@ -418,12 +418,30 @@ namespace NzbDrone.Core.MetadataSource.Goodreads
// Searches by ISBN via SearchByField("isbn", ...). When the search returns exactly one book
// with exactly one edition, the queried ISBN is written onto that edition's Isbn13.
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the bare
// `return SearchByField("isbn", isbn);` line is presumably the removed pre-change body — confirm.
public List<Book> SearchByIsbn(string isbn)
{
return SearchByField("isbn", isbn);
var result = SearchByField("isbn", isbn);
// we don't get isbn back in search result, but if only one result assume the query was correct
// and add in the searched isbn
if (result.Count == 1 && result[0].Editions.Value.Count == 1)
{
result[0].Editions.Value[0].Isbn13 = isbn;
}
return result;
}
// Searches by ASIN via SearchByField("asin", ...). When the search returns exactly one book
// with exactly one edition, the queried ASIN is written onto that edition's Asin.
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// `return SearchByField("isbn", asin);` line (which queries the "isbn" field) is presumably the
// removed pre-change body — confirm.
public List<Book> SearchByAsin(string asin)
{
return SearchByField("isbn", asin);
var result = SearchByField("asin", asin);
// we don't get asin back in search result, but if only one result assume the query was correct
// and add in the searched asin
if (result.Count == 1 && result[0].Editions.Value.Count == 1)
{
result[0].Editions.Value[0].Asin = asin;
}
return result;
}
public List<Book> SearchByGoodreadsId(int id)

Loading…
Cancel
Save