Improve the fuzzy matching (#522)

* Fixed: improve track matching

* Deal with tracks sequentially numbered across discs
pull/6/head
ta264 6 years ago committed by Qstick
parent 8320508688
commit e260a29b57

@ -0,0 +1,61 @@
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Common.Extensions;
using NzbDrone.Test.Common;
namespace NzbDrone.Common.Test
{
[TestFixture]
public class FuzzyContainsFixture : TestBase
{
    // Columns: text searched, pattern sought, minimum accepted accuracy,
    // expected index of the best match (-1 when nothing reaches the threshold).
    // Two rows that were exact duplicates of neighbouring rows have been dropped;
    // they exercised nothing new and only produced identically named test cases.
    [TestCase("abcdef", "abcdef", 0.5, 0)]
    [TestCase("", "abcdef", 0.5, -1)]
    [TestCase("abcdef", "", 0.5, -1)]
    [TestCase("", "", 0.5, -1)]
    [TestCase("abcdef", "de", 0.5, 3)]
    [TestCase("abcdef", "defy", 0.5, 3)]
    [TestCase("abcdef", "abcdefy", 0.5, 0)]
    [TestCase("I am the very model of a modern major general.", " that berry ", 0.3, 4)]
    [TestCase("abcdefghijk", "fgh", 0.5, 5)]
    [TestCase("abcdefghijk", "efxhi", 0.5, 4)]
    [TestCase("abcdefghijk", "cdefxyhijk", 0.5, 2)]
    [TestCase("abcdefghijk", "bxy", 0.5, -1)]
    [TestCase("123456789xx0", "3456789x0", 0.5, 2)]
    [TestCase("abcdef", "xxabc", 0.5, 0)]
    [TestCase("abcdef", "defyy", 0.5, 3)]
    [TestCase("abcdef", "xabcdefy", 0.5, 0)]
    [TestCase("abcdefghijk", "efxyhi", 0.6, 4)]
    [TestCase("abcdefghijk", "efxyhi", 0.7, -1)]
    [TestCase("abcdefghijk", "bcdef", 0.0, 1)]
    [TestCase("abcdexyzabcde", "abccde", 0.5, 0)]
    [TestCase("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 0.5, 0)]
    [TestCase("abcdefghijklmnopqrstuvwxyz", "abcdefg", 0.5, 0)]
    [TestCase("The quick brown fox jumps over the lazy dog", "The quick brown fox jumps over the lazy d", 0.5, 0)]
    [TestCase("The quick brown fox jumps over the lazy dog", "The quick brown fox jumps over the lazy g", 0.5, 0)]
    [TestCase("The quick brown fox jumps over the lazy dog", "quikc brown fox jumps over the lazy dog", 0.5, 4)]
    [TestCase("The quick brown fox jumps over the lazy dog", "qui jumps over the lazy dog", 0.5, 16)]
    [TestCase("u6IEytQiYpzAccsbjQ5ISuE4smDQ1ZiU42cFBrTeKB2XrVLEqAvgIiKlDP75iApy07jzmK", "xEytQiYpzAccsbjQ5ISuE4smDQ1ZiU42cFBrTeKB2XrVLEqAvgIiKlDP75iApy07jzmK", 0.5, 2)]
    [TestCase("plusifeelneedforredundantinformationintitlefield", "anthology", 0.5, -1)]
    public void FuzzyFind(string text, string pattern, double threshold, int expected)
    {
        text.FuzzyFind(pattern, threshold).Should().Be(expected);
    }

    // Columns: text searched, pattern sought, expected accuracy of the best match.
    // Scores are doubles, so they are compared with a small tolerance.
    [TestCase("abcdef", "abcdef", 1)]
    [TestCase("", "abcdef", 0)]
    [TestCase("abcdef", "", 0)]
    [TestCase("", "", 0)]
    [TestCase("abcdef", "de", 1)]
    [TestCase("abcdef", "defy", 0.75)]
    [TestCase("abcdef", "abcdefghk", 6.0/9)]
    [TestCase("abcdef", "zabcdefz", 6.0/8)]
    [TestCase("plusifeelneedforredundantinformationintitlefield", "anthology", 4.0/9)]
    [TestCase("+ (Plus) - I feel the need for redundant information in the title field", "+", 1)]
    public void FuzzyContains(string text, string pattern, double expectedScore)
    {
        text.FuzzyContains(pattern).Should().BeApproximately(expectedScore, 1e-9);
    }
}
}

@ -42,5 +42,21 @@ namespace NzbDrone.Common.Test
{
text.ToLower().LevenshteinDistanceClean(other.ToLower()).Should().Be(expected);
}
// FuzzyMatch is expected to give the same score whichever string is the receiver.
[TestCase("hello", "hello")]
[TestCase("hello", "bye")]
[TestCase("a longer string", "a different long string")]
public void FuzzyMatchSymmetric(string a, string b)
{
    var forward = a.FuzzyMatch(b);
    var backward = b.FuzzyMatch(a);

    forward.Should().Be(backward);
}
// An empty string on either side (or both) must score as the expected value of zero.
[TestCase("", "", 0)]
[TestCase("a", "", 0)]
[TestCase("", "a", 0)]
public void FuzzyMatchEmptyValuesReturnZero(string a, string b, double expected)
{
    var score = a.FuzzyMatch(b);

    score.Should().Be(expected);
}
}
}

@ -84,6 +84,7 @@
<Compile Include="ExtensionTests\IEnumerableExtensionTests\IntersectByFixture.cs" />
<Compile Include="ExtensionTests\Int64ExtensionFixture.cs" />
<Compile Include="ExtensionTests\UrlExtensionsFixture.cs" />
<Compile Include="ExtensionTests\FuzzyContainsFixture.cs" />
<Compile Include="HashUtilFixture.cs" />
<Compile Include="Http\HttpClientFixture.cs" />
<Compile Include="Http\HttpHeaderFixture.cs" />

@ -0,0 +1,167 @@
/*
* This file incorporates work covered by the following copyright and
* permission notice:
*
* Diff Match and Patch
* Copyright 2018 The diff-match-patch Authors.
* https://github.com/google/diff-match-patch
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Numerics;
namespace NzbDrone.Common.Extensions
{
/// <summary>
/// Approximate substring search for strings, built on the Bitap matcher taken
/// from diff-match-patch. A match is scored purely by the number of errors
/// relative to the pattern length; the position of the match does not affect
/// the score.
/// </summary>
public static class FuzzyContainsExtension
{
    // Lowest accuracy that FuzzyContains still reports as a match.
    private const double ContainsThreshold = 0.25;

    /// <summary>
    /// Locates the best approximate occurrence of <paramref name="pattern"/> in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="pattern">The pattern to search for.</param>
    /// <param name="matchProb">Minimum accuracy (1.0 = exact) a match must reach to be accepted.</param>
    /// <returns>Zero-based index of the best match, or -1 if no match reaches the threshold
    /// or either argument is null or empty.</returns>
    public static int FuzzyFind(this string text, string pattern, double matchProb)
    {
        return Match(text, pattern, matchProb).Item1;
    }

    /// <summary>
    /// Returns the accuracy of the best match of <paramref name="pattern"/> within <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="pattern">The pattern to search for.</param>
    /// <returns>1.0 for an exact occurrence, otherwise 1 - errors / pattern length for the best
    /// approximate match, or 0 when nothing scores at least 0.25 or either argument is null or empty.</returns>
    public static double FuzzyContains(this string text, string pattern)
    {
        return Match(text, pattern, ContainsThreshold).Item2;
    }

    /// <summary>
    /// Locates the best instance of <paramref name="pattern"/> in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="pattern">The pattern to search for.</param>
    /// <param name="matchThreshold">Minimum accuracy a fuzzy match must reach.</param>
    /// <returns>(index, accuracy) of the best match, or (-1, 0) if no match was found.</returns>
    private static Tuple<int, double> Match(string text, string pattern, double matchThreshold = 0.5)
    {
        // Extension methods can be invoked on a null receiver, so null is treated
        // like an empty string: there is nothing to match.
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern))
        {
            return NoMatch();
        }

        if (pattern.Length <= text.Length)
        {
            var loc = text.IndexOf(pattern, StringComparison.Ordinal);
            if (loc != -1)
            {
                // Perfect match, no need for the fuzzy search.
                return new Tuple<int, double>(loc, 1);
            }
        }

        // Do a fuzzy compare.
        return MatchBitap(text, pattern, matchThreshold);
    }

    /// <summary>
    /// Locates the best instance of <paramref name="pattern"/> in <paramref name="text"/>
    /// using the Bitap algorithm.
    /// </summary>
    /// <param name="text">The text to search (non-empty).</param>
    /// <param name="pattern">The pattern to search for (non-empty).</param>
    /// <param name="matchThreshold">Lowest score below which the search gives up.</param>
    /// <returns>(index, accuracy) of the best match, or (-1, 0) if no match was found.</returns>
    private static Tuple<int, double> MatchBitap(string text, string pattern, double matchThreshold)
    {
        // Bit masks marking where each character occurs in the pattern.
        var alphabet = BuildAlphabet(pattern);
        var one = BigInteger.One;

        // Lowest score below which we give up; raised to the score of the best match found.
        var scoreThreshold = matchThreshold;

        // A set bit here means the whole pattern has been matched.
        var matchMask = one << (pattern.Length - 1);
        var bestLoc = -1;

        // The scan covers the text plus enough room for the pattern to hang off its end.
        var finish = text.Length + pattern.Length;

        // State of the previous (one fewer error) pass; only read when d > 0.
        var lastRd = new BigInteger[0];

        for (var d = 0; d < pattern.Length; d++)
        {
            // Each pass allows for one more error, so every match found in it scores the same.
            var score = BitapScore(d, pattern);

            var rd = new BigInteger[finish + 2];
            rd[finish + 1] = (one << d) - one;

            for (var j = finish; j >= 1; j--)
            {
                // Stays zero when j is past the end of the text or the character
                // does not occur in the pattern (TryGetValue leaves the default).
                var charMatch = BigInteger.Zero;
                if (j <= text.Length)
                {
                    alphabet.TryGetValue(text[j - 1], out charMatch);
                }

                var exact = ((rd[j + 1] << 1) | one) & charMatch;
                if (d == 0)
                {
                    // First pass: exact match.
                    rd[j] = exact;
                }
                else
                {
                    // Subsequent passes: fuzzy match.
                    rd[j] = exact
                            | (((lastRd[j + 1] | lastRd[j]) << 1) | one)
                            | lastRd[j + 1];
                }

                if (!(rd[j] & matchMask).IsZero && score >= scoreThreshold)
                {
                    scoreThreshold = score;
                    bestLoc = j - 1;
                }
            }

            if (BitapScore(d + 1, pattern) < scoreThreshold)
            {
                // No hope for a (better) match at greater error levels.
                break;
            }

            lastRd = rd;
        }

        // Without a match scoreThreshold still holds the caller's threshold, which is
        // not a score; report 0 so the result agrees with the empty-input case.
        return bestLoc < 0 ? NoMatch() : new Tuple<int, double>(bestLoc, scoreThreshold);
    }

    /// <summary>
    /// Computes the score for a match with <paramref name="e"/> errors.
    /// </summary>
    /// <param name="e">Number of errors in the match.</param>
    /// <param name="pattern">Pattern being sought.</param>
    /// <returns>Overall score for the match (1.0 = good, 0.0 = bad).</returns>
    private static double BitapScore(int e, string pattern)
    {
        return 1.0 - (double)e / pattern.Length;
    }

    /// <summary>
    /// Initialises the alphabet for the Bitap algorithm.
    /// </summary>
    /// <param name="pattern">The text to encode.</param>
    /// <returns>For each character of the pattern, a mask with one bit set per position
    /// at which it occurs (the first pattern character maps to the highest bit).</returns>
    private static Dictionary<char, BigInteger> BuildAlphabet(string pattern)
    {
        var masks = new Dictionary<char, BigInteger>();

        for (var i = 0; i < pattern.Length; i++)
        {
            BigInteger mask;
            masks.TryGetValue(pattern[i], out mask); // zero the first time a character is seen
            masks[pattern[i]] = mask | (BigInteger.One << (pattern.Length - i - 1));
        }

        return masks;
    }

    // Result used whenever nothing matched: index -1 with an accuracy of 0.
    private static Tuple<int, double> NoMatch()
    {
        return new Tuple<int, double>(-1, 0);
    }
}
}

@ -143,29 +143,17 @@ namespace NzbDrone.Common.Extensions
public static double FuzzyMatch(this string a, string b)
{
if (a.Contains(" ") && b.Contains(" "))
if (a.IsNullOrWhiteSpace() || b.IsNullOrWhiteSpace())
{
return 0;
}
else if (a.Contains(" ") && b.Contains(" "))
{
var partsA = a.Split(' ');
var partsB = b.Split(' ');
var weightedHighCoefficients = new double[partsA.Length];
var distanceRatios = new double[partsA.Length];
for (int i = 0; i < partsA.Length; i++)
{
double high = 0.0;
int indexDistance = 0;
for (int x = 0; x < partsB.Length; x++)
{
var coef = LevenshteinCoefficient(partsA[i], partsB[x]);
if (coef > high)
{
high = coef;
indexDistance = Math.Abs(i - x);
}
}
double distanceWeight = 1.0 - (double)indexDistance / (double)partsA.Length;
weightedHighCoefficients[i] = high * distanceWeight;
}
return weightedHighCoefficients.Sum() / (double)partsA.Length;
var coef = (FuzzyMatchComponents(partsA, partsB) + FuzzyMatchComponents(partsB, partsA)) / (partsA.Length + partsB.Length);
return Math.Max(coef, LevenshteinCoefficient(a, b));
}
else
{
@ -173,6 +161,28 @@ namespace NzbDrone.Common.Extensions
}
}
// Sums, over every word in a, the Levenshtein coefficient of its closest word in b,
// scaled down by how far apart the two words sit in their respective lists.
// The caller is responsible for normalising the returned sum.
private static double FuzzyMatchComponents(string[] a, string[] b)
{
    // Index distances are expressed relative to the longer of the two word lists.
    double longest = Math.Max(a.Length, b.Length);
    double total = 0;
    var position = 0;

    foreach (var word in a)
    {
        var bestScore = 0.0;
        var bestOffset = 0;

        for (var candidate = 0; candidate < b.Length; candidate++)
        {
            var score = LevenshteinCoefficient(word, b[candidate]);

            // Strictly greater: on a tie the earliest word in b is kept.
            if (score > bestScore)
            {
                bestScore = score;
                bestOffset = Math.Abs(position - candidate);
            }
        }

        total += (1.0 - (double)bestOffset / longest) * bestScore;
        position++;
    }

    return total;
}
public static double LevenshteinCoefficient(this string a, string b)
{
return 1.0 - (double)a.LevenshteinDistance(b) / Math.Max(a.Length, b.Length);

@ -71,6 +71,7 @@
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Xml" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Numerics" />
</ItemGroup>
<ItemGroup>
<Compile Include="ArchiveService.cs" />
@ -198,6 +199,7 @@
<Compile Include="Instrumentation\Sentry\LidarrSentryPacket.cs" />
<Compile Include="Instrumentation\VersionLayoutRenderer.cs" />
<Compile Include="Extensions\LevenstheinExtensions.cs" />
<Compile Include="Extensions\FuzzyContains.cs" />
<Compile Include="Messaging\IEvent.cs" />
<Compile Include="Messaging\IMessage.cs" />
<Compile Include="Model\ProcessInfo.cs" />

@ -94,7 +94,6 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
}
[Test]
public void should_find_album_in_db_by_releaseid()
{
@ -129,6 +128,7 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
[TestCase("ANTholog")]
[TestCase("nthology")]
[TestCase("antholoyg")]
[TestCase("÷")]
public void should_not_find_album_in_db_by_incorrect_title(string title)
{
var album = _albumRepo.FindByTitle(_artist.Id, title);
@ -136,28 +136,6 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
album.Should().BeNull();
}
// Slightly wrong titles should still resolve to the fixture's album
// through the repository's inexact lookup.
[TestCase("ANTholog")]
[TestCase("antholoyg")]
[TestCase("ANThology CD")]
public void should_find_album_in_db_by_inexact_title(string title)
{
    var found = _albumRepo.FindByTitleInexact(_artist.Id, title);

    found.Should().NotBeNull();
    found.Title.Should().Be(_album.Title);
}
// Once a second, similar album has been inserted the inexact lookup
// is expected to return nothing for these titles.
[TestCase("ANTholog")]
[TestCase("antholoyg")]
[TestCase("ANThology CD")]
public void should_not_find_album_in_db_by_inexact_title_when_two_similar_matches(string title)
{
    _albumRepo.Insert(_albumSimilar);

    var found = _albumRepo.FindByTitleInexact(_artist.Id, title);

    found.Should().BeNull();
}
[Test]
public void should_not_find_album_in_db_by_partial_releaseid()
{

@ -0,0 +1,77 @@
using FizzWare.NBuilder;
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.Music;
using NzbDrone.Core.Test.Framework;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using NLog;
using Moq;
namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
{
[TestFixture]
public class AlbumServiceFixture : CoreTest<AlbumService>
{
    // Albums handed back by the mocked repository; tests may append to this list.
    private List<Album> _albums;

    [SetUp]
    public void Setup()
    {
        _albums = new List<Album>
        {
            new Album
            {
                Title = "ANThology",
                CleanTitle = "anthology",
            },

            // An album whose clean title is empty.
            new Album
            {
                Title = "+",
                CleanTitle = "",
            }
        };

        Mocker.GetMock<IAlbumRepository>()
              .Setup(repo => repo.GetAlbums(It.IsAny<int>()))
              .Returns(_albums);
    }

    // Adds a second album whose title differs from "ANThology" by one trailing character.
    private void GivenSimilarAlbum()
    {
        var similar = new Album
        {
            Title = "ANThology2",
            CleanTitle = "anthology2",
        };

        _albums.Add(similar);
    }

    // Columns: title to look up, title of the album expected to be returned.
    [TestCase("ANTholog", "ANThology")]
    [TestCase("antholoyg", "ANThology")]
    [TestCase("ANThology CD", "ANThology")]
    [TestCase("ANThology CD xxxx (Remastered) - [Oh please why do they do this?]", "ANThology")]
    [TestCase("+ (Plus) - I feel the need for redundant information in the title field", "+")]
    public void should_find_album_in_db_by_inexact_title(string title, string expected)
    {
        var found = Subject.FindByTitleInexact(0, title);

        found.Should().NotBeNull();
        found.Title.Should().Be(expected);
    }

    // With the similar album present none of these titles may resolve to an album.
    [TestCase("ANTholog")]
    [TestCase("antholoyg")]
    [TestCase("ANThology CD")]
    [TestCase("÷")]
    [TestCase("÷ (Divide)")]
    public void should_not_find_album_in_db_by_inexact_title_when_two_similar_matches(string title)
    {
        GivenSimilarAlbum();

        var found = Subject.FindByTitleInexact(0, title);

        found.Should().BeNull();
    }
}
}

@ -17,6 +17,24 @@ namespace NzbDrone.Core.Test.MusicTests.ArtistRepositoryTests
public class ArtistRepositoryFixture : DbTest<ArtistRepository, Artist>
{
private ArtistRepository _artistRepo;
// Builds an unsaved Artist whose name, clean name and foreign id all derive from the given name.
private Artist CreateArtist(string name)
{
    var builder = Builder<Artist>.CreateNew()
        .With(artist => artist.Name = name)
        .With(artist => artist.CleanName = Parser.Parser.CleanArtistName(name))
        .With(artist => artist.ForeignArtistId = name);

    return builder.BuildNew();
}
// Resolves the repository under test and inserts two artists with similar names.
private void GivenArtists()
{
    _artistRepo = Mocker.Resolve<ArtistRepository>();

    foreach (var name in new[] { "The Black Eyed Peas", "The Black Keys" })
    {
        _artistRepo.Insert(CreateArtist(name));
    }
}
[Test]
public void should_lazyload_profiles()
{
@ -61,5 +79,16 @@ namespace NzbDrone.Core.Test.MusicTests.ArtistRepositoryTests
StoredModel.MetadataProfile.Should().NotBeNull();
}
// Looking up the cleaned form of a stored artist's name must return that artist.
[TestCase("The Black Eyed Peas")]
[TestCase("The Black Keys")]
public void should_find_artist_in_db_by_name(string name)
{
    GivenArtists();

    var cleanName = Parser.Parser.CleanArtistName(name);
    var found = _artistRepo.FindByName(cleanName);

    found.Should().NotBeNull();
    found.Name.Should().Be(name);
}
}
}

@ -0,0 +1,58 @@
using System.Collections.Generic;
using FizzWare.NBuilder;
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.Test.Framework;
using NzbDrone.Core.Music;
namespace NzbDrone.Core.Test.MusicTests.ArtistServiceTests
{
[TestFixture]
public class FindByNameInexactFixture : CoreTest<ArtistService>
{
    // Artists returned by the mocked repository's All().
    private List<Artist> _artists;

    // Builds an unsaved Artist whose name, clean name and foreign id all derive from the given name.
    private Artist CreateArtist(string name)
    {
        var builder = Builder<Artist>.CreateNew()
            .With(artist => artist.Name = name)
            .With(artist => artist.CleanName = Parser.Parser.CleanArtistName(name))
            .With(artist => artist.ForeignArtistId = name);

        return builder.BuildNew();
    }

    [SetUp]
    public void Setup()
    {
        _artists = new List<Artist>
        {
            CreateArtist("The Black Eyed Peas"),
            CreateArtist("The Black Keys")
        };

        Mocker.GetMock<IArtistRepository>()
              .Setup(repo => repo.All())
              .Returns(_artists);
    }

    // Columns: name to look up, name of the artist expected to be returned.
    [TestCase("The Black Eyde Peas", "The Black Eyed Peas")]
    [TestCase("Black Eyed Peas", "The Black Eyed Peas")]
    [TestCase("The Black eys", "The Black Keys")]
    public void should_find_artist_in_db_by_name_inexact(string name, string expected)
    {
        var found = Subject.FindByNameInexact(name);

        found.Should().NotBeNull();
        found.Name.Should().Be(expected);
    }

    // A name that fits both stored artists must not resolve to either of them.
    [TestCase("The Black Peas")]
    public void should_not_find_artist_in_db_by_ambiguous_name(string name)
    {
        var found = Subject.FindByNameInexact(name);

        found.Should().BeNull();
    }
}
}

@ -1,27 +1,21 @@
using System.Linq;
using FluentAssertions;
using NLog;
using NUnit.Framework;
using NzbDrone.Core.Configuration;
using NzbDrone.Core.Music;
using NzbDrone.Core.Test.Framework;
using System.Collections.Generic;
using Moq;
namespace NzbDrone.Core.Test.MusicTests.TitleMatchingTests
{
[TestFixture]
public class TitleMatchingFixture : DbTest<TrackService, Track>
public class TitleMatchingFixture : CoreTest<TrackService>
{
private TrackRepository _trackRepository;
private TrackService _trackService;
private List<Track> _tracks;
[SetUp]
public void Setup()
{
_trackRepository = Mocker.Resolve<TrackRepository>();
_trackService =
new TrackService(_trackRepository, Mocker.Resolve<ConfigService>(), Mocker.Resolve<Logger>());
var trackNames = new List<string> {
"Courage",
"Movies",
@ -35,45 +29,94 @@ namespace NzbDrone.Core.Test.MusicTests.TitleMatchingTests
"Calico",
"(Happy) Death Day",
"Smooth Criminal",
"Universe / Orange Appeal"
"Universe / Orange Appeal",
"Christian's Inferno"
};
_tracks = new List<Track>();
for (int i = 0; i < trackNames.Count; i++) {
_tracks.Add(new Track
{
Title = trackNames[i],
ForeignTrackId = (i+1).ToString(),
AbsoluteTrackNumber = i+1,
MediumNumber = 1
});
}
Mocker.GetMock<ITrackRepository>()
.Setup(s => s.GetTracksByMedium(It.IsAny<int>(), It.IsAny<int>()))
.Returns(_tracks);
Mocker.GetMock<ITrackRepository>()
.Setup(s => s.Find(1234, 4321, It.IsAny<int>(), It.IsAny<int>()))
.Returns((int artistid, int albumid, int medium, int track) => _tracks.Where(t => t.AbsoluteTrackNumber == track && t.MediumNumber == medium).Single());
}
private void GivenSecondDisc()
{
var trackNames = new List<string> {
"Courage",
"another entry",
"random name"
};
for (int i = 0; i < trackNames.Count; i++) {
_trackRepository.Insert(new Track
{
Title = trackNames[i],
ForeignTrackId = (i+1).ToString(),
AlbumId = 4321,
AbsoluteTrackNumber = i+1,
MediumNumber = 1,
TrackFileId = i+1
});
_tracks.Add(new Track
{
Title = trackNames[i],
ForeignTrackId = (100+i+1).ToString(),
AbsoluteTrackNumber = i+1,
MediumNumber = 2
});
}
}
[Test]
public void should_find_track_in_db_by_tracktitle_longer_then_releasetitle()
{
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "Courage with some bla");
var track = Subject.FindTrackByTitle(1234, 4321, 1, 1, "Courage with some bla");
track.Should().NotBeNull();
track.Title.Should().Be(_trackRepository.GetTracksByFileId(1).First().Title);
track.Title.Should().Be(Subject.FindTrack(1234, 4321, 1, 1).Title);
}
[Test]
public void should_find_track_in_db_by_tracktitle_shorter_then_releasetitle()
{
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 3, "and Bone");
var track = Subject.FindTrackByTitle(1234, 4321, 1, 3, "and Bone");
track.Should().NotBeNull();
track.Title.Should().Be(_trackRepository.GetTracksByFileId(3).First().Title);
track.Title.Should().Be(Subject.FindTrack(1234, 4321, 1, 3).Title);
}
[Test]
public void should_not_find_track_in_db_by_wrong_title()
{
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "Not a track");
var track = Subject.FindTrackByTitle(1234, 4321, 1, 1, "Not a track");
track.Should().BeNull();
}
// Columns: title to look up, disc holding the expected track, track number.
// The lookup itself is made with medium number 0, i.e. without a disc.
[TestCase("another entry", 2, 2)]
[TestCase("random name", 2, 3)]
public void should_find_track_on_second_disc_when_disc_tag_missing(string title, int discNumber, int trackNumber)
{
    GivenSecondDisc();

    var found = Subject.FindTrackByTitle(1234, 4321, 0, trackNumber, title);
    var wanted = Subject.FindTrack(1234, 4321, discNumber, trackNumber);

    found.Should().NotBeNull();
    wanted.Should().NotBeNull();
    found.Title.Should().Be(wanted.Title);
}
// "Courage" is track 1 on both discs, so a lookup without a disc number must return nothing.
[Test]
public void should_return_null_if_tracks_with_same_name_and_number_on_different_discs()
{
    GivenSecondDisc();

    var found = Subject.FindTrackByTitle(1234, 4321, 0, 1, "Courage");

    found.Should().BeNull();
}
@ -81,19 +124,53 @@ namespace NzbDrone.Core.Test.MusicTests.TitleMatchingTests
[TestCase("Atitude", 7)]
[TestCase("Smoth cRimnal", 12)]
[TestCase("Sticks and Stones (live)", 6)]
[TestCase("Sticks and Stones (live) - there's a lot of rubbish here", 6)]
[TestCase("Smoth cRimnal feat. someone I don't care about", 12)]
[TestCase("Christians Inferno", 14)]
[TestCase("xxxyyy some random prefix Christians Infurno", 14)]
public void should_find_track_in_db_by_inexact_title(string title, int trackNumber)
{
var track = _trackService.FindTrackByTitleInexact(1234, 4321, 1, trackNumber, title);
var track = Subject.FindTrackByTitleInexact(1234, 4321, 1, trackNumber, title);
var expected = Subject.FindTrack(1234, 4321, 1, trackNumber);
track.Should().NotBeNull();
expected.Should().NotBeNull();
track.Title.Should().Be(expected.Title);
}
// Each title is looked up with track number 1; the lookup must return nothing.
[TestCase("Fesh and Bone", 1)]
[TestCase("Atitude", 1)]
[TestCase("Smoth cRimnal", 1)]
[TestCase("Sticks and Stones (live)", 1)]
[TestCase("Christians Inferno", 1)]
public void should_not_find_track_in_db_by_inexact_title_with_wrong_tracknumber(string title, int trackNumber)
{
    var found = Subject.FindTrackByTitleInexact(1234, 4321, 1, trackNumber, title);

    found.Should().BeNull();
}
[TestCase("Movis", 1, 2)]
[TestCase("anoth entry", 2, 2)]
[TestCase("random.name", 2, 3)]
public void should_find_track_in_db_by_inexact_title_when_disc_tag_missing(string title, int discNumber, int trackNumber)
{
GivenSecondDisc();
var track = Subject.FindTrackByTitleInexact(1234, 4321, 0, trackNumber, title);
var expected = Subject.FindTrack(1234, 4321, discNumber, trackNumber);
track.Should().NotBeNull();
track.Title.Should().Be(_trackRepository.GetTracksByFileId(trackNumber).First().Title);
expected.Should().NotBeNull();
track.Title.Should().Be(expected.Title);
}
[TestCase("A random title", 1)]
[TestCase("Stones and Sticks", 6)]
public void should_not_find_track_in_db_by_different_inexact_title(string title, int trackId)
{
var track = _trackService.FindTrackByTitleInexact(1234, 4321, 1, trackId, title);
var track = Subject.FindTrackByTitleInexact(1234, 4321, 1, trackId, title);
track.Should().BeNull();
}

@ -299,12 +299,14 @@
<Compile Include="MetadataSource\SearchArtistComparerFixture.cs" />
<Compile Include="MetadataSource\SkyHook\SkyHookProxyFixture.cs" />
<Compile Include="MusicTests\AddAlbumFixture.cs" />
<Compile Include="MusicTests\AlbumServiceFixture.cs" />
<Compile Include="MusicTests\AddArtistFixture.cs" />
<Compile Include="MusicTests\AlbumMonitoredServiceTests\AlbumMonitoredServiceFixture.cs" />
<Compile Include="MusicTests\AlbumRepositoryTests\AlbumRepositoryFixture.cs" />
<Compile Include="MusicTests\ArtistRepositoryTests\ArtistRepositoryFixture.cs" />
<Compile Include="MusicTests\ArtistServiceTests\AddArtistFixture.cs" />
<Compile Include="MusicTests\ArtistServiceTests\UpdateMultipleArtistFixture.cs" />
<Compile Include="MusicTests\ArtistServiceTests\FindByNameInexactFixture.cs" />
<Compile Include="MusicTests\RefreshAlbumServiceFixture.cs" />
<Compile Include="MusicTests\ShouldRefreshAlbumFixture.cs" />
<Compile Include="MusicTests\TitleMatchingTests\TitleMatchingFixture.cs" />

@ -8,7 +8,6 @@ using System.Collections.Generic;
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Qualities;
using NzbDrone.Common.Extensions;
namespace NzbDrone.Core.Music
{
@ -17,7 +16,6 @@ namespace NzbDrone.Core.Music
List<Album> GetAlbums(int artistId);
Album FindByName(string cleanTitle);
Album FindByTitle(int artistId, string title);
Album FindByTitleInexact(int artistId, string title);
Album FindByArtistAndName(string artistName, string cleanTitle);
Album FindById(string spotifyId);
PagingSpec<Album> AlbumsWithoutFiles(PagingSpec<Album> pagingSpec);
@ -49,7 +47,7 @@ namespace NzbDrone.Core.Music
public Album FindById(string foreignAlbumId)
{
return Query.SingleOrDefault(s => s.ForeignAlbumId == foreignAlbumId);
return Query.Where(s => s.ForeignAlbumId == foreignAlbumId).SingleOrDefault();
}
public PagingSpec<Album> AlbumsWithoutFiles(PagingSpec<Album> pagingSpec)
@ -285,7 +283,7 @@ namespace NzbDrone.Core.Music
{
cleanTitle = cleanTitle.ToLowerInvariant();
return Query.SingleOrDefault(s => s.CleanTitle == cleanTitle);
return Query.Where(s => s.CleanTitle == cleanTitle).SingleOrDefault();
}
public Album FindByTitle(int artistId, string title)
@ -300,39 +298,6 @@ namespace NzbDrone.Core.Music
.FirstOrDefault();
}
// Finds the album of the given artist whose clean title best fuzzy-matches the
// supplied title. Returns null when the artist has no albums, when the best score
// does not exceed fuzzThreshold, or when the runner-up scores within fuzzGap of it.
public Album FindByTitleInexact(int artistId, string title)
{
// Minimum score the best candidate must exceed.
double fuzzThreshold = 0.7;
// Minimum lead the best candidate must have over the second best.
double fuzzGap = 0.4;
var cleanTitle = Parser.Parser.CleanArtistName(title);
// Fall back to the raw title when cleaning leaves nothing to compare.
if (string.IsNullOrEmpty(cleanTitle))
cleanTitle = title;
// Score every album of the artist and sort the candidates best-first.
var sortedAlbums = Query.Where(s => s.ArtistId == artistId)
.Select(s => new
{
MatchProb = s.CleanTitle.FuzzyMatch(cleanTitle),
Album = s
})
.ToList()
.OrderByDescending(s => s.MatchProb)
.ToList();
if (!sortedAlbums.Any())
return null;
_logger.Trace("\nFuzzy album match on '{0}':\n{1}",
cleanTitle,
string.Join("\n", sortedAlbums.Select(x => $"{x.Album.CleanTitle}: {x.MatchProb}")));
// Accept the top candidate only if it is both good enough and clearly ahead of the next one.
if (sortedAlbums[0].MatchProb > fuzzThreshold
&& (sortedAlbums.Count == 1 || sortedAlbums[0].MatchProb - sortedAlbums[1].MatchProb > fuzzGap))
return sortedAlbums[0].Album;
return null;
}
public Album FindByArtistAndName(string artistName, string cleanTitle)
{
var cleanArtistName = Parser.Parser.CleanArtistName(artistName);
@ -340,7 +305,8 @@ namespace NzbDrone.Core.Music
return Query.Join<Album, Artist>(JoinType.Inner, album => album.Artist, (album, artist) => album.ArtistId == artist.Id)
.Where<Artist>(artist => artist.CleanName == cleanArtistName)
.SingleOrDefault(album => album.CleanTitle == cleanTitle);
.Where<Album>(album => album.CleanTitle == cleanTitle)
.SingleOrDefault();
}
public Album FindAlbumByRelease(string releaseId)

@ -5,6 +5,8 @@ using System;
using System.Collections.Generic;
using System.Linq;
using NzbDrone.Core.Datastore;
using NzbDrone.Core.Parser;
using NzbDrone.Common.Extensions;
namespace NzbDrone.Core.Music
{
@ -89,7 +91,63 @@ namespace NzbDrone.Core.Music
public Album FindByTitleInexact(int artistId, string title)
{
return _albumRepository.FindByTitleInexact(artistId, title);
var cleanTitle = title.CleanArtistName();
var albums = GetAlbumsByArtist(artistId);
Func< Func<Album, string, double>, string, Tuple<Func<Album, string, double>, string>> tc = Tuple.Create;
var scoringFunctions = new List<Tuple<Func<Album, string, double>, string>> {
tc((a, t) => a.CleanTitle.FuzzyMatch(t), cleanTitle),
tc((a, t) => a.Title.FuzzyMatch(t), title),
tc((a, t) => a.CleanTitle.FuzzyMatch(t), title.RemoveBracketsAndContents().CleanArtistName()),
tc((a, t) => a.CleanTitle.FuzzyMatch(t), title.RemoveAfterDash().CleanArtistName()),
tc((a, t) => a.CleanTitle.FuzzyMatch(t), title.RemoveBracketsAndContents().RemoveAfterDash().CleanArtistName()),
tc((a, t) => t.FuzzyContains(a.CleanTitle), cleanTitle),
tc((a, t) => t.FuzzyContains(a.Title), title)
};
foreach (var func in scoringFunctions)
{
var album = FindByStringInexact(albums, func.Item1, func.Item2);
if (album != null)
{
return album;
}
}
return null;
}
// Scores every album against title with scoreFunction and returns the best one,
// provided its score exceeds fuzzThreshold and, when there are other albums,
// beats the runner-up by more than fuzzGap. Returns null otherwise.
private Album FindByStringInexact(List<Album> albums, Func<Album, string, double> scoreFunction, string title)
{
    const double fuzzThreshold = 0.7;
    const double fuzzGap = 0.4;

    // Best candidate first; the sort is stable, so ties keep their input order.
    var ranked = albums
        .Select(album => new
        {
            MatchProb = scoreFunction(album, title),
            Album = album
        })
        .OrderByDescending(candidate => candidate.MatchProb)
        .ToList();

    if (ranked.Count == 0)
    {
        return null;
    }

    _logger.Trace("\nFuzzy album match on '{0}':\n{1}",
                  title,
                  string.Join("\n", ranked.Select(x => $"[{x.Album.Title}] {x.Album.CleanTitle}: {x.MatchProb}")));

    var best = ranked[0];
    var clearsThreshold = best.MatchProb > fuzzThreshold;
    var isUnambiguous = ranked.Count == 1 || best.MatchProb - ranked[1].MatchProb > fuzzGap;

    return clearsThreshold && isUnambiguous ? best.Album : null;
}
public List<Album> GetAllAlbums()

@ -20,7 +20,7 @@ namespace NzbDrone.Core.Music
List<Artist> AddArtists(List<Artist> newArtists);
Artist FindById(string spotifyId);
Artist FindByName(string title);
Artist FindByTitleInexact(string title);
Artist FindByNameInexact(string title);
void DeleteArtist(int artistId, bool deleteFiles);
List<Artist> GetAllArtists();
List<Artist> AllForTag(int tagId);
@ -89,9 +89,43 @@ namespace NzbDrone.Core.Music
return _artistRepository.FindByName(title.CleanArtistName());
}
public Artist FindByTitleInexact(string title)
public Artist FindByNameInexact(string title)
{
throw new NotImplementedException();
const double fuzzThreshold = 0.8;
const double fuzzGap = 0.2;
var cleanTitle = Parser.Parser.CleanArtistName(title);
if (string.IsNullOrEmpty(cleanTitle))
{
cleanTitle = title;
}
var sortedArtists = GetAllArtists()
.Select(s => new
{
MatchProb = s.CleanName.FuzzyMatch(cleanTitle),
Artist = s
})
.ToList()
.OrderByDescending(s => s.MatchProb)
.ToList();
if (!sortedArtists.Any())
{
return null;
}
_logger.Trace("\nFuzzy artist match on '{0}':\n{1}",
cleanTitle,
string.Join("\n", sortedArtists.Select(x => $"{x.Artist.CleanName}: {x.MatchProb}")));
if (sortedArtists[0].MatchProb > fuzzThreshold
&& (sortedArtists.Count == 1 || sortedArtists[0].MatchProb - sortedArtists[1].MatchProb > fuzzGap))
{
return sortedArtists[0].Artist;
}
return null;
}
public List<Artist> GetAllArtists()
@ -110,7 +144,6 @@ namespace NzbDrone.Core.Music
return _artistRepository.Get(artistDBId);
}
public List<Artist> GetArtists(IEnumerable<int> artistIds)
{
return _artistRepository.Get(artistIds).ToList();

@ -61,6 +61,11 @@ namespace NzbDrone.Core.Music
public List<Track> GetTracksByMedium(int albumId, int mediumNumber)
{
if (mediumNumber < 1)
{
return GetTracksByAlbum(albumId);
}
return Query.Where(s => s.AlbumId == albumId)
.AndWhere(s => s.MediumNumber == mediumNumber)
.ToList();

@ -5,6 +5,7 @@ using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.Events;
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Music.Events;
using NzbDrone.Core.Parser;
using NzbDrone.Common.Extensions;
using System;
using System.Collections.Generic;
@ -81,60 +82,73 @@ namespace NzbDrone.Core.Music
public Track FindTrackByTitle(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle)
{
// TODO: can replace this search mechanism with something smarter/faster/better
var normalizedReleaseTitle = Parser.Parser.NormalizeTrackTitle(releaseTitle).Replace(".", " ");
var normalizedReleaseTitle = releaseTitle.NormalizeTrackTitle().Replace(".", " ");
var tracks = _trackRepository.GetTracksByMedium(albumId, mediumNumber);
var matches = from track in tracks
//if we have a trackNumber use it
let trackNumCheck = (trackNumber == 0 || track.AbsoluteTrackNumber == trackNumber)
//if release title is longer than track title
let posReleaseTitle = normalizedReleaseTitle.IndexOf(Parser.Parser.NormalizeTrackTitle(track.Title), StringComparison.CurrentCultureIgnoreCase)
//if track title is longer than release title
let posTrackTitle = Parser.Parser.NormalizeTrackTitle(track.Title).IndexOf(normalizedReleaseTitle, StringComparison.CurrentCultureIgnoreCase)
where track.Title.Length > 0 && trackNumCheck && (posReleaseTitle >= 0 || posTrackTitle >= 0)
orderby posReleaseTitle, posTrackTitle
select new
{
NormalizedLength = Parser.Parser.NormalizeTrackTitle(track.Title).Length,
Track = track
};
var matches = tracks.Where(t => (trackNumber == 0 || t.AbsoluteTrackNumber == trackNumber)
&& t.Title.Length > 0
&& (normalizedReleaseTitle.Contains(t.Title.NormalizeTrackTitle())
|| t.Title.NormalizeTrackTitle().Contains(normalizedReleaseTitle)));
return matches.OrderByDescending(e => e.NormalizedLength).FirstOrDefault()?.Track;
return matches.Count() > 1 ? null : matches.SingleOrDefault();
}
/// <summary>
/// Finds a track on one medium of an album by fuzzy title comparison. The newer body
/// tries several scoring functions in order and returns the first track that
/// FindByStringInexact accepts.
/// </summary>
/// <param name="artistId">Not referenced by any line visible in this listing.</param>
/// <param name="albumId">Album whose tracks are fetched from the track repository.</param>
/// <param name="mediumNumber">Medium (disc) whose tracks are fetched.</param>
/// <param name="trackNumber">Track number forwarded to FindByStringInexact.</param>
/// <param name="title">Title to match; normalized and with '.' replaced by a space first.</param>
/// <returns>The first accepted track, or null when no scoring function yields one.</returns>
// NOTE(review): this listing appears to interleave the pre-change and post-change bodies
// (diff view without +/- markers). The two signatures differ only in the name of the
// last parameter; the matchList/matchProb lines look like remnants of the old body.
public Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle)
public Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string title)
{
double fuzzThreshold = 0.6;
double fuzzGap = 0.2;
var normalizedReleaseTitle = Parser.Parser.NormalizeTrackTitle(releaseTitle).Replace(".", " ");
var normalizedTitle = title.NormalizeTrackTitle().Replace(".", " ");
var tracks = _trackRepository.GetTracksByMedium(albumId, mediumNumber);
var matches = from track in tracks
let normalizedTitle = Parser.Parser.NormalizeTrackTitle(track.Title).Replace(".", " ")
let matchProb = normalizedTitle.FuzzyMatch(normalizedReleaseTitle)
where track.Title.Length > 0
orderby matchProb descending
select new
// tc pins Tuple.Create to a concrete delegate type so the lambdas below can be
// passed to it without spelling out their types at each call.
Func< Func<Track, string, double>, string, Tuple<Func<Track, string, double>, string>> tc = Tuple.Create;
// Scoring strategies, tried in this order: whole-title fuzzy match, then the input
// fuzzily contained in the track title, then the track title fuzzily contained in
// the input.
// NOTE(review): the input title has '.' replaced by a space, but the track titles
// scored here are only passed through NormalizeTrackTitle - confirm the asymmetry
// is intended.
var scoringFunctions = new List<Tuple<Func<Track, string, double>, string>> {
tc((a, t) => a.Title.NormalizeTrackTitle().FuzzyMatch(t), normalizedTitle),
tc((a, t) => a.Title.NormalizeTrackTitle().FuzzyContains(t), normalizedTitle),
tc((a, t) => t.FuzzyContains(a.Title.NormalizeTrackTitle()), normalizedTitle)
};
// The first strategy that produces an accepted track wins; later ones are not tried.
foreach (var func in scoringFunctions)
{
var track = FindByStringInexact(tracks, func.Item1, func.Item2, trackNumber);
if (track != null)
{
MatchProb = matchProb,
NormalizedTitle = normalizedTitle,
Track = track
};
return track;
}
}
var matchList = matches.ToList();
return null;
}
if (!matchList.Any())
/// <summary>
/// Scores every track with <paramref name="scoreFunction"/> and returns the best one
/// only if it is a confident, unambiguous match that also agrees with the track number.
/// </summary>
/// <param name="tracks">Candidate tracks to score.</param>
/// <param name="scoreFunction">Produces a score for a track against <paramref name="title"/>; higher sorts first.</param>
/// <param name="title">Title passed as the second argument to the scoring function.</param>
/// <param name="trackNumber">Expected track number; 0 disables the track-number check.</param>
/// <returns>The top-scoring track when every acceptance condition holds, otherwise null.</returns>
// NOTE(review): this listing appears to interleave old and new lines (diff view without
// +/- markers); the statements that use matchList / normalizedReleaseTitle look like
// remnants of the previous implementation.
private Track FindByStringInexact(List<Track> tracks, Func<Track, string, double> scoreFunction, string title, int trackNumber)
{
// Minimum score the best candidate must exceed.
const double fuzzThreshold = 0.7;
// Minimum lead the best candidate must have over the runner-up.
const double fuzzGap = 0.2;
// Score each track once, then sort best-first.
var sortedTracks = tracks.Select(s => new
{
MatchProb = scoreFunction(s, title),
Track = s
})
.ToList()
.OrderByDescending(s => s.MatchProb)
.ToList();
if (!sortedTracks.Any())
{
return null;
}
_logger.Trace("\nFuzzy track match on '{0}':\n{1}",
normalizedReleaseTitle,
string.Join("\n", matchList.Select(x => $"{x.NormalizedTitle}: {x.MatchProb}")));
_logger.Trace("\nFuzzy track match on '{0:D2} - {1}':\n{2}",
trackNumber,
title,
string.Join("\n", sortedTracks.Select(x => $"{x.Track.AbsoluteTrackNumber:D2} - {x.Track.Title}: {x.MatchProb}")));
if (matchList[0].MatchProb > fuzzThreshold
&& (matchList.Count == 1 || matchList[0].MatchProb - matchList[1].MatchProb > fuzzGap)
&& (trackNumber == 0 || matchList[0].Track.AbsoluteTrackNumber == trackNumber))
return matchList[0].Track;
// Accept the best candidate only when its score clears the threshold, it leads the
// runner-up by more than fuzzGap (or is the only candidate), and the track number is
// unspecified, equals the candidate's AbsoluteTrackNumber, or equals that number plus
// the count of supplied tracks on lower-numbered media (numbering that continues
// across discs).
// NOTE(review): the visible caller passes tracks from GetTracksByMedium for a single
// medium; if that list holds only one medium the count is always 0 - confirm.
if (sortedTracks[0].MatchProb > fuzzThreshold
&& (sortedTracks.Count == 1 || sortedTracks[0].MatchProb - sortedTracks[1].MatchProb > fuzzGap)
&& (trackNumber == 0
|| sortedTracks[0].Track.AbsoluteTrackNumber == trackNumber
|| sortedTracks[0].Track.AbsoluteTrackNumber + tracks.Count(t => t.MediumNumber < sortedTracks[0].Track.MediumNumber) == trackNumber))
{
return sortedTracks[0].Track;
}
return null;
}

@ -208,6 +208,14 @@ namespace NzbDrone.Core.Parser
new Regex(@"(\[|\()*\b((featuring|feat.|feat|ft|ft.)\s{1}){1}\s*.*(\]|\))*", RegexOptions.IgnoreCase | RegexOptions.Compiled),
new Regex(@"(?:\(|\[)(?:[^\(\[]*)(?:version|limited|deluxe|single|clean|album|special|bonus|promo|remastered)(?:[^\)\]]*)(?:\)|\])", RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
// Patterns applied in order by RemoveBracketsAndContents: one for (...) and one for [...].
// Both use a greedy ".*", so a match runs from the first opening bracket to the LAST
// matching closing bracket; text lying between two separate bracketed groups is
// matched as well.
private static readonly Regex[] BracketRegex = new Regex[]
{
new Regex(@"\(.*\)", RegexOptions.Compiled),
new Regex(@"\[.*\]", RegexOptions.Compiled)
};
// Matches from the first '-' or ':' through everything ".*" can consume after it.
private static readonly Regex AfterDashRegex = new Regex(@"[-:].*", RegexOptions.Compiled);
public static ParsedTrackInfo ParseMusicPath(string path)
{
@ -528,14 +536,13 @@ namespace NzbDrone.Core.Parser
return NormalizeRegex.Replace(name, string.Empty).ToLower().RemoveAccent();
}
/// <summary>
/// Normalizes a track title for comparison: removes SpecialEpisodeWordRegex matches,
/// replaces PunctuationRegex and DuplicateSpacesRegex matches with a single space,
/// then trims and lower-cases the result.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The trimmed, lower-cased, cleaned title.</returns>
// NOTE(review): this listing appears to interleave old and new lines (diff view without
// +/- markers). The second signature adds "this", making the method callable as a
// string extension; the two return statements are equivalent apart from line layout.
public static string NormalizeTrackTitle(string title)
public static string NormalizeTrackTitle(this string title)
{
title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
title = PunctuationRegex.Replace(title, " ");
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim()
.ToLower();
return title.Trim().ToLower();
}
public static string NormalizeTitle(string title)
@ -601,6 +608,22 @@ namespace NzbDrone.Core.Parser
return CommonTagRegex[1].Replace(album, string.Empty).Trim();
}
/// <summary>
/// Removes every match of each BracketRegex pattern from <paramref name="album"/>,
/// trimming surrounding whitespace after each pattern has been applied.
/// </summary>
/// <param name="album">Text to strip bracketed sections from.</param>
/// <returns>The text with all pattern matches removed and outer whitespace trimmed.</returns>
public static string RemoveBracketsAndContents(this string album)
{
    var stripped = album;

    // Apply the patterns in their declared order, feeding each result into the next.
    for (var i = 0; i < BracketRegex.Length; i++)
    {
        stripped = BracketRegex[i].Replace(stripped, string.Empty).Trim();
    }

    return stripped;
}
/// <summary>
/// Removes whatever AfterDashRegex matches in <paramref name="text"/> and trims
/// the remainder.
/// </summary>
/// <param name="text">Text to cut.</param>
/// <returns>The text with the matched portion removed and outer whitespace trimmed.</returns>
public static string RemoveAfterDash(this string text)
{
    var truncated = AfterDashRegex.Replace(text, string.Empty);
    return truncated.Trim();
}
public static string CleanTrackTitle(string title)
{
var intermediateTitle = title;
@ -619,7 +642,7 @@ namespace NzbDrone.Core.Parser
var trackNumber = file.Tag.Track;
var trackTitle = file.Tag.Title;
var discNumber = (file.Tag.Disc > 0) ? Convert.ToInt32(file.Tag.Disc) : 1;
var discNumber = (int)file.Tag.Disc;
var artist = file.Tag.FirstAlbumArtist;

@ -50,14 +50,21 @@ namespace NzbDrone.Core.Parser
/// <summary>
/// Resolves an artist from a release title. The title is parsed as an album title and,
/// when that yields an artist name, the name is used for the lookup instead of the
/// raw title.
/// </summary>
/// <param name="title">Release title (or artist name) to resolve.</param>
/// <returns>The artist returned by the artist service, or null when neither lookup finds one.</returns>
// NOTE(review): this listing appears to interleave old and new lines (diff view without
// +/- markers). The direct "return _artistService.FindByName(...)" statements look like
// the previous implementation; the artistInfo lines look like its replacement.
public Artist GetArtist(string title)
{
var parsedAlbumInfo = Parser.ParseAlbumTitle(title);
// Prefer the parsed artist name when parsing produced a non-blank one.
if (parsedAlbumInfo != null && !parsedAlbumInfo.ArtistName.IsNullOrWhiteSpace())
{
title = parsedAlbumInfo.ArtistName;
}
if (parsedAlbumInfo == null || parsedAlbumInfo.ArtistName.IsNullOrWhiteSpace())
var artistInfo = _artistService.FindByName(title);
// Fall back to the inexact lookup only when the exact-name lookup found nothing.
if (artistInfo == null)
{
return _artistService.FindByName(title);
_logger.Debug("Trying inexact artist match for {0}", title);
artistInfo = _artistService.FindByNameInexact(title);
}
return _artistService.FindByName(parsedAlbumInfo.ArtistName);
return artistInfo;
}
public Artist GetArtistFromTag(string file)
@ -81,8 +88,15 @@ namespace NzbDrone.Core.Parser
return null;
}
return _artistService.FindByName(parsedTrackInfo.ArtistTitle);
artist = _artistService.FindByName(parsedTrackInfo.ArtistTitle);
if (artist == null)
{
_logger.Debug("Trying inexact artist match for {0}", parsedTrackInfo.ArtistTitle);
artist = _artistService.FindByNameInexact(parsedTrackInfo.ArtistTitle);
}
return artist;
}
public RemoteAlbum Map(ParsedAlbumInfo parsedAlbumInfo, SearchCriteriaBase searchCriteria = null)
@ -147,6 +161,12 @@ namespace NzbDrone.Core.Parser
albumInfo = _albumService.FindByTitle(artist.Id, parsedAlbumInfo.AlbumTitle);
}
if (albumInfo == null)
{
_logger.Debug("Trying inexact album match for {0}", parsedAlbumInfo.AlbumTitle);
albumInfo = _albumService.FindByTitleInexact(artist.Id, parsedAlbumInfo.AlbumTitle);
}
if (albumInfo != null)
{
result.Add(albumInfo);
@ -186,6 +206,12 @@ namespace NzbDrone.Core.Parser
artist = _artistService.FindByName(parsedAlbumInfo.ArtistName);
if (artist == null)
{
_logger.Debug("Trying inexact artist match for {0}", parsedAlbumInfo.ArtistName);
artist = _artistService.FindByNameInexact(parsedAlbumInfo.ArtistName);
}
if (artist == null)
{
_logger.Debug("No matching artist {0}", parsedAlbumInfo.ArtistName);

Loading…
Cancel
Save