Fixed: Speed up RSS sync

3 years ago · 2a4f681b17
parent 24e2ff56dd
commit 2a4f681b17
8 changed files with 595 additions and 319 deletions
--- a/src/NzbDrone.Common.Test/LevenshteinDistanceFixture.cs
+++ b/src/NzbDrone.Common.Test/LevenshteinDistanceFixture.cs
@ -1,4 +1,4 @@
-using FluentAssertions;
+using FluentAssertions;
 using NUnit.Framework;
 using NzbDrone.Common.Extensions;
 using NzbDrone.Test.Common;
@ -25,24 +25,6 @@ namespace NzbDrone.Common.Test
            text.LevenshteinDistance(other).Should().Be(expected);
        }

-        [TestCase("", "", 0)]
-        [TestCase("abc", "abc", 0)]
-        [TestCase("abc", "abcd", 1)]
-        [TestCase("abcd", "abc", 3)]
-        [TestCase("abc", "abd", 3)]
-        [TestCase("abc", "adc", 3)]
-        [TestCase("abcdefgh", "abcghdef", 8)]
-        [TestCase("a.b.c.", "abc", 0)]
-        [TestCase("Agents of shield", "Marvel's Agents Of S.H.I.E.L.D.", 9)]
-        [TestCase("Agents of shield", "Agents of cracked", 14)]
-        [TestCase("Agents of shield", "the shield", 24)]
-        [TestCase("ABCxxx", "ABC1xx", 3)]
-        [TestCase("ABC1xx", "ABCxxx", 3)]
-        public void LevenshteinDistanceClean(string text, string other, int expected)
-        {
-            text.ToLower().LevenshteinDistanceClean(other.ToLower()).Should().Be(expected);
-        }
-
        [TestCase("hello", "hello")]
        [TestCase("hello", "bye")]
        [TestCase("a longer string", "a different long string")]
@ -58,5 +40,26 @@ namespace NzbDrone.Common.Test
        {
            a.FuzzyMatch(b).Should().Be(expected);
        }
+
+        // Each case is (a, b, expected edit distance); every expected value is
+        // below the limit of 10 passed to GetDistance.
+        [TestCase("AVERY", "GARVEY", 3)]
+        [TestCase("ADCROFT", "ADDESSI", 5)]
+        [TestCase("BAIRD", "BAISDEN", 3)]
+        [TestCase("BOGGAN", "BOGGS", 2)]
+        [TestCase("CLAYTON", "CLEARY", 5)]
+        [TestCase("DYBAS", "DYCKMAN", 4)]
+        [TestCase("EMINETH", "EMMERT", 4)]
+        [TestCase("GALANTE", "GALICKI", 4)]
+        [TestCase("HARDIN", "HARDING", 1)]
+        [TestCase("KEHOE", "KEHR", 2)]
+        [TestCase("LOWRY", "LUBARSKY", 5)]
+        [TestCase("MAGALLAN", "MAGANA", 3)]
+        [TestCase("MAYO", "MAYS", 1)]
+        [TestCase("MOENY", "MOFFETT", 4)]
+        [TestCase("PARE", "PARENT", 2)]
+        [TestCase("RAMEY", "RAMFREY", 2)]
+        public void BMtest(string a, string b, int expected)
+        {
+            var distance = ModifiedBerghelRoachEditDistance.GetDistance(a, b, 10);
+
+            distance.Should().Be(expected);
+        }
    }
 }
--- a/src/NzbDrone.Common/Extensions/BerghelRoach.cs
+++ b/src/NzbDrone.Common/Extensions/BerghelRoach.cs
@ -0,0 +1,478 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+using System;
+using System.Diagnostics;
+
+namespace NzbDrone.Common.Extensions
+{
+    /**
+ * A modified version of a string edit distance described by Berghel and
+ * Roach that uses only O(d) space and O(n*d) worst-case time, where n is
+ * the pattern string length and d is the edit distance computed.
+ * We achieve the space reduction by keeping only those sub-computations
+ * required to compute edit distance, giving up the ability to
+ * reconstruct the edit path.
+ */
+    public static class ModifiedBerghelRoachEditDistance
+    {
+        /*
+         * This is a modification of the original Berghel-Roach edit
+         * distance (based on prior work by Ukkonen) described in
+         *   ACM Transactions on Information Systems, Vol. 14, No. 1,
+         *   January 1996, pages 94-106.
+         *
+         * I observed that only O(d) prior computations are required
+         * to compute edit distance.  Rather than keeping all prior
+         * f(k,p) results in a matrix, we keep only the two "outer edges"
+         * in the triangular computation pattern that will be used in
+         * subsequent rounds.  We cannot reconstruct the edit path,
+         * but many applications do not require that; for them, this
+         * modification uses less space (and empirically, slightly
+         * less time).
+         *
+         * First, some history behind the algorithm necessary to understand
+         * Berghel-Roach and our modification...
+         *
+         * The traditional algorithm for edit distance uses dynamic programming,
+         * building a matrix of distances for substrings:
+         * D[i,j] holds the distance for string1[0..i]=>string2[0..j].
+         * The matrix is initially populated with the trivial values
+         * D[0,j]=j and D[i,0]=i; and then expanded with the rule:
+         * <pre>
+         *    D[i,j] = min( D[i-1,j]+1,       // insertion
+         *                  D[i,j-1]+1,       // deletion
+         *                  (D[i-1,j-1]
+         *                   + (string1[i]==string2[j])
+         *                      ? 0           // match
+         *                      : 1           // substitution ) )
+         * </pre>
+         *
+         * Ukkonen observed that each diagonal of the matrix must increase
+         * by either 0 or 1 from row to row.  If D[i,j] = p, then the
+         * matching rule requires that D[i+x,j+x] = p for all x
+         * where string1[i..i+x) matches string2[j..j+x). Ukkonen
+         * defined a function f(k,p) as the highest row number in which p
+         * appears on the k-th diagonal (those D[i,j] where k=(i-j), noting
+         * that k may be negative).  The final result of the edit
+         * distance is the D[n,m] cell, on the (n-m) diagonal; it is
+         * the value of p for which f(n-m, p) = m.  The function f can
+         * also be computed dynamically, according to a simple recursion:
+         * <pre>
+         *    f(k,p) {
+         *      contains_p = max(f(k-1,p-1), f(k,p-1)+1, f(k+1,p-1)+1)
+         *      while (string1[contains_p] == string2[contains_p + k])
+         *        contains_p++;
+         *      return contains_p;
+         *    }
+         * </pre>
+         * The max() expression finds a row where the k-th diagonal must
+         * contain p by virtue of an edit from the prior, same, or following
+         * diagonal (corresponding to an insert, substitute, or delete);
+         * we need not consider more distant diagonals because row-to-row
+         * and column-to-column changes are at most +/- 1.
+         *
+         * The original Ukkonen algorithm computed f(k,p) roughly as
+         * follows:
+         * <pre>
+         *    for (p = 0; ; p++) {
+         *      compute f(k,p) for all valid k
+         *      if (f(n-m, p) == m) return p;
+         *    }
+         * </pre>
+         *
+         * Berghel and Roach observed that many values of f(k,p) are
+         * computed unnecessarily, and reorganized the computation into
+         * a just-in-time sequence.  In each iteration, we are primarily
+         * interested in the terminating value f(main,p), where main=(n-m)
+         * is the main diagonal.  To compute that we need f(x,p-1) for
+         * three values of x: main-1, main, and main+1.  Those depend on
+         * values for p-2, and so forth.  We will already have computed
+         * f(main,p-1) in the prior round, and thus f(main-1,p-2) and
+         * f(main+1,p-2), and so forth.  The only new values we need to compute
+         * are on the edges: f(main-i,p-i) and f(main+i,p-i).  Noting that
+         * f(k,p) is only meaningful when abs(k) is no greater than p,
+         * one of the Berghel-Roach reviewers noted that we can compute
+         * the bounds for i:
+         * <pre>
+         *    (main+i &le; p-i) implies (i &le; (p-main)/2)
+         * </pre>
+         * (where main+i is limited on the positive side) and similarly
+         * <pre>
+         *    (-(main-i) &le; p-i) implies (i &le; (p+main)/2).
+         * </pre>
+         * (where main-i is limited on the negative side).
+         *
+         * This reduces the computation sequence to
+         * <pre>
+         *   for (i = (p-main)/2; i > 0; i--) compute f(main+i,p-i);
+         *   for (i = (p+main)/2; i > 0; i--) compute f(main-i,p-i);
+         *   if (f(main, p) == m) return p;
+         * </pre>
+         *
+         * The original Berghel-Roach algorithm recorded prior values
+         * of f(k,p) in a matrix, using O(distance^2) space, enabling
+         * reconstruction of the edit path, but if all we want is the
+         * edit *distance*, we only need to keep O(distance) prior computations.
+         *
+         * The requisite prior k-1, k, and k+1 values are conveniently
+         * computed in the current round and the two preceding it.
+         * For example, on the higher-diagonal side, we compute:
+         * <pre>
+         *    current[i] = f(main+i, p-i)
+         * </pre>
+         * We keep the two prior rounds of results, where p was one and two
+         * smaller.  So, from the preceding round
+         * <pre>
+         *    last[i] = f(main+i, (p-1)-i)
+         * </pre>
+         *  and from the prior round, but one position back:
+         * <pre>
+         *    prior[i-1] = f(main+(i-1), (p-2)-(i-1))
+         * </pre>
+         * In the current round, one iteration earlier:
+         * <pre>
+         *    current[i+1] = f(main+(i+1), p-(i+1))
+         * </pre>
+         * Note that the distance in all of these evaluates to p-i-1,
+         * and the diagonals are (main+i) and its neighbors... just
+         * what we need.  The lower-diagonal side behaves similarly.
+         *
+         * We need to materialize values that are not computed in prior
+         * rounds, for either of two reasons: <ul>
+         *    <li> Initially, we have no prior rounds, so we need to fill
+         *     all of the "last" and "prior" values for use in the
+         *     first round.  The first round uses only one side
+         *     of the main diagonal or the other.
+         *    <li> In every other round, we compute one more diagonal than before.
+         * </ul>
+         * In all of these cases, the missing f(k,p) values are for abs(k) > p,
+         * where a real value of f(k,p) is undefined.  [The original Berghel-Roach
+         * algorithm prefills its F matrix with these values, but we fill
+         * them as we go, as needed.]  We define
+         * <pre>
+         *    f(-p-1,p) = p, so that we start diagonal -p with row p,
+         *    f(p+1,p) = -1, so that we start diagonal p with row 0.
+         * </pre>
+         * (We also allow f(p+2,p)=f(-p-2,p)=-1, causing those values to
+         * have no effect in the starting row computation.)
+         *
+         * We only expand the set of diagonals visited every other round,
+         * when (p-main) or (p+main) is even.  We keep track of even/oddness
+         * to save some arithmetic.  The first round is always even, as p=abs(main).
+         * Note that we rename the "f" function to "computeRow" to be Googley.
+         */
+
+        /*
+         * Extension-method entry point: returns the edit distance between
+         * text and other by forwarding to GetDistance(string, string, int)
+         * with that method's default limit.
+         */
+        public static int LevenshteinDistance(this string text, string other) => GetDistance(text, other);
+
+        /*
+         * String overload: converts both arguments to char arrays and hands
+         * them, together with limit, to the char[] overload of GetDistance.
+         */
+        public static int GetDistance(string target, string pattern, int limit = 20)
+        {
+            var targetChars = target.ToCharArray();
+            var patternChars = pattern.ToCharArray();
+
+            return GetDistance(targetChars, patternChars, limit);
+        }
+
+        /*
+         * Computes the edit distance between target and pattern with the
+         * modified Berghel-Roach scheme described in the comment at the top
+         * of this class.
+         *
+         * @param target  one string, as a char array
+         * @param pattern the other string, as a char array
+         * @param limit   largest distance worth computing; also the initial
+         *                length of the six working arrays
+         * @return int.MaxValue when the difference in lengths alone exceeds
+         *         limit; otherwise the value of the distance counter when the
+         *         main loop stops, i.e. the edit distance if the main diagonal
+         *         reached the last row, or the first candidate past limit if
+         *         the search gave up
+         */
+        public static int GetDistance(char[] target, char[] pattern, int limit = 20)
+        {
+            var currentLeft = new int[limit];
+
+            var currentRight = new int[limit];
+
+            var lastLeft = new int[limit];
+
+            var lastRight = new int[limit];
+
+            var priorLeft = new int[limit];
+
+            var priorRight = new int[limit];
+
+            var targetLength = target.Length;
+
+            /*
+             * Compute the main diagonal number.
+             * The final result lies on this diagonal.
+             */
+            var main = pattern.Length - targetLength;
+
+            /*
+             * Compute our initial distance candidate.
+             * The result cannot be less than the difference in
+             * string lengths, so we start there.
+             */
+            var distance = Math.Abs(main);
+            if (distance > limit)
+            {
+                /* More than we wanted.  Give up right away */
+                return int.MaxValue;
+            }
+
+            /*
+             * In the main loop below, the current{Right,Left} arrays record results
+             * from the current outer loop pass.  The last{Right,Left} and
+             * prior{Right,Left} arrays hold the results from the preceding two passes.
+             * At the end of the outer loop, we shift them around (reusing the prior
+             * array as the current for the next round, to avoid reallocating).
+             * The Right reflects higher-numbered diagonals, Left lower-numbered.
+             */
+
+            /*
+             * Fill in "prior" values for the first two passes through
+             * the distance loop.  Note that we will execute only one side of
+             * the main diagonal in these passes, so we only need
+             * initialize one side of prior values.
+             */
+
+            if (main <= 0)
+            {
+                EnsureCapacityRight(ref currentRight, ref lastRight, ref priorRight, distance, false);
+                for (var j = 0; j <= distance; j++)
+                {
+                    lastRight[j] = distance - j - 1; /* Make diagonal -k start in row k */
+                    priorRight[j] = -1;
+                }
+            }
+            else
+            {
+                EnsureCapacityLeft(ref currentLeft, ref lastLeft, ref priorLeft, distance, false);
+                for (var j = 0; j <= distance; j++)
+                {
+                    lastLeft[j] = -1; /* Make diagonal +k start in row 0 */
+                    priorLeft[j] = -1;
+                }
+            }
+
+            /*
+             * Keep track of even rounds.  Only those rounds consider new diagonals,
+             * and thus only they require artificial "last" values below.
+             */
+            var even = true;
+
+            /*
+             * MAIN LOOP: try each successive possible distance until one succeeds.
+             */
+            while (true)
+            {
+                /*
+                 * Before calling computeRow(main, distance), we need to fill in
+                 * missing cache elements.  See the high-level description above.
+                 */
+
+                /*
+                 * Higher-numbered diagonals
+                 */
+
+                var offDiagonal = (distance - main) / 2;
+                EnsureCapacityRight(ref currentRight, ref lastRight, ref priorRight, offDiagonal, true);
+
+                if (even)
+                {
+                    /* Higher diagonals start at row 0 */
+                    lastRight[offDiagonal] = -1;
+                }
+
+                var immediateRight = -1;
+                for (; offDiagonal > 0; offDiagonal--)
+                {
+                    currentRight[offDiagonal] = immediateRight = ComputeRow(
+                        main + offDiagonal,
+                        distance - offDiagonal,
+                        pattern,
+                        target,
+                        priorRight[offDiagonal - 1],
+                        lastRight[offDiagonal],
+                        immediateRight);
+                }
+
+                /*
+                 * Lower-numbered diagonals
+                 */
+
+                offDiagonal = (distance + main) / 2;
+                EnsureCapacityLeft(ref currentLeft, ref lastLeft, ref priorLeft, offDiagonal, true);
+
+                if (even)
+                {
+                    /* Lower diagonals, fictitious values for f(-x-1,x) = x */
+                    lastLeft[offDiagonal] = ((distance - main) / 2) - 1;
+                }
+
+                var immediateLeft = even ? -1 : (distance - main) / 2;
+
+                for (; offDiagonal > 0; offDiagonal--)
+                {
+                    currentLeft[offDiagonal] = immediateLeft = ComputeRow(
+                        main - offDiagonal,
+                        distance - offDiagonal,
+                        pattern,
+                        target,
+                        immediateLeft,
+                        lastLeft[offDiagonal],
+                        priorLeft[offDiagonal - 1]);
+                }
+
+                /*
+                 * We are done if the main diagonal has distance in the last row.
+                 */
+                var mainRow = ComputeRow(main, distance, pattern, target, immediateLeft, lastLeft[0], immediateRight);
+
+                /*
+                 * Also stop once the next candidate exceeds the limit.  The
+                 * (distance < 0) test is presumably a guard against ++distance
+                 * wrapping around when limit is int.MaxValue.
+                 */
+                if ((mainRow == targetLength) || (++distance > limit) || (distance < 0))
+                {
+                    break;
+                }
+
+                /* The [0] element goes to both sides. */
+                currentLeft[0] = currentRight[0] = mainRow;
+
+                /*
+                 * Rotate rows around for next round: current=>last=>prior (=>current).
+                 * Both sides recycle the old prior array (held in tmp) as the next
+                 * current array, so current never shares storage with prior.
+                 */
+                var tmp = priorLeft;
+                priorLeft = lastLeft;
+                lastLeft = currentLeft;
+                currentLeft = tmp;
+
+                tmp = priorRight;
+                priorRight = lastRight;
+                lastRight = currentRight;
+                currentRight = tmp;
+
+                /* Update evenness, too */
+                even = !even;
+            }
+
+            return distance;
+        }
+
+        /*
+         * Ukkonen's "f" function: the highest row in which distance p appears
+         * on diagonal k of the edit-distance computation for a and b.  The
+         * diagonal number is the difference between the indices into the two
+         * strings.
+         *
+         * More precisely, this is the highest x such that
+         *     p = edit-distance(a[0:(x+k)), b[0:x)).
+         *
+         * The caller must ensure abs(k) <= p; the result is only well-defined
+         * for those values.
+         *
+         * The three "known" arguments are the cached results for distance p-1
+         * on the neighbouring diagonals:
+         *
+         * @param k diagonal number
+         * @param p edit distance
+         * @param a one string to be compared
+         * @param b other string to be compared
+         * @param knownLeft value of ComputeRow(k-1, p-1, ...)
+         * @param knownAbove value of ComputeRow(k, p-1, ...)
+         * @param knownRight value of ComputeRow(k+1, p-1, ...)
+         * @return the row reached after following matches down diagonal k
+         */
+        private static int ComputeRow(int k,
+            int p,
+            char[] a,
+            char[] b,
+            int knownLeft,
+            int knownAbove,
+            int knownRight)
+        {
+            Debug.Assert(Math.Abs(k) <= p);
+            Debug.Assert(p >= 0);
+
+            /*
+             * Starting row, from the recurrence.  For p == 0 it is row 0.
+             * Otherwise it is one past the best row for distance p-1 on our
+             * own diagonal ("substitute") or on the next diagonal ("delete"),
+             * or the same row as the result on the prior diagonal ("insert"),
+             * whichever is largest.
+             */
+            var row = 0;
+            if (p != 0)
+            {
+                var afterAboveOrRight = (knownAbove > knownRight ? knownAbove : knownRight) + 1;
+                row = afterAboveOrRight > knownLeft ? afterAboveOrRight : knownLeft;
+            }
+
+            /*
+             * Slide down the diagonal while the characters keep matching,
+             * without running off the end of either string.
+             */
+            var rowLimit = Math.Min(b.Length, a.Length - k);
+            for (; row < rowLimit; row++)
+            {
+                if (b[row] != a[row + k])
+                {
+                    break;
+                }
+            }
+
+            return row;
+        }
+
+        /*
+         * Makes sure the three Left arrays can be indexed through index,
+         * inclusively.  Only currentLeft's length is checked; when it is too
+         * short all three arrays are replaced by arrays of length index + 1.
+         * priorLeft and lastLeft keep their contents only if copy is true;
+         * currentLeft always starts fresh.
+         */
+        private static void EnsureCapacityLeft(ref int[] currentLeft, ref int[] lastLeft, ref int[] priorLeft, int index, bool copy)
+        {
+            if (index < currentLeft.Length)
+            {
+                return;
+            }
+
+            var newLength = index + 1;
+            Resize(ref priorLeft, newLength, copy);
+            Resize(ref lastLeft, newLength, copy);
+            Resize(ref currentLeft, newLength, false);
+        }
+
+        /*
+         * Makes sure the three Right arrays can be indexed through index,
+         * inclusively.  Only currentRight's length is checked; when it is too
+         * short all three arrays are replaced by arrays of length index + 1.
+         * priorRight and lastRight keep their contents only if copy is true;
+         * currentRight always starts fresh.
+         */
+        private static void EnsureCapacityRight(ref int[] currentRight, ref int[] lastRight, ref int[] priorRight, int index, bool copy)
+        {
+            if (index < currentRight.Length)
+            {
+                return;
+            }
+
+            var newLength = index + 1;
+            Resize(ref priorRight, newLength, copy);
+            Resize(ref lastRight, newLength, copy);
+            Resize(ref currentRight, newLength, false);
+        }
+
+        /*
+         * Replaces array with one of the given size.  With copy set the old
+         * contents are carried over via Array.Resize; otherwise the caller
+         * gets a brand-new zero-filled array.
+         */
+        private static void Resize(ref int[] array, int size, bool copy)
+        {
+            if (!copy)
+            {
+                array = new int[size];
+                return;
+            }
+
+            Array.Resize(ref array, size);
+        }
+    }
+}
--- a/src/NzbDrone.Common/Extensions/FuzzyContains.cs
+++ b/src/NzbDrone.Common/Extensions/FuzzyContains.cs
@ -1,4 +1,4 @@
-/*
+/*
 * This file incorporates work covered by the following copyright and
 * permission notice:
 *
@ -65,7 +65,17 @@ namespace NzbDrone.Common.Extensions
            }

            // Do a fuzzy compare.
-            return MatchBitap(text, pattern, matchThreshold);
+            if (pattern.Length < 32)
+            {
+                return MatchBitap(text, pattern, matchThreshold, new IntCalculator());
+            }
+
+            if (pattern.Length < 64)
+            {
+                return MatchBitap(text, pattern, matchThreshold, new LongCalculator());
+            }
+
+            return MatchBitap(text, pattern, matchThreshold, new BigIntCalculator());
        }

        /**
@ -75,38 +85,34 @@ namespace NzbDrone.Common.Extensions
         * @param pattern The pattern to search for.
         * @return Best match index or -1.
         */
-        private static Tuple<int, double> MatchBitap(string text, string pattern, double matchThreshold)
+        private static Tuple<int, double> MatchBitap<T>(string text, string pattern, double matchThreshold, Calculator<T> calculator)
        {
            // Initialise the alphabet.
-            Dictionary<char, BigInteger> s = alphabet(pattern);
-
-            // don't keep creating new BigInteger(1)
-            var big1 = new BigInteger(1);
+            var s = Alphabet(pattern, calculator);

-            // Lowest score belowe which we give up.
-            var score_threshold = matchThreshold;
+            // Lowest score below which we give up.
+            var scoreThreshold = matchThreshold;

            // Initialise the bit arrays.
-            var matchmask = big1 << (pattern.Length - 1);
-            int best_loc = -1;
+            var matchmask = calculator.LeftShift(calculator.One, pattern.Length - 1);
+            var bestLoc = -1;

-            // Empty initialization added to appease C# compiler.
-            var last_rd = new BigInteger[0];
-            for (int d = 0; d < pattern.Length; d++)
+            var lastRd = Array.Empty<T>();
+            for (var d = 0; d < pattern.Length; d++)
            {
                // Scan for the best match; each iteration allows for one more error.
-                int start = 1;
-                int finish = text.Length + pattern.Length;
+                var start = 1;
+                var finish = text.Length + pattern.Length;

-                var rd = new BigInteger[finish + 2];
-                rd[finish + 1] = (big1 << d) - big1;
-                for (int j = finish; j >= start; j--)
+                var rd = new T[finish + 2];
+                rd[finish + 1] = calculator.Subtract(calculator.LeftShift(calculator.One, d), calculator.One);
+                for (var j = finish; j >= start; j--)
                {
-                    BigInteger charMatch;
+                    T charMatch;
                    if (text.Length <= j - 1 || !s.ContainsKey(text[j - 1]))
                    {
                        // Out of range.
-                        charMatch = 0;
+                        charMatch = calculator.Zero;
                    }
                    else
                    {
@ -116,40 +122,40 @@ namespace NzbDrone.Common.Extensions
                    if (d == 0)
                    {
                        // First pass: exact match.
-                        rd[j] = ((rd[j + 1] << 1) | big1) & charMatch;
+                        rd[j] = calculator.BitwiseAnd(calculator.BitwiseOr(calculator.LeftShift(rd[j + 1], 1), calculator.One), charMatch);
                    }
                    else
                    {
                        // Subsequent passes: fuzzy match.
-                        rd[j] = ((rd[j + 1] << 1) | big1) & charMatch
-                            | (((last_rd[j + 1] | last_rd[j]) << 1) | big1) | last_rd[j + 1];
+                        rd[j] = calculator.BitwiseOr(calculator.BitwiseAnd(calculator.BitwiseOr(calculator.LeftShift(rd[j + 1], 1), calculator.One), charMatch),
+                            calculator.BitwiseOr(calculator.BitwiseOr(calculator.LeftShift(calculator.BitwiseOr(lastRd[j + 1], lastRd[j]), 1), calculator.One), lastRd[j + 1]));
                    }

-                    if ((rd[j] & matchmask) != 0)
+                    if (calculator.NotEqual(calculator.BitwiseAnd(rd[j], matchmask), calculator.Zero))
                    {
-                        var score = bitapScore(d, pattern);
+                        var score = BitapScore(d, pattern);

                        // This match will almost certainly be better than any existing
                        // match.  But check anyway.
-                        if (score >= score_threshold)
+                        if (score >= scoreThreshold)
                        {
                            // Told you so.
-                            score_threshold = score;
-                            best_loc = j - 1;
+                            scoreThreshold = score;
+                            bestLoc = j - 1;
                        }
                    }
                }

-                if (bitapScore(d + 1, pattern) < score_threshold)
+                if (BitapScore(d + 1, pattern) < scoreThreshold)
                {
                    // No hope for a (better) match at greater error levels.
                    break;
                }

-                last_rd = rd;
+                lastRd = rd;
            }

-            return new Tuple<int, double>(best_loc, score_threshold);
+            return new Tuple<int, double>(bestLoc, scoreThreshold);
        }

        /**
@ -158,7 +164,7 @@ namespace NzbDrone.Common.Extensions
         * @param pattern Pattern being sought.
         * @return Overall score for match (1.0 = good, 0.0 = bad).
         */
-        private static double bitapScore(int e, string pattern)
+        private static double BitapScore(int e, string pattern)
        {
            return 1.0 - ((double)e / pattern.Length);
        }
@ -168,26 +174,70 @@ namespace NzbDrone.Common.Extensions
         * @param pattern The text to encode.
         * @return Hash of character locations.
         */
-        private static Dictionary<char, BigInteger> alphabet(string pattern)
+        private static Dictionary<char, T> Alphabet<T>(string pattern, Calculator<T> calculator)
        {
-            var s = new Dictionary<char, BigInteger>();
-            char[] char_pattern = pattern.ToCharArray();
-            foreach (char c in char_pattern)
+            var s = new Dictionary<char, T>();
+            var charPattern = pattern.ToCharArray();
+            foreach (var c in charPattern)
            {
                if (!s.ContainsKey(c))
                {
-                    s.Add(c, 0);
+                    s.Add(c, calculator.Zero);
                }
            }

-            int i = 0;
-            foreach (char c in char_pattern)
+            var i = 0;
+            foreach (var c in charPattern)
            {
-                s[c] = s[c] | (new BigInteger(1) << (pattern.Length - i - 1));
+                s[c] = calculator.BitwiseOr(s[c], calculator.LeftShift(calculator.One, pattern.Length - i - 1));
                i++;
            }

            return s;
        }
+
+        /*
+         * The small set of integer operations that MatchBitap and Alphabet
+         * need, abstracted over the integer type T so the same search code can
+         * run on int, long or BigInteger bit masks.
+         */
+        private abstract class Calculator<T>
+        {
+            /* The value zero in T. */
+            public abstract T Zero { get; }
+
+            /* The value one in T. */
+            public abstract T One { get; }
+
+            /* Returns a - b. */
+            public abstract T Subtract(T a, T b);
+
+            /* Returns a shifted left by shift bits. */
+            public abstract T LeftShift(T a, int shift);
+
+            /* Returns the bitwise OR of a and b. */
+            public abstract T BitwiseOr(T a, T b);
+
+            /* Returns the bitwise AND of a and b. */
+            public abstract T BitwiseAnd(T a, T b);
+
+            /* Returns true when a and b differ. */
+            public abstract bool NotEqual(T a, T b);
+        }
+
+        /*
+         * Calculator backed by BigInteger.  This is the fallback chosen when
+         * the pattern has 64 or more characters.
+         */
+        private sealed class BigIntCalculator : Calculator<BigInteger>
+        {
+            public override BigInteger Zero => BigInteger.Zero;
+
+            public override BigInteger One => BigInteger.One;
+
+            public override BigInteger Subtract(BigInteger a, BigInteger b)
+            {
+                return a - b;
+            }
+
+            public override BigInteger LeftShift(BigInteger a, int shift)
+            {
+                return a << shift;
+            }
+
+            public override BigInteger BitwiseOr(BigInteger a, BigInteger b)
+            {
+                return a | b;
+            }
+
+            public override BigInteger BitwiseAnd(BigInteger a, BigInteger b)
+            {
+                return a & b;
+            }
+
+            public override bool NotEqual(BigInteger a, BigInteger b)
+            {
+                return !a.Equals(b);
+            }
+        }
+
+        /*
+         * Calculator backed by a 32-bit int.  Chosen when the pattern has
+         * fewer than 32 characters.
+         */
+        private sealed class IntCalculator : Calculator<int>
+        {
+            public override int Zero
+            {
+                get { return 0; }
+            }
+
+            public override int One
+            {
+                get { return 1; }
+            }
+
+            public override int Subtract(int a, int b)
+            {
+                return a - b;
+            }
+
+            public override int LeftShift(int a, int shift)
+            {
+                return a << shift;
+            }
+
+            public override int BitwiseOr(int a, int b)
+            {
+                return a | b;
+            }
+
+            public override int BitwiseAnd(int a, int b)
+            {
+                return a & b;
+            }
+
+            public override bool NotEqual(int a, int b)
+            {
+                return !(a == b);
+            }
+        }
+
+        /*
+         * Calculator backed by a 64-bit long.  Chosen when the pattern has
+         * at least 32 but fewer than 64 characters.
+         */
+        private sealed class LongCalculator : Calculator<long>
+        {
+            public override long Zero
+            {
+                get { return 0L; }
+            }
+
+            public override long One
+            {
+                get { return 1L; }
+            }
+
+            public override long Subtract(long a, long b)
+            {
+                return a - b;
+            }
+
+            public override long LeftShift(long a, int shift)
+            {
+                return a << shift;
+            }
+
+            public override long BitwiseOr(long a, long b)
+            {
+                return a | b;
+            }
+
+            public override long BitwiseAnd(long a, long b)
+            {
+                return a & b;
+            }
+
+            public override bool NotEqual(long a, long b)
+            {
+                return !(a == b);
+            }
+        }
    }
 }
--- a/src/NzbDrone.Common/Extensions/LevenstheinExtensions.cs
+++ b/src/NzbDrone.Common/Extensions/LevenstheinExtensions.cs
@ -1,61 +0,0 @@
-using System;
-
-namespace NzbDrone.Common.Extensions
-{
-    public static class LevenstheinExtensions
-    {
-        public static int LevenshteinDistance(this string text, string other, int costInsert = 1, int costDelete = 1, int costSubstitute = 1)
-        {
-            if (text == other)
-            {
-                return 0;
-            }
-
-            if (text.Length == 0)
-            {
-                return other.Length * costInsert;
-            }
-
-            if (other.Length == 0)
-            {
-                return text.Length * costDelete;
-            }
-
-            int[] matrix = new int[other.Length + 1];
-
-            for (var i = 1; i < matrix.Length; i++)
-            {
-                matrix[i] = i * costInsert;
-            }
-
-            for (var i = 0; i < text.Length; i++)
-            {
-                int topLeft = matrix[0];
-                matrix[0] = matrix[0] + costDelete;
-
-                for (var j = 0; j < other.Length; j++)
-                {
-                    int top = matrix[j];
-                    int left = matrix[j + 1];
-
-                    var sumIns = top + costInsert;
-                    var sumDel = left + costDelete;
-                    var sumSub = topLeft + (text[i] == other[j] ? 0 : costSubstitute);
-
-                    topLeft = matrix[j + 1];
-                    matrix[j + 1] = Math.Min(Math.Min(sumIns, sumDel), sumSub);
-                }
-            }
-
-            return matrix[other.Length];
-        }
-
-        public static int LevenshteinDistanceClean(this string expected, string other)
-        {
-            expected = expected.ToLower().Replace(".", "");
-            other = other.ToLower().Replace(".", "");
-
-            return expected.LevenshteinDistance(other, 1, 3, 3);
-        }
-    }
-}
--- a/src/NzbDrone.Core.Test/MetadataSource/SearchArtistComparerFixture.cs
+++ b/src/NzbDrone.Core.Test/MetadataSource/SearchArtistComparerFixture.cs
@ -1,71 +0,0 @@
-using System.Collections.Generic;
-using System.Linq;
-using FluentAssertions;
-using NUnit.Framework;
-using NzbDrone.Core.Books;
-using NzbDrone.Core.MetadataSource;
-using NzbDrone.Core.Test.Framework;
-
-namespace NzbDrone.Core.Test.MetadataSource
-{
-    [TestFixture]
-    public class SearchAuthorComparerFixture : CoreTest
-    {
-        private List<Author> _author;
-
-        [SetUp]
-        public void Setup()
-        {
-            _author = new List<Author>();
-        }
-
-        private void WithSeries(string name)
-        {
-            _author.Add(new Author { Name = name });
-        }
-
-        [Test]
-        public void should_prefer_the_walking_dead_over_talking_dead_when_searching_for_the_walking_dead()
-        {
-            WithSeries("Talking Dead");
-            WithSeries("The Walking Dead");
-
-            _author.Sort(new SearchAuthorComparer("the walking dead"));
-
-            _author.First().Name.Should().Be("The Walking Dead");
-        }
-
-        [Test]
-        public void should_prefer_the_walking_dead_over_talking_dead_when_searching_for_walking_dead()
-        {
-            WithSeries("Talking Dead");
-            WithSeries("The Walking Dead");
-
-            _author.Sort(new SearchAuthorComparer("walking dead"));
-
-            _author.First().Name.Should().Be("The Walking Dead");
-        }
-
-        [Test]
-        public void should_prefer_blacklist_over_the_blacklist_when_searching_for_blacklist()
-        {
-            WithSeries("The Blacklist");
-            WithSeries("Blacklist");
-
-            _author.Sort(new SearchAuthorComparer("blacklist"));
-
-            _author.First().Name.Should().Be("Blacklist");
-        }
-
-        [Test]
-        public void should_prefer_the_blacklist_over_blacklist_when_searching_for_the_blacklist()
-        {
-            WithSeries("Blacklist");
-            WithSeries("The Blacklist");
-
-            _author.Sort(new SearchAuthorComparer("the blacklist"));
-
-            _author.First().Name.Should().Be("The Blacklist");
-        }
-    }
-}
--- a/src/NzbDrone.Core.Test/MusicTests/ArtistServiceTests/FindByNameInexactFixture.cs
+++ b/src/NzbDrone.Core.Test/MusicTests/ArtistServiceTests/FindByNameInexactFixture.cs
@ -34,10 +34,8 @@ namespace NzbDrone.Core.Test.MusicTests.AuthorServiceTests
                .Returns(_authors);
        }

-        [TestCase("The Black Eyde Peas", "The Black Eyed Peas")]
-        [TestCase("Black Eyed Peas", "The Black Eyed Peas")]
+        [TestCase("The Black Eyd Peas", "The Black Eyed Peas")]
        [TestCase("The Black eys", "The Black Keys")]
-        [TestCase("Black Keys", "The Black Keys")]
        public void should_find_author_in_db_by_name_inexact(string name, string expected)
        {
            var author = Subject.FindByNameInexact(name);
@ -46,20 +44,6 @@ namespace NzbDrone.Core.Test.MusicTests.AuthorServiceTests
            author.Name.Should().Be(expected);
        }

-        [Test]
-        public void should_find_author_when_the_is_omitted_from_start()
-        {
-            _authors = new List<Author>();
-            _authors.Add(CreateAuthor("Black Keys"));
-            _authors.Add(CreateAuthor("The Black Eyed Peas"));
-
-            Mocker.GetMock<IAuthorRepository>()
-                .Setup(s => s.All())
-                .Returns(_authors);
-
-            Subject.FindByNameInexact("The Black Keys").Should().NotBeNull();
-        }
-
        [TestCase("The Black Peas")]
        public void should_not_find_author_in_db_by_ambiguous_name(string name)
        {
--- a/src/NzbDrone.Core/Books/Services/AuthorService.cs
+++ b/src/NzbDrone.Core/Books/Services/AuthorService.cs
@ -99,21 +99,10 @@ namespace NzbDrone.Core.Books
            Func<Func<Author, string, double>, string, Tuple<Func<Author, string, double>, string>> tc = Tuple.Create;
            var scoringFunctions = new List<Tuple<Func<Author, string, double>, string>>
            {
-                tc((a, t) => a.CleanName.FuzzyMatch(t), cleanTitle),
-                tc((a, t) => a.Name.FuzzyMatch(t), title),
-                tc((a, t) => a.Name.ToLastFirst().FuzzyMatch(t), title),
-                tc((a, t) => a.Metadata.Value.Aliases.Concat(new List<string> { a.Name }).Max(x => x.CleanAuthorName().FuzzyMatch(t)), cleanTitle),
+                tc((a, t) => a.Metadata.Value.Name.FuzzyMatch(t), title),
+                tc((a, t) => a.Metadata.Value.NameLastFirst.FuzzyMatch(t), title)
            };

-            if (title.StartsWith("The ", StringComparison.CurrentCultureIgnoreCase))
-            {
-                scoringFunctions.Add(tc((a, t) => a.CleanName.FuzzyMatch(t), title.Substring(4).CleanAuthorName()));
-            }
-            else
-            {
-                scoringFunctions.Add(tc((a, t) => a.CleanName.FuzzyMatch(t), "the" + cleanTitle));
-            }
-
            return scoringFunctions;
        }

@ -151,9 +140,8 @@ namespace NzbDrone.Core.Books
            Func<Func<Author, string, double>, string, Tuple<Func<Author, string, double>, string>> tc = Tuple.Create;
            var scoringFunctions = new List<Tuple<Func<Author, string, double>, string>>
            {
-                tc((a, t) => t.FuzzyContains(a.CleanName), cleanReportTitle),
                tc((a, t) => t.FuzzyContains(a.Metadata.Value.Name), reportTitle),
-                tc((a, t) => t.FuzzyContains(a.Metadata.Value.Name.ToLastFirst()), reportTitle)
+                tc((a, t) => t.FuzzyContains(a.Metadata.Value.NameLastFirst), reportTitle)
            };

            return scoringFunctions;
--- a/src/NzbDrone.Core/MetadataSource/SearchAuthorComparer.cs
+++ b/src/NzbDrone.Core/MetadataSource/SearchAuthorComparer.cs
@ -1,95 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Text.RegularExpressions;
-using NzbDrone.Common.Extensions;
-using NzbDrone.Core.Books;
-
-namespace NzbDrone.Core.MetadataSource
-{
-    public class SearchAuthorComparer : IComparer<Author>
-    {
-        private static readonly Regex RegexCleanPunctuation = new Regex("[-._:]", RegexOptions.Compiled);
-        private static readonly Regex RegexCleanCountryYearPostfix = new Regex(@"(?<=.+)( \([A-Z]{2}\)| \(\d{4}\)| \([A-Z]{2}\) \(\d{4}\))$", RegexOptions.Compiled);
-        private static readonly Regex ArticleRegex = new Regex(@"^(a|an|the)\s", RegexOptions.IgnoreCase | RegexOptions.Compiled);
-
-        public string SearchQuery { get; private set; }
-
-        private readonly string _searchQueryWithoutYear;
-        private int? _year;
-
-        public SearchAuthorComparer(string searchQuery)
-        {
-            SearchQuery = searchQuery;
-
-            var match = Regex.Match(SearchQuery, @"^(?<query>.+)\s+(?:\((?<year>\d{4})\)|(?<year>\d{4}))$");
-            if (match.Success)
-            {
-                _searchQueryWithoutYear = match.Groups["query"].Value.ToLowerInvariant();
-                _year = int.Parse(match.Groups["year"].Value);
-            }
-            else
-            {
-                _searchQueryWithoutYear = searchQuery.ToLowerInvariant();
-            }
-        }
-
-        public int Compare(Author x, Author y)
-        {
-            int result = 0;
-
-            // Prefer exact matches
-            result = Compare(x, y, s => CleanPunctuation(s.Name).Equals(CleanPunctuation(SearchQuery)));
-            if (result != 0)
-            {
-                return -result;
-            }
-
-            // Remove Articles (a/an/the)
-            result = Compare(x, y, s => CleanArticles(s.Name).Equals(CleanArticles(SearchQuery)));
-            if (result != 0)
-            {
-                return -result;
-            }
-
-            // Prefer close matches
-            result = Compare(x, y, s => CleanPunctuation(s.Name).LevenshteinDistance(CleanPunctuation(SearchQuery)) <= 1);
-            if (result != 0)
-            {
-                return -result;
-            }
-
-            return Compare(x, y, s => SearchQuery.LevenshteinDistanceClean(s.Name));
-        }
-
-        public int Compare<T>(Author x, Author y, Func<Author, T> keySelector)
-            where T : IComparable<T>
-        {
-            var keyX = keySelector(x);
-            var keyY = keySelector(y);
-
-            return keyX.CompareTo(keyY);
-        }
-
-        private string CleanPunctuation(string title)
-        {
-            title = RegexCleanPunctuation.Replace(title, "");
-
-            return title.ToLowerInvariant();
-        }
-
-        private string CleanTitle(string title)
-        {
-            title = RegexCleanPunctuation.Replace(title, "");
-            title = RegexCleanCountryYearPostfix.Replace(title, "");
-
-            return title.ToLowerInvariant();
-        }
-
-        private string CleanArticles(string title)
-        {
-            title = ArticleRegex.Replace(title, "");
-
-            return title.Trim().ToLowerInvariant();
-        }
-    }
-}