Fixed: Parsing similar movie titles with common words at end

pull/7690/head
Mark McDowall 3 years ago committed by Qstick
parent 53a9c849cb
commit 691a8955fe

@ -37,22 +37,40 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("or")] [TestCase("or")]
[TestCase("an")] [TestCase("an")]
[TestCase("of")] [TestCase("of")]
public void should_remove_common_words(string word) public void should_remove_common_words_from_middle_of_title(string word)
{ {
var dirtyFormat = new[] var dirtyFormat = new[]
{ {
"word.{0}.word", "word.{0}.word",
"word {0} word", "word {0} word",
"word-{0}-word", "word-{0}-word"
};
foreach (var s in dirtyFormat)
{
var dirty = string.Format(s, word);
dirty.CleanMovieTitle().Should().Be("wordword");
}
}
[TestCase("the")]
[TestCase("and")]
[TestCase("or")]
[TestCase("an")]
[TestCase("of")]
public void should_not_remove_common_words_from_end_of_title(string word)
{
var dirtyFormat = new[]
{
"word.word.{0}", "word.word.{0}",
"word-word-{0}", "word-word-{0}",
"word-word {0}", "word-word {0}"
}; };
foreach (var s in dirtyFormat) foreach (var s in dirtyFormat)
{ {
var dirty = string.Format(s, word); var dirty = string.Format(s, word);
dirty.CleanMovieTitle().Should().Be("wordword"); dirty.CleanMovieTitle().Should().Be("wordword" + word.ToLower());
} }
} }

@ -100,7 +100,7 @@ namespace NzbDrone.Core.Parser
// Regex to unbracket alternative titles. // Regex to unbracket alternative titles.
private static readonly Regex BracketedAlternativeTitleRegex = new Regex(@"(.*) \([ ]*AKA[ ]+(.*)\)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex BracketedAlternativeTitleRegex = new Regex(@"(.*) \([ ]*AKA[ ]+(.*)\)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])(a(?!$|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])|an|the|and|or|of)(?:\b|_))|\W|_", private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])(a(?!$|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])|an|the|and|or|of)(?!$)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$", private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$",

Loading…
Cancel
Save