Merge pull request #3071 from rigtorp/tvdb-normalize

Make TVDB name normalizer Unicode-aware
pull/3323/head
dkanada 4 years ago committed by GitHub
commit 91fcd56380
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -274,16 +274,6 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb
.ToList();
}
/// <summary>
/// Characters that are dropped entirely from a name when its comparable form is built.
/// </summary>
const string remove = "\"'!`?";
/// <summary>
/// Characters that are each replaced by a space when a name's comparable form is built.
/// </summary>
// NOTE(review): the trailing comment mentions two dash types, but only one '-' is visible in the literal; confirm none was lost.
const string spacers = "/,.:;\\(){}[]+-_=*"; // (there are two types of dashes, short and long)
/// <summary>
/// Gets a normalized form of the name that is suitable for comparison.
/// </summary>
@ -293,33 +283,11 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb
{
name = name.ToLowerInvariant();
name = name.Normalize(NormalizationForm.FormKD);
var sb = new StringBuilder();
foreach (var c in name)
{
if (c >= 0x2B0 && c <= 0x0333)
{
// skip char modifier and diacritics
}
else if (remove.IndexOf(c) > -1)
{
// skip chars we are removing
}
else if (spacers.IndexOf(c) > -1)
{
sb.Append(" ");
}
else if (c == '&')
{
sb.Append(" and ");
}
else
{
sb.Append(c);
}
}
sb.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " ");
return Regex.Replace(sb.ToString().Trim(), @"\s+", " ");
name = name.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " ");
name = name.Replace("&", " and " );
name = Regex.Replace(name, @"[\p{Lm}\p{Mn}]", string.Empty); // Remove diacritics, etc
name = Regex.Replace(name, @"[\W\p{Pc}]+", " "); // Replace sequences of non-word characters and _ with " "
return name.Trim();
}
private void MapSeriesToResult(MetadataResult<Series> result, TvDbSharper.Dto.Series tvdbSeries, string metadataLanguage)

Loading…
Cancel
Save