@ -5,6 +5,7 @@ using System.Linq;
using System.Net ;
using System.Net ;
using System.Threading ;
using System.Threading ;
using System.Threading.Tasks ;
using System.Threading.Tasks ;
using System.Text.RegularExpressions ;
using MediaBrowser.Common.Net ;
using MediaBrowser.Common.Net ;
using MediaBrowser.Controller.Library ;
using MediaBrowser.Controller.Library ;
using MediaBrowser.Controller.Providers ;
using MediaBrowser.Controller.Providers ;
@ -19,6 +20,20 @@ namespace MediaBrowser.Providers.Tmdb.Movies
public class TmdbSearch
public class TmdbSearch
{
{
private static readonly CultureInfo EnUs = new CultureInfo ( "en-US" ) ;
private static readonly CultureInfo EnUs = new CultureInfo ( "en-US" ) ;
private static readonly Regex cleanEnclosed = new Regex ( @"\p{Ps}.*\p{Pe}" , RegexOptions . Compiled ) ;
private static readonly Regex cleanNonWord = new Regex ( @"[\W_]+" , RegexOptions . Compiled ) ;
private static readonly Regex cleanStopWords = new Regex ( @ "\b( # Start at word boundary
19 [ 0 - 9 ] { 2 } | 20 [ 0 - 9 ] { 2 } | # 1900 - 2099
S [ 0 - 9 ] { 2 } | # Season
E [ 0 - 9 ] { 2 } | # Episode
( 2160 | 1080 | 720 | 576 | 480 ) [ ip ] ? | # Resolution
[xh] ? 264 | # Encoding
( web | dvd | bd | hdtv | hd ) rip | # * Rip
web | hdtv | mp4 | bluray | ktr | dl | single | imageset | internal | doku | dubbed | retail | xxx | flac
) . * # Match rest of string ",
RegexOptions . Compiled | RegexOptions . IgnorePatternWhitespace | RegexOptions . IgnoreCase ) ;
private const string Search3 = TmdbUtils . BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}" ;
private const string Search3 = TmdbUtils . BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}" ;
private readonly ILogger _logger ;
private readonly ILogger _logger ;
@ -61,19 +76,18 @@ namespace MediaBrowser.Providers.Tmdb.Movies
var tmdbImageUrl = tmdbSettings . images . GetImageUrl ( "original" ) ;
var tmdbImageUrl = tmdbSettings . images . GetImageUrl ( "original" ) ;
if ( ! string . IsNullOrWhiteSpace ( name ) )
// Does this mean we are reparsing already parsed ItemLookupInfo?
{
var parsedName = _libraryManager . ParseName ( name ) ;
var parsedName = _libraryManager . ParseName ( name ) ;
var yearInName = parsedName . Year ;
var yearInName = parsedName . Year ;
name = parsedName . Name ;
name = parsedName . Name ;
year = year ? ? yearInName ;
year = year ? ? yearInName ;
}
_logger . LogInformation ( " MovieDbProvider: Finding id for item: " + name ) ;
_logger . LogInformation ( "TmdbSearch: Finding id for item: {0} ({1})" , name , year ) ;
var language = idInfo . MetadataLanguage . ToLowerInvariant ( ) ;
var language = idInfo . MetadataLanguage . ToLowerInvariant ( ) ;
//nope - search for it
// Replace sequences of non-word characters with space
//var searchType = item is BoxSet ? "collection" : "movie";
// TMDB expects a space separated list of words make sure that is the case
name = cleanNonWord . Replace ( name , " " ) . Trim ( ) ;
var results = await GetSearchResults ( name , searchType , year , language , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
var results = await GetSearchResults ( name , searchType , year , language , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
@ -86,36 +100,35 @@ namespace MediaBrowser.Providers.Tmdb.Movies
}
}
}
}
// Ideally retrying alternatives should be done outside the search
// provider so that the retry logic can be common for all search
// providers
if ( results . Count = = 0 )
if ( results . Count = = 0 )
{
{
// try with dot and _ turned to space
name = parsedName . Name ;
var originalName = name ;
// Remove things enclosed in []{}() etc
name = name . Replace ( "," , " " ) ;
name = cleanEnclosed . Replace ( name , string . Empty ) ;
name = name . Replace ( "." , " " ) ;
name = name . Replace ( "_" , " " ) ;
name = name . Replace ( "-" , " " ) ;
name = name . Replace ( "!" , " " ) ;
name = name . Replace ( "?" , " " ) ;
var parenthIndex = name . IndexOf ( '(' ) ;
if ( parenthIndex ! = - 1 )
{
name = name . Substring ( 0 , parenthIndex ) ;
}
// Replace sequences of non-word characters with space
name = cleanNonWord . Replace ( name , " " ) ;
// Clean based on common stop words / tokens
name = cleanStopWords . Replace ( name , string . Empty ) ;
// Trim whitespace
name = name . Trim ( ) ;
name = name . Trim ( ) ;
// Search again if the new name is different
// Search again if the new name is different
if ( ! string . Equals ( name , originalName ) )
if ( ! string . Equals ( name , parsedName. Name ) & & ! string . IsNullOrWhiteSpace ( n ame) )
{
{
_logger . LogInformation ( "TmdbSearch: Finding id for item: {0} ({1})" , name , year ) ;
results = await GetSearchResults ( name , searchType , year , language , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
results = await GetSearchResults ( name , searchType , year , language , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
if ( results . Count = = 0 & & ! string . Equals ( language , "en" , StringComparison . OrdinalIgnoreCase ) )
if ( results . Count = = 0 & & ! string . Equals ( language , "en" , StringComparison . OrdinalIgnoreCase ) )
{
{
//one more time, in english
//one more time, in english
results = await GetSearchResults ( name , searchType , year , "en" , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
results = await GetSearchResults ( name , searchType , year , "en" , tmdbImageUrl , cancellationToken ) . ConfigureAwait ( false ) ;
}
}
}
}
}
}