@@ -33,6 +33,25 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)


# Check if any element from source array is contained partially or exactly in any element from target array
# Returns on the first match
def _contains_element(_from=None, _in=None, exactly=False):
    source_array = _from
    target_array = _in

    for source in source_array:
        for target in target_array:
            if exactly:
                if sanitize(source) == sanitize(target):
                    return True
            else:
                if sanitize(source) in sanitize(target):
                    return True

    return False
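
# Illustrative usage (values made up; assumes subliminal's sanitize normalizes
# case and punctuation):
#   _contains_element(_from=['Breaking Bad'], _in=['Breaking.Bad.S01E01'])        -> True (partial)
#   _contains_element(_from=['Breaking Bad'], _in=['Breaking Bad'], exactly=True) -> True (exact)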

class TitulkySubtitle(Subtitle):
    """Titulky.com subtitle"""

@@ -78,8 +97,9 @@ class TitulkySubtitle(Subtitle):
        self.matches = None

        # Try to parse S00E00 string from the main subtitle name
        season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0],
                                           re.IGNORECASE)
        season_episode_string = None
        if len(self.names) > 0:
            season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0], re.IGNORECASE)
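        # Illustrative (hypothetical name): re.findall(r'S(\d+)E(\d+)', 'Show.S02E05.720p', re.IGNORECASE)
        # returns [('02', '05')], so a non-empty result means a season/episode tag was found.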
        # If we did not search for subtitles with season and episode numbers in search query,
        # try to parse it from the main subtitle name that most likely contains it

@@ -103,11 +123,14 @@ class TitulkySubtitle(Subtitle):
        sub_names = self._remove_season_episode_string(self.names)

        if _type == 'episode':
            ## EPISODE
            # EPISODE

            # match imdb_id of a series
            if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
                matches.add('series_imdb_id')
                # NOTE: Is matches.add('series_imdb_id') doing anything?
                # For now, let's match with the 'series' to not reject
                # subs with no name but a correct imdb id.
                matches.add('series')

            # match season/episode
            if self.season and self.season == video.season:
@@ -116,27 +139,28 @@ class TitulkySubtitle(Subtitle):
                matches.add('episode')

            # match series name
            series_names = [video.series] + video.alternative_series
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
            )
            if _contains_element(_from=series_names,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('series')

            # match episode title
            episode_titles = [video.title]
            logger.debug(
                f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
            )
            if _contains_element(_from=episode_titles,
                                 _in=sub_names,
                                 exactly=True):
                matches.add('episode_title')
            if len(sub_names) > 0:
                series_names = [video.series] + video.alternative_series
                logger.debug(
                    f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
                )
                if _contains_element(_from=series_names,
                                     _in=sub_names,
                                     exactly=True):
                    matches.add('series')

                # match episode title
                episode_titles = [video.title]
                logger.debug(
                    f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
                )
                if _contains_element(_from=episode_titles,
                                     _in=sub_names,
                                     exactly=True):
                    matches.add('episode_title')

        elif _type == 'movie':
            ## MOVIE
            # MOVIE

            # match imdb_id of a movie
            if video.imdb_id and video.imdb_id == self.imdb_id:

@@ -152,7 +176,7 @@ class TitulkySubtitle(Subtitle):
                                 exactly=True):
                matches.add('title')

        ## MOVIE OR EPISODE
        # MOVIE OR EPISODE

        # match year
        if video.year and video.year == self.year:

@@ -163,8 +187,7 @@ class TitulkySubtitle(Subtitle):
            matches |= guess_matches(video, guessit(release, {"type": _type}))

        # If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
        if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
                video.fps, self.fps):
        if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(video.fps, self.fps):
            logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
            matches.clear()
@@ -213,16 +236,13 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
            raise ConfigurationError("Username and password must be specified!")

        if type(skip_wrong_fps) is not bool:
            raise ConfigurationError(
                f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
            raise ConfigurationError(f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")

        if type(approved_only) is not bool:
            raise ConfigurationError(
                f"Approved_only {approved_only} must be a boolean!")
            raise ConfigurationError(f"Approved_only {approved_only} must be a boolean!")

        if type(multithreading) is not bool:
            raise ConfigurationError(
                f"Multithreading {multithreading} must be a boolean!")
            raise ConfigurationError(f"Multithreading {multithreading} must be a boolean!")

        self.username = username
        self.password = password
@@ -243,7 +263,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        # Set headers
        cached_user_agent = cache.get('titulky_user_agent')
        if cached_user_agent == NO_VALUE:
            new_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
            new_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
            cache.set('titulky_user_agent', new_user_agent)
            self.session.headers['User-Agent'] = new_user_agent
        else:
@@ -271,7 +291,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
            return True

        logger.info("Titulky.com: Logging in...")

        data = {'LoginName': self.username, 'LoginPassword': self.password}
        res = self.session.post(self.server_url,
                                data,
@@ -312,22 +332,24 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        else:
            raise AuthenticationError("Logout failed.")

    # GET request a page. This function acts as a requests.session.get proxy handling expired cached cookies
    # GET request a page. This function acts as a requests.session.get proxy handling expired cached cookies
    # and subsequent relogging and sending the original request again. If all went well, returns the response.
    def get_request(self, url, ref=server_url, recursion=0):
        # That's deep... recursion... Stop. We don't have infinite memory. And we don't want to
    def get_request(self, url, ref=server_url, allow_redirects=False, _recursion=0):
        # That's deep... recursion... Stop. We don't have infinite memory. And we don't want to
        # spam titulky's server either. So we have to just accept the defeat. Let it throw!
        if recursion >= 5:
            logger.debug(f"Titulky.com: Got into a loop while trying to send a request after relogging.")
            raise AuthenticationError("Got into a loop and couldn't get authenticated!")
        if _recursion >= 5:
            logger.debug(
                f"Titulky.com: Got into a loop while trying to send a request after relogging.")
            raise AuthenticationError(
                "Got into a loop and couldn't get authenticated!")

        logger.debug(f"Titulky.com: Fetching url: {url}")

        res = self.session.get(
            url,
            timeout=self.timeout,
            allow_redirects=False,
            headers={'Referer': quote(ref) if ref else None})  # URL encode ref if it has value
            allow_redirects=allow_redirects,
            headers={'Referer': quote(ref) if ref else None})  # URL encode ref if it has value
        # Check if we got redirected because login cookies expired.
        # Note: micro-optimization - don't bother parsing the query string for non-302 responses.
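        # Illustrative redirect we are detecting here (structure inferred from the
        # checks below, not a verbatim server response):
        #   Location: /?msg_type=e&msg=P%C5%99ihla%C5%A1te%20se...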
@@ -336,15 +358,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
            if location_qs['msg_type'][0] == 'e' and "Přihlašte se" in location_qs['msg'][0]:  # "Přihlašte se" = "Log in"
                logger.debug(f"Titulky.com: Login cookies expired.")
                self.login(True)
                return self.get_request(url, ref=ref, recursion=(recursion + 1))

        return res
                return self.get_request(url, ref=ref, _recursion=(_recursion + 1))

    def fetch_page(self, url, ref=None):
        logger.debug(f"Titulky.com: Fetching url: {url}")
        return res

        res = self.get_request(url, ref=ref)
    def fetch_page(self, url, ref=server_url, allow_redirects=False):
        res = self.get_request(url, ref=ref, allow_redirects=allow_redirects)

        if res.status_code != 200:
            raise HTTPError(f"Fetch failed with status code {res.status_code}")

@@ -353,13 +372,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        return res.text
    def build_search_url(self, params):
    def build_url(self, params):
        result = f"{self.server_url}/?"

        params['action'] = 'search'
        # Requires subtitle names to match full search keyword
        params['fsf'] = 1

        for key, value in params.items():
            result += f'{key}={value}&'

@@ -371,9 +386,39 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        return result
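
    # Illustrative result (assumes server_url is 'https://premium.titulky.com' and
    # that the elided lines above strip the trailing '&'):
    #   build_url({'Fulltext': 'Westworld'})
    #   -> 'https://premium.titulky.com/?Fulltext=Westworld'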

    # Makes sure the function communicates with the caller as expected. For threads, do not return data, but
    # pass them via the threads_data object. For synchronous calls, treat it normally, without any changes.
    def capable_of_multithreading(func):
        def outer_func(*args, **kwargs):
            if 'threads_data' in kwargs and 'thread_id' in kwargs:
                if type(kwargs['threads_data']) is list and type(kwargs['thread_id']) is int:
                    try:
                        func_kwargs = kwargs.copy()
                        func_kwargs.pop('threads_data', None)
                        func_kwargs.pop('thread_id', None)
                        returnValue = func(*args, **func_kwargs)
                        kwargs['threads_data'][kwargs['thread_id']] = {
                            'return_value': returnValue,
                            'exception': None
                        }
                    except BaseException as e:
                        kwargs['threads_data'][kwargs['thread_id']] = {
                            'return_value': None,
                            'exception': e
                        }
                        raise e
            else:
                return func(*args, **kwargs)

        return outer_func
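
    # Illustrative usage of the decorator (hypothetical values):
    #   threads_data = [None] * len(rows)
    #   Thread(target=self.process_row,
    #          args=[rows[0], video_names, search_url],
    #          kwargs={'thread_id': 0, 'threads_data': threads_data}).start()
    #   # after the thread finishes:
    #   #   threads_data[0] == {'return_value': <result or None>, 'exception': None}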

    # TODO: Parse name and alternative names of a series / movie
    # Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
    def parse_details(self, details_url, search_url):
        html_src = self.fetch_page(details_url, ref=search_url)
    @capable_of_multithreading
    def parse_details(self, partial_info, ref_url=None):
        html_src = self.fetch_page(partial_info['details_link'], ref=ref_url)
        details_page_soup = ParserBeautifulSoup(html_src,
                                                ['lxml', 'html.parser'])

@@ -383,9 +428,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
            logger.info("Titulky.com: Could not find details div container. Skipping.")
            return False

        ### IMDB ID
        # IMDB ID
        imdb_id = None
        imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
        imdb_tag = details_page_soup.find('a', attrs={'target': re.compile(r"imdb", re.IGNORECASE)})
        if imdb_tag:
            imdb_url = imdb_tag.get('href')

@@ -394,20 +439,19 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        if not imdb_id:
            logger.debug("Titulky.com: No IMDB ID supplied on details page.")

        ### RELEASE
        # RELEASE
        release = None
        release_tag = details_container.find('div', class_='releas')

        if not release_tag:
            raise ParseResponseError(
                "Could not find release tag. Did the HTML source change?")
            raise ParseResponseError("Could not find release tag. Did the HTML source change?")

        release = release_tag.get_text(strip=True)

        if not release:
            logger.debug("Titulky.com: No release information supplied on details page.")

        ### LANGUAGE
        # LANGUAGE
        language = None
        czech_flag = details_container.select('img[src*=\'flag-CZ\']')
        slovak_flag = details_container.select('img[src*=\'flag-SK\']')

@@ -420,7 +464,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        if not language:
            logger.debug("Titulky.com: No language information supplied on details page.")

        ### UPLOADER
        # UPLOADER
        uploader = None
        uploader_tag = details_container.find('div', class_='ulozil')

@@ -441,7 +485,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        if not uploader:
            logger.debug("Titulky.com: No uploader name supplied on details page.")

        ### FPS
        # FPS
        fps = None
        fps_icon_tag_selection = details_container.select(
            'img[src*=\'Movieroll\']')

@@ -462,7 +506,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        if not fps:
            logger.debug("Titulky.com: No fps supplied on details page.")

        ### YEAR
        # YEAR
        year = None
        h1_tag = details_container.find('h1', id='titulky')

@@ -482,7 +526,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        details_page_soup = None

        # Return the subtitle details
        return {
        info = {
            'releases': [release],
            'language': language,
            'uploader': uploader,

@@ -491,88 +536,141 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
            'imdb_id': imdb_id
        }

        info.update(partial_info)
        return info

    # Process a single row of subtitles from a query method
    @capable_of_multithreading
    def process_row(self,
                    row,
                    video_names,
                    search_url,
                    thread_id=None,
                    threads_data=None):
        try:
            # The first anchor tag is an image preview, the second is the name
            anchor_tag = row.find_all('a')[1]
            # The details link is relative, so we need to remove the dot at the beginning
            details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
            id_match = re.findall(r'id=(\d+)', details_link)
            sub_id = id_match[0] if len(id_match) > 0 else None
            download_link = f"{self.download_url}{sub_id}"
            # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
            approved = True if 'pbl1' in row.get('class') else False

            # Subtitle name + its alternative names
            table_columns = row.findAll('td')
            main_sub_name = anchor_tag.get_text(strip=True)
            alt_sub_names = [
                alt_sub_name.strip()
                for alt_sub_name in table_columns[2].string.split('/')
            ] if table_columns[2].string else []
            sub_names = [main_sub_name] + alt_sub_names

            # Does at least one subtitle name contain one of the video names?
            # Skip subtitles that do not match
            # Video names -> the main title and alternative titles of a movie or an episode and so on...
            # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
            # Could be handled in the TitulkySubtitle class, however we want to keep the number of requests
            # as low as possible and this prevents us from requesting the details page unnecessarily
            if not _contains_element(_from=video_names, _in=sub_names):
                logger.info(
                    f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
                )
                if type(threads_data) is list and type(thread_id) is int:
                    threads_data[thread_id] = {
                        'sub_info': None,
                        'exception': None
                    }

                return None

            details = self.parse_details(details_link, search_url)
            if not details:
                # Details parsing was NOT successful, skipping
                if type(threads_data) is list and type(thread_id) is int:
                    threads_data[thread_id] = {
                        'sub_info': None,
                        'exception': None
                    }

                return None

            # Combine all subtitle data into one dict
            result = {
                'names': sub_names,
                'id': sub_id,
                'approved': approved,
                'details_link': details_link,
                'download_link': download_link
            }

            result.update(details)

            if type(threads_data) is list and type(thread_id) is int:
                threads_data[thread_id] = {
                    'sub_info': result,
                    'exception': None
                }
                    ref_url):
        # The first anchor tag is an image preview, the second is the name
        anchor_tag = row.find_all('a')[1]
        # The details link is relative, so we need to remove the dot at the beginning
        details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
        id_match = re.findall(r'id=(\d+)', details_link)
        sub_id = id_match[0] if len(id_match) > 0 else None
        download_link = f"{self.download_url}{sub_id}"
        # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
        approved = True if 'pbl1' in row.get('class') else False

        # Subtitle name + its alternative names
        table_columns = row.findAll('td')
        main_sub_name = anchor_tag.get_text(strip=True)
        alt_sub_names = [
            alt_sub_name.strip()
            for alt_sub_name in table_columns[2].string.split('/')
        ] if table_columns[2].string else []
        sub_names = [main_sub_name] + alt_sub_names

        # Does at least one subtitle name contain one of the video names?
        # Skip subtitles that do not match
        # Video names -> the main title and alternative titles of a movie or an episode and so on...
        # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
        # Could be handled in the TitulkySubtitle class, however we want to keep the number of requests
        # as low as possible and this prevents us from requesting the details page unnecessarily
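        # Illustrative (values made up): video_names ['Westworld'] against
        # sub_names ['Westworld.S02E01.WEB'] is a partial match, so the row is kept.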
        if not _contains_element(_from=video_names, _in=sub_names):
            logger.info(
                f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
            )
            return None

        partial_info = {
            'names': sub_names,
            'id': sub_id,
            'approved': approved,
            'details_link': details_link,
            'download_link': download_link
        }

        details = self.parse_details(partial_info, ref_url)
        return details

    #########
    # FIXME: After switching from the multithreaded to the singlethreaded option, the provider does not return
    # any data and requires bazarr to restart in order to work again with this setting. However,
    # switching back to multithreaded does NOT require bazarr to be restarted.
    ####
    # Execute a func for each array member and return results. Handles the async/sync side of things
    def execute_foreach(self, array, func, args=[], kwargs={}):
        if not self.multithreading:
            logger.info("Titulky.com: processing in sequence")

            result_array = []
            for i, obj in enumerate(array):
                passing_args = [obj] + args
                return_value = func(*passing_args, **kwargs)

                if return_value:
                    result_array.append(return_value)
                else:
                    logger.debug(f"Titulky.com: No data returned, element number: {i}")

            return result_array
        else:
            logger.info(f"Titulky.com: processing in parallel, {self.max_threads} elements at a time.")

            array_length = len(array)

            return details
        except Exception as e:
            if type(threads_data) is list and type(thread_id) is int:
                threads_data[thread_id] = {'sub_info': None, 'exception': e}

            threads = [None] * array_length
            threads_data = [None] * array_length

            raise e

            # Process in parallel, self.max_threads at a time.
            cycles = math.ceil(array_length / self.max_threads)
            for i in range(cycles):
                # Batch number i
                starting_index = i * self.max_threads  # Inclusive
                ending_index = starting_index + self.max_threads  # Non-inclusive

                # Create threads for all elements in this batch
                for j in range(starting_index, ending_index):
                    # Check if the j-th element exists
                    if j < array_length:
                        # Element number j
                        logger.debug(f"Titulky.com: Creating thread {j} (batch: {i})")
                        # Merge supplied kwargs with our dict
                        kwargs.update({
                            'thread_id': j,
                            'threads_data': threads_data
                        })
                        # Create a thread for element j and start it
                        threads[j] = Thread(
                            target=func,
                            args=[array[j]] + args,
                            kwargs=kwargs
                        )
                        threads[j].start()

                # Wait for all created threads to finish before moving to another batch of data
                for j in range(starting_index, ending_index):
                    # Check if the j-th data exists
                    if j < array_length:
                        threads[j].join()

            result_array = []
            # Process the resulting data from all threads
            for i in range(len(threads_data)):
                thread_data = threads_data[i]

                # If the thread didn't communicate at all
                if not thread_data:
                    raise ProviderError(f"No communication from thread ID: {i}")

                # If an exception was raised in a thread, raise it again here
                if 'exception' in thread_data and thread_data['exception']:
                    logger.debug(f"Titulky.com: An error occurred while processing in the thread ID {i}")
                    raise thread_data['exception']

                if 'return_value' in thread_data:
                    result_array.append(thread_data['return_value'])

            return result_array
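
    # Illustrative usage (hypothetical values): each element of `rows` is passed as
    # the first positional argument of `func`, followed by `args`:
    #   infos = self.execute_foreach(rows, self.process_row, args=[video_names, search_url])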

    # There are multiple ways to find subs from this provider:
    # \\ Using self.query function: "Universal search" //
    # 1. SEARCH by sub title
    #    - parameter: .................. Fulltext=<SUB NAME>
    # 2. SEARCH by imdb id

@@ -590,10 +688,120 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
    #    - parameter: .................. ASchvalene=<('1' for approved only | '-0' for subs awaiting approval | '' for all)>
    #    - redirects should NOT be allowed here
    #
    # 8. BROWSE subtitles by IMDB ID
    # \\ Using self.browse function: "Episode search" //
    # 8. BROWSE subtitles by IMDB ID (only episodes)
    #    - Subtitles are here categorised by seasons and episodes
    #    - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
    #    - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
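    #
    # Illustrative browse URL (a real IMDB ID, used here only as an example):
    #   https://premium.titulky.com/?action=serial&step=2&id=0944947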

    # Special search only for episodes. Complements the query method of searching.
    def browse_episodes(self,
                        imdb_id=None,
                        season=None,
                        episode=None):
        params = {
            'action': 'serial',
            'step': season,
            # Remove the "tt" prefix
            'id': imdb_id[2:]
        }
        browse_url = self.build_url(params)
        html_src = self.fetch_page(browse_url, allow_redirects=True)

        browse_page_soup = ParserBeautifulSoup(
            html_src, ['lxml', 'html.parser'])
        # Container element containing subtitle div rows, None if the series was not found or similar
        container = browse_page_soup.find('form', class_='cloudForm')

        # No container with subtitles
        if not container:
            logger.debug("Titulky.com: Could not find container element. No subtitles found.")
            return []

        # All rows: subtitle rows, episode number rows, useless rows... Gotta filter this out.
        all_rows = container.find_all('div', class_='row')

        # Filtering and parsing rows
        episodes_dict = {}
        last_ep_num = None
        for row in all_rows:
            # This element holds the episode number of the following row(s) of subtitles
            # E.g.: 1., 2., 3., 4.
            episode_num = row.find('h5')
            # Link to the sub details
            details_anchor = row.find('a') if 'pbl1' in row['class'] or 'pbl0' in row['class'] else None

            if episode_num:
                # The row is a div with the episode number as its text content
                try:
                    # Remove the period at the end and parse the string into a number
                    number = int(episode_num.string.replace('.', ''))
                    last_ep_num = number
                except:
                    logger.debug("Titulky.com: An error occurred while parsing the episode number!")
                    raise ProviderError("Could not parse episode number!")
            elif details_anchor:
                # The row is a subtitles row. Contains a link to the details page
                if not last_ep_num:
                    logger.debug("Titulky.com: No previous episode number!")
                    raise ProviderError("Previous episode number missing, can't parse.")

                # If this row contains the first subtitles for an episode number,
                # add an empty array into the episodes dict at its place.
                if last_ep_num not in episodes_dict:
                    episodes_dict[last_ep_num] = []

                details_link = f"{self.server_url}{details_anchor.get('href')[1:]}"
                id_match = re.findall(r'id=(\d+)', details_link)
                sub_id = id_match[0] if len(id_match) > 0 else None
                download_link = f"{self.download_url}{sub_id}"
                # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
                approved = True if 'pbl1' in row.get('class') else False

                result = {
                    'names': [],
                    'id': sub_id,
                    'approved': approved,
                    'details_link': details_link,
                    'download_link': download_link
                }

                episodes_dict[last_ep_num].append(result)
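
        # Illustrative shape of episodes_dict at this point (values made up):
        #   {1: [{'names': [], 'id': '12345', 'approved': True,
        #         'details_link': '...', 'download_link': '...'}],
        #    2: [...]}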
        # Rows parsed into episodes_dict, now let's read what we got.
        if episode not in episodes_dict:
            # Well, we got nothing, that happens!
            logger.debug("Titulky.com: No subtitles found")
            return []

        # Let's parse more details about the subtitles that we actually care about
        subtitle_details = self.execute_foreach(episodes_dict[episode], self.parse_details)

        # After parsing, create new instances of the Subtitle class
        subtitles = []
        for sub_info in subtitle_details:
            subtitle_instance = self.subtitle_class(
                sub_info['id'],
                imdb_id,
                sub_info['language'],
                sub_info['names'],
                season,
                episode,
                sub_info['year'],
                sub_info['releases'],
                sub_info['fps'],
                sub_info['uploader'],
                sub_info['approved'],
                sub_info['details_link'],
                sub_info['download_link'],
                skip_wrong_fps=self.skip_wrong_fps,
                asked_for_episode=True)
            subtitles.append(subtitle_instance)

        return subtitles

    # Universal search for subtitles. Searches both episodes and movies.
    def query(self,
              language,
              video_names,

@@ -603,8 +811,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
              season=None,
              episode=None,
              imdb_id=None):

        ## Build the search URL
        params = {}
        # Build the search URL
        params = {
            'action': 'search',
            # Requires subtitle names to match full search keyword
            'fsf': 1
        }

        # Keyword
        if keyword:
@@ -641,9 +853,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        else:
            params['ASchvalene'] = ''

        search_url = self.build_search_url(params)
        search_url = self.build_url(params)

        ## Search results page parsing
        # Search results page parsing
        html_src = self.fetch_page(search_url)
        search_page_soup = ParserBeautifulSoup(html_src,
                                               ['lxml', 'html.parser'])

@@ -669,138 +881,32 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        table_body = table.find('tbody')
        if not table_body:
            logger.debug("Titulky.com: Could not find table body")
            raise ParseResponseError(
                "Could not find table body. Did the HTML source change?")
            raise ParseResponseError("Could not find table body. Did the HTML source change?")

        ## Loop over all subtitles on the first page and put them in a list
        # Loop over all subtitles on the first page and put them in a list
        subtitles = []
        rows = table_body.find_all('tr')

        if not self.multithreading:
            # Process the rows sequentially
            logger.info("Titulky.com: processing results in sequence")
            for i, row in enumerate(rows):
                sub_info = self.process_row(row, video_names, search_url)

                # If subtitle info was returned, then everything was okay
                # and we can instantiate it and add it to the list
                if sub_info:
                    logger.debug(
                        f"Titulky.com: Successfully retrieved subtitle info, row: {i}"
                    )

                    # If we found the subtitle by IMDB ID, no need to get it from the details page
                    sub_imdb_id = imdb_id or sub_info['imdb_id']

                    subtitle_instance = self.subtitle_class(
                        sub_info['id'],
                        sub_imdb_id,
                        sub_info['language'],
                        sub_info['names'],
                        season,
                        episode,
                        sub_info['year'],
                        sub_info['releases'],
                        sub_info['fps'],
                        sub_info['uploader'],
                        sub_info['approved'],
                        sub_info['details_link'],
                        sub_info['download_link'],
                        skip_wrong_fps=self.skip_wrong_fps,
                        asked_for_episode=(type == 'episode'))
                    subtitles.append(subtitle_instance)
                else:
                    # No subtitle info was returned, i.e. something unexpected
                    # happened during subtitle details page fetching and processing.
                    logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")
        else:
            # Process the rows in parallel
            logger.info(
                f"Titulky.com: processing results in parallel, {self.max_threads} rows at a time."
            )

            threads = [None] * len(rows)
            threads_data = [None] * len(rows)

            # Process rows in parallel, self.max_threads at a time.
            cycles = math.ceil(len(rows) / self.max_threads)
            for i in range(cycles):
                # Batch number i
                starting_index = i * self.max_threads  # Inclusive
                ending_index = starting_index + self.max_threads  # Non-inclusive

                # Create threads for all rows in this batch
                for j in range(starting_index, ending_index):
                    # Check if the j-th row exists
                    if j < len(rows):
                        # Row number j
                        logger.debug(
                            f"Titulky.com: Creating thread {j} (batch: {i})")
                        # Create a thread for row j and start it
                        threads[j] = Thread(
                            target=self.process_row,
                            args=[rows[j], video_names, search_url],
                            kwargs={
                                'thread_id': j,
                                'threads_data': threads_data
                            })
                        threads[j].start()

                # Wait for all created threads to finish before moving to another batch of rows
                for j in range(starting_index, ending_index):
                    # Check if the j-th row exists
                    if j < len(rows):
                        threads[j].join()

            # Process the resulting data from all threads
            for i in range(len(threads_data)):
                thread_data = threads_data[i]

                # If the thread didn't return anything, but we expected a dict object
                if not thread_data:
                    raise ProviderError(f"No data returned from thread ID: {i}")

                # If an exception was raised in a thread, raise it again here
                if 'exception' in thread_data and thread_data['exception']:
                    logger.debug(
                        f"Titulky.com: An error occurred while processing a row in the thread ID {i}"
                    )
                    raise thread_data['exception']

                # If the thread returned subtitle info, great, instantiate it and add it to the list
                if 'sub_info' in thread_data and thread_data['sub_info']:
                    # Instantiate the subtitle object
                    logger.debug(
                        f"Titulky.com: Successfully retrieved subtitle info, thread ID: {i}"
                    )
                    sub_info = thread_data['sub_info']

                    # If we found the subtitle by IMDB ID, no need to get it from the details page
                    sub_imdb_id = imdb_id or sub_info['imdb_id']

                    subtitle_instance = self.subtitle_class(
                        sub_info['id'],
                        sub_imdb_id,
                        sub_info['language'],
                        sub_info['names'],
                        season,
                        episode,
                        sub_info['year'],
                        sub_info['releases'],
                        sub_info['fps'],
                        sub_info['uploader'],
                        sub_info['approved'],
                        sub_info['details_link'],
                        sub_info['download_link'],
                        skip_wrong_fps=self.skip_wrong_fps,
                        asked_for_episode=(type == 'episode'))
                    subtitles.append(subtitle_instance)
                else:
                    # The thread returned data, but it didn't contain subtitle info, i.e. something unexpected
                    # happened during subtitle details page fetching and processing.
                    logger.debug(
                        f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
                    )
        for sub_info in self.execute_foreach(rows, self.process_row, args=[video_names, search_url]):
            # If we found the subtitle by IMDB ID, no need to get it from the details page
            sub_imdb_id = imdb_id or sub_info['imdb_id']

            subtitle_instance = self.subtitle_class(
                sub_info['id'],
                sub_imdb_id,
                sub_info['language'],
                sub_info['names'],
                season,
                episode,
                sub_info['year'],
                sub_info['releases'],
                sub_info['fps'],
                sub_info['uploader'],
                sub_info['approved'],
                sub_info['details_link'],
                sub_info['download_link'],
                skip_wrong_fps=self.skip_wrong_fps,
                asked_for_episode=(type == 'episode'))
            subtitles.append(subtitle_instance)

        # Clean up
        search_page_soup.decompose()

@@ -814,20 +920,28 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        subtitles = []

        # Possible paths:
        # (0) Special for episodes: Browse TV Series page and search for subtitles
        # (1) Search by IMDB ID [and season/episode for tv series]
        # (2) Search by keyword: video (title|series) [and season/episode for tv series]
        # (3) Search by keyword: video series + S00E00 (tv series only)

        for language in languages:
            if isinstance(video, Episode):
                video_names = [video.series, video.title
                               ] + video.alternative_series
                video_names = [video.series, video.title] + video.alternative_series

                # (0)
                if video.series_imdb_id:
                    logger.info("Titulky.com: Finding subtitles by browsing TV Series page (0)")
                    partial_subs = self.browse_episodes(imdb_id=video.series_imdb_id,
                                                        season=video.season,
                                                        episode=video.episode)
                    if (len(partial_subs) > 0):
                        subtitles += partial_subs
                        continue

                # (1)
                logger.info(
                    "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
                )
                if video.series_imdb_id:
                    logger.info("Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)")
                    partial_subs = self.query(language,
                                              video_names,
                                              'episode',

@@ -839,9 +953,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
                        continue

                # (2)
                logger.info(
                    "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
                )
                logger.info("Titulky.com: Finding subtitles by keyword, Season and Episode (2)")
                keyword = video.series
                partial_subs = self.query(language,
                                          video_names,

@@ -865,8 +977,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
                video_names = [video.title] + video.alternative_titles

                # (1)
                logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
                if video.imdb_id:
                    logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
                    partial_subs = self.query(language,
                                              video_names,
                                              'movie',

@@ -892,46 +1004,25 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
        try:
            res.raise_for_status()
        except:
            raise HTTPError(
                f"An error occurred during the download request to {subtitle.download_link}"
            )
            raise HTTPError(f"An error occurred during the download request to {subtitle.download_link}")

        archive_stream = io.BytesIO(res.content)
        archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug("Titulky.com: Identified rar archive")
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            subtitle_content = self.get_subtitle_from_archive(
                subtitle, archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug("Titulky.com: Identified zip archive")
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            subtitle_content = self.get_subtitle_from_archive(
                subtitle, archive)
        else:
            subtitle_content = fix_line_ending(res.content)

        if not subtitle_content:
            logger.debug(
                "Titulky.com: No subtitle content found. The downloading limit has most likely been exceeded."
            )
            raise DownloadLimitExceeded(
                "Subtitles download limit has been exceeded")
            logger.debug("Titulky.com: No subtitle content found. The downloading limit has most likely been exceeded.")
            raise DownloadLimitExceeded("Subtitles download limit has been exceeded")

        subtitle.content = subtitle_content

# Check if any element from source array is contained partially or exactly in any element from target array
# Returns on the first match
def _contains_element(_from=None, _in=None, exactly=False):
    source_array = _from
    target_array = _in

    for source in source_array:
        for target in target_array:
            if exactly:
                if sanitize(source) == sanitize(target):
                    return True
            else:
                if sanitize(source) in sanitize(target):
                    return True

    return False