@ -36,10 +36,10 @@ class TitulkySubtitle(Subtitle):
hash_verifiable = False
hearing_impaired_verifiable = False
def __init__ ( self , sub_id , language , titl e, year , release_info , fps , uploader , approved , page_link , download_link , season = None , episode = None , skip_wrong_fps = False ) :
def __init__ ( self , sub_id , language , names, season , episod e, year , release_info , fps , uploader , approved , page_link , download_link , skip_wrong_fps = False ) :
super ( ) . __init__ ( language , page_link = page_link )
self . title = title
self . names = names
self . year = year
self . sub_id = sub_id
self . fps = fps
@ -73,21 +73,17 @@ class TitulkySubtitle(Subtitle):
if self . episode and self . episode == video . episode :
matches . add ( ' episode ' )
if self . season is None and self . episode is None :
matches . add ( ' episode ' )
if sanitize ( f " S { video . season : 02d } E { video . episode : 02d } " ) in sanitize ( self . title ) :
matches . add ( ' season ' )
matches . add ( ' episode ' )
if video . series and sanitize ( video . series ) in sanitize ( self . title ) :
name_matches = [ video . series and sanitize ( name ) in sanitize ( video . series ) for name in self . names ]
if any ( name_matches ) :
matches . add ( ' series ' )
elif _type == ' movie ' :
## MOVIE
if video . title and sanitize ( video . title ) in sanitize ( self . title ) :
matches . add ( ' title ' )
name_matches = [ video . title and sanitize ( name ) in sanitize ( video . title ) for name in self . names ]
if any ( name_matches ) :
matches . add ( ' title ' )
## MOVIE OR EPISODE
if video . year and video . year == self . year :
matches . add ( ' year ' )
@ -223,68 +219,90 @@ class TitulkyProvider(Provider):
return result
# Parse details of an individual subtitle: title, year, language, etc.
# Parse details of an individual subtitle: release, language, uploader, fps and year
def parse_details ( self , url ) :
html_src = self . fetch_page ( url )
details_page_soup = ParserBeautifulSoup ( html_src , [ ' lxml ' , ' html.parser ' ] )
details_container = details_page_soup . find ( ' div ' , class_ = ' detail ' )
if not details_container :
# The subtitles were removed and got redirected to a different page. Better treat this silently.
logger . debug ( " Titulky.com: Could not find details div container. Skipping. " )
return False
### TITLE AND YEAR
h1_tag = details_container . find ( ' h1 ' , id = ' titulky ' )
if not h1_tag :
logger . debug ( " Titulky.com: Could not find h1 tag. Skipping. " )
return False
# The h1 tag contains the title of the subtitle and year
h1_texts = [ text . strip ( ) for text in h1_tag . stripped_strings ]
if len ( h1_texts ) < 1 :
logger . debug ( " Titulky.com: The header tag did not include sufficient data. Skipping. " )
return False
title = h1_texts [ 0 ]
year = int ( h1_texts [ 1 ] ) if len ( h1_texts ) > 1 else None
### UPLOADER
uploader_tag = details_container . find ( ' div ' , class_ = ' ulozil ' )
if not uploader_tag :
logger . debug ( " Titulky.com: Could not find uploader tag. Skipping. " )
return False
uploader_anchor_tag = uploader_tag . find ( ' a ' )
if not uploader_anchor_tag :
logger . debug ( " Titulky.com: Could not find uploader anchor tag. Skipping. " )
return False
uploader = uploader_anchor_tag . string . strip ( )
### RELEASE
release = None
release_tag = details_container . find ( ' div ' , class_ = ' releas ' )
if not release_tag :
logger . debug ( " Titulky.com: Could not find releas tag. Skipping. " )
return False
raise Error ( " Could not find release tag. Did the HTML source change? " )
release = release_tag . get_text ( strip = True )
if not release :
logger . info ( " Titulky.com: No release information supplied on details page. " )
### LANGUAGE
language = None
czech_flag = details_container . select ( ' img[src*= \' flag-CZ \' ] ' )
slovak_flag = details_container . select ( ' img[src*= \' flag-SK \' ] ' )
if czech_flag and not slovak_flag :
language = Language ( ' ces ' )
elif slovak_flag and not czech_flag :
language = Language ( ' slk ' )
if not language :
logger . debug ( " Titulky.com: No language information supplied on details page. " )
### UPLOADER
uploader = None
uploader_tag = details_container . find ( ' div ' , class_ = ' ulozil ' )
if not uploader_tag :
raise Error ( " Could not find uploader tag. Did the HTML source change? " )
uploader_anchor_tag = uploader_tag . find ( ' a ' )
if not uploader_anchor_tag :
raise Error ( " Could not find uploader anchor tag. Did the HTML source change? " )
uploader = uploader_anchor_tag . string . strip ( ) if uploader_anchor_tag else None
if not uploader :
logger . debug ( " Titulky.com: No uploader name supplied on details page. " )
### FPS
fps = None
fps_icon_tag_selection = details_container . select ( ' img[src*= \' Movieroll \' ] ' )
if len ( fps_icon_tag_selection ) > 0 and hasattr ( fps_icon_tag_selection [ 0 ] , ' parent ' ) :
fps_icon_tag = fps_icon_tag_selection [ 0 ]
parent_text = fps_icon_tag . parent . get_text ( strip = True )
match = re . findall ( ' ( \ d+, \ d+) fps ' , parent_text )
if not fps_icon_tag_selection and not hasattr ( fps_icon_tag_selection [ 0 ] , ' parent ' ) :
raise Error ( " Could not find parent of the fps icon tag. Did the HTML source change? " )
fps_icon_tag = fps_icon_tag_selection [ 0 ]
parent_text = fps_icon_tag . parent . get_text ( strip = True )
match = re . findall ( ' ( \ d+, \ d+) fps ' , parent_text )
# If the match is found, change the decimal separator to a dot and convert to float
fps = float ( match [ 0 ] . replace ( ' , ' , ' . ' ) ) if len ( match ) > 0 else None
# If the match is found, change the decimal separator to a dot and convert to float
fps = float ( match [ 0 ] . replace ( ' , ' , ' . ' ) ) if len ( match ) > 0 else None
if not fps :
logger . debug ( " Titulky.com: No fps supplied on details page. " )
### YEAR
year = None
h1_tag = details_container . find ( ' h1 ' , id = ' titulky ' )
if not h1_tag :
raise Error ( " Could not find h1 tag. Did the HTML source change? " )
# The h1 tag contains the name of the subtitle and a year
h1_texts = [ text for text in h1_tag . stripped_strings ]
year = int ( h1_texts [ 1 ] ) if len ( h1_texts ) > 1 else None
if not year :
logger . debug ( " Titulky.com: No year supplied on details page. " )
# Clean up
details_page_soup . decompose ( )
@ -292,19 +310,17 @@ class TitulkyProvider(Provider):
# Return the subtitle details
return {
' title ' : title ,
' year ' : year ,
' uploader ' : uploader ,
' release ' : release ,
' language ' : language ,
' fps ' : fps
' uploader ' : uploader ,
' fps ' : fps ,
' year ' : year
}
def process_row ( self , row , keyword , thread_id = None , threads_data = None ) :
try :
# The first anchor tag is an image preview, the second is the titl e
# The first anchor tag is an image preview, the second is the nam e
anchor_tag = row . find_all ( ' a ' ) [ 1 ]
title = anchor_tag . string . strip ( )
# The details link is relative, so we need to remove the dot at the beginning
details_link = f " { self . server_url } { anchor_tag . get ( ' href ' ) [ 1 : ] } "
id_match = re . findall ( ' id=( \ d+) ' , details_link )
@ -314,9 +330,19 @@ class TitulkyProvider(Provider):
# Approved subtitles have a pbl1 class for their row, others have a pbl0 class
approved = True if ' pbl1 ' in row . get ( ' class ' ) else False
# Skip subtitles that do not contain keyword in their title
if keyword and sanitize ( keyword ) not in sanitize ( title ) :
logger . debug ( f " Titulky.com: Skipping subtitle ' { title } ' because it does not contain the keyword ' { keyword } ' " )
# Name + alternative names
table_columns = row . findAll ( " td " )
main_name = anchor_tag . get_text ( strip = True )
alt_names = [ alt_name . strip ( ) for alt_name in table_columns [ 2 ] . get_text ( strip = True ) . split ( " / " ) ]
names = [ main_name ] + alt_names
# Loop over all subtitle names and check if the keyword contains them
name_matches = [ keyword and sanitize ( keyword ) not in sanitize ( name ) for name in names ]
# Skip subtitles that do not contain the keyword in their name(s)
if keyword and all ( name_matches ) is False :
logger . debug ( f " Titulky.com: Skipping subtitle with names: ' { names } ' , because it does not not contain the keyword: ' { keyword } ' " )
if type ( threads_data ) is list and type ( thread_id ) is int :
threads_data [ thread_id ] = {
' sub_info ' : None ,
@ -336,22 +362,25 @@ class TitulkyProvider(Provider):
return None
# Return additional data besides the subtitle details
details [ ' id ' ] = sub_id
details [ ' approved ' ] = approved
details [ ' details_link ' ] = details_link
details [ ' download_link ' ] = download_link
# Combine all subtitle data into one dict
result = {
' names ' : names ,
' id ' : sub_id ,
' approved ' : approved ,
' details_link ' : details_link ,
' download_link ' : download_link
}
result . update ( details )
if type ( threads_data ) is list and type ( thread_id ) is int :
threads_data [ thread_id ] = {
' sub_info ' : details ,
' sub_info ' : result ,
' exception ' : None
}
return details
except :
e = Error ( " Whoops, something unexpected happend while fetching or parsing details page. " )
except Exception as e :
if type ( threads_data ) is list and type ( thread_id ) is int :
threads_data [ thread_id ] = {
' sub_info ' : None ,
@ -362,7 +391,7 @@ class TitulkyProvider(Provider):
# There are multiple ways to find subs from this provider:
# 1. SEARCH by sub title
# - parameter: .................. Fulltext=<SUB TITL E>
# - parameter: .................. Fulltext=<SUB NAM E>
# 2. SEARCH by imdb id
# - parameter: .................. IMDB=<IMDB ID>
# 3. SEARCH by season/episode
@ -460,8 +489,18 @@ class TitulkyProvider(Provider):
# and we can instationate it and add it to the list
if sub_info :
logger . debug ( f " Titulky.com: Sucessfully retrieved subtitle info, row: { i } " )
subtitle_instance = self . subtitle_class ( sub_info [ ' id ' ] , sub_info [ ' language ' ] , sub_info [ ' title ' ] , sub_info [ ' year ' ] , sub_info [ ' release ' ] , sub_info [ ' fps ' ] ,
sub_info [ ' uploader ' ] , sub_info [ ' approved ' ] , sub_info [ ' details_link ' ] , sub_info [ ' download_link ' ] , season = season , episode = episode , skip_wrong_fps = self . skip_wrong_fps )
# Try to parse S00E00 string from the main subtitle name
sub_season = None
sub_episode = None
season_episode_string = re . findall ( ' S( \ d+)E( \ d+) ' , sub_info [ ' names ' ] [ 0 ] , re . IGNORECASE )
if season_episode_string :
sub_season = season_episode_string [ 0 ] [ 0 ]
sub_episode = season_episode_string [ 0 ] [ 1 ]
subtitle_instance = self . subtitle_class ( sub_info [ ' id ' ] , sub_info [ ' language ' ] , sub_info [ ' names ' ] , sub_season , sub_episode , sub_info [ ' year ' ] , sub_info [ ' release ' ] , sub_info [ ' fps ' ] ,
sub_info [ ' uploader ' ] , sub_info [ ' approved ' ] , sub_info [ ' details_link ' ] , sub_info [ ' download_link ' ] , skip_wrong_fps = self . skip_wrong_fps )
subtitles . append ( subtitle_instance )
else :
# No subtitle info was returned, i. e. something unexpected
@ -506,8 +545,8 @@ class TitulkyProvider(Provider):
raise Error ( f " No data returned from thread ID: { i } " )
# If an exception was raised in a thread, raise it again here
if " exception " in thread_data and thread_data [ " exception " ] :
logger . debug ( f " Titulky.com: An error occured in a thread ID: { i } " )
if ' exception ' in thread_data and thread_data [ ' exception ' ] :
logger . debug ( f " Titulky.com: An error occured while process ing a row in the thread ID { i } " )
raise thread_data [ ' exception ' ]
# If the thread returned a subtitle info, great, instantiate it and add it to the list
@ -515,8 +554,17 @@ class TitulkyProvider(Provider):
# Instantiate the subtitle object
logger . debug ( f " Titulky.com: Sucessfully retrieved subtitle info, thread ID: { i } " )
sub_info = thread_data [ ' sub_info ' ]
subtitle_instance = self . subtitle_class ( sub_info [ ' id ' ] , sub_info [ ' language ' ] , sub_info [ ' title ' ] , sub_info [ ' year ' ] , sub_info [ ' release ' ] , sub_info [ ' fps ' ] ,
sub_info [ ' uploader ' ] , sub_info [ ' approved ' ] , sub_info [ ' details_link ' ] , sub_info [ ' download_link ' ] , season = season , episode = episode , skip_wrong_fps = self . skip_wrong_fps )
# Try to parse S00E00 string from the main subtitle name
sub_season = None
sub_episode = None
season_episode_string = re . findall ( ' S( \ d+)E( \ d+) ' , sub_info [ ' names ' ] [ 0 ] , re . IGNORECASE )
if season_episode_string :
sub_season = season_episode_string [ 0 ] [ 0 ]
sub_episode = season_episode_string [ 0 ] [ 1 ]
subtitle_instance = self . subtitle_class ( sub_info [ ' id ' ] , sub_info [ ' language ' ] , sub_info [ ' names ' ] , sub_season , sub_episode , sub_info [ ' year ' ] , sub_info [ ' release ' ] , sub_info [ ' fps ' ] ,
sub_info [ ' uploader ' ] , sub_info [ ' approved ' ] , sub_info [ ' details_link ' ] , sub_info [ ' download_link ' ] , skip_wrong_fps = self . skip_wrong_fps )
subtitles . append ( subtitle_instance )
else :
# The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected