@ -13,7 +13,7 @@ from requests import Session
from subliminal import __short_version__
from subliminal . exceptions import ServiceUnavailable
from subliminal . providers import ParserBeautifulSoup
from subliminal . subtitle import SUBTITLE_EXTENSIONS , fix_line_ending , guess_matches
from subliminal . subtitle import SUBTITLE_EXTENSIONS , fix_line_ending , guess_matches
from subliminal . video import Episode , Movie
from subliminal_patch . exceptions import APIThrottled
from six . moves import range
@ -26,18 +26,20 @@ logger = logging.getLogger(__name__)
class SubdivxSubtitle ( Subtitle ) :
provider_name = ' subdivx '
provider_name = " subdivx "
hash_verifiable = False
def __init__(self, language, video, page_link, title, description, uploader):
    """Build a subtitle entry from one Subdivx search result.

    :param language: language forwarded to the base ``Subtitle`` initializer.
    :param video: the video this subtitle was found for.
    :param page_link: link to the subtitle's page, forwarded to the base
        initializer.
    :param title: title text of the search result.
    :param description: description text of the search result.
    :param uploader: name of the uploader.
    """
    # The base initializer must run exactly once; hearing_impaired is always
    # passed as False by this provider.
    super(SubdivxSubtitle, self).__init__(
        language, hearing_impaired=False, page_link=page_link
    )
    self.video = video
    self.title = title
    self.description = description
    self.uploader = uploader

    # release_info starts as the title; the description is appended a single
    # time, and only when it contains something other than whitespace.
    self.release_info = self.title
    if self.description and self.description.strip():
        self.release_info += " | " + self.description
@property
def id ( self ) :
@ -49,60 +51,33 @@ class SubdivxSubtitle(Subtitle):
# episode
if isinstance ( video , Episode ) :
# already matched in search query
matches . update ( [ ' title ' , ' series ' , ' season ' , ' episode ' , ' year ' ] )
matches . update ( [ " title " , " series " , " season " , " episode " , " year " ] )
# movie
elif isinstance ( video , Movie ) :
# already matched in search query
matches . update ( [ ' title ' , ' year ' ] )
# release_group
if video . release_group and video . release_group . lower ( ) in self . description :
matches . add ( ' release_group ' )
# resolution
if video . resolution and video . resolution . lower ( ) in self . description :
matches . add ( ' resolution ' )
# source
if video . source :
formats = [ video . source . lower ( ) ]
if formats [ 0 ] == " web " :
formats . append ( " webdl " )
formats . append ( " web-dl " )
formats . append ( " webrip " )
formats . append ( " web " )
for frmt in formats :
if frmt in self . description :
matches . add ( ' source ' )
break
# video_codec
if video . video_codec :
video_codecs = [ video . video_codec . lower ( ) ]
if video_codecs [ 0 ] == " h.264 " :
video_codecs . append ( " h264 " )
video_codecs . append ( " x264 " )
elif video_codecs [ 0 ] == " h.265 " :
video_codecs . append ( " h265 " )
video_codecs . append ( " x265 " )
elif video_codecs [ 0 ] == " divx " :
video_codecs . append ( " divx " )
for vc in video_codecs :
if vc in self . description :
matches . add ( ' video_codec ' )
break
matches . update ( [ " title " , " year " ] )
# Special string comparisons are unnecessary. Guessit can match keys
# from any string and find even more keywords.
matches | = guess_matches (
video ,
guessit (
self . description ,
{ " type " : " episode " if isinstance ( video , Episode ) else " movie " } ,
) ,
)
return matches
class SubdivxSubtitlesProvider ( Provider ) :
provider_name = ' subdivx '
provider_name = " subdivx "
hash_verifiable = False
languages = { Language . fromalpha2 ( lang ) for lang in [ ' es ' ] }
languages = { Language . fromalpha2 ( lang ) for lang in [ " es " ] }
subtitle_class = SubdivxSubtitle
server_url = ' https://www.subdivx.com/ '
server_url = " https://www.subdivx.com/ "
multi_result_throttle = 2
language_list = list ( languages )
@ -111,36 +86,31 @@ class SubdivxSubtitlesProvider(Provider):
def initialize(self):
    """Create the HTTP session and set its User-Agent header."""
    self.session = Session()
    # Set the header once, with no padding around the header name or value:
    # stray whitespace would be sent as part of the header (and may be
    # rejected by the HTTP client's header validation).
    self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
def terminate(self):
    """Close the session stored on ``self.session``."""
    http_session = self.session
    http_session.close()
def query(self, video, languages):
    """Search the provider for subtitles matching *video*.

    Result pages are requested one after another (ordered by downloads,
    descending) until a page holds fewer than 100 entries or a page fails
    to parse. Whatever was collected up to that point is returned.

    :param video: an ``Episode`` is searched as "<series> SxxEyy"; any other
        video is searched by its title only.
    :param languages: not used by this method; every result is created with
        the first entry of ``self.language_list``.
    :return: list of subtitles gathered from all parsed pages.
    :raises ServiceUnavailable: propagated from ``self._check_response`` when
        a search request does not answer with status 200.
    """
    if isinstance(video, Episode):
        query = f"{video.series} S{video.season:02d}E{video.episode:02d}"
    else:
        # Subdivx has problems searching foreign movies if the year is
        # appended. For example: if we search "Memories of Murder 2003",
        # Subdivx won't return any results; but if we search "Memories of
        # Murder", it will. That's because in Subdivx foreign titles have
        # the year after the original title ("Salinui chueok (2003) aka
        # Memories of Murder").
        # A proper solution would be filtering results with the year in
        # self._parse_subtitles_page.
        query = video.title

    params = {
        "q": query,  # search string
        "accion": 5,  # action search
        "oxdown": 1,  # order by downloads descending
        "pg": 1,  # page 1
    }

    logger.debug("Searching subtitles: %s", query)
    subtitles = []
    language = self.language_list[0]
    search_link = self.server_url + "index.php"
    while True:
        response = self.session.get(search_link, params=params, timeout=20)
        self._check_response(response)

        try:
            page_subtitles = self._parse_subtitles_page(video, response, language)
        except Exception as e:
            # Deliberately best effort: keep the results of earlier pages
            # instead of failing the whole search on one bad page.
            logger.error("Error parsing subtitles list: %s", e)
            break

        subtitles += page_subtitles

        if len(page_subtitles) < 100:
            break  # this is the last page

        # Advance exactly one page per iteration, then wait before the next
        # request.
        params["pg"] += 1  # search next page
        time.sleep(self.multi_result_throttle)

    return subtitles
@ -167,14 +137,17 @@ class SubdivxSubtitlesProvider(Provider):
def download_subtitle ( self , subtitle ) :
if isinstance ( subtitle , SubdivxSubtitle ) :
# download the subtitle
logger . info ( ' Downloading subtitle %r ' , subtitle )
logger . info ( " Downloading subtitle %r " , subtitle )
# get download link
download_link = self . _get_download_link ( subtitle )
# download zip / rar file with the subtitle
response = self . session . get ( self . server_url + download_link , headers = { ' Referer ' : subtitle . page_link } ,
timeout = 30 )
response = self . session . get (
self . server_url + download_link ,
headers = { " Referer " : subtitle . page_link } ,
timeout = 30 ,
)
self . _check_response ( response )
# open the compressed archive
@ -187,9 +160,11 @@ class SubdivxSubtitlesProvider(Provider):
def _parse_subtitles_page ( self , video , response , language ) :
subtitles = [ ]
page_soup = ParserBeautifulSoup ( response . content . decode ( ' utf-8 ' , ' ignore ' ) , [ ' lxml ' , ' html.parser ' ] )
title_soups = page_soup . find_all ( " div " , { ' id ' : ' menu_detalle_buscador ' } )
body_soups = page_soup . find_all ( " div " , { ' id ' : ' buscador_detalle ' } )
page_soup = ParserBeautifulSoup (
response . content . decode ( " utf-8 " , " ignore " ) , [ " lxml " , " html.parser " ]
)
title_soups = page_soup . find_all ( " div " , { " id " : " menu_detalle_buscador " } )
body_soups = page_soup . find_all ( " div " , { " id " : " buscador_detalle " } )
for subtitle in range ( 0 , len ( title_soups ) ) :
title_soup , body_soup = title_soups [ subtitle ] , body_soups [ subtitle ]
@ -204,15 +179,17 @@ class SubdivxSubtitlesProvider(Provider):
page_link = title_soup . find ( " a " ) [ " href " ]
# description
description = body_soup . find ( " div " , { ' id ' : ' buscador_detalle_sub ' } ) . text
description = body_soup . find ( " div " , { " id " : " buscador_detalle_sub " } ) . text
description = description . replace ( " , " , " " ) . lower ( )
# uploader
uploader = body_soup . find ( " a " , { ' class ' : ' link1 ' } ) . text
uploader = body_soup . find ( " a " , { " class " : " link1 " } ) . text
subtitle = self . subtitle_class ( language , video , page_link , title , description , uploader )
subtitle = self . subtitle_class (
language , video , page_link , title , description , uploader
)
logger . debug ( ' Found subtitle %r ' , subtitle )
logger . debug ( " Found subtitle %r " , subtitle )
subtitles . append ( subtitle )
return subtitles
@ -221,37 +198,39 @@ class SubdivxSubtitlesProvider(Provider):
response = self . session . get ( subtitle . page_link , timeout = 20 )
self . _check_response ( response )
try :
page_soup = ParserBeautifulSoup ( response . content . decode ( ' utf-8 ' , ' ignore ' ) , [ ' lxml ' , ' html.parser ' ] )
links_soup = page_soup . find_all ( " a " , { ' class ' : ' detalle_link ' } )
page_soup = ParserBeautifulSoup (
response . content . decode ( " utf-8 " , " ignore " ) , [ " lxml " , " html.parser " ]
)
links_soup = page_soup . find_all ( " a " , { " class " : " detalle_link " } )
for link_soup in links_soup :
if link_soup [ ' href ' ] . startswith ( ' bajar ' ) :
return self . server_url + link_soup [ ' href ' ]
links_soup = page_soup . find_all ( " a " , { ' class ' : ' link1 ' } )
if link_soup [ " href " ] . startswith ( " bajar " ) :
return self . server_url + link_soup [ " href " ]
links_soup = page_soup . find_all ( " a " , { " class " : " link1 " } )
for link_soup in links_soup :
if " bajar.php " in link_soup [ ' href ' ] :
return link_soup [ ' href ' ]
if " bajar.php " in link_soup [ " href " ] :
return link_soup [ " href " ]
except Exception as e :
raise APIThrottled ( ' Error parsing download link: ' + str ( e ) )
raise APIThrottled ( f" Error parsing download link: { e } " )
raise APIThrottled ( ' Download link not found ' )
raise APIThrottled ( " Download link not found " )
@staticmethod
def _check_response ( response ) :
if response . status_code != 200 :
raise ServiceUnavailable ( ' Bad status code: ' + str ( response . status_code ) )
raise ServiceUnavailable ( f" Bad status code: { response . status_code } " )
@staticmethod
def _get_archive(content):
    """Open downloaded bytes as an in-memory rar or zip archive.

    :param content: raw bytes of the downloaded file.
    :return: a ``rarfile.RarFile`` or ``zipfile.ZipFile`` backed by an
        in-memory stream.
    :raises APIThrottled: if the bytes are recognised as neither rar nor zip.
    """
    archive_stream = io.BytesIO(content)

    # rar is probed first, zip second; each format is logged once.
    if rarfile.is_rarfile(archive_stream):
        logger.debug("Identified rar archive")
        return rarfile.RarFile(archive_stream)

    if zipfile.is_zipfile(archive_stream):
        logger.debug("Identified zip archive")
        return zipfile.ZipFile(archive_stream)

    raise APIThrottled("Unsupported compressed format")
@ -261,12 +240,16 @@ class SubdivxSubtitlesProvider(Provider):
for name in archive . namelist ( ) :
# discard hidden files
# discard non-subtitle files
if not os . path . split ( name ) [ - 1 ] . startswith ( ' . ' ) and name . lower ( ) . endswith ( SUBTITLE_EXTENSIONS ) :
if not os . path . split ( name ) [ - 1 ] . startswith ( " . " ) and name . lower ( ) . endswith (
SUBTITLE_EXTENSIONS
) :
_valid_names . append ( name )
# archive with only 1 subtitle
if len ( _valid_names ) == 1 :
logger . debug ( " returning from archive: {} (single subtitle file) " . format ( _valid_names [ 0 ] ) )
logger . debug (
f " returning from archive: { _valid_names [ 0 ] } (single subtitle file) "
)
return archive . read ( _valid_names [ 0 ] )
# in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
@ -275,31 +258,36 @@ class SubdivxSubtitlesProvider(Provider):
_max_name = " "
for name in _valid_names :
_guess = guessit ( name )
if ' season ' not in _guess :
_guess [ ' season ' ] = - 1
if ' episode ' not in _guess :
_guess [ ' episode ' ] = - 1
if " season " not in _guess :
_guess [ " season " ] = - 1
if " episode " not in _guess :
_guess [ " episode " ] = - 1
if isinstance ( subtitle . video , Episode ) :
logger . debug ( " guessing %s " % name )
logger . debug ( " subtitle S {} E {} video S {} E {} " . format (
_guess [ ' season ' ] , _guess [ ' episode ' ] , subtitle . video . season , subtitle . video . episode ) )
if subtitle . video . episode != _guess [ ' episode ' ] or subtitle . video . season != _guess [ ' season ' ] :
logger . debug ( ' subtitle does not match video, skipping ' )
logger . debug (
f " subtitle S { _guess [ ' season ' ] } E { _guess [ ' episode ' ] } video "
f " S { subtitle . video . season } E { subtitle . video . episode } "
)
if (
subtitle . video . episode != _guess [ " episode " ]
or subtitle . video . season != _guess [ " season " ]
) :
logger . debug ( " subtitle does not match video, skipping " )
continue
matches = set ( )
matches | = guess_matches ( subtitle . video , _guess )
_score = sum ( ( _scores . get ( match , 0 ) for match in matches ) )
logger . debug ( ' srt matches: %s , score %d ' % ( matches , _score ) )
logger . debug ( " srt matches: %s , score %d " % ( matches , _score ) )
if _score > _max_score :
_max_score = _score
_max_name = name
logger . debug ( " new max: {} {} " . format ( name , _score ) )
logger . debug ( f" new max: { name } { _score } " )
if _max_score > 0 :
logger . debug ( " returning from archive: { } scored {} " . format ( _max_name , _max_score ) )
logger . debug ( f " returning from archive: { _max_name } scored { _max_score } " )
return archive . read ( _max_name )
raise APIThrottled ( ' Can not find the subtitle in the compressed file ' )
raise APIThrottled ( " Can not find the subtitle in the compressed file " )