@ -2,20 +2,22 @@
from __future__ import absolute_import
import logging
import io
import re
import os
import rarfile
import zipfile
from requests import Session
from guessit import guessit
from subliminal _patch . exceptions import ParseResponseError
from subliminal . exceptions import ConfigurationError, AuthenticationError , ServiceUnavailable , DownloadLimitExceeded
from subliminal_patch . providers import Provider
from subliminal . providers import ParserBeautifulSoup
from subliminal_patch . subtitle import Subtitle
from subliminal . video import Episode , Movie
from subliminal . subtitle import SUBTITLE_EXTENSIONS , fix_line_ending , guess_matches
from subliminal . subtitle import SUBTITLE_EXTENSIONS , fix_line_ending , guess_matches
from subzero . language import Language
from subliminal_patch . score import get_scores
from subliminal . utils import sanitize , sanitize_release_group
logger = logging . getLogger ( __name__ )
@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle):
super ( LegendasdivxSubtitle , self ) . __init__ ( language )
self . language = language
self . page_link = data [ ' link ' ]
self . hits = data [ ' hits ' ]
self . exact_match = data [ ' exact_match ' ]
self . description = data [ ' description ' ] . lower ( )
self . hits = data [ ' hits ' ]
self . exact_match = data [ ' exact_match ' ]
self . description = data [ ' description ' ]
self . video = video
self . videoname = data [ ' videoname ' ]
self . video_filename = data [ ' video_filename ' ]
self . uploader = data [ ' uploader ' ]
@property
def id ( self ) :
@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
def get_matches ( self , video ) :
matches = set ( )
if self . videoname . lower ( ) in self . description :
description = sanitize ( self . description )
if sanitize ( self . video_filename ) in description :
matches . update ( [ ' title ' ] )
matches . update ( [ ' season ' ] )
matches . update ( [ ' episode ' ] )
# episode
if video . title and video. title . lower ( ) in self . description :
if video . title and sanitize( video. title ) in description :
matches . update ( [ ' title ' ] )
if video . year and ' {:04d} ' . format ( video . year ) in self . description :
if video . year and ' {:04d} ' . format ( video . year ) in description :
matches . update ( [ ' year ' ] )
if isinstance ( video , Episode ) :
# already matched in search query
if video . season and ' s {:02d} ' . format ( video . season ) in self . description :
if video . season and ' s {:02d} ' . format ( video . season ) in description :
matches . update ( [ ' season ' ] )
if video . episode and ' e {:02d} ' . format ( video . episode ) in self . description :
if video . episode and ' e {:02d} ' . format ( video . episode ) in description :
matches . update ( [ ' episode ' ] )
if video . episode and video . season and video . series :
if ' {} .s {:02d} e {:02d} ' . format ( video . series . lower ( ) , video . season , video . episode ) in self . description :
matches . update ( [ ' series ' ] )
matches . update ( [ ' season ' ] )
matches . update ( [ ' episode ' ] )
if ' {} s {:02d} e {:02d} ' . format ( video . series . lower ( ) , video . season , video . episode ) in self . description :
if ' {} s {:02d} e {:02d} ' . format ( sanitize ( video . series ) , video . season , video . episode ) in description :
matches . update ( [ ' series ' ] )
matches . update ( [ ' season ' ] )
matches . update ( [ ' episode ' ] )
# release_group
if video . release_group and video . release_group . lower ( ) in self . description :
if video . release_group and sanitize_release_group ( video . release_group ) in sanitize_release_group ( description ) :
matches . update ( [ ' release_group ' ] )
# resolution
if video . resolution and video . resolution . lower ( ) in self . description :
if video . resolution and video . resolution . lower ( ) in description :
matches . update ( [ ' resolution ' ] )
# format
@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
if formats [ 0 ] == " web-dl " :
formats . append ( " webdl " )
formats . append ( " webrip " )
formats . append ( " web " )
formats . append ( " web " )
for frmt in formats :
if frmt . lower ( ) in self . description :
if frmt in description :
matches . update ( [ ' format ' ] )
break
@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
if video . video_codec :
video_codecs = [ video . video_codec . lower ( ) ]
if video_codecs [ 0 ] == " h264 " :
format s. append ( " x264 " )
video_codec s. append ( " x264 " )
elif video_codecs [ 0 ] == " h265 " :
format s. append ( " x265 " )
for vc in format s:
if vc . lower ( ) in self . description :
video_codec s. append ( " x265 " )
for vc in video_codec s:
if vc in description :
matches . update ( [ ' video_codec ' ] )
break
@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle):
# matches |= guess_matches(video, guessit(self.description))
return matches
class LegendasdivxProvider ( Provider ) :
""" Legendasdivx Provider. """
languages = { Language ( ' por ' , ' BR ' ) } | { Language ( ' por ' ) }
@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider):
' Cache-Control ' : ' no-cache '
}
loginpage = site + ' /forum/ucp.php?mode=login '
logoutpage = site + ' /sair.php '
searchurl = site + ' /modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query= {query} '
language_list = list ( languages )
download_link = site + ' /modules.php {link} '
def __init__(self, username, password):
    """Store the account credentials used to log in to the site.

    :param username: account user name.
    :param password: account password.
    :raises ConfigurationError: if exactly one of the two credentials is
        supplied (one truthy, the other empty/None).
    """
    # Reject half-configured credentials: one value given without the other.
    if any((username, password)) and not all((username, password)):
        raise ConfigurationError('Username and password must be specified')

    self.username = username
    self.password = password
    # Set to True by login() once a session cookie has been obtained.
    self.logged_in = False
def initialize(self):
    """Create the HTTP session, apply the provider headers and log in."""
    self.session = Session()
    self.session.headers.update(self.headers)
    self.login()
def terminate ( self ) :
@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider):
def login(self):
    """Authenticate against the site's forum login form.

    Fetches the login page, copies every named ``<input>`` of the form
    (this carries the hidden fields such as the ``sid`` needed by the POST),
    fills in the credentials and submits it.  On success ``self.logged_in``
    is set to True.

    :raises ParseResponseError: if the response says the IP is blocked.
    :raises AuthenticationError: if no ``PHPSESSID`` cookie was issued.
    :raises ServiceUnavailable: on any other failure reading the cookie.
    """
    logger.info('Logging in')

    res = self.session.get(self.loginpage)
    res.raise_for_status()
    bsoup = ParserBeautifulSoup(res.content, ['lxml'])

    # necessary to set 'sid' for POST request
    data = {}
    for field in bsoup.findAll('input'):
        name = field.get('name')
        if name:  # inputs without a name are never submitted by a browser
            data[name] = field.get('value')
    data['username'] = self.username
    data['password'] = self.password

    res = self.session.post(self.loginpage, data)

    # Check for the block notice before anything else so that it is not
    # hidden behind an HTTP error or a missing-cookie error.
    if 'bloqueado' in res.text.lower():  # blocked IP address
        logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
        raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)

    res.raise_for_status()

    try:
        session_id = self.session.cookies.get_dict()['PHPSESSID']
    except KeyError:
        logger.error("Couldn't retrieve session ID, check your credentials")
        raise AuthenticationError("Please check your credentials.")
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))

    logger.debug('Logged in successfully: PHPSESSID: %s' % session_id)
    self.logged_in = True
def logout(self):
    """End the site session if one is active.

    Does nothing when ``self.logged_in`` is false.  Otherwise requests the
    logout page and clears ``self.logged_in``.  An HTTP error status on the
    logout request propagates from ``raise_for_status()`` and leaves
    ``self.logged_in`` unchanged.
    """
    if not self.logged_in:
        return

    logger.info('Legendasdivx:: Logging out')
    r = self.session.get(self.logoutpage, timeout=10)
    r.raise_for_status()
    logger.debug('Legendasdivx :: Logged out')
    self.logged_in = False
def _process_page(self, video, bsoup, video_filename):
    """Turn one parsed search-result page into subtitle objects.

    :param video: the video being searched for; ``video.name`` is compared
        against each result's description to flag an exact match.
    :param bsoup: parsed result page; each ``div.sub_box`` is one result.
    :param video_filename: cleaned-up file name of the video, passed through
        to every subtitle for later matching.
    :return: list of :class:`LegendasdivxSubtitle`, one per usable result.
        Results with an unrecognised language flag or without a download
        link are skipped.
    """
    subtitles = []
    wanted_name = video.name.lower()

    for _subbox in bsoup.findAll("div", {"class": "sub_box"}):
        hits = 0
        # Reset per result so a language never leaks from the previous box.
        # NOTE(review): Brazilian Portuguese is kept as the fallback for a
        # box with no 'Idioma:' row, as in the original code - confirm.
        lang = Language.fromopensubtitles('pob')

        for th in _subbox.findAll("th", {"class": "color2"}):
            if th.string == 'Hits:':
                hits = int(th.parent.find("td").string)
            elif th.string == 'Idioma:':
                # the language is only exposed through the flag image name
                flag = (th.parent.find("td").find("img").get('src') or '').lower()
                if 'brazil' in flag:
                    lang = Language.fromopensubtitles('pob')
                elif 'portugal' in flag:
                    lang = Language.fromopensubtitles('por')
                else:
                    lang = None

        if lang is None:
            # neither Portuguese variant: skip the whole result, not just
            # the table row
            continue

        # get description for matches
        desc_cell = _subbox.find("td", {"class": "td_desc brd_up"})
        description = desc_cell.get_text() if desc_cell is not None else ''

        # get subtitle link; find() returns None when the anchor is absent,
        # and repeating the same lookup cannot change that, so skip instead
        # of retrying
        download = _subbox.find("a", {"class": "sub_download"})
        dl = download.get('href') if download is not None else None
        if not dl:
            logger.warning("Legendasdivx :: skipping result without a download link")
            continue
        link = self.download_link.format(link=dl)
        logger.debug("Found subtitle on: %s" % link)

        # get subtitle uploader
        sub_header = _subbox.find("div", {"class": "sub_header"})
        uploader_tag = sub_header.find("a") if sub_header is not None else None
        uploader = uploader_tag.text if uploader_tag is not None else 'anonymous'

        data = {
            'link': link,
            'exact_match': wanted_name in description.lower(),
            'hits': hits,
            'uploader': uploader,
            'video_filename': video_filename,
            'description': description,
        }
        subtitles.append(LegendasdivxSubtitle(lang, video, data))

    return subtitles
def query ( self , video , language ) :
try :
logger . debug ( ' Got session id %s ' %
self . session . cookies . get_dict ( ) [ ' PHPSESSID ' ] )
except Exception as e :
self . login ( )
language_ids = ' 0 '
if isinstance ( language , ( tuple , list , set ) ) :
if len ( language ) == 1 :
language_ids = ' , ' . join ( sorted ( l . opensubtitles for l in language ) )
if language_ids == ' por ' :
language_ids = ' &form_cat=28 '
else :
language_ids = ' &form_cat=29 '
videoname = video . name
videoname = os . path . basename ( videoname )
videoname , _ = os . path . splitext ( videoname )
# querytext = videoname.lower()
def query ( self , video , languages ) :
video_filename = video . name
video_filename = os . path . basename ( video_filename )
video_filename , _ = os . path . splitext ( video_filename )
video_filename = sanitize_release_group ( video_filename )
_searchurl = self . searchurl
if video . imdb_id is None :
if isinstance ( video , Episode ) :
@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider):
else :
querytext = video . imdb_id
# language query filter
if isinstance ( languages , ( tuple , list , set ) ) :
language_ids = ' , ' . join ( sorted ( l . opensubtitles for l in languages ) )
if ' por ' in language_ids : # prioritize portuguese subtitles
lang_filter = ' &form_cat=28 ' # pt
elif ' pob ' in language_ids :
lang_filter = ' &form_cat=29 ' # br
else :
lang_filter = ' '
querytext = querytext + lang_filter if lang_filter else querytext
# querytext = querytext.replace(
# ".", "+").replace("[", "").replace("]", "")
if language_ids != ' 0 ' :
querytext = querytext + language_ids
self . headers [ ' Referer ' ] = self . site + ' /index.php '
self . session . headers . update ( self . headers . items ( ) )
res = self . session . get ( _searchurl . format ( query = querytext ) )
# form_cat=28 = br
# form_cat=29 = pt
if " A legenda não foi encontrada " in res . text :
logger . warning ( ' %s not found ' , querytext )
return [ ]
bsoup = ParserBeautifulSoup ( res . content , [ ' html.parser ' ] )
subtitles = self . _process_page ( video , bsoup , querytext , videoname )
subtitles = self . _process_page ( video , bsoup , video_filename )
# search for more than 10 results (legendasdivx uses pagination)
# don't throttle - maximum results = 6 * 10
MAX_PAGES = 6
#get number of pages bases on results found
page_header = bsoup . find ( " div " , { " class " : " pager_bar " } )
results_found = re . search ( r ' \ ((.*?) encontradas \ ) ' , page_header . text ) . group ( 1 )
num_pages = ( int ( results_found ) / / 10 ) + 1
num_pages = min ( MAX_PAGES , num_pages )
if num_pages > 1 :
for num_page in range ( 2 , num_pages + 2 ) :
_search_next = self . searchurl . format ( query = querytext ) + " &page= {0} " . format ( str ( num_page ) )
logger . debug ( " Moving to next page: %s " % _search_next )
res = self . session . get ( _search_next )
next_page = ParserBeautifulSoup ( res . content , [ ' html.parser ' ] )
subs = self . _process_page ( video , next_page , video_filename )
subtitles . extend ( subs )
return subtitles
@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider):
def download_subtitle ( self , subtitle ) :
res = self . session . get ( subtitle . page_link )
res . raise_for_status ( )
if res :
if res . text == ' 500 ' :
raise ValueError ( ' Error 500 on server ' )
if res . status_code in [ ' 500 ' , ' 503 ' ] :
raise ServiceUnavailable ( " Legendasdivx.pt :: 503 - Service Unavailable " )
elif ' limite ' in res . text . lower ( ) : # daily downloads limit reached
raise DownloadLimitReached ( " Legendasdivx.pt :: Download limit reached " )
elif ' bloqueado ' in res . text . lower ( ) : # blocked IP address
raise ParseResponseError ( " Legendasdivx.pt :: %r " % res . text )
archive = self . _get_archive ( res . content )
# extract the subtitle
@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider):
subtitle . normalize ( )
return subtitle
raise ValueError ( ' Problems conecting to the server ' )
logger . error ( " Legendasdivx.pt :: there was a problem retrieving subtitle (status %s ) " % res . status_code )
return
def _get_archive ( self , content ) :
# open the archive
@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider):
logger . debug ( ' Identified zip archive ' )
archive = zipfile . ZipFile ( archive_stream )
else :
# raise ParseResponseError('Unsupported compressed format')
raise Exception ( ' Unsupported compressed format ' )
return archive
@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider):
_tmp . remove ( ' .txt ' )
_subtitle_extensions = tuple ( _tmp )
_max_score = 0
_scores = get_scores ( subtitle . video )
_scores = get_scores ( subtitle . video )
for name in archive . namelist ( ) :
# discard hidden files
@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider):
logger . debug ( " returning from archive: {} scored {} " . format ( _max_name , _max_score ) )
return archive . read ( _max_name )
raise ParseResponseError ( ' Can not find the subtitle in the compressed file ' )
raise ValueError ( " No subtitle found on compressed file. Max score was 0 " )