# coding: utf-8
from __future__ import absolute_import
import io
import six
import logging
import re
import os
import time
from babelfish import language_converters
from subzero . language import Language
from requests import Session
from subliminal_patch . providers import Provider
from subliminal_patch . providers . mixins import ProviderSubtitleArchiveMixin
from subliminal . providers import ParserBeautifulSoup
from subliminal_patch . exceptions import ProviderError
from subliminal . score import get_equivalent_release_groups
from subliminal_patch . subtitle import Subtitle , guess_matches
from subliminal . utils import sanitize , sanitize_release_group
from subliminal . video import Episode
from zipfile import ZipFile , is_zipfile
from rarfile import RarFile , is_rarfile
from subliminal_patch . utils import sanitize , fix_inconsistent_naming as _fix_inconsistent_naming
from guessit import guessit
def fix_inconsistent_naming(title):
    """Rewrite a series title using this provider's replacement table.

    The work is delegated to ``fix_inconsistent_naming`` from
    ``subliminal_patch.utils`` (imported here as ``_fix_inconsistent_naming``),
    which receives the title, the table below and ``True`` as its third
    positional argument.

    :param str title: original title.
    :return: new title.
    :rtype: str
    """
    # Titles known to be spelled differently on the provider side.
    replacements = {
        " Stargate Origins ": " Stargate: Origins ",
        " Marvel ' s Agents of S.H.I.E.L.D. ": " Marvels+Agents+of+S.H.I.E.L.D ",
        " Mayans M.C. ": " Mayans MC ",
    }
    return _fix_inconsistent_naming(title, replacements, True)
# Module-level logger, named after this module.
logger = logging . getLogger ( __name__ )
# Register the provider-specific language converter with babelfish at import
# time. NOTE(review): presumably this is what makes
# ``Language.fromhosszupuska(...)`` (used by the provider's ``get_language``)
# available -- confirm against the babelfish converter documentation.
language_converters . register ( ' hosszupuska = subliminal_patch.converters.hosszupuska:HosszupuskaConverter ' )
# Season/episode marker pattern with two capture groups; the provider's
# ``_loop_over_table`` splits the title cell text on it to isolate the name.
_SUB_ENGLISH_NAME_RE = re . compile ( r ' s( \ d { 1,2})e( \ d { 1,2}) ' )
# Year pattern guarded by a lookbehind/lookahead on parentheses;
# ``_loop_over_table`` runs ``findall`` with it on title cells that contain
# two opening parentheses.
_SUB_YEAR_RE = re . compile ( r " (?<= \ ()( \ d {4} )(?= \ )) " )
class HosszupuskaSubtitle(Subtitle):
    """Subtitle entry produced by the Hosszupuska provider.

    Besides the language and page link handled by the ``Subtitle`` base
    class, an instance keeps the series name, season/episode numbers, the raw
    version string, the list of releases the subtitle applies to and an
    optional year.
    """

    provider_name = ' hosszupuska '

    def __init__(self, language, page_link, subtitle_id, series, season, episode, version,
                 releases, year, asked_for_release_group=None, asked_for_episode=None):
        """Store the scraped attributes of one subtitle row.

        :param language: language of the subtitle.
        :param page_link: link stored on the subtitle (also passed to the base class).
        :param subtitle_id: provider-side identifier; exposed as a string via ``id``.
        :param series: series name as listed by the provider.
        :param season: season number.
        :param episode: episode number.
        :param version: raw version/release string.
        :param releases: iterable of release names; joined into ``release_info``.
        :param year: year of the series; a truthy value is converted with ``int``.
        :param asked_for_release_group: release group of the requested video.
        :param asked_for_episode: episode number that was requested.
        """
        super(HosszupuskaSubtitle, self).__init__(language, page_link=page_link)
        self.subtitle_id = subtitle_id
        self.series = series
        self.season = season
        self.episode = episode
        self.version = version
        self.releases = releases
        # A falsy year (None, empty string, 0) is stored untouched.
        self.year = int(year) if year else year
        self.release_info = u" , ".join(releases)
        self.page_link = page_link
        self.asked_for_release_group = asked_for_release_group
        self.asked_for_episode = asked_for_episode

    def __str__(self):
        """Return a one-line, human-readable description of the subtitle."""
        subtit = (f" Subtitle id: { self . subtitle_id } Series: { self . series } "
                  f" Season: { self . season } Episode: { self . episode } "
                  f" Releases: { self . releases } ")
        if self.year:
            subtit = f" { subtit } Year: { self . year } "
        # The text above is built with f-strings, so this module can only be
        # parsed by Python 3; the former Python 2 ``encode`` branch guarded by
        # ``six.PY3`` was unreachable and has been dropped.
        return subtit

    def __repr__(self):
        """Return a debug representation with series, year, episode tag and releases."""
        ep_addon = (" S %02d E %02d " % (self.season, self.episode)) if self.episode else " "
        year_addon = " ( %s ) " % self.year if self.year else " "
        description = u" %s %s %s [ %s ] " % (self.series, year_addon, ep_addon, self.release_info)
        return ' < %s %r [ %s ]> ' % (self.__class__.__name__, description, self.language)

    @property
    def id(self):
        """Provider-side subtitle identifier as a string."""
        return str(self.subtitle_id)

    def get_matches(self, video):
        """Compute which properties of ``video`` this subtitle matches.

        :param video: the video to compare against; its ``series``, ``season``,
            ``episode``, ``year``, ``original_series`` and ``release_group``
            attributes are read.
        :return: set of match names, extended with whatever ``guess_matches``
            derives from ``release_info``.
        :rtype: set
        """
        matches = set()

        # series: accept either the raw or the provider-corrected video title
        if video.series:
            own_series = sanitize(self.series)
            if (own_series == sanitize(fix_inconsistent_naming(video.series))
                    or own_series == sanitize(video.series)):
                matches.add(' series ')

        # season
        if video.season and self.season == video.season:
            matches.add(' season ')

        # episode
        if video.episode and self.episode == video.episode:
            matches.add(' episode ')

        # year: either the years are equal, or the series matched, the video
        # is flagged ``original_series`` and the subtitle carries no year
        if ((' series ' in matches and video.original_series and self.year is None)
                or (video.year and video.year == self.year)):
            matches.add(' year ')

        logger.debug(" Matches: %s ", matches)

        # release_group: any equivalent group name contained in the version string
        if video.release_group and self.version:
            own_version = sanitize_release_group(self.version)
            wanted_groups = get_equivalent_release_groups(sanitize_release_group(video.release_group))
            if any(group in own_version for group in wanted_groups):
                matches.add(' release_group ')

        matches |= guess_matches(video, guessit(self.release_info), {" type ": " episode "})
        return matches
class HosszupuskaProvider(Provider, ProviderSubtitleArchiveMixin):
    """Subtitle provider that scrapes the Hosszupuska episode listing.

    Only ``Episode`` videos are supported. Search results are parsed from the
    HTML tables returned by ``sorozatok.php``; downloads are rar or zip
    archives unpacked through ``get_subtitle_from_archive``.
    """

    languages = {Language(' hun ', ' HU ')} | {Language(code) for code in [
        ' hun ', ' eng '
    ]}
    video_types = (Episode,)
    server_url = ' http://hosszupuskasub.com/ '
    subtitle_class = HosszupuskaSubtitle
    hearing_impaired_verifiable = False
    multi_result_throttle = 2  # seconds

    def initialize(self):
        """Open the HTTP session and set its User-Agent header."""
        self.session = Session()
        self.session.headers = {' User-Agent ': os.environ.get(" SZ_USER_AGENT ", " Sub-Zero/2 ")}

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    def get_language(self, text):
        """Map a flag image file name from the listing to a ``Language``.

        :param str text: image file name taken from the language column.
        :return: the matching language, or ``None`` for an unknown file name.
        """
        if text == ' 1.gif ':
            return Language.fromhosszupuska(' hu ')
        if text == ' 2.gif ':
            return Language.fromhosszupuska(' en ')
        return None

    def query(self, series, season, episode, year=None, video=None):
        """Search the site for subtitles of one episode.

        :param str series: series title to search for.
        :param int season: season number.
        :param int episode: episode number.
        :param year: accepted for interface compatibility; not used by the search.
        :param video: the requested video; only its ``release_group`` is read
            (may be ``None``).
        :return: list of subtitles found in the result tables.
        :rtype: list
        """
        # Search for s01e03 instead of s1e3
        seasona = " %02d " % season
        episodea = " %02d " % episode
        # Apply the provider-specific title corrections *before* encoding the
        # blanks. The previous code reassigned ``seriesa`` from the raw
        # ``series`` and therefore silently discarded the corrected title.
        seriesa = fix_inconsistent_naming(series).replace(' ', ' + ')

        # get the episode page
        logger.info(' Getting the page for episode %s ', episode)
        url = (self.server_url + " sorozatok.php?cim= " + seriesa + " &evad= " + seasona
               + " &resz= " + episodea + " &nyelvtipus= % 25&x=24&y=8 ")
        logger.info(' Url %s ', url)

        r = self.session.get(url, timeout=10).content
        soup = ParserBeautifulSoup(r, [' lxml '])

        subtitles = []
        for num, temp in enumerate(soup.find_all(" table ")):
            markup = str(temp)
            # Only tables carrying both markers are parsed as result listings.
            if " this.style.backgroundImage= ' url(css/over2.jpg) " in markup and " css/infooldal.png " in markup:
                logger.debug(" Found valid table ( %d index) ", num)
                subtitles += self._loop_over_table(temp, season, episode, video)
        return subtitles

    def _loop_over_table(self, table, season, episode, video):
        """Yield a subtitle for every row of ``table`` matching the episode.

        :param table: parsed ``table`` element of the result page.
        :param int season: requested season number.
        :param int episode: requested episode number.
        :param video: requested video, or ``None``.
        :return: generator of ``subtitle_class`` instances.
        """
        for row in table.find_all(" tr "):
            if " this.style.backgroundImage= ' url(css/over2.jpg) " not in str(row):
                continue

            datas = row.find_all(" td ")
            # Per the original author's notes, further columns exist but are
            # not consumed here: the translator (datas[3]) and the posting
            # date (datas[4]).
            title_text = datas[1].getText()
            paren_count = title_text.count(' ( ')

            sub_year = sub_english_name = None
            # Handle the case when '(' in subtitle
            if paren_count == 1:
                sub_english_name = _SUB_ENGLISH_NAME_RE.split(title_text)[3]
            if paren_count == 2:
                sub_year = _SUB_YEAR_RE.findall(title_text.strip())[0]
                sub_english_name = _SUB_ENGLISH_NAME_RE.split(title_text.split(' ( ')[0])[0]

            if not sub_english_name:
                continue

            marker_text = datas[1].find_all(' b ')[0].getText()
            # ``int`` already accepts leading zeros; stripping them first made
            # an all-zero number such as "00" collapse to "" and raise ValueError.
            sub_season = int(re.findall(r' s( \ d { 1,2}) ', marker_text, re.VERBOSE)[0])
            sub_episode = int(re.findall(r' e( \ d { 1,2}) ', marker_text, re.VERBOSE)[0])

            if sub_season != season or sub_episode != episode:
                continue

            sub_language = self.get_language(datas[2].find_all(' img ')[0][' src '].split(' / ')[1])
            sub_downloadlink = datas[6].find_all(' a ')[1][' href ']
            sub_id = sub_downloadlink.split(' = ')[1].split(' . ')[0]

            # ``paren_count`` is 1 or 2 here (otherwise no name was extracted
            # above); the version is the content of the last parenthesis.
            sub_version = title_text.split(' ( ')[paren_count].split(' ) ')[0]

            # One subtitle can be used for several releases
            sub_releases = [s.strip() for s in sub_version.split(' , ')]
            subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id, sub_english_name.strip(),
                                           sub_season, sub_episode, sub_version, sub_releases, sub_year,
                                           # ``query`` defaults ``video`` to None
                                           asked_for_release_group=video.release_group if video else None,
                                           asked_for_episode=episode)

            logger.debug(' Found subtitle: %r ', subtitle)
            yield subtitle

    def list_subtitles(self, video, languages):
        """Return the subtitles of the first title variant that yields results.

        The main series title is tried first, then each alternative title,
        pausing ``multi_result_throttle`` seconds after every empty result.

        :param video: the episode to search subtitles for.
        :param languages: accepted for interface compatibility; results are
            not filtered by it here.
        :return: list of subtitles, empty when no title produced a result.
        :rtype: list
        """
        titles = [video.series] + video.alternative_series

        for title in titles:
            subs = self.query(title, video.season, video.episode, video.year, video=video)
            if subs:
                return subs

            time.sleep(self.multi_result_throttle)

        return []

    def download_subtitle(self, subtitle):
        """Download the archive behind ``subtitle.page_link`` and extract its content.

        :param subtitle: the subtitle to download; its ``content`` is set in place.
        :raises ProviderError: if the payload is neither a rar nor a zip archive.
        """
        r = self.session.get(subtitle.page_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug(' Archive identified as rar ')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug(' Archive identified as zip ')
            archive = ZipFile(archive_stream)
        else:
            raise ProviderError(' Unidentified archive type ')

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)