pull/684/head
Louis Vézina 5 years ago
parent c5fa0f56e4
commit 3ca2c98cd4

@ -1,6 +1,73 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Death by Captcha HTTP and socket API clients.
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
socket ones. Both offer the same functionality, with the socket API
sporting faster responses and using way less connections.
To access the socket API, use SocketClient class; for the HTTP API, use
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
connection opened and serializes all API requests sent through it, thus
it is advised to keep a pool of them if your script is heavily
multithreaded.
Both SocketClient and HttpClient give you the following methods:
get_user()
Returns your DBC account details as a dict with the following keys:
"user": your account numeric ID; if login fails, it will be the only
item with the value of 0;
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
solved CAPTCHA in US cents;
"balance": your DBC account balance in US cents;
"is_banned": flag indicating whether your account is suspended or not.
get_balance()
Returns your DBC account balance in US cents.
get_captcha(cid)
Returns an uploaded CAPTCHA details as a dict with the following keys:
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
be the only item with the value of 0;
"text": the CAPTCHA text, if solved, otherwise None;
"is_correct": flag indicating whether the CAPTCHA was solved correctly
(DBC can detect that in rare cases).
The only argument `cid` is the CAPTCHA numeric ID.
get_text(cid)
Returns an uploaded CAPTCHA text (None if not solved). The only argument
`cid` is the CAPTCHA numeric ID.
report(cid)
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
CAPTCHA numeric ID. Returns True on success, False otherwise.
upload(captcha)
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
object (any object with `read` method defined, actually, so StringIO
will do), or CAPTCHA image file name. On successful upload you'll get
the CAPTCHA details dict (see get_captcha() method).
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
to poll for its status periodically using get_captcha() or get_text()
method until the CAPTCHA is solved and you get the text.
decode(captcha, timeout=DEFAULT_TIMEOUT)
A convenient method that uploads a CAPTCHA and polls for its status
periodically, but no longer than `timeout` (defaults to 60 seconds).
If solved, you'll get the CAPTCHA details dict (see get_captcha()
method for details). See upload() method for details on `captcha`
argument.
Visit http://www.deathbycaptcha.com/user/api for updates.
"""
import base64
import binascii
import errno
@ -12,7 +79,7 @@ import socket
import sys
import threading
import time
import urllib
try:
from json import read as json_decode, write as json_encode
except ImportError:
@ -21,67 +88,60 @@ except ImportError:
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
try:
from urllib2 import build_opener, HTTPRedirectHandler, Request, HTTPError
from urllib import urlencode, urlopen
except ImportError:
from urllib.request import build_opener, HTTPRedirectHandler, Request, urlopen
from urllib.error import HTTPError
from urllib.parse import urlencode
# API version and unique software ID
API_VERSION = 'DBC/Python v4.0.11'
SOFTWARE_VENDOR_ID = 0
API_VERSION = 'DBC/Python v4.6'
# Default CAPTCHA timeout and decode() polling interval
DEFAULT_TIMEOUT = 60
POLLS_INTERVAL = 5
DEFAULT_TOKEN_TIMEOUT = 120
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
DFLT_POLL_INTERVAL = 3
# Base HTTP API url
HTTP_BASE_URL = 'http://api.deathbycaptcha.com/api'
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range
SOCKET_HOST = 'api.deathbycaptcha.com'
SOCKET_HOST = 'api.dbcapi.me'
SOCKET_PORTS = range(8123, 8131)
def _load_image(captcha):
    """Return the raw bytes of a CAPTCHA image.

    `captcha` may be a file-like object (anything with a `read` method),
    a bytearray (or subclass) holding the image data, or the name of an
    image file.

    Raises ValueError if the image is empty and TypeError if imghdr does
    not recognise its format.  Errors raised while opening or reading the
    file propagate unchanged.
    """
    if hasattr(captcha, 'read'):
        img = captcha.read()
    elif isinstance(captcha, bytearray):
        img = captcha
    else:
        # The context manager closes the file even when read() fails.
        with open(captcha, 'rb') as captcha_file:
            img = captcha_file.read()
    if not len(img):
        raise ValueError('CAPTCHA image is empty')
    if imghdr.what(None, img) is None:
        raise TypeError('Unknown CAPTCHA image type')
    return img
class AccessDeniedException(Exception):
    """Raised when the DBC API refuses a request.

    The clients raise it for rejected credentials, suspended accounts and
    insufficient balance.
    """
class Client(object):
"""Death by Captcha API Client"""
"""Death by Captcha API Client."""
def __init__(self, username, password):
self.is_verbose = False
self.userpwd = {'username': username,
'password': password}
def _load_file(self, captcha):
if hasattr(captcha, 'read'):
raw_captcha = captcha.read()
elif isinstance(captcha, bytearray):
raw_captcha = captcha
elif os.path.isfile(captcha):
raw_captcha = ''
try:
f = open(captcha, 'rb')
except Exception as e:
raise e
else:
raw_captcha = f.read()
f.close()
else:
f_stream = urlopen(captcha)
raw_captcha = f_stream.read()
if not len(raw_captcha):
raise ValueError('CAPTCHA image is empty')
elif imghdr.what(None, raw_captcha) is None:
raise TypeError('Unknown CAPTCHA image type')
else:
return raw_captcha
self.userpwd = {'username': username, 'password': password}
def _log(self, cmd, msg=''):
if self.is_verbose:
@ -95,16 +155,16 @@ class Client(object):
pass
def get_user(self):
"""Fetch the user's details dict -- balance, rate and banned status."""
raise NotImplemented()
"""Fetch user details -- ID, balance, rate and banned status."""
raise NotImplementedError()
def get_balance(self):
"""Fetch the user's balance (in US cents)."""
"""Fetch user balance (in US cents)."""
return self.get_user().get('balance')
def get_captcha(self, cid):
"""Fetch a CAPTCHA details dict -- its ID, text and correctness."""
raise NotImplemented()
"""Fetch a CAPTCHA details -- ID, text and correctness flag."""
raise NotImplementedError()
def get_text(self, cid):
"""Fetch a CAPTCHA text."""
@ -112,11 +172,7 @@ class Client(object):
def report(self, cid):
"""Report a CAPTCHA as incorrectly solved."""
raise NotImplemented()
def remove(self, cid):
"""Remove an unsolved CAPTCHA."""
raise NotImplemented()
raise NotImplementedError()
def upload(self, captcha):
"""Upload a CAPTCHA.
@ -125,32 +181,56 @@ class Client(object):
dict on success.
"""
raise NotImplemented()
raise NotImplementedError()
def decode(self, captcha, timeout=DEFAULT_TIMEOUT):
"""Try to solve a CAPTCHA.
def decode(self, captcha=None, timeout=None, **kwargs):
"""
Try to solve a CAPTCHA.
See Client.upload() for arguments details.
Uploads a CAPTCHA, polls for its status periodically with arbitrary
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
"""
if not timeout:
if not captcha:
timeout = DEFAULT_TOKEN_TIMEOUT
else:
timeout = DEFAULT_TIMEOUT
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
c = self.upload(captcha)
if c:
while deadline > time.time() and not c.get('text'):
time.sleep(POLLS_INTERVAL)
c = self.get_captcha(c['captcha'])
if c.get('text') and c.get('is_correct'):
return c
uploaded_captcha = self.upload(captcha, **kwargs)
if uploaded_captcha:
intvl_idx = 0 # POLL_INTERVAL index
while deadline > time.time() and not uploaded_captcha.get('text'):
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
time.sleep(intvl)
pulled = self.get_captcha(uploaded_captcha['captcha'])
if pulled['captcha'] == uploaded_captcha['captcha']:
uploaded_captcha = pulled
if uploaded_captcha.get('text') and \
uploaded_captcha.get('is_correct'):
return uploaded_captcha
def _get_poll_interval(self, idx):
    """Look up the delay for poll number `idx` and advance the counter.

    Returns a `(delay, idx + 1)` tuple.  The delay is taken from
    POLLS_INTERVAL while `idx` is smaller than its length and is
    DFLT_POLL_INTERVAL afterwards.
    """
    following = idx + 1
    if idx < len(POLLS_INTERVAL):
        return POLLS_INTERVAL[idx], following
    return DFLT_POLL_INTERVAL, following
class HttpClient(Client):
"""Death by Captcha HTTP API client."""
def __init__(self, *args):
Client.__init__(self, *args)
self.opener = build_opener(HTTPRedirectHandler())
self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
def _call(self, cmd, payload=None, headers=None):
if headers is None:
@ -158,22 +238,30 @@ class HttpClient(Client):
headers['Accept'] = HTTP_RESPONSE_TYPE
headers['User-Agent'] = API_VERSION
if hasattr(payload, 'items'):
payload = urlencode(payload)
payload = urllib.urlencode(payload)
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
else:
self._log('SEND', '%s' % cmd)
if payload is not None:
headers['Content-Length'] = len(payload)
try:
response = self.opener.open(Request(
response = self.opener.open(urllib2.Request(
HTTP_BASE_URL + '/' + cmd.strip('/'),
data=payload,
headers=headers
)).read()
except HTTPError as e:
if 403 == e.code:
raise AccessDeniedException(
'Access denied, please check your credentials and/or balance')
elif 400 == e.code or 413 == e.code:
raise ValueError("CAPTCHA was rejected by the service, check if it's a valid image")
except urllib2.HTTPError as err:
if 403 == err.code:
raise AccessDeniedException('Access denied, please check'
' your credentials and/or balance')
elif 400 == err.code or 413 == err.code:
raise ValueError("CAPTCHA was rejected by the service, check"
" if it's a valid image")
elif 503 == err.code:
raise OverflowError("CAPTCHA was rejected due to service"
" overload, try again later")
else:
raise err
else:
self._log('RECV', '%d %s' % (len(response), response))
try:
@ -192,38 +280,53 @@ class HttpClient(Client):
return not self._call('captcha/%d/report' % cid,
self.userpwd.copy()).get('is_correct')
def remove(self, cid):
return not self._call('captcha/%d/remove' % cid,
self.userpwd.copy()).get('captcha')
def upload(self, captcha):
def upload(self, captcha=None, **kwargs):
boundary = binascii.hexlify(os.urandom(16))
data = self.userpwd.copy()
data['swid'] = SOFTWARE_VENDOR_ID
body = '\r\n'.join(('\r\n'.join(('--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v))))
for k, v in data.items())
captcha = self._load_file(captcha)
body += '\r\n'.join(('',
'--%s' % boundary,
'Content-Disposition: form-data; name="captchafile"; filename="captcha"',
'Content-Type: application/octet-stream',
'Content-Length: %d' % len(captcha),
'',
captcha,
'--%s--' % boundary,
''))
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
body = '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in self.userpwd.items())
body += '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in kwargs.items())
if captcha:
img = _load_image(captcha)
body += '\r\n'.join((
'',
'--%s' % boundary,
'Content-Disposition: form-data; name="captchafile"; '
'filename="captcha"',
'Content-Type: application/octet-stream',
'Content-Length: %d' % len(img),
'',
img,
'--%s--' % boundary,
''
))
response = self._call('captcha', body, {
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
}) or {}
if response.get('captcha'):
return response
class SocketClient(Client):
"""Death by Captcha socket API client."""
TERMINATOR = '\r\n'
@ -253,12 +356,11 @@ class SocketClient(Client):
self.socket.settimeout(0)
try:
self.socket.connect(host)
except socket.error as e:
if errno.EINPROGRESS == e[0]:
pass
else:
except socket.error as err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
self.close()
raise e
raise err
return self.socket
def __del__(self):
@ -269,27 +371,30 @@ class SocketClient(Client):
fds = [sock]
buf += self.TERMINATOR
response = ''
intvl_idx = 0
while True:
rd, wr, ex = select.select((not buf and fds) or [],
(buf and fds) or [],
fds,
POLLS_INTERVAL)
if ex:
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
rds, wrs, exs = select.select((not buf and fds) or [],
(buf and fds) or [],
fds,
intvl)
if exs:
raise IOError('select() failed')
try:
if wr:
if wrs:
while buf:
buf = buf[wr[0].send(buf):]
elif rd:
buf = buf[wrs[0].send(buf):]
elif rds:
while True:
s = rd[0].recv(256)
s = rds[0].recv(256)
if not s:
raise IOError('recv(): connection lost')
else:
response += s
except socket.error as e:
if e[0] not in (errno.EAGAIN, errno.EINPROGRESS):
raise e
except socket.error as err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
raise err
if response.endswith(self.TERMINATOR):
self._log('RECV', response)
return response.rstrip(self.TERMINATOR)
@ -303,16 +408,18 @@ class SocketClient(Client):
request = json_encode(data)
response = None
for i in range(2):
for _ in range(2):
if not self.socket and cmd != 'login':
self._call('login', self.userpwd.copy())
self.socket_lock.acquire()
try:
sock = self.connect()
response = self._sendrecv(sock, request)
except IOError as e:
sys.stderr.write(str(e) + "\n")
except IOError as err:
sys.stderr.write(str(err) + "\n")
self.close()
except socket.error as e:
sys.stderr.write(str(e) + "\n")
except socket.error as err:
sys.stderr.write(str(err) + "\n")
self.close()
raise IOError('Connection refused')
else:
@ -320,64 +427,68 @@ class SocketClient(Client):
finally:
self.socket_lock.release()
if response is None:
raise IOError('Connection lost or timed out during API request')
try:
if response is None:
raise IOError('Connection lost timed out during API request')
try:
response = json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
if 'error' in response:
error = response['error']
if 'not-logged-in' == error:
raise AccessDeniedException('Access denied, check your credentials')
elif 'banned' == error:
raise AccessDeniedException('Access denied, account is suspended')
elif 'insufficient-funds' == error:
raise AccessDeniedException('CAPTCHA was rejected due to low balance')
elif 'invalid-captcha' == error:
raise ValueError('CAPTCHA is not a valid image')
elif 'service-overload' == error:
raise ValueError(
'CAPTCHA was rejected due to service overload, try again later')
else:
raise RuntimeError('API server error occured: %s' % error)
except Exception as e:
response = json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
if not response.get('error'):
return response
error = response['error']
if error in ('not-logged-in', 'invalid-credentials'):
raise AccessDeniedException('Access denied, check your credentials')
elif 'banned' == error:
raise AccessDeniedException('Access denied, account is suspended')
elif 'insufficient-funds' == error:
raise AccessDeniedException(
'CAPTCHA was rejected due to low balance')
elif 'invalid-captcha' == error:
raise ValueError('CAPTCHA is not a valid image')
elif 'service-overload' == error:
raise OverflowError(
'CAPTCHA was rejected due to service overload, try again later')
else:
self.socket_lock.acquire()
self.close()
self.socket_lock.release()
raise e
else:
return response
raise RuntimeError('API server error occured: %s' % error)
def get_user(self):
return self._call('user', self.userpwd.copy()) or {'user': 0}
return self._call('user') or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
def upload(self, captcha):
data = self.userpwd.copy()
data['captcha'] = base64.b64encode(self._load_file(captcha))
def upload(self, captcha=None, **kwargs):
data = {}
if captcha:
data['captcha'] = base64.b64encode(_load_image(captcha))
if kwargs:
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = base64.b64encode(_load_image(banner))
data.update(kwargs)
response = self._call('upload', data)
if response.get('captcha'):
return dict((k, response.get(k)) for k in ('captcha', 'text', 'is_correct'))
uploaded_captcha = dict(
(k, response.get(k))
for k in ('captcha', 'text', 'is_correct')
)
if not uploaded_captcha['text']:
uploaded_captcha['text'] = None
return uploaded_captcha
def report(self, cid):
data = self.userpwd.copy()
data['captcha'] = cid
return not self._call('report', data).get('is_correct')
return not self._call('report', {'captcha': cid}).get('is_correct')
def remove(self, cid):
data = self.userpwd.copy()
data['captcha'] = cid
return not self._call('remove', data).get('captcha')
if '__main__' == __name__:
import sys
# Put your DBC username & password here:
#client = HttpClient(sys.argv[1], sys.argv[2])
# client = HttpClient(sys.argv[1], sys.argv[2])
client = SocketClient(sys.argv[1], sys.argv[2])
client.is_verbose = True
@ -393,11 +504,12 @@ if '__main__' == __name__:
captcha = None
if captcha:
print('CAPTCHA %d solved: %s' % (captcha['captcha'], captcha['text']))
print('CAPTCHA %d solved: %s' % \
(captcha['captcha'], captcha['text']))
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
# in fact incorrectly solved!
try:
client.report(captcha['captcha'])
except Exception as e:
sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
# try:
# client.report(captcha['captcha'])
# except Exception, e:
# sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))

@ -1,516 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Death by Captcha HTTP and socket API clients.
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
socket ones. Both offer the same functionality, with the socket API
sporting faster responses and using way less connections.
To access the socket API, use SocketClient class; for the HTTP API, use
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
connection opened and serializes all API requests sent through it, thus
it is advised to keep a pool of them if your script is heavily
multithreaded.
Both SocketClient and HttpClient give you the following methods:
get_user()
Returns your DBC account details as a dict with the following keys:
"user": your account numeric ID; if login fails, it will be the only
item with the value of 0;
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
solved CAPTCHA in US cents;
"balance": your DBC account balance in US cents;
"is_banned": flag indicating whether your account is suspended or not.
get_balance()
Returns your DBC account balance in US cents.
get_captcha(cid)
Returns an uploaded CAPTCHA details as a dict with the following keys:
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
be the only item with the value of 0;
"text": the CAPTCHA text, if solved, otherwise None;
"is_correct": flag indicating whether the CAPTCHA was solved correctly
(DBC can detect that in rare cases).
The only argument `cid` is the CAPTCHA numeric ID.
get_text(cid)
Returns an uploaded CAPTCHA text (None if not solved). The only argument
`cid` is the CAPTCHA numeric ID.
report(cid)
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
CAPTCHA numeric ID. Returns True on success, False otherwise.
upload(captcha)
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
object (any object with `read` method defined, actually, so StringIO
will do), or CAPTCHA image file name. On successful upload you'll get
the CAPTCHA details dict (see get_captcha() method).
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
to poll for its status periodically using get_captcha() or get_text()
method until the CAPTCHA is solved and you get the text.
decode(captcha, timeout=DEFAULT_TIMEOUT)
A convenient method that uploads a CAPTCHA and polls for its status
periodically, but no longer than `timeout` (defaults to 60 seconds).
If solved, you'll get the CAPTCHA details dict (see get_captcha()
method for details). See upload() method for details on `captcha`
argument.
Visit http://www.deathbycaptcha.com/user/api for updates.
"""
import base64
import binascii
import errno
import imghdr
import random
import os
import select
import socket
import sys
import threading
import time
import urllib
import urllib2
try:
from json import read as json_decode, write as json_encode
except ImportError:
try:
from json import loads as json_decode, dumps as json_encode
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
# Client identification string: sent as the HTTP User-Agent header and as
# the "version" field of every socket API request
API_VERSION = 'DBC/Python v4.6'
# decode() timeouts, in seconds: DEFAULT_TIMEOUT applies when a CAPTCHA
# image is supplied, DEFAULT_TOKEN_TIMEOUT when decode() gets no image
DEFAULT_TIMEOUT = 60
DEFAULT_TOKEN_TIMEOUT = 120
# Successive delays, in seconds, handed out by Client._get_poll_interval();
# once the list is exhausted every further delay is DFLT_POLL_INTERVAL
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
DFLT_POLL_INTERVAL = 3
# Base HTTP API url
# NOTE(review): plain HTTP -- HttpClient posts the account credentials to
# this URL; confirm whether an HTTPS endpoint can be used instead
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range; SocketClient.connect() picks one
# of the ports at random for every new connection
SOCKET_HOST = 'api.dbcapi.me'
SOCKET_PORTS = range(8123, 8131)
def _load_image(captcha):
    """Return the raw bytes of a CAPTCHA image.

    `captcha` may be a file-like object (anything with a `read` method),
    a bytearray (or subclass) holding the image data, or the name of an
    image file.

    Raises ValueError if the image is empty and TypeError if imghdr does
    not recognise its format.  Errors raised while opening or reading the
    file propagate unchanged.
    """
    if hasattr(captcha, 'read'):
        img = captcha.read()
    elif isinstance(captcha, bytearray):
        img = captcha
    else:
        # The context manager closes the file even when read() fails.
        with open(captcha, 'rb') as captcha_file:
            img = captcha_file.read()
    if not len(img):
        raise ValueError('CAPTCHA image is empty')
    if imghdr.what(None, img) is None:
        raise TypeError('Unknown CAPTCHA image type')
    return img
class AccessDeniedException(Exception):
    """Raised when the DBC API refuses a request.

    The clients raise it for rejected credentials, suspended accounts and
    insufficient balance.
    """
class Client(object):
    """Death by Captcha API client base class.

    Stores the account credentials and implements the operations shared by
    both transports.  Subclasses supply get_user(), get_captcha(), report()
    and upload().
    """

    def __init__(self, username, password):
        # Set to True to have _log() print a trace of the API traffic.
        self.is_verbose = False
        self.userpwd = {'username': username, 'password': password}

    def _log(self, cmd, msg=''):
        """Print a timestamped trace line when is_verbose is set.

        Returns self.
        """
        if self.is_verbose:
            # Single-argument call form: parses as a print statement on
            # Python 2 and as a function call on Python 3.
            print('%d %s %s' % (time.time(), cmd, msg.rstrip()))
        return self

    def close(self):
        """Release transport resources; nothing to do in the base class."""
        pass

    def connect(self):
        """Open the transport; nothing to do in the base class."""
        pass

    def get_user(self):
        """Fetch user details -- ID, balance, rate and banned status."""
        raise NotImplementedError()

    def get_balance(self):
        """Fetch user balance (in US cents)."""
        return self.get_user().get('balance')

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details -- ID, text and correctness flag."""
        raise NotImplementedError()

    def get_text(self, cid):
        """Fetch a CAPTCHA text (None when there is none yet)."""
        return self.get_captcha(cid).get('text') or None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved."""
        raise NotImplementedError()

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA.

        Accepts file names and file-like objects. Returns CAPTCHA details
        dict on success.  The signature mirrors the subclasses' and the
        way decode() calls it: extra keyword arguments are forwarded with
        the upload.
        """
        raise NotImplementedError()

    def decode(self, captcha=None, timeout=None, **kwargs):
        """Try to solve a CAPTCHA.

        See Client.upload() for arguments details.

        Uploads a CAPTCHA, polls for its status periodically with arbitrary
        timeout (in seconds), returns CAPTCHA details if (correctly) solved
        and None otherwise.  A missing, zero or negative `timeout` selects
        DEFAULT_TIMEOUT when `captcha` is given and DEFAULT_TOKEN_TIMEOUT
        when it is not.
        """
        if not timeout or timeout < 0:
            timeout = DEFAULT_TIMEOUT if captcha else DEFAULT_TOKEN_TIMEOUT
        deadline = time.time() + timeout
        uploaded_captcha = self.upload(captcha, **kwargs)
        if not uploaded_captcha:
            return None
        intvl_idx = 0  # position in the POLLS_INTERVAL schedule
        while deadline > time.time() and not uploaded_captcha.get('text'):
            intvl, intvl_idx = self._get_poll_interval(intvl_idx)
            time.sleep(intvl)
            pulled = self.get_captcha(uploaded_captcha['captcha'])
            # Keep the previous details when the poll returned another
            # record (e.g. the {'captcha': 0} placeholder), so the id that
            # is being polled is never lost.
            if pulled.get('captcha') == uploaded_captcha['captcha']:
                uploaded_captcha = pulled
        if uploaded_captcha.get('text') and uploaded_captcha.get('is_correct'):
            return uploaded_captcha
        return None

    def _get_poll_interval(self, idx):
        """Return the delay for poll number `idx` and the next index.

        Delays come from POLLS_INTERVAL while `idx` is within the list and
        fall back to DFLT_POLL_INTERVAL afterwards.
        """
        if len(POLLS_INTERVAL) > idx:
            intvl = POLLS_INTERVAL[idx]
        else:
            intvl = DFLT_POLL_INTERVAL
        return intvl, idx + 1
class HttpClient(Client):
    """Death by Captcha HTTP API client."""

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())

    def _call(self, cmd, payload=None, headers=None):
        """Send one request to the HTTP API and return the decoded reply.

        `cmd` is the path below HTTP_BASE_URL.  `payload` is a mapping
        (url-encoded before sending), a ready-made request body, or None.
        `headers` holds extra request headers; the caller's dict is not
        modified.

        Raises AccessDeniedException on HTTP 403, ValueError on 400/413 and
        OverflowError on 503; any other HTTPError propagates unchanged.
        Raises RuntimeError when the reply cannot be decoded as JSON.
        """
        headers = dict(headers or {})
        headers['Accept'] = HTTP_RESPONSE_TYPE
        headers['User-Agent'] = API_VERSION
        if hasattr(payload, 'items'):
            payload = urllib.urlencode(payload)
            # NOTE(review): in verbose mode this line prints the url-encoded
            # payload, which includes the account credentials.
            self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
        else:
            self._log('SEND', '%s' % cmd)
        if payload is not None:
            headers['Content-Length'] = len(payload)
        try:
            response = self.opener.open(urllib2.Request(
                HTTP_BASE_URL + '/' + cmd.strip('/'),
                data=payload,
                headers=headers
            )).read()
        except urllib2.HTTPError as err:
            if 403 == err.code:
                raise AccessDeniedException('Access denied, please check'
                                            ' your credentials and/or balance')
            elif 400 == err.code or 413 == err.code:
                raise ValueError("CAPTCHA was rejected by the service, check"
                                 " if it's a valid image")
            elif 503 == err.code:
                raise OverflowError("CAPTCHA was rejected due to service"
                                    " overload, try again later")
            raise
        self._log('RECV', '%d %s' % (len(response), response))
        try:
            return json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

    def get_user(self):
        """Fetch user details; {'user': 0} when the reply is empty."""
        return self._call('user', self.userpwd.copy()) or {'user': 0}

    def get_captcha(self, cid):
        """Fetch CAPTCHA details; {'captcha': 0} when the reply is empty."""
        return self._call('captcha/%d' % cid) or {'captcha': 0}

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved.

        Returns True when the reply no longer flags it as correct.
        """
        return not self._call('captcha/%d/report' % cid,
                              self.userpwd.copy()).get('is_correct')

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA as a multipart/form-data request.

        `captcha` is anything _load_image() accepts, or None when only
        keyword fields are sent.  Every keyword argument becomes a text
        form field; a `banner` argument is loaded with _load_image() and
        sent base64-encoded.  Returns the API reply when it carries a
        CAPTCHA id, None otherwise.
        """
        boundary = binascii.hexlify(os.urandom(16))
        banner = kwargs.get('banner', '')
        if banner:
            kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
        fields = list(self.userpwd.items())
        fields.extend(kwargs.items())
        # All parts are collected in one list and joined once, so every
        # part is separated from the next by exactly one CRLF.
        parts = []
        for k, v in fields:
            value = str(v)
            parts.extend((
                '--%s' % boundary,
                'Content-Disposition: form-data; name="%s"' % k,
                'Content-Type: text/plain',
                'Content-Length: %d' % len(value),
                '',
                value,
            ))
        if captcha:
            # Normalise bytearray input to a byte string before joining.
            img = bytes(_load_image(captcha))
            parts.extend((
                '--%s' % boundary,
                'Content-Disposition: form-data; name="captchafile"; '
                'filename="captcha"',
                'Content-Type: application/octet-stream',
                'Content-Length: %d' % len(img),
                '',
                img,
            ))
        # The closing delimiter is required whether or not an image part
        # was added.
        parts.extend(('--%s--' % boundary, ''))
        body = '\r\n'.join(parts)
        response = self._call('captcha', body, {
            'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
        }) or {}
        if response.get('captcha'):
            return response
        return None
class SocketClient(Client):
    """Death by Captcha socket API client.

    Keeps one connection open and serializes the requests sent through it
    with `socket_lock`.
    """

    # Every request and every reply ends with this sequence.
    TERMINATOR = '\r\n'

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.socket_lock = threading.Lock()
        self.socket = None

    def close(self):
        """Shut down and drop the current connection, if any."""
        if self.socket:
            self._log('CLOSE')
            try:
                self.socket.shutdown(socket.SHUT_RDWR)
            except socket.error:
                # Best effort: the socket is closed below regardless.
                pass
            finally:
                self.socket.close()
                self.socket = None

    def connect(self):
        """Return the current socket, opening a non-blocking one if needed.

        A new connection goes to SOCKET_HOST on a port picked at random
        from SOCKET_PORTS.  The "in progress" errors of a non-blocking
        connect are ignored; any other socket error closes the socket and
        propagates.
        """
        if not self.socket:
            self._log('CONN')
            host = (socket.gethostbyname(SOCKET_HOST),
                    random.choice(SOCKET_PORTS))
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.settimeout(0)
            try:
                self.socket.connect(host)
            except socket.error as err:
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    self.close()
                    raise err
        return self.socket

    def __del__(self):
        self.close()

    def _sendrecv(self, sock, buf):
        """Send `buf` over `sock` and return the reply without TERMINATOR.

        Raises IOError when select() reports an exceptional condition or
        the peer closes the connection.

        NOTE(review): the loop has no overall deadline.  When select()
        times out it just waits again, so a silent peer blocks the caller
        (and `socket_lock`) indefinitely.  The original ended with an
        unreachable `raise IOError('send/recv timed out')` after the loop.
        """
        self._log('SEND', buf)
        fds = [sock]
        buf += self.TERMINATOR
        response = ''
        intvl_idx = 0
        while True:
            intvl, intvl_idx = self._get_poll_interval(intvl_idx)
            # Wait for writability while data is pending, for readability
            # once everything has been sent.
            rds, wrs, exs = select.select((not buf and fds) or [],
                                          (buf and fds) or [],
                                          fds,
                                          intvl)
            if exs:
                raise IOError('select() failed')
            try:
                if wrs:
                    while buf:
                        buf = buf[wrs[0].send(buf):]
                elif rds:
                    # Drain the socket; the loop ends through the
                    # "would block" error handled below.
                    while True:
                        s = rds[0].recv(256)
                        if not s:
                            raise IOError('recv(): connection lost')
                        response += s
            except socket.error as err:
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    raise
            if response.endswith(self.TERMINATOR):
                self._log('RECV', response)
                return response.rstrip(self.TERMINATOR)

    def _call(self, cmd, data=None):
        """Run one API command and return the decoded reply dict.

        `data` holds the command arguments; the caller's dict is not
        modified.  A 'login' command is issued first whenever there is no
        open connection.  The request is attempted twice.

        Raises IOError when no reply was received, RuntimeError for an
        undecodable reply or an unknown server error, AccessDeniedException,
        ValueError or OverflowError for the corresponding server errors.
        """
        request_data = dict(data or {})
        request_data['cmd'] = cmd
        request_data['version'] = API_VERSION
        request = json_encode(request_data)

        response = None
        for _ in range(2):
            if not self.socket and cmd != 'login':
                self._call('login', self.userpwd.copy())
            with self.socket_lock:
                try:
                    sock = self.connect()
                    response = self._sendrecv(sock, request)
                except IOError as err:
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                except socket.error as err:
                    # NOTE(review): socket.error derives from IOError on
                    # Python 2.6+, so the handler above normally wins and
                    # this one is not expected to run.
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                    raise IOError('Connection refused')
                else:
                    break

        if response is None:
            raise IOError('Connection lost or timed out during API request')

        try:
            response = json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

        if not response.get('error'):
            return response

        error = response['error']
        if error in ('not-logged-in', 'invalid-credentials'):
            raise AccessDeniedException('Access denied, check your credentials')
        elif 'banned' == error:
            raise AccessDeniedException('Access denied, account is suspended')
        elif 'insufficient-funds' == error:
            raise AccessDeniedException(
                'CAPTCHA was rejected due to low balance')
        elif 'invalid-captcha' == error:
            raise ValueError('CAPTCHA is not a valid image')
        elif 'service-overload' == error:
            raise OverflowError(
                'CAPTCHA was rejected due to service overload, try again later')
        else:
            # Unknown error: drop the connection.  `with` releases the lock
            # even if close() raises.
            with self.socket_lock:
                self.close()
            raise RuntimeError('API server error occured: %s' % error)

    def get_user(self):
        """Fetch user details; {'user': 0} when the reply is empty."""
        return self._call('user') or {'user': 0}

    def get_captcha(self, cid):
        """Fetch CAPTCHA details; {'captcha': 0} when the reply is empty."""
        return self._call('captcha', {'captcha': cid}) or {'captcha': 0}

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA through the socket API.

        `captcha` is anything _load_image() accepts, or None when only
        keyword fields are sent.  Keyword arguments are added to the
        request; a `banner` argument is loaded with _load_image() and sent
        base64-encoded.  Returns a dict with the 'captcha', 'text' and
        'is_correct' fields when the reply carries a CAPTCHA id, None
        otherwise.
        """
        data = {}
        if captcha:
            data['captcha'] = base64.b64encode(_load_image(captcha))
        if kwargs:
            banner = kwargs.get('banner', '')
            if banner:
                kwargs['banner'] = base64.b64encode(_load_image(banner))
            data.update(kwargs)
        response = self._call('upload', data)
        if response.get('captcha'):
            uploaded_captcha = dict(
                (k, response.get(k))
                for k in ('captcha', 'text', 'is_correct')
            )
            # Normalise an empty text to None ("not solved yet").
            if not uploaded_captcha['text']:
                uploaded_captcha['text'] = None
            return uploaded_captcha
        return None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved.

        Returns True when the reply no longer flags it as correct.
        """
        return not self._call('report', {'captcha': cid}).get('is_correct')
if '__main__' == __name__:
    # Command line: USERNAME PASSWORD [CAPTCHA_FILE ...]
    if len(sys.argv) < 3:
        sys.stderr.write(
            'Usage: %s USERNAME PASSWORD [CAPTCHA_FILE ...]\n' % sys.argv[0])
        sys.exit(2)

    # Put your DBC username & password here:
    # client = HttpClient(sys.argv[1], sys.argv[2])
    client = SocketClient(sys.argv[1], sys.argv[2])
    client.is_verbose = True

    # Single-argument print calls work on both Python 2 and Python 3.
    print('Your balance is %s US cents' % client.get_balance())

    for fn in sys.argv[3:]:
        try:
            # Put your CAPTCHA image file name or file-like object, and optional
            # solving timeout (in seconds) here:
            captcha = client.decode(fn, DEFAULT_TIMEOUT)
        except Exception as e:
            sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
            captcha = None

        if captcha:
            print('CAPTCHA %d solved: %s' %
                  (captcha['captcha'], captcha['text']))

            # Report as incorrectly solved if needed. Make sure the CAPTCHA was
            # in fact incorrectly solved!
            # try:
            #     client.report(captcha['captcha'])
            # except Exception as e:
            #     sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))

@ -0,0 +1,403 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import base64
import binascii
import errno
import imghdr
import random
import os
import select
import socket
import sys
import threading
import time
try:
from json import read as json_decode, write as json_encode
except ImportError:
try:
from json import loads as json_decode, dumps as json_encode
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
try:
from urllib2 import build_opener, HTTPRedirectHandler, Request, HTTPError
from urllib import urlencode, urlopen
except ImportError:
from urllib.request import build_opener, HTTPRedirectHandler, Request, urlopen
from urllib.error import HTTPError
from urllib.parse import urlencode
# API version and unique software ID
# NOTE(review): SOFTWARE_VENDOR_ID is presumably the value sent as the
# `swid` upload field -- confirm against HttpClient.upload()
API_VERSION = 'DBC/Python v4.0.11'
SOFTWARE_VENDOR_ID = 0
# Default CAPTCHA timeout and decode() polling interval, both in seconds:
# DEFAULT_TIMEOUT is decode()'s default and its fallback for a zero or
# negative timeout; POLLS_INTERVAL is the sleep between two status polls
DEFAULT_TIMEOUT = 60
POLLS_INTERVAL = 5
# Base HTTP API url
HTTP_BASE_URL = 'http://api.deathbycaptcha.com/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range
SOCKET_HOST = 'api.deathbycaptcha.com'
SOCKET_PORTS = range(8123, 8131)
class AccessDeniedException(Exception):
    """Raised when the DBC API refuses a request.

    The clients raise it for rejected credentials, suspended accounts and
    insufficient balance.
    """
class Client(object):
    """Death by Captcha API client base class.

    Stores the account credentials and implements the operations shared by
    both transports.  Subclasses supply get_user(), get_captcha(), report(),
    remove() and upload().
    """

    def __init__(self, username, password):
        # Set to True to have _log() print a trace of the API traffic.
        self.is_verbose = False
        self.userpwd = {'username': username,
                        'password': password}

    def _load_file(self, captcha):
        """Return the raw bytes of a CAPTCHA image.

        `captcha` may be a file-like object (anything with a `read`
        method), a bytearray, the name of an existing file, or anything
        else that urlopen() accepts (it is then fetched as a URL).

        Raises ValueError if the image is empty and TypeError if imghdr
        does not recognise its format.  I/O errors propagate unchanged.
        """
        if hasattr(captcha, 'read'):
            raw_captcha = captcha.read()
        elif isinstance(captcha, bytearray):
            raw_captcha = captcha
        elif os.path.isfile(captcha):
            # The context manager closes the file even when read() fails.
            with open(captcha, 'rb') as f:
                raw_captcha = f.read()
        else:
            # NOTE(review): every non-file string ends up in urlopen(), so
            # callers must not pass untrusted values here.
            f_stream = urlopen(captcha)
            try:
                raw_captcha = f_stream.read()
            finally:
                f_stream.close()
        if not len(raw_captcha):
            raise ValueError('CAPTCHA image is empty')
        if imghdr.what(None, raw_captcha) is None:
            raise TypeError('Unknown CAPTCHA image type')
        return raw_captcha

    def _log(self, cmd, msg=''):
        """Print a timestamped trace line when is_verbose is set.

        Returns self.
        """
        if self.is_verbose:
            print('%d %s %s' % (time.time(), cmd, msg.rstrip()))
        return self

    def close(self):
        """Release transport resources; nothing to do in the base class."""
        pass

    def connect(self):
        """Open the transport; nothing to do in the base class."""
        pass

    # The abstract methods below raise NotImplementedError.  The original
    # `raise NotImplemented()` failed with TypeError instead, because the
    # NotImplemented constant is not callable.

    def get_user(self):
        """Fetch the user's details dict -- balance, rate and banned status."""
        raise NotImplementedError()

    def get_balance(self):
        """Fetch the user's balance (in US cents)."""
        return self.get_user().get('balance')

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details dict -- its ID, text and correctness."""
        raise NotImplementedError()

    def get_text(self, cid):
        """Fetch a CAPTCHA text (None when there is none yet)."""
        return self.get_captcha(cid).get('text') or None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved."""
        raise NotImplementedError()

    def remove(self, cid):
        """Remove an unsolved CAPTCHA."""
        raise NotImplementedError()

    def upload(self, captcha):
        """Upload a CAPTCHA.

        Accepts file names and file-like objects.  Returns CAPTCHA details
        dict on success.
        """
        raise NotImplementedError()

    def decode(self, captcha, timeout=DEFAULT_TIMEOUT):
        """Try to solve a CAPTCHA.

        See Client.upload() for arguments details.

        Uploads a CAPTCHA, polls for its status periodically with arbitrary
        timeout (in seconds), returns CAPTCHA details if (correctly) solved
        and None otherwise.  A zero or negative `timeout` falls back to
        DEFAULT_TIMEOUT.
        """
        deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
        c = self.upload(captcha)
        if not c:
            return None
        cid = c['captcha']
        while deadline > time.time() and not c.get('text'):
            time.sleep(POLLS_INTERVAL)
            polled = self.get_captcha(cid)
            # Ignore replies for another record (e.g. the {'captcha': 0}
            # placeholder) so the uploaded id keeps being polled.
            if polled.get('captcha') == cid:
                c = polled
        if c.get('text') and c.get('is_correct'):
            return c
        return None
class HttpClient(Client):
    """Death by Captcha HTTP API client."""

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.opener = build_opener(HTTPRedirectHandler())

    @staticmethod
    def _to_bytes(value):
        """Return `value` as a byte string.

        Byte strings pass through unchanged, bytearrays are copied to
        bytes, text is UTF-8 encoded and any other object (e.g. the integer
        vendor ID) is converted with str() first.
        """
        if isinstance(value, bytearray):
            return bytes(value)
        if isinstance(value, bytes):
            return value
        if not hasattr(value, 'encode'):
            value = str(value)
        # On Python 2 str() already yields a byte string.
        return value if isinstance(value, bytes) else value.encode('utf-8')

    def _call(self, cmd, payload=None, headers=None):
        """Send one API request and return the decoded JSON response.

        `payload` may be None (GET), a mapping (form-encoded POST) or an
        already encoded request body.  Returns an empty dict when the
        server answers with an HTTP error other than 400, 403 or 413.

        Raises AccessDeniedException on HTTP 403, ValueError on HTTP 400
        and 413, and RuntimeError when the response is not valid JSON.
        """
        # Work on a copy so the caller's dict is not modified.
        headers = dict(headers or {})
        headers['Accept'] = HTTP_RESPONSE_TYPE
        headers['User-Agent'] = API_VERSION
        if hasattr(payload, 'items'):
            payload = urlencode(payload)
            self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
        if payload is not None:
            # Python 3's urllib only accepts bytes as request data.
            payload = self._to_bytes(payload)
            headers['Content-Length'] = len(payload)
        try:
            handle = self.opener.open(Request(
                HTTP_BASE_URL + '/' + cmd.strip('/'),
                data=payload,
                headers=headers
            ))
            try:
                response = handle.read()
            finally:
                handle.close()
        except HTTPError as e:
            if 403 == e.code:
                raise AccessDeniedException(
                    'Access denied, please check your credentials and/or balance')
            elif 400 == e.code or 413 == e.code:
                raise ValueError("CAPTCHA was rejected by the service, check if it's a valid image")
        else:
            self._log('RECV', '%d %s' % (len(response), response))
            try:
                return json_decode(response)
            except Exception:
                raise RuntimeError('Invalid API response')
        # Any other HTTP error is reported as an empty result.
        return {}

    def get_user(self):
        """Fetch the user's details dict; {'user': 0} on an empty response."""
        return self._call('user', self.userpwd.copy()) or {'user': 0}

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details dict; {'captcha': 0} on an empty response."""
        return self._call('captcha/%d' % cid) or {'captcha': 0}

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved.

        Returns True when the response no longer flags it as correct.
        """
        return not self._call('captcha/%d/report' % cid,
                              self.userpwd.copy()).get('is_correct')

    def remove(self, cid):
        """Remove an unsolved CAPTCHA.

        Returns True when the response carries no CAPTCHA ID.
        """
        return not self._call('captcha/%d/remove' % cid,
                              self.userpwd.copy()).get('captcha')

    def upload(self, captcha):
        """Upload a CAPTCHA as a multipart/form-data POST.

        See Client._load_file() for the accepted `captcha` values.
        Returns the CAPTCHA details dict on success, otherwise None.
        """
        boundary = binascii.hexlify(os.urandom(16))
        if not isinstance(boundary, str):
            # Python 3: hexlify() returns bytes; '%s' formatting would
            # otherwise embed the b'...' repr into the boundary.
            boundary = boundary.decode('ascii')
        fields = self.userpwd.copy()
        fields['swid'] = SOFTWARE_VENDOR_ID
        image = self._to_bytes(self._load_file(captcha))

        delimiter = '--%s' % boundary
        lines = []
        for name, value in fields.items():
            value = self._to_bytes(value)
            lines += [delimiter,
                      'Content-Disposition: form-data; name="%s"' % name,
                      'Content-Type: text/plain',
                      'Content-Length: %d' % len(value),
                      '',
                      value]
        lines += [delimiter,
                  'Content-Disposition: form-data; name="captchafile"; filename="captcha"',
                  'Content-Type: application/octet-stream',
                  'Content-Length: %d' % len(image),
                  '',
                  image,
                  '--%s--' % boundary,
                  '']
        # Assemble the body as bytes so binary image data is never mixed
        # with text.
        body = b'\r\n'.join(self._to_bytes(line) for line in lines)

        response = self._call('captcha', body, {
            'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
        }) or {}
        if response.get('captcha'):
            return response
        return None
class SocketClient(Client):
    """Death by Captcha socket API client.

    Keeps one non-blocking connection open and serializes the requests
    sent through it with `socket_lock`.
    """

    # Marks the end of every request and response on the wire.
    TERMINATOR = '\r\n'

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.socket_lock = threading.Lock()
        self.socket = None

    def close(self):
        """Shut down and drop the current connection, if any."""
        if self.socket:
            self._log('CLOSE')
            try:
                self.socket.shutdown(socket.SHUT_RDWR)
            except socket.error:
                # Best effort: the peer may already be gone.
                pass
            finally:
                self.socket.close()
                self.socket = None

    def connect(self):
        """Return the connected socket, opening a new one if needed."""
        if not self.socket:
            self._log('CONN')
            host = (socket.gethostbyname(SOCKET_HOST),
                    random.choice(SOCKET_PORTS))
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.settimeout(0)
            try:
                self.socket.connect(host)
            except socket.error as e:
                # A non-blocking connect() reports EINPROGRESS while the
                # connection is being established; anything else is fatal.
                # `e.errno` is used because exceptions cannot be indexed
                # on Python 3.
                if errno.EINPROGRESS != e.errno:
                    self.close()
                    raise e
        return self.socket

    def __del__(self):
        self.close()

    def _sendrecv(self, sock, buf):
        """Send `buf` plus the terminator and return the response text.

        The response is returned without its trailing terminator.

        Raises IOError if select() flags the socket as exceptional or the
        peer closes the connection.

        NOTE(review): the loop only ends through `return` or an exception,
        so a peer that stays silent is waited on indefinitely; the
        original's trailing 'send/recv timed out' raise was unreachable
        and has been dropped.
        """
        self._log('SEND', buf)
        fds = [sock]
        # Sockets carry bytes: encode text before sending (a no-op for
        # Python 2 byte strings).
        terminator = self.TERMINATOR.encode('ascii')
        if not isinstance(buf, bytes):
            buf = buf.encode('utf-8')
        buf += terminator
        response = b''
        while True:
            # Wait for writability while data is pending, then for
            # readability.
            rd, wr, ex = select.select((not buf and fds) or [],
                                       (buf and fds) or [],
                                       fds,
                                       POLLS_INTERVAL)
            if ex:
                raise IOError('select() failed')
            try:
                if wr:
                    while buf:
                        buf = buf[wr[0].send(buf):]
                elif rd:
                    # Drain the socket; the non-blocking recv() ends the
                    # loop by raising EAGAIN once no more data is queued.
                    while True:
                        s = rd[0].recv(256)
                        if not s:
                            raise IOError('recv(): connection lost')
                        response += s
            except socket.error as e:
                if e.errno not in (errno.EAGAIN, errno.EINPROGRESS):
                    raise e
            if response.endswith(terminator):
                if not isinstance(response, str):
                    # Python 3: hand text back to the caller.
                    response = response.decode('utf-8')
                self._log('RECV', response)
                return response.rstrip(self.TERMINATOR)

    def _call(self, cmd, data=None):
        """Send the API command `cmd` and return the decoded response.

        The request is attempted at most twice; after an I/O failure the
        connection is closed so the retry reconnects.

        Raises IOError when both attempts fail, RuntimeError on an
        undecodable response or an unknown server error,
        AccessDeniedException on credential, suspension or balance errors,
        and ValueError when the CAPTCHA is rejected.
        """
        if data is None:
            data = {}
        data['cmd'] = cmd
        data['version'] = API_VERSION
        request = json_encode(data)

        response = None
        for _ in range(2):
            with self.socket_lock:
                try:
                    sock = self.connect()
                    response = self._sendrecv(sock, request)
                except IOError as e:
                    # socket.error is an IOError subclass (Python >= 2.6)
                    # and an alias of it on Python 3, so connection errors
                    # end up here too; the original's separate
                    # `except socket.error` branch could never run.
                    sys.stderr.write(str(e) + "\n")
                    self.close()
                else:
                    break

        try:
            if response is None:
                raise IOError('Connection lost or timed out during API request')
            try:
                response = json_decode(response)
            except Exception:
                raise RuntimeError('Invalid API response')
            if 'error' in response:
                error = response['error']
                if 'not-logged-in' == error:
                    raise AccessDeniedException('Access denied, check your credentials')
                elif 'banned' == error:
                    raise AccessDeniedException('Access denied, account is suspended')
                elif 'insufficient-funds' == error:
                    raise AccessDeniedException('CAPTCHA was rejected due to low balance')
                elif 'invalid-captcha' == error:
                    raise ValueError('CAPTCHA is not a valid image')
                elif 'service-overload' == error:
                    raise ValueError(
                        'CAPTCHA was rejected due to service overload, try again later')
                else:
                    raise RuntimeError('API server error occured: %s' % error)
        except Exception as e:
            # Drop the connection after any failure so the next call
            # starts with a fresh one.
            with self.socket_lock:
                self.close()
            raise e
        else:
            return response

    def get_user(self):
        """Fetch the user's details dict; {'user': 0} on an empty response."""
        return self._call('user', self.userpwd.copy()) or {'user': 0}

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details dict; {'captcha': 0} on an empty response."""
        return self._call('captcha', {'captcha': cid}) or {'captcha': 0}

    def upload(self, captcha):
        """Upload a CAPTCHA as base64 text.

        See Client._load_file() for the accepted `captcha` values.
        Returns a dict with the 'captcha', 'text' and 'is_correct' items on
        success, otherwise None.
        """
        data = self.userpwd.copy()
        # b64encode() returns bytes, which Python 3's json cannot
        # serialize; base64 output is always ASCII.
        data['captcha'] = base64.b64encode(
            self._load_file(captcha)).decode('ascii')
        response = self._call('upload', data)
        if response.get('captcha'):
            return dict((k, response.get(k)) for k in ('captcha', 'text', 'is_correct'))
        return None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved.

        Returns True when the response no longer flags it as correct.
        """
        data = self.userpwd.copy()
        data['captcha'] = cid
        return not self._call('report', data).get('is_correct')

    def remove(self, cid):
        """Remove an unsolved CAPTCHA.

        Returns True when the response carries no CAPTCHA ID.
        """
        data = self.userpwd.copy()
        data['captcha'] = cid
        return not self._call('remove', data).get('captcha')
if '__main__' == __name__:
    # Demo: print the account balance, then try to solve every CAPTCHA
    # file named on the command line.
    import sys
    # Put your DBC username & password here:
    #client = HttpClient(sys.argv[1], sys.argv[2])
    client = SocketClient(sys.argv[1], sys.argv[2])
    client.is_verbose = True
    print('Your balance is %s US cents' % client.get_balance())
    for fn in sys.argv[3:]:
        try:
            # Put your CAPTCHA image file name or file-like object, and optional
            # solving timeout (in seconds) here:
            captcha = client.decode(fn, DEFAULT_TIMEOUT)
        except Exception as e:
            sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
            captcha = None
        if captcha:
            print('CAPTCHA %d solved: %s' % (captcha['captcha'], captcha['text']))
            # Report as incorrectly solved if needed. Make sure the CAPTCHA was
            # in fact incorrectly solved!
            # Left disabled on purpose: running it unconditionally would
            # report every solved CAPTCHA as incorrect.
            # try:
            #     client.report(captcha['captcha'])
            # except Exception as e:
            #     sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
__title__ = 'subliminal'
__version__ = '2.0.5'
__version__ = '2.1.0.dev'
__short_version__ = '.'.join(__version__.split('.')[:2])
__author__ = 'Antoine Bertin'
__license__ = 'MIT'

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import datetime
from dogpile.cache import make_region

@ -4,6 +4,7 @@ Subliminal uses `click <http://click.pocoo.org>`_ to provide a powerful :abbr:`C
"""
from __future__ import division
from __future__ import absolute_import
from collections import defaultdict
from datetime import timedelta
import glob
@ -219,13 +220,12 @@ config_file = 'config.ini'
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
@click.option('--subscenter', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='SubsCenter configuration.')
@click.option('--cache-dir', type=click.Path(writable=True, file_okay=False), default=dirs.user_cache_dir,
show_default=True, expose_value=True, help='Path to the cache directory.')
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, subscenter, cache_dir, debug):
def subliminal(ctx, addic7ed, legendastv, opensubtitles, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
@ -253,8 +253,6 @@ def subliminal(ctx, addic7ed, legendastv, opensubtitles, subscenter, cache_dir,
ctx.obj['provider_configs']['legendastv'] = {'username': legendastv[0], 'password': legendastv[1]}
if opensubtitles:
ctx.obj['provider_configs']['opensubtitles'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
if subscenter:
ctx.obj['provider_configs']['subscenter'] = {'username': subscenter[0], 'password': subscenter[1]}
@subliminal.command()

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter, language_converters

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter, language_converters

@ -1,19 +1,40 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
import platform
from six.moves import range
is_windows_special_path = False
if platform.system() == "Windows":
try:
__file__
except UnicodeDecodeError:
is_windows_special_path = True
if not is_windows_special_path:
from concurrent.futures import ThreadPoolExecutor
else:
ThreadPoolExecutor = object
from datetime import datetime
import io
import itertools
import logging
import operator
import os.path
import os
import socket
from babelfish import Language, LanguageReverseError
from guessit import guessit
from rarfile import NotRarFile, RarCannotExec, RarFile
from six.moves.xmlrpc_client import ProtocolError
from rarfile import BadRarFile, NotRarFile, RarCannotExec, RarFile
from zipfile import BadZipfile
from ssl import SSLError
import requests
from .exceptions import ServiceUnavailable
from .extensions import provider_manager, refiner_manager
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS, get_subtitle_path
@ -79,6 +100,18 @@ class ProviderPool(object):
self.initialized_providers[name].terminate()
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out, improperly terminated', name)
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable, improperly terminated', name)
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable, improperly terminated', name)
else:
logger.exception('Provider %r http error %r, improperly terminated', name, e.response.status_code)
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable, improperly terminated', name)
else:
logger.exception('Provider %r SSL error %r, improperly terminated', name, e.args[0])
except:
logger.exception('Provider %r terminated unexpectedly', name)
@ -118,6 +151,18 @@ class ProviderPool(object):
return self[provider].list_subtitles(video, provider_languages)
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out', provider)
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable', provider)
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable', provider)
else:
logger.exception('Provider %r http error %r', provider, e.response.status_code)
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable', provider)
else:
logger.exception('Provider %r SSL error %r', provider, e.args[0])
except:
logger.exception('Unexpected error in provider %r', provider)
@ -173,6 +218,28 @@ class ProviderPool(object):
logger.error('Provider %r timed out, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
else:
logger.exception('Provider %r http error %r, discarding it', subtitle.provider_name,
e.response.status_code)
self.discarded_providers.add(subtitle.provider_name)
return False
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
else:
logger.exception('Provider %r SSL error %r, discarding it', subtitle.provider_name, e.args[0])
self.discarded_providers.add(subtitle.provider_name)
return False
except (BadRarFile, BadZipfile):
logger.error('Bad archive for %r', subtitle)
return False
except:
logger.exception('Unexpected error in provider %r, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
@ -492,9 +559,15 @@ def scan_videos(path, age=None, archives=True):
continue
# skip old files
if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
try:
file_age = datetime.utcfromtimestamp(os.path.getmtime(filepath))
except ValueError:
logger.warning('Could not get age of file %r in %r', filename, dirpath)
continue
else:
if age and datetime.utcnow() - file_age > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
continue
# scan
if filename.endswith(VIDEO_EXTENSIONS): # video
@ -541,7 +614,8 @@ def refine(video, episode_refiners=None, movie_refiners=None, **kwargs):
try:
refiner_manager[refiner].plugin(video, **kwargs)
except:
logger.exception('Failed to refine video')
logger.error('Failed to refine video %r', video.name)
logger.debug('Refiner exception:', exc_info=True)
def list_subtitles(videos, languages, pool_class=ProviderPool, **kwargs):

@ -19,8 +19,8 @@ class AuthenticationError(ProviderError):
pass
class TooManyRequests(ProviderError):
"""Exception raised by providers when too many requests are made."""
class ServiceUnavailable(ProviderError):
"""Exception raised when status is '503 Service Unavailable'."""
pass

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from pkg_resources import EntryPoint
from stevedore import ExtensionManager
@ -29,9 +30,9 @@ class RegistrableExtensionManager(ExtensionManager):
super(RegistrableExtensionManager, self).__init__(namespace, **kwargs)
def _find_entry_points(self, namespace):
def list_entry_points(self):
# copy of default extensions
eps = list(super(RegistrableExtensionManager, self)._find_entry_points(namespace))
eps = list(super(RegistrableExtensionManager, self).list_entry_points())
# internal extensions
for iep in self.internal_extensions:
@ -93,7 +94,6 @@ provider_manager = RegistrableExtensionManager('subliminal.providers', [
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'subscenter = subliminal.providers.subscenter:SubsCenterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
])

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
from bs4 import BeautifulSoup, FeatureNotFound
@ -68,6 +69,9 @@ class Provider(object):
#: Required hash, if any
required_hash = None
#: Subtitle class to use
subtitle_class = None
def __enter__(self):
self.initialize()
return self

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import re
@ -9,7 +10,7 @@ from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, TooManyRequests
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
@ -19,8 +20,11 @@ logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# Series cell matching regex
show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)
#: Series header parsing regex
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),&!?-]+?)(?: \((?P<year>\d{4})\))?$')
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
@ -29,7 +33,7 @@ class Addic7edSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link)
super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
@ -45,8 +49,9 @@ class Addic7edSubtitle(Subtitle):
def get_matches(self, video):
matches = set()
# series
if video.series and sanitize(self.series) == sanitize(video.series):
# series name
if video.series and sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# season
if video.season and self.season == video.season:
@ -54,7 +59,7 @@ class Addic7edSubtitle(Subtitle):
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# title
# title of the episode
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
@ -86,21 +91,23 @@ class Addic7edProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
@ -134,7 +141,16 @@ class Addic7edProvider(Provider):
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# LXML parser seems to fail when parsing Addic7ed.com HTML markup.
# Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
show_cells = re.findall(show_cells_re, r.content)
if show_cells:
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
else:
# If RegEx fails, fall back to original r.content and use 'html.parser'
soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
@ -166,8 +182,6 @@ class Addic7edProvider(Provider):
logger.info('Searching show ids with %r', params)
r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
@ -218,24 +232,23 @@ class Addic7edProvider(Provider):
# search as last resort
if not show_id:
logger.warning('Series not found in show ids')
logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
def query(self, series, season, year=None, country=None):
# get the show id
show_id = self.get_show_id(series, year, country)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
return []
def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
@ -262,16 +275,32 @@ class Addic7edProvider(Provider):
version = cells[4].text
download_link = cells[9].a['href'][1:]
subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.series, video.season, video.year)
if s.language in languages and s.episode == video.episode]
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.get_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download the subtitle
@ -280,6 +309,12 @@ class Addic7edProvider(Provider):
timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('Unable to download subtitle. No data returned from provider')
return
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import json
import logging
@ -18,7 +19,7 @@ from zipfile import ZipFile, is_zipfile
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError, ServiceUnavailable
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches, sanitize
from ..video import Episode, Movie
@ -44,8 +45,11 @@ rating_re = re.compile(r'nota (?P<rating>\d+)')
#: Timestamp parsing regex
timestamp_re = re.compile(r'(?P<day>\d+)/(?P<month>\d+)/(?P<year>\d+) - (?P<hour>\d+):(?P<minute>\d+)')
#: Title with year/country regex
title_re = re.compile(r'^(?P<series>.*?)(?: \((?:(?P<year>\d{4})|(?P<country>[A-Z]{2}))\))?$')
#: Cache key for releases
releases_key = __name__ + ':releases|{archive_id}'
releases_key = __name__ + ':releases|{archive_id}|{archive_name}'
class LegendasTVArchive(object):
@ -60,8 +64,8 @@ class LegendasTVArchive(object):
:param int rating: rating (0-10).
:param timestamp: timestamp.
:type timestamp: datetime.datetime
"""
def __init__(self, id, name, pack, featured, link, downloads=0, rating=0, timestamp=None):
#: Identifier
self.id = id
@ -96,10 +100,11 @@ class LegendasTVArchive(object):
class LegendasTVSubtitle(Subtitle):
"""LegendasTV Subtitle."""
provider_name = 'legendastv'
def __init__(self, language, type, title, year, imdb_id, season, archive, name):
super(LegendasTVSubtitle, self).__init__(language, archive.link)
super(LegendasTVSubtitle, self).__init__(language, page_link=archive.link)
self.type = type
self.title = title
self.year = year
@ -118,11 +123,12 @@ class LegendasTVSubtitle(Subtitle):
# episode
if isinstance(video, Episode) and self.type == 'episode':
# series
if video.series and sanitize(self.title) == sanitize(video.series):
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# year (year is based on season air date hence the adjustment)
if video.original_series and self.year is None or video.year and video.year == self.year - self.season + 1:
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# imdb_id
@ -132,7 +138,8 @@ class LegendasTVSubtitle(Subtitle):
# movie
elif isinstance(video, Movie) and self.type == 'movie':
# title
if video.title and sanitize(self.title) == sanitize(video.title):
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
@ -143,9 +150,6 @@ class LegendasTVSubtitle(Subtitle):
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
# archive name
matches |= guess_matches(video, guessit(self.archive.name, {'type': self.type}))
# name
matches |= guess_matches(video, guessit(self.name, {'type': self.type}))
@ -157,29 +161,38 @@ class LegendasTVProvider(Provider):
:param str username: username.
:param str password: password.
"""
languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
server_url = 'http://legendas.tv/'
subtitle_class = LegendasTVSubtitle
def __init__(self, username=None, password=None):
if username and not password or not username and password:
# Provider needs UNRAR installed. If not available raise ConfigurationError
try:
rarfile.custom_check(rarfile.UNRAR_TOOL)
except rarfile.RarExecError:
raise ConfigurationError('UNRAR tool not available')
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
soup = ParserBeautifulSoup(r.content, ['html.parser'])
if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')):
@ -193,94 +206,174 @@ class LegendasTVProvider(Provider):
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def search_titles(self, title):
@staticmethod
def is_valid_title(title, title_id, sanitized_title, season, year):
    """Tell whether a search result matches the title being looked up.

    :param dict title: parsed search result; its 'title' and 'type' items
        are read, plus 'season' or 'year' when present.
    :param int title_id: identifier of the result, used for logging only.
    :param str sanitized_title: sanitized title that was searched for.
    :param season: season searched for; a falsy value means a movie.
    :param year: movie year to match, or None to skip the year check.
    :return: True when the result matches, None otherwise.

    """
    candidate = sanitize(title['title'])
    if candidate != sanitized_title:
        logger.debug("Mismatched title, discarding title %d (%s)",
                     title_id, candidate)
        return None

    if not season:
        # movie lookup: the result must be a movie...
        if title['type'] != 'movie':
            logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, candidate)
            return None

        # ...and, when both years are known, they must agree
        if year is not None and 'year' in title and title['year'] != year:
            logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, candidate)
            return None

        return True

    # episode lookup: the result must be an episode...
    if title['type'] != 'episode':
        logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, candidate)
        return None

    # ...of the requested season
    if 'season' not in title or title['season'] != season:
        logger.debug('Mismatched season %s, discarding title %d (%s)',
                     title.get('season'), title_id, candidate)
        return None

    return True
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_titles(self, title, season, title_year):
"""Search for titles matching the `title`.
For episodes, each season has its own title
:param str title: the title to search for.
:param int season: season of the title
:param int title_year: year of the title
:return: found titles.
:rtype: dict
"""
# make the query
logger.info('Searching title %r', title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(title), timeout=10)
r.raise_for_status()
results = json.loads(r.text)
# loop over results
titles = {}
for result in results:
source = result['_source']
# extract id
title_id = int(source['id_filme'])
# extract type and title
title = {'type': type_map[source['tipo']], 'title': source['dsc_nome']}
# extract year
if source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
title['year'] = int(source['dsc_data_lancamento'])
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.warning('No season detected for title %d', title_id)
sanitized_titles = [sanitize(title)]
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))
for sanitized_title in sanitized_titles:
# make the query
if season:
logger.info('Searching episode title %r for season %r', sanitized_title, season)
else:
logger.info('Searching movie title %r', sanitized_title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
raise_for_status(r)
results = json.loads(r.text)
# loop over results
for result in results:
source = result['_source']
# add title
titles[title_id] = title
# extract id
title_id = int(source['id_filme'])
logger.debug('Found %d titles', len(titles))
# extract type
title = {'type': type_map[source['tipo']]}
# extract title, year and country
name, year, country = title_re.match(source['dsc_nome']).groups()
title['title'] = name
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.debug('No season detected for title %d (%s)', title_id, name)
# extract year
if year:
title['year'] = int(year)
elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
# year is based on season air date hence the adjustment
title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1
# add title only if is valid
# Check against title without ignored chars
if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year):
titles[title_id] = title
logger.debug('Found %d titles', len(titles))
return titles
@region.cache_on_arguments(expiration_time=timedelta(minutes=15).total_seconds())
def get_archives(self, title_id, language_code):
"""Get the archive list from a given `title_id` and `language_code`.
def get_archives(self, title_id, language_code, title_type, season, episode):
"""Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.
:param int title_id: title id.
:param int language_code: language code.
:param str title_type: episode or movie
:param int season: season
:param int episode: episode
:return: the archives.
:rtype: list of :class:`LegendasTVArchive`
"""
logger.info('Getting archives for title %d and language %d', title_id, language_code)
archives = []
page = 1
page = 0
while True:
# get the archive page
url = self.server_url + 'util/carrega_legendas_busca_filme/{title}/{language}/-/{page}'.format(
title=title_id, language=language_code, page=page)
url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
language=language_code, page=page, title=title_id)
r = self.session.get(url)
r.raise_for_status()
raise_for_status(r)
# parse the results
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
for archive_soup in soup.select('div.list_element > article > div'):
for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
# create archive
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2], archive_soup.a.text,
'pack' in archive_soup['class'], 'destaque' in archive_soup['class'],
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
archive_soup.a.text,
'pack' in archive_soup.parent['class'],
'destaque' in archive_soup.parent['class'],
self.server_url + archive_soup.a['href'][1:])
# clean name of path separators and pack flags
clean_name = archive.name.replace('/', '-')
if archive.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': title_type})
# episode
if season and episode:
# discard mismatches on episode in non-pack archives
# Guessit may return int for single episode or list for multi-episode
# Check if archive name has multiple episodes releases on it
if not archive.pack and 'episode' in guess:
wanted_episode = set(episode) if isinstance(episode, list) else {episode}
archive_episode = guess['episode'] if isinstance(guess['episode'], list) else {guess['episode']}
if not wanted_episode.intersection(archive_episode):
logger.debug('Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name)
continue
# extract text containing downloads, rating and timestamp
data_text = archive_soup.find('p', class_='data').text
@ -300,6 +393,8 @@ class LegendasTVProvider(Provider):
raise ProviderError('Archive timestamp is in the future')
# add archive
logger.info('Found archive for title %d and language %d at page %s: %s',
title_id, language_code, page, archive)
archives.append(archive)
# stop on last page
@ -322,7 +417,7 @@ class LegendasTVProvider(Provider):
"""
logger.info('Downloading archive %s', archive.id)
r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id))
r.raise_for_status()
raise_for_status(r)
# open the archive
archive_stream = io.BytesIO(r.content)
@ -337,60 +432,26 @@ class LegendasTVProvider(Provider):
def query(self, language, title, season=None, episode=None, year=None):
# search for titles
titles = self.search_titles(sanitize(title))
# search for titles with the quote or dot character
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
titles.update(self.search_titles(sanitize(title, ignore_characters=ignore_characters)))
titles = self.search_titles(title, season, year)
subtitles = []
# iterate over titles
for title_id, t in titles.items():
# discard mismatches on title
if sanitize(t['title']) != sanitize(title):
continue
# episode
if season and episode:
# discard mismatches on type
if t['type'] != 'episode':
continue
# discard mismatches on season
if 'season' not in t or t['season'] != season:
continue
# movie
else:
# discard mismatches on type
if t['type'] != 'movie':
continue
# discard mismatches on year
if year is not None and 'year' in t and t['year'] != year:
continue
logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode)
if not archives:
logger.info('No archives found for title %d and language %d', title_id, language.legendastv)
# iterate over title's archives
for a in self.get_archives(title_id, language.legendastv):
# clean name of path separators and pack flags
clean_name = a.name.replace('/', '-')
if a.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': t['type']})
# episode
if season and episode:
# discard mismatches on episode in non-pack archives
if not a.pack and 'episode' in guess and guess['episode'] != episode:
continue
for a in archives:
# compute an expiration time based on the archive timestamp
expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()
# attempt to get the releases from the cache
releases = region.get(releases_key.format(archive_id=a.id), expiration_time=expiration_time)
cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
releases = region.get(cache_key, expiration_time=expiration_time)
# the releases are not in cache or cache is expired
if releases == NO_VALUE:
@ -417,12 +478,12 @@ class LegendasTVProvider(Provider):
releases.append(name)
# cache the releases
region.set(releases_key.format(archive_id=a.id), releases)
region.set(cache_key, releases)
# iterate over releases
for r in releases:
subtitle = LegendasTVSubtitle(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
@ -431,13 +492,19 @@ class LegendasTVProvider(Provider):
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
title = video.series
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
title = video.title
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return [s for l in languages for s in self.query(l, title, season=season, episode=episode, year=video.year)]
return []
def download_subtitle(self, subtitle):
# download archive in case we previously hit the releases cache and didn't download it
@ -446,3 +513,11 @@ class LegendasTVProvider(Provider):
# extract subtitle's content
subtitle.content = fix_line_ending(subtitle.archive.content.read(subtitle.name))
def raise_for_status(r):
    """Raise an error for a failed or maintenance-mode response.

    The site can answer with HTTP status code 200 while under maintenance, so the
    body is checked for the maintenance banner before the regular status check.

    :param r: the response to check; its ``content`` and ``raise_for_status()`` are used.
    :raise: :class:`ServiceUnavailable` if the maintenance banner is found, otherwise
        whatever ``r.raise_for_status()`` raises.

    """
    # Match on the raw bytes: the banner is plain ASCII, and `r.text` would force a
    # full decode (with charset detection) of bodies such as downloaded archives.
    if b'Em breve estaremos de volta' in r.content:
        raise ServiceUnavailable
    r.raise_for_status()

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
from babelfish import Language
@ -7,6 +8,7 @@ from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle
from six.moves import range
logger = logging.getLogger(__name__)
@ -42,6 +44,7 @@ class NapiProjektSubtitle(Subtitle):
def __init__(self, language, hash):
super(NapiProjektSubtitle, self).__init__(language)
self.hash = hash
self.content = None
@property
def id(self):
@ -62,6 +65,10 @@ class NapiProjektProvider(Provider):
languages = {Language.fromalpha2(l) for l in ['pl']}
required_hash = 'napiprojekt'
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
@ -81,16 +88,16 @@ class NapiProjektProvider(Provider):
'f': hash,
't': get_subhash(hash)}
logger.info('Searching subtitle %r', params)
response = self.session.get(self.server_url, params=params, timeout=10)
response.raise_for_status()
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found and errors
if response.content[:4] == b'NPc0':
if r.content[:4] == b'NPc0':
logger.debug('No subtitles found')
return None
subtitle = NapiProjektSubtitle(language, hash)
subtitle.content = response.content
subtitle = self.subtitle_class(language, hash)
subtitle.content = r.content
logger.debug('Found subtitle %r', subtitle)
return subtitle

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import base64
import logging
import os
@ -11,7 +12,8 @@ from six.moves.xmlrpc_client import ServerProxy
from . import Provider, TimeoutSafeTransport
from .. import __short_version__
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError
from ..exceptions import (AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError,
ServiceUnavailable)
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
@ -26,7 +28,8 @@ class OpenSubtitlesSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, filename, encoding):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, encoding)
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired=hearing_impaired,
page_link=page_link, encoding=encoding)
self.subtitle_id = subtitle_id
self.matched_by = matched_by
self.movie_kind = movie_kind
@ -58,7 +61,8 @@ class OpenSubtitlesSubtitle(Subtitle):
if isinstance(video, Episode) and self.movie_kind == 'episode':
# tag match, assume series, year, season and episode matches
if self.matched_by == 'tag':
matches |= {'series', 'year', 'season', 'episode'}
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
matches |= {'series', 'year', 'season', 'episode'}
# series
if video.series and sanitize(self.series_name) == sanitize(video.series):
matches.add('series')
@ -87,7 +91,8 @@ class OpenSubtitlesSubtitle(Subtitle):
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# tag match, assume title and year matches
if self.matched_by == 'tag':
matches |= {'title', 'year'}
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
matches |= {'title', 'year'}
# title
if video.title and sanitize(self.movie_name) == sanitize(video.title):
matches.add('title')
@ -122,10 +127,11 @@ class OpenSubtitlesProvider(Provider):
"""
languages = {Language.fromopensubtitles(l) for l in language_converters['opensubtitles'].codes}
subtitle_class = OpenSubtitlesSubtitle
def __init__(self, username=None, password=None):
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(10))
if username and not password or not username and password:
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
# None values not allowed for logging in, so replace it by ''
self.username = username or ''
@ -156,7 +162,10 @@ class OpenSubtitlesProvider(Provider):
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
criteria.append({'imdbid': imdb_id[2:]})
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
if tag:
criteria.append({'tag': tag})
if query and season and episode:
@ -199,9 +208,9 @@ class OpenSubtitlesProvider(Provider):
filename = subtitle_item['SubFileName']
encoding = subtitle_item.get('SubEncoding') or None
subtitle = OpenSubtitlesSubtitle(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
@ -260,11 +269,6 @@ class DisabledUserAgent(OpenSubtitlesError, AuthenticationError):
pass
class ServiceUnavailable(OpenSubtitlesError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
def checked(response):
"""Check a response status before returning it.

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import re
@ -31,7 +32,7 @@ class PodnapisiSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
year=None):
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired, page_link)
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.pid = pid
self.releases = releases
self.title = title
@ -49,7 +50,8 @@ class PodnapisiSubtitle(Subtitle):
# episode
if isinstance(video, Episode):
# series
if video.series and sanitize(self.title) == sanitize(video.series):
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
@ -66,7 +68,8 @@ class PodnapisiSubtitle(Subtitle):
# movie
elif isinstance(video, Movie):
# title
if video.title and sanitize(self.title) == sanitize(video.title):
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
if video.year and self.year == video.year:
@ -82,7 +85,11 @@ class PodnapisiProvider(Provider):
"""Podnapisi Provider."""
languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
{Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
server_url = 'http://podnapisi.net/subtitles/'
server_url = 'https://www.podnapisi.net/subtitles/'
subtitle_class = PodnapisiSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
@ -108,7 +115,9 @@ class PodnapisiProvider(Provider):
pids = set()
while True:
# query the server
xml = etree.fromstring(self.session.get(self.server_url + 'search/old', params=params, timeout=10).content)
r = self.session.get(self.server_url + 'search/old', params=params, timeout=10)
r.raise_for_status()
xml = etree.fromstring(r.content)
# exit if no results
if not int(xml.find('pagination/results').text):
@ -118,15 +127,19 @@ class PodnapisiProvider(Provider):
# loop over subtitles
for subtitle_xml in xml.findall('subtitle'):
# read xml elements
pid = subtitle_xml.find('pid').text
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
language = Language.fromietf(subtitle_xml.find('language').text)
hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
page_link = subtitle_xml.find('url').text
pid = subtitle_xml.find('pid').text
releases = []
if subtitle_xml.find('release').text:
for release in subtitle_xml.find('release').text.split():
release = re.sub(r'\.+$', '', release) # remove trailing dots
release = ''.join(filter(lambda x: ord(x) < 128, release)) # remove non-ascii characters
release = ''.join([x for x in release if ord(x) < 128]) # remove non-ascii characters
releases.append(release)
title = subtitle_xml.find('title').text
season = int(subtitle_xml.find('tvSeason').text)
@ -134,15 +147,11 @@ class PodnapisiProvider(Provider):
year = int(subtitle_xml.find('year').text)
if is_episode:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
else:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
year=year)
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
year=year)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
@ -159,11 +168,21 @@ class PodnapisiProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
return [s for l in languages for s in self.query(l, video.series, season=video.season,
episode=video.episode, year=video.year)]
elif isinstance(video, Movie):
return [s for l in languages for s in self.query(l, video.title, year=video.year)]
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download as a zip

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import json
import logging
import os
@ -42,6 +43,10 @@ class ShooterProvider(Provider):
"""Shooter Provider."""
languages = {Language(l) for l in ['eng', 'zho']}
server_url = 'https://www.shooter.cn/api/subapi.php'
subtitle_class = ShooterSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
@ -64,7 +69,7 @@ class ShooterProvider(Provider):
# parse the subtitles
results = json.loads(r.text)
subtitles = [ShooterSubtitle(language, hash, t['Link']) for s in results for t in s['Files']]
subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]
return subtitles

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import bisect
from collections import defaultdict
import io
@ -26,7 +27,7 @@ class SubsCenterSubtitle(Subtitle):
provider_name = 'subscenter'
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, subtitle_id, subtitle_key,
downloaded, releases):
subtitle_version, downloaded, releases):
super(SubsCenterSubtitle, self).__init__(language, hearing_impaired, page_link)
self.series = series
self.season = season
@ -34,6 +35,7 @@ class SubsCenterSubtitle(Subtitle):
self.title = title
self.subtitle_id = subtitle_id
self.subtitle_key = subtitle_key
self.subtitle_version = subtitle_version
self.downloaded = downloaded
self.releases = releases
@ -74,7 +76,8 @@ class SubsCenterSubtitle(Subtitle):
class SubsCenterProvider(Provider):
"""SubsCenter Provider."""
languages = {Language.fromalpha2(l) for l in ['he']}
server_url = 'http://www.subscenter.co/he/'
server_url = 'http://www.subscenter.org/he/'
subtitle_class = SubsCenterSubtitle
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
@ -189,6 +192,7 @@ class SubsCenterProvider(Provider):
hearing_impaired = bool(subtitle_item['hearing_impaired'])
subtitle_id = subtitle_item['id']
subtitle_key = subtitle_item['key']
subtitle_version = subtitle_item['h_version']
downloaded = subtitle_item['downloaded']
release = subtitle_item['subtitle_version']
@ -200,12 +204,13 @@ class SubsCenterProvider(Provider):
continue
# otherwise create it
subtitle = SubsCenterSubtitle(language, hearing_impaired, page_link, title, season, episode,
title, subtitle_id, subtitle_key, downloaded, [release])
subtitle = self.subtitle_class(language, hearing_impaired, page_link, title, season, episode,
title, subtitle_id, subtitle_key, subtitle_version, downloaded,
[release])
logger.debug('Found subtitle %r', subtitle)
subtitles[subtitle_id] = subtitle
return subtitles.values()
return list(subtitles.values())
def list_subtitles(self, video, languages):
season = episode = None
@ -221,15 +226,19 @@ class SubsCenterProvider(Provider):
def download_subtitle(self, subtitle):
# download
url = self.server_url + 'subtitle/download/{}/{}/'.format(subtitle.language.alpha2, subtitle.subtitle_id)
params = {'v': subtitle.releases[0], 'key': subtitle.subtitle_key}
params = {'v': subtitle.subtitle_version, 'key': subtitle.subtitle_key}
r = self.session.get(url, params=params, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
# open the zip
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
# remove some filenames from the namelist
namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
if len(namelist) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(namelist[0]))
try:
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
# remove some filenames from the namelist
namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
if len(namelist) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(namelist[0]))
except zipfile.BadZipfile:
# if no zip file was retrieved, the daily download limit has been exceeded
raise ProviderError('Daily limit exceeded')

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
from babelfish import Language, language_converters
@ -40,6 +41,10 @@ class TheSubDBProvider(Provider):
languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
required_hash = 'thesubdb'
server_url = 'http://api.thesubdb.com/'
subtitle_class = TheSubDBSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
@ -66,7 +71,7 @@ class TheSubDBProvider(Provider):
for language_code in r.text.split(','):
language = Language.fromthesubdb(language_code)
subtitle = TheSubDBSubtitle(language, hash)
subtitle = self.subtitle_class(language, hash)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import re
@ -47,7 +48,8 @@ class TVsubtitlesSubtitle(Subtitle):
matches = set()
# series
if video.series and sanitize(self.series) == sanitize(video.series):
if video.series and (sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# season
if video.season and self.season == video.season:
@ -80,6 +82,10 @@ class TVsubtitlesProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.tvsubtitles.net/'
subtitle_class = TVsubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
@ -158,13 +164,7 @@ class TVsubtitlesProvider(Provider):
return episode_ids
def query(self, series, season, episode, year=None):
# search the show id
show_id = self.search_show_id(series, year)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year})
return []
def query(self, show_id, series, season, episode, year=None):
# get the episode ids
episode_ids = self.get_episode_ids(show_id, season)
if episode not in episode_ids:
@ -184,9 +184,9 @@ class TVsubtitlesProvider(Provider):
subtitle_id = int(row.parent['href'][10:-5])
page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
rip = row.find('p', title='rip').text.strip() or None
release = row.find('p', title='release').text.strip() or None
release = row.find('h5').text.strip() or None
subtitle = TVsubtitlesSubtitle(language, page_link, subtitle_id, series, season, episode, year, rip,
subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
release)
logger.debug('Found subtitle %s', subtitle)
subtitles.append(subtitle)
@ -194,7 +194,24 @@ class TVsubtitlesProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.series, video.season, video.episode, video.year) if s.language in languages]
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.search_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.episode, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download as a zip

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import os

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import operator

@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from datetime import datetime, timedelta
from functools import wraps
import logging
import re
import _strptime
import requests
from .. import __short_version__
@ -331,6 +332,7 @@ def refine(video, **kwargs):
# add series information
logger.debug('Found series %r', series)
video.series = matching_result['match']['series']
video.alternative_series.extend(series['aliases'])
video.year = matching_result['match']['year']
video.original_series = matching_result['match']['original_series']
video.series_tvdb_id = series['id']

@ -28,6 +28,7 @@ Available matches:
"""
from __future__ import division, print_function
from __future__ import absolute_import
import logging
from .video import Episode, Movie
@ -44,7 +45,7 @@ movie_scores = {'hash': 119, 'title': 60, 'year': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'})
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
def get_equivalent_release_groups(release_group):

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import codecs
import logging
import os
@ -208,8 +209,14 @@ def guess_matches(video, guess, partial=False):
if video.season and 'season' in guess and guess['season'] == video.season:
matches.add('season')
# episode
if video.episode and 'episode' in guess and guess['episode'] == video.episode:
matches.add('episode')
# Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
# Most providers only support single-ep, so make sure it contains only 1 episode
# In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
if video.episode and 'episode' in guess:
episode_guess = guess['episode']
episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
if episode == video.episode:
matches.add('episode')
# year
if video.year and 'year' in guess and guess['year'] == video.year:
matches.add('year')
@ -252,4 +259,4 @@ def fix_line_ending(content):
:rtype: bytes
"""
return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
return content.replace(b'\r\n', b'\n')

@ -1,88 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import time
class Component(object):
    """Base node of cue text, holding an ordered list of sub-components.

    :param components: `None` for no sub-components, a list that is stored as-is,
        or any other single value, which is wrapped in a one-item list.

    """
    tag_name = 'Component'

    def __init__(self, components=None):
        if isinstance(components, list):
            children = components
        elif components is None:
            children = []
        else:
            children = [components]
        self.components = children

    def __iter__(self):
        return iter(self.components)

    def __len__(self):
        return len(self.components)

    def __str__(self):
        # plain text: the sub-components concatenated without any tag
        return ''.join(map(str, self.components))

    def __repr__(self):
        # tagged form: the reprs of the sub-components wrapped in this tag
        inner = ''.join(map(repr, self.components))
        return '<{name}>{components}</{name}>'.format(name=self.tag_name, components=inner)
class Bold(Component):
    """:class:`Component` whose tag is ``b`` (bold)."""

    tag_name = 'b'
class Italic(Component):
    """:class:`Component` whose tag is ``i`` (italic)."""

    tag_name = 'i'
class Underline(Component):
    """:class:`Component` whose tag is ``u`` (underline)."""

    tag_name = 'u'
class Strikethrough(Component):
    """:class:`Component` whose tag is ``s`` (strikethrough)."""

    tag_name = 's'
class Font(Component):
    """Font :class:`Component` that also carries a color.

    :param color: color of the font, shown quoted in the repr.

    Any further arguments are passed on to :class:`Component`.

    """
    tag_name = 'font'

    def __init__(self, color, *args, **kwargs):
        super(Font, self).__init__(*args, **kwargs)
        self.color = color

    def __repr__(self):
        inner = ''.join(map(repr, self.components))
        template = '<{name} "{color}">{components}</{name}>'
        return template.format(name=self.tag_name, color=self.color, components=inner)
class Cue(object):
    """One subtitle cue: a time span and the components displayed during it.

    :param datetime.time start_time: start time.
    :param datetime.time end_time: end time.
    :param list components: cue components.

    """
    def __init__(self, start_time, end_time, components):
        self.start_time, self.end_time, self.components = start_time, end_time, components

    def __repr__(self):
        text = ''.join(map(repr, self.components))
        return '<Cue [{start_time}->{end_time}] "{text}">'.format(start_time=self.start_time,
                                                                  end_time=self.end_time, text=text)
if __name__ == '__main__':
    # manual smoke test: a cue from 00:00:00 to 01:00:00 holding a single bold component
    cue = Cue(time(), time(1), [Bold('Hello')])
    # call form of print: same output on Python 2 for a single argument, and valid on Python 3
    print(repr(cue))

@ -1,82 +0,0 @@
# -*- coding: utf-8 -*-
import re
from datetime import time
from subliminal.subtitles import Cue
# One or more digits captured as ``index``. The pattern is not anchored: it is applied with ``.match`` in this
# module, so it is only checked at the start of the line and trailing characters are accepted.
index_re = re.compile(r'(?P<index>\d+)')
# ``HH:MM:SS,mmm`` timestamp captured as ``hour``, ``minute``, ``second`` and ``milliseconds``; every group is
# returned as a string of digits.
timing_re = re.compile(r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2}),(?P<milliseconds>\d{3})')
class SubripReadError(Exception):
    """Base error raised when SubRip content cannot be read."""
class SubripReadIndexError(SubripReadError):
    """:class:`SubripReadError` raised when a cue index line is not valid."""
class SubripReader(object):
    """Line-by-line SubRip reader keeping track of what the next line is expected to be.

    The reader starts in the :attr:`INDEX` state. Nothing in this class changes the state yet.

    """
    #: A cue index line is expected
    INDEX = 1

    #: A timings line is expected
    TIMINGS = 2

    #: Cue text is expected
    TEXT = 3

    def __init__(self):
        self.state = self.INDEX

    def read(self, content):
        """Placeholder: does nothing with `content` and returns ``None``."""

    def read_line(self, line):
        """Check a single line against the current state.

        Only the :attr:`INDEX` state is handled; in any other state the line is ignored.

        :param str line: the line to check.
        :raise SubripReadIndexError: if an index is expected and the line does not start with digits.

        """
        if self.state == self.INDEX:
            # the error is for a line that is NOT an index, same check as in read_cue
            if not index_re.match(line):
                raise SubripReadIndexError
def _parse_timing(text):
    """Convert a ``HH:MM:SS,mmm`` timestamp into a :class:`datetime.time`.

    :param str text: raw timestamp, surrounding whitespace is ignored.
    :return: the parsed time.
    :rtype: datetime.time
    :raise SubripReadError: if the text is not a timestamp or a field is out of range.

    """
    match = timing_re.match(text.strip())
    if not match:
        raise SubripReadError
    try:
        # the groups are strings and `time` has no `milliseconds` argument: convert explicitly
        return time(hour=int(match.group('hour')), minute=int(match.group('minute')),
                    second=int(match.group('second')), microsecond=int(match.group('milliseconds')) * 1000)
    except ValueError:
        # a field does not fit in a datetime.time (e.g. hour > 23 or minute > 59)
        raise SubripReadError


def read_cue(stream):
    """Attempt to parse a complete Cue from the stream.

    Reads the index line and the timings line of the next cue and validates both. The cue text is not read
    and no cue object is built yet, so nothing is returned.

    :param stream: file-like object providing ``readline()``.
    :raise SubripReadIndexError: if the first non-blank line does not start with a cue index.
    :raise SubripReadError: if the stream ends before a cue starts, or if the timings line is not of the
        form ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.

    """
    # skip blank lines; readline() returns '' at end of stream, which must end the loop
    line = stream.readline()
    while line and not line.strip():
        line = stream.readline()
    if not line:
        raise SubripReadError

    # parse index
    if not index_re.match(line):
        raise SubripReadIndexError

    # parse timings: exactly one '-->' separating the start from the end
    timings = stream.readline().split('-->')
    if len(timings) != 2:
        raise SubripReadError

    # NOTE(review): the parsed times are not used yet, text parsing and Cue creation are still missing
    start_time = _parse_timing(timings[0])
    end_time = _parse_timing(timings[1])
class SubripSubtitle(object):
    """SubRip subtitle holding its cues in ``cues``, which starts out as an empty list."""

    def __init__(self):
        self.cues = list()
if __name__ == '__main__':
    # Scratch code for manual experiments; print is written in call form so the file also parses on Python 3.
    # NOTE(review): 'toto' is a plain str, which has no readline(), while read_cue reads from a file-like
    # object -- this presumably never ran successfully, confirm before relying on it.
    print(read_cue('toto'))
    i = 0
    for x in read_cue('toto'):
        print(x)
        # safety cap on the number of printed items
        if i > 10:
            break
        i += 1

@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from datetime import datetime
import hashlib
import os
import re
import struct
from six.moves import range
def hash_opensubtitles(video_path):

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import absolute_import
from datetime import datetime, timedelta
import logging
import os
@ -13,9 +14,9 @@ VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.
'.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
'.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
'.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf',
'.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', '.omf',
'.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
'.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
'.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
class Video(object):
@ -123,11 +124,12 @@ class Episode(Video):
:param int year: year of the series.
:param bool original_series: whether the series is the first with this name.
:param int tvdb_id: TVDB id of the episode.
:param list alternative_series: alternative names of the series
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, **kwargs):
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, **kwargs):
super(Episode, self).__init__(name, **kwargs)
#: Series of the episode
@ -157,6 +159,9 @@ class Episode(Video):
#: IMDb id of the series
self.series_imdb_id = series_imdb_id
#: Alternative names of the series
self.alternative_series = alternative_series or []
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'episode':
@ -165,7 +170,13 @@ class Episode(Video):
if 'title' not in guess or 'episode' not in guess:
raise ValueError('Insufficient data to process the guess')
return cls(name, guess['title'], guess.get('season', 1), guess['episode'], title=guess.get('episode_title'),
# Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
# Most providers only support single-ep, so make sure it contains only 1 episode
# In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
episode_guess = guess.get('episode')
episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
return cls(name, guess['title'], guess.get('season', 1), episode, title=guess.get('episode_title'),
year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess,
release_group=guess.get('release_group'), resolution=guess.get('screen_size'),
video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'))
@ -186,10 +197,11 @@ class Movie(Video):
:param str title: title of the movie.
:param int year: year of the movie.
:param list alternative_titles: alternative titles of the movie
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, title, year=None, **kwargs):
def __init__(self, name, title, year=None, alternative_titles=None, **kwargs):
super(Movie, self).__init__(name, **kwargs)
#: Title of the movie
@ -198,6 +210,9 @@ class Movie(Video):
#: Year of the movie
self.year = year
#: Alternative titles of the movie
self.alternative_titles = alternative_titles or []
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'movie':
@ -206,9 +221,13 @@ class Movie(Video):
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
alternative_titles = []
if 'alternative_title' in guess:
alternative_titles.append(u"%s %s" % (guess['title'], guess['alternative_title']))
return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
audio_codec=guess.get('audio_codec'), year=guess.get('year'))
audio_codec=guess.get('audio_codec'), year=guess.get('year'), alternative_titles=alternative_titles)
@classmethod
def fromname(cls, name):

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import subliminal
# patch subliminal's subtitle and provider base
@ -12,8 +13,8 @@ from .core import scan_video, search_external_subtitles, list_all_subtitles, sav
download_best_subtitles
from .score import compute_score
from .video import Video
import extensions
import http
from . import extensions
from . import http
# patch subliminal's core functions
subliminal.scan_video = subliminal.core.scan_video = scan_video

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from subliminal.exceptions import ConfigurationError

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter, language_converters

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from subliminal.exceptions import ConfigurationError
from subzero.language import Language

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter, language_converters

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import logging
from babelfish import LanguageReverseConverter

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import codecs
import json
import re
@ -10,7 +11,7 @@ import time
import operator
import itertools
from http.client import ResponseNotReady
from six.moves.http_client import ResponseNotReady
import rarfile
import requests
@ -21,16 +22,18 @@ from babelfish import LanguageReverseError
from guessit.jsonutils import GuessitEncoder
from subliminal import ProviderError, refiner_manager
from subliminal_patch.extensions import provider_registry
from .extensions import provider_registry
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded
from subliminal.score import compute_score as default_compute_score
from subliminal.utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
from subliminal.video import VIDEO_EXTENSIONS, Video, Episode, Movie
from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \
ThreadPoolExecutor, check_video
from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ServiceUnavailable, DownloadLimitExceeded
from subliminal_patch.exceptions import TooManyRequests, APIThrottled
from subzero.language import Language
from scandir import scandir, scandir_generic as _scandir_generic
import six
logger = logging.getLogger(__name__)
@ -644,7 +647,7 @@ def search_external_subtitles(path, languages=None, only_one=False):
for folder_or_subfolder in [video_path] + CUSTOM_PATHS:
# folder_or_subfolder may be a relative path or an absolute one
try:
abspath = unicode(os.path.abspath(
abspath = six.text_type(os.path.abspath(
os.path.join(*[video_path if not os.path.isabs(folder_or_subfolder) else "", folder_or_subfolder,
video_filename])))
except Exception as e:

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
from subliminal import ProviderError
@ -9,13 +10,3 @@ class TooManyRequests(ProviderError):
class APIThrottled(ProviderError):
pass
class ServiceUnavailable(ProviderError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
class DownloadLimitExceeded(ProviderError):
"""Exception raised by providers when download limit is exceeded."""
pass

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
from collections import OrderedDict
import subliminal
@ -43,7 +44,7 @@ class ProviderRegistry(object):
self.providers[name] = cls
def names(self):
return self.providers.keys()
return list(self.providers.keys())
provider_registry = ProviderRegistry()

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import json
from collections import OrderedDict
@ -8,7 +9,7 @@ import os
import socket
import logging
import requests
import xmlrpc.client
import six.moves.xmlrpc_client
import dns.resolver
import ipaddress
import re
@ -21,6 +22,7 @@ from dogpile.cache.api import NO_VALUE
from subliminal.cache import region
from subliminal_patch.pitcher import pitchers
from cloudscraper import CloudScraper
import six
try:
import brotli
@ -28,10 +30,12 @@ except:
pass
try:
from urlparse import urlparse
from six.moves.urllib.parse import urlparse
except ImportError:
from urllib.parse import urlparse
from subzero.lib.io import get_viable_encoding
logger = logging.getLogger(__name__)
pem_file = os.path.normpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", certifi.where()))
try:
@ -148,7 +152,7 @@ class CFSession(CloudScraper):
if cf_data is not NO_VALUE:
cf_cookies, hdrs = cf_data
logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
for cookie, value in cf_cookies.iteritems():
for cookie, value in six.iteritems(cf_cookies):
self.cookies.set(cookie, value, domain=domain)
self.headers = hdrs
@ -179,10 +183,10 @@ class CFSession(CloudScraper):
"Unable to find Cloudflare cookies. Does the site actually have "
"Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
return (OrderedDict(filter(lambda x: x[1], [
return (OrderedDict([x for x in [
("__cfduid", self.cookies.get("__cfduid", "", domain=cookie_domain)),
("cf_clearance", self.cookies.get("cf_clearance", "", domain=cookie_domain))
])),
] if x[1]]),
self.headers
)
@ -229,7 +233,7 @@ class RetryingCFSession(RetryingSession, CFSession):
pass
class SubZeroRequestsTransport(xmlrpc.client.SafeTransport):
class SubZeroRequestsTransport(six.moves.xmlrpc_client.SafeTransport):
"""
Drop in Transport for xmlrpclib that uses Requests instead of httplib
@ -252,7 +256,7 @@ class SubZeroRequestsTransport(xmlrpc.client.SafeTransport):
"https": proxy
}
xmlrpclib.SafeTransport.__init__(self, *args, **kwargs)
six.moves.xmlrpc_client.SafeTransport.__init__(self, *args, **kwargs)
def request(self, host, handler, request_body, verbose=0):
"""
@ -315,7 +319,7 @@ def patch_create_connection():
host, port = address
try:
ipaddress.ip_address(unicode(host))
ipaddress.ip_address(six.text_type(host))
except (ipaddress.AddressValueError, ValueError):
__custom_resolver_ips = os.environ.get("dns_resolvers", None)

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
from subliminal.converters.addic7ed import Addic7edConverter
from babelfish.converters.opensubtitles import OpenSubtitlesConverter

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import os
import time
import logging
@ -8,7 +9,9 @@ from subliminal.cache import region
from dogpile.cache.api import NO_VALUE
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
Proxy
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TIMEOUT
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
import six
from six.moves import range
logger = logging.getLogger(__name__)
@ -167,7 +170,7 @@ class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
self.user_agent = kwargs.pop("user_agent")
cookies = kwargs.pop("cookies", {})
if isinstance(cookies, dict):
self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.iteritems()])
self.cookies = ";".join(["%s=%s" % (k, v) for k, v in six.iteritems(cookies)])
super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)
@ -185,7 +188,7 @@ class DBCProxyLessPitcher(Pitcher):
password = None
def __init__(self, website_name, website_url, website_key,
timeout=DEFAULT_TIMEOUT, tries=3, *args, **kwargs):
timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)
self.username, self.password = self.client_key.split(":", 1)

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import importlib
import os
import subliminal
@ -10,6 +11,7 @@ from subliminal_patch.http import RetryingSession
from subliminal_patch.subtitle import Subtitle, guess_matches
from subzero.lib.io import get_viable_encoding
import six
class Provider(_Provider):
@ -20,7 +22,7 @@ class Provider(_Provider):
# register providers
# fixme: this is bad
for name in os.listdir(os.path.dirname(unicode(__file__, get_viable_encoding()))):
for name in os.listdir(os.path.dirname(six.text_type(__file__, get_viable_encoding()))):
if name in ("__init__.py", "mixins.py", "utils.py") or not name.endswith(".py"):
continue

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import re
import datetime

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import os
import io

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import json
import logging
import os
@ -20,7 +21,7 @@ logger = logging.getLogger(__name__)
language_converters.register('assrt = subliminal_patch.converters.assrt:AssrtConverter')
server_url = 'https://api.assrt.net/v1'
supported_languages = language_converters['assrt'].to_assrt.keys()
supported_languages = list(language_converters['assrt'].to_assrt.keys())
class AssrtSubtitle(Subtitle):

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import json
import logging
import os

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os

@ -1,5 +1,6 @@
# coding: utf-8
from __future__ import absolute_import
import io
import six
import logging

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import rarfile
import os

@ -1,12 +1,13 @@
# coding=utf-8
from __future__ import absolute_import
import re
import time
import logging
import traceback
import types
import os
from httplib import ResponseNotReady
from six.moves.http_client import ResponseNotReady
from guessit import guessit
from subliminal import ProviderError
@ -107,7 +108,7 @@ class ProviderSubtitleArchiveMixin(object):
if "format" in subtitle.matches:
format_matches = False
if isinstance(subtitle.releases, types.ListType):
if isinstance(subtitle.releases, list):
releases = ",".join(subtitle.releases).lower()
else:
releases = subtitle.releases.lower()
@ -117,7 +118,7 @@ class ProviderSubtitleArchiveMixin(object):
else:
formats = guess["format"]
if not isinstance(formats, types.ListType):
if not isinstance(formats, list):
formats = [formats]
for f in formats:

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import logging
from subliminal.providers.napiprojekt import NapiProjektProvider as _NapiProjektProvider, \

@ -1,3 +1,4 @@
from __future__ import absolute_import
import logging
import os
from io import BytesIO

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
from random import randint

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import base64
import logging
import os
@ -13,7 +14,7 @@ from subliminal.exceptions import ConfigurationError, ServiceUnavailable
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
from mixins import ProviderRetryMixin
from .mixins import ProviderRetryMixin
from subliminal.subtitle import fix_line_ending
from subliminal_patch.http import SubZeroRequestsTransport
from subliminal_patch.utils import sanitize

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import re
import io

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
from subliminal.providers.shooter import ShooterProvider as _ShooterProvider, ShooterSubtitle as _ShooterSubtitle

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os
@ -14,6 +15,7 @@ from subliminal import __short_version__
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending,guess_matches
from subliminal.video import Episode, Movie
from six.moves import range
logger = logging.getLogger(__name__)

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import io
import logging
import os
@ -11,8 +12,8 @@ import requests
import inflect
import re
import json
import HTMLParser
import urlparse
import six.moves.html_parser
import six.moves.urllib.parse
from zipfile import ZipFile
from babelfish import language_converters
@ -29,6 +30,7 @@ from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.converters.subscene import language_ids, supported_languages
from subscene_api.subscene import search, Subtitle as APISubtitle, SITE_DOMAIN
from subzero.language import Language
import six
p = inflect.engine()
@ -157,9 +159,9 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.content)
if match:
h = HTMLParser.HTMLParser()
h = six.moves.html_parser.HTMLParser()
data = json.loads(h.unescape(match.group(1)))
login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
login_url = six.moves.urllib.parse.urljoin(data["siteUrl"], data["loginUrl"])
time.sleep(1.0)
r = self.session.post(login_url,
@ -187,7 +189,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
else:
cj = self.session.cookies.copy()
store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
for cn in self.session.cookies.iterkeys():
for cn in six.iterkeys(self.session.cookies):
if cn not in store_cks:
del cj[cn]
@ -266,7 +268,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
for s in film.subtitles:
try:
subtitle = SubsceneSubtitle.from_api(s)
except NotImplementedError, e:
except NotImplementedError as e:
logger.info(e)
continue
subtitle.asked_for_release_group = video.release_group

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
from subliminal.providers.subscenter import SubsCenterProvider as _SubsCenterProvider, \
SubsCenterSubtitle as _SubsCenterSubtitle
from subzero.language import Language

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import re
import io

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import re
import io

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
from random import randint

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import json
import logging

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import io
import six
import os

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import io
import logging
import math
@ -30,6 +31,7 @@ from subzero.language import Language
from random import randint
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
from six.moves import map
# parsing regex definitions
title_re = re.compile(r'(?P<title>(?:.+(?= [Aa][Kk][Aa] ))|.+)(?:(?:.+)(?P<altitle>(?<= [Aa][Kk][Aa] ).+))?')
@ -155,7 +157,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
# handle possible duplicate use of Serbian Latin
if "sr" in lang_strings and "sr-Latn" in lang_strings:
logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
logger.info('Filtered language list %r', used_languages)
# convert list of languages into search string

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import re

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import io
import logging
import os

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import os

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import types
import os
@ -10,6 +11,7 @@ from requests.compat import urljoin, quote
from subliminal import Episode, Movie, region
from subliminal_patch.core import remove_crap_from_fn
from subliminal_patch.http import CertifiSession
import six
logger = logging.getLogger(__name__)
@ -52,11 +54,11 @@ class DroneAPIClient(object):
:return:
"""
out = {}
for key, value in params.iteritems():
if not isinstance(value, types.StringTypes):
for key, value in six.iteritems(params):
if not isinstance(value, (str,)):
value = str(value)
elif isinstance(value, unicode):
elif isinstance(value, six.text_type):
value = value.encode("utf-8")
key = key.split('_')[0] + ''.join(x.capitalize() for x in key.split('_')[1:])

@ -1,10 +1,11 @@
# coding=utf-8
from __future__ import absolute_import
import sys
import os
import logging
import codecs
from common import update_video
from .common import update_video
logger = logging.getLogger(__name__)

@ -1,8 +1,9 @@
# coding=utf-8
from __future__ import absolute_import
import logging
from libfilebot import get_filebot_attrs
from common import update_video
from .common import update_video
logger = logging.getLogger(__name__)

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import os

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
import os
import subliminal
import base64

@ -1,7 +1,8 @@
# coding=utf-8
from __future__ import absolute_import
import os
from common import update_video
from .common import update_video
def refine(video, **kwargs):

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import datetime
from subliminal.refiners.tvdb import Episode, logger, search_series, series_re, sanitize, get_series, \

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import types
from subliminal_patch.http import TimeoutSession

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
from subliminal.video import Episode, Movie

@ -1,6 +1,7 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import traceback
@ -362,7 +363,7 @@ def guess_matches(video, guess, partial=False):
# series
if video.series and 'title' in guess:
titles = guess["title"]
if not isinstance(titles, types.ListType):
if not isinstance(titles, list):
titles = [titles]
for title in titles:
@ -401,7 +402,7 @@ def guess_matches(video, guess, partial=False):
# release_group
if 'release_group' in guess:
release_groups = guess["release_group"]
if not isinstance(release_groups, types.ListType):
if not isinstance(release_groups, list):
release_groups = [release_groups]
if video.release_group:
@ -418,7 +419,7 @@ def guess_matches(video, guess, partial=False):
# format
if 'format' in guess:
formats = guess["format"]
if not isinstance(formats, types.ListType):
if not isinstance(formats, list):
formats = [formats]
if video.format:

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import re

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import os
from subliminal.video import Video as Video_

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import struct
import binascii

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
from dogpile.cache.api import CacheBackend, NO_VALUE
from fcache.cache import FileCache

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import datetime
import logging
import traceback
@ -7,7 +8,8 @@ import types
from subzero.language import Language
from constants import mode_map
from .constants import mode_map
import six
logger = logging.getLogger(__name__)
@ -54,10 +56,10 @@ class SubtitleHistoryItem(object):
return mode_map.get(self.mode, "Unknown")
def __repr__(self):
return unicode(self)
return six.text_type(self)
def __unicode__(self):
return u"%s (Score: %s)" % (unicode(self.item_title), self.score)
return u"%s (Score: %s)" % (six.text_type(self.item_title), self.score)
def __str__(self):
return str(self.rating_key)
@ -108,7 +110,7 @@ class SubtitleHistory(object):
items = []
logger.error("Failed to load history storage: %s" % traceback.format_exc())
if not isinstance(items, types.ListType):
if not isinstance(items, list):
items = []
else:
items = items[:]

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import datetime
import threading

@ -1,8 +1,10 @@
# coding=utf-8
from __future__ import absolute_import
import types
from babelfish.exceptions import LanguageError
from babelfish import Language as Language_, basestr
from six.moves import zip
repl_map = {
"dk": "da",
@ -54,7 +56,7 @@ def wrap_forced(f):
args = args[1:]
s = args.pop(0)
forced = None
if isinstance(s, types.StringTypes):
if isinstance(s, (str,)):
base, forced = s.split(":") if ":" in s else (s, False)
else:
base = s
@ -108,7 +110,7 @@ class Language(Language_):
state = instance.__getstate__()
attrs = ("country", "script", "forced")
language = state[0]
kwa = dict(zip(attrs, state[1:]))
kwa = dict(list(zip(attrs, state[1:])))
kwa.update(replkw)
return cls(language, **kwa)

@ -1,8 +1,3 @@
from .dict import *
from .geezip import *
from .httpfake import *
from .io import *
from .json import *
from .rar import *
from .which import *
from __future__ import absolute_import
from . import dict, geezip, httpfake, io, json, rar, which

@ -1,3 +1,4 @@
import six
# coding=utf-8
@ -19,11 +20,10 @@ class DictProxy(object):
return getattr(super(DictProxy, self), name)
def __setattr__(self, name, value):
if not self.__dict__.has_key(
'_DictProxy__initialized'): # this test allows attributes to be set in the __init__ method
if '_DictProxy__initialized' not in self.__dict__: # this test allows attributes to be set in the __init__ method
return object.__setattr__(self, name, value)
elif self.__dict__.has_key(name): # any normal attributes are handled normally
elif name in self.__dict__: # any normal attributes are handled normally
object.__setattr__(self, name, value)
else:
@ -56,7 +56,7 @@ class DictProxy(object):
return str(self.Dict[self.store])
def __len__(self):
return len(self.Dict[self.store].keys())
return len(list(self.Dict[self.store].keys()))
def __delitem__(self, key):
del self.Dict[self.store][key]
@ -81,16 +81,16 @@ class DictProxy(object):
return self.Dict[self.store].update(*args, **kwargs)
def keys(self):
return self.Dict[self.store].keys()
return list(self.Dict[self.store].keys())
def values(self):
return self.Dict[self.store].values()
return list(self.Dict[self.store].values())
def items(self):
return self.Dict[self.store].items()
return list(self.Dict[self.store].items())
def __unicode__(self):
return unicode(repr(self.Dict[self.store]))
return six.text_type(repr(self.Dict[self.store]))
def setup_defaults(self):
raise NotImplementedError
@ -104,14 +104,14 @@ class Dicked(object):
def __init__(self, **entries):
self._entries = entries or None
for key, value in entries.iteritems():
for key, value in six.iteritems(entries):
self.__dict__[key] = (Dicked(**value) if isinstance(value, dict) else value)
def __repr__(self):
return str(self)
def __unicode__(self):
return unicode(self.__digged__)
return six.text_type(self.__digged__)
def __str__(self):
return str(self.__digged__)
@ -143,7 +143,7 @@ class Dicked(object):
@property
def __digged__(self):
return {key: value for key, value in self.__dict__.iteritems() if key != "_entries"}
return {key: value for key, value in six.iteritems(self.__dict__) if key != "_entries"}
def __len__(self):
return len(self.__digged__)

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import gzip
from zlib import Z_FINISH

@ -1,3 +1,4 @@
import six
# coding=utf-8
@ -37,7 +38,7 @@ class PlexPyNativeResponseProxy(object):
return str(self.data)
def __unicode__(self):
return unicode(self.data)
return six.text_type(self.data)
def __repr__(self):
return repr(self.data)

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import os
import sys

@ -1,4 +1,5 @@
# coding=utf-8
from __future__ import absolute_import
from gzip import GzipFile
from json_tricks import TricksEncoder

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import rarfile

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save