parent 164230c7cd
commit d910db7965
@@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
import io
import logging
from random import randint

from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

from guessit import guessit
from requests import Session
from bs4 import NavigableString
from ftfy import fix_text
from subzero.language import Language

from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.score import framerate_equal
from subliminal.exceptions import ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import sanitize, guess_matches
from subliminal.video import Movie
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)

class NekurSubtitle(Subtitle):
    """Nekur Subtitle."""
    provider_name = 'nekur'

    def __init__(self, language, page_link, download_link, title, year, imdb_id, fps, notes):
        super(NekurSubtitle, self).__init__(language, page_link=page_link)
        self.download_link = download_link
        self.title = title
        self.year = year
        self.imdb_id = imdb_id
        self.fps = fps
        self.notes = notes
        self.matches = None
        # self.encoding = 'utf-16'

    @property
    def id(self):
        return self.download_link

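    # The match names below ('title', 'year', 'imdb_id') are the standard
    # subliminal movie match keys; extra hints such as release group or
    # resolution are mined from the notes field via guessit.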
    def get_matches(self, video):
        matches = set()

        if isinstance(video, Movie):
            # title
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # imdb id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')
            # fps
            if video.fps and self.fps and not framerate_equal(video.fps, self.fps):
                logger.warning("nekur: Wrong FPS (expected: %s, got: %s)", video.fps, self.fps)
            # guess additional info from notes
            matches |= guess_matches(video, guessit(self.notes, {'type': 'movie'}), partial=True)

        self.matches = matches
        return matches


class NekurProvider(Provider, ProviderSubtitleArchiveMixin):
    """Nekur Provider."""
    subtitle_class = NekurSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.nekur.net/'
    search_url = server_url + 'modules/Subtitles.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        # pick a random desktop User-Agent to avoid trivial bot filtering
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, title):
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle rows
        rows = soup.select('tbody > tr')
        for row in rows:
            # title (the anchor's direct text only, skipping nested tags)
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [element for element in title_anchor_el if isinstance(element, NavigableString)]
            title = title_inner_text[0].strip()

            # year (rendered as "(YYYY)"; cast so it compares equal to video.year)
            year_text = row.select_one('.year').text.strip('()')
            year = int(year_text) if year_text.isdigit() else None

            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = archive link (there is no separate subtitle page link)
            page_link = 'http://subtitri.nekur.net/filmu-subtitri/'

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id, fps, notes)
            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, NekurSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: keep the raw payload only if it already parses
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            # fix content encoding (utf-16 encoded by default)
            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), uncurl_quotes=False, fix_character_width=False).encode('utf-8')
            subtitle.content = fixed_subtitle_content
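
For reference, here is a minimal sketch of how a subliminal_patch provider like this one is typically driven. The module path and the movie metadata are illustrative assumptions, not taken from this diff:

# Hypothetical driver script; assumes the file above lands at
# subliminal_patch/providers/nekur.py and that subliminal's
# Movie(name, title, year=...) constructor is available.
from subliminal.video import Movie
from subzero.language import Language
from subliminal_patch.providers.nekur import NekurProvider

video = Movie('Inception.2010.720p.BluRay.x264.mkv', 'Inception', year=2010)
provider = NekurProvider()
provider.initialize()
try:
    # list_subtitles() fans the title out to query() and filters by language
    subtitles = provider.list_subtitles(video, {Language.fromalpha2('lv')})
    for sub in subtitles:
        provider.download_subtitle(sub)  # populates sub.content (UTF-8 bytes)
finally:
    provider.terminate()
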
@@ -0,0 +1,163 @@
# -*- coding: utf-8 -*-
import io
import logging
from random import randint

from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

from requests import Session
from ftfy import fix_text
from subzero.language import Language

from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle
from subliminal.exceptions import ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import sanitize
from subliminal.video import Movie
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)

class SubtitriIdSubtitle(Subtitle):
    """subtitri.id.lv Subtitle."""
    provider_name = 'subtitriid'

    def __init__(self, language, page_link, download_link, title, year, imdb_id):
        super(SubtitriIdSubtitle, self).__init__(language, page_link=page_link)
        self.download_link = download_link
        self.title = title
        self.year = year
        self.imdb_id = imdb_id
        self.matches = None
        # self.encoding = 'utf-16'

    @property
    def id(self):
        return self.download_link

    def get_matches(self, video):
        matches = set()
        if isinstance(video, Movie):
            # title
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # imdb id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')

        self.matches = matches
        return matches


class SubtitriIdProvider(Provider, ProviderSubtitleArchiveMixin):
    """subtitri.id.lv Provider."""
    subtitle_class = SubtitriIdSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.id.lv'
    search_url = server_url + '/search/'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

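    # NOTE: each search hit below costs one extra HTTP request, because the
    # title, year, IMDb id and download link live on the detail page rather
    # than in the search results themselves.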
    def query(self, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over search result blocks
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse the detail page for additional info
            r = self.session.get(page_link, timeout=10)
            detail_soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

            # title (the header lists all title variants separated by ' / '; take the last)
            movie_titles_string = detail_soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]
            # TODO: alternate titles(?)
            # alternate_titles = movie_titles_list.remove(title)

            # year (cast so it compares equal to video.year)
            year_text = detail_soup.select_one('#film-page-year').text.strip()
            year = int(year_text) if year_text.isdigit() else None

            # imdb id
            imdb_link = detail_soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = detail_soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubtitriIdSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: keep the raw payload only if it already parses
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            # fix content encoding (utf-16 encoded by default)
            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), uncurl_quotes=False, fix_character_width=False).encode('utf-8')
            subtitle.content = fixed_subtitle_content
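
A caveat shared by both providers: subtitle_content.decode('utf-16') raises UnicodeDecodeError if an archive ever ships a file that is not UTF-16. A more tolerant variant could fall back to a detected encoding; the sketch below assumes chardet as an extra dependency, which this diff does not use:

# Hedged sketch, not part of this diff: tolerant decode before the ftfy cleanup.
import chardet  # assumed extra dependency
from ftfy import fix_text

def normalize_subtitle(raw):
    try:
        text = raw.decode('utf-16')  # the site default, per the comments above
    except UnicodeDecodeError:
        detected = chardet.detect(raw)  # best-effort encoding detection
        text = raw.decode(detected['encoding'] or 'utf-8', errors='replace')
    return fix_text(text, uncurl_quotes=False, fix_character_width=False).encode('utf-8')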