parent 164230c7cd
commit d910db7965
@@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
import io
import logging
from random import randint

from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

from guessit import guessit
from requests import Session
from bs4 import NavigableString
from ftfy import fix_text
from subzero.language import Language

from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.score import framerate_equal
from subliminal.exceptions import ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import sanitize, guess_matches
from subliminal.video import Movie
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)

class NekurSubtitle(Subtitle):
    """Nekur Subtitle."""
    provider_name = 'nekur'

    def __init__(self, language, page_link, download_link, title, year, imdb_id, fps, notes):
        super(NekurSubtitle, self).__init__(language, page_link=page_link)
        self.download_link = download_link
        self.title = title
        self.year = year
        self.imdb_id = imdb_id
        self.fps = fps
        self.notes = notes
        self.matches = None
        # self.encoding = 'utf-16'

    @property
    def id(self):
        return self.download_link

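    # The match names below ('title', 'year', 'imdb_id') are the standard
    # subliminal movie match keys; extra hints such as release group or
    # resolution are mined from the notes field via guessit.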
    def get_matches(self, video):
        matches = set()

        if isinstance(video, Movie):
            # title
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # imdb id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')
            # fps
            if video.fps and self.fps and not framerate_equal(video.fps, self.fps):
                logger.warning("nekur: Wrong FPS (expected: %s, got: %s)", video.fps, self.fps)
            # guess additional info from notes
            matches |= guess_matches(video, guessit(self.notes, {'type': 'movie'}), partial=True)

        self.matches = matches
        return matches


class NekurProvider(Provider, ProviderSubtitleArchiveMixin):
    """Nekur Provider."""
    subtitle_class = NekurSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.nekur.net/'
    search_url = server_url + 'modules/Subtitles.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        # pick a random desktop User-Agent to avoid trivial bot filtering
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, title):
        subtitles = []

        data = {
            'ajax': '1',
            'sSearch': title,
        }

        r = self.session.post(self.search_url, data=data, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over subtitle rows
        rows = soup.select('tbody > tr')
        for row in rows:
            # title (the anchor's direct text only, skipping nested tags)
            title_anchor_el = row.select_one('.title > a')
            title_inner_text = [element for element in title_anchor_el if isinstance(element, NavigableString)]
            title = title_inner_text[0].strip()

            # year (rendered as "(YYYY)"; cast so it compares equal to video.year)
            year_text = row.select_one('.year').text.strip('()')
            year = int(year_text) if year_text.isdigit() else None

            # download link
            href = title_anchor_el.get('href')
            download_link = self.server_url + href

            # imdb id
            imdb_td = row.select_one('td:nth-of-type(4)')
            imdb_link = imdb_td.select_one('a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # fps
            fps = row.select_one('.fps').text.strip()

            # additional notes
            notes = row.select_one('.notes').text.strip()

            # page link = archive link (there is no separate subtitle page link)
            page_link = 'http://subtitri.nekur.net/filmu-subtitri/'

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id, fps, notes)
            logger.debug('nekur: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, NekurSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: keep the raw payload only if it already parses
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            # fix content encoding (utf-16 encoded by default)
            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), uncurl_quotes=False, fix_character_width=False).encode('utf-8')
            subtitle.content = fixed_subtitle_content
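
For reference, here is a minimal sketch of how a subliminal_patch provider like this one is typically driven. The module path and the movie metadata are illustrative assumptions, not taken from this diff:

# Hypothetical driver script; assumes the file above lands at
# subliminal_patch/providers/nekur.py and that subliminal's
# Movie(name, title, year=...) constructor is available.
from subliminal.video import Movie
from subzero.language import Language
from subliminal_patch.providers.nekur import NekurProvider

video = Movie('Inception.2010.720p.BluRay.x264.mkv', 'Inception', year=2010)
provider = NekurProvider()
provider.initialize()
try:
    # list_subtitles() fans the title out to query() and filters by language
    subtitles = provider.list_subtitles(video, {Language.fromalpha2('lv')})
    for sub in subtitles:
        provider.download_subtitle(sub)  # populates sub.content (UTF-8 bytes)
finally:
    provider.terminate()
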
@@ -0,0 +1,163 @@
# -*- coding: utf-8 -*-
import io
import logging
from random import randint

from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

from requests import Session
from ftfy import fix_text
from subzero.language import Language

from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle
from subliminal.exceptions import ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import sanitize
from subliminal.video import Movie
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST

logger = logging.getLogger(__name__)

class SubtitriIdSubtitle(Subtitle):
    """subtitri.id.lv Subtitle."""
    provider_name = 'subtitriid'

    def __init__(self, language, page_link, download_link, title, year, imdb_id):
        super(SubtitriIdSubtitle, self).__init__(language, page_link=page_link)
        self.download_link = download_link
        self.title = title
        self.year = year
        self.imdb_id = imdb_id
        self.matches = None
        # self.encoding = 'utf-16'

    @property
    def id(self):
        return self.download_link

    def get_matches(self, video):
        matches = set()
        if isinstance(video, Movie):
            # title
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
            # year
            if video.year and self.year == video.year:
                matches.add('year')
            # imdb id
            if video.imdb_id and self.imdb_id == video.imdb_id:
                matches.add('imdb_id')

        self.matches = matches
        return matches


class SubtitriIdProvider(Provider, ProviderSubtitleArchiveMixin):
    """subtitri.id.lv Provider."""
    subtitle_class = SubtitriIdSubtitle
    languages = {Language('lva', 'LV')} | {Language.fromalpha2(l) for l in ['lv']}
    server_url = 'http://subtitri.id.lv'
    search_url = server_url + '/search/'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

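    # NOTE: each search hit below costs one extra HTTP request, because the
    # title, year, IMDb id and download link live on the detail page rather
    # than in the search results themselves.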
    def query(self, title):
        subtitles = []

        r = self.session.get(self.search_url, params={'q': title}, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

        # loop over search result blocks
        rows = soup.select('.eBlock')
        for row in rows:
            result_anchor_el = row.select_one('.eTitle > a')

            # page link
            page_link = result_anchor_el.get('href')

            # fetch/parse the detail page for additional info
            r = self.session.get(page_link, timeout=10)
            detail_soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

            # title (the header lists all title variants separated by ' / '; take the last)
            movie_titles_string = detail_soup.select_one('.main-header').text.strip()
            movie_titles_list = movie_titles_string.split(' / ')
            title = movie_titles_list[-1]
            # TODO: alternate titles(?)
            # alternate_titles = movie_titles_list.remove(title)

            # year (cast so it compares equal to video.year)
            year_text = detail_soup.select_one('#film-page-year').text.strip()
            year = int(year_text) if year_text.isdigit() else None

            # imdb id
            imdb_link = detail_soup.select_one('#actors-page > a').get('href')
            imdb_id = imdb_link.split('/')[-2]

            # download link
            href = detail_soup.select_one('.hvr').get('href')
            download_link = self.server_url + href

            # create/add the subtitle
            subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link, download_link, title, year, imdb_id)
            logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles
        else:
            titles = []

        subtitles = []
        # query for subtitles
        for title in titles:
            if isinstance(video, Movie):
                subtitles += [s for s in self.query(title) if s.language in languages]

        return subtitles

    def download_subtitle(self, subtitle):
        if isinstance(subtitle, SubtitriIdSubtitle):
            # download the subtitle
            r = self.session.get(subtitle.download_link, timeout=10)
            r.raise_for_status()

            # open the archive
            archive_stream = io.BytesIO(r.content)
            if is_rarfile(archive_stream):
                archive = RarFile(archive_stream)
            elif is_zipfile(archive_stream):
                archive = ZipFile(archive_stream)
            else:
                # not an archive: keep the raw payload only if it already parses
                subtitle.content = r.content
                if subtitle.is_valid():
                    return
                subtitle.content = None

                raise ProviderError('Unidentified archive type')

            subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
            # fix content encoding (utf-16 encoded by default)
            fixed_subtitle_content = fix_text(subtitle_content.decode('utf-16'), uncurl_quotes=False, fix_character_width=False).encode('utf-8')
            subtitle.content = fixed_subtitle_content
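
A caveat shared by both providers: subtitle_content.decode('utf-16') raises UnicodeDecodeError if an archive ever ships a file that is not UTF-16. A more tolerant variant could fall back to a detected encoding; the sketch below assumes chardet as an extra dependency, which this diff does not use:

# Hedged sketch, not part of this diff: tolerant decode before the ftfy cleanup.
import chardet  # assumed extra dependency
from ftfy import fix_text

def normalize_subtitle(raw):
    try:
        text = raw.decode('utf-16')  # the site default, per the comments above
    except UnicodeDecodeError:
        detected = chardet.detect(raw)  # best-effort encoding detection
        text = raw.decode(detected['encoding'] or 'utf-8', errors='replace')
    return fix_text(text, uncurl_quotes=False, fix_character_width=False).encode('utf-8')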