From 45f085eb5dc7a31a411f4cbb93f96cc214bc7607 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Bart=C3=ADk?= <63553146+sambartik@users.noreply.github.com>
Date: Fri, 4 Feb 2022 12:47:46 +0100
Subject: [PATCH] Added cookies caching to titulky provider

---
 libs/subliminal_patch/providers/titulky.py | 71 ++++++++++++++++++----
 1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
index 7b33acd04..e48e38d21 100644
--- a/libs/subliminal_patch/providers/titulky.py
+++ b/libs/subliminal_patch/providers/titulky.py
@@ -14,6 +14,7 @@
 from requests import Session
 from requests.adapters import HTTPAdapter
 from requests.exceptions import HTTPError
+from subliminal.cache import region as cache
 from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, Error, ProviderError
 from subliminal.providers import ParserBeautifulSoup
 from subliminal.subtitle import fix_line_ending
@@ -25,6 +26,7 @@
 from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
 from subliminal_patch.score import framerate_equal
 from subliminal_patch.subtitle import Subtitle, guess_matches, sanitize
+from dogpile.cache.api import NO_VALUE
 from subzero.language import Language
 
 from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
@@ -239,9 +241,14 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
         self.session.mount('http://', HTTPAdapter(pool_maxsize=pool_maxsize))
 
         # Set headers
-        self.session.headers['User-Agent'] = AGENT_LIST[randint(
-            0,
-            len(AGENT_LIST) - 1)]
+        cached_user_agent = cache.get('titulky_user_agent')
+        if cached_user_agent is NO_VALUE:
+            new_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+            cache.set('titulky_user_agent', new_user_agent)
+            self.session.headers['User-Agent'] = new_user_agent
+        else:
+            self.session.headers['User-Agent'] = cached_user_agent
+
         self.session.headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
         self.session.headers['Accept-Language'] = 'sk,cz,en;q=0.5'
         self.session.headers['Accept-Encoding'] = 'gzip, deflate'
@@ -253,19 +260,24 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
         self.login()
 
     def terminate(self):
-        self.logout()
         self.session.close()
 
     def login(self):
-        logger.info("Titulky.com: Logging in")
-
-        self.session.get(self.server_url)
+        # Reuse all cookies if found in cache and skip login.
+        cached_cookiejar = cache.get('titulky_cookiejar')
+        if cached_cookiejar is not NO_VALUE:
+            logger.info("Titulky.com: Reusing cached cookies.")
+            self.session.cookies.update(cached_cookiejar)
+            return True
 
+        logger.info("Titulky.com: Logging in...")
+
         data = {'LoginName': self.username, 'LoginPassword': self.password}
         res = self.session.post(self.server_url,
                                 data,
                                 allow_redirects=False,
-                                timeout=self.timeout)
+                                timeout=self.timeout,
+                                headers={'Referer': self.server_url})
 
         location_qs = parse_qs(urlparse(res.headers['Location']).query)
 
@@ -274,6 +286,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
             if 'omezené' in location_qs['msg'][0]:
                 raise AuthenticationError("V.I.P. account is required for this provider to work!")
             else:
+                logger.info("Titulky.com: Successfully logged in, caching cookies for future connections...")
+                cache.set('titulky_cookiejar', self.session.cookies.copy())
                 return True
         else:
             raise AuthenticationError("Login failed")
@@ -283,24 +297,57 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
 
         res = self.session.get(self.logout_url,
                                allow_redirects=False,
-                               timeout=self.timeout)
+                               timeout=self.timeout,
+                               headers={'Referer': self.server_url})
 
         location_qs = parse_qs(urlparse(res.headers['Location']).query)
 
+        logger.info("Titulky.com: Clearing cache...")
+        cache.delete('titulky_cookiejar')
+        cache.delete('titulky_user_agent')
+
         # If the response is a redirect and doesnt point to an error message page, then we are logged out
         if res.status_code == 302 and location_qs['msg_type'][0] == 'i':
             return True
         else:
             raise AuthenticationError("Logout failed.")
 
-    def fetch_page(self, url, ref=None):
+    # GET request a page. This function acts as a requests.session.get proxy handling expired cached cookies
+    # and subsequent relogging and sending the original request again. If all went well, returns the response.
+    def get_request(self, url, ref=None, __recursion=0):
+        # That's deep... recursion... Stop. We don't have infinite memory. And don't want to
+        # spam titulky's server either. So we have to just accept the defeat. Let it throw!
+        if __recursion >= 5:
+            logger.debug("Titulky.com: Got into a loop while trying to send a request after relogging.")
+            raise AuthenticationError("Got into a loop and couldn't get authenticated!")
+
         logger.debug(f"Titulky.com: Fetching url: {url}")
 
         res = self.session.get(
             url,
             timeout=self.timeout,
+            allow_redirects=False,
             headers={'Referer': ref if ref else self.server_url})
 
+        # Check if we got redirected because login cookies expired.
+        # Note: microoptimization - don't bother parsing qs for non 302 responses.
+        if res.status_code == 302:
+            location_qs = parse_qs(urlparse(res.headers['Location']).query)
+            if location_qs.get('msg_type', [''])[0] == 'e' and "Přihlašte se" in location_qs.get('msg', [''])[0]:
+                logger.debug("Titulky.com: Login cookies expired.")
+                # Drop the stale cached cookies first, otherwise login() would just reuse them and skip the real login.
+                cache.delete('titulky_cookiejar')
+                self.login()
+                return self.get_request(url, ref=ref, __recursion=__recursion + 1)
+
+        return res
+
+
+    def fetch_page(self, url, ref=None):
+        logger.debug(f"Titulky.com: Fetching url: {url}")
+
+        res = self.get_request(url, ref=ref)
+
         if res.status_code != 200:
             raise HTTPError(f"Fetch failed with status code {res.status_code}")
         if not res.text:
@@ -842,9 +889,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
         return subtitles
 
     def download_subtitle(self, subtitle):
-        res = self.session.get(subtitle.download_link,
-                               headers={'Referer': subtitle.page_link},
-                               timeout=self.timeout)
+        res = self.get_request(subtitle.download_link, ref=subtitle.page_link)
 
         try:
             res.raise_for_status()