From 040ddb236269c7a27d5d4f9c7fe708e53caba72f Mon Sep 17 00:00:00 2001
From: morpheus65535 <louis_vezina@hotmail.com>
Date: Fri, 11 Mar 2022 22:18:36 -0500
Subject: [PATCH] Improved throttling of LegendasDivx provider to prevent IP
 address blocking caused by reaching 150 searches a day. #1757

---
 bazarr/get_providers.py                       | 10 ++++++-
 libs/subliminal_patch/exceptions.py           |  6 ++++
 .../providers/legendasdivx.py                 | 28 ++++++++++++++++++-
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py
index bf7efad33..bdcdc5d7b 100644
--- a/bazarr/get_providers.py
+++ b/bazarr/get_providers.py
@@ -13,7 +13,8 @@ from get_args import args
 from config import settings, get_array_from
 from event_handler import event_stream
 from utils import get_binary, blacklist_log, blacklist_log_movie
-from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked, MustGetBlacklisted
+from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked, \
+    MustGetBlacklisted, SearchLimitReached
 from subliminal.providers.opensubtitles import DownloadLimitReached
 from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
 from subliminal import region as subliminal_cache_region
@@ -36,6 +37,11 @@ def time_until_end_of_day(dt=None):
 titulky_server_local_time = datetime.datetime.now(tz=pytz.timezone('Europe/Prague')).replace(tzinfo=None)
 titulky_limit_reset_datetime = time_until_end_of_day(dt=titulky_server_local_time)
 
+# LegendasDivx resets its search limit at approximately midnight, Lisbon time, every day.
+legendasdivx_server_local_time = datetime.datetime.now(tz=pytz.timezone('Europe/Lisbon')).replace(tzinfo=None)
+legendasdivx_limit_reset_datetime = time_until_end_of_day(dt=legendasdivx_server_local_time) + \
+                                    datetime.timedelta(minutes=15)
+
 hours_until_end_of_day = time_until_end_of_day().seconds // 3600 + 1
 
 VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled,
@@ -79,6 +85,8 @@ PROVIDER_THROTTLE_MAP = {
             datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))),
         IPAddressBlocked: (
             datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))),
+        SearchLimitReached: (legendasdivx_limit_reset_datetime,
+                             f"{legendasdivx_limit_reset_datetime.seconds // 3600} hours"),
     }
 }
 
diff --git a/libs/subliminal_patch/exceptions.py b/libs/subliminal_patch/exceptions.py
index 8b931425a..d257520b5 100644
--- a/libs/subliminal_patch/exceptions.py
+++ b/libs/subliminal_patch/exceptions.py
@@ -25,6 +25,12 @@ class IPAddressBlocked(ProviderError):
     pass
 
 
+class SearchLimitReached(ProviderError):
+    """Exception raised when the maximum number of searches for a provider has been reached."""
+
+    pass
+
+
 class MustGetBlacklisted(ProviderError):
     def __init__(self, id: str, media_type: str):
         super().__init__()
diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py
index e4a5ab292..d224f6a17 100644
--- a/libs/subliminal_patch/providers/legendasdivx.py
+++ b/libs/subliminal_patch/providers/legendasdivx.py
@@ -17,7 +17,7 @@ from subliminal.providers import ParserBeautifulSoup
 from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending
 from subliminal.utils import sanitize, sanitize_release_group
 from subliminal.video import Episode, Movie
-from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked
+from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked, SearchLimitReached
 from subliminal_patch.http import RetryingCFSession
 from subliminal_patch.providers import Provider, reinitialize_on_error
 from subliminal_patch.score import get_scores, framerate_equal
@@ -121,6 +121,7 @@ class LegendasdivxProvider(Provider):
     languages = {Language('por', 'BR')} | {Language('por')}
     video_types = (Episode, Movie)
     SEARCH_THROTTLE = 8
+    SAFE_SEARCH_LIMIT = 145  # real limit is 150, but we use 145 to keep a buffer and prevent the IPAddressBlocked exception from being raised
     site = 'https://www.legendasdivx.pt'
     headers = {
         'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
@@ -294,10 +295,21 @@ class LegendasdivxProvider(Provider):
             try:
                 # sleep for a 1 second before another request
                 sleep(1)
+                searchLimitReached = False
                 self.headers['Referer'] = self.site + '/index.php'
                 self.session.headers.update(self.headers)
                 res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
                 res.raise_for_status()
+                if res.status_code == 200 and "<!--pesquisas:" in res.text:
+                    searches_count_groups = re.search(r'<!--pesquisas: (\d*)-->', res.text)
+                    if searches_count_groups:
+                        try:
+                            searches_count = int(searches_count_groups.group(1))
+                        except TypeError:
+                            pass
+                        else:
+                            if searches_count >= self.SAFE_SEARCH_LIMIT:
+                                searchLimitReached = True
                 if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
                     logger.warning('Legendasdivx.pt :: query %s return no results!', querytext)
                     # for series, if no results found, try again just with series and season (subtitle packs)
@@ -308,6 +320,16 @@ class LegendasdivxProvider(Provider):
                         sleep(1)
                         res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
                         res.raise_for_status()
+                        if res.status_code == 200 and "<!--pesquisas:" in res.text:
+                            searches_count_groups = re.search(r'<!--pesquisas: (\d*)-->', res.text)
+                            if searches_count_groups:
+                                try:
+                                    searches_count = int(searches_count_groups.group(1))
+                                except TypeError:
+                                    pass
+                                else:
+                                    if searches_count >= self.SAFE_SEARCH_LIMIT:
+                                        searchLimitReached = True
                         if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
                             logger.warning('Legendasdivx.pt :: query {0} return no results for language {1}(for series and season only).'.format(querytext, language_id))
                             continue
@@ -331,6 +353,10 @@ class LegendasdivxProvider(Provider):
                 logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
                 raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)
 
+            if searchLimitReached:
+                raise SearchLimitReached(
+                    "LegendasDivx.pt :: You've reached maximum number of search for the day.")
+
             bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
 
             # search for more than 10 results (legendasdivx uses pagination)