From f4673cc13c276ef288300ab3753cc96c8f53faa0 Mon Sep 17 00:00:00 2001 From: benni347 Date: Mon, 31 Jan 2022 11:06:29 +0100 Subject: [PATCH] Refractored sites.py, sherlock.py and notify.py. --- sherlock/notify.py | 59 ++++++++++++++--------------- sherlock/sherlock.py | 79 +++++++++++++++++--------------------- sherlock/sites.py | 90 ++++++++++++++++++++++---------------------- site_list.py | 3 +- 4 files changed, 110 insertions(+), 121 deletions(-) diff --git a/sherlock/notify.py b/sherlock/notify.py index e3e4abb..79dc9b0 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -7,7 +7,7 @@ from result import QueryStatus from colorama import Fore, Style, init -class QueryNotify(): +class QueryNotify: """Query Notify Object. Base class that describes methods available to notify the results of @@ -15,6 +15,7 @@ class QueryNotify(): It is intended that other classes inherit from this base class and override the methods to implement specific functionality. """ + def __init__(self, result=None): """Create Query Notify Object. @@ -32,7 +33,7 @@ class QueryNotify(): self.result = result - return + # return def start(self, message=None): """Notify Start. @@ -51,7 +52,7 @@ class QueryNotify(): Nothing. """ - return + # return def update(self, result): """Notify Update. @@ -70,7 +71,7 @@ class QueryNotify(): self.result = result - return + # return def finish(self, message=None): """Notify Finish. @@ -89,7 +90,7 @@ class QueryNotify(): Nothing. """ - return + # return def __str__(self): """Convert Object To String. @@ -100,9 +101,7 @@ class QueryNotify(): Return Value: Nicely formatted string to get information about this object. """ - result = str(self.result) - - return result + return str(self.result) class QueryNotifyPrint(QueryNotify): @@ -110,6 +109,7 @@ class QueryNotifyPrint(QueryNotify): Query notify class that prints results. """ + def __init__(self, result=None, verbose=False, color=True, print_all=False): """Create Query Notify Print Object. @@ -155,14 +155,14 @@ class QueryNotifyPrint(QueryNotify): title = "Checking username" if self.color: print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {message}" + - Fore.GREEN + " on:") + Fore.YELLOW + "*" + + Fore.GREEN + f"] {title}" + + Fore.WHITE + f" {message}" + + Fore.GREEN + " on:") else: print(f"[*] {title} {message} on:") - return + # return def update(self, result): """Notify Update. 
@@ -179,7 +179,7 @@ class QueryNotifyPrint(QueryNotify): """ self.result = result - if self.verbose == False or self.result.query_time is None: + if self.verbose is False or self.result.query_time is None: response_time_text = "" else: response_time_text = f" [{round(self.result.query_time * 1000)} ms]" @@ -202,23 +202,23 @@ class QueryNotifyPrint(QueryNotify): if self.print_all: if self.color: print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!")) + Fore.RED + "-" + + Fore.WHITE + "]" + + response_time_text + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + " Not Found!")) else: print(f"[-]{response_time_text} {self.result.site_name}: Not Found!") elif result.status == QueryStatus.UNKNOWN: if self.print_all: if self.color: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + f" ") + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.RED + f" {self.result.context}" + + Fore.YELLOW + ' ')) else: print(f"[-] {self.result.site_name}: {self.result.context} ") @@ -236,8 +236,9 @@ class QueryNotifyPrint(QueryNotify): else: # It should be impossible to ever get here... - raise ValueError(f"Unknown Query Status '{str(result.status)}' for " - f"site '{self.result.site_name}'") + raise ValueError( + f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'" + ) return @@ -250,6 +251,4 @@ class QueryNotifyPrint(QueryNotify): Return Value: Nicely formatted string to get information about this object. """ - result = str(self.result) - - return result + return str(self.result) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e29a56a..57ca80c 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -22,16 +22,14 @@ from torrequest import TorRequest from result import QueryStatus from result import QueryResult from notify import QueryNotifyPrint -from sites import SitesInformation +from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.14.0" - - class SherlockFuturesSession(FuturesSession): - def request(self, method, url, hooks={}, *args, **kwargs): + def request(self, method, url, hooks=None, *args, **kwargs): """Request URL. This extends the FuturesSession request method to calculate a response @@ -53,6 +51,8 @@ class SherlockFuturesSession(FuturesSession): Request object. """ # Record the start time for the request. + if hooks is None: + hooks = {} start = monotonic() def response_time(resp, *args, **kwargs): @@ -95,12 +95,11 @@ class SherlockFuturesSession(FuturesSession): def get_response(request_future, error_type, social_network): - # Default for Response object if some failure occurs. 
response = None error_context = "General Unknown Error" - expection_text = None + exception_text = None try: response = request_future.result() if response.status_code: @@ -108,21 +107,21 @@ def get_response(request_future, error_type, social_network): error_context = None except requests.exceptions.HTTPError as errh: error_context = "HTTP Error" - expection_text = str(errh) + exception_text = str(errh) except requests.exceptions.ProxyError as errp: error_context = "Proxy Error" - expection_text = str(errp) + exception_text = str(errp) except requests.exceptions.ConnectionError as errc: error_context = "Error Connecting" - expection_text = str(errc) + exception_text = str(errc) except requests.exceptions.Timeout as errt: error_context = "Timeout Error" - expection_text = str(errt) + exception_text = str(errt) except requests.exceptions.RequestException as err: error_context = "Unknown Error" - expection_text = str(err) + exception_text = str(err) - return response, error_context, expection_text + return response, error_context, exception_text def interpolate_string(object, username): @@ -190,15 +189,14 @@ def sherlock(username, site_data, query_notify, # Limit number of workers to 20. # This is probably vastly overkill. if len(site_data) >= 20: - max_workers=20 + max_workers = 20 else: - max_workers=len(site_data) + max_workers = len(site_data) # Create multi-threaded session for all requests. session = SherlockFuturesSession(max_workers=max_workers, session=underlying_session) - # Results from analysis of all sites results_total = {} @@ -206,10 +204,9 @@ def sherlock(username, site_data, query_notify, for social_network, net_info in site_data.items(): # Results from analysis of this specific site - results_site = {} + results_site = {"url_main": net_info.get("urlMain")} # Record URL of main site - results_site["url_main"] = net_info.get("urlMain") # A user agent is needed because some sites don't return the correct # information since they think that we are bots (Which we actually are...) @@ -227,7 +224,7 @@ def sherlock(username, site_data, query_notify, # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: - # No need to do the check at the site: this user name is not allowed. + # No need to do the check at the site: this username is not allowed. results_site["status"] = QueryResult(username, social_network, url, @@ -254,7 +251,7 @@ def sherlock(username, site_data, query_notify, elif request_method == "PUT": request = session.put else: - raise RuntimeError( f"Unsupported request_method for {url}") + raise RuntimeError(f"Unsupported request_method for {url}") if request_payload is not None: request_payload = interpolate_string(request_payload, username) @@ -300,10 +297,10 @@ def sherlock(username, site_data, query_notify, ) else: future = request(url=url_probe, headers=headers, - allow_redirects=allow_redirects, - timeout=timeout, - json=request_payload - ) + allow_redirects=allow_redirects, + timeout=timeout, + json=request_payload + ) # Store future in data for access later net_info["request_future"] = future @@ -312,7 +309,7 @@ def sherlock(username, site_data, query_notify, if unique_tor: underlying_request.reset_identity() - # Add this site's results into final dictionary with all of the other results. + # Add this site's results into final dictionary with all the other results. 
results_total[social_network] = results_site # Open the file containing account links @@ -334,7 +331,7 @@ def sherlock(username, site_data, query_notify, # Retrieve future and ensure it has finished future = net_info["request_future"] - r, error_text, expection_text = get_response(request_future=future, + r, error_text, exception_text = get_response(request_future=future, error_type=error_type, social_network=social_network) @@ -365,13 +362,13 @@ def sherlock(username, site_data, query_notify, # error_flag True denotes no error found in the HTML # error_flag False denotes error found in the HTML error_flag = True - errors=net_info.get("errorMsg") + errors = net_info.get("errorMsg") # errors will hold the error message # it can be string or list - # by insinstance method we can detect that + # by isinstance method we can detect that # and handle the case for strings as normal procedure # and if its list we can iterate the errors - if isinstance(errors,str): + if isinstance(errors, str): # Checks if the error message is in the HTML # if error is present we will set flag to False if errors in r.text: @@ -431,7 +428,6 @@ def sherlock(username, site_data, query_notify, raise ValueError(f"Unknown Error Type '{error_type}' for " f"site '{social_network}'") - # Notify caller about results of query. query_notify.update(result) @@ -477,8 +473,7 @@ def timeout_check(value): def main(): - - version_string = f"%(prog)s {__version__}\n" + \ + version_string = f"%(prog)s {__version__}\n" + \ f"{requests.__description__}: {requests.__version__}\n" + \ f"Python: {platform.python_version()}" @@ -486,11 +481,11 @@ def main(): description=f"{module_name} (Version {__version__})" ) parser.add_argument("--version", - action="version", version=version_string, + action="version", version=version_string, help="Display version information and dependencies." ) parser.add_argument("--verbose", "-v", "-d", "--debug", - action="store_true", dest="verbose", default=False, + action="store_true", dest="verbose", default=False, help="Display extra debugging information and metrics." ) parser.add_argument("--folderoutput", "-fo", dest="folderoutput", @@ -506,7 +501,7 @@ def main(): action="store_true", dest="unique_tor", default=False, help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.") parser.add_argument("--csv", - action="store_true", dest="csv", default=False, + action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) parser.add_argument("--site", @@ -528,15 +523,15 @@ def main(): "Default timeout is infinity. " "A longer timeout will be more likely to get results from slow sites. " "On the other hand, this may cause a long delay to gather all results." - ) + ) parser.add_argument("--print-all", action="store_true", dest="print_all", help="Output sites where the username was not found." - ) + ) parser.add_argument("--print-found", action="store_false", dest="print_all", default=False, help="Output sites where the username was found." 
- ) + ) parser.add_argument("--no-color", action="store_true", dest="no_color", default=False, help="Don't color terminal output" @@ -570,7 +565,6 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check # TODO regex check on args.proxy if args.tor and (args.proxy is not None): @@ -582,7 +576,8 @@ def main(): if args.tor or args.unique_tor: print("Using Tor to make requests") - print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") + print( + "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") # Check if both output methods are entered as input. if args.output is not None and args.folderoutput is not None: @@ -594,7 +589,6 @@ def main(): print("You can only use --output with a single username") sys.exit(1) - # Create object with all information about sites we are aware of. try: if args.local: @@ -608,10 +602,7 @@ def main(): # Create original dictionary from SitesInformation() object. # Eventually, the rest of the code will be updated to use the new object # directly, but this will glue the two pieces together. - site_data_all = {} - for site in sites: - site_data_all[site.name] = site.information - + site_data_all = {site.name: site.information for site in sites} if args.site_list is None: # Not desired to look at a sub-set of sites site_data = site_data_all diff --git a/sherlock/sites.py b/sherlock/sites.py index de8eeec..c706110 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -1,21 +1,18 @@ """Sherlock Sites Information Module -This module supports storing information about web sites. +This module supports storing information about websites. This is the raw data that will be used to search for usernames. """ -import os import json -import operator import requests -import sys -class SiteInformation(): +class SiteInformation: def __init__(self, name, url_home, url_username_format, username_claimed, username_unclaimed, information): """Create Site Information Object. - Contains information about a specific web site. + Contains information about a specific website. Keyword Arguments: self -- This object. @@ -30,13 +27,13 @@ class SiteInformation(): indicates that the individual usernames would show up under the "https://somesite.com/users/" area of - the web site. + the website. username_claimed -- String containing username which is known - to be claimed on web site. + to be claimed on website. username_unclaimed -- String containing username which is known - to be unclaimed on web site. + to be unclaimed on website. information -- Dictionary containing all known information - about web site. + about website. NOTE: Custom information about how to actually detect the existence of the username will be included in this @@ -49,13 +46,13 @@ class SiteInformation(): Nothing. 
""" - self.name = name - self.url_home = url_home + self.name = name + self.url_home = url_home self.url_username_format = url_username_format - self.username_claimed = username_claimed - self.username_unclaimed = username_unclaimed - self.information = information + self.username_claimed = username_claimed + self.username_unclaimed = username_unclaimed + self.information = information return @@ -72,11 +69,11 @@ class SiteInformation(): return f"{self.name} ({self.url_home})" -class SitesInformation(): +class SitesInformation: def __init__(self, data_file_path=None): """Create Sites Information Object. - Contains information about all supported web sites. + Contains information about all supported websites. Keyword Arguments: self -- This object. @@ -109,7 +106,7 @@ class SitesInformation(): if data_file_path is None: # The default data file is the live data.json which is in the GitHub repo. The reason why we are using - # this instead of the local one is so that the user has the most up to date data. This prevents + # this instead of the local one is so that the user has the most up-to-date data. This prevents # users from creating issue about false positives which has already been fixed or having outdated data data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json" @@ -117,26 +114,29 @@ class SitesInformation(): if not data_file_path.lower().endswith(".json"): raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.") - if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower(): + if ( + data_file_path[:7].lower() == "http://" + or data_file_path[:8].lower() == "https://" + ): # Reference is to a URL. try: response = requests.get(url=data_file_path) except Exception as error: - raise FileNotFoundError(f"Problem while attempting to access " - f"data file URL '{data_file_path}': " - f"{str(error)}" - ) - if response.status_code == 200: - try: - site_data = response.json() - except Exception as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': {str(error)}." - ) - else: + raise FileNotFoundError( + f"Problem while attempting to access data file URL '{data_file_path}': {error}" + ) + + if response.status_code != 200: raise FileNotFoundError(f"Bad response while accessing " f"data file URL '{data_file_path}'." - ) + ) + try: + site_data = response.json() + except Exception as error: + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': {error}." + ) + else: # Reference is to a file. try: @@ -144,17 +144,18 @@ class SitesInformation(): try: site_data = json.load(file) except Exception as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': {str(error)}." - ) - except FileNotFoundError as error: + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': {error}." + ) + + except FileNotFoundError: raise FileNotFoundError(f"Problem while attempting to access " f"data file '{data_file_path}'." - ) + ) self.sites = {} - # Add all of site information from the json file to internal site list. + # Add all site information from the json file to internal site list. 
for site_name in site_data: try: @@ -165,12 +166,11 @@ class SitesInformation(): site_data[site_name]["username_claimed"], site_data[site_name]["username_unclaimed"], site_data[site_name] - ) + ) except KeyError as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': " - f"Missing attribute {str(error)}." - ) + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." + ) return @@ -184,9 +184,7 @@ class SitesInformation(): List of strings containing names of sites. """ - site_names = sorted([site.name for site in self], key=str.lower) - - return site_names + return sorted([site.name for site in self], key=str.lower) def __iter__(self): """Iterator For Object. diff --git a/site_list.py b/site_list.py index 339f729..7ba267a 100644 --- a/site_list.py +++ b/site_list.py @@ -3,9 +3,10 @@ This module generates the listing of supported sites which can be found in sites.md It also organizes all the sites in alphanumeric order """ + import json -pool = list() +pool = [] with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file: data = json.load(data_file)
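
Note on the hooks={} -> hooks=None change in SherlockFuturesSession.request: Python evaluates default argument values once, when the def statement runs, so a mutable default such as {} is shared by every call that omits the argument, and anything written into it leaks into later calls. The None-plus-guard pattern adopted in the patch sidesteps that by allocating a fresh dict per call. The sketch below is illustrative only and not part of the patch; the register_* names and the "response" key are made up for the example.

    # Mutable-default pitfall vs. the None-guard pattern used in the patch.
    def register_shared(event, name, hooks={}):
        # The same dict object persists across calls that rely on the default.
        hooks.setdefault(event, []).append(name)
        return hooks

    def register_safe(event, name, hooks=None):
        # A fresh dict is created on every call that omits `hooks`.
        if hooks is None:
            hooks = {}
        hooks.setdefault(event, []).append(name)
        return hooks

    print(register_shared("response", "log_time"))  # {'response': ['log_time']}
    print(register_shared("response", "log_size"))  # {'response': ['log_time', 'log_size']}  <- leaked state
    print(register_safe("response", "log_time"))    # {'response': ['log_time']}
    print(register_safe("response", "log_size"))    # {'response': ['log_size']}  <- independent calls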