Refactored sites.py, sherlock.py and notify.py.

3 years ago · f4673cc13c
parent a3e2f7c214
commit f4673cc13c
4 changed files with 110 additions and 121 deletions
--- a/sherlock/notify.py
+++ b/sherlock/notify.py
@ -7,7 +7,7 @@ from result import QueryStatus
 from colorama import Fore, Style, init
-class QueryNotify():
+class QueryNotify:
    """Query Notify Object.
    Base class that describes methods available to notify the results of
@ -15,6 +15,7 @@ class QueryNotify():
    It is intended that other classes inherit from this base class and
    override the methods to implement specific functionality.
    """
    def __init__(self, result=None):
        """Create Query Notify Object.
@ -32,7 +33,7 @@ class QueryNotify():
        self.result = result
-        return
+        # return
    def start(self, message=None):
        """Notify Start.
@ -51,7 +52,7 @@ class QueryNotify():
        Nothing.
        """
-        return
+        # return
    def update(self, result):
        """Notify Update.
@ -70,7 +71,7 @@ class QueryNotify():
        self.result = result
-        return
+        # return
    def finish(self, message=None):
        """Notify Finish.
@ -89,7 +90,7 @@ class QueryNotify():
        Nothing.
        """
-        return
+        # return
    def __str__(self):
        """Convert Object To String.
@ -100,9 +101,7 @@ class QueryNotify():
        Return Value:
        Nicely formatted string to get information about this object.
        """
-        result = str(self.result)
+        return str(self.result)
        return result
 class QueryNotifyPrint(QueryNotify):
@ -110,6 +109,7 @@ class QueryNotifyPrint(QueryNotify):
    Query notify class that prints results.
    """
    def __init__(self, result=None, verbose=False, color=True, print_all=False):
        """Create Query Notify Print Object.
@ -155,14 +155,14 @@ class QueryNotifyPrint(QueryNotify):
        title = "Checking username"
        if self.color:
            print(Style.BRIGHT + Fore.GREEN + "[" +
-                Fore.YELLOW + "*" +
+                  Fore.YELLOW + "*" +
-                Fore.GREEN + f"] {title}" +
+                  Fore.GREEN + f"] {title}" +
-                Fore.WHITE + f" {message}" +
+                  Fore.WHITE + f" {message}" +
-                Fore.GREEN + " on:")
+                  Fore.GREEN + " on:")
        else:
            print(f"[*] {title} {message} on:")
-        return
+        # return
    def update(self, result):
        """Notify Update.
@ -179,7 +179,7 @@ class QueryNotifyPrint(QueryNotify):
        """
        self.result = result
-        if self.verbose == False or self.result.query_time is None:
+        if self.verbose is False or self.result.query_time is None:
            response_time_text = ""
        else:
            response_time_text = f" [{round(self.result.query_time * 1000)} ms]"
@ -202,23 +202,23 @@ class QueryNotifyPrint(QueryNotify):
            if self.print_all:
                if self.color:
                    print((Style.BRIGHT + Fore.WHITE + "[" +
-                        Fore.RED + "-" +
+                           Fore.RED + "-" +
-                        Fore.WHITE + "]" +
+                           Fore.WHITE + "]" +
-                        response_time_text +
+                           response_time_text +
-                        Fore.GREEN + f" {self.result.site_name}:" +
+                           Fore.GREEN + f" {self.result.site_name}:" +
-                        Fore.YELLOW + " Not Found!"))
+                           Fore.YELLOW + " Not Found!"))
                else:
                    print(f"[-]{response_time_text} {self.result.site_name}: Not Found!")
        elif result.status == QueryStatus.UNKNOWN:
            if self.print_all:
                if self.color:
-                    print(Style.BRIGHT + Fore.WHITE + "[" +
+                    print((Style.BRIGHT + Fore.WHITE + "[" +
-                          Fore.RED + "-" +
+                           Fore.RED + "-" +
-                          Fore.WHITE + "]" +
+                           Fore.WHITE + "]" +
-                          Fore.GREEN + f" {self.result.site_name}:" +
+                           Fore.GREEN + f" {self.result.site_name}:" +
-                          Fore.RED + f" {self.result.context}" +
+                           Fore.RED + f" {self.result.context}" +
-                          Fore.YELLOW + f" ")
+                           Fore.YELLOW + ' '))
                else:
                    print(f"[-] {self.result.site_name}: {self.result.context} ")
@ -236,8 +236,9 @@ class QueryNotifyPrint(QueryNotify):
        else:
            # It should be impossible to ever get here...
-            raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
+            raise ValueError(
-                             f"site '{self.result.site_name}'")
+                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )
        return
@ -250,6 +251,4 @@ class QueryNotifyPrint(QueryNotify):
        Return Value:
        Nicely formatted string to get information about this object.
        """
-        result = str(self.result)
+        return str(self.result)
        return result
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@ -22,16 +22,14 @@ from torrequest import TorRequest
 from result import QueryStatus
 from result import QueryResult
 from notify import QueryNotifyPrint
-from sites  import SitesInformation
+from sites import SitesInformation
 module_name = "Sherlock: Find Usernames Across Social Networks"
 __version__ = "0.14.0"
 class SherlockFuturesSession(FuturesSession):
-    def request(self, method, url, hooks={}, *args, **kwargs):
+    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.
        This extends the FuturesSession request method to calculate a response
@ -53,6 +51,8 @@ class SherlockFuturesSession(FuturesSession):
        Request object.
        """
        # Record the start time for the request.
        if hooks is None:
            hooks = {}
        start = monotonic()
        def response_time(resp, *args, **kwargs):
@ -95,12 +95,11 @@ class SherlockFuturesSession(FuturesSession):
 def get_response(request_future, error_type, social_network):
    # Default for Response object if some failure occurs.
    response = None
    error_context = "General Unknown Error"
-    expection_text = None
+    exception_text = None
    try:
        response = request_future.result()
        if response.status_code:
@ -108,21 +107,21 @@ def get_response(request_future, error_type, social_network):
            error_context = None
    except requests.exceptions.HTTPError as errh:
        error_context = "HTTP Error"
-        expection_text = str(errh)
+        exception_text = str(errh)
    except requests.exceptions.ProxyError as errp:
        error_context = "Proxy Error"
-        expection_text = str(errp)
+        exception_text = str(errp)
    except requests.exceptions.ConnectionError as errc:
        error_context = "Error Connecting"
-        expection_text = str(errc)
+        exception_text = str(errc)
    except requests.exceptions.Timeout as errt:
        error_context = "Timeout Error"
-        expection_text = str(errt)
+        exception_text = str(errt)
    except requests.exceptions.RequestException as err:
        error_context = "Unknown Error"
-        expection_text = str(err)
+        exception_text = str(err)
-    return response, error_context, expection_text
+    return response, error_context, exception_text
 def interpolate_string(object, username):
@ -190,15 +189,14 @@ def sherlock(username, site_data, query_notify,
    # Limit number of workers to 20.
    # This is probably vastly overkill.
    if len(site_data) >= 20:
-        max_workers=20
+        max_workers = 20
    else:
-        max_workers=len(site_data)
+        max_workers = len(site_data)
    # Create multi-threaded session for all requests.
    session = SherlockFuturesSession(max_workers=max_workers,
                                     session=underlying_session)
    # Results from analysis of all sites
    results_total = {}
@ -206,10 +204,9 @@ def sherlock(username, site_data, query_notify,
    for social_network, net_info in site_data.items():
        # Results from analysis of this specific site
-        results_site = {}
+        results_site = {"url_main": net_info.get("urlMain")}
        # Record URL of main site
        results_site["url_main"] = net_info.get("urlMain")
        # A user agent is needed because some sites don't return the correct
        # information since they think that we are bots (Which we actually are...)
@ -227,7 +224,7 @@ def sherlock(username, site_data, query_notify,
        # Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
-            # No need to do the check at the site: this user name is not allowed.
+            # No need to do the check at the site: this username is not allowed.
            results_site["status"] = QueryResult(username,
                                                 social_network,
                                                 url,
@ -254,7 +251,7 @@ def sherlock(username, site_data, query_notify,
                elif request_method == "PUT":
                    request = session.put
                else:
-                    raise RuntimeError( f"Unsupported request_method for {url}")
+                    raise RuntimeError(f"Unsupported request_method for {url}")
            if request_payload is not None:
                request_payload = interpolate_string(request_payload, username)
@ -300,10 +297,10 @@ def sherlock(username, site_data, query_notify,
                                 )
            else:
                future = request(url=url_probe, headers=headers,
-                                allow_redirects=allow_redirects,
+                                 allow_redirects=allow_redirects,
-                                timeout=timeout,
+                                 timeout=timeout,
-                                json=request_payload
+                                 json=request_payload
-                                )
+                                 )
            # Store future in data for access later
            net_info["request_future"] = future
@ -312,7 +309,7 @@ def sherlock(username, site_data, query_notify,
            if unique_tor:
                underlying_request.reset_identity()
-        # Add this site's results into final dictionary with all of the other results.
+        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site
    # Open the file containing account links
@ -334,7 +331,7 @@ def sherlock(username, site_data, query_notify,
        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
-        r, error_text, expection_text = get_response(request_future=future,
+        r, error_text, exception_text = get_response(request_future=future,
                                                     error_type=error_type,
                                                     social_network=social_network)
@ -365,13 +362,13 @@ def sherlock(username, site_data, query_notify,
            # error_flag True denotes no error found in the HTML
            # error_flag False denotes error found in the HTML
            error_flag = True
-            errors=net_info.get("errorMsg")
+            errors = net_info.get("errorMsg")
            # errors will hold the error message
            # it can be string or list
-            # by insinstance method we can detect that
+            # by isinstance method we can detect that
            # and handle the case for strings as normal procedure
            # and if its list we can iterate the errors
-            if isinstance(errors,str):
+            if isinstance(errors, str):
                # Checks if the error message is in the HTML
                # if error is present we will set flag to False
                if errors in r.text:
@ -431,7 +428,6 @@ def sherlock(username, site_data, query_notify,
            raise ValueError(f"Unknown Error Type '{error_type}' for "
                             f"site '{social_network}'")
        # Notify caller about results of query.
        query_notify.update(result)
@ -477,8 +473,7 @@ def timeout_check(value):
 def main():
-
+    version_string = f"%(prog)s {__version__}\n" + \
    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"
@ -486,11 +481,11 @@ def main():
                            description=f"{module_name} (Version {__version__})"
                            )
    parser.add_argument("--version",
-                        action="version",  version=version_string,
+                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--verbose", "-v", "-d", "--debug",
-                        action="store_true",  dest="verbose", default=False,
+                        action="store_true", dest="verbose", default=False,
                        help="Display extra debugging information and metrics."
                        )
    parser.add_argument("--folderoutput", "-fo", dest="folderoutput",
@ -506,7 +501,7 @@ def main():
                        action="store_true", dest="unique_tor", default=False,
                        help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.")
    parser.add_argument("--csv",
-                        action="store_true",  dest="csv", default=False,
+                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("--site",
@ -528,15 +523,15 @@ def main():
                             "Default timeout is infinity. "
                             "A longer timeout will be more likely to get results from slow sites. "
                             "On the other hand, this may cause a long delay to gather all results."
-                       )
+                        )
    parser.add_argument("--print-all",
                        action="store_true", dest="print_all",
                        help="Output sites where the username was not found."
-                       )
+                        )
    parser.add_argument("--print-found",
                        action="store_false", dest="print_all", default=False,
                        help="Output sites where the username was found."
-                       )
+                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
@ -570,7 +565,6 @@ def main():
    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")
    # Argument check
    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
@ -582,7 +576,8 @@ def main():
    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
-        print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.")
+        print(
            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.")
    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
@ -594,7 +589,6 @@ def main():
        print("You can only use --output with a single username")
        sys.exit(1)
    # Create object with all information about sites we are aware of.
    try:
        if args.local:
@ -608,10 +602,7 @@ def main():
    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
-    site_data_all = {}
+    site_data_all = {site.name: site.information for site in sites}
    for site in sites:
        site_data_all[site.name] = site.information
    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
--- a/sherlock/sites.py
+++ b/sherlock/sites.py
@ -1,21 +1,18 @@
 """Sherlock Sites Information Module
-This module supports storing information about web sites.
+This module supports storing information about websites.
 This is the raw data that will be used to search for usernames.
 """
 import os
 import json
 import operator
 import requests
 import sys
-class SiteInformation():
+class SiteInformation:
    def __init__(self, name, url_home, url_username_format, username_claimed,
                 username_unclaimed, information):
        """Create Site Information Object.
-        Contains information about a specific web site.
+        Contains information about a specific website.
        Keyword Arguments:
        self                   -- This object.
@ -30,13 +27,13 @@ class SiteInformation():
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
-                                         the web site.
+                                         the website.
        username_claimed       -- String containing username which is known
-                                  to be claimed on web site.
+                                  to be claimed on website.
        username_unclaimed     -- String containing username which is known
-                                  to be unclaimed on web site.
+                                  to be unclaimed on website.
        information            -- Dictionary containing all known information
-                                  about web site.
+                                  about website.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
@ -49,13 +46,13 @@ class SiteInformation():
        Nothing.
        """
-        self.name                = name
+        self.name = name
-        self.url_home            = url_home
+        self.url_home = url_home
        self.url_username_format = url_username_format
-        self.username_claimed    = username_claimed
+        self.username_claimed = username_claimed
-        self.username_unclaimed  = username_unclaimed
+        self.username_unclaimed = username_unclaimed
-        self.information         = information
+        self.information = information
        return
@ -72,11 +69,11 @@ class SiteInformation():
        return f"{self.name} ({self.url_home})"
-class SitesInformation():
+class SitesInformation:
    def __init__(self, data_file_path=None):
        """Create Sites Information Object.
-        Contains information about all supported web sites.
+        Contains information about all supported websites.
        Keyword Arguments:
        self                   -- This object.
@ -109,7 +106,7 @@ class SitesInformation():
        if data_file_path is None:
            # The default data file is the live data.json which is in the GitHub repo. The reason why we are using
-            # this instead of the local one is so that the user has the most up to date data. This prevents
+            # this instead of the local one is so that the user has the most up-to-date data. This prevents
            # users from creating issue about false positives which has already been fixed or having outdated data
            data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json"
@ -117,26 +114,29 @@ class SitesInformation():
        if not data_file_path.lower().endswith(".json"):
            raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")
-        if "http://"  == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
+        if (
            data_file_path[:7].lower() == "http://"
            or data_file_path[:8].lower() == "https://"
        ):
            # Reference is to a URL.
            try:
                response = requests.get(url=data_file_path)
            except Exception as error:
-                raise FileNotFoundError(f"Problem while attempting to access "
+                raise FileNotFoundError(
-                                        f"data file URL '{data_file_path}':  "
+                    f"Problem while attempting to access data file URL '{data_file_path}':  {error}"
-                                        f"{str(error)}"
+                )
-                                       )
+
-            if response.status_code == 200:
+            if response.status_code != 200:
                try:
                    site_data = response.json()
                except Exception as error:
                    raise ValueError(f"Problem parsing json contents at "
                                     f"'{data_file_path}':  {str(error)}."
                                    )
            else:
                raise FileNotFoundError(f"Bad response while accessing "
                                        f"data file URL '{data_file_path}'."
-                                       )
+                                        )
            try:
                site_data = response.json()
            except Exception as error:
                raise ValueError(
                    f"Problem parsing json contents at '{data_file_path}':  {error}."
                )
        else:
            # Reference is to a file.
            try:
@ -144,17 +144,18 @@ class SitesInformation():
                    try:
                        site_data = json.load(file)
                    except Exception as error:
-                        raise ValueError(f"Problem parsing json contents at "
+                        raise ValueError(
-                                         f"'{data_file_path}':  {str(error)}."
+                            f"Problem parsing json contents at '{data_file_path}':  {error}."
-                                        )
+                        )
-            except FileNotFoundError as error:
+
            except FileNotFoundError:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file '{data_file_path}'."
-                                       )
+                                        )
        self.sites = {}
-        # Add all of site information from the json file to internal site list.
+        # Add all site information from the json file to internal site list.
        for site_name in site_data:
            try:
@ -165,12 +166,11 @@ class SitesInformation():
                                    site_data[site_name]["username_claimed"],
                                    site_data[site_name]["username_unclaimed"],
                                    site_data[site_name]
-                                   )
+                                    )
            except KeyError as error:
-                raise ValueError(f"Problem parsing json contents at "
+                raise ValueError(
-                                 f"'{data_file_path}':  "
+                    f"Problem parsing json contents at '{data_file_path}':  Missing attribute {error}."
-                                 f"Missing attribute {str(error)}."
+                )
                                )
        return
@ -184,9 +184,7 @@ class SitesInformation():
        List of strings containing names of sites.
        """
-        site_names = sorted([site.name for site in self], key=str.lower)
+        return sorted([site.name for site in self], key=str.lower)
        return site_names
    def __iter__(self):
        """Iterator For Object.
--- a/site_list.py
+++ b/site_list.py
@ -3,9 +3,10 @@ This module generates the listing of supported sites
 which can be found in sites.md
 It also organizes all the sites in alphanumeric order
 """
 import json
-pool = list()
+pool = []
 with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
    data = json.load(data_file)