Refactored sites.py, sherlock.py and notify.py.

3 years ago · f4673cc13c
parent a3e2f7c214
commit f4673cc13c
4 changed files with 110 additions and 121 deletions
--- a/sherlock/notify.py
+++ b/sherlock/notify.py
@ -7,7 +7,7 @@ from result import QueryStatus
 from colorama import Fore, Style, init


-class QueryNotify():
+class QueryNotify:
    """Query Notify Object.

    Base class that describes methods available to notify the results of
@ -15,6 +15,7 @@ class QueryNotify():
    It is intended that other classes inherit from this base class and
    override the methods to implement specific functionality.
    """
+
    def __init__(self, result=None):
        """Create Query Notify Object.

@ -32,7 +33,7 @@ class QueryNotify():

        self.result = result

-        return
+        # return

    def start(self, message=None):
        """Notify Start.
@ -51,7 +52,7 @@ class QueryNotify():
        Nothing.
        """

-        return
+        # return

    def update(self, result):
        """Notify Update.
@ -70,7 +71,7 @@ class QueryNotify():

        self.result = result

-        return
+        # return

    def finish(self, message=None):
        """Notify Finish.
@ -89,7 +90,7 @@ class QueryNotify():
        Nothing.
        """

-        return
+        # return

    def __str__(self):
        """Convert Object To String.
@ -100,9 +101,7 @@ class QueryNotify():
        Return Value:
        Nicely formatted string to get information about this object.
        """
-        result = str(self.result)
-
-        return result
+        return str(self.result)


 class QueryNotifyPrint(QueryNotify):
@ -110,6 +109,7 @@ class QueryNotifyPrint(QueryNotify):

    Query notify class that prints results.
    """
+
    def __init__(self, result=None, verbose=False, color=True, print_all=False):
        """Create Query Notify Print Object.

@ -155,14 +155,14 @@ class QueryNotifyPrint(QueryNotify):
        title = "Checking username"
        if self.color:
            print(Style.BRIGHT + Fore.GREEN + "[" +
-                Fore.YELLOW + "*" +
-                Fore.GREEN + f"] {title}" +
-                Fore.WHITE + f" {message}" +
-                Fore.GREEN + " on:")
+                  Fore.YELLOW + "*" +
+                  Fore.GREEN + f"] {title}" +
+                  Fore.WHITE + f" {message}" +
+                  Fore.GREEN + " on:")
        else:
            print(f"[*] {title} {message} on:")

-        return
+        # return

    def update(self, result):
        """Notify Update.
@ -179,7 +179,7 @@ class QueryNotifyPrint(QueryNotify):
        """
        self.result = result

-        if self.verbose == False or self.result.query_time is None:
+        if self.verbose is False or self.result.query_time is None:
            response_time_text = ""
        else:
            response_time_text = f" [{round(self.result.query_time * 1000)} ms]"
@ -202,23 +202,23 @@ class QueryNotifyPrint(QueryNotify):
            if self.print_all:
                if self.color:
                    print((Style.BRIGHT + Fore.WHITE + "[" +
-                        Fore.RED + "-" +
-                        Fore.WHITE + "]" +
-                        response_time_text +
-                        Fore.GREEN + f" {self.result.site_name}:" +
-                        Fore.YELLOW + " Not Found!"))
+                           Fore.RED + "-" +
+                           Fore.WHITE + "]" +
+                           response_time_text +
+                           Fore.GREEN + f" {self.result.site_name}:" +
+                           Fore.YELLOW + " Not Found!"))
                else:
                    print(f"[-]{response_time_text} {self.result.site_name}: Not Found!")

        elif result.status == QueryStatus.UNKNOWN:
            if self.print_all:
                if self.color:
-                    print(Style.BRIGHT + Fore.WHITE + "[" +
-                          Fore.RED + "-" +
-                          Fore.WHITE + "]" +
-                          Fore.GREEN + f" {self.result.site_name}:" +
-                          Fore.RED + f" {self.result.context}" +
-                          Fore.YELLOW + f" ")
+                    print((Style.BRIGHT + Fore.WHITE + "[" +
+                           Fore.RED + "-" +
+                           Fore.WHITE + "]" +
+                           Fore.GREEN + f" {self.result.site_name}:" +
+                           Fore.RED + f" {self.result.context}" +
+                           Fore.YELLOW + ' '))
                else:
                    print(f"[-] {self.result.site_name}: {self.result.context} ")

@ -236,8 +236,9 @@ class QueryNotifyPrint(QueryNotify):

        else:
            # It should be impossible to ever get here...
-            raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
-                             f"site '{self.result.site_name}'")
+            raise ValueError(
+                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
+            )

        return

@ -250,6 +251,4 @@ class QueryNotifyPrint(QueryNotify):
        Return Value:
        Nicely formatted string to get information about this object.
        """
-        result = str(self.result)
-
-        return result
+        return str(self.result)
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@ -22,16 +22,14 @@ from torrequest import TorRequest
 from result import QueryStatus
 from result import QueryResult
 from notify import QueryNotifyPrint
-from sites  import SitesInformation
+from sites import SitesInformation

 module_name = "Sherlock: Find Usernames Across Social Networks"
 __version__ = "0.14.0"


-
-
 class SherlockFuturesSession(FuturesSession):
-    def request(self, method, url, hooks={}, *args, **kwargs):
+    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.

        This extends the FuturesSession request method to calculate a response
@ -53,6 +51,8 @@ class SherlockFuturesSession(FuturesSession):
        Request object.
        """
        # Record the start time for the request.
+        if hooks is None:
+            hooks = {}
        start = monotonic()

        def response_time(resp, *args, **kwargs):
@ -95,12 +95,11 @@ class SherlockFuturesSession(FuturesSession):


 def get_response(request_future, error_type, social_network):
-
    # Default for Response object if some failure occurs.
    response = None

    error_context = "General Unknown Error"
-    expection_text = None
+    exception_text = None
    try:
        response = request_future.result()
        if response.status_code:
@ -108,21 +107,21 @@ def get_response(request_future, error_type, social_network):
            error_context = None
    except requests.exceptions.HTTPError as errh:
        error_context = "HTTP Error"
-        expection_text = str(errh)
+        exception_text = str(errh)
    except requests.exceptions.ProxyError as errp:
        error_context = "Proxy Error"
-        expection_text = str(errp)
+        exception_text = str(errp)
    except requests.exceptions.ConnectionError as errc:
        error_context = "Error Connecting"
-        expection_text = str(errc)
+        exception_text = str(errc)
    except requests.exceptions.Timeout as errt:
        error_context = "Timeout Error"
-        expection_text = str(errt)
+        exception_text = str(errt)
    except requests.exceptions.RequestException as err:
        error_context = "Unknown Error"
-        expection_text = str(err)
+        exception_text = str(err)

-    return response, error_context, expection_text
+    return response, error_context, exception_text


 def interpolate_string(object, username):
@ -190,15 +189,14 @@ def sherlock(username, site_data, query_notify,
    # Limit number of workers to 20.
    # This is probably vastly overkill.
    if len(site_data) >= 20:
-        max_workers=20
+        max_workers = 20
    else:
-        max_workers=len(site_data)
+        max_workers = len(site_data)

    # Create multi-threaded session for all requests.
    session = SherlockFuturesSession(max_workers=max_workers,
                                     session=underlying_session)

-
    # Results from analysis of all sites
    results_total = {}

@ -206,10 +204,9 @@ def sherlock(username, site_data, query_notify,
    for social_network, net_info in site_data.items():

        # Results from analysis of this specific site
-        results_site = {}
+        results_site = {"url_main": net_info.get("urlMain")}

        # Record URL of main site
-        results_site["url_main"] = net_info.get("urlMain")

        # A user agent is needed because some sites don't return the correct
        # information since they think that we are bots (Which we actually are...)
@ -227,7 +224,7 @@ def sherlock(username, site_data, query_notify,
        # Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
-            # No need to do the check at the site: this user name is not allowed.
+            # No need to do the check at the site: this username is not allowed.
            results_site["status"] = QueryResult(username,
                                                 social_network,
                                                 url,
@ -254,7 +251,7 @@ def sherlock(username, site_data, query_notify,
                elif request_method == "PUT":
                    request = session.put
                else:
-                    raise RuntimeError( f"Unsupported request_method for {url}")
+                    raise RuntimeError(f"Unsupported request_method for {url}")

            if request_payload is not None:
                request_payload = interpolate_string(request_payload, username)
@ -300,10 +297,10 @@ def sherlock(username, site_data, query_notify,
                                 )
            else:
                future = request(url=url_probe, headers=headers,
-                                allow_redirects=allow_redirects,
-                                timeout=timeout,
-                                json=request_payload
-                                )
+                                 allow_redirects=allow_redirects,
+                                 timeout=timeout,
+                                 json=request_payload
+                                 )

            # Store future in data for access later
            net_info["request_future"] = future
@ -312,7 +309,7 @@ def sherlock(username, site_data, query_notify,
            if unique_tor:
                underlying_request.reset_identity()

-        # Add this site's results into final dictionary with all of the other results.
+        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site

    # Open the file containing account links
@ -334,7 +331,7 @@ def sherlock(username, site_data, query_notify,

        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
-        r, error_text, expection_text = get_response(request_future=future,
+        r, error_text, exception_text = get_response(request_future=future,
                                                     error_type=error_type,
                                                     social_network=social_network)

@ -365,13 +362,13 @@ def sherlock(username, site_data, query_notify,
            # error_flag True denotes no error found in the HTML
            # error_flag False denotes error found in the HTML
            error_flag = True
-            errors=net_info.get("errorMsg")
+            errors = net_info.get("errorMsg")
            # errors will hold the error message
            # it can be string or list
-            # by insinstance method we can detect that
+            # by isinstance method we can detect that
            # and handle the case for strings as normal procedure
            # and if its list we can iterate the errors
-            if isinstance(errors,str):
+            if isinstance(errors, str):
                # Checks if the error message is in the HTML
                # if error is present we will set flag to False
                if errors in r.text:
@ -431,7 +428,6 @@ def sherlock(username, site_data, query_notify,
            raise ValueError(f"Unknown Error Type '{error_type}' for "
                             f"site '{social_network}'")

-
        # Notify caller about results of query.
        query_notify.update(result)

@ -477,8 +473,7 @@ def timeout_check(value):


 def main():
-
-    version_string = f"%(prog)s {__version__}\n" +  \
+    version_string = f"%(prog)s {__version__}\n" + \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

@ -486,11 +481,11 @@ def main():
                            description=f"{module_name} (Version {__version__})"
                            )
    parser.add_argument("--version",
-                        action="version",  version=version_string,
+                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--verbose", "-v", "-d", "--debug",
-                        action="store_true",  dest="verbose", default=False,
+                        action="store_true", dest="verbose", default=False,
                        help="Display extra debugging information and metrics."
                        )
    parser.add_argument("--folderoutput", "-fo", dest="folderoutput",
@ -506,7 +501,7 @@ def main():
                        action="store_true", dest="unique_tor", default=False,
                        help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.")
    parser.add_argument("--csv",
-                        action="store_true",  dest="csv", default=False,
+                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("--site",
@ -528,15 +523,15 @@ def main():
                             "Default timeout is infinity. "
                             "A longer timeout will be more likely to get results from slow sites. "
                             "On the other hand, this may cause a long delay to gather all results."
-                       )
+                        )
    parser.add_argument("--print-all",
                        action="store_true", dest="print_all",
                        help="Output sites where the username was not found."
-                       )
+                        )
    parser.add_argument("--print-found",
                        action="store_false", dest="print_all", default=False,
                        help="Output sites where the username was found."
-                       )
+                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
@ -570,7 +565,6 @@ def main():
    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")

-
    # Argument check
    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
@ -582,7 +576,8 @@ def main():

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
-        print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.")
+        print(
+            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.")

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
@ -594,7 +589,6 @@ def main():
        print("You can only use --output with a single username")
        sys.exit(1)

-
    # Create object with all information about sites we are aware of.
    try:
        if args.local:
@ -608,10 +602,7 @@ def main():
    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
-    site_data_all = {}
-    for site in sites:
-        site_data_all[site.name] = site.information
-
+    site_data_all = {site.name: site.information for site in sites}
    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
--- a/sherlock/sites.py
+++ b/sherlock/sites.py
@ -1,21 +1,18 @@
 """Sherlock Sites Information Module

-This module supports storing information about web sites.
+This module supports storing information about websites.
 This is the raw data that will be used to search for usernames.
 """
-import os
 import json
-import operator
 import requests
-import sys


-class SiteInformation():
+class SiteInformation:
    def __init__(self, name, url_home, url_username_format, username_claimed,
                 username_unclaimed, information):
        """Create Site Information Object.

-        Contains information about a specific web site.
+        Contains information about a specific website.

        Keyword Arguments:
        self                   -- This object.
@ -30,13 +27,13 @@ class SiteInformation():
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
-                                         the web site.
+                                         the website.
        username_claimed       -- String containing username which is known
-                                  to be claimed on web site.
+                                  to be claimed on website.
        username_unclaimed     -- String containing username which is known
-                                  to be unclaimed on web site.
+                                  to be unclaimed on website.
        information            -- Dictionary containing all known information
-                                  about web site.
+                                  about website.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
@ -49,13 +46,13 @@ class SiteInformation():
        Nothing.
        """

-        self.name                = name
-        self.url_home            = url_home
+        self.name = name
+        self.url_home = url_home
        self.url_username_format = url_username_format

-        self.username_claimed    = username_claimed
-        self.username_unclaimed  = username_unclaimed
-        self.information         = information
+        self.username_claimed = username_claimed
+        self.username_unclaimed = username_unclaimed
+        self.information = information

        return

@ -72,11 +69,11 @@ class SiteInformation():
        return f"{self.name} ({self.url_home})"


-class SitesInformation():
+class SitesInformation:
    def __init__(self, data_file_path=None):
        """Create Sites Information Object.

-        Contains information about all supported web sites.
+        Contains information about all supported websites.

        Keyword Arguments:
        self                   -- This object.
@ -109,7 +106,7 @@ class SitesInformation():

        if data_file_path is None:
            # The default data file is the live data.json which is in the GitHub repo. The reason why we are using
-            # this instead of the local one is so that the user has the most up to date data. This prevents
+            # this instead of the local one is so that the user has the most up-to-date data. This prevents
            # users from creating issue about false positives which has already been fixed or having outdated data
            data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json"

@ -117,26 +114,29 @@ class SitesInformation():
        if not data_file_path.lower().endswith(".json"):
            raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")

-        if "http://"  == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
+        if (
+            data_file_path[:7].lower() == "http://"
+            or data_file_path[:8].lower() == "https://"
+        ):
            # Reference is to a URL.
            try:
                response = requests.get(url=data_file_path)
            except Exception as error:
-                raise FileNotFoundError(f"Problem while attempting to access "
-                                        f"data file URL '{data_file_path}':  "
-                                        f"{str(error)}"
-                                       )
-            if response.status_code == 200:
-                try:
-                    site_data = response.json()
-                except Exception as error:
-                    raise ValueError(f"Problem parsing json contents at "
-                                     f"'{data_file_path}':  {str(error)}."
-                                    )
-            else:
+                raise FileNotFoundError(
+                    f"Problem while attempting to access data file URL '{data_file_path}':  {error}"
+                )
+
+            if response.status_code != 200:
                raise FileNotFoundError(f"Bad response while accessing "
                                        f"data file URL '{data_file_path}'."
-                                       )
+                                        )
+            try:
+                site_data = response.json()
+            except Exception as error:
+                raise ValueError(
+                    f"Problem parsing json contents at '{data_file_path}':  {error}."
+                )
+
        else:
            # Reference is to a file.
            try:
@ -144,17 +144,18 @@ class SitesInformation():
                    try:
                        site_data = json.load(file)
                    except Exception as error:
-                        raise ValueError(f"Problem parsing json contents at "
-                                         f"'{data_file_path}':  {str(error)}."
-                                        )
-            except FileNotFoundError as error:
+                        raise ValueError(
+                            f"Problem parsing json contents at '{data_file_path}':  {error}."
+                        )
+
+            except FileNotFoundError:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file '{data_file_path}'."
-                                       )
+                                        )

        self.sites = {}

-        # Add all of site information from the json file to internal site list.
+        # Add all site information from the json file to internal site list.
        for site_name in site_data:
            try:

@ -165,12 +166,11 @@ class SitesInformation():
                                    site_data[site_name]["username_claimed"],
                                    site_data[site_name]["username_unclaimed"],
                                    site_data[site_name]
-                                   )
+                                    )
            except KeyError as error:
-                raise ValueError(f"Problem parsing json contents at "
-                                 f"'{data_file_path}':  "
-                                 f"Missing attribute {str(error)}."
-                                )
+                raise ValueError(
+                    f"Problem parsing json contents at '{data_file_path}':  Missing attribute {error}."
+                )

        return

@ -184,9 +184,7 @@ class SitesInformation():
        List of strings containing names of sites.
        """

-        site_names = sorted([site.name for site in self], key=str.lower)
-
-        return site_names
+        return sorted([site.name for site in self], key=str.lower)

    def __iter__(self):
        """Iterator For Object.
--- a/site_list.py
+++ b/site_list.py
@ -3,9 +3,10 @@ This module generates the listing of supported sites
 which can be found in sites.md
 It also organizes all the sites in alphanumeric order
 """
+
 import json

-pool = list()
+pool = []

 with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
    data = json.load(data_file)