From f4673cc13c276ef288300ab3753cc96c8f53faa0 Mon Sep 17 00:00:00 2001 From: benni347 Date: Mon, 31 Jan 2022 11:06:29 +0100 Subject: [PATCH] Refractored sites.py, sherlock.py and notify.py. --- sherlock/notify.py | 59 ++++++++++++++--------------- sherlock/sherlock.py | 79 +++++++++++++++++--------------------- sherlock/sites.py | 90 ++++++++++++++++++++++---------------------- site_list.py | 3 +- 4 files changed, 110 insertions(+), 121 deletions(-) diff --git a/sherlock/notify.py b/sherlock/notify.py index e3e4abb..79dc9b0 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -7,7 +7,7 @@ from result import QueryStatus from colorama import Fore, Style, init -class QueryNotify(): +class QueryNotify: """Query Notify Object. Base class that describes methods available to notify the results of @@ -15,6 +15,7 @@ class QueryNotify(): It is intended that other classes inherit from this base class and override the methods to implement specific functionality. """ + def __init__(self, result=None): """Create Query Notify Object. @@ -32,7 +33,7 @@ class QueryNotify(): self.result = result - return + # return def start(self, message=None): """Notify Start. @@ -51,7 +52,7 @@ class QueryNotify(): Nothing. """ - return + # return def update(self, result): """Notify Update. @@ -70,7 +71,7 @@ class QueryNotify(): self.result = result - return + # return def finish(self, message=None): """Notify Finish. @@ -89,7 +90,7 @@ class QueryNotify(): Nothing. """ - return + # return def __str__(self): """Convert Object To String. @@ -100,9 +101,7 @@ class QueryNotify(): Return Value: Nicely formatted string to get information about this object. """ - result = str(self.result) - - return result + return str(self.result) class QueryNotifyPrint(QueryNotify): @@ -110,6 +109,7 @@ class QueryNotifyPrint(QueryNotify): Query notify class that prints results. """ + def __init__(self, result=None, verbose=False, color=True, print_all=False): """Create Query Notify Print Object. @@ -155,14 +155,14 @@ class QueryNotifyPrint(QueryNotify): title = "Checking username" if self.color: print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {message}" + - Fore.GREEN + " on:") + Fore.YELLOW + "*" + + Fore.GREEN + f"] {title}" + + Fore.WHITE + f" {message}" + + Fore.GREEN + " on:") else: print(f"[*] {title} {message} on:") - return + # return def update(self, result): """Notify Update. 
@@ -179,7 +179,7 @@ class QueryNotifyPrint(QueryNotify): """ self.result = result - if self.verbose == False or self.result.query_time is None: + if self.verbose is False or self.result.query_time is None: response_time_text = "" else: response_time_text = f" [{round(self.result.query_time * 1000)} ms]" @@ -202,23 +202,23 @@ class QueryNotifyPrint(QueryNotify): if self.print_all: if self.color: print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!")) + Fore.RED + "-" + + Fore.WHITE + "]" + + response_time_text + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + " Not Found!")) else: print(f"[-]{response_time_text} {self.result.site_name}: Not Found!") elif result.status == QueryStatus.UNKNOWN: if self.print_all: if self.color: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + f" ") + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.RED + f" {self.result.context}" + + Fore.YELLOW + ' ')) else: print(f"[-] {self.result.site_name}: {self.result.context} ") @@ -236,8 +236,9 @@ class QueryNotifyPrint(QueryNotify): else: # It should be impossible to ever get here... - raise ValueError(f"Unknown Query Status '{str(result.status)}' for " - f"site '{self.result.site_name}'") + raise ValueError( + f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'" + ) return @@ -250,6 +251,4 @@ class QueryNotifyPrint(QueryNotify): Return Value: Nicely formatted string to get information about this object. """ - result = str(self.result) - - return result + return str(self.result) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e29a56a..57ca80c 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -22,16 +22,14 @@ from torrequest import TorRequest from result import QueryStatus from result import QueryResult from notify import QueryNotifyPrint -from sites import SitesInformation +from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.14.0" - - class SherlockFuturesSession(FuturesSession): - def request(self, method, url, hooks={}, *args, **kwargs): + def request(self, method, url, hooks=None, *args, **kwargs): """Request URL. This extends the FuturesSession request method to calculate a response @@ -53,6 +51,8 @@ class SherlockFuturesSession(FuturesSession): Request object. """ # Record the start time for the request. + if hooks is None: + hooks = {} start = monotonic() def response_time(resp, *args, **kwargs): @@ -95,12 +95,11 @@ class SherlockFuturesSession(FuturesSession): def get_response(request_future, error_type, social_network): - # Default for Response object if some failure occurs. 
response = None error_context = "General Unknown Error" - expection_text = None + exception_text = None try: response = request_future.result() if response.status_code: @@ -108,21 +107,21 @@ def get_response(request_future, error_type, social_network): error_context = None except requests.exceptions.HTTPError as errh: error_context = "HTTP Error" - expection_text = str(errh) + exception_text = str(errh) except requests.exceptions.ProxyError as errp: error_context = "Proxy Error" - expection_text = str(errp) + exception_text = str(errp) except requests.exceptions.ConnectionError as errc: error_context = "Error Connecting" - expection_text = str(errc) + exception_text = str(errc) except requests.exceptions.Timeout as errt: error_context = "Timeout Error" - expection_text = str(errt) + exception_text = str(errt) except requests.exceptions.RequestException as err: error_context = "Unknown Error" - expection_text = str(err) + exception_text = str(err) - return response, error_context, expection_text + return response, error_context, exception_text def interpolate_string(object, username): @@ -190,15 +189,14 @@ def sherlock(username, site_data, query_notify, # Limit number of workers to 20. # This is probably vastly overkill. if len(site_data) >= 20: - max_workers=20 + max_workers = 20 else: - max_workers=len(site_data) + max_workers = len(site_data) # Create multi-threaded session for all requests. session = SherlockFuturesSession(max_workers=max_workers, session=underlying_session) - # Results from analysis of all sites results_total = {} @@ -206,10 +204,9 @@ def sherlock(username, site_data, query_notify, for social_network, net_info in site_data.items(): # Results from analysis of this specific site - results_site = {} + results_site = {"url_main": net_info.get("urlMain")} # Record URL of main site - results_site["url_main"] = net_info.get("urlMain") # A user agent is needed because some sites don't return the correct # information since they think that we are bots (Which we actually are...) @@ -227,7 +224,7 @@ def sherlock(username, site_data, query_notify, # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: - # No need to do the check at the site: this user name is not allowed. + # No need to do the check at the site: this username is not allowed. results_site["status"] = QueryResult(username, social_network, url, @@ -254,7 +251,7 @@ def sherlock(username, site_data, query_notify, elif request_method == "PUT": request = session.put else: - raise RuntimeError( f"Unsupported request_method for {url}") + raise RuntimeError(f"Unsupported request_method for {url}") if request_payload is not None: request_payload = interpolate_string(request_payload, username) @@ -300,10 +297,10 @@ def sherlock(username, site_data, query_notify, ) else: future = request(url=url_probe, headers=headers, - allow_redirects=allow_redirects, - timeout=timeout, - json=request_payload - ) + allow_redirects=allow_redirects, + timeout=timeout, + json=request_payload + ) # Store future in data for access later net_info["request_future"] = future @@ -312,7 +309,7 @@ def sherlock(username, site_data, query_notify, if unique_tor: underlying_request.reset_identity() - # Add this site's results into final dictionary with all of the other results. + # Add this site's results into final dictionary with all the other results. 
results_total[social_network] = results_site # Open the file containing account links @@ -334,7 +331,7 @@ def sherlock(username, site_data, query_notify, # Retrieve future and ensure it has finished future = net_info["request_future"] - r, error_text, expection_text = get_response(request_future=future, + r, error_text, exception_text = get_response(request_future=future, error_type=error_type, social_network=social_network) @@ -365,13 +362,13 @@ def sherlock(username, site_data, query_notify, # error_flag True denotes no error found in the HTML # error_flag False denotes error found in the HTML error_flag = True - errors=net_info.get("errorMsg") + errors = net_info.get("errorMsg") # errors will hold the error message # it can be string or list - # by insinstance method we can detect that + # by isinstance method we can detect that # and handle the case for strings as normal procedure # and if its list we can iterate the errors - if isinstance(errors,str): + if isinstance(errors, str): # Checks if the error message is in the HTML # if error is present we will set flag to False if errors in r.text: @@ -431,7 +428,6 @@ def sherlock(username, site_data, query_notify, raise ValueError(f"Unknown Error Type '{error_type}' for " f"site '{social_network}'") - # Notify caller about results of query. query_notify.update(result) @@ -477,8 +473,7 @@ def timeout_check(value): def main(): - - version_string = f"%(prog)s {__version__}\n" + \ + version_string = f"%(prog)s {__version__}\n" + \ f"{requests.__description__}: {requests.__version__}\n" + \ f"Python: {platform.python_version()}" @@ -486,11 +481,11 @@ def main(): description=f"{module_name} (Version {__version__})" ) parser.add_argument("--version", - action="version", version=version_string, + action="version", version=version_string, help="Display version information and dependencies." ) parser.add_argument("--verbose", "-v", "-d", "--debug", - action="store_true", dest="verbose", default=False, + action="store_true", dest="verbose", default=False, help="Display extra debugging information and metrics." ) parser.add_argument("--folderoutput", "-fo", dest="folderoutput", @@ -506,7 +501,7 @@ def main(): action="store_true", dest="unique_tor", default=False, help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.") parser.add_argument("--csv", - action="store_true", dest="csv", default=False, + action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) parser.add_argument("--site", @@ -528,15 +523,15 @@ def main(): "Default timeout is infinity. " "A longer timeout will be more likely to get results from slow sites. " "On the other hand, this may cause a long delay to gather all results." - ) + ) parser.add_argument("--print-all", action="store_true", dest="print_all", help="Output sites where the username was not found." - ) + ) parser.add_argument("--print-found", action="store_false", dest="print_all", default=False, help="Output sites where the username was found." 
- ) + ) parser.add_argument("--no-color", action="store_true", dest="no_color", default=False, help="Don't color terminal output" @@ -570,7 +565,6 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check # TODO regex check on args.proxy if args.tor and (args.proxy is not None): @@ -582,7 +576,8 @@ def main(): if args.tor or args.unique_tor: print("Using Tor to make requests") - print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") + print( + "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") # Check if both output methods are entered as input. if args.output is not None and args.folderoutput is not None: @@ -594,7 +589,6 @@ def main(): print("You can only use --output with a single username") sys.exit(1) - # Create object with all information about sites we are aware of. try: if args.local: @@ -608,10 +602,7 @@ def main(): # Create original dictionary from SitesInformation() object. # Eventually, the rest of the code will be updated to use the new object # directly, but this will glue the two pieces together. - site_data_all = {} - for site in sites: - site_data_all[site.name] = site.information - + site_data_all = {site.name: site.information for site in sites} if args.site_list is None: # Not desired to look at a sub-set of sites site_data = site_data_all diff --git a/sherlock/sites.py b/sherlock/sites.py index de8eeec..c706110 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -1,21 +1,18 @@ """Sherlock Sites Information Module -This module supports storing information about web sites. +This module supports storing information about websites. This is the raw data that will be used to search for usernames. """ -import os import json -import operator import requests -import sys -class SiteInformation(): +class SiteInformation: def __init__(self, name, url_home, url_username_format, username_claimed, username_unclaimed, information): """Create Site Information Object. - Contains information about a specific web site. + Contains information about a specific website. Keyword Arguments: self -- This object. @@ -30,13 +27,13 @@ class SiteInformation(): indicates that the individual usernames would show up under the "https://somesite.com/users/" area of - the web site. + the website. username_claimed -- String containing username which is known - to be claimed on web site. + to be claimed on website. username_unclaimed -- String containing username which is known - to be unclaimed on web site. + to be unclaimed on website. information -- Dictionary containing all known information - about web site. + about website. NOTE: Custom information about how to actually detect the existence of the username will be included in this @@ -49,13 +46,13 @@ class SiteInformation(): Nothing. 
""" - self.name = name - self.url_home = url_home + self.name = name + self.url_home = url_home self.url_username_format = url_username_format - self.username_claimed = username_claimed - self.username_unclaimed = username_unclaimed - self.information = information + self.username_claimed = username_claimed + self.username_unclaimed = username_unclaimed + self.information = information return @@ -72,11 +69,11 @@ class SiteInformation(): return f"{self.name} ({self.url_home})" -class SitesInformation(): +class SitesInformation: def __init__(self, data_file_path=None): """Create Sites Information Object. - Contains information about all supported web sites. + Contains information about all supported websites. Keyword Arguments: self -- This object. @@ -109,7 +106,7 @@ class SitesInformation(): if data_file_path is None: # The default data file is the live data.json which is in the GitHub repo. The reason why we are using - # this instead of the local one is so that the user has the most up to date data. This prevents + # this instead of the local one is so that the user has the most up-to-date data. This prevents # users from creating issue about false positives which has already been fixed or having outdated data data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json" @@ -117,26 +114,29 @@ class SitesInformation(): if not data_file_path.lower().endswith(".json"): raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.") - if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower(): + if ( + data_file_path[:7].lower() == "http://" + or data_file_path[:8].lower() == "https://" + ): # Reference is to a URL. try: response = requests.get(url=data_file_path) except Exception as error: - raise FileNotFoundError(f"Problem while attempting to access " - f"data file URL '{data_file_path}': " - f"{str(error)}" - ) - if response.status_code == 200: - try: - site_data = response.json() - except Exception as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': {str(error)}." - ) - else: + raise FileNotFoundError( + f"Problem while attempting to access data file URL '{data_file_path}': {error}" + ) + + if response.status_code != 200: raise FileNotFoundError(f"Bad response while accessing " f"data file URL '{data_file_path}'." - ) + ) + try: + site_data = response.json() + except Exception as error: + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': {error}." + ) + else: # Reference is to a file. try: @@ -144,17 +144,18 @@ class SitesInformation(): try: site_data = json.load(file) except Exception as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': {str(error)}." - ) - except FileNotFoundError as error: + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': {error}." + ) + + except FileNotFoundError: raise FileNotFoundError(f"Problem while attempting to access " f"data file '{data_file_path}'." - ) + ) self.sites = {} - # Add all of site information from the json file to internal site list. + # Add all site information from the json file to internal site list. 
for site_name in site_data: try: @@ -165,12 +166,11 @@ class SitesInformation(): site_data[site_name]["username_claimed"], site_data[site_name]["username_unclaimed"], site_data[site_name] - ) + ) except KeyError as error: - raise ValueError(f"Problem parsing json contents at " - f"'{data_file_path}': " - f"Missing attribute {str(error)}." - ) + raise ValueError( + f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." + ) return @@ -184,9 +184,7 @@ class SitesInformation(): List of strings containing names of sites. """ - site_names = sorted([site.name for site in self], key=str.lower) - - return site_names + return sorted([site.name for site in self], key=str.lower) def __iter__(self): """Iterator For Object. diff --git a/site_list.py b/site_list.py index 339f729..7ba267a 100644 --- a/site_list.py +++ b/site_list.py @@ -3,9 +3,10 @@ This module generates the listing of supported sites which can be found in sites.md It also organizes all the sites in alphanumeric order """ + import json -pool = list() +pool = [] with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file: data = json.load(data_file)
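
Note on the hooks={} -> hooks=None change in SherlockFuturesSession.request: Python evaluates default argument values once, when the def statement runs, so a mutable default such as {} is shared by every call that omits the argument, and anything written into it leaks into later calls. The None-plus-guard pattern adopted in the patch sidesteps that by allocating a fresh dict per call. The sketch below is illustrative only and not part of the patch; the register_* names and the "response" key are made up for the example.

    # Mutable-default pitfall vs. the None-guard pattern used in the patch.
    def register_shared(event, name, hooks={}):
        # The same dict object persists across calls that rely on the default.
        hooks.setdefault(event, []).append(name)
        return hooks

    def register_safe(event, name, hooks=None):
        # A fresh dict is created on every call that omits `hooks`.
        if hooks is None:
            hooks = {}
        hooks.setdefault(event, []).append(name)
        return hooks

    print(register_shared("response", "log_time"))  # {'response': ['log_time']}
    print(register_shared("response", "log_size"))  # {'response': ['log_time', 'log_size']}  <- leaked state
    print(register_safe("response", "log_time"))    # {'response': ['log_time']}
    print(register_safe("response", "log_size"))    # {'response': ['log_size']}  <- independent calls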