From b59dc556311d85339f573a6257301fa86144b130 Mon Sep 17 00:00:00 2001 From: Master Date: Wed, 26 Dec 2018 15:16:49 +0200 Subject: [PATCH 1/3] cleanup, fixed handling connection error exception --- sherlock.py | 135 +++++++++++++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 64 deletions(-) diff --git a/sherlock.py b/sherlock.py index cbb0fb24..4f4b22ac 100644 --- a/sherlock.py +++ b/sherlock.py @@ -1,18 +1,12 @@ -import requests -import json import os +import json +import requests # TODO: fix tumblr -def write_to_file(url, fname): - with open(fname, "a") as f: - f.write(url+"\n") - - -def main(): - # Not sure why, but the banner messes up if i put into one print function - print("\033[37;1m .\"\"\"-.") - print("\033[37;1m / \\") +def print_banner(): + print(" .\"\"\"-.") + print(" / \\") print("\033[37;1m ____ _ _ _ | _..--'-.") print("\033[37;1m/ ___|| |__ ___ _ __| | ___ ___| |__ >.`__.-\"\"\;\"`") print("\033[37;1m\___ \| '_ \ / _ \ '__| |/ _ \ / __| |/ / / /( ^\\") @@ -21,67 +15,80 @@ def main(): print("\033[37;1m .'`-._ `.\ | J /") print("\033[37;1m / `--.| \__/\033[0m") - username = input("\033[92;1m[\033[37;1m?\033[92;1m]\033[92;1m Input Username: \033[0m") - print() - - fname = username+".txt" - - if os.path.isfile(fname): - os.remove(fname) - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {}\033[0m".format(fname)) - +def search_accounts(username, social_networks_params): + existing_accounts = list() - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {}\033[0m\033[1;92m on: \033[0m".format(username)) - raw = open("data.json", "r", encoding="utf-8") - data = json.load(raw) - - # User agent is needed because some sites does not - # return the correct information because it thinks that - # we are bot headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } - for social_network in data: - url = data.get(social_network).get("url").format(username) - error_type = data.get(social_network).get("errorType") - - cant_have_period = data.get(social_network).get("noPeriod") - - if cant_have_period == "True": - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + for social_network in social_networks_params: + url = social_networks_params.get(social_network).get("url").format(username) + error_type = social_networks_params.get(social_network).get("errorType") + try: + response = requests.get(url, headers=headers) + except Exception as e: + print(f"error connecting to {url}: {e}") continue - r = requests.get(url, headers=headers) - if error_type == "message": - error = data.get(social_network).get("errorMsg") - # Checks if the error message is in the HTML - if not error in r.text: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - + error = social_networks_params.get(social_network).get("errorMsg") + if error in response.text: + print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") + continue + elif error_type == "status_code": - # Checks if the status code of the repsonse is 404 - if not r.status_code == 404: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + if response.status_code == 404: + print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") + continue elif error_type == "response_url": - error = data.get(social_network).get("errorUrl") - # Checks if the redirect url is the same as the one defined in data.json - if not error in r.url: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{}\033[0m".format(username+".txt")) - -main() + error = social_networks_params.get(social_network).get("errorUrl") + if error in response.url: + print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") + continue + + print(f"\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {social_network}:\033[0m", url) + existing_accounts.append(url) + + return existing_accounts + +def save_account_urls(account_urls, filename): + with open(filename, "a") as file: + file.writelines(account_urls) + +def remove_old_file(filename): + if os.path.isfile(filename): + os.remove(filename) + print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {filename}\033[0m") + +def get_social_networks_params(): + with open("data.json", "r") as file: + social_networks_params = json.load(file) + return social_networks_params + +def main(): + print_banner() + username = input("\033[92;1m[\033[37;1m?\033[92;1m]\033[92;1m Input Username: \033[0m") + print() + + filename = f"{username}.txt" + remove_old_file(filename) + + social_networks_params = get_social_networks_params() + if social_networks_params is None: + print("Error loading social networks parameters. Exiting.") + return + + print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {username}\033[0m\033[1;92m on: \033[0m") + + existing_accounts = search_accounts(username, social_networks_params) + if len(existing_accounts) == 0: + print("No accounts found. Exiting without saving.") + return + + save_account_urls(existing_accounts, filename) + print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{filename}\033[0m") + +if '__main__' in __name__: + main() \ No newline at end of file From 69cdbe83f21137b344ca4adaef3991087a805a82 Mon Sep 17 00:00:00 2001 From: Master Date: Sat, 5 Jan 2019 19:45:19 +0200 Subject: [PATCH 2/3] fix #95: head request may cause 301 redirect which can be interpreted wrong i.e. github's cause --- sherlock.py | 111 ++++------------------------------------------------ 1 file changed, 8 insertions(+), 103 deletions(-) diff --git a/sherlock.py b/sherlock.py index 06b7189b..3640e937 100644 --- a/sherlock.py +++ b/sherlock.py @@ -1,25 +1,3 @@ -<<<<<<< HEAD -import os -import json -import requests - -# TODO: fix tumblr - -def print_banner(): - print(" .\"\"\"-.") - print(" / \\") - print("\033[37;1m ____ _ _ _ | _..--'-.") - print("\033[37;1m/ ___|| |__ ___ _ __| | ___ ___| |__ >.`__.-\"\"\;\"`") - print("\033[37;1m\___ \| '_ \ / _ \ '__| |/ _ \ / __| |/ / / /( ^\\") - print("\033[37;1m ___) | | | | __/ | | | (_) | (__| < '-`) =|-.") - print("\033[37;1m|____/|_| |_|\___|_| |_|\___/ \___|_|\_\ /`--.'--' \ .-.") - print("\033[37;1m .'`-._ `.\ | J /") - print("\033[37;1m / `--.| \__/\033[0m") - -def search_accounts(username, social_networks_params): - existing_accounts = list() - -======= #! /usr/bin/env python3 """ @@ -125,83 +103,10 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): # A user agent is needed because some sites don't # return the correct information since they think that # we are bots ->>>>>>> 0d857030939da206f9e6098241ff80d869ae80e8 headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } -<<<<<<< HEAD - for social_network in social_networks_params: - url = social_networks_params.get(social_network).get("url").format(username) - error_type = social_networks_params.get(social_network).get("errorType") - try: - response = requests.get(url, headers=headers) - except Exception as e: - print(f"error connecting to {url}: {e}") - continue - - if error_type == "message": - error = social_networks_params.get(social_network).get("errorMsg") - if error in response.text: - print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") - continue - - elif error_type == "status_code": - if response.status_code == 404: - print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") - continue - - elif error_type == "response_url": - error = social_networks_params.get(social_network).get("errorUrl") - if error in response.url: - print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {social_network}:\033[93;1m Not Found!") - continue - - print(f"\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {social_network}:\033[0m", url) - existing_accounts.append(url) - - return existing_accounts - -def save_account_urls(account_urls, filename): - with open(filename, "a") as file: - file.writelines(account_urls) - -def remove_old_file(filename): - if os.path.isfile(filename): - os.remove(filename) - print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {filename}\033[0m") - -def get_social_networks_params(): - with open("data.json", "r") as file: - social_networks_params = json.load(file) - return social_networks_params - -def main(): - print_banner() - username = input("\033[92;1m[\033[37;1m?\033[92;1m]\033[92;1m Input Username: \033[0m") - print() - - filename = f"{username}.txt" - remove_old_file(filename) - - social_networks_params = get_social_networks_params() - if social_networks_params is None: - print("Error loading social networks parameters. Exiting.") - return - - print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {username}\033[0m\033[1;92m on: \033[0m") - - existing_accounts = search_accounts(username, social_networks_params) - if len(existing_accounts) == 0: - print("No accounts found. Exiting without saving.") - return - - save_account_urls(existing_accounts, filename) - print(f"\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{filename}\033[0m") - -if '__main__' in __name__: - main() -======= # Load the data data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") with open(data_file_path, "r", encoding="utf-8") as raw: @@ -212,7 +117,7 @@ if '__main__' in __name__: # Create session based on request methodology underlying_session = requests.session() - underlying_request = requests.Request() + underlying_request = requests.Request('GET') if tor or unique_tor: underlying_request = TorRequest() underlying_session = underlying_request.session() @@ -247,11 +152,12 @@ if '__main__' in __name__: url = net_info["url"].format(username) results_site["url_user"] = url - # If only the status_code is needed don't download the body - if net_info["errorType"] == 'status_code': - request_method = session.head - else: - request_method = session.get + # # If only the status_code is needed don't download the body + # if net_info["errorType"] == 'status_code': + # request_method = session.head + # else: + # request_method = session.get + request_method = session.get # This future starts running the request in a new thread, doesn't block the main thread future = request_method(url=url, headers=headers) @@ -475,5 +381,4 @@ def main(): ) if __name__ == "__main__": - main() ->>>>>>> 0d857030939da206f9e6098241ff80d869ae80e8 + main() \ No newline at end of file From 5dc147e39a27af49f3b4bcf96c8fa9b26e840f8e Mon Sep 17 00:00:00 2001 From: Master Date: Sat, 5 Jan 2019 23:05:23 +0200 Subject: [PATCH 3/3] efficient usage of GET request only for github --- sherlock.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sherlock.py b/sherlock.py index 3640e937..ed0792bb 100644 --- a/sherlock.py +++ b/sherlock.py @@ -117,7 +117,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): # Create session based on request methodology underlying_session = requests.session() - underlying_request = requests.Request('GET') + underlying_request = requests.Request() if tor or unique_tor: underlying_request = TorRequest() underlying_session = underlying_request.session() @@ -152,12 +152,11 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): url = net_info["url"].format(username) results_site["url_user"] = url - # # If only the status_code is needed don't download the body - # if net_info["errorType"] == 'status_code': - # request_method = session.head - # else: - # request_method = session.get request_method = session.get + if social_network != "GitHub": + # If only the status_code is needed don't download the body + if net_info["errorType"] == 'status_code': + request_method = session.head # This future starts running the request in a new thread, doesn't block the main thread future = request_method(url=url, headers=headers)