From 8abeaac01776c88e16d1f4bae44dee96e1fcc56c Mon Sep 17 00:00:00 2001 From: Tejasvi Nareddy Date: Tue, 25 Dec 2018 13:14:31 -0500 Subject: [PATCH 1/4] Add gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..077813a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# User files +*.txt + +# Virtual Environment +venv/ \ No newline at end of file From 8ed7a39b8624e2e30c71bf5069020de1b0062e40 Mon Sep 17 00:00:00 2001 From: Tejasvi Nareddy Date: Tue, 25 Dec 2018 13:14:39 -0500 Subject: [PATCH 2/4] Reformat file --- sherlock.py | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/sherlock.py b/sherlock.py index 79d8abe..482dce5 100644 --- a/sherlock.py +++ b/sherlock.py @@ -2,9 +2,10 @@ import requests import json import os + def write_to_file(url, fname): - with open(fname, "a") as f: - f.write(url+"\n") + with open(fname, "a") as f: + f.write(url + "\n") def main(): @@ -22,14 +23,17 @@ def main(): username = input("\033[92;1m[\033[37;1m?\033[92;1m]\033[92;1m Input Username: \033[0m") print() - fname = username+".txt" + fname = username + ".txt" if os.path.isfile(fname): - os.remove(fname) - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {}\033[0m".format(fname)) - + os.remove(fname) + print( + "\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {}\033[0m".format(fname)) - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {}\033[0m\033[1;92m on: \033[0m".format(username)) + print( + "\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {}\033[0m\033[1;92m on: " + "\033[0m".format( + username)) raw = open("data.json", "r") data = json.load(raw) @@ -43,38 +47,38 @@ def main(): for social_network in data: url = data.get(social_network).get("url").format(username) error_type = data.get(social_network).get("errorType") - r = requests.get(url, headers=headers) - + if error_type == "message": error = data.get(social_network).get("errorMsg") - + if not error in r.text: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - + write_to_file(url, fname) + else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + elif error_type == "status_code": - + if not r.status_code == 404: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) - + else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) elif error_type == "response_url": error = data.get(social_network).get("errorMsgInUrl") - + if not error in r.url: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + + print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{}\033[0m".format(username + ".txt")) + - print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{}\033[0m".format(username+".txt")) - -main() \ No newline at end of file +main() From 971a88dd1ecc0b18aa550dbbf7533a81188a57ab Mon Sep 17 00:00:00 2001 From: Tejasvi Nareddy Date: Tue, 25 Dec 2018 14:19:21 -0500 Subject: [PATCH 3/4] Asynchronous requests Improves performance greatly --- requirements.txt | 1 + sherlock.py | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index f229360..7ab0f40 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests +requests_futures \ No newline at end of file diff --git a/sherlock.py b/sherlock.py index 482dce5..44ac0b3 100644 --- a/sherlock.py +++ b/sherlock.py @@ -1,7 +1,14 @@ -import requests +from concurrent.futures import ThreadPoolExecutor +from requests_futures.sessions import FuturesSession import json import os +raw = open("data.json", "r") +data = json.load(raw) + +# Allow 1 thread for each external service, so `len(data)` threads total +session = FuturesSession(executor=ThreadPoolExecutor(max_workers=len(data))) + def write_to_file(url, fname): with open(fname, "a") as f: @@ -34,8 +41,6 @@ def main(): "\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {}\033[0m\033[1;92m on: " "\033[0m".format( username)) - raw = open("data.json", "r") - data = json.load(raw) # User agent is needed because some sites does not # return the correct information because it thinks that @@ -44,16 +49,31 @@ def main(): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } + # Create futures for all requests for social_network in data: - url = data.get(social_network).get("url").format(username) - error_type = data.get(social_network).get("errorType") + url = data[social_network]['url'].format(username) + + # This future starts running the request in a new thread, doesn't block the main thread + future = session.get(url=url, headers=headers) + + # Store future in data for access later + data[social_network]['request'] = future + + # Print results + for social_network in data: + + url = data[social_network]['url'].format(username) + error_type = data[social_network]['errorType'] - r = requests.get(url, headers=headers) + # Retrieve future and ensure it has finished + future = data[social_network]['request'] + response = future.result() + # Print result if error_type == "message": - error = data.get(social_network).get("errorMsg") + error = data[social_network]['errorMsg'] - if not error in r.text: + if not error in response.text: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) @@ -62,7 +82,7 @@ def main(): elif error_type == "status_code": - if not r.status_code == 404: + if not response.status_code == 404: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) @@ -72,7 +92,7 @@ def main(): elif error_type == "response_url": error = data.get(social_network).get("errorMsgInUrl") - if not error in r.url: + if not error in response.url: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) else: From c4c31530fff45073b31db6aa3eab34e44e92061a Mon Sep 17 00:00:00 2001 From: Tejasvi Nareddy Date: Tue, 25 Dec 2018 14:29:41 -0500 Subject: [PATCH 4/4] Remove requests from requirements --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7ab0f40..396d4a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -requests requests_futures \ No newline at end of file