From a63bdb315218b7d99eb6ae78488857bf5bcc20da Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 18:59:33 +0100 Subject: [PATCH 01/15] Created new file 'load_proxies.py' to store functions for reading proxies from files, and checking proxy anonymity. Created the function 'load_proxies_from_csv' which reads proxies from a .csv file to a list of named tuples. --- load_proxies.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 load_proxies.py diff --git a/load_proxies.py b/load_proxies.py new file mode 100644 index 00000000..bfad7a83 --- /dev/null +++ b/load_proxies.py @@ -0,0 +1,22 @@ +import csv +import requests +import time +from collections import namedtuple + +""" +A function which loads proxies from a .csv file, to a list. + +Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'. + +Outputs: list containing proxies stored in named tuples. +""" + + +def load_proxies_from_csv(path_to_list): + Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol']) + + with open(path_to_list, 'r') as csv_file: + csv_reader = csv.DictReader(csv_file) + proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader] + + return proxies \ No newline at end of file From 901074ea4e685abdbf1631ae89e76aea8767428d Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:02:57 +0100 Subject: [PATCH 02/15] Function 'check_proxy', which checks anonymity of a single proxy by analyzing return headers received from a request using the proxy in question. 
--- load_proxies.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/load_proxies.py b/load_proxies.py index bfad7a83..45e826f4 100644 --- a/load_proxies.py +++ b/load_proxies.py @@ -19,4 +19,27 @@ def load_proxies_from_csv(path_to_list): csv_reader = csv.DictReader(csv_file) proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader] - return proxies \ No newline at end of file + return proxies + + +""" +A function which test the proxy by attempting +to make a request to the designated website. + +We use 'wikipedia.org' as a test, since we can test the proxy anonymity +by check if the returning 'X-Client-IP' header matches the proxy ip. +""" + + +def check_proxy(proxy_ip, proxy_port, protocol): + full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}' + proxies = {'http': full_proxy, 'https': full_proxy} + try: + r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4) + return_proxy = r.headers['X-Client-IP'] + if proxy_ip==return_proxy: + return True + else: + return False + except Exception: + return False \ No newline at end of file From 65a040dbbbf748f6666e35951200d369bdd69157 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:07:32 +0100 Subject: [PATCH 03/15] Function 'check_proxy_list' which checks anonymity of each proxy contained in a list of named tuples. Proxies are checked by using the 'check_proxy' function. --- load_proxies.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/load_proxies.py b/load_proxies.py index 45e826f4..d569b150 100644 --- a/load_proxies.py +++ b/load_proxies.py @@ -2,6 +2,7 @@ import csv import requests import time from collections import namedtuple +from colorama import Fore, Style """ A function which loads proxies from a .csv file, to a list. 
@@ -42,4 +43,54 @@ def check_proxy(proxy_ip, proxy_port, protocol): else: return False except Exception: - return False \ No newline at end of file + return False + + +""" +A function which takes in one mandatory argument -> a proxy list in +the format returned by the function 'load_proxies_from_csv'. + +It also takes an optional argument 'max_proxies', if the user wishes to +cap the number of validated proxies. + +Each proxy is tested by the check_proxy function. Since each test is done on +'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, +but are sending successive requests each separated by at least 1 sec. + +Outputs: list containing proxies stored in named tuples. +""" + + +from colorama import Fore, Style + +def check_proxy_list(proxy_list, max_proxies=None): + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Started checking proxies.")) + working_proxies = [] + + # If the user has limited the number of proxies we need, + # the function will stop when the working_proxies + # loads the max number of requested proxies. + if max_proxies != None: + for proxy in proxy_list: + if len(working_proxies) < max_proxies: + time.sleep(1) + if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: + working_proxies.append(proxy) + else: + break + else: + for proxy in proxy_list: + time.sleep(1) + if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: + working_proxies.append(proxy) + + if len(working_proxies) > 0: + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Finished checking proxies.")) + return working_proxies + + else: + raise Exception("Found no working proxies.") \ No newline at end of file From 166d22442382bd8711e80d32d0254c82e65c8bf6 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:11:06 +0100 Subject: [PATCH 04/15] First change to 'sherlock.py' for use of load_proxies module. 
Global variable proxy_list is created, and by default set to an empty list. This variable will store proxies from a proxy list (if this option is used), and will enable different threads to access proxies at the same time. --- sherlock.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sherlock.py b/sherlock.py index 7f8046e3..246b6a30 100644 --- a/sherlock.py +++ b/sherlock.py @@ -40,6 +40,9 @@ BANNER = r''' # TODO: fix tumblr +global proxy_list + +proxy_list = [] class ElapsedFuturesSession(FuturesSession): """ From c5e06b068ee2cce666281fa81e4f91a79bcefa81 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:14:02 +0100 Subject: [PATCH 05/15] Added two new arguments, '--proxy_list'/'-pl' and '--check_proxies'/'-cp', for users to activate options of reading proxies from a document (at this time, only .csv is supported), and check their anonymity before using them. --- sherlock.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sherlock.py b/sherlock.py index 246b6a30..fbc2d1e6 100644 --- a/sherlock.py +++ b/sherlock.py @@ -380,6 +380,16 @@ def main(): parser.add_argument("--json", "-j", metavar="JSON_FILE", dest="json_file", default="data.json", help="Load data from a JSON file or an online, valid, JSON file.") + parser.add_argument("--proxy_list", "-pl", metavar='PROXY_LIST', + action="store", dest="proxy_list", default=None, + help="Make requests over a proxy randomly chosen from a list generated from a .csv file." + ) + parser.add_argument("--check_proxies", "-cp", metavar='CHECK_PROXY', + action="store", dest="check_prox", default=None, + help="To be used with the '--proxy_list' parameter. " + "The script will check if the proxies supplied in the .csv file are working and anonymous." + "Put 0 for no limit on successfully checked proxies, or another number to institute a limit." 
+ ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", From dc32d473e01eeb5ce2f8f3d2997ad78f835fe4d7 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:16:06 +0100 Subject: [PATCH 06/15] Exception will now be raised if either a single proxy or proxy_list options are used along with Tor. --- sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index fbc2d1e6..3d72e5b4 100644 --- a/sherlock.py +++ b/sherlock.py @@ -402,7 +402,7 @@ def main(): # Argument check # TODO regex check on args.proxy - if args.tor and args.proxy != None: + if args.tor and (args.proxy != None or args.proxy_list != None): raise Exception("TOR and Proxy cannot be set in the meantime.") # Make prompts From bd683022b357567e316fd21f15d4837e82bd3487 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:18:24 +0100 Subject: [PATCH 07/15] Exception is raised if both a single proxy and the proxy_list are used. As needed, this can be changed to merging the single proxy with the proxy list, but seems a bit unnecessary at this time. --- sherlock.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock.py b/sherlock.py index 3d72e5b4..786ce14d 100644 --- a/sherlock.py +++ b/sherlock.py @@ -405,6 +405,13 @@ def main(): if args.tor and (args.proxy != None or args.proxy_list != None): raise Exception("TOR and Proxy cannot be set in the meantime.") + # Proxy argument check. + # Does not necessarily need to throw an error, + # since we could join the single proxy with the ones generated from the .csv, + # but it seems unnecessarily complex at this time. 
+ if args.proxy != None and args.proxy_list != None: + raise Exception("A single proxy cannot be used along with proxy list.") + # Make prompts if args.proxy != None: print("Using the proxy: " + args.proxy) From 6cc4e2289872f17ed250445015498fa353788e95 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:22:44 +0100 Subject: [PATCH 08/15] If the user selected --proxy_list option, we attempt to read proxies from the csv, and store the list in global var proxy_list. --- sherlock.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sherlock.py b/sherlock.py index 786ce14d..6c915a0e 100644 --- a/sherlock.py +++ b/sherlock.py @@ -415,6 +415,18 @@ def main(): # Make prompts if args.proxy != None: print("Using the proxy: " + args.proxy) + + global proxy_list + + if args.proxy_list != None: + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Checking username" + + Fore.WHITE + " {}" + + Fore.GREEN + " on:").format(args.proxy_list)) + + proxy_list = load_proxies_from_csv(args.proxy_list) + if args.tor or args.unique_tor: print("Using TOR to make requests") print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") From 2accdcafea95cb435e9b83b4394f9a1e4a728eae Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:26:17 +0100 Subject: [PATCH 09/15] If the user selected --check_proxies option along with --proxy_list option, proxies loaded from the .csv file are checked using the check_proxies function from the load_proxies module. Proxies which pass the test are stored in the proxy_list global var. --- sherlock.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sherlock.py b/sherlock.py index 6c915a0e..726f9082 100644 --- a/sherlock.py +++ b/sherlock.py @@ -427,6 +427,19 @@ def main(): proxy_list = load_proxies_from_csv(args.proxy_list) + # Checking if proxies should be checked for anonymity. 
+ if args.check_prox != None and args.proxy_list != None: + try: + limit = int(args.check_prox) + if limit == 0: + proxy_list = check_proxy_list(proxy_list) + elif limit > 0: + proxy_list = check_proxy_list(proxy_list, limit) + else: + raise ValueError + except ValueError: + raise Exception("Prameter --check_proxies/-cp must be a positive intiger.") + if args.tor or args.unique_tor: print("Using TOR to make requests") print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") From 855f154d9b828d2d494c3bcab059336688b9eb00 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:31:08 +0100 Subject: [PATCH 10/15] If the 'proxy_list' is not empty, we select a random member and pass it as the proxy to the session. If the list is empty, the proxy parameter will be set to args.proxy, which defaults to None if the user did not pass an individual proxy as well. --- sherlock.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sherlock.py b/sherlock.py index 726f9082..3cd34e85 100644 --- a/sherlock.py +++ b/sherlock.py @@ -534,6 +534,15 @@ def main(): username + ".txt"), "w", encoding="utf-8") else: file = open(username + ".txt", "w", encoding="utf-8") + + # We try to ad a random member of the 'proxy_list' var as the proxy of the request. + # If we can't access the list or it is empty, we proceed with args.proxy as the proxy. + try: + random_proxy = random.choice(proxy_list) + proxy = f'{random_proxy.protocol}://{random_proxy.ip}:{random_proxy.port}' + except (NameError, IndexError): + proxy = args.proxy + results = {} results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy) From 6bf8358342990ca2006eabae1168ef16109f286a Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:34:26 +0100 Subject: [PATCH 11/15] Set new parameter 'retry_no' of the 'get_response' function to 3 (can be changed). 
This will be used if retrying a ProxyError. --- sherlock.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index 3cd34e85..67c80fed 100644 --- a/sherlock.py +++ b/sherlock.py @@ -261,7 +261,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr r, error_type, response_time = get_response(request_future=future, error_type=error_type, social_network=social_network, - verbose=verbose) + verbose=verbose, + retry_no=3) # Attempt to get request information try: From 8587d1a8359a6982e963c6989c3bd0d03362783b Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:44:04 +0100 Subject: [PATCH 12/15] If the ProxyError gets raised in the 'get_response' function, the request will be tried with another proxy selected from the 'proxy_list' global var. New parameter 'retry_no' is the number of retries that will be made before throwing a final ProxyError. --- sherlock.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index 67c80fed..1b354dce 100644 --- a/sherlock.py +++ b/sherlock.py @@ -99,13 +99,28 @@ def print_not_found(social_network, response_time, verbose=False): Fore.YELLOW + " Not Found!").format(social_network)) -def get_response(request_future, error_type, social_network, verbose=False): +def get_response(request_future, error_type, social_network, verbose=False, retry_no=None): + + global proxy_list + try: rsp = request_future.result() if rsp.status_code: return rsp, error_type, rsp.elapsed except requests.exceptions.HTTPError as errh: print_error(errh, "HTTP Error:", social_network, verbose) + + # In case our proxy fails, we retry with another proxy. + except requests.exceptions.ProxyError as errp: + if retry_no>0 and len(proxy_list)>0: + #Selecting the new proxy. 
+ new_proxy = random.choice(proxy_list) + new_proxy = f'{new_proxy.protocol}://{new_proxy.ip}:{new_proxy.port}' + print(f'Retrying with {new_proxy}') + request_future.proxy = {'http':new_proxy,'https':new_proxy} + get_response(request_future,error_type, social_network, verbose,retry_no=retry_no-1) + else: + print_error(errp, "Proxy error:", social_network, verbose) except requests.exceptions.ConnectionError as errc: print_error(errc, "Error Connecting:", social_network, verbose) except requests.exceptions.Timeout as errt: From 9f523365f7666c22a6c93a52ce10ebff48ea9a57 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 19:49:43 +0100 Subject: [PATCH 13/15] Finally importing load_proxies module. --- sherlock.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock.py b/sherlock.py index 1b354dce..0e26d5bc 100644 --- a/sherlock.py +++ b/sherlock.py @@ -22,6 +22,7 @@ from colorama import Fore, Style, init from requests_futures.sessions import FuturesSession from torrequest import TorRequest +from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.4.3" From 465f4c85c3a7fcf0de7df36340722251bc50e0e9 Mon Sep 17 00:00:00 2001 From: BlucyBlue Date: Sun, 20 Jan 2019 20:00:40 +0100 Subject: [PATCH 14/15] Typo in printout when reading proxies from file. 
--- sherlock.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sherlock.py b/sherlock.py index 0e26d5bc..ab2cdd3d 100644 --- a/sherlock.py +++ b/sherlock.py @@ -438,9 +438,9 @@ def main(): if args.proxy_list != None: print((Style.BRIGHT + Fore.GREEN + "[" + Fore.YELLOW + "*" + - Fore.GREEN + "] Checking username" + + Fore.GREEN + "] Loading proxies from" + Fore.WHITE + " {}" + - Fore.GREEN + " on:").format(args.proxy_list)) + Fore.GREEN + " :").format(args.proxy_list)) proxy_list = load_proxies_from_csv(args.proxy_list) From f69be0580345f77826e809c2851559741edde016 Mon Sep 17 00:00:00 2001 From: Yahya SayadArbabi Date: Tue, 29 Jan 2019 10:11:18 +0330 Subject: [PATCH 15/15] Rebase & bump version --- sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index ab2cdd3d..adc942d6 100644 --- a/sherlock.py +++ b/sherlock.py @@ -25,7 +25,7 @@ from torrequest import TorRequest from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.4.3" +__version__ = "0.4.4" amount = 0 BANNER = r'''