diff --git a/load_proxies.py b/load_proxies.py new file mode 100644 index 00000000..d569b150 --- /dev/null +++ b/load_proxies.py @@ -0,0 +1,96 @@ +import csv +import requests +import time +from collections import namedtuple +from colorama import Fore, Style + +""" +A function which loads proxies from a .csv file, to a list. + +Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'. + +Outputs: list containing proxies stored in named tuples. +""" + + +def load_proxies_from_csv(path_to_list): + Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol']) + + with open(path_to_list, 'r') as csv_file: + csv_reader = csv.DictReader(csv_file) + proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader] + + return proxies + + +""" +A function which tests the proxy by attempting +to make a request to the designated website. + +We use 'wikipedia.org' as a test, since we can test the proxy anonymity +by checking if the returned 'X-Client-IP' header matches the proxy ip. +""" + + +def check_proxy(proxy_ip, proxy_port, protocol): + full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}' + proxies = {'http': full_proxy, 'https': full_proxy} + try: + r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4) + return_proxy = r.headers['X-Client-IP'] + if proxy_ip==return_proxy: + return True + else: + return False + except Exception: + return False + + +""" +A function which takes in one mandatory argument -> a proxy list in +the format returned by the function 'load_proxies_from_csv'. + +It also takes an optional argument 'max_proxies', if the user wishes to +cap the number of validated proxies. + +Each proxy is tested by the check_proxy function. Since each test is done on +'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, +but are sending successive requests each separated by at least 1 sec. + +Outputs: list containing proxies stored in named tuples. 
+""" + + +from colorama import Fore, Style + +def check_proxy_list(proxy_list, max_proxies=None): + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Started checking proxies.")) + working_proxies = [] + + # If the user has limited the number of proxies we need, + # the function will stop when the working_proxies + # loads the max number of requested proxies. + if max_proxies != None: + for proxy in proxy_list: + if len(working_proxies) < max_proxies: + time.sleep(1) + if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: + working_proxies.append(proxy) + else: + break + else: + for proxy in proxy_list: + time.sleep(1) + if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: + working_proxies.append(proxy) + + if len(working_proxies) > 0: + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Finished checking proxies.")) + return working_proxies + + else: + raise Exception("Found no working proxies.") \ No newline at end of file diff --git a/sherlock.py b/sherlock.py index 7f8046e3..adc942d6 100644 --- a/sherlock.py +++ b/sherlock.py @@ -22,9 +22,10 @@ from colorama import Fore, Style, init from requests_futures.sessions import FuturesSession from torrequest import TorRequest +from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.4.3" +__version__ = "0.4.4" amount = 0 BANNER = r''' @@ -40,6 +41,9 @@ BANNER = r''' # TODO: fix tumblr +global proxy_list + +proxy_list = [] class ElapsedFuturesSession(FuturesSession): """ @@ -96,13 +100,28 @@ def print_not_found(social_network, response_time, verbose=False): Fore.YELLOW + " Not Found!").format(social_network)) -def get_response(request_future, error_type, social_network, verbose=False): +def get_response(request_future, error_type, social_network, verbose=False, retry_no=None): + + global proxy_list + try: rsp = request_future.result() if rsp.status_code: 
return rsp, error_type, rsp.elapsed except requests.exceptions.HTTPError as errh: print_error(errh, "HTTP Error:", social_network, verbose) + + # In case our proxy fails, we retry with another proxy. + except requests.exceptions.ProxyError as errp: + if retry_no>0 and len(proxy_list)>0: + #Selecting the new proxy. + new_proxy = random.choice(proxy_list) + new_proxy = f'{new_proxy.protocol}://{new_proxy.ip}:{new_proxy.port}' + print(f'Retrying with {new_proxy}') + request_future.proxy = {'http':new_proxy,'https':new_proxy} + return get_response(request_future,error_type, social_network, verbose,retry_no=retry_no-1) + else: + print_error(errp, "Proxy error:", social_network, verbose) except requests.exceptions.ConnectionError as errc: print_error(errc, "Error Connecting:", social_network, verbose) except requests.exceptions.Timeout as errt: print_error(errt, "Timeout Error:", social_network, verbose) except requests.exceptions.RequestException as err: print_error(err, "Unknown error:", social_network, verbose) return None, "", -1 @@ -258,7 +277,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr r, error_type, response_time = get_response(request_future=future, error_type=error_type, social_network=social_network, - verbose=verbose) + verbose=verbose, + retry_no=3) # Attempt to get request information try: @@ -377,6 +397,16 @@ def main(): parser.add_argument("--json", "-j", metavar="JSON_FILE", dest="json_file", default="data.json", help="Load data from a JSON file or an online, valid, JSON file.") + parser.add_argument("--proxy_list", "-pl", metavar='PROXY_LIST', + action="store", dest="proxy_list", default=None, + help="Make requests over a proxy randomly chosen from a list generated from a .csv file." + ) + parser.add_argument("--check_proxies", "-cp", metavar='CHECK_PROXY', + action="store", dest="check_prox", default=None, + help="To be used with the '--proxy_list' parameter. " + "The script will check if the proxies supplied in the .csv file are working and anonymous." + "Put 0 for no limit on successfully checked proxies, or another number to institute a limit." 
+ ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -389,12 +419,44 @@ def main(): # Argument check # TODO regex check on args.proxy - if args.tor and args.proxy != None: + if args.tor and (args.proxy != None or args.proxy_list != None): raise Exception("TOR and Proxy cannot be set in the meantime.") + # Proxy argument check. + # Does not necessarily need to throw an error, + # since we could join the single proxy with the ones generated from the .csv, + # but it seems unnecessarily complex at this time. + if args.proxy != None and args.proxy_list != None: + raise Exception("A single proxy cannot be used along with proxy list.") + # Make prompts if args.proxy != None: print("Using the proxy: " + args.proxy) + + global proxy_list + + if args.proxy_list != None: + print((Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + "] Loading proxies from" + + Fore.WHITE + " {}" + + Fore.GREEN + " :").format(args.proxy_list)) + + proxy_list = load_proxies_from_csv(args.proxy_list) + + # Checking if proxies should be checked for anonymity. + if args.check_prox != None and args.proxy_list != None: + try: + limit = int(args.check_prox) + if limit == 0: + proxy_list = check_proxy_list(proxy_list) + elif limit > 0: + proxy_list = check_proxy_list(proxy_list, limit) + else: + raise ValueError + except ValueError: + raise Exception("Parameter --check_proxies/-cp must be a non-negative integer.") + if args.tor or args.unique_tor: print("Using TOR to make requests") print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") @@ -489,6 +551,15 @@ def main(): username + ".txt"), "w", encoding="utf-8") else: file = open(username + ".txt", "w", encoding="utf-8") + + # We try to add a random member of the 'proxy_list' var as the proxy of the request. + # If we can't access the list or it is empty, we proceed with args.proxy as the proxy. 
+ try: + random_proxy = random.choice(proxy_list) + proxy = f'{random_proxy.protocol}://{random_proxy.ip}:{random_proxy.port}' + except (NameError, IndexError): + proxy = args.proxy + results = {} results = sherlock(username, site_data, verbose=args.verbose, - tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy) + tor=args.tor, unique_tor=args.unique_tor, proxy=proxy)