Merge branch 'BlucyBlue-master'

pull/157/head
Yahya SayadArbabi 6 years ago
commit 916fdd0603

@@ -0,0 +1,96 @@
import csv
import requests
import time
from collections import namedtuple
from colorama import Fore, Style
"""
A function which loads proxies from a .csv file, to a list.
Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'.
Outputs: list containing proxies stored in named tuples.
"""
def load_proxies_from_csv(path_to_list):
Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol'])
with open(path_to_list, 'r') as csv_file:
csv_reader = csv.DictReader(csv_file)
proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader]
return proxies
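
Usage sketch (the file name and its rows are hypothetical; the loader only assumes the three header fields 'ip', 'port' and 'protocol'):

# proxies.csv (example contents):
#   ip,port,protocol
#   203.0.113.5,8080,http
#   198.51.100.7,3128,https
proxies = load_proxies_from_csv('proxies.csv')
print(proxies[0].ip, proxies[0].port, proxies[0].protocol)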
"""
A function which test the proxy by attempting
to make a request to the designated website.
We use 'wikipedia.org' as a test, since we can test the proxy anonymity
by check if the returning 'X-Client-IP' header matches the proxy ip.
"""
def check_proxy(proxy_ip, proxy_port, protocol):
full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}'
proxies = {'http': full_proxy, 'https': full_proxy}
try:
r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4)
return_proxy = r.headers['X-Client-IP']
if proxy_ip==return_proxy:
return True
else:
return False
except Exception:
return False
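
A quick single-proxy check might look like this (the address is a placeholder, not a live proxy):

is_usable = check_proxy('203.0.113.5', '8080', 'http')
print('working and anonymous' if is_usable else 'failed the check')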
"""
A function which takes in one mandatory argument -> a proxy list in
the format returned by the function 'load_proxies_from_csv'.
It also takes an optional argument 'max_proxies', if the user wishes to
cap the number of validated proxies.
Each proxy is tested by the check_proxy function. Since each test is done on
'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules,
but are sending successive requests each separated by at least 1 sec.
Outputs: list containing proxies stored in named tuples.
"""
from colorama import Fore, Style
def check_proxy_list(proxy_list, max_proxies=None):
print((Style.BRIGHT + Fore.GREEN + "[" +
Fore.YELLOW + "*" +
Fore.GREEN + "] Started checking proxies."))
working_proxies = []
# If the user has limited the number of proxies we need,
# the function will stop when the working_proxies
# loads the max number of requested proxies.
if max_proxies != None:
for proxy in proxy_list:
if len(working_proxies) < max_proxies:
time.sleep(1)
if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:
working_proxies.append(proxy)
else:
break
else:
for proxy in proxy_list:
time.sleep(1)
if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:
working_proxies.append(proxy)
if len(working_proxies) > 0:
print((Style.BRIGHT + Fore.GREEN + "[" +
Fore.YELLOW + "*" +
Fore.GREEN + "] Finished checking proxies."))
return working_proxies
else:
raise Exception("Found no working proxies.")
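
End-to-end sketch, assuming the hypothetical proxies.csv from above; since check_proxy_list raises when nothing passes, the call is wrapped in try/except:

try:
    candidates = load_proxies_from_csv('proxies.csv')
    working = check_proxy_list(candidates, max_proxies=5)
    print(f'Kept {len(working)} working proxies.')
except Exception as err:
    print(err)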

@@ -22,9 +22,10 @@ from colorama import Fore, Style, init
from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
+from load_proxies import load_proxies_from_csv, check_proxy_list

module_name = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.4.3"
+__version__ = "0.4.4"
amount = 0
BANNER = r'''
@@ -40,6 +41,9 @@ BANNER = r'''
# TODO: fix tumblr

+global proxy_list
+proxy_list = []
class ElapsedFuturesSession(FuturesSession):
"""
@@ -96,13 +100,28 @@ def print_not_found(social_network, response_time, verbose=False):
                    Fore.YELLOW + " Not Found!").format(social_network))


-def get_response(request_future, error_type, social_network, verbose=False):
+def get_response(request_future, error_type, social_network, verbose=False, retry_no=None):
+    global proxy_list
+
    try:
        rsp = request_future.result()
        if rsp.status_code:
            return rsp, error_type, rsp.elapsed
    except requests.exceptions.HTTPError as errh:
        print_error(errh, "HTTP Error:", social_network, verbose)
+    # In case our proxy fails, we retry with another proxy.
+    except requests.exceptions.ProxyError as errp:
+        if retry_no is not None and retry_no > 0 and len(proxy_list) > 0:
+            # Select the new proxy at random from the global list.
+            new_proxy = random.choice(proxy_list)
+            new_proxy = f'{new_proxy.protocol}://{new_proxy.ip}:{new_proxy.port}'
+            print(f'Retrying with {new_proxy}')
+            request_future.proxy = {'http': new_proxy, 'https': new_proxy}
+            # Return the retried result so it propagates to the caller.
+            return get_response(request_future, error_type, social_network,
+                                verbose, retry_no=retry_no - 1)
+        else:
+            print_error(errp, "Proxy error:", social_network, verbose)
    except requests.exceptions.ConnectionError as errc:
        print_error(errc, "Error Connecting:", social_network, verbose)
    except requests.exceptions.Timeout as errt:
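
Reassigning request_future.proxy leans on requests_futures internals and may not re-issue the request by itself; purely as an illustration of the same retry-with-fallback-proxy idea using plain requests (function name and structure are mine, not part of the commit; proxy_pool holds the named tuples produced by load_proxies_from_csv):

import random
import requests

def fetch_with_proxy_retry(url, proxy_pool, retries=3, timeout=4):
    # Try the request through a randomly chosen proxy from the pool,
    # switching to another random proxy whenever a ProxyError is raised.
    for _ in range(retries):
        chosen = random.choice(proxy_pool)
        proxy = f'{chosen.protocol}://{chosen.ip}:{chosen.port}'
        try:
            return requests.get(url,
                                proxies={'http': proxy, 'https': proxy},
                                timeout=timeout)
        except requests.exceptions.ProxyError:
            continue
    raise requests.exceptions.ProxyError(f'All {retries} proxy attempts failed.')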
@@ -258,7 +277,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr
            r, error_type, response_time = get_response(request_future=future,
                                                        error_type=error_type,
                                                        social_network=social_network,
-                                                       verbose=verbose)
+                                                       verbose=verbose,
+                                                       retry_no=3)

            # Attempt to get request information
            try:
@@ -377,6 +397,16 @@ def main():
    parser.add_argument("--json", "-j", metavar="JSON_FILE",
                        dest="json_file", default="data.json",
                        help="Load data from a JSON file or an online, valid, JSON file.")
+   parser.add_argument("--proxy_list", "-pl", metavar='PROXY_LIST',
+                       action="store", dest="proxy_list", default=None,
+                       help="Make requests over a proxy randomly chosen from a list generated from a .csv file."
+                       )
+   parser.add_argument("--check_proxies", "-cp", metavar='CHECK_PROXY',
+                       action="store", dest="check_prox", default=None,
+                       help="To be used with the '--proxy_list' parameter. "
+                            "The script will check if the proxies supplied in the .csv file are working and anonymous. "
+                            "Put 0 for no limit on successfully checked proxies, or another number to set a limit."
+                       )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
@@ -389,12 +419,44 @@
    # Argument check
    # TODO regex check on args.proxy
-   if args.tor and args.proxy != None:
+   if args.tor and (args.proxy != None or args.proxy_list != None):
        raise Exception("Tor and a proxy cannot be used at the same time.")

+   # Proxy-list argument check.
+   # This does not strictly have to be an error, since we could join the
+   # single proxy with the ones loaded from the .csv, but that seems
+   # unnecessarily complex at this time.
+   if args.proxy != None and args.proxy_list != None:
+       raise Exception("A single proxy cannot be used along with a proxy list.")

    # Make prompts
    if args.proxy != None:
        print("Using the proxy: " + args.proxy)

+   global proxy_list
+
+   if args.proxy_list != None:
+       print((Style.BRIGHT + Fore.GREEN + "[" +
+              Fore.YELLOW + "*" +
+              Fore.GREEN + "] Loading proxies from" +
+              Fore.WHITE + " {}" +
+              Fore.GREEN + " :").format(args.proxy_list))
+
+       proxy_list = load_proxies_from_csv(args.proxy_list)
+
+   # Check whether the loaded proxies should be validated for anonymity.
+   if args.check_prox != None and args.proxy_list != None:
+       try:
+           limit = int(args.check_prox)
+           if limit == 0:
+               proxy_list = check_proxy_list(proxy_list)
+           elif limit > 0:
+               proxy_list = check_proxy_list(proxy_list, limit)
+           else:
+               raise ValueError
+       except ValueError:
+           raise Exception("Parameter --check_proxies/-cp must be a positive integer.")

    if args.tor or args.unique_tor:
        print("Using TOR to make requests")
        print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
@@ -489,6 +551,15 @@ def main():
                                     username + ".txt"), "w", encoding="utf-8")
        else:
            file = open(username + ".txt", "w", encoding="utf-8")

+       # We try to add a random member of the 'proxy_list' variable as the
+       # proxy of the request. If we cannot access the list or it is empty,
+       # we proceed with args.proxy as the proxy.
+       try:
+           random_proxy = random.choice(proxy_list)
+           proxy = f'{random_proxy.protocol}://{random_proxy.ip}:{random_proxy.port}'
+       except (NameError, IndexError):
+           proxy = args.proxy

        results = {}
        results = sherlock(username, site_data, verbose=args.verbose,
-                          tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy)
+                          tor=args.tor, unique_tor=args.unique_tor, proxy=proxy)
