From 33e8beb5b4387a299f7422e7757e41daf994edd0 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 5 Jan 2019 22:52:53 -0600 Subject: [PATCH] Add command line option to only run a report on specified sites (as opposed to all of them). Move loading of JSON file out of the query logic proper: we need to keep the database and the query logic separate anyway for future changes, so this is a first step in the refactoring. Update readme file with latest information. --- README.md | 16 +++++++++++----- sherlock.py | 51 +++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7985691a..6b964136 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ ```bash # clone the repo -$ git clone https://github.com/sdushantha/sherlock.git +$ git clone https://github.com/TheYahya/sherlock.git # change the working directory to sherlock $ cd sherlock @@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt ```bash $ python3 sherlock.py --help -usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor] +usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor] + [--unique-tor] [--csv] [--site SITE_NAME] USERNAMES [USERNAMES ...] -Sherlock: Find Usernames Across Social Networks (Version 2018.12.30) +Sherlock: Find Usernames Across Social Networks (Version 0.2.0) positional arguments: USERNAMES One or more usernames to check with social networks. @@ -38,9 +39,14 @@ optional arguments: --verbose, -v, -d, --debug Display extra debugging information. --quiet, -q Disable debugging information (Default Option). + --tor, -t Make requests over TOR; increases runtime; requires + TOR to be installed and in system path. + --unique-tor, -u Make requests over TOR with new TOR circuit after each + request; increases runtime; requires TOR to be + installed and in system path. --csv Create Comma-Separated Values (CSV) File. - --tor, -t Make requests over TOR; increases runtime; requires TOR to be installed and in system path. - --unique-tor, -u Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path. + --site SITE_NAME Limit analysis to just the listed sites. Add multiple + options to specify more than one site. ``` For example, run ```python3 sherlock.py user123```, and all of the accounts diff --git a/sherlock.py b/sherlock.py index a2434568..29989b77 100644 --- a/sherlock.py +++ b/sherlock.py @@ -10,6 +10,7 @@ networks. import csv import json import os +import sys import platform import re from argparse import ArgumentParser, RawDescriptionHelpFormatter @@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.1.10" +__version__ = "0.2.0" amount=0 # TODO: fix tumblr @@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False): return None, "" -def sherlock(username, verbose=False, tor=False, unique_tor=False): +def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False): """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): Keyword Arguments: username -- String indicating username that report should be created against. + site_data -- Dictionary containing all of the site data. verbose -- Boolean indicating whether to give verbose output. tor -- Boolean indicating whether to use a tor circuit for the requests. unique_tor -- Boolean indicating whether to use a new tor circuit for each request. @@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } - # Load the data - data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") - with open(data_file_path, "r", encoding="utf-8") as raw: - data = json.load(raw) - - # Allow 1 thread for each external service, so `len(data)` threads total - executor = ThreadPoolExecutor(max_workers=len(data)) + # Allow 1 thread for each external service, so `len(site_data)` threads total + executor = ThreadPoolExecutor(max_workers=len(site_data)) # Create session based on request methodology underlying_session = requests.session() @@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): results_total = {} # First create futures for all requests. This allows for the requests to run in parallel - for social_network, net_info in data.items(): + for social_network, net_info in site_data.items(): # Results from analysis of this specific site results_site = {} @@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): f = open_file(fname) # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses - for social_network, net_info in data.items(): + for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -330,6 +327,11 @@ def main(): action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) + parser.add_argument("--site", + action="append", metavar='SITE_NAME', + dest="site_list", default=None, + help="Limit analysis to just the listed sites. Add multiple options to specify more than one site." + ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -353,10 +355,35 @@ def main(): if args.tor or args.unique_tor: print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") + # Load the data + data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") + with open(data_file_path, "r", encoding="utf-8") as raw: + site_data_all = json.load(raw) + + if args.site_list is None: + # Not desired to look at a sub-set of sites + site_data = site_data_all + else: + # User desires to selectively run queries on a sub-set of the site list. + + # Make sure that the sites are supported & build up pruned site database. + site_data = {} + site_missing = [] + for site in args.site_list: + if site in site_data_all: + site_data[site] = site_data_all[site] + else: + # Build up list of sites not supported for future error message. + site_missing.append(f"'{site}'") + + if site_missing != []: + print(f"Error: Desired sites not found: {', '.join(site_missing)}.") + sys.exit(1) + # Run report on all specified users. for username in args.username: print() - results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) + results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) if args.csv == True: with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: