Merge pull request #101 from TheYahya/hoadlck-specify-sites

Command Line Option To Specify Sub-Set Of Sites To Query
pull/102/head
Christopher Kent Hoadley 6 years ago committed by GitHub
commit 740f20ff05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -11,7 +11,7 @@
```bash ```bash
# clone the repo # clone the repo
$ git clone https://github.com/sdushantha/sherlock.git $ git clone https://github.com/TheYahya/sherlock.git
# change the working directory to sherlock # change the working directory to sherlock
$ cd sherlock $ cd sherlock
@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt
```bash ```bash
$ python3 sherlock.py --help $ python3 sherlock.py --help
usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor] usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor]
[--unique-tor] [--csv] [--site SITE_NAME]
USERNAMES [USERNAMES ...] USERNAMES [USERNAMES ...]
Sherlock: Find Usernames Across Social Networks (Version 2018.12.30) Sherlock: Find Usernames Across Social Networks (Version 0.2.0)
positional arguments: positional arguments:
USERNAMES One or more usernames to check with social networks. USERNAMES One or more usernames to check with social networks.
@ -38,9 +39,14 @@ optional arguments:
--verbose, -v, -d, --debug --verbose, -v, -d, --debug
Display extra debugging information. Display extra debugging information.
--quiet, -q Disable debugging information (Default Option). --quiet, -q Disable debugging information (Default Option).
--tor, -t Make requests over TOR; increases runtime; requires
TOR to be installed and in system path.
--unique-tor, -u Make requests over TOR with new TOR circuit after each
request; increases runtime; requires TOR to be
installed and in system path.
--csv Create Comma-Separated Values (CSV) File. --csv Create Comma-Separated Values (CSV) File.
--tor, -t Make requests over TOR; increases runtime; requires TOR to be installed and in system path. --site SITE_NAME Limit analysis to just the listed sites. Add multiple
--unique-tor, -u Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path. options to specify more than one site.
``` ```
For example, run ```python3 sherlock.py user123```, and all of the accounts For example, run ```python3 sherlock.py user123```, and all of the accounts

@ -10,6 +10,7 @@ networks.
import csv import csv
import json import json
import os import os
import sys
import platform import platform
import re import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter from argparse import ArgumentParser, RawDescriptionHelpFormatter
@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession
from torrequest import TorRequest from torrequest import TorRequest
module_name = "Sherlock: Find Usernames Across Social Networks" module_name = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.1.10" __version__ = "0.2.0"
amount=0 amount=0
# TODO: fix tumblr # TODO: fix tumblr
@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False):
return None, "" return None, ""
def sherlock(username, verbose=False, tor=False, unique_tor=False): def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False):
"""Run Sherlock Analysis. """Run Sherlock Analysis.
Checks for existence of username on various social media sites. Checks for existence of username on various social media sites.
@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
Keyword Arguments: Keyword Arguments:
username -- String indicating username that report username -- String indicating username that report
should be created against. should be created against.
site_data -- Dictionary containing all of the site data.
verbose -- Boolean indicating whether to give verbose output. verbose -- Boolean indicating whether to give verbose output.
tor -- Boolean indicating whether to use a tor circuit for the requests. tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request. unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
} }
# Load the data # Allow 1 thread for each external service, so `len(site_data)` threads total
data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") executor = ThreadPoolExecutor(max_workers=len(site_data))
with open(data_file_path, "r", encoding="utf-8") as raw:
data = json.load(raw)
# Allow 1 thread for each external service, so `len(data)` threads total
executor = ThreadPoolExecutor(max_workers=len(data))
# Create session based on request methodology # Create session based on request methodology
underlying_session = requests.session() underlying_session = requests.session()
@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
results_total = {} results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel # First create futures for all requests. This allows for the requests to run in parallel
for social_network, net_info in data.items(): for social_network, net_info in site_data.items():
# Results from analysis of this specific site # Results from analysis of this specific site
results_site = {} results_site = {}
@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
f = open_file(fname) f = open_file(fname)
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in data.items(): for social_network, net_info in site_data.items():
# Retrieve results again # Retrieve results again
results_site = results_total.get(social_network) results_site = results_total.get(social_network)
@ -330,6 +327,11 @@ def main():
action="store_true", dest="csv", default=False, action="store_true", dest="csv", default=False,
help="Create Comma-Separated Values (CSV) File." help="Create Comma-Separated Values (CSV) File."
) )
parser.add_argument("--site",
action="append", metavar='SITE_NAME',
dest="site_list", default=None,
help="Limit analysis to just the listed sites. Add multiple options to specify more than one site."
)
parser.add_argument("username", parser.add_argument("username",
nargs='+', metavar='USERNAMES', nargs='+', metavar='USERNAMES',
action="store", action="store",
@ -353,10 +355,35 @@ def main():
if args.tor or args.unique_tor: if args.tor or args.unique_tor:
print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
# Load the data
data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
with open(data_file_path, "r", encoding="utf-8") as raw:
site_data_all = json.load(raw)
if args.site_list is None:
# Not desired to look at a sub-set of sites
site_data = site_data_all
else:
# User desires to selectively run queries on a sub-set of the site list.
# Make sure that the sites are supported & build up pruned site database.
site_data = {}
site_missing = []
for site in args.site_list:
if site in site_data_all:
site_data[site] = site_data_all[site]
else:
# Build up list of sites not supported for future error message.
site_missing.append(f"'{site}'")
if site_missing != []:
print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
sys.exit(1)
# Run report on all specified users. # Run report on all specified users.
for username in args.username: for username in args.username:
print() print()
results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
if args.csv == True: if args.csv == True:
with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:

Loading…
Cancel
Save