From 4596f7121eb8edcd4479a9a33c7721a2aff274f8 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 5 Jan 2019 20:39:56 -0600 Subject: [PATCH 1/5] =?UTF-8?q?Specify=20the=20encoding=20to=20be=20UTF-8?= =?UTF-8?q?=20for=20the=20csv=20file.=20=20The=20Raj=C4=8De.net=20site=20w?= =?UTF-8?q?as=20causing=20this=20to=20fail.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index e5146aec..a2434568 100644 --- a/sherlock.py +++ b/sherlock.py @@ -359,7 +359,7 @@ def main(): results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) if args.csv == True: - with open(username + ".csv", "w", newline='') as csv_report: + with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: writer = csv.writer(csv_report) writer.writerow(['username', 'name', From 33e8beb5b4387a299f7422e7757e41daf994edd0 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 5 Jan 2019 22:52:53 -0600 Subject: [PATCH 2/5] Add command line option to only run a report on specified sites (as opposed to all of them). Move loading of JSON file out of the query logic proper: we need to keep the database and the query logic separate anyway for future changes, so this is a first step in the refactoring. Update readme file with latest information. 
--- README.md | 16 +++++++++++----- sherlock.py | 51 +++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7985691a..6b964136 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ ```bash # clone the repo -$ git clone https://github.com/sdushantha/sherlock.git +$ git clone https://github.com/TheYahya/sherlock.git # change the working directory to sherlock $ cd sherlock @@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt ```bash $ python3 sherlock.py --help -usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor] +usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor] + [--unique-tor] [--csv] [--site SITE_NAME] USERNAMES [USERNAMES ...] -Sherlock: Find Usernames Across Social Networks (Version 2018.12.30) +Sherlock: Find Usernames Across Social Networks (Version 0.2.0) positional arguments: USERNAMES One or more usernames to check with social networks. @@ -38,9 +39,14 @@ optional arguments: --verbose, -v, -d, --debug Display extra debugging information. --quiet, -q Disable debugging information (Default Option). + --tor, -t Make requests over TOR; increases runtime; requires + TOR to be installed and in system path. + --unique-tor, -u Make requests over TOR with new TOR circuit after each + request; increases runtime; requires TOR to be + installed and in system path. --csv Create Comma-Separated Values (CSV) File. - --tor, -t Make requests over TOR; increases runtime; requires TOR to be installed and in system path. - --unique-tor, -u Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path. + --site SITE_NAME Limit analysis to just the listed sites. Add multiple + options to specify more than one site. 
``` For example, run ```python3 sherlock.py user123```, and all of the accounts diff --git a/sherlock.py b/sherlock.py index a2434568..29989b77 100644 --- a/sherlock.py +++ b/sherlock.py @@ -10,6 +10,7 @@ networks. import csv import json import os +import sys import platform import re from argparse import ArgumentParser, RawDescriptionHelpFormatter @@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.1.10" +__version__ = "0.2.0" amount=0 # TODO: fix tumblr @@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False): return None, "" -def sherlock(username, verbose=False, tor=False, unique_tor=False): +def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False): """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): Keyword Arguments: username -- String indicating username that report should be created against. + site_data -- Dictionary containing all of the site data. verbose -- Boolean indicating whether to give verbose output. tor -- Boolean indicating whether to use a tor circuit for the requests. unique_tor -- Boolean indicating whether to use a new tor circuit for each request. 
@@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } - # Load the data - data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") - with open(data_file_path, "r", encoding="utf-8") as raw: - data = json.load(raw) - - # Allow 1 thread for each external service, so `len(data)` threads total - executor = ThreadPoolExecutor(max_workers=len(data)) + # Allow 1 thread for each external service, so `len(site_data)` threads total + executor = ThreadPoolExecutor(max_workers=len(site_data)) # Create session based on request methodology underlying_session = requests.session() @@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): results_total = {} # First create futures for all requests. This allows for the requests to run in parallel - for social_network, net_info in data.items(): + for social_network, net_info in site_data.items(): # Results from analysis of this specific site results_site = {} @@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): f = open_file(fname) # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses - for social_network, net_info in data.items(): + for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -330,6 +327,11 @@ def main(): action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) + parser.add_argument("--site", + action="append", metavar='SITE_NAME', + dest="site_list", default=None, + help="Limit analysis to just the listed sites. Add multiple options to specify more than one site." 
+ ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -353,10 +355,35 @@ def main(): if args.tor or args.unique_tor: print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") + # Load the data + data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json") + with open(data_file_path, "r", encoding="utf-8") as raw: + site_data_all = json.load(raw) + + if args.site_list is None: + # Not desired to look at a sub-set of sites + site_data = site_data_all + else: + # User desires to selectively run queries on a sub-set of the site list. + + # Make sure that the sites are supported & build up pruned site database. + site_data = {} + site_missing = [] + for site in args.site_list: + if site in site_data_all: + site_data[site] = site_data_all[site] + else: + # Build up list of sites not supported for future error message. + site_missing.append(f"'{site}'") + + if site_missing != []: + print(f"Error: Desired sites not found: {', '.join(site_missing)}.") + sys.exit(1) + # Run report on all specified users. for username in args.username: print() - results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) + results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) if args.csv == True: with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: From 2ed685f53001cdc8fa4bc2f05a88f3db00cb38d5 Mon Sep 17 00:00:00 2001 From: David Jenne Date: Sun, 6 Jan 2019 06:59:11 +0100 Subject: [PATCH 3/5] =?UTF-8?q?Changed=20Raj=C4=8De=20to=20Rajce?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit because of an issue in #81 --- sites.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sites.md b/sites.md index d94b9164..e8db3c4e 100644 --- a/sites.md +++ b/sites.md @@ -104,7 +104,7 @@ 103. 
[BlackPlanet](http://blackplanet.com/) 104. [Cloob](https://www.cloob.com/) 105. [Crunchyroll](https://www.crunchyroll.com/) -106. [Rajče.net](https://www.rajce.idnes.cz/) +106. [Rajce.net](https://www.rajce.idnes.cz/) 107. [VirusTotal](https://www.virustotal.com/) 108. [WebNode](https://www.webnode.cz/) 109. [Aptoide](https://en.aptoide.com/) From 2e8d5ca020d5095b2a4569a545f3f803e8b344b9 Mon Sep 17 00:00:00 2001 From: David Jenne Date: Sun, 6 Jan 2019 07:03:17 +0100 Subject: [PATCH 4/5] =?UTF-8?q?changed=20Raj=C4=8De=20to=20Rajce?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.json b/data.json index fbcf798b..f102c102 100644 --- a/data.json +++ b/data.json @@ -595,7 +595,7 @@ "urlMain": "https://www.crunchyroll.com/", "errorType": "status_code" }, - "Rajče.net": { + "Rajce.net": { "url": "https://{}.rajce.idnes.cz/", "urlMain": "https://www.rajce.idnes.cz/", "errorType": "message", From c165952ca8fbe2912d903dab77ddb65c1701f7ef Mon Sep 17 00:00:00 2001 From: Yahya SayadArbabi Date: Sun, 6 Jan 2019 13:55:50 +0330 Subject: [PATCH 5/5] bump version --- sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock.py b/sherlock.py index 29989b77..1333d1dc 100644 --- a/sherlock.py +++ b/sherlock.py @@ -22,7 +22,7 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.2.0" +__version__ = "0.2.1" amount=0 # TODO: fix tumblr