Merge branch 'Czechball/patch-1'

6 years ago · beaa8acd42
parent 81d15482f7 c165952ca8
commit beaa8acd42
4 changed files with 53 additions and 20 deletions
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 ```bash
 # clone the repo
-$ git clone https://github.com/sdushantha/sherlock.git
+$ git clone https://github.com/TheYahya/sherlock.git
 # change the working directory to sherlock
 $ cd sherlock
@@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt
 ```bash
 $ python3 sherlock.py --help
-usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor]
+usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor]
+                   [--unique-tor] [--csv] [--site SITE_NAME]
                   USERNAMES [USERNAMES ...]
-Sherlock: Find Usernames Across Social Networks (Version 2018.12.30)
+Sherlock: Find Usernames Across Social Networks (Version 0.2.0)
 positional arguments:
  USERNAMES             One or more usernames to check with social networks.
@@ -38,9 +39,14 @@ optional arguments:
  --verbose, -v, -d, --debug
                        Display extra debugging information.
  --quiet, -q           Disable debugging information (Default Option).
+  --tor, -t             Make requests over TOR; increases runtime; requires
+                        TOR to be installed and in system path.
+  --unique-tor, -u      Make requests over TOR with new TOR circuit after each
+                        request; increases runtime; requires TOR to be
+                        installed and in system path.
  --csv                 Create Comma-Separated Values (CSV) File.
-  --tor, -t             Make requests over TOR; increases runtime; requires TOR to be installed and in system path.
+  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple
-  --unique-tor, -u      Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.
+                        options to specify more than one site.
 ```
 For example, run ```python3 sherlock.py user123```, and all of the accounts
--- a/data.json
+++ b/data.json
@@ -595,7 +595,7 @@
    "urlMain": "https://www.crunchyroll.com/",
    "errorType": "status_code"
  },
-  "Rajče.net": {
+  "Rajce.net": {
    "url": "https://{}.rajce.idnes.cz/",
    "urlMain": "https://www.rajce.idnes.cz/",
    "errorType": "message",
--- a/sherlock.py
+++ b/sherlock.py
@@ -10,6 +10,7 @@ networks.
 import csv
 import json
 import os
+import sys
 import platform
 import re
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
@@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession
 from torrequest import TorRequest
 module_name = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.1.10"
+__version__ = "0.2.1"
 amount=0
 # TODO: fix tumblr
@@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False):
    return None, ""
-def sherlock(username, verbose=False, tor=False, unique_tor=False):
+def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False):
    """Run Sherlock Analysis.
    Checks for existence of username on various social media sites.
@@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
+    site_data              -- Dictionary containing all of the site data.
    verbose                -- Boolean indicating whether to give verbose output.
    tor                    -- Boolean indicating whether to use a tor circuit for the requests.
    unique_tor             -- Boolean indicating whether to use a new tor circuit for each request.
@@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
    }
-    # Load the data
+    # Allow 1 thread for each external service, so `len(site_data)` threads total
-    data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
+    executor = ThreadPoolExecutor(max_workers=len(site_data))
-    with open(data_file_path, "r", encoding="utf-8") as raw:
-        data = json.load(raw)
-    # Allow 1 thread for each external service, so `len(data)` threads total
-    executor = ThreadPoolExecutor(max_workers=len(data))
    # Create session based on request methodology
    underlying_session = requests.session()
@@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
    results_total = {}
    # First create futures for all requests. This allows for the requests to run in parallel
-    for social_network, net_info in data.items():
+    for social_network, net_info in site_data.items():
        # Results from analysis of this specific site
        results_site = {}
@@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
    f = open_file(fname)
    # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
-    for social_network, net_info in data.items():
+    for social_network, net_info in site_data.items():
        # Retrieve results again
        results_site = results_total.get(social_network)
@@ -330,6 +327,11 @@ def main():
                        action="store_true",  dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                       )
+    parser.add_argument("--site",
+                        action="append", metavar='SITE_NAME',
+                        dest="site_list", default=None,
+                        help="Limit analysis to just the listed sites.  Add multiple options to specify more than one site."
+                       )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
@@ -353,13 +355,38 @@ def main():
    if args.tor or args.unique_tor:
        print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
+    # Load the data
+    data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
+    with open(data_file_path, "r", encoding="utf-8") as raw:
+        site_data_all = json.load(raw)
+    if args.site_list is None:
+        # Not desired to look at a sub-set of sites
+        site_data = site_data_all
+    else:
+        # User desires to selectively run queries on a sub-set of the site list.
+        # Make sure that the sites are supported & build up pruned site database.
+        site_data = {}
+        site_missing = []
+        for site in args.site_list:
+            if site in site_data_all:
+                site_data[site] = site_data_all[site]
+            else:
+                # Build up list of sites not supported for future error message.
+                site_missing.append(f"'{site}'")
+        if site_missing != []:
+            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
+            sys.exit(1)
    # Run report on all specified users.
    for username in args.username:
        print()
-        results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
+        results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
        if args.csv == True:
-            with open(username + ".csv", "w", newline='') as csv_report:
+            with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(['username',
                                 'name',
--- a/sites.md
+++ b/sites.md
@@ -104,7 +104,7 @@
 103. [BlackPlanet](http://blackplanet.com/)
 104. [Cloob](https://www.cloob.com/)
 105. [Crunchyroll](https://www.crunchyroll.com/)
-106. [Rajče.net](https://www.rajce.idnes.cz/)
+106. [Rajce.net](https://www.rajce.idnes.cz/)
 107. [VirusTotal](https://www.virustotal.com/)
 108. [WebNode](https://www.webnode.cz/)
 109. [Aptoide](https://en.aptoide.com/)