From 4596f7121eb8edcd4479a9a33c7721a2aff274f8 Mon Sep 17 00:00:00 2001
From: "Christopher K. Hoadley" <chris.hoadley@gmail.com>
Date: Sat, 5 Jan 2019 20:39:56 -0600
Subject: [PATCH 1/5] =?UTF-8?q?Specify=20the=20encoding=20to=20be=20UTF-8?=
 =?UTF-8?q?=20for=20the=20csv=20file.=20=20The=20Raj=C4=8De.net=20site=20w?=
 =?UTF-8?q?as=20causing=20this=20to=20fail.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 sherlock.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sherlock.py b/sherlock.py
index e5146aec..a2434568 100644
--- a/sherlock.py
+++ b/sherlock.py
@@ -359,7 +359,7 @@ def main():
         results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
 
         if args.csv == True:
-            with open(username + ".csv", "w", newline='') as csv_report:
+            with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
                 writer = csv.writer(csv_report)
                 writer.writerow(['username',
                                  'name',

From 33e8beb5b4387a299f7422e7757e41daf994edd0 Mon Sep 17 00:00:00 2001
From: "Christopher K. Hoadley" <chris.hoadley@gmail.com>
Date: Sat, 5 Jan 2019 22:52:53 -0600
Subject: [PATCH 2/5] Add command line option to only run a report on specified
 sites (as opposed to all of them).  Move loading of JSON file out of the
 query logic proper: we need to keep the database and the query logic separate
 anyway for future changes, so this is a first step in the refactoring. 
 Update readme file with latest information.

---
 README.md   | 16 +++++++++++-----
 sherlock.py | 51 +++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 7985691a..6b964136 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 ```bash
 # clone the repo
-$ git clone https://github.com/sdushantha/sherlock.git
+$ git clone https://github.com/TheYahya/sherlock.git
 
 # change the working directory to sherlock
 $ cd sherlock
@@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt
 
 ```bash
 $ python3 sherlock.py --help
-usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor]
+usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor]
+                   [--unique-tor] [--csv] [--site SITE_NAME]
                    USERNAMES [USERNAMES ...]
 
-Sherlock: Find Usernames Across Social Networks (Version 2018.12.30)
+Sherlock: Find Usernames Across Social Networks (Version 0.2.0)
 
 positional arguments:
   USERNAMES             One or more usernames to check with social networks.
@@ -38,9 +39,14 @@ optional arguments:
   --verbose, -v, -d, --debug
                         Display extra debugging information.
   --quiet, -q           Disable debugging information (Default Option).
+  --tor, -t             Make requests over TOR; increases runtime; requires
+                        TOR to be installed and in system path.
+  --unique-tor, -u      Make requests over TOR with new TOR circuit after each
+                        request; increases runtime; requires TOR to be
+                        installed and in system path.
   --csv                 Create Comma-Separated Values (CSV) File.
-  --tor, -t             Make requests over TOR; increases runtime; requires TOR to be installed and in system path.
-  --unique-tor, -u      Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.
+  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple
+                        options to specify more than one site.
 ```
 
 For example, run ```python3 sherlock.py user123```, and all of the accounts
diff --git a/sherlock.py b/sherlock.py
index a2434568..29989b77 100644
--- a/sherlock.py
+++ b/sherlock.py
@@ -10,6 +10,7 @@ networks.
 import csv
 import json
 import os
+import sys
 import platform
 import re
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
@@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession
 from torrequest import TorRequest
 
 module_name = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.1.10"
+__version__ = "0.2.0"
 amount=0
 
 # TODO: fix tumblr
@@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False):
     return None, ""
 
 
-def sherlock(username, verbose=False, tor=False, unique_tor=False):
+def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False):
     """Run Sherlock Analysis.
 
     Checks for existence of username on various social media sites.
@@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
     Keyword Arguments:
     username               -- String indicating username that report
                               should be created against.
+    site_data              -- Dictionary containing all of the site data.
     verbose                -- Boolean indicating whether to give verbose output.
     tor                    -- Boolean indicating whether to use a tor circuit for the requests.
     unique_tor             -- Boolean indicating whether to use a new tor circuit for each request.
@@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
     }
 
-    # Load the data
-    data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
-    with open(data_file_path, "r", encoding="utf-8") as raw:
-        data = json.load(raw)
-
-    # Allow 1 thread for each external service, so `len(data)` threads total
-    executor = ThreadPoolExecutor(max_workers=len(data))
+    # Allow 1 thread for each external service, so `len(site_data)` threads total
+    executor = ThreadPoolExecutor(max_workers=len(site_data))
 
     # Create session based on request methodology
     underlying_session = requests.session()
@@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
     results_total = {}
 
     # First create futures for all requests. This allows for the requests to run in parallel
-    for social_network, net_info in data.items():
+    for social_network, net_info in site_data.items():
 
         # Results from analysis of this specific site
         results_site = {}
@@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
     f = open_file(fname)
 
     # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
-    for social_network, net_info in data.items():
+    for social_network, net_info in site_data.items():
 
         # Retrieve results again
         results_site = results_total.get(social_network)
@@ -330,6 +327,11 @@ def main():
                         action="store_true",  dest="csv", default=False,
                         help="Create Comma-Separated Values (CSV) File."
                        )
+    parser.add_argument("--site",
+                        action="append", metavar='SITE_NAME',
+                        dest="site_list", default=None,
+                        help="Limit analysis to just the listed sites.  Add multiple options to specify more than one site."
+                       )
     parser.add_argument("username",
                         nargs='+', metavar='USERNAMES',
                         action="store",
@@ -353,10 +355,35 @@ def main():
     if args.tor or args.unique_tor:
         print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
 
+    # Load the data
+    data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
+    with open(data_file_path, "r", encoding="utf-8") as raw:
+        site_data_all = json.load(raw)
+
+    if args.site_list is None:
+        # Not desired to look at a sub-set of sites
+        site_data = site_data_all
+    else:
+        # User desires to selectively run queries on a sub-set of the site list.
+
+        # Make sure that the sites are supported & build up pruned site database.
+        site_data = {}
+        site_missing = []
+        for site in args.site_list:
+            if site in site_data_all:
+                site_data[site] = site_data_all[site]
+            else:
+                # Build up list of sites not supported for future error message.
+                site_missing.append(f"'{site}'")
+
+        if site_missing != []:
+            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
+            sys.exit(1)
+
     # Run report on all specified users.
     for username in args.username:
         print()
-        results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
+        results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
 
         if args.csv == True:
             with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:

From 2ed685f53001cdc8fa4bc2f05a88f3db00cb38d5 Mon Sep 17 00:00:00 2001
From: David Jenne <david@jenne.cz>
Date: Sun, 6 Jan 2019 06:59:11 +0100
Subject: [PATCH 3/5] =?UTF-8?q?Changed=20Raj=C4=8De=20to=20Rajce?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

because of an issue in #81
---
 sites.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sites.md b/sites.md
index d94b9164..e8db3c4e 100644
--- a/sites.md
+++ b/sites.md
@@ -104,7 +104,7 @@
 103. [BlackPlanet](http://blackplanet.com/)
 104. [Cloob](https://www.cloob.com/)
 105. [Crunchyroll](https://www.crunchyroll.com/)
-106. [Rajče.net](https://www.rajce.idnes.cz/)
+106. [Rajce.net](https://www.rajce.idnes.cz/)
 107. [VirusTotal](https://www.virustotal.com/)
 108. [WebNode](https://www.webnode.cz/)
 109. [Aptoide](https://en.aptoide.com/)

From 2e8d5ca020d5095b2a4569a545f3f803e8b344b9 Mon Sep 17 00:00:00 2001
From: David Jenne <david@jenne.cz>
Date: Sun, 6 Jan 2019 07:03:17 +0100
Subject: [PATCH 4/5] =?UTF-8?q?changed=20Raj=C4=8De=20to=20Rajce?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data.json b/data.json
index fbcf798b..f102c102 100644
--- a/data.json
+++ b/data.json
@@ -595,7 +595,7 @@
     "urlMain": "https://www.crunchyroll.com/",
     "errorType": "status_code"
   },
-  "Rajče.net": {
+  "Rajce.net": {
     "url": "https://{}.rajce.idnes.cz/",
     "urlMain": "https://www.rajce.idnes.cz/",
     "errorType": "message",

From c165952ca8fbe2912d903dab77ddb65c1701f7ef Mon Sep 17 00:00:00 2001
From: Yahya SayadArbabi <yahya.arbabi@gmail.com>
Date: Sun, 6 Jan 2019 13:55:50 +0330
Subject: [PATCH 5/5] bump version

---
 sherlock.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sherlock.py b/sherlock.py
index 29989b77..1333d1dc 100644
--- a/sherlock.py
+++ b/sherlock.py
@@ -22,7 +22,7 @@ from requests_futures.sessions import FuturesSession
 from torrequest import TorRequest
 
 module_name = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 amount=0
 
 # TODO: fix tumblr