From ae657460c8c069bdc2767f27f02b2d26145c83fb Mon Sep 17 00:00:00 2001
From: "Christopher K. Hoadley" <chris.hoadley@gmail.com>
Date: Tue, 22 Jan 2019 20:16:19 -0600
Subject: [PATCH 1/3] Add coverage tests for all sites that use the response
 URL detection method.  These tests currently fail because Sherlock does not
 yet handle all of these sites properly.

---
 tests/all.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/tests/all.py b/tests/all.py
index 87d3b9b..3d06281 100644
--- a/tests/all.py
+++ b/tests/all.py
@@ -23,7 +23,7 @@ class SherlockDetectTests(SherlockBaseTest):
         """
 
         self.username_check(['jack'],  ['Twitter'],   exist_check=True)
-        #self.username_check(['dfox'],  ['devRant'],   exist_check=True)
+        self.username_check(['dfox'],  ['devRant'],   exist_check=True)
         self.username_check(['blue'],  ['Pinterest'], exist_check=True)
         self.username_check(['kevin'], ['Instagram'], exist_check=True)
         self.username_check(['zuck'],  ['Facebook'],  exist_check=True)
@@ -92,3 +92,51 @@ class SherlockDetectTests(SherlockBaseTest):
                            )
 
         return
+
+
+class SherlockSiteCoverageTests(SherlockBaseTest):
+    def test_coverage_false_via_response_url(self):
+        """Test Username Does Not Exist Site Coverage (Via Response URL).
+
+        This test checks all sites with the "response URL" detection mechanism
+        to ensure that a Username that does not exist is reported that way.
+
+        Keyword Arguments:
+        self                   -- This object.
+
+        Return Value:
+        N/A.
+        Will trigger an assert if detection mechanism did not work as expected.
+        """
+
+        self.username_check(['noonewouldeverusethis7'],
+                            ["Pinterest", "iMGSRC.RU", "Pastebin",
+                             "WordPress", "devRant", "ImageShack", "MeetMe"
+                            ],
+                            exist_check=False
+                           )
+
+        return
+
+    def test_coverage_true_via_response_url(self):
+        """Test Username Does Exist Site Coverage (Via Response URL).
+
+        This test checks all sites with the "response URL" detection mechanism
+        to ensure that a Username that does exist is reported that way.
+
+        Keyword Arguments:
+        self                   -- This object.
+
+        Return Value:
+        N/A.
+        Will trigger an assert if detection mechanism did not work as expected.
+        """
+
+        self.username_check(['blue'],
+                            ["Pinterest", "iMGSRC.RU", "Pastebin",
+                             "WordPress", "devRant", "ImageShack", "MeetMe"
+                            ],
+                            exist_check=True
+                           )
+
+        return

From bb66d6a992debdd27323c848c3ccee8d8d35aa7e Mon Sep 17 00:00:00 2001
From: "Christopher K. Hoadley" <chris.hoadley@gmail.com>
Date: Tue, 22 Jan 2019 20:19:34 -0600
Subject: [PATCH 2/3] Update Pinterest and WordPress user URLs to exactly match
 the URL the site ends up at.  If the request does not have the trailing "/",
 then the site redirects us to the URL that does.

---
 data.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data.json b/data.json
index f47095f..5b3122c 100644
--- a/data.json
+++ b/data.json
@@ -41,7 +41,7 @@
     "errorMsg":"page not found"
   },
   "Pinterest": {
-    "url": "https://www.pinterest.com/{}",
+    "url": "https://www.pinterest.com/{}/",
     "urlMain": "https://www.pinterest.com/",
     "errorType": "response_url",
     "errorUrl": "https://www.pinterest.com/?show_error=true"
@@ -415,7 +415,7 @@
     "errorType": "status_code"
   },
   "WordPress": {
-    "url": "https://{}.wordpress.com",
+    "url": "https://{}.wordpress.com/",
     "urlMain": "https://wordpress.com",
     "errorType": "response_url",
     "errorUrl": "wordpress.com/typo/?subdomain=",

From 65b38592c427a5a2c064009fc8185302fc5ad42b Mon Sep 17 00:00:00 2001
From: "Christopher K. Hoadley" <chris.hoadley@gmail.com>
Date: Tue, 22 Jan 2019 20:37:05 -0600
Subject: [PATCH 3/3] Change "response_url" detection strategy completely.

Previously, there was a problem with sites that redirect an attempt to view a non-existent username to the main site. For example, if you try to go to https://devrant.com/users/dfoxxxxxxxxx (a username that does not exist), then we get a redirect to https://devrant.com/, the root of the site. But the "response_url" checking algorithm only looked for the configured error URL being contained in the response URL. When the error URL is the site root, it is also contained in the URL of every existing profile on that site, so the check matched in both cases and these sites always indicated that the username was not found.

Update the "response_url" detection method so that the request does not allow redirects. If we get a 2xx response, then the username has been found. However, if we get something else (e.g. a 302 redirect), then we treat the username as not found.

This whole method seems fragile, but I did exhaustively test all of the supported sites, and they all work.  So, this change is clearly an improvement.
---
 sherlock.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/sherlock.py b/sherlock.py
index e7fef9d..3cc2850 100644
--- a/sherlock.py
+++ b/sherlock.py
@@ -208,13 +208,27 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr
                 if net_info["errorType"] == 'status_code':
                     request_method = session.head
 
+            if net_info["errorType"] == "response_url":
+                #Site forwards request to a different URL if username not
+                #found.  Disallow the redirect so we can capture the
+                #http status from the original URL request.
+                allow_redirects = False
+            else:
+                #Allow whatever redirect that the site wants to do.
+                #The final result of the request will be what is available.
+                allow_redirects = True
+
             # This future starts running the request in a new thread, doesn't block the main thread
             if proxy != None:
                 proxies = {"http": proxy, "https": proxy}
-                future = request_method(
-                    url=url, headers=headers, proxies=proxies)
+                future = request_method(url=url, headers=headers,
+                                        proxies=proxies,
+                                        allow_redirects=allow_redirects
+                                       )
             else:
-                future = request_method(url=url, headers=headers)
+                future = request_method(url=url, headers=headers,
+                                        allow_redirects=allow_redirects
+                                       )
 
             # Store future in data for access later
             net_info["request_future"] = future
@@ -290,9 +304,13 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr
                 exists = "no"
 
         elif error_type == "response_url":
-            error = net_info.get("errorUrl")
-            # Checks if the redirect url is the same as the one defined in data.json
-            if not error in r.url:
+            # For this detection method, we have turned off the redirect.
+            # So, there is no need to check the response URL: it will always
+            # match the request.  Instead, we will ensure that the response
+            # code indicates that the request was successful (i.e. no 404, or
+            # forward to some odd redirect).
+            if (r.status_code >= 200) and (r.status_code < 300):
+                #
                 print_found(social_network, url, response_time, verbose)
                 write_to_file(url, f)
                 exists = "yes"