From 61d79aa3ce9519e4bb60f162e4ad73865614a5fd Mon Sep 17 00:00:00 2001
From: Paul Pfeister <code@pfeister.dev>
Date: Fri, 12 Apr 2024 17:50:19 -0400
Subject: [PATCH 1/2] Add FlareSolverr support base

---
 sherlock/sherlock.py | 78 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 16 deletions(-)

diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py
index fb9f524..04fb7c7 100644
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@@ -27,10 +27,18 @@ from notify import QueryNotifyPrint
 from sites import SitesInformation
 from colorama import init
 from argparse import ArgumentTypeError
+from enum import Enum
+from urllib.parse import urlparse
 
 module_name = "Sherlock: Find Usernames Across Social Networks"
 __version__ = "0.14.3"
 
+class ProxyType(Enum):
+    """Proxy type enumeration for special handling.
+
+    Names here will be occasionally presented to the user.
+    """
+    FLARESOLV = "FlareSolverr"   # FlareSolverr proxy detected
 
 class SherlockFuturesSession(FuturesSession):
     def request(self, method, url, hooks=None, *args, **kwargs):
@@ -162,6 +170,7 @@ def sherlock(
     tor=False,
     unique_tor=False,
     proxy=None,
+    proxy_type:ProxyType=None,
     timeout=60,
 ):
     """Run Sherlock Analysis.
@@ -261,7 +270,22 @@ def sherlock(
             request_payload = net_info.get("request_payload")
             request = None
 
-            if request_method is not None:
+            if proxy_type is ProxyType.FLARESOLV:
+                request = session.post
+
+            if request_method is None:
+                if net_info["errorType"] == "status_code":
+                    # In most cases when we are detecting by status code,
+                    # it is not necessary to get the entire body:  we can
+                    # detect fine with just the HEAD response.
+                    request_method = "HEAD"
+                else:
+                    # Either this detect method needs the content associated
+                    # with the GET response, or this specific website will
+                    # not respond properly unless we request the whole page.
+                    request_method = "GET"
+
+            if request is None and request_method is not None:
                 if request_method == "GET":
                     request = session.get
                 elif request_method == "HEAD":
@@ -284,18 +308,6 @@ def sherlock(
                 # from where the user profile normally can be found.
                 url_probe = interpolate_string(url_probe, username)
 
-            if request is None:
-                if net_info["errorType"] == "status_code":
-                    # In most cases when we are detecting by status code,
-                    # it is not necessary to get the entire body:  we can
-                    # detect fine with just the HEAD response.
-                    request = session.head
-                else:
-                    # Either this detect method needs the content associated
-                    # with the GET response, or this specific website will
-                    # not respond properly unless we request the whole page.
-                    request = session.get
-
             if net_info["errorType"] == "response_url":
                 # Site forwards request to a different URL if username not
                 # found.  Disallow the redirect so we can capture the
@@ -307,7 +319,25 @@ def sherlock(
                 allow_redirects = True
 
             # This future starts running the request in a new thread, doesn't block the main thread
-            if proxy is not None:
+            if proxy_type is ProxyType.FLARESOLV:
+                if request_method == "HEAD":
+                    request_method = "GET"
+                if request_method not in ['GET', 'POST', 'PUT']:
+                    raise RuntimeError(f"Unsupported request_method for {url}")
+                timeout = 6000 if timeout < 6000 else timeout # Longer minimum timeout for CloudFlare
+                future = request(
+                    url=proxy,
+                    headers={"Content-Type": "application/json"},
+                    allow_redirects=allow_redirects,
+                    timeout=timeout,
+                    json={
+                        "cmd": f"request.{request_method.lower()}",
+                        "url": url_probe,
+                        "maxTimeout": timeout,
+                        "postData": request_payload
+                    },
+                )
+            elif proxy is not None:
                 proxies = {"http": proxy, "https": proxy}
                 future = request(
                     url=url_probe,
@@ -378,6 +408,8 @@ def sherlock(
         query_status = QueryStatus.UNKNOWN
         error_context = None
 
+        print(r.text)
+
         if error_text is not None:
             error_context = error_text
 
@@ -671,9 +703,22 @@ def main():
     if args.tor and (args.proxy is not None):
         raise Exception("Tor and Proxy cannot be set at the same time.")
 
-    # Make prompts
+    # Present proxy to user and detect known proxies that require special handling
+    proxy_type = None
     if args.proxy is not None:
-        print("Using the proxy: " + args.proxy)
+        print("Using the proxy: " + args.proxy, end="")
+        try:
+            proxy_parsedUrl = urlparse(args.proxy)
+            if proxy_parsedUrl.scheme == "http" or proxy_parsedUrl.scheme == "https":
+                proxy_rootUrl = f"{proxy_parsedUrl.scheme}://{proxy_parsedUrl.hostname}{f":{proxy_parsedUrl.port}" if proxy_parsedUrl.port is not None else ""}"
+                proxy_rootResponse = requests.get(proxy_rootUrl)
+                if "FlareSolverr is ready!" in proxy_rootResponse.content.decode('utf-8'):
+                    proxy_type = ProxyType.FLARESOLV
+        except:
+            pass
+        if proxy_type is not None:
+            print(f" (detected {proxy_type.value})", end="")
+        print()
 
     if args.tor or args.unique_tor:
         print("Using Tor to make requests")
@@ -763,6 +808,7 @@ def main():
             tor=args.tor,
             unique_tor=args.unique_tor,
             proxy=args.proxy,
+            proxy_type=proxy_type,
             timeout=args.timeout,
         )
 

From 0f732cdaf92bceb4b57be549e5072b74daeb6764 Mon Sep 17 00:00:00 2001
From: Paul Pfeister <code@pfeister.dev>
Date: Fri, 12 Apr 2024 20:34:00 -0400
Subject: [PATCH 2/2] Add partial FlareSolverr support

Detection of and support for FlareSolverr proxies have been added.
Currently, the only supported detection method (errorType) is "message".

Other validation methods may be added in the future, depending on FlareSolverr's advancement and interest within Sherlock's userbase.

Note that the data returned through FlareSolverr may sometimes differ slightly from that of a normal 'requests' request and may include extra content. Site fingerprints (e.g. errorMsg values in data.json) may therefore need to be more specific.
---
 sherlock/resources/data.json |  2 +-
 sherlock/sherlock.py         | 55 ++++++++++++++++++++++++++----------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json
index b7500f4..c45e394 100644
--- a/sherlock/resources/data.json
+++ b/sherlock/resources/data.json
@@ -654,7 +654,7 @@
     "username_claimed": "JennyKrafts"
   },
   "Euw": {
-    "errorMsg": "This summoner is not registered at OP.GG. Please check spelling.",
+    "errorMsg": "<h2 class=\"header__title\">This summoner is not registered at OP.GG.",
     "errorType": "message",
     "url": "https://euw.op.gg/summoner/userName={}",
     "urlMain": "https://euw.op.gg/",
diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py
index 04fb7c7..700def0 100644
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@@ -14,6 +14,7 @@ import os
 import platform
 import re
 import sys
+import json
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from time import monotonic
 
@@ -324,18 +325,19 @@ def sherlock(
                     request_method = "GET"
                 if request_method not in ['GET', 'POST', 'PUT']:
                     raise RuntimeError(f"Unsupported request_method for {url}")
+                req_data={}
+                req_data['cmd']=f"request.{request_method.lower()}"
+                req_data['url']=url_probe
+                req_data['maxTimeout']=timeout
+                if request_method == "POST":
+                    req_data['postData']=request_payload
                 timeout = 6000 if timeout < 6000 else timeout # Longer minimum timeout for CloudFlare
                 future = request(
                     url=proxy,
                     headers={"Content-Type": "application/json"},
                     allow_redirects=allow_redirects,
-                    timeout=timeout,
-                    json={
-                        "cmd": f"request.{request_method.lower()}",
-                        "url": url_probe,
-                        "maxTimeout": timeout,
-                        "postData": request_payload
-                    },
+                    timeout=timeout*1.1, # slight increase to allow for FlareSolverr's own timeout
+                    json=req_data
                 )
             elif proxy is not None:
                 proxies = {"http": proxy, "https": proxy}
@@ -383,6 +385,11 @@ def sherlock(
         error_type = net_info["errorType"]
         error_code = net_info.get("errorCode")
 
+        # Do not proxy incompatible errorTypes to FlareSolverr
+        if proxy_type is ProxyType.FLARESOLV and error_type != "message":
+            proxy = proxyType = None
+
+
         # Retrieve future and ensure it has finished
         future = net_info["request_future"]
         r, error_text, exception_text = get_response(
@@ -401,14 +408,24 @@ def sherlock(
         except Exception:
             http_status = "?"
         try:
-            response_text = r.text.encode(r.encoding or "UTF-8")
+            response_text = r.text
         except Exception:
             response_text = ""
 
         query_status = QueryStatus.UNKNOWN
         error_context = None
 
-        print(r.text)
+        # Overwrite standard values if necessary for proxy type
+        if proxy_type is ProxyType.FLARESOLV:
+            try:
+                response_json = json.loads(r.text)
+                if response_json['status'] != 'ok':
+                    error_text = f"{ProxyType.FLARESOLV.value} {response_json['message']}"
+                else:
+                    response_text = response_json['solution']['response']
+                    http_status = response_json['solution']['status']
+            except:
+                print('somethin messed up')
 
         if error_text is not None:
             error_context = error_text
@@ -426,12 +443,12 @@ def sherlock(
             if isinstance(errors, str):
                 # Checks if the error message is in the HTML
                 # if error is present we will set flag to False
-                if errors in r.text:
+                if errors in response_text:
                     error_flag = False
             else:
                 # If it's list, it will iterate all the error message
                 for error in errors:
-                    if error in r.text:
+                    if error in response_text:
                         error_flag = False
                         break
             if error_flag:
@@ -440,10 +457,10 @@ def sherlock(
                 query_status = QueryStatus.AVAILABLE
         elif error_type == "status_code":
             # Checks if the Status Code is equal to the optional "errorCode" given in 'data.json'
-            if error_code == r.status_code:
+            if error_code == http_status:
                 query_status = QueryStatus.AVAILABLE
             # Checks if the status code of the response is 2XX
-            elif not r.status_code >= 300 or r.status_code < 200:
+            elif not http_status >= 300 or http_status < 200:
                 query_status = QueryStatus.CLAIMED
             else:
                 query_status = QueryStatus.AVAILABLE
@@ -453,7 +470,7 @@ def sherlock(
             # match the request.  Instead, we will ensure that the response
             # code indicates that the request was successful (i.e. no 404, or
             # forward to some odd redirect).
-            if 200 <= r.status_code < 300:
+            if 200 <= http_status < 300:
                 query_status = QueryStatus.CLAIMED
             else:
                 query_status = QueryStatus.AVAILABLE
@@ -479,7 +496,7 @@ def sherlock(
 
         # Save results from request
         results_site["http_status"] = http_status
-        results_site["response_text"] = response_text
+        results_site["response_text"] = response_text.encode(r.encoding or "UTF-8")
 
         # Add this site's results into final dictionary with all of the other results.
         results_total[social_network] = results_site
@@ -720,6 +737,14 @@ def main():
             print(f" (detected {proxy_type.value})", end="")
         print()
 
+        #### FlareSolverr Development Warning
+        ## FlareSolverr sometimes returns slightly different results than the normal requests module.
+        ## While this is being improved upon, be aware that results may vary when compared to a normal
+        ## unproxied search.
+        if proxy_type is ProxyType.FLARESOLV:
+            print("!! FlareSolverr support is under active development. Results may vary.")
+            print("!! Only routing supported data types through FlareSolverr proxy.")
+
     if args.tor or args.unique_tor:
         print("Using Tor to make requests")