From 8619a353e4d71b94a6cf8e6c1f8b24d4bff99f51 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 May 2020 08:13:23 -0500 Subject: [PATCH] Add override to HTTP Status Detection so HEAD request is not used. Configure Instagram to use this option. In most cases when we are detecting by status code, it is not necessary to get the entire body: the response to a HEAD request is enough for detection. However, Richard Getz discovered that some sites (e.g. Instagram) will not respond properly if Sherlock sends only a HEAD request. Add a "request_head_only" attribute to data.json so that HTTP Status Detection can be configured per site to use either a HEAD or a GET request. It is simpler to support this change in this fashion, as it does not require changes to the tests. With Richard Getz --- README.md | 2 +- sherlock/resources/data.json | 1 + sherlock/sherlock.py | 12 +++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f512fcd..f6c388e 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ usage: sherlock [-h] [--version] [--verbose] [--rank] [--no-color] [--browse] USERNAMES [USERNAMES ...] -Sherlock: Find Usernames Across Social Networks (Version 0.11.1) +Sherlock: Find Usernames Across Social Networks (Version 0.12.0) positional arguments: USERNAMES One or more usernames to check with social networks. 
diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index 07a7037..c86fa25 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -882,6 +882,7 @@ }, "Instagram": { "errorType": "status_code", + "request_head_only": false, "rank": 35, "url": "https://www.instagram.com/{}", "urlMain": "https://www.instagram.com/", diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 2cc7f70..3a79eb3 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -30,7 +30,7 @@ from notify import QueryNotifyPrint from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.11.1" +__version__ = "0.12.0" @@ -237,10 +237,16 @@ def sherlock(username, site_data, query_notify, # from where the user profile normally can be found. url_probe = url_probe.format(username) - #If only the status_code is needed don't download the body - if net_info["errorType"] == 'status_code': + if (net_info["errorType"] == 'status_code' and + net_info.get("request_head_only", True) == True): + #In most cases when we are detecting by status code, + #it is not necessary to get the entire body: we can + #detect fine with just the HEAD response. request_method = session.head else: + #Either this detect method needs the content associated + #with the GET response, or this specific website will + #not respond properly unless we request the whole page. request_method = session.get if net_info["errorType"] == "response_url":