diff --git a/sherlock.py b/sherlock.py index 9b35cd86..093594f3 100644 --- a/sherlock.py +++ b/sherlock.py @@ -221,13 +221,13 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr request_method = session.head if net_info["errorType"] == "response_url": - #Site forwards request to a different URL if username not - #found. Disallow the redirect so we can capture the - #http status from the original URL request. + # Site forwards request to a different URL if username not + # found. Disallow the redirect so we can capture the + # http status from the original URL request. allow_redirects = False else: - #Allow whatever redirect that the site wants to do. - #The final result of the request will be what is available. + # Allow whatever redirect that the site wants to do. + # The final result of the request will be what is available. allow_redirects = True # This future starts running the request in a new thread, doesn't block the main thread @@ -402,6 +402,9 @@ def main(): action="store", dest="proxy", default=None, help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080" ) + parser.add_argument("--json", "-j", metavar="JSON_FILE", + dest="json_file", default="data.json", + help="Load data from a JSON file or an online, valid, JSON file.") parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -424,12 +427,42 @@ def main(): print("Using TOR to make requests") print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") - # Load the data - data_file_path = os.path.join(os.path.dirname( - os.path.realpath(__file__)), "data.json") - with open(data_file_path, "r", encoding="utf-8") as raw: - site_data_all = json.load(raw) + response_json_online = None + site_data_all = None + + # Try to load json from website. + try: + response_json_online = requests.get(url=args.json_file) + except requests.exceptions.MissingSchema: # In case the schema is wrong it's because it may not be a website + pass + + # Check if the response is appropriate. + if response_json_online is not None and response_json_online.status_code == 200: + # Since we got data from a website, try to load json and exit if parsing fails. + try: + site_data_all = response_json_online.json() + except ValueError: + print("Invalid JSON from website!") + sys.exit(1) + pass + + data_file_path = os.path.join(os.path.dirname( + os.path.realpath(__file__)), args.json_file) + # This will be none if the request had a missing schema + if site_data_all is None: + # Check if the file exists otherwise exit. + if not os.path.exists(data_file_path): + print("JSON file at doesn't exist.") + print("If this is not a file but a website, make sure you have appended http:// or https://.") + sys.exit(1) + else: + raw = open(data_file_path, "r", encoding="utf-8") + try: + site_data_all = json.load(raw) + except: + print("Invalid JSON loaded from file.") + if args.site_list is None: # Not desired to look at a sub-set of sites site_data = site_data_all