pull/1896/merge
Ripunjay Singh 4 months ago committed by GitHub
commit 234312f52e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -4,8 +4,13 @@ This module supports storing information about websites.
This is the raw data that will be used to search for usernames.
"""
import json
import requests
import secrets
import sys
import requests
from requests.exceptions import Timeout
from tqdm import tqdm
class SiteInformation:
def __init__(self, name, url_home, url_username_format, username_claimed,
@ -105,56 +110,48 @@ class SitesInformation:
Return Value:
Nothing.
"""
if not data_file_path:
# The default data file is the live data.json hosted in the GitHub repo. We use it instead of the local
# copy so that the user always has the most up-to-date data. This prevents users from opening issues
# about false positives that have already been fixed, and from running with outdated data.
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json"
# Ensure that specified data file has correct extension.
if not data_file_path.lower().endswith(".json"):
raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")
# if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
if data_file_path.lower().startswith("http"):
# Reference is to a URL.
try:
response = requests.get(url=data_file_path)
except Exception as error:
raise FileNotFoundError(
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
)
if response.status_code != 200:
raise FileNotFoundError(f"Bad response while accessing "
f"data file URL '{data_file_path}'."
)
try:
# sys.stdout.write("Loading...")
# sys.stdout.flush()
data_file_url = data_file_path if data_file_path else "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json"
try:
response = None
# Attempt to fetch data from the specified URL
if data_file_url.lower().startswith("http"):
sys.stdout.write("Establishing connection to data file URL...")
sys.stdout.flush()
try:
response = requests.get(url=data_file_url, timeout=10)
response.raise_for_status() # Raise an exception for non-200 responses
except Timeout:
sys.stdout.write("\rConnection timed out. Please check your internet connection.")
sys.stdout.flush()
except requests.exceptions.RequestException as error:
sys.stdout.write(f"\rAn error occurred while fetching data from URL: {error}")
sys.stdout.flush()
if response and response.status_code == 200:
site_data = response.json()
except Exception as error:
raise ValueError(
f"Problem parsing json contents at '{data_file_path}': {error}."
)
else:
# Reference is to a file.
try:
else:
sys.stdout.write("\rFalling back to the local data file...")
sys.stdout.flush()
data_file_path = "sherlock/resources/data.json"
with open(data_file_path, "r", encoding="utf-8") as file:
try:
site_data = json.load(file)
except Exception as error:
raise ValueError(
f"Problem parsing json contents at '{data_file_path}': {error}."
)
except FileNotFoundError:
raise FileNotFoundError(f"Problem while attempting to access "
f"data file '{data_file_path}'."
)
site_data = json.load(file)
except Exception as error:
sys.stdout.write(f"\rAn error occurred while loading data: {error}")
sys.stdout.flush()
site_data = None
self.sites = {}
if not site_data:
raise ValueError("Failed to load site data.")
# Clear the previous message by overwriting it with spaces
sys.stdout.write('\r' + ' ' * 100 + '\r')
sys.stdout.flush()
self.sites = {}
# Add all site information from the json file to internal site list.
for site_name in site_data:
try:

Loading…
Cancel
Save