Add a module to store information about the sites. It handles loading the site information from the JSON file. For now, the new SitesInformation() object is used to rebuild the original JSON dictionary; the rest of the code will be updated to use the object directly in the future.

pull/350/head
Christopher K. Hoadley 5 years ago
parent 647aea577c
commit 7f87f5fcc4

@ -23,6 +23,7 @@ from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
from result import QueryStatus
from result import QueryResult
from sites import SitesInformation
module_name = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.10.0"
@ -499,7 +500,7 @@ def main():
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
)
parser.add_argument("--json", "-j", metavar="JSON_FILE",
dest="json_file", default="resources/data.json",
dest="json_file", default=None,
help="Load data from a JSON file or an online, valid, JSON file.")
parser.add_argument("--timeout",
action="store", metavar='TIMEOUT',
@ -549,41 +550,20 @@ def main():
print("You can only use --output with a single username")
sys.exit(1)
response_json_online = None
site_data_all = None
# Try to load json from website.
#Create object with all information about sites we are aware of.
try:
response_json_online = requests.get(url=args.json_file)
except requests.exceptions.MissingSchema: # In case the schema is wrong it's because it may not be a website
pass
sites = SitesInformation(args.json_file)
except Exception as error:
print(f"ERROR: {error}")
sys.exit(1)
# Check if the response is appropriate.
if response_json_online is not None and response_json_online.status_code == 200:
# Since we got data from a website, try to load json and exit if parsing fails.
try:
site_data_all = response_json_online.json()
except ValueError:
print("Invalid JSON from website!")
sys.exit(1)
pass
data_file_path = os.path.join(os.path.dirname(
os.path.realpath(__file__)), args.json_file)
# This will be none if the request had a missing schema
if site_data_all is None:
# Check if the file exists otherwise exit.
if not os.path.exists(data_file_path):
print("JSON file doesn't exist.")
print(
"If this is not a file but a website, make sure you have appended http:// or https://.")
sys.exit(1)
else:
raw = open(data_file_path, "r", encoding="utf-8")
try:
site_data_all = json.load(raw)
except:
print("Invalid JSON loaded from file.")
#Create original dictionary from SitesInformation() object.
#Eventually, the rest of the code will be updated to use the new object
#directly, but this will glue the two pieces together.
site_data_all = {}
for site in sites:
site_data_all[site.name] = site.information
if args.site_list is None:
# Not desired to look at a sub-set of sites

@ -0,0 +1,226 @@
"""Sherlock Sites Information Module
This module supports storing information about web sites.
This is the raw data that will be used to search for usernames.
"""
import logging
import os
import json
import requests
class SiteInformation:
    """Information about one specific web site.

    Instances are plain records: every constructor argument is stored
    unchanged on an attribute of the same name.
    """

    def __init__(self, name, url_home, url_username_format,
                 username_claimed, username_unclaimed,
                 information):
        """Create Site Information Object.

        Contains information about a specific web site.

        Keyword Arguments:
        self                   -- This object.
        name                   -- String which identifies site.
        url_home               -- String containing URL for home of site.
        url_username_format    -- String containing URL for Username format
                                  on site.
                                  NOTE:  The string should contain the
                                         token "{}" where the username should
                                         be substituted.  For example, a string
                                         of "https://somesite.com/users/{}"
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
                                         the web site.
        username_claimed       -- String containing username which is known
                                  to be claimed on web site.
        username_unclaimed     -- String containing username which is known
                                  to be unclaimed on web site.
        information            -- Dictionary containing all known information
                                  about web site.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
                                         dictionary.  This information will
                                         be needed by the detection method,
                                         but it is only recorded in this
                                         object for future use.

        Return Value:
        Nothing.
        """
        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format
        self.username_claimed = username_claimed
        self.username_unclaimed = username_unclaimed
        # Stored by reference (not copied): the caller's dictionary and this
        # attribute are the same object.
        self.information = information

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        String of the form "<name> (<url_home>)".
        """
        return f"{self.name} ({self.url_home})"
class SitesInformation:
    """Information about all supported web sites.

    The site data is loaded once, at construction time, from a JSON file
    or URL.  The loaded sites are kept in the ``sites`` dictionary, keyed
    by site name.  The object supports ``len()`` and iteration over the
    individual site objects.
    """

    def __init__(self, data_file_path=None):
        """Create Sites Information Object.

        Contains information about all supported web sites.

        Keyword Arguments:
        self                   -- This object.
        data_file_path         -- String which indicates path to data file.
                                  The file name must end in ".json".

                                  There are 3 possible formats:
                                   * Absolute File Format
                                     For example, "c:/stuff/data.json".
                                   * Relative File Format
                                     The current working directory is used
                                     as the context.
                                     For example, "data.json".
                                   * URL Format
                                     For example,
                                     "https://example.com/data.json", or
                                     "http://example.com/data.json".

                                  If this option is not specified, then
                                  "resources/data.json" next to this module
                                  is used.

        Return Value:
        Nothing.

        Raises:
        FileNotFoundError      -- The path does not end in ".json", the URL
                                  could not be fetched or did not answer
                                  with status 200, or the file does not
                                  exist.
        ValueError             -- The contents could not be parsed as JSON,
                                  the top level or a site entry is not a
                                  JSON object, or a site entry is missing a
                                  required attribute.
        """
        if data_file_path is None:
            # Use internal default.
            data_file_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "resources/data.json"
            )

        # Ensure that specified data file has correct extension.
        if not data_file_path.lower().endswith(".json"):
            raise FileNotFoundError(f"Incorrect JSON file extension for "
                                    f"data file '{data_file_path}'."
                                    )

        if data_file_path.lower().startswith(("http://", "https://")):
            # Reference is to a URL.
            site_data = self._load_from_url(data_file_path)
        else:
            # Reference is to a file.
            site_data = self._load_from_file(data_file_path)

        # Anything other than a JSON object cannot map site names to site
        # details; report it as a parse problem rather than letting a raw
        # TypeError escape from the loop below.
        if not isinstance(site_data, dict):
            raise ValueError(f"Problem parsing json contents at "
                             f"'{data_file_path}': "
                             f"Top level of data is not a JSON object."
                             )

        self.sites = {}

        # Add all of site information from the json file to internal site list.
        for site_name, site_details in site_data.items():
            if not isinstance(site_details, dict):
                raise ValueError(f"Problem parsing json contents at "
                                 f"'{data_file_path}': "
                                 f"Entry for site '{site_name}' is not a "
                                 f"JSON object."
                                 )
            try:
                self.sites[site_name] = SiteInformation(
                    site_name,
                    site_details["urlMain"],
                    site_details["url"],
                    site_details["username_claimed"],
                    site_details["username_unclaimed"],
                    site_details
                )
            except KeyError as error:
                raise ValueError(f"Problem parsing json contents at "
                                 f"'{data_file_path}': "
                                 f"Missing attribute {str(error)}."
                                 ) from error

        # Cursor used only by direct next() calls on this object (see
        # __next__); ordinary "for" loops do not touch it.
        self.__iteration_index = 0

    @staticmethod
    def _load_from_url(data_file_path):
        """Fetch the URL and return its decoded JSON contents."""
        try:
            response = requests.get(url=data_file_path)
        except Exception as error:
            raise FileNotFoundError(f"Problem while attempting to access "
                                    f"data file URL '{data_file_path}':  "
                                    f"{str(error)}"
                                    ) from error

        if response.status_code != 200:
            raise FileNotFoundError(f"Bad response while accessing "
                                    f"data file URL '{data_file_path}'."
                                    )

        try:
            return response.json()
        except Exception as error:
            raise ValueError(f"Problem parsing json contents at "
                             f"'{data_file_path}':  {str(error)}."
                             ) from error

    @staticmethod
    def _load_from_file(data_file_path):
        """Read the local file and return its decoded JSON contents."""
        try:
            with open(data_file_path, "r", encoding="utf-8") as file:
                try:
                    return json.load(file)
                except Exception as error:
                    raise ValueError(f"Problem parsing json contents at "
                                     f"'{data_file_path}':  {str(error)}."
                                     ) from error
        except FileNotFoundError as error:
            raise FileNotFoundError(f"Problem while attempting to access "
                                    f"data file '{data_file_path}'."
                                    ) from error

    def __iter__(self):
        """Iterator For Object.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        A new, independent iterator over the site objects, in the order of
        the self.sites dictionary.  Because every call starts a fresh
        iterator, a loop that is left early does not affect later loops,
        and nested loops over the same object do not interfere.
        """
        return iter(self.sites.values())

    def __next__(self):
        """Next Method For Object.

        Kept so that calling next() directly on this object keeps working;
        "for" loops go through __iter__ and never reach this method.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Returns individual site from beginning of self.sites dictionary
        to the end.
        Raises StopIteration when all sites have been passed, and rewinds
        so that the following call starts from the beginning again.
        """
        if self.__iteration_index >= len(self.sites):
            # Finished with iteration.
            self.__iteration_index = 0
            raise StopIteration

        # Retrieve the next site from the ordered dictionary.
        site = list(self.sites.values())[self.__iteration_index]
        self.__iteration_index += 1
        return site

    def __len__(self):
        """Length For Object.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Number of sites held in self.sites.
        """
        return len(self.sites)
Loading…
Cancel
Save