|
|
@ -52,7 +52,7 @@ class SherlockFuturesSession(FuturesSession):
|
|
|
|
Return Value:
|
|
|
|
Return Value:
|
|
|
|
Request object.
|
|
|
|
Request object.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
#Record the start time for the request.
|
|
|
|
# Record the start time for the request.
|
|
|
|
start = monotonic()
|
|
|
|
start = monotonic()
|
|
|
|
|
|
|
|
|
|
|
|
def response_time(resp, *args, **kwargs):
|
|
|
|
def response_time(resp, *args, **kwargs):
|
|
|
@ -70,22 +70,22 @@ class SherlockFuturesSession(FuturesSession):
|
|
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
#Install hook to execute when response completes.
|
|
|
|
# Install hook to execute when response completes.
|
|
|
|
#Make sure that the time measurement hook is first, so we will not
|
|
|
|
# Make sure that the time measurement hook is first, so we will not
|
|
|
|
#track any later hook's execution time.
|
|
|
|
# track any later hook's execution time.
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
if isinstance(hooks['response'], list):
|
|
|
|
if isinstance(hooks['response'], list):
|
|
|
|
hooks['response'].insert(0, response_time)
|
|
|
|
hooks['response'].insert(0, response_time)
|
|
|
|
elif isinstance(hooks['response'], tuple):
|
|
|
|
elif isinstance(hooks['response'], tuple):
|
|
|
|
#Convert tuple to list and insert time measurement hook first.
|
|
|
|
# Convert tuple to list and insert time measurement hook first.
|
|
|
|
hooks['response'] = list(hooks['response'])
|
|
|
|
hooks['response'] = list(hooks['response'])
|
|
|
|
hooks['response'].insert(0, response_time)
|
|
|
|
hooks['response'].insert(0, response_time)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
#Must have previously contained a single hook function,
|
|
|
|
# Must have previously contained a single hook function,
|
|
|
|
#so convert to list.
|
|
|
|
# so convert to list.
|
|
|
|
hooks['response'] = [response_time, hooks['response']]
|
|
|
|
hooks['response'] = [response_time, hooks['response']]
|
|
|
|
except KeyError:
|
|
|
|
except KeyError:
|
|
|
|
#No response hook was already defined, so install it ourselves.
|
|
|
|
# No response hook was already defined, so install it ourselves.
|
|
|
|
hooks['response'] = [response_time]
|
|
|
|
hooks['response'] = [response_time]
|
|
|
|
|
|
|
|
|
|
|
|
return super(SherlockFuturesSession, self).request(method,
|
|
|
|
return super(SherlockFuturesSession, self).request(method,
|
|
|
@ -96,7 +96,7 @@ class SherlockFuturesSession(FuturesSession):
|
|
|
|
|
|
|
|
|
|
|
|
def get_response(request_future, error_type, social_network):
|
|
|
|
def get_response(request_future, error_type, social_network):
|
|
|
|
|
|
|
|
|
|
|
|
#Default for Response object if some failure occurs.
|
|
|
|
# Default for Response object if some failure occurs.
|
|
|
|
response = None
|
|
|
|
response = None
|
|
|
|
|
|
|
|
|
|
|
|
error_context = "General Unknown Error"
|
|
|
|
error_context = "General Unknown Error"
|
|
|
@ -104,7 +104,7 @@ def get_response(request_future, error_type, social_network):
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
response = request_future.result()
|
|
|
|
response = request_future.result()
|
|
|
|
if response.status_code:
|
|
|
|
if response.status_code:
|
|
|
|
#status code exists in response object
|
|
|
|
# Status code exists in response object
|
|
|
|
error_context = None
|
|
|
|
error_context = None
|
|
|
|
except requests.exceptions.HTTPError as errh:
|
|
|
|
except requests.exceptions.HTTPError as errh:
|
|
|
|
error_context = "HTTP Error"
|
|
|
|
error_context = "HTTP Error"
|
|
|
@ -159,27 +159,27 @@ def sherlock(username, site_data, query_notify,
|
|
|
|
there was an HTTP error when checking for existence.
|
|
|
|
there was an HTTP error when checking for existence.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
#Notify caller that we are starting the query.
|
|
|
|
# Notify caller that we are starting the query.
|
|
|
|
query_notify.start(username)
|
|
|
|
query_notify.start(username)
|
|
|
|
|
|
|
|
|
|
|
|
# Create session based on request methodology
|
|
|
|
# Create session based on request methodology
|
|
|
|
if tor or unique_tor:
|
|
|
|
if tor or unique_tor:
|
|
|
|
#Requests using Tor obfuscation
|
|
|
|
# Requests using Tor obfuscation
|
|
|
|
underlying_request = TorRequest()
|
|
|
|
underlying_request = TorRequest()
|
|
|
|
underlying_session = underlying_request.session
|
|
|
|
underlying_session = underlying_request.session
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
#Normal requests
|
|
|
|
# Normal requests
|
|
|
|
underlying_session = requests.session()
|
|
|
|
underlying_session = requests.session()
|
|
|
|
underlying_request = requests.Request()
|
|
|
|
underlying_request = requests.Request()
|
|
|
|
|
|
|
|
|
|
|
|
#Limit number of workers to 20.
|
|
|
|
# Limit number of workers to 20.
|
|
|
|
#This is probably vastly overkill.
|
|
|
|
# This is probably vastly overkill.
|
|
|
|
if len(site_data) >= 20:
|
|
|
|
if len(site_data) >= 20:
|
|
|
|
max_workers=20
|
|
|
|
max_workers=20
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
max_workers=len(site_data)
|
|
|
|
max_workers=len(site_data)
|
|
|
|
|
|
|
|
|
|
|
|
#Create multi-threaded session for all requests.
|
|
|
|
# Create multi-threaded session for all requests.
|
|
|
|
session = SherlockFuturesSession(max_workers=max_workers,
|
|
|
|
session = SherlockFuturesSession(max_workers=max_workers,
|
|
|
|
session=underlying_session)
|
|
|
|
session=underlying_session)
|
|
|
|
|
|
|
|
|
|
|
@ -235,14 +235,14 @@ def sherlock(username, site_data, query_notify,
|
|
|
|
|
|
|
|
|
|
|
|
if (net_info["errorType"] == 'status_code' and
|
|
|
|
if (net_info["errorType"] == 'status_code' and
|
|
|
|
net_info.get("request_head_only", True) == True):
|
|
|
|
net_info.get("request_head_only", True) == True):
|
|
|
|
#In most cases when we are detecting by status code,
|
|
|
|
# In most cases when we are detecting by status code,
|
|
|
|
#it is not necessary to get the entire body: we can
|
|
|
|
# it is not necessary to get the entire body: we can
|
|
|
|
#detect fine with just the HEAD response.
|
|
|
|
# detect fine with just the HEAD response.
|
|
|
|
request_method = session.head
|
|
|
|
request_method = session.head
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
#Either this detect method needs the content associated
|
|
|
|
# Either this detect method needs the content associated
|
|
|
|
#with the GET response, or this specific website will
|
|
|
|
# with the GET response, or this specific website will
|
|
|
|
#not respond properly unless we request the whole page.
|
|
|
|
# not respond properly unless we request the whole page.
|
|
|
|
request_method = session.get
|
|
|
|
request_method = session.get
|
|
|
|
|
|
|
|
|
|
|
|
if net_info["errorType"] == "response_url":
|
|
|
|
if net_info["errorType"] == "response_url":
|
|
|
@ -302,7 +302,7 @@ def sherlock(username, site_data, query_notify,
|
|
|
|
error_type=error_type,
|
|
|
|
error_type=error_type,
|
|
|
|
social_network=social_network)
|
|
|
|
social_network=social_network)
|
|
|
|
|
|
|
|
|
|
|
|
#Get response time for response of our request.
|
|
|
|
# Get response time for response of our request.
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
response_time = r.elapsed
|
|
|
|
response_time = r.elapsed
|
|
|
|
except AttributeError:
|
|
|
|
except AttributeError:
|
|
|
@ -391,12 +391,12 @@ def sherlock(username, site_data, query_notify,
|
|
|
|
QueryStatus.AVAILABLE,
|
|
|
|
QueryStatus.AVAILABLE,
|
|
|
|
query_time=response_time)
|
|
|
|
query_time=response_time)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
#It should be impossible to ever get here...
|
|
|
|
# It should be impossible to ever get here...
|
|
|
|
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
|
|
|
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
|
|
|
f"site '{social_network}'")
|
|
|
|
f"site '{social_network}'")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Notify caller about results of query.
|
|
|
|
# Notify caller about results of query.
|
|
|
|
query_notify.update(result)
|
|
|
|
query_notify.update(result)
|
|
|
|
|
|
|
|
|
|
|
|
# Save status of request
|
|
|
|
# Save status of request
|
|
|
@ -409,7 +409,7 @@ def sherlock(username, site_data, query_notify,
|
|
|
|
# Add this site's results into final dictionary with all of the other results.
|
|
|
|
# Add this site's results into final dictionary with all of the other results.
|
|
|
|
results_total[social_network] = results_site
|
|
|
|
results_total[social_network] = results_site
|
|
|
|
|
|
|
|
|
|
|
|
#Notify caller that all queries are finished.
|
|
|
|
# Notify caller that all queries are finished.
|
|
|
|
query_notify.finish()
|
|
|
|
query_notify.finish()
|
|
|
|
|
|
|
|
|
|
|
|
return results_total
|
|
|
|
return results_total
|
|
|
@ -559,7 +559,7 @@ def main():
|
|
|
|
sys.exit(1)
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Create object with all information about sites we are aware of.
|
|
|
|
# Create object with all information about sites we are aware of.
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
if args.local:
|
|
|
|
if args.local:
|
|
|
|
sites = SitesInformation(os.path.join(os.path.dirname(__file__), 'resources/data.json'))
|
|
|
|
sites = SitesInformation(os.path.join(os.path.dirname(__file__), 'resources/data.json'))
|
|
|
@ -569,9 +569,9 @@ def main():
|
|
|
|
print(f"ERROR: {error}")
|
|
|
|
print(f"ERROR: {error}")
|
|
|
|
sys.exit(1)
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
#Create original dictionary from SitesInformation() object.
|
|
|
|
# Create original dictionary from SitesInformation() object.
|
|
|
|
#Eventually, the rest of the code will be updated to use the new object
|
|
|
|
# Eventually, the rest of the code will be updated to use the new object
|
|
|
|
#directly, but this will glue the two pieces together.
|
|
|
|
# directly, but this will glue the two pieces together.
|
|
|
|
site_data_all = {}
|
|
|
|
site_data_all = {}
|
|
|
|
for site in sites:
|
|
|
|
for site in sites:
|
|
|
|
site_data_all[site.name] = site.information
|
|
|
|
site_data_all[site.name] = site.information
|
|
|
@ -601,7 +601,7 @@ def main():
|
|
|
|
if not site_data:
|
|
|
|
if not site_data:
|
|
|
|
sys.exit(1)
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
#Create notify object for query results.
|
|
|
|
# Create notify object for query results.
|
|
|
|
query_notify = QueryNotifyPrint(result=None,
|
|
|
|
query_notify = QueryNotifyPrint(result=None,
|
|
|
|
verbose=args.verbose,
|
|
|
|
verbose=args.verbose,
|
|
|
|
print_all=args.print_all,
|
|
|
|
print_all=args.print_all,
|
|
|
|