From 45f15236a85899bc1b63a639645b8e2cd84f087d Mon Sep 17 00:00:00 2001 From: aristotelis gkithkopoulos Date: Wed, 8 Jun 2022 16:51:06 +0300 Subject: [PATCH] add xlsx file option by using --xlsx parameter --- .gitignore | 3 +++ requirements.txt | 1 + sherlock/sherlock.py | 41 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 1f0eb5c..6326672 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ src/ # Comma-Separated Values (CSV) Reports *.csv +#XLSX Reports +*.xlsx + # Excluded sites list tests/.excluded_sites diff --git a/requirements.txt b/requirements.txt index 649ec7a..87ae820 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ requests>=2.22.0 requests-futures>=1.0.0 stem>=1.8.0 torrequest>=0.1.0 +pandas>=1.0.0 diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index c69636d..e3c7442 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -8,6 +8,8 @@ networks. """ import csv +import xlsxwriter +import pandas as pd import os import platform import re @@ -376,7 +378,7 @@ def sherlock(username, site_data, query_notify, if error_text is not None: error_context = error_text - + elif error_type == "message": # error_flag True denotes no error found in the HTML # error_flag False denotes error found in the HTML @@ -506,6 +508,10 @@ def main(): action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) + parser.add_argument("--xlsx", + action="store_true", dest="xlsx", default=False, + help="Create the standard file for the modern Microsoft Excel spreadsheet (xslx)." + ) parser.add_argument("--site", action="append", metavar="SITE_NAME", dest="site_list", default=None, @@ -579,7 +585,7 @@ def main(): if args.tor or args.unique_tor: print("Using Tor to make requests") - + print( "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") @@ -647,6 +653,7 @@ def main(): print_all=args.print_all) # Run report on all specified users. + all_usernames = [] for username in args.username: if(CheckForParameter(username)): @@ -716,6 +723,36 @@ def main(): response_time_s ] ) + if args.xlsx: + usernames = [] + names = [] + url_main = [] + url_user = [] + exists = [] + http_status = [] + response_time_s = [] + + result_file = f"{username}.xlsx" + workbook = xlsxwriter.Workbook(f"{username}.xlsx") + sheet = workbook.add_worksheet() + for site in results: + + if response_time_s is None: + response_time_s.append("") + else: + response_time_s.append(results[site]["status"].query_time) + usernames.append(username) + names.append(site) + url_main.append(results[site]["url_main"]) + url_user.append(results[site]["url_user"]) + exists.append(str(results[site]["status"].status)) + http_status.append(results[site]["http_status"]) + + DataFrame=pd.DataFrame({"username":usernames , "name":names , "url_main":url_main , "url_user":url_user , "exists" : exists , "http_status":http_status , "response_time_s":response_time_s}) + DataFrame.to_excel(f'{username}.xlsx', sheet_name='sheet1', index=False) + + + print() query_notify.finish()