diff --git a/.gitignore b/.gitignore index 753dfef7..2e5e5bec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ # Jupyter Notebook .ipynb_checkpoints *.ipynb + +# Output files, except requirements.txt +*.txt +!requirements.txt + +# Comma-Separated Values (CSV) Reports +*.csv diff --git a/README.md b/README.md index b0b5ed42..ed16650b 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ ## Installation +**NOTE**: Python 3.6 or higher is required. + ```bash # clone the repo $ git clone https://github.com/sdushantha/sherlock.git @@ -23,7 +25,7 @@ $ pip3 install -r requirements.txt ```bash $ python3 sherlock.py --help -usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor] [--unique-tor] +usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor] USERNAMES [USERNAMES ...] Sherlock: Find Usernames Across Social Networks (Version 0.1.0) @@ -37,6 +39,7 @@ optional arguments: --verbose, -v, -d, --debug Display extra debugging information. --quiet, -q Disable debugging information (Default Option). + --csv Create Comma-Separated Values (CSV) File. --tor, -t Make requests over TOR; increases runtime; requires TOR to be installed and in system path. --unique-tor, -u Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path. ``` diff --git a/data.json b/data.json index 35499672..e1bf7d80 100644 --- a/data.json +++ b/data.json @@ -1,378 +1,462 @@ { "Instagram": { "url": "https://www.instagram.com/{}", + "urlMain": "https://www.instagram.com/", "errorType": "message", "errorMsg": "The link you followed may be broken" }, "Twitter": { "url": "https://www.twitter.com/{}", + "urlMain": "https://www.twitter.com/", "errorType": "message", "errorMsg": "page doesn’t exist" }, "Facebook": { "url": "https://www.facebook.com/{}", + "urlMain": "https://www.facebook.com/", "errorType": "status_code" }, "YouTube": { "url": "https://www.youtube.com/{}", + "urlMain": "https://www.youtube.com/", "errorType": "message", "errorMsg": "Not Found" }, "Blogger": { "url": "https://{}.blogspot.com", + "urlMain": "https://www.blogger.com/", "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Google Plus": { "url": "https://plus.google.com/+{}", + "urlMain": "https://plus.google.com/", "errorType": "status_code" }, "Reddit": { "url": "https://www.reddit.com/user/{}", + "urlMain": "https://www.reddit.com/", "errorType": "message", "errorMsg":"page not found" }, "Pinterest": { "url": "https://www.pinterest.com/{}", + "urlMain": "https://www.pinterest.com/", "errorType": "response_url", "errorUrl": "https://www.pinterest.com/?show_error=true" }, "GitHub": { "url": "https://www.github.com/{}", + "urlMain": "https://www.github.com/", "errorType": "status_code", "regexCheck": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$" }, "Steam": { "url": "https://steamcommunity.com/id/{}", + "urlMain": "https://steamcommunity.com/", "errorType": "message", "errorMsg": "The specified profile could not be found" }, "Vimeo": { "url": "https://vimeo.com/{}", + "urlMain": "https://vimeo.com/", "errorType": "message", "errorMsg": "404 Not Found" }, "SoundCloud": { "url": "https://soundcloud.com/{}", + "urlMain": "https://soundcloud.com/", "errorType": "status_code" }, "Disqus": { "url": "https://disqus.com/{}", + "urlMain": "https://disqus.com/", "errorType": "status_code" }, "Medium": { "url": "https://medium.com/@{}", + "urlMain": "https://medium.com/", "errorType": "status_code" }, "DeviantART": { 
"url": "https://{}.deviantart.com", + "urlMain": "https://deviantart.com", "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "VK": { "url": "https://vk.com/{}", + "urlMain": "https://vk.com/", "errorType": "status_code" }, "About.me": { "url": "https://about.me/{}", + "urlMain": "https://about.me/", "errorType": "status_code" }, "Imgur": { "url": "https://imgur.com/user/{}", + "urlMain": "https://imgur.com/", "errorType": "status_code" }, "Flipboard": { "url": "https://flipboard.com/@{}", + "urlMain": "https://flipboard.com/", "errorType": "message", "errorMsg": "loading" }, "SlideShare": { "url": "https://slideshare.net/{}", + "urlMain": "https://slideshare.net/", "errorType": "status_code" }, "Fotolog": { "url": "https://fotolog.com/{}", + "urlMain": "https://fotolog.com/", "errorType": "status_code" }, "Spotify": { "url": "https://open.spotify.com/user/{}", + "urlMain": "https://open.spotify.com/", "errorType": "status_code" }, "MixCloud": { "url": "https://www.mixcloud.com/{}", + "urlMain": "https://www.mixcloud.com/", "errorType": "message", "errorMsg": "Page Not Found" }, "Scribd": { "url": "https://www.scribd.com/{}", + "urlMain": "https://www.scribd.com/", "errorType": "message", "errorMsg": "Page not found" }, "Patreon": { "url": "https://www.patreon.com/{}", + "urlMain": "https://www.patreon.com/", "errorType": "status_code" }, "BitBucket": { "url": "https://bitbucket.org/{}", + "urlMain": "https://bitbucket.org/", "errorType": "status_code" }, "Roblox": { "url": "https://www.roblox.com/user.aspx?username={}", + "urlMain": "https://www.roblox.com/", "errorType": "message", "errorMsg": "Page cannot be found or no longer exists" }, "Gravatar": { "url": "http://en.gravatar.com/{}", + "urlMain": "http://en.gravatar.com/", "errorType": "status_code" }, "iMGSRC.RU": { "url": "https://imgsrc.ru/main/user.php?user={}", + "urlMain": "https://imgsrc.ru/", "errorType": "response_url", "errorUrl": "https://imgsrc.ru/" }, "DailyMotion": { "url": "https://www.dailymotion.com/{}", + "urlMain": "https://www.dailymotion.com/", "errorType": "message", "errorMsg": "Page not found" }, "Etsy": { "url": "https://www.etsy.com/shop/{}", + "urlMain": "https://www.etsy.com/", "errorType": "status_code" }, "CashMe": { "url": "https://cash.me/{}", + "urlMain": "https://cash.me/", "errorType": "status_code" }, "Behance": { "url": "https://www.behance.net/{}", + "urlMain": "https://www.behance.net/", "errorType": "message", "errorMsg": "Oops! We can’t find that page." 
}, "GoodReads": { "url": "https://www.goodreads.com/{}", + "urlMain": "https://www.goodreads.com/", "errorType": "status_code" }, "Instructables": { "url": "https://www.instructables.com/member/{}", + "urlMain": "https://www.instructables.com/", "errorType": "message", "errorMsg": "404: We're sorry, things break sometimes" }, "Keybase": { "url": "https://keybase.io/{}", + "urlMain": "https://keybase.io/", "errorType": "status_code" }, "Kongregate": { "url": "https://www.kongregate.com/accounts/{}", + "urlMain": "https://www.kongregate.com/", "errorType": "message", "errorMsg": "Sorry, no account with that name was found.", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "LiveJournal": { "url": "https://{}.livejournal.com", + "urlMain": "https://www.livejournal.com/", "errorType": "message", "errorMsg": "Unknown Journal", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "VSCO": { "url": "https://vsco.co/{}", + "urlMain": "https://vsco.co/", "errorType": "status_code" }, "AngelList": { "url": "https://angel.co/{}", + "urlMain": "https://angel.co/", "errorType": "message", "errorMsg": "We couldn't find what you were looking for." }, "last.fm": { "url": "https://last.fm/user/{}", + "urlMain": "https://last.fm/", "errorType": "message", "errorMsg": "Whoops! Sorry, but this page doesn't exist." }, "Dribbble": { "url": "https://dribbble.com/{}", + "urlMain": "https://dribbble.com/", "errorType": "message", "errorMsg": "Whoops, that page is gone.", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Codecademy": { "url": "https://www.codecademy.com/{}", + "urlMain": "https://www.codecademy.com/", "errorType": "message", "errorMsg": "404 error" }, "Pastebin": { "url": "https://pastebin.com/u/{}", + "urlMain": "https://pastebin.com/", "errorType": "response_url", "errorUrl": "https://pastebin.com/index" }, "Foursquare": { "url": "https://foursquare.com/{}", + "urlMain": "https://foursquare.com/", "errorType": "status_code" }, "Gumroad": { "url": "https://www.gumroad.com/{}", + "urlMain": "https://www.gumroad.com/", "errorType": "message", "errorMsg": "Page not found." }, "Newgrounds": { "url": "https://{}.newgrounds.com", + "urlMain": "https://newgrounds.com", "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Wattpad": { "url": "https://www.wattpad.com/user/{}", + "urlMain": "https://www.wattpad.com/", "errorType": "message", "errorMsg": "This page seems to be missing..." }, "Canva": { "url": "https://www.canva.com/{}", + "urlMain": "https://www.canva.com/", "errorType": "message", "errorMsg": "Not found (404)" }, "Trakt": { "url": "https://www.trakt.tv/users/{}", + "urlMain": "https://www.trakt.tv/", "errorType": "message", "errorMsg": "404" }, "500px": { "url": "https://500px.com/{}", + "urlMain": "https://500px.com/", "errorType": "message", "errorMsg": "Sorry, no such page." }, "BuzzFeed": { "url": "https://buzzfeed.com/{}", + "urlMain": "https://buzzfeed.com/", "errorType": "message", "errorMsg": "We can't find the page you're looking for." }, "TripAdvisor": { "url": "https://tripadvisor.com/members/{}", + "urlMain": "https://tripadvisor.com/", "errorType": "message", "errorMsg": "This page is on vacation…" }, "Contently": { "url": "https://{}.contently.com/", + "urlMain": "https://contently.com/", "errorType": "message", "errorMsg": "We can't find that page!", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Houzz": { "url": "https://houzz.com/user/{}", + "urlMain": "https://houzz.com/", "errorType": "message", "errorMsg": "The page you requested was not found." 
}, "BLIP.fm": { "url": "https://blip.fm/{}", + "urlMain": "https://blip.fm/", "errorType": "message", "errorMsg": "Page Not Found" }, "HackerNews": { "url": "https://news.ycombinator.com/user?id={}", + "urlMain": "https://news.ycombinator.com/", "errorType": "message", "errorMsg": "No such user." }, "Codementor": { "url": "https://www.codementor.io/{}", + "urlMain": "https://www.codementor.io/", "errorType": "message", "errorMsg": "404" }, "ReverbNation": { "url": "https://www.reverbnation.com/{}", + "urlMain": "https://www.reverbnation.com/", "errorType": "message", "errorMsg": "Sorry, we couldn't find that page" }, "Designspiration": { "url": "https://www.designspiration.net/{}", + "urlMain": "https://www.designspiration.net/", "errorType": "message", "errorMsg": "Content Not Found" }, "Bandcamp": { "url": "https://www.bandcamp.com/{}", + "urlMain": "https://www.bandcamp.com/", "errorType": "message", "errorMsg": "Sorry, that something isn’t here" }, "ColourLovers": { "url": "https://www.colourlovers.com/love/{}", + "urlMain": "https://www.colourlovers.com/", "errorType": "message", "errorMsg": "Page Not Loved" }, "IFTTT": { "url": "https://www.ifttt.com/p/{}", + "urlMain": "https://www.ifttt.com/", "errorType": "message", "errorMsg": "The requested page or file does not exist" }, "Ebay": { "url": "https://www.ebay.com/usr/{}", + "urlMain": "https://www.ebay.com/", "errorType": "message", "errorMsg": "The User ID you entered was not found" }, "Slack": { "url": "https://{}.slack.com", + "urlMain": "https://slack.com", "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Trip": { "url": "https://www.trip.skyscanner.com/user/{}", + "urlMain": "https://www.trip.skyscanner.com/", "errorType": "message", "errorMsg": "Page not found" }, "Ello": { "url": "https://ello.co/{}", + "urlMain": "https://ello.co/", "errorType": "message", "errorMsg": "We couldn't find the page you're looking for" }, "HackerOne": { "url": "https://hackerone.com/{}", + "urlMain": "https://hackerone.com/", "errorType": "message", "errorMsg": "Page not found" }, "Tinder": { "url": "https://www.gotinder.com/@{}", + "urlMain": "https://tinder.com/", "errorType": "message", "errorMsg": "Looking for Someone?" }, "We Heart It": { "url": "https://weheartit.com/{}", + "urlMain": "https://weheartit.com/", "errorType": "message", "errorMsg": "Oops! You've landed on a moving target!" }, "Flickr": { "url": "https://www.flickr.com/people/{}", + "urlMain": "https://www.flickr.com/", "errorType": "status_code" }, "WordPress": { "url": "https://{}.wordpress.com", + "urlMain": "https://wordpress.com", "errorType": "response_url", "errorUrl": "wordpress.com/typo/?subdomain=", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$" }, "Unsplash": { "url": "https://unsplash.com/@{}", + "urlMain": "https://unsplash.com/", "errorType": "status_code" }, "Pexels": { "url": "https://www.pexels.com/@{}", + "urlMain": "https://www.pexels.com/", "errorType": "message", "errorMsg": "Ouch, something went wrong!" 
}, "devRant": { "url": "https://devrant.com/users/{}", + "urlMain": "https://devrant.com/", "errorType": "response_url", "errorUrl": "https://devrant.com/" }, "MyAnimeList": { "url": "https://myanimelist.net/profile/{}", + "urlMain": "https://myanimelist.net/", "errorType": "status_code" }, "ImageShack": { "url": "https://imageshack.us/user/{}", + "urlMain": "https://imageshack.us/", "errorType": "response_url", "errorUrl": "https://imageshack.us/" }, "Badoo": { "url": "https://badoo.com/profile/{}", + "urlMain": "https://badoo.com/", "errorType": "status_code" }, "MeetMe": { "url": "https://www.meetme.com/{}", + "urlMain": "https://www.meetme.com/", "errorType": "response_url", "errorUrl": "https://www.meetme.com/" + }, + "Quora": { + "url": "https://www.quora.com/profile/{}", + "urlMain": "https://www.quora.com/", + "errorType": "status_code" } } diff --git a/sherlock.py b/sherlock.py index 25bf1d5a..bb4aca26 100644 --- a/sherlock.py +++ b/sherlock.py @@ -8,6 +8,7 @@ import json import os import sys import re +import csv from argparse import ArgumentParser, RawDescriptionHelpFormatter import platform from torrequest import TorRequest @@ -50,6 +51,29 @@ def make_request(url, headers, error_type, social_network, verbose=False, tor=Fa def sherlock(username, verbose=False, tor=False, unique_tor=False): + """Run Sherlock Analysis. + + Checks for existence of username on various social media sites. + + Keyword Arguments: + username -- String indicating username that report + should be created against. + verbose -- Boolean indicating whether to give verbose output. + tor -- Boolean indicating whether to use a tor circuit for the requests. + unique_tor -- Boolean indicating whether to use a new tor circuit for each request. + + Return Value: + Dictionary containing results from report. Key of dictionary is the name + of the social network site, and the value is another dictionary with + the following keys: + url_main: URL of main site. + url_user: URL of user on site (if account exists). + exists: String indicating results of test for account existence. + http_status: HTTP status code of query which checked for existence on + site. + response_text: Text that came back from request. May be None if + there was an HTTP error when checking for existence. + """ fname = username+".txt" if os.path.isfile(fname): @@ -67,52 +91,92 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0' } + # Results from analysis of all sites + results_total = {} for social_network in data: + # Results from analysis of this specific site + results_site = {} + + # Record URL of main site + results_site['url_main'] = data.get(social_network).get("urlMain") + + # URL of user on site (if it exists) url = data.get(social_network).get("url").format(username) + results_site['url_user'] = url + error_type = data.get(social_network).get("errorType") regex_check = data.get(social_network).get("regexCheck") + # Default data in case there are any failures in doing a request. + http_status = "?" + response_text = "" + if regex_check and re.search(regex_check, username) is None: #No need to do the check at the site: this user name is not allowed. 
print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Illegal Username Format For This Site!".format(social_network)) - continue - - r, error_type = make_request(url=url, headers=headers, error_type=error_type, social_network=social_network, verbose=verbose, tor=tor, unique_tor=unique_tor) - - if error_type == "message": - error = data.get(social_network).get("errorMsg") - # Checks if the error message is in the HTML - if not error in r.text: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - - elif error_type == "status_code": - # Checks if the status code of the repsonse is 404 - if not r.status_code == 404: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - - elif error_type == "response_url": - error = data.get(social_network).get("errorUrl") - # Checks if the redirect url is the same as the one defined in data.json - if not error in r.url: - print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) - write_to_file(url, fname) - else: - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) - - elif error_type == "": - print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Error!".format(social_network)) + exists = "illegal" + else: + r, error_type = make_request(url=url, headers=headers, error_type=error_type, social_network=social_network, verbose=verbose, tor=tor, unique_tor=unique_tor) + + # Attempt to get request information + try: + http_status = r.status_code + except: + pass + try: + response_text = r.text.encode(r.encoding) + except: + pass + + if error_type == "message": + error = data.get(social_network).get("errorMsg") + # Checks if the error message is in the HTML + if not error in r.text: + print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) + write_to_file(url, fname) + exists = "yes" + else: + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + exists = "no" + + elif error_type == "status_code": + # Checks if the status code of the response is 404 + if not r.status_code == 404: + print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) + write_to_file(url, fname) + exists = "yes" + else: + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + exists = "no" + + elif error_type == "response_url": + error = data.get(social_network).get("errorUrl") + # Checks if the redirect url is the same as the one defined in data.json + if not error in r.url: + print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) + write_to_file(url, fname) + exists = "yes" + else: + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) + exists = "no" + + elif error_type == "": + print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Error!".format(social_network)) + exists = "error" + + # Save exists flag + results_site['exists'] = exists + + # Save results from request + results_site['http_status'] = http_status + results_site['response_text'] = response_text + + # Add 
this site's results into final dictionary with all of the other results. + results_total[social_network] = results_site print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{}\033[0m".format(username+".txt")) - return + return results_total def main(): @@ -141,6 +205,10 @@ def main(): parser.add_argument("--unique-tor", "-u", action="store_true", dest="unique_tor", default=False, help="Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.") + parser.add_argument("--csv", + action="store_true", dest="csv", default=False, + help="Create Comma-Separated Values (CSV) File." + ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -167,9 +235,28 @@ def main(): # Run report on all specified users. for username in args.username: print() - sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) - - + results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor) + + if args.csv == True: + with open(username + ".csv", "w", newline='') as csv_report: + writer = csv.writer(csv_report) + writer.writerow(['username', + 'name', + 'url_main', + 'url_user', + 'exists', + 'http_status' + ] + ) + for site in results: + writer.writerow([username, + site, + results[site]['url_main'], + results[site]['url_user'], + results[site]['exists'], + results[site]['http_status'] + ] + ) if __name__ == "__main__": - main() \ No newline at end of file + main()
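
For reference, each `data.json` entry pairs a user-page URL template (and the new `urlMain` field) with one of three detection strategies: `message`, `status_code`, or `response_url`. Below is a minimal, illustrative sketch of how one entry could be evaluated. The `check_site` helper is hypothetical and not part of this patch, and it calls `requests` directly instead of the Tor-aware `make_request` used in `sherlock.py`:

```python
import re
import requests

def check_site(entry, username):
    """Illustrative-only sketch of the detection strategies in data.json.

    `entry` is one site's dictionary from data.json; returns True when the
    username appears to exist. The real logic lives in sherlock().
    """
    regex_check = entry.get("regexCheck")
    if regex_check and re.search(regex_check, username) is None:
        # Username is not legal on this site, so no request is needed.
        return False

    url = entry["url"].format(username)
    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})

    error_type = entry["errorType"]
    if error_type == "message":
        # Exists when the known error message is absent from the HTML.
        return entry["errorMsg"] not in r.text
    elif error_type == "status_code":
        # Exists when the user page does not come back as 404.
        return r.status_code != 404
    elif error_type == "response_url":
        # Exists when we were not redirected to the known error URL.
        return entry["errorUrl"] not in r.url
    return False
```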
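
Because `sherlock()` now returns the results dictionary described in its docstring instead of `None`, other scripts can import it and post-process the results. A small usage sketch, assuming `sherlock.py` is importable from the current directory and using a hypothetical username `alice`:

```python
from sherlock import sherlock

# Keys are site names; each value carries url_main, url_user, exists,
# http_status and response_text, per the docstring. Note that sherlock()
# also prints progress to the terminal and writes <username>.txt.
results = sherlock("alice", verbose=False, tor=False, unique_tor=False)
found = [site for site, info in results.items() if info["exists"] == "yes"]
print("Found {} accounts: {}".format(len(found), ", ".join(sorted(found))))
```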
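
The `--csv` option writes one `<username>.csv` report per queried username, with the column order given by the `writer.writerow` header above (`username`, `name`, `url_main`, `url_user`, `exists`, `http_status`). As a quick sanity check, the report can be read back with the standard library; this sketch assumes a report for a hypothetical user `alice` already exists:

```python
import csv

# Read the report produced by `python3 sherlock.py --csv alice` and list hits.
with open("alice.csv", newline="") as csv_report:
    for row in csv.DictReader(csv_report):
        if row["exists"] == "yes":
            print("{name}: {url_user} (HTTP {http_status})".format(**row))
```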