From 8e8baac9c283bf8eda6457dddedd25bd8e5b7885 Mon Sep 17 00:00:00 2001 From: Siddharth Dushantha Date: Sun, 12 Mar 2023 21:50:16 +0100 Subject: [PATCH] generate unclaimed username based on regex --- sherlock/resources/data.json | 1 + sherlock/tests/all.py | 80 +++++++++++------------------------- sherlock/tests/base.py | 43 ++++++++++--------- 3 files changed, 47 insertions(+), 77 deletions(-) diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index 709e940f..5f98c339 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -94,6 +94,7 @@ "username_claimed": "pink" }, "AllMyLinks": { + "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$", "errorMsg": "Not Found", "errorType": "message", "url": "https://allmylinks.com/{}", diff --git a/sherlock/tests/all.py b/sherlock/tests/all.py index 4a2b78b6..7943fab4 100644 --- a/sherlock/tests/all.py +++ b/sherlock/tests/all.py @@ -3,7 +3,7 @@ This module contains various tests. """ from tests.base import SherlockBaseTest -import secrets +import exrex class SherlockDetectTests(SherlockBaseTest): @@ -27,10 +27,7 @@ class SherlockDetectTests(SherlockBaseTest): # Ensure that the site's detection method has not changed. self.assertEqual("message", site_data["errorType"]) - self.username_check([site_data["username_claimed"]], - [site], - exist_check=True - ) + self.username_check([site_data["username_claimed"]], [site], exist_check=True) return @@ -54,10 +51,16 @@ class SherlockDetectTests(SherlockBaseTest): # Ensure that the site's detection method has not changed. self.assertEqual("message", site_data["errorType"]) - self.username_check([secrets.token_urlsafe(10)], - [site], - exist_check=False - ) + # Generate a valid username based on the regex for a username that the + # site supports that is *most likely* not taken. The regex is a slightly + # modified version of site_data["regexCheck"] as we want a username + # that has the maximum length that is supported by the site. 
This way, + # we won't generate a random username that might actually exist. This + # method is very hacky, but it does the job as having hardcoded + # usernames that don't exist will lead to people with ill intent to + # create an account with that username which will break the tests + valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$") + self.username_check([valid_username], [site], exist_check=False) return @@ -75,16 +78,13 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - site = "9GAG" + site = "BitBucket" site_data = self.site_data_all[site] # Ensure that the site's detection method has not changed. self.assertEqual("status_code", site_data["errorType"]) - self.username_check([site_data["username_claimed"]], - [site], - exist_check=True - ) + self.username_check([site_data["username_claimed"]], [site], exist_check=True) return @@ -102,57 +102,27 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - site = "9GAG" + site = "BitBucket" site_data = self.site_data_all[site] # Ensure that the site's detection method has not changed. self.assertEqual("status_code", site_data["errorType"]) - self.username_check([secrets.token_urlsafe(10)], - [site], - exist_check=False - ) + # Generate a valid username based on the regex for a username that the + # site supports that is *most likely* not taken. The regex is a slightly + # modified version of site_data["regexCheck"] as we want a username + # that has the maximum length that is supported by the site. This way, + # we won't generate a random username that might actually exist. 
This + # method is very hacky, but it does the job as having hardcoded + # usernames that don't exist will lead to people with ill intent to + # create an account with that username which will break the tests + valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}") + self.username_check([valid_username], [site], exist_check=False) return class SherlockSiteCoverageTests(SherlockBaseTest): - def test_coverage_false_via_response_url(self): - """Test Username Does Not Exist Site Coverage (Via Response URL). - - This test checks all sites with the "response URL" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("response_url", exist_check=False) - - return - - def test_coverage_true_via_response_url(self): - """Test Username Does Exist Site Coverage (Via Response URL). - - This test checks all sites with the "response URL" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("response_url", exist_check=True) - - return - def test_coverage_false_via_status(self): """Test Username Does Not Exist Site Coverage (Via HTTP Status). diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index be87ceee..de958b9d 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -7,9 +7,8 @@ import os.path import unittest import sherlock from result import QueryStatus -from result import QueryResult from notify import QueryNotify -from sites import SitesInformation +from sites import SitesInformation import warnings @@ -26,16 +25,16 @@ class SherlockBaseTest(unittest.TestCase): Nothing. 
""" - #This ignores the ResourceWarning from an unclosed SSLSocket. - #TODO: Figure out how to fix the code so this is not needed. + # This ignores the ResourceWarning from an unclosed SSLSocket. + # TODO: Figure out how to fix the code so this is not needed. warnings.simplefilter("ignore", ResourceWarning) - #Create object with all information about sites we are aware of. - sites = SitesInformation() + # Create object with all information about sites we are aware of. + sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json")) - #Create original dictionary from SitesInformation() object. - #Eventually, the rest of the code will be updated to use the new object - #directly, but this will glue the two pieces together. + # Create original dictionary from SitesInformation() object. + # Eventually, the rest of the code will be updated to use the new object + # directly, but this will glue the two pieces together. site_data_all = {} for site in sites: site_data_all[site.name] = site.information @@ -44,18 +43,18 @@ class SherlockBaseTest(unittest.TestCase): # Load excluded sites list, if any excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites") try: - with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file: - self.excluded_sites = excluded_sites_file.read().splitlines() + with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file: + self.excluded_sites = excluded_sites_file.read().splitlines() except FileNotFoundError: - self.excluded_sites = [] + self.excluded_sites = [] - #Create notify object for query results. + # Create notify object for query results. 
self.query_notify = QueryNotify() - self.tor=False - self.unique_tor=False - self.timeout=None - self.skip_error_sites=True + self.tor = False + self.unique_tor = False + self.timeout = None + self.skip_error_sites = True return @@ -102,7 +101,7 @@ class SherlockBaseTest(unittest.TestCase): existence state. """ - #Filter all site data down to just what is needed for this test. + # Filter all site data down to just what is needed for this test. site_data = self.site_data_filter(site_list) if exist_check: @@ -161,8 +160,8 @@ class SherlockBaseTest(unittest.TestCase): existence state. """ - #Dictionary of sites that should be tested for having a username. - #This will allow us to test sites with a common username in parallel. + # Dictionary of sites that should be tested for having a username. + # This will allow us to test sites with a common username in parallel. sites_by_username = {} for site, site_data in self.site_data_all.items(): @@ -181,9 +180,9 @@ class SherlockBaseTest(unittest.TestCase): # Figure out which type of user if exist_check: - username = site_data.get("username_claimed") + username = site_data.get("username_claimed") else: - username = site_data.get("username_unclaimed") + username = site_data.get("username_unclaimed") # Add this site to the list of sites corresponding to this # username.