Merge pull request #1730 from sherlock-project/enhanced-unit-tests

generate unclaimed username based on regex
2 years ago · 64eec640d7
parent cbad2ff44e 42f825b35d
commit 64eec640d7
5 changed files with 49 additions and 78 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -7,3 +7,4 @@ stem>=1.8.0
 torrequest>=0.1.0
 pandas>=1.0.0
 openpyxl<=3.0.10
+exrex>=0.11.0
--- a/sherlock/resources/data.json
+++ b/sherlock/resources/data.json
@ -94,6 +94,7 @@
    "username_claimed": "pink"
  },
  "AllMyLinks": {
+    "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
    "errorMsg": "Not Found",
    "errorType": "message",
    "url": "https://allmylinks.com/{}",
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@ -28,7 +28,7 @@ from sites import SitesInformation
 from colorama import init

 module_name = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.14.2"
+__version__ = "0.14.3"


 class SherlockFuturesSession(FuturesSession):
--- a/sherlock/tests/all.py
+++ b/sherlock/tests/all.py
@ -3,7 +3,7 @@
 This module contains various tests.
 """
 from tests.base import SherlockBaseTest
-import secrets
+import exrex


 class SherlockDetectTests(SherlockBaseTest):
@ -27,10 +27,7 @@ class SherlockDetectTests(SherlockBaseTest):
        # Ensure that the site's detection method has not changed.
        self.assertEqual("message", site_data["errorType"])

-        self.username_check([site_data["username_claimed"]],
-                            [site],
-                            exist_check=True
-                            )
+        self.username_check([site_data["username_claimed"]], [site], exist_check=True)

        return

@ -54,10 +51,16 @@ class SherlockDetectTests(SherlockBaseTest):
        # Ensure that the site's detection method has not changed.
        self.assertEqual("message", site_data["errorType"])

-        self.username_check([secrets.token_urlsafe(10)],
-                            [site],
-                            exist_check=False
-                            )
+        # Generate a valid username based on the regex for a username that the
+        # site supports that is *most likely* not taken. The regex is a slightly
+        # modified version of site_data["regexCheck"], as we want a username
+        # that has the maximum length that is supported by the site. This way,
+        # we won't generate a random username that might actually exist. This
+        # method is very hacky, but it does the job, as hardcoded usernames
+        # that don't exist would invite people with ill intent to create an
+        # account with that username, which would break the tests.
+        valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$")
+        self.username_check([valid_username], [site], exist_check=False)

        return

@ -75,16 +78,13 @@ class SherlockDetectTests(SherlockBaseTest):
        Will trigger an assert if detection mechanism did not work as expected.
        """

-        site = "9GAG"
+        site = "BitBucket"
        site_data = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual("status_code", site_data["errorType"])

-        self.username_check([site_data["username_claimed"]],
-                            [site],
-                            exist_check=True
-                            )
+        self.username_check([site_data["username_claimed"]], [site], exist_check=True)

        return

@ -102,57 +102,27 @@ class SherlockDetectTests(SherlockBaseTest):
        Will trigger an assert if detection mechanism did not work as expected.
        """

-        site = "9GAG"
+        site = "BitBucket"
        site_data = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual("status_code", site_data["errorType"])

-        self.username_check([secrets.token_urlsafe(10)],
-                            [site],
-                            exist_check=False
-                            )
+        # Generate a valid username based on the regex for a username that the
+        # site supports that is *most likely* not taken. The regex is a slightly
+        # modified version of site_data["regexCheck"], as we want a username
+        # that has the maximum length that is supported by the site. This way,
+        # we won't generate a random username that might actually exist. This
+        # method is very hacky, but it does the job, as hardcoded usernames
+        # that don't exist would invite people with ill intent to create an
+        # account with that username, which would break the tests.
+        valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}") 
+        self.username_check([valid_username], [site], exist_check=False)

        return


 class SherlockSiteCoverageTests(SherlockBaseTest):
-    def test_coverage_false_via_response_url(self):
-        """Test Username Does Not Exist Site Coverage (Via Response URL).
-
-        This test checks all sites with the "response URL" detection mechanism
-        to ensure that a Username that does not exist is reported that way.
-
-        Keyword Arguments:
-        self                   -- This object.
-
-        Return Value:
-        Nothing.
-        Will trigger an assert if detection mechanism did not work as expected.
-        """
-
-        self.detect_type_check("response_url", exist_check=False)
-
-        return
-
-    def test_coverage_true_via_response_url(self):
-        """Test Username Does Exist Site Coverage (Via Response URL).
-
-        This test checks all sites with the "response URL" detection mechanism
-        to ensure that a Username that does exist is reported that way.
-
-        Keyword Arguments:
-        self                   -- This object.
-
-        Return Value:
-        Nothing.
-        Will trigger an assert if detection mechanism did not work as expected.
-        """
-
-        self.detect_type_check("response_url", exist_check=True)
-
-        return
-
    def test_coverage_false_via_status(self):
        """Test Username Does Not Exist Site Coverage (Via HTTP Status).

--- a/sherlock/tests/base.py
+++ b/sherlock/tests/base.py
@ -7,9 +7,8 @@ import os.path
 import unittest
 import sherlock
 from result import QueryStatus
-from result import QueryResult
 from notify import QueryNotify
-from sites  import SitesInformation
+from sites import SitesInformation
 import warnings


@ -26,16 +25,16 @@ class SherlockBaseTest(unittest.TestCase):
        Nothing.
        """

-        #This ignores the ResourceWarning from an unclosed SSLSocket.
-        #TODO: Figure out how to fix the code so this is not needed.
+        # This ignores the ResourceWarning from an unclosed SSLSocket.
+        # TODO: Figure out how to fix the code so this is not needed.
        warnings.simplefilter("ignore", ResourceWarning)

-        #Create object with all information about sites we are aware of.
-        sites = SitesInformation()
+        # Create object with all information about sites we are aware of.
+        sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json"))

-        #Create original dictionary from SitesInformation() object.
-        #Eventually, the rest of the code will be updated to use the new object
-        #directly, but this will glue the two pieces together.
+        # Create original dictionary from SitesInformation() object.
+        # Eventually, the rest of the code will be updated to use the new object
+        # directly, but this will glue the two pieces together.
        site_data_all = {}
        for site in sites:
            site_data_all[site.name] = site.information
@ -44,18 +43,18 @@ class SherlockBaseTest(unittest.TestCase):
        # Load excluded sites list, if any
        excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites")
        try:
-          with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
-            self.excluded_sites = excluded_sites_file.read().splitlines()
+            with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
+                self.excluded_sites = excluded_sites_file.read().splitlines()
        except FileNotFoundError:
-          self.excluded_sites = []
+            self.excluded_sites = []

-        #Create notify object for query results.
+        # Create notify object for query results.
        self.query_notify = QueryNotify()

-        self.tor=False
-        self.unique_tor=False
-        self.timeout=None
-        self.skip_error_sites=True
+        self.tor = False
+        self.unique_tor = False
+        self.timeout = None
+        self.skip_error_sites = True

        return

@ -102,7 +101,7 @@ class SherlockBaseTest(unittest.TestCase):
        existence state.
        """

-        #Filter all site data down to just what is needed for this test.
+        # Filter all site data down to just what is needed for this test.
        site_data = self.site_data_filter(site_list)

        if exist_check:
@ -161,8 +160,8 @@ class SherlockBaseTest(unittest.TestCase):
        existence state.
        """

-        #Dictionary of sites that should be tested for having a username.
-        #This will allow us to test sites with a common username in parallel.
+        # Dictionary of sites that should be tested for having a username.
+        # This will allow us to test sites with a common username in parallel.
        sites_by_username = {}

        for site, site_data in self.site_data_all.items():
@ -181,9 +180,9 @@ class SherlockBaseTest(unittest.TestCase):

                # Figure out which type of user
                if exist_check:
-                     username = site_data.get("username_claimed")
+                    username = site_data.get("username_claimed")
                else:
-                     username = site_data.get("username_unclaimed")
+                    username = site_data.get("username_unclaimed")

                # Add this site to the list of sites corresponding to this
                # username.