Add module to store information about the sites. This handles getting the information loaded from the JSON file. For now, use the new SitesInformation() object to calculate the original JSON dictionary: the rest of the code will be updated in the future.

5 years ago · 7f87f5fcc4
parent 647aea577c
commit 7f87f5fcc4
2 changed files with 239 additions and 33 deletions
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@ -23,6 +23,7 @@ from requests_futures.sessions import FuturesSession
 from torrequest import TorRequest
 from result import QueryStatus
 from result import QueryResult
+from sites  import SitesInformation

 module_name = "Sherlock: Find Usernames Across Social Networks"
 __version__ = "0.10.0"
@ -499,7 +500,7 @@ def main():
                        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
                        )
    parser.add_argument("--json", "-j", metavar="JSON_FILE",
-                        dest="json_file", default="resources/data.json",
+                        dest="json_file", default=None,
                        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument("--timeout",
                        action="store", metavar='TIMEOUT',
@ -549,41 +550,20 @@ def main():
        print("You can only use --output with a single username")
        sys.exit(1)

-    response_json_online = None
-    site_data_all = None

-    # Try to load json from website.
+    #Create object with all information about sites we are aware of.
    try:
-        response_json_online = requests.get(url=args.json_file)
-    except requests.exceptions.MissingSchema:  # In case the schema is wrong it's because it may not be a website
-        pass
+        sites = SitesInformation(args.json_file)
+    except Exception as error:
+        print(f"ERROR:  {error}")
+        sys.exit(1)

-    # Check if the response is appropriate.
-    if response_json_online is not None and response_json_online.status_code == 200:
-        # Since we got data from a website, try to load json and exit if parsing fails.
-        try:
-            site_data_all = response_json_online.json()
-        except ValueError:
-            print("Invalid JSON from website!")
-            sys.exit(1)
-            pass
-
-    data_file_path = os.path.join(os.path.dirname(
-        os.path.realpath(__file__)), args.json_file)
-    # This will be none if the request had a missing schema
-    if site_data_all is None:
-        # Check if the file exists otherwise exit.
-        if not os.path.exists(data_file_path):
-            print("JSON file doesn't exist.")
-            print(
-                "If this is not a file but a website, make sure you have appended http:// or https://.")
-            sys.exit(1)
-        else:
-            raw = open(data_file_path, "r", encoding="utf-8")
-            try:
-                site_data_all = json.load(raw)
-            except:
-                print("Invalid JSON loaded from file.")
+    #Create original dictionary from SitesInformation() object.
+    #Eventually, the rest of the code will be updated to use the new object
+    #directly, but this will glue the two pieces together.
+    site_data_all = {}
+    for site in sites:
+        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
--- a/sherlock/sites.py
+++ b/sherlock/sites.py
@ -0,0 +1,226 @@
+"""Sherlock Sites Information Module
+
+This module supports storing information about web sites.
+This is the raw data that will be used to search for usernames.
+"""
+import logging
+import os
+import json
+import requests
+
+
+class SiteInformation():
+    """Container for the information known about one specific web site."""
+    def __init__(self, name, url_home, url_username_format,
+                 username_claimed, username_unclaimed,
+                 information):
+        """Create Site Information Object.
+
+        Stores each argument, unmodified, as an attribute of the same name.
+
+        Keyword Arguments:
+        self                   -- This object.
+        name                   -- String which identifies site.
+        url_home               -- String containing URL for home of site.
+        url_username_format    -- String containing URL for Username format
+                                  on site.
+                                  NOTE:  The string should contain the
+                                         token "{}" where the username should
+                                         be substituted.  For example,
+                                         "https://somesite.com/users/{}"
+                                         means that individual usernames
+                                         show up under the
+                                         "https://somesite.com/users/" area.
+        username_claimed       -- String containing username which is known
+                                  to be claimed on web site.
+        username_unclaimed     -- String containing username which is known
+                                  to be unclaimed on web site.
+        information            -- Dictionary containing all known information
+                                  about web site (stored by reference).
+                                  NOTE:  Custom information about how to
+                                         actually detect the existence of the
+                                         username will be included in this
+                                         dictionary.  This information will
+                                         be needed by the detection method,
+                                         but it is only recorded in this
+                                         object for future use.
+
+        Return Value:
+        Nothing.
+        """
+
+        self.name                = name
+        self.url_home            = url_home
+        self.url_username_format = url_username_format
+        self.username_claimed    = username_claimed
+        self.username_unclaimed  = username_unclaimed
+        self.information         = information
+
+        return
+
+    def __str__(self):
+        """Convert Object To String.
+
+        Keyword Arguments:
+        self                   -- This object.
+
+        Return Value:
+        String of the form "<name> (<url_home>)".
+        """
+
+        return f"{self.name} ({self.url_home})"
+
+
+class SitesInformation():
+    """Collection of SiteInformation objects built from JSON site data."""
+    def __init__(self, data_file_path=None):
+        """Create Sites Information Object.
+
+        Contains information about all supported web sites.
+
+        Keyword Arguments:
+        self                   -- This object.
+        data_file_path         -- String which indicates path to data file.
+                                  The file name must end in ".json".
+
+                                  There are 3 possible formats:
+                                   * Absolute File Format
+                                     For example, "c:/stuff/data.json".
+                                   * Relative File Format
+                                     Uses the current working directory.
+                                     For example, "data.json".
+                                   * URL Format
+                                     For example,
+                                     "https://example.com/data.json", or
+                                     "http://example.com/data.json".
+
+                                  Raises FileNotFoundError for a wrong
+                                  extension, a failed URL request, a non-200
+                                  response or a missing file; ValueError
+                                  for bad JSON or a missing site attribute.
+
+                                  If omitted, "resources/data.json" in this
+                                  module's own directory is loaded.
+
+        Return Value:
+        Nothing.
+        """
+
+        if data_file_path is None:
+            #Use internal default.
+            data_file_path = \
+                os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                             "resources/data.json"
+                            )
+
+        #Ensure path (file or URL) ends in ".json", case-insensitively.
+        if ".json" != data_file_path[-5:].lower():
+            raise FileNotFoundError(f"Incorrect JSON file extension for "
+                                    f"data file '{data_file_path}'."
+                                   )
+
+        if ( ("http://"  == data_file_path[:7].lower()) or
+             ("https://" == data_file_path[:8].lower())
+           ):
+            #URL reference.  NOTE(review): get() is called without a timeout.
+            try:
+                response = requests.get(url=data_file_path)
+            except Exception as error:
+                raise FileNotFoundError(f"Problem while attempting to access "
+                                        f"data file URL '{data_file_path}':  "
+                                        f"{str(error)}"
+                                       )
+            if response.status_code == 200:
+                try:
+                    site_data = response.json()
+                except Exception as error:
+                    raise ValueError(f"Problem parsing json contents at "
+                                     f"'{data_file_path}':  {str(error)}."
+                                    )
+            else:
+                raise FileNotFoundError(f"Bad response while accessing "
+                                        f"data file URL '{data_file_path}'."
+                                       )
+        else:
+            #Reference is to a file (only FileNotFoundError is re-wrapped).
+            try:
+                with open(data_file_path, "r", encoding="utf-8") as file:
+                    try:
+                        site_data = json.load(file)
+                    except Exception as error:
+                        raise ValueError(f"Problem parsing json contents at "
+                                         f"'{data_file_path}':  {str(error)}."
+                                        )
+            except FileNotFoundError as error:
+                raise FileNotFoundError(f"Problem while attempting to access "
+                                        f"data file '{data_file_path}'."
+                                       )
+
+        self.sites = {}
+
+        #Add every site from the json data to self.sites, keyed by site name.
+        for site_name in site_data:
+            try:
+                self.sites[site_name] = \
+                    SiteInformation(site_name,
+                                    site_data[site_name]["urlMain"],
+                                    site_data[site_name]["url"],
+                                    site_data[site_name]["username_claimed"],
+                                    site_data[site_name]["username_unclaimed"],
+                                    site_data[site_name]
+                                   )
+            except KeyError as error:
+                raise ValueError(f"Problem parsing json contents at "
+                                 f"'{data_file_path}':  "
+                                 f"Missing attribute {str(error)}."
+                                )
+
+        #Initialize state if anyone iterates over this object.
+        self.__iteration_index = 0
+
+        return
+
+    def __iter__(self):
+        """Iterator For Object.
+
+        NOTE(review):  The object acts as its own iterator, so its position
+        is shared by every loop over it and only rewinds on exhaustion.
+
+        Return Value:
+        This same object; __next__ does the stepping.
+        """
+        return self
+
+    def __next__(self):
+        """Next Method For Object.
+
+        Keyword Arguments:
+        self                   -- This object.
+
+        Return Value:
+        The site at the current position in self.sites (in the dictionary's
+        key order); the position then advances by one.
+        Raises StopIteration, and rewinds to the start, once all are passed.
+        """
+
+        if self.__iteration_index >= len(self.sites):
+            #Finished with iteration:  rewind so a later loop starts over.
+            self.__iteration_index = 0
+            raise StopIteration
+        else:
+            #Index into a fresh list of the keys (rebuilt on every call).
+            site = self.sites[list(self.sites)[self.__iteration_index]]
+            self.__iteration_index += 1
+
+        return site
+
+    def __len__(self):
+        """Length For Object.
+
+        Keyword Arguments:
+        self                   -- This object.
+
+        Return Value:
+        Number of sites held in self.sites.
+        """
+        return len(self.sites)