From 198798d28bec5f3a60232d7cca545102eb80caf5 Mon Sep 17 00:00:00 2001
From: Paul Pfeister
Date: Mon, 6 May 2024 00:09:00 -0400
Subject: [PATCH] Add basic schema

---
Review notes (this area is ignored when the patch is applied):
- "$schema" is removed with dict.pop(key, default) in both
  sherlock/sites.py and site_list.py, so a data file without the key is
  handled without a bare "except:" and without a KeyError.
- The post-image blob ids on the "index" lines of those two files predate
  that edit; regenerate the patch if exact ids are required.

 sherlock/resources/data.json        | 11 ++----
 sherlock/resources/data.schema.json | 60 +++++++++++++++++++++++++++++
 sherlock/sites.py                   |  5 +++
 site_list.py                        |  8 +++-
 4 files changed, 75 insertions(+), 9 deletions(-)
 create mode 100644 sherlock/resources/data.schema.json

diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json
index d698d52..31c29a3 100644
--- a/sherlock/resources/data.json
+++ b/sherlock/resources/data.json
@@ -1,4 +1,5 @@
 {
+  "$schema": "data.schema.json",
   "1337x": {
     "errorMsg": [
       "Error something went wrong.",
@@ -479,8 +480,7 @@
     "url": "https://codeforces.com/profile/{}",
     "urlMain": "https://codeforces.com/",
     "urlProbe": "https://codeforces.com/api/user.info?handles={}",
-    "username_claimed": "tourist",
-    "username_unclaimed": "noonewouldeverusethis7"
+    "username_claimed": "tourist"
   },
   "Codepen": {
     "errorType": "status_code",
@@ -1072,8 +1072,7 @@
     "errorUrl": "https://irc-galleria.net/users/search?username={}",
     "url": "https://irc-galleria.net/user/{}",
     "urlMain": "https://irc-galleria.net/",
-    "username_claimed": "appas",
-    "username_unclaimed": "noonewouldeverusethis77"
+    "username_claimed": "appas"
   },
   "Icons8 Community": {
     "errorType": "status_code",
@@ -1160,7 +1159,6 @@
   },
   "Jimdo": {
     "errorType": "status_code",
-    "noPeriod": "True",
     "regexCheck": "^[a-zA-Z0-9@_-]$",
     "url": "https://{}.jimdosite.com",
     "urlMain": "https://jimdosite.com/",
@@ -1334,8 +1332,7 @@
     "url": "https://monkeytype.com/profile/{}",
     "urlMain": "https://monkeytype.com/",
     "urlProbe": "https://api.monkeytype.com/users/{}/profile",
-    "username_claimed": "Lost_Arrow",
-    "username_unclaimed": "noonewouldeverusethis7"
+    "username_claimed": "Lost_Arrow"
   },
   "Motherless": {
     "errorMsg": "no longer a member",
diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json
new file mode 100644
index 0000000..5f08541
--- /dev/null
+++ b/sherlock/resources/data.schema.json
@@ -0,0 +1,60 @@
+{
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "Sherlock Targets",
+    "description": "Social media target to probe for existence of usernames",
+    "type": "object",
+    "patternProperties": {
+        "^(?!\\$).*?$": {
+            "type": "object",
+            "required": [ "url", "urlMain", "errorType", "username_claimed" ],
+            "properties": {
+                "url": { "type": "string" },
+                "urlMain": { "type": "string" },
+                "urlProbe": { "type": "string" },
+                "username_claimed": { "type": "string" },
+                "regexCheck": { "type": "string" },
+                "isNSFW": { "type": "boolean" },
+                "headers": { "type": "object" },
+                "request_payload": { "type": "object" },
+                "tags": {
+                    "oneOf": [
+                        {
+                            "type": "string",
+                            "enum": [ "adult", "gaming" ]
+                        },
+                        {
+                            "type": "array",
+                            "items": {
+                                "type": "string",
+                                "enum": [ "adult", "gaming" ]
+                            }
+                        }
+                    ]
+                },
+                "request_method": {
+                    "type": "string",
+                    "enum": [ "GET", "POST", "HEAD" ]
+                },
+                "errorType": {
+                    "type": "string",
+                    "enum": [ "message", "response_url", "status_code" ]
+                },
+                "errorMsg": {
+                    "oneOf": [
+                        { "type": "string" },
+                        { "type": "array", "items": { "type": "string" } }
+                    ]
+                },
+                "errorCode": {
+                    "oneOf": [
+                        { "type": "integer" },
+                        { "type": "array", "items": { "type": "integer" } }
+                    ]
+                },
+                "errorUrl": { "type": "string" },
+                "response_url": { "type": "string" }
+            },
+            "additionalProperties": false
+        }
+    }
+}
diff --git a/sherlock/sites.py b/sherlock/sites.py
index 9bef100..009a05c 100644
--- a/sherlock/sites.py
+++ b/sherlock/sites.py
@@ -152,6 +152,11 @@ class SitesInformation:
                 raise FileNotFoundError(f"Problem while attempting to access "
                                         f"data file '{data_file_path}'."
                                         )
+
+        # "$schema" is schema metadata rather than a site entry, and it may
+        # be absent from the loaded data. Passing a default makes removal a
+        # no-op in that case, without a bare except hiding other errors.
+        site_data.pop('$schema', None)
 
         self.sites = {}
 
diff --git a/site_list.py b/site_list.py
index 58a9d68..b66010d 100644
--- a/site_list.py
+++ b/site_list.py
@@ -5,10 +5,14 @@ import json
 
 # Read the data.json file
 with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
-    data = json.load(data_file)
+    data: dict = json.load(data_file)
+
+# Removes schema-specific keywords for proper processing
+site_entries: dict = dict(data)
+site_entries.pop('$schema', None)
 
 # Sort the social networks in alphanumeric order
-social_networks = sorted(data.items())
+social_networks: list = sorted(site_entries.items())
 
 # Write the list of supported sites to sites.md
 with open("sites.md", "w") as site_file: