sherlock/site_list.py

"""Sherlock: Supported Site Listing
This module generates the listing of supported sites.
"""
import json
import sys
import requests
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from bs4 import BeautifulSoup as bs
from collections import OrderedDict


def get_rank(domain_to_query):
    """Scrape the Alexa page for a domain and return its global rank, or -1."""
    result = -1
    url = "http://www.alexa.com/siteinfo/" + domain_to_query
    page = requests.get(url).text
    soup = bs(page, features="lxml")
    # The rank is the "metrics-data" <strong> nested inside the "globleRank" <span>.
    for span in soup.find_all('span'):
        if span.has_attr("class") and "globleRank" in span["class"]:
            for strong in span.find_all("strong"):
                if strong.has_attr("class") and "metrics-data" in strong["class"]:
                    result = int(strong.text.strip().replace(',', ''))
    return result
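# Hypothetical example: get_rank("github.com") would request
# http://www.alexa.com/siteinfo/github.com and return the parsed global rank
# as an int, or -1 if no "globleRank" span is found in the page.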
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
parser.add_argument("--rank", "-r",
                    action="store_true", dest="rank", default=False,
                    help="Update all website ranks (not recommended)."
                    )
args = parser.parse_args()
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
with open("sites.md", "w") as site_file:
data_length = len(data)
site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n')
index = 1
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
if args.rank == True:
data.get(social_network)["rank"] = get_rank(url_main)
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
index = index + 1
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)

# Write the (optionally rank-updated) data back out in sorted order.
with open("data.json", "w") as data_file:
    data_file.write(sorted_json_data)

sys.stdout.write("\r{0}".format("Finished updating supported site listing!\n"))
sys.stdout.flush()
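For reference, the script only assumes that each data.json entry maps a site name to an object with a "urlMain" key, and (when --rank is passed) that a "rank" key can be written into it. A minimal sketch of that shape, with hypothetical example values and no claim about the rest of the real schema:

example_entry = {
    "GitHub": {
        "urlMain": "https://www.github.com/",  # read to build the sites.md link
        "rank": 65,                            # written back only when --rank is given
    }
}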