add rank parameter to site_list.py

--rank or -r to update all page ranks
pull/146/head
ptalmeida 5 years ago
parent ad4c321f3f
commit 40fc51fc32

@ -5,6 +5,7 @@ This module generates the listing of supported sites.
import json
import sys
import requests
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from bs4 import BeautifulSoup as bs
from datetime import datetime
from collections import OrderedDict
@ -15,7 +16,7 @@ def get_rank(domain_to_query):
page = requests.get(url).text
soup = bs(page, features="lxml")
for span in soup.find_all('span'):
if span.has_attr("class"):
if span.has_attr("class"):
if "globleRank" in span["class"]:
for strong in span.find_all("strong"):
if strong.has_attr("class"):
@ -23,6 +24,14 @@ def get_rank(domain_to_query):
result = int(strong.text.strip().replace(',', ''))
return result
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
)
parser.add_argument("--rank","-r",
action="store_true", dest="rank", default=False,
help="Update all website ranks (not recommended)."
)
args = parser.parse_args()
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
@ -34,13 +43,12 @@ with open("sites.md", "w") as site_file:
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
data.get(social_network)["rank"] = get_rank(url_main)
if args.rank == True:
data.get(social_network)["rank"] = get_rank(url_main)
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
index = index + 1
site_file.write(f'\nAlexa.com rank data fetched at {datetime.utcnow()} UTC\n')
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
with open("data.json", "w") as data_file:

@ -132,5 +132,3 @@
131. [devRant](https://devrant.com/)
132. [iMGSRC.RU](https://imgsrc.ru/)
133. [last.fm](https://last.fm/)
Alexa.com rank data fetched at 2019-01-24 10:58:49.318475 UTC

Loading…
Cancel
Save