add rank parameter to site_list.py

--rank or -r to update all page ranks
pull/146/head
ptalmeida 6 years ago
parent ad4c321f3f
commit 40fc51fc32

@ -5,6 +5,7 @@ This module generates the listing of supported sites.
import json
import sys
import requests
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from bs4 import BeautifulSoup as bs
from datetime import datetime
from collections import OrderedDict
@ -15,7 +16,7 @@ def get_rank(domain_to_query):
page = requests.get(url).text
soup = bs(page, features="lxml")
for span in soup.find_all('span'):
if span.has_attr("class"):
if "globleRank" in span["class"]:
for strong in span.find_all("strong"):
if strong.has_attr("class"):
@ -23,6 +24,14 @@ def get_rank(domain_to_query):
result = int(strong.text.strip().replace(',', ''))
return result
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
)
parser.add_argument("--rank","-r",
action="store_true", dest="rank", default=False,
help="Update all website ranks (not recommended)."
)
args = parser.parse_args()
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
@ -34,13 +43,12 @@ with open("sites.md", "w") as site_file:
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries")) if args.rank == True:
sys.stdout.flush() data.get(social_network)["rank"] = get_rank(url_main)
data.get(social_network)["rank"] = get_rank(url_main) sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
index = index + 1
site_file.write(f'\nAlexa.com rank data fetched at {datetime.utcnow()} UTC\n')
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
with open("data.json", "w") as data_file:

@ -132,5 +132,3 @@
131. [devRant](https://devrant.com/)
132. [iMGSRC.RU](https://imgsrc.ru/)
133. [last.fm](https://last.fm/)
Alexa.com rank data fetched at 2019-01-24 10:58:49.318475 UTC

Loading…
Cancel
Save