Parallelized updating Alexa ranking

pull/153/head
Avinash Shenoy 6 years ago
parent 1442f333c2
commit 3db3f4558b

@@ -7,6 +7,7 @@ import requests
 import threading
 from bs4 import BeautifulSoup as bs
 from datetime import datetime
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
 
 pool = list()
@@ -22,7 +23,15 @@ def get_rank(domain_to_query, dest):
         if strong.has_attr("class"):
             if "metrics-data" in strong["class"]:
                 result = int(strong.text.strip().replace(',', ''))
-    dest = result
+    dest['rank'] = result
 
+parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
+parser.add_argument("--rank", "-r",
+                    action="store_true", dest="rank", default=False,
+                    help="Update all website ranks (not recommended).")
+args = parser.parse_args()
+
 with open("data.json", "r", encoding="utf-8") as data_file:
     data = json.load(data_file)
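
The dest = result fix above matters because rebinding a parameter inside a thread never reaches the caller; passing the site's dict and mutating a key on it does. A minimal sketch of the difference, using hypothetical get_rank_broken/get_rank_fixed names in place of the real worker:

import threading

def get_rank_broken(value, dest):
    dest = value  # rebinds the local name only; the caller sees nothing

def get_rank_fixed(value, dest):
    dest['rank'] = value  # mutates the shared dict the caller also holds

site = {'rank': 0}
th = threading.Thread(target=get_rank_fixed, args=(42, site))
th.start()
th.join()
print(site['rank'])  # prints 42; with get_rank_broken it would still be 0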
@@ -33,19 +42,26 @@ with open("sites.md", "w") as site_file:
     for social_network in data:
         url_main = data.get(social_network).get("urlMain")
-        th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)["rank"]))
-        pool.append((url_main, th))
-        th.start()
+        data.get(social_network)["rank"] = 0
+        if args.rank:
+            th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)))
+        else:
+            th = None
+        pool.append((social_network, url_main, th))
+        if args.rank:
+            th.start()
 
     index = 1
-    for social_network, th in pool:
-        th.join()
+    for social_network, url_main, th in pool:
+        if args.rank:
+            th.join()
         site_file.write(f'{index}. [{social_network}]({url_main})\n')
         sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
         sys.stdout.flush()
         index = index + 1
 
-    site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
+    if args.rank:
+        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
 
     sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
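
Taken together, the loop now fans out one optional thread per site and joins them in order while the entries are written out. A self-contained sketch of that pattern, assuming a stand-in fetch worker and an update_ranks flag in place of the real Alexa lookup and args.rank:

import threading

def fetch(url, dest):
    # Stand-in for get_rank: the real worker scrapes the rank from Alexa.
    dest['rank'] = len(url)

update_ranks = True  # stands in for args.rank
data = {'ExampleSite': {'urlMain': 'https://example.com', 'rank': 0}}

pool = []
for name, info in data.items():
    th = threading.Thread(target=fetch, args=(info['urlMain'], info)) if update_ranks else None
    pool.append((name, info['urlMain'], th))
    if th is not None:
        th.start()  # fan out: all fetches run concurrently

for name, url, th in pool:
    if th is not None:
        th.join()  # join in insertion order before writing this entry
    print(f'[{name}]({url}) rank={data[name]["rank"]}')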
