From 3db3f4558bd94f6f088a76f47eb21dc869fe1b56 Mon Sep 17 00:00:00 2001 From: Avinash Shenoy Date: Sun, 27 Jan 2019 15:20:45 +0530 Subject: [PATCH] Parallelized updating alexa ranking --- site_list.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/site_list.py b/site_list.py index cf8f414d..1d6b2214 100644 --- a/site_list.py +++ b/site_list.py @@ -7,6 +7,7 @@ import requests import threading from bs4 import BeautifulSoup as bs from datetime import datetime +from argparse import ArgumentParser, RawDescriptionHelpFormatter pool = list() @@ -22,7 +23,15 @@ def get_rank(domain_to_query, dest): if strong.has_attr("class"): if "metrics-data" in strong["class"]: result = int(strong.text.strip().replace(',', '')) - dest = result + dest['rank'] = result + +parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter + ) +parser.add_argument("--rank","-r", + action="store_true", dest="rank", default=False, + help="Update all website ranks (not recommended)." + ) +args = parser.parse_args() with open("data.json", "r", encoding="utf-8") as data_file: data = json.load(data_file) @@ -33,19 +42,26 @@ with open("sites.md", "w") as site_file: for social_network in data: url_main = data.get(social_network).get("urlMain") - th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)["rank"])) - pool.append((url_main, th)) - th.start() + data.get(social_network)["rank"] = 0 + if args.rank: + th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network))) + else: + th = None + pool.append((url_main, url_main, th)) + if args.rank: + th.start() index = 1 - for social_network, th in pool: - th.join() + for social_network, url_main, th in pool: + if args.rank: + th.join() site_file.write(f'{index}. [{social_network}]({url_main})\n') sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries")) sys.stdout.flush() index = index + 1 - site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n') + if args.rank: + site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n') sorted_json_data = json.dumps(data, indent=2, sort_keys=True)