Merge branch 'ptalmeida-Add-sorgin-by-alexa-rank-functionality'

pull/150/head
Yahya SayadArbabi 6 years ago
commit d6b7c0ac55

@ -43,7 +43,12 @@ optional arguments:
--version Display version information and dependencies.
--verbose, -v, -d, --debug
Display extra debugging information and metrics.
--quiet, -q Disable debugging information (Default Option).
--folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
If using multiple usernames, the output of the results
will be saved at this folder.
--output OUTPUT, -o OUTPUT
If using single username, the output of the result
will be saved at this file.
--tor, -t Make requests over TOR; increases runtime; requires
TOR to be installed and in system path.
--unique-tor, -u Make requests over TOR with new TOR circuit after each
@ -55,6 +60,9 @@ optional arguments:
--proxy PROXY_URL, -p PROXY_URL
Make requests over a proxy. e.g.
socks5://127.0.0.1:1080
--json JSON_FILE, -j JSON_FILE
Load data from a JSON file or an online, valid, JSON
file.
```
For example, run ```python3 sherlock.py user123```, and all of the accounts

@ -2,83 +2,98 @@
"500px": {
"errorMsg": "Sorry, no such page.",
"errorType": "message",
"rank": 2461,
"url": "https://500px.com/{}",
"urlMain": "https://500px.com/"
},
"9GAG": {
"errorType": "status_code",
"rank": 333,
"url": "https://9gag.com/u/{}",
"urlMain": "https://9gag.com/"
},
"About.me": {
"errorType": "status_code",
"rank": 12686,
"url": "https://about.me/{}",
"urlMain": "https://about.me/"
},
"Academia.edu": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 385,
"url": "https://independent.academia.edu/{}",
"urlMain": "https://www.academia.edu/"
},
"AngelList": {
"errorMsg": "We couldn't find what you were looking for.",
"errorType": "message",
"rank": 3469,
"url": "https://angel.co/{}",
"urlMain": "https://angel.co/"
},
"Aptoide": {
"errorType": "status_code",
"rank": 6107,
"url": "https://{}.en.aptoide.com/",
"urlMain": "https://en.aptoide.com/"
},
"AskFM": {
"errorType": "status_code",
"rank": 1109,
"url": "https://ask.fm/{}",
"urlMain": "https://ask.fm/"
},
"BLIP.fm": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 261919,
"url": "https://blip.fm/{}",
"urlMain": "https://blip.fm/"
},
"Badoo": {
"errorType": "status_code",
"rank": 949,
"url": "https://badoo.com/profile/{}",
"urlMain": "https://badoo.com/"
},
"Bandcamp": {
"errorMsg": "Sorry, that something isn\u2019t here",
"errorType": "message",
"rank": 573,
"url": "https://www.bandcamp.com/{}",
"urlMain": "https://www.bandcamp.com/"
},
"Basecamp": {
"errorMsg": "The account you were looking for doesn't exist",
"errorType": "message",
"rank": 1559,
"url": "https://{}.basecamphq.com",
"urlMain": "https://basecamp.com/"
},
"Behance": {
"errorMsg": "Oops! We can\u2019t find that page.",
"errorType": "message",
"rank": 394,
"url": "https://www.behance.net/{}",
"urlMain": "https://www.behance.net/"
},
"BitBucket": {
"errorType": "status_code",
"rank": 848,
"url": "https://bitbucket.org/{}",
"urlMain": "https://bitbucket.org/"
},
"BlackPlanet": {
"errorMsg": "My Hits",
"errorType": "message",
"rank": 107509,
"url": "http://blackplanet.com/{}",
"urlMain": "http://blackplanet.com/"
},
"Blogger": {
"errorType": "status_code",
"rank": 193,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.blogspot.com",
"urlMain": "https://www.blogger.com/"
@ -86,113 +101,133 @@
"BuzzFeed": {
"errorMsg": "We can't find the page you're looking for.",
"errorType": "message",
"rank": 294,
"url": "https://buzzfeed.com/{}",
"urlMain": "https://buzzfeed.com/"
},
"Canva": {
"errorMsg": "Not found (404)",
"errorType": "message",
"rank": 215,
"url": "https://www.canva.com/{}",
"urlMain": "https://www.canva.com/"
},
"Carbonmade": {
"errorMsg": "You've accidentally stumbled upon Mike's super secret nap grotto.",
"errorType": "message",
"rank": 32239,
"url": "https://{}.carbonmade.com",
"urlMain": "https://carbonmade.com/"
},
"CashMe": {
"errorType": "status_code",
"rank": 45066,
"url": "https://cash.me/{}",
"urlMain": "https://cash.me/"
},
"Cloob": {
"errorType": "status_code",
"rank": 8052,
"url": "https://www.cloob.com/name/{}",
"urlMain": "https://www.cloob.com/"
},
"Codecademy": {
"errorMsg": "404 error",
"errorType": "message",
"rank": 2314,
"url": "https://www.codecademy.com/{}",
"urlMain": "https://www.codecademy.com/"
},
"Codementor": {
"errorMsg": "404",
"errorType": "message",
"rank": 12456,
"url": "https://www.codementor.io/{}",
"urlMain": "https://www.codementor.io/"
},
"Codepen": {
"errorType": "status_code",
"rank": 863,
"url": "https://codepen.io/{}",
"urlMain": "https://codepen.io/"
},
"Coderwall": {
"errorMsg": "404! Our feels when that url is used",
"errorType": "message",
"rank": 17346,
"url": "https://coderwall.com/{}",
"urlMain": "https://coderwall.com/"
},
"ColourLovers": {
"errorMsg": "Page Not Loved",
"errorType": "message",
"rank": 30625,
"url": "https://www.colourlovers.com/love/{}",
"urlMain": "https://www.colourlovers.com/"
},
"Contently": {
"errorMsg": "We can't find that page!",
"errorType": "message",
"rank": 59032,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.contently.com/",
"urlMain": "https://contently.com/"
},
"Coroflot": {
"errorType": "status_code",
"rank": 37568,
"url": "https://www.coroflot.com/{}",
"urlMain": "https://coroflot.com/"
},
"CreativeMarket": {
"errorType": "response_url",
"errorUrl": "https://www.creativemarket.com/",
"rank": 1790,
"url": "https://creativemarket.com/{}",
"urlMain": "https://creativemarket.com/"
},
"Crevado": {
"errorType": "status_code",
"rank": 168903,
"url": "https://{}.crevado.com",
"urlMain": "https://crevado.com/"
},
"Crunchyroll": {
"errorType": "status_code",
"rank": 463,
"url": "https://www.crunchyroll.com/user/{}",
"urlMain": "https://www.crunchyroll.com/"
},
"DailyMotion": {
"errorType": "status_code",
"rank": 132,
"url": "https://www.dailymotion.com/{}",
"urlMain": "https://www.dailymotion.com/"
},
"Designspiration": {
"errorMsg": "Content Not Found",
"errorType": "message",
"rank": 24722,
"url": "https://www.designspiration.net/{}",
"urlMain": "https://www.designspiration.net/"
},
"DeviantART": {
"errorType": "status_code",
"rank": 185,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.deviantart.com",
"urlMain": "https://deviantart.com"
},
"Disqus": {
"errorType": "status_code",
"rank": 1311,
"url": "https://disqus.com/{}",
"urlMain": "https://disqus.com/"
},
"Dribbble": {
"errorMsg": "Whoops, that page is gone.",
"errorType": "message",
"rank": 937,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://dribbble.com/{}",
"urlMain": "https://dribbble.com/"
@ -200,67 +235,79 @@
"EVE Online": {
"errorType": "response_url",
"errorUrl": "https://eveonline.com",
"rank": 11655,
"url": "https://evewho.com/pilot/{}/",
"urlMain": "https://eveonline.com"
},
"Ebay": {
"errorMsg": "The User ID you entered was not found",
"errorType": "message",
"rank": 37,
"url": "https://www.ebay.com/usr/{}",
"urlMain": "https://www.ebay.com/"
},
"Ello": {
"errorMsg": "We couldn't find the page you're looking for",
"errorType": "message",
"rank": 29841,
"url": "https://ello.co/{}",
"urlMain": "https://ello.co/"
},
"Etsy": {
"errorType": "status_code",
"rank": 152,
"url": "https://www.etsy.com/shop/{}",
"urlMain": "https://www.etsy.com/"
},
"EyeEm": {
"errorType": "response_url",
"errorUrl": "https://www.eyeem.com/",
"rank": 33189,
"url": "https://www.eyeem.com/u/{}",
"urlMain": "https://www.eyeem.com/"
},
"Facebook": {
"errorType": "status_code",
"rank": 3,
"regexCheck": "^[a-zA-Z0-9]{4,49}(?<!.com|.org|.net)$",
"url": "https://www.facebook.com/{}",
"urlMain": "https://www.facebook.com/"
},
"Flickr": {
"errorType": "status_code",
"rank": 361,
"url": "https://www.flickr.com/people/{}",
"urlMain": "https://www.flickr.com/"
},
"Flipboard": {
"errorMsg": "loading",
"errorType": "message",
"rank": 4494,
"regexCheck": "^([a-zA-Z0-9_]){1,15}$",
"url": "https://flipboard.com/@{}",
"urlMain": "https://flipboard.com/"
},
"Fotolog": {
"errorType": "status_code",
"rank": 46852,
"url": "https://fotolog.com/{}",
"urlMain": "https://fotolog.com/"
},
"Foursquare": {
"errorType": "status_code",
"rank": 2281,
"url": "https://foursquare.com/{}",
"urlMain": "https://foursquare.com/"
},
"Giphy": {
"errorType": "status_code",
"rank": 575,
"url": "https://giphy.com/{}",
"urlMain": "https://giphy.com/"
},
"GitHub": {
"errorType": "status_code",
"rank": 58,
"regexCheck": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$",
"url": "https://www.github.com/{}",
"urlMain": "https://www.github.com/"
@ -268,190 +315,224 @@
"GitLab": {
"errorMsg": "You need to sign in or sign up before continuing.",
"errorType": "message",
"rank": 1764,
"url": "https://gitlab.com/{}",
"urlMain": "https://gitlab.com/"
},
"Gitee": {
"errorType": "status_code",
"rank": 3662,
"url": "https://gitee.com/{}",
"urlMain": "https://gitee.com/"
},
"GoodReads": {
"errorType": "status_code",
"rank": 384,
"url": "https://www.goodreads.com/{}",
"urlMain": "https://www.goodreads.com/"
},
"Google Plus": {
"errorType": "status_code",
"rank": 1,
"url": "https://plus.google.com/+{}",
"urlMain": "https://plus.google.com/"
},
"Gravatar": {
"errorType": "status_code",
"rank": 5621,
"url": "http://en.gravatar.com/{}",
"urlMain": "http://en.gravatar.com/"
},
"Gumroad": {
"errorMsg": "Page not found.",
"errorType": "message",
"rank": 4033,
"url": "https://www.gumroad.com/{}",
"urlMain": "https://www.gumroad.com/"
},
"HackerNews": {
"errorMsg": "No such user.",
"errorType": "message",
"rank": 3035,
"url": "https://news.ycombinator.com/user?id={}",
"urlMain": "https://news.ycombinator.com/"
},
"HackerOne": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 39153,
"url": "https://hackerone.com/{}",
"urlMain": "https://hackerone.com/"
},
"House-Mixes.com": {
"errorMsg": "Profile Not Found",
"errorType": "message",
"rank": 126875,
"url": "https://www.house-mixes.com/profile/{}",
"urlMain": "https://www.house-mixes.com/"
},
"Houzz": {
"errorMsg": "The page you requested was not found.",
"errorType": "message",
"rank": 2280,
"url": "https://houzz.com/user/{}",
"urlMain": "https://houzz.com/"
},
"HubPages": {
"errorType": "status_code",
"rank": 9948,
"url": "https://hubpages.com/@{}",
"urlMain": "https://hubpages.com/"
},
"IFTTT": {
"errorMsg": "The requested page or file does not exist",
"errorType": "message",
"rank": 4530,
"url": "https://www.ifttt.com/p/{}",
"urlMain": "https://www.ifttt.com/"
},
"ImageShack": {
"errorType": "response_url",
"errorUrl": "https://imageshack.us/",
"rank": 38014,
"url": "https://imageshack.us/user/{}",
"urlMain": "https://imageshack.us/"
},
"Imgur": {
"errorType": "status_code",
"rank": 63,
"url": "https://imgur.com/user/{}",
"urlMain": "https://imgur.com/"
},
"Instagram": {
"errorMsg": "The link you followed may be broken",
"errorType": "message",
"rank": 16,
"url": "https://www.instagram.com/{}",
"urlMain": "https://www.instagram.com/"
},
"Instructables": {
"errorMsg": "404: We're sorry, things break sometimes",
"errorType": "message",
"rank": 1011,
"url": "https://www.instructables.com/member/{}",
"urlMain": "https://www.instructables.com/"
},
"Issuu": {
"errorType": "status_code",
"rank": 901,
"url": "https://issuu.com/{}",
"urlMain": "https://issuu.com/"
},
"Itch.io": {
"errorType": "status_code",
"rank": 2293,
"url": "https://{}.itch.io/",
"urlMain": "https://itch.io/"
},
"Jimdo": {
"errorType": "status_code",
"noPeriod": "True",
"rank": 104890,
"url": "https://{}.jimdosite.com",
"urlMain": "https://jimdosite.com/"
},
"Kaggle": {
"errorType": "status_code",
"rank": 2714,
"url": "https://www.kaggle.com/{}",
"urlMain": "https://www.kaggle.com/"
},
"KanoWorld": {
"errorType": "status_code",
"rank": 72187,
"url": "https://api.kano.me/progress/user/{}",
"urlMain": "https://world.kano.me/"
},
"Keybase": {
"errorType": "status_code",
"rank": 101635,
"url": "https://keybase.io/{}",
"urlMain": "https://keybase.io/"
},
"Kik": {
"errorMsg": "The page you requested was not found",
"errorType": "message",
"rank": 323474,
"url": "https://ws2.kik.com/user/{}",
"urlMain": "http://kik.me/"
},
"Kongregate": {
"errorMsg": "Sorry, no account with that name was found.",
"errorType": "message",
"rank": 1955,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://www.kongregate.com/accounts/{}",
"urlMain": "https://www.kongregate.com/"
},
"Launchpad": {
"errorType": "status_code",
"rank": 7392,
"url": "https://launchpad.net/~{}",
"urlMain": "https://launchpad.net/"
},
"Letterboxd": {
"errorMsg": "Sorry, we can\u2019t find the page you\u2019ve requested.",
"errorType": "message",
"rank": 2590,
"url": "https://letterboxd.com/{}",
"urlMain": "https://letterboxd.com/"
},
"LiveJournal": {
"errorMsg": "Unknown Journal",
"errorType": "message",
"rank": 223,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.livejournal.com",
"urlMain": "https://www.livejournal.com/"
},
"Mastodon": {
"errorType": "status_code",
"rank": 978330,
"url": "https://mstdn.io/@{}",
"urlMain": "https://mstdn.io/"
},
"Medium": {
"errorType": "status_code",
"rank": 216,
"url": "https://medium.com/@{}",
"urlMain": "https://medium.com/"
},
"MeetMe": {
"errorType": "response_url",
"errorUrl": "https://www.meetme.com/",
"rank": 14535,
"url": "https://www.meetme.com/{}",
"urlMain": "https://www.meetme.com/"
},
"MixCloud": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 3187,
"url": "https://www.mixcloud.com/{}",
"urlMain": "https://www.mixcloud.com/"
},
"MyAnimeList": {
"errorType": "status_code",
"rank": 477,
"url": "https://myanimelist.net/profile/{}",
"urlMain": "https://myanimelist.net/"
},
"NameMC (Minecraft.net skins)": {
"errorMsg": "Profiles: 0 results",
"errorType": "message",
"rank": 5988,
"url": "https://namemc.com/profile/{}",
"urlMain": "https://namemc.com/"
},
"Newgrounds": {
"errorType": "status_code",
"rank": 2479,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.newgrounds.com",
"urlMain": "https://newgrounds.com"
@ -459,294 +540,346 @@
"Pastebin": {
"errorType": "response_url",
"errorUrl": "https://pastebin.com/index",
"rank": 1043,
"url": "https://pastebin.com/u/{}",
"urlMain": "https://pastebin.com/"
},
"Patreon": {
"errorType": "status_code",
"rank": 339,
"url": "https://www.patreon.com/{}",
"urlMain": "https://www.patreon.com/"
},
"Pexels": {
"errorMsg": "Ouch, something went wrong!",
"errorType": "message",
"rank": 614,
"url": "https://www.pexels.com/@{}",
"urlMain": "https://www.pexels.com/"
},
"Photobucket": {
"errorType": "status_code",
"rank": 3331,
"url": "https://photobucket.com/user/{}/library",
"urlMain": "https://photobucket.com/"
},
"Pinterest": {
"errorType": "response_url",
"errorUrl": "https://www.pinterest.com/?show_error=true",
"rank": 76,
"url": "https://www.pinterest.com/{}/",
"urlMain": "https://www.pinterest.com/"
},
"Pixabay": {
"errorType": "status_code",
"rank": 403,
"url": "https://pixabay.com/en/users/{}",
"urlMain": "https://pixabay.com/"
},
"Plug.DJ": {
"errorType": "status_code",
"rank": 34127,
"url": "https://plug.dj/@/{}",
"urlMain": "https://plug.dj/"
},
"ProductHunt": {
"errorMsg": "Product Hunt is a curation of the best new products",
"errorType": "message",
"rank": 4138,
"url": "https://www.producthunt.com/@{}",
"urlMain": "https://www.producthunt.com/"
},
"Quora": {
"errorType": "status_code",
"rank": 91,
"url": "https://www.quora.com/profile/{}",
"urlMain": "https://www.quora.com/"
},
"Rajce.net": {
"errorMsg": "410",
"errorType": "message",
"rank": 1187,
"url": "https://{}.rajce.idnes.cz/",
"urlMain": "https://www.rajce.idnes.cz/"
},
"Reddit": {
"errorMsg": "page not found",
"errorType": "message",
"rank": 17,
"url": "https://www.reddit.com/user/{}",
"urlMain": "https://www.reddit.com/"
},
"Repl.it": {
"errorMsg": "404",
"errorType": "message",
"rank": 8926,
"url": "https://repl.it/@{}",
"urlMain": "https://repl.it/"
},
"ReverbNation": {
"errorMsg": "Sorry, we couldn't find that page",
"errorType": "message",
"rank": 10610,
"url": "https://www.reverbnation.com/{}",
"urlMain": "https://www.reverbnation.com/"
},
"Roblox": {
"errorMsg": "Page cannot be found or no longer exists",
"errorType": "message",
"rank": 105,
"url": "https://www.roblox.com/user.aspx?username={}",
"urlMain": "https://www.roblox.com/"
},
"Scribd": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 206,
"url": "https://www.scribd.com/{}",
"urlMain": "https://www.scribd.com/"
},
"Slack": {
"errorType": "status_code",
"rank": 244,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.slack.com",
"urlMain": "https://slack.com"
},
"SlideShare": {
"errorType": "status_code",
"rank": 180,
"url": "https://slideshare.net/{}",
"urlMain": "https://slideshare.net/"
},
"Smashcast": {
"errorType": "status_code",
"rank": 101679,
"url": "https://www.smashcast.tv/api/media/live/{}",
"urlMain": "https://www.smashcast.tv/"
},
"SoundCloud": {
"errorType": "status_code",
"rank": 113,
"url": "https://soundcloud.com/{}",
"urlMain": "https://soundcloud.com/"
},
"SourceForge": {
"errorType": "status_code",
"rank": 368,
"url": "https://sourceforge.net/u/{}",
"urlMain": "https://sourceforge.net/"
},
"Spotify": {
"errorType": "status_code",
"rank": 106,
"url": "https://open.spotify.com/user/{}",
"urlMain": "https://open.spotify.com/"
},
"Star Citizen": {
"errorType": "status_code",
"rank": 4694,
"url": "https://robertsspaceindustries.com/citizens/{}",
"urlMain": "https://robertsspaceindustries.com/"
},
"Steam": {
"errorMsg": "The specified profile could not be found",
"errorType": "message",
"rank": 164,
"url": "https://steamcommunity.com/id/{}",
"urlMain": "https://steamcommunity.com/"
},
"StreamMe": {
"errorType": "status_code",
"rank": 21153,
"url": "https://www.stream.me/{}",
"urlMain": "https://www.stream.me/"
},
"Taringa": {
"errorMsg": "Lo que est\u00e1s buscando no est\u00e1 por aqu\u00ed.",
"errorType": "message",
"rank": 935,
"url": "https://www.taringa.net/{}",
"urlMain": "https://taringa.net/"
},
"Telegram": {
"errorMsg": "<meta property=\"twitter:title\" content=\"Telegram: Contact",
"errorType": "message",
"rank": 1032,
"url": "https://t.me/{}",
"urlMain": "https://t.me/"
},
"Tinder": {
"errorMsg": "Looking for Someone?",
"errorType": "message",
"rank": 1368,
"url": "https://www.gotinder.com/@{}",
"urlMain": "https://tinder.com/"
},
"TradingView": {
"errorType": "status_code",
"rank": 592,
"url": "https://www.tradingview.com/u/{}/",
"urlMain": "https://www.tradingview.com/"
},
"Trakt": {
"errorMsg": "404",
"errorType": "message",
"rank": 5944,
"url": "https://www.trakt.tv/users/{}",
"urlMain": "https://www.trakt.tv/"
},
"Trip": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 2956,
"url": "https://www.trip.skyscanner.com/user/{}",
"urlMain": "https://www.trip.skyscanner.com/"
},
"TripAdvisor": {
"errorMsg": "This page is on vacation\u2026",
"errorType": "message",
"rank": 283,
"url": "https://tripadvisor.com/members/{}",
"urlMain": "https://tripadvisor.com/"
},
"Twitter": {
"errorMsg": "page doesn\u2019t exist",
"errorType": "message",
"rank": 11,
"url": "https://www.twitter.com/{}",
"urlMain": "https://www.twitter.com/"
},
"Unsplash": {
"errorType": "status_code",
"rank": 598,
"url": "https://unsplash.com/@{}",
"urlMain": "https://unsplash.com/"
},
"VK": {
"errorType": "status_code",
"rank": 19,
"url": "https://vk.com/{}",
"urlMain": "https://vk.com/"
},
"VSCO": {
"errorType": "status_code",
"rank": 3414,
"url": "https://vsco.co/{}",
"urlMain": "https://vsco.co/"
},
"Venmo": {
"errorType": "status_code",
"rank": 5004,
"url": "https://venmo.com/{}",
"urlMain": "https://venmo.com/"
},
"Vimeo": {
"errorMsg": "404 Not Found",
"errorType": "message",
"rank": 143,
"url": "https://vimeo.com/{}",
"urlMain": "https://vimeo.com/"
},
"VirusTotal": {
"errorMsg": "not found",
"errorType": "message",
"rank": 4265,
"url": "https://www.virustotal.com/ui/users/{}/trusted_users",
"urlMain": "https://www.virustotal.com/"
},
"Wattpad": {
"errorMsg": "This page seems to be missing...",
"errorType": "message",
"rank": 516,
"url": "https://www.wattpad.com/user/{}",
"urlMain": "https://www.wattpad.com/"
},
"We Heart It": {
"errorMsg": "Oops! You've landed on a moving target!",
"errorType": "message",
"rank": 3415,
"url": "https://weheartit.com/{}",
"urlMain": "https://weheartit.com/"
},
"WebNode": {
"errorMsg": "Ztratili jste se?",
"errorType": "message",
"rank": 16094,
"url": "https://{}.webnode.cz/",
"urlMain": "https://www.webnode.cz/"
},
"Wikia": {
"errorMsg": "does not exist",
"errorType": "message",
"rank": 70,
"url": "https://wikia.com/wiki/User:{}",
"urlMain": "http://www.wikia.com/"
},
"Wikipedia": {
"errorMsg": "If a page was recently created here, it may not be visible yet because of a delay in updating the database",
"errorType": "message",
"rank": 5,
"url": "https://www.wikipedia.org/wiki/User:{}",
"urlMain": "https://www.wikipedia.org/"
},
"Wix": {
"errorType": "status_code",
"rank": 416,
"url": "https://{}.wix.com",
"urlMain": "https://wix.com/"
},
"WordPress": {
"errorType": "response_url",
"errorUrl": "wordpress.com/typo/?subdomain=",
"rank": 60,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.wordpress.com/",
"urlMain": "https://wordpress.com"
},
"YouPic": {
"errorType": "status_code",
"rank": 40894,
"url": "https://youpic.com/photographer/{}",
"urlMain": "https://youpic.com/"
},
"YouTube": {
"errorMsg": "Not Found",
"errorType": "message",
"rank": 2,
"url": "https://www.youtube.com/{}",
"urlMain": "https://www.youtube.com/"
},
"Younow": {
"errorMsg": "pageTitle || 'YouNow - Broadcast Live",
"errorType": "message",
"rank": 13432,
"url": "https://www.younow.com/{}",
"urlMain": "https://www.younow.com/"
},
"Zhihu": {
"errorMsg": "404",
"errorType": "message",
"rank": 85,
"url": "https://www.zhihu.com/people/{}",
"urlMain": "https://www.zhihu.com/"
},
"devRant": {
"errorType": "response_url",
"errorUrl": "https://devrant.com/",
"rank": 148214,
"url": "https://devrant.com/users/{}",
"urlMain": "https://devrant.com/"
},
"iMGSRC.RU": {
"errorType": "response_url",
"errorUrl": "https://imgsrc.ru/",
"rank": 3619,
"url": "https://imgsrc.ru/main/user.php?user={}",
"urlMain": "https://imgsrc.ru/"
},
"last.fm": {
"errorMsg": "Whoops! Sorry, but this page doesn't exist.",
"errorType": "message",
"rank": 1192,
"url": "https://last.fm/user/{}",
"urlMain": "https://last.fm/"
}

@ -1,4 +1,14 @@
beautifulsoup4
bs4
certifi
chardet
colorama
idna
lxml
PySocks
requests
requests_futures
requests-futures
soupsieve
stem
torrequest
colorama
urllib3

@ -346,6 +346,9 @@ def main():
action="store_true", dest="verbose", default=False,
help="Display extra debugging information and metrics."
)
parser.add_argument("--rank", "-r",
action="store_true", dest="rank", default=False,
help="Present websites ordered by their Alexa.com global rank in popularity.")
parser.add_argument("--folderoutput", "-fo", dest="folderoutput",
help="If using multiple usernames, the output of the results will be saved at this folder."
)
@ -464,6 +467,14 @@ def main():
f"Error: Desired sites not found: {', '.join(site_missing)}.")
sys.exit(1)
if args.rank:
    # Order the sites by ascending "rank". The first element of the sort key
    # is True for entries that have no "rank" field, so those are placed
    # after every ranked site. The membership test must look at the site's
    # entry (site_data[k]), not at the site name k itself.
    ranked_sites = sorted(
        site_data,
        key=lambda k: ("rank" not in site_data[k],
                       site_data[k].get("rank", sys.maxsize)),
    )
    # Rebuild the mapping in ranked order. This relies on dicts preserving
    # insertion order (guaranteed from Python 3.7) so that later iteration
    # over site_data follows the rank ordering.
    site_data = {site: site_data[site] for site in ranked_sites}
# Run report on all specified users.
for username in args.username:
print()

@ -3,23 +3,54 @@
This module generates the listing of supported sites.
"""
import json
from collections import OrderedDict
import sys
import requests
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from bs4 import BeautifulSoup as bs
# Read data.json (decoded as UTF-8) and parse it into `data`.
# NOTE(review): an identical load appears again further down in this listing;
# presumably one of the two is the pre-change copy shown by the diff view —
# confirm which one is live.
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
def get_rank(domain_to_query, timeout=10):
    """Look up a site's global rank on its Alexa "siteinfo" page.

    The page at ``http://www.alexa.com/siteinfo/<domain_to_query>`` is
    downloaded and scanned for a ``<strong>`` element carrying the class
    ``metrics-data`` nested inside a ``<span>`` carrying the class
    ``globleRank``.

    Args:
        domain_to_query: String appended verbatim to the siteinfo URL.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without it a single stalled connection would
            block the caller indefinitely.

    Returns:
        The rank as an ``int`` (thousands separators removed), or ``-1``
        when no matching element is found. If several elements match, the
        last one in document order wins.

    Raises:
        requests.exceptions.RequestException: On network failure or when
            the timeout expires.
        ValueError: If the matched element's text is not an integer.
    """
    result = -1
    url = "http://www.alexa.com/siteinfo/" + domain_to_query
    page = requests.get(url, timeout=timeout).text
    soup = bs(page, features="lxml")
    # NOTE(review): "globleRank" (sic) is the class name searched for;
    # presumably it mirrors the page's own markup — verify before
    # "correcting" the spelling.
    for span in soup.find_all('span'):
        if "globleRank" not in span.get("class", []):
            continue
        for strong in span.find_all("strong"):
            if "metrics-data" in strong.get("class", []):
                result = int(strong.text.strip().replace(',', ''))
    return result
# Serialise the site data as JSON with sorted keys and two-space indentation.
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
# Command-line interface: a single --rank/-r switch, stored in args.rank and
# off by default.
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
)
parser.add_argument("--rank","-r",
action="store_true", dest="rank", default=False,
help="Update all website ranks (not recommended)."
)
args = parser.parse_args()
# Write the key-sorted JSON text back to data.json.
with open("data.json", "w") as data_file:
data_file.write(sorted_json_data)
# Read data.json (decoded as UTF-8) and parse it into `data`.
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
# Generate sites.md: a heading containing the number of sites, followed by one
# numbered markdown link per site pointing at its "urlMain".
with open("sites.md", "w") as site_file:
# NOTE(review): the two heading writes below differ only in len(data) versus
# data_length — presumably the before/after versions of one line in this diff
# view; confirm that only one of them is live.
site_file.write(f'## List Of Supported Sites ({len(data)} Sites In Total!)\n')
data_length = len(data)
site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n')
index = 1
# NOTE(review): two loop headers follow (sorted OrderedDict versus plain
# iteration over data) — presumably the before/after versions of the same
# loop; confirm which one is live.
for social_network in OrderedDict(sorted(data.items())):
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
# With --rank, overwrite this site's "rank" with the result of
# get_rank(url_main) (note: the full urlMain value is what gets passed) and
# print a progress message. The leading "\r" moves the cursor back to the
# start of the line so each message overwrites the previous one.
if args.rank == True:
data.get(social_network)["rank"] = get_rank(url_main)
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
index = index + 1
# NOTE(review): this print and the sys.stdout.write at the end emit the same
# completion message — presumably before/after versions; confirm.
print("Finished updating supported site listing!")
# Re-serialise `data` (including any ranks updated above) with sorted keys and
# write it to data.json.
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
with open("data.json", "w") as data_file:
data_file.write(sorted_json_data)
sys.stdout.write("\r{0}".format(f"Finished updating supported site listing!\n"))
sys.stdout.flush()

Loading…
Cancel
Save