From 75aba053999b9ae7872027b909abf7ebcb7c9b2f Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 18:55:55 -0600 Subject: [PATCH 01/12] Convert Carbonmade to use Response URL detection. This is much more robust than the Error Message method. There are some odd things going on with this site... When I manually try certain user names, I get forwarded to other usernames. I suspect that there is some aliasing going on in the background. The detection for this site is probably not quite correct. But, it definitely works better than before. Add to tests. --- data.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/data.json b/data.json index 78349b0b..eb59be44 100644 --- a/data.json +++ b/data.json @@ -139,11 +139,13 @@ "username_unclaimed": "xgtrq" }, "Carbonmade": { - "errorMsg": "You've accidentally stumbled upon Mike's super secret nap grotto.", - "errorType": "message", + "errorType": "response_url", + "errorUrl": "https://carbonmade.com/fourohfour?domain={}.carbonmade.com", "rank": 31911, "url": "https://{}.carbonmade.com", - "urlMain": "https://carbonmade.com/" + "urlMain": "https://carbonmade.com/", + "username_claimed": "jenny", + "username_unclaimed": "noonewouldeverusethis7" }, "CashMe": { "errorType": "status_code", From 38661a95a0f508b440179c9cc2c115ba6e0d9763 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 18:56:36 -0600 Subject: [PATCH 02/12] Convert LiveJournal to use more reliable HTTP Status detection method. Add to tests. 
--- data.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/data.json b/data.json index eb59be44..3323d13d 100644 --- a/data.json +++ b/data.json @@ -624,12 +624,13 @@ "username_unclaimed": "noonewouldeverusethis7" }, "LiveJournal": { - "errorMsg": "Unknown Journal", - "errorType": "message", + "errorType": "status_code", "rank": 223, "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "url": "https://{}.livejournal.com", - "urlMain": "https://www.livejournal.com/" + "urlMain": "https://www.livejournal.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" }, "Mastodon": { "errorType": "status_code", From bf41b63c09c05207306dbec890301b24a869cbb2 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 19:01:04 -0600 Subject: [PATCH 03/12] Add Plug.DJ to tests. --- data.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data.json b/data.json index 3323d13d..1a5c09e9 100644 --- a/data.json +++ b/data.json @@ -747,7 +747,9 @@ "errorType": "status_code", "rank": 34208, "url": "https://plug.dj/@/{}", - "urlMain": "https://plug.dj/" + "urlMain": "https://plug.dj/", + "username_claimed": "plug-dj-rock", + "username_unclaimed": "noonewouldeverusethis7" }, "ProductHunt": { "errorMsg": "Product Hunt is a curation of the best new products", From bb95811936ba726f4d7e1e8a4f3a702713565929 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 19:54:15 -0600 Subject: [PATCH 04/12] Convert Quora to use Response URL detection. For some reason, a query against a known profile (e.g. https://www.quora.com/profile/Matt-Riggsby) would return a 403 HTTP Status. Add to tests. 
--- data.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/data.json b/data.json index 1a5c09e9..bbbafdc7 100644 --- a/data.json +++ b/data.json @@ -761,10 +761,13 @@ "username_unclaimed": "noonewouldeverusethis7" }, "Quora": { - "errorType": "status_code", + "errorType": "response_url", + "errorUrl": "https://www.quora.com/profile/{}", "rank": 89, "url": "https://www.quora.com/profile/{}", - "urlMain": "https://www.quora.com/" + "urlMain": "https://www.quora.com/", + "username_claimed": "Matt-Riggsby", + "username_unclaimed": "noonewouldeverusethis7" }, "Rajce.net": { "errorMsg": "410", From 1d7b76aabd319709c90b38c9d6b6424594c8d357 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:05:55 -0600 Subject: [PATCH 05/12] Convert VK to use Response URL detection. For some reason, a query against a known profile (e.g. https://vk.com/smith), would return a 418 HTTP Status. Which is odd, because this HTTP Status means "I'm a teapot". It was defined as an April Fools' joke in 1998. If I use the Response URL detection method, I am able to get around this issue. Add to tests. --- data.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/data.json b/data.json index bbbafdc7..3c3f0d12 100644 --- a/data.json +++ b/data.json @@ -993,10 +993,13 @@ "username_unclaimed": "noonewouldeverusethis7" }, "VK": { - "errorType": "status_code", + "errorType": "response_url", + "errorUrl": "https://www.quora.com/profile/{}", "rank": 15, "url": "https://vk.com/{}", - "urlMain": "https://vk.com/" + "urlMain": "https://vk.com/", + "username_claimed": "smith", + "username_unclaimed": "noonewouldeverusethis7" }, "VSCO": { "errorType": "status_code", From ff9470974e3ab258d9b9dbbb482d2c21064a2349 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:11:45 -0600 Subject: [PATCH 06/12] Convert WebNode to use more reliable HTTP Status detection. Add to tests. This site is slow. 
I sometimes get HTTP 504 errors when testing it. --- data.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/data.json b/data.json index 3c3f0d12..0e9206ef 100644 --- a/data.json +++ b/data.json @@ -1053,11 +1053,12 @@ "username_unclaimed": "noonewouldeverusethis7" }, "WebNode": { - "errorMsg": "Ztratili jste se?", - "errorType": "message", + "errorType": "status_code", "rank": 16178, "url": "https://{}.webnode.cz/", - "urlMain": "https://www.webnode.cz/" + "urlMain": "https://www.webnode.cz/", + "username_claimed": "radkabalcarova", + "username_unclaimed": "noonewouldeverusethis7" }, "Wikia": { "errorMsg": "does not exist", From d62674fdb9f356397fa3f45b23ba9ed4cdf2975c Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:19:00 -0600 Subject: [PATCH 07/12] Add Wix to tests. --- data.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data.json b/data.json index 0e9206ef..8f0efb9c 100644 --- a/data.json +++ b/data.json @@ -1082,7 +1082,9 @@ "errorType": "status_code", "rank": 416, "url": "https://{}.wix.com", - "urlMain": "https://wix.com/" + "urlMain": "https://wix.com/", + "username_claimed": "support", + "username_unclaimed": "noonewouldeverusethis7" }, "WordPress": { "errorType": "response_url", From 19d73ac118754205511221d2af546ce06fae4c29 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:23:01 -0600 Subject: [PATCH 08/12] Convert Zhihu to use Response URL detection. For some reason the site returns a 405 HTTP Status for a known user (e.g. https://www.zhihu.com/people/blue). Add to tests. 
--- data.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/data.json b/data.json index 8f0efb9c..45352470 100644 --- a/data.json +++ b/data.json @@ -1123,11 +1123,13 @@ "username_unclaimed": "noonewouldeverusethis7" }, "Zhihu": { - "errorMsg": "404", - "errorType": "message", + "errorType": "response_url", + "errorUrl": "https://www.zhihu.com/people/{}", "rank": 81, "url": "https://www.zhihu.com/people/{}", - "urlMain": "https://www.zhihu.com/" + "urlMain": "https://www.zhihu.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" }, "devRant": { "errorType": "response_url", From 5637ab88d9c29ec6c6627c37f689f25b21eebb60 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:29:19 -0600 Subject: [PATCH 09/12] Remove Fotolog site. I get a 502 HTTP error (bad gateway) for any access. It does not seem to be working, so there is no sense in including it in Sherlock. --- data.json | 6 ------ 1 file changed, 6 deletions(-) diff --git a/data.json b/data.json index 45352470..ed83ed47 100644 --- a/data.json +++ b/data.json @@ -361,12 +361,6 @@ "username_claimed": "blue", "username_unclaimed": "xgtrq" }, - "Fotolog": { - "errorType": "status_code", - "rank": 47777, - "url": "https://fotolog.com/{}", - "urlMain": "https://fotolog.com/" - }, "Foursquare": { "errorType": "status_code", "rank": 2286, From 9633aaba6778c97351d6a65053fa895f92e24fd3 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:32:35 -0600 Subject: [PATCH 10/12] Remove BlackPlanet site. This site has always returned a false positive. I looked into it and discovered that the site returns the exact same text for a claimed or an unclaimed username. The site must be rendering all of the different content using JavaScript in the browser. So, there is no way to distinguish between the results with the current design of Sherlock. 
--- data.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/data.json b/data.json index ed83ed47..5d205171 100644 --- a/data.json +++ b/data.json @@ -105,13 +105,6 @@ "username_claimed": "blue", "username_unclaimed": "noonewouldeverusethis7" }, - "BlackPlanet": { - "errorMsg": "My Hits", - "errorType": "message", - "rank": 110021, - "url": "http://blackplanet.com/{}", - "urlMain": "http://blackplanet.com/" - }, "Blogger": { "errorType": "status_code", "rank": 192, From 419c62f6753e4c23206ab8d49433d6e1f2bdee6b Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 9 Feb 2019 20:43:50 -0600 Subject: [PATCH 11/12] Update sites now that a couple had to be removed. Update rank as well (since the rank information gets purged from the JSON if you do not add this option). --- data.json | 2294 ++++++++++++++++++++++++++--------------------------- sites.md | 275 ++++--- 2 files changed, 1284 insertions(+), 1285 deletions(-) diff --git a/data.json b/data.json index 5d205171..bc949f94 100644 --- a/data.json +++ b/data.json @@ -1,1147 +1,1147 @@ -{ - "500px": { - "errorMsg": "Sorry, no such page.", - "errorType": "message", - "rank": 2521, - "url": "https://500px.com/{}", - "urlMain": "https://500px.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "9GAG": { - "errorType": "status_code", - "rank": 331, - "url": "https://9gag.com/u/{}", - "urlMain": "https://9gag.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "About.me": { - "errorType": "status_code", - "rank": 12674, - "url": "https://about.me/{}", - "urlMain": "https://about.me/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Academia.edu": { - "errorType": "status_code", - "rank": 383, - "url": "https://independent.academia.edu/{}", - "urlMain": "https://www.academia.edu/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "AngelList": { - 
"errorType": "status_code", - "rank": 3371, - "url": "https://angel.co/{}", - "urlMain": "https://angel.co/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Aptoide": { - "errorType": "status_code", - "rank": 6108, - "url": "https://{}.en.aptoide.com/", - "urlMain": "https://en.aptoide.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "AskFM": { - "errorType": "status_code", - "rank": 1121, - "url": "https://ask.fm/{}", - "urlMain": "https://ask.fm/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "BLIP.fm": { - "errorType": "status_code", - "rank": 282942, - "url": "https://blip.fm/{}", - "urlMain": "https://blip.fm/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Badoo": { - "errorType": "status_code", - "rank": 954, - "url": "https://badoo.com/profile/{}", - "urlMain": "https://badoo.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Bandcamp": { - "errorType": "status_code", - "rank": 568, - "url": "https://www.bandcamp.com/{}", - "urlMain": "https://www.bandcamp.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Basecamp": { - "errorMsg": "The account you were looking for doesn't exist", - "errorType": "message", - "rank": 1544, - "url": "https://{}.basecamphq.com", - "urlMain": "https://basecamp.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Behance": { - "errorType": "status_code", - "rank": 401, - "url": "https://www.behance.net/{}", - "urlMain": "https://www.behance.net/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "BitBucket": { - "errorType": "status_code", - "rank": 839, - "url": "https://bitbucket.org/{}/", - "urlMain": "https://bitbucket.org/", - "username_claimed": "blue", - "username_unclaimed": 
"noonewouldeverusethis7" - }, - "Blogger": { - "errorType": "status_code", - "rank": 192, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.blogspot.com", - "urlMain": "https://www.blogger.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "BuzzFeed": { - "errorType": "status_code", - "rank": 294, - "url": "https://buzzfeed.com/{}", - "urlMain": "https://buzzfeed.com/", - "username_claimed": "blue", - "username_unclaimed": "xgtrq" - }, - "Canva": { - "errorType": "response_url", - "errorUrl": "https://www.canva.com/{}", - "rank": 214, - "url": "https://www.canva.com/{}", - "urlMain": "https://www.canva.com/", - "username_claimed": "blue", - "username_unclaimed": "xgtrq" - }, - "Carbonmade": { - "errorType": "response_url", - "errorUrl": "https://carbonmade.com/fourohfour?domain={}.carbonmade.com", - "rank": 31911, - "url": "https://{}.carbonmade.com", - "urlMain": "https://carbonmade.com/", - "username_claimed": "jenny", - "username_unclaimed": "noonewouldeverusethis7" - }, - "CashMe": { - "errorType": "status_code", - "rank": 45615, - "url": "https://cash.me/{}", - "urlMain": "https://cash.me/", - "username_claimed": "jenny", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Cloob": { - "errorType": "status_code", - "rank": 8131, - "url": "https://www.cloob.com/name/{}", - "urlMain": "https://www.cloob.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Codecademy": { - "errorType": "status_code", - "rank": 2325, - "url": "https://www.codecademy.com/{}", - "urlMain": "https://www.codecademy.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Codementor": { - "errorType": "status_code", - "rank": 12164, - "url": "https://www.codementor.io/{}", - "urlMain": "https://www.codementor.io/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Codepen": { - "errorType": "status_code", - 
"rank": 852, - "url": "https://codepen.io/{}", - "urlMain": "https://codepen.io/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Coderwall": { - "errorMsg": "404! Our feels when that url is used", - "errorType": "message", - "rank": 17253, - "url": "https://coderwall.com/{}", - "urlMain": "https://coderwall.com/", - "username_claimed": "jenny", - "username_unclaimed": "noonewouldeverusethis7" - }, - "ColourLovers": { - "errorType": "status_code", - "rank": 30873, - "url": "https://www.colourlovers.com/lover/{}", - "urlMain": "https://www.colourlovers.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Contently": { - "errorMsg": "We can't find that page!", - "errorType": "message", - "rank": 57715, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.contently.com/", - "urlMain": "https://contently.com/", - "username_claimed": "jordanteicher", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Coroflot": { - "errorType": "status_code", - "rank": 38626, - "url": "https://www.coroflot.com/{}", - "urlMain": "https://coroflot.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "CreativeMarket": { - "errorType": "response_url", - "errorUrl": "https://www.creativemarket.com/", - "rank": 1790, - "url": "https://creativemarket.com/{}", - "urlMain": "https://creativemarket.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Crevado": { - "errorType": "status_code", - "rank": 170211, - "url": "https://{}.crevado.com", - "urlMain": "https://crevado.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Crunchyroll": { - "errorType": "status_code", - "rank": 447, - "url": "https://www.crunchyroll.com/user/{}", - "urlMain": "https://www.crunchyroll.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "DailyMotion": { - 
"errorType": "status_code", - "rank": 133, - "url": "https://www.dailymotion.com/{}", - "urlMain": "https://www.dailymotion.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Designspiration": { - "errorType": "status_code", - "rank": 24423, - "url": "https://www.designspiration.net/{}/", - "urlMain": "https://www.designspiration.net/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "DeviantART": { - "errorType": "status_code", - "rank": 186, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.deviantart.com", - "urlMain": "https://deviantart.com", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Disqus": { - "errorType": "status_code", - "rank": 1330, - "url": "https://disqus.com/{}", - "urlMain": "https://disqus.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Dribbble": { - "errorMsg": "Whoops, that page is gone.", - "errorType": "message", - "rank": 921, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://dribbble.com/{}", - "urlMain": "https://dribbble.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "EVE Online": { - "errorType": "response_url", - "errorUrl": "https://eveonline.com", - "rank": 11650, - "url": "https://evewho.com/pilot/{}/", - "urlMain": "https://eveonline.com", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Ebay": { - "errorMsg": "The User ID you entered was not found", - "errorType": "message", - "rank": 39, - "url": "https://www.ebay.com/usr/{}", - "urlMain": "https://www.ebay.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Ello": { - "errorMsg": "We couldn't find the page you're looking for", - "errorType": "message", - "rank": 28550, - "url": "https://ello.co/{}", - "urlMain": "https://ello.co/", - "username_claimed": "blue", 
- "username_unclaimed": "noonewouldeverusethis7" - }, - "Etsy": { - "errorType": "status_code", - "rank": 152, - "url": "https://www.etsy.com/shop/{}", - "urlMain": "https://www.etsy.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "EyeEm": { - "errorType": "response_url", - "errorUrl": "https://www.eyeem.com/", - "rank": 33324, - "url": "https://www.eyeem.com/u/{}", - "urlMain": "https://www.eyeem.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Facebook": { - "errorType": "status_code", - "rank": 3, - "regexCheck": "^[a-zA-Z0-9]{4,49}(? Date: Sat, 9 Feb 2019 23:34:09 -0600 Subject: [PATCH 12/12] Add test to ensure that all sites have test data. If they do not, then the person running the tests will be able to see the list of sites missing coverage. --- tests/all.py | 17 +++++++++++++++++ tests/base.py | 26 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tests/all.py b/tests/all.py index 1fdfec6b..064cd6d3 100644 --- a/tests/all.py +++ b/tests/all.py @@ -202,3 +202,20 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.detect_type_check("message", exist_check=True) return + + def test_coverage_total(self): + """Test Site Coverage Is Total. + + This test checks that all sites have test data available. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if we do not have total coverage. + """ + + self.coverage_total_check() + + return diff --git a/tests/base.py b/tests/base.py index e3b18970..ff4c5416 100644 --- a/tests/base.py +++ b/tests/base.py @@ -166,3 +166,29 @@ class SherlockBaseTest(unittest.TestCase): ) return + + def coverage_total_check(self): + """Total Coverage Check. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Counts up all Sites with full test data available. + Will trigger an assert if any Site does not have test coverage. 
+ """ + + site_no_tests_list = [] + + for site, site_data in self.site_data_all.items(): + if ( + (site_data.get("username_claimed") is None) or + (site_data.get("username_unclaimed") is None) + ): + # Test information not available on this site. + site_no_tests_list.append(site) + + self.assertEqual("", ", ".join(site_no_tests_list)) + + return