From f609320d3c5306ee6ceae2a62b661e626a83b2c1 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:25:05 -0600 Subject: [PATCH 01/13] Convert Canva to the more robust Response URL detection method. Add to tests to ensure that it is covered. --- data.json | 6 +++--- tests/all.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data.json b/data.json index 4858d42a..186e2cdf 100644 --- a/data.json +++ b/data.json @@ -105,9 +105,9 @@ "url": "https://buzzfeed.com/{}", "urlMain": "https://buzzfeed.com/" }, - "Canva": { - "errorMsg": "Not found (404)", - "errorType": "message", + "Canva": { + "errorType": "response_url", + "errorUrl": "https://www.canva.com/{}", "rank": 215, "url": "https://www.canva.com/{}", "urlMain": "https://www.canva.com/" diff --git a/tests/all.py b/tests/all.py index c486c608..15f8efac 100644 --- a/tests/all.py +++ b/tests/all.py @@ -112,7 +112,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Pinterest", "iMGSRC.RU", "Pastebin", "WordPress", "devRant", "ImageShack", "MeetMe", - "EyeEm", "CreativeMarket", "EVE Online" + "EyeEm", "CreativeMarket", "EVE Online", "Canva" ], exist_check=False ) @@ -136,7 +136,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Pinterest", "iMGSRC.RU", "Pastebin", "WordPress", "devRant", "ImageShack", "MeetMe", - "EyeEm", "CreativeMarket", "EVE Online" + "EyeEm", "CreativeMarket", "EVE Online", "Canva" ], exist_check=True ) From bd941c8034cd7c2b28d1324377a66186963e9ff7 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:44:09 -0600 Subject: [PATCH 02/13] Convert Academia.edu to use the Status Code detection method. The site gives a clean 404 error. --- data.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data.json b/data.json index 186e2cdf..0908a7cb 100644 --- a/data.json +++ b/data.json @@ -19,8 +19,7 @@ "urlMain": "https://about.me/" }, "Academia.edu": { - "errorMsg": "Page Not Found", - "errorType": "message", + "errorType": "status_code", "rank": 385, "url": "https://independent.academia.edu/{}", "urlMain": "https://www.academia.edu/" From 89787b15099d23c572f196f857027a4f855491ef Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:45:36 -0600 Subject: [PATCH 03/13] Add test methods for HTTP Status detection method as well. --- tests/all.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/all.py b/tests/all.py index 15f8efac..0959ca03 100644 --- a/tests/all.py +++ b/tests/all.py @@ -142,3 +142,47 @@ class SherlockSiteCoverageTests(SherlockBaseTest): ) return + + def test_coverage_false_via_status(self): + """Test Username Does Not Exist Site Coverage (Via HTTP Status). + + This test checks all sites with the "HTTP Status" detection mechanism + to ensure that a Username that does not exist is reported that way. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + self.username_check(['noonewouldeverusethis7'], + ["Academia.edu", "9GAG", "About.me" + ], + exist_check=False + ) + + return + + def test_coverage_true_via_status(self): + """Test Username Does Exist Site Coverage (Via HTTP Status). + + This test checks all sites with the "HTTP Status" detection mechanism + to ensure that a Username that does exist is reported that way. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + self.username_check(['blue'], + ["Academia.edu", "9GAG", "About.me" + ], + exist_check=True + ) + + return From 8a82d883c62dfbe186db120ffde778cb078c63e3 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:49:04 -0600 Subject: [PATCH 04/13] Convert AngelList to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index 0908a7cb..d47cc5ce 100644 --- a/data.json +++ b/data.json @@ -25,8 +25,7 @@ "urlMain": "https://www.academia.edu/" }, "AngelList": { - "errorMsg": "We couldn't find what you were looking for.", - "errorType": "message", + "errorType": "status_code", "rank": 3469, "url": "https://angel.co/{}", "urlMain": "https://angel.co/" diff --git a/tests/all.py b/tests/all.py index 0959ca03..107ca473 100644 --- a/tests/all.py +++ b/tests/all.py @@ -158,7 +158,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): """ self.username_check(['noonewouldeverusethis7'], - ["Academia.edu", "9GAG", "About.me" + ["Academia.edu", "9GAG", "About.me", "AngelList" ], exist_check=False ) @@ -180,7 +180,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): """ self.username_check(['blue'], - ["Academia.edu", "9GAG", "About.me" + ["Academia.edu", "9GAG", "About.me", "AngelList" ], exist_check=True ) From c76b4524da3744b87e115acdccd46bcde773c59b Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:51:46 -0600 Subject: [PATCH 05/13] Convert BLIP.fm to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index d47cc5ce..42404264 100644 --- a/data.json +++ b/data.json @@ -43,8 +43,7 @@ "urlMain": "https://ask.fm/" }, "BLIP.fm": { - "errorMsg": "Page Not Found", - "errorType": "message", + "errorType": "status_code", "rank": 261919, "url": "https://blip.fm/{}", "urlMain": "https://blip.fm/" diff --git a/tests/all.py b/tests/all.py index 107ca473..a299bd38 100644 --- a/tests/all.py +++ b/tests/all.py @@ -158,7 +158,8 @@ class SherlockSiteCoverageTests(SherlockBaseTest): """ self.username_check(['noonewouldeverusethis7'], - ["Academia.edu", "9GAG", "About.me", "AngelList" + ["Academia.edu", "9GAG", "About.me", "AngelList", + "BLIP.fm" ], exist_check=False ) @@ -180,7 +181,8 @@ class SherlockSiteCoverageTests(SherlockBaseTest): """ self.username_check(['blue'], - ["Academia.edu", "9GAG", "About.me", "AngelList" + ["Academia.edu", "9GAG", "About.me", "AngelList", + "BLIP.fm" ], exist_check=True ) From 65e382060882fb08934ea6b6d2c8043e57443ec8 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:54:22 -0600 Subject: [PATCH 06/13] Convert Bandcamp to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index 42404264..b2892af2 100644 --- a/data.json +++ b/data.json @@ -55,8 +55,7 @@ "urlMain": "https://badoo.com/" }, "Bandcamp": { - "errorMsg": "Sorry, that something isn\u2019t here", - "errorType": "message", + "errorType": "status_code", "rank": 573, "url": "https://www.bandcamp.com/{}", "urlMain": "https://www.bandcamp.com/" diff --git a/tests/all.py b/tests/all.py index a299bd38..b8bf309c 100644 --- a/tests/all.py +++ b/tests/all.py @@ -159,7 +159,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm" + "BLIP.fm", "Bandcamp" ], exist_check=False ) @@ -182,7 +182,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm" + "BLIP.fm", "Bandcamp" ], exist_check=True ) From 08ac008828b5101e1621ca46e3689a46342c6c82 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 17:58:14 -0600 Subject: [PATCH 07/13] Convert Behance to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index b2892af2..33f2798c 100644 --- a/data.json +++ b/data.json @@ -68,8 +68,7 @@ "urlMain": "https://basecamp.com/" }, "Behance": { - "errorMsg": "Oops! We can\u2019t find that page.", - "errorType": "message", + "errorType": "status_code", "rank": 394, "url": "https://www.behance.net/{}", "urlMain": "https://www.behance.net/" diff --git a/tests/all.py b/tests/all.py index b8bf309c..2758e633 100644 --- a/tests/all.py +++ b/tests/all.py @@ -159,7 +159,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp" + "BLIP.fm", "Bandcamp", "Behance" ], exist_check=False ) @@ -182,7 +182,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp" + "BLIP.fm", "Bandcamp", "Behance" ], exist_check=True ) From 223d9716cbfa45291d0d1365b154ab76c6b8d1ee Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:04:24 -0600 Subject: [PATCH 08/13] Convert BuzzFeed to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index 33f2798c..e40a97df 100644 --- a/data.json +++ b/data.json @@ -94,8 +94,7 @@ "urlMain": "https://www.blogger.com/" }, "BuzzFeed": { - "errorMsg": "We can't find the page you're looking for.", - "errorType": "message", + "errorType": "status_code", "rank": 294, "url": "https://buzzfeed.com/{}", "urlMain": "https://buzzfeed.com/" diff --git a/tests/all.py b/tests/all.py index 2758e633..395945d2 100644 --- a/tests/all.py +++ b/tests/all.py @@ -159,7 +159,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp", "Behance" + "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed" ], exist_check=False ) @@ -182,7 +182,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp", "Behance" + "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed" ], exist_check=True ) From 110b93a757439645f5e5cbc278c9e77a5a0fdabc Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:07:29 -0600 Subject: [PATCH 09/13] Convert Codecademy to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index e40a97df..57dfe48e 100644 --- a/data.json +++ b/data.json @@ -126,8 +126,7 @@ "urlMain": "https://www.cloob.com/" }, "Codecademy": { - "errorMsg": "404 error", - "errorType": "message", + "errorType": "status_code", "rank": 2314, "url": "https://www.codecademy.com/{}", "urlMain": "https://www.codecademy.com/" diff --git a/tests/all.py b/tests/all.py index 395945d2..d32d1f02 100644 --- a/tests/all.py +++ b/tests/all.py @@ -159,7 +159,8 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed" + "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", + "Codecademy" ], exist_check=False ) @@ -182,7 +183,8 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", - "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed" + "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", + "Codecademy" ], exist_check=True ) From 26ef2e1b9bc6b7162f3e6f196f68455585673393 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:10:06 -0600 Subject: [PATCH 10/13] Convert Codementor to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 3 +-- tests/all.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data.json b/data.json index 57dfe48e..0ab091b6 100644 --- a/data.json +++ b/data.json @@ -132,8 +132,7 @@ "urlMain": "https://www.codecademy.com/" }, "Codementor": { - "errorMsg": "404", - "errorType": "message", + "errorType": "status_code", "rank": 12456, "url": "https://www.codementor.io/{}", "urlMain": "https://www.codementor.io/" diff --git a/tests/all.py b/tests/all.py index d32d1f02..d85b0ffc 100644 --- a/tests/all.py +++ b/tests/all.py @@ -160,7 +160,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", - "Codecademy" + "Codecademy", "Codementor" ], exist_check=False ) @@ -184,7 +184,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", - "Codecademy" + "Codecademy", "Codementor" ], exist_check=True ) From 6fc5c131dbf8dd4e8406db714e16492060daee0f Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:15:42 -0600 Subject: [PATCH 11/13] Convert Designspiration to use the Status Code detection method. The site gives a clean 404 error. Add to tests. --- data.json | 5 ++--- tests/all.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/data.json b/data.json index 0ab091b6..1ea77c5f 100644 --- a/data.json +++ b/data.json @@ -197,10 +197,9 @@ "urlMain": "https://www.dailymotion.com/" }, "Designspiration": { - "errorMsg": "Content Not Found", - "errorType": "message", + "errorType": "status_code", "rank": 24722, - "url": "https://www.designspiration.net/{}", + "url": "https://www.designspiration.net/{}/", "urlMain": "https://www.designspiration.net/" }, "DeviantART": { diff --git a/tests/all.py b/tests/all.py index d85b0ffc..c76ff2aa 100644 --- a/tests/all.py +++ b/tests/all.py @@ -160,7 +160,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['noonewouldeverusethis7'], ["Academia.edu", "9GAG", "About.me", "AngelList", "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", - "Codecademy", "Codementor" + "Codecademy", "Codementor", "Designspiration" ], exist_check=False ) @@ -184,7 +184,7 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.username_check(['blue'], ["Academia.edu", "9GAG", "About.me", "AngelList", "BLIP.fm", "Bandcamp", "Behance", "BuzzFeed", - "Codecademy", "Codementor" + "Codecademy", "Codementor", "Designspiration" ], exist_check=True ) From 83aed9aeeed4b6d3c3cf524c66b56bd4e770ade5 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:24:28 -0600 Subject: [PATCH 12/13] Add test methods for Error Message detection method as well. Add Dribbble to tests. --- tests/all.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/all.py b/tests/all.py index c76ff2aa..e21e960f 100644 --- a/tests/all.py +++ b/tests/all.py @@ -190,3 +190,47 @@ class SherlockSiteCoverageTests(SherlockBaseTest): ) return + + def test_coverage_false_via_message(self): + """Test Username Does Not Exist Site Coverage (Via Error Message). + + This test checks all sites with the "Error Message" detection mechanism + to ensure that a Username that does not exist is reported that way. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + self.username_check(['noonewouldeverusethis7'], + ["Dribbble" + ], + exist_check=False + ) + + return + + def test_coverage_true_via_message(self): + """Test Username Does Exist Site Coverage (Via Error Message). + + This test checks all sites with the "Error Message" detection mechanism + to ensure that a Username that does exist is reported that way. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + self.username_check(['blue'], + ["Dribbble" + ], + exist_check=True + ) + + return From 7fb6d26cc74dbfefc2f8de4833a08b5a7c970c45 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Wed, 30 Jan 2019 18:37:03 -0600 Subject: [PATCH 13/13] Previous code was failing the flake8 tests because the random module was not imported. --- sherlock.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock.py b/sherlock.py index adc942d6..03d5407b 100644 --- a/sherlock.py +++ b/sherlock.py @@ -13,6 +13,7 @@ import os import platform import re import sys +import random from argparse import ArgumentParser, RawDescriptionHelpFormatter from concurrent.futures import ThreadPoolExecutor from time import time