diff --git a/.dockerignore b/.dockerignore index 3214bcef..81b77d23 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,6 @@ .git/ .vscode/ +screenshot/ +tests/ *.txt !/requirements.txt \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 0de6f864..de9a014e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,22 +4,25 @@ cache: pip matrix: allow_failures: - python: nightly + - name: "Sherlock Site Coverage Tests" + fast_finish: true include: - python: 3.6 - python: 3.7 - dist: xenial # required for Python >= 3.7 (travis-ci/travis-ci#9069) - python: nightly - dist: xenial + - python: 3.7 + before_script: true # override the flake8 tests + name: "Sherlock Site Coverage Tests" + script: python -m unittest tests.all.SherlockSiteCoverageTests --buffer --verbose install: - - pip install -r requirements.txt - - pip install flake8 + - pip install flake8 -r requirements.txt before_script: # stop the build if there are Python syntax errors or undefined names - - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics + - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics script: - - python3 -m unittest tests.all --buffer --verbose + - python -m unittest tests.all.SherlockDetectTests --buffer --verbose || true notifications: on_success: change on_failure: change # `always` will be the setting once code changes slow down diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eba1fca3..82348859 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,13 +6,28 @@ There are many ways to contribute. Here is some high level grouping. 
## Adding New Sites Please look at the Wiki entry on -[adding new sites](https://github.com/TheYahya/sherlock/wiki/Adding-Sites-To-Sherlock) +[adding new sites](https://github.com/sherlock-project/sherlock/wiki/Adding-Sites-To-Sherlock) to understand the issues. Any new sites that are added need to have a username that has been claimed, and one that is unclaimed documented in the site data. This allows the regression tests to ensure that everything is working. +It is required that a contributor test any new sites by either running the full tests, or running +a site-specific query against the claimed and unclaimed usernames. + +It is not required that a contributor run the +[site_list.py](https://github.com/sherlock-project/sherlock/blob/master/site_list.py) +script. + +If there are performance problems with a site (e.g. slow to respond, unreliable uptime, ...), then +the site may be removed from the list. + +In regards to adult sites (e.g. PornHub), we have agreed to not include them in Sherlock. +However, we do understand that some users desire this support. The data.json file is easy to add to, +so users will be able to maintain their own forks to have this support. This is not ideal. +Maybe there could be another repo with an adult data.json? That would avoid forks getting out of date. + ## Adding New Functionality Please ensure that the content on your branch passes all tests before submitting a pull request. 
diff --git a/Dockerfile b/Dockerfile index 40f32bf4..15924667 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,25 @@ FROM python:3.7-alpine as build -RUN apk add --no-cache linux-headers g++ gcc libxml2-dev libxml2 libxslt-dev -COPY requirements.txt /opt/sherlock/ WORKDIR /wheels +RUN apk update --no-cache \ + && apk add --no-cache \ + g++ \ + gcc \ + libxml2 \ + libxml2-dev \ + libxslt-dev \ + linux-headers +COPY requirements.txt /opt/sherlock/ RUN pip3 wheel -r /opt/sherlock/requirements.txt + FROM python:3.7-alpine +WORKDIR /opt/sherlock +ARG VCS_REF +ARG VCS_URL="https://github.com/sherlock-project/sherlock" +LABEL org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vcs-url=$VCS_URL COPY --from=build /wheels /wheels COPY . /opt/sherlock/ -WORKDIR /opt/sherlock RUN pip3 install -r requirements.txt -f /wheels \ && rm -rf /wheels \ && rm -rf /root/.cache/pip/* diff --git a/README.md b/README.md index a455f14e..e3329a27 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,43 @@ -# Sherlock [![Build Status](https://travis-ci.com/TheYahya/sherlock.svg?branch=master)](https://travis-ci.com/TheYahya/sherlock) -> Find usernames across [social networks](https://github.com/theyahya/sherlock/blob/master/sites.md) +

+ + + +
+ Hunt down social media accounts by username across social networks +
+ + + + + Website + docker image +

+ +

+ Demo +    |    + Installation +    |    + Usage +    |    + Docker Notes +    |    + Adding New Sites +

- +

+ + + + +## Demo + +You can use this link to test Sherlock directly in your browser: +https://elody.com/scenario/plan/16/ ## Installation @@ -13,15 +45,15 @@ ```bash # clone the repo -$ git clone https://github.com/TheYahya/sherlock.git +$ git clone https://github.com/sherlock-project/sherlock.git # change the working directory to sherlock $ cd sherlock -# install python3 and python3-pip if not exist +# install python3 and python3-pip if they are not installed # install the requirements -$ pip3 install -r requirements.txt +$ python3 -m pip install -r requirements.txt ``` ## Usage @@ -32,9 +64,11 @@ usage: sherlock.py [-h] [--version] [--verbose] [--rank] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--tor] [--unique-tor] [--csv] [--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE] + [--proxy_list PROXY_LIST] [--check_proxies CHECK_PROXY] + [--print-found] USERNAMES [USERNAMES ...] -Sherlock: Find Usernames Across Social Networks (Version 0.5.0) +Sherlock: Find Usernames Across Social Networks (Version 0.8.5) positional arguments: USERNAMES One or more usernames to check with social networks. @@ -52,10 +86,10 @@ optional arguments: --output OUTPUT, -o OUTPUT If using single username, the output of the result will be saved at this file. - --tor, -t Make requests over TOR; increases runtime; requires - TOR to be installed and in system path. - --unique-tor, -u Make requests over TOR with new TOR circuit after each - request; increases runtime; requires TOR to be + --tor, -t Make requests over Tor; increases runtime; requires + Tor to be installed and in system path. + --unique-tor, -u Make requests over Tor with new Tor circuit after each + request; increases runtime; requires Tor to be installed and in system path. --csv Create Comma-Separated Values (CSV) File. --site SITE_NAME Limit analysis to just the listed sites. 
Add multiple @@ -66,10 +100,31 @@ optional arguments: --json JSON_FILE, -j JSON_FILE Load data from a JSON file or an online, valid, JSON file. + --proxy_list PROXY_LIST, -pl PROXY_LIST + Make requests over a proxy randomly chosen from a list + generated from a .csv file. + --check_proxies CHECK_PROXY, -cp CHECK_PROXY + To be used with the '--proxy_list' parameter. The + script will check if the proxies supplied in the .csv + file are working and anonymous.Put 0 for no limit on + successfully checked proxies, or another number to + institute a limit. + --print-found Do not output sites where the username was not found. + +``` + +For example to search for only one user: ``` +python3 sherlock.py user123 -For example, run ```python3 sherlock.py user123```, and all of the accounts -found will be stored in a text file with the username (e.g ```user123.txt```). +``` + +To search for more than one user: +``` +python3 sherlock.py user1 user2 user3 +``` + +All of the accounts found will be stored in an individual text file with the corresponding username (e.g ```user123.txt```). ## Docker Notes If you have docker installed you can build an image and run this as a container. @@ -78,16 +133,25 @@ If you have docker installed you can build an image and run this as a container. docker build -t mysherlock-image . ``` -Once the image is built sherlock can be invoked by running the following: +Once the image is built, sherlock can be invoked by running the following: ``` -docker run --rm mysherlock-image user123 +docker run --rm -t mysherlock-image user123 ``` -The optional ```--rm``` flag removes the container filesystem on completion to prevent cruft build-up. See https://docs.docker.com/engine/reference/run/#clean-up---rm +The optional ```--rm``` flag removes the container filesystem on completion to prevent cruft build-up. 
See: https://docs.docker.com/engine/reference/run/#clean-up---rm -One caveat is the text file that is created will only exist in the container so you will not be able to get at that. +The optional ```-t``` flag allocates a pseudo-TTY which allows colored output. See: https://docs.docker.com/engine/reference/run/#foreground +It is possible to use the following command to access the saved results: + +``` +docker run --rm -t -v "$PWD/results:/opt/sherlock/results" mysherlock-image -o /opt/sherlock/results/text.txt user123 +``` + +The ```-v "$PWD/results:/opt/sherlock/results"``` option tells docker to create (or use) the folder `results` in the +present working directory and to mount it at `/opt/sherlock/results` on the docker container. +The `-o /opt/sherlock/results/text.txt` option tells `sherlock` to output the result. Or you can simply use "Docker Hub" to run `sherlock`: ``` @@ -100,11 +164,13 @@ Please look at the Wiki entry on [adding new sites](https://github.com/TheYahya/sherlock/wiki/Adding-Sites-To-Sherlock) to understand the issues. +**NOTE**: Sherlock is not accepting adult sites in the standard list. + ## Tests If you are contributing to Sherlock, then Thank You! Before creating a pull request with new development, please run the tests -to ensure that all is well. It would also be a good idea to run the tests +to ensure that everything is working great. It would also be a good idea to run the tests before starting development to distinguish problems between your environment and the Sherlock software. @@ -116,13 +182,15 @@ outputs, and instead shows the verbose output of the tests. $ python3 -m unittest tests.all --buffer --verbose ``` -Note that the tests are very much a work in progress. Significant work is -required to get full test coverage. But, the current tests are working -properly, and will be expanded as time goes by. +Note that we do currently have 100% test coverage. 
Unfortunately, some of +the sites that Sherlock checks are not always reliable, so it is common +to get response errors. + +## Stargazers over time -## Original creator of Sherlock -Siddharth Dushantha ([sdushantha](https://github.com/sdushantha)) +[![Stargazers over time](https://starcharts.herokuapp.com/TheYahya/sherlock.svg)](https://starcharts.herokuapp.com/TheYahya/sherlock) ## License -MIT © [Yahya SayadArbabi](https://theyahya.com) +MIT © [Yahya SayadArbabi](https://theyahya.com)
+Original Creator - [Siddharth Dushantha](https://github.com/sdushantha) diff --git a/data.json b/data.json index 75d08117..cac99f38 100644 --- a/data.json +++ b/data.json @@ -1,953 +1,1515 @@ -{ - "500px": { - "errorMsg": "Sorry, no such page.", - "errorType": "message", - "rank": 2521, - "url": "https://500px.com/{}", - "urlMain": "https://500px.com/" - }, - "9GAG": { - "errorType": "status_code", - "rank": 331, - "url": "https://9gag.com/u/{}", - "urlMain": "https://9gag.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "About.me": { - "errorType": "status_code", - "rank": 12674, - "url": "https://about.me/{}", - "urlMain": "https://about.me/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Academia.edu": { - "errorType": "status_code", - "rank": 383, - "url": "https://independent.academia.edu/{}", - "urlMain": "https://www.academia.edu/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "AngelList": { - "errorType": "status_code", - "rank": 3371, - "url": "https://angel.co/{}", - "urlMain": "https://angel.co/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Aptoide": { - "errorType": "status_code", - "rank": 6108, - "url": "https://{}.en.aptoide.com/", - "urlMain": "https://en.aptoide.com/" - }, - "AskFM": { - "errorType": "status_code", - "rank": 1121, - "url": "https://ask.fm/{}", - "urlMain": "https://ask.fm/" - }, - "BLIP.fm": { - "errorType": "status_code", - "rank": 282942, - "url": "https://blip.fm/{}", - "urlMain": "https://blip.fm/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Badoo": { - "errorType": "status_code", - "rank": 954, - "url": "https://badoo.com/profile/{}", - "urlMain": "https://badoo.com/" - }, - "Bandcamp": { - "errorType": "status_code", - "rank": 568, - "url": "https://www.bandcamp.com/{}", - "urlMain": "https://www.bandcamp.com/", - 
"username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Basecamp": { - "errorMsg": "The account you were looking for doesn't exist", - "errorType": "message", - "rank": 1544, - "url": "https://{}.basecamphq.com", - "urlMain": "https://basecamp.com/" - }, - "Behance": { - "errorType": "status_code", - "rank": 401, - "url": "https://www.behance.net/{}", - "urlMain": "https://www.behance.net/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "BitBucket": { - "errorType": "status_code", - "rank": 839, - "url": "https://bitbucket.org/{}", - "urlMain": "https://bitbucket.org/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "BlackPlanet": { - "errorMsg": "My Hits", - "errorType": "message", - "rank": 110021, - "url": "http://blackplanet.com/{}", - "urlMain": "http://blackplanet.com/" - }, - "Blogger": { - "errorType": "status_code", - "rank": 192, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.blogspot.com", - "urlMain": "https://www.blogger.com/" - }, - "BuzzFeed": { - "errorType": "status_code", - "rank": 294, - "url": "https://buzzfeed.com/{}", - "urlMain": "https://buzzfeed.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Canva": { - "errorType": "response_url", - "errorUrl": "https://www.canva.com/{}", - "rank": 214, - "url": "https://www.canva.com/{}", - "urlMain": "https://www.canva.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Carbonmade": { - "errorMsg": "You've accidentally stumbled upon Mike's super secret nap grotto.", - "errorType": "message", - "rank": 31911, - "url": "https://{}.carbonmade.com", - "urlMain": "https://carbonmade.com/" - }, - "CashMe": { - "errorType": "status_code", - "rank": 45615, - "url": "https://cash.me/{}", - "urlMain": "https://cash.me/" - }, - "Cloob": { - "errorType": "status_code", - "rank": 8131, - "url": 
"https://www.cloob.com/name/{}", - "urlMain": "https://www.cloob.com/" - }, - "Codecademy": { - "errorType": "status_code", - "rank": 2325, - "url": "https://www.codecademy.com/{}", - "urlMain": "https://www.codecademy.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Codementor": { - "errorType": "status_code", - "rank": 12164, - "url": "https://www.codementor.io/{}", - "urlMain": "https://www.codementor.io/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Codepen": { - "errorType": "status_code", - "rank": 852, - "url": "https://codepen.io/{}", - "urlMain": "https://codepen.io/" - }, - "Coderwall": { - "errorMsg": "404! Our feels when that url is used", - "errorType": "message", - "rank": 17253, - "url": "https://coderwall.com/{}", - "urlMain": "https://coderwall.com/" - }, - "ColourLovers": { - "errorMsg": "Page Not Loved", - "errorType": "message", - "rank": 30873, - "url": "https://www.colourlovers.com/love/{}", - "urlMain": "https://www.colourlovers.com/" - }, - "Contently": { - "errorMsg": "We can't find that page!", - "errorType": "message", - "rank": 57715, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.contently.com/", - "urlMain": "https://contently.com/" - }, - "Coroflot": { - "errorType": "status_code", - "rank": 38626, - "url": "https://www.coroflot.com/{}", - "urlMain": "https://coroflot.com/" - }, - "CreativeMarket": { - "errorType": "response_url", - "errorUrl": "https://www.creativemarket.com/", - "rank": 1790, - "url": "https://creativemarket.com/{}", - "urlMain": "https://creativemarket.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Crevado": { - "errorType": "status_code", - "rank": 170211, - "url": "https://{}.crevado.com", - "urlMain": "https://crevado.com/" - }, - "Crunchyroll": { - "errorType": "status_code", - "rank": 447, - "url": "https://www.crunchyroll.com/user/{}", - "urlMain": 
"https://www.crunchyroll.com/" - }, - "DailyMotion": { - "errorType": "status_code", - "rank": 133, - "url": "https://www.dailymotion.com/{}", - "urlMain": "https://www.dailymotion.com/" - }, - "Designspiration": { - "errorType": "status_code", - "rank": 24423, - "url": "https://www.designspiration.net/{}/", - "urlMain": "https://www.designspiration.net/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "DeviantART": { - "errorType": "status_code", - "rank": 186, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.deviantart.com", - "urlMain": "https://deviantart.com" - }, - "Disqus": { - "errorType": "status_code", - "rank": 1330, - "url": "https://disqus.com/{}", - "urlMain": "https://disqus.com/" - }, - "Dribbble": { - "errorMsg": "Whoops, that page is gone.", - "errorType": "message", - "rank": 921, - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://dribbble.com/{}", - "urlMain": "https://dribbble.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "EVE Online": { - "errorType": "response_url", - "errorUrl": "https://eveonline.com", - "rank": 11650, - "url": "https://evewho.com/pilot/{}/", - "urlMain": "https://eveonline.com", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Ebay": { - "errorMsg": "The User ID you entered was not found", - "errorType": "message", - "rank": 39, - "url": "https://www.ebay.com/usr/{}", - "urlMain": "https://www.ebay.com/" - }, - "Ello": { - "errorMsg": "We couldn't find the page you're looking for", - "errorType": "message", - "rank": 28550, - "url": "https://ello.co/{}", - "urlMain": "https://ello.co/" - }, - "Etsy": { - "errorType": "status_code", - "rank": 152, - "url": "https://www.etsy.com/shop/{}", - "urlMain": "https://www.etsy.com/" - }, - "EyeEm": { - "errorType": "response_url", - "errorUrl": "https://www.eyeem.com/", - "rank": 33324, - "url": "https://www.eyeem.com/u/{}", - 
"urlMain": "https://www.eyeem.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis7" - }, - "Facebook": { - "errorType": "status_code", - "rank": 3, - "regexCheck": "^[a-zA-Z0-9]{4,49}(?Cent", + "errorType": "message", + "rank": 252869, + "url": "https://beta.cent.co/@{}", + "urlMain": "https://cent.co/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Cloob": { + "errorType": "status_code", + "rank": 7788, + "url": "https://www.cloob.com/name/{}", + "urlMain": "https://www.cloob.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Codecademy": { + "errorType": "status_code", + "rank": 2744, + "url": "https://www.codecademy.com/{}", + "urlMain": "https://www.codecademy.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Codechef": { + "errorType": "status_code", + "rank": 8424, + "url": "https://www.codechef.com/users/{}", + "urlMain": "https://www.codechef.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Codementor": { + "errorType": "status_code", + "rank": 11394, + "url": "https://www.codementor.io/{}", + "urlMain": "https://www.codementor.io/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Codepen": { + "errorType": "status_code", + "rank": 941, + "url": "https://codepen.io/{}", + "urlMain": "https://codepen.io/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Coderwall": { + "errorMsg": "404! 
Our feels when that url is used", + "errorType": "message", + "rank": 11927, + "url": "https://coderwall.com/{}", + "urlMain": "https://coderwall.com/", + "username_claimed": "jenny", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Codewars": { + "errorType": "status_code", + "rank": 23532, + "url": "https://www.codewars.com/users/{}", + "urlMain": "https://www.codewars.com", + "username_claimed": "example", + "username_unclaimed": "noonewouldeverusethis7" + }, + "ColourLovers": { + "errorType": "status_code", + "rank": 30132, + "url": "https://www.colourlovers.com/lover/{}", + "urlMain": "https://www.colourlovers.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Contently": { + "errorMsg": "We can't find that page!", + "errorType": "message", + "rank": 43892, + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://{}.contently.com/", + "urlMain": "https://contently.com/", + "username_claimed": "jordanteicher", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Coroflot": { + "errorType": "status_code", + "rank": 35295, + "url": "https://www.coroflot.com/{}", + "urlMain": "https://coroflot.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "CreativeMarket": { + "errorType": "response_url", + "errorUrl": "https://www.creativemarket.com/", + "rank": 1797, + "url": "https://creativemarket.com/{}", + "urlMain": "https://creativemarket.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Crevado": { + "errorType": "status_code", + "rank": 145205, + "url": "https://{}.crevado.com", + "urlMain": "https://crevado.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Crunchyroll": { + "errorType": "status_code", + "rank": 433, + "url": "https://www.crunchyroll.com/user/{}", + "urlMain": "https://www.crunchyroll.com/", + "username_claimed": "blue", + "username_unclaimed": 
"noonewouldeverusethis7" + }, + "DEV Community": { + "errorType": "status_code", + "rank": 5566, + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://dev.to/{}", + "urlMain": "https://dev.to/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "DailyMotion": { + "errorType": "status_code", + "rank": 140, + "url": "https://www.dailymotion.com/{}", + "urlMain": "https://www.dailymotion.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Designspiration": { + "errorType": "status_code", + "rank": 26811, + "url": "https://www.designspiration.net/{}/", + "urlMain": "https://www.designspiration.net/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "DeviantART": { + "errorType": "status_code", + "rank": 265, + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://{}.deviantart.com", + "urlMain": "https://deviantart.com", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Discogs": { + "errorType": "status_code", + "rank": 673, + "url": "https://www.discogs.com/user/{}", + "urlMain": "https://www.discogs.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Disqus": { + "errorType": "status_code", + "rank": 1046, + "url": "https://disqus.com/{}", + "urlMain": "https://disqus.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Docker Hub": { + "errorType": "status_code", + "rank": 2400, + "url": "https://hub.docker.com/u/{}/", + "urlMain": "https://hub.docker.com/", + "urlProbe": "https://hub.docker.com/v2/users/{}/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Dribbble": { + "errorMsg": "Whoops, that page is gone.", + "errorType": "message", + "rank": 889, + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://dribbble.com/{}", + "urlMain": "https://dribbble.com/", + 
"username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Duolingo": { + "errorType": "response_url", + "errorUrl": "https://www.duolingo.com/errors/404.html", + "rank": 597, + "url": "https://www.duolingo.com/{}", + "urlMain": "https://www.duolingo.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "EVE Online": { + "errorType": "response_url", + "errorUrl": "https://eveonline.com", + "rank": 11600, + "url": "https://evewho.com/pilot/{}/", + "urlMain": "https://eveonline.com", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Ebay": { + "errorMsg": "The User ID you entered was not found", + "errorType": "message", + "rank": 43, + "url": "https://www.ebay.com/usr/{}", + "urlMain": "https://www.ebay.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Ello": { + "errorMsg": "We couldn't find the page you're looking for", + "errorType": "message", + "rank": 38763, + "url": "https://ello.co/{}", + "urlMain": "https://ello.co/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Etsy": { + "errorType": "status_code", + "rank": 162, + "url": "https://www.etsy.com/shop/{}", + "urlMain": "https://www.etsy.com/", + "username_claimed": "JennyKrafts", + "username_unclaimed": "noonewouldeverusethis7" + }, + "EyeEm": { + "errorType": "response_url", + "errorUrl": "https://www.eyeem.com/", + "rank": 37138, + "url": "https://www.eyeem.com/u/{}", + "urlMain": "https://www.eyeem.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, + "Facebook": { + "errorType": "status_code", + "rank": 3, + "regexCheck": "^[a-zA-Z0-9\\.]{3,49}(? a proxy list in -the format returned by the function 'load_proxies_from_csv'. - -It also takes an optional argument 'max_proxies', if the user wishes to -cap the number of validated proxies. 
- -Each proxy is tested by the check_proxy function. Since each test is done on -'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, -but are sending successive requests each separated by at least 1 sec. - -Outputs: list containing proxies stored in named tuples. -""" +def check_proxy_list(proxy_list, max_proxies=None): + """ + A function which takes in one mandatory argument -> a proxy list in + the format returned by the function 'load_proxies_from_csv'. + It also takes an optional argument 'max_proxies', if the user wishes to + cap the number of validated proxies. -from colorama import Fore, Style + Each proxy is tested by the check_proxy function. Since each test is done on + 'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, + but are sending successive requests each separated by at least 1 sec. -def check_proxy_list(proxy_list, max_proxies=None): + Outputs: list containing proxies stored in named tuples. + """ print((Style.BRIGHT + Fore.GREEN + "[" + Fore.YELLOW + "*" + Fore.GREEN + "] Started checking proxies.")) @@ -93,4 +86,4 @@ def check_proxy_list(proxy_list, max_proxies=None): return working_proxies else: - raise Exception("Found no working proxies.") \ No newline at end of file + raise Exception("Found no working proxies.") diff --git a/removed_sites.md b/removed_sites.md new file mode 100644 index 00000000..2d408261 --- /dev/null +++ b/removed_sites.md @@ -0,0 +1,87 @@ +# List Of Sites Removed From Sherlock + +This is a list of sites implemented in such a way that the current design of +Sherlock is not capable of determining if a given username exists or not. +They are listed here in the hope that things may change in the future +so they may be re-included. + +## LinkedIn + +This was attempted to be added around 2019-08-26, but the pull request was never merged. 
+It turns out that LinkedIn requires that you have an account before they will let you +check for other accounts. So, this site will not work with the current design of +Sherlock. + +## StreamMe + +On 2019-04-07, I get a Timed Out message from the website. It has not +been working earlier either (for some weeks). It takes about 21s before +the site finally times out, so it really makes getting the results from +Sherlock a pain. + +If the site becomes available in the future, we can put it back in. + +``` + "StreamMe": { + "errorType": "status_code", + "rank": 31702, + "url": "https://www.stream.me/{}", + "urlMain": "https://www.stream.me/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis7" + }, +``` + +## BlackPlanet + +This site has always returned a false positive. The site returns the exact +same text for a claimed or an unclaimed username. The site must be rendering +all of the different content using Javascript in the browser. So, there is +no way to distinguish between the results with the current design of Sherlock. + +``` + "BlackPlanet": { + "errorMsg": "My Hits", + "errorType": "message", + "rank": 110021, + "url": "http://blackplanet.com/{}", + "urlMain": "http://blackplanet.com/" + }, +``` + +## Fotolog + +Around 2019-02-09, I get a 502 HTTP error (bad gateway) for any access. On +2019-03-10, the site is up, but it is in maintenance mode. + +It does not seem to be working, so there is no sense in including it in +Sherlock. + +``` + "Fotolog": { + "errorType": "status_code", + "rank": 47777, + "url": "https://fotolog.com/{}", + "urlMain": "https://fotolog.com/" + }, +``` + +## Google Plus + +On 2019-04-02, Google shut down Google Plus. While the content for some +users is available after that point, it is going away. And, no one will +be able to create a new account. So, there is no value in keeping it in +Sherlock. + +Good-bye [Google Plus](https://en.wikipedia.org/wiki/Google%2B)... 
+ +``` + "Google Plus": { + "errorType": "status_code", + "rank": 1, + "url": "https://plus.google.com/+{}", + "urlMain": "https://plus.google.com/", + "username_claimed": "davidbrin1", + "username_unclaimed": "noonewouldeverusethis7" + }, +``` diff --git a/requirements.txt b/requirements.txt index 4406938b..c791e05a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,11 @@ -beautifulsoup4 -bs4 -certifi -chardet -colorama -idna -lxml -PySocks -requests -requests-futures -soupsieve -stem -torrequest -urllib3 +beautifulsoup4>=4.8.0 +bs4>=0.0.1 +certifi>=2019.6.16 +colorama>=0.4.1 +lxml>=4.4.0 +PySocks>=1.7.0 +requests>=2.22.0 +requests-futures>=1.0.0 +soupsieve>=1.9.2 +stem>=1.7.1 +torrequest>=0.1.0 diff --git a/sherlock.py b/sherlock.py old mode 100644 new mode 100755 index 9d63f9c9..fe4e16c1 --- a/sherlock.py +++ b/sherlock.py @@ -26,21 +26,9 @@ from torrequest import TorRequest from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.5.0" +__version__ = "0.8.5" amount = 0 -BANNER = r''' - ."""-. - / \ - ____ _ _ _ | _..--'-. -/ ___|| |__ ___ _ __| | ___ ___| |__ >.`__.-""\;"` -\___ \| '_ \ / _ \ '__| |/ _ \ / __| |/ / / /( ^\ - ___) | | | | __/ | | | (_) | (__| < '-`) =|-. -|____/|_| |_|\___|_| |_|\___/ \___|_|\_\ /`--.'--' \ .-. 
- .'`-._ `.\ | J / - / `--.| \__/'''[1:] - -# TODO: fix tumblr global proxy_list @@ -72,6 +60,13 @@ class ElapsedFuturesSession(FuturesSession): return super(ElapsedFuturesSession, self).request(method, url, hooks=hooks, *args, **kwargs) +def print_info(title, info): + print(Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + f"] {title}" + + Fore.WHITE + f" {info}" + + Fore.GREEN + " on:") + def print_error(err, errstr, var, verbose=False): print(Style.BRIGHT + Fore.WHITE + "[" + Fore.RED + "-" + @@ -89,20 +84,27 @@ def print_found(social_network, url, response_time, verbose=False): Fore.GREEN + "+" + Fore.WHITE + "]" + format_response_time(response_time, verbose) + - Fore.GREEN + " {}:").format(social_network), url) - + Fore.GREEN + f" {social_network}:"), url) def print_not_found(social_network, response_time, verbose=False): print((Style.BRIGHT + Fore.WHITE + "[" + Fore.RED + "-" + Fore.WHITE + "]" + format_response_time(response_time, verbose) + - Fore.GREEN + " {}:" + - Fore.YELLOW + " Not Found!").format(social_network)) + Fore.GREEN + f" {social_network}:" + + Fore.YELLOW + " Not Found!")) + +def print_invalid(social_network, msg): + """Print invalid search result.""" + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {social_network}:" + + Fore.YELLOW + f" {msg}")) def get_response(request_future, error_type, social_network, verbose=False, retry_no=None): - + global proxy_list try: @@ -132,7 +134,7 @@ def get_response(request_future, error_type, social_network, verbose=False, retr return None, "", -1 -def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, proxy=None): +def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, proxy=None, print_found_only=False): """Run Sherlock Analysis. Checks for existence of username on various social media sites. 
@@ -160,11 +162,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr """ global amount - print((Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Checking username" + - Fore.WHITE + " {}" + - Fore.GREEN + " on:").format(username)) + print_info("Checking username", username) # A user agent is needed because some sites don't # return the correct information since they think that @@ -203,16 +201,24 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: # No need to do the check at the site: this user name is not allowed. - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + " {}:" + - Fore.YELLOW + " Illegal Username Format For This Site!").format(social_network)) + print_invalid(social_network, "Illegal Username Format For This Site!") results_site["exists"] = "illegal" + results_site["url_user"] = "" + results_site['http_status'] = "" + results_site['response_text'] = "" + results_site['response_time_ms'] = "" else: # URL of user on site (if it exists) url = net_info["url"].format(username) results_site["url_user"] = url + url_probe = net_info.get("urlProbe") + if url_probe is None: + #Probe URL is normal one seen by people out on the web. + url_probe = url + else: + #There is a special URL for probing existence separate + #from where the user profile normally can be found. 
+ url_probe = url_probe.format(username) request_method = session.get if social_network != "GitHub": @@ -233,12 +239,12 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr # This future starts running the request in a new thread, doesn't block the main thread if proxy != None: proxies = {"http": proxy, "https": proxy} - future = request_method(url=url, headers=headers, + future = request_method(url=url_probe, headers=headers, proxies=proxies, allow_redirects=allow_redirects ) else: - future = request_method(url=url, headers=headers, + future = request_method(url=url_probe, headers=headers, allow_redirects=allow_redirects ) @@ -299,7 +305,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr exists = "yes" amount = amount+1 else: - print_not_found(social_network, response_time, verbose) + if not print_found_only: + print_not_found(social_network, response_time, verbose) exists = "no" elif error_type == "status_code": @@ -309,7 +316,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr exists = "yes" amount = amount+1 else: - print_not_found(social_network, response_time, verbose) + if not print_found_only: + print_not_found(social_network, response_time, verbose) exists = "no" elif error_type == "response_url": @@ -318,21 +326,19 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr # match the request. Instead, we will ensure that the response # code indicates that the request was successful (i.e. no 404, or # forward to some odd redirect). 
- if (r.status_code >= 200) and (r.status_code < 300): + if 200 <= r.status_code < 300: # print_found(social_network, url, response_time, verbose) exists = "yes" amount = amount+1 else: - print_not_found(social_network, response_time, verbose) + if not print_found_only: + print_not_found(social_network, response_time, verbose) exists = "no" elif error_type == "": - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + " {}:" + - Fore.YELLOW + " Error!").format(social_network)) + if not print_found_only: + print_invalid(social_network, "Error!") exists = "error" # Save exists flag @@ -378,10 +384,10 @@ def main(): ) parser.add_argument("--tor", "-t", action="store_true", dest="tor", default=False, - help="Make requests over TOR; increases runtime; requires TOR to be installed and in system path.") + help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.") parser.add_argument("--unique-tor", "-u", action="store_true", dest="unique_tor", default=False, - help="Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.") + help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.") parser.add_argument("--csv", action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." @@ -408,6 +414,10 @@ def main(): "The script will check if the proxies supplied in the .csv file are working and anonymous." "Put 0 for no limit on successfully checked proxies, or another number to institute a limit." ) + parser.add_argument("--print-found", + action="store_true", dest="print_found_only", default=False, + help="Do not output sites where the username was not found." 
+ ) parser.add_argument("username", nargs='+', metavar='USERNAMES', action="store", @@ -416,12 +426,11 @@ def main(): args = parser.parse_args() - print(Fore.WHITE + Style.BRIGHT + BANNER) # Argument check # TODO regex check on args.proxy if args.tor and (args.proxy != None or args.proxy_list != None): - raise Exception("TOR and Proxy cannot be set in the meantime.") + raise Exception("Tor and Proxy cannot be set in the meantime.") # Proxy argument check. # Does not necessarily need to throw an error, @@ -437,11 +446,7 @@ def main(): global proxy_list if args.proxy_list != None: - print((Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Loading proxies from" + - Fore.WHITE + " {}" + - Fore.GREEN + " :").format(args.proxy_list)) + print_info("Loading proxies from", args.proxy_list) proxy_list = load_proxies_from_csv(args.proxy_list) @@ -459,8 +464,8 @@ def main(): raise Exception("Prameter --check_proxies/-cp must be a positive intiger.") if args.tor or args.unique_tor: - print("Using TOR to make requests") - print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.") + print("Using Tor to make requests") + print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") # Check if both output methods are entered as input. 
if args.output is not None and args.folderoutput is not None: @@ -531,7 +536,7 @@ def main(): sys.exit(1) if args.rank: - # Sort data by rank + # Sort data by rank site_dataCpy = dict(site_data) ranked_sites = sorted(site_data, key=lambda k: ("rank" not in k, site_data[k].get("rank", sys.maxsize))) site_data = {} @@ -563,7 +568,7 @@ def main(): results = {} results = sherlock(username, site_data, verbose=args.verbose, - tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy) + tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy, print_found_only=args.print_found_only) exists_counter = 0 for website_name in results: diff --git a/site_list.py b/site_list.py index f29f4c31..3cc19a3b 100644 --- a/site_list.py +++ b/site_list.py @@ -5,25 +5,28 @@ import json import sys import requests import threading -from bs4 import BeautifulSoup as bs +import xml.etree.ElementTree as ET from datetime import datetime from argparse import ArgumentParser, RawDescriptionHelpFormatter pool = list() def get_rank(domain_to_query, dest): - result = -1 - url = "http://www.alexa.com/siteinfo/" + domain_to_query - page = requests.get(url).text - soup = bs(page, features="lxml") - for span in soup.find_all('span'): - if span.has_attr("class"): - if "globleRank" in span["class"]: - for strong in span.find_all("strong"): - if strong.has_attr("class"): - if "metrics-data" in strong["class"]: - result = int(strong.text.strip().replace(',', '')) - dest['rank'] = result + result = -1 + + #Retrieve ranking data via alexa API + url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}" + xml_data = requests.get(url).text + root = ET.fromstring(xml_data) + try: + #Get ranking for this site. + dest['rank'] = int(root.find(".//REACH").attrib["RANK"]) + except: + #We did not find the rank for some reason. 
+ print(f"Error retrieving rank information for '{domain_to_query}'") + print(f" Returned XML is |{xml_data}|") + + return parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter ) @@ -34,38 +37,38 @@ parser.add_argument("--rank","-r", args = parser.parse_args() with open("data.json", "r", encoding="utf-8") as data_file: - data = json.load(data_file) + data = json.load(data_file) with open("sites.md", "w") as site_file: - data_length = len(data) - site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n') + data_length = len(data) + site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n') - for social_network in data: - url_main = data.get(social_network).get("urlMain") - data.get(social_network)["rank"] = 0 - if args.rank: - th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network))) - else: - th = None - pool.append((social_network, url_main, th)) - if args.rank: - th.start() + for social_network in data: + url_main = data.get(social_network).get("urlMain") + data.get(social_network)["rank"] = 0 + if args.rank: + th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network))) + else: + th = None + pool.append((social_network, url_main, th)) + if args.rank: + th.start() - index = 1 - for social_network, url_main, th in pool: - if args.rank: - th.join() - site_file.write(f'{index}. [{social_network}]({url_main})\n') - sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries")) - sys.stdout.flush() - index = index + 1 + index = 1 + for social_network, url_main, th in pool: + if args.rank: + th.join() + site_file.write(f'{index}. 
[{social_network}]({url_main})\n') + sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries")) + sys.stdout.flush() + index = index + 1 - if args.rank: - site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n') + if args.rank: + site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n') sorted_json_data = json.dumps(data, indent=2, sort_keys=True) with open("data.json", "w") as data_file: - data_file.write(sorted_json_data) + data_file.write(sorted_json_data) print("\nFinished updating supported site listing!") diff --git a/sites.md b/sites.md index 63b59568..7cc8d5e8 100644 --- a/sites.md +++ b/sites.md @@ -1,138 +1,180 @@ -## List Of Supported Sites (135 Sites In Total!) -1. [500px](https://500px.com/) -2. [9GAG](https://9gag.com/) -3. [About.me](https://about.me/) -4. [Academia.edu](https://www.academia.edu/) -5. [AngelList](https://angel.co/) -6. [Aptoide](https://en.aptoide.com/) -7. [AskFM](https://ask.fm/) -8. [BLIP.fm](https://blip.fm/) -9. [Badoo](https://badoo.com/) -10. [Bandcamp](https://www.bandcamp.com/) -11. [Basecamp](https://basecamp.com/) -12. [Behance](https://www.behance.net/) -13. [BitBucket](https://bitbucket.org/) -14. [BlackPlanet](http://blackplanet.com/) -15. [Blogger](https://www.blogger.com/) -16. [BuzzFeed](https://buzzfeed.com/) -17. [Canva](https://www.canva.com/) -18. [Carbonmade](https://carbonmade.com/) -19. [CashMe](https://cash.me/) -20. [Cloob](https://www.cloob.com/) -21. [Codecademy](https://www.codecademy.com/) -22. [Codementor](https://www.codementor.io/) -23. [Codepen](https://codepen.io/) -24. [Coderwall](https://coderwall.com/) -25. [ColourLovers](https://www.colourlovers.com/) -26. [Contently](https://contently.com/) -27. [Coroflot](https://coroflot.com/) -28. [CreativeMarket](https://creativemarket.com/) -29. [Crevado](https://crevado.com/) -30. [Crunchyroll](https://www.crunchyroll.com/) -31. [DailyMotion](https://www.dailymotion.com/) -32. 
[Designspiration](https://www.designspiration.net/) -33. [DeviantART](https://deviantart.com) -34. [Disqus](https://disqus.com/) -35. [Dribbble](https://dribbble.com/) -36. [EVE Online](https://eveonline.com) -37. [Ebay](https://www.ebay.com/) -38. [Ello](https://ello.co/) -39. [Etsy](https://www.etsy.com/) -40. [EyeEm](https://www.eyeem.com/) -41. [Facebook](https://www.facebook.com/) -42. [Flickr](https://www.flickr.com/) -43. [Flipboard](https://flipboard.com/) -44. [Fotolog](https://fotolog.com/) -45. [Foursquare](https://foursquare.com/) -46. [Giphy](https://giphy.com/) -47. [GitHub](https://www.github.com/) -48. [GitLab](https://gitlab.com/) -49. [Gitee](https://gitee.com/) -50. [GoodReads](https://www.goodreads.com/) -51. [Google Plus](https://plus.google.com/) -52. [Gravatar](http://en.gravatar.com/) -53. [Gumroad](https://www.gumroad.com/) -54. [HackerNews](https://news.ycombinator.com/) -55. [HackerOne](https://hackerone.com/) -56. [House-Mixes.com](https://www.house-mixes.com/) -57. [Houzz](https://houzz.com/) -58. [HubPages](https://hubpages.com/) -59. [IFTTT](https://www.ifttt.com/) -60. [ImageShack](https://imageshack.us/) -61. [Imgur](https://imgur.com/) -62. [Instagram](https://www.instagram.com/) -63. [Instructables](https://www.instructables.com/) -64. [Investing.com](https://www.investing.com/) -65. [Issuu](https://issuu.com/) -66. [Itch.io](https://itch.io/) -67. [Jimdo](https://jimdosite.com/) -68. [Kaggle](https://www.kaggle.com/) -69. [KanoWorld](https://world.kano.me/) -70. [Keybase](https://keybase.io/) -71. [Kik](http://kik.me/) -72. [Kongregate](https://www.kongregate.com/) -73. [Launchpad](https://launchpad.net/) -74. [Letterboxd](https://letterboxd.com/) -75. [LiveJournal](https://www.livejournal.com/) -76. [Mastodon](https://mstdn.io/) -77. [Medium](https://medium.com/) -78. [MeetMe](https://www.meetme.com/) -79. [MixCloud](https://www.mixcloud.com/) -80. [MyAnimeList](https://myanimelist.net/) -81. 
[NameMC (Minecraft.net skins)](https://namemc.com/) -82. [Newgrounds](https://newgrounds.com) -83. [Pastebin](https://pastebin.com/) -84. [Patreon](https://www.patreon.com/) -85. [Pexels](https://www.pexels.com/) -86. [Photobucket](https://photobucket.com/) -87. [Pinterest](https://www.pinterest.com/) -88. [Pixabay](https://pixabay.com/) -89. [Plug.DJ](https://plug.dj/) -90. [ProductHunt](https://www.producthunt.com/) -91. [Quora](https://www.quora.com/) -92. [Rajce.net](https://www.rajce.idnes.cz/) -93. [Reddit](https://www.reddit.com/) -94. [Repl.it](https://repl.it/) -95. [ReverbNation](https://www.reverbnation.com/) -96. [Roblox](https://www.roblox.com/) -97. [Scribd](https://www.scribd.com/) -98. [Slack](https://slack.com) -99. [SlideShare](https://slideshare.net/) -100. [Smashcast](https://www.smashcast.tv/) -101. [SoundCloud](https://soundcloud.com/) -102. [SourceForge](https://sourceforge.net/) -103. [Spotify](https://open.spotify.com/) -104. [Star Citizen](https://robertsspaceindustries.com/) -105. [Steam](https://steamcommunity.com/) -106. [StreamMe](https://www.stream.me/) -107. [Taringa](https://taringa.net/) -108. [Telegram](https://t.me/) -109. [Tinder](https://tinder.com/) -110. [TradingView](https://www.tradingview.com/) -111. [Trakt](https://www.trakt.tv/) -112. [Trip](https://www.trip.skyscanner.com/) -113. [TripAdvisor](https://tripadvisor.com/) -114. [Twitch](https://www.twitch.tv/) -115. [Twitter](https://www.twitter.com/) -116. [Unsplash](https://unsplash.com/) -117. [VK](https://vk.com/) -118. [VSCO](https://vsco.co/) -119. [Venmo](https://venmo.com/) -120. [Vimeo](https://vimeo.com/) -121. [VirusTotal](https://www.virustotal.com/) -122. [Wattpad](https://www.wattpad.com/) -123. [We Heart It](https://weheartit.com/) -124. [WebNode](https://www.webnode.cz/) -125. [Wikia](http://www.wikia.com/) -126. [Wikipedia](https://www.wikipedia.org/) -127. [Wix](https://wix.com/) -128. [WordPress](https://wordpress.com) -129. 
[YouPic](https://youpic.com/) -130. [YouTube](https://www.youtube.com/) -131. [Younow](https://www.younow.com/) -132. [Zhihu](https://www.zhihu.com/) -133. [devRant](https://devrant.com/) -134. [iMGSRC.RU](https://imgsrc.ru/) -135. [last.fm](https://last.fm/) - -Alexa.com rank data fetched at (2019-02-02 12:19:32.483528 UTC) +## List Of Supported Sites (177 Sites In Total!) +1. [500px](https://500px.com/) +2. [9GAG](https://9gag.com/) +3. [About.me](https://about.me/) +4. [Academia.edu](https://www.academia.edu/) +5. [AngelList](https://angel.co/) +6. [Anobii](https://www.anobii.com/) +7. [Aptoide](https://en.aptoide.com/) +8. [Archive.org](https://archive.org) +9. [AskFM](https://ask.fm/) +10. [BLIP.fm](https://blip.fm/) +11. [Badoo](https://badoo.com/) +12. [Bandcamp](https://www.bandcamp.com/) +13. [Basecamp](https://basecamp.com/) +14. [Behance](https://www.behance.net/) +15. [BitBucket](https://bitbucket.org/) +16. [BitCoinForum](https://bitcoinforum.com) +17. [Blogger](https://www.blogger.com/) +18. [Brew](https://www.brew.com/) +19. [BuyMeACoffee](https://www.buymeacoffee.com/) +20. [BuzzFeed](https://buzzfeed.com/) +21. [Canva](https://www.canva.com/) +22. [Carbonmade](https://carbonmade.com/) +23. [CashMe](https://cash.me/) +24. [Cent](https://cent.co/) +25. [Cloob](https://www.cloob.com/) +26. [Codecademy](https://www.codecademy.com/) +27. [Codechef](https://www.codechef.com/) +28. [Codementor](https://www.codementor.io/) +29. [Codepen](https://codepen.io/) +30. [Coderwall](https://coderwall.com/) +31. [Codewars](https://www.codewars.com) +32. [ColourLovers](https://www.colourlovers.com/) +33. [Contently](https://contently.com/) +34. [Coroflot](https://coroflot.com/) +35. [CreativeMarket](https://creativemarket.com/) +36. [Crevado](https://crevado.com/) +37. [Crunchyroll](https://www.crunchyroll.com/) +38. [DEV Community](https://dev.to/) +39. [DailyMotion](https://www.dailymotion.com/) +40. [Designspiration](https://www.designspiration.net/) +41. 
[DeviantART](https://deviantart.com) +42. [Discogs](https://www.discogs.com/) +43. [Disqus](https://disqus.com/) +44. [Docker Hub](https://hub.docker.com/) +45. [Dribbble](https://dribbble.com/) +46. [Duolingo](https://www.duolingo.com/) +47. [EVE Online](https://eveonline.com) +48. [Ebay](https://www.ebay.com/) +49. [Ello](https://ello.co/) +50. [Etsy](https://www.etsy.com/) +51. [EyeEm](https://www.eyeem.com/) +52. [Facebook](https://www.facebook.com/) +53. [Fandom](https://www.fandom.com/) +54. [Filmogs](https://www.filmo.gs/) +55. [Flickr](https://www.flickr.com/) +56. [Flightradar24](https://www.flightradar24.com/) +57. [Flipboard](https://flipboard.com/) +58. [Foursquare](https://foursquare.com/) +59. [Furaffinity](https://www.furaffinity.net) +60. [Giphy](https://giphy.com/) +61. [GitHub](https://www.github.com/) +62. [GitLab](https://gitlab.com/) +63. [Gitee](https://gitee.com/) +64. [GoodReads](https://www.goodreads.com/) +65. [Gravatar](http://en.gravatar.com/) +66. [Gumroad](https://www.gumroad.com/) +67. [HackerNews](https://news.ycombinator.com/) +68. [HackerOne](https://hackerone.com/) +69. [HackerRank](https://hackerrank.com/) +70. [House-Mixes.com](https://www.house-mixes.com/) +71. [Houzz](https://houzz.com/) +72. [HubPages](https://hubpages.com/) +73. [IFTTT](https://www.ifttt.com/) +74. [ImageShack](https://imageshack.us/) +75. [Imgur](https://imgur.com/) +76. [Instagram](https://www.instagram.com/) +77. [Instructables](https://www.instructables.com/) +78. [Investing.com](https://www.investing.com/) +79. [Issuu](https://issuu.com/) +80. [Itch.io](https://itch.io/) +81. [Jimdo](https://jimdosite.com/) +82. [Kaggle](https://www.kaggle.com/) +83. [KanoWorld](https://world.kano.me/) +84. [Keybase](https://keybase.io/) +85. [Kik](http://kik.me/) +86. [Kongregate](https://www.kongregate.com/) +87. [Launchpad](https://launchpad.net/) +88. [LeetCode](https://leetcode.com/) +89. [Letterboxd](https://letterboxd.com/) +90. 
[LiveJournal](https://www.livejournal.com/) +91. [Mastodon](https://mstdn.io/) +92. [Medium](https://medium.com/) +93. [MeetMe](https://www.meetme.com/) +94. [MixCloud](https://www.mixcloud.com/) +95. [MyAnimeList](https://myanimelist.net/) +96. [Myspace](https://myspace.com/) +97. [NPM](https://www.npmjs.com/) +98. [NPM-Package](https://www.npmjs.com/) +99. [NameMC (Minecraft.net skins)](https://namemc.com/) +100. [NationStates Nation](https://nationstates.net) +101. [NationStates Region](https://nationstates.net) +102. [Newgrounds](https://newgrounds.com) +103. [OK](https://ok.ru/) +104. [OpenCollective](https://opencollective.com/) +105. [Packagist](https://packagist.org/) +106. [Pastebin](https://pastebin.com/) +107. [Patreon](https://www.patreon.com/) +108. [PayPal](https://www.paypal.me/) +109. [Pexels](https://www.pexels.com/) +110. [Photobucket](https://photobucket.com/) +111. [Pinterest](https://www.pinterest.com/) +112. [Pixabay](https://pixabay.com/) +113. [PlayStore](https://play.google.com/store) +114. [Plug.DJ](https://plug.dj/) +115. [Pokemon Showdown](https://pokemonshowdown.com) +116. [ProductHunt](https://www.producthunt.com/) +117. [Quora](https://www.quora.com/) +118. [Rajce.net](https://www.rajce.idnes.cz/) +119. [Rate Your Music](https://rateyourmusic.com/) +120. [Reddit](https://www.reddit.com/) +121. [Repl.it](https://repl.it/) +122. [ResearchGate](https://www.researchgate.net/) +123. [ReverbNation](https://www.reverbnation.com/) +124. [Roblox](https://www.roblox.com/) +125. [Scratch](https://scratch.mit.edu/) +126. [Scribd](https://www.scribd.com/) +127. [Signal](https://community.signalusers.org) +128. [Slack](https://slack.com) +129. [SlideShare](https://slideshare.net/) +130. [Smashcast](https://www.smashcast.tv/) +131. [SoundCloud](https://soundcloud.com/) +132. [SourceForge](https://sourceforge.net/) +133. [Speedrun.com](https://speedrun.com/) +134. [Splits.io](https://splits.io) +135. [Spotify](https://open.spotify.com/) +136. 
[Star Citizen](https://robertsspaceindustries.com/) +137. [Steam](https://steamcommunity.com/) +138. [SteamGroup](https://steamcommunity.com/) +139. [Taringa](https://taringa.net/) +140. [Telegram](https://t.me/) +141. [Tellonym.me](https://tellonym.me/) +142. [TikTok](https://www.tiktok.com/) +143. [Tinder](https://tinder.com/) +144. [TradingView](https://www.tradingview.com/) +145. [Trakt](https://www.trakt.tv/) +146. [Trello](https://trello.com/) +147. [Trip](https://www.trip.skyscanner.com/) +148. [TripAdvisor](https://tripadvisor.com/) +149. [Twitch](https://www.twitch.tv/) +150. [Twitter](https://www.twitter.com/) +151. [Unsplash](https://unsplash.com/) +152. [VK](https://vk.com/) +153. [VSCO](https://vsco.co/) +154. [Venmo](https://venmo.com/) +155. [Vimeo](https://vimeo.com/) +156. [Virgool](https://virgool.io/) +157. [VirusTotal](https://www.virustotal.com/) +158. [Wattpad](https://www.wattpad.com/) +159. [We Heart It](https://weheartit.com/) +160. [WebNode](https://www.webnode.cz/) +161. [Wikipedia](https://www.wikipedia.org/) +162. [Wix](https://wix.com/) +163. [WordPress](https://wordpress.com) +164. [WordPressOrg](https://wordpress.org/) +165. [YouNow](https://www.younow.com/) +166. [YouPic](https://youpic.com/) +167. [YouTube](https://www.youtube.com/) +168. [Zhihu](https://www.zhihu.com/) +169. [authorSTREAM](http://www.authorstream.com/) +170. [boingboing.net](https://boingboing.net/) +171. [devRant](https://devrant.com/) +172. [gfycat](https://gfycat.com/) +173. [iMGSRC.RU](https://imgsrc.ru/) +174. [last.fm](https://last.fm/) +175. [mixer.com](https://mixer.com/) +176. [osu!](https://osu.ppy.sh/) +177. 
[segmentfault](https://segmentfault.com/) + +Alexa.com rank data fetched at (2019-08-25 00:32:19.603917 UTC) diff --git a/tests/all.py b/tests/all.py index 1fdfec6b..c5b99c6d 100644 --- a/tests/all.py +++ b/tests/all.py @@ -7,26 +7,30 @@ import unittest class SherlockDetectTests(SherlockBaseTest): - def test_detect_true(self): - """Test Username Existence Detection. + def test_detect_true_via_message(self): + """Test Username Does Exist (Via Message). - This test ensures that the mechanism of ensuring that a Username - exists works properly. + This test ensures that the "message" detection mechanism of + ensuring that a Username does exist works properly. Keyword Arguments: self -- This object. Return Value: N/A. - Will trigger an assert if Usernames which are known to exist are - not detected. + Will trigger an assert if detection mechanism did not work as expected. """ - self.username_check(['jack'], ['Twitter'], exist_check=True) - self.username_check(['dfox'], ['devRant'], exist_check=True) - self.username_check(['blue'], ['Pinterest'], exist_check=True) - self.username_check(['kevin'], ['Instagram'], exist_check=True) - self.username_check(['zuck'], ['Facebook'], exist_check=True) + site = 'Instagram' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. + self.assertEqual("message", site_data["errorType"]) + + self.username_check([site_data["username_claimed"]], + [site], + exist_check=True + ) return @@ -44,13 +48,46 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - self.username_check(['jackkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk'], - ['Instagram'], + site = 'Instagram' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. 
+ self.assertEqual("message", site_data["errorType"]) + + self.username_check([site_data["username_unclaimed"]], + [site], exist_check=False ) return + def test_detect_true_via_status_code(self): + """Test Username Does Exist (Via Status Code). + + This test ensures that the "status code" detection mechanism of + ensuring that a Username does exist works properly. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + site = 'Facebook' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. + self.assertEqual("status_code", site_data["errorType"]) + + self.username_check([site_data["username_claimed"]], + [site], + exist_check=True + ) + + return + def test_detect_false_via_status_code(self): """Test Username Does Not Exist (Via Status Code). @@ -65,13 +102,46 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - self.username_check(['jackkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk'], - ['Facebook'], + site = 'Facebook' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. + self.assertEqual("status_code", site_data["errorType"]) + + self.username_check([site_data["username_unclaimed"]], + [site], exist_check=False ) return + def test_detect_true_via_response_url(self): + """Test Username Does Exist (Via Response URL). + + This test ensures that the "response URL" detection mechanism of + ensuring that a Username does exist works properly. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if detection mechanism did not work as expected. + """ + + site = 'Quora' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. 
+ self.assertEqual("response_url", site_data["errorType"]) + + self.username_check([site_data["username_claimed"]], + [site], + exist_check=True + ) + + return + def test_detect_false_via_response_url(self): """Test Username Does Not Exist (Via Response URL). @@ -86,8 +156,14 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - self.username_check(['jackkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk'], - ['Pinterest'], + site = 'Quora' + site_data = self.site_data_all[site] + + #Ensure that the site's detection method has not changed. + self.assertEqual("response_url", site_data["errorType"]) + + self.username_check([site_data["username_unclaimed"]], + [site], exist_check=False ) @@ -202,3 +278,20 @@ class SherlockSiteCoverageTests(SherlockBaseTest): self.detect_type_check("message", exist_check=True) return + + def test_coverage_total(self): + """Test Site Coverage Is Total. + + This test checks that all sites have test data available. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Will trigger an assert if we do not have total coverage. + """ + + self.coverage_total_check() + + return diff --git a/tests/base.py b/tests/base.py index e3b18970..ff4c5416 100644 --- a/tests/base.py +++ b/tests/base.py @@ -166,3 +166,29 @@ class SherlockBaseTest(unittest.TestCase): ) return + + def coverage_total_check(self): + """Total Coverage Check. + + Keyword Arguments: + self -- This object. + + Return Value: + N/A. + Counts up all Sites with full test data available. + Will trigger an assert if any Site does not have test coverage. + """ + + site_no_tests_list = [] + + for site, site_data in self.site_data_all.items(): + if ( + (site_data.get("username_claimed") is None) or + (site_data.get("username_unclaimed") is None) + ): + # Test information not available on this site. + site_no_tests_list.append(site) + + self.assertEqual("", ", ".join(site_no_tests_list)) + + return