diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76001ae9..31fade06 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,11 +1,15 @@ ### REPOSITORY /.github/CODEOWNERS @sdushantha +./github/FUNDING.yml @sdushantha /LICENSE @sdushantha ### PACKAGING # Changes made to these items without code owner approval may negatively -# impact packaging pipelines. Code owners may need time to verify or adapt. +# impact packaging pipelines. /pyproject.toml @ppfeister @sdushantha -/setup.cfg @ppfeister @sdushantha -/setup.py @ppfeister -/*.spec @ppfeister + +### REGRESSION +/.github/workflows/regression.yml @ppfeister +/tox.ini @ppfeister +/pytest.ini @ppfeister +/tests/ @ppfeister diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..b15d1a23 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [ sdushantha, ppfeister, matheusfelipeog ] diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index abd49f43..00000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Tests - -on: - push: - branches: [ master ] - -jobs: - tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: - - '3.12' - - '3.11' - - '3.10' - - '3.9' - - '3.8' - - '3.7' - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install ruff flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with ruff - run: | - # stop the build if there are Python syntax errors or undefined names - ruff . --output-format=github --select=E9,F63,F7,F82 - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Sherlock Site Detect Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml deleted file mode 100644 index ed55a168..00000000 --- a/.github/workflows/nightly.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Nightly - -on: - schedule: - # Run Nightly Tests At 3AM (The Hour Of The Wolf) Every Day - - cron: '0 3 * * *' - -jobs: - tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.x] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Sherlock Site Coverage Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockSiteCoverageTests --verbose diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml deleted file mode 100644 index a70ad44d..00000000 --- a/.github/workflows/pull_request.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Pull Request Action - -on: - pull_request: - branches: [ master ] - -jobs: - getchange: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.changes.outputs.matrix }} - steps: - - id: changes - run: | - URL="https://api.github.com/repos/sherlock-project/sherlock/pulls/${{ github.event.pull_request.number }}/files" - FILES=$(curl -s -X GET -G $URL | jq -r '.[] | .filename') - if echo $FILES | grep -q ".json"; then - echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.x\"}]}" - else - echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.7\"},{\"python\":\"3.8\"}]},{\"python\":\"3.9\"},{\"python\":\"3.10\"}]},{\"python\":\"3.11\"},{\"python\":\"3.12\"}]}" - fi - tests: - needs: [getchange] - runs-on: 
ubuntu-latest - strategy: - matrix: ${{ fromJson(needs.getchange.outputs.matrix) }} - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install ruff flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint With Ruff - run: | - # stop the build if there are Python syntax errors or undefined names - ruff check . --output-format=github --select=E9,F63,F7,F82 - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Sherlock Site Detect Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml new file mode 100644 index 00000000..af12916c --- /dev/null +++ b/.github/workflows/regression.yml @@ -0,0 +1,40 @@ +name: Regression Testing + +on: + pull_request: + branches: [ master ] + push: + branches: [ master ] + +jobs: + tox-matrix: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false # We want to know what version it fails on + matrix: + os: [ + ubuntu-latest, + windows-latest, + macos-latest, + ] + python-version: [ + '3.8', + '3.9', + '3.10', + '3.11', + '3.12', + ] + + steps: + - uses: actions/checkout@v4 + - name: Set up environment ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox and related dependencies + run: | + python -m pip install --upgrade pip + pip install tox + pip install tox-gh-actions + - name: Run tox + run: tox diff --git a/.gitignore b/.gitignore index 4d47421a..c53e34ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ -# Virtual Environment +# Virtual Environments venv/ bin/ lib/ 
pyvenv.cfg +poetry.lock # Editor Configurations .vscode/ @@ -14,8 +15,9 @@ __pycache__/ # Pip src/ -# Pip / PyProject Devel & Installation +# Devel, Build, and Installation *.egg-info/ +dist/** # Jupyter Notebook .ipynb_checkpoints diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 996efe8a..1056ec8c 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -68,7 +68,7 @@ Sherlock. This invocation hides the progress text that Sherlock normally outputs, and instead shows the verbose output of the tests. ```console -$ cd sherlock/sherlock +# Assumes current working directory is repository root $ python3 -m unittest tests.all --verbose ``` diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 9813a09c..d00be915 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -37,13 +37,24 @@ Python pipx install sherlock-project ``` -### Build python package from source (useful for contributors) +### Build live package from source (useful for contributors) + +Building an editable (or live) package links the entry point to your current directory, rather than to the standard install location. This is often useful when working with the code base, as changes are reflected immediately without reinstallation. + +Note that the version number will be 0.0.0 for pipx local builds unless manually changed in the pyproject file (it will prompt the user for an update). ```bash -# pipx is recommended, but pip may suffice if pipx is unavailable -git clone https://github.com/sherlock-project/sherlock.git -cd sherlock -pipx install . +# Assumes repository cloned, and current working directory is repository root +pipx install -e . +``` + +### Run package from source (without installing) + +If you'd rather not install directly to your system, you can import the module at runtime with `-m`. + +```bash +# Assumes repository cloned, and current working directory is repository root +python3 -m sherlock user123 user789 ```

diff --git a/docs/README.md b/docs/README.md index 9df30261..9b6237a8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -27,11 +27,11 @@ [![PyPI - Version](https://img.shields.io/pypi/v/sherlock-project?logo=PyPi&label=PyPI&color=darkgreen)][ext_pypi] [![Docker Image Version](https://img.shields.io/docker/v/sherlock/sherlock?sort=semver&logo=docker&label=Docker&color=darkgreen)][docs_docker] [![homebrew version](https://img.shields.io/homebrew/v/sherlock?logo=Homebrew&color=darkgreen)][ext_brew] -| Method | Command | Notes | +| | Command | Notes | | - | - | - | -| pypi | `pipx install sherlock-project` | `pip` may be used in place of `pipx` | -| brew | `brew install sherlock` | Community supported | -| docker | `docker pull sherlock/sherlock` | | +| PyPI | `pipx install sherlock-project` | `pip` may be used in place of `pipx` | +| Homebrew | `brew install sherlock` | Community supported | +| Docker | `docker pull sherlock/sherlock` | | ### Alternative guides and methods @@ -133,15 +133,10 @@ Original Creator - [Siddharth Dushantha](https://github.com/sdushantha) [docs_install]: /docs/INSTALL.md [docs_docker]: /docs/INSTALL.md#docker -[docs_docker_dockerhub]: /docs/INSTALL.md#docker -[docs_docker_compose]: /docs/INSTALL.md#using-compose -[docs_docker_source]: /docs/INSTALL.md#build-image-from-source-useful-for-contributors [docs_py]: /docs/INSTALL.md#python -[docs_py_build]: /docs/INSTALL.md#build-python-package-from-source-useful-for-contributors [docs_contrib]: /docs/CONTRIBUTING.md [docs_contrib_adding_targets]: /docs/CONTRIBUTING.md#adding-targets [docs_contrib_removing_targets]: /docs/CONTRIBUTING.md#removing-targets [docs_contrib_restoring_targets]: /docs/CONTRIBUTING.md#restoring-targets [ext_pypi]: https://pypi.org/project/sherlock-project/ [ext_brew]: https://formulae.brew.sh/formula/sherlock - diff --git a/docs/pyproj/README.md b/docs/pyproj/README.md index 446382f5..f33358a5 100644 --- a/docs/pyproj/README.md +++ b/docs/pyproj/README.md @@ -2,17 +2,12 @@


- +
- Hunt down social media accounts by username across social networks + Hunt down social media accounts by username across 400+ social networks +

+ Additional documentation can be found at our GitHub repository
- Additional documentation can be found on our GitHub repository -
-

- -

- -

## Usage @@ -36,11 +31,12 @@ To search for more than one user: ```bash $ sherlock user1 user2 user3 ``` +
-## Star History +___ - - - - Sherlock Project Star History Chart - +
+

+ + +

diff --git a/pyproject.toml b/pyproject.toml index 7356c0d1..fafa9f85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,48 +1,59 @@ [build-system] -requires = ["setuptools >= 61.0"] -build-backend = "setuptools.build_meta" +requires = [ "poetry-core>=1.2.0" ] +build-backend = "poetry.core.masonry.api" +# poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL -[project.scripts] -sherlock = "sherlock:main" +[tool.poetry-version-plugin] +source = "init" -[project.urls] -Homepage = "http://sherlock-project.github.io/" -Repository = "https://github.com/sherlock-project/sherlock.git" -Issues = "https://github.com/sherlock-project/sherlock/issues" - -[project] +[tool.poetry] name = "sherlock-project" +# single source of truth for version is __init__.py +version = "0" +description = "Hunt down social media accounts by username across social networks" +license = "MIT" authors = [ - { name = "Siddharth Dushantha" } + "Siddharth Dushantha " ] maintainers = [ - { name = "Matheus Felipe" }, - { name = "Sondre Karlsen Dyrnes" }, - { name = "Paul Pfeister" } + "Paul Pfeister ", + "Matheus Felipe ", + "Sondre Karlsen Dyrnes " ] -description = "Hunt down social media accounts by username across social networks" readme = "docs/pyproj/README.md" -# Do not set license to file. Causes issues with rpm packaging for some reason. 
-license = {text = "MIT"} -dynamic = ["dependencies", "version"] +packages = [ { include = "sherlock"} ] keywords = [ "osint", "reconnaissance", "information gathering" ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", - "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Security" ] +homepage = "https://sherlock-project.github.io/" +repository = "https://github.com/sherlock-project/sherlock" + + +[tool.poetry.urls] +"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues" -[tool.setuptools.dynamic] -dependencies = { file = [ "requirements.txt" ] } -version = { attr = "sherlock.__version__" } +[tool.poetry.dependencies] +python = "^3.8" +certifi = ">=2019.6.16" +colorama = "^0.4.1" +PySocks = "^1.7.0" +requests = "^2.22.0" +requests-futures = "^1.0.0" +stem = "^1.8.0" +torrequest = "^0.1.0" +# pandas can likely be bumped up to ^2.0.0 after fc39 EOL +pandas = ">=1.0.0,<3.0.0" +openpyxl = "^3.0.10" -[tool.setuptools] -package-dir = {"" = "sherlock"} +[tool.poetry.group.dev.dependencies] +jsonschema = "^4.0.0" -[tool.setuptools.package-data] -"*" = ["*.json"] +[tool.poetry.scripts] +sherlock = 'sherlock.sherlock:main' diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..bc1df7de --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +addopts = --strict-markers +markers = + online: mark tests as requiring internet access. 
diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b1dd6252..00000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -certifi>=2019.6.16 -colorama>=0.4.1 -PySocks>=1.7.0 -requests>=2.22.0 -requests-futures>=1.0.0 -stem>=1.8.0 -torrequest>=0.1.0 -pandas>=1.0.0 -openpyxl>=3.0.10 -exrex>=0.11.0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 8929bbae..00000000 --- a/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[metadata] -name = Sherlock -author = Sherlock Project -url = http://sherlock-project.github.io/ diff --git a/setup.py b/setup.py deleted file mode 100644 index 434d3f95..00000000 --- a/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python3 - -"""Sometimes required for rpm spec - particularly when using pyp2rpm -""" - -from setuptools import setup - -setup() diff --git a/sherlock-project.spec b/sherlock-project.spec deleted file mode 100644 index b07dded1..00000000 --- a/sherlock-project.spec +++ /dev/null @@ -1,71 +0,0 @@ -# Packager: Paul Pfeister (GitHub @ppfeister) -%global source_ref master - -Name: sherlock-project -Version: 0.14.4 -Release: %autorelease -Summary: Hunt down social media accounts by username across social networks - -License: MIT -URL: https://github.com/sherlock-project/sherlock -Source: %{url}/archive/%{source_ref}.tar.gz -# Switch to new Source URL after adoption of tagged releases - -BuildArch: noarch -BuildRequires: python3-devel -BuildRequires: help2man - -%global _description %{expand: -Hunt down social media accounts by username across 400+ social networks and -websites. New targets are tested and implemented regularly. 
-} - -%description %{_description} - - -%prep -%autosetup -n sherlock-%{source_ref} - - -%generate_buildrequires -%pyproject_buildrequires - - -%build -%pyproject_wheel - - -%install -%pyproject_install -%pyproject_save_files -l sherlock sites result notify __init__ __main__ - -sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/__main__.py' -sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/sherlock.py' - -install -d '%{buildroot}%{_mandir}/man1' -PYTHONPATH='%{buildroot}%{python3_sitelib}' help2man \ - --no-info \ - --version-string='%{version}' \ - --name='%{summary}' \ - --output='%{buildroot}%{_mandir}/man1/sherlock.1' \ - '%{buildroot}%{_bindir}/sherlock' - - -%check -# Tests fail when pwd isn't sherlock. Relative pathing need fix upstream. -cd sherlock -%{py3_test_envvars} %{python3} -m unittest tests.all --verbose -cd .. - - -%files -f %{pyproject_files} -%doc README.md -%{_bindir}/sherlock -%{python3_sitelib}/resources -%pycached %{python3_sitelib}/tests/*.py -%{_mandir}/man1/sherlock.1* - - -%changelog -* Tue May 14 2024 Paul Pfeister 0.14.4-1 -- Initial package. diff --git a/sherlock/__init__.py b/sherlock/__init__.py index 219dcaec..5bf79e24 100644 --- a/sherlock/__init__.py +++ b/sherlock/__init__.py @@ -4,3 +4,7 @@ This module contains the main logic to search for usernames at social networks. 
""" + +__shortname__ = "Sherlock" +__longname__ = "Sherlock: Find Usernames Across Social Networks" +__version__ = "0.14.4" diff --git a/sherlock/__main__.py b/sherlock/__main__.py index b10bd6ae..014d8b35 100644 --- a/sherlock/__main__.py +++ b/sherlock/__main__.py @@ -14,9 +14,9 @@ if __name__ == "__main__": # Check if the user is using the correct version of Python python_version = sys.version.split()[0] - if sys.version_info < (3, 6): - print(f"Sherlock requires Python 3.6+\nYou are using Python {python_version}, which is not supported by Sherlock.") + if sys.version_info < (3, 8): + print(f"Sherlock requires Python 3.8+\nYou are using Python {python_version}, which is not supported by Sherlock.") sys.exit(1) - import sherlock + from sherlock import sherlock sherlock.main() diff --git a/sherlock/notify.py b/sherlock/notify.py index 4af1ff18..c198fe62 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -3,7 +3,7 @@ This module defines the objects for notifying the caller about the results of queries. 
""" -from result import QueryStatus +from sherlock.result import QueryStatus from colorama import Fore, Style import webbrowser diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 6f81e981..4b7a2798 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -19,17 +19,27 @@ from time import monotonic import requests -from requests_futures.sessions import FuturesSession -from torrequest import TorRequest -from result import QueryStatus -from result import QueryResult -from notify import QueryNotifyPrint -from sites import SitesInformation -from colorama import init -from argparse import ArgumentTypeError - -module_name = "Sherlock: Find Usernames Across Social Networks" +# Removing __version__ here will trigger update message for users +# Do not remove until ready to trigger that message +# When removed, also remove all the noqa: E402 comments for linting __version__ = "0.14.4" +del __version__ + +from .__init__ import ( # noqa: E402 + __shortname__, + __longname__, + __version__ +) + +from requests_futures.sessions import FuturesSession # noqa: E402 +from torrequest import TorRequest # noqa: E402 +from sherlock.result import QueryStatus # noqa: E402 +from sherlock.result import QueryResult # noqa: E402 +from sherlock.notify import QueryNotify # noqa: E402 +from sherlock.notify import QueryNotifyPrint # noqa: E402 +from sherlock.sites import SitesInformation # noqa: E402 +from colorama import init # noqa: E402 +from argparse import ArgumentTypeError # noqa: E402 class SherlockFuturesSession(FuturesSession): @@ -157,9 +167,9 @@ def multiple_usernames(username): def sherlock( username, site_data, - query_notify, - tor=False, - unique_tor=False, + query_notify: QueryNotify, + tor: bool = False, + unique_tor: bool = False, proxy=None, timeout=60, ): @@ -510,7 +520,7 @@ def handler(signal_received, frame): def main(): parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, - description=f"{module_name} (Version {__version__})", + 
description=f"{__longname__} (Version {__version__})", ) parser.add_argument( "--version", @@ -664,10 +674,10 @@ def main(): # Check for newer version of Sherlock. If it exists, let the user know about it try: r = requests.get( - "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/sherlock.py" + "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/__init__.py" ) - remote_version = str(re.findall('__version__ = "(.*)"', r.text)[0]) + remote_version = str(re.findall('__version__ *= *"(.*)"', r.text)[0]) local_version = __version__ if remote_version != local_version: diff --git a/sherlock/sites.py b/sherlock/sites.py index 2b360afa..112b6d02 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -174,7 +174,7 @@ class SitesInformation: raise ValueError( f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." ) - except TypeError as error: + except TypeError: print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") return diff --git a/sherlock/tests/__init__.py b/sherlock/tests/__init__.py deleted file mode 100644 index 944e27ce..00000000 --- a/sherlock/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Sherlock Tests - -This package contains various submodules used to run tests. -""" diff --git a/sherlock/tests/all.py b/sherlock/tests/all.py deleted file mode 100644 index 926946fd..00000000 --- a/sherlock/tests/all.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Sherlock Tests - -This module contains various tests. -""" -from tests.base import SherlockBaseTest -import exrex - - -class SherlockDetectTests(SherlockBaseTest): - def test_detect_true_via_message(self): - """Test Username Does Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. 
- Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "AllMyLinks" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("message", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], [site], exist_check=True) - - return - - def test_detect_false_via_message(self): - """Test Username Does Not Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "AllMyLinks" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("message", site_data["errorType"]) - - # Generate a valid username based on the regex for a username that the - # site supports that is *most likely* not taken. The regex is slightly - # modified version of site_data["regexCheck"] as we want a username - # that has the maximum length that is supported by the site. This way, - # we wont generate a random username that might actually exist. This - # method is very hacky, but it does the job as having hardcoded - # usernames that dont exists will lead to people with ill intent to - # create an account with that username which will break the tests - valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$") - self.username_check([valid_username], [site], exist_check=False) - - return - - def test_detect_true_via_status_code(self): - """Test Username Does Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. 
- """ - - site = "BitBucket" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], [site], exist_check=True) - - return - - def test_detect_false_via_status_code(self): - """Test Username Does Not Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "BitBucket" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - # Generate a valid username based on the regex for a username that the - # site supports that is *most likely* not taken. The regex is slightly - # modified version of site_data["regexCheck"] as we want a username - # that has the maximum length that is supported by the site. This way, - # we wont generate a random username that might actually exist. This - # method is very hacky, but it does the job as having hardcoded - # usernames that dont exists will lead to people with ill intent to - # create an account with that username which will break the tests - valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}") - self.username_check([valid_username], [site], exist_check=False) - - return - - -class SherlockSiteCoverageTests(SherlockBaseTest): - def test_coverage_false_via_status(self): - """Test Username Does Not Exist Site Coverage (Via HTTP Status). - - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. 
- Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=False) - - return - - def test_coverage_true_via_status(self): - """Test Username Does Exist Site Coverage (Via HTTP Status). - - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=True) - - return - - def test_coverage_false_via_message(self): - """Test Username Does Not Exist Site Coverage (Via Error Message). - - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=False) - - return - - def test_coverage_true_via_message(self): - """Test Username Does Exist Site Coverage (Via Error Message). - - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=True) - - return - - def test_coverage_total(self): - """Test Site Coverage Is Total. - - This test checks that all sites have test data available. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if we do not have total coverage. 
- """ - - self.coverage_total_check() - - return diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py deleted file mode 100644 index de958b9d..00000000 --- a/sherlock/tests/base.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Sherlock Base Tests - -This module contains various utilities for running tests. -""" -import os -import os.path -import unittest -import sherlock -from result import QueryStatus -from notify import QueryNotify -from sites import SitesInformation -import warnings - - -class SherlockBaseTest(unittest.TestCase): - def setUp(self): - """Sherlock Base Test Setup. - - Does common setup tasks for base Sherlock tests. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - """ - - # This ignores the ResourceWarning from an unclosed SSLSocket. - # TODO: Figure out how to fix the code so this is not needed. - warnings.simplefilter("ignore", ResourceWarning) - - # Create object with all information about sites we are aware of. - sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json")) - - # Create original dictionary from SitesInformation() object. - # Eventually, the rest of the code will be updated to use the new object - # directly, but this will glue the two pieces together. - site_data_all = {} - for site in sites: - site_data_all[site.name] = site.information - self.site_data_all = site_data_all - - # Load excluded sites list, if any - excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites") - try: - with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file: - self.excluded_sites = excluded_sites_file.read().splitlines() - except FileNotFoundError: - self.excluded_sites = [] - - # Create notify object for query results. 
- self.query_notify = QueryNotify() - - self.tor = False - self.unique_tor = False - self.timeout = None - self.skip_error_sites = True - - return - - def site_data_filter(self, site_list): - """Filter Site Data. - - Keyword Arguments: - self -- This object. - site_list -- List of strings corresponding to sites which - should be filtered. - - Return Value: - Dictionary containing sub-set of site data specified by "site_list". - """ - - # Create new dictionary that has filtered site data based on input. - # Note that any site specified which is not understood will generate - # an error. - site_data = {} - for site in site_list: - with self.subTest(f"Checking test vector Site '{site}' " - f"exists in total site data." - ): - site_data[site] = self.site_data_all[site] - - return site_data - - def username_check(self, username_list, site_list, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - username_list -- List of strings corresponding to usernames - which should exist on *all* of the sites. - site_list -- List of strings corresponding to sites which - should be filtered. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - Nothing. - Will trigger an assert if Username does not have the expected - existence state. - """ - - # Filter all site data down to just what is needed for this test. 
- site_data = self.site_data_filter(site_list) - - if exist_check: - check_type_text = "claimed" - exist_result_desired = QueryStatus.CLAIMED - else: - check_type_text = "available" - exist_result_desired = QueryStatus.AVAILABLE - - for username in username_list: - results = sherlock.sherlock(username, - site_data, - self.query_notify, - tor=self.tor, - unique_tor=self.unique_tor, - timeout=self.timeout - ) - for site, result in results.items(): - with self.subTest(f"Checking Username '{username}' " - f"{check_type_text} on Site '{site}'" - ): - if ( - (self.skip_error_sites == True) and - (result["status"].status == QueryStatus.UNKNOWN) - ): - #Some error connecting to site. - self.skipTest(f"Skipping Username '{username}' " - f"{check_type_text} on Site '{site}': " - f"Site returned error status." - ) - - self.assertEqual(exist_result_desired, - result["status"].status) - - return - - def detect_type_check(self, detect_type, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - detect_type -- String corresponding to detection algorithm - which is desired to be tested. - Note that only sites which have documented - usernames which exist and do not exist - will be tested. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - Nothing. - Runs tests on all sites using the indicated detection algorithm - and which also has test vectors specified. - Will trigger an assert if Username does not have the expected - existence state. - """ - - # Dictionary of sites that should be tested for having a username. - # This will allow us to test sites with a common username in parallel. 
- sites_by_username = {} - - for site, site_data in self.site_data_all.items(): - if ( - (site in self.excluded_sites) or - (site_data["errorType"] != detect_type) or - (site_data.get("username_claimed") is None) or - (site_data.get("username_unclaimed") is None) - ): - # This is either not a site we are interested in, or the - # site does not contain the required information to do - # the tests. - pass - else: - # We should run a test on this site. - - # Figure out which type of user - if exist_check: - username = site_data.get("username_claimed") - else: - username = site_data.get("username_unclaimed") - - # Add this site to the list of sites corresponding to this - # username. - if username in sites_by_username: - sites_by_username[username].append(site) - else: - sites_by_username[username] = [site] - - # Check on the username availability against all of the sites. - for username, site_list in sites_by_username.items(): - self.username_check([username], - site_list, - exist_check=exist_check - ) - - return - - def coverage_total_check(self): - """Total Coverage Check. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Counts up all Sites with full test data available. - Will trigger an assert if any Site does not have test coverage. - """ - - site_no_tests_list = [] - - for site, site_data in self.site_data_all.items(): - if site_data.get("username_claimed") is None: - # Test information not available on this site. - site_no_tests_list.append(site) - - self.assertEqual("", ", ".join(site_no_tests_list)) - - return diff --git a/sherlock/tests/test_multiple_usernames.py b/sherlock/tests/test_multiple_usernames.py deleted file mode 100644 index 41811087..00000000 --- a/sherlock/tests/test_multiple_usernames.py +++ /dev/null @@ -1,29 +0,0 @@ -import importlib -import unittest -import sys -sys.path.append('../') -import sherlock as sh - -checksymbols = [] -checksymbols = ["_", "-", "."] - -"""Test for multiple usernames. 
- - This test ensures that the function multiple_usernames works properly. More specific, - different scenarios are tested and only usernames that contain this specific sequence: {?} - should return positive. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - """ -class TestMultipleUsernames(unittest.TestCase): - def test_area(self): - test_usernames = ["test{?}test" , "test{?feo" , "test"] - for name in test_usernames: - if(sh.check_for_parameter(name)): - self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"]) - else: - self.assertAlmostEqual(name, name) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..a13388bb --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +import os +import json +import urllib +import pytest +from sherlock.sites import SitesInformation + +@pytest.fixture() +def sites_obj(): + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) + yield sites_obj + +@pytest.fixture(scope="session") +def sites_info(): + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) + sites_iterable = {site.name: site.information for site in sites_obj} + yield sites_iterable + +@pytest.fixture(scope="session") +def remote_schema(): + schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.schema.json' + with urllib.request.urlopen(schema_url) as remoteschema: + schemadat = json.load(remoteschema) + yield schemadat diff --git a/tests/few_test_basic.py b/tests/few_test_basic.py new file mode 100644 index 00000000..7ec81ac1 --- /dev/null +++ b/tests/few_test_basic.py @@ -0,0 +1,7 @@ +import sherlock + +#from sherlock.sites import SitesInformation +#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") + 
+def test_username_via_message(): + sherlock.__main__("--version") diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py new file mode 100644 index 00000000..d0424af1 --- /dev/null +++ b/tests/sherlock_interactives.py @@ -0,0 +1,39 @@ +import os +import platform +import re +import subprocess + +class Interactives: + def run_cli(args:str = "") -> str: + """Pass arguments to Sherlock as a normal user on the command line""" + # Adapt for platform differences (Windows likes to be special) + if platform.system == "Windows": + command:str = f"py -m sherlock {args}" + else: + command:str = f"sherlock {args}" + + proc_out:str = "" + try: + proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) + return proc_out.decode() + except subprocess.CalledProcessError as e: + raise InteractivesSubprocessError(e.output.decode()) + + + # -> list[str] is prefered, but will require deprecation of support for Python 3.8 + def walk_sherlock_for_files_with(pattern: str) -> list: + """Check all files within the Sherlock package for matching patterns""" + pattern:re.Pattern = re.compile(pattern) + matching_files:list[str] = [] + for root, dirs, files in os.walk("sherlock"): + for file in files: + file_path = os.path.join(root,file) + if "__pycache__" in file_path: + continue + with open(file_path, 'r', errors='ignore') as f: + if pattern.search(f.read()): + matching_files.append(file_path) + return matching_files + +class InteractivesSubprocessError(Exception): + pass diff --git a/tests/test_manifest.py b/tests/test_manifest.py new file mode 100644 index 00000000..177af21e --- /dev/null +++ b/tests/test_manifest.py @@ -0,0 +1,39 @@ +import os +import json +import pytest +from jsonschema import validate + +def test_validate_manifest_against_local_schema(): + """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" + json_relative: str = '../sherlock/resources/data.json' + schema_relative: str 
= '../sherlock/resources/data.schema.json' + + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) + schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) + + with open(json_path, 'r') as f: + jsondat = json.load(f) + with open(schema_path, 'r') as f: + schemadat = json.load(f) + + validate(instance=jsondat, schema=schemadat) + + +@pytest.mark.online +def test_validate_manifest_against_remote_schema(remote_schema): + """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" + json_relative: str = '../sherlock/resources/data.json' + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) + + with open(json_path, 'r') as f: + jsondat = json.load(f) + + validate(instance=jsondat, schema=remote_schema) + +# Ensure that the expected values are beind returned by the site list +@pytest.mark.parametrize("target_name,target_expected_err_type", [ + ('GitHub', 'status_code'), + ('GitLab', 'message'), +]) +def test_site_list_iterability (sites_info, target_name, target_expected_err_type): + assert sites_info[target_name]['errorType'] == target_expected_err_type diff --git a/tests/test_probes.py b/tests/test_probes.py new file mode 100644 index 00000000..39e0ef0b --- /dev/null +++ b/tests/test_probes.py @@ -0,0 +1,105 @@ +import pytest +import random +import string +import re +from sherlock.sherlock import sherlock +from sherlock.notify import QueryNotify +from sherlock.result import QueryStatus +#from sherlock_interactives import Interactives + + +def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: + query_notify = QueryNotify() + site_data: dict = {} + site_data[site] = sites_info[site] + return sherlock( + username=username, + site_data=site_data, + query_notify=query_notify, + )[site]['status'].status + + +@pytest.mark.online +class TestLiveTargets: + """Actively test probes against live and trusted targets""" + # Known positives should only use sites 
trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('GitLab', 'ppfeister'), + ('AllMyLinks', 'blue'), + ]) + def test_known_positives_via_message(self, sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + + # Known positives should only use sites trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('GitHub', 'ppfeister'), + ('GitHub', 'sherlock-project'), + ('Docker Hub', 'ppfeister'), + ('Docker Hub', 'sherlock'), + ]) + def test_known_positives_via_status_code(self, sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + + # Known positives should only use sites trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('BodyBuilding', 'blue'), + ('labpentestit', 'CSV'), + ]) + def test_known_positives_via_response_url(self, sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + + # Randomly generate usernames of high length and test for positive availability + # Randomly generated usernames should be simple alnum for simplicity and high + # compatibility. Several attempts may be made ~just in case~ a real username is + # generated. 
+ @pytest.mark.parametrize('site,random_len',[ + ('GitLab', 255), + ('Codecademy', 30) + ]) + def test_likely_negatives_via_message(self, sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + + + # Randomly generate usernames of high length and test for positive availability + # Randomly generated usernames should be simple alnum for simplicity and high + # compatibility. Several attempts may be made ~just in case~ a real username is + # generated. + @pytest.mark.parametrize('site,random_len',[ + ('GitHub', 39), + ('Docker Hub', 30) + ]) + def test_likely_negatives_via_status_code(self, sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." 
+ + +def test_username_illegal_regex(sites_info): + site: str = 'BitBucket' + invalid_handle: str = '*#$Y&*JRE' + pattern = re.compile(sites_info[site]['regexCheck']) + # Ensure that the username actually fails regex before testing sherlock + assert pattern.match(invalid_handle) is None + assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL + diff --git a/tests/test_ux.py b/tests/test_ux.py new file mode 100644 index 00000000..c14035f5 --- /dev/null +++ b/tests/test_ux.py @@ -0,0 +1,43 @@ +import pytest +from sherlock import sherlock +from sherlock_interactives import Interactives +from sherlock_interactives import InteractivesSubprocessError + +def test_remove_nsfw(sites_obj): + nsfw_target: str = 'Pornhub' + assert nsfw_target in {site.name: site.information for site in sites_obj} + sites_obj.remove_nsfw_sites() + assert nsfw_target not in {site.name: site.information for site in sites_obj} + + +# Parametrized sites should *not* include Motherless, which is acting as the control +@pytest.mark.parametrize('nsfwsites', [ + ['Pornhub'], + ['Pornhub', 'Xvideos'], +]) +def test_nsfw_explicit_selection(sites_obj, nsfwsites): + for site in nsfwsites: + assert site in {site.name: site.information for site in sites_obj} + sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites) + for site in nsfwsites: + assert site in {site.name: site.information for site in sites_obj} + assert 'Motherless' not in {site.name: site.information for site in sites_obj} + +def test_wildcard_username_expansion(): + assert sherlock.check_for_parameter('test{?}test') is True + assert sherlock.check_for_parameter('test{.}test') is False + assert sherlock.check_for_parameter('test{}test') is False + assert sherlock.check_for_parameter('testtest') is False + assert sherlock.check_for_parameter('test{?test') is False + assert sherlock.check_for_parameter('test?}test') is False + assert sherlock.multiple_usernames('test{?}test') == ["test_test" , 
"test-test" , "test.test"] + + +@pytest.mark.parametrize('cliargs', [ + '', + '--site urghrtuight --egiotr', + '--', +]) +def test_no_usernames_provided(cliargs): + with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): + Interactives.run_cli(cliargs) diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 00000000..68119c9e --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,17 @@ +import os +from sherlock_interactives import Interactives +import sherlock + +def test_versioning() -> None: + # Ensure __version__ matches version presented to the user + assert sherlock.__version__ in Interactives.run_cli("--version") + # Ensure __init__ is single source of truth for __version__ in package + # Temporarily allows sherlock.py so as to not trigger early upgrades + found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *') + expected:list = [ + # Normalization is REQUIRED for Windows ( / vs \ ) + os.path.normpath("sherlock/__init__.py"), + os.path.normpath("sherlock/sherlock.py"), + ] + # Sorting is REQUIRED for Mac + assert sorted(found) == sorted(expected) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..0c309410 --- /dev/null +++ b/tox.ini @@ -0,0 +1,44 @@ +[tox] +requires = + tox >= 4 +envlist = + lint + py313 + py312 + py311 + py310 + py39 + py38 + +[testenv] +description = Attempt to build and install the package +deps = + coverage + jsonschema + pytest +allowlist_externals = coverage +commands = + coverage run --source=sherlock --module pytest -v + coverage report --show-missing + +[testenv:offline] +deps = + jsonschema + pytest +commands = + pytest -v -m "not online" + +[testenv:lint] +description = Lint with Ruff +deps = + ruff +commands = + ruff check + +[gh-actions] +python = + 3.12: py312 + 3.11: py311 + 3.10: py310 + 3.9: py39 + 3.8: py38