Merge #2127 to implement Poetry, pytest, and tox

Closes #2111 as its feature branch was the base of this PR's branch
Closes #2133 as no longer necessary
Closes #2147 as no longer necessary
new-logo
Paul Pfeister 8 months ago
commit 3211b90b02
No known key found for this signature in database
GPG Key ID: 70D33A96CBD7A994

12
.github/CODEOWNERS vendored

@ -1,11 +1,15 @@
### REPOSITORY ### REPOSITORY
/.github/CODEOWNERS @sdushantha /.github/CODEOWNERS @sdushantha
./github/FUNDING.yml @sdushantha
/LICENSE @sdushantha /LICENSE @sdushantha
### PACKAGING ### PACKAGING
# Changes made to these items without code owner approval may negatively # Changes made to these items without code owner approval may negatively
# impact packaging pipelines. Code owners may need time to verify or adapt. # impact packaging pipelines.
/pyproject.toml @ppfeister @sdushantha /pyproject.toml @ppfeister @sdushantha
/setup.cfg @ppfeister @sdushantha
/setup.py @ppfeister ### REGRESSION
/*.spec @ppfeister /.github/workflows/regression.yml @ppfeister
/tox.ini @ppfeister
/pytest.ini @ppfeister
/tests/ @ppfeister

@ -0,0 +1 @@
github: [ sdushantha, ppfeister, matheusfelipeog ]

@ -1,38 +0,0 @@
name: Tests
on:
push:
branches: [ master ]
jobs:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- '3.12'
- '3.11'
- '3.10'
- '3.9'
- '3.8'
- '3.7'
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install ruff flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
ruff . --output-format=github --select=E9,F63,F7,F82
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Sherlock Site Detect Tests
run: |
cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose

@ -1,27 +0,0 @@
name: Nightly
on:
schedule:
# Run Nightly Tests At 3AM (The Hour Of The Wolf) Every Day
- cron: '0 3 * * *'
jobs:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.x]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Sherlock Site Coverage Tests
run: |
cd sherlock && python -m unittest tests.all.SherlockSiteCoverageTests --verbose

@ -1,47 +0,0 @@
name: Pull Request Action
on:
pull_request:
branches: [ master ]
jobs:
getchange:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.changes.outputs.matrix }}
steps:
- id: changes
run: |
URL="https://api.github.com/repos/sherlock-project/sherlock/pulls/${{ github.event.pull_request.number }}/files"
FILES=$(curl -s -X GET -G $URL | jq -r '.[] | .filename')
if echo $FILES | grep -q ".json"; then
echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.x\"}]}"
else
echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.7\"},{\"python\":\"3.8\"}]},{\"python\":\"3.9\"},{\"python\":\"3.10\"}]},{\"python\":\"3.11\"},{\"python\":\"3.12\"}]}"
fi
tests:
needs: [getchange]
runs-on: ubuntu-latest
strategy:
matrix: ${{ fromJson(needs.getchange.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install ruff flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint With Ruff
run: |
# stop the build if there are Python syntax errors or undefined names
ruff check . --output-format=github --select=E9,F63,F7,F82
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Sherlock Site Detect Tests
run: |
cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose

@ -0,0 +1,40 @@
name: Regression Testing
on:
pull_request:
branches: [ master ]
push:
branches: [ master ]
jobs:
tox-matrix:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false # We want to know what version it fails on
matrix:
os: [
ubuntu-latest,
windows-latest,
macos-latest,
]
python-version: [
'3.8',
'3.9',
'3.10',
'3.11',
'3.12',
]
steps:
- uses: actions/checkout@v4
- name: Set up environment ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install tox and related dependencies
run: |
python -m pip install --upgrade pip
pip install tox
pip install tox-gh-actions
- name: Run tox
run: tox

6
.gitignore vendored

@ -1,8 +1,9 @@
# Virtual Environment # Virtual Environments
venv/ venv/
bin/ bin/
lib/ lib/
pyvenv.cfg pyvenv.cfg
poetry.lock
# Editor Configurations # Editor Configurations
.vscode/ .vscode/
@ -14,8 +15,9 @@ __pycache__/
# Pip # Pip
src/ src/
# Pip / PyProject Devel & Installation # Devel, Build, and Installation
*.egg-info/ *.egg-info/
dist/**
# Jupyter Notebook # Jupyter Notebook
.ipynb_checkpoints .ipynb_checkpoints

@ -68,7 +68,7 @@ Sherlock. This invocation hides the progress text that Sherlock normally
outputs, and instead shows the verbose output of the tests. outputs, and instead shows the verbose output of the tests.
```console ```console
$ cd sherlock/sherlock # Assumes current working directory is respository root
$ python3 -m unittest tests.all --verbose $ python3 -m unittest tests.all --verbose
``` ```

@ -37,13 +37,24 @@ Python
pipx install sherlock-project pipx install sherlock-project
``` ```
### Build python package from source (useful for contributors) ### Build live package from source (useful for contributors)
Building an editable (or live) package links the entry point to your current directory, rather than to the standard install location. This is often useful when working with the code base, as changes are reflected immediately without reinstallation.
Note that the version number will be 0.0.0 for pipx local builds unless manually changed in the pyproject file (it will prompt the user for an update).
```bash ```bash
# pipx is recommended, but pip may suffice if pipx is unavailable # Assumes repository cloned, and current working directory is repository root
git clone https://github.com/sherlock-project/sherlock.git pipx install -e .
cd sherlock ```
pipx install .
### Run package from source (without installing)
If you'd rather not install directly to your system, you can import the module at runtime with `-m`.
```bash
# Assumes repository cloned, and current working directory is repository root
python3 -m sherlock user123 user789
``` ```
<h2> <h2>

@ -27,11 +27,11 @@
[![PyPI - Version](https://img.shields.io/pypi/v/sherlock-project?logo=PyPi&label=PyPI&color=darkgreen)][ext_pypi] [![Docker Image Version](https://img.shields.io/docker/v/sherlock/sherlock?sort=semver&logo=docker&label=Docker&color=darkgreen)][docs_docker] [![homebrew version](https://img.shields.io/homebrew/v/sherlock?logo=Homebrew&color=darkgreen)][ext_brew] [![PyPI - Version](https://img.shields.io/pypi/v/sherlock-project?logo=PyPi&label=PyPI&color=darkgreen)][ext_pypi] [![Docker Image Version](https://img.shields.io/docker/v/sherlock/sherlock?sort=semver&logo=docker&label=Docker&color=darkgreen)][docs_docker] [![homebrew version](https://img.shields.io/homebrew/v/sherlock?logo=Homebrew&color=darkgreen)][ext_brew]
| Method | Command | Notes | | | Command | Notes |
| - | - | - | | - | - | - |
| pypi | `pipx install sherlock-project` | `pip` may be used in place of `pipx` | | PyPI | `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
| brew | `brew install sherlock` | Community supported | | Homebrew | `brew install sherlock` | Community supported |
| docker | `docker pull sherlock/sherlock` | | | Docker | `docker pull sherlock/sherlock` | |
### Alternative guides and methods ### Alternative guides and methods
@ -133,15 +133,10 @@ Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)
[docs_install]: /docs/INSTALL.md [docs_install]: /docs/INSTALL.md
[docs_docker]: /docs/INSTALL.md#docker [docs_docker]: /docs/INSTALL.md#docker
[docs_docker_dockerhub]: /docs/INSTALL.md#docker
[docs_docker_compose]: /docs/INSTALL.md#using-compose
[docs_docker_source]: /docs/INSTALL.md#build-image-from-source-useful-for-contributors
[docs_py]: /docs/INSTALL.md#python [docs_py]: /docs/INSTALL.md#python
[docs_py_build]: /docs/INSTALL.md#build-python-package-from-source-useful-for-contributors
[docs_contrib]: /docs/CONTRIBUTING.md [docs_contrib]: /docs/CONTRIBUTING.md
[docs_contrib_adding_targets]: /docs/CONTRIBUTING.md#adding-targets [docs_contrib_adding_targets]: /docs/CONTRIBUTING.md#adding-targets
[docs_contrib_removing_targets]: /docs/CONTRIBUTING.md#removing-targets [docs_contrib_removing_targets]: /docs/CONTRIBUTING.md#removing-targets
[docs_contrib_restoring_targets]: /docs/CONTRIBUTING.md#restoring-targets [docs_contrib_restoring_targets]: /docs/CONTRIBUTING.md#restoring-targets
[ext_pypi]: https://pypi.org/project/sherlock-project/ [ext_pypi]: https://pypi.org/project/sherlock-project/
[ext_brew]: https://formulae.brew.sh/formula/sherlock [ext_brew]: https://formulae.brew.sh/formula/sherlock

@ -2,17 +2,12 @@
<p align=center> <p align=center>
<br> <br>
<a href="https://sherlock-project.github.io/" target="_blank"><img src="https://user-images.githubusercontent.com/27065646/53551960-ae4dff80-3b3a-11e9-9075-cef786c69364.png"/></a> <a href="https://sherlock-project.github.io/" target="_blank"><img src="https://www.kali.org/tools/sherlock/images/sherlock-logo.svg" width="25%"/></a>
<br> <br>
<span>Hunt down social media accounts by username across <a href="https://github.com/sherlock-project/sherlock/blob/master/sites.md">social networks</a></span> <strong><span>Hunt down social media accounts by username across <a href="https://github.com/sherlock-project/sherlock/blob/master/sites.md">400+ social networks</a></span></strong>
<br><br>
<span>Additional documentation can be found at our <a href="https://github.com/sherlock-project/sherlock/">GitHub repository</a></span>
<br> <br>
<span>Additional documentation can be found on our <a href="https://github.com/sherlock-project/sherlock/">GitHub repository</a></span>
<br>
</p>
<p align="center">
<img width="70%" height="70%" src="https://user-images.githubusercontent.com/27065646/219638267-a5e11090-aa6e-4e77-87f7-0e95f6ad5978.png"/>
</a>
</p> </p>
## Usage ## Usage
@ -36,11 +31,12 @@ To search for more than one user:
```bash ```bash
$ sherlock user1 user2 user3 $ sherlock user1 user2 user3
``` ```
<br>
## Star History ___
<picture> <br>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" /> <p align="center">
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" /> <img width="70%" height="70%" src="https://user-images.githubusercontent.com/27065646/219638267-a5e11090-aa6e-4e77-87f7-0e95f6ad5978.png"/>
<img alt="Sherlock Project Star History Chart" src="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" /> </a>
</picture> </p>

@ -1,48 +1,59 @@
[build-system] [build-system]
requires = ["setuptools >= 61.0"] requires = [ "poetry-core>=1.2.0" ]
build-backend = "setuptools.build_meta" build-backend = "poetry.core.masonry.api"
# poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL
[project.scripts] [tool.poetry-version-plugin]
sherlock = "sherlock:main" source = "init"
[project.urls] [tool.poetry]
Homepage = "http://sherlock-project.github.io/"
Repository = "https://github.com/sherlock-project/sherlock.git"
Issues = "https://github.com/sherlock-project/sherlock/issues"
[project]
name = "sherlock-project" name = "sherlock-project"
# single source of truth for version is __init__.py
version = "0"
description = "Hunt down social media accounts by username across social networks"
license = "MIT"
authors = [ authors = [
{ name = "Siddharth Dushantha" } "Siddharth Dushantha <siddharth.dushantha@gmail.com>"
] ]
maintainers = [ maintainers = [
{ name = "Matheus Felipe" }, "Paul Pfeister <code@pfeister.dev>",
{ name = "Sondre Karlsen Dyrnes" }, "Matheus Felipe <matheusfelipeog@protonmail.com>",
{ name = "Paul Pfeister" } "Sondre Karlsen Dyrnes <sondre@villdyr.no>"
] ]
description = "Hunt down social media accounts by username across social networks"
readme = "docs/pyproj/README.md" readme = "docs/pyproj/README.md"
# Do not set license to file. Causes issues with rpm packaging for some reason. packages = [ { include = "sherlock"} ]
license = {text = "MIT"}
dynamic = ["dependencies", "version"]
keywords = [ "osint", "reconnaissance", "information gathering" ] keywords = [ "osint", "reconnaissance", "information gathering" ]
classifiers = [ classifiers = [
"Development Status :: 5 - Production/Stable", "Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers", "Intended Audience :: Developers",
"Intended Audience :: Information Technology", "Intended Audience :: Information Technology",
"License :: OSI Approved :: MIT License",
"Natural Language :: English", "Natural Language :: English",
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Topic :: Security" "Topic :: Security"
] ]
homepage = "https://sherlock-project.github.io/"
repository = "https://github.com/sherlock-project/sherlock"
[tool.poetry.urls]
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
[tool.setuptools.dynamic] [tool.poetry.dependencies]
dependencies = { file = [ "requirements.txt" ] } python = "^3.8"
version = { attr = "sherlock.__version__" } certifi = ">=2019.6.16"
colorama = "^0.4.1"
PySocks = "^1.7.0"
requests = "^2.22.0"
requests-futures = "^1.0.0"
stem = "^1.8.0"
torrequest = "^0.1.0"
# pandas can likely be bumped up to ^2.0.0 after fc39 EOL
pandas = ">=1.0.0,<3.0.0"
openpyxl = "^3.0.10"
[tool.setuptools] [tool.poetry.group.dev.dependencies]
package-dir = {"" = "sherlock"} jsonschema = "^4.0.0"
[tool.setuptools.package-data] [tool.poetry.scripts]
"*" = ["*.json"] sherlock = 'sherlock.sherlock:main'

@ -0,0 +1,4 @@
[pytest]
addopts = --strict-markers
markers =
online: mark tests are requiring internet access.

@ -1,10 +0,0 @@
certifi>=2019.6.16
colorama>=0.4.1
PySocks>=1.7.0
requests>=2.22.0
requests-futures>=1.0.0
stem>=1.8.0
torrequest>=0.1.0
pandas>=1.0.0
openpyxl>=3.0.10
exrex>=0.11.0

@ -1,4 +0,0 @@
[metadata]
name = Sherlock
author = Sherlock Project
url = http://sherlock-project.github.io/

@ -1,8 +0,0 @@
#!/usr/bin/env python3
"""Sometimes required for rpm spec - particularly when using pyp2rpm
"""
from setuptools import setup
setup()

@ -1,71 +0,0 @@
# Packager: Paul Pfeister <rh-bugzilla@pfeister.dev> (GitHub @ppfeister)
%global source_ref master
Name: sherlock-project
Version: 0.14.4
Release: %autorelease
Summary: Hunt down social media accounts by username across social networks
License: MIT
URL: https://github.com/sherlock-project/sherlock
Source: %{url}/archive/%{source_ref}.tar.gz
# Switch to new Source URL after adoption of tagged releases
BuildArch: noarch
BuildRequires: python3-devel
BuildRequires: help2man
%global _description %{expand:
Hunt down social media accounts by username across 400+ social networks and
websites. New targets are tested and implemented regularly.
}
%description %{_description}
%prep
%autosetup -n sherlock-%{source_ref}
%generate_buildrequires
%pyproject_buildrequires
%build
%pyproject_wheel
%install
%pyproject_install
%pyproject_save_files -l sherlock sites result notify __init__ __main__
sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/__main__.py'
sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/sherlock.py'
install -d '%{buildroot}%{_mandir}/man1'
PYTHONPATH='%{buildroot}%{python3_sitelib}' help2man \
--no-info \
--version-string='%{version}' \
--name='%{summary}' \
--output='%{buildroot}%{_mandir}/man1/sherlock.1' \
'%{buildroot}%{_bindir}/sherlock'
%check
# Tests fail when pwd isn't sherlock. Relative pathing need fix upstream.
cd sherlock
%{py3_test_envvars} %{python3} -m unittest tests.all --verbose
cd ..
%files -f %{pyproject_files}
%doc README.md
%{_bindir}/sherlock
%{python3_sitelib}/resources
%pycached %{python3_sitelib}/tests/*.py
%{_mandir}/man1/sherlock.1*
%changelog
* Tue May 14 2024 Paul Pfeister <rh-bugzilla@pfeister.dev> 0.14.4-1
- Initial package.

@ -4,3 +4,7 @@ This module contains the main logic to search for usernames at social
networks. networks.
""" """
__shortname__ = "Sherlock"
__longname__ = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.14.4"

@ -14,9 +14,9 @@ if __name__ == "__main__":
# Check if the user is using the correct version of Python # Check if the user is using the correct version of Python
python_version = sys.version.split()[0] python_version = sys.version.split()[0]
if sys.version_info < (3, 6): if sys.version_info < (3, 8):
print(f"Sherlock requires Python 3.6+\nYou are using Python {python_version}, which is not supported by Sherlock.") print(f"Sherlock requires Python 3.8+\nYou are using Python {python_version}, which is not supported by Sherlock.")
sys.exit(1) sys.exit(1)
import sherlock from sherlock import sherlock
sherlock.main() sherlock.main()

@ -3,7 +3,7 @@
This module defines the objects for notifying the caller about the This module defines the objects for notifying the caller about the
results of queries. results of queries.
""" """
from result import QueryStatus from sherlock.result import QueryStatus
from colorama import Fore, Style from colorama import Fore, Style
import webbrowser import webbrowser

@ -19,17 +19,27 @@ from time import monotonic
import requests import requests
from requests_futures.sessions import FuturesSession # Removing __version__ here will trigger update message for users
from torrequest import TorRequest # Do not remove until ready to trigger that message
from result import QueryStatus # When removed, also remove all the noqa: E402 comments for linting
from result import QueryResult
from notify import QueryNotifyPrint
from sites import SitesInformation
from colorama import init
from argparse import ArgumentTypeError
module_name = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.14.4" __version__ = "0.14.4"
del __version__
from .__init__ import ( # noqa: E402
__shortname__,
__longname__,
__version__
)
from requests_futures.sessions import FuturesSession # noqa: E402
from torrequest import TorRequest # noqa: E402
from sherlock.result import QueryStatus # noqa: E402
from sherlock.result import QueryResult # noqa: E402
from sherlock.notify import QueryNotify # noqa: E402
from sherlock.notify import QueryNotifyPrint # noqa: E402
from sherlock.sites import SitesInformation # noqa: E402
from colorama import init # noqa: E402
from argparse import ArgumentTypeError # noqa: E402
class SherlockFuturesSession(FuturesSession): class SherlockFuturesSession(FuturesSession):
@ -157,9 +167,9 @@ def multiple_usernames(username):
def sherlock( def sherlock(
username, username,
site_data, site_data,
query_notify, query_notify: QueryNotify,
tor=False, tor: bool = False,
unique_tor=False, unique_tor: bool = False,
proxy=None, proxy=None,
timeout=60, timeout=60,
): ):
@ -510,7 +520,7 @@ def handler(signal_received, frame):
def main(): def main():
parser = ArgumentParser( parser = ArgumentParser(
formatter_class=RawDescriptionHelpFormatter, formatter_class=RawDescriptionHelpFormatter,
description=f"{module_name} (Version {__version__})", description=f"{__longname__} (Version {__version__})",
) )
parser.add_argument( parser.add_argument(
"--version", "--version",
@ -664,10 +674,10 @@ def main():
# Check for newer version of Sherlock. If it exists, let the user know about it # Check for newer version of Sherlock. If it exists, let the user know about it
try: try:
r = requests.get( r = requests.get(
"https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/sherlock.py" "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/__init__.py"
) )
remote_version = str(re.findall('__version__ = "(.*)"', r.text)[0]) remote_version = str(re.findall('__version__ *= *"(.*)"', r.text)[0])
local_version = __version__ local_version = __version__
if remote_version != local_version: if remote_version != local_version:

@ -174,7 +174,7 @@ class SitesInformation:
raise ValueError( raise ValueError(
f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}."
) )
except TypeError as error: except TypeError:
print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n")
return return

@ -1,4 +0,0 @@
"""Sherlock Tests
This package contains various submodules used to run tests.
"""

@ -1,213 +0,0 @@
"""Sherlock Tests
This module contains various tests.
"""
from tests.base import SherlockBaseTest
import exrex
class SherlockDetectTests(SherlockBaseTest):
def test_detect_true_via_message(self):
"""Test Username Does Exist (Via Message).
This test ensures that the "message" detection mechanism of
ensuring that a Username does exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "AllMyLinks"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("message", site_data["errorType"])
self.username_check([site_data["username_claimed"]], [site], exist_check=True)
return
def test_detect_false_via_message(self):
"""Test Username Does Not Exist (Via Message).
This test ensures that the "message" detection mechanism of
ensuring that a Username does *not* exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "AllMyLinks"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("message", site_data["errorType"])
# Generate a valid username based on the regex for a username that the
# site supports that is *most likely* not taken. The regex is slightly
# modified version of site_data["regexCheck"] as we want a username
# that has the maximum length that is supported by the site. This way,
# we wont generate a random username that might actually exist. This
# method is very hacky, but it does the job as having hardcoded
# usernames that dont exists will lead to people with ill intent to
# create an account with that username which will break the tests
valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$")
self.username_check([valid_username], [site], exist_check=False)
return
def test_detect_true_via_status_code(self):
"""Test Username Does Exist (Via Status Code).
This test ensures that the "status code" detection mechanism of
ensuring that a Username does exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "BitBucket"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("status_code", site_data["errorType"])
self.username_check([site_data["username_claimed"]], [site], exist_check=True)
return
def test_detect_false_via_status_code(self):
"""Test Username Does Not Exist (Via Status Code).
This test ensures that the "status code" detection mechanism of
ensuring that a Username does *not* exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "BitBucket"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("status_code", site_data["errorType"])
# Generate a valid username based on the regex for a username that the
# site supports that is *most likely* not taken. The regex is slightly
# modified version of site_data["regexCheck"] as we want a username
# that has the maximum length that is supported by the site. This way,
# we wont generate a random username that might actually exist. This
# method is very hacky, but it does the job as having hardcoded
# usernames that dont exists will lead to people with ill intent to
# create an account with that username which will break the tests
valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}")
self.username_check([valid_username], [site], exist_check=False)
return
class SherlockSiteCoverageTests(SherlockBaseTest):
def test_coverage_false_via_status(self):
"""Test Username Does Not Exist Site Coverage (Via HTTP Status).
This test checks all sites with the "HTTP Status" detection mechanism
to ensure that a Username that does not exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("status_code", exist_check=False)
return
def test_coverage_true_via_status(self):
"""Test Username Does Exist Site Coverage (Via HTTP Status).
This test checks all sites with the "HTTP Status" detection mechanism
to ensure that a Username that does exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("status_code", exist_check=True)
return
def test_coverage_false_via_message(self):
"""Test Username Does Not Exist Site Coverage (Via Error Message).
This test checks all sites with the "Error Message" detection mechanism
to ensure that a Username that does not exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("message", exist_check=False)
return
def test_coverage_true_via_message(self):
"""Test Username Does Exist Site Coverage (Via Error Message).
This test checks all sites with the "Error Message" detection mechanism
to ensure that a Username that does exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("message", exist_check=True)
return
def test_coverage_total(self):
"""Test Site Coverage Is Total.
This test checks that all sites have test data available.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if we do not have total coverage.
"""
self.coverage_total_check()
return

@ -1,224 +0,0 @@
"""Sherlock Base Tests
This module contains various utilities for running tests.
"""
import os
import os.path
import unittest
import sherlock
from result import QueryStatus
from notify import QueryNotify
from sites import SitesInformation
import warnings
class SherlockBaseTest(unittest.TestCase):
def setUp(self):
"""Sherlock Base Test Setup.
Does common setup tasks for base Sherlock tests.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
"""
# This ignores the ResourceWarning from an unclosed SSLSocket.
# TODO: Figure out how to fix the code so this is not needed.
warnings.simplefilter("ignore", ResourceWarning)
# Create object with all information about sites we are aware of.
sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json"))
# Create original dictionary from SitesInformation() object.
# Eventually, the rest of the code will be updated to use the new object
# directly, but this will glue the two pieces together.
site_data_all = {}
for site in sites:
site_data_all[site.name] = site.information
self.site_data_all = site_data_all
# Load excluded sites list, if any
excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites")
try:
with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
self.excluded_sites = excluded_sites_file.read().splitlines()
except FileNotFoundError:
self.excluded_sites = []
# Create notify object for query results.
self.query_notify = QueryNotify()
self.tor = False
self.unique_tor = False
self.timeout = None
self.skip_error_sites = True
return
def site_data_filter(self, site_list):
"""Filter Site Data.
Keyword Arguments:
self -- This object.
site_list -- List of strings corresponding to sites which
should be filtered.
Return Value:
Dictionary containing sub-set of site data specified by "site_list".
"""
# Create new dictionary that has filtered site data based on input.
# Note that any site specified which is not understood will generate
# an error.
site_data = {}
for site in site_list:
with self.subTest(f"Checking test vector Site '{site}' "
f"exists in total site data."
):
site_data[site] = self.site_data_all[site]
return site_data
def username_check(self, username_list, site_list, exist_check=True):
"""Username Exist Check.
Keyword Arguments:
self -- This object.
username_list -- List of strings corresponding to usernames
which should exist on *all* of the sites.
site_list -- List of strings corresponding to sites which
should be filtered.
exist_check -- Boolean which indicates if this should be
a check for Username existence,
or non-existence.
Return Value:
Nothing.
Will trigger an assert if Username does not have the expected
existence state.
"""
# Filter all site data down to just what is needed for this test.
site_data = self.site_data_filter(site_list)
if exist_check:
check_type_text = "claimed"
exist_result_desired = QueryStatus.CLAIMED
else:
check_type_text = "available"
exist_result_desired = QueryStatus.AVAILABLE
for username in username_list:
results = sherlock.sherlock(username,
site_data,
self.query_notify,
tor=self.tor,
unique_tor=self.unique_tor,
timeout=self.timeout
)
for site, result in results.items():
with self.subTest(f"Checking Username '{username}' "
f"{check_type_text} on Site '{site}'"
):
if (
(self.skip_error_sites == True) and
(result["status"].status == QueryStatus.UNKNOWN)
):
#Some error connecting to site.
self.skipTest(f"Skipping Username '{username}' "
f"{check_type_text} on Site '{site}': "
f"Site returned error status."
)
self.assertEqual(exist_result_desired,
result["status"].status)
return
def detect_type_check(self, detect_type, exist_check=True):
"""Username Exist Check.
Keyword Arguments:
self -- This object.
detect_type -- String corresponding to detection algorithm
which is desired to be tested.
Note that only sites which have documented
usernames which exist and do not exist
will be tested.
exist_check -- Boolean which indicates if this should be
a check for Username existence,
or non-existence.
Return Value:
Nothing.
Runs tests on all sites using the indicated detection algorithm
and which also has test vectors specified.
Will trigger an assert if Username does not have the expected
existence state.
"""
# Dictionary of sites that should be tested for having a username.
# This will allow us to test sites with a common username in parallel.
sites_by_username = {}
for site, site_data in self.site_data_all.items():
if (
(site in self.excluded_sites) or
(site_data["errorType"] != detect_type) or
(site_data.get("username_claimed") is None) or
(site_data.get("username_unclaimed") is None)
):
# This is either not a site we are interested in, or the
# site does not contain the required information to do
# the tests.
pass
else:
# We should run a test on this site.
# Figure out which type of user
if exist_check:
username = site_data.get("username_claimed")
else:
username = site_data.get("username_unclaimed")
# Add this site to the list of sites corresponding to this
# username.
if username in sites_by_username:
sites_by_username[username].append(site)
else:
sites_by_username[username] = [site]
# Check on the username availability against all of the sites.
for username, site_list in sites_by_username.items():
self.username_check([username],
site_list,
exist_check=exist_check
)
return
def coverage_total_check(self):
"""Total Coverage Check.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Counts up all Sites with full test data available.
Will trigger an assert if any Site does not have test coverage.
"""
site_no_tests_list = []
for site, site_data in self.site_data_all.items():
if site_data.get("username_claimed") is None:
# Test information not available on this site.
site_no_tests_list.append(site)
self.assertEqual("", ", ".join(site_no_tests_list))
return

@ -1,29 +0,0 @@
import importlib
import unittest
import sys
sys.path.append('../')
import sherlock as sh
checksymbols = []
checksymbols = ["_", "-", "."]
"""Test for multiple usernames.
This test ensures that the function multiple_usernames works properly. More specific,
different scenarios are tested and only usernames that contain this specific sequence: {?}
should return positive.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
"""
class TestMultipleUsernames(unittest.TestCase):
def test_area(self):
test_usernames = ["test{?}test" , "test{?feo" , "test"]
for name in test_usernames:
if(sh.check_for_parameter(name)):
self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"])
else:
self.assertAlmostEqual(name, name)

@ -0,0 +1,23 @@
import os
import json
import urllib
import pytest
from sherlock.sites import SitesInformation
@pytest.fixture()
def sites_obj():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
yield sites_obj
@pytest.fixture(scope="session")
def sites_info():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
yield sites_iterable
@pytest.fixture(scope="session")
def remote_schema():
schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.schema.json'
with urllib.request.urlopen(schema_url) as remoteschema:
schemadat = json.load(remoteschema)
yield schemadat

@ -0,0 +1,7 @@
import sherlock
#from sherlock.sites import SitesInformation
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
def test_username_via_message():
sherlock.__main__("--version")

@ -0,0 +1,39 @@
import os
import platform
import re
import subprocess
class Interactives:
def run_cli(args:str = "") -> str:
"""Pass arguments to Sherlock as a normal user on the command line"""
# Adapt for platform differences (Windows likes to be special)
if platform.system == "Windows":
command:str = f"py -m sherlock {args}"
else:
command:str = f"sherlock {args}"
proc_out:str = ""
try:
proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
return proc_out.decode()
except subprocess.CalledProcessError as e:
raise InteractivesSubprocessError(e.output.decode())
# -> list[str] is prefered, but will require deprecation of support for Python 3.8
def walk_sherlock_for_files_with(pattern: str) -> list:
"""Check all files within the Sherlock package for matching patterns"""
pattern:re.Pattern = re.compile(pattern)
matching_files:list[str] = []
for root, dirs, files in os.walk("sherlock"):
for file in files:
file_path = os.path.join(root,file)
if "__pycache__" in file_path:
continue
with open(file_path, 'r', errors='ignore') as f:
if pattern.search(f.read()):
matching_files.append(file_path)
return matching_files
class InteractivesSubprocessError(Exception):
pass

@ -0,0 +1,39 @@
import os
import json
import pytest
from jsonschema import validate
def test_validate_manifest_against_local_schema():
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
json_relative: str = '../sherlock/resources/data.json'
schema_relative: str = '../sherlock/resources/data.schema.json'
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
with open(json_path, 'r') as f:
jsondat = json.load(f)
with open(schema_path, 'r') as f:
schemadat = json.load(f)
validate(instance=jsondat, schema=schemadat)
@pytest.mark.online
def test_validate_manifest_against_remote_schema(remote_schema):
"""Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
json_relative: str = '../sherlock/resources/data.json'
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
with open(json_path, 'r') as f:
jsondat = json.load(f)
validate(instance=jsondat, schema=remote_schema)
# Ensure that the expected values are beind returned by the site list
@pytest.mark.parametrize("target_name,target_expected_err_type", [
('GitHub', 'status_code'),
('GitLab', 'message'),
])
def test_site_list_iterability (sites_info, target_name, target_expected_err_type):
assert sites_info[target_name]['errorType'] == target_expected_err_type

@ -0,0 +1,105 @@
import pytest
import random
import string
import re
from sherlock.sherlock import sherlock
from sherlock.notify import QueryNotify
from sherlock.result import QueryStatus
#from sherlock_interactives import Interactives
def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
query_notify = QueryNotify()
site_data: dict = {}
site_data[site] = sites_info[site]
return sherlock(
username=username,
site_data=site_data,
query_notify=query_notify,
)[site]['status'].status
@pytest.mark.online
class TestLiveTargets:
"""Actively test probes against live and trusted targets"""
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('GitLab', 'ppfeister'),
('AllMyLinks', 'blue'),
])
def test_known_positives_via_message(self, sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('GitHub', 'ppfeister'),
('GitHub', 'sherlock-project'),
('Docker Hub', 'ppfeister'),
('Docker Hub', 'sherlock'),
])
def test_known_positives_via_status_code(self, sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('BodyBuilding', 'blue'),
('labpentestit', 'CSV'),
])
def test_known_positives_via_response_url(self, sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Randomly generate usernames of high length and test for positive availability
# Randomly generated usernames should be simple alnum for simplicity and high
# compatibility. Several attempts may be made ~just in case~ a real username is
# generated.
@pytest.mark.parametrize('site,random_len',[
('GitLab', 255),
('Codecademy', 30)
])
def test_likely_negatives_via_message(self, sites_info, site, random_len):
num_attempts: int = 3
attempted_usernames: list[str] = []
status: QueryStatus = QueryStatus.CLAIMED
for i in range(num_attempts):
acceptable_types = string.ascii_letters + string.digits
random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
attempted_usernames.append(random_handle)
status = simple_query(sites_info=sites_info, site=site, username=random_handle)
if status is QueryStatus.AVAILABLE:
break
assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
# Randomly generate usernames of high length and test for positive availability
# Randomly generated usernames should be simple alnum for simplicity and high
# compatibility. Several attempts may be made ~just in case~ a real username is
# generated.
@pytest.mark.parametrize('site,random_len',[
('GitHub', 39),
('Docker Hub', 30)
])
def test_likely_negatives_via_status_code(self, sites_info, site, random_len):
num_attempts: int = 3
attempted_usernames: list[str] = []
status: QueryStatus = QueryStatus.CLAIMED
for i in range(num_attempts):
acceptable_types = string.ascii_letters + string.digits
random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
attempted_usernames.append(random_handle)
status = simple_query(sites_info=sites_info, site=site, username=random_handle)
if status is QueryStatus.AVAILABLE:
break
assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
def test_username_illegal_regex(sites_info):
site: str = 'BitBucket'
invalid_handle: str = '*#$Y&*JRE'
pattern = re.compile(sites_info[site]['regexCheck'])
# Ensure that the username actually fails regex before testing sherlock
assert pattern.match(invalid_handle) is None
assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL

@ -0,0 +1,43 @@
import pytest
from sherlock import sherlock
from sherlock_interactives import Interactives
from sherlock_interactives import InteractivesSubprocessError
def test_remove_nsfw(sites_obj):
nsfw_target: str = 'Pornhub'
assert nsfw_target in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites()
assert nsfw_target not in {site.name: site.information for site in sites_obj}
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
['Pornhub'],
['Pornhub', 'Xvideos'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
for site in nsfwsites:
assert site in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)
for site in nsfwsites:
assert site in {site.name: site.information for site in sites_obj}
assert 'Motherless' not in {site.name: site.information for site in sites_obj}
def test_wildcard_username_expansion():
assert sherlock.check_for_parameter('test{?}test') is True
assert sherlock.check_for_parameter('test{.}test') is False
assert sherlock.check_for_parameter('test{}test') is False
assert sherlock.check_for_parameter('testtest') is False
assert sherlock.check_for_parameter('test{?test') is False
assert sherlock.check_for_parameter('test?}test') is False
assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"]
@pytest.mark.parametrize('cliargs', [
'',
'--site urghrtuight --egiotr',
'--',
])
def test_no_usernames_provided(cliargs):
with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"):
Interactives.run_cli(cliargs)

@ -0,0 +1,17 @@
import os
from sherlock_interactives import Interactives
import sherlock
def test_versioning() -> None:
# Ensure __version__ matches version presented to the user
assert sherlock.__version__ in Interactives.run_cli("--version")
# Ensure __init__ is single source of truth for __version__ in package
# Temporarily allows sherlock.py so as to not trigger early upgrades
found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')
expected:list = [
# Normalization is REQUIRED for Windows ( / vs \ )
os.path.normpath("sherlock/__init__.py"),
os.path.normpath("sherlock/sherlock.py"),
]
# Sorting is REQUIRED for Mac
assert sorted(found) == sorted(expected)

@ -0,0 +1,44 @@
[tox]
requires =
tox >= 4
envlist =
lint
py313
py312
py311
py310
py39
py38
[testenv]
description = Attempt to build and install the package
deps =
coverage
jsonschema
pytest
allowlist_externals = coverage
commands =
coverage run --source=sherlock --module pytest -v
coverage report --show-missing
[testenv:offline]
deps =
jsonschema
pytest
commands =
pytest -v -m "not online"
[testenv:lint]
description = Lint with Ruff
deps =
ruff
commands =
ruff check
[gh-actions]
python =
3.12: py312
3.11: py311
3.10: py310
3.9: py39
3.8: py38
Loading…
Cancel
Save