Merge #2127 to implement Poetry, pytest, and tox
Closes #2111 as its feature branch was the base of this PR's branch. Closes #2133 as no longer necessary. Closes #2147 as no longer necessary. new-logo
commit
3211b90b02
@ -1,11 +1,15 @@
|
||||
### REPOSITORY
|
||||
/.github/CODEOWNERS @sdushantha
|
||||
/.github/FUNDING.yml @sdushantha
|
||||
/LICENSE @sdushantha
|
||||
|
||||
### PACKAGING
|
||||
# Changes made to these items without code owner approval may negatively
|
||||
# impact packaging pipelines. Code owners may need time to verify or adapt.
|
||||
# impact packaging pipelines.
|
||||
/pyproject.toml @ppfeister @sdushantha
|
||||
/setup.cfg @ppfeister @sdushantha
|
||||
/setup.py @ppfeister
|
||||
/*.spec @ppfeister
|
||||
|
||||
### REGRESSION
|
||||
/.github/workflows/regression.yml @ppfeister
|
||||
/tox.ini @ppfeister
|
||||
/pytest.ini @ppfeister
|
||||
/tests/ @ppfeister
|
||||
|
@ -0,0 +1 @@
|
||||
github: [ sdushantha, ppfeister, matheusfelipeog ]
|
@ -1,38 +0,0 @@
|
||||
name: Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- '3.12'
|
||||
- '3.11'
|
||||
- '3.10'
|
||||
- '3.9'
|
||||
- '3.8'
|
||||
- '3.7'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install ruff flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Lint with ruff
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
ruff . --output-format=github --select=E9,F63,F7,F82
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Sherlock Site Detect Tests
|
||||
run: |
|
||||
cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose
|
@ -1,27 +0,0 @@
|
||||
name: Nightly
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run Nightly Tests At 3AM (The Hour Of The Wolf) Every Day
|
||||
- cron: '0 3 * * *'
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.x]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Sherlock Site Coverage Tests
|
||||
run: |
|
||||
cd sherlock && python -m unittest tests.all.SherlockSiteCoverageTests --verbose
|
@ -1,47 +0,0 @@
|
||||
name: Pull Request Action
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
getchange:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.changes.outputs.matrix }}
|
||||
steps:
|
||||
- id: changes
|
||||
run: |
|
||||
URL="https://api.github.com/repos/sherlock-project/sherlock/pulls/${{ github.event.pull_request.number }}/files"
|
||||
FILES=$(curl -s -X GET -G $URL | jq -r '.[] | .filename')
|
||||
if echo $FILES | grep -q ".json"; then
|
||||
echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.x\"}]}"
|
||||
else
|
||||
echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.7\"},{\"python\":\"3.8\"},{\"python\":\"3.9\"},{\"python\":\"3.10\"},{\"python\":\"3.11\"},{\"python\":\"3.12\"}]}"
|
||||
fi
|
||||
tests:
|
||||
needs: [getchange]
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.getchange.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install ruff flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Lint With Ruff
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
ruff check . --output-format=github --select=E9,F63,F7,F82
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Sherlock Site Detect Tests
|
||||
run: |
|
||||
cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose
|
@ -0,0 +1,40 @@
|
||||
name: Regression Testing
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
tox-matrix:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false # We want to know what version it fails on
|
||||
matrix:
|
||||
os: [
|
||||
ubuntu-latest,
|
||||
windows-latest,
|
||||
macos-latest,
|
||||
]
|
||||
python-version: [
|
||||
'3.8',
|
||||
'3.9',
|
||||
'3.10',
|
||||
'3.11',
|
||||
'3.12',
|
||||
]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up environment ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install tox and related dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox
|
||||
pip install tox-gh-actions
|
||||
- name: Run tox
|
||||
run: tox
|
@ -1,48 +1,59 @@
|
||||
[build-system]
|
||||
requires = ["setuptools >= 61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
requires = [ "poetry-core>=1.2.0" ]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
# poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL
|
||||
|
||||
[project.scripts]
|
||||
sherlock = "sherlock:main"
|
||||
[tool.poetry-version-plugin]
|
||||
source = "init"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "http://sherlock-project.github.io/"
|
||||
Repository = "https://github.com/sherlock-project/sherlock.git"
|
||||
Issues = "https://github.com/sherlock-project/sherlock/issues"
|
||||
|
||||
[project]
|
||||
[tool.poetry]
|
||||
name = "sherlock-project"
|
||||
# single source of truth for version is __init__.py
|
||||
version = "0"
|
||||
description = "Hunt down social media accounts by username across social networks"
|
||||
license = "MIT"
|
||||
authors = [
|
||||
{ name = "Siddharth Dushantha" }
|
||||
"Siddharth Dushantha <siddharth.dushantha@gmail.com>"
|
||||
]
|
||||
maintainers = [
|
||||
{ name = "Matheus Felipe" },
|
||||
{ name = "Sondre Karlsen Dyrnes" },
|
||||
{ name = "Paul Pfeister" }
|
||||
"Paul Pfeister <code@pfeister.dev>",
|
||||
"Matheus Felipe <matheusfelipeog@protonmail.com>",
|
||||
"Sondre Karlsen Dyrnes <sondre@villdyr.no>"
|
||||
]
|
||||
description = "Hunt down social media accounts by username across social networks"
|
||||
readme = "docs/pyproj/README.md"
|
||||
# Do not set license to file. Causes issues with rpm packaging for some reason.
|
||||
license = {text = "MIT"}
|
||||
dynamic = ["dependencies", "version"]
|
||||
packages = [ { include = "sherlock"} ]
|
||||
keywords = [ "osint", "reconnaissance", "information gathering" ]
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Information Technology",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Natural Language :: English",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Topic :: Security"
|
||||
]
|
||||
homepage = "https://sherlock-project.github.io/"
|
||||
repository = "https://github.com/sherlock-project/sherlock"
|
||||
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
dependencies = { file = [ "requirements.txt" ] }
|
||||
version = { attr = "sherlock.__version__" }
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
certifi = ">=2019.6.16"
|
||||
colorama = "^0.4.1"
|
||||
PySocks = "^1.7.0"
|
||||
requests = "^2.22.0"
|
||||
requests-futures = "^1.0.0"
|
||||
stem = "^1.8.0"
|
||||
torrequest = "^0.1.0"
|
||||
# pandas can likely be bumped up to ^2.0.0 after fc39 EOL
|
||||
pandas = ">=1.0.0,<3.0.0"
|
||||
openpyxl = "^3.0.10"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "sherlock"}
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"*" = ["*.json"]
|
||||
[tool.poetry.scripts]
|
||||
sherlock = 'sherlock.sherlock:main'
|
||||
|
@ -0,0 +1,4 @@
|
||||
[pytest]
|
||||
addopts = --strict-markers
|
||||
markers =
|
||||
online: mark tests as requiring internet access.
|
@ -1,10 +0,0 @@
|
||||
certifi>=2019.6.16
|
||||
colorama>=0.4.1
|
||||
PySocks>=1.7.0
|
||||
requests>=2.22.0
|
||||
requests-futures>=1.0.0
|
||||
stem>=1.8.0
|
||||
torrequest>=0.1.0
|
||||
pandas>=1.0.0
|
||||
openpyxl>=3.0.10
|
||||
exrex>=0.11.0
|
@ -1,4 +0,0 @@
|
||||
[metadata]
|
||||
name = Sherlock
|
||||
author = Sherlock Project
|
||||
url = http://sherlock-project.github.io/
|
@ -1,8 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Sometimes required for rpm spec - particularly when using pyp2rpm
|
||||
"""
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
setup()
|
@ -1,71 +0,0 @@
|
||||
# Packager: Paul Pfeister <rh-bugzilla@pfeister.dev> (GitHub @ppfeister)
|
||||
%global source_ref master
|
||||
|
||||
Name: sherlock-project
|
||||
Version: 0.14.4
|
||||
Release: %autorelease
|
||||
Summary: Hunt down social media accounts by username across social networks
|
||||
|
||||
License: MIT
|
||||
URL: https://github.com/sherlock-project/sherlock
|
||||
Source: %{url}/archive/%{source_ref}.tar.gz
|
||||
# Switch to new Source URL after adoption of tagged releases
|
||||
|
||||
BuildArch: noarch
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: help2man
|
||||
|
||||
%global _description %{expand:
|
||||
Hunt down social media accounts by username across 400+ social networks and
|
||||
websites. New targets are tested and implemented regularly.
|
||||
}
|
||||
|
||||
%description %{_description}
|
||||
|
||||
|
||||
%prep
|
||||
%autosetup -n sherlock-%{source_ref}
|
||||
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
|
||||
%build
|
||||
%pyproject_wheel
|
||||
|
||||
|
||||
%install
|
||||
%pyproject_install
|
||||
%pyproject_save_files -l sherlock sites result notify __init__ __main__
|
||||
|
||||
sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/__main__.py'
|
||||
sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/sherlock.py'
|
||||
|
||||
install -d '%{buildroot}%{_mandir}/man1'
|
||||
PYTHONPATH='%{buildroot}%{python3_sitelib}' help2man \
|
||||
--no-info \
|
||||
--version-string='%{version}' \
|
||||
--name='%{summary}' \
|
||||
--output='%{buildroot}%{_mandir}/man1/sherlock.1' \
|
||||
'%{buildroot}%{_bindir}/sherlock'
|
||||
|
||||
|
||||
%check
|
||||
# Tests fail when pwd isn't sherlock. Relative pathing needs a fix upstream.
|
||||
cd sherlock
|
||||
%{py3_test_envvars} %{python3} -m unittest tests.all --verbose
|
||||
cd ..
|
||||
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
%doc README.md
|
||||
%{_bindir}/sherlock
|
||||
%{python3_sitelib}/resources
|
||||
%pycached %{python3_sitelib}/tests/*.py
|
||||
%{_mandir}/man1/sherlock.1*
|
||||
|
||||
|
||||
%changelog
|
||||
* Tue May 14 2024 Paul Pfeister <rh-bugzilla@pfeister.dev> 0.14.4-1
|
||||
- Initial package.
|
@ -1,4 +0,0 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This package contains various submodules used to run tests.
|
||||
"""
|
@ -1,213 +0,0 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This module contains various tests.
|
||||
"""
|
||||
from tests.base import SherlockBaseTest
|
||||
import exrex
|
||||
|
||||
|
||||
class SherlockDetectTests(SherlockBaseTest):
    """Spot checks of each detection mechanism against a single site."""

    def _site_data_for(self, site, expected_error_type):
        """Look up a site's data and confirm its detection method.

        Keyword Arguments:
        self                   -- This object.
        site                   -- Name of the site to look up.
        expected_error_type    -- The "errorType" the site is expected to use.

        Return Value:
        The site's entry from self.site_data_all.
        Will trigger an assert if the site's "errorType" differs.
        """
        entry = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual(expected_error_type, entry["errorType"])

        return entry

    def test_detect_true_via_message(self):
        """Test Username Does Exist (Via Message).

        Checks that the "message" detection mechanism reports the site's
        claimed username as existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        target = "AllMyLinks"
        entry = self._site_data_for(target, "message")

        self.username_check([entry["username_claimed"]], [target], exist_check=True)

    def test_detect_false_via_message(self):
        """Test Username Does Not Exist (Via Message).

        Checks that the "message" detection mechanism reports a generated
        username as *not* existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        target = "AllMyLinks"
        self._site_data_for(target, "message")

        # Generate a valid username based on the regex for a username that the
        # site supports that is *most likely* not taken. The regex is a slightly
        # modified version of site_data["regexCheck"] as we want a username
        # that has the maximum length that is supported by the site. This way,
        # we won't generate a random username that might actually exist. This
        # method is very hacky, but it does the job, as hardcoded usernames
        # that don't exist would invite people with ill intent to create an
        # account with that username, which would break the tests.
        generated_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$")
        self.username_check([generated_username], [target], exist_check=False)

    def test_detect_true_via_status_code(self):
        """Test Username Does Exist (Via Status Code).

        Checks that the "status code" detection mechanism reports the site's
        claimed username as existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        target = "BitBucket"
        entry = self._site_data_for(target, "status_code")

        self.username_check([entry["username_claimed"]], [target], exist_check=True)

    def test_detect_false_via_status_code(self):
        """Test Username Does Not Exist (Via Status Code).

        Checks that the "status code" detection mechanism reports a generated
        username as *not* existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        target = "BitBucket"
        self._site_data_for(target, "status_code")

        # Generate a valid username based on the regex for a username that the
        # site supports that is *most likely* not taken. The regex is a slightly
        # modified version of site_data["regexCheck"] as we want a username
        # that has the maximum length that is supported by the site. This way,
        # we won't generate a random username that might actually exist. This
        # method is very hacky, but it does the job, as hardcoded usernames
        # that don't exist would invite people with ill intent to create an
        # account with that username, which would break the tests.
        generated_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}")
        self.username_check([generated_username], [target], exist_check=False)
|
||||
|
||||
|
||||
class SherlockSiteCoverageTests(SherlockBaseTest):
    """Coverage checks that run a detection mechanism across all sites."""

    def test_coverage_false_via_status(self):
        """Test Username Does Not Exist Site Coverage (Via HTTP Status).

        Runs every site that uses the "status_code" detection mechanism and
        expects its unclaimed username to be reported as not existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        self.detect_type_check(detect_type="status_code", exist_check=False)

    def test_coverage_true_via_status(self):
        """Test Username Does Exist Site Coverage (Via HTTP Status).

        Runs every site that uses the "status_code" detection mechanism and
        expects its claimed username to be reported as existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        self.detect_type_check(detect_type="status_code", exist_check=True)

    def test_coverage_false_via_message(self):
        """Test Username Does Not Exist Site Coverage (Via Error Message).

        Runs every site that uses the "message" detection mechanism and
        expects its unclaimed username to be reported as not existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        self.detect_type_check(detect_type="message", exist_check=False)

    def test_coverage_true_via_message(self):
        """Test Username Does Exist Site Coverage (Via Error Message).

        Runs every site that uses the "message" detection mechanism and
        expects its claimed username to be reported as existing.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        self.detect_type_check(detect_type="message", exist_check=True)

    def test_coverage_total(self):
        """Test Site Coverage Is Total.

        Checks that all sites have test data available.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if we do not have total coverage.
        """
        self.coverage_total_check()
|
@ -1,224 +0,0 @@
|
||||
"""Sherlock Base Tests
|
||||
|
||||
This module contains various utilities for running tests.
|
||||
"""
|
||||
import os
|
||||
import os.path
|
||||
import unittest
|
||||
import sherlock
|
||||
from result import QueryStatus
|
||||
from notify import QueryNotify
|
||||
from sites import SitesInformation
|
||||
import warnings
|
||||
|
||||
|
||||
class SherlockBaseTest(unittest.TestCase):
    """Shared setup and checking helpers for the Sherlock site tests."""

    def setUp(self):
        """Sherlock Base Test Setup.

        Does common setup tasks for base Sherlock tests.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        """

        # This ignores the ResourceWarning from an unclosed SSLSocket.
        # TODO: Figure out how to fix the code so this is not needed.
        warnings.simplefilter("ignore", ResourceWarning)

        # Create object with all information about sites we are aware of.
        sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json"))

        # Create original dictionary from SitesInformation() object.
        # Eventually, the rest of the code will be updated to use the new object
        # directly, but this will glue the two pieces together.
        self.site_data_all = {site.name: site.information for site in sites}

        # Load excluded sites list, if any
        excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites")
        try:
            with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
                self.excluded_sites = excluded_sites_file.read().splitlines()
        except FileNotFoundError:
            # A missing exclusion file simply means no site is excluded.
            self.excluded_sites = []

        # Create notify object for query results.
        self.query_notify = QueryNotify()

        self.tor = False
        self.unique_tor = False
        self.timeout = None
        self.skip_error_sites = True

    def site_data_filter(self, site_list):
        """Filter Site Data.

        Keyword Arguments:
        self                   -- This object.
        site_list              -- List of strings corresponding to sites which
                                  should be filtered.

        Return Value:
        Dictionary containing sub-set of site data specified by "site_list".
        """

        # Create new dictionary that has filtered site data based on input.
        # Note that any site specified which is not understood will generate
        # an error.
        site_data = {}
        for site in site_list:
            with self.subTest(f"Checking test vector Site '{site}' "
                              f"exists in total site data."
                              ):
                site_data[site] = self.site_data_all[site]

        return site_data

    def username_check(self, username_list, site_list, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        self                   -- This object.
        username_list          -- List of strings corresponding to usernames
                                  which should exist on *all* of the sites.
        site_list              -- List of strings corresponding to sites which
                                  should be filtered.
        exist_check            -- Boolean which indicates if this should be
                                  a check for Username existence,
                                  or non-existence.

        Return Value:
        Nothing.
        Will trigger an assert if Username does not have the expected
        existence state.
        """

        # Filter all site data down to just what is needed for this test.
        site_data = self.site_data_filter(site_list)

        if exist_check:
            check_type_text = "claimed"
            exist_result_desired = QueryStatus.CLAIMED
        else:
            check_type_text = "available"
            exist_result_desired = QueryStatus.AVAILABLE

        for username in username_list:
            results = sherlock.sherlock(username,
                                        site_data,
                                        self.query_notify,
                                        tor=self.tor,
                                        unique_tor=self.unique_tor,
                                        timeout=self.timeout
                                        )
            for site, result in results.items():
                with self.subTest(f"Checking Username '{username}' "
                                  f"{check_type_text} on Site '{site}'"
                                  ):
                    status = result["status"].status
                    if self.skip_error_sites and status == QueryStatus.UNKNOWN:
                        # Some error connecting to site.
                        self.skipTest(f"Skipping Username '{username}' "
                                      f"{check_type_text} on Site '{site}': "
                                      f"Site returned error status."
                                      )

                    self.assertEqual(exist_result_desired, status)

    def detect_type_check(self, detect_type, exist_check=True):
        """Detection Type Check.

        Keyword Arguments:
        self                   -- This object.
        detect_type            -- String corresponding to detection algorithm
                                  which is desired to be tested.
                                  Note that only sites which have documented
                                  usernames which exist and do not exist
                                  will be tested.
        exist_check            -- Boolean which indicates if this should be
                                  a check for Username existence,
                                  or non-existence.

        Return Value:
        Nothing.
        Runs tests on all sites using the indicated detection algorithm
        and which also has test vectors specified.
        Will trigger an assert if Username does not have the expected
        existence state.
        """

        # Figure out which type of user each site should be probed with.
        username_key = "username_claimed" if exist_check else "username_unclaimed"

        # Dictionary of sites that should be tested for having a username.
        # This will allow us to test sites with a common username in parallel.
        sites_by_username = {}

        for site, site_data in self.site_data_all.items():
            if (
                site in self.excluded_sites
                or site_data["errorType"] != detect_type
                or site_data.get("username_claimed") is None
                or site_data.get("username_unclaimed") is None
            ):
                # This is either not a site we are interested in, or the
                # site does not contain the required information to do
                # the tests.
                continue

            # Add this site to the list of sites corresponding to this
            # username.
            sites_by_username.setdefault(site_data[username_key], []).append(site)

        # Check on the username availability against all of the sites.
        for username, site_list in sites_by_username.items():
            self.username_check([username],
                                site_list,
                                exist_check=exist_check
                                )

    def coverage_total_check(self):
        """Total Coverage Check.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Counts up all Sites with full test data available.
        Will trigger an assert if any Site does not have test coverage.
        """

        # Sites without a claimed username have no test information.
        site_no_tests_list = [
            site
            for site, site_data in self.site_data_all.items()
            if site_data.get("username_claimed") is None
        ]

        # Comparing against "" makes the failure message list the offenders.
        self.assertEqual("", ", ".join(site_no_tests_list))
|
@ -1,29 +0,0 @@
|
||||
import importlib
|
||||
import unittest
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
import sherlock as sh
|
||||
|
||||
checksymbols = []
|
||||
checksymbols = ["_", "-", "."]
|
||||
|
||||
"""Test for multiple usernames.
|
||||
|
||||
This test ensures that the function multiple_usernames works properly. More specifically,
|
||||
different scenarios are tested and only usernames that contain this specific sequence: {?}
|
||||
should return positive.
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
|
||||
Return Value:
|
||||
Nothing.
|
||||
"""
|
||||
class TestMultipleUsernames(unittest.TestCase):
    """Tests for the ``{?}`` multiple-username helpers."""

    def test_area(self):
        """Only usernames containing the exact ``{?}`` sequence are expanded.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nothing.
        Will trigger an assert if a username is classified incorrectly or
        if the expansion of a matching username is not the expected list.
        """
        expected_expansion = ["test_test", "test-test", "test.test"]
        # Maps each candidate username to whether it holds the {?} sequence.
        expectations = {"test{?}test": True, "test{?feo": False, "test": False}

        for name, has_placeholder in expectations.items():
            with self.subTest(username=name):
                self.assertEqual(bool(sh.check_for_parameter(name)), has_placeholder)
                if has_placeholder:
                    # assertEqual, not assertAlmostEqual: the values are lists.
                    self.assertEqual(sh.multiple_usernames(name), expected_expansion)
|
@ -0,0 +1,23 @@
|
||||
import os
|
||||
import json
|
||||
import urllib
|
||||
import pytest
|
||||
from sherlock.sites import SitesInformation
|
||||
|
||||
@pytest.fixture()
def sites_obj():
    """Yield a SitesInformation object built from the repository's data.json."""
    manifest_path = os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
    yield SitesInformation(data_file_path=manifest_path)
|
||||
|
||||
@pytest.fixture(scope="session")
def sites_info():
    """Yield a dict mapping each site's name to its information, once per session."""
    manifest_path = os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
    loaded_sites = SitesInformation(data_file_path=manifest_path)

    info_by_name = {}
    for entry in loaded_sites:
        info_by_name[entry.name] = entry.information

    yield info_by_name
|
||||
|
||||
@pytest.fixture(scope="session")
def remote_schema():
    """Yield the data schema downloaded from the upstream master branch.

    The schema is fetched over the network and parsed as JSON once per
    test session.
    """
    # "import urllib" alone does not load the urllib.request submodule, so
    # import it explicitly instead of relying on another module having done so.
    import urllib.request

    schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.schema.json'
    with urllib.request.urlopen(schema_url) as remoteschema:
        schemadat = json.load(remoteschema)
    yield schemadat
|
@ -0,0 +1,7 @@
|
||||
import sherlock
|
||||
|
||||
#from sherlock.sites import SitesInformation
|
||||
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
|
||||
|
||||
def test_username_via_message():
    """Call ``sherlock.__main__`` with the ``--version`` argument."""
    # NOTE(review): the test name refers to "message" detection, but the body
    # only passes "--version" to sherlock.__main__ -- confirm the intended check.
    version_flag = "--version"
    sherlock.__main__(version_flag)
|
@ -0,0 +1,39 @@
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
class Interactives:
    """Helpers that exercise Sherlock from the outside, as a user would."""

    @staticmethod
    def run_cli(args: str = "") -> str:
        """Pass arguments to Sherlock as a normal user on the command line.

        Keyword Arguments:
        args                   -- Argument string appended verbatim to the
                                  Sherlock command line.

        Return Value:
        The decoded output of the command (stderr is merged into stdout).
        Raises InteractivesSubprocessError, carrying the decoded output,
        if the command exits with a non-zero status.
        """
        # Adapt for platform differences (Windows likes to be special).
        # platform.system is a function: it has to be called, otherwise the
        # function object is compared to the string and the test is never true.
        if platform.system() == "Windows":
            command = f"py -m sherlock {args}"
        else:
            command = f"sherlock {args}"

        try:
            proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise InteractivesSubprocessError(e.output.decode()) from e
        return proc_out.decode()

    # -> list[str] is preferred, but will require deprecation of support for Python 3.8
    @staticmethod
    def walk_sherlock_for_files_with(pattern: str) -> list:
        """Check all files within the Sherlock package for matching patterns.

        Keyword Arguments:
        pattern                -- Regular expression searched for in the
                                  content of each file.

        Return Value:
        List of paths of the files whose content matches the pattern.
        The "sherlock" directory is walked relative to the current working
        directory, and any path containing "__pycache__" is skipped.
        """
        compiled = re.compile(pattern)
        matching_files = []
        for root, _dirs, files in os.walk("sherlock"):
            for file in files:
                file_path = os.path.join(root, file)
                if "__pycache__" in file_path:
                    continue
                # errors='ignore' lets files that are not valid text be scanned.
                with open(file_path, 'r', errors='ignore') as f:
                    if compiled.search(f.read()):
                        matching_files.append(file_path)
        return matching_files
|
||||
|
||||
class InteractivesSubprocessError(Exception):
    """Error raised when a Sherlock command run as a subprocess fails."""
|
@ -0,0 +1,39 @@
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
from jsonschema import validate
|
||||
|
||||
def test_validate_manifest_against_local_schema():
    """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
    json_relative: str = '../sherlock/resources/data.json'
    schema_relative: str = '../sherlock/resources/data.schema.json'

    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
    schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

    # Pin the encoding so that reading the files does not depend on the
    # locale default of the platform the tests run on.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)
    with open(schema_path, 'r', encoding='utf-8') as f:
        schemadat = json.load(f)

    validate(instance=jsondat, schema=schemadat)
|
||||
|
||||
|
||||
@pytest.mark.online
def test_validate_manifest_against_remote_schema(remote_schema):
    """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
    json_relative: str = '../sherlock/resources/data.json'
    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)

    # Pin the encoding so that reading the file does not depend on the
    # locale default of the platform the tests run on.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)

    validate(instance=jsondat, schema=remote_schema)
|
||||
|
||||
# Ensure that the expected values are being returned by the site list
|
||||
@pytest.mark.parametrize("target_name,target_expected_err_type", [
    ('GitHub', 'status_code'),
    ('GitLab', 'message'),
])
def test_site_list_iterability(sites_info, target_name, target_expected_err_type):
    """Check that known targets in the site list carry the expected ``errorType``."""
    target_entry = sites_info[target_name]
    actual_err_type = target_entry['errorType']
    assert actual_err_type == target_expected_err_type
|
@ -0,0 +1,105 @@
|
||||
import pytest
|
||||
import random
|
||||
import string
|
||||
import re
|
||||
from sherlock.sherlock import sherlock
|
||||
from sherlock.notify import QueryNotify
|
||||
from sherlock.result import QueryStatus
|
||||
#from sherlock_interactives import Interactives
|
||||
|
||||
|
||||
def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
    """Query one site for ``username`` and return the resulting status.

    Only the entry for ``site`` is taken from ``sites_info`` and handed to
    ``sherlock`` together with a fresh ``QueryNotify``; the ``status``
    attribute of that site's ``'status'`` result is returned.
    """
    notifier = QueryNotify()
    single_site: dict = {site: sites_info[site]}
    results = sherlock(
        username=username,
        site_data=single_site,
        query_notify=notifier,
    )
    site_result = results[site]
    return site_result['status'].status
|
||||
|
||||
|
||||
@pytest.mark.online
class TestLiveTargets:
    """Actively test probes against live and trusted targets"""

    @staticmethod
    def _assert_random_username_available(sites_info, site, random_len, num_attempts=3):
        """Assert that a randomly generated username is reported AVAILABLE on ``site``.

        Randomly generated usernames are simple alnum for simplicity and high
        compatibility. Up to ``num_attempts`` usernames of length
        ``random_len`` are tried ~just in case~ a real username is generated;
        the first AVAILABLE result ends the loop. On failure the assertion
        message lists every username that was attempted.
        """
        # Loop-invariant alphabet, built once rather than on every attempt
        acceptable_types = string.ascii_letters + string.digits
        attempted_usernames = []
        status = QueryStatus.CLAIMED
        for _ in range(num_attempts):
            random_handle = ''.join(random.choice(acceptable_types) for _ in range(random_len))
            attempted_usernames.append(random_handle)
            status = simple_query(sites_info=sites_info, site=site, username=random_handle)
            if status is QueryStatus.AVAILABLE:
                break
        assert status is QueryStatus.AVAILABLE, (
            f"Could not validate available username after {num_attempts} attempts "
            f"with randomly generated usernames {attempted_usernames}."
        )

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitLab', 'ppfeister'),
        ('AllMyLinks', 'blue'),
    ])
    def test_known_positives_via_message(self, sites_info, site, username):
        """A known account must be reported CLAIMED on the listed sites."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitHub', 'ppfeister'),
        ('GitHub', 'sherlock-project'),
        ('Docker Hub', 'ppfeister'),
        ('Docker Hub', 'sherlock'),
    ])
    def test_known_positives_via_status_code(self, sites_info, site, username):
        """A known account must be reported CLAIMED on the listed sites."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('BodyBuilding', 'blue'),
        ('labpentestit', 'CSV'),
    ])
    def test_known_positives_via_response_url(self, sites_info, site, username):
        """A known account must be reported CLAIMED on the listed sites."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Randomly generate usernames of high length and test for positive availability
    @pytest.mark.parametrize('site,random_len',[
        ('GitLab', 255),
        ('Codecademy', 30)
    ])
    def test_likely_negatives_via_message(self, sites_info, site, random_len):
        """A long random username should be reported AVAILABLE on the listed sites."""
        self._assert_random_username_available(sites_info, site, random_len)

    # Randomly generate usernames of high length and test for positive availability
    @pytest.mark.parametrize('site,random_len',[
        ('GitHub', 39),
        ('Docker Hub', 30)
    ])
    def test_likely_negatives_via_status_code(self, sites_info, site, random_len):
        """A long random username should be reported AVAILABLE on the listed sites."""
        self._assert_random_username_available(sites_info, site, random_len)
|
||||
|
||||
|
||||
def test_username_illegal_regex(sites_info):
    """A username rejected by the site's ``regexCheck`` must be reported as ILLEGAL."""
    target_site: str = 'BitBucket'
    bad_username: str = '*#$Y&*JRE'
    username_regex = re.compile(sites_info[target_site]['regexCheck'])

    # Ensure that the username actually fails regex before testing sherlock
    regex_hit = username_regex.match(bad_username)
    assert regex_hit is None

    outcome = simple_query(sites_info=sites_info, site=target_site, username=bad_username)
    assert outcome is QueryStatus.ILLEGAL
|
||||
|
@ -0,0 +1,43 @@
|
||||
import pytest
|
||||
from sherlock import sherlock
|
||||
from sherlock_interactives import Interactives
|
||||
from sherlock_interactives import InteractivesSubprocessError
|
||||
|
||||
def test_remove_nsfw(sites_obj):
    """``remove_nsfw_sites()`` must drop a known NSFW target from the site collection."""
    nsfw_target: str = 'Pornhub'

    # The target has to be present first, otherwise its absence proves nothing
    before_removal = {site.name: site.information for site in sites_obj}
    assert nsfw_target in before_removal

    sites_obj.remove_nsfw_sites()

    after_removal = {site.name: site.information for site in sites_obj}
    assert nsfw_target not in after_removal
|
||||
|
||||
|
||||
# Parametrized sites should *not* include Motherless, which is acting as the control
|
||||
@pytest.mark.parametrize('nsfwsites', [
    ['Pornhub'],
    ['Pornhub', 'Xvideos'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
    """Sites passed via ``do_not_remove`` must survive ``remove_nsfw_sites``.

    'Motherless' is not in any parametrized list and acts as the control: it
    must be gone after the removal.
    """
    # Collect the names once per phase instead of rebuilding the collection
    # for every entry, and keep loop and comprehension variables distinct.
    names_before = {entry.name for entry in sites_obj}
    for nsfw_name in nsfwsites:
        assert nsfw_name in names_before

    sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)

    names_after = {entry.name for entry in sites_obj}
    for nsfw_name in nsfwsites:
        assert nsfw_name in names_after
    assert 'Motherless' not in names_after
|
||||
|
||||
def test_wildcard_username_expansion():
    """Only a complete ``{?}`` placeholder counts as a parameter, and it expands to ``_``, ``-`` and ``.``."""
    parameter_cases = (
        ('test{?}test', True),
        ('test{.}test', False),
        ('test{}test', False),
        ('testtest', False),
        ('test{?test', False),
        ('test?}test', False),
    )
    for candidate, has_parameter in parameter_cases:
        assert sherlock.check_for_parameter(candidate) is has_parameter

    expanded = sherlock.multiple_usernames('test{?}test')
    assert expanded == ["test_test" , "test-test" , "test.test"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cliargs', [
    '',
    '--site urghrtuight --egiotr',
    '--',
])
def test_no_usernames_provided(cliargs):
    """Running the CLI without usernames must fail with the 'USERNAMES required' error."""
    expected_error = r"error: the following arguments are required: USERNAMES"
    with pytest.raises(InteractivesSubprocessError, match=expected_error):
        Interactives.run_cli(cliargs)
|
@ -0,0 +1,17 @@
|
||||
import os
|
||||
from sherlock_interactives import Interactives
|
||||
import sherlock
|
||||
|
||||
def test_versioning() -> None:
    """Check the reported version and where ``__version__`` is assigned in the package."""
    # Ensure __version__ matches version presented to the user
    package_version = sherlock.__version__
    cli_output = Interactives.run_cli("--version")
    assert package_version in cli_output

    # Ensure __init__ is single source of truth for __version__ in package
    # Temporarily allows sherlock.py so as to not trigger early upgrades
    found: list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')
    # Normalization is REQUIRED for Windows ( / vs \ )
    expected: list = [
        os.path.normpath(relative_path)
        for relative_path in ("sherlock/__init__.py", "sherlock/sherlock.py")
    ]

    # Sorting is REQUIRED for Mac
    found_sorted = sorted(found)
    expected_sorted = sorted(expected)
    assert found_sorted == expected_sorted
|
@ -0,0 +1,44 @@
|
||||
[tox]
# Minimum tox version needed to interpret this configuration
requires =
    tox >= 4
# Environments run by a plain `tox` invocation: the lint environment plus one
# test environment per listed Python version. `offline` is not listed here and
# must be requested explicitly.
envlist =
    lint
    py313
    py312
    py311
    py310
    py39
    py38
|
||||
|
||||
[testenv]
description = Attempt to build and install the package
# Test-time dependencies installed into each test environment
deps =
    coverage
    jsonschema
    pytest
allowlist_externals = coverage
# Run pytest under coverage, measuring the sherlock package, then print a
# report that includes the lines that were not executed
commands =
    coverage run --source=sherlock --module pytest -v
    coverage report --show-missing
|
||||
|
||||
[testenv:offline]
# Not part of the default envlist; select it explicitly (e.g. `tox -e offline`)
deps =
    jsonschema
    pytest
# Deselect every test carrying the `online` marker
commands =
    pytest -v -m "not online"
|
||||
|
||||
[testenv:lint]
description = Lint with Ruff
deps =
    ruff
# No path or rule selection is given here, so Ruff's own defaults and any
# project-level Ruff configuration apply
commands =
    ruff check
|
||||
|
||||
[gh-actions]
# Maps the Python version of a CI job to the tox environment(s) to run.
# This section is presumably read by the tox-gh-actions plugin; confirm it is
# installed in the workflow. Keep the entries in sync with `envlist` in [tox].
python =
    3.13: py313
    3.12: py312
    3.11: py311
    3.10: py310
    3.9: py39
    3.8: py38
|
Loading…
Reference in new issue