You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
843 lines
27 KiB
843 lines
27 KiB
5 years ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import copy
import logging
import unittest
from os.path import abspath, join
from tempfile import gettempdir
from typing import Optional, Type
from urllib.parse import urlsplit

from faker import Faker  # type: ignore

from .. import defaults
from ..base import BaseTLDSourceParser
from ..conf import get_setting, reset_settings, set_setting
from ..exceptions import (
    TldBadUrl,
    TldDomainNotFound,
    TldImproperlyConfigured,
    TldIOError,
)
from ..helpers import project_dir
from ..registry import Registry
from ..utils import (
    get_fld,
    get_tld,
    get_tld_names,
    get_tld_names_container,
    is_tld,
    MozillaTLDSourceParser,
    BaseMozillaTLDSourceParser,
    parse_tld,
    reset_tld_names,
    update_tld_names,
    update_tld_names_cli,
)

from .base import internet_available_only, log_info
|
||
|
|
||
|
# Module metadata.
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2020 Artur Barseghyan'
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
# Names exported by a star-import of this module.
__all__ = ('TestCore',)

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
class TestCore(unittest.TestCase):
|
||
|
"""Core tld functionality tests."""
|
||
|
|
||
|
@classmethod
def setUpClass(cls):
    """Create the class-level fixtures: temp dir path and a Faker instance."""
    cls.temp_dir = gettempdir()
    cls.faker = Faker()
|
||
|
|
||
|
def setUp(self):
    """Build the per-test fixtures, then reset the settings.

    Assigns `good_patterns`, `bad_patterns`, `invalid_tlds`,
    `tld_names_local_path_custom` and `good_patterns_custom_parser` on the
    test instance and finishes by calling `reset_settings()`.
    """
    def pattern(url, fld, subdomain, domain, suffix, tld,
                fix_protocol=False):
        """Return one expectation dict owning a fresh `kwargs` dict."""
        kwargs = {'fail_silently': True}
        if fix_protocol:
            kwargs['fix_protocol'] = True
        return {
            'url': url,
            'fld': fld,
            'subdomain': subdomain,
            'domain': domain,
            'suffix': suffix,
            'tld': tld,
            'kwargs': kwargs,
        }

    # Positional order: url, fld, subdomain, domain, suffix, tld.
    self.good_patterns = [
        pattern('http://www.google.co.uk',
                'google.co.uk', 'www', 'google', 'co.uk', 'co.uk'),
        pattern('http://www.v2.google.co.uk',
                'google.co.uk', 'www.v2', 'google', 'co.uk', 'co.uk'),
        # No longer valid:
        #   url 'http://www.me.congresodelalengua3.ar',
        #   tld 'me.congresodelalengua3.ar', subdomain 'www', domain 'me',
        #   suffix 'congresodelalengua3.ar'
        pattern('http://хром.гугл.рф',
                'гугл.рф', 'хром', 'гугл', 'рф', 'рф'),
        pattern('http://www.google.co.uk:8001/lorem-ipsum/',
                'google.co.uk', 'www', 'google', 'co.uk', 'co.uk'),
        pattern('http://www.me.cloudfront.net',
                'me.cloudfront.net', 'www', 'me',
                'cloudfront.net', 'cloudfront.net'),
        pattern('http://www.v2.forum.tech.google.co.uk:8001/lorem-ipsum/',
                'google.co.uk', 'www.v2.forum.tech', 'google',
                'co.uk', 'co.uk'),
        pattern('https://pantheon.io/',
                'pantheon.io', '', 'pantheon', 'io', 'io'),
        pattern('v2.www.google.com',
                'google.com', 'v2.www', 'google', 'com', 'com',
                fix_protocol=True),
        pattern('//v2.www.google.com',
                'google.com', 'v2.www', 'google', 'com', 'com',
                fix_protocol=True),
        pattern('http://foo@bar.com',
                'bar.com', '', 'bar', 'com', 'com'),
        pattern('http://user:foo@bar.com',
                'bar.com', '', 'bar', 'com', 'com'),
        pattern('https://faguoren.xn--fiqs8s',
                'faguoren.xn--fiqs8s', '', 'faguoren',
                'xn--fiqs8s', 'xn--fiqs8s'),
        pattern('blogs.lemonde.paris',
                'lemonde.paris', 'blogs', 'lemonde', 'paris', 'paris',
                fix_protocol=True),
        pattern('axel.brighton.ac.uk',
                'brighton.ac.uk', 'axel', 'brighton', 'ac.uk', 'ac.uk',
                fix_protocol=True),
        pattern('m.fr.blogspot.com.au',
                'fr.blogspot.com.au', 'm', 'fr',
                'blogspot.com.au', 'blogspot.com.au',
                fix_protocol=True),
        pattern('help.www.福岡.jp',
                'www.福岡.jp', 'help', 'www', '福岡.jp', '福岡.jp',
                fix_protocol=True),
        pattern('syria.arabic.variant.سوريا',
                'variant.سوريا', 'syria.arabic', 'variant',
                'سوريا', 'سوريا',
                fix_protocol=True),
        pattern('http://www.help.kawasaki.jp',
                'www.help.kawasaki.jp', '', 'www',
                'help.kawasaki.jp', 'help.kawasaki.jp'),
        pattern('http://www.city.kawasaki.jp',
                'city.kawasaki.jp', 'www', 'city',
                'kawasaki.jp', 'kawasaki.jp'),
        pattern('http://fedoraproject.org',
                'fedoraproject.org', '', 'fedoraproject', 'org', 'org'),
        pattern('http://www.cloud.fedoraproject.org',
                'www.cloud.fedoraproject.org', '', 'www',
                'cloud.fedoraproject.org', 'cloud.fedoraproject.org'),
        pattern('https://www.john.app.os.fedoraproject.org',
                'john.app.os.fedoraproject.org', 'www', 'john',
                'app.os.fedoraproject.org', 'app.os.fedoraproject.org'),
        pattern('ftp://www.xn--mxail5aa.xn--11b4c3d',
                'xn--mxail5aa.xn--11b4c3d', 'www', 'xn--mxail5aa',
                'xn--11b4c3d', 'xn--11b4c3d'),
        pattern('http://cloud.fedoraproject.org',
                'cloud.fedoraproject.org', '', 'cloud.fedoraproject.org',
                'cloud.fedoraproject.org', 'cloud.fedoraproject.org'),
        pattern('github.io',
                'github.io', '', 'github.io', 'github.io', 'github.io',
                fix_protocol=True),
        # An already-split URL object instead of a plain string.
        pattern(urlsplit('http://lemonde.fr/article.html'),
                'lemonde.fr', '', 'lemonde', 'fr', 'fr'),
        pattern('https://github.com....../barseghyanartur/tld/',
                'github.com', '', 'github', 'com', 'com'),
    ]

    # URL -> expected exception (plus optional extra call kwargs).
    # The first key is what the `bad_url` property returns.
    self.bad_patterns = {
        'v2.www.google.com': {'exception': TldBadUrl},
        '/index.php?a=1&b=2': {'exception': TldBadUrl},
        'http://www.tld.doesnotexist': {'exception': TldDomainNotFound},
        'https://2001:0db8:0000:85a3:0000:0000:ac1f:8001': {
            'exception': TldDomainNotFound,
        },
        'http://192.169.1.1': {'exception': TldDomainNotFound},
        'http://localhost:8080': {'exception': TldDomainNotFound},
        'https://localhost': {'exception': TldDomainNotFound},
        'https://localhost2': {
            'exception': TldImproperlyConfigured,
            'kwargs': {'search_public': False, 'search_private': False},
        },
    }

    # Values `is_tld` is expected to reject.
    self.invalid_tlds = {
        'v2.www.google.com',
        'tld.doesnotexist',
        '2001:0db8:0000:85a3:0000:0000:ac1f',
        '192.169.1.1',
        'localhost',
        'google.com',
    }

    custom_names_file = join(
        'tests', 'res', 'effective_tld_names_custom.dat.txt'
    )
    self.tld_names_local_path_custom = project_dir(custom_names_file)

    # The tests using these patterns add `parser_class` to the kwargs
    # themselves.
    self.good_patterns_custom_parser = [
        pattern('http://www.foreverchild',
                'www.foreverchild', '', 'www',
                'foreverchild', 'foreverchild'),
        pattern('http://www.v2.foreverchild',
                'v2.foreverchild', 'www', 'v2',
                'foreverchild', 'foreverchild'),
    ]

    reset_settings()
|
||
|
|
||
|
def tearDown(self):
    """Reset the settings and the parser registry after each test."""
    reset_settings()
    Registry.reset()
|
||
|
|
||
|
@property
def good_url(self):
    """Return the URL of the first entry in `good_patterns`."""
    first_pattern = self.good_patterns[0]
    return first_pattern['url']
|
||
|
|
||
|
@property
def bad_url(self):
    """Return the first key (a URL) of `bad_patterns`."""
    urls = list(self.bad_patterns)
    return urls[0]
|
||
|
|
||
|
def get_custom_parser_class(
    self,
    uid: str = 'custom_mozilla',
    source_url: Optional[str] = None,
    local_path: str = 'tests/res/effective_tld_names_custom.dat.txt'
) -> Type[BaseTLDSourceParser]:
    """Create a custom TLD source parser class on the fly.

    The class is named `CustomMozillaTLDSourceParser`, derives from
    `BaseMozillaTLDSourceParser` and carries the given values as class
    attributes.

    :param uid: Value for the `uid` class attribute.
    :param source_url: Value for the `source_url` class attribute; may be
        `None`, which is the default.
    :param local_path: Value for the `local_path` class attribute.
    :return: The newly created parser class (a new class on every call).
    """
    class_attrs = {
        'uid': uid,
        'source_url': source_url,
        'local_path': local_path,
    }
    return type(
        'CustomMozillaTLDSourceParser',
        (BaseMozillaTLDSourceParser,),
        class_attrs
    )
|
||
|
|
||
|
@log_info
def test_0_tld_names_loaded(self):
    """Assert `tld_names` is non-empty after a `get_fld` call."""
    get_fld('http://www.google.co.uk')

    # Imported only after the lookup above has run.
    from ..utils import tld_names

    names_loaded = len(tld_names) > 0
    self.assertTrue(names_loaded)
    return names_loaded
|
||
|
|
||
|
@internet_available_only
@log_info
def test_1_update_tld_names(self):
    """Assert `update_tld_names(fail_silently=False)` returns a truthy value."""
    updated = update_tld_names(fail_silently=False)
    self.assertTrue(updated)
    return updated
|
||
|
|
||
|
@log_info
def test_2_fld_good_patterns_pass(self):
    """Assert `get_fld` yields the expected `fld` for every good pattern."""
    flds = []
    for pattern in self.good_patterns:
        fld = get_fld(pattern['url'], **pattern['kwargs'])
        self.assertEqual(fld, pattern['fld'])
        flds.append(fld)
    return flds
|
||
|
|
||
|
@log_info
def test_3_fld_bad_patterns_pass(self):
    """Assert `get_fld` returns `None` for bad URLs when failing silently.

    Only the URLs (keys) of `bad_patterns` are used here; the expected
    exception and extra kwargs stored with each URL are not consulted.

    :return: List of the values returned by `get_fld`, one per URL.
    """
    res = []
    for url in self.bad_patterns:
        _res = get_fld(url, fail_silently=True)
        self.assertIsNone(_res)
        res.append(_res)
    return res
|
||
|
|
||
|
@log_info
def test_4_override_settings(self):
    """Assert `set_setting` overrides the default `DEBUG` value."""
    # Before any override, the setting equals the packaged default.
    self.assertEqual(defaults.DEBUG, get_setting('DEBUG'))

    set_setting('DEBUG', True)

    # After the override, the new value is what `get_setting` reports.
    self.assertEqual(True, get_setting('DEBUG'))

    return get_setting('DEBUG')
|
||
|
|
||
|
@log_info
def test_5_tld_good_patterns_pass_parsed_object(self):
    """Check the object returned by `get_tld(..., as_object=True)`.

    For every good pattern the result's attributes, its string form and
    its `__dict__` are compared against the expectations.
    """
    compared_attrs = ('tld', 'subdomain', 'domain', 'suffix', 'fld')
    dict_attrs = ('tld', 'domain', 'subdomain', 'fld', 'parsed_url')
    results = []
    for pattern in self.good_patterns:
        # Fresh dict, so the fixture kwargs stay untouched.
        options = dict(pattern['kwargs'], as_object=True)
        parsed = get_tld(pattern['url'], **options)

        for attr in compared_attrs:
            self.assertEqual(getattr(parsed, attr), pattern[attr])

        # The string form of the result is the TLD itself.
        self.assertEqual(
            str(parsed).encode('utf8'),
            pattern['tld'].encode('utf8')
        )

        # `__dict__` holds exactly these five attributes.
        self.assertEqual(
            parsed.__dict__,
            {attr: getattr(parsed, attr) for attr in dict_attrs}
        )

        results.append(parsed)
    return results
|
||
|
|
||
|
@log_info
def test_6_override_full_names_path(self):
    """Assert `NAMES_LOCAL_PATH_PARENT` changes what `project_dir` returns."""
    parent_override = '/tmp/test'

    path_before = project_dir('dummy.txt')
    set_setting('NAMES_LOCAL_PATH_PARENT', parent_override)
    path_after = project_dir('dummy.txt')

    self.assertNotEqual(path_before, path_after)
    self.assertEqual(path_after, abspath('/tmp/test/dummy.txt'))
|
||
|
|
||
|
@log_info
def test_7_public_private(self):
    """Check `get_fld` with the `search_public`/`search_private` flags."""
    # (url, extra keyword arguments, expected result)
    cases = (
        ('http://silly.cc.ua', {'search_private': False}, None),
        ('http://silly.cc.ua', {'search_private': True}, 'silly.cc.ua'),
        (
            'mercy.compute.amazonaws.com',
            {'search_private': False, 'fix_protocol': True},
            None,
        ),
        ('http://whatever.com', {'search_public': False}, None),
    )
    for url, options, expected in cases:
        fld = get_fld(url, fail_silently=True, **options)
        self.assertEqual(fld, expected)
|
||
|
|
||
|
@log_info
def test_8_fld_bad_patterns_exceptions(self):
    """Assert `get_fld` raises the expected exception for each bad URL.

    Each entry of `bad_patterns` names the exception to expect and,
    optionally, extra kwargs for the call. `fail_silently=False` is
    always forced.

    :return: An empty list; every call is expected to raise, so there is
        never a return value to collect.
    """
    for url, params in self.bad_patterns.items():
        # Build a new dict rather than writing `fail_silently` into the
        # fixture's own `kwargs` dict.
        kwargs = dict(params.get('kwargs', {}), fail_silently=False)
        with self.assertRaises(params['exception']):
            get_fld(url, **kwargs)
    return []
|
||
|
|
||
|
@log_info
def test_9_tld_good_patterns_pass(self):
    """Assert `get_tld` yields the expected `tld` for every good pattern."""
    tlds = []
    for pattern in self.good_patterns:
        tld = get_tld(pattern['url'], **pattern['kwargs'])
        self.assertEqual(tld, pattern['tld'])
        tlds.append(tld)
    return tlds
|
||
|
|
||
|
@log_info
def test_10_tld_bad_patterns_pass(self):
    """Assert `get_tld` returns `None` for bad URLs when failing silently.

    Only the URLs (keys) of `bad_patterns` are used here; the expected
    exception and extra kwargs stored with each URL are not consulted.

    :return: List of the values returned by `get_tld`, one per URL.
    """
    res = []
    for url in self.bad_patterns:
        _res = get_tld(url, fail_silently=True)
        self.assertIsNone(_res)
        res.append(_res)
    return res
|
||
|
|
||
|
@log_info
def test_11_parse_tld_good_patterns(self):
    """Assert `parse_tld` returns `(tld, domain, subdomain)` per pattern."""
    parsed_results = []
    for pattern in self.good_patterns:
        expected = (pattern['tld'], pattern['domain'], pattern['subdomain'])
        parsed = parse_tld(pattern['url'], **pattern['kwargs'])
        self.assertEqual(parsed, expected)
        parsed_results.append(parsed)
    return parsed_results
|
||
|
|
||
|
@log_info
def test_12_is_tld_good_patterns(self):
    """Assert `is_tld` accepts the `tld` of every good pattern."""
    for pattern in self.good_patterns:
        candidate = pattern['tld']
        self.assertTrue(is_tld(candidate))
|
||
|
|
||
|
@log_info
def test_13_is_tld_bad_patterns(self):
    """Assert `is_tld` rejects every entry of `invalid_tlds`."""
    for candidate in self.invalid_tlds:
        accepted = is_tld(candidate)
        self.assertFalse(accepted)
|
||
|
|
||
|
@log_info
def test_14_fail_update_tld_names(self):
    """Check `update_tld_names` with a parser whose source URL is bogus."""
    broken_parser = self.get_custom_parser_class(
        uid='custom_mozilla_2',
        source_url='i-do-not-exist'
    )

    # With `fail_silently=False` the failure surfaces as TldIOError.
    self.assertRaises(
        TldIOError,
        update_tld_names,
        fail_silently=False,
        parser_uid=broken_parser.uid
    )

    # With `fail_silently=True` a falsy value is returned instead.
    silent_result = update_tld_names(
        fail_silently=True,
        parser_uid=broken_parser.uid
    )
    self.assertFalse(silent_result)
|
||
|
|
||
|
@log_info
def test_15_fail_get_tld_names(self):
    """Assert `get_tld` raises `TldIOError` for a misconfigured parser.

    The custom parser is given a bogus `source_url` and a different
    `local_path`; every good pattern is then expected to fail with
    `fail_silently=False`.
    """
    broken_parser = self.get_custom_parser_class(
        uid='custom_mozilla_3',
        source_url='i-do-not-exist',
        local_path='/srv/tests/res/effective_tld_names_custom_3.dat.txt'
    )
    reset_tld_names()

    for pattern in self.good_patterns:
        # Pattern kwargs first, then the overrides this test forces.
        call_kwargs = {
            'url': pattern['url'],
            **pattern['kwargs'],
            'fail_silently': False,
            'parser_class': broken_parser,
        }
        self.assertRaises(TldIOError, get_tld, **call_kwargs)
|
||
|
|
||
|
@log_info
def test_15_fail_get_fld_wrong_kwargs(self):
    """Assert `get_fld` rejects `as_object=True` with TldImproperlyConfigured."""
    url = self.good_url
    self.assertRaises(
        TldImproperlyConfigured,
        get_fld,
        url,
        as_object=True
    )
|
||
|
|
||
|
@log_info
def test_16_fail_parse_tld(self):
    """Check `parse_tld` on a bad URL with a custom parser.

    Even with `fail_silently=False`, the call is expected to return
    `(None, None, None)` rather than raise.
    """
    custom_parser = self.get_custom_parser_class(
        source_url='i-do-not-exist'
    )
    outcome = parse_tld(
        self.bad_url,
        fail_silently=False,
        parser_class=custom_parser
    )
    nothing_parsed = (None, None, None)
    self.assertEqual(outcome, nothing_parsed)
|
||
|
|
||
|
@log_info
def test_17_get_tld_names_and_reset_tld_names(self):
    """Check how `get_tld_names` fails for parsers with a bogus source URL.

    Two sub-tests, each preceded by `reset_tld_names()` and each using a
    parser whose `local_path` is a freshly generated file name in the
    temp directory:

    - `fail_silently=False` is expected to raise `TldIOError`;
    - `fail_silently=True` is expected to return `None`.
    """
    def parser_with_temp_path(source_url):
        """Return a parser class whose local path is a new temp file name."""
        local_path = join(gettempdir(), f'{self.faker.uuid4()}.dat.txt')
        return self.get_custom_parser_class(
            source_url=source_url,
            local_path=local_path
        )

    raising_parser = parser_with_temp_path('i-do-not-exist')
    reset_tld_names()

    with self.subTest('Assert raise TldIOError'):
        with self.assertRaises(TldIOError):
            get_tld_names(fail_silently=False, parser_class=raising_parser)

    silent_parser = parser_with_temp_path('i-do-not-exist-2')
    reset_tld_names()

    with self.subTest('Assert get None'):
        names = get_tld_names(fail_silently=True, parser_class=silent_parser)
        self.assertIsNone(names)
|
||
|
|
||
|
@internet_available_only
@log_info
def test_18_update_tld_names_cli(self):
    """Assert `update_tld_names_cli` returns 0 after a names reset."""
    reset_tld_names()
    return_code = update_tld_names_cli()
    self.assertEqual(return_code, 0)
|
||
|
|
||
|
@log_info
def test_19_parse_tld_custom_tld_names_good_patterns(self):
    """Check `parse_tld` against the custom-parser good patterns."""
    parsed_results = []
    for pattern in self.good_patterns_custom_parser:
        # Fresh kwargs with a newly built custom parser class.
        options = dict(
            pattern['kwargs'],
            parser_class=self.get_custom_parser_class()
        )
        parsed = parse_tld(pattern['url'], **options)
        expected = (pattern['tld'], pattern['domain'], pattern['subdomain'])
        self.assertEqual(parsed, expected)
        parsed_results.append(parsed)
    return parsed_results
|
||
|
|
||
|
@log_info
def test_20_tld_custom_tld_names_good_patterns_pass_parsed_object(self):
    """Check `get_tld(..., as_object=True)` with a custom parser class.

    For every custom-parser pattern the result's attributes, its string
    form and its `__dict__` are compared against the expectations.
    """
    compared_attrs = ('tld', 'subdomain', 'domain', 'suffix', 'fld')
    dict_attrs = ('tld', 'domain', 'subdomain', 'fld', 'parsed_url')
    results = []
    for pattern in self.good_patterns_custom_parser:
        options = dict(
            pattern['kwargs'],
            as_object=True,
            parser_class=self.get_custom_parser_class()
        )
        parsed = get_tld(pattern['url'], **options)

        for attr in compared_attrs:
            self.assertEqual(getattr(parsed, attr), pattern[attr])

        # The string form of the result is the TLD itself.
        self.assertEqual(
            str(parsed).encode('utf8'),
            pattern['tld'].encode('utf8')
        )

        # `__dict__` holds exactly these five attributes.
        self.assertEqual(
            parsed.__dict__,
            {attr: getattr(parsed, attr) for attr in dict_attrs}
        )

        results.append(parsed)
    return results
|
||
|
|
||
|
@log_info
def test_21_reset_tld_names_for_custom_parser(self):
    """Check `reset_tld_names` called with a parser's local path.

    After resolving the custom-parser patterns, the parser's
    `local_path` must be a key of the TLD names container; after
    `reset_tld_names(local_path)` it must be gone.
    """
    parser_class = self.get_custom_parser_class()
    compared_attrs = ('tld', 'subdomain', 'domain', 'suffix', 'fld')
    dict_attrs = ('tld', 'domain', 'subdomain', 'fld', 'parsed_url')
    results = []

    for pattern in self.good_patterns_custom_parser:
        options = dict(
            pattern['kwargs'],
            as_object=True,
            parser_class=self.get_custom_parser_class()
        )
        parsed = get_tld(pattern['url'], **options)

        for attr in compared_attrs:
            self.assertEqual(getattr(parsed, attr), pattern[attr])

        # The string form of the result is the TLD itself.
        self.assertEqual(
            str(parsed).encode('utf8'),
            pattern['tld'].encode('utf8')
        )

        # `__dict__` holds exactly these five attributes.
        self.assertEqual(
            parsed.__dict__,
            {attr: getattr(parsed, attr) for attr in dict_attrs}
        )

        results.append(parsed)

    container = get_tld_names_container()
    local_path = parser_class.local_path
    self.assertIn(local_path, container)
    reset_tld_names(local_path)
    self.assertNotIn(local_path, container)

    return results
|
||
|
|
||
|
@log_info
def test_22_fail_define_custom_parser_class_without_uid(self):
    """Check `get_tld_names` on incomplete custom parser classes.

    A subclass without `uid` must raise `TldImproperlyConfigured`; a
    subclass that only sets `uid` must raise `NotImplementedError`.
    """
    class CustomParser(BaseTLDSourceParser):
        pass

    class AnotherCustomParser(BaseTLDSourceParser):

        uid = 'another-custom-parser'

    # No `uid` defined at all.
    self.assertRaises(TldImproperlyConfigured, CustomParser.get_tld_names)

    # `uid` defined, nothing else overridden.
    self.assertRaises(NotImplementedError, AnotherCustomParser.get_tld_names)
|
||
|
|
||
|
@log_info
def test_23_len_trie_nodes(self):
    """Assert the default parser's container entry has a positive length."""
    get_tld('http://delusionalinsanity.com')
    container = get_tld_names_container()
    mozilla_names = container[MozillaTLDSourceParser.local_path]
    self.assertGreater(len(mozilla_names), 0)
|
||
|
|
||
|
@log_info
def test_24_get_tld_names_no_arguments(self):
    """Assert `get_tld_names()` without arguments returns something non-empty."""
    names = get_tld_names()
    self.assertGreater(len(names), 0)
|
||
|
|
||
|
|
||
|
# Run the tests through unittest's runner when this module is executed
# as a script.
if __name__ == '__main__':
    unittest.main()
|