- added tld library, so "Common Fixes" mod can detect domain names and won't add spaces after each dot in them.
- fix HI_before_colon_noncaps, so it won't remove http: from URLs.
pull/1043/head
parent
dd5cc8feb2
commit
a430bffe57
@ -0,0 +1,24 @@
|
||||
from .utils import (
|
||||
get_fld,
|
||||
get_tld,
|
||||
get_tld_names,
|
||||
is_tld,
|
||||
parse_tld,
|
||||
Result,
|
||||
update_tld_names,
|
||||
)
|
||||
|
||||
__title__ = 'tld'
|
||||
__version__ = '0.12.2'
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'get_fld',
|
||||
'get_tld',
|
||||
'get_tld_names',
|
||||
'is_tld',
|
||||
'parse_tld',
|
||||
'Result',
|
||||
'update_tld_names',
|
||||
)
|
@ -0,0 +1,68 @@
|
||||
from codecs import open as codecs_open
|
||||
from urllib.request import urlopen
|
||||
from typing import Optional
|
||||
|
||||
from .exceptions import (
|
||||
TldIOError,
|
||||
TldImproperlyConfigured,
|
||||
)
|
||||
from .helpers import project_dir
|
||||
from .registry import Registry
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = ('BaseTLDSourceParser',)
|
||||
|
||||
|
||||
class BaseTLDSourceParser(metaclass=Registry):
    """Base TLD source parser.

    Subclasses shall define ``uid``, ``source_url`` and ``local_path``
    and implement ``get_tld_names``. Registration in the parser registry
    happens automatically via the ``Registry`` metaclass.
    """

    # Unique identifier under which the parser is registered.
    uid: Optional[str] = None
    # Remote URL the TLD names file is fetched from.
    source_url: str
    # Local path (relative to the project dir) of the TLD names copy.
    local_path: str

    @classmethod
    def validate(cls):
        """Validate the parser configuration.

        :raise TldImproperlyConfigured: If ``uid`` is not set.
        """
        if not cls.uid:
            raise TldImproperlyConfigured(
                "The `uid` property of the TLD source parser shall be defined."
            )

    @classmethod
    def get_tld_names(cls, fail_silently: bool = False, retry_count: int = 0):
        """Get tld names.

        :param fail_silently: If True, fail silently instead of raising.
        :param retry_count: Number of retries already performed.
        :return:
        :raise NotImplementedError: Always — subclasses must override.
        """
        cls.validate()
        raise NotImplementedError(
            "Your TLD source parser shall implement `get_tld_names` method."
        )

    @classmethod
    def update_tld_names(cls, fail_silently: bool = False) -> bool:
        """Update the local copy of the TLD file.

        :param fail_silently: If True, return False on failure instead of
            raising ``TldIOError``.
        :return: True on success, False on silenced failure.
        """
        try:
            # Context managers guarantee that both the remote connection
            # and the local file are closed even when reading/writing
            # raises; the previous open/close sequence leaked both on
            # error.
            with urlopen(cls.source_url) as remote_file:
                with codecs_open(
                    project_dir(cls.local_path),
                    'wb',
                    encoding='utf8'
                ) as local_file:
                    local_file.write(remote_file.read().decode('utf8'))
        except Exception as err:
            if fail_silently:
                return False
            raise TldIOError(err)

        return True
|
@ -0,0 +1,58 @@
|
||||
from typing import Any
|
||||
from . import defaults
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'get_setting',
|
||||
'reset_settings',
|
||||
'set_setting',
|
||||
'settings',
|
||||
)
|
||||
|
||||
|
||||
class Settings(object):
    """Settings registry.

    Holds runtime overrides and falls back to the ``defaults`` module
    for names that have not been overridden.
    """

    def __init__(self):
        # Runtime overrides, keyed by setting name.
        self._settings = {}

    def set(self, name: str, value: Any) -> None:
        """Override a default setting.

        :param name: Setting name.
        :param value: Setting value.
        """
        self._settings[name] = value

    def get(self, name: str, default: Any = None) -> Any:
        """Get a variable from local settings.

        Lookup order: runtime overrides first, then the ``defaults``
        module, finally the supplied ``default``.

        :param name: Setting name.
        :param default: Value returned when the setting is unknown.
        :return: The resolved setting value.
        """
        # Single EAFP lookup; the original checked membership and then
        # looked the key up a second time through a cached bound method.
        try:
            return self._settings[name]
        except KeyError:
            pass
        if hasattr(defaults, name):
            return getattr(defaults, name)
        return default

    def reset(self) -> None:
        """Reset all settings to the values from the ``defaults`` module."""
        for name in defaults.__all__:
            self.set(name, getattr(defaults, name))


settings = Settings()

get_setting = settings.get

set_setting = settings.set

reset_settings = settings.reset
|
@ -0,0 +1,14 @@
|
||||
from os.path import dirname
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'DEBUG',
|
||||
'NAMES_LOCAL_PATH_PARENT',
|
||||
)
|
||||
|
||||
# Absolute base path that is prepended to NAMES_LOCAL_PATH
|
||||
NAMES_LOCAL_PATH_PARENT = dirname(__file__)
|
||||
|
||||
DEBUG = False
|
@ -0,0 +1,56 @@
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'TldBadUrl',
|
||||
'TldDomainNotFound',
|
||||
'TldImproperlyConfigured',
|
||||
'TldIOError',
|
||||
)
|
||||
|
||||
|
||||
class TldIOError(IOError):
    """TldIOError.

    Supposed to be thrown when problems with reading/writing occur;
    wraps the original error raised while fetching or saving the local
    TLD names file.
    """
|
||||
|
||||
|
||||
class TldDomainNotFound(ValueError):
    """TldDomainNotFound.

    Supposed to be thrown when domain name is not found (didn't match) the
    local TLD policy.
    """

    def __init__(self, domain_name):
        # Python-3 style super() and f-string; message kept byte-identical.
        super().__init__(
            f"Domain {domain_name} didn't match any existing TLD name!"
        )
|
||||
|
||||
|
||||
class TldBadUrl(ValueError):
    """TldBadUrl.

    Supposed to be thrown when bad URL is given.
    """

    def __init__(self, url):
        # Python-3 style super() and f-string; message kept byte-identical.
        super().__init__(f"Is not a valid URL {url}!")
|
||||
|
||||
|
||||
class TldImproperlyConfigured(Exception):
    """TldImproperlyConfigured.

    Supposed to be thrown when code is improperly configured. Typical
    use-case is when user tries to use `get_tld` function with both
    `search_public` and `search_private` set to False.
    """

    def __init__(self, msg=None):
        # Optional extra detail is appended to the fixed prefix; the
        # resulting messages are byte-identical to the original ones.
        base_msg = "Improperly configured."
        super().__init__(base_msg if msg is None else f"{base_msg} {msg}")
|
@ -0,0 +1,22 @@
|
||||
from os.path import abspath, join
|
||||
|
||||
from .conf import get_setting
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'project_dir',
|
||||
'PROJECT_DIR',
|
||||
)
|
||||
|
||||
|
||||
def project_dir(base: str) -> str:
    """Return the absolute path of ``base`` inside the project directory.

    The parent directory comes from the ``NAMES_LOCAL_PATH_PARENT``
    setting; back-slashes are normalised to forward slashes before the
    path is made absolute.
    """
    parent = get_setting('NAMES_LOCAL_PATH_PARENT')
    joined = join(parent, base).replace('\\', '/')
    return abspath(joined)


# Backwards-compatible alias.
PROJECT_DIR = project_dir
|
@ -0,0 +1,45 @@
|
||||
from typing import Dict
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'Registry',
|
||||
)
|
||||
|
||||
|
||||
class Registry(type):
    """Metaclass that records every class created with it.

    A class whose ``_uid`` resolves to a truthy value is stored in
    ``REGISTRY`` under that uid; classes whose ``uid`` is explicitly
    falsy (e.g. abstract bases) stay unregistered.
    """

    REGISTRY = {}  # type: Dict[str, Registry]

    def __new__(mcs, name, bases, attrs):
        cls = super().__new__(mcs, name, bases, attrs)
        # The uid (falling back to the class name) is the registry key.
        uid = getattr(cls, '_uid', None)
        if uid:
            mcs.REGISTRY[uid] = cls
        return cls

    @property
    def _uid(cls) -> str:
        # Fall back to the class name when no explicit `uid` is set.
        return getattr(cls, 'uid', cls.__name__)

    @classmethod
    def reset(mcs) -> None:
        """Empty the registry."""
        mcs.REGISTRY = {}

    @classmethod
    def get(mcs, key, default=None):
        """Return the registered class for ``key``, or ``default``."""
        return mcs.REGISTRY.get(key, default)

    @classmethod
    def items(mcs):
        """Return the registry items view."""
        return mcs.REGISTRY.items()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,67 @@
|
||||
from typing import Any, Dict
|
||||
from urllib.parse import SplitResult
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'Result',
|
||||
)
|
||||
|
||||
|
||||
class Result(object):
    """Container for the parsed URL parts (tld, domain, subdomain)."""

    __slots__ = ('subdomain', 'domain', 'tld', '__fld', 'parsed_url')

    def __init__(self,
                 tld: str,
                 domain: str,
                 subdomain: str,
                 parsed_url: SplitResult):
        self.tld = tld
        # An empty domain collapses onto the tld itself.
        self.domain = tld if domain == '' else domain
        self.subdomain = subdomain
        self.parsed_url = parsed_url
        # Pre-compute the first-level domain once.
        self.__fld = f"{self.domain}.{self.tld}" if domain else self.tld

    @property
    def extension(self) -> str:
        """Alias of ``tld``.

        :return str:
        """
        return self.tld
    suffix = extension

    @property
    def fld(self) -> str:
        """First level domain.

        :return:
        :rtype: str
        """
        return self.__fld

    def __str__(self) -> str:
        return self.tld
    __repr__ = __str__

    @property
    def __dict__(self) -> Dict[str, Any]:  # type: ignore
        """Mimic __dict__ functionality for this slotted class.

        :return:
        :rtype: dict
        """
        return {
            key: getattr(self, key)
            for key in ('tld', 'domain', 'subdomain', 'fld', 'parsed_url')
        }
|
@ -0,0 +1,8 @@
|
||||
import unittest
|
||||
|
||||
from .test_core import *
|
||||
from .test_commands import *
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from functools import lru_cache
|
||||
import logging
|
||||
import socket
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'internet_available_only',
|
||||
'log_info',
|
||||
)
|
||||
|
||||
LOG_INFO = True
LOGGER = logging.getLogger(__name__)


def log_info(func):
    """Decorator that logs the wrapped test's name, docstring and result.

    When ``LOG_INFO`` is falsy the function is returned untouched.
    """
    if not LOG_INFO:
        return func

    from functools import wraps

    @wraps(func)  # preserve __name__/__doc__ of the wrapped function
    def inner(self, *args, **kwargs):
        """Call ``func`` and log useful info about the call."""
        result = func(self, *args, **kwargs)

        LOGGER.debug('\n\n%s', func.__name__)
        LOGGER.debug('============================')
        if func.__doc__:
            LOGGER.debug('""" %s """', func.__doc__.strip())
        LOGGER.debug('----------------------------')
        if result is not None:
            LOGGER.debug(result)
        LOGGER.debug('\n++++++++++++++++++++++++++++')

        return result
    return inner
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
def is_internet_available(host="8.8.8.8", port=53, timeout=3):
    """Check if internet is available.

    Host: 8.8.8.8 (google-public-dns-a.google.com)
    OpenPort: 53/tcp
    Service: domain (DNS/TCP)

    The result is cached per (host, port, timeout) combination.
    """
    try:
        # `create_connection` takes a per-call timeout and the context
        # manager closes the socket. The original implementation mutated
        # process-wide state via `socket.setdefaulttimeout` and never
        # closed the socket it opened.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError as err:  # socket.error is an alias of OSError
        # Log instead of print()-ing from a library helper.
        LOGGER.debug("No internet connection available: %s", err)
        return False
|
||||
|
||||
|
||||
def internet_available_only(func):
    """Decorator that skips ``func`` when no Internet connection is available.

    Returns None (and logs the skip) instead of calling the wrapped test.
    """
    from functools import wraps

    @wraps(func)  # preserve __name__/__doc__ of the wrapped test
    def inner(self, *args, **kwargs):
        """Run ``func`` only when the Internet is reachable."""
        if not is_internet_available():
            LOGGER.debug('\n\n%s', func.__name__)
            LOGGER.debug('============================')
            if func.__doc__:
                LOGGER.debug('""" %s """', func.__doc__.strip())
            LOGGER.debug('----------------------------')
            LOGGER.debug("Skipping because no Internet connection available.")
            LOGGER.debug('\n++++++++++++++++++++++++++++')
            return None

        return func(self, *args, **kwargs)

    return inner
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import unittest
|
||||
|
||||
import subprocess
|
||||
|
||||
from .base import log_info, internet_available_only
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'GPL 2.0/LGPL 2.1'
|
||||
__all__ = ('TestCommands',)
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TestCommands(unittest.TestCase):
    """Tests for the `update-tld-names` console command.

    Both tests shell out to the installed console script via
    ``subprocess`` and expect it to finish silently (empty output).
    They are skipped when no Internet connection is available.
    """

    def setUp(self):
        """Set up."""

    @internet_available_only
    @log_info
    def test_1_update_tld_names_command(self):
        """Test updating the tld names via the CLI command."""
        res = subprocess.check_output(['update-tld-names']).strip()
        self.assertEqual(res, b'')
        return res

    @internet_available_only
    @log_info
    def test_1_update_tld_names_mozilla_command(self):
        """Test updating the tld names via the CLI command (mozilla source)."""
        res = subprocess.check_output(['update-tld-names', 'mozilla']).strip()
        self.assertEqual(res, b'')
        return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,842 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import copy
|
||||
import logging
|
||||
from os.path import abspath, join
|
||||
import unittest
|
||||
from tempfile import gettempdir
|
||||
from typing import Type
|
||||
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from faker import Faker # type: ignore
|
||||
|
||||
from .. import defaults
|
||||
from ..base import BaseTLDSourceParser
|
||||
from ..conf import get_setting, reset_settings, set_setting
|
||||
from ..exceptions import (
|
||||
TldBadUrl,
|
||||
TldDomainNotFound,
|
||||
TldImproperlyConfigured,
|
||||
TldIOError,
|
||||
)
|
||||
from ..helpers import project_dir
|
||||
from ..registry import Registry
|
||||
from ..utils import (
|
||||
get_fld,
|
||||
get_tld,
|
||||
get_tld_names,
|
||||
get_tld_names_container,
|
||||
is_tld,
|
||||
MozillaTLDSourceParser,
|
||||
BaseMozillaTLDSourceParser,
|
||||
parse_tld,
|
||||
reset_tld_names,
|
||||
update_tld_names,
|
||||
update_tld_names_cli,
|
||||
)
|
||||
|
||||
from .base import internet_available_only, log_info
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = ('TestCore',)
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TestCore(unittest.TestCase):
|
||||
"""Core tld functionality tests."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.faker = Faker()
|
||||
cls.temp_dir = gettempdir()
|
||||
|
||||
def setUp(self):
|
||||
"""Set up."""
|
||||
self.good_patterns = [
|
||||
{
|
||||
'url': 'http://www.google.co.uk',
|
||||
'fld': 'google.co.uk',
|
||||
'subdomain': 'www',
|
||||
'domain': 'google',
|
||||
'suffix': 'co.uk',
|
||||
'tld': 'co.uk',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.v2.google.co.uk',
|
||||
'fld': 'google.co.uk',
|
||||
'subdomain': 'www.v2',
|
||||
'domain': 'google',
|
||||
'suffix': 'co.uk',
|
||||
'tld': 'co.uk',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
# No longer valid
|
||||
# {
|
||||
# 'url': 'http://www.me.congresodelalengua3.ar',
|
||||
# 'tld': 'me.congresodelalengua3.ar',
|
||||
# 'subdomain': 'www',
|
||||
# 'domain': 'me',
|
||||
# 'suffix': 'congresodelalengua3.ar',
|
||||
# },
|
||||
{
|
||||
'url': u'http://хром.гугл.рф',
|
||||
'fld': u'гугл.рф',
|
||||
'subdomain': u'хром',
|
||||
'domain': u'гугл',
|
||||
'suffix': u'рф',
|
||||
'tld': u'рф',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.google.co.uk:8001/lorem-ipsum/',
|
||||
'fld': 'google.co.uk',
|
||||
'subdomain': 'www',
|
||||
'domain': 'google',
|
||||
'suffix': 'co.uk',
|
||||
'tld': 'co.uk',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.me.cloudfront.net',
|
||||
'fld': 'me.cloudfront.net',
|
||||
'subdomain': 'www',
|
||||
'domain': 'me',
|
||||
'suffix': 'cloudfront.net',
|
||||
'tld': 'cloudfront.net',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.v2.forum.tech.google.co.uk:8001/'
|
||||
'lorem-ipsum/',
|
||||
'fld': 'google.co.uk',
|
||||
'subdomain': 'www.v2.forum.tech',
|
||||
'domain': 'google',
|
||||
'suffix': 'co.uk',
|
||||
'tld': 'co.uk',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'https://pantheon.io/',
|
||||
'fld': 'pantheon.io',
|
||||
'subdomain': '',
|
||||
'domain': 'pantheon',
|
||||
'suffix': 'io',
|
||||
'tld': 'io',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'v2.www.google.com',
|
||||
'fld': 'google.com',
|
||||
'subdomain': 'v2.www',
|
||||
'domain': 'google',
|
||||
'suffix': 'com',
|
||||
'tld': 'com',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': '//v2.www.google.com',
|
||||
'fld': 'google.com',
|
||||
'subdomain': 'v2.www',
|
||||
'domain': 'google',
|
||||
'suffix': 'com',
|
||||
'tld': 'com',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://foo@bar.com',
|
||||
'fld': 'bar.com',
|
||||
'subdomain': '',
|
||||
'domain': 'bar',
|
||||
'suffix': 'com',
|
||||
'tld': 'com',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://user:foo@bar.com',
|
||||
'fld': 'bar.com',
|
||||
'subdomain': '',
|
||||
'domain': 'bar',
|
||||
'suffix': 'com',
|
||||
'tld': 'com',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'https://faguoren.xn--fiqs8s',
|
||||
'fld': 'faguoren.xn--fiqs8s',
|
||||
'subdomain': '',
|
||||
'domain': 'faguoren',
|
||||
'suffix': 'xn--fiqs8s',
|
||||
'tld': 'xn--fiqs8s',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'blogs.lemonde.paris',
|
||||
'fld': 'lemonde.paris',
|
||||
'subdomain': 'blogs',
|
||||
'domain': 'lemonde',
|
||||
'suffix': 'paris',
|
||||
'tld': 'paris',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': 'axel.brighton.ac.uk',
|
||||
'fld': 'brighton.ac.uk',
|
||||
'subdomain': 'axel',
|
||||
'domain': 'brighton',
|
||||
'suffix': 'ac.uk',
|
||||
'tld': 'ac.uk',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': 'm.fr.blogspot.com.au',
|
||||
'fld': 'fr.blogspot.com.au',
|
||||
'subdomain': 'm',
|
||||
'domain': 'fr',
|
||||
'suffix': 'blogspot.com.au',
|
||||
'tld': 'blogspot.com.au',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': u'help.www.福岡.jp',
|
||||
'fld': u'www.福岡.jp',
|
||||
'subdomain': 'help',
|
||||
'domain': 'www',
|
||||
'suffix': u'福岡.jp',
|
||||
'tld': u'福岡.jp',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': u'syria.arabic.variant.سوريا',
|
||||
'fld': u'variant.سوريا',
|
||||
'subdomain': 'syria.arabic',
|
||||
'domain': 'variant',
|
||||
'suffix': u'سوريا',
|
||||
'tld': u'سوريا',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True},
|
||||
},
|
||||
{
|
||||
'url': u'http://www.help.kawasaki.jp',
|
||||
'fld': u'www.help.kawasaki.jp',
|
||||
'subdomain': '',
|
||||
'domain': 'www',
|
||||
'suffix': u'help.kawasaki.jp',
|
||||
'tld': u'help.kawasaki.jp',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': u'http://www.city.kawasaki.jp',
|
||||
'fld': u'city.kawasaki.jp',
|
||||
'subdomain': 'www',
|
||||
'domain': 'city',
|
||||
'suffix': u'kawasaki.jp',
|
||||
'tld': u'kawasaki.jp',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': u'http://fedoraproject.org',
|
||||
'fld': u'fedoraproject.org',
|
||||
'subdomain': '',
|
||||
'domain': 'fedoraproject',
|
||||
'suffix': u'org',
|
||||
'tld': u'org',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': u'http://www.cloud.fedoraproject.org',
|
||||
'fld': u'www.cloud.fedoraproject.org',
|
||||
'subdomain': '',
|
||||
'domain': 'www',
|
||||
'suffix': u'cloud.fedoraproject.org',
|
||||
'tld': u'cloud.fedoraproject.org',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': u'https://www.john.app.os.fedoraproject.org',
|
||||
'fld': u'john.app.os.fedoraproject.org',
|
||||
'subdomain': 'www',
|
||||
'domain': 'john',
|
||||
'suffix': u'app.os.fedoraproject.org',
|
||||
'tld': u'app.os.fedoraproject.org',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'ftp://www.xn--mxail5aa.xn--11b4c3d',
|
||||
'fld': 'xn--mxail5aa.xn--11b4c3d',
|
||||
'subdomain': 'www',
|
||||
'domain': 'xn--mxail5aa',
|
||||
'suffix': 'xn--11b4c3d',
|
||||
'tld': 'xn--11b4c3d',
|
||||
'kwargs': {'fail_silently': True},
|
||||
},
|
||||
{
|
||||
'url': 'http://cloud.fedoraproject.org',
|
||||
'fld': 'cloud.fedoraproject.org',
|
||||
'subdomain': '',
|
||||
'domain': 'cloud.fedoraproject.org',
|
||||
'suffix': 'cloud.fedoraproject.org',
|
||||
'tld': 'cloud.fedoraproject.org',
|
||||
'kwargs': {'fail_silently': True}
|
||||
},
|
||||
{
|
||||
'url': 'github.io',
|
||||
'fld': 'github.io',
|
||||
'subdomain': '',
|
||||
'domain': 'github.io',
|
||||
'suffix': 'github.io',
|
||||
'tld': 'github.io',
|
||||
'kwargs': {'fail_silently': True, 'fix_protocol': True}
|
||||
},
|
||||
{
|
||||
'url': urlsplit('http://lemonde.fr/article.html'),
|
||||
'fld': 'lemonde.fr',
|
||||
'subdomain': '',
|
||||
'domain': 'lemonde',
|
||||
'suffix': 'fr',
|
||||
'tld': 'fr',
|
||||
'kwargs': {'fail_silently': True}
|
||||
},
|
||||
{
|
||||
'url': 'https://github.com....../barseghyanartur/tld/',
|
||||
'fld': 'github.com',
|
||||
'subdomain': '',
|
||||
'domain': 'github',
|
||||
'suffix': 'com',
|
||||
'tld': 'com',
|
||||
'kwargs': {'fail_silently': True}
|
||||
},
|
||||
]
|
||||
|
||||
self.bad_patterns = {
|
||||
'v2.www.google.com': {
|
||||
'exception': TldBadUrl,
|
||||
},
|
||||
'/index.php?a=1&b=2': {
|
||||
'exception': TldBadUrl,
|
||||
},
|
||||
'http://www.tld.doesnotexist': {
|
||||
'exception': TldDomainNotFound,
|
||||
},
|
||||
'https://2001:0db8:0000:85a3:0000:0000:ac1f:8001': {
|
||||
'exception': TldDomainNotFound,
|
||||
},
|
||||
'http://192.169.1.1': {
|
||||
'exception': TldDomainNotFound,
|
||||
},
|
||||
'http://localhost:8080': {
|
||||
'exception': TldDomainNotFound,
|
||||
},
|
||||
'https://localhost': {
|
||||
'exception': TldDomainNotFound,
|
||||
},
|
||||
'https://localhost2': {
|
||||
'exception': TldImproperlyConfigured,
|
||||
'kwargs': {'search_public': False, 'search_private': False},
|
||||
},
|
||||
}
|
||||
|
||||
self.invalid_tlds = {
|
||||
'v2.www.google.com',
|
||||
'tld.doesnotexist',
|
||||
'2001:0db8:0000:85a3:0000:0000:ac1f',
|
||||
'192.169.1.1',
|
||||
'localhost',
|
||||
'google.com',
|
||||
}
|
||||
|
||||
self.tld_names_local_path_custom = project_dir(
|
||||
join(
|
||||
'tests',
|
||||
'res',
|
||||
'effective_tld_names_custom.dat.txt'
|
||||
)
|
||||
)
|
||||
self.good_patterns_custom_parser = [
|
||||
{
|
||||
'url': 'http://www.foreverchild',
|
||||
'fld': 'www.foreverchild',
|
||||
'subdomain': '',
|
||||
'domain': 'www',
|
||||
'suffix': 'foreverchild',
|
||||
'tld': 'foreverchild',
|
||||
'kwargs': {
|
||||
'fail_silently': True,
|
||||
# 'parser_class': self.get_custom_parser_class(),
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.v2.foreverchild',
|
||||
'fld': 'v2.foreverchild',
|
||||
'subdomain': 'www',
|
||||
'domain': 'v2',
|
||||
'suffix': 'foreverchild',
|
||||
'tld': 'foreverchild',
|
||||
'kwargs': {
|
||||
'fail_silently': True,
|
||||
# 'parser_class': self.get_custom_parser_class(),
|
||||
},
|
||||
},
|
||||
]
|
||||
reset_settings()
|
||||
|
||||
def tearDown(self):
|
||||
"""Tear down."""
|
||||
reset_settings()
|
||||
Registry.reset()
|
||||
|
||||
@property
|
||||
def good_url(self):
|
||||
return self.good_patterns[0]['url']
|
||||
|
||||
@property
|
||||
def bad_url(self):
|
||||
return list(self.bad_patterns.keys())[0]
|
||||
|
||||
def get_custom_parser_class(
|
||||
self,
|
||||
uid: str = 'custom_mozilla',
|
||||
source_url: str = None,
|
||||
local_path: str = 'tests/res/effective_tld_names_custom.dat.txt'
|
||||
) -> Type[BaseTLDSourceParser]:
|
||||
# Define a custom TLD source parser class
|
||||
parser_class = type(
|
||||
'CustomMozillaTLDSourceParser',
|
||||
(BaseMozillaTLDSourceParser,),
|
||||
{
|
||||
'uid': uid,
|
||||
'source_url': source_url,
|
||||
'local_path': local_path,
|
||||
}
|
||||
)
|
||||
return parser_class
|
||||
|
||||
@log_info
|
||||
def test_0_tld_names_loaded(self):
|
||||
"""Test if tld names are loaded."""
|
||||
get_fld('http://www.google.co.uk')
|
||||
from ..utils import tld_names
|
||||
res = len(tld_names) > 0
|
||||
self.assertTrue(res)
|
||||
return res
|
||||
|
||||
@internet_available_only
|
||||
@log_info
|
||||
def test_1_update_tld_names(self):
|
||||
"""Test updating the tld names (re-fetch mozilla source)."""
|
||||
res = update_tld_names(fail_silently=False)
|
||||
self.assertTrue(res)
|
||||
return res
|
||||
|
||||
@log_info
|
||||
def test_2_fld_good_patterns_pass(self):
|
||||
"""Test good URL patterns."""
|
||||
res = []
|
||||
for data in self.good_patterns:
|
||||
_res = get_fld(data['url'], **data['kwargs'])
|
||||
self.assertEqual(_res, data['fld'])
|
||||
res.append(_res)
|
||||
return res
|
||||
|
||||
@log_info
def test_3_fld_bad_patterns_pass(self):
    """Test bad URL patterns: `get_fld` returns None when failing silently."""
    res = []
    # Only the URLs are needed here; the original iterated `.items()`
    # and ignored the per-URL params.
    for url in self.bad_patterns:
        _res = get_fld(url, fail_silently=True)
        self.assertEqual(_res, None)
        res.append(_res)
    return res
|
||||
|
||||
@log_info
|
||||
def test_4_override_settings(self):
|
||||
"""Testing settings override."""
|
||||
def override_settings():
|
||||
"""Override settings."""
|
||||
return get_setting('DEBUG')
|
||||
|
||||
self.assertEqual(defaults.DEBUG, override_settings())
|
||||
|
||||
set_setting('DEBUG', True)
|
||||
|
||||
self.assertEqual(True, override_settings())
|
||||
|
||||
return override_settings()
|
||||
|
||||
@log_info
|
||||
def test_5_tld_good_patterns_pass_parsed_object(self):
|
||||
"""Test good URL patterns."""
|
||||
res = []
|
||||
for data in self.good_patterns:
|
||||
kwargs = copy.copy(data['kwargs'])
|
||||
kwargs['as_object'] = True
|
||||
_res = get_tld(data['url'], **kwargs)
|
||||
self.assertEqual(_res.tld, data['tld'])
|
||||
self.assertEqual(_res.subdomain, data['subdomain'])
|
||||
self.assertEqual(_res.domain, data['domain'])
|
||||
self.assertEqual(_res.suffix, data['suffix'])
|
||||
self.assertEqual(_res.fld, data['fld'])
|
||||
|
||||
self.assertEqual(
|
||||
str(_res).encode('utf8'),
|
||||
data['tld'].encode('utf8')
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
_res.__dict__,
|
||||
{
|
||||
'tld': _res.tld,
|
||||
'domain': _res.domain,
|
||||
'subdomain': _res.subdomain,
|
||||
'fld': _res.fld,
|
||||
'parsed_url': _res.parsed_url,
|
||||
}
|
||||
)
|
||||
|
||||
res.append(_res)
|
||||
return res
|
||||
|
||||
@log_info
|
||||
def test_6_override_full_names_path(self):
|
||||
default = project_dir('dummy.txt')
|
||||
override_base = '/tmp/test'
|
||||
set_setting('NAMES_LOCAL_PATH_PARENT', override_base)
|
||||
modified = project_dir('dummy.txt')
|
||||
self.assertNotEqual(default, modified)
|
||||
self.assertEqual(modified, abspath('/tmp/test/dummy.txt'))
|
||||
|
||||
@log_info
|
||||
def test_7_public_private(self):
|
||||
res = get_fld(
|
||||
'http://silly.cc.ua',
|
||||
fail_silently=True,
|
||||
search_private=False
|
||||
)
|
||||
|
||||
self.assertEqual(res, None)
|
||||
|
||||
res = get_fld(
|
||||
'http://silly.cc.ua',
|
||||
fail_silently=True,
|
||||
search_private=True
|
||||
)
|
||||
|
||||
self.assertEqual(res, 'silly.cc.ua')
|
||||
|
||||
res = get_fld(
|
||||
'mercy.compute.amazonaws.com',
|
||||
fail_silently=True,
|
||||
search_private=False,
|
||||
fix_protocol=True
|
||||
)
|
||||
|
||||
self.assertEqual(res, None)
|
||||
|
||||
res = get_fld(
|
||||
'http://whatever.com',
|
||||
fail_silently=True,
|
||||
search_public=False
|
||||
)
|
||||
|
||||
self.assertEqual(res, None)
|
||||
|
||||
@log_info
|
||||
def test_8_fld_bad_patterns_exceptions(self):
|
||||
"""Test exceptions."""
|
||||
res = []
|
||||
for url, params in self.bad_patterns.items():
|
||||
kwargs = params['kwargs'] if 'kwargs' in params else {}
|
||||
kwargs['fail_silently'] = False
|
||||
with self.assertRaises(params['exception']):
|
||||
_res = get_fld(url, **kwargs)
|
||||
res.append(_res)
|
||||
return res
|
||||
|
||||
@log_info
|
||||
def test_9_tld_good_patterns_pass(self):
|
||||
"""Test `get_tld` good URL patterns."""
|
||||
res = []
|
||||
for data in self.good_patterns:
|
||||
_res = get_tld(data['url'], **data['kwargs'])
|
||||
self.assertEqual(_res, data['tld'])
|
||||
res.append(_res)
|
||||
return res
|
||||
|
||||
@log_info
def test_10_tld_bad_patterns_pass(self):
    """Test bad URL patterns: `get_tld` returns None when failing silently."""
    res = []
    # Only the URLs are needed here; the original iterated `.items()`
    # and ignored the per-URL params.
    for url in self.bad_patterns:
        _res = get_tld(url, fail_silently=True)
        self.assertEqual(_res, None)
        res.append(_res)
    return res
|
||||
|
||||
@log_info
|
||||
def test_11_parse_tld_good_patterns(self):
|
||||
"""Test `parse_tld` good URL patterns."""
|
||||
res = []
|
||||
for data in self.good_patterns:
|
||||
_res = parse_tld(data['url'], **data['kwargs'])
|
||||
self.assertEqual(
|
||||
_res,
|
||||
(data['tld'], data['domain'], data['subdomain'])
|
||||
)
|
||||
res.append(_res)
|
||||
return res
|
||||
|
||||
@log_info
|
||||
def test_12_is_tld_good_patterns(self):
|
||||
"""Test `is_tld` good URL patterns."""
|
||||
for data in self.good_patterns:
|
||||
self.assertTrue(is_tld(data['tld']))
|
||||
|
||||
@log_info
|
||||
def test_13_is_tld_bad_patterns(self):
|
||||
"""Test `is_tld` bad URL patterns."""
|
||||
for _tld in self.invalid_tlds:
|
||||
self.assertFalse(is_tld(_tld))
|
||||
|
||||
@log_info
|
||||
def test_14_fail_update_tld_names(self):
|
||||
"""Test fail `update_tld_names`."""
|
||||
parser_class = self.get_custom_parser_class(
|
||||
uid='custom_mozilla_2',
|
||||
source_url='i-do-not-exist'
|
||||
)
|
||||
# Assert raise TldIOError on wrong NAMES_SOURCE_URL
|
||||
with self.assertRaises(TldIOError):
|
||||
update_tld_names(fail_silently=False, parser_uid=parser_class.uid)
|
||||
|
||||
# Assert return False on wrong NAMES_SOURCE_URL
|
||||
self.assertFalse(
|
||||
update_tld_names(fail_silently=True, parser_uid=parser_class.uid)
|
||||
)
|
||||
|
||||
@log_info
|
||||
def test_15_fail_get_tld_names(self):
|
||||
"""Test fail `update_tld_names`."""
|
||||
parser_class = self.get_custom_parser_class(
|
||||
uid='custom_mozilla_3',
|
||||
source_url='i-do-not-exist',
|
||||
local_path='/srv/tests/res/effective_tld_names_custom_3.dat.txt'
|
||||
)
|
||||
reset_tld_names()
|
||||
# Assert raise TldIOError on wrong NAMES_SOURCE_URL
|
||||
for params in self.good_patterns:
|
||||
kwargs = {'url': params['url']}
|
||||
kwargs.update(params['kwargs'])
|
||||
kwargs['fail_silently'] = False
|
||||
kwargs['parser_class'] = parser_class
|
||||
with self.assertRaises(TldIOError):
|
||||
get_tld(**kwargs)
|
||||
|
||||
@log_info
|
||||
def test_15_fail_get_fld_wrong_kwargs(self):
|
||||
"""Test fail `get_fld` with wrong kwargs."""
|
||||
with self.assertRaises(TldImproperlyConfigured):
|
||||
get_fld(self.good_url, as_object=True)
|
||||
|
||||
@log_info
def test_16_fail_parse_tld(self):
    """Test fail `parse_tld`.

    Assert raise TldIOError on wrong `NAMES_SOURCE_URL` for `parse_tld`.
    """
    broken_parser = self.get_custom_parser_class(
        source_url='i-do-not-exist'
    )
    # `parse_tld` swallows lookup errors and signals failure with a
    # triple of Nones rather than raising.
    outcome = parse_tld(
        self.bad_url,
        fail_silently=False,
        parser_class=broken_parser
    )
    self.assertEqual((None, None, None), outcome)
|
||||
|
||||
@log_info
def test_17_get_tld_names_and_reset_tld_names(self):
    """Test fail `get_tld_names` and repair using `reset_tld_names`."""
    # Fresh temp path guarantees no pre-existing local TLD names file.
    tmp_filename = join(
        gettempdir(),
        f'{self.faker.uuid4()}.dat.txt'
    )
    parser_class = self.get_custom_parser_class(
        source_url='i-do-not-exist',
        local_path=tmp_filename
    )
    # Clear cached tries so the broken parser is actually consulted.
    reset_tld_names()

    with self.subTest('Assert raise TldIOError'):
        # Assert raise TldIOError on wrong NAMES_SOURCE_URL for
        # `get_tld_names`
        with self.assertRaises(TldIOError):
            get_tld_names(
                fail_silently=False,
                parser_class=parser_class
            )

    # Second broken parser with its own fresh temp path for silent mode.
    tmp_filename = join(
        gettempdir(),
        f'{self.faker.uuid4()}.dat.txt'
    )
    parser_class_2 = self.get_custom_parser_class(
        source_url='i-do-not-exist-2',
        local_path=tmp_filename
    )
    reset_tld_names()

    with self.subTest('Assert get None'):
        # Assert get None on wrong `NAMES_SOURCE_URL` for `get_tld_names`
        self.assertIsNone(
            get_tld_names(
                fail_silently=True,
                parser_class=parser_class_2
            )
        )
|
||||
|
||||
@internet_available_only
@log_info
def test_18_update_tld_names_cli(self):
    """Test the return code of the CLI version of `update_tld_names`."""
    reset_tld_names()
    # Shell convention: exit code 0 signals a successful update.
    self.assertEqual(update_tld_names_cli(), 0)
|
||||
|
||||
@log_info
def test_19_parse_tld_custom_tld_names_good_patterns(self):
    """Test `parse_tld` good URL patterns for custom tld names."""
    collected = []

    for case in self.good_patterns_custom_parser:
        call_kwargs = copy.copy(case['kwargs'])
        call_kwargs['parser_class'] = self.get_custom_parser_class()
        parsed = parse_tld(case['url'], **call_kwargs)
        expected = (case['tld'], case['domain'], case['subdomain'])
        self.assertEqual(parsed, expected)
        collected.append(parsed)
    return collected
|
||||
|
||||
@log_info
def test_20_tld_custom_tld_names_good_patterns_pass_parsed_object(self):
    """Test `get_tld` good URL patterns for custom tld names."""
    res = []
    for data in self.good_patterns_custom_parser:
        # Copy so the shared fixture dict is not mutated between cases.
        kwargs = copy.copy(data['kwargs'])
        kwargs.update({
            'as_object': True,
            'parser_class': self.get_custom_parser_class(),
        })
        _res = get_tld(data['url'], **kwargs)
        # Every component of the Result object must match the fixture.
        self.assertEqual(_res.tld, data['tld'])
        self.assertEqual(_res.subdomain, data['subdomain'])
        self.assertEqual(_res.domain, data['domain'])
        self.assertEqual(_res.suffix, data['suffix'])
        self.assertEqual(_res.fld, data['fld'])

        # str(Result) must render as the TLD (byte-compared as UTF-8).
        self.assertEqual(
            str(_res).encode('utf8'),
            data['tld'].encode('utf8')
        )

        # The object's attribute dict exposes exactly these five keys.
        self.assertEqual(
            _res.__dict__,
            {
                'tld': _res.tld,
                'domain': _res.domain,
                'subdomain': _res.subdomain,
                'fld': _res.fld,
                'parsed_url': _res.parsed_url,
            }
        )

        res.append(_res)
    return res
|
||||
|
||||
@log_info
def test_21_reset_tld_names_for_custom_parser(self):
    """Test `reset_tld_names` for `tld_names_local_path`."""
    res = []
    parser_class = self.get_custom_parser_class()
    for data in self.good_patterns_custom_parser:
        # Copy so the shared fixture dict is not mutated between cases.
        kwargs = copy.copy(data['kwargs'])
        kwargs.update({
            'as_object': True,
            'parser_class': self.get_custom_parser_class(),
        })
        _res = get_tld(data['url'], **kwargs)
        # Every component of the Result object must match the fixture.
        self.assertEqual(_res.tld, data['tld'])
        self.assertEqual(_res.subdomain, data['subdomain'])
        self.assertEqual(_res.domain, data['domain'])
        self.assertEqual(_res.suffix, data['suffix'])
        self.assertEqual(_res.fld, data['fld'])

        # str(Result) must render as the TLD (byte-compared as UTF-8).
        self.assertEqual(
            str(_res).encode('utf8'),
            data['tld'].encode('utf8')
        )

        # The object's attribute dict exposes exactly these five keys.
        self.assertEqual(
            _res.__dict__,
            {
                'tld': _res.tld,
                'domain': _res.domain,
                'subdomain': _res.subdomain,
                'fld': _res.fld,
                'parsed_url': _res.parsed_url,
            }
        )

        res.append(_res)

    # The lookups above must have populated the container for this
    # parser's local path; a targeted reset must remove only that entry.
    tld_names = get_tld_names_container()
    self.assertIn(parser_class.local_path, tld_names)
    reset_tld_names(parser_class.local_path)
    self.assertNotIn(parser_class.local_path, tld_names)

    return res
|
||||
|
||||
@log_info
def test_22_fail_define_custom_parser_class_without_uid(self):
    """Test fail define custom parser class without `uid`."""
    # No `uid` at all: validation must reject the class.
    class CustomParser(BaseTLDSourceParser):
        pass

    # Has a `uid` but no `get_tld_names` implementation of its own.
    class AnotherCustomParser(BaseTLDSourceParser):

        uid = 'another-custom-parser'

    # Assert raise TldImproperlyConfigured
    with self.assertRaises(TldImproperlyConfigured):
        CustomParser.get_tld_names()

    # Assert raise NotImplementedError
    with self.assertRaises(NotImplementedError):
        AnotherCustomParser.get_tld_names()
|
||||
|
||||
@log_info
def test_23_len_trie_nodes(self):
    """Test len of the trie nodes."""
    # A successful lookup forces the default parser's trie to be loaded.
    get_tld('http://delusionalinsanity.com')
    container = get_tld_names_container()
    loaded_trie = container[MozillaTLDSourceParser.local_path]
    self.assertGreater(len(loaded_trie), 0)
|
||||
|
||||
@log_info
def test_24_get_tld_names_no_arguments(self):
    """Test `get_tld_names` called with no arguments."""
    # With no arguments the default (Mozilla) parser is used; the
    # returned container must not be empty.
    tld_names = get_tld_names()
    self.assertGreater(
        len(tld_names),
        0
    )
|
||||
|
||||
|
||||
# Allow running this test module directly (python path/to/this_file.py).
if __name__ == '__main__':
    unittest.main()
|
@ -0,0 +1,64 @@
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'Trie',
|
||||
'TrieNode',
|
||||
)
|
||||
|
||||
|
||||
class TrieNode(object):
    """A single node of the TLD trie.

    ``__slots__`` keeps per-node memory low: a full public-suffix list
    produces many thousands of these nodes.
    """

    __slots__ = ('children', 'exception', 'leaf', 'private')

    def __init__(self):
        # Flags default to "not a terminal node" / "public section".
        self.private = False
        self.leaf = False
        # No exception rule and no children until something is added.
        self.exception = None
        self.children = None
|
||||
|
||||
|
||||
class Trie(object):
    """An adhoc Trie data structure to store tlds in reverse notation order.

    ``len(trie)`` reports the number of TLD entries added, not the number
    of trie nodes.
    """

    def __init__(self):
        self.root = TrieNode()
        self.__nodes = 0

    def __len__(self):
        return self.__nodes

    def add(self, tld: str, private: bool = False) -> None:
        """Add a single TLD entry to the trie.

        :param tld: TLD in dotted notation (e.g. ``co.uk`` or ``!city.jp``).
        :param private: True when the entry comes from the private section
            of the public suffix list.
        """
        node = self.root

        # Iterate over the tld parts in reverse order (trie stores
        # "uk" -> "co", not "co" -> "uk").
        for part in reversed(tld.split('.')):

            # Exception rule (leading '!'): record it on the current node
            # and stop descending.
            if part.startswith('!'):
                node.exception = part[1:]
                break

            # To save up some RAM, we initialize the children dict only
            # when strictly necessary.
            if node.children is None:
                node.children = {}

            child = node.children.get(part)
            if child is None:
                child = TrieNode()
                node.children[part] = child

            node = child

        # Mark the node we stopped on as a terminal entry.
        node.leaf = True

        if private:
            node.private = True

        self.__nodes += 1
|
@ -0,0 +1,624 @@
|
||||
from __future__ import unicode_literals
|
||||
import argparse
|
||||
from codecs import open as codecs_open
|
||||
from functools import lru_cache
|
||||
# codecs_open = open
|
||||
from os.path import isabs
|
||||
import sys
|
||||
from typing import Dict, Type, Union, Tuple, List, Optional
|
||||
from urllib.parse import urlsplit, SplitResult
|
||||
|
||||
from .base import BaseTLDSourceParser
|
||||
from .exceptions import (
|
||||
TldBadUrl,
|
||||
TldDomainNotFound,
|
||||
TldImproperlyConfigured,
|
||||
TldIOError,
|
||||
)
|
||||
from .helpers import project_dir
|
||||
from .trie import Trie
|
||||
from .registry import Registry
|
||||
from .result import Result
|
||||
|
||||
__author__ = 'Artur Barseghyan'
|
||||
__copyright__ = '2013-2020 Artur Barseghyan'
|
||||
__license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later'
|
||||
__all__ = (
|
||||
'BaseMozillaTLDSourceParser',
|
||||
'get_fld',
|
||||
'get_tld',
|
||||
'get_tld_names',
|
||||
'get_tld_names_container',
|
||||
'is_tld',
|
||||
'MozillaTLDSourceParser',
|
||||
'parse_tld',
|
||||
'pop_tld_names_container',
|
||||
'process_url',
|
||||
'reset_tld_names',
|
||||
'Result',
|
||||
'tld_names',
|
||||
'update_tld_names',
|
||||
'update_tld_names_cli',
|
||||
'update_tld_names_container',
|
||||
)
|
||||
|
||||
tld_names: Dict[str, Trie] = {}
|
||||
|
||||
|
||||
def get_tld_names_container() -> Dict[str, Trie]:
    """Return the shared container of all loaded TLD name tries.

    :return: Mapping of local file paths to their parsed ``Trie``.
    :rtype: dict
    """
    # Reading a module-level name requires no ``global`` declaration.
    return tld_names
|
||||
|
||||
|
||||
def update_tld_names_container(tld_names_local_path: str,
                               trie_obj: Trie) -> None:
    """Store ``trie_obj`` in the shared container under the given path.

    :param tld_names_local_path: Local file path used as the container key.
    :param trie_obj: Parsed ``Trie`` of TLD names for that path.
    :return:
    """
    # Item assignment mutates the shared dict in place (no rebinding),
    # so no ``global`` declaration is required.
    get_tld_names_container()[tld_names_local_path] = trie_obj
|
||||
|
||||
|
||||
def pop_tld_names_container(tld_names_local_path: str) -> None:
    """Remove one entry from the shared TLD names container.

    Missing keys are ignored (no ``KeyError``).

    :param tld_names_local_path: Key (local file path) to drop.
    :return:
    """
    get_tld_names_container().pop(tld_names_local_path, None)
|
||||
|
||||
|
||||
# NOTE(review): ``lru_cache`` memoizes the boolean outcome per
# (fail_silently, parser_uid) pair, so repeated calls with the same
# arguments within one process will NOT re-download — confirm intended.
@lru_cache(maxsize=128, typed=True)
def update_tld_names(
    fail_silently: bool = False,
    parser_uid: Optional[str] = None
) -> bool:
    """Update TLD names.

    Downloads fresh TLD name lists via the registered parser(s).

    :param fail_silently: If True, parser failures do not raise.
    :param parser_uid: When given, only that parser is updated; otherwise
        every registered parser with a ``source_url`` is updated.
    :return: True only if every attempted update succeeded.
    """
    results: List[bool] = []
    results_append = results.append  # hoisted bound method (perf)
    if parser_uid:
        parser_cls = Registry.get(parser_uid, None)
        # Parsers without a ``source_url`` cannot be updated; skip them.
        if parser_cls and parser_cls.source_url:
            results_append(
                parser_cls.update_tld_names(fail_silently=fail_silently)
            )
    else:
        for parser_uid, parser_cls in Registry.items():
            if parser_cls and parser_cls.source_url:
                results_append(
                    parser_cls.update_tld_names(fail_silently=fail_silently)
                )

    # all([]) is True: no eligible parsers counts as success.
    return all(results)
|
||||
|
||||
|
||||
def update_tld_names_cli() -> int:
    """CLI wrapper for update_tld_names.

    Since update_tld_names returns True on success, we need to negate the
    result to match CLI semantics.
    """
    cli_parser = argparse.ArgumentParser(description='Update TLD names')
    cli_parser.add_argument(
        'parser_uid',
        nargs='?',
        default=None,
        help="UID of the parser to update TLD names for.",
    )
    cli_parser.add_argument(
        '--fail-silently',
        dest="fail_silently",
        default=False,
        action='store_true',
        help="Fail silently",
    )
    options = cli_parser.parse_args(sys.argv[1:])
    succeeded = update_tld_names(
        parser_uid=options.parser_uid,
        fail_silently=options.fail_silently
    )
    # Shell convention: 0 means success, non-zero means failure.
    return int(not succeeded)
|
||||
|
||||
|
||||
def get_tld_names(
    fail_silently: bool = False,
    retry_count: int = 0,
    parser_class: Optional[Type[BaseTLDSourceParser]] = None
) -> Dict[str, Trie]:
    """Build the ``tlds`` list if empty. Recursive.

    Delegates to the parser class; falls back to the Mozilla parser when
    none is given.

    :param fail_silently: If set to True, no exceptions are raised and None
        is returned on failure.
    :param retry_count: If greater than 1, we raise an exception in order
        to avoid infinite loops.
    :param parser_class:
    :type fail_silently: bool
    :type retry_count: int
    :type parser_class: BaseTLDSourceParser
    :return: List of TLD names
    :rtype: obj:`tld.utils.Trie`
    """
    if not parser_class:
        parser_class = MozillaTLDSourceParser

    return parser_class.get_tld_names(
        fail_silently=fail_silently,
        retry_count=retry_count
    )
|
||||
|
||||
|
||||
# **************************************************************************
|
||||
# **************************** Parser classes ******************************
|
||||
# **************************************************************************
|
||||
|
||||
class BaseMozillaTLDSourceParser(BaseTLDSourceParser):
    """Base parser for public-suffix-list style (Mozilla) TLD sources."""

    @classmethod
    def get_tld_names(
        cls,
        fail_silently: bool = False,
        retry_count: int = 0
    ) -> Optional[Dict[str, Trie]]:
        """Parse the local TLD names file into the shared container.

        On a missing/unreadable local file it downloads the source once
        and retries itself recursively; ``retry_count`` caps recursion.

        :param fail_silently: If True, return None instead of raising.
        :param retry_count: Incremented on each retry; > 1 aborts.
        :return: The shared {local_path: Trie} container, or None on
            silent failure.
        """
        # More than one retry means the download also failed: give up.
        if retry_count > 1:
            if fail_silently:
                return None
            else:
                raise TldIOError

        global tld_names
        _tld_names = tld_names
        # _tld_names = get_tld_names_container()

        # If already loaded, return
        if (
            cls.local_path in _tld_names
            and _tld_names[cls.local_path] is not None
        ):
            return _tld_names

        try:
            # Load the TLD names file
            if isabs(cls.local_path):
                local_path = cls.local_path
            else:
                local_path = project_dir(cls.local_path)
            local_file = codecs_open(
                local_path,
                'r',
                encoding='utf8'
            )
            trie = Trie()
            trie_add = trie.add  # Performance opt
            # Make a list of it all, strip all garbage
            private_section = False

            for line in local_file:
                # Everything after this marker belongs to the private
                # section of the public suffix list.
                if '===BEGIN PRIVATE DOMAINS===' in line:
                    private_section = True

                # Puny code TLD names
                if '// xn--' in line:
                    line = line.split()[1]

                # Skip comments ('/') and blank lines.
                if line[0] in ('/', '\n'):
                    continue

                trie_add(
                    f'{line.strip()}',
                    private=private_section
                )

            update_tld_names_container(cls.local_path, trie)

            local_file.close()
        except IOError as err:
            # Grab the file
            cls.update_tld_names(
                fail_silently=fail_silently
            )
            # Increment ``retry_count`` in order to avoid infinite loops
            retry_count += 1
            # Run again
            return cls.get_tld_names(
                fail_silently=fail_silently,
                retry_count=retry_count
            )
        except Exception as err:
            if fail_silently:
                return None
            else:
                raise err
        finally:
            # ``local_file`` may be unbound if opening itself failed;
            # the inner try/except absorbs that as well as double-close.
            try:
                local_file.close()
            except Exception:
                pass

        return _tld_names
|
||||
|
||||
|
||||
class MozillaTLDSourceParser(BaseMozillaTLDSourceParser):
    """Mozilla TLD source.

    Default parser: fetches the public suffix list from publicsuffix.org
    and caches it under ``res/`` relative to the package directory.
    """

    # Registry key for this parser.
    uid: str = 'mozilla'
    # Remote source of the public suffix list.
    source_url: str = 'https://publicsuffix.org/list/public_suffix_list.dat'
    # Package-relative path of the cached copy.
    local_path: str = 'res/effective_tld_names.dat.txt'
|
||||
|
||||
# **************************************************************************
|
||||
# **************************** Core functions ******************************
|
||||
# **************************************************************************
|
||||
|
||||
|
||||
def process_url(
    url: str,
    fail_silently: bool = False,
    fix_protocol: bool = False,
    search_public: bool = True,
    search_private: bool = True,
    parser_class: Type[BaseTLDSourceParser] = MozillaTLDSourceParser
) -> Union[Tuple[List[str], int, SplitResult], Tuple[None, None, SplitResult]]:
    """Process URL: split the hostname and locate its TLD in the trie.

    :param parser_class: Parser whose trie is queried.
    :param url: URL string, or an already-split ``SplitResult``.
    :param fail_silently: If True, failures return ``(None, None,
        parsed_url)`` instead of raising.
    :param fix_protocol: If True, prepend ``https://`` when the scheme
        is missing.
    :param search_public: Allow matches from the public section.
    :param search_private: Allow matches from the private section.
    :return: ``(domain_parts, non_zero_i, parsed_url)`` where
        ``non_zero_i`` is the index of the first TLD label in
        ``domain_parts`` (``-1`` when the whole hostname is the TLD).
    """
    if not (search_public or search_private):
        raise TldImproperlyConfigured(
            "Either `search_public` or `search_private` (or both) shall be "
            "set to True."
        )

    # Init
    _tld_names = get_tld_names(
        fail_silently=fail_silently,
        parser_class=parser_class
    )

    if not isinstance(url, SplitResult):
        url = url.lower()

        if (
            fix_protocol and not url.startswith(('//', 'http://', 'https://'))
        ):
            url = f'https://{url}'

        # Get parsed URL as we might need it later
        parsed_url = urlsplit(url)
    else:
        parsed_url = url

    # Get (sub) domain name
    domain_name = parsed_url.hostname

    if not domain_name:
        if fail_silently:
            return None, None, parsed_url
        else:
            raise TldBadUrl(url=url)

    # This will correctly handle dots at the end of domain name in URLs like
    # https://github.com............/barseghyanartur/tld/
    if domain_name.endswith('.'):
        domain_name = domain_name.rstrip('.')

    domain_parts = domain_name.split('.')
    tld_names_local_path = parser_class.local_path

    # Now we query our Trie iterating on the domain parts in reverse order
    node = _tld_names[tld_names_local_path].root
    current_length = 0
    tld_length = 0
    match = None
    len_domain_parts = len(domain_parts)
    for i in range(len_domain_parts-1, -1, -1):
        part = domain_parts[i]

        # Cannot go deeper
        if node.children is None:
            break

        # Exception
        if part == node.exception:
            break

        child = node.children.get(part)

        # Wildcards
        if child is None:
            child = node.children.get('*')

        # If the current part is not in current node's children, we can stop
        if child is None:
            break

        # Else we move deeper and increment our tld offset
        current_length += 1
        node = child

        # Remember the deepest terminal node seen so far; the walk may
        # continue past it without finding a longer match.
        if node.leaf:
            tld_length = current_length
            match = node

    # Checking the node we finished on is a leaf and is one we allow
    if (
        (match is None) or
        (not match.leaf) or
        (not search_public and not match.private) or
        (not search_private and match.private)
    ):
        if fail_silently:
            return None, None, parsed_url
        else:
            raise TldDomainNotFound(domain_name=domain_name)

    if len_domain_parts == tld_length:
        non_zero_i = -1  # hostname = tld
    else:
        # At least one label precedes the TLD; never index below 1.
        non_zero_i = max(1, len_domain_parts - tld_length)

    return domain_parts, non_zero_i, parsed_url
|
||||
|
||||
|
||||
def get_fld(
    url: str,
    fail_silently: bool = False,
    fix_protocol: bool = False,
    search_public: bool = True,
    search_private: bool = True,
    parser_class: Type[BaseTLDSourceParser] = MozillaTLDSourceParser,
    **kwargs
) -> Optional[str]:
    """Extract the first level domain.

    Extract the first level domain based on the mozilla's effective TLD
    names dat file. Returns a string. May throw ``TldBadUrl`` or
    ``TldDomainNotFound`` exceptions if there's bad URL provided or no TLD
    match found respectively.

    :param url: URL to get first level domain from.
    :param fail_silently: If set to True, no exceptions are raised and None
        is returned on failure.
    :param fix_protocol: If set to True, missing or wrong protocol is
        ignored (https is appended instead).
    :param search_public: If set to True, search in public domains.
    :param search_private: If set to True, search in private domains.
    :param parser_class:
    :type url: str
    :type fail_silently: bool
    :type fix_protocol: bool
    :type search_public: bool
    :type search_private: bool
    :return: String with the first level domain; returns None on failure.
    :rtype: str
    """
    # `as_object` was moved to `get_tld`; reject it loudly here so callers
    # do not silently get a plain string back.
    if 'as_object' in kwargs:
        raise TldImproperlyConfigured(
            "`as_object` argument is deprecated for `get_fld`. Use `get_tld` "
            "instead."
        )

    domain_parts, non_zero_i, parsed_url = process_url(
        url=url,
        fail_silently=fail_silently,
        fix_protocol=fix_protocol,
        search_public=search_public,
        search_private=search_private,
        parser_class=parser_class
    )

    if domain_parts is None:
        return None

    # This should be None when domain_parts is None
    # but mypy isn't quite smart enough to figure that out yet
    assert non_zero_i is not None
    if non_zero_i < 0:
        # hostname = tld
        return parsed_url.hostname

    # FLD = registrable domain + TLD (one label before the TLD start).
    return ".".join(domain_parts[non_zero_i-1:])
|
||||
|
||||
|
||||
def get_tld(
    url: str,
    fail_silently: bool = False,
    as_object: bool = False,
    fix_protocol: bool = False,
    search_public: bool = True,
    search_private: bool = True,
    parser_class: Type[BaseTLDSourceParser] = MozillaTLDSourceParser
) -> Optional[Union[str, Result]]:
    """Extract the top level domain.

    Extract the top level domain based on the mozilla's effective TLD names
    dat file. Returns a string. May throw ``TldBadUrl`` or
    ``TldDomainNotFound`` exceptions if there's bad URL provided or no TLD
    match found respectively.

    :param url: URL to get top level domain from.
    :param fail_silently: If set to True, no exceptions are raised and None
        is returned on failure.
    :param as_object: If set to True, ``tld.utils.Result`` object is returned,
        ``domain``, ``suffix`` and ``tld`` properties.
    :param fix_protocol: If set to True, missing or wrong protocol is
        ignored (https is appended instead).
    :param search_public: If set to True, search in public domains.
    :param search_private: If set to True, search in private domains.
    :param parser_class:
    :type url: str
    :type fail_silently: bool
    :type as_object: bool
    :type fix_protocol: bool
    :type search_public: bool
    :type search_private: bool
    :return: String with top level domain (if ``as_object`` argument
        is set to False) or a ``tld.utils.Result`` object (if ``as_object``
        argument is set to True); returns None on failure.
    :rtype: str
    """
    domain_parts, non_zero_i, parsed_url = process_url(
        url=url,
        fail_silently=fail_silently,
        fix_protocol=fix_protocol,
        search_public=search_public,
        search_private=search_private,
        parser_class=parser_class
    )

    if domain_parts is None:
        return None

    # This should be None when domain_parts is None
    # but mypy isn't quite smart enough to figure that out yet
    assert non_zero_i is not None

    if not as_object:
        if non_zero_i < 0:
            # hostname = tld
            return parsed_url.hostname
        # Plain-string mode: just the TLD labels joined back with dots.
        return ".".join(domain_parts[non_zero_i:])

    if non_zero_i < 0:
        # hostname = tld
        subdomain = ""
        domain = ""
        # This is checked in process_url but the type is ambiguous (Optional[str])
        # so this assertion is just to satisfy mypy
        assert parsed_url.hostname is not None, "No hostname in URL"
        _tld = parsed_url.hostname
    else:
        # Split host labels: [... subdomain ...][domain][... tld ...]
        subdomain = ".".join(domain_parts[:non_zero_i-1])
        domain = ".".join(
            domain_parts[non_zero_i-1:non_zero_i]
        )
        _tld = ".".join(domain_parts[non_zero_i:])

    return Result(
        subdomain=subdomain,
        domain=domain,
        tld=_tld,
        parsed_url=parsed_url
    )
|
||||
|
||||
|
||||
def parse_tld(
    url: str,
    fail_silently: bool = False,
    fix_protocol: bool = False,
    search_public: bool = True,
    search_private: bool = True,
    parser_class: Type[BaseTLDSourceParser] = MozillaTLDSourceParser
) -> Union[Tuple[None, None, None], Tuple[str, str, str]]:
    """Split the URL's host into its (tld, domain, subdomain) parts.

    Any lookup failure — bad URL, unknown TLD, configuration or I/O
    problem — yields ``(None, None, None)`` instead of raising.

    :param url:
    :param fail_silently:
    :param fix_protocol:
    :param search_public:
    :param search_private:
    :param parser_class:
    :return: Tuple (tld, domain, subdomain)
    :rtype: tuple
    """
    failure = (None, None, None)
    try:
        result = get_tld(
            url,
            fail_silently=fail_silently,
            as_object=True,
            fix_protocol=fix_protocol,
            search_public=search_public,
            search_private=search_private,
            parser_class=parser_class
        )
    except (
        TldBadUrl,
        TldDomainNotFound,
        TldImproperlyConfigured,
        TldIOError
    ):
        return failure

    if result is None:
        return failure

    return result.tld, result.domain, result.subdomain  # type: ignore
|
||||
|
||||
|
||||
def is_tld(
    value: str,
    search_public: bool = True,
    search_private: bool = True,
    parser_class: Type[BaseTLDSourceParser] = MozillaTLDSourceParser
) -> bool:
    """Check if given URL is tld.

    :param value: URL to get top level domain from.
    :param search_public: If set to True, search in public domains.
    :param search_private: If set to True, search in private domains.
    :param parser_class:
    :type value: str
    :type search_public: bool
    :type search_private: bool
    :return:
    :rtype: bool
    """
    # Silent, protocol-fixed extraction: invalid input yields None and
    # thus a False result below.
    extracted = get_tld(
        url=value,
        fail_silently=True,
        fix_protocol=True,
        search_public=search_public,
        search_private=search_private,
        parser_class=parser_class
    )
    # `value` is a TLD only when extraction consumed the entire input.
    return extracted == value
|
||||
|
||||
|
||||
def reset_tld_names(tld_names_local_path: Optional[str] = None) -> None:
    """Reset the ``tld_names`` to empty value.

    If ``tld_names_local_path`` is given, removes specified
    entry from ``tld_names`` instead.

    :param tld_names_local_path:
    :type tld_names_local_path: str
    :return:
    """

    if tld_names_local_path:
        pop_tld_names_container(tld_names_local_path)
    else:
        # Rebinding the module-level dict requires the ``global`` keyword.
        global tld_names
        tld_names = {}
|
Loading…
Reference in new issue