You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
Charset-Normalizer
|
|
|
|
|
~~~~~~~~~~~~~~~~~~
|
|
|
|
|
The Real First Universal Charset Detector.
|
|
|
|
|
A library that helps you read text from an unknown charset encoding.
|
|
|
|
|
Motivated by chardet, this package is trying to resolve the issue by taking a new approach.
|
|
|
|
|
All IANA character set names for which the Python core library provides codecs are supported.
|
|
|
|
|
|
|
|
|
|
Basic usage:
|
|
|
|
|
>>> from charset_normalizer import from_bytes
|
|
|
|
|
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
|
|
|
|
|
>>> best_guess = results.best()
|
|
|
|
|
>>> str(best_guess)
|
|
|
|
|
'Bсеки човек има право на образование. Oбразованието!'
|
|
|
|
|
|
|
|
|
|
Other methods and usages are available - see the full documentation
|
|
|
|
|
at <https://github.com/Ousret/charset_normalizer>.
|
|
|
|
|
:copyright: (c) 2021 by Ahmed TAHRI
|
|
|
|
|
:license: MIT, see LICENSE for more details.
|
|
|
|
|
"""
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
from .api import from_bytes, from_fp, from_path
|
|
|
|
|
from .legacy import detect
|
|
|
|
|
from .models import CharsetMatch, CharsetMatches
|
|
|
|
|
from .utils import set_logging_handler
|
|
|
|
|
from .version import VERSION, __version__
|
|
|
|
|
|
|
|
|
|
# Names exported by ``from charset_normalizer import *``. Every entry is
# bound by one of the relative imports at the top of this module.
__all__ = (
    # from .api
    "from_fp",
    "from_path",
    "from_bytes",
    # from .legacy
    "detect",
    # from .models
    "CharsetMatch",
    "CharsetMatches",
    # from .version
    "__version__",
    "VERSION",
    # from .utils
    "set_logging_handler",
)
|
|
|
|
|
|
|
|
|
|
# Attach a NullHandler to the top level logger by default
|
|
|
|
|
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
|
|
|
|
|
|
|
|
|
|
# Give the package logger a no-op handler so that importing the library
# never produces log output by itself.
_package_logger = logging.getLogger("charset_normalizer")
_package_logger.addHandler(logging.NullHandler())
|