From f389c38a9c89e8e1329899ff7cf1d53d62450b45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Louis=20V=C3=A9zina?= <5130500+morpheus65535@users.noreply.github.com> Date: Wed, 18 Sep 2019 11:30:46 -0400 Subject: [PATCH] WIP --- bazarr/get_providers.py | 1 - bazarr/libs.py | 1 - bazarr/list_subtitles.py | 5 +- bazarr/main.py | 15 +- libs/certifi/__init__.py | 2 +- libs/certifi/cacert.pem | 100 -- libs/idna/core.py | 5 +- libs/idna/idnadata.py | 122 +- libs/idna/package_data.py | 2 +- libs/idna/uts46data.py | 660 +++++------ libs/ipaddress.py | 1006 ++++++++++------- libs/libfilebot/__init__.py | 3 +- libs/libfilebot/lib.py | 1 + libs/libfilebot/main.py | 8 +- libs/pyprobe/__init__.py | 4 +- libs/pyprobe/ffprobeparsers.py | 19 +- libs/pyprobe/pyprobe.py | 199 +++- libs/requests/__init__.py | 4 +- libs/requests/__version__.py | 6 +- libs/requests/api.py | 4 +- libs/subliminal_patch/http.py | 6 +- libs/subliminal_patch/pitcher.py | 4 +- libs/subliminal_patch/providers/__init__.py | 2 +- libs/subliminal_patch/providers/assrt.py | 2 +- libs/subliminal_patch/providers/legendastv.py | 4 +- .../providers/opensubtitles.py | 4 +- libs/subliminal_patch/providers/subscene.py | 8 +- libs/subscene_api/subscene.py | 4 +- libs/subzero/language.py | 3 + libs/urllib3/__init__.py | 3 +- libs/urllib3/connection.py | 62 +- libs/urllib3/connectionpool.py | 41 +- .../contrib/_securetransport/bindings.py | 14 +- libs/urllib3/contrib/pyopenssl.py | 32 +- libs/urllib3/contrib/securetransport.py | 87 +- libs/urllib3/contrib/socks.py | 35 +- libs/urllib3/fields.py | 140 ++- libs/urllib3/packages/ordered_dict.py | 259 ----- libs/urllib3/poolmanager.py | 6 +- libs/urllib3/response.py | 69 +- libs/urllib3/util/__init__.py | 2 + libs/urllib3/util/request.py | 7 + libs/urllib3/util/selectors.py | 581 ---------- libs/urllib3/util/ssl_.py | 110 +- libs/urllib3/util/timeout.py | 3 +- libs/urllib3/util/url.py | 215 ++-- views/menu.tpl | 1 + 47 files changed, 1853 insertions(+), 2018 deletions(-) delete mode 100644 libs/urllib3/packages/ordered_dict.py delete mode 100644 libs/urllib3/util/selectors.py diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index 5f4d84884..108053053 100644 --- a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -1,5 +1,4 @@ # coding=utf-8 -from __future__ import absolute_import import os import datetime import logging diff --git a/bazarr/libs.py b/bazarr/libs.py index 5cab7b473..37b8809ac 100644 --- a/bazarr/libs.py +++ b/bazarr/libs.py @@ -1,6 +1,5 @@ # coding=utf-8 -from __future__ import absolute_import import os import sys diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 9e211c180..acae7d2a0 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -16,6 +16,7 @@ from bs4 import UnicodeDammit from itertools import islice from database import TableShows, TableEpisodes, TableMovies from peewee import fn, JOIN +from functools import reduce from get_args import args from get_languages import alpha2_from_alpha3, get_language_set @@ -224,9 +225,9 @@ def store_subtitles_movie(file): def list_missing_subtitles(no=None): - episodes_subtitles_clause = {TableShows.sonarr_series_id.is_null(False)} + episodes_subtitles_clause = "TableShows.sonarr_series_id.is_null(False)" if no is not None: - episodes_subtitles_clause = {TableShows.sonarr_series_id ** no} + episodes_subtitles_clause = "TableShows.sonarr_series_id ** no" episodes_subtitles = TableEpisodes.select( TableEpisodes.sonarr_episode_id, diff --git a/bazarr/main.py b/bazarr/main.py index 
003502759..fccb0f058 100644 --- a/bazarr/main.py +++ b/bazarr/main.py @@ -1,9 +1,10 @@ # coding=utf-8 +bazarr_version = '0.8.2' + import six from six.moves import zip from functools import reduce -bazarr_version = '0.8.2' import gc import sys @@ -60,8 +61,6 @@ from config import settings, url_sonarr, url_radarr, url_radarr_short, url_sonar from subliminal_patch.extensions import provider_registry as provider_manager from subliminal_patch.core import SUBTITLE_EXTENSIONS -reload(sys) -sys.setdefaultencoding('utf8') gc.enable() os.environ["SZ_USER_AGENT"] = "Bazarr/1" @@ -2016,10 +2015,12 @@ def system(): def get_logs(): authorize() logs = [] - for line in reversed(open(os.path.join(args.config_dir, 'log', 'bazarr.log')).readlines()): - lin = [] - lin = line.split('|') - logs.append(lin) + with open(os.path.join(args.config_dir, 'log', 'bazarr.log')) as file: + for line in file.readlines(): + lin = [] + lin = line.split('|') + logs.append(lin) + logs.reverse() return dict(data=logs) diff --git a/libs/certifi/__init__.py b/libs/certifi/__init__.py index 632db8e13..8e358e4c8 100644 --- a/libs/certifi/__init__.py +++ b/libs/certifi/__init__.py @@ -1,3 +1,3 @@ from .core import where -__version__ = "2019.03.09" +__version__ = "2019.09.11" diff --git a/libs/certifi/cacert.pem b/libs/certifi/cacert.pem index 84636dde7..70fa91f61 100644 --- a/libs/certifi/cacert.pem +++ b/libs/certifi/cacert.pem @@ -771,36 +771,6 @@ vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep +OkuE6N36B9K -----END CERTIFICATE----- -# Issuer: CN=Class 2 Primary CA O=Certplus -# Subject: CN=Class 2 Primary CA O=Certplus -# Label: "Certplus Class 2 Primary CA" -# Serial: 177770208045934040241468760488327595043 -# MD5 Fingerprint: 88:2c:8c:52:b8:a2:3c:f3:f7:bb:03:ea:ae:ac:42:0b -# SHA1 Fingerprint: 74:20:74:41:72:9c:dd:92:ec:79:31:d8:23:10:8d:c2:81:92:e2:bb -# SHA256 Fingerprint: 0f:99:3c:8a:ef:97:ba:af:56:87:14:0e:d5:9a:d1:82:1b:b4:af:ac:f0:aa:9a:58:b5:d5:7a:33:8a:3a:fb:cb ------BEGIN CERTIFICATE----- -MIIDkjCCAnqgAwIBAgIRAIW9S/PY2uNp9pTXX8OlRCMwDQYJKoZIhvcNAQEFBQAw -PTELMAkGA1UEBhMCRlIxETAPBgNVBAoTCENlcnRwbHVzMRswGQYDVQQDExJDbGFz -cyAyIFByaW1hcnkgQ0EwHhcNOTkwNzA3MTcwNTAwWhcNMTkwNzA2MjM1OTU5WjA9 -MQswCQYDVQQGEwJGUjERMA8GA1UEChMIQ2VydHBsdXMxGzAZBgNVBAMTEkNsYXNz -IDIgUHJpbWFyeSBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANxQ -ltAS+DXSCHh6tlJw/W/uz7kRy1134ezpfgSN1sxvc0NXYKwzCkTsA18cgCSR5aiR -VhKC9+Ar9NuuYS6JEI1rbLqzAr3VNsVINyPi8Fo3UjMXEuLRYE2+L0ER4/YXJQyL -kcAbmXuZVg2v7tK8R1fjeUl7NIknJITesezpWE7+Tt9avkGtrAjFGA7v0lPubNCd -EgETjdyAYveVqUSISnFOYFWe2yMZeVYHDD9jC1yw4r5+FfyUM1hBOHTE4Y+L3yas -H7WLO7dDWWuwJKZtkIvEcupdM5i3y95ee++U8Rs+yskhwcWYAqqi9lt3m/V+llU0 -HGdpwPFC40es/CgcZlUCAwEAAaOBjDCBiTAPBgNVHRMECDAGAQH/AgEKMAsGA1Ud -DwQEAwIBBjAdBgNVHQ4EFgQU43Mt38sOKAze3bOkynm4jrvoMIkwEQYJYIZIAYb4 -QgEBBAQDAgEGMDcGA1UdHwQwMC4wLKAqoCiGJmh0dHA6Ly93d3cuY2VydHBsdXMu -Y29tL0NSTC9jbGFzczIuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQCnVM+IRBnL39R/ -AN9WM2K191EBkOvDP9GIROkkXe/nFL0gt5o8AP5tn9uQ3Nf0YtaLcF3n5QRIqWh8 -yfFC82x/xXp8HVGIutIKPidd3i1RTtMTZGnkLuPT55sJmabglZvOGtd/vjzOUrMR -FcEPF80Du5wlFbqidon8BvEY0JNLDnyCt6X09l/+7UCmnYR0ObncHoUW2ikbhiMA -ybuJfm6AiB4vFLQDJKgybwOaRywwvlbGp0ICcBvqQNi6BQNwB6SW//1IMwrh3KWB -kJtN3X3n57LNXMhqlfil9o3EXXgIvnsG1knPGTZQIy4I5p4FTUcY1Rbpsda2ENW7 -l7+ijrRU ------END CERTIFICATE----- - # Issuer: CN=DST Root CA X3 O=Digital Signature Trust Co. # Subject: CN=DST Root CA X3 O=Digital Signature Trust Co. 
# Label: "DST Root CA X3" @@ -1219,36 +1189,6 @@ t0QmwCbAr1UwnjvVNioZBPRcHv/PLLf/0P2HQBHVESO7SMAhqaQoLf0V+LBOK/Qw WyH8EZE0vkHve52Xdf+XlcCWWC/qu0bXu+TZLg== -----END CERTIFICATE----- -# Issuer: CN=Deutsche Telekom Root CA 2 O=Deutsche Telekom AG OU=T-TeleSec Trust Center -# Subject: CN=Deutsche Telekom Root CA 2 O=Deutsche Telekom AG OU=T-TeleSec Trust Center -# Label: "Deutsche Telekom Root CA 2" -# Serial: 38 -# MD5 Fingerprint: 74:01:4a:91:b1:08:c4:58:ce:47:cd:f0:dd:11:53:08 -# SHA1 Fingerprint: 85:a4:08:c0:9c:19:3e:5d:51:58:7d:cd:d6:13:30:fd:8c:de:37:bf -# SHA256 Fingerprint: b6:19:1a:50:d0:c3:97:7f:7d:a9:9b:cd:aa:c8:6a:22:7d:ae:b9:67:9e:c7:0b:a3:b0:c9:d9:22:71:c1:70:d3 ------BEGIN CERTIFICATE----- -MIIDnzCCAoegAwIBAgIBJjANBgkqhkiG9w0BAQUFADBxMQswCQYDVQQGEwJERTEc -MBoGA1UEChMTRGV1dHNjaGUgVGVsZWtvbSBBRzEfMB0GA1UECxMWVC1UZWxlU2Vj -IFRydXN0IENlbnRlcjEjMCEGA1UEAxMaRGV1dHNjaGUgVGVsZWtvbSBSb290IENB -IDIwHhcNOTkwNzA5MTIxMTAwWhcNMTkwNzA5MjM1OTAwWjBxMQswCQYDVQQGEwJE -RTEcMBoGA1UEChMTRGV1dHNjaGUgVGVsZWtvbSBBRzEfMB0GA1UECxMWVC1UZWxl -U2VjIFRydXN0IENlbnRlcjEjMCEGA1UEAxMaRGV1dHNjaGUgVGVsZWtvbSBSb290 -IENBIDIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCrC6M14IspFLEU -ha88EOQ5bzVdSq7d6mGNlUn0b2SjGmBmpKlAIoTZ1KXleJMOaAGtuU1cOs7TuKhC -QN/Po7qCWWqSG6wcmtoIKyUn+WkjR/Hg6yx6m/UTAtB+NHzCnjwAWav12gz1Mjwr -rFDa1sPeg5TKqAyZMg4ISFZbavva4VhYAUlfckE8FQYBjl2tqriTtM2e66foai1S -NNs671x1Udrb8zH57nGYMsRUFUQM+ZtV7a3fGAigo4aKSe5TBY8ZTNXeWHmb0moc -QqvF1afPaA+W5OFhmHZhyJF81j4A4pFQh+GdCuatl9Idxjp9y7zaAzTVjlsB9WoH -txa2bkp/AgMBAAGjQjBAMB0GA1UdDgQWBBQxw3kbuvVT1xfgiXotF2wKsyudMzAP -BgNVHRMECDAGAQH/AgEFMA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOC -AQEAlGRZrTlk5ynrE/5aw4sTV8gEJPB0d8Bg42f76Ymmg7+Wgnxu1MM9756Abrsp -tJh6sTtU6zkXR34ajgv8HzFZMQSyzhfzLMdiNlXiItiJVbSYSKpk+tYcNthEeFpa -IzpXl/V6ME+un2pMSyuOoAPjPuCp1NJ70rOo4nI8rZ7/gFnkm0W09juwzTkZmDLl -6iFhkOQxIY40sfcvNUqFENrnijchvllj4PKFiDFT1FQUhXB59C4Gdyd1Lx+4ivn+ -xbrYNuSD7Odlt79jWvNGr4GUN9RBjNYj1h7P9WgbRGOiWrqnNVmh5XAFmw4jV5mU -Cm26OWMohpLzGITY+9HPBVZkVw== ------END CERTIFICATE----- - # Issuer: CN=Cybertrust Global Root O=Cybertrust, Inc # Subject: CN=Cybertrust Global Root O=Cybertrust, Inc # Label: "Cybertrust Global Root" @@ -3453,46 +3393,6 @@ AAoACxGV2lZFA4gKn2fQ1XmxqI1AbQ3CekD6819kR5LLU7m7Wc5P/dAVUwHY3+vZ 5nbv0CO7O6l5s9UCKc2Jo5YPSjXnTkLAdc0Hz+Ys63su -----END CERTIFICATE----- -# Issuer: CN=Certinomis - Root CA O=Certinomis OU=0002 433998903 -# Subject: CN=Certinomis - Root CA O=Certinomis OU=0002 433998903 -# Label: "Certinomis - Root CA" -# Serial: 1 -# MD5 Fingerprint: 14:0a:fd:8d:a8:28:b5:38:69:db:56:7e:61:22:03:3f -# SHA1 Fingerprint: 9d:70:bb:01:a5:a4:a0:18:11:2e:f7:1c:01:b9:32:c5:34:e7:88:a8 -# SHA256 Fingerprint: 2a:99:f5:bc:11:74:b7:3c:bb:1d:62:08:84:e0:1c:34:e5:1c:cb:39:78:da:12:5f:0e:33:26:88:83:bf:41:58 ------BEGIN CERTIFICATE----- -MIIFkjCCA3qgAwIBAgIBATANBgkqhkiG9w0BAQsFADBaMQswCQYDVQQGEwJGUjET -MBEGA1UEChMKQ2VydGlub21pczEXMBUGA1UECxMOMDAwMiA0MzM5OTg5MDMxHTAb -BgNVBAMTFENlcnRpbm9taXMgLSBSb290IENBMB4XDTEzMTAyMTA5MTcxOFoXDTMz -MTAyMTA5MTcxOFowWjELMAkGA1UEBhMCRlIxEzARBgNVBAoTCkNlcnRpbm9taXMx -FzAVBgNVBAsTDjAwMDIgNDMzOTk4OTAzMR0wGwYDVQQDExRDZXJ0aW5vbWlzIC0g -Um9vdCBDQTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANTMCQosP5L2 -fxSeC5yaah1AMGT9qt8OHgZbn1CF6s2Nq0Nn3rD6foCWnoR4kkjW4znuzuRZWJfl -LieY6pOod5tK8O90gC3rMB+12ceAnGInkYjwSond3IjmFPnVAy//ldu9n+ws+hQV -WZUKxkd8aRi5pwP5ynapz8dvtF4F/u7BUrJ1Mofs7SlmO/NKFoL21prbcpjp3vDF -TKWrteoB4owuZH9kb/2jJZOLyKIOSY008B/sWEUuNKqEUL3nskoTuLAPrjhdsKkb -5nPJWqHZZkCqqU2mNAKthH6yI8H7KsZn9DS2sJVqM09xRLWtwHkziOC/7aOgFLSc 
-CbAK42C++PhmiM1b8XcF4LVzbsF9Ri6OSyemzTUK/eVNfaoqoynHWmgE6OXWk6Ri -wsXm9E/G+Z8ajYJJGYrKWUM66A0ywfRMEwNvbqY/kXPLynNvEiCL7sCCeN5LLsJJ -wx3tFvYk9CcbXFcx3FXuqB5vbKziRcxXV4p1VxngtViZSTYxPDMBbRZKzbgqg4SG -m/lg0h9tkQPTYKbVPZrdd5A9NaSfD171UkRpucC63M9933zZxKyGIjK8e2uR73r4 -F2iw4lNVYC2vPsKD2NkJK/DAZNuHi5HMkesE/Xa0lZrmFAYb1TQdvtj/dBxThZng -WVJKYe2InmtJiUZ+IFrZ50rlau7SZRFDAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIB -BjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTvkUz1pcMw6C8I6tNxIqSSaHh0 -2TAfBgNVHSMEGDAWgBTvkUz1pcMw6C8I6tNxIqSSaHh02TANBgkqhkiG9w0BAQsF -AAOCAgEAfj1U2iJdGlg+O1QnurrMyOMaauo++RLrVl89UM7g6kgmJs95Vn6RHJk/ -0KGRHCwPT5iVWVO90CLYiF2cN/z7ZMF4jIuaYAnq1fohX9B0ZedQxb8uuQsLrbWw -F6YSjNRieOpWauwK0kDDPAUwPk2Ut59KA9N9J0u2/kTO+hkzGm2kQtHdzMjI1xZS -g081lLMSVX3l4kLr5JyTCcBMWwerx20RoFAXlCOotQqSD7J6wWAsOMwaplv/8gzj -qh8c3LigkyfeY+N/IZ865Z764BNqdeuWXGKRlI5nU7aJ+BIJy29SWwNyhlCVCNSN -h4YVH5Uk2KRvms6knZtt0rJ2BobGVgjF6wnaNsIbW0G+YSrjcOa4pvi2WsS9Iff/ -ql+hbHY5ZtbqTFXhADObE5hjyW/QASAJN1LnDE8+zbz1X5YnpyACleAu6AdBBR8V -btaw5BngDwKTACdyxYvRVB9dSsNAl35VpnzBMwQUAR1JIGkLGZOdblgi90AMRgwj -Y/M50n92Uaf0yKHxDHYiI0ZSKS3io0EHVmmY0gUJvGnHWmHNj4FgFU2A3ZDifcRQ -8ow7bkrHxuaAKzyBvBGAFhAn1/DNP3nMcyrDflOR1m749fPH0FFNjkulW+YZFzvW -gQncItzujrnEj1PhZ7szuIgVRs/taTX/dQ1G885x4cVrhkIGuUE= ------END CERTIFICATE----- - # Issuer: CN=OISTE WISeKey Global Root GB CA O=WISeKey OU=OISTE Foundation Endorsed # Subject: CN=OISTE WISeKey Global Root GB CA O=WISeKey OU=OISTE Foundation Endorsed # Label: "OISTE WISeKey Global Root GB CA" diff --git a/libs/idna/core.py b/libs/idna/core.py index 090c2c18d..104624ad2 100644 --- a/libs/idna/core.py +++ b/libs/idna/core.py @@ -267,10 +267,7 @@ def alabel(label): try: label = label.encode('ascii') - try: - ulabel(label) - except IDNAError: - raise IDNAError('The label {0} is not a valid A-label'.format(label)) + ulabel(label) if not valid_label_length(label): raise IDNAError('Label too long') return label diff --git a/libs/idna/idnadata.py b/libs/idna/idnadata.py index 17974e233..a80c959d2 100644 --- a/libs/idna/idnadata.py +++ b/libs/idna/idnadata.py @@ -1,6 +1,6 @@ # This file is automatically generated by tools/idna-data -__version__ = "10.0.0" +__version__ = "11.0.0" scripts = { 'Greek': ( 0x37000000374, @@ -49,7 +49,7 @@ scripts = { 0x30210000302a, 0x30380000303c, 0x340000004db6, - 0x4e0000009feb, + 0x4e0000009ff0, 0xf9000000fa6e, 0xfa700000fada, 0x200000002a6d7, @@ -62,7 +62,7 @@ scripts = { 'Hebrew': ( 0x591000005c8, 0x5d0000005eb, - 0x5f0000005f5, + 0x5ef000005f5, 0xfb1d0000fb37, 0xfb380000fb3d, 0xfb3e0000fb3f, @@ -248,6 +248,7 @@ joining_types = { 0x6fb: 68, 0x6fc: 68, 0x6ff: 68, + 0x70f: 84, 0x710: 82, 0x712: 68, 0x713: 68, @@ -522,6 +523,7 @@ joining_types = { 0x1875: 68, 0x1876: 68, 0x1877: 68, + 0x1878: 68, 0x1880: 85, 0x1881: 85, 0x1882: 85, @@ -690,6 +692,70 @@ joining_types = { 0x10bad: 68, 0x10bae: 68, 0x10baf: 85, + 0x10d00: 76, + 0x10d01: 68, + 0x10d02: 68, + 0x10d03: 68, + 0x10d04: 68, + 0x10d05: 68, + 0x10d06: 68, + 0x10d07: 68, + 0x10d08: 68, + 0x10d09: 68, + 0x10d0a: 68, + 0x10d0b: 68, + 0x10d0c: 68, + 0x10d0d: 68, + 0x10d0e: 68, + 0x10d0f: 68, + 0x10d10: 68, + 0x10d11: 68, + 0x10d12: 68, + 0x10d13: 68, + 0x10d14: 68, + 0x10d15: 68, + 0x10d16: 68, + 0x10d17: 68, + 0x10d18: 68, + 0x10d19: 68, + 0x10d1a: 68, + 0x10d1b: 68, + 0x10d1c: 68, + 0x10d1d: 68, + 0x10d1e: 68, + 0x10d1f: 68, + 0x10d20: 68, + 0x10d21: 68, + 0x10d22: 82, + 0x10d23: 68, + 0x10f30: 68, + 0x10f31: 68, + 0x10f32: 68, + 0x10f33: 82, + 0x10f34: 68, + 0x10f35: 68, + 0x10f36: 68, + 0x10f37: 68, + 0x10f38: 68, + 0x10f39: 68, + 0x10f3a: 68, + 
0x10f3b: 68, + 0x10f3c: 68, + 0x10f3d: 68, + 0x10f3e: 68, + 0x10f3f: 68, + 0x10f40: 68, + 0x10f41: 68, + 0x10f42: 68, + 0x10f43: 68, + 0x10f44: 68, + 0x10f45: 85, + 0x10f51: 68, + 0x10f52: 68, + 0x10f53: 68, + 0x10f54: 82, + 0x110bd: 85, + 0x110cd: 85, 0x1e900: 68, 0x1e901: 68, 0x1e902: 68, @@ -1034,14 +1100,15 @@ codepoint_classes = { 0x52d0000052e, 0x52f00000530, 0x5590000055a, - 0x56100000587, + 0x56000000587, + 0x58800000589, 0x591000005be, 0x5bf000005c0, 0x5c1000005c3, 0x5c4000005c6, 0x5c7000005c8, 0x5d0000005eb, - 0x5f0000005f3, + 0x5ef000005f3, 0x6100000061b, 0x62000000640, 0x64100000660, @@ -1054,12 +1121,13 @@ codepoint_classes = { 0x7100000074b, 0x74d000007b2, 0x7c0000007f6, + 0x7fd000007fe, 0x8000000082e, 0x8400000085c, 0x8600000086b, 0x8a0000008b5, 0x8b6000008be, - 0x8d4000008e2, + 0x8d3000008e2, 0x8e300000958, 0x96000000964, 0x96600000970, @@ -1077,6 +1145,7 @@ codepoint_classes = { 0x9e0000009e4, 0x9e6000009f2, 0x9fc000009fd, + 0x9fe000009ff, 0xa0100000a04, 0xa0500000a0b, 0xa0f00000a11, @@ -1136,8 +1205,7 @@ codepoint_classes = { 0xbd000000bd1, 0xbd700000bd8, 0xbe600000bf0, - 0xc0000000c04, - 0xc0500000c0d, + 0xc0000000c0d, 0xc0e00000c11, 0xc1200000c29, 0xc2a00000c3a, @@ -1276,7 +1344,7 @@ codepoint_classes = { 0x17dc000017de, 0x17e0000017ea, 0x18100000181a, - 0x182000001878, + 0x182000001879, 0x1880000018ab, 0x18b0000018f6, 0x19000000191f, @@ -1544,11 +1612,11 @@ codepoint_classes = { 0x309d0000309f, 0x30a1000030fb, 0x30fc000030ff, - 0x31050000312f, + 0x310500003130, 0x31a0000031bb, 0x31f000003200, 0x340000004db6, - 0x4e0000009feb, + 0x4e0000009ff0, 0xa0000000a48d, 0xa4d00000a4fe, 0xa5000000a60d, @@ -1655,8 +1723,10 @@ codepoint_classes = { 0xa7a50000a7a6, 0xa7a70000a7a8, 0xa7a90000a7aa, + 0xa7af0000a7b0, 0xa7b50000a7b6, 0xa7b70000a7b8, + 0xa7b90000a7ba, 0xa7f70000a7f8, 0xa7fa0000a828, 0xa8400000a874, @@ -1664,8 +1734,7 @@ codepoint_classes = { 0xa8d00000a8da, 0xa8e00000a8f8, 0xa8fb0000a8fc, - 0xa8fd0000a8fe, - 0xa9000000a92e, + 0xa8fd0000a92e, 0xa9300000a954, 0xa9800000a9c1, 0xa9cf0000a9da, @@ -1743,7 +1812,7 @@ codepoint_classes = { 0x10a0500010a07, 0x10a0c00010a14, 0x10a1500010a18, - 0x10a1900010a34, + 0x10a1900010a36, 0x10a3800010a3b, 0x10a3f00010a40, 0x10a6000010a7d, @@ -1756,6 +1825,11 @@ codepoint_classes = { 0x10b8000010b92, 0x10c0000010c49, 0x10cc000010cf3, + 0x10d0000010d28, + 0x10d3000010d3a, + 0x10f0000010f1d, + 0x10f2700010f28, + 0x10f3000010f51, 0x1100000011047, 0x1106600011070, 0x1107f000110bb, @@ -1763,10 +1837,11 @@ codepoint_classes = { 0x110f0000110fa, 0x1110000011135, 0x1113600011140, + 0x1114400011147, 0x1115000011174, 0x1117600011177, 0x11180000111c5, - 0x111ca000111cd, + 0x111c9000111cd, 0x111d0000111db, 0x111dc000111dd, 0x1120000011212, @@ -1786,7 +1861,7 @@ codepoint_classes = { 0x1132a00011331, 0x1133200011334, 0x113350001133a, - 0x1133c00011345, + 0x1133b00011345, 0x1134700011349, 0x1134b0001134e, 0x1135000011351, @@ -1796,6 +1871,7 @@ codepoint_classes = { 0x1137000011375, 0x114000001144b, 0x114500001145a, + 0x1145e0001145f, 0x11480000114c6, 0x114c7000114c8, 0x114d0000114da, @@ -1807,15 +1883,17 @@ codepoint_classes = { 0x116500001165a, 0x11680000116b8, 0x116c0000116ca, - 0x117000001171a, + 0x117000001171b, 0x1171d0001172c, 0x117300001173a, + 0x118000001183b, 0x118c0000118ea, 0x118ff00011900, 0x11a0000011a3f, 0x11a4700011a48, 0x11a5000011a84, 0x11a8600011a9a, + 0x11a9d00011a9e, 0x11ac000011af9, 0x11c0000011c09, 0x11c0a00011c37, @@ -1831,6 +1909,13 @@ codepoint_classes = { 0x11d3c00011d3e, 0x11d3f00011d48, 0x11d5000011d5a, + 0x11d6000011d66, + 
0x11d6700011d69, + 0x11d6a00011d8f, + 0x11d9000011d92, + 0x11d9300011d99, + 0x11da000011daa, + 0x11ee000011ef7, 0x120000001239a, 0x1248000012544, 0x130000001342f, @@ -1845,11 +1930,12 @@ codepoint_classes = { 0x16b5000016b5a, 0x16b6300016b78, 0x16b7d00016b90, + 0x16e6000016e80, 0x16f0000016f45, 0x16f5000016f7f, 0x16f8f00016fa0, 0x16fe000016fe2, - 0x17000000187ed, + 0x17000000187f2, 0x1880000018af3, 0x1b0000001b11f, 0x1b1700001b2fc, diff --git a/libs/idna/package_data.py b/libs/idna/package_data.py index 39c192bae..257e89893 100644 --- a/libs/idna/package_data.py +++ b/libs/idna/package_data.py @@ -1,2 +1,2 @@ -__version__ = '2.7' +__version__ = '2.8' diff --git a/libs/idna/uts46data.py b/libs/idna/uts46data.py index 79731cb9e..a68ed4c0e 100644 --- a/libs/idna/uts46data.py +++ b/libs/idna/uts46data.py @@ -4,7 +4,7 @@ """IDNA Mapping Table from UTS46.""" -__version__ = "10.0.0" +__version__ = "11.0.0" def _seg_0(): return [ (0x0, '3'), @@ -1029,11 +1029,8 @@ def _seg_9(): (0x556, 'M', u'ֆ'), (0x557, 'X'), (0x559, 'V'), - (0x560, 'X'), - (0x561, 'V'), (0x587, 'M', u'եւ'), - (0x588, 'X'), - (0x589, 'V'), + (0x588, 'V'), (0x58B, 'X'), (0x58D, 'V'), (0x590, 'X'), @@ -1041,15 +1038,15 @@ def _seg_9(): (0x5C8, 'X'), (0x5D0, 'V'), (0x5EB, 'X'), - (0x5F0, 'V'), + (0x5EF, 'V'), (0x5F5, 'X'), + (0x606, 'V'), + (0x61C, 'X'), + (0x61E, 'V'), ] def _seg_10(): return [ - (0x606, 'V'), - (0x61C, 'X'), - (0x61E, 'V'), (0x675, 'M', u'اٴ'), (0x676, 'M', u'وٴ'), (0x677, 'M', u'ۇٴ'), @@ -1064,7 +1061,7 @@ def _seg_10(): (0x7B2, 'X'), (0x7C0, 'V'), (0x7FB, 'X'), - (0x800, 'V'), + (0x7FD, 'V'), (0x82E, 'X'), (0x830, 'V'), (0x83F, 'X'), @@ -1078,7 +1075,7 @@ def _seg_10(): (0x8B5, 'X'), (0x8B6, 'V'), (0x8BE, 'X'), - (0x8D4, 'V'), + (0x8D3, 'V'), (0x8E2, 'X'), (0x8E3, 'V'), (0x958, 'M', u'क़'), @@ -1118,7 +1115,7 @@ def _seg_10(): (0x9E0, 'V'), (0x9E4, 'X'), (0x9E6, 'V'), - (0x9FE, 'X'), + (0x9FF, 'X'), (0xA01, 'V'), (0xA04, 'X'), (0xA05, 'V'), @@ -1147,19 +1144,19 @@ def _seg_10(): (0xA4E, 'X'), (0xA51, 'V'), (0xA52, 'X'), + (0xA59, 'M', u'ਖ਼'), + (0xA5A, 'M', u'ਗ਼'), + (0xA5B, 'M', u'ਜ਼'), ] def _seg_11(): return [ - (0xA59, 'M', u'ਖ਼'), - (0xA5A, 'M', u'ਗ਼'), - (0xA5B, 'M', u'ਜ਼'), (0xA5C, 'V'), (0xA5D, 'X'), (0xA5E, 'M', u'ਫ਼'), (0xA5F, 'X'), (0xA66, 'V'), - (0xA76, 'X'), + (0xA77, 'X'), (0xA81, 'V'), (0xA84, 'X'), (0xA85, 'V'), @@ -1250,16 +1247,14 @@ def _seg_11(): (0xBE6, 'V'), (0xBFB, 'X'), (0xC00, 'V'), - (0xC04, 'X'), - ] - -def _seg_12(): - return [ - (0xC05, 'V'), (0xC0D, 'X'), (0xC0E, 'V'), (0xC11, 'X'), (0xC12, 'V'), + ] + +def _seg_12(): + return [ (0xC29, 'X'), (0xC2A, 'V'), (0xC3A, 'X'), @@ -1278,8 +1273,6 @@ def _seg_12(): (0xC66, 'V'), (0xC70, 'X'), (0xC78, 'V'), - (0xC84, 'X'), - (0xC85, 'V'), (0xC8D, 'X'), (0xC8E, 'V'), (0xC91, 'X'), @@ -1355,10 +1348,6 @@ def _seg_12(): (0xE83, 'X'), (0xE84, 'V'), (0xE85, 'X'), - ] - -def _seg_13(): - return [ (0xE87, 'V'), (0xE89, 'X'), (0xE8A, 'V'), @@ -1366,6 +1355,10 @@ def _seg_13(): (0xE8D, 'V'), (0xE8E, 'X'), (0xE94, 'V'), + ] + +def _seg_13(): + return [ (0xE98, 'X'), (0xE99, 'V'), (0xEA0, 'X'), @@ -1459,10 +1452,6 @@ def _seg_13(): (0x124E, 'X'), (0x1250, 'V'), (0x1257, 'X'), - ] - -def _seg_14(): - return [ (0x1258, 'V'), (0x1259, 'X'), (0x125A, 'V'), @@ -1470,6 +1459,10 @@ def _seg_14(): (0x1260, 'V'), (0x1289, 'X'), (0x128A, 'V'), + ] + +def _seg_14(): + return [ (0x128E, 'X'), (0x1290, 'V'), (0x12B1, 'X'), @@ -1538,7 +1531,7 @@ def _seg_14(): (0x1810, 'V'), (0x181A, 'X'), (0x1820, 'V'), - (0x1878, 'X'), + (0x1879, 'X'), (0x1880, 'V'), (0x18AB, 
'X'), (0x18B0, 'V'), @@ -1563,10 +1556,6 @@ def _seg_14(): (0x19DB, 'X'), (0x19DE, 'V'), (0x1A1C, 'X'), - ] - -def _seg_15(): - return [ (0x1A1E, 'V'), (0x1A5F, 'X'), (0x1A60, 'V'), @@ -1574,6 +1563,10 @@ def _seg_15(): (0x1A7F, 'V'), (0x1A8A, 'X'), (0x1A90, 'V'), + ] + +def _seg_15(): + return [ (0x1A9A, 'X'), (0x1AA0, 'V'), (0x1AAE, 'X'), @@ -1667,10 +1660,6 @@ def _seg_15(): (0x1D68, 'M', u'ρ'), (0x1D69, 'M', u'φ'), (0x1D6A, 'M', u'χ'), - ] - -def _seg_16(): - return [ (0x1D6B, 'V'), (0x1D78, 'M', u'н'), (0x1D79, 'V'), @@ -1678,6 +1667,10 @@ def _seg_16(): (0x1D9C, 'M', u'c'), (0x1D9D, 'M', u'ɕ'), (0x1D9E, 'M', u'ð'), + ] + +def _seg_16(): + return [ (0x1D9F, 'M', u'ɜ'), (0x1DA0, 'M', u'f'), (0x1DA1, 'M', u'ɟ'), @@ -1771,10 +1764,6 @@ def _seg_16(): (0x1E36, 'M', u'ḷ'), (0x1E37, 'V'), (0x1E38, 'M', u'ḹ'), - ] - -def _seg_17(): - return [ (0x1E39, 'V'), (0x1E3A, 'M', u'ḻ'), (0x1E3B, 'V'), @@ -1782,6 +1771,10 @@ def _seg_17(): (0x1E3D, 'V'), (0x1E3E, 'M', u'ḿ'), (0x1E3F, 'V'), + ] + +def _seg_17(): + return [ (0x1E40, 'M', u'ṁ'), (0x1E41, 'V'), (0x1E42, 'M', u'ṃ'), @@ -1875,10 +1868,6 @@ def _seg_17(): (0x1E9F, 'V'), (0x1EA0, 'M', u'ạ'), (0x1EA1, 'V'), - ] - -def _seg_18(): - return [ (0x1EA2, 'M', u'ả'), (0x1EA3, 'V'), (0x1EA4, 'M', u'ấ'), @@ -1886,6 +1875,10 @@ def _seg_18(): (0x1EA6, 'M', u'ầ'), (0x1EA7, 'V'), (0x1EA8, 'M', u'ẩ'), + ] + +def _seg_18(): + return [ (0x1EA9, 'V'), (0x1EAA, 'M', u'ẫ'), (0x1EAB, 'V'), @@ -1979,10 +1972,6 @@ def _seg_18(): (0x1F0B, 'M', u'ἃ'), (0x1F0C, 'M', u'ἄ'), (0x1F0D, 'M', u'ἅ'), - ] - -def _seg_19(): - return [ (0x1F0E, 'M', u'ἆ'), (0x1F0F, 'M', u'ἇ'), (0x1F10, 'V'), @@ -1990,6 +1979,10 @@ def _seg_19(): (0x1F18, 'M', u'ἐ'), (0x1F19, 'M', u'ἑ'), (0x1F1A, 'M', u'ἒ'), + ] + +def _seg_19(): + return [ (0x1F1B, 'M', u'ἓ'), (0x1F1C, 'M', u'ἔ'), (0x1F1D, 'M', u'ἕ'), @@ -2083,10 +2076,6 @@ def _seg_19(): (0x1F9A, 'M', u'ἢι'), (0x1F9B, 'M', u'ἣι'), (0x1F9C, 'M', u'ἤι'), - ] - -def _seg_20(): - return [ (0x1F9D, 'M', u'ἥι'), (0x1F9E, 'M', u'ἦι'), (0x1F9F, 'M', u'ἧι'), @@ -2094,6 +2083,10 @@ def _seg_20(): (0x1FA1, 'M', u'ὡι'), (0x1FA2, 'M', u'ὢι'), (0x1FA3, 'M', u'ὣι'), + ] + +def _seg_20(): + return [ (0x1FA4, 'M', u'ὤι'), (0x1FA5, 'M', u'ὥι'), (0x1FA6, 'M', u'ὦι'), @@ -2187,10 +2180,6 @@ def _seg_20(): (0x2024, 'X'), (0x2027, 'V'), (0x2028, 'X'), - ] - -def _seg_21(): - return [ (0x202F, '3', u' '), (0x2030, 'V'), (0x2033, 'M', u'′′'), @@ -2198,6 +2187,10 @@ def _seg_21(): (0x2035, 'V'), (0x2036, 'M', u'‵‵'), (0x2037, 'M', u'‵‵‵'), + ] + +def _seg_21(): + return [ (0x2038, 'V'), (0x203C, '3', u'!!'), (0x203D, 'V'), @@ -2291,10 +2284,6 @@ def _seg_21(): (0x2120, 'M', u'sm'), (0x2121, 'M', u'tel'), (0x2122, 'M', u'tm'), - ] - -def _seg_22(): - return [ (0x2123, 'V'), (0x2124, 'M', u'z'), (0x2125, 'V'), @@ -2302,6 +2291,10 @@ def _seg_22(): (0x2127, 'V'), (0x2128, 'M', u'z'), (0x2129, 'V'), + ] + +def _seg_22(): + return [ (0x212A, 'M', u'k'), (0x212B, 'M', u'å'), (0x212C, 'M', u'b'), @@ -2395,10 +2388,6 @@ def _seg_22(): (0x226E, '3'), (0x2270, 'V'), (0x2329, 'M', u'〈'), - ] - -def _seg_23(): - return [ (0x232A, 'M', u'〉'), (0x232B, 'V'), (0x2427, 'X'), @@ -2406,6 +2395,10 @@ def _seg_23(): (0x244B, 'X'), (0x2460, 'M', u'1'), (0x2461, 'M', u'2'), + ] + +def _seg_23(): + return [ (0x2462, 'M', u'3'), (0x2463, 'M', u'4'), (0x2464, 'M', u'5'), @@ -2499,10 +2492,6 @@ def _seg_23(): (0x24CF, 'M', u'z'), (0x24D0, 'M', u'a'), (0x24D1, 'M', u'b'), - ] - -def _seg_24(): - return [ (0x24D2, 'M', u'c'), (0x24D3, 'M', u'd'), (0x24D4, 'M', u'e'), @@ -2510,6 +2499,10 @@ def 
_seg_24(): (0x24D6, 'M', u'g'), (0x24D7, 'M', u'h'), (0x24D8, 'M', u'i'), + ] + +def _seg_24(): + return [ (0x24D9, 'M', u'j'), (0x24DA, 'M', u'k'), (0x24DB, 'M', u'l'), @@ -2541,13 +2534,9 @@ def _seg_24(): (0x2B76, 'V'), (0x2B96, 'X'), (0x2B98, 'V'), - (0x2BBA, 'X'), - (0x2BBD, 'V'), (0x2BC9, 'X'), (0x2BCA, 'V'), - (0x2BD3, 'X'), - (0x2BEC, 'V'), - (0x2BF0, 'X'), + (0x2BFF, 'X'), (0x2C00, 'M', u'ⰰ'), (0x2C01, 'M', u'ⰱ'), (0x2C02, 'M', u'ⰲ'), @@ -2603,10 +2592,6 @@ def _seg_24(): (0x2C62, 'M', u'ɫ'), (0x2C63, 'M', u'ᵽ'), (0x2C64, 'M', u'ɽ'), - ] - -def _seg_25(): - return [ (0x2C65, 'V'), (0x2C67, 'M', u'ⱨ'), (0x2C68, 'V'), @@ -2618,6 +2603,10 @@ def _seg_25(): (0x2C6E, 'M', u'ɱ'), (0x2C6F, 'M', u'ɐ'), (0x2C70, 'M', u'ɒ'), + ] + +def _seg_25(): + return [ (0x2C71, 'V'), (0x2C72, 'M', u'ⱳ'), (0x2C73, 'V'), @@ -2707,10 +2696,6 @@ def _seg_25(): (0x2CCD, 'V'), (0x2CCE, 'M', u'ⳏ'), (0x2CCF, 'V'), - ] - -def _seg_26(): - return [ (0x2CD0, 'M', u'ⳑ'), (0x2CD1, 'V'), (0x2CD2, 'M', u'ⳓ'), @@ -2722,6 +2707,10 @@ def _seg_26(): (0x2CD8, 'M', u'ⳙ'), (0x2CD9, 'V'), (0x2CDA, 'M', u'ⳛ'), + ] + +def _seg_26(): + return [ (0x2CDB, 'V'), (0x2CDC, 'M', u'ⳝ'), (0x2CDD, 'V'), @@ -2768,7 +2757,7 @@ def _seg_26(): (0x2DD8, 'V'), (0x2DDF, 'X'), (0x2DE0, 'V'), - (0x2E4A, 'X'), + (0x2E4F, 'X'), (0x2E80, 'V'), (0x2E9A, 'X'), (0x2E9B, 'V'), @@ -2811,10 +2800,6 @@ def _seg_26(): (0x2F20, 'M', u'士'), (0x2F21, 'M', u'夂'), (0x2F22, 'M', u'夊'), - ] - -def _seg_27(): - return [ (0x2F23, 'M', u'夕'), (0x2F24, 'M', u'大'), (0x2F25, 'M', u'女'), @@ -2826,6 +2811,10 @@ def _seg_27(): (0x2F2B, 'M', u'尸'), (0x2F2C, 'M', u'屮'), (0x2F2D, 'M', u'山'), + ] + +def _seg_27(): + return [ (0x2F2E, 'M', u'巛'), (0x2F2F, 'M', u'工'), (0x2F30, 'M', u'己'), @@ -2915,10 +2904,6 @@ def _seg_27(): (0x2F84, 'M', u'至'), (0x2F85, 'M', u'臼'), (0x2F86, 'M', u'舌'), - ] - -def _seg_28(): - return [ (0x2F87, 'M', u'舛'), (0x2F88, 'M', u'舟'), (0x2F89, 'M', u'艮'), @@ -2930,6 +2915,10 @@ def _seg_28(): (0x2F8F, 'M', u'行'), (0x2F90, 'M', u'衣'), (0x2F91, 'M', u'襾'), + ] + +def _seg_28(): + return [ (0x2F92, 'M', u'見'), (0x2F93, 'M', u'角'), (0x2F94, 'M', u'言'), @@ -3019,13 +3008,9 @@ def _seg_28(): (0x309F, 'M', u'より'), (0x30A0, 'V'), (0x30FF, 'M', u'コト'), - ] - -def _seg_29(): - return [ (0x3100, 'X'), (0x3105, 'V'), - (0x312F, 'X'), + (0x3130, 'X'), (0x3131, 'M', u'ᄀ'), (0x3132, 'M', u'ᄁ'), (0x3133, 'M', u'ᆪ'), @@ -3034,6 +3019,10 @@ def _seg_29(): (0x3136, 'M', u'ᆭ'), (0x3137, 'M', u'ᄃ'), (0x3138, 'M', u'ᄄ'), + ] + +def _seg_29(): + return [ (0x3139, 'M', u'ᄅ'), (0x313A, 'M', u'ᆰ'), (0x313B, 'M', u'ᆱ'), @@ -3123,10 +3112,6 @@ def _seg_29(): (0x318F, 'X'), (0x3190, 'V'), (0x3192, 'M', u'一'), - ] - -def _seg_30(): - return [ (0x3193, 'M', u'二'), (0x3194, 'M', u'三'), (0x3195, 'M', u'四'), @@ -3138,6 +3123,10 @@ def _seg_30(): (0x319B, 'M', u'丙'), (0x319C, 'M', u'丁'), (0x319D, 'M', u'天'), + ] + +def _seg_30(): + return [ (0x319E, 'M', u'地'), (0x319F, 'M', u'人'), (0x31A0, 'V'), @@ -3227,10 +3216,6 @@ def _seg_30(): (0x3256, 'M', u'26'), (0x3257, 'M', u'27'), (0x3258, 'M', u'28'), - ] - -def _seg_31(): - return [ (0x3259, 'M', u'29'), (0x325A, 'M', u'30'), (0x325B, 'M', u'31'), @@ -3242,6 +3227,10 @@ def _seg_31(): (0x3261, 'M', u'ᄂ'), (0x3262, 'M', u'ᄃ'), (0x3263, 'M', u'ᄅ'), + ] + +def _seg_31(): + return [ (0x3264, 'M', u'ᄆ'), (0x3265, 'M', u'ᄇ'), (0x3266, 'M', u'ᄉ'), @@ -3331,10 +3320,6 @@ def _seg_31(): (0x32BA, 'M', u'45'), (0x32BB, 'M', u'46'), (0x32BC, 'M', u'47'), - ] - -def _seg_32(): - return [ (0x32BD, 'M', u'48'), (0x32BE, 'M', u'49'), (0x32BF, 'M', 
u'50'), @@ -3346,6 +3331,10 @@ def _seg_32(): (0x32C5, 'M', u'6月'), (0x32C6, 'M', u'7月'), (0x32C7, 'M', u'8月'), + ] + +def _seg_32(): + return [ (0x32C8, 'M', u'9月'), (0x32C9, 'M', u'10月'), (0x32CA, 'M', u'11月'), @@ -3435,10 +3424,6 @@ def _seg_32(): (0x331E, 'M', u'コーポ'), (0x331F, 'M', u'サイクル'), (0x3320, 'M', u'サンチーム'), - ] - -def _seg_33(): - return [ (0x3321, 'M', u'シリング'), (0x3322, 'M', u'センチ'), (0x3323, 'M', u'セント'), @@ -3450,6 +3435,10 @@ def _seg_33(): (0x3329, 'M', u'ノット'), (0x332A, 'M', u'ハイツ'), (0x332B, 'M', u'パーセント'), + ] + +def _seg_33(): + return [ (0x332C, 'M', u'パーツ'), (0x332D, 'M', u'バーレル'), (0x332E, 'M', u'ピアストル'), @@ -3539,10 +3528,6 @@ def _seg_33(): (0x3382, 'M', u'μa'), (0x3383, 'M', u'ma'), (0x3384, 'M', u'ka'), - ] - -def _seg_34(): - return [ (0x3385, 'M', u'kb'), (0x3386, 'M', u'mb'), (0x3387, 'M', u'gb'), @@ -3554,6 +3539,10 @@ def _seg_34(): (0x338D, 'M', u'μg'), (0x338E, 'M', u'mg'), (0x338F, 'M', u'kg'), + ] + +def _seg_34(): + return [ (0x3390, 'M', u'hz'), (0x3391, 'M', u'khz'), (0x3392, 'M', u'mhz'), @@ -3643,10 +3632,6 @@ def _seg_34(): (0x33E6, 'M', u'7日'), (0x33E7, 'M', u'8日'), (0x33E8, 'M', u'9日'), - ] - -def _seg_35(): - return [ (0x33E9, 'M', u'10日'), (0x33EA, 'M', u'11日'), (0x33EB, 'M', u'12日'), @@ -3658,6 +3643,10 @@ def _seg_35(): (0x33F1, 'M', u'18日'), (0x33F2, 'M', u'19日'), (0x33F3, 'M', u'20日'), + ] + +def _seg_35(): + return [ (0x33F4, 'M', u'21日'), (0x33F5, 'M', u'22日'), (0x33F6, 'M', u'23日'), @@ -3673,7 +3662,7 @@ def _seg_35(): (0x3400, 'V'), (0x4DB6, 'X'), (0x4DC0, 'V'), - (0x9FEB, 'X'), + (0x9FF0, 'X'), (0xA000, 'V'), (0xA48D, 'X'), (0xA490, 'V'), @@ -3747,10 +3736,6 @@ def _seg_35(): (0xA692, 'M', u'ꚓ'), (0xA693, 'V'), (0xA694, 'M', u'ꚕ'), - ] - -def _seg_36(): - return [ (0xA695, 'V'), (0xA696, 'M', u'ꚗ'), (0xA697, 'V'), @@ -3762,6 +3747,10 @@ def _seg_36(): (0xA69D, 'M', u'ь'), (0xA69E, 'V'), (0xA6F8, 'X'), + ] + +def _seg_36(): + return [ (0xA700, 'V'), (0xA722, 'M', u'ꜣ'), (0xA723, 'V'), @@ -3851,10 +3840,6 @@ def _seg_36(): (0xA780, 'M', u'ꞁ'), (0xA781, 'V'), (0xA782, 'M', u'ꞃ'), - ] - -def _seg_37(): - return [ (0xA783, 'V'), (0xA784, 'M', u'ꞅ'), (0xA785, 'V'), @@ -3866,6 +3851,10 @@ def _seg_37(): (0xA78E, 'V'), (0xA790, 'M', u'ꞑ'), (0xA791, 'V'), + ] + +def _seg_37(): + return [ (0xA792, 'M', u'ꞓ'), (0xA793, 'V'), (0xA796, 'M', u'ꞗ'), @@ -3893,7 +3882,7 @@ def _seg_37(): (0xA7AC, 'M', u'ɡ'), (0xA7AD, 'M', u'ɬ'), (0xA7AE, 'M', u'ɪ'), - (0xA7AF, 'X'), + (0xA7AF, 'V'), (0xA7B0, 'M', u'ʞ'), (0xA7B1, 'M', u'ʇ'), (0xA7B2, 'M', u'ʝ'), @@ -3903,6 +3892,8 @@ def _seg_37(): (0xA7B6, 'M', u'ꞷ'), (0xA7B7, 'V'), (0xA7B8, 'X'), + (0xA7B9, 'V'), + (0xA7BA, 'X'), (0xA7F7, 'V'), (0xA7F8, 'M', u'ħ'), (0xA7F9, 'M', u'œ'), @@ -3917,8 +3908,6 @@ def _seg_37(): (0xA8CE, 'V'), (0xA8DA, 'X'), (0xA8E0, 'V'), - (0xA8FE, 'X'), - (0xA900, 'V'), (0xA954, 'X'), (0xA95F, 'V'), (0xA97D, 'X'), @@ -3955,10 +3944,6 @@ def _seg_37(): (0xAB5F, 'M', u'ꭒ'), (0xAB60, 'V'), (0xAB66, 'X'), - ] - -def _seg_38(): - return [ (0xAB70, 'M', u'Ꭰ'), (0xAB71, 'M', u'Ꭱ'), (0xAB72, 'M', u'Ꭲ'), @@ -3970,6 +3955,10 @@ def _seg_38(): (0xAB78, 'M', u'Ꭸ'), (0xAB79, 'M', u'Ꭹ'), (0xAB7A, 'M', u'Ꭺ'), + ] + +def _seg_38(): + return [ (0xAB7B, 'M', u'Ꭻ'), (0xAB7C, 'M', u'Ꭼ'), (0xAB7D, 'M', u'Ꭽ'), @@ -4059,10 +4048,6 @@ def _seg_38(): (0xF907, 'M', u'龜'), (0xF909, 'M', u'契'), (0xF90A, 'M', u'金'), - ] - -def _seg_39(): - return [ (0xF90B, 'M', u'喇'), (0xF90C, 'M', u'奈'), (0xF90D, 'M', u'懶'), @@ -4074,6 +4059,10 @@ def _seg_39(): (0xF913, 'M', u'邏'), (0xF914, 'M', u'樂'), (0xF915, 'M', u'洛'), + ] 
+ +def _seg_39(): + return [ (0xF916, 'M', u'烙'), (0xF917, 'M', u'珞'), (0xF918, 'M', u'落'), @@ -4163,10 +4152,6 @@ def _seg_39(): (0xF96C, 'M', u'塞'), (0xF96D, 'M', u'省'), (0xF96E, 'M', u'葉'), - ] - -def _seg_40(): - return [ (0xF96F, 'M', u'說'), (0xF970, 'M', u'殺'), (0xF971, 'M', u'辰'), @@ -4178,6 +4163,10 @@ def _seg_40(): (0xF977, 'M', u'亮'), (0xF978, 'M', u'兩'), (0xF979, 'M', u'凉'), + ] + +def _seg_40(): + return [ (0xF97A, 'M', u'梁'), (0xF97B, 'M', u'糧'), (0xF97C, 'M', u'良'), @@ -4267,10 +4256,6 @@ def _seg_40(): (0xF9D0, 'M', u'類'), (0xF9D1, 'M', u'六'), (0xF9D2, 'M', u'戮'), - ] - -def _seg_41(): - return [ (0xF9D3, 'M', u'陸'), (0xF9D4, 'M', u'倫'), (0xF9D5, 'M', u'崙'), @@ -4282,6 +4267,10 @@ def _seg_41(): (0xF9DB, 'M', u'率'), (0xF9DC, 'M', u'隆'), (0xF9DD, 'M', u'利'), + ] + +def _seg_41(): + return [ (0xF9DE, 'M', u'吏'), (0xF9DF, 'M', u'履'), (0xF9E0, 'M', u'易'), @@ -4371,10 +4360,6 @@ def _seg_41(): (0xFA39, 'M', u'塀'), (0xFA3A, 'M', u'墨'), (0xFA3B, 'M', u'層'), - ] - -def _seg_42(): - return [ (0xFA3C, 'M', u'屮'), (0xFA3D, 'M', u'悔'), (0xFA3E, 'M', u'慨'), @@ -4386,6 +4371,10 @@ def _seg_42(): (0xFA44, 'M', u'梅'), (0xFA45, 'M', u'海'), (0xFA46, 'M', u'渚'), + ] + +def _seg_42(): + return [ (0xFA47, 'M', u'漢'), (0xFA48, 'M', u'煮'), (0xFA49, 'M', u'爫'), @@ -4475,10 +4464,6 @@ def _seg_42(): (0xFA9F, 'M', u'犯'), (0xFAA0, 'M', u'猪'), (0xFAA1, 'M', u'瑱'), - ] - -def _seg_43(): - return [ (0xFAA2, 'M', u'甆'), (0xFAA3, 'M', u'画'), (0xFAA4, 'M', u'瘝'), @@ -4490,6 +4475,10 @@ def _seg_43(): (0xFAAA, 'M', u'着'), (0xFAAB, 'M', u'磌'), (0xFAAC, 'M', u'窱'), + ] + +def _seg_43(): + return [ (0xFAAD, 'M', u'節'), (0xFAAE, 'M', u'类'), (0xFAAF, 'M', u'絛'), @@ -4579,10 +4568,6 @@ def _seg_43(): (0xFB38, 'M', u'טּ'), (0xFB39, 'M', u'יּ'), (0xFB3A, 'M', u'ךּ'), - ] - -def _seg_44(): - return [ (0xFB3B, 'M', u'כּ'), (0xFB3C, 'M', u'לּ'), (0xFB3D, 'X'), @@ -4594,6 +4579,10 @@ def _seg_44(): (0xFB43, 'M', u'ףּ'), (0xFB44, 'M', u'פּ'), (0xFB45, 'X'), + ] + +def _seg_44(): + return [ (0xFB46, 'M', u'צּ'), (0xFB47, 'M', u'קּ'), (0xFB48, 'M', u'רּ'), @@ -4683,10 +4672,6 @@ def _seg_44(): (0xFC19, 'M', u'خج'), (0xFC1A, 'M', u'خح'), (0xFC1B, 'M', u'خم'), - ] - -def _seg_45(): - return [ (0xFC1C, 'M', u'سج'), (0xFC1D, 'M', u'سح'), (0xFC1E, 'M', u'سخ'), @@ -4698,6 +4683,10 @@ def _seg_45(): (0xFC24, 'M', u'ضخ'), (0xFC25, 'M', u'ضم'), (0xFC26, 'M', u'طح'), + ] + +def _seg_45(): + return [ (0xFC27, 'M', u'طم'), (0xFC28, 'M', u'ظم'), (0xFC29, 'M', u'عج'), @@ -4787,10 +4776,6 @@ def _seg_45(): (0xFC7D, 'M', u'في'), (0xFC7E, 'M', u'قى'), (0xFC7F, 'M', u'قي'), - ] - -def _seg_46(): - return [ (0xFC80, 'M', u'كا'), (0xFC81, 'M', u'كل'), (0xFC82, 'M', u'كم'), @@ -4802,6 +4787,10 @@ def _seg_46(): (0xFC88, 'M', u'ما'), (0xFC89, 'M', u'مم'), (0xFC8A, 'M', u'نر'), + ] + +def _seg_46(): + return [ (0xFC8B, 'M', u'نز'), (0xFC8C, 'M', u'نم'), (0xFC8D, 'M', u'نن'), @@ -4891,10 +4880,6 @@ def _seg_46(): (0xFCE1, 'M', u'بم'), (0xFCE2, 'M', u'به'), (0xFCE3, 'M', u'تم'), - ] - -def _seg_47(): - return [ (0xFCE4, 'M', u'ته'), (0xFCE5, 'M', u'ثم'), (0xFCE6, 'M', u'ثه'), @@ -4906,6 +4891,10 @@ def _seg_47(): (0xFCEC, 'M', u'كم'), (0xFCED, 'M', u'لم'), (0xFCEE, 'M', u'نم'), + ] + +def _seg_47(): + return [ (0xFCEF, 'M', u'نه'), (0xFCF0, 'M', u'يم'), (0xFCF1, 'M', u'يه'), @@ -4995,10 +4984,6 @@ def _seg_47(): (0xFD57, 'M', u'تمخ'), (0xFD58, 'M', u'جمح'), (0xFD5A, 'M', u'حمي'), - ] - -def _seg_48(): - return [ (0xFD5B, 'M', u'حمى'), (0xFD5C, 'M', u'سحج'), (0xFD5D, 'M', u'سجح'), @@ -5010,6 +4995,10 @@ def _seg_48(): (0xFD66, 'M', u'صمم'), 
(0xFD67, 'M', u'شحم'), (0xFD69, 'M', u'شجي'), + ] + +def _seg_48(): + return [ (0xFD6A, 'M', u'شمخ'), (0xFD6C, 'M', u'شمم'), (0xFD6E, 'M', u'ضحى'), @@ -5099,10 +5088,6 @@ def _seg_48(): (0xFDF3, 'M', u'اكبر'), (0xFDF4, 'M', u'محمد'), (0xFDF5, 'M', u'صلعم'), - ] - -def _seg_49(): - return [ (0xFDF6, 'M', u'رسول'), (0xFDF7, 'M', u'عليه'), (0xFDF8, 'M', u'وسلم'), @@ -5114,6 +5099,10 @@ def _seg_49(): (0xFDFE, 'X'), (0xFE00, 'I'), (0xFE10, '3', u','), + ] + +def _seg_49(): + return [ (0xFE11, 'M', u'、'), (0xFE12, 'X'), (0xFE13, '3', u':'), @@ -5203,10 +5192,6 @@ def _seg_49(): (0xFE8F, 'M', u'ب'), (0xFE93, 'M', u'ة'), (0xFE95, 'M', u'ت'), - ] - -def _seg_50(): - return [ (0xFE99, 'M', u'ث'), (0xFE9D, 'M', u'ج'), (0xFEA1, 'M', u'ح'), @@ -5218,6 +5203,10 @@ def _seg_50(): (0xFEB1, 'M', u'س'), (0xFEB5, 'M', u'ش'), (0xFEB9, 'M', u'ص'), + ] + +def _seg_50(): + return [ (0xFEBD, 'M', u'ض'), (0xFEC1, 'M', u'ط'), (0xFEC5, 'M', u'ظ'), @@ -5307,10 +5296,6 @@ def _seg_50(): (0xFF41, 'M', u'a'), (0xFF42, 'M', u'b'), (0xFF43, 'M', u'c'), - ] - -def _seg_51(): - return [ (0xFF44, 'M', u'd'), (0xFF45, 'M', u'e'), (0xFF46, 'M', u'f'), @@ -5322,6 +5307,10 @@ def _seg_51(): (0xFF4C, 'M', u'l'), (0xFF4D, 'M', u'm'), (0xFF4E, 'M', u'n'), + ] + +def _seg_51(): + return [ (0xFF4F, 'M', u'o'), (0xFF50, 'M', u'p'), (0xFF51, 'M', u'q'), @@ -5411,10 +5400,6 @@ def _seg_51(): (0xFFA5, 'M', u'ᆬ'), (0xFFA6, 'M', u'ᆭ'), (0xFFA7, 'M', u'ᄃ'), - ] - -def _seg_52(): - return [ (0xFFA8, 'M', u'ᄄ'), (0xFFA9, 'M', u'ᄅ'), (0xFFAA, 'M', u'ᆰ'), @@ -5426,6 +5411,10 @@ def _seg_52(): (0xFFB0, 'M', u'ᄚ'), (0xFFB1, 'M', u'ᄆ'), (0xFFB2, 'M', u'ᄇ'), + ] + +def _seg_52(): + return [ (0xFFB3, 'M', u'ᄈ'), (0xFFB4, 'M', u'ᄡ'), (0xFFB5, 'M', u'ᄉ'), @@ -5515,10 +5504,6 @@ def _seg_52(): (0x10300, 'V'), (0x10324, 'X'), (0x1032D, 'V'), - ] - -def _seg_53(): - return [ (0x1034B, 'X'), (0x10350, 'V'), (0x1037B, 'X'), @@ -5530,6 +5515,10 @@ def _seg_53(): (0x103D6, 'X'), (0x10400, 'M', u'𐐨'), (0x10401, 'M', u'𐐩'), + ] + +def _seg_53(): + return [ (0x10402, 'M', u'𐐪'), (0x10403, 'M', u'𐐫'), (0x10404, 'M', u'𐐬'), @@ -5619,10 +5608,6 @@ def _seg_53(): (0x10570, 'X'), (0x10600, 'V'), (0x10737, 'X'), - ] - -def _seg_54(): - return [ (0x10740, 'V'), (0x10756, 'X'), (0x10760, 'V'), @@ -5634,6 +5619,10 @@ def _seg_54(): (0x1080A, 'V'), (0x10836, 'X'), (0x10837, 'V'), + ] + +def _seg_54(): + return [ (0x10839, 'X'), (0x1083C, 'V'), (0x1083D, 'X'), @@ -5666,11 +5655,11 @@ def _seg_54(): (0x10A15, 'V'), (0x10A18, 'X'), (0x10A19, 'V'), - (0x10A34, 'X'), + (0x10A36, 'X'), (0x10A38, 'V'), (0x10A3B, 'X'), (0x10A3F, 'V'), - (0x10A48, 'X'), + (0x10A49, 'X'), (0x10A50, 'V'), (0x10A59, 'X'), (0x10A60, 'V'), @@ -5723,10 +5712,6 @@ def _seg_54(): (0x10C9B, 'M', u'𐳛'), (0x10C9C, 'M', u'𐳜'), (0x10C9D, 'M', u'𐳝'), - ] - -def _seg_55(): - return [ (0x10C9E, 'M', u'𐳞'), (0x10C9F, 'M', u'𐳟'), (0x10CA0, 'M', u'𐳠'), @@ -5738,6 +5723,10 @@ def _seg_55(): (0x10CA6, 'M', u'𐳦'), (0x10CA7, 'M', u'𐳧'), (0x10CA8, 'M', u'𐳨'), + ] + +def _seg_55(): + return [ (0x10CA9, 'M', u'𐳩'), (0x10CAA, 'M', u'𐳪'), (0x10CAB, 'M', u'𐳫'), @@ -5752,9 +5741,15 @@ def _seg_55(): (0x10CC0, 'V'), (0x10CF3, 'X'), (0x10CFA, 'V'), - (0x10D00, 'X'), + (0x10D28, 'X'), + (0x10D30, 'V'), + (0x10D3A, 'X'), (0x10E60, 'V'), (0x10E7F, 'X'), + (0x10F00, 'V'), + (0x10F28, 'X'), + (0x10F30, 'V'), + (0x10F5A, 'X'), (0x11000, 'V'), (0x1104E, 'X'), (0x11052, 'V'), @@ -5770,7 +5765,7 @@ def _seg_55(): (0x11100, 'V'), (0x11135, 'X'), (0x11136, 'V'), - (0x11144, 'X'), + (0x11147, 'X'), (0x11150, 'V'), (0x11177, 'X'), 
(0x11180, 'V'), @@ -5811,7 +5806,7 @@ def _seg_55(): (0x11334, 'X'), (0x11335, 'V'), (0x1133A, 'X'), - (0x1133C, 'V'), + (0x1133B, 'V'), (0x11345, 'X'), (0x11347, 'V'), (0x11349, 'X'), @@ -5827,16 +5822,16 @@ def _seg_55(): (0x1136D, 'X'), (0x11370, 'V'), (0x11375, 'X'), - ] - -def _seg_56(): - return [ (0x11400, 'V'), (0x1145A, 'X'), (0x1145B, 'V'), (0x1145C, 'X'), (0x1145D, 'V'), - (0x1145E, 'X'), + ] + +def _seg_56(): + return [ + (0x1145F, 'X'), (0x11480, 'V'), (0x114C8, 'X'), (0x114D0, 'V'), @@ -5856,11 +5851,13 @@ def _seg_56(): (0x116C0, 'V'), (0x116CA, 'X'), (0x11700, 'V'), - (0x1171A, 'X'), + (0x1171B, 'X'), (0x1171D, 'V'), (0x1172C, 'X'), (0x11730, 'V'), (0x11740, 'X'), + (0x11800, 'V'), + (0x1183C, 'X'), (0x118A0, 'M', u'𑣀'), (0x118A1, 'M', u'𑣁'), (0x118A2, 'M', u'𑣂'), @@ -5902,8 +5899,6 @@ def _seg_56(): (0x11A50, 'V'), (0x11A84, 'X'), (0x11A86, 'V'), - (0x11A9D, 'X'), - (0x11A9E, 'V'), (0x11AA3, 'X'), (0x11AC0, 'V'), (0x11AF9, 'X'), @@ -5931,14 +5926,28 @@ def _seg_56(): (0x11D3B, 'X'), (0x11D3C, 'V'), (0x11D3E, 'X'), - ] - -def _seg_57(): - return [ (0x11D3F, 'V'), (0x11D48, 'X'), (0x11D50, 'V'), (0x11D5A, 'X'), + (0x11D60, 'V'), + ] + +def _seg_57(): + return [ + (0x11D66, 'X'), + (0x11D67, 'V'), + (0x11D69, 'X'), + (0x11D6A, 'V'), + (0x11D8F, 'X'), + (0x11D90, 'V'), + (0x11D92, 'X'), + (0x11D93, 'V'), + (0x11D99, 'X'), + (0x11DA0, 'V'), + (0x11DAA, 'X'), + (0x11EE0, 'V'), + (0x11EF9, 'X'), (0x12000, 'V'), (0x1239A, 'X'), (0x12400, 'V'), @@ -5973,6 +5982,8 @@ def _seg_57(): (0x16B78, 'X'), (0x16B7D, 'V'), (0x16B90, 'X'), + (0x16E60, 'V'), + (0x16E9B, 'X'), (0x16F00, 'V'), (0x16F45, 'X'), (0x16F50, 'V'), @@ -5982,7 +5993,7 @@ def _seg_57(): (0x16FE0, 'V'), (0x16FE2, 'X'), (0x17000, 'V'), - (0x187ED, 'X'), + (0x187F2, 'X'), (0x18800, 'V'), (0x18AF3, 'X'), (0x1B000, 'V'), @@ -6024,21 +6035,23 @@ def _seg_57(): (0x1D1C1, 'V'), (0x1D1E9, 'X'), (0x1D200, 'V'), + ] + +def _seg_58(): + return [ (0x1D246, 'X'), + (0x1D2E0, 'V'), + (0x1D2F4, 'X'), (0x1D300, 'V'), (0x1D357, 'X'), (0x1D360, 'V'), - (0x1D372, 'X'), + (0x1D379, 'X'), (0x1D400, 'M', u'a'), (0x1D401, 'M', u'b'), (0x1D402, 'M', u'c'), (0x1D403, 'M', u'd'), (0x1D404, 'M', u'e'), (0x1D405, 'M', u'f'), - ] - -def _seg_58(): - return [ (0x1D406, 'M', u'g'), (0x1D407, 'M', u'h'), (0x1D408, 'M', u'i'), @@ -6126,6 +6139,10 @@ def _seg_58(): (0x1D45A, 'M', u'm'), (0x1D45B, 'M', u'n'), (0x1D45C, 'M', u'o'), + ] + +def _seg_59(): + return [ (0x1D45D, 'M', u'p'), (0x1D45E, 'M', u'q'), (0x1D45F, 'M', u'r'), @@ -6139,10 +6156,6 @@ def _seg_58(): (0x1D467, 'M', u'z'), (0x1D468, 'M', u'a'), (0x1D469, 'M', u'b'), - ] - -def _seg_59(): - return [ (0x1D46A, 'M', u'c'), (0x1D46B, 'M', u'd'), (0x1D46C, 'M', u'e'), @@ -6230,6 +6243,10 @@ def _seg_59(): (0x1D4C1, 'M', u'l'), (0x1D4C2, 'M', u'm'), (0x1D4C3, 'M', u'n'), + ] + +def _seg_60(): + return [ (0x1D4C4, 'X'), (0x1D4C5, 'M', u'p'), (0x1D4C6, 'M', u'q'), @@ -6243,10 +6260,6 @@ def _seg_59(): (0x1D4CE, 'M', u'y'), (0x1D4CF, 'M', u'z'), (0x1D4D0, 'M', u'a'), - ] - -def _seg_60(): - return [ (0x1D4D1, 'M', u'b'), (0x1D4D2, 'M', u'c'), (0x1D4D3, 'M', u'd'), @@ -6334,6 +6347,10 @@ def _seg_60(): (0x1D526, 'M', u'i'), (0x1D527, 'M', u'j'), (0x1D528, 'M', u'k'), + ] + +def _seg_61(): + return [ (0x1D529, 'M', u'l'), (0x1D52A, 'M', u'm'), (0x1D52B, 'M', u'n'), @@ -6347,10 +6364,6 @@ def _seg_60(): (0x1D533, 'M', u'v'), (0x1D534, 'M', u'w'), (0x1D535, 'M', u'x'), - ] - -def _seg_61(): - return [ (0x1D536, 'M', u'y'), (0x1D537, 'M', u'z'), (0x1D538, 'M', u'a'), @@ -6438,6 +6451,10 @@ def _seg_61(): 
(0x1D58C, 'M', u'g'), (0x1D58D, 'M', u'h'), (0x1D58E, 'M', u'i'), + ] + +def _seg_62(): + return [ (0x1D58F, 'M', u'j'), (0x1D590, 'M', u'k'), (0x1D591, 'M', u'l'), @@ -6451,10 +6468,6 @@ def _seg_61(): (0x1D599, 'M', u't'), (0x1D59A, 'M', u'u'), (0x1D59B, 'M', u'v'), - ] - -def _seg_62(): - return [ (0x1D59C, 'M', u'w'), (0x1D59D, 'M', u'x'), (0x1D59E, 'M', u'y'), @@ -6542,6 +6555,10 @@ def _seg_62(): (0x1D5F0, 'M', u'c'), (0x1D5F1, 'M', u'd'), (0x1D5F2, 'M', u'e'), + ] + +def _seg_63(): + return [ (0x1D5F3, 'M', u'f'), (0x1D5F4, 'M', u'g'), (0x1D5F5, 'M', u'h'), @@ -6555,10 +6572,6 @@ def _seg_62(): (0x1D5FD, 'M', u'p'), (0x1D5FE, 'M', u'q'), (0x1D5FF, 'M', u'r'), - ] - -def _seg_63(): - return [ (0x1D600, 'M', u's'), (0x1D601, 'M', u't'), (0x1D602, 'M', u'u'), @@ -6646,6 +6659,10 @@ def _seg_63(): (0x1D654, 'M', u'y'), (0x1D655, 'M', u'z'), (0x1D656, 'M', u'a'), + ] + +def _seg_64(): + return [ (0x1D657, 'M', u'b'), (0x1D658, 'M', u'c'), (0x1D659, 'M', u'd'), @@ -6659,10 +6676,6 @@ def _seg_63(): (0x1D661, 'M', u'l'), (0x1D662, 'M', u'm'), (0x1D663, 'M', u'n'), - ] - -def _seg_64(): - return [ (0x1D664, 'M', u'o'), (0x1D665, 'M', u'p'), (0x1D666, 'M', u'q'), @@ -6750,6 +6763,10 @@ def _seg_64(): (0x1D6B9, 'M', u'θ'), (0x1D6BA, 'M', u'σ'), (0x1D6BB, 'M', u'τ'), + ] + +def _seg_65(): + return [ (0x1D6BC, 'M', u'υ'), (0x1D6BD, 'M', u'φ'), (0x1D6BE, 'M', u'χ'), @@ -6763,10 +6780,6 @@ def _seg_64(): (0x1D6C6, 'M', u'ε'), (0x1D6C7, 'M', u'ζ'), (0x1D6C8, 'M', u'η'), - ] - -def _seg_65(): - return [ (0x1D6C9, 'M', u'θ'), (0x1D6CA, 'M', u'ι'), (0x1D6CB, 'M', u'κ'), @@ -6854,6 +6867,10 @@ def _seg_65(): (0x1D71F, 'M', u'δ'), (0x1D720, 'M', u'ε'), (0x1D721, 'M', u'ζ'), + ] + +def _seg_66(): + return [ (0x1D722, 'M', u'η'), (0x1D723, 'M', u'θ'), (0x1D724, 'M', u'ι'), @@ -6867,10 +6884,6 @@ def _seg_65(): (0x1D72C, 'M', u'ρ'), (0x1D72D, 'M', u'θ'), (0x1D72E, 'M', u'σ'), - ] - -def _seg_66(): - return [ (0x1D72F, 'M', u'τ'), (0x1D730, 'M', u'υ'), (0x1D731, 'M', u'φ'), @@ -6958,6 +6971,10 @@ def _seg_66(): (0x1D785, 'M', u'φ'), (0x1D786, 'M', u'χ'), (0x1D787, 'M', u'ψ'), + ] + +def _seg_67(): + return [ (0x1D788, 'M', u'ω'), (0x1D789, 'M', u'∂'), (0x1D78A, 'M', u'ε'), @@ -6971,10 +6988,6 @@ def _seg_66(): (0x1D792, 'M', u'γ'), (0x1D793, 'M', u'δ'), (0x1D794, 'M', u'ε'), - ] - -def _seg_67(): - return [ (0x1D795, 'M', u'ζ'), (0x1D796, 'M', u'η'), (0x1D797, 'M', u'θ'), @@ -7062,6 +7075,10 @@ def _seg_67(): (0x1D7EC, 'M', u'0'), (0x1D7ED, 'M', u'1'), (0x1D7EE, 'M', u'2'), + ] + +def _seg_68(): + return [ (0x1D7EF, 'M', u'3'), (0x1D7F0, 'M', u'4'), (0x1D7F1, 'M', u'5'), @@ -7075,10 +7092,6 @@ def _seg_67(): (0x1D7F9, 'M', u'3'), (0x1D7FA, 'M', u'4'), (0x1D7FB, 'M', u'5'), - ] - -def _seg_68(): - return [ (0x1D7FC, 'M', u'6'), (0x1D7FD, 'M', u'7'), (0x1D7FE, 'M', u'8'), @@ -7143,6 +7156,8 @@ def _seg_68(): (0x1E95A, 'X'), (0x1E95E, 'V'), (0x1E960, 'X'), + (0x1EC71, 'V'), + (0x1ECB5, 'X'), (0x1EE00, 'M', u'ا'), (0x1EE01, 'M', u'ب'), (0x1EE02, 'M', u'ج'), @@ -7164,6 +7179,10 @@ def _seg_68(): (0x1EE12, 'M', u'ق'), (0x1EE13, 'M', u'ر'), (0x1EE14, 'M', u'ش'), + ] + +def _seg_69(): + return [ (0x1EE15, 'M', u'ت'), (0x1EE16, 'M', u'ث'), (0x1EE17, 'M', u'خ'), @@ -7179,10 +7198,6 @@ def _seg_68(): (0x1EE21, 'M', u'ب'), (0x1EE22, 'M', u'ج'), (0x1EE23, 'X'), - ] - -def _seg_69(): - return [ (0x1EE24, 'M', u'ه'), (0x1EE25, 'X'), (0x1EE27, 'M', u'ح'), @@ -7268,6 +7283,10 @@ def _seg_69(): (0x1EE81, 'M', u'ب'), (0x1EE82, 'M', u'ج'), (0x1EE83, 'M', u'د'), + ] + +def _seg_70(): + return [ (0x1EE84, 'M', u'ه'), (0x1EE85, 
'M', u'و'), (0x1EE86, 'M', u'ز'), @@ -7283,10 +7302,6 @@ def _seg_69(): (0x1EE90, 'M', u'ف'), (0x1EE91, 'M', u'ص'), (0x1EE92, 'M', u'ق'), - ] - -def _seg_70(): - return [ (0x1EE93, 'M', u'ر'), (0x1EE94, 'M', u'ش'), (0x1EE95, 'M', u'ت'), @@ -7372,6 +7387,10 @@ def _seg_70(): (0x1F122, '3', u'(s)'), (0x1F123, '3', u'(t)'), (0x1F124, '3', u'(u)'), + ] + +def _seg_71(): + return [ (0x1F125, '3', u'(v)'), (0x1F126, '3', u'(w)'), (0x1F127, '3', u'(x)'), @@ -7382,15 +7401,11 @@ def _seg_70(): (0x1F12C, 'M', u'r'), (0x1F12D, 'M', u'cd'), (0x1F12E, 'M', u'wz'), - (0x1F12F, 'X'), + (0x1F12F, 'V'), (0x1F130, 'M', u'a'), (0x1F131, 'M', u'b'), (0x1F132, 'M', u'c'), (0x1F133, 'M', u'd'), - ] - -def _seg_71(): - return [ (0x1F134, 'M', u'e'), (0x1F135, 'M', u'f'), (0x1F136, 'M', u'g'), @@ -7476,6 +7491,10 @@ def _seg_71(): (0x1F239, 'M', u'割'), (0x1F23A, 'M', u'営'), (0x1F23B, 'M', u'配'), + ] + +def _seg_72(): + return [ (0x1F23C, 'X'), (0x1F240, 'M', u'〔本〕'), (0x1F241, 'M', u'〔三〕'), @@ -7491,21 +7510,17 @@ def _seg_71(): (0x1F251, 'M', u'可'), (0x1F252, 'X'), (0x1F260, 'V'), - ] - -def _seg_72(): - return [ (0x1F266, 'X'), (0x1F300, 'V'), (0x1F6D5, 'X'), (0x1F6E0, 'V'), (0x1F6ED, 'X'), (0x1F6F0, 'V'), - (0x1F6F9, 'X'), + (0x1F6FA, 'X'), (0x1F700, 'V'), (0x1F774, 'X'), (0x1F780, 'V'), - (0x1F7D5, 'X'), + (0x1F7D9, 'X'), (0x1F800, 'V'), (0x1F80C, 'X'), (0x1F810, 'V'), @@ -7521,15 +7536,21 @@ def _seg_72(): (0x1F910, 'V'), (0x1F93F, 'X'), (0x1F940, 'V'), - (0x1F94D, 'X'), - (0x1F950, 'V'), - (0x1F96C, 'X'), - (0x1F980, 'V'), - (0x1F998, 'X'), + (0x1F971, 'X'), + (0x1F973, 'V'), + (0x1F977, 'X'), + (0x1F97A, 'V'), + (0x1F97B, 'X'), + (0x1F97C, 'V'), + (0x1F9A3, 'X'), + (0x1F9B0, 'V'), + (0x1F9BA, 'X'), (0x1F9C0, 'V'), - (0x1F9C1, 'X'), + (0x1F9C3, 'X'), (0x1F9D0, 'V'), - (0x1F9E7, 'X'), + (0x1FA00, 'X'), + (0x1FA60, 'V'), + (0x1FA6E, 'X'), (0x20000, 'V'), (0x2A6D7, 'X'), (0x2A700, 'V'), @@ -7574,6 +7595,10 @@ def _seg_72(): (0x2F81F, 'M', u'㓟'), (0x2F820, 'M', u'刻'), (0x2F821, 'M', u'剆'), + ] + +def _seg_73(): + return [ (0x2F822, 'M', u'割'), (0x2F823, 'M', u'剷'), (0x2F824, 'M', u'㔕'), @@ -7595,10 +7620,6 @@ def _seg_72(): (0x2F836, 'M', u'及'), (0x2F837, 'M', u'叟'), (0x2F838, 'M', u'𠭣'), - ] - -def _seg_73(): - return [ (0x2F839, 'M', u'叫'), (0x2F83A, 'M', u'叱'), (0x2F83B, 'M', u'吆'), @@ -7678,6 +7699,10 @@ def _seg_73(): (0x2F887, 'M', u'幩'), (0x2F888, 'M', u'㡢'), (0x2F889, 'M', u'𢆃'), + ] + +def _seg_74(): + return [ (0x2F88A, 'M', u'㡼'), (0x2F88B, 'M', u'庰'), (0x2F88C, 'M', u'庳'), @@ -7699,10 +7724,6 @@ def _seg_73(): (0x2F89E, 'M', u'志'), (0x2F89F, 'M', u'忹'), (0x2F8A0, 'M', u'悁'), - ] - -def _seg_74(): - return [ (0x2F8A1, 'M', u'㤺'), (0x2F8A2, 'M', u'㤜'), (0x2F8A3, 'M', u'悔'), @@ -7782,6 +7803,10 @@ def _seg_74(): (0x2F8ED, 'M', u'櫛'), (0x2F8EE, 'M', u'㰘'), (0x2F8EF, 'M', u'次'), + ] + +def _seg_75(): + return [ (0x2F8F0, 'M', u'𣢧'), (0x2F8F1, 'M', u'歔'), (0x2F8F2, 'M', u'㱎'), @@ -7803,10 +7828,6 @@ def _seg_74(): (0x2F902, 'M', u'流'), (0x2F903, 'M', u'浩'), (0x2F904, 'M', u'浸'), - ] - -def _seg_75(): - return [ (0x2F905, 'M', u'涅'), (0x2F906, 'M', u'𣴞'), (0x2F907, 'M', u'洴'), @@ -7886,6 +7907,10 @@ def _seg_75(): (0x2F953, 'M', u'祖'), (0x2F954, 'M', u'𥚚'), (0x2F955, 'M', u'𥛅'), + ] + +def _seg_76(): + return [ (0x2F956, 'M', u'福'), (0x2F957, 'M', u'秫'), (0x2F958, 'M', u'䄯'), @@ -7907,10 +7932,6 @@ def _seg_75(): (0x2F969, 'M', u'糣'), (0x2F96A, 'M', u'紀'), (0x2F96B, 'M', u'𥾆'), - ] - -def _seg_76(): - return [ (0x2F96C, 'M', u'絣'), (0x2F96D, 'M', u'䌁'), (0x2F96E, 'M', u'緇'), @@ -7990,6 +8011,10 @@ def 
_seg_76(): (0x2F9B8, 'M', u'蚈'), (0x2F9B9, 'M', u'蜎'), (0x2F9BA, 'M', u'蛢'), + ] + +def _seg_77(): + return [ (0x2F9BB, 'M', u'蝹'), (0x2F9BC, 'M', u'蜨'), (0x2F9BD, 'M', u'蝫'), @@ -8011,10 +8036,6 @@ def _seg_76(): (0x2F9CD, 'M', u'䚾'), (0x2F9CE, 'M', u'䛇'), (0x2F9CF, 'M', u'誠'), - ] - -def _seg_77(): - return [ (0x2F9D0, 'M', u'諭'), (0x2F9D1, 'M', u'變'), (0x2F9D2, 'M', u'豕'), @@ -8094,6 +8115,10 @@ def _seg_77(): (0x2FA1D, 'M', u'𪘀'), (0x2FA1E, 'X'), (0xE0100, 'I'), + ] + +def _seg_78(): + return [ (0xE01F0, 'X'), ] @@ -8176,4 +8201,5 @@ uts46data = tuple( + _seg_75() + _seg_76() + _seg_77() + + _seg_78() ) diff --git a/libs/ipaddress.py b/libs/ipaddress.py index b024bf73c..f2d076684 100644 --- a/libs/ipaddress.py +++ b/libs/ipaddress.py @@ -1,92 +1,129 @@ -# Python 2.7 port of Python 3.4's ipaddress module. +# Copyright 2007 Google Inc. +# Licensed to PSF under a Contributor Agreement. -# List of compatibility changes: +"""A fast, lightweight IPv4/IPv6 manipulation library in Python. -# Python 3 uses only new-style classes. -# s/class \(\w\+\):/class \1(object):/ +This library is used to create/poke/manipulate IPv4 and IPv6 addresses +and networks. -# Use iterator versions of map and range: -try: - from itertools import imap as map -except ImportError: - imap = map +""" +from __future__ import unicode_literals + + +import itertools +import struct + +__version__ = '1.0.22' + +# Compatibility functions +_compat_int_types = (int,) try: - import xrange - range = xrange -except ImportError: + _compat_int_types = (int, long) +except NameError: pass +try: + _compat_str = unicode +except NameError: + _compat_str = str + assert bytes != str +if b'\0'[0] == 0: # Python 3 semantics + def _compat_bytes_to_byte_vals(byt): + return byt +else: + def _compat_bytes_to_byte_vals(byt): + return [struct.unpack(b'!B', b)[0] for b in byt] +try: + _compat_int_from_byte_vals = int.from_bytes +except AttributeError: + def _compat_int_from_byte_vals(bytvals, endianess): + assert endianess == 'big' + res = 0 + for bv in bytvals: + assert isinstance(bv, _compat_int_types) + res = (res << 8) + bv + return res + + +def _compat_to_bytes(intval, length, endianess): + assert isinstance(intval, _compat_int_types) + assert endianess == 'big' + if length == 4: + if intval < 0 or intval >= 2 ** 32: + raise struct.error("integer out of range for 'I' format code") + return struct.pack(b'!I', intval) + elif length == 16: + if intval < 0 or intval >= 2 ** 128: + raise struct.error("integer out of range for 'QQ' format code") + return struct.pack(b'!QQ', intval >> 64, intval & 0xffffffffffffffff) + else: + raise NotImplementedError() + -# Except that xrange only supports machine integers, not longs, so... -def long_range(start, end): - while start < end: - yield start - start += 1 - -# This backport uses bytearray instead of bytes, as bytes is the same -# as str in Python 2.7. -bytes = bytearray - -# Python 2 does not support exception chaining. -# s/ from None$// - -# When checking for instances of int, also allow Python 2's long. -_builtin_isinstance = isinstance - -def isinstance(val, types): - if types is int: - types = (int, long) - elif type(types) is tuple and int in types: - types += (long,) - return _builtin_isinstance(val, types) - -# functools.lru_cache is Python 3.2+ only. -# /@functools.lru_cache()/d - -# int().to_bytes is Python 3.2+ only. 
-# s/\(\w+\)\.to_bytes(/_int_to_bytes(\1, / -def _int_to_bytes(self, length, byteorder, signed=False): - assert byteorder == 'big' and signed is False - if self < 0 or self >= 256**length: - raise OverflowError() - return bytearray(('%0*x' % (length * 2, self)).decode('hex')) - -# int.from_bytes is Python 3.2+ only. -# s/int\.from_bytes(/_int_from_bytes(/g -def _int_from_bytes(what, byteorder, signed=False): - assert byteorder == 'big' and signed is False - return int(str(bytearray(what)).encode('hex'), 16) - -# Python 2.6 has no int.bit_length() if hasattr(int, 'bit_length'): - # Not `int.bit_length`, since it must also work for `long`. - _int_bit_length = lambda i: i.bit_length() + # Not int.bit_length , since that won't work in 2.7 where long exists + def _compat_bit_length(i): + return i.bit_length() else: - _int_bit_length = lambda i: len(bin(abs(i))) - 2 + def _compat_bit_length(i): + for res in itertools.count(): + if i >> res == 0: + return res +def _compat_range(start, end, step=1): + assert step > 0 + i = start + while i < end: + yield i + i += step -# ---------------------------------------------------------------------------- +class _TotalOrderingMixin(object): + __slots__ = () -# Copyright 2007 Google Inc. -# Licensed to PSF under a Contributor Agreement. + # Helper that derives the other comparison operations from + # __lt__ and __eq__ + # We avoid functools.total_ordering because it doesn't handle + # NotImplemented correctly yet (http://bugs.python.org/issue10042) + def __eq__(self, other): + raise NotImplementedError -"""A fast, lightweight IPv4/IPv6 manipulation library in Python. + def __ne__(self, other): + equal = self.__eq__(other) + if equal is NotImplemented: + return NotImplemented + return not equal -This library is used to create/poke/manipulate IPv4 and IPv6 addresses -and networks. + def __lt__(self, other): + raise NotImplementedError -""" + def __le__(self, other): + less = self.__lt__(other) + if less is NotImplemented or not less: + return self.__eq__(other) + return less -__version__ = '1.0' + def __gt__(self, other): + less = self.__lt__(other) + if less is NotImplemented: + return NotImplemented + equal = self.__eq__(other) + if equal is NotImplemented: + return NotImplemented + return not (less or equal) + def __ge__(self, other): + less = self.__lt__(other) + if less is NotImplemented: + return NotImplemented + return not less -import functools IPV4LENGTH = 32 IPV6LENGTH = 128 + class AddressValueError(ValueError): """A Value Error related to the address.""" @@ -121,6 +158,12 @@ def ip_address(address): except (AddressValueError, NetmaskValueError): pass + if isinstance(address, bytes): + raise AddressValueError( + '%r does not appear to be an IPv4 or IPv6 address. ' + 'Did you pass in a bytes (str in Python 2) instead of' + ' a unicode object?' % address) + raise ValueError('%r does not appear to be an IPv4 or IPv6 address' % address) @@ -151,6 +194,12 @@ def ip_network(address, strict=True): except (AddressValueError, NetmaskValueError): pass + if isinstance(address, bytes): + raise AddressValueError( + '%r does not appear to be an IPv4 or IPv6 network. ' + 'Did you pass in a bytes (str in Python 2) instead of' + ' a unicode object?' 
% address) + raise ValueError('%r does not appear to be an IPv4 or IPv6 network' % address) @@ -205,8 +254,8 @@ def v4_int_to_packed(address): """ try: - return _int_to_bytes(address, 4, 'big') - except: + return _compat_to_bytes(address, 4, 'big') + except (struct.error, OverflowError): raise ValueError("Address negative or too large for IPv4") @@ -221,36 +270,37 @@ def v6_int_to_packed(address): """ try: - return _int_to_bytes(address, 16, 'big') - except: + return _compat_to_bytes(address, 16, 'big') + except (struct.error, OverflowError): raise ValueError("Address negative or too large for IPv6") def _split_optional_netmask(address): """Helper to split the netmask and raise AddressValueError if needed""" - addr = str(address).split('/') + addr = _compat_str(address).split('/') if len(addr) > 2: raise AddressValueError("Only one '/' permitted in %r" % address) return addr def _find_address_range(addresses): - """Find a sequence of IPv#Address. + """Find a sequence of sorted deduplicated IPv#Address. Args: addresses: a list of IPv#Address objects. - Returns: + Yields: A tuple containing the first and last IP addresses in the sequence. """ - first = last = addresses[0] - for ip in addresses[1:]: - if ip._ip == last._ip + 1: - last = ip - else: - break - return (first, last) + it = iter(addresses) + first = last = next(it) + for ip in it: + if ip._ip != last._ip + 1: + yield first, last + first = ip + last = ip + yield first, last def _count_righthand_zero_bits(number, bits): @@ -266,11 +316,7 @@ def _count_righthand_zero_bits(number, bits): """ if number == 0: return bits - for i in range(bits): - if (number >> i) & 1: - return i - # All bits of interest were zero, even if there are more in the number - return bits + return min(bits, _compat_bit_length(~number & (number - 1))) def summarize_address_range(first, last): @@ -304,7 +350,7 @@ def summarize_address_range(first, last): raise TypeError('first and last must be IP addresses, not networks') if first.version != last.version: raise TypeError("%s and %s are not of the same version" % ( - first, last)) + first, last)) if first > last: raise ValueError('last IP address must be greater than first') @@ -320,16 +366,15 @@ def summarize_address_range(first, last): last_int = last._ip while first_int <= last_int: nbits = min(_count_righthand_zero_bits(first_int, ip_bits), - _int_bit_length(last_int - first_int + 1) - 1) - net = ip('%s/%d' % (first, ip_bits - nbits)) + _compat_bit_length(last_int - first_int + 1) - 1) + net = ip((first_int, ip_bits - nbits)) yield net first_int += 1 << nbits if first_int - 1 == ip._ALL_ONES: break - first = first.__class__(first_int) -def _collapse_addresses_recursive(addresses): +def _collapse_addresses_internal(addresses): """Loops through the addresses, collapsing concurrent netblocks. Example: @@ -339,7 +384,7 @@ def _collapse_addresses_recursive(addresses): ip3 = IPv4Network('192.0.2.128/26') ip4 = IPv4Network('192.0.2.192/26') - _collapse_addresses_recursive([ip1, ip2, ip3, ip4]) -> + _collapse_addresses_internal([ip1, ip2, ip3, ip4]) -> [IPv4Network('192.0.2.0/24')] This shouldn't be called directly; it is called via @@ -353,28 +398,29 @@ def _collapse_addresses_recursive(addresses): passed. 
""" - while True: - last_addr = None - ret_array = [] - optimized = False - - for cur_addr in addresses: - if not ret_array: - last_addr = cur_addr - ret_array.append(cur_addr) - elif (cur_addr.network_address >= last_addr.network_address and - cur_addr.broadcast_address <= last_addr.broadcast_address): - optimized = True - elif cur_addr == list(last_addr.supernet().subnets())[1]: - ret_array[-1] = last_addr = last_addr.supernet() - optimized = True - else: - last_addr = cur_addr - ret_array.append(cur_addr) - - addresses = ret_array - if not optimized: - return addresses + # First merge + to_merge = list(addresses) + subnets = {} + while to_merge: + net = to_merge.pop() + supernet = net.supernet() + existing = subnets.get(supernet) + if existing is None: + subnets[supernet] = net + elif existing != net: + # Merge consecutive subnets + del subnets[supernet] + to_merge.append(supernet) + # Then iterate over resulting networks, skipping subsumed subnets + last = None + for net in sorted(subnets.values()): + if last is not None: + # Since they are sorted, + # last.network_address <= net.network_address is a given. + if last.broadcast_address >= net.broadcast_address: + continue + yield net + last = net def collapse_addresses(addresses): @@ -395,7 +441,6 @@ def collapse_addresses(addresses): TypeError: If passed a list of mixed version objects. """ - i = 0 addrs = [] ips = [] nets = [] @@ -405,12 +450,12 @@ def collapse_addresses(addresses): if isinstance(ip, _BaseAddress): if ips and ips[-1]._version != ip._version: raise TypeError("%s and %s are not of the same version" % ( - ip, ips[-1])) + ip, ips[-1])) ips.append(ip) elif ip._prefixlen == ip._max_prefixlen: if ips and ips[-1]._version != ip._version: raise TypeError("%s and %s are not of the same version" % ( - ip, ips[-1])) + ip, ips[-1])) try: ips.append(ip.ip) except AttributeError: @@ -418,20 +463,18 @@ def collapse_addresses(addresses): else: if nets and nets[-1]._version != ip._version: raise TypeError("%s and %s are not of the same version" % ( - ip, nets[-1])) + ip, nets[-1])) nets.append(ip) # sort and dedup ips = sorted(set(ips)) - nets = sorted(set(nets)) - while i < len(ips): - (first, last) = _find_address_range(ips[i:]) - i = ips.index(last) + 1 - addrs.extend(summarize_address_range(first, last)) + # find consecutive address ranges in the sorted sequence and summarize them + if ips: + for first, last in _find_address_range(ips): + addrs.extend(summarize_address_range(first, last)) - return iter(_collapse_addresses_recursive(sorted( - addrs + nets, key=_BaseNetwork._get_networks_key))) + return _collapse_addresses_internal(addrs + nets) def get_mixed_type_key(obj): @@ -459,43 +502,12 @@ def get_mixed_type_key(obj): return NotImplemented -class _TotalOrderingMixin(object): - # Helper that derives the other comparison operations from - # __lt__ and __eq__ - # We avoid functools.total_ordering because it doesn't handle - # NotImplemented correctly yet (http://bugs.python.org/issue10042) - def __eq__(self, other): - raise NotImplementedError - def __ne__(self, other): - equal = self.__eq__(other) - if equal is NotImplemented: - return NotImplemented - return not equal - def __lt__(self, other): - raise NotImplementedError - def __le__(self, other): - less = self.__lt__(other) - if less is NotImplemented or not less: - return self.__eq__(other) - return less - def __gt__(self, other): - less = self.__lt__(other) - if less is NotImplemented: - return NotImplemented - equal = self.__eq__(other) - if equal is NotImplemented: - 
return NotImplemented - return not (less or equal) - def __ge__(self, other): - less = self.__lt__(other) - if less is NotImplemented: - return NotImplemented - return not less - class _IPAddressBase(_TotalOrderingMixin): """The mother class.""" + __slots__ = () + @property def exploded(self): """Return the longhand version of the IP address as a string.""" @@ -504,7 +516,18 @@ class _IPAddressBase(_TotalOrderingMixin): @property def compressed(self): """Return the shorthand version of the IP address as a string.""" - return str(self) + return _compat_str(self) + + @property + def reverse_pointer(self): + """The name of the reverse DNS pointer for the IP address, e.g.: + >>> ipaddress.ip_address("127.0.0.1").reverse_pointer + '1.0.0.127.in-addr.arpa' + >>> ipaddress.ip_address("2001:db8::1").reverse_pointer + '1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.8.b.d.0.1.0.0.2.ip6.arpa' + + """ + return self._reverse_pointer() @property def version(self): @@ -523,11 +546,15 @@ class _IPAddressBase(_TotalOrderingMixin): def _check_packed_address(self, address, expected_len): address_len = len(address) if address_len != expected_len: - msg = "%r (len %d != %d) is not permitted as an IPv%d address" + msg = ( + '%r (len %d != %d) is not permitted as an IPv%d address. ' + 'Did you pass in a bytes (str in Python 2) instead of' + ' a unicode object?') raise AddressValueError(msg % (address, address_len, expected_len, self._version)) - def _ip_int_from_prefix(self, prefixlen): + @classmethod + def _ip_int_from_prefix(cls, prefixlen): """Turn the prefix length into a bitwise netmask Args: @@ -537,13 +564,14 @@ class _IPAddressBase(_TotalOrderingMixin): An integer. """ - return self._ALL_ONES ^ (self._ALL_ONES >> prefixlen) + return cls._ALL_ONES ^ (cls._ALL_ONES >> prefixlen) - def _prefix_from_ip_int(self, ip_int): + @classmethod + def _prefix_from_ip_int(cls, ip_int): """Return prefix length from the bitwise netmask. Args: - ip_int: An integer, the netmask in axpanded bitwise format + ip_int: An integer, the netmask in expanded bitwise format Returns: An integer, the prefix length. 
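
The netmask helpers above become classmethods, so prefix parsing no longer needs an instance. A minimal sketch of the behaviour they implement, using only the module's public entry points (illustrative only, not part of the patch):

    import ipaddress

    # Prefix length, dotted netmask and hostmask all funnel through
    # _prefix_from_prefix_string / _prefix_from_ip_string and agree:
    assert ipaddress.ip_network(u'192.0.2.0/24').prefixlen == 24
    assert ipaddress.ip_network(u'192.0.2.0/255.255.255.0').prefixlen == 24
    assert ipaddress.ip_network(u'192.0.2.0/0.0.0.255').prefixlen == 24
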
@@ -552,22 +580,24 @@ class _IPAddressBase(_TotalOrderingMixin): ValueError: If the input intermingles zeroes & ones """ trailing_zeroes = _count_righthand_zero_bits(ip_int, - self._max_prefixlen) - prefixlen = self._max_prefixlen - trailing_zeroes + cls._max_prefixlen) + prefixlen = cls._max_prefixlen - trailing_zeroes leading_ones = ip_int >> trailing_zeroes all_ones = (1 << prefixlen) - 1 if leading_ones != all_ones: - byteslen = self._max_prefixlen // 8 - details = _int_to_bytes(ip_int, byteslen, 'big') + byteslen = cls._max_prefixlen // 8 + details = _compat_to_bytes(ip_int, byteslen, 'big') msg = 'Netmask pattern %r mixes zeroes & ones' raise ValueError(msg % details) return prefixlen - def _report_invalid_netmask(self, netmask_str): + @classmethod + def _report_invalid_netmask(cls, netmask_str): msg = '%r is not a valid netmask' % netmask_str raise NetmaskValueError(msg) - def _prefix_from_prefix_string(self, prefixlen_str): + @classmethod + def _prefix_from_prefix_string(cls, prefixlen_str): """Return prefix length from a numeric string Args: @@ -582,16 +612,17 @@ class _IPAddressBase(_TotalOrderingMixin): # int allows a leading +/- as well as surrounding whitespace, # so we ensure that isn't the case if not _BaseV4._DECIMAL_DIGITS.issuperset(prefixlen_str): - self._report_invalid_netmask(prefixlen_str) + cls._report_invalid_netmask(prefixlen_str) try: prefixlen = int(prefixlen_str) except ValueError: - self._report_invalid_netmask(prefixlen_str) - if not (0 <= prefixlen <= self._max_prefixlen): - self._report_invalid_netmask(prefixlen_str) + cls._report_invalid_netmask(prefixlen_str) + if not (0 <= prefixlen <= cls._max_prefixlen): + cls._report_invalid_netmask(prefixlen_str) return prefixlen - def _prefix_from_ip_string(self, ip_str): + @classmethod + def _prefix_from_ip_string(cls, ip_str): """Turn a netmask/hostmask string into a prefix length Args: @@ -605,24 +636,27 @@ class _IPAddressBase(_TotalOrderingMixin): """ # Parse the netmask/hostmask like an IP address. try: - ip_int = self._ip_int_from_string(ip_str) + ip_int = cls._ip_int_from_string(ip_str) except AddressValueError: - self._report_invalid_netmask(ip_str) + cls._report_invalid_netmask(ip_str) # Try matching a netmask (this would be /1*0*/ as a bitwise regexp). # Note that the two ambiguous cases (all-ones and all-zeroes) are # treated as netmasks. try: - return self._prefix_from_ip_int(ip_int) + return cls._prefix_from_ip_int(ip_int) except ValueError: pass # Invert the bits, and try matching a /0+1+/ hostmask instead. - ip_int ^= self._ALL_ONES + ip_int ^= cls._ALL_ONES try: - return self._prefix_from_ip_int(ip_int) + return cls._prefix_from_ip_int(ip_int) except ValueError: - self._report_invalid_netmask(ip_str) + cls._report_invalid_netmask(ip_str) + + def __reduce__(self): + return self.__class__, (_compat_str(self),) class _BaseAddress(_IPAddressBase): @@ -633,28 +667,27 @@ class _BaseAddress(_IPAddressBase): used by single IP addresses. 
""" - def __init__(self, address): - if (not isinstance(address, bytes) - and '/' in str(address)): - raise AddressValueError("Unexpected '/' in %r" % address) + __slots__ = () def __int__(self): return self._ip def __eq__(self, other): try: - return (self._ip == other._ip - and self._version == other._version) + return (self._ip == other._ip and + self._version == other._version) except AttributeError: return NotImplemented def __lt__(self, other): - if self._version != other._version: - raise TypeError('%s and %s are not of the same version' % ( - self, other)) + if not isinstance(other, _IPAddressBase): + return NotImplemented if not isinstance(other, _BaseAddress): raise TypeError('%s and %s are not of the same type' % ( - self, other)) + self, other)) + if self._version != other._version: + raise TypeError('%s and %s are not of the same version' % ( + self, other)) if self._ip != other._ip: return self._ip < other._ip return False @@ -662,20 +695,20 @@ class _BaseAddress(_IPAddressBase): # Shorthand for Integer addition and subtraction. This is not # meant to ever support addition/subtraction of addresses. def __add__(self, other): - if not isinstance(other, int): + if not isinstance(other, _compat_int_types): return NotImplemented return self.__class__(int(self) + other) def __sub__(self, other): - if not isinstance(other, int): + if not isinstance(other, _compat_int_types): return NotImplemented return self.__class__(int(self) - other) def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, str(self)) + return '%s(%r)' % (self.__class__.__name__, _compat_str(self)) def __str__(self): - return str(self._string_from_ip_int(self._ip)) + return _compat_str(self._string_from_ip_int(self._ip)) def __hash__(self): return hash(hex(int(self._ip))) @@ -683,6 +716,9 @@ class _BaseAddress(_IPAddressBase): def _get_address_key(self): return (self._version, self) + def __reduce__(self): + return self.__class__, (self._ip,) + class _BaseNetwork(_IPAddressBase): @@ -696,7 +732,7 @@ class _BaseNetwork(_IPAddressBase): self._cache = {} def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, str(self)) + return '%s(%r)' % (self.__class__.__name__, _compat_str(self)) def __str__(self): return '%s/%d' % (self.network_address, self.prefixlen) @@ -710,13 +746,13 @@ class _BaseNetwork(_IPAddressBase): """ network = int(self.network_address) broadcast = int(self.broadcast_address) - for x in long_range(network + 1, broadcast): + for x in _compat_range(network + 1, broadcast): yield self._address_class(x) def __iter__(self): network = int(self.network_address) broadcast = int(self.broadcast_address) - for x in long_range(network, broadcast + 1): + for x in _compat_range(network, broadcast + 1): yield self._address_class(x) def __getitem__(self, n): @@ -724,21 +760,23 @@ class _BaseNetwork(_IPAddressBase): broadcast = int(self.broadcast_address) if n >= 0: if network + n > broadcast: - raise IndexError + raise IndexError('address out of range') return self._address_class(network + n) else: n += 1 if broadcast + n < network: - raise IndexError + raise IndexError('address out of range') return self._address_class(broadcast + n) def __lt__(self, other): - if self._version != other._version: - raise TypeError('%s and %s are not of the same version' % ( - self, other)) + if not isinstance(other, _IPAddressBase): + return NotImplemented if not isinstance(other, _BaseNetwork): raise TypeError('%s and %s are not of the same type' % ( - self, other)) + self, other)) + if self._version != 
other._version: + raise TypeError('%s and %s are not of the same version' % ( + self, other)) if self.network_address != other.network_address: return self.network_address < other.network_address if self.netmask != other.netmask: @@ -829,21 +867,21 @@ class _BaseNetwork(_IPAddressBase): addr1 = ip_network('192.0.2.0/28') addr2 = ip_network('192.0.2.1/32') - addr1.address_exclude(addr2) = + list(addr1.address_exclude(addr2)) = [IPv4Network('192.0.2.0/32'), IPv4Network('192.0.2.2/31'), - IPv4Network('192.0.2.4/30'), IPv4Network('192.0.2.8/29')] + IPv4Network('192.0.2.4/30'), IPv4Network('192.0.2.8/29')] or IPv6: addr1 = ip_network('2001:db8::1/32') addr2 = ip_network('2001:db8::1/128') - addr1.address_exclude(addr2) = + list(addr1.address_exclude(addr2)) = [ip_network('2001:db8::1/128'), - ip_network('2001:db8::2/127'), - ip_network('2001:db8::4/126'), - ip_network('2001:db8::8/125'), - ... - ip_network('2001:db8:8000::/33')] + ip_network('2001:db8::2/127'), + ip_network('2001:db8::4/126'), + ip_network('2001:db8::8/125'), + ... + ip_network('2001:db8:8000::/33')] Args: other: An IPv4Network or IPv6Network object of the same type. @@ -860,16 +898,15 @@ class _BaseNetwork(_IPAddressBase): """ if not self._version == other._version: raise TypeError("%s and %s are not of the same version" % ( - self, other)) + self, other)) if not isinstance(other, _BaseNetwork): raise TypeError("%s is not a network object" % other) - if not (other.network_address >= self.network_address and - other.broadcast_address <= self.broadcast_address): + if not other.subnet_of(self): raise ValueError('%s not contained in %s' % (other, self)) if other == self: - raise StopIteration + return # Make sure we're comparing the network of other. other = other.__class__('%s/%s' % (other.network_address, @@ -877,12 +914,10 @@ class _BaseNetwork(_IPAddressBase): s1, s2 = self.subnets() while s1 != other and s2 != other: - if (other.network_address >= s1.network_address and - other.broadcast_address <= s1.broadcast_address): + if other.subnet_of(s1): yield s2 s1, s2 = s1.subnets() - elif (other.network_address >= s2.network_address and - other.broadcast_address <= s2.broadcast_address): + elif other.subnet_of(s2): yield s1 s1, s2 = s2.subnets() else: @@ -935,7 +970,7 @@ class _BaseNetwork(_IPAddressBase): # does this need to raise a ValueError? 
if self._version != other._version: raise TypeError('%s and %s are not of the same type' % ( - self, other)) + self, other)) # self._version == other._version below here: if self.network_address < other.network_address: return -1 @@ -1004,20 +1039,11 @@ class _BaseNetwork(_IPAddressBase): 'prefix length diff %d is invalid for netblock %s' % ( new_prefixlen, self)) - first = self.__class__('%s/%s' % - (self.network_address, - self._prefixlen + prefixlen_diff)) - - yield first - current = first - while True: - broadcast = current.broadcast_address - if broadcast == self.broadcast_address: - return - new_addr = self._address_class(int(broadcast) + 1) - current = self.__class__('%s/%s' % (new_addr, - new_prefixlen)) - + start = int(self.network_address) + end = int(self.broadcast_address) + 1 + step = (int(self.hostmask) + 1) >> prefixlen_diff + for new_addr in _compat_range(start, end, step): + current = self.__class__((new_addr, new_prefixlen)) yield current def supernet(self, prefixlen_diff=1, new_prefix=None): @@ -1051,15 +1077,14 @@ class _BaseNetwork(_IPAddressBase): raise ValueError('cannot set prefixlen_diff and new_prefix') prefixlen_diff = self._prefixlen - new_prefix - if self.prefixlen - prefixlen_diff < 0: + new_prefixlen = self.prefixlen - prefixlen_diff + if new_prefixlen < 0: raise ValueError( 'current prefixlen is %d, cannot have a prefixlen_diff of %d' % (self.prefixlen, prefixlen_diff)) - # TODO (pmoody): optimize this. - t = self.__class__('%s/%d' % (self.network_address, - self.prefixlen - prefixlen_diff), - strict=False) - return t.__class__('%s/%d' % (t.network_address, t.prefixlen)) + return self.__class__(( + int(self.network_address) & (int(self.netmask) << prefixlen_diff), + new_prefixlen)) @property def is_multicast(self): @@ -1073,6 +1098,26 @@ class _BaseNetwork(_IPAddressBase): return (self.network_address.is_multicast and self.broadcast_address.is_multicast) + @staticmethod + def _is_subnet_of(a, b): + try: + # Always false if one is v4 and the other is v6. + if a._version != b._version: + raise TypeError("%s and %s are not of the same version" (a, b)) + return (b.network_address <= a.network_address and + b.broadcast_address >= a.broadcast_address) + except AttributeError: + raise TypeError("Unable to test subnet containment " + "between %s and %s" % (a, b)) + + def subnet_of(self, other): + """Return True if this network is a subnet of other.""" + return self._is_subnet_of(self, other) + + def supernet_of(self, other): + """Return True if this network is a supernet of other.""" + return self._is_subnet_of(other, self) + @property def is_reserved(self): """Test if the address is otherwise IETF reserved. @@ -1153,21 +1198,49 @@ class _BaseV4(object): """ + __slots__ = () + _version = 4 # Equivalent to 255.255.255.255 or 32 bits of 1's. - _ALL_ONES = (2**IPV4LENGTH) - 1 + _ALL_ONES = (2 ** IPV4LENGTH) - 1 _DECIMAL_DIGITS = frozenset('0123456789') # the valid octets for host and netmasks. only useful for IPv4. - _valid_mask_octets = frozenset((255, 254, 252, 248, 240, 224, 192, 128, 0)) + _valid_mask_octets = frozenset([255, 254, 252, 248, 240, 224, 192, 128, 0]) - def __init__(self, address): - self._version = 4 - self._max_prefixlen = IPV4LENGTH + _max_prefixlen = IPV4LENGTH + # There are only a handful of valid v4 netmasks, so we cache them all + # when constructed (see _make_netmask()). 
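
address_exclude() and the other containment checks now go through the new subnet_of()/supernet_of() helpers instead of open-coded network/broadcast comparisons. A small usage sketch (illustrative only, not part of the patch):

    import ipaddress

    net = ipaddress.ip_network(u'192.0.2.0/24')
    sub = ipaddress.ip_network(u'192.0.2.128/25')
    assert sub.subnet_of(net) and net.supernet_of(sub)
    # Comparing a v4 network against a v6 network raises TypeError.
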
+ _netmask_cache = {} def _explode_shorthand_ip_string(self): - return str(self) + return _compat_str(self) - def _ip_int_from_string(self, ip_str): + @classmethod + def _make_netmask(cls, arg): + """Make a (netmask, prefix_len) tuple from the given argument. + + Argument can be: + - an integer (the prefix length) + - a string representing the prefix length (e.g. "24") + - a string representing the prefix netmask (e.g. "255.255.255.0") + """ + if arg not in cls._netmask_cache: + if isinstance(arg, _compat_int_types): + prefixlen = arg + else: + try: + # Check for a netmask in prefix length form + prefixlen = cls._prefix_from_prefix_string(arg) + except NetmaskValueError: + # Check for a netmask or hostmask in dotted-quad form. + # This may raise NetmaskValueError. + prefixlen = cls._prefix_from_ip_string(arg) + netmask = IPv4Address(cls._ip_int_from_prefix(prefixlen)) + cls._netmask_cache[arg] = netmask, prefixlen + return cls._netmask_cache[arg] + + @classmethod + def _ip_int_from_string(cls, ip_str): """Turn the given IP string into an integer for comparison. Args: @@ -1188,11 +1261,13 @@ class _BaseV4(object): raise AddressValueError("Expected 4 octets in %r" % ip_str) try: - return _int_from_bytes(map(self._parse_octet, octets), 'big') + return _compat_int_from_byte_vals( + map(cls._parse_octet, octets), 'big') except ValueError as exc: raise AddressValueError("%s in %r" % (exc, ip_str)) - def _parse_octet(self, octet_str): + @classmethod + def _parse_octet(cls, octet_str): """Convert a decimal octet into an integer. Args: @@ -1208,7 +1283,7 @@ class _BaseV4(object): if not octet_str: raise ValueError("Empty octet not permitted") # Whitelist the characters, since int() allows a lot of bizarre stuff. - if not self._DECIMAL_DIGITS.issuperset(octet_str): + if not cls._DECIMAL_DIGITS.issuperset(octet_str): msg = "Only decimal digits permitted in %r" raise ValueError(msg % octet_str) # We do the length check second, since the invalid character error @@ -1228,7 +1303,8 @@ class _BaseV4(object): raise ValueError("Octet %d (> 255) not permitted" % octet_int) return octet_int - def _string_from_ip_int(self, ip_int): + @classmethod + def _string_from_ip_int(cls, ip_int): """Turns a 32-bit integer into dotted decimal notation. Args: @@ -1238,38 +1314,10 @@ class _BaseV4(object): The IP address as a string in dotted decimal notation. """ - return '.'.join(map(str, _int_to_bytes(ip_int, 4, 'big'))) - - def _is_valid_netmask(self, netmask): - """Verify that the netmask is valid. - - Args: - netmask: A string, either a prefix or dotted decimal - netmask. - - Returns: - A boolean, True if the prefix represents a valid IPv4 - netmask. - - """ - mask = netmask.split('.') - if len(mask) == 4: - try: - for x in mask: - if int(x) not in self._valid_mask_octets: - return False - except ValueError: - # Found something that isn't an integer or isn't valid - return False - for idx, y in enumerate(mask): - if idx > 0 and y > mask[idx - 1]: - return False - return True - try: - netmask = int(netmask) - except ValueError: - return False - return 0 <= netmask <= self._max_prefixlen + return '.'.join(_compat_str(struct.unpack(b'!B', b)[0] + if isinstance(b, bytes) + else b) + for b in _compat_to_bytes(ip_int, 4, 'big')) def _is_hostmask(self, ip_str): """Test if the IP string is a hostmask (rather than a netmask). @@ -1292,6 +1340,15 @@ class _BaseV4(object): return True return False + def _reverse_pointer(self): + """Return the reverse DNS pointer name for the IPv4 address. 
+ + This implements the method described in RFC1035 3.5. + + """ + reverse_octets = _compat_str(self).split('.')[::-1] + return '.'.join(reverse_octets) + '.in-addr.arpa' + @property def max_prefixlen(self): return self._max_prefixlen @@ -1305,6 +1362,8 @@ class IPv4Address(_BaseV4, _BaseAddress): """Represent and manipulate single IPv4 Addresses.""" + __slots__ = ('_ip', '__weakref__') + def __init__(self, address): """ @@ -1321,11 +1380,8 @@ class IPv4Address(_BaseV4, _BaseAddress): AddressValueError: If ipaddress isn't a valid IPv4 address. """ - _BaseAddress.__init__(self, address) - _BaseV4.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, int): + if isinstance(address, _compat_int_types): self._check_int_address(address) self._ip = address return @@ -1333,12 +1389,15 @@ class IPv4Address(_BaseV4, _BaseAddress): # Constructing from a packed address if isinstance(address, bytes): self._check_packed_address(address, 4) - self._ip = _int_from_bytes(address, 'big') + bvs = _compat_bytes_to_byte_vals(address) + self._ip = _compat_int_from_byte_vals(bvs, 'big') return # Assume input argument to be string or any object representation # which converts into a formatted IP string. - addr_str = str(address) + addr_str = _compat_str(address) + if '/' in addr_str: + raise AddressValueError("Unexpected '/' in %r" % address) self._ip = self._ip_int_from_string(addr_str) @property @@ -1355,8 +1414,7 @@ class IPv4Address(_BaseV4, _BaseAddress): reserved IPv4 Network range. """ - reserved_network = IPv4Network('240.0.0.0/4') - return self in reserved_network + return self in self._constants._reserved_network @property def is_private(self): @@ -1367,21 +1425,13 @@ class IPv4Address(_BaseV4, _BaseAddress): iana-ipv4-special-registry. """ - return (self in IPv4Network('0.0.0.0/8') or - self in IPv4Network('10.0.0.0/8') or - self in IPv4Network('127.0.0.0/8') or - self in IPv4Network('169.254.0.0/16') or - self in IPv4Network('172.16.0.0/12') or - self in IPv4Network('192.0.0.0/29') or - self in IPv4Network('192.0.0.170/31') or - self in IPv4Network('192.0.2.0/24') or - self in IPv4Network('192.168.0.0/16') or - self in IPv4Network('198.18.0.0/15') or - self in IPv4Network('198.51.100.0/24') or - self in IPv4Network('203.0.113.0/24') or - self in IPv4Network('240.0.0.0/4') or - self in IPv4Network('255.255.255.255/32')) + return any(self in net for net in self._constants._private_networks) + @property + def is_global(self): + return ( + self not in self._constants._public_network and + not self.is_private) @property def is_multicast(self): @@ -1392,8 +1442,7 @@ class IPv4Address(_BaseV4, _BaseAddress): See RFC 3171 for details. """ - multicast_network = IPv4Network('224.0.0.0/4') - return self in multicast_network + return self in self._constants._multicast_network @property def is_unspecified(self): @@ -1404,8 +1453,7 @@ class IPv4Address(_BaseV4, _BaseAddress): RFC 5735 3. """ - unspecified_address = IPv4Address('0.0.0.0') - return self == unspecified_address + return self == self._constants._unspecified_address @property def is_loopback(self): @@ -1415,8 +1463,7 @@ class IPv4Address(_BaseV4, _BaseAddress): A boolean, True if the address is a loopback per RFC 3330. """ - loopback_network = IPv4Network('127.0.0.0/8') - return self in loopback_network + return self in self._constants._loopback_network @property def is_link_local(self): @@ -1426,19 +1473,30 @@ class IPv4Address(_BaseV4, _BaseAddress): A boolean, True if the address is link-local per RFC 3927. 
""" - linklocal_network = IPv4Network('169.254.0.0/16') - return self in linklocal_network + return self in self._constants._linklocal_network class IPv4Interface(IPv4Address): def __init__(self, address): - if isinstance(address, (bytes, int)): + if isinstance(address, (bytes, _compat_int_types)): IPv4Address.__init__(self, address) self.network = IPv4Network(self._ip) self._prefixlen = self._max_prefixlen return + if isinstance(address, tuple): + IPv4Address.__init__(self, address[0]) + if len(address) > 1: + self._prefixlen = int(address[1]) + else: + self._prefixlen = self._max_prefixlen + + self.network = IPv4Network(address, strict=False) + self.netmask = self.network.netmask + self.hostmask = self.network.hostmask + return + addr = _split_optional_netmask(address) IPv4Address.__init__(self, addr[0]) @@ -1469,7 +1527,8 @@ class IPv4Interface(IPv4Address): if address_less is NotImplemented: return NotImplemented try: - return self.network < other.network + return (self.network < other.network or + self.network == other.network and address_less) except AttributeError: # We *do* allow addresses and interfaces to be sorted. The # unassociated address is considered less than all interfaces. @@ -1478,6 +1537,8 @@ class IPv4Interface(IPv4Address): def __hash__(self): return self._ip ^ self._prefixlen ^ int(self.network.network_address) + __reduce__ = _IPAddressBase.__reduce__ + @property def ip(self): return IPv4Address(self._ip) @@ -1550,24 +1611,31 @@ class IPv4Network(_BaseV4, _BaseNetwork): supplied. """ - - _BaseV4.__init__(self, address) _BaseNetwork.__init__(self, address) - # Constructing from a packed address - if isinstance(address, bytes): + # Constructing from a packed address or integer + if isinstance(address, (_compat_int_types, bytes)): self.network_address = IPv4Address(address) - self._prefixlen = self._max_prefixlen - self.netmask = IPv4Address(self._ALL_ONES) - #fixme: address/network test here + self.netmask, self._prefixlen = self._make_netmask( + self._max_prefixlen) + # fixme: address/network test here. return - # Efficient constructor from integer. - if isinstance(address, int): - self.network_address = IPv4Address(address) - self._prefixlen = self._max_prefixlen - self.netmask = IPv4Address(self._ALL_ONES) - #fixme: address/network test here. + if isinstance(address, tuple): + if len(address) > 1: + arg = address[1] + else: + # We weren't given an address[1] + arg = self._max_prefixlen + self.network_address = IPv4Address(address[0]) + self.netmask, self._prefixlen = self._make_netmask(arg) + packed = int(self.network_address) + if packed & int(self.netmask) != packed: + if strict: + raise ValueError('%s has host bits set' % self) + else: + self.network_address = IPv4Address(packed & + int(self.netmask)) return # Assume input argument to be string or any object representation @@ -1576,20 +1644,14 @@ class IPv4Network(_BaseV4, _BaseNetwork): self.network_address = IPv4Address(self._ip_int_from_string(addr[0])) if len(addr) == 2: - try: - # Check for a netmask in prefix length form - self._prefixlen = self._prefix_from_prefix_string(addr[1]) - except NetmaskValueError: - # Check for a netmask or hostmask in dotted-quad form. - # This may raise NetmaskValueError. 
- self._prefixlen = self._prefix_from_ip_string(addr[1]) + arg = addr[1] else: - self._prefixlen = self._max_prefixlen - self.netmask = IPv4Address(self._ip_int_from_prefix(self._prefixlen)) + arg = self._max_prefixlen + self.netmask, self._prefixlen = self._make_netmask(arg) if strict: if (IPv4Address(int(self.network_address) & int(self.netmask)) != - self.network_address): + self.network_address): raise ValueError('%s has host bits set' % self) self.network_address = IPv4Address(int(self.network_address) & int(self.netmask)) @@ -1607,10 +1669,44 @@ class IPv4Network(_BaseV4, _BaseNetwork): """ return (not (self.network_address in IPv4Network('100.64.0.0/10') and - self.broadcast_address in IPv4Network('100.64.0.0/10')) and + self.broadcast_address in IPv4Network('100.64.0.0/10')) and not self.is_private) +class _IPv4Constants(object): + + _linklocal_network = IPv4Network('169.254.0.0/16') + + _loopback_network = IPv4Network('127.0.0.0/8') + + _multicast_network = IPv4Network('224.0.0.0/4') + + _public_network = IPv4Network('100.64.0.0/10') + + _private_networks = [ + IPv4Network('0.0.0.0/8'), + IPv4Network('10.0.0.0/8'), + IPv4Network('127.0.0.0/8'), + IPv4Network('169.254.0.0/16'), + IPv4Network('172.16.0.0/12'), + IPv4Network('192.0.0.0/29'), + IPv4Network('192.0.0.170/31'), + IPv4Network('192.0.2.0/24'), + IPv4Network('192.168.0.0/16'), + IPv4Network('198.18.0.0/15'), + IPv4Network('198.51.100.0/24'), + IPv4Network('203.0.113.0/24'), + IPv4Network('240.0.0.0/4'), + IPv4Network('255.255.255.255/32'), + ] + + _reserved_network = IPv4Network('240.0.0.0/4') + + _unspecified_address = IPv4Address('0.0.0.0') + + +IPv4Address._constants = _IPv4Constants + class _BaseV6(object): @@ -1621,15 +1717,37 @@ class _BaseV6(object): """ - _ALL_ONES = (2**IPV6LENGTH) - 1 + __slots__ = () + _version = 6 + _ALL_ONES = (2 ** IPV6LENGTH) - 1 _HEXTET_COUNT = 8 _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef') + _max_prefixlen = IPV6LENGTH - def __init__(self, address): - self._version = 6 - self._max_prefixlen = IPV6LENGTH + # There are only a bunch of valid v6 netmasks, so we cache them all + # when constructed (see _make_netmask()). + _netmask_cache = {} + + @classmethod + def _make_netmask(cls, arg): + """Make a (netmask, prefix_len) tuple from the given argument. - def _ip_int_from_string(self, ip_str): + Argument can be: + - an integer (the prefix length) + - a string representing the prefix length (e.g. "24") + - a string representing the prefix netmask (e.g. "255.255.255.0") + """ + if arg not in cls._netmask_cache: + if isinstance(arg, _compat_int_types): + prefixlen = arg + else: + prefixlen = cls._prefix_from_prefix_string(arg) + netmask = IPv6Address(cls._ip_int_from_prefix(prefixlen)) + cls._netmask_cache[arg] = netmask, prefixlen + return cls._netmask_cache[arg] + + @classmethod + def _ip_int_from_string(cls, ip_str): """Turn an IPv6 ip_str into an integer. Args: @@ -1665,15 +1783,16 @@ class _BaseV6(object): # An IPv6 address can't have more than 8 colons (9 parts). # The extra colon comes from using the "::" notation for a single # leading or trailing zero part. - _max_parts = self._HEXTET_COUNT + 1 + _max_parts = cls._HEXTET_COUNT + 1 if len(parts) > _max_parts: - msg = "At most %d colons permitted in %r" % (_max_parts-1, ip_str) + msg = "At most %d colons permitted in %r" % ( + _max_parts - 1, ip_str) raise AddressValueError(msg) # Disregarding the endpoints, find '::' with nothing in between. # This indicates that a run of zeroes has been skipped. 
skip_index = None - for i in range(1, len(parts) - 1): + for i in _compat_range(1, len(parts) - 1): if not parts[i]: if skip_index is not None: # Can't have more than one '::' @@ -1697,17 +1816,17 @@ class _BaseV6(object): if parts_lo: msg = "Trailing ':' only permitted as part of '::' in %r" raise AddressValueError(msg % ip_str) # :$ requires ::$ - parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo) + parts_skipped = cls._HEXTET_COUNT - (parts_hi + parts_lo) if parts_skipped < 1: msg = "Expected at most %d other parts with '::' in %r" - raise AddressValueError(msg % (self._HEXTET_COUNT-1, ip_str)) + raise AddressValueError(msg % (cls._HEXTET_COUNT - 1, ip_str)) else: # Otherwise, allocate the entire address to parts_hi. The # endpoints could still be empty, but _parse_hextet() will check # for that. - if len(parts) != self._HEXTET_COUNT: + if len(parts) != cls._HEXTET_COUNT: msg = "Exactly %d parts expected without '::' in %r" - raise AddressValueError(msg % (self._HEXTET_COUNT, ip_str)) + raise AddressValueError(msg % (cls._HEXTET_COUNT, ip_str)) if not parts[0]: msg = "Leading ':' only permitted as part of '::' in %r" raise AddressValueError(msg % ip_str) # ^: requires ^:: @@ -1723,16 +1842,17 @@ class _BaseV6(object): ip_int = 0 for i in range(parts_hi): ip_int <<= 16 - ip_int |= self._parse_hextet(parts[i]) + ip_int |= cls._parse_hextet(parts[i]) ip_int <<= 16 * parts_skipped for i in range(-parts_lo, 0): ip_int <<= 16 - ip_int |= self._parse_hextet(parts[i]) + ip_int |= cls._parse_hextet(parts[i]) return ip_int except ValueError as exc: raise AddressValueError("%s in %r" % (exc, ip_str)) - def _parse_hextet(self, hextet_str): + @classmethod + def _parse_hextet(cls, hextet_str): """Convert an IPv6 hextet string into an integer. Args: @@ -1747,7 +1867,7 @@ class _BaseV6(object): """ # Whitelist the characters, since int() allows a lot of bizarre stuff. - if not self._HEX_DIGITS.issuperset(hextet_str): + if not cls._HEX_DIGITS.issuperset(hextet_str): raise ValueError("Only hex digits permitted in %r" % hextet_str) # We do the length check second, since the invalid character error # is likely to be more informative for the user @@ -1757,7 +1877,8 @@ class _BaseV6(object): # Length check means we can skip checking the integer value return int(hextet_str, 16) - def _compress_hextets(self, hextets): + @classmethod + def _compress_hextets(cls, hextets): """Compresses a list of hextets. Compresses a list of strings, replacing the longest continuous @@ -1804,7 +1925,8 @@ class _BaseV6(object): return hextets - def _string_from_ip_int(self, ip_int=None): + @classmethod + def _string_from_ip_int(cls, ip_int=None): """Turns a 128-bit integer into hexadecimal notation. 
Args: @@ -1818,15 +1940,15 @@ class _BaseV6(object): """ if ip_int is None: - ip_int = int(self._ip) + ip_int = int(cls._ip) - if ip_int > self._ALL_ONES: + if ip_int > cls._ALL_ONES: raise ValueError('IPv6 address is too large') hex_str = '%032x' % ip_int - hextets = ['%x' % int(hex_str[x:x+4], 16) for x in range(0, 32, 4)] + hextets = ['%x' % int(hex_str[x:x + 4], 16) for x in range(0, 32, 4)] - hextets = self._compress_hextets(hextets) + hextets = cls._compress_hextets(hextets) return ':'.join(hextets) def _explode_shorthand_ip_string(self): @@ -1840,19 +1962,28 @@ class _BaseV6(object): """ if isinstance(self, IPv6Network): - ip_str = str(self.network_address) + ip_str = _compat_str(self.network_address) elif isinstance(self, IPv6Interface): - ip_str = str(self.ip) + ip_str = _compat_str(self.ip) else: - ip_str = str(self) + ip_str = _compat_str(self) ip_int = self._ip_int_from_string(ip_str) hex_str = '%032x' % ip_int - parts = [hex_str[x:x+4] for x in range(0, 32, 4)] + parts = [hex_str[x:x + 4] for x in range(0, 32, 4)] if isinstance(self, (_BaseNetwork, IPv6Interface)): return '%s/%d' % (':'.join(parts), self._prefixlen) return ':'.join(parts) + def _reverse_pointer(self): + """Return the reverse DNS pointer name for the IPv6 address. + + This implements the method described in RFC3596 2.5. + + """ + reverse_chars = self.exploded[::-1].replace(':', '') + return '.'.join(reverse_chars) + '.ip6.arpa' + @property def max_prefixlen(self): return self._max_prefixlen @@ -1866,6 +1997,8 @@ class IPv6Address(_BaseV6, _BaseAddress): """Represent and manipulate single IPv6 Addresses.""" + __slots__ = ('_ip', '__weakref__') + def __init__(self, address): """Instantiate a new IPv6 address object. @@ -1883,11 +2016,8 @@ class IPv6Address(_BaseV6, _BaseAddress): AddressValueError: If address isn't a valid IPv6 address. """ - _BaseAddress.__init__(self, address) - _BaseV6.__init__(self, address) - # Efficient constructor from integer. - if isinstance(address, int): + if isinstance(address, _compat_int_types): self._check_int_address(address) self._ip = address return @@ -1895,12 +2025,15 @@ class IPv6Address(_BaseV6, _BaseAddress): # Constructing from a packed address if isinstance(address, bytes): self._check_packed_address(address, 16) - self._ip = _int_from_bytes(address, 'big') + bvs = _compat_bytes_to_byte_vals(address) + self._ip = _compat_int_from_byte_vals(bvs, 'big') return # Assume input argument to be string or any object representation # which converts into a formatted IP string. - addr_str = str(address) + addr_str = _compat_str(address) + if '/' in addr_str: + raise AddressValueError("Unexpected '/' in %r" % address) self._ip = self._ip_int_from_string(addr_str) @property @@ -1917,8 +2050,7 @@ class IPv6Address(_BaseV6, _BaseAddress): See RFC 2373 2.7 for details. """ - multicast_network = IPv6Network('ff00::/8') - return self in multicast_network + return self in self._constants._multicast_network @property def is_reserved(self): @@ -1929,16 +2061,7 @@ class IPv6Address(_BaseV6, _BaseAddress): reserved IPv6 Network ranges. 
""" - reserved_networks = [IPv6Network('::/8'), IPv6Network('100::/8'), - IPv6Network('200::/7'), IPv6Network('400::/6'), - IPv6Network('800::/5'), IPv6Network('1000::/4'), - IPv6Network('4000::/3'), IPv6Network('6000::/3'), - IPv6Network('8000::/3'), IPv6Network('A000::/3'), - IPv6Network('C000::/3'), IPv6Network('E000::/4'), - IPv6Network('F000::/5'), IPv6Network('F800::/6'), - IPv6Network('FE00::/9')] - - return any(self in x for x in reserved_networks) + return any(self in x for x in self._constants._reserved_networks) @property def is_link_local(self): @@ -1948,8 +2071,7 @@ class IPv6Address(_BaseV6, _BaseAddress): A boolean, True if the address is reserved per RFC 4291. """ - linklocal_network = IPv6Network('fe80::/10') - return self in linklocal_network + return self in self._constants._linklocal_network @property def is_site_local(self): @@ -1963,8 +2085,7 @@ class IPv6Address(_BaseV6, _BaseAddress): A boolean, True if the address is reserved per RFC 3513 2.5.6. """ - sitelocal_network = IPv6Network('fec0::/10') - return self in sitelocal_network + return self in self._constants._sitelocal_network @property def is_private(self): @@ -1975,16 +2096,7 @@ class IPv6Address(_BaseV6, _BaseAddress): iana-ipv6-special-registry. """ - return (self in IPv6Network('::1/128') or - self in IPv6Network('::/128') or - self in IPv6Network('::ffff:0:0/96') or - self in IPv6Network('100::/64') or - self in IPv6Network('2001::/23') or - self in IPv6Network('2001:2::/48') or - self in IPv6Network('2001:db8::/32') or - self in IPv6Network('2001:10::/28') or - self in IPv6Network('fc00::/7') or - self in IPv6Network('fe80::/10')) + return any(self in net for net in self._constants._private_networks) @property def is_global(self): @@ -2064,11 +2176,21 @@ class IPv6Address(_BaseV6, _BaseAddress): class IPv6Interface(IPv6Address): def __init__(self, address): - if isinstance(address, (bytes, int)): + if isinstance(address, (bytes, _compat_int_types)): IPv6Address.__init__(self, address) self.network = IPv6Network(self._ip) self._prefixlen = self._max_prefixlen return + if isinstance(address, tuple): + IPv6Address.__init__(self, address[0]) + if len(address) > 1: + self._prefixlen = int(address[1]) + else: + self._prefixlen = self._max_prefixlen + self.network = IPv6Network(address, strict=False) + self.netmask = self.network.netmask + self.hostmask = self.network.hostmask + return addr = _split_optional_netmask(address) IPv6Address.__init__(self, addr[0]) @@ -2098,7 +2220,8 @@ class IPv6Interface(IPv6Address): if address_less is NotImplemented: return NotImplemented try: - return self.network < other.network + return (self.network < other.network or + self.network == other.network and address_less) except AttributeError: # We *do* allow addresses and interfaces to be sorted. The # unassociated address is considered less than all interfaces. @@ -2107,6 +2230,8 @@ class IPv6Interface(IPv6Address): def __hash__(self): return self._ip ^ self._prefixlen ^ int(self.network.network_address) + __reduce__ = _IPAddressBase.__reduce__ + @property def ip(self): return IPv6Address(self._ip) @@ -2183,21 +2308,29 @@ class IPv6Network(_BaseV6, _BaseNetwork): supplied. """ - _BaseV6.__init__(self, address) _BaseNetwork.__init__(self, address) - # Efficient constructor from integer. 
- if isinstance(address, int): + # Efficient constructor from integer or packed address + if isinstance(address, (bytes, _compat_int_types)): self.network_address = IPv6Address(address) - self._prefixlen = self._max_prefixlen - self.netmask = IPv6Address(self._ALL_ONES) + self.netmask, self._prefixlen = self._make_netmask( + self._max_prefixlen) return - # Constructing from a packed address - if isinstance(address, bytes): - self.network_address = IPv6Address(address) - self._prefixlen = self._max_prefixlen - self.netmask = IPv6Address(self._ALL_ONES) + if isinstance(address, tuple): + if len(address) > 1: + arg = address[1] + else: + arg = self._max_prefixlen + self.netmask, self._prefixlen = self._make_netmask(arg) + self.network_address = IPv6Address(address[0]) + packed = int(self.network_address) + if packed & int(self.netmask) != packed: + if strict: + raise ValueError('%s has host bits set' % self) + else: + self.network_address = IPv6Address(packed & + int(self.netmask)) return # Assume input argument to be string or any object representation @@ -2207,15 +2340,14 @@ class IPv6Network(_BaseV6, _BaseNetwork): self.network_address = IPv6Address(self._ip_int_from_string(addr[0])) if len(addr) == 2: - # This may raise NetmaskValueError - self._prefixlen = self._prefix_from_prefix_string(addr[1]) + arg = addr[1] else: - self._prefixlen = self._max_prefixlen + arg = self._max_prefixlen + self.netmask, self._prefixlen = self._make_netmask(arg) - self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen)) if strict: if (IPv6Address(int(self.network_address) & int(self.netmask)) != - self.network_address): + self.network_address): raise ValueError('%s has host bits set' % self) self.network_address = IPv6Address(int(self.network_address) & int(self.netmask)) @@ -2223,6 +2355,18 @@ class IPv6Network(_BaseV6, _BaseNetwork): if self._prefixlen == (self._max_prefixlen - 1): self.hosts = self.__iter__ + def hosts(self): + """Generate Iterator over usable hosts in a network. + + This is like __iter__ except it doesn't return the + Subnet-Router anycast address. + + """ + network = int(self.network_address) + broadcast = int(self.broadcast_address) + for x in _compat_range(network + 1, broadcast + 1): + yield self._address_class(x) + @property def is_site_local(self): """Test if the address is reserved for site-local. 
@@ -2237,3 +2381,39 @@ class IPv6Network(_BaseV6, _BaseNetwork): """ return (self.network_address.is_site_local and self.broadcast_address.is_site_local) + + +class _IPv6Constants(object): + + _linklocal_network = IPv6Network('fe80::/10') + + _multicast_network = IPv6Network('ff00::/8') + + _private_networks = [ + IPv6Network('::1/128'), + IPv6Network('::/128'), + IPv6Network('::ffff:0:0/96'), + IPv6Network('100::/64'), + IPv6Network('2001::/23'), + IPv6Network('2001:2::/48'), + IPv6Network('2001:db8::/32'), + IPv6Network('2001:10::/28'), + IPv6Network('fc00::/7'), + IPv6Network('fe80::/10'), + ] + + _reserved_networks = [ + IPv6Network('::/8'), IPv6Network('100::/8'), + IPv6Network('200::/7'), IPv6Network('400::/6'), + IPv6Network('800::/5'), IPv6Network('1000::/4'), + IPv6Network('4000::/3'), IPv6Network('6000::/3'), + IPv6Network('8000::/3'), IPv6Network('A000::/3'), + IPv6Network('C000::/3'), IPv6Network('E000::/4'), + IPv6Network('F000::/5'), IPv6Network('F800::/6'), + IPv6Network('FE00::/9'), + ] + + _sitelocal_network = IPv6Network('fec0::/10') + + +IPv6Address._constants = _IPv6Constants diff --git a/libs/libfilebot/__init__.py b/libs/libfilebot/__init__.py index c734afaef..4a81ddff2 100644 --- a/libs/libfilebot/__init__.py +++ b/libs/libfilebot/__init__.py @@ -1,5 +1,6 @@ # coding=utf-8 -from main import get_filebot_attrs +from __future__ import absolute_import +from .main import get_filebot_attrs __all__ = ["get_filebot_attrs"] diff --git a/libs/libfilebot/lib.py b/libs/libfilebot/lib.py index ac7e469b0..7820bdc26 100644 --- a/libs/libfilebot/lib.py +++ b/libs/libfilebot/lib.py @@ -1,5 +1,6 @@ # coding=utf-8 +from __future__ import absolute_import import os import sys diff --git a/libs/libfilebot/main.py b/libs/libfilebot/main.py index 9a4e685eb..3e1333e0b 100644 --- a/libs/libfilebot/main.py +++ b/libs/libfilebot/main.py @@ -1,5 +1,7 @@ # coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function import subprocess import sys import traceback @@ -10,7 +12,7 @@ import types import os from pipes import quote -from lib import find_executable +from .lib import find_executable mswindows = False if sys.platform == "win32": @@ -87,7 +89,7 @@ def get_filebot_attrs(fn): args_func, match_func = XATTR_MAP.get(sys.platform, XATTR_MAP["default"]) args = args_func(fn) - if isinstance(args, types.ListType): + if isinstance(args, list): try: env = dict(os.environ) if not mswindows: @@ -132,4 +134,4 @@ def get_filebot_attrs(fn): if __name__ == "__main__": - print get_filebot_attrs(sys.argv[1]) + print(get_filebot_attrs(sys.argv[1])) diff --git a/libs/pyprobe/__init__.py b/libs/pyprobe/__init__.py index b8a1da01a..b3e62d15f 100644 --- a/libs/pyprobe/__init__.py +++ b/libs/pyprobe/__init__.py @@ -1,2 +1,4 @@ -from pyprobe import VideoFileParser +from pyprobe.pyprobe import VideoFileParser +from pyprobe.helpers import timeToTuple, sizeStr + \ No newline at end of file diff --git a/libs/pyprobe/ffprobeparsers.py b/libs/pyprobe/ffprobeparsers.py index 36a395c53..e84954973 100644 --- a/libs/pyprobe/ffprobeparsers.py +++ b/libs/pyprobe/ffprobeparsers.py @@ -1,6 +1,6 @@ from os import path -from baseparser import BaseParser +from pyprobe.baseparser import BaseParser class StreamParser(BaseParser): @@ -44,7 +44,7 @@ class VideoStreamParser(BaseParser): """Returns a tuple (width, height)""" width = data.get("width", None) height = data.get("height", None) - if width is None and height is None: + if width == None and height == None: return None, (0, 0) try: return (width, 
height), (int(float(width)), int(float(height))) @@ -67,7 +67,7 @@ class VideoStreamParser(BaseParser): input_str = data.get("avg_frame_rate", None) try: num, den = input_str.split("/") - return input_str, round(float(num) / float(den), 3) + return input_str, float(num) / float(den) except (ValueError, ZeroDivisionError, AttributeError): info = cls.average_framerate(data) return input_str, info @@ -125,15 +125,6 @@ class SubtitleStreamParser(BaseParser): return info, (info or "null") return None, "null" - @staticmethod - def value_forced(data): - """Returns a bool """ - disposition = data.get("disposition", None) - if disposition: - info = disposition.get("forced", None) - return bool(info), (bool(info) or False) - return None, "null" - class ChapterParser(BaseParser): @staticmethod @@ -191,7 +182,7 @@ class RootParser(BaseParser): def value_size(data): """Returns an int""" info = data.get("size", None) - if info is None: + if info == None: file_path = data.get("filename", "") if path.isfile(file_path): info = str(path.getsize(file_path)) @@ -204,7 +195,7 @@ class RootParser(BaseParser): def value_bit_rate(cls, data): """Returns an int""" info = data.get("bit_rate", None) - if info is None: + if info == None: _, size = cls.value_size(data) _, duration = cls.value_duration(data) if size and duration: diff --git a/libs/pyprobe/pyprobe.py b/libs/pyprobe/pyprobe.py index bb63fb0e6..529101b72 100644 --- a/libs/pyprobe/pyprobe.py +++ b/libs/pyprobe/pyprobe.py @@ -1,25 +1,51 @@ import json import subprocess +import xml.etree +import xml.etree.ElementTree +from io import StringIO from os import path -from sys import getfilesystemencoding +import re -import ffprobeparsers +from pyprobe import ffprobeparsers, mediainfoparsers class VideoFileParser: def __init__( self, ffprobe="ffprobe", + mediainfo="mediainfo", includeMissing=True, rawMode=False, ): self._ffprobe = ffprobe + self._mediainfo = mediainfo self._includeMissing = includeMissing self._rawMode = rawMode ######################################## # Main Method + def parseMediainfo(self, inputFile): + """Takes an input file and returns the parsed data using mediainfo. + + Args: + inputFile (str): Video file path + + Returns: + dict>: Parsed video info + + Raises: + FileNotFoundError: The input video file or input executable was not found + IOError: Execution failed + + """ + if not path.isfile(inputFile): + raise FileNotFoundError(inputFile + " not found") + self._checkExecutable(self._mediainfo) + self._checkMediainfoVersion(self._mediainfo) + xmlData = self._executeMediainfo(inputFile) + return self._parseMediainfo(xmlData, inputFile) + def parseFfprobe(self, inputFile): """Takes an input file and returns the parsed data using ffprobe. 
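
The new mediainfo path mirrors the existing ffprobe flow. Roughly how the parser is meant to be driven (a sketch based on the code above; the file path is a placeholder, both executables are assumed to be on PATH, and mediainfo >= 17.10 is required per _checkMediainfoVersion below):

    from pyprobe import VideoFileParser

    parser = VideoFileParser(ffprobe="ffprobe", mediainfo="mediainfo")
    info = parser.parseMediainfo("/path/to/video.mkv")   # or parser.parseFfprobe(...)
    print(info["videos"], info["audios"], info["subtitles"])
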
@@ -40,6 +66,122 @@ class VideoFileParser: fdict = self._executeFfprobe(inputFile) return self._parseFfprobe(fdict, inputFile) + ######################################## + # Mediainfo Parsing + + def _executeMediainfo(self, inputFile): + """Executes mediainfo program on input file to get raw info + + Args: + inputFile (str): Video file path + + Returns: + xml.ElementTree.etree: Mediainfo output + + Raises: + IOError: Mediainfo output could not be parsed as XML data + + """ + commandArgs = ["-f", "--Language=raw", "--Output=XML"] + outputXml = self._executeParser(self._mediainfo, commandArgs, inputFile) + try: + xmlRoot = self._decodeMediainfoOutput(outputXml) + except xml.etree.ElementTree.ParseError: + raise IOError("Could not decode mediainfo output for file " + inputFile) + return xmlRoot + + def _parseMediainfo(self, xmlRoot, inputFile): + """Parse mediainfo output into an organized data structure + + Args: + xmlRoot (xml.ElementTree.etree): Mediainfo output + inputFile (str): Video file path + + Returns: + dict>: Parsed video data + + """ + videoInfo = {} + videoInfo["path"] = path.abspath(inputFile) + videoInfo.update( + mediainfoparsers.RootParser.parse( + xmlRoot.find(".//track[@type='General']"), + self._rawMode, + self._includeMissing, + ) + ) + videoInfo.update(self._parseMediainfoStreams(xmlRoot)) + videoInfo.update(self._parseMediainfoChapters(xmlRoot, videoInfo["duration"])) + return videoInfo + + @staticmethod + def _decodeMediainfoOutput(xmlData): + # Strip namespaces from xml string + # Code used from https://stackoverflow.com/a/25920989 + it = xml.etree.ElementTree.iterparse(StringIO(xmlData)) + for _, el in it: + if "}" in el.tag: + el.tag = el.tag.split("}", 1)[1] + return it.root + + def _parseMediainfoStreams(self, xmlData): + """Parses video, audio, and subtitle streams + + Args: + xmlData (dict): Stream data from mediainfo + + Returns: + dict>: Parsed streams - video, audio, and subtitle + + """ + parsedInfo = {"videos": [], "audios": [], "subtitles": []} + for stream in xmlData.findall(".//track"): + streamType = stream.attrib["type"] + if streamType == "Video": + parsedInfo["videos"].append( + mediainfoparsers.VideoStreamParser.parse( + stream, self._rawMode, self._includeMissing + ) + ) + elif streamType == "Audio": + parsedInfo["audios"].append( + mediainfoparsers.AudioStreamParser.parse( + stream, self._rawMode, self._includeMissing + ) + ) + elif streamType == "Text": + parsedInfo["subtitles"].append( + mediainfoparsers.SubtitleStreamParser.parse( + stream, self._rawMode, self._includeMissing + ) + ) + return parsedInfo + + def _parseMediainfoChapters(self, xmlData, duration): + """Since mediainfo does not give end times for each chapter, + start times for the following chapter are added to the previous chapter. 
+ + Args: + xmlData (dict): Stream data from mediainfo + duration (int): Video duration + + Returns: + dict>: Parsed chapters + + """ + parsedInfo = {"chapters": []} + for extra in xmlData.find(".//track[@type='Menu']/extra"): + match = re.fullmatch(r"_\d*_\d\d_\d\d_\d\d\d", extra.tag) + if match: + parsedInfo["chapters"].append( + mediainfoparsers.ChapterParser.parse( + extra, self._rawMode, self._includeMissing + ) + ) + if not self._rawMode: + mediainfoparsers.ChapterParser.addEndTimes(parsedInfo["chapters"], duration) + return parsedInfo + ######################################## # ffprobe Parsing @@ -56,8 +198,6 @@ class VideoFileParser: """ commandArgs = [ - "-v", - "quiet", "-hide_banner", "-show_error", "-show_format", @@ -148,7 +288,7 @@ class VideoFileParser: """ parsedInfo = {"chapters": []} - if fOutput["chapters"] is None: + if fOutput["chapters"] == None: return parsedInfo for chapter in fOutput["chapters"]: parsedInfo["chapters"].append( @@ -174,16 +314,15 @@ class VideoFileParser: IOError: ffprobe execution failed """ - command = [parser] + commandArgs + [inputFile.encode(getfilesystemencoding())] - try: - completedProcess = subprocess.check_output( - command, stderr=subprocess.STDOUT - ) - except subprocess.CalledProcessError as e: + command = [parser] + commandArgs + [inputFile] + completedProcess = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" + ) + if completedProcess.returncode != 0: raise IOError( - "Error occurred during execution - " + e.output + "Error occurred during execution - " + completedProcess.stderr ) - return completedProcess + return completedProcess.stdout @staticmethod def _checkExecutable(executable): @@ -197,17 +336,31 @@ class VideoFileParser: """ try: - subprocess.check_output( + subprocess.run( [executable, "--help"], - stderr=subprocess.STDOUT + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, ) - except OSError: + except FileNotFoundError: raise FileNotFoundError(executable + " not found") - -class FileNotFoundError(Exception): - pass - - -class IOError(Exception): - pass + @staticmethod + def _checkMediainfoVersion(executable): + """Checks if the Mediainfo version is >=17.10 + In the version jump from 0.7.97 to 17.10 came lots of changes + to the way Mediainfo outputs data. Therefore, this will + only support versions >=17.10. + + Some linux software repositories still distribute old + versions of mediainfo, so the user must install + using packages from mediainfo's website. + + """ + command = [executable, "--version"] + completedProcess = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" + ) + match = re.search(r"v\d*(\.\d*)*", completedProcess.stdout) + version = match.group()[1:] + if version.split(".")[0] == "0": + raise IOError("Mediainfo version is <17.10 - (v" + version + ")") diff --git a/libs/requests/__init__.py b/libs/requests/__init__.py index bc168ee53..9a899df67 100644 --- a/libs/requests/__init__.py +++ b/libs/requests/__init__.py @@ -57,10 +57,10 @@ def check_compatibility(urllib3_version, chardet_version): # Check urllib3 for compatibility. major, minor, patch = urllib3_version # noqa: F811 major, minor, patch = int(major), int(minor), int(patch) - # urllib3 >= 1.21.1, <= 1.24 + # urllib3 >= 1.21.1, <= 1.25 assert major == 1 assert minor >= 21 - assert minor <= 24 + assert minor <= 25 # Check chardet for compatibility. 
 major, minor, patch = chardet_version.split('.')[:3]
diff --git a/libs/requests/__version__.py b/libs/requests/__version__.py
index f5b5d0367..9844f740a 100644
--- a/libs/requests/__version__.py
+++ b/libs/requests/__version__.py
@@ -5,10 +5,10 @@
 __title__ = 'requests'
 __description__ = 'Python HTTP for Humans.'
 __url__ = 'http://python-requests.org'
-__version__ = '2.21.0'
-__build__ = 0x022100
+__version__ = '2.22.0'
+__build__ = 0x022200
 __author__ = 'Kenneth Reitz'
 __author_email__ = 'me@kennethreitz.org'
 __license__ = 'Apache 2.0'
-__copyright__ = 'Copyright 2018 Kenneth Reitz'
+__copyright__ = 'Copyright 2019 Kenneth Reitz'
 __cake__ = u'\u2728 \U0001f370 \u2728'
diff --git a/libs/requests/api.py b/libs/requests/api.py
index abada96d4..ef71d0759 100644
--- a/libs/requests/api.py
+++ b/libs/requests/api.py
@@ -19,7 +19,7 @@ def request(method, url, **kwargs):
     :param method: method for the new :class:`Request` object.
     :param url: URL for the new :class:`Request` object.
     :param params: (optional) Dictionary, list of tuples or bytes to send
-        in the body of the :class:`Request`.
+        in the query string for the :class:`Request`.
     :param data: (optional) Dictionary, list of tuples, bytes, or file-like
         object to send in the body of the :class:`Request`.
     :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
@@ -65,7 +65,7 @@ def get(url, params=None, **kwargs):

     :param url: URL for the new :class:`Request` object.
     :param params: (optional) Dictionary, list of tuples or bytes to send
-        in the body of the :class:`Request`.
+        in the query string for the :class:`Request`.
     :param \*\*kwargs: Optional arguments that ``request`` takes.
     :return: :class:`Response <Response>` object
     :rtype: requests.Response
diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py
index 7ed8ef4ef..a8ce453dc 100644
--- a/libs/subliminal_patch/http.py
+++ b/libs/subliminal_patch/http.py
@@ -148,7 +148,7 @@ class CFSession(CloudScraper):
         cache_key = "cf_data3_%s" % domain

         if not self.cookies.get("cf_clearance", "", domain=domain):
-            cf_data = region.get(cache_key)
+            cf_data = str(region.get(cache_key))
             if cf_data is not NO_VALUE:
                 cf_cookies, hdrs = cf_data
                 logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
@@ -165,9 +165,9 @@ class CFSession(CloudScraper):
                     pass
                 else:
                     if cf_data and "cf_clearance" in cf_data[0] and cf_data[0]["cf_clearance"]:
-                        if cf_data != region.get(cache_key):
+                        if cf_data != str(region.get(cache_key)):
                             logger.debug("Storing cf data for %s: %s", domain, cf_data)
-                            region.set(cache_key, cf_data)
+                            region.set(cache_key, bytes(cf_data))
                         elif cf_data[0]["cf_clearance"]:
                             logger.debug("CF Live tokens not updated")
diff --git a/libs/subliminal_patch/pitcher.py b/libs/subliminal_patch/pitcher.py
index be1302943..ce0420cb2 100644
--- a/libs/subliminal_patch/pitcher.py
+++ b/libs/subliminal_patch/pitcher.py
@@ -243,7 +243,7 @@ class DBCPitcher(DBCProxyLessPitcher):


 def load_verification(site_name, session, callback=lambda x: None):
-    ccks = region.get("%s_data" % site_name, expiration_time=15552000)  # 6m
+    ccks = str(region.get("%s_data" % site_name, expiration_time=15552000))  # 6m
     if ccks != NO_VALUE:
         cookies, user_agent = ccks
         logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
@@ -257,4 +257,4 @@ def load_verification(site_name, session, callback=lambda x: None):


 def store_verification(site_name, session):
-    region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))
+    region.set("%s_data" % site_name, bytes(session.cookies._cookies, session.headers["User-Agent"]))
diff --git a/libs/subliminal_patch/providers/__init__.py b/libs/subliminal_patch/providers/__init__.py
index b475e082d..ced4694f3 100644
--- a/libs/subliminal_patch/providers/__init__.py
+++ b/libs/subliminal_patch/providers/__init__.py
@@ -22,7 +22,7 @@ class Provider(_Provider):

 # register providers
 # fixme: this is bad
-for name in os.listdir(os.path.dirname(six.text_type(__file__, get_viable_encoding()))):
+for name in os.listdir(os.path.dirname(__file__)):
     if name in ("__init__.py", "mixins.py", "utils.py") or not name.endswith(".py"):
         continue

diff --git a/libs/subliminal_patch/providers/assrt.py b/libs/subliminal_patch/providers/assrt.py
index 6de7c6ca0..4831da792 100644
--- a/libs/subliminal_patch/providers/assrt.py
+++ b/libs/subliminal_patch/providers/assrt.py
@@ -140,7 +140,7 @@ class AssrtProvider(Provider):
             logger.debug('No subtitle found')

         # parse the subtitles
-        pattern = re.compile(ur'lang(?P\w+)')
+        pattern = re.compile(r'lang(?P\w+)')
         subtitles = []
         for sub in result['sub']['subs']:
             if 'lang' not in sub:
diff --git a/libs/subliminal_patch/providers/legendastv.py b/libs/subliminal_patch/providers/legendastv.py
index 3fe71ab1d..011209e42 100644
--- a/libs/subliminal_patch/providers/legendastv.py
+++ b/libs/subliminal_patch/providers/legendastv.py
@@ -199,7 +199,7 @@ class LegendasTVProvider(_LegendasTVProvider):

             # attempt to get the releases from the cache
             cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
-            releases = region.get(cache_key, expiration_time=expiration_time)
+            releases = str(region.get(cache_key, expiration_time=expiration_time))

             # the releases are not in cache or cache is expired
             if releases == NO_VALUE:
@@ -226,7 +226,7 @@ class LegendasTVProvider(_LegendasTVProvider):
                     releases.append(name)

                 # cache the releases
-                region.set(cache_key, releases)
+                region.set(cache_key, bytes(releases))

             # iterate over releases
             for r in releases:
diff --git a/libs/subliminal_patch/providers/opensubtitles.py b/libs/subliminal_patch/providers/opensubtitles.py
index 5f4c7bccf..fb9d5b754 100644
--- a/libs/subliminal_patch/providers/opensubtitles.py
+++ b/libs/subliminal_patch/providers/opensubtitles.py
@@ -154,7 +154,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
         self.token = response['token']
         logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))

-        region.set("os_token", self.token)
+        region.set("os_token", bytes(self.token))

     def use_token_or_login(self, func):
         if not self.token:
@@ -175,7 +175,7 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):

         logger.info('Logging in')

-        token = region.get("os_token")
+        token = str(region.get("os_token"))
         if token is not NO_VALUE:
             try:
                 logger.debug('Trying previous token: %r', token[:10]+"X"*(len(token)-10))
diff --git a/libs/subliminal_patch/providers/subscene.py b/libs/subliminal_patch/providers/subscene.py
index 3ee2609d2..3129c8b0e 100644
--- a/libs/subliminal_patch/providers/subscene.py
+++ b/libs/subliminal_patch/providers/subscene.py
@@ -141,7 +141,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
         logger.info("Creating session")
         self.session = RetryingCFSession()

-        prev_cookies = region.get("subscene_cookies2")
+        prev_cookies = str(region.get("subscene_cookies2"))
         if prev_cookies != NO_VALUE:
             logger.debug("Re-using old subscene cookies: %r", prev_cookies)
             self.session.cookies.update(prev_cookies)
@@ -194,7 +194,7 @@ class 
SubsceneProvider(Provider, ProviderSubtitleArchiveMixin): del cj[cn] logger.debug("Storing cookies: %r", cj) - region.set("subscene_cookies2", cj) + region.set("subscene_cookies2", bytes(cj) return raise ProviderError("Something went wrong when trying to log in #1") @@ -219,9 +219,9 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin): acc_filters["SelectedIds"] = selected_ids self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"]) - last_filters = region.get("subscene_filters") + last_filters = str(region.get("subscene_filters")) if last_filters != acc_filters: - region.set("subscene_filters", acc_filters) + region.set("subscene_filters", bytes(acc_filters) logger.debug("Setting account filters to %r", acc_filters) self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False) diff --git a/libs/subscene_api/subscene.py b/libs/subscene_api/subscene.py index 827110369..099370a48 100644 --- a/libs/subscene_api/subscene.py +++ b/libs/subscene_api/subscene.py @@ -38,7 +38,7 @@ if is_PY2: from urllib2 import Request, urlopen else: from contextlib import suppress - from urllib2.request import Request, urlopen + from urllib.request import Request, urlopen from dogpile.cache.api import NO_VALUE from subliminal.cache import region @@ -56,7 +56,7 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\ "Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" -ENDPOINT_RE = re.compile(ur'(?uis).*?.*? + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. """ from __future__ import absolute_import @@ -86,35 +111,32 @@ SSL_WRITE_BLOCKSIZE = 16384 # individual cipher suites. We need to do this because this is how # SecureTransport wants them. 
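A minimal sketch of how the SecureTransport backend touched in this file is typically switched on by an application; inject_into_urllib3() and extract_from_urllib3() are the module's own entry points, while the PoolManager usage and URL are assumed for illustration.

    import urllib3
    from urllib3.contrib import securetransport

    # Replace urllib3's default SSLContext with SecureTransportContext.
    securetransport.inject_into_urllib3()
    try:
        http = urllib3.PoolManager()
        resp = http.request("GET", "https://example.com")
        print(resp.status)
    finally:
        # Restore the stock ssl-module backend.
        securetransport.extract_from_urllib3()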
CIPHER_SUITES = [ - SecurityConst.TLS_AES_256_GCM_SHA384, - SecurityConst.TLS_CHACHA20_POLY1305_SHA256, - SecurityConst.TLS_AES_128_GCM_SHA256, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, - SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, - SecurityConst.TLS_DHE_DSS_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, - SecurityConst.TLS_DHE_DSS_WITH_AES_128_GCM_SHA256, SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, - SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, - SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, - SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, - SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA256, - SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA, - SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, - SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA, SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, - SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA256, SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA, - SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_AES_256_GCM_SHA384, + SecurityConst.TLS_AES_128_GCM_SHA256, SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384, SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_AES_128_CCM_8_SHA256, + SecurityConst.TLS_AES_128_CCM_SHA256, SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256, SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256, SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA, @@ -122,9 +144,10 @@ CIPHER_SUITES = [ ] # Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of -# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version. +# TLSv1 and a high of TLSv1.3. For everything else, we pin to that version. +# TLSv1 to 1.2 are supported on macOS 10.8+ and TLSv1.3 is macOS 10.13+ _protocol_to_min_max = { - ssl.PROTOCOL_SSLv23: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), + util.PROTOCOL_TLS: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocolMaxSupported), } if hasattr(ssl, "PROTOCOL_SSLv2"): @@ -147,14 +170,13 @@ if hasattr(ssl, "PROTOCOL_TLSv1_2"): _protocol_to_min_max[ssl.PROTOCOL_TLSv1_2] = ( SecurityConst.kTLSProtocol12, SecurityConst.kTLSProtocol12 ) -if hasattr(ssl, "PROTOCOL_TLS"): - _protocol_to_min_max[ssl.PROTOCOL_TLS] = _protocol_to_min_max[ssl.PROTOCOL_SSLv23] def inject_into_urllib3(): """ Monkey-patch urllib3 with SecureTransport-backed SSL-support. """ + util.SSLContext = SecureTransportContext util.ssl_.SSLContext = SecureTransportContext util.HAS_SNI = HAS_SNI util.ssl_.HAS_SNI = HAS_SNI @@ -166,6 +188,7 @@ def extract_from_urllib3(): """ Undo monkey-patching by :func:`inject_into_urllib3`. 
""" + util.SSLContext = orig_util_SSLContext util.ssl_.SSLContext = orig_util_SSLContext util.HAS_SNI = orig_util_HAS_SNI util.ssl_.HAS_SNI = orig_util_HAS_SNI @@ -458,7 +481,14 @@ class WrappedSocket(object): # Set the minimum and maximum TLS versions. result = Security.SSLSetProtocolVersionMin(self.context, min_version) _assert_no_error(result) + + # TLS 1.3 isn't necessarily enabled by the OS + # so we have to detect when we error out and try + # setting TLS 1.3 if it's allowed. kTLSProtocolMaxSupported + # was added in macOS 10.13 along with kTLSProtocol13. result = Security.SSLSetProtocolVersionMax(self.context, max_version) + if result != 0 and max_version == SecurityConst.kTLSProtocolMaxSupported: + result = Security.SSLSetProtocolVersionMax(self.context, SecurityConst.kTLSProtocol12) _assert_no_error(result) # If there's a trust DB, we need to use it. We do that by telling @@ -667,6 +697,25 @@ class WrappedSocket(object): return der_bytes + def version(self): + protocol = Security.SSLProtocol() + result = Security.SSLGetNegotiatedProtocolVersion(self.context, ctypes.byref(protocol)) + _assert_no_error(result) + if protocol.value == SecurityConst.kTLSProtocol13: + return 'TLSv1.3' + elif protocol.value == SecurityConst.kTLSProtocol12: + return 'TLSv1.2' + elif protocol.value == SecurityConst.kTLSProtocol11: + return 'TLSv1.1' + elif protocol.value == SecurityConst.kTLSProtocol1: + return 'TLSv1' + elif protocol.value == SecurityConst.kSSLProtocol3: + return 'SSLv3' + elif protocol.value == SecurityConst.kSSLProtocol2: + return 'SSLv2' + else: + raise ssl.SSLError('Unknown TLS version: %r' % protocol) + def _reuse(self): self._makefile_refs += 1 diff --git a/libs/urllib3/contrib/socks.py b/libs/urllib3/contrib/socks.py index 811e312ec..636d261fb 100644 --- a/libs/urllib3/contrib/socks.py +++ b/libs/urllib3/contrib/socks.py @@ -1,25 +1,38 @@ # -*- coding: utf-8 -*- """ This module contains provisional support for SOCKS proxies from within -urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and +urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and SOCKS5. To enable its functionality, either install PySocks or install this module with the ``socks`` extra. The SOCKS implementation supports the full range of urllib3 features. It also supports the following SOCKS features: -- SOCKS4 -- SOCKS4a -- SOCKS5 +- SOCKS4A (``proxy_url='socks4a://...``) +- SOCKS4 (``proxy_url='socks4://...``) +- SOCKS5 with remote DNS (``proxy_url='socks5h://...``) +- SOCKS5 with local DNS (``proxy_url='socks5://...``) - Usernames and passwords for the SOCKS proxy -Known Limitations: + .. note:: + It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in + your ``proxy_url`` to ensure that DNS resolution is done from the remote + server instead of client-side when connecting to a domain name. + +SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 +supports IPv4, IPv6, and domain names. + +When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` +will be sent as the ``userid`` section of the SOCKS request:: + + proxy_url="socks4a://@proxy-host" + +When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion +of the ``proxy_url`` will be sent as the username/password to authenticate +with the proxy:: + + proxy_url="socks5h://:@proxy-host" -- Currently PySocks does not support contacting remote websites via literal - IPv6 addresses. Any such connection attempt will fail. You must use a domain - name. 
-- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any - such connection attempt will fail. """ from __future__ import absolute_import @@ -88,7 +101,7 @@ class SOCKSConnection(HTTPConnection): **extra_kw ) - except SocketTimeout as e: + except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % (self.host, self.timeout)) diff --git a/libs/urllib3/fields.py b/libs/urllib3/fields.py index 37fe64a3e..6a9a5a7f5 100644 --- a/libs/urllib3/fields.py +++ b/libs/urllib3/fields.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import email.utils import mimetypes +import re from .packages import six @@ -19,57 +20,147 @@ def guess_content_type(filename, default='application/octet-stream'): return default -def format_header_param(name, value): +def format_header_param_rfc2231(name, value): """ - Helper function to format and quote a single header parameter. + Helper function to format and quote a single header parameter using the + strategy defined in RFC 2231. Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows RFC 2231, as - suggested by RFC 2388 Section 4.4. + non-ASCII values, like file names. This follows RFC 2388 Section 4.4. :param name: The name of the parameter, a string expected to be ASCII only. :param value: - The value of the parameter, provided as a unicode string. + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + An RFC-2231-formatted unicode string. """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + if not any(ch in value for ch in '"\\\r\n'): - result = '%s="%s"' % (name, value) + result = u'%s="%s"' % (name, value) try: result.encode('ascii') except (UnicodeEncodeError, UnicodeDecodeError): pass else: return result - if not six.PY3 and isinstance(value, six.text_type): # Python 2: + + if not six.PY3: # Python 2: value = value.encode('utf-8') + + # encode_rfc2231 accepts an encoded string and returns an ascii-encoded + # string in Python 2 but accepts and returns unicode strings in Python 3 value = email.utils.encode_rfc2231(value, 'utf-8') value = '%s*=%s' % (name, value) + + if not six.PY3: # Python 2: + value = value.decode('utf-8') + return value +_HTML5_REPLACEMENTS = { + u"\u0022": u"%22", + # Replace "\" with "\\". + u"\u005C": u"\u005C\u005C", + u"\u005C": u"\u005C\u005C", +} + +# All control characters from 0x00 to 0x1F *except* 0x1B. +_HTML5_REPLACEMENTS.update({ + six.unichr(cc): u"%{:02X}".format(cc) + for cc + in range(0x00, 0x1F+1) + if cc not in (0x1B,) +}) + + +def _replace_multiple(value, needles_and_replacements): + + def replacer(match): + return needles_and_replacements[match.group(0)] + + pattern = re.compile( + r"|".join([ + re.escape(needle) for needle in needles_and_replacements.keys() + ]) + ) + + result = pattern.sub(replacer, value) + + return result + + +def format_header_param_html5(name, value): + """ + Helper function to format and quote a single header parameter using the + HTML5 strategy. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows the `HTML5 Working Draft + Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. + + .. _HTML5 Working Draft Section 4.10.22.7: + https://w3c.github.io/html/sec-forms.html#multipart-form-data + + :param name: + The name of the parameter, a string expected to be ASCII only. 
+ :param value: + The value of the parameter, provided as ``bytes`` or `str``. + :ret: + A unicode string, stripped of troublesome characters. + """ + if isinstance(value, six.binary_type): + value = value.decode("utf-8") + + value = _replace_multiple(value, _HTML5_REPLACEMENTS) + + return u'%s="%s"' % (name, value) + + +# For backwards-compatibility. +format_header_param = format_header_param_html5 + + class RequestField(object): """ A data container for request body parameters. :param name: - The name of this request field. + The name of this request field. Must be unicode. :param data: The data/value body. :param filename: - An optional filename of the request field. + An optional filename of the request field. Must be unicode. :param headers: An optional dict-like object of headers to initially use for the field. + :param header_formatter: + An optional callable that is used to encode and format the headers. By + default, this is :func:`format_header_param_html5`. """ - def __init__(self, name, data, filename=None, headers=None): + def __init__( + self, + name, + data, + filename=None, + headers=None, + header_formatter=format_header_param_html5): self._name = name self._filename = filename self.data = data self.headers = {} if headers: self.headers = dict(headers) + self.header_formatter = header_formatter @classmethod - def from_tuples(cls, fieldname, value): + def from_tuples( + cls, + fieldname, + value, + header_formatter=format_header_param_html5): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. @@ -97,21 +188,24 @@ class RequestField(object): content_type = None data = value - request_param = cls(fieldname, data, filename=filename) + request_param = cls( + fieldname, data, filename=filename, header_formatter=header_formatter) request_param.make_multipart(content_type=content_type) return request_param def _render_part(self, name, value): """ - Overridable helper function to format a single header parameter. + Overridable helper function to format a single header parameter. By + default, this calls ``self.header_formatter``. :param name: The name of the parameter, a string expected to be ASCII only. :param value: The value of the parameter, provided as a unicode string. """ - return format_header_param(name, value) + + return self.header_formatter(name, value) def _render_parts(self, header_parts): """ @@ -133,7 +227,7 @@ class RequestField(object): if value is not None: parts.append(self._render_part(name, value)) - return '; '.join(parts) + return u'; '.join(parts) def render_headers(self): """ @@ -144,15 +238,15 @@ class RequestField(object): sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] for sort_key in sort_keys: if self.headers.get(sort_key, False): - lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + lines.append(u'%s: %s' % (sort_key, self.headers[sort_key])) for header_name, header_value in self.headers.items(): if header_name not in sort_keys: if header_value: - lines.append('%s: %s' % (header_name, header_value)) + lines.append(u'%s: %s' % (header_name, header_value)) - lines.append('\r\n') - return '\r\n'.join(lines) + lines.append(u'\r\n') + return u'\r\n'.join(lines) def make_multipart(self, content_disposition=None, content_type=None, content_location=None): @@ -168,10 +262,10 @@ class RequestField(object): The 'Content-Location' of the request body. 
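A brief sketch of the new header_formatter hook added to fields.py, assuming a hypothetical field name, payload, and non-ASCII filename; format_header_param_html5 is also the default formatter after this change.

    from urllib3.fields import RequestField, format_header_param_html5

    rf = RequestField(
        name=u"file",
        data=b"hello world",
        filename=u"r\u00e9sum\u00e9.pdf",
        header_formatter=format_header_param_html5,  # the new default
    )
    rf.make_multipart(content_type=u"application/pdf")

    # Content-Disposition now carries the UTF-8 filename directly, as curl
    # and modern browsers do, instead of the RFC 2231 *=utf-8'' encoding.
    print(rf.render_headers())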
""" - self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join([ - '', self._render_parts( - (('name', self._name), ('filename', self._filename)) + self.headers['Content-Disposition'] = content_disposition or u'form-data' + self.headers['Content-Disposition'] += u'; '.join([ + u'', self._render_parts( + ((u'name', self._name), (u'filename', self._filename)) ) ]) self.headers['Content-Type'] = content_type diff --git a/libs/urllib3/packages/ordered_dict.py b/libs/urllib3/packages/ordered_dict.py deleted file mode 100644 index 4479363cc..000000000 --- a/libs/urllib3/packages/ordered_dict.py +++ /dev/null @@ -1,259 +0,0 @@ -# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. -# Passes Python2.7's test suite and incorporates all the latest updates. -# Copyright 2009 Raymond Hettinger, released under the MIT License. -# http://code.activestate.com/recipes/576693/ -try: - from thread import get_ident as _get_ident -except ImportError: - from dummy_thread import get_ident as _get_ident - -try: - from _abcoll import KeysView, ValuesView, ItemsView -except ImportError: - pass - - -class OrderedDict(dict): - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. - # Big-O running times for all methods are the same as for regular dictionaries. - - # The internal self.__map dictionary maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. - # The sentinel element never gets deleted (this simplifies the algorithm). - # Each link is stored as a list of length three: [PREV, NEXT, KEY]. - - def __init__(self, *args, **kwds): - '''Initialize an ordered dictionary. Signature is the same as for - regular dictionaries, but keyword arguments are not recommended - because their insertion order is arbitrary. - - ''' - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__root = root = [] # sentinel node - root[:] = [root, root, None] - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, dict_setitem=dict.__setitem__): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link which goes at the end of the linked - # list, and the inherited dictionary is updated with the new key/value pair. - if key not in self: - root = self.__root - last = root[0] - last[1] = root[0] = self.__map[key] = [last, root, key] - dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which is - # then removed by updating the links in the predecessor and successor nodes. - dict_delitem(self, key) - link_prev, link_next, key = self.__map.pop(key) - link_prev[1] = link_next - link_next[0] = link_prev - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - root = self.__root - curr = root[1] - while curr is not root: - yield curr[2] - curr = curr[1] - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - root = self.__root - curr = root[0] - while curr is not root: - yield curr[2] - curr = curr[0] - - def clear(self): - 'od.clear() -> None. Remove all items from od.' 
- try: - for node in self.__map.itervalues(): - del node[:] - root = self.__root - root[:] = [root, root, None] - self.__map.clear() - except AttributeError: - pass - dict.clear(self) - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. - - ''' - if not self: - raise KeyError('dictionary is empty') - root = self.__root - if last: - link = root[0] - link_prev = link[0] - link_prev[1] = root - root[0] = link_prev - else: - link = root[1] - link_next = link[1] - root[1] = link_next - link_next[0] = root - key = link[2] - del self.__map[key] - value = dict.pop(self, key) - return key, value - - # -- the following methods do not depend on the internal structure -- - - def keys(self): - 'od.keys() -> list of keys in od' - return list(self) - - def values(self): - 'od.values() -> list of values in od' - return [self[key] for key in self] - - def items(self): - 'od.items() -> list of (key, value) pairs in od' - return [(key, self[key]) for key in self] - - def iterkeys(self): - 'od.iterkeys() -> an iterator over the keys in od' - return iter(self) - - def itervalues(self): - 'od.itervalues -> an iterator over the values in od' - for k in self: - yield self[k] - - def iteritems(self): - 'od.iteritems -> an iterator over the (key, value) items in od' - for k in self: - yield (k, self[k]) - - def update(*args, **kwds): - '''od.update(E, **F) -> None. Update od from dict/iterable E and F. - - If E is a dict instance, does: for k in E: od[k] = E[k] - If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] - Or if E is an iterable of items, does: for k, v in E: od[k] = v - In either case, this is followed by: for k, v in F.items(): od[k] = v - - ''' - if len(args) > 2: - raise TypeError('update() takes at most 2 positional ' - 'arguments (%d given)' % (len(args),)) - elif not args: - raise TypeError('update() takes at least 1 argument (0 given)') - self = args[0] - # Make progressively weaker assumptions about "other" - other = () - if len(args) == 2: - other = args[1] - if isinstance(other, dict): - for key in other: - self[key] = other[key] - elif hasattr(other, 'keys'): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - - __update = update # let subclasses override update without breaking __init__ - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - def __repr__(self, _repr_running={}): - 'od.__repr__() <==> repr(od)' - call_key = id(self), _get_ident() - if call_key in _repr_running: - return '...' 
- _repr_running[call_key] = 1 - try: - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - finally: - del _repr_running[call_key] - - def __reduce__(self): - 'Return state information for pickling' - items = [[k, self[k]] for k in self] - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S - and values equal to v (which defaults to None). - - ''' - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. - - ''' - if isinstance(other, OrderedDict): - return len(self)==len(other) and self.items() == other.items() - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other - - # -- the following methods are only used in Python 2.7 -- - - def viewkeys(self): - "od.viewkeys() -> a set-like object providing a view on od's keys" - return KeysView(self) - - def viewvalues(self): - "od.viewvalues() -> an object providing a view on od's values" - return ValuesView(self) - - def viewitems(self): - "od.viewitems() -> a set-like object providing a view on od's items" - return ItemsView(self) diff --git a/libs/urllib3/poolmanager.py b/libs/urllib3/poolmanager.py index 32bd97302..a6ade6e90 100644 --- a/libs/urllib3/poolmanager.py +++ b/libs/urllib3/poolmanager.py @@ -20,7 +20,8 @@ __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] log = logging.getLogger(__name__) SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', - 'ssl_version', 'ca_cert_dir', 'ssl_context') + 'ssl_version', 'ca_cert_dir', 'ssl_context', + 'key_password') # All known keyword arguments that could be provided to the pool manager, its # pools, or the underlying connections. This is used to construct a pool key. @@ -34,6 +35,7 @@ _key_fields = ( 'key_block', # bool 'key_source_address', # str 'key_key_file', # str + 'key_key_password', # str 'key_cert_file', # str 'key_cert_reqs', # str 'key_ca_certs', # str @@ -48,7 +50,7 @@ _key_fields = ( 'key__socks_options', # dict 'key_assert_hostname', # bool or string 'key_assert_fingerprint', # str - 'key_server_hostname', #str + 'key_server_hostname', # str ) #: The namedtuple class used to construct keys for the connection pool. diff --git a/libs/urllib3/response.py b/libs/urllib3/response.py index c112690b0..4f857932c 100644 --- a/libs/urllib3/response.py +++ b/libs/urllib3/response.py @@ -6,6 +6,11 @@ import logging from socket import timeout as SocketTimeout from socket import error as SocketError +try: + import brotli +except ImportError: + brotli = None + from ._collections import HTTPHeaderDict from .exceptions import ( BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError, @@ -90,6 +95,25 @@ class GzipDecoder(object): self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) +if brotli is not None: + class BrotliDecoder(object): + # Supports both 'brotlipy' and 'Brotli' packages + # since they share an import name. 
The top branches + # are for 'brotlipy' and bottom branches for 'Brotli' + def __init__(self): + self._obj = brotli.Decompressor() + + def decompress(self, data): + if hasattr(self._obj, 'decompress'): + return self._obj.decompress(data) + return self._obj.process(data) + + def flush(self): + if hasattr(self._obj, 'flush'): + return self._obj.flush() + return b'' + + class MultiDecoder(object): """ From RFC7231: @@ -118,6 +142,9 @@ def _get_decoder(mode): if mode == 'gzip': return GzipDecoder() + if brotli is not None and mode == 'br': + return BrotliDecoder() + return DeflateDecoder() @@ -155,6 +182,8 @@ class HTTPResponse(io.IOBase): """ CONTENT_DECODERS = ['gzip', 'deflate'] + if brotli is not None: + CONTENT_DECODERS += ['br'] REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, @@ -311,24 +340,32 @@ class HTTPResponse(io.IOBase): if content_encoding in self.CONTENT_DECODERS: self._decoder = _get_decoder(content_encoding) elif ',' in content_encoding: - encodings = [e.strip() for e in content_encoding.split(',') if e.strip() in self.CONTENT_DECODERS] + encodings = [ + e.strip() for e in content_encoding.split(',') + if e.strip() in self.CONTENT_DECODERS] if len(encodings): self._decoder = _get_decoder(content_encoding) + DECODER_ERROR_CLASSES = (IOError, zlib.error) + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.error,) + def _decode(self, data, decode_content, flush_decoder): """ Decode the data passed in and potentially flush the decoder. """ + if not decode_content: + return data + try: - if decode_content and self._decoder: + if self._decoder: data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: + except self.DECODER_ERROR_CLASSES as e: content_encoding = self.headers.get('content-encoding', '').lower() raise DecodeError( "Received response with content-encoding: %s, but " "failed to decode it." 
% content_encoding, e) - - if flush_decoder and decode_content: + if flush_decoder: data += self._flush_decoder() return data @@ -508,9 +545,10 @@ class HTTPResponse(io.IOBase): headers = r.msg if not isinstance(headers, HTTPHeaderDict): - if PY3: # Python 3 + if PY3: headers = HTTPHeaderDict(headers.items()) - else: # Python 2 + else: + # Python 2.7 headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute @@ -703,3 +741,20 @@ class HTTPResponse(io.IOBase): return self.retries.history[-1].redirect_location else: return self._request_url + + def __iter__(self): + buffer = [b""] + for chunk in self.stream(decode_content=True): + if b"\n" in chunk: + chunk = chunk.split(b"\n") + yield b"".join(buffer) + chunk[0] + b"\n" + for x in chunk[1:-1]: + yield x + b"\n" + if chunk[-1]: + buffer = [chunk[-1]] + else: + buffer = [] + else: + buffer.append(chunk) + if buffer: + yield b"".join(buffer) diff --git a/libs/urllib3/util/__init__.py b/libs/urllib3/util/__init__.py index 2f2770b62..2914bb468 100644 --- a/libs/urllib3/util/__init__.py +++ b/libs/urllib3/util/__init__.py @@ -12,6 +12,7 @@ from .ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + PROTOCOL_TLS, ) from .timeout import ( current_time, @@ -35,6 +36,7 @@ __all__ = ( 'IS_PYOPENSSL', 'IS_SECURETRANSPORT', 'SSLContext', + 'PROTOCOL_TLS', 'Retry', 'Timeout', 'Url', diff --git a/libs/urllib3/util/request.py b/libs/urllib3/util/request.py index 3ddfcd559..280b8530c 100644 --- a/libs/urllib3/util/request.py +++ b/libs/urllib3/util/request.py @@ -5,6 +5,13 @@ from ..packages.six import b, integer_types from ..exceptions import UnrewindableBodyError ACCEPT_ENCODING = 'gzip,deflate' +try: + import brotli as _unused_module_brotli # noqa: F401 +except ImportError: + pass +else: + ACCEPT_ENCODING += ',br' + _FAILEDTELL = object() diff --git a/libs/urllib3/util/selectors.py b/libs/urllib3/util/selectors.py deleted file mode 100644 index d75cb266b..000000000 --- a/libs/urllib3/util/selectors.py +++ /dev/null @@ -1,581 +0,0 @@ -# Backport of selectors.py from Python 3.5+ to support Python < 3.4 -# Also has the behavior specified in PEP 475 which is to retry syscalls -# in the case of an EINTR error. This module is required because selectors34 -# does not follow this behavior and instead returns that no dile descriptor -# events have occurred rather than retry the syscall. The decision to drop -# support for select.devpoll is made to maintain 100% test coverage. - -import errno -import math -import select -import socket -import sys -import time -from collections import namedtuple, Mapping - -try: - monotonic = time.monotonic -except (AttributeError, ImportError): # Python 3.3< - monotonic = time.time - -EVENT_READ = (1 << 0) -EVENT_WRITE = (1 << 1) - -HAS_SELECT = True # Variable that shows whether the platform has a selector. -_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None. -_DEFAULT_SELECTOR = None - - -class SelectorError(Exception): - def __init__(self, errcode): - super(SelectorError, self).__init__() - self.errno = errcode - - def __repr__(self): - return "".format(self.errno) - - def __str__(self): - return self.__repr__() - - -def _fileobj_to_fd(fileobj): - """ Return a file descriptor from a file object. If - given an integer will simply return that integer back. 
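The brotli hunks in response.py and util/request.py above only take effect when an optional brotli module is importable, in which case urllib3's default Accept-Encoding becomes 'gzip,deflate,br'. A small sketch of the decode path under that assumption, with an arbitrary payload:

    import io
    import brotli  # provided by either the 'brotlipy' or the 'Brotli' package
    from urllib3.response import HTTPResponse

    payload = b"hello " * 100
    resp = HTTPResponse(
        body=io.BytesIO(brotli.compress(payload)),
        headers={"content-encoding": "br"},
        status=200,
        preload_content=False,
    )

    # decode_content=True routes the body through the new BrotliDecoder.
    assert resp.read(decode_content=True) == payload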
""" - if isinstance(fileobj, int): - fd = fileobj - else: - try: - fd = int(fileobj.fileno()) - except (AttributeError, TypeError, ValueError): - raise ValueError("Invalid file object: {0!r}".format(fileobj)) - if fd < 0: - raise ValueError("Invalid file descriptor: {0}".format(fd)) - return fd - - -# Determine which function to use to wrap system calls because Python 3.5+ -# already handles the case when system calls are interrupted. -if sys.version_info >= (3, 5): - def _syscall_wrapper(func, _, *args, **kwargs): - """ This is the short-circuit version of the below logic - because in Python 3.5+ all system calls automatically restart - and recalculate their timeouts. """ - try: - return func(*args, **kwargs) - except (OSError, IOError, select.error) as e: - errcode = None - if hasattr(e, "errno"): - errcode = e.errno - raise SelectorError(errcode) -else: - def _syscall_wrapper(func, recalc_timeout, *args, **kwargs): - """ Wrapper function for syscalls that could fail due to EINTR. - All functions should be retried if there is time left in the timeout - in accordance with PEP 475. """ - timeout = kwargs.get("timeout", None) - if timeout is None: - expires = None - recalc_timeout = False - else: - timeout = float(timeout) - if timeout < 0.0: # Timeout less than 0 treated as no timeout. - expires = None - else: - expires = monotonic() + timeout - - args = list(args) - if recalc_timeout and "timeout" not in kwargs: - raise ValueError( - "Timeout must be in args or kwargs to be recalculated") - - result = _SYSCALL_SENTINEL - while result is _SYSCALL_SENTINEL: - try: - result = func(*args, **kwargs) - # OSError is thrown by select.select - # IOError is thrown by select.epoll.poll - # select.error is thrown by select.poll.poll - # Aren't we thankful for Python 3.x rework for exceptions? - except (OSError, IOError, select.error) as e: - # select.error wasn't a subclass of OSError in the past. - errcode = None - if hasattr(e, "errno"): - errcode = e.errno - elif hasattr(e, "args"): - errcode = e.args[0] - - # Also test for the Windows equivalent of EINTR. - is_interrupt = (errcode == errno.EINTR or (hasattr(errno, "WSAEINTR") and - errcode == errno.WSAEINTR)) - - if is_interrupt: - if expires is not None: - current_time = monotonic() - if current_time > expires: - raise OSError(errno=errno.ETIMEDOUT) - if recalc_timeout: - if "timeout" in kwargs: - kwargs["timeout"] = expires - current_time - continue - if errcode: - raise SelectorError(errcode) - else: - raise - return result - - -SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data']) - - -class _SelectorMapping(Mapping): - """ Mapping of file objects to selector keys """ - - def __init__(self, selector): - self._selector = selector - - def __len__(self): - return len(self._selector._fd_to_key) - - def __getitem__(self, fileobj): - try: - fd = self._selector._fileobj_lookup(fileobj) - return self._selector._fd_to_key[fd] - except KeyError: - raise KeyError("{0!r} is not registered.".format(fileobj)) - - def __iter__(self): - return iter(self._selector._fd_to_key) - - -class BaseSelector(object): - """ Abstract Selector class - - A selector supports registering file objects to be monitored - for specific I/O events. - - A file object is a file descriptor or any object with a - `fileno()` method. An arbitrary object can be attached to the - file object which can be used for example to store context info, - a callback, etc. 
- - A selector can use various implementations (select(), poll(), epoll(), - and kqueue()) depending on the platform. The 'DefaultSelector' class uses - the most efficient implementation for the current platform. - """ - def __init__(self): - # Maps file descriptors to keys. - self._fd_to_key = {} - - # Read-only mapping returned by get_map() - self._map = _SelectorMapping(self) - - def _fileobj_lookup(self, fileobj): - """ Return a file descriptor from a file object. - This wraps _fileobj_to_fd() to do an exhaustive - search in case the object is invalid but we still - have it in our map. Used by unregister() so we can - unregister an object that was previously registered - even if it is closed. It is also used by _SelectorMapping - """ - try: - return _fileobj_to_fd(fileobj) - except ValueError: - - # Search through all our mapped keys. - for key in self._fd_to_key.values(): - if key.fileobj is fileobj: - return key.fd - - # Raise ValueError after all. - raise - - def register(self, fileobj, events, data=None): - """ Register a file object for a set of events to monitor. """ - if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)): - raise ValueError("Invalid events: {0!r}".format(events)) - - key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data) - - if key.fd in self._fd_to_key: - raise KeyError("{0!r} (FD {1}) is already registered" - .format(fileobj, key.fd)) - - self._fd_to_key[key.fd] = key - return key - - def unregister(self, fileobj): - """ Unregister a file object from being monitored. """ - try: - key = self._fd_to_key.pop(self._fileobj_lookup(fileobj)) - except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) - - # Getting the fileno of a closed socket on Windows errors with EBADF. - except socket.error as e: # Platform-specific: Windows. - if e.errno != errno.EBADF: - raise - else: - for key in self._fd_to_key.values(): - if key.fileobj is fileobj: - self._fd_to_key.pop(key.fd) - break - else: - raise KeyError("{0!r} is not registered".format(fileobj)) - return key - - def modify(self, fileobj, events, data=None): - """ Change a registered file object monitored events and data. """ - # NOTE: Some subclasses optimize this operation even further. - try: - key = self._fd_to_key[self._fileobj_lookup(fileobj)] - except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) - - if events != key.events: - self.unregister(fileobj) - key = self.register(fileobj, events, data) - - elif data != key.data: - # Use a shortcut to update the data. - key = key._replace(data=data) - self._fd_to_key[key.fd] = key - - return key - - def select(self, timeout=None): - """ Perform the actual selection until some monitored file objects - are ready or the timeout expires. """ - raise NotImplementedError() - - def close(self): - """ Close the selector. This must be called to ensure that all - underlying resources are freed. """ - self._fd_to_key.clear() - self._map = None - - def get_key(self, fileobj): - """ Return the key associated with a registered file object. """ - mapping = self.get_map() - if mapping is None: - raise RuntimeError("Selector is closed") - try: - return mapping[fileobj] - except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) - - def get_map(self): - """ Return a mapping of file objects to selector keys """ - return self._map - - def _key_from_fd(self, fd): - """ Return the key associated to a given file descriptor - Return None if it is not found. 
""" - try: - return self._fd_to_key[fd] - except KeyError: - return None - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - -# Almost all platforms have select.select() -if hasattr(select, "select"): - class SelectSelector(BaseSelector): - """ Select-based selector. """ - def __init__(self): - super(SelectSelector, self).__init__() - self._readers = set() - self._writers = set() - - def register(self, fileobj, events, data=None): - key = super(SelectSelector, self).register(fileobj, events, data) - if events & EVENT_READ: - self._readers.add(key.fd) - if events & EVENT_WRITE: - self._writers.add(key.fd) - return key - - def unregister(self, fileobj): - key = super(SelectSelector, self).unregister(fileobj) - self._readers.discard(key.fd) - self._writers.discard(key.fd) - return key - - def _select(self, r, w, timeout=None): - """ Wrapper for select.select because timeout is a positional arg """ - return select.select(r, w, [], timeout) - - def select(self, timeout=None): - # Selecting on empty lists on Windows errors out. - if not len(self._readers) and not len(self._writers): - return [] - - timeout = None if timeout is None else max(timeout, 0.0) - ready = [] - r, w, _ = _syscall_wrapper(self._select, True, self._readers, - self._writers, timeout) - r = set(r) - w = set(w) - for fd in r | w: - events = 0 - if fd in r: - events |= EVENT_READ - if fd in w: - events |= EVENT_WRITE - - key = self._key_from_fd(fd) - if key: - ready.append((key, events & key.events)) - return ready - - -if hasattr(select, "poll"): - class PollSelector(BaseSelector): - """ Poll-based selector """ - def __init__(self): - super(PollSelector, self).__init__() - self._poll = select.poll() - - def register(self, fileobj, events, data=None): - key = super(PollSelector, self).register(fileobj, events, data) - event_mask = 0 - if events & EVENT_READ: - event_mask |= select.POLLIN - if events & EVENT_WRITE: - event_mask |= select.POLLOUT - self._poll.register(key.fd, event_mask) - return key - - def unregister(self, fileobj): - key = super(PollSelector, self).unregister(fileobj) - self._poll.unregister(key.fd) - return key - - def _wrap_poll(self, timeout=None): - """ Wrapper function for select.poll.poll() so that - _syscall_wrapper can work with only seconds. """ - if timeout is not None: - if timeout <= 0: - timeout = 0 - else: - # select.poll.poll() has a resolution of 1 millisecond, - # round away from zero to wait *at least* timeout seconds. 
- timeout = math.ceil(timeout * 1e3) - - result = self._poll.poll(timeout) - return result - - def select(self, timeout=None): - ready = [] - fd_events = _syscall_wrapper(self._wrap_poll, True, timeout=timeout) - for fd, event_mask in fd_events: - events = 0 - if event_mask & ~select.POLLIN: - events |= EVENT_WRITE - if event_mask & ~select.POLLOUT: - events |= EVENT_READ - - key = self._key_from_fd(fd) - if key: - ready.append((key, events & key.events)) - - return ready - - -if hasattr(select, "epoll"): - class EpollSelector(BaseSelector): - """ Epoll-based selector """ - def __init__(self): - super(EpollSelector, self).__init__() - self._epoll = select.epoll() - - def fileno(self): - return self._epoll.fileno() - - def register(self, fileobj, events, data=None): - key = super(EpollSelector, self).register(fileobj, events, data) - events_mask = 0 - if events & EVENT_READ: - events_mask |= select.EPOLLIN - if events & EVENT_WRITE: - events_mask |= select.EPOLLOUT - _syscall_wrapper(self._epoll.register, False, key.fd, events_mask) - return key - - def unregister(self, fileobj): - key = super(EpollSelector, self).unregister(fileobj) - try: - _syscall_wrapper(self._epoll.unregister, False, key.fd) - except SelectorError: - # This can occur when the fd was closed since registry. - pass - return key - - def select(self, timeout=None): - if timeout is not None: - if timeout <= 0: - timeout = 0.0 - else: - # select.epoll.poll() has a resolution of 1 millisecond - # but luckily takes seconds so we don't need a wrapper - # like PollSelector. Just for better rounding. - timeout = math.ceil(timeout * 1e3) * 1e-3 - timeout = float(timeout) - else: - timeout = -1.0 # epoll.poll() must have a float. - - # We always want at least 1 to ensure that select can be called - # with no file descriptors registered. Otherwise will fail. 
- max_events = max(len(self._fd_to_key), 1) - - ready = [] - fd_events = _syscall_wrapper(self._epoll.poll, True, - timeout=timeout, - maxevents=max_events) - for fd, event_mask in fd_events: - events = 0 - if event_mask & ~select.EPOLLIN: - events |= EVENT_WRITE - if event_mask & ~select.EPOLLOUT: - events |= EVENT_READ - - key = self._key_from_fd(fd) - if key: - ready.append((key, events & key.events)) - return ready - - def close(self): - self._epoll.close() - super(EpollSelector, self).close() - - -if hasattr(select, "kqueue"): - class KqueueSelector(BaseSelector): - """ Kqueue / Kevent-based selector """ - def __init__(self): - super(KqueueSelector, self).__init__() - self._kqueue = select.kqueue() - - def fileno(self): - return self._kqueue.fileno() - - def register(self, fileobj, events, data=None): - key = super(KqueueSelector, self).register(fileobj, events, data) - if events & EVENT_READ: - kevent = select.kevent(key.fd, - select.KQ_FILTER_READ, - select.KQ_EV_ADD) - - _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) - - if events & EVENT_WRITE: - kevent = select.kevent(key.fd, - select.KQ_FILTER_WRITE, - select.KQ_EV_ADD) - - _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) - - return key - - def unregister(self, fileobj): - key = super(KqueueSelector, self).unregister(fileobj) - if key.events & EVENT_READ: - kevent = select.kevent(key.fd, - select.KQ_FILTER_READ, - select.KQ_EV_DELETE) - try: - _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) - except SelectorError: - pass - if key.events & EVENT_WRITE: - kevent = select.kevent(key.fd, - select.KQ_FILTER_WRITE, - select.KQ_EV_DELETE) - try: - _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) - except SelectorError: - pass - - return key - - def select(self, timeout=None): - if timeout is not None: - timeout = max(timeout, 0) - - max_events = len(self._fd_to_key) * 2 - ready_fds = {} - - kevent_list = _syscall_wrapper(self._kqueue.control, True, - None, max_events, timeout) - - for kevent in kevent_list: - fd = kevent.ident - event_mask = kevent.filter - events = 0 - if event_mask == select.KQ_FILTER_READ: - events |= EVENT_READ - if event_mask == select.KQ_FILTER_WRITE: - events |= EVENT_WRITE - - key = self._key_from_fd(fd) - if key: - if key.fd not in ready_fds: - ready_fds[key.fd] = (key, events & key.events) - else: - old_events = ready_fds[key.fd][1] - ready_fds[key.fd] = (key, (events | old_events) & key.events) - - return list(ready_fds.values()) - - def close(self): - self._kqueue.close() - super(KqueueSelector, self).close() - - -if not hasattr(select, 'select'): # Platform-specific: AppEngine - HAS_SELECT = False - - -def _can_allocate(struct): - """ Checks that select structs can be allocated by the underlying - operating system, not just advertised by the select module. We don't - check select() because we'll be hopeful that most platforms that - don't have it available will not advertise it. (ie: GAE) """ - try: - # select.poll() objects won't fail until used. - if struct == 'poll': - p = select.poll() - p.poll(0) - - # All others will fail on allocation. - else: - getattr(select, struct)().close() - return True - except (OSError, AttributeError) as e: - return False - - -# Choose the best implementation, roughly: -# kqueue == epoll > poll > select. Devpoll not supported. 
(See above) -# select() also can't accept a FD > FD_SETSIZE (usually around 1024) -def DefaultSelector(): - """ This function serves as a first call for DefaultSelector to - detect if the select module is being monkey-patched incorrectly - by eventlet, greenlet, and preserve proper behavior. """ - global _DEFAULT_SELECTOR - if _DEFAULT_SELECTOR is None: - if _can_allocate('kqueue'): - _DEFAULT_SELECTOR = KqueueSelector - elif _can_allocate('epoll'): - _DEFAULT_SELECTOR = EpollSelector - elif _can_allocate('poll'): - _DEFAULT_SELECTOR = PollSelector - elif hasattr(select, 'select'): - _DEFAULT_SELECTOR = SelectSelector - else: # Platform-specific: AppEngine - raise ValueError('Platform does not have a selector') - return _DEFAULT_SELECTOR() diff --git a/libs/urllib3/util/ssl_.py b/libs/urllib3/util/ssl_.py index 5ae435827..f271ce930 100644 --- a/libs/urllib3/util/ssl_.py +++ b/libs/urllib3/util/ssl_.py @@ -2,13 +2,14 @@ from __future__ import absolute_import import errno import warnings import hmac -import socket +import re from binascii import hexlify, unhexlify from hashlib import md5, sha1, sha256 from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning from ..packages import six +from ..packages.rfc3986 import abnf_regexp SSLContext = None @@ -40,14 +41,33 @@ def _const_compare_digest_backport(a, b): _const_compare_digest = getattr(hmac, 'compare_digest', _const_compare_digest_backport) +# Borrow rfc3986's regular expressions for IPv4 +# and IPv6 addresses for use in is_ipaddress() +_IP_ADDRESS_REGEX = re.compile( + r'^(?:%s|%s|%s)$' % ( + abnf_regexp.IPv4_RE, + abnf_regexp.IPv6_RE, + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + ) +) try: # Test for SSL features import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import wrap_socket, CERT_REQUIRED from ssl import HAS_SNI # Has SNI? except ImportError: pass +try: # Platform-specific: Python 3.6 + from ssl import PROTOCOL_TLS + PROTOCOL_SSLv23 = PROTOCOL_TLS +except ImportError: + try: + from ssl import PROTOCOL_SSLv23 as PROTOCOL_TLS + PROTOCOL_SSLv23 = PROTOCOL_TLS + except ImportError: + PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 + try: from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION @@ -56,25 +76,6 @@ except ImportError: OP_NO_COMPRESSION = 0x20000 -# Python 2.7 doesn't have inet_pton on non-Linux so we fallback on inet_aton in -# those cases. This means that we can only detect IPv4 addresses in this case. -if hasattr(socket, 'inet_pton'): - inet_pton = socket.inet_pton -else: - # Maybe we can use ipaddress if the user has urllib3[secure]? - try: - import ipaddress - - def inet_pton(_, host): - if isinstance(host, bytes): - host = host.decode('ascii') - return ipaddress.ip_address(host) - - except ImportError: # Platform-specific: Non-Linux - def inet_pton(_, host): - return socket.inet_aton(host) - - # A secure default. # Sources for more information on TLS ciphers: # @@ -83,37 +84,35 @@ else: # - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ # # The general intent is: -# - Prefer TLS 1.3 cipher suites # - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), # - prefer ECDHE over DHE for better performance, # - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and # security, # - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common, -# - disable NULL authentication, MD5 MACs and DSS for security reasons. +# - disable NULL authentication, MD5 MACs, DSS, and other +# insecure ciphers for security reasons. 
+# - NOTE: TLS 1.3 cipher suites are managed through a different interface +# not exposed by CPython (yet!) and are enabled by default if they're available. DEFAULT_CIPHERS = ':'.join([ - 'TLS13-AES-256-GCM-SHA384', - 'TLS13-CHACHA20-POLY1305-SHA256', - 'TLS13-AES-128-GCM-SHA256', + 'ECDHE+AESGCM', + 'ECDHE+CHACHA20', + 'DHE+AESGCM', + 'DHE+CHACHA20', 'ECDH+AESGCM', - 'ECDH+CHACHA20', 'DH+AESGCM', - 'DH+CHACHA20', - 'ECDH+AES256', - 'DH+AES256', - 'ECDH+AES128', + 'ECDH+AES', 'DH+AES', 'RSA+AESGCM', 'RSA+AES', '!aNULL', '!eNULL', '!MD5', + '!DSS', ]) try: from ssl import SSLContext # Modern SSL? except ImportError: - import sys - class SSLContext(object): # Platform-specific: Python 2 def __init__(self, protocol_version): self.protocol = protocol_version @@ -199,7 +198,7 @@ def resolve_cert_reqs(candidate): constant which can directly be passed to wrap_socket. """ if candidate is None: - return CERT_NONE + return CERT_REQUIRED if isinstance(candidate, str): res = getattr(ssl, candidate, None) @@ -215,7 +214,7 @@ def resolve_ssl_version(candidate): like resolve_cert_reqs """ if candidate is None: - return PROTOCOL_SSLv23 + return PROTOCOL_TLS if isinstance(candidate, str): res = getattr(ssl, candidate, None) @@ -261,7 +260,7 @@ def create_urllib3_context(ssl_version=None, cert_reqs=None, Constructed SSLContext object with specified options :rtype: SSLContext """ - context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + context = SSLContext(ssl_version or PROTOCOL_TLS) context.set_ciphers(ciphers or DEFAULT_CIPHERS) @@ -291,7 +290,7 @@ def create_urllib3_context(ssl_version=None, cert_reqs=None, def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ca_certs=None, server_hostname=None, ssl_version=None, ciphers=None, ssl_context=None, - ca_cert_dir=None): + ca_cert_dir=None, key_password=None): """ All arguments except for server_hostname, ssl_context, and ca_cert_dir have the same meaning as they do when using :func:`ssl.wrap_socket`. @@ -307,6 +306,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, A directory containing CA certificates in multiple separate files, as supported by OpenSSL's -CApath flag or the capath argument to SSLContext.load_verify_locations(). + :param key_password: + Optional password if the keyfile is encrypted. """ context = ssl_context if context is None: @@ -328,14 +329,21 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, raise SSLError(e) raise - # Don't load system certs unless there were no CA certs or - # SSLContext object specified manually. elif ssl_context is None and hasattr(context, 'load_default_certs'): # try to load OS default certs; works well on Windows (require Python3.4+) context.load_default_certs() + # Attempt to detect if we get the goofy behavior of the + # keyfile being encrypted and OpenSSL asking for the + # passphrase via the terminal and instead error out. 
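A short sketch of the new key_password path being added here, assuming hypothetical certificate/key file names and password, and assuming the bundled connection-pool code carries the matching key_password plumbing from the same upstream release.

    import urllib3

    # An encrypted client key no longer falls through to OpenSSL's
    # interactive passphrase prompt; the password is passed explicitly.
    http = urllib3.PoolManager(
        cert_file="client.crt",
        key_file="client.key",                        # PEM, encrypted
        key_password="correct horse battery staple",
        cert_reqs="CERT_REQUIRED",
        ca_certs="ca-bundle.crt",
    )
    resp = http.request("GET", "https://service.example/")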
+ if keyfile and key_password is None and _is_key_file_encrypted(keyfile): + raise SSLError("Client private key is encrypted, password is required") + if certfile: - context.load_cert_chain(certfile, keyfile) + if key_password is None: + context.load_cert_chain(certfile, keyfile) + else: + context.load_cert_chain(certfile, keyfile, key_password) # If we detect server_hostname is an IP address then the SNI # extension should not be used according to RFC3546 Section 3.1 @@ -361,7 +369,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, def is_ipaddress(hostname): - """Detects whether the hostname given is an IP address. + """Detects whether the hostname given is an IPv4 or IPv6 address. + Also detects IPv6 addresses with Zone IDs. :param str hostname: Hostname to examine. :return: True if the hostname is an IP address, False otherwise. @@ -369,16 +378,15 @@ def is_ipaddress(hostname): if six.PY3 and isinstance(hostname, bytes): # IDN A-label bytes are ASCII compatible. hostname = hostname.decode('ascii') + return _IP_ADDRESS_REGEX.match(hostname) is not None - families = [socket.AF_INET] - if hasattr(socket, 'AF_INET6'): - families.append(socket.AF_INET6) - for af in families: - try: - inet_pton(af, hostname) - except (socket.error, ValueError, OSError): - pass - else: - return True +def _is_key_file_encrypted(key_file): + """Detects if a key file is encrypted or not.""" + with open(key_file, 'r') as f: + for line in f: + # Look for Proc-Type: 4,ENCRYPTED + if 'ENCRYPTED' in line: + return True + return False diff --git a/libs/urllib3/util/timeout.py b/libs/urllib3/util/timeout.py index cec817e6e..a4d004a84 100644 --- a/libs/urllib3/util/timeout.py +++ b/libs/urllib3/util/timeout.py @@ -131,7 +131,8 @@ class Timeout(object): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than or equal to 0." % (name, value)) - except TypeError: # Python 3 + except TypeError: + # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int, float or None." % (name, value)) diff --git a/libs/urllib3/util/url.py b/libs/urllib3/util/url.py index 6b6f9968d..0bc6ced75 100644 --- a/libs/urllib3/util/url.py +++ b/libs/urllib3/util/url.py @@ -1,7 +1,12 @@ from __future__ import absolute_import +import re from collections import namedtuple from ..exceptions import LocationParseError +from ..packages import six, rfc3986 +from ..packages.rfc3986.exceptions import RFC3986Exception, ValidationError +from ..packages.rfc3986.validators import Validator +from ..packages.rfc3986 import abnf_regexp, normalizers, compat, misc url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -10,10 +15,16 @@ url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] # urllib3 infers URLs without a scheme (None) to be http. NORMALIZABLE_SCHEMES = ('http', 'https', None) +# Regex for detecting URLs with schemes. RFC 3986 Section 3.1 +SCHEME_REGEX = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+\-]*:|/)") + +PATH_CHARS = abnf_regexp.UNRESERVED_CHARS_SET | abnf_regexp.SUB_DELIMITERS_SET | {':', '@', '/'} +QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {'?'} + class Url(namedtuple('Url', url_attrs)): """ - Datastructure for representing an HTTP URL. Used as a return value for + Data structure for representing an HTTP URL. Used as a return value for :func:`parse_url`. Both the scheme and host are normalized as they are both case-insensitive according to RFC 3986. 
""" @@ -23,10 +34,8 @@ class Url(namedtuple('Url', url_attrs)): query=None, fragment=None): if path and not path.startswith('/'): path = '/' + path - if scheme: + if scheme is not None: scheme = scheme.lower() - if host and scheme in NORMALIZABLE_SCHEMES: - host = host.lower() return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) @@ -72,23 +81,23 @@ class Url(namedtuple('Url', url_attrs)): 'http://username:password@host.com:80/path?query#fragment' """ scheme, auth, host, port, path, query, fragment = self - url = '' + url = u'' # We use "is not None" we want things to happen with empty strings (or 0 port) if scheme is not None: - url += scheme + '://' + url += scheme + u'://' if auth is not None: - url += auth + '@' + url += auth + u'@' if host is not None: url += host if port is not None: - url += ':' + str(port) + url += u':' + str(port) if path is not None: url += path if query is not None: - url += '?' + query + url += u'?' + query if fragment is not None: - url += '#' + fragment + url += u'#' + fragment return url @@ -98,6 +107,8 @@ class Url(namedtuple('Url', url_attrs)): def split_first(s, delims): """ + .. deprecated:: 1.25 + Given a string and an iterable of delimiters, split on the first found delimiter. Return two split parts and the matched delimiter. @@ -129,10 +140,44 @@ def split_first(s, delims): return s[:min_idx], s[min_idx + 1:], min_delim +def _encode_invalid_chars(component, allowed_chars, encoding='utf-8'): + """Percent-encodes a URI component without reapplying + onto an already percent-encoded component. Based on + rfc3986.normalizers.encode_component() + """ + if component is None: + return component + + # Try to see if the component we're encoding is already percent-encoded + # so we can skip all '%' characters but still encode all others. + percent_encodings = len(normalizers.PERCENT_MATCHER.findall( + compat.to_str(component, encoding))) + + uri_bytes = component.encode('utf-8', 'surrogatepass') + is_percent_encoded = percent_encodings == uri_bytes.count(b'%') + + encoded_component = bytearray() + + for i in range(0, len(uri_bytes)): + # Will return a single character bytestring on both Python 2 & 3 + byte = uri_bytes[i:i+1] + byte_ord = ord(byte) + if ((is_percent_encoded and byte == b'%') + or (byte_ord < 128 and byte.decode() in allowed_chars)): + encoded_component.extend(byte) + continue + encoded_component.extend('%{0:02x}'.format(byte_ord).encode().upper()) + + return encoded_component.decode(encoding) + + def parse_url(url): """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is performed to parse incomplete urls. Fields not provided will be None. + This parser is RFC 3986 compliant. + + :param str url: URL to parse into a :class:`.Url` namedtuple. Partly backwards-compatible with :mod:`urlparse`. @@ -145,81 +190,95 @@ def parse_url(url): >>> parse_url('/foo?bar') Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. 
- if not url: # Empty return Url() - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. No whitespace, no plus or - # minus prefixes, no non-integer digits such as ^2 (superscript). - if not port.isdigit(): - raise LocationParseError(url) - try: - port = int(port) - except ValueError: - raise LocationParseError(url) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None + is_string = not isinstance(url, six.binary_type) - elif not host and url: - host = url + # RFC 3986 doesn't like URLs that have a host but don't start + # with a scheme and we support URLs like that so we need to + # detect that problem and add an empty scheme indication. + # We don't get hurt on path-only URLs here as it's stripped + # off and given an empty scheme anyways. + if not SCHEME_REGEX.search(url): + url = "//" + url + def idna_encode(name): + if name and any([ord(x) > 128 for x in name]): + try: + import idna + except ImportError: + raise LocationParseError("Unable to parse URL without the 'idna' module") + try: + return idna.encode(name.lower(), strict=True, std3_rules=True) + except idna.IDNAError: + raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) + return name + + try: + split_iri = misc.IRI_MATCHER.match(compat.to_str(url)).groupdict() + iri_ref = rfc3986.IRIReference( + split_iri['scheme'], split_iri['authority'], + _encode_invalid_chars(split_iri['path'], PATH_CHARS), + _encode_invalid_chars(split_iri['query'], QUERY_CHARS), + _encode_invalid_chars(split_iri['fragment'], FRAGMENT_CHARS) + ) + has_authority = iri_ref.authority is not None + uri_ref = iri_ref.encode(idna_encoder=idna_encode) + except (ValueError, RFC3986Exception): + return six.raise_from(LocationParseError(url), None) + + # rfc3986 strips the authority if it's invalid + if has_authority and uri_ref.authority is None: + raise LocationParseError(url) + + # Only normalize schemes we understand to not break http+unix + # or other schemes that don't follow RFC 3986. + if uri_ref.scheme is None or uri_ref.scheme.lower() in NORMALIZABLE_SCHEMES: + uri_ref = uri_ref.normalize() + + # Validate all URIReference components and ensure that all + # components that were set before are still set after + # normalization has completed. + validator = Validator() + try: + validator.check_validity_of( + *validator.COMPONENT_NAMES + ).validate(uri_ref) + except ValidationError: + return six.raise_from(LocationParseError(url), None) + + # For the sake of backwards compatibility we put empty + # string values for path if there are any defined values + # beyond the path in the URL. + # TODO: Remove this when we break backwards compatibility. + path = uri_ref.path if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' 
in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) + if (uri_ref.query is not None + or uri_ref.fragment is not None): + path = "" + else: + path = None + + # Ensure that each part of the URL is a `str` for + # backwards compatibility. + def to_input_type(x): + if x is None: + return None + elif not is_string and not isinstance(x, six.binary_type): + return x.encode('utf-8') + return x + + return Url( + scheme=to_input_type(uri_ref.scheme), + auth=to_input_type(uri_ref.userinfo), + host=to_input_type(uri_ref.host), + port=int(uri_ref.port) if uri_ref.port is not None else None, + path=to_input_type(path), + query=to_input_type(uri_ref.query), + fragment=to_input_type(uri_ref.fragment) + ) def get_host(url): diff --git a/views/menu.tpl b/views/menu.tpl index c144a979c..5d6ffa4aa 100644 --- a/views/menu.tpl +++ b/views/menu.tpl @@ -61,6 +61,7 @@ % from database import TableEpisodes, TableMovies, System % import operator % from config import settings + % from functools import reduce %episodes_missing_subtitles_clause = [ % (TableEpisodes.missing_subtitles != '[]')