You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
4.0 KiB
125 lines
4.0 KiB
5 years ago
|
# -*- coding: utf-8 -*-
|
||
|
# Copyright (c) 2014 Rackspace
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||
|
# implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
"""
|
||
|
Module containing compiled regular expressions and constants.
|
||
|
|
||
|
This module contains important constants, patterns, and compiled regular
|
||
|
expressions for parsing and validating URIs and their components.
|
||
|
"""
|
||
|
|
||
|
import re
|
||
|
|
||
|
from . import abnf_regexp
|
||
|
|
||
|
# These are enumerated for the named tuple used as a superclass of
|
||
|
# URIReference
|
||
|
URI_COMPONENTS = [
    'scheme',
    'authority',
    'path',
    'query',
    'fragment',
]
|
||
|
|
||
|
# Character classes re-exported from abnf_regexp, keyed by a descriptive name.
important_characters = dict(
    generic_delimiters=abnf_regexp.GENERIC_DELIMITERS,
    sub_delimiters=abnf_regexp.SUB_DELIMITERS,
    # Variant for use inside a regular expression: the '*' must be escaped.
    re_sub_delimiters=abnf_regexp.SUB_DELIMITERS_RE,
    unreserved_chars=abnf_regexp.UNRESERVED_CHARS,
    # Variant for use inside a regular expression: the '-' must be escaped.
    re_unreserved=abnf_regexp.UNRESERVED_RE,
)
|
||
|
|
||
|
# Delimiters and reserved characters (the ``*_SET`` values from abnf_regexp).
# Reference: http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET

# Unreserved characters.
# Reference: http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
|
||
|
|
||
|
# Compiled form of the general URL parsing expression.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)

# Splits an authority into the named groups ``userinfo`` (optional),
# ``host`` and ``port`` (optional).
SUBAUTHORITY_MATCHER = re.compile(
    (
        '^(?:(?P<userinfo>{userinfo})@)?'  # userinfo
        '(?P<host>{host})'  # host
        ':?(?P<port>{port})?$'  # port
    ).format(
        userinfo=abnf_regexp.USERINFO_RE,
        host=abnf_regexp.HOST_PATTERN,
        port=abnf_regexp.PORT_RE,
    )
)
|
||
|
|
||
|
|
||
|
# Whole-string matchers for a host and for IPv4 / bracketed IPv6 literals.
HOST_MATCHER = re.compile('^%s$' % abnf_regexp.HOST_RE)
IPv4_MATCHER = re.compile('^%s$' % abnf_regexp.IPv4_RE)
IPv6_MATCHER = re.compile(r'^\[%s\]$' % abnf_regexp.IPv6_ADDRZ_RFC4007_RE)

# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(
    r'^\[' + abnf_regexp.IPv6_ADDRZ_RE + r'\]$'
)
|
||
|
|
||
|
# Validates path components.
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)


# ##################################
# Query and Fragment Matcher Section
# ##################################

# Fragments are validated with the very same compiled pattern as queries, so
# both names are bound to one object.
QUERY_MATCHER = FRAGMENT_MATCHER = re.compile(abnf_regexp.QUERY_RE)
|
||
|
|
||
|
# Validates a scheme; see http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile('^%s$' % abnf_regexp.SCHEME_RE)

# Relative part, followed by an optional "?query" and an optional "#fragment".
# NOTE(review): ABSOLUTE_URI_MATCHER embeds abnf_regexp.QUERY_RE[1:-1], while
# QUERY_RE and FRAGMENT_RE are embedded here unsliced. If those expressions
# carry their own leading/trailing anchors, the optional groups below can
# never match -- confirm against abnf_regexp.
RELATIVE_REF_MATCHER = re.compile(
    r'^%(relative_part)s(\?%(query)s)?(#%(fragment)s)?$' % {
        'relative_part': abnf_regexp.RELATIVE_PART_RE,
        'query': abnf_regexp.QUERY_RE,
        'fragment': abnf_regexp.FRAGMENT_RE,
    }
)
|
||
|
|
||
|
# Absolute URI: "scheme:hier-part" with an optional "?query" and no fragment.
# See http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile(
    r'^%(scheme)s:%(hier_part)s(\?%(query)s)?$' % {
        'scheme': abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
        'hier_part': abnf_regexp.HIER_PART_RE,
        # The first and last characters of QUERY_RE are dropped before it is
        # embedded (presumably its own '^'/'$' anchors -- TODO confirm).
        'query': abnf_regexp.QUERY_RE[1:-1],
    }
)
|
||
|
|
||
|
# ###############
# IRIs / RFC 3987
# ###############

# Same parsing expression as URI_MATCHER, compiled with the UNICODE flag.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, flags=re.UNICODE)

# IRI counterpart of SUBAUTHORITY_MATCHER; exposes the same named groups
# (``userinfo``, ``host``, ``port``).
ISUBAUTHORITY_MATCHER = re.compile(
    (
        u'^(?:(?P<userinfo>{iuserinfo})@)?'  # iuserinfo
        u'(?P<host>{ihost})'  # ihost
        u':?(?P<port>{port})?$'  # port
    ).format(
        iuserinfo=abnf_regexp.IUSERINFO_RE,
        ihost=abnf_regexp.IHOST_RE,
        port=abnf_regexp.PORT_RE,
    ),
    flags=re.UNICODE,
)
|
||
|
|
||
|
|
||
|
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
|
||
|
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Implements the "merge" routine of RFC 3986, section 5.2.3:

    * when the base has an authority and an empty path, the result is
      ``'/'`` followed by the relative path;
    * otherwise the relative path replaces everything after the right-most
      ``'/'`` of the base path, or the *entire* base path when it contains
      no ``'/'`` at all.

    :param base_uri: Object exposing ``path`` and ``authority`` attributes;
        either attribute may be ``None``.
    :param str relative_path: Path of the relative reference.
    :returns: The merged path.
    :rtype: str
    """
    if not base_uri.path and base_uri.authority is not None:
        return '/' + relative_path

    # rpartition yields ('', '', path) when no '/' is present, so a base path
    # without any slash is dropped entirely instead of losing only its final
    # character (which is what slicing with rfind's -1 result would do).
    head, slash, _ = (base_uri.path or '').rpartition('/')
    return head + slash + relative_path
|
||
|
|
||
|
|
||
|
# Unique module-level sentinel; a fresh ``object()`` is only ever equal to
# itself, so it should be compared by identity (``is UseExisting``).
# NOTE(review): presumably passed as a default meaning "keep the current
# value" -- confirm against the callers.
UseExisting = object()
|