bazarr/libs/waitress/utilities.py

##############################################################################
#
# Copyright (c) 2004 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Utility functions
"""

import calendar
import errno
import logging
import os
import re
import stat
import time

from .rfc7230 import OBS_TEXT, VCHAR

logger = logging.getLogger("waitress")
queue_logger = logging.getLogger("waitress.queue")


def find_double_newline(s):
    """Returns the position just after a double newline in the given string."""
    pos = s.find(b"\r\n\r\n")

    if pos >= 0:
        pos += 4

    return pos


def concat(*args):
    return "".join(args)


def join(seq, field=" "):
    return field.join(seq)


def group(s):
    return "(" + s + ")"


short_days = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"]
long_days = [
    "sunday",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
]

short_day_reg = group(join(short_days, "|"))
long_day_reg = group(join(long_days, "|"))

daymap = {}

for i in range(7):
    daymap[short_days[i]] = i
    daymap[long_days[i]] = i

hms_reg = join(3 * [group("[0-9][0-9]")], ":")

months = [
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec",
]

monmap = {}

for i in range(12):
    monmap[months[i]] = i + 1

months_reg = group(join(months, "|"))

# From draft-ietf-http-v11-spec-07.txt/3.3.1
#       Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
#       Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
#       Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format

# rfc822 format
rfc822_date = join(
    [
        concat(short_day_reg, ","),  # day
        group("[0-9][0-9]?"),  # date
        months_reg,  # month
        group("[0-9]+"),  # year
        hms_reg,  # hour minute second
        "gmt",
    ],
    " ",
)

rfc822_reg = re.compile(rfc822_date)


def unpack_rfc822(m):
    g = m.group

    return (
        int(g(4)),  # year
        monmap[g(3)],  # month
        int(g(2)),  # day
        int(g(5)),  # hour
        int(g(6)),  # minute
        int(g(7)),  # second
        0,
        0,
        0,
    )


# rfc850 format
rfc850_date = join(
    [
        concat(long_day_reg, ","),
        join([group("[0-9][0-9]?"), months_reg, group("[0-9]+")], "-"),
        hms_reg,
        "gmt",
    ],
    " ",
)

rfc850_reg = re.compile(rfc850_date)
# they actually unpack the same way
def unpack_rfc850(m):
    g = m.group
    yr = g(4)

    if len(yr) == 2:
        yr = "19" + yr

    return (
        int(yr),  # year
        monmap[g(3)],  # month
        int(g(2)),  # day
        int(g(5)),  # hour
        int(g(6)),  # minute
        int(g(7)),  # second
        0,
        0,
        0,
    )


# parsdate.parsedate - ~700/sec.
# parse_http_date    - ~1333/sec.

weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
monthname = [
    None,
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]


def build_http_date(when):
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(when)

    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[wd],
        day,
        monthname[month],
        year,
        hh,
        mm,
        ss,
    )


def parse_http_date(d):
    d = d.lower()
    m = rfc850_reg.match(d)

    if m and m.end() == len(d):
        retval = int(calendar.timegm(unpack_rfc850(m)))
    else:
        m = rfc822_reg.match(d)

        if m and m.end() == len(d):
            retval = int(calendar.timegm(unpack_rfc822(m)))
        else:
            return 0

    return retval


# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR         =  %x21-7E
#                  ; visible (printing) characters
vchar_re = VCHAR

# RFC 7230 Section 3.2.6 "Field Value Components":
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext        = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
# obs-text      = %x80-FF
# quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )
obs_text_re = OBS_TEXT

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"

quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'

quoted_string = re.compile(quoted_string_re)
quoted_pair = re.compile(quoted_pair_re)


def undquote(value):
    if value.startswith('"') and value.endswith('"'):
        # So it claims to be DQUOTE'ed, let's validate that
        matches = quoted_string.match(value)

        if matches and matches.end() == len(value):
            # Remove the DQUOTE's from the value
            value = value[1:-1]

            # Remove all backslashes that are followed by a valid vchar or
            # obs-text
            value = quoted_pair.sub(r"\1", value)

            return value
    elif not value.startswith('"') and not value.endswith('"'):
        return value

    raise ValueError("Invalid quoting in value")


def cleanup_unix_socket(path):
    try:
        st = os.stat(path)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise  # pragma: no cover
    else:
        if stat.S_ISSOCK(st.st_mode):
            try:
                os.remove(path)
            except OSError:  # pragma: no cover
                # avoid race condition error during tests
                pass


class Error(object):
    code = 500
    reason = "Internal Server Error"

    def __init__(self, body):
        self.body = body

    def to_response(self):
        status = "%s %s" % (self.code, self.reason)
        body = "%s\r\n\r\n%s" % (self.reason, self.body)
        tag = "\r\n\r\n(generated by waitress)"
        body = body + tag
        headers = [("Content-Type", "text/plain")]

        return status, headers, body

    def wsgi_response(self, environ, start_response):
        status, headers, body = self.to_response()
        start_response(status, headers)
        yield body


class BadRequest(Error):
    code = 400
    reason = "Bad Request"


class RequestHeaderFieldsTooLarge(BadRequest):
    code = 431
    reason = "Request Header Fields Too Large"


class RequestEntityTooLarge(BadRequest):
    code = 413
    reason = "Request Entity Too Large"


class InternalServerError(Error):
    code = 500
    reason = "Internal Server Error"


class ServerNotImplemented(Error):
    code = 501
    reason = "Not Implemented"
WIP 5 years ago			`##############################################################################`
			`#`
			`# Copyright (c) 2004 Zope Foundation and Contributors.`
			`# All Rights Reserved.`
			`#`
			`# This software is subject to the provisions of the Zope Public License,`
			`# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.`
			`# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED`
			`# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED`
			`# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS`
			`# FOR A PARTICULAR PURPOSE.`
			`#`
			`##############################################################################`
			`"""Utility functions`
			`"""`

			`import calendar`
			`import errno`
			`import logging`
			`import os`
			`import re`
			`import stat`
			`import time`

			`from .rfc7230 import OBS_TEXT, VCHAR`

			`logger = logging.getLogger("waitress")`
			`queue_logger = logging.getLogger("waitress.queue")`


			`def find_double_newline(s):`
			`"""Returns the position just after a double newline in the given string."""`
			`pos = s.find(b"\r\n\r\n")`

			`if pos >= 0:`
			`pos += 4`

			`return pos`


			`def concat(*args):`
			`return "".join(args)`


			`def join(seq, field=" "):`
			`return field.join(seq)`


			`def group(s):`
			`return "(" + s + ")"`


			`short_days = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"]`
			`long_days = [`
			`"sunday",`
			`"monday",`
			`"tuesday",`
			`"wednesday",`
			`"thursday",`
			`"friday",`
			`"saturday",`
			`]`

			`short_day_reg = group(join(short_days, "\|"))`
			`long_day_reg = group(join(long_days, "\|"))`

			`daymap = {}`

			`for i in range(7):`
			`daymap[short_days[i]] = i`
			`daymap[long_days[i]] = i`

			`hms_reg = join(3 * [group("[0-9][0-9]")], ":")`

			`months = [`
			`"jan",`
			`"feb",`
			`"mar",`
			`"apr",`
			`"may",`
			`"jun",`
			`"jul",`
			`"aug",`
			`"sep",`
			`"oct",`
			`"nov",`
			`"dec",`
			`]`

			`monmap = {}`

			`for i in range(12):`
			`monmap[months[i]] = i + 1`

			`months_reg = group(join(months, "\|"))`

			`# From draft-ietf-http-v11-spec-07.txt/3.3.1`
			`# Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123`
			`# Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036`
			`# Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format`

			`# rfc822 format`
			`rfc822_date = join(`
			`[`
			`concat(short_day_reg, ","), # day`
			`group("[0-9][0-9]?"), # date`
			`months_reg, # month`
			`group("[0-9]+"), # year`
			`hms_reg, # hour minute second`
			`"gmt",`
			`],`
			`" ",`
			`)`

			`rfc822_reg = re.compile(rfc822_date)`


			`def unpack_rfc822(m):`
			`g = m.group`

			`return (`
			`int(g(4)), # year`
			`monmap[g(3)], # month`
			`int(g(2)), # day`
			`int(g(5)), # hour`
			`int(g(6)), # minute`
			`int(g(7)), # second`
			`0,`
			`0,`
			`0,`
			`)`


			`# rfc850 format`
			`rfc850_date = join(`
			`[`
			`concat(long_day_reg, ","),`
			`join([group("[0-9][0-9]?"), months_reg, group("[0-9]+")], "-"),`
			`hms_reg,`
			`"gmt",`
			`],`
			`" ",`
			`)`

			`rfc850_reg = re.compile(rfc850_date)`
			`# they actually unpack the same way`
			`def unpack_rfc850(m):`
			`g = m.group`
			`yr = g(4)`

			`if len(yr) == 2:`
			`yr = "19" + yr`

			`return (`
			`int(yr), # year`
			`monmap[g(3)], # month`
			`int(g(2)), # day`
			`int(g(5)), # hour`
			`int(g(6)), # minute`
			`int(g(7)), # second`
			`0,`
			`0,`
			`0,`
			`)`


			`# parsdate.parsedate - ~700/sec.`
			`# parse_http_date - ~1333/sec.`

			`weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]`
			`monthname = [`
			`None,`
			`"Jan",`
			`"Feb",`
			`"Mar",`
			`"Apr",`
			`"May",`
			`"Jun",`
			`"Jul",`
			`"Aug",`
			`"Sep",`
			`"Oct",`
			`"Nov",`
			`"Dec",`
			`]`


			`def build_http_date(when):`
			`year, month, day, hh, mm, ss, wd, y, z = time.gmtime(when)`

			`return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (`
			`weekdayname[wd],`
			`day,`
			`monthname[month],`
			`year,`
			`hh,`
			`mm,`
			`ss,`
			`)`


			`def parse_http_date(d):`
			`d = d.lower()`
			`m = rfc850_reg.match(d)`

			`if m and m.end() == len(d):`
			`retval = int(calendar.timegm(unpack_rfc850(m)))`
			`else:`
			`m = rfc822_reg.match(d)`

			`if m and m.end() == len(d):`
			`retval = int(calendar.timegm(unpack_rfc822(m)))`
			`else:`
			`return 0`

			`return retval`


			`# RFC 5234 Appendix B.1 "Core Rules":`
			`# VCHAR = %x21-7E`
			`# ; visible (printing) characters`
			`vchar_re = VCHAR`

			`# RFC 7230 Section 3.2.6 "Field Value Components":`
			`# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE`
			`# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text`
			`# obs-text = %x80-FF`
			`# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )`
			`obs_text_re = OBS_TEXT`

			`# The '\\' between \x5b and \x5d is needed to escape \x5d (']')`
			`qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"`

			`quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"`
			`quoted_string_re = '"(?:(?:' + qdtext_re + ")\|(?:" + quoted_pair_re + '))*"'`

			`quoted_string = re.compile(quoted_string_re)`
			`quoted_pair = re.compile(quoted_pair_re)`


			`def undquote(value):`
			`if value.startswith('"') and value.endswith('"'):`
			`# So it claims to be DQUOTE'ed, let's validate that`
			`matches = quoted_string.match(value)`

			`if matches and matches.end() == len(value):`
			`# Remove the DQUOTE's from the value`
			`value = value[1:-1]`

			`# Remove all backslashes that are followed by a valid vchar or`
			`# obs-text`
			`value = quoted_pair.sub(r"\1", value)`

			`return value`
			`elif not value.startswith('"') and not value.endswith('"'):`
			`return value`

			`raise ValueError("Invalid quoting in value")`


			`def cleanup_unix_socket(path):`
			`try:`
			`st = os.stat(path)`
			`except OSError as exc:`
			`if exc.errno != errno.ENOENT:`
			`raise # pragma: no cover`
			`else:`
			`if stat.S_ISSOCK(st.st_mode):`
			`try:`
			`os.remove(path)`
			`except OSError: # pragma: no cover`
			`# avoid race condition error during tests`
			`pass`


			`class Error(object):`
			`code = 500`
			`reason = "Internal Server Error"`

			`def __init__(self, body):`
			`self.body = body`

			`def to_response(self):`
			`status = "%s %s" % (self.code, self.reason)`
			`body = "%s\r\n\r\n%s" % (self.reason, self.body)`
			`tag = "\r\n\r\n(generated by waitress)"`
			`body = body + tag`
			`headers = [("Content-Type", "text/plain")]`

			`return status, headers, body`

			`def wsgi_response(self, environ, start_response):`
			`status, headers, body = self.to_response()`
			`start_response(status, headers)`
			`yield body`


			`class BadRequest(Error):`
			`code = 400`
			`reason = "Bad Request"`


			`class RequestHeaderFieldsTooLarge(BadRequest):`
			`code = 431`
			`reason = "Request Header Fields Too Large"`


			`class RequestEntityTooLarge(BadRequest):`
			`code = 413`
			`reason = "Request Entity Too Large"`


			`class InternalServerError(Error):`
			`code = 500`
			`reason = "Internal Server Error"`


			`class ServerNotImplemented(Error):`
			`code = 501`
			`reason = "Not Implemented"`