# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
""" Base64 content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8 - bit data using the three 8 - bit bytes in four 7 - bit
characters encoding known as Base64 .
It is used in the MIME standards for email to attach images , audio , and text
using some 8 - bit character sets to messages .
This module provides an interface to encode and decode both headers and bodies
with Base64 encoding .
RFC 2045 defines a method for including character set information in an
` encoded - word ' in a header. This method is commonly used for 8-bit real names
in To : , From : , Cc : , etc . fields , as well as Subject : lines .
This module does not do the line wrapping or end - of - line character conversion
necessary for proper internationalized headers ; it only does dumb encoding and
decoding . To deal with the various line wrapping issues , use the email . header
module .
"""
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from future . builtins import range
from future . builtins import bytes
from future . builtins import str
__all__ = [
' body_decode ' ,
' body_encode ' ,
' decode ' ,
' decodestring ' ,
' header_encode ' ,
' header_length ' ,
]
from base64 import b64encode
from binascii import b2a_base64 , a2b_base64
CRLF = ' \r \n '
NL = ' \n '
EMPTYSTRING = ' '
# See also Charset.py
MISC_LEN = 7
# Helpers
def header_length ( bytearray ) :
""" Return the length of s when it is encoded with base64. """
groups_of_3 , leftover = divmod ( len ( bytearray ) , 3 )
# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
n = groups_of_3 * 4
if leftover :
n + = 4
return n
def header_encode ( header_bytes , charset = ' iso-8859-1 ' ) :
""" Encode a single header line with Base64 encoding in a given charset.
charset names the character set to use to encode the header . It defaults
to iso - 8859 - 1. Base64 encoding is defined in RFC 2045.
"""
if not header_bytes :
return " "
if isinstance ( header_bytes , str ) :
header_bytes = header_bytes . encode ( charset )
encoded = b64encode ( header_bytes ) . decode ( " ascii " )
return ' =? %s ?b? %s ?= ' % ( charset , encoded )
def body_encode ( s , maxlinelen = 76 , eol = NL ) :
r """ Encode a string with base64.
Each line will be wrapped at , at most , maxlinelen characters ( defaults to
76 characters ) .
Each line of encoded text will end with eol , which defaults to " \n " . Set
this to " \r \n " if you will be using the result of this function directly
in an email .
"""
if not s :
return s
encvec = [ ]
max_unencoded = maxlinelen * 3 / / 4
for i in range ( 0 , len ( s ) , max_unencoded ) :
# BAW: should encode() inherit b2a_base64()'s dubious behavior in
# adding a newline to the encoded string?
enc = b2a_base64 ( s [ i : i + max_unencoded ] ) . decode ( " ascii " )
if enc . endswith ( NL ) and eol != NL :
enc = enc [ : - 1 ] + eol
encvec . append ( enc )
return EMPTYSTRING . join ( encvec )
def decode ( string ) :
""" Decode a raw base64 string, returning a bytes object.
This function does not parse a full MIME header value encoded with
base64 ( like = ? iso - 8895 - 1 ? b ? bmloISBuaWgh ? = ) - - please use the high
level email . header class for that functionality .
"""
if not string :
return bytes ( )
elif isinstance ( string , str ) :
return a2b_base64 ( string . encode ( ' raw-unicode-escape ' ) )
else :
return a2b_base64 ( string )
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode