Added support for 7z by upgrading rarfile dependency

- Bump rarfile.py from 4.0 to 4.1
- Add support for 7zip for archive extractions, since unar is a steaming pile
  of C# and unrar is non-free, having some easy-to-package software to
  handle archive makes it easier to run Bazarr on Linux distributions.
pull/2315/head v1.4.1-beta.1
Julien Voisin 5 months ago committed by GitHub
parent c97a98d4f2
commit 4b7cdbc5f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -194,16 +194,28 @@ def init_binaries():
exe = get_binary("unar")
rarfile.UNAR_TOOL = exe
rarfile.UNRAR_TOOL = None
rarfile.tool_setup(unrar=False, unar=True, bsdtar=False, force=True)
rarfile.SEVENZIP_TOOL = None
rarfile.tool_setup(unrar=False, unar=True, bsdtar=False, sevenzip=False, force=True)
except (BinaryNotFound, rarfile.RarCannotExec):
try:
exe = get_binary("unrar")
rarfile.UNRAR_TOOL = exe
rarfile.UNAR_TOOL = None
rarfile.tool_setup(unrar=True, unar=False, bsdtar=False, force=True)
rarfile.SEVENZIP_TOOL = None
rarfile.tool_setup(unrar=True, unar=False, bsdtar=False, sevenzip=False, force=True)
except (BinaryNotFound, rarfile.RarCannotExec):
logging.exception("BAZARR requires a rar archive extraction utilities (unrar, unar) and it can't be found.")
raise BinaryNotFound
try:
exe = get_binary("7z")
rarfile.UNRAR_TOOL = None
rarfile.UNAR_TOOL = None
rarfile.SEVENZIP_TOOL = "7z"
rarfile.tool_setup(unrar=False, unar=False, bsdtar=False, sevenzip=True, force=True)
except (BinaryNotFound, rarfile.RarCannotExec):
logging.exception("BAZARR requires a rar archive extraction utilities (unrar, unar, 7zip) and it can't be found.")
raise BinaryNotFound
else:
logging.debug("Using 7zip from: %s", exe)
return exe
else:
logging.debug("Using UnRAR from: %s", exe)
return exe

@ -59,12 +59,14 @@ import sys
import warnings
from binascii import crc32, hexlify
from datetime import datetime, timezone
from hashlib import blake2s, pbkdf2_hmac, sha1
from hashlib import blake2s, pbkdf2_hmac, sha1, sha256
from pathlib import Path
from struct import Struct, pack, unpack
from subprocess import DEVNULL, PIPE, STDOUT, Popen
from tempfile import mkstemp
AES = None
# only needed for encrypted headers
try:
try:
@ -90,10 +92,10 @@ class AES_CBC_Decrypt:
self.decrypt = ciph.decryptor().update
__version__ = "4.0"
__version__ = "4.1"
# export only interesting items
__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]
##
## Module configuration. Can be tuned after importing.
@ -108,6 +110,12 @@ UNAR_TOOL = "unar"
#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"
#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"
#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"
#: default fallback charset
DEFAULT_CHARSET = "windows-1252"
@ -280,6 +288,9 @@ DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
RAR5_PW_CHECK_SIZE = 8
RAR5_PW_SUM_SIZE = 4
##
## internal constants
##
@ -298,17 +309,7 @@ _BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _get_rar_version(xfile):
"""Check quickly whether file is rar archive.
"""
with XFile(xfile) as fd:
buf = fd.read(len(RAR5_ID))
if buf.startswith(RAR_ID):
return RAR_V3
elif buf.startswith(RAR5_ID):
return RAR_V5
return 0
FORCE_TOOL = False
def _find_sfx_header(xfile):
@ -340,10 +341,27 @@ def _find_sfx_header(xfile):
## Public interface
##
def get_rar_version(xfile):
"""Check quickly whether file is rar archive.
"""
with XFile(xfile) as fd:
buf = fd.read(len(RAR5_ID))
if buf.startswith(RAR_ID):
return RAR_V3
elif buf.startswith(RAR5_ID):
return RAR_V5
return 0
def is_rarfile(xfile):
"""Check quickly whether file is rar archive.
"""
return _get_rar_version(xfile) > 0
try:
return get_rar_version(xfile) > 0
except OSError:
# File not found or not accessible, ignore
return False
def is_rarfile_sfx(xfile):
@ -634,6 +652,25 @@ class RarInfo:
class RarFile:
"""Parse RAR structure, provide access to files in archive.
Parameters:
file
archive file name or file-like object.
mode
only "r" is supported.
charset
fallback charset to use, if filenames are not already Unicode-enabled.
info_callback
debug callback, gets to see all archive entries.
crc_check
set to False to disable CRC checks
errors
Either "stop" to quietly stop parsing on errors,
or "strict" to raise errors. Default is "stop".
part_only
If True, read only single file and allow it to be middle-part
of multi-volume archive.
"""
#: File name, if available. Unicode string or None.
@ -643,25 +680,7 @@ class RarFile:
comment = None
def __init__(self, file, mode="r", charset=None, info_callback=None,
crc_check=True, errors="stop"):
"""Open and parse a RAR archive.
Parameters:
file
archive file name or file-like object.
mode
only "r" is supported.
charset
fallback charset to use, if filenames are not already Unicode-enabled.
info_callback
debug callback, gets to see all archive entries.
crc_check
set to False to disable CRC checks
errors
Either "stop" to quietly stop parsing on errors,
or "strict" to raise errors. Default is "stop".
"""
crc_check=True, errors="stop", part_only=False):
if is_filelike(file):
self.filename = getattr(file, "name", None)
else:
@ -673,6 +692,7 @@ class RarFile:
self._charset = charset or DEFAULT_CHARSET
self._info_callback = info_callback
self._crc_check = crc_check
self._part_only = part_only
self._password = None
self._file_parser = None
@ -740,6 +760,14 @@ class RarFile:
"""
return self._file_parser.getinfo(name)
def getinfo_orig(self, name):
"""Return RarInfo for file source.
RAR5: if name is hard-linked or copied file,
returns original entry with original filename.
"""
return self._file_parser.getinfo_orig(name)
def open(self, name, mode="r", pwd=None):
"""Returns file-like object (:class:`RarExtFile`) from where the data can be read.
@ -880,12 +908,12 @@ class RarFile:
if ver == RAR_V3:
p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
self._charset, self._strict, self._info_callback,
sfx_ofs)
sfx_ofs, self._part_only)
self._file_parser = p3 # noqa
elif ver == RAR_V5:
p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
self._charset, self._strict, self._info_callback,
sfx_ofs)
sfx_ofs, self._part_only)
self._file_parser = p5 # noqa
else:
raise NotRarFile("Not a RAR file")
@ -984,7 +1012,8 @@ class CommonParser:
_password = None
comment = None
def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
def __init__(self, rarfile, password, crc_check, charset, strict,
info_cb, sfx_offset, part_only):
self._rarfile = rarfile
self._password = password
self._crc_check = crc_check
@ -995,6 +1024,7 @@ class CommonParser:
self._info_map = {}
self._vol_list = []
self._sfx_offset = sfx_offset
self._part_only = part_only
def has_header_encryption(self):
"""Returns True if headers are encrypted
@ -1043,7 +1073,16 @@ class CommonParser:
try:
return self._info_map[fname]
except KeyError:
raise NoRarEntry("No such file: %s" % fname)
raise NoRarEntry("No such file: %s" % fname) from None
def getinfo_orig(self, member):
inf = self.getinfo(member)
if inf.file_redir:
redir_type, redir_flags, redir_name = inf.file_redir
# cannot leave to unrar as it expects copied file to exist
if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
inf = self.getinfo(redir_name)
return inf
def parse(self):
"""Process file."""
@ -1080,7 +1119,7 @@ class CommonParser:
if raise_need_first_vol:
# did not find ENDARC with VOLNR
raise NeedFirstVolume("Need to start from first volume", None)
if more_vols:
if more_vols and not self._part_only:
volume += 1
fd.close()
try:
@ -1098,6 +1137,7 @@ class CommonParser:
endarc = False
self._vol_list.append(volfile)
self._main = None
self._hdrenc_main = None
continue
break
h.volume = volume
@ -1105,7 +1145,7 @@ class CommonParser:
if h.type == RAR_BLOCK_MAIN and not self._main:
self._main = h
if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
# RAR 2.x does not set FIRSTVOLUME,
# so check it only if NEWNUMBERING is used
if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
@ -1137,7 +1177,8 @@ class CommonParser:
more_vols = True
# RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
raise_need_first_vol = True
if not self._part_only:
raise_need_first_vol = True
if h.needs_password():
self._needs_password = True
@ -1241,6 +1282,8 @@ class CommonParser:
return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
if FORCE_TOOL:
return self._open_unrar(self._rarfile, inf)
return DirectReader(self, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
@ -1288,7 +1331,7 @@ class CommonParser:
# not giving filename avoids encoding related problems
fn = None
if not tmpfile or force_file:
fn = inf.filename
fn = inf.filename.replace("/", os.path.sep)
# read from unrar pipe
cmd = setup.open_cmdline(pwd, rarfile, fn)
@ -1379,6 +1422,9 @@ class RAR3Parser(CommonParser):
buf = fd.read(S_BLK_HDR.size)
if not buf:
return None
if len(buf) < S_BLK_HDR.size:
self._set_error("Unexpected EOF when reading header")
return None
t = S_BLK_HDR.unpack_from(buf)
h.header_crc, h.type, h.flags, h.header_size = t
@ -1751,14 +1797,18 @@ class RAR5Parser(CommonParser):
# AES encrypted headers
_last_aes256_key = (-1, None, None) # (kdf_count, salt, key)
def _get_utf8_password(self):
pwd = self._password
if isinstance(pwd, str):
return pwd.encode("utf8")
return pwd
def _gen_key(self, kdf_count, salt):
if self._last_aes256_key[:2] == (kdf_count, salt):
return self._last_aes256_key[2]
if kdf_count > 24:
raise BadRarFile("Too large kdf_count")
pwd = self._password
if isinstance(pwd, str):
pwd = pwd.encode("utf8")
pwd = self._get_utf8_password()
key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
self._last_aes256_key = (kdf_count, salt, key)
return key
@ -1776,8 +1826,17 @@ class RAR5Parser(CommonParser):
"""
header_offset = fd.tell()
preload = 4 + 3
preload = 4 + 1
start_bytes = fd.read(preload)
if len(start_bytes) < preload:
self._set_error("Unexpected EOF when reading header")
return None
while start_bytes[-1] & 0x80:
b = fd.read(1)
if not b:
self._set_error("Unexpected EOF when reading header")
return None
start_bytes += b
header_crc, pos = load_le32(start_bytes, 0)
hdrlen, pos = load_vint(start_bytes, pos)
if hdrlen > 2 * 1024 * 1024:
@ -1912,15 +1971,39 @@ class RAR5Parser(CommonParser):
h.flags |= RAR_ENDARC_NEXT_VOLUME
return h
def _check_password(self, check_value, kdf_count_shift, salt):
if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE:
return
hdr_check = check_value[:RAR5_PW_CHECK_SIZE]
hdr_sum = check_value[RAR5_PW_CHECK_SIZE:]
sum_hash = sha256(hdr_check).digest()
if sum_hash[:RAR5_PW_SUM_SIZE] != hdr_sum:
return
kdf_count = (1 << kdf_count_shift) + 32
pwd = self._get_utf8_password()
pwd_hash = pbkdf2_hmac("sha256", pwd, salt, kdf_count)
pwd_check = bytearray(RAR5_PW_CHECK_SIZE)
len_mask = RAR5_PW_CHECK_SIZE - 1
for i, v in enumerate(pwd_hash):
pwd_check[i & len_mask] ^= v
if pwd_check != hdr_check:
raise RarWrongPassword()
def _parse_encryption_block(self, h, hdata, pos):
h.encryption_algo, pos = load_vint(hdata, pos)
h.encryption_flags, pos = load_vint(hdata, pos)
h.encryption_kdf_count, pos = load_byte(hdata, pos)
h.encryption_salt, pos = load_bytes(hdata, 16, pos)
if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
h.encryption_check_value = load_bytes(hdata, 12, pos)
h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
raise BadRarFile("Unsupported header encryption cipher")
if h.encryption_check_value and self._password:
self._check_password(h.encryption_check_value, h.encryption_kdf_count, h.encryption_salt)
self._hdrenc_main = h
return h
@ -2157,6 +2240,7 @@ class RarExtFile(io.RawIOBase):
_remain = 0
_returncode = 0
_md_context = None
_seeking = False
def _open_extfile(self, parser, inf):
self.name = inf.filename
@ -2165,7 +2249,10 @@ class RarExtFile(io.RawIOBase):
if self._fd:
self._fd.close()
md_class = self._inf._md_class or NoHashContext
if self._seeking:
md_class = NoHashContext
else:
md_class = self._inf._md_class or NoHashContext
self._md_context = md_class()
self._fd = None
self._remain = self._inf.file_size
@ -2256,7 +2343,9 @@ class RarExtFile(io.RawIOBase):
"""
# disable crc check when seeking
self._md_context = NoHashContext()
if not self._seeking:
self._md_context = NoHashContext()
self._seeking = True
fsize = self._inf.file_size
cur_ofs = self.tell()
@ -2437,6 +2526,11 @@ class DirectReader(RarExtFile):
def _read(self, cnt):
"""Read from potentially multi-volume archive."""
pos = self._fd.tell()
need = self._cur.data_offset + self._cur.add_size - self._cur_avail
if pos != need:
self._fd.seek(need, 0)
buf = []
while cnt > 0:
# next vol needed?
@ -2790,7 +2884,7 @@ def load_le32(buf, pos):
end = pos + 4
if end > len(buf):
raise BadRarFile("cannot load le32")
return S_LONG.unpack_from(buf, pos)[0], pos + 4
return S_LONG.unpack_from(buf, pos)[0], end
def load_bytes(buf, num, pos):
@ -2833,36 +2927,61 @@ def load_windowstime(buf, pos):
return dt, pos
#
# volume numbering
#
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
"""New-style next volume
"""
name, ext = os.path.splitext(volfile)
if ext.lower() in ("", ".exe", ".sfx"):
volfile = name + ".rar"
i = len(volfile) - 1
while i >= 0:
if volfile[i] >= "0" and volfile[i] <= "9":
return _inc_volname(volfile, i)
if "0" <= volfile[i] <= "9":
return _inc_volname(volfile, i, False)
if volfile[i] in ("/", os.sep):
break
i -= 1
raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
"""Old-style next volume
"""
# rar -> r00
if volfile[-4:].lower() == ".rar":
return volfile[:-2] + "00"
return _inc_volname(volfile, len(volfile) - 1)
name, ext = os.path.splitext(volfile)
if ext.lower() in ("", ".exe", ".sfx"):
ext = ".rar"
sfx = ext[2:]
if _rc_num.match(sfx):
ext = _inc_volname(ext, len(ext) - 1, True)
else:
# .rar -> .r00
ext = ext[:2] + "00"
return name + ext
def _inc_volname(volfile, i):
def _inc_volname(volfile, i, inc_chars):
"""increase digits with carry, otherwise just increment char
"""
fn = list(volfile)
while i >= 0:
if fn[i] != "9":
if fn[i] == "9":
fn[i] = "0"
i -= 1
if i < 0:
fn.insert(0, "1")
elif "0" <= fn[i] < "9" or inc_chars:
fn[i] = chr(ord(fn[i]) + 1)
break
fn[i] = "0"
i -= 1
else:
fn.insert(i + 1, "1")
break
return "".join(fn)
@ -2957,7 +3076,7 @@ def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=N
# file header
fname = b"data"
date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
mode = 0x20
mode = DOS_MODE_ARCHIVE
fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
date, vers, meth, len(fname), mode)
fhdr += fname
@ -3038,17 +3157,12 @@ def to_datetime(t):
pass
# sanitize invalid values
mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
mday = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
mon = max(1, min(mon, 12))
day = max(1, min(day, mday[mon]))
h = min(h, 23)
m = min(m, 59)
s = min(s, 59)
if mon == 2 and day == 29:
try:
return datetime(year, mon, day, h, m, s)
except ValueError:
day = 28
return datetime(year, mon, day, h, m, s)
@ -3075,7 +3189,7 @@ class nsdatetime(datetime):
__slots__ = ("nanosecond",)
nanosecond: int #: Number of nanoseconds, 0 <= nanosecond < 999999999
def __new__(cls, year, month, day, hour=0, minute=0, second=0,
def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
if mod == 0:
@ -3165,9 +3279,9 @@ def custom_popen(cmd):
creationflags=creationflags)
except OSError as ex:
if ex.errno == errno.ENOENT:
raise RarCannotExec("Unrar not installed?")
raise RarCannotExec("Unrar not installed?") from None
if ex.errno == errno.EACCES or ex.errno == errno.EPERM:
raise RarCannotExec("Cannot execute unrar")
raise RarCannotExec("Cannot execute unrar") from None
raise
return p
@ -3244,6 +3358,8 @@ class ToolSetup:
def get_cmdline(self, key, pwd, nodash=False):
cmdline = list(self.setup[key])
cmdline[0] = globals()[cmdline[0]]
if key == "check_cmd":
return cmdline
self.add_password_arg(cmdline, pwd)
if not nodash:
cmdline.append("--")
@ -3259,7 +3375,10 @@ class ToolSetup:
if not isinstance(pwd, str):
pwd = pwd.decode("utf8")
args = self.setup["password"]
if isinstance(args, str):
if args is None:
tool = self.setup["open_cmd"][0]
raise RarCannotExec(f"{tool} does not support passwords")
elif isinstance(args, str):
cmdline.append(args + pwd)
else:
cmdline.extend(args)
@ -3303,10 +3422,30 @@ BSDTAR_CONFIG = {
"errmap": [None],
}
SEVENZIP_CONFIG = {
"open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
"check_cmd": ("SEVENZIP_TOOL", "i"),
"password": "-p",
"no_password": ("-p",),
"errmap": [None,
RarWarning, RarFatalError, None, None, # 1..4
None, None, RarUserError, RarMemoryError] # 5..8
}
SEVENZIP2_CONFIG = {
"open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
"check_cmd": ("SEVENZIP2_TOOL", "i"),
"password": "-p",
"no_password": ("-p",),
"errmap": [None,
RarWarning, RarFatalError, None, None, # 1..4
None, None, RarUserError, RarMemoryError] # 5..8
}
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
"""Pick a tool, return cached ToolSetup.
"""
global CURRENT_SETUP
@ -3319,6 +3458,10 @@ def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
lst.append(UNRAR_CONFIG)
if unar:
lst.append(UNAR_CONFIG)
if sevenzip:
lst.append(SEVENZIP_CONFIG)
if sevenzip2:
lst.append(SEVENZIP2_CONFIG)
if bsdtar:
lst.append(BSDTAR_CONFIG)

@ -32,7 +32,7 @@ python-engineio==4.3.4
python-socketio==5.7.2
pytz==2023.3
pytz_deprecation_shim==0.1.0.post0
rarfile==4.0
rarfile==4.1
requests==2.28.1
semver==2.13.0
signalrcore==0.9.5

Loading…
Cancel
Save