parent
99a98a564a
commit
8a981553bf
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Pure python p7zr implementation
|
||||
# Copyright (C) 2019 Hiroshi Miura
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
from py7zr.exceptions import Bad7zFile, DecompressionError, UnsupportedCompressionMethodError
|
||||
from py7zr.py7zr import ArchiveInfo, FileInfo, SevenZipFile, is_7zfile, pack_7zarchive, unpack_7zarchive
|
||||
|
||||
# Package metadata.
__copyright__ = 'Copyright (C) 2019 Hiroshi Miura'
__version__ = "0.7.0"

# Names exported by ``from py7zr import *``.
__all__ = [
    '__version__',
    'ArchiveInfo',
    'FileInfo',
    'SevenZipFile',
    'is_7zfile',
    'UnsupportedCompressionMethodError',
    'Bad7zFile',
    'DecompressionError',
    'pack_7zarchive',
    'unpack_7zarchive',
]
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/python -u
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2020 Hiroshi Miura <miurahr@linux.com>
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class Callback(ABC):
    """Abstract base class for progress callbacks.

    All five ``report_*`` hooks are abstract, so a subclass must implement
    every one of them before it can be instantiated.
    """

    @abstractmethod
    def report_start_preparation(self):
        """Report that preparation has started.

        Preparation means work such as making the list of files and looking
        into their properties.
        """

    @abstractmethod
    def report_start(self, processing_file_path, processing_bytes):
        """Report that processing of one archive file has started.

        :param processing_file_path: path of the file being processed.
        :param processing_bytes: number of input bytes for that file.
        """

    @abstractmethod
    def report_end(self, processing_file_path, wrote_bytes):
        """Report that processing of one archive file has finished.

        :param processing_file_path: path of the file that was processed.
        :param wrote_bytes: number of output bytes for that file.
        """

    @abstractmethod
    def report_warning(self, message):
        """Report a warning event.

        :param message: text of the warning.
        """

    @abstractmethod
    def report_postprocess(self):
        """Report that post processing has started.

        Post processing means work such as setting file properties and
        permissions or creating symlinks.
        """
|
||||
|
||||
|
||||
class ExtractCallback(Callback):
    """Abstract base class for extraction progress callbacks.

    Adds nothing to :class:`Callback`; it only gives extraction callbacks
    their own type.
    """
|
||||
|
||||
|
||||
class ArchiveCallback(Callback):
    """Abstract base class for archiving progress callbacks.

    Adds nothing to :class:`Callback`; it only gives archiving callbacks
    their own type.
    """
|
@ -0,0 +1,384 @@
|
||||
#!/usr/bin/python -u
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2019 Hiroshi Miura <miurahr@linux.com>
|
||||
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
|
||||
# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
|
||||
# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
import bz2
|
||||
import io
|
||||
import lzma
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
from typing import IO, Any, BinaryIO, Dict, List, Optional, Union
|
||||
|
||||
from py7zr import UnsupportedCompressionMethodError
|
||||
from py7zr.extra import CopyDecompressor, DeflateDecompressor, ISevenZipDecompressor, ZstdDecompressor
|
||||
from py7zr.helpers import MemIO, NullIO, calculate_crc32, readlink
|
||||
from py7zr.properties import READ_BLOCKSIZE, ArchivePassword, CompressionMethod
|
||||
|
||||
if sys.version_info < (3, 6):
|
||||
import pathlib2 as pathlib
|
||||
else:
|
||||
import pathlib
|
||||
try:
|
||||
import zstandard as Zstd # type: ignore
|
||||
except ImportError:
|
||||
Zstd = None
|
||||
|
||||
|
||||
class Worker:
|
||||
"""Extract worker class to invoke handler"""
|
||||
|
||||
def __init__(self, files, src_start: int, header) -> None:
|
||||
self.target_filepath = {} # type: Dict[int, Union[MemIO, pathlib.Path, None]]
|
||||
self.files = files
|
||||
self.src_start = src_start
|
||||
self.header = header
|
||||
|
||||
def extract(self, fp: BinaryIO, parallel: bool, q=None) -> None:
|
||||
"""Extract worker method to handle 7zip folder and decompress each files."""
|
||||
if hasattr(self.header, 'main_streams') and self.header.main_streams is not None:
|
||||
src_end = self.src_start + self.header.main_streams.packinfo.packpositions[-1]
|
||||
numfolders = self.header.main_streams.unpackinfo.numfolders
|
||||
if numfolders == 1:
|
||||
self.extract_single(fp, self.files, self.src_start, src_end, q)
|
||||
else:
|
||||
folders = self.header.main_streams.unpackinfo.folders
|
||||
positions = self.header.main_streams.packinfo.packpositions
|
||||
empty_files = [f for f in self.files if f.emptystream]
|
||||
if not parallel:
|
||||
self.extract_single(fp, empty_files, 0, 0, q)
|
||||
for i in range(numfolders):
|
||||
self.extract_single(fp, folders[i].files, self.src_start + positions[i],
|
||||
self.src_start + positions[i + 1], q)
|
||||
else:
|
||||
filename = getattr(fp, 'name', None)
|
||||
self.extract_single(open(filename, 'rb'), empty_files, 0, 0, q)
|
||||
extract_threads = []
|
||||
for i in range(numfolders):
|
||||
p = threading.Thread(target=self.extract_single,
|
||||
args=(filename, folders[i].files,
|
||||
self.src_start + positions[i], self.src_start + positions[i + 1], q))
|
||||
p.start()
|
||||
extract_threads.append((p))
|
||||
for p in extract_threads:
|
||||
p.join()
|
||||
else:
|
||||
empty_files = [f for f in self.files if f.emptystream]
|
||||
self.extract_single(fp, empty_files, 0, 0, q)
|
||||
|
||||
def extract_single(self, fp: Union[BinaryIO, str], files, src_start: int, src_end: int,
|
||||
q: Optional[queue.Queue]) -> None:
|
||||
"""Single thread extractor that takes file lists in single 7zip folder."""
|
||||
if files is None:
|
||||
return
|
||||
if isinstance(fp, str):
|
||||
fp = open(fp, 'rb')
|
||||
fp.seek(src_start)
|
||||
for f in files:
|
||||
if q is not None:
|
||||
q.put(('s', str(f.filename), str(f.compressed) if f.compressed is not None else '0'))
|
||||
fileish = self.target_filepath.get(f.id, None)
|
||||
if fileish is not None:
|
||||
fileish.parent.mkdir(parents=True, exist_ok=True)
|
||||
with fileish.open(mode='wb') as ofp:
|
||||
if not f.emptystream:
|
||||
# extract to file
|
||||
self.decompress(fp, f.folder, ofp, f.uncompressed[-1], f.compressed, src_end)
|
||||
ofp.seek(0)
|
||||
else:
|
||||
pass # just create empty file
|
||||
elif not f.emptystream:
|
||||
# read and bin off a data but check crc
|
||||
with NullIO() as ofp:
|
||||
self.decompress(fp, f.folder, ofp, f.uncompressed[-1], f.compressed, src_end)
|
||||
if q is not None:
|
||||
q.put(('e', str(f.filename), str(f.uncompressed[-1])))
|
||||
|
||||
def decompress(self, fp: BinaryIO, folder, fq: IO[Any],
|
||||
size: int, compressed_size: Optional[int], src_end: int) -> None:
|
||||
"""decompressor wrapper called from extract method.
|
||||
|
||||
:parameter fp: archive source file pointer
|
||||
:parameter folder: Folder object that have decompressor object.
|
||||
:parameter fq: output file pathlib.Path
|
||||
:parameter size: uncompressed size of target file.
|
||||
:parameter compressed_size: compressed size of target file.
|
||||
:parameter src_end: end position of the folder
|
||||
:returns None
|
||||
"""
|
||||
assert folder is not None
|
||||
out_remaining = size
|
||||
decompressor = folder.get_decompressor(compressed_size)
|
||||
while out_remaining > 0:
|
||||
max_length = min(out_remaining, io.DEFAULT_BUFFER_SIZE)
|
||||
rest_size = src_end - fp.tell()
|
||||
read_size = min(READ_BLOCKSIZE, rest_size)
|
||||
if read_size == 0:
|
||||
tmp = decompressor.decompress(b'', max_length)
|
||||
if len(tmp) == 0:
|
||||
raise Exception("decompression get wrong: no output data.")
|
||||
else:
|
||||
inp = fp.read(read_size)
|
||||
tmp = decompressor.decompress(inp, max_length)
|
||||
if len(tmp) > 0 and out_remaining >= len(tmp):
|
||||
out_remaining -= len(tmp)
|
||||
fq.write(tmp)
|
||||
if out_remaining <= 0:
|
||||
break
|
||||
if fp.tell() >= src_end:
|
||||
if decompressor.crc is not None and not decompressor.check_crc():
|
||||
print('\nCRC error! expected: {}, real: {}'.format(decompressor.crc, decompressor.digest))
|
||||
return
|
||||
|
||||
def _find_link_target(self, target):
|
||||
"""Find the target member of a symlink or hardlink member in the archive.
|
||||
"""
|
||||
targetname = target.as_posix() # type: str
|
||||
linkname = readlink(targetname)
|
||||
# Check windows full path symlinks
|
||||
if linkname.startswith("\\\\?\\"):
|
||||
linkname = linkname[4:]
|
||||
# normalize as posix style
|
||||
linkname = pathlib.Path(linkname).as_posix() # type: str
|
||||
member = None
|
||||
for j in range(len(self.files)):
|
||||
if linkname == self.files[j].origin.as_posix():
|
||||
# FIXME: when API user specify arcname, it will break
|
||||
member = os.path.relpath(linkname, os.path.dirname(targetname))
|
||||
break
|
||||
if member is None:
|
||||
member = linkname
|
||||
return member
|
||||
|
||||
def archive(self, fp: BinaryIO, folder, deref=False):
|
||||
"""Run archive task for specified 7zip folder."""
|
||||
compressor = folder.get_compressor()
|
||||
outsize = 0
|
||||
self.header.main_streams.packinfo.numstreams = 1
|
||||
num_unpack_streams = 0
|
||||
self.header.main_streams.substreamsinfo.digests = []
|
||||
self.header.main_streams.substreamsinfo.digestsdefined = []
|
||||
last_file_index = 0
|
||||
foutsize = 0
|
||||
for i, f in enumerate(self.files):
|
||||
file_info = f.file_properties()
|
||||
self.header.files_info.files.append(file_info)
|
||||
self.header.files_info.emptyfiles.append(f.emptystream)
|
||||
foutsize = 0
|
||||
if f.is_symlink and not deref:
|
||||
last_file_index = i
|
||||
num_unpack_streams += 1
|
||||
link_target = self._find_link_target(f.origin) # type: str
|
||||
tgt = link_target.encode('utf-8') # type: bytes
|
||||
insize = len(tgt)
|
||||
crc = calculate_crc32(tgt, 0) # type: int
|
||||
out = compressor.compress(tgt)
|
||||
outsize += len(out)
|
||||
foutsize += len(out)
|
||||
fp.write(out)
|
||||
self.header.main_streams.substreamsinfo.digests.append(crc)
|
||||
self.header.main_streams.substreamsinfo.digestsdefined.append(True)
|
||||
self.header.main_streams.substreamsinfo.unpacksizes.append(insize)
|
||||
self.header.files_info.files[i]['maxsize'] = foutsize
|
||||
elif not f.emptystream:
|
||||
last_file_index = i
|
||||
num_unpack_streams += 1
|
||||
insize = 0
|
||||
with f.origin.open(mode='rb') as fd:
|
||||
data = fd.read(READ_BLOCKSIZE)
|
||||
insize += len(data)
|
||||
crc = 0
|
||||
while data:
|
||||
crc = calculate_crc32(data, crc)
|
||||
out = compressor.compress(data)
|
||||
outsize += len(out)
|
||||
foutsize += len(out)
|
||||
fp.write(out)
|
||||
data = fd.read(READ_BLOCKSIZE)
|
||||
insize += len(data)
|
||||
self.header.main_streams.substreamsinfo.digests.append(crc)
|
||||
self.header.main_streams.substreamsinfo.digestsdefined.append(True)
|
||||
self.header.files_info.files[i]['maxsize'] = foutsize
|
||||
self.header.main_streams.substreamsinfo.unpacksizes.append(insize)
|
||||
else:
|
||||
out = compressor.flush()
|
||||
outsize += len(out)
|
||||
foutsize += len(out)
|
||||
fp.write(out)
|
||||
if len(self.files) > 0:
|
||||
self.header.files_info.files[last_file_index]['maxsize'] = foutsize
|
||||
# Update size data in header
|
||||
self.header.main_streams.packinfo.packsizes = [outsize]
|
||||
folder.unpacksizes = [sum(self.header.main_streams.substreamsinfo.unpacksizes)]
|
||||
self.header.main_streams.substreamsinfo.num_unpackstreams_folders = [num_unpack_streams]
|
||||
|
||||
def register_filelike(self, id: int, fileish: Union[MemIO, pathlib.Path, None]) -> None:
|
||||
"""register file-ish to worker."""
|
||||
self.target_filepath[id] = fileish
|
||||
|
||||
|
||||
class SevenZipDecompressor:
    """Decompressor configured for, and bound to, one 7zip folder.

    Each 7zip folder can carry its own compression method, so the concrete
    decompressor is chosen from the folder's coder list.
    """

    # 7z compression methods that map directly onto an lzma filter id.
    lzma_methods_map = {
        CompressionMethod.LZMA: lzma.FILTER_LZMA1,
        CompressionMethod.LZMA2: lzma.FILTER_LZMA2,
        CompressionMethod.DELTA: lzma.FILTER_DELTA,
        CompressionMethod.P7Z_BCJ: lzma.FILTER_X86,
        CompressionMethod.BCJ_ARM: lzma.FILTER_ARM,
        CompressionMethod.BCJ_ARMT: lzma.FILTER_ARMTHUMB,
        CompressionMethod.BCJ_IA64: lzma.FILTER_IA64,
        CompressionMethod.BCJ_PPC: lzma.FILTER_POWERPC,
        CompressionMethod.BCJ_SPARC: lzma.FILTER_SPARC,
    }

    # Class-local identifiers for the methods not handled by the lzma module.
    FILTER_BZIP2 = 0x31
    FILTER_ZIP = 0x32
    FILTER_COPY = 0x33
    FILTER_AES = 0x34
    FILTER_ZSTD = 0x35
    alt_methods_map = {
        CompressionMethod.MISC_BZIP2: FILTER_BZIP2,
        CompressionMethod.MISC_DEFLATE: FILTER_ZIP,
        CompressionMethod.COPY: FILTER_COPY,
        CompressionMethod.CRYPT_AES256_SHA256: FILTER_AES,
        CompressionMethod.MISC_ZSTD: FILTER_ZSTD,
    }

    def __init__(self, coders: List[Dict[str, Any]], size: int, crc: Optional[int]) -> None:
        """Pick and create the concrete decompressor for ``coders``.

        :param coders: coder descriptions of the folder.
        :param size: stored as ``input_size``.
        :param crc: expected CRC of the decompressed data, or None to skip
            digest tracking.
        """
        self.input_size = size
        self.consumed = 0  # type: int
        self.crc = crc
        self.digest = None  # type: Optional[int]
        # A pure-lzma chain goes to the lzma module; anything else is looked
        # up among the alternative decompressors.
        if not self._check_lzma_coders(coders):
            self._set_alternative_decompressor(coders)
        else:
            self._set_lzma_decompressor(coders)

    def _check_lzma_coders(self, coders: List[Dict[str, Any]]) -> bool:
        """Return True when every coder's method has an lzma filter id."""
        return all(self.lzma_methods_map.get(coder['method'], None) is not None for coder in coders)

    def _set_lzma_decompressor(self, coders: List[Dict[str, Any]]) -> None:
        """Build a raw-format ``LZMADecompressor`` from the coder chain.

        :raises UnsupportedCompressionMethodError: for a coder with other
            than one in/out stream, or with an unknown method.
        """
        filters = []  # type: List[Dict[str, Any]]
        for coder in coders:
            if coder['numinstreams'] != 1 or coder['numoutstreams'] != 1:
                raise UnsupportedCompressionMethodError('Only a simple compression method is currently supported.')
            filter_id = self.lzma_methods_map.get(coder['method'], None)
            if filter_id is None:
                raise UnsupportedCompressionMethodError
            properties = coder.get('properties', None)
            if properties is None:
                spec = {'id': filter_id}
            else:
                spec = lzma._decode_filter_properties(filter_id, properties)  # type: ignore
            # Each coder is prepended, so the filter chain ends up in the
            # reverse of the coder order.
            filters.insert(0, spec)
        self.decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)  # type: Union[bz2.BZ2Decompressor, lzma.LZMADecompressor, ISevenZipDecompressor] # noqa

    def _set_alternative_decompressor(self, coders: List[Dict[str, Any]]) -> None:
        """Create a non-lzma decompressor chosen by the first coder's method.

        :raises UnsupportedCompressionMethodError: when the method has no
            decompressor here (this includes Zstandard without the
            ``zstandard`` module).
        """
        filter_id = self.alt_methods_map.get(coders[0]['method'], None)
        factories = {
            self.FILTER_BZIP2: bz2.BZ2Decompressor,
            self.FILTER_ZIP: DeflateDecompressor,
            self.FILTER_COPY: CopyDecompressor,
        }
        if Zstd:
            factories[self.FILTER_ZSTD] = ZstdDecompressor
        factory = factories.get(filter_id)
        if factory is None:
            raise UnsupportedCompressionMethodError
        self.decompressor = factory()

    def decompress(self, data: bytes, max_length: Optional[int] = None) -> bytes:
        """Decompress ``data`` and fold the output into the running digest.

        :param data: next chunk of compressed input.
        :param max_length: passed on as the output limit when not None.
        :returns: the decompressed bytes produced by this call.
        """
        self.consumed += len(data)
        if max_length is None:
            folder_data = self.decompressor.decompress(data)
        else:
            folder_data = self.decompressor.decompress(data, max_length=max_length)
        # calculate CRC with uncompressed data
        if self.crc is not None:
            self.digest = calculate_crc32(folder_data, self.digest)
        return folder_data

    def check_crc(self):
        """Return True when the running digest equals the expected CRC."""
        return self.crc == self.digest
|
||||
|
||||
|
||||
class SevenZipCompressor:
    """Compressor configured for one 7zip folder.

    Wraps a raw-format ``lzma.LZMACompressor`` and keeps, in ``coders``, the
    coder description matching each lzma filter in use.
    """

    __slots__ = ['filters', 'compressor', 'coders']

    # lzma filter id -> 7z compression method (reverse lookup for writing).
    lzma_methods_map_r = {
        lzma.FILTER_LZMA2: CompressionMethod.LZMA2,
        lzma.FILTER_DELTA: CompressionMethod.DELTA,
        lzma.FILTER_X86: CompressionMethod.P7Z_BCJ,
    }

    def __init__(self, filters=None):
        """Create the compressor.

        :param filters: lzma filter chain; when None a single LZMA2 filter
            with preset ``7 | lzma.PRESET_EXTREME`` is used.
        """
        default_chain = [{"id": lzma.FILTER_LZMA2, "preset": 7 | lzma.PRESET_EXTREME}, ]
        self.filters = default_chain if filters is None else filters
        self.compressor = lzma.LZMACompressor(format=lzma.FORMAT_RAW, filters=self.filters)
        self.coders = []
        for spec in self.filters:
            # A None entry ends the chain.
            if spec is None:
                break
            self.coders.append({
                'method': self.lzma_methods_map_r[spec['id']],
                'properties': lzma._encode_filter_properties(spec),
                'numinstreams': 1,
                'numoutstreams': 1,
            })

    def compress(self, data):
        """Feed ``data`` to the lzma compressor and return what it emits."""
        return self.compressor.compress(data)

    def flush(self):
        """Finish the lzma stream and return the remaining output."""
        return self.compressor.flush()
|
||||
|
||||
|
||||
def get_methods_names(coders: List[dict]) -> List[str]:
    """Return human readable method names for specified coders.

    The table also covers the methods handled by
    ``SevenZipDecompressor.alt_methods_map`` (Copy, BZip2, Deflate and
    Zstandard), so naming the methods of an extractable archive does not
    raise.

    :param coders: coder descriptions, each with a ``'method'`` entry.
    :returns: one display name per coder, in the same order.
    :raises UnsupportedCompressionMethodError: for a method without a name.
    """
    methods_name_map = {
        CompressionMethod.LZMA2: "LZMA2",
        CompressionMethod.LZMA: "LZMA",
        CompressionMethod.DELTA: "delta",
        CompressionMethod.P7Z_BCJ: "BCJ",
        CompressionMethod.BCJ_ARM: "BCJ(ARM)",
        CompressionMethod.BCJ_ARMT: "BCJ(ARMT)",
        CompressionMethod.BCJ_IA64: "BCJ(IA64)",
        CompressionMethod.BCJ_PPC: "BCJ(POWERPC)",
        CompressionMethod.BCJ_SPARC: "BCJ(SPARC)",
        CompressionMethod.CRYPT_AES256_SHA256: "7zAES",
        CompressionMethod.COPY: "COPY",
        CompressionMethod.MISC_BZIP2: "BZip2",
        CompressionMethod.MISC_DEFLATE: "DEFLATE",
        CompressionMethod.MISC_ZSTD: "ZStandard",
    }
    methods_names = []  # type: List[str]
    for coder in coders:
        try:
            methods_names.append(methods_name_map[coder['method']])
        except KeyError:
            raise UnsupportedCompressionMethodError("Unknown method {}".format(coder['method']))
    return methods_names
|
@ -0,0 +1,42 @@
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2019 Hiroshi Miura <miurahr@linux.com>
|
||||
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
|
||||
# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
|
||||
# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
|
||||
class ArchiveError(Exception):
    """Common base class of the exceptions defined in this module."""
|
||||
|
||||
|
||||
class Bad7zFile(ArchiveError):
    """ArchiveError subclass for a bad 7z file (going by its name)."""
|
||||
|
||||
|
||||
class UnsupportedCompressionMethodError(ArchiveError):
    """ArchiveError subclass for a compression method that is not supported."""
|
||||
|
||||
|
||||
class DecompressionError(ArchiveError):
    """ArchiveError subclass for a decompression failure (going by its name)."""
|
||||
|
||||
|
||||
class InternalError(ArchiveError):
    """ArchiveError subclass for an internal error (going by its name)."""
|
@ -0,0 +1,122 @@
|
||||
#!/usr/bin/python -u
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2019 Hiroshi Miura <miurahr@linux.com>
|
||||
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
|
||||
# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
|
||||
# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
import lzma
|
||||
import zlib
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from py7zr import UnsupportedCompressionMethodError
|
||||
from py7zr.helpers import Buffer, calculate_key
|
||||
from py7zr.properties import READ_BLOCKSIZE, CompressionMethod
|
||||
|
||||
try:
|
||||
import zstandard as Zstd # type: ignore
|
||||
except ImportError:
|
||||
Zstd = None
|
||||
|
||||
|
||||
class ISevenZipCompressor(ABC):
    """Interface of a compressor: ``compress`` chunks, then ``flush``."""

    @abstractmethod
    def compress(self, data: Union[bytes, bytearray, memoryview]) -> bytes:
        """Compress ``data`` and return the bytes produced."""

    @abstractmethod
    def flush(self) -> bytes:
        """Return the remaining output as bytes."""
|
||||
|
||||
|
||||
class ISevenZipDecompressor(ABC):
    """Interface of a decompressor with an optional output limit."""

    @abstractmethod
    def decompress(self, data: Union[bytes, bytearray, memoryview], max_length: int = -1) -> bytes:
        """Decompress ``data``; ``max_length`` defaults to -1."""
|
||||
|
||||
|
||||
class DeflateDecompressor(ISevenZipDecompressor):
    """Deflate decompressor that supports an output limit.

    The zlib object is created with ``wbits=-15``.  Output beyond
    ``max_length`` is kept in ``buf`` and returned by later calls.
    """

    def __init__(self):
        self.buf = b''
        self._decompressor = zlib.decompressobj(-15)

    def decompress(self, data: Union[bytes, bytearray, memoryview], max_length: int = -1):
        """Decompress ``data`` and return at most ``max_length`` bytes.

        :param data: next chunk of compressed input.
        :param max_length: output limit; a negative value returns everything.
        :returns: previously buffered output followed by the new output,
            cut at ``max_length``; the cut-off part stays buffered.
        """
        pending = self.buf + self._decompressor.decompress(data)
        if max_length < 0:
            self.buf = b''
            return pending
        self.buf = pending[max_length:]
        return pending[:max_length]
|
||||
|
||||
|
||||
class CopyDecompressor(ISevenZipDecompressor):
    """Pass-through "decompressor" for data stored without compression.

    Bytes beyond ``max_length`` are kept in an internal buffer and returned,
    in order, by later calls.
    """

    def __init__(self):
        self._buf = bytes()

    def decompress(self, data: Union[bytes, bytearray, memoryview], max_length: int = -1) -> bytes:
        """Return buffered bytes followed by ``data``, cut at ``max_length``.

        The limit applies to buffered and new bytes together, so a call with
        empty ``data`` still drains what was buffered earlier.  Limiting the
        output to ``len(data)`` would strand those bytes once the input is
        exhausted.

        :param data: next chunk of input.
        :param max_length: output limit; a negative value returns everything.
        :returns: at most ``max_length`` bytes of the stream, in order.
        """
        pending = self._buf + data
        if max_length < 0:
            self._buf = bytes()
            return pending
        self._buf = pending[max_length:]
        return pending[:max_length]
|
||||
|
||||
|
||||
class ZstdDecompressor(ISevenZipDecompressor):
    """Zstandard decompressor that supports an output limit.

    A single streaming decompression object is kept for the lifetime of the
    instance so that a stream delivered in several chunks is decoded as one
    stream.  Output beyond ``max_length`` is kept in ``buf``.
    """

    def __init__(self):
        """Create the decompressor.

        :raises UnsupportedCompressionMethodError: when the ``zstandard``
            module could not be imported.
        """
        if Zstd is None:
            raise UnsupportedCompressionMethodError
        self.buf = b''  # type: bytes
        self._ctc = Zstd.ZstdDecompressor()  # type: ignore
        # Created once: a fresh object per call would lose the stream state
        # between chunks.
        self._dobj = self._ctc.decompressobj()  # type: ignore

    def decompress(self, data: Union[bytes, bytearray, memoryview], max_length: int = -1) -> bytes:
        """Decompress ``data`` and return at most ``max_length`` bytes.

        :param data: next chunk of compressed input; may be empty to drain
            the buffered output.
        :param max_length: output limit; a negative value returns everything.
        :returns: previously buffered output followed by the new output,
            cut at ``max_length``; the cut-off part stays buffered.
        """
        if len(data) > 0:
            tmp = self.buf + self._dobj.decompress(data)
        else:
            # Nothing new to decode; do not touch the stream object, which
            # may already have reached the end of its frame.
            tmp = self.buf
        if max_length < 0:
            self.buf = b''
            return tmp
        self.buf = tmp[max_length:]
        return tmp[:max_length]
|
||||
|
||||
|
||||
class ZstdCompressor(ISevenZipCompressor):
    """Zstandard compressor built on ``zstandard.ZstdCompressor``."""

    def __init__(self):
        """Create the compressor.

        :raises UnsupportedCompressionMethodError: when the ``zstandard``
            module could not be imported.
        """
        if Zstd is None:
            raise UnsupportedCompressionMethodError
        self._ctc = Zstd.ZstdCompressor()  # type: ignore

    def compress(self, data: Union[bytes, bytearray, memoryview]) -> bytes:
        """Compress ``data`` with the wrapped compressor and return the result."""
        return self._ctc.compress(data)  # type: ignore

    def flush(self) -> bytes:
        """Return the remaining output, which is always empty here.

        ``compress`` hands back its whole result on every call, so nothing is
        pending.  Empty bytes are returned rather than None so that callers
        can take ``len()`` of the result, as the interface promises.
        """
        return b''
|
@ -0,0 +1,362 @@
|
||||
#!/usr/bin/python -u
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2019 Hiroshi Miura <miurahr@linux.com>
|
||||
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
#
|
||||
|
||||
import _hashlib # type: ignore # noqa
|
||||
import ctypes
|
||||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import sys
|
||||
import time as _time
|
||||
import zlib
|
||||
from datetime import datetime, timedelta, timezone, tzinfo
|
||||
from typing import BinaryIO, Optional, Union
|
||||
|
||||
import py7zr.win32compat
|
||||
|
||||
|
||||
def calculate_crc32(data: bytes, value: Optional[int] = None, blocksize: int = 1024 * 1024) -> int:
    """Calculate the CRC32 of ``data``, processing it block by block.

    :param data: bytes to checksum.
    :param value: running CRC to continue from; None or 0 starts afresh.
    :param blocksize: number of bytes passed to ``zlib.crc32`` per call.
    :returns: the CRC as an unsigned 32-bit integer.
    """
    total = len(data)
    # zlib.crc32 starts from 0 by default, so None and 0 are the same seed.
    crc = zlib.crc32(data[:blocksize], value or 0)
    offset = blocksize
    while offset < total:
        crc = zlib.crc32(data[offset:offset + blocksize], crc)
        offset += blocksize
    return crc & 0xffffffff
|
||||
|
||||
|
||||
def _calculate_key1(password: bytes, cycles: int, salt: bytes, digest: str) -> bytes:
    """Calculate 7zip AES encryption key.

    :param password: password bytes.
    :param cycles: power-of-two exponent of the number of hash rounds; the
        special value 0x3f skips hashing altogether.
    :param salt: salt bytes put in front of the password.
    :param digest: digest name; only ``'sha256'`` is accepted.
    :returns: the 32-byte key.
    :raises ValueError: when ``digest`` is not ``'sha256'``.
    """
    # Compare for equality: ``digest not in ('sha256')`` is a substring test
    # against a plain string and would accept e.g. '' or 'sha'.
    if digest != 'sha256':
        raise ValueError('Unknown digest method for password protection.')
    assert cycles <= 0x3f
    if cycles == 0x3f:
        # No hashing: salt + password, zero padded / truncated to 32 bytes.
        return bytes((salt + password + bytes(32))[:32])
    seed = salt + password
    m = _hashlib.new(digest)
    for counter in range(1 << cycles):
        # Each round hashes salt, password and a 64-bit little-endian counter.
        m.update(seed + counter.to_bytes(8, byteorder='little', signed=False))
    return m.digest()[:32]
|
||||
|
||||
|
||||
def _calculate_key2(password: bytes, cycles: int, salt: bytes, digest: str) -> bytes:
    """Calculate 7zip AES encryption key.

    It utilize ctypes and memoryview buffer and zero-copy technology on Python.

    :param password: password bytes.
    :param cycles: power-of-two exponent of the number of hash rounds; the
        special value 0x3f skips hashing altogether.
    :param salt: salt bytes put in front of the password.
    :param digest: digest name; only ``'sha256'`` is accepted.
    :returns: the 32-byte key.
    :raises ValueError: when ``digest`` is not ``'sha256'``.
    """
    # Compare for equality: ``digest not in ('sha256')`` is a substring test
    # against a plain string and would accept e.g. '' or 'sha'.
    if digest != 'sha256':
        raise ValueError('Unknown digest method for password protection.')
    assert cycles <= 0x3f
    if cycles == 0x3f:
        # No hashing: salt + password, zero padded / truncated to 32 bytes.
        return bytes(bytearray(salt + password + bytes(32))[:32])
    rounds = 1 << cycles
    m = _hashlib.new(digest)
    seed = salt + password
    length = len(seed)

    class RoundBuf(ctypes.LittleEndianStructure):
        # Packed layout: the seed bytes directly followed by a 64-bit
        # little-endian round counter.
        _pack_ = 1
        _fields_ = [
            ('saltpassword', ctypes.c_ubyte * length),
            ('round', ctypes.c_uint64)
        ]

    buf = RoundBuf()
    for i, c in enumerate(seed):
        buf.saltpassword[i] = c
    buf.round = 0
    # The view exposes the structure's memory, so bumping ``buf.round``
    # changes what the next update() hashes without building new bytes.
    mv = memoryview(buf)  # type: ignore # noqa
    while buf.round < rounds:
        m.update(mv)
        buf.round += 1
    return m.digest()[:32]
|
||||
|
||||
|
||||
# Pick the key derivation routine for the running interpreter:
# - PyPy uses ver1 to avoid https://foss.heptapod.net/pypy/pypy/issues/3209
# - everywhere else ver2 is used, which is 1.7-2.0 times faster than ver1
_ON_PYPY = platform.python_implementation() == "PyPy"
calculate_key = _calculate_key1 if _ON_PYPY else _calculate_key2
|
||||
|
||||
|
||||
def filetime_to_dt(ft):
    """Convert Windows NTFS file time into python datetime object.

    :param ft: file time in 100-nanosecond units.
    :returns: timezone-aware (UTC) datetime with microsecond resolution;
        the sub-microsecond part is dropped by floor division.
    """
    # File time value that corresponds to 1970-01-01 00:00:00 UTC.
    epoch_as_filetime = 116444736000000000
    unix_epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    elapsed_us = (ft - epoch_as_filetime) // 10
    return unix_epoch + timedelta(microseconds=elapsed_us)
|
||||
|
||||
|
||||
# Handy timedelta constants.
ZERO = timedelta(0)
HOUR = timedelta(hours=1)
SECOND = timedelta(seconds=1)

# A class capturing the platform's idea of local time.
# (May result in wrong values on historical times in
# timezones where UTC offset and/or the DST rules had
# changed in the past.)

# UTC offsets of local standard time and of local DST; without a DST zone
# both are the same.
STDOFFSET = timedelta(seconds=-_time.timezone)
DSTOFFSET = timedelta(seconds=-_time.altzone) if _time.daylight else STDOFFSET

DSTDIFF = DSTOFFSET - STDOFFSET
|
||||
|
||||
|
||||
class LocalTimezone(tzinfo):
    """tzinfo implementation based on the platform's idea of local time.

    Offsets come from the module constants ``STDOFFSET``, ``DSTOFFSET`` and
    ``DSTDIFF``; whether DST applies is decided with ``time.mktime`` and
    ``time.localtime``.
    """

    def fromutc(self, dt):
        """Convert ``dt``, holding UTC fields and tagged with this tzinfo, to local time.

        :param dt: datetime whose ``tzinfo`` is this instance.
        :returns: local datetime with this tzinfo and the same microsecond.
        """
        assert dt.tzinfo is self
        stamp = (dt - datetime(1970, 1, 1, tzinfo=self)) // SECOND
        args = _time.localtime(stamp)[:6]
        # The result carries no ``fold`` value: a fold flag used to be
        # computed here but was never passed on, so that dead computation
        # (and its extra localtime() call) has been dropped.
        return datetime(*args, microsecond=dt.microsecond, tzinfo=self)

    def utcoffset(self, dt):
        """Return ``DSTOFFSET`` when DST applies to ``dt``, else ``STDOFFSET``."""
        if self._isdst(dt):
            return DSTOFFSET
        return STDOFFSET

    def dst(self, dt):
        """Return ``DSTDIFF`` when DST applies to ``dt``, else a zero timedelta."""
        if self._isdst(dt):
            return DSTDIFF
        return ZERO

    def tzname(self, dt):
        """Return the platform's time zone name for ``dt`` (standard or DST)."""
        return _time.tzname[self._isdst(dt)]

    def _isdst(self, dt):
        """Return True when the platform reports DST for ``dt``'s wall time."""
        # Round-trip the wall-clock fields through mktime()/localtime() and
        # read tm_isdst from the result.
        tt = (dt.year, dt.month, dt.day,
              dt.hour, dt.minute, dt.second,
              dt.weekday(), 0, 0)
        stamp = _time.mktime(tt)
        tt = _time.localtime(stamp)
        return tt.tm_isdst > 0
|
||||
|
||||
|
||||
# Module-level LocalTimezone instance.
Local = LocalTimezone()
# Seconds added by ArchiveTimestamp.totimestamp() to a FILETIME value that has
# been scaled to seconds (FILETIME counts from 1601/01/01 UTC) to obtain a
# Python timestamp.
TIMESTAMP_ADJUST = -11644473600
|
||||
|
||||
|
||||
class UTC(tzinfo):
    """UTC time zone: zero offset, no DST, name ``"UTC"``."""

    def utcoffset(self, dt):
        """Return a zero offset."""
        return ZERO

    def tzname(self, dt):
        """Return the zone name ``"UTC"``."""
        return "UTC"

    def dst(self, dt):
        """Return a zero DST adjustment."""
        return ZERO

    def __call__(self):
        """Return this instance, so an instance can be used like a factory."""
        return self

    # The method used to be spelt ``_call__`` (one leading underscore), which
    # never made instances callable.  The old name is kept as an alias for
    # anything that called it directly.
    _call__ = __call__
|
||||
|
||||
|
||||
class ArchiveTimestamp(int):
    """Windows FILETIME timestamp.

    An ``int`` subclass whose value counts 100-nanosecond intervals since
    1601/01/01 (UTC).
    """

    def __repr__(self):
        return '{}({:d})'.format(type(self).__name__, self)

    def totimestamp(self) -> float:
        """Convert 7z FILETIME to Python timestamp."""
        # FILETIME is 100-nanosecond intervals since 1601/01/01 (UTC)
        seconds_since_1601 = self / 10000000.0
        return seconds_since_1601 + TIMESTAMP_ADJUST

    def as_datetime(self):
        """Convert FILETIME to Python datetime object."""
        stamp = self.totimestamp()
        return datetime.fromtimestamp(stamp, UTC())

    @staticmethod
    def from_datetime(val):
        """Build an ArchiveTimestamp from ``val``.

        ``val`` goes through plain arithmetic (the inverse of
        :meth:`totimestamp`), so it has to be a number of seconds rather
        than a datetime object.
        """
        seconds_since_1601 = val - TIMESTAMP_ADJUST
        return ArchiveTimestamp(seconds_since_1601 * 10000000.0)
|
||||
|
||||
|
||||
def islink(path):
    """
    Report whether *path* is a symbolic link, on any platform.

    The answer of ``os.path.islink`` is returned unchanged except on
    Windows (major version 6 or later) running Python older than 3.8.
    There, a path that ``py7zr.win32compat.is_reparse_point`` flags as a
    reparse point is reported as not being a link.
    """
    linked = os.path.islink(path)
    needs_compat = (sys.version_info < (3, 8)
                    and sys.platform == "win32"
                    and sys.getwindowsversion()[0] >= 6)
    if not needs_compat:
        return linked
    # special check for directory junctions which py38 does.
    if linked and py7zr.win32compat.is_reparse_point(path):
        return False
    return linked
|
||||
|
||||
|
||||
def readlink(path: Union[str, pathlib.Path], *, dir_fd=None) -> Union[str, pathlib.Path]:
    """
    Cross-platform compat implementation of os.readlink and Path.readlink().

    Supports Windows NT symbolic links and reparse points.
    When called with a str path, the result is a str.
    When called with a Path object, the result is a Path object, whether or
    not ``dir_fd`` is given.
    On Python 3.9+ a bytes path is handed to ``os.readlink`` untouched; on the
    older non-Windows branch a bytes result is decoded with ``os.fsdecode``.

    :param path: the link to read.
    :param dir_fd: forwarded to ``os.readlink``; not used by the legacy
        Windows fallback.
    :raises OSError: (errno 22) on the legacy Windows fallback when *path*
        does not exist at all.
    """
    is_path_pathlib = isinstance(path, pathlib.Path)
    if sys.version_info >= (3, 9):
        if is_path_pathlib and dir_fd is None:
            return path.readlink()
        res = os.readlink(path, dir_fd=dir_fd)
        # os.readlink() hands back a plain str for a Path argument; wrap it so
        # that Path-in means Path-out just like on the other branches.
        return pathlib.Path(res) if is_path_pathlib else res
    if sys.version_info >= (3, 8) or sys.platform != "win32":
        res = os.readlink(path, dir_fd=dir_fd)
        # Hack to handle a wrong type of results
        if isinstance(res, bytes):
            res = os.fsdecode(res)
        return pathlib.Path(res) if is_path_pathlib else res
    # Legacy Windows (Python < 3.8).  lexists() does not follow the link, so a
    # dangling link is still readable instead of being rejected here.
    if not os.path.lexists(str(path)):
        raise OSError(22, 'Invalid argument', path)
    return py7zr.win32compat.readlink(path)
|
||||
|
||||
|
||||
class MemIO:
    """Wrap an in-memory binary buffer behind a pathlib.Path-like surface.

    Path-style operations (``open``, ``parent``, ``mkdir``) are no-ops that
    let the object be used where a path is expected, while reads and writes
    are forwarded to the wrapped buffer.
    """

    def __init__(self, buf: BinaryIO):
        self._buf = buf

    def write(self, data: bytes) -> int:
        """Forward *data* to the buffer and return what its write() returns."""
        written = self._buf.write(data)
        return written

    def read(self, length: Optional[int] = None) -> bytes:
        """Read *length* bytes, or everything remaining when it is None."""
        if length is None:
            return self._buf.read()
        return self._buf.read(length)

    def close(self) -> None:
        """Rewind the buffer to its start; the buffer itself stays open."""
        self._buf.seek(0)

    def flush(self) -> None:
        """Do nothing."""

    def seek(self, position: int) -> None:
        """Move the buffer position to *position*."""
        self._buf.seek(position)

    def open(self, mode=None):
        """Return this object; *mode* is ignored."""
        return self

    @property
    def parent(self):
        """Return this object, standing in for a parent directory."""
        return self

    def mkdir(self, parents=None, exist_ok=False):
        """Accept and ignore a directory-creation request."""
        return None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context without touching the buffer."""
|
||||
|
||||
|
||||
class NullIO:
    """pathlib.Path-like IO class of /dev/null"""

    def __init__(self):
        pass

    def write(self, data):
        """Discard *data* and report its full length as written."""
        return len(data)

    def read(self, length=None):
        """Return *length* zero bytes, or an empty bytes object when it is None."""
        if length is not None:
            return bytes(length)
        else:
            return b''

    def close(self):
        """Do nothing."""
        pass

    def flush(self):
        """Do nothing."""
        pass

    def open(self, mode=None):
        """Return this object; *mode* is ignored."""
        return self

    @property
    def parent(self):
        """Return this object, standing in for a parent directory."""
        return self

    def mkdir(self, parents=None, exist_ok=False):
        """Accept and ignore a directory-creation request.

        The parameters mirror MemIO.mkdir so that a caller can issue the same
        ``mkdir(parents=..., exist_ok=...)`` call against either class.
        """
        return None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass
|
||||
|
||||
|
||||
class BufferOverflow(Exception):
    """Raised by Buffer when incoming data would exceed its capacity."""
|
||||
|
||||
|
||||
class Buffer:
    """Fixed-capacity byte accumulator.

    ``view`` is a memoryview over a copy of the bytes currently held and is
    rebuilt after every change; ``len()`` gives the number of bytes held.
    """

    def __init__(self, size: int = 16):
        self._size = size
        self._buf = bytearray(size)
        self._buflen = 0
        self.view = memoryview(self._buf[0:0])

    def _publish(self, used: int) -> None:
        """Record *used* as the fill level and refresh ``view`` to match."""
        self._buflen = used
        self.view = memoryview(self._buf[0:used])

    def add(self, data: Union[bytes, bytearray, memoryview]):
        """Append *data*; raise BufferOverflow if it would not fit."""
        end = len(data) + self._buflen
        if end > self._size:
            raise BufferOverflow()
        self._buf[self._buflen:end] = data
        self._publish(end)

    def reset(self) -> None:
        """Mark the buffer as empty."""
        self._publish(0)

    def set(self, data: Union[bytes, bytearray, memoryview]) -> None:
        """Replace the contents with *data*; raise BufferOverflow if too large."""
        incoming = len(data)
        if incoming > self._size:
            raise BufferOverflow()
        self._buf[0:incoming] = data
        self._publish(incoming)

    def __len__(self) -> int:
        return self._buflen
|
@ -0,0 +1,155 @@
|
||||
#
|
||||
# p7zr library
|
||||
#
|
||||
# Copyright (c) 2019 Hiroshi Miura <miurahr@linux.com>
|
||||
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
|
||||
# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
|
||||
# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
import binascii
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
# Signature bytes registered as the 'magic' of the 7z format below.
MAGIC_7Z = binascii.unhexlify('377abcaf271c')
# Same as MAGIC_7Z except for the final byte.
FINISH_7Z = binascii.unhexlify('377abcaf271d')
# Block size in bytes, and a queue length of twice that size.
# (READ_BLOCKSIZE used to be assigned a second time further down with the
# same value; the redundant assignment was removed.)
READ_BLOCKSIZE = 32248
QUEUELEN = READ_BLOCKSIZE * 2
|
||||
|
||||
|
||||
class ByteEnum(bytes, Enum):
    """Enum base whose members are themselves bytes objects."""
|
||||
|
||||
|
||||
class Property(ByteEnum):
    """Single-byte 7zip property identifiers, 0x00 through 0x19."""
    END = bytes.fromhex('00')
    HEADER = bytes.fromhex('01')
    ARCHIVE_PROPERTIES = bytes.fromhex('02')
    ADDITIONAL_STREAMS_INFO = bytes.fromhex('03')
    MAIN_STREAMS_INFO = bytes.fromhex('04')
    FILES_INFO = bytes.fromhex('05')
    PACK_INFO = bytes.fromhex('06')
    UNPACK_INFO = bytes.fromhex('07')
    SUBSTREAMS_INFO = bytes.fromhex('08')
    SIZE = bytes.fromhex('09')
    CRC = bytes.fromhex('0a')
    FOLDER = bytes.fromhex('0b')
    CODERS_UNPACK_SIZE = bytes.fromhex('0c')
    NUM_UNPACK_STREAM = bytes.fromhex('0d')
    EMPTY_STREAM = bytes.fromhex('0e')
    EMPTY_FILE = bytes.fromhex('0f')
    ANTI = bytes.fromhex('10')
    NAME = bytes.fromhex('11')
    CREATION_TIME = bytes.fromhex('12')
    LAST_ACCESS_TIME = bytes.fromhex('13')
    LAST_WRITE_TIME = bytes.fromhex('14')
    ATTRIBUTES = bytes.fromhex('15')
    COMMENT = bytes.fromhex('16')
    ENCODED_HEADER = bytes.fromhex('17')
    START_POS = bytes.fromhex('18')
    DUMMY = bytes.fromhex('19')
|
||||
|
||||
|
||||
class CompressionMethod(ByteEnum):
    """Byte-string identifiers of compression, filter and crypto methods."""
    COPY = bytes.fromhex('00')
    DELTA = bytes.fromhex('03')
    BCJ = bytes.fromhex('04')
    PPC = bytes.fromhex('05')
    IA64 = bytes.fromhex('06')
    ARM = bytes.fromhex('07')
    ARMT = bytes.fromhex('08')
    SPARC = bytes.fromhex('09')
    # SWAP = 02..
    SWAP2 = bytes.fromhex('020302')
    SWAP4 = bytes.fromhex('020304')
    # 7Z = 03..
    LZMA = bytes.fromhex('030101')
    PPMD = bytes.fromhex('030401')
    P7Z_BCJ = bytes.fromhex('03030103')
    P7Z_BCJ2 = bytes.fromhex('0303011B')
    BCJ_PPC = bytes.fromhex('03030205')
    BCJ_IA64 = bytes.fromhex('03030401')
    BCJ_ARM = bytes.fromhex('03030501')
    BCJ_ARMT = bytes.fromhex('03030701')
    BCJ_SPARC = bytes.fromhex('03030805')
    LZMA2 = bytes.fromhex('21')
    # MISC : 04..
    MISC_ZIP = bytes.fromhex('0401')
    MISC_BZIP2 = bytes.fromhex('040202')
    MISC_DEFLATE = bytes.fromhex('040108')
    MISC_DEFLATE64 = bytes.fromhex('040109')
    MISC_Z = bytes.fromhex('0405')
    MISC_LZH = bytes.fromhex('0406')
    NSIS_DEFLATE = bytes.fromhex('040901')
    NSIS_BZIP2 = bytes.fromhex('040902')
    # MISC, 04f711.. range
    MISC_ZSTD = bytes.fromhex('04f71101')
    MISC_BROTLI = bytes.fromhex('04f71102')
    MISC_LZ4 = bytes.fromhex('04f71104')
    MISC_LZS = bytes.fromhex('04f71105')
    MISC_LIZARD = bytes.fromhex('04f71106')
    # CRYPTO 06..
    CRYPT_ZIPCRYPT = bytes.fromhex('06f10101')
    CRYPT_RAR29AES = bytes.fromhex('06f10303')
    CRYPT_AES256_SHA256 = bytes.fromhex('06f10701')
|
||||
|
||||
|
||||
class SupportedMethods:
    """Tables of the archive formats and codecs this library supports.

    ``formats`` maps a format name to its magic bytes; ``codecs`` pairs each
    supported CompressionMethod id with a short display name.
    """
    formats = [{'name': "7z", 'magic': MAGIC_7Z}]
    codecs = [
        {'id': method_id, 'name': label}
        for method_id, label in (
            (CompressionMethod.LZMA, "LZMA"),
            (CompressionMethod.LZMA2, "LZMA2"),
            (CompressionMethod.DELTA, "DELTA"),
            (CompressionMethod.P7Z_BCJ, "BCJ"),
            (CompressionMethod.BCJ_PPC, 'PPC'),
            (CompressionMethod.BCJ_IA64, 'IA64'),
            (CompressionMethod.BCJ_ARM, "ARM"),
            (CompressionMethod.BCJ_ARMT, "ARMT"),
            (CompressionMethod.BCJ_SPARC, 'SPARC'),
        )
    ]
|
||||
|
||||
|
||||
# this class is Borg/Singleton
|
||||
class ArchivePassword:
    """Holder of one password shared by every instance.

    All instances use the same attribute dictionary, so a password stored
    through one instance is visible through all the others.
    """

    _shared_state = {'_password': None}

    def __init__(self, password: Optional[str] = None):
        # Rebind the instance dictionary to the shared one before touching
        # any attribute.
        self.__dict__ = self._shared_state
        if password is not None:
            self._password = password

    def set(self, password):
        """Store *password* for all instances."""
        self._password = password

    def get(self):
        """Return the stored password, or '' when none has been set."""
        return '' if self._password is None else self._password

    def __str__(self):
        return '' if self._password is None else self._password
|
@ -0,0 +1,174 @@
|
||||
import pathlib
|
||||
import stat
|
||||
import sys
|
||||
from logging import getLogger
|
||||
from typing import Union
|
||||
|
||||
if sys.platform == "win32":
|
||||
import ctypes
|
||||
from ctypes.wintypes import BOOL, DWORD, HANDLE, LPCWSTR, LPDWORD, LPVOID, LPWSTR
|
||||
|
||||
# kernel32 is loaded once; the four entry points used below are looked up
# on it by name.
_stdcall_libraries = {'kernel32': ctypes.WinDLL('kernel32')}
CloseHandle, CreateFileW, DeviceIoControl, GetFileAttributesW = (
    getattr(_stdcall_libraries['kernel32'], entry_point)
    for entry_point in ('CloseHandle', 'CreateFileW', 'DeviceIoControl', 'GetFileAttributesW')
)
|
||||
# Arguments passed to CreateFileW by readlink().
OPEN_EXISTING = 3
GENERIC_READ = 0x80000000
FILE_FLAG_OPEN_REPARSE_POINT = 0x00200000
FILE_FLAG_BACKUP_SEMANTICS = 0x02000000
# Control code passed to DeviceIoControl by readlink().
FSCTL_GET_REPARSE_POINT = 0x000900A8
# Reparse tags recognised by readlink().
IO_REPARSE_TAG_MOUNT_POINT = 0xA0000003
IO_REPARSE_TAG_SYMLINK = 0xA000000C
# Size in bytes of the buffer handed to DeviceIoControl.
MAXIMUM_REPARSE_DATA_BUFFER_SIZE = 16 * 1024
|
||||
|
||||
def _check_bit(val: int, flag: int) -> bool:
    """Return True when every bit set in *flag* is also set in *val*."""
    masked = val & flag
    return masked == flag
|
||||
|
||||
class SymbolicLinkReparseBuffer(ctypes.Structure):
    """Symbolic-link member of the reparse data union.

    Holds the ``Flags`` field followed by the raw path buffer bytes.  The
    four name offset/length fields are declared on ReparseBuffer rather
    than here.  For reference, the C declaration being modelled:

    typedef struct _REPARSE_DATA_BUFFER {
        ULONG  ReparseTag;
        USHORT ReparseDataLength;
        USHORT Reserved;
        union {
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                ULONG Flags;
                WCHAR PathBuffer[1];
            } SymbolicLinkReparseBuffer;
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                WCHAR PathBuffer[1];
            } MountPointReparseBuffer;
            struct {
                UCHAR  DataBuffer[1];
            } GenericReparseBuffer;
        } DUMMYUNIONNAME;
    } REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER;
    """
    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ntifs/ns-ntifs-_reparse_data_buffer
    _fields_ = [
        ('flags', ctypes.c_ulong),
        # Remainder of the maximum buffer after 20 bytes of preceding fields.
        ('path_buffer', ctypes.c_byte * (MAXIMUM_REPARSE_DATA_BUFFER_SIZE - 20)),
    ]
|
||||
|
||||
class MountReparseBuffer(ctypes.Structure):
    """Mount-point member of the reparse data union: the raw path buffer only."""
    _fields_ = [
        # Remainder of the maximum buffer after 16 bytes of preceding fields.
        ('path_buffer', ctypes.c_byte * (MAXIMUM_REPARSE_DATA_BUFFER_SIZE - 16)),
    ]
|
||||
|
||||
class ReparseBufferField(ctypes.Union):
    """Union of the symlink and mount-point views of the reparse payload."""
    _fields_ = [
        ('symlink', SymbolicLinkReparseBuffer),
        ('mount', MountReparseBuffer),
    ]
|
||||
|
||||
class ReparseBuffer(ctypes.Structure):
    """Buffer filled by the FSCTL_GET_REPARSE_POINT request in readlink().

    The tag, length and name offset/length fields come first, followed by the
    anonymous union ``u`` whose ``symlink`` and ``mount`` members are
    reachable directly on the instance.
    """
    # Makes buf.symlink / buf.mount resolve through the union field 'u'.
    _anonymous_ = ("u",)
    _fields_ = [
        ('reparse_tag', ctypes.c_ulong),
        ('reparse_data_length', ctypes.c_ushort),
        ('reserved', ctypes.c_ushort),
        ('substitute_name_offset', ctypes.c_ushort),
        ('substitute_name_length', ctypes.c_ushort),
        ('print_name_offset', ctypes.c_ushort),
        ('print_name_length', ctypes.c_ushort),
        ('u', ReparseBufferField),
    ]
|
||||
|
||||
def is_reparse_point(path: Union[str, pathlib.Path]) -> bool:
    """Return True if *path* carries the reparse-point file attribute.

    :param path: file system path to inspect; converted with ``str()``.
    :return: False when the attributes cannot be read (for example because
        the path does not exist), otherwise whether
        ``FILE_ATTRIBUTE_REPARSE_POINT`` is set.
    """
    GetFileAttributesW.argtypes = [LPCWSTR]
    GetFileAttributesW.restype = DWORD
    attributes = GetFileAttributesW(str(path))
    # GetFileAttributesW reports failure as INVALID_FILE_ATTRIBUTES, a value
    # with every bit set; without this guard it would pass any bit test.
    if attributes == 0xFFFFFFFF:
        return False
    return _check_bit(attributes, stat.FILE_ATTRIBUTE_REPARSE_POINT)
|
||||
|
||||
def readlink(path: Union[str, pathlib.Path]) -> Union[str, pathlib.WindowsPath]:
    """Read the target of a Windows symbolic link or junction.

    :param path: the link to read, as str or pathlib.Path.
    :return: the substitute name stored in the reparse point, as a
        ``pathlib.WindowsPath`` when *path* was a Path object and as str
        otherwise.
    :raises ValueError: when the path cannot be opened or queried, or when
        its reparse tag is neither a symbolic link nor a mount point.
    """
    # FILE_FLAG_OPEN_REPARSE_POINT alone is not enough if 'path'
    # is a symbolic link to a directory or a NTFS junction.
    # We need to set FILE_FLAG_BACKUP_SEMANTICS as well.
    # See https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-createfilea

    # description from _winapi.c:601
    # /* REPARSE_DATA_BUFFER usage is heavily under-documented, especially for
    #  junction points. Here's what I've learned along the way:
    #  - A junction point has two components: a print name and a substitute
    #  name. They both describe the link target, but the substitute name is
    #  the physical target and the print name is shown in directory listings.
    #  - The print name must be a native name, prefixed with "\??\".
    #  - Both names are stored after each other in the same buffer (the
    #  PathBuffer) and both must be NUL-terminated.
    #  - There are four members defining their respective offset and length
    #  inside PathBuffer: SubstituteNameOffset, SubstituteNameLength,
    #  PrintNameOffset and PrintNameLength.
    #  - The total size we need to allocate for the REPARSE_DATA_BUFFER, thus,
    #  is the sum of:
    #  - the fixed header size (REPARSE_DATA_BUFFER_HEADER_SIZE)
    #  - the size of the MountPointReparseBuffer member without the PathBuffer
    #  - the size of the prefix ("\??\") in bytes
    #  - the size of the print name in bytes
    #  - the size of the substitute name in bytes
    #  - the size of two NUL terminators in bytes */

    target_is_path = isinstance(path, pathlib.Path)
    target = str(path) if target_is_path else path
    CreateFileW.argtypes = [LPWSTR, DWORD, DWORD, LPVOID, DWORD, DWORD, HANDLE]
    CreateFileW.restype = HANDLE
    DeviceIoControl.argtypes = [HANDLE, DWORD, LPVOID, DWORD, LPVOID, DWORD, LPDWORD, LPVOID]
    DeviceIoControl.restype = BOOL
    raw_handle = CreateFileW(target, GENERIC_READ, 0, None, OPEN_EXISTING,
                             FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, 0)
    buf = ReparseBuffer()
    ret = DWORD(0)
    # CreateFileW reports failure as INVALID_HANDLE_VALUE (-1); such a value
    # must not be handed to DeviceIoControl or CloseHandle.
    if raw_handle is None or raw_handle == HANDLE(-1).value:
        status = 0
    else:
        handle = HANDLE(raw_handle)
        try:
            status = DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, None, 0, ctypes.byref(buf),
                                     MAXIMUM_REPARSE_DATA_BUFFER_SIZE, ctypes.byref(ret), None)
        finally:
            # Release the handle even if the ioctl call itself raises.
            CloseHandle(handle)
    if not status:
        logger = getLogger(__file__)
        logger.error("Failed IOCTL access to REPARSE_POINT {}".format(target))
        raise ValueError("not a symbolic link or access permission violation")

    # Both supported tags store the name offsets in the same place; only the
    # start of the path buffer differs.
    if buf.reparse_tag == IO_REPARSE_TAG_SYMLINK:
        path_buffer = buf.symlink.path_buffer
    elif buf.reparse_tag == IO_REPARSE_TAG_MOUNT_POINT:
        path_buffer = buf.mount.path_buffer
    else:
        raise ValueError("not a symbolic link")
    offset = buf.substitute_name_offset
    ending = offset + buf.substitute_name_length
    rpath = bytearray(path_buffer)[offset:ending].decode('UTF-16-LE')
    # on posixmodule.c:7859 in py38, we do that
    # ```
    # else if (rdb->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT)
    # {
    #    name = (wchar_t *)((char*)rdb->MountPointReparseBuffer.PathBuffer +
    #                       rdb->MountPointReparseBuffer.SubstituteNameOffset);
    #    nameLen = rdb->MountPointReparseBuffer.SubstituteNameLength / sizeof(wchar_t);
    # }
    # else
    # {
    #    PyErr_SetString(PyExc_ValueError, "not a symbolic link");
    # }
    # if (nameLen > 4 && wcsncmp(name, L"\\??\\", 4) == 0) {
    #             /* Our buffer is mutable, so this is okay */
    #             name[1] = L'\\';
    #         }
    # ```
    # so substitute prefix here.
    if rpath.startswith('\\??\\'):
        rpath = '\\\\' + rpath[2:]
    if target_is_path:
        return pathlib.WindowsPath(rpath)
    else:
        return rpath
|
Loading…
Reference in new issue