From 8e48c16fc8d4f3b34bca3c32b2ead2ad21198f66 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Thu, 26 Feb 2026 13:03:48 +0100 Subject: [PATCH 1/7] Add middleware loader --- dissect/target/loader.py | 36 +++++++++++++++++++++++++++++++++++ dissect/target/loaders/vbk.py | 36 +++++++++++++++-------------------- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/dissect/target/loader.py b/dissect/target/loader.py index b37ee5a205..7a82a19cbd 100644 --- a/dissect/target/loader.py +++ b/dissect/target/loader.py @@ -152,6 +152,42 @@ def map(self, target: Target) -> None: raise NotImplementedError +class MiddlewareLoader(Loader): + """A base class for preparing arbitrary data to be used by other :class:`Loader`s. + + Instead of mapping data directly to a :class:`Target`, loaders of this type + prepare data in some way and make it available for other :class:`Loader`s to use. + + Subclasses should implement the :meth:`detect` method like any other loader, and return a path to the prepared + data in the :meth:`prepare` method. The loading mechanism will then use that path to find other loaders to map + the prepared data into the target. + + Feels like forever since I've heard the term "middleware", I'm bringing it back baby!
+ """ + + def __init__(self, path: Path, *, fallbacks: list[type[Loader]] | None = None, **kwargs): + super().__init__(path, **kwargs) + # This will be the loader that successfully mapped the prepared path + self.loader = None + + @staticmethod + def detect(path: Path) -> bool: + raise NotImplementedError + + def prepare(self, target: Target) -> Path: + raise NotImplementedError + + def map(self, target: Target) -> None: + path = self.prepare(target) + + if (loader := find_loader(path, fallbacks=[DirLoader, RawLoader])) is not None: + ldr = loader(path) + ldr.map(target) + + # Store a reference to the loader if we successfully mapped + self.loader = ldr + + def register(module_name: str, class_name: str, internal: bool = True) -> None: """Registers a ``Loader`` class inside ``LOADERS``. diff --git a/dissect/target/loaders/vbk.py b/dissect/target/loaders/vbk.py index 13d6f6c203..0e911bf537 100644 --- a/dissect/target/loaders/vbk.py +++ b/dissect/target/loaders/vbk.py @@ -7,8 +7,7 @@ from dissect.target.exceptions import LoaderError from dissect.target.filesystem import VirtualFilesystem from dissect.target.filesystems.vbk import VbkFilesystem -from dissect.target.loader import Loader, find_loader -from dissect.target.loaders.raw import RawLoader +from dissect.target.loader import MiddlewareLoader if TYPE_CHECKING: from pathlib import Path @@ -19,7 +18,7 @@ RE_RAW_DISK = re.compile(r"(?:[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})|(?:DEV__.+)") -class VbkLoader(Loader): +class VbkLoader(MiddlewareLoader): """Load Veaam Backup (VBK) files. 
References: @@ -35,7 +34,7 @@ def __init__(self, path: Path, **kwargs): def detect(path: Path) -> bool: return path.suffix.lower() == ".vbk" - def map(self, target: Target) -> None: + def prepare(self, target: Target) -> Path: # We haven't really researched any of the VBK metadata yet, so just try some common formats root = self.vbkfs.path("/") if (base := next(root.glob("*"), None)) is None: @@ -51,24 +50,19 @@ def map(self, target: Target) -> None: candidates.append(root.joinpath("+".join(map(str, disks)))) - # Try to find a loader - for candidate in candidates: - if candidate.suffix.lower() == ".vmcx": - # For VMCX files we need to massage the file layout a bit - vfs = VirtualFilesystem() - vfs.map_file_entry(candidate.name, candidate) + # We should only have one candidate at this point + if len(candidates) > 1: + raise LoaderError("Unsupported VBK structure, use `-L raw` to manually inspect the VBK") - for entry in chain(base.glob("Ide*/*"), base.glob("Scsi*/*")): - vfs.map_file_entry(entry.name, entry) + candidate = candidates[0] + if candidate.suffix.lower() == ".vmcx": + # For VMCX files we need to massage the file layout a bit + vfs = VirtualFilesystem() + vfs.map_file_entry(candidate.name, candidate) - candidate = vfs.path(candidate.name) + for entry in chain(base.glob("Ide*/*"), base.glob("Scsi*/*")): + vfs.map_file_entry(entry.name, entry) - if (loader := find_loader(candidate, fallbacks=[RawLoader])) is not None: - ldr = loader(candidate) - ldr.map(target) + candidate = vfs.path(candidate.name) - # Store a reference to the loader if we successfully mapped - self.loader = ldr - break - else: - raise LoaderError("Unsupported VBK structure, use `-L raw` to manually inspect the VBK") + return candidate From 1af0e5816921efe4fbd7f5d75ef9191c44d1498c Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Mon, 15 Dec 2025 14:15:18 +0100 Subject: [PATCH 2/7] Add generic GZIP Loader --- dissect/target/loaders/gzip.py | 41 ++++++++++++++++++++++++++++++++++ 1 file 
changed, 41 insertions(+) create mode 100644 dissect/target/loaders/gzip.py diff --git a/dissect/target/loaders/gzip.py b/dissect/target/loaders/gzip.py new file mode 100644 index 0000000000..99afe4870c --- /dev/null +++ b/dissect/target/loaders/gzip.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from dissect.target.filesystem import VirtualFilesystem +from dissect.target.loader import Loader, LOADERS + +if TYPE_CHECKING: + from pathlib import Path + + from dissect.target.target import target + +GZ_EXT = (".gz",) + + +class GzipLoader(Loader): + """Allow loading Gzip compressed files. Actual loading is handled by the normal loaders.""" + + def __init__(self, path: Path, **kwargs): + super().__init__(path, **kwargs) + + @staticmethod + def detect(path: Path) -> bool: + return path.name.lower().endswith(GZ_EXT) + + def map(self, target: target.Target) -> None: + filename = self.path.name.removesuffix(".gz") + vfs = VirtualFilesystem() + vfs.map_file(filename, self.path, "gzip") + path = vfs.get(filename) + + for candidate in LOADERS: + try: + target.log.info("Testing sub-loader %s", candidate.__name__) + if candidate.detect(path): + self.subloader = candidate(path) + self.subloader.map(target) + break + except Exception as e: # noqa: PERF203 + target.log.debug("Failed to use loader %s", candidate) + target.log.debug("", exc_info=e) From c8bd22c26f9a09d9c6a9f18d56395c7a319f9929 Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Mon, 15 Dec 2025 14:19:06 +0100 Subject: [PATCH 3/7] Add filesystem and loader files --- dissect/target/filesystem.py | 4 ++-- dissect/target/loader.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dissect/target/filesystem.py b/dissect/target/filesystem.py index 57db90646b..84aa980f74 100644 --- a/dissect/target/filesystem.py +++ b/dissect/target/filesystem.py @@ -1058,7 +1058,7 @@ def get(self, path: str) -> FilesystemEntry: def scandir(self) -> Iterator[DirEntry]: raise 
NotADirectoryError(f"'{self.path}' is not a directory") - def open(self) -> BinaryIO: + def open(self, *args, **kwargs) -> BinaryIO: return VirtualFileHandle(self.entry) def stat(self, follow_symlinks: bool = True) -> fsutil.stat_result: @@ -1118,7 +1118,7 @@ def __init__(self, fs: Filesystem, path: str, entry: str, algo: str = "gzip"): if self._compressor is None: raise ValueError(f"Unsupported compression algorithm {algo}") - def open(self) -> BinaryIO: + def open(self, *args, **kwargs) -> BinaryIO: return self._compressor.open(self.entry, "rb") diff --git a/dissect/target/loader.py b/dissect/target/loader.py index 7a82a19cbd..a9b61dc74b 100644 --- a/dissect/target/loader.py +++ b/dissect/target/loader.py @@ -76,7 +76,7 @@ def __init__( except Exception: log.debug("Failed to resolve loader path %r", path) self.absolute_path = path - self.base_path = self.absolute_path.parent + # self.base_path = self.absolute_path.parent self.parsed_path = parsed_path self.parsed_query = ( dict(urllib.parse.parse_qsl(parsed_path.query, keep_blank_values=True)) if parsed_path else {} @@ -325,4 +325,5 @@ def open(path: str | Path, *, fallbacks: list[type[Loader]] | None = None, **kwa register("log", "LogLoader") register("remote", "RemoteLoader") register("mqtt", "MqttLoader") +register("gzip", "GzipLoader") register("multiraw", "MultiRawLoader") # Should be last From bebd0c0baa2550ec0b3bcc8bd5830880c47481ac Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Mon, 15 Dec 2025 14:20:50 +0100 Subject: [PATCH 4/7] Fix linting --- dissect/target/loaders/gzip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/target/loaders/gzip.py b/dissect/target/loaders/gzip.py index 99afe4870c..3cb1060826 100644 --- a/dissect/target/loaders/gzip.py +++ b/dissect/target/loaders/gzip.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from dissect.target.filesystem import VirtualFilesystem -from dissect.target.loader import Loader, LOADERS +from dissect.target.loader import 
LOADERS, Loader if TYPE_CHECKING: from pathlib import Path @@ -36,6 +36,6 @@ def map(self, target: target.Target) -> None: self.subloader = candidate(path) self.subloader.map(target) break - except Exception as e: # noqa: PERF203 + except Exception as e: target.log.debug("Failed to use loader %s", candidate) target.log.debug("", exc_info=e) From c479dc3a5f55a50dd557fa484269e37e99a2eb21 Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Mon, 2 Mar 2026 20:49:14 +0100 Subject: [PATCH 5/7] Rework loader to generic compression loader, and use middleware loader --- dissect/target/loader.py | 4 +- dissect/target/loaders/compression.py | 71 +++++++++++++++++++++++++++ dissect/target/loaders/gzip.py | 41 ---------------- 3 files changed, 73 insertions(+), 43 deletions(-) create mode 100644 dissect/target/loaders/compression.py delete mode 100644 dissect/target/loaders/gzip.py diff --git a/dissect/target/loader.py b/dissect/target/loader.py index a9b61dc74b..9ba9aeadeb 100644 --- a/dissect/target/loader.py +++ b/dissect/target/loader.py @@ -76,7 +76,7 @@ def __init__( except Exception: log.debug("Failed to resolve loader path %r", path) self.absolute_path = path - # self.base_path = self.absolute_path.parent + self.base_path = self.absolute_path.parent self.parsed_path = parsed_path self.parsed_query = ( dict(urllib.parse.parse_qsl(parsed_path.query, keep_blank_values=True)) if parsed_path else {} @@ -325,5 +325,5 @@ def open(path: str | Path, *, fallbacks: list[type[Loader]] | None = None, **kwa register("log", "LogLoader") register("remote", "RemoteLoader") register("mqtt", "MqttLoader") -register("gzip", "GzipLoader") +register("compression", "CompressionLoader") register("multiraw", "MultiRawLoader") # Should be last diff --git a/dissect/target/loaders/compression.py b/dissect/target/loaders/compression.py new file mode 100644 index 0000000000..e70a869ca9 --- /dev/null +++ b/dissect/target/loaders/compression.py @@ -0,0 +1,71 @@ +from __future__ import annotations + 
+from typing import TYPE_CHECKING + +from dissect.target.filesystem import VirtualFilesystem +from dissect.target.helpers import fsutil +from dissect.target.helpers.logging import get_logger +from dissect.target.loader import MiddlewareLoader + +if TYPE_CHECKING: + from pathlib import Path + + from dissect.target.target import target + +log = get_logger(__name__) + +COMPRESSION_EXT = (".gz",".lzma",".bz2",".zst") + + +class CompressionLoader(MiddlewareLoader): + """Allow loading compressed files. This does impact performance, so it's recommended to uncompress the file before passing it to Dissect.""" + + def __init__(self, path: Path, **kwargs): + super().__init__(path, **kwargs) + + log.warning( + "file %r is compressed, which will affect performance. " + "Consider uncompressing the archive before passing the file to Dissect.", + path, + ) + + @staticmethod + def detect(path: Path) -> bool: + return path.name.lower().endswith(COMPRESSION_EXT) or is_compressed_magic(path) + + def prepare(self, target: target.Target) -> Path: + filename = self.path.name.removesuffix(".gz") + vfs = VirtualFilesystem() + vfs.map_file_fh(filename, fsutil.open_decompress(self.path)) + + return vfs.path(filename) + + +def is_compressed_magic(path: Path) -> bool: + """ + Check if this is a compressed file based on the magic + Based on the magic check from fsutil.open_decompress + """ + + file = path.open("rb") + + magic = file.read(5) + file.seek(0) + + # Gzip + if magic[:2] == b"\x1f\x8b": + return True + + # LZMA + if magic[:5] == b"\xfd7zXZ": + return True + + # BZ2 + if magic[:3] == b"BZh" and 0x31 <= magic[3] <= 0x39: + return True + + # ZSTD + if magic[:4] in [b"\xfd\x2f\xb5\x28", b"\x28\xb5\x2f\xfd"]: + return True + + return False \ No newline at end of file diff --git a/dissect/target/loaders/gzip.py b/dissect/target/loaders/gzip.py deleted file mode 100644 index 3cb1060826..0000000000 --- a/dissect/target/loaders/gzip.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import 
annotations - -from typing import TYPE_CHECKING - -from dissect.target.filesystem import VirtualFilesystem -from dissect.target.loader import LOADERS, Loader - -if TYPE_CHECKING: - from pathlib import Path - - from dissect.target.target import target - -GZ_EXT = (".gz",) - - -class GzipLoader(Loader): - """Allow loading Gzip compressed files. Actual loading is handled by the normal loaders.""" - - def __init__(self, path: Path, **kwargs): - super().__init__(path, **kwargs) - - @staticmethod - def detect(path: Path) -> bool: - return path.name.lower().endswith(GZ_EXT) - - def map(self, target: target.Target) -> None: - filename = self.path.name.removesuffix(".gz") - vfs = VirtualFilesystem() - vfs.map_file(filename, self.path, "gzip") - path = vfs.get(filename) - - for candidate in LOADERS: - try: - target.log.info("Testing sub-loader %s", candidate.__name__) - if candidate.detect(path): - self.subloader = candidate(path) - self.subloader.map(target) - break - except Exception as e: - target.log.debug("Failed to use loader %s", candidate) - target.log.debug("", exc_info=e) From 7f2b2479874ba08364db0dd843aaa559875c7e6a Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Mon, 2 Mar 2026 20:52:33 +0100 Subject: [PATCH 6/7] Linting --- dissect/target/loaders/compression.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dissect/target/loaders/compression.py b/dissect/target/loaders/compression.py index e70a869ca9..d877875d86 100644 --- a/dissect/target/loaders/compression.py +++ b/dissect/target/loaders/compression.py @@ -14,11 +14,14 @@ log = get_logger(__name__) -COMPRESSION_EXT = (".gz",".lzma",".bz2",".zst") +COMPRESSION_EXT = (".gz", ".lzma", ".bz2", ".zst") class CompressionLoader(MiddlewareLoader): - """Allow loading compressed files. This does impact performance, so it's recommended to uncompress the file before passing it to Dissect.""" + """ + Allow loading compressed files. 
+ This does impact performance, so it's recommended to uncompress the file before passing it to Dissect. + """ def __init__(self, path: Path, **kwargs): super().__init__(path, **kwargs) @@ -65,7 +68,4 @@ def is_compressed_magic(path: Path) -> bool: return True # ZSTD - if magic[:4] in [b"\xfd\x2f\xb5\x28", b"\x28\xb5\x2f\xfd"]: - return True - - return False \ No newline at end of file + return magic[:4] in [b"\xfd\x2f\xb5\x28", b"\x28\xb5\x2f\xfd"] From 3ec516f4324181f05632dd42b8c1a23665170da8 Mon Sep 17 00:00:00 2001 From: Matthijs Vos Date: Thu, 19 Mar 2026 13:09:18 +0100 Subject: [PATCH 7/7] Drop support for compressed files in tar loader --- dissect/target/loaders/compression.py | 9 ++--- dissect/target/loaders/tar.py | 47 +----------------------- tests/loaders/test_compression.py | 26 ++++++++++++++ tests/loaders/test_tar.py | 52 +++++++++++++++------------ 4 files changed, 59 insertions(+), 75 deletions(-) create mode 100644 tests/loaders/test_compression.py diff --git a/dissect/target/loaders/compression.py b/dissect/target/loaders/compression.py index d877875d86..5bb397b5eb 100644 --- a/dissect/target/loaders/compression.py +++ b/dissect/target/loaders/compression.py @@ -18,8 +18,7 @@ class CompressionLoader(MiddlewareLoader): - """ - Allow loading compressed files. + """Allow loading compressed files. This does impact performance, so it's recommended to uncompress the file before passing it to Dissect. """ @@ -45,11 +44,9 @@ def prepare(self, target: target.Target) -> Path: def is_compressed_magic(path: Path) -> bool: + """Check if this is a compressed file based on the magic + Based on the magic check from fsutil.open_decompress. 
""" - Check if this is a compressed file based on the magic - Based on the magic check from fsutil.open_decompress - """ - file = path.open("rb") magic = file.read(5) diff --git a/dissect/target/loaders/tar.py b/dissect/target/loaders/tar.py index 3791bb0222..dd73cf704d 100644 --- a/dissect/target/loaders/tar.py +++ b/dissect/target/loaders/tar.py @@ -20,43 +20,9 @@ from dissect.target import target - log = get_logger(__name__) -TAR_EXT_COMP = ( - ".tar.gz", - ".tar.xz", - ".tar.bz", - ".tar.bz2", - ".tar.lzma", - ".tar.lz", - ".tgz", - ".txz", - ".tbz", - ".tbz2", - ".tlz", - ".tlzma", -) TAR_EXT = (".tar",) - -TAR_MAGIC_COMP = ( - # gzip - b"\x1f\x8b", - # bzip2 - b"\x42\x5a\x68", - # xz - b"\xfd\x37\x7a\x58\x5a\x00", - # lzma - b"\x5d\x00\x00\x01\x00", - b"\x5d\x00\x00\x10\x00", - b"\x5d\x00\x00\x08\x00", - b"\x5d\x00\x00\x10\x00", - b"\x5d\x00\x00\x20\x00", - b"\x5d\x00\x00\x40\x00", - b"\x5d\x00\x00\x80\x00", - b"\x5d\x00\x00\x00\x01", - b"\x5d\x00\x00\x00\x02", -) TAR_MAGIC = (tf.GNU_MAGIC, tf.POSIX_MAGIC) WINDOWS_MEMBERS = ( @@ -146,20 +112,13 @@ class TarLoader(Loader): def __init__(self, path: Path, **kwargs): super().__init__(path, **kwargs) - if is_compressed(path): - log.warning( - "Tar file %r is compressed, which will affect performance. 
" - "Consider uncompressing the archive before passing the tar file to Dissect.", - path, - ) - self.fh = path.open("rb") self.tar = tf.open(mode="r:*", fileobj=self.fh) # noqa: SIM115 self.subloader = None @staticmethod def detect(path: Path) -> bool: - return path.name.lower().endswith(TAR_EXT + TAR_EXT_COMP) or is_tar_magic(path, TAR_MAGIC + TAR_MAGIC_COMP) + return path.name.lower().endswith(TAR_EXT) or is_tar_magic(path, TAR_MAGIC) def map(self, target: target.Target) -> None: for candidate in self.__subloaders__: @@ -192,7 +151,3 @@ def is_tar_magic(path: Path, magics: Iterable[bytes]) -> bool: continue return True return False - - -def is_compressed(path: Path) -> bool: - return path.name.lower().endswith(TAR_EXT_COMP) or is_tar_magic(path, TAR_MAGIC_COMP) diff --git a/tests/loaders/test_compression.py b/tests/loaders/test_compression.py new file mode 100644 index 0000000000..49b8fa6def --- /dev/null +++ b/tests/loaders/test_compression.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from dissect.target.loaders.compression import CompressionLoader +from dissect.target.target import Target +from tests._utils import absolute_path + +if TYPE_CHECKING: + from pytest_benchmark.fixture import BenchmarkFixture + + +@pytest.mark.parametrize( + "archive", + [ + "_data/loaders/tar/test-archive.tar.gz", + ], +) +@pytest.mark.benchmark +def test_benchmark(benchmark: BenchmarkFixture, archive: str) -> None: + """Benchmark the loading of archives.""" + file = absolute_path(archive) + + benchmark(lambda: CompressionLoader(file).map(Target())) diff --git a/tests/loaders/test_tar.py b/tests/loaders/test_tar.py index d008d3c772..4a3947755d 100644 --- a/tests/loaders/test_tar.py +++ b/tests/loaders/test_tar.py @@ -8,6 +8,7 @@ import pytest from dissect.target.loader import open as loader_open +from dissect.target.loaders.compression import CompressionLoader from dissect.target.loaders.tar import 
GenericTarSubLoader, TarLoader from dissect.target.plugins.os.windows._os import WindowsPlugin from dissect.target.target import Target @@ -18,6 +19,8 @@ from collections.abc import Callable from pathlib import Path + from pytest_benchmark.fixture import BenchmarkFixture + @pytest.mark.parametrize( ("opener"), @@ -41,12 +44,13 @@ def test_compressed_tar_file(caplog: pytest.LogCaptureFixture) -> None: with caplog.at_level(logging.WARNING): loader = loader_open(path) - assert isinstance(loader, TarLoader) + assert isinstance(loader, CompressionLoader) assert "is compressed" in caplog.text t = Target() loader.map(t) - assert isinstance(loader.subloader, GenericTarSubLoader) + assert isinstance(loader.loader, TarLoader) + assert isinstance(loader.loader.subloader, GenericTarSubLoader) assert len(t.filesystems) == 1 @@ -61,11 +65,12 @@ def test_compressed_tar_file_with_empty_dir() -> None: path = absolute_path("_data/loaders/tar/test-archive-empty-folder.tgz") loader = loader_open(path) - assert isinstance(loader, TarLoader) + assert isinstance(loader, CompressionLoader) t = Target() loader.map(t) - assert isinstance(loader.subloader, GenericTarSubLoader) + assert isinstance(loader.loader, TarLoader) + assert isinstance(loader.loader.subloader, GenericTarSubLoader) assert len(t.filesystems) == 1 @@ -87,11 +92,12 @@ def test_case_sensitivity_windows(tmp_path: Path) -> None: _mkdir(tf, "Windows/System32") loader = loader_open(path) - assert isinstance(loader, TarLoader) + assert isinstance(loader, CompressionLoader) t = Target() loader.map(t) - assert isinstance(loader.subloader, GenericTarSubLoader) + assert isinstance(loader.loader, TarLoader) + assert isinstance(loader.loader.subloader, GenericTarSubLoader) # Make sure the case sensitiveness is changed to False and make sure we detect the target as Windows. 
assert not t.filesystems[0].case_sensitive @@ -107,11 +113,12 @@ def test_case_sensitivity_linux(tmp_path: Path) -> None: _mkdir(tf, "opt") loader = loader_open(path) - assert isinstance(loader, TarLoader) + assert isinstance(loader, CompressionLoader) t = Target() loader.map(t) - assert isinstance(loader.subloader, GenericTarSubLoader) + assert isinstance(loader.loader, TarLoader) + assert isinstance(loader.loader.subloader, GenericTarSubLoader) assert t.filesystems[0].case_sensitive @@ -121,17 +128,6 @@ def test_case_sensitivity_linux(tmp_path: Path) -> None: [ # regular tar file (True, "file.tar", ""), - # gzip tar file - (True, "file.tar.gz", ""), - (True, "file.tgz", ""), - # bzip2 tar file - (True, "file.tar.bz2", ""), - (True, "file.tar.bz", ""), - (True, "file.tbz", ""), - (True, "file.tbz2", ""), - # xz tar file - (True, "file.tar.xz", ""), - (True, "file.txz", ""), # some things it should not detect (False, "file", "00010203"), (False, "file.zip", "504b0304"), @@ -150,10 +146,6 @@ def test_detect_extension(should_detect: bool, filename: str, buffer: str, tmp_p "file", [ "small.tar", - "small.tar.bz2", - "small.tar.gz", - "small.tar.lz", - "small.tar.xz", ], ) def test_detect_buffer(file: str, tmp_path: Path) -> None: @@ -168,3 +160,17 @@ def test_detect_buffer(file: str, tmp_path: Path) -> None: tmp_tar.write_bytes(small_file.read_bytes()) assert TarLoader.detect(tmp_tar) + + +@pytest.mark.parametrize( + "archive", + [ + "_data/loaders/tar/test-archive.tar", + ], +) +@pytest.mark.benchmark +def test_benchmark(benchmark: BenchmarkFixture, archive: str) -> None: + """Benchmark the loading of archives.""" + file = absolute_path(archive) + + benchmark(lambda: TarLoader(file).map(Target()))