Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dissect/target/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ def get(self, path: str) -> FilesystemEntry:
def scandir(self) -> Iterator[DirEntry]:
raise NotADirectoryError(f"'{self.path}' is not a directory")

def open(self) -> BinaryIO:
def open(self, *args, **kwargs) -> BinaryIO:
return VirtualFileHandle(self.entry)

def stat(self, follow_symlinks: bool = True) -> fsutil.stat_result:
Expand Down Expand Up @@ -1147,7 +1147,7 @@ def __init__(self, fs: Filesystem, path: str, entry: str, algo: str = "gzip"):
if self._compressor is None:
raise ValueError(f"Unsupported compression algorithm {algo}")

def open(self) -> BinaryIO:
def open(self, *args, **kwargs) -> BinaryIO:
return self._compressor.open(self.entry, "rb")


Expand Down
37 changes: 37 additions & 0 deletions dissect/target/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,42 @@ def map(self, target: Target) -> None:
raise NotImplementedError


class MiddlewareLoader(Loader):
    """A base class for preparing arbitrary data to be used by other :class:`Loader` classes.

    Instead of mapping data directly to a :class:`Target <dissect.target.target.Target>`, loaders of this type
    prepare data in some way and make it available for other loaders to use.

    Subclasses should implement the :meth:`detect` method like any other loader, and return a path to the prepared
    data in the :meth:`prepare` method. The :meth:`map` method will then use that path to find another loader to map
    the prepared data into the target.

    Args:
        path: The path to the data that needs to be prepared.
        fallbacks: Loader classes to pass as fallbacks to ``find_loader`` for the prepared path.
            When ``None``, ``[DirLoader, RawLoader]`` is used.
        **kwargs: Passed through to :class:`Loader`.
    """

    def __init__(self, path: Path, *, fallbacks: list[type[Loader]] | None = None, **kwargs):
        super().__init__(path, **kwargs)
        # Remember the caller-supplied fallbacks so map() can honour them
        self.fallbacks = fallbacks
        # This will be the loader that successfully mapped the prepared path
        self.loader = None

    @staticmethod
    def detect(path: Path) -> bool:
        """Return whether this loader can handle ``path``. Must be implemented by subclasses."""
        raise NotImplementedError

    def prepare(self, target: Target) -> Path:
        """Prepare the data and return the path other loaders should load. Must be implemented by subclasses."""
        raise NotImplementedError

    def map(self, target: Target) -> None:
        """Prepare the data, find a loader for the prepared path and let it map into ``target``."""
        path = self.prepare(target)

        # Only fall back to the built-in defaults when the caller did not specify anything
        fallbacks = [DirLoader, RawLoader] if self.fallbacks is None else self.fallbacks

        if (loader := find_loader(path, fallbacks=fallbacks)) is None:
            # No loader for the prepared data: nothing is mapped and ``self.loader`` stays ``None``
            return

        ldr = loader(path)
        ldr.map(target)

        # Store a reference to the loader if we successfully mapped
        self.loader = ldr


def register(module_name: str, class_name: str, internal: bool = True) -> None:
"""Registers a ``Loader`` class inside ``LOADERS``.

Expand Down Expand Up @@ -290,4 +326,5 @@ def open(path: str | Path, *, fallbacks: list[type[Loader]] | None = None, **kwa
register("log", "LogLoader")
register("remote", "RemoteLoader")
register("mqtt", "MqttLoader")
register("compression", "CompressionLoader")
register("multiraw", "MultiRawLoader") # Should be last
68 changes: 68 additions & 0 deletions dissect/target/loaders/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from dissect.target.filesystem import VirtualFilesystem
from dissect.target.helpers import fsutil
from dissect.target.helpers.logging import get_logger
from dissect.target.loader import MiddlewareLoader

if TYPE_CHECKING:
from pathlib import Path

from dissect.target.target import target

log = get_logger(__name__)

# File name suffixes treated as compressed files. ``.xz`` is included because the XZ magic is
# already recognized by the magic-based detection in this module.
COMPRESSION_EXT = (".gz", ".lzma", ".xz", ".bz2", ".zst")


class CompressionLoader(MiddlewareLoader):
    """Allow loading compressed files.

    This does impact performance, so it's recommended to uncompress the file before passing it to Dissect.
    """

    def __init__(self, path: Path, **kwargs):
        super().__init__(path, **kwargs)

        log.warning(
            "file %r is compressed, which will affect performance. "
            "Consider uncompressing the archive before passing the file to Dissect.",
            path,
        )

    @staticmethod
    def detect(path: Path) -> bool:
        """Return whether ``path`` has a known compression extension or compression magic bytes."""
        return path.name.lower().endswith(COMPRESSION_EXT) or is_compressed_magic(path)

    def prepare(self, target: target.Target) -> Path:
        """Map the decompressed file into a virtual filesystem and return its path.

        The compression extension is stripped from the file name (case-insensitively, matching
        :meth:`detect`) so the loader lookup on the prepared path sees the inner file name.
        Files detected by magic only keep their original name.
        """
        filename = self.path.name
        lowered = filename.lower()
        for ext in COMPRESSION_EXT:
            # Never strip down to an empty name (e.g. a file literally called ".gz")
            if lowered.endswith(ext) and len(filename) > len(ext):
                filename = filename[: -len(ext)]
                break

        vfs = VirtualFilesystem()
        vfs.map_file_fh(filename, fsutil.open_decompress(self.path))

        return vfs.path(filename)


def is_compressed_magic(path: Path) -> bool:
    """Check if this is a compressed file based on the magic.

    Based on the magic check from ``fsutil.open_decompress``.

    Args:
        path: The path to inspect.

    Returns:
        ``True`` if the first bytes of the file match the gzip, XZ, bzip2 or Zstandard magic.
        ``False`` otherwise, including when ``path`` is not a regular file or is too short.
    """
    # Detection may be attempted on directories; those can never carry a magic
    if not path.is_file():
        return False

    # Close the handle again, only the first few bytes are needed
    with path.open("rb") as fh:
        magic = fh.read(5)

    # Gzip
    if magic[:2] == b"\x1f\x8b":
        return True

    # XZ (LZMA)
    if magic[:5] == b"\xfd7zXZ":
        return True

    # BZ2: "BZh" followed by an ASCII digit 1-9.
    # Slicing (instead of indexing) keeps this safe for files shorter than 4 bytes.
    if magic[:3] == b"BZh" and b"1" <= magic[3:4] <= b"9":
        return True

    # ZSTD
    return magic[:4] in (b"\xfd\x2f\xb5\x28", b"\x28\xb5\x2f\xfd")
47 changes: 1 addition & 46 deletions dissect/target/loaders/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,43 +20,9 @@

from dissect.target import target


log = get_logger(__name__)

TAR_EXT_COMP = (
".tar.gz",
".tar.xz",
".tar.bz",
".tar.bz2",
".tar.lzma",
".tar.lz",
".tgz",
".txz",
".tbz",
".tbz2",
".tlz",
".tlzma",
)
TAR_EXT = (".tar",)

TAR_MAGIC_COMP = (
# gzip
b"\x1f\x8b",
# bzip2
b"\x42\x5a\x68",
# xz
b"\xfd\x37\x7a\x58\x5a\x00",
# lzma
b"\x5d\x00\x00\x01\x00",
b"\x5d\x00\x00\x10\x00",
b"\x5d\x00\x00\x08\x00",
b"\x5d\x00\x00\x10\x00",
b"\x5d\x00\x00\x20\x00",
b"\x5d\x00\x00\x40\x00",
b"\x5d\x00\x00\x80\x00",
b"\x5d\x00\x00\x00\x01",
b"\x5d\x00\x00\x00\x02",
)
TAR_MAGIC = (tf.GNU_MAGIC, tf.POSIX_MAGIC)

WINDOWS_MEMBERS = (
Expand Down Expand Up @@ -146,20 +112,13 @@ class TarLoader(Loader):
def __init__(self, path: Path, **kwargs):
super().__init__(path, **kwargs)

if is_compressed(path):
log.warning(
"Tar file %r is compressed, which will affect performance. "
"Consider uncompressing the archive before passing the tar file to Dissect.",
path,
)

self.fh = path.open("rb")
self.tar = tf.open(mode="r:*", fileobj=self.fh) # noqa: SIM115
self.subloader = None

@staticmethod
def detect(path: Path) -> bool:
return path.name.lower().endswith(TAR_EXT + TAR_EXT_COMP) or is_tar_magic(path, TAR_MAGIC + TAR_MAGIC_COMP)
return path.name.lower().endswith(TAR_EXT) or is_tar_magic(path, TAR_MAGIC)

def map(self, target: target.Target) -> None:
for candidate in self.__subloaders__:
Expand Down Expand Up @@ -192,7 +151,3 @@ def is_tar_magic(path: Path, magics: Iterable[bytes]) -> bool:
continue
return True
return False


def is_compressed(path: Path) -> bool:
return path.name.lower().endswith(TAR_EXT_COMP) or is_tar_magic(path, TAR_MAGIC_COMP)
36 changes: 15 additions & 21 deletions dissect/target/loaders/vbk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from dissect.target.exceptions import LoaderError
from dissect.target.filesystem import VirtualFilesystem
from dissect.target.filesystems.vbk import VbkFilesystem
from dissect.target.loader import Loader, find_loader
from dissect.target.loaders.raw import RawLoader
from dissect.target.loader import MiddlewareLoader

if TYPE_CHECKING:
from pathlib import Path
Expand All @@ -19,7 +18,7 @@
RE_RAW_DISK = re.compile(r"(?:[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})|(?:DEV__.+)")


class VbkLoader(Loader):
class VbkLoader(MiddlewareLoader):
"""Load Veaam Backup (VBK) files.

References:
Expand All @@ -35,7 +34,7 @@ def __init__(self, path: Path, **kwargs):
def detect(path: Path) -> bool:
return path.suffix.lower() == ".vbk"

def map(self, target: Target) -> None:
def prepare(self, target: Target) -> Path:
# We haven't really researched any of the VBK metadata yet, so just try some common formats
root = self.vbkfs.path("/")
if (base := next(root.glob("*"), None)) is None:
Expand All @@ -51,24 +50,19 @@ def map(self, target: Target) -> None:

candidates.append(root.joinpath("+".join(map(str, disks))))

# Try to find a loader
for candidate in candidates:
if candidate.suffix.lower() == ".vmcx":
# For VMCX files we need to massage the file layout a bit
vfs = VirtualFilesystem()
vfs.map_file_entry(candidate.name, candidate)
# We should only have one candidate at this point
if len(candidates) > 1:
raise LoaderError("Unsupported VBK structure, use `-L raw` to manually inspect the VBK")

for entry in chain(base.glob("Ide*/*"), base.glob("Scsi*/*")):
vfs.map_file_entry(entry.name, entry)
candidate = candidates[0]
if candidate.suffix.lower() == ".vmcx":
# For VMCX files we need to massage the file layout a bit
vfs = VirtualFilesystem()
vfs.map_file_entry(candidate.name, candidate)

candidate = vfs.path(candidate.name)
for entry in chain(base.glob("Ide*/*"), base.glob("Scsi*/*")):
vfs.map_file_entry(entry.name, entry)

if (loader := find_loader(candidate, fallbacks=[RawLoader])) is not None:
ldr = loader(candidate)
ldr.map(target)
candidate = vfs.path(candidate.name)

# Store a reference to the loader if we successfully mapped
self.loader = ldr
break
else:
raise LoaderError("Unsupported VBK structure, use `-L raw` to manually inspect the VBK")
return candidate
26 changes: 26 additions & 0 deletions tests/loaders/test_compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

from dissect.target.loaders.compression import CompressionLoader
from dissect.target.target import Target
from tests._utils import absolute_path

if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture


@pytest.mark.parametrize(
    "archive",
    ["_data/loaders/tar/test-archive.tar.gz"],
)
@pytest.mark.benchmark
def test_benchmark(benchmark: BenchmarkFixture, archive: str) -> None:
    """Benchmark mapping a compressed archive into a fresh target."""
    archive_path = absolute_path(archive)

    def load_archive() -> None:
        # Construct the loader and map it on every round, exactly one load per call
        return CompressionLoader(archive_path).map(Target())

    benchmark(load_archive)
Loading