diff --git a/.gitignore b/.gitignore
index 74cecaf..2e944b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,6 @@ dist/
 *.pyc
 __pycache__/
 .pytest_cache/
-tests/docs/api
-tests/docs/build
+tests/_docs/api
+tests/_docs/build
 .tox/
diff --git a/dissect/hypervisor/tools/vmtar.py b/dissect/hypervisor/tools/vmtar.py
new file mode 100644
index 0000000..854da25
--- /dev/null
+++ b/dissect/hypervisor/tools/vmtar.py
@@ -0,0 +1,22 @@
+import tarfile
+
+from dissect.hypervisor.util import vmtar
+
+
+def main() -> None:
+    """Run the command line interface of the ``tarfile`` module against visor tar files."""
+    # We just want to run the main function of the tarfile module, but with our VisorTarFile and is_tarfile functions
+    # Rebuilding the function from its code object with patched globals does that without modifying tarfile itself
+    type(tarfile.main)(
+        tarfile.main.__code__,
+        tarfile.main.__globals__
+        | {
+            "TarFile": vmtar.VisorTarFile,
+            "is_tarfile": vmtar.is_tarfile,
+            "open": vmtar.open,
+        },
+    )()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/dissect/hypervisor/util/vmtar.py b/dissect/hypervisor/util/vmtar.py
index 4b79203..e52f8cc 100644
--- a/dissect/hypervisor/util/vmtar.py
+++ b/dissect/hypervisor/util/vmtar.py
@@ -5,6 +5,8 @@
 
 import struct
 import tarfile
+from io import BytesIO
+from typing import BinaryIO, Final
 
 
 class VisorTarInfo(tarfile.TarInfo):
@@ -49,9 +51,96 @@ def _proc_member(self, tarfile: tarfile.TarFile) -> VisorTarInfo | tarfile.TarIn
         return super()._proc_member(tarfile)
 
 
-def VisorTarFile(*args, **kwargs) -> tarfile.TarFile:
-    return tarfile.TarFile(*args, **kwargs, tarinfo=VisorTarInfo)
+class VisorTarFile(tarfile.TarFile):
+    """A ``tarfile.TarFile`` that reads members as ``VisorTarInfo`` and only opens visor tar files."""
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs, tarinfo=VisorTarInfo)
 
-
-def open(*args, **kwargs) -> tarfile.TarFile:
-    return tarfile.open(*args, **kwargs, tarinfo=VisorTarInfo)
+    @classmethod
+    def visoropen(cls, name: str, mode: str = "r", fileobj: BinaryIO | None = None, **kwargs) -> VisorTarFile:
+        """Open a visor tar file for reading. Supports gzip and lzma compression.
+
+        The data is tried as a plain tar first, then as a gzip stream and finally as an lzma stream inside that
+        gzip stream. Compressed archives are decompressed into memory in their entirety.
+
+        Args:
+            name: Path of the archive, used when no ``fileobj`` is given.
+            mode: Open mode, only ``"r"`` is supported.
+            fileobj: Optional seekable binary file-like object to read the archive from.
+            **kwargs: Passed on to ``taropen``.
+
+        Raises:
+            tarfile.TarError: If ``mode`` is not ``"r"``.
+            tarfile.CompressionError: If the ``gzip`` or ``lzma`` module is not available.
+            tarfile.ReadError: If the data can not be opened as a visor tar file.
+        """
+        if mode not in ("r",):
+            raise tarfile.TarError("visor currently only supports read mode")
+
+        try:
+            from gzip import GzipFile
+        except ImportError:
+            raise tarfile.CompressionError("gzip module is not available") from None
+
+        try:
+            from lzma import LZMAError, LZMAFile
+        except ImportError:
+            raise tarfile.CompressionError("lzma module is not available") from None
+
+        compressed = False
+        # Remember where a caller supplied stream starts, a failed probe leaves it at an arbitrary position
+        start = fileobj.tell() if fileobj is not None else None
+
+        try:
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except Exception:
+            if fileobj is not None:
+                # Rewind first, otherwise gzip starts decoding in the middle of the stream
+                fileobj.seek(start)
+
+            try:
+                fileobj = GzipFile(name, mode + "b", fileobj=fileobj)
+            except OSError as e:
+                if fileobj is not None and mode == "r":
+                    raise tarfile.ReadError("not a visor file") from e
+                raise
+
+            try:
+                t = cls.taropen(name, mode, fileobj, **kwargs)
+            except Exception:
+                fileobj.seek(0)
+                fileobj = LZMAFile(fileobj or name, mode)  # noqa: SIM115
+
+                try:
+                    t = cls.taropen(name, mode, fileobj, **kwargs)
+                except (LZMAError, EOFError, OSError) as e:
+                    fileobj.close()
+                    if mode == "r":
+                        raise tarfile.ReadError("not a visor file") from e
+                    raise
+                except:
+                    fileobj.close()
+                    raise
+
+            compressed = True
+
+        # If we get here, we have a valid visor tar file
+        if fileobj is not None and compressed:
+            # Just read the entire file into memory, it's probably small
+            fileobj.seek(0)
+            fileobj = BytesIO(fileobj.read())
+
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+            # This buffer is ours, so the tar file may close it; a caller supplied stream stays open
+            t._extfileobj = False
+
+        return t
+
+    # Only allow opening visor tar files
+    OPEN_METH: Final[dict[str, str]] = {"visor": "visoropen"}
+
+
+open = VisorTarFile.open
+
+is_tarfile = type(tarfile.is_tarfile)(tarfile.is_tarfile.__code__, tarfile.is_tarfile.__globals__ | {"open": open})
diff --git a/pyproject.toml b/pyproject.toml
index 8a8d670..9aa0854 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,10 +48,11 @@ dev = [
 
 [project.scripts]
 envelope-decrypt = "dissect.hypervisor.tools.envelope:main"
+vmtar = "dissect.hypervisor.tools.vmtar:main"
 
 [tool.ruff]
 line-length = 120
-required-version = ">=0.9.0"
+required-version = ">=0.11.0" [tool.ruff.format] docstring-code-format = true @@ -94,7 +95,7 @@ select = [ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"] [tool.ruff.lint.per-file-ignores] -"tests/docs/**" = ["INP001"] +"tests/_docs/**" = ["INP001"] [tool.ruff.lint.isort] known-first-party = ["dissect.hypervisor"] diff --git a/tests/data/test.VMRS b/tests/_data/descriptor/hyperv/test.VMRS similarity index 100% rename from tests/data/test.VMRS rename to tests/_data/descriptor/hyperv/test.VMRS diff --git a/tests/data/test.vmcx b/tests/_data/descriptor/hyperv/test.vmcx similarity index 100% rename from tests/data/test.vmcx rename to tests/_data/descriptor/hyperv/test.vmcx diff --git a/tests/data/encrypted.vmx b/tests/_data/descriptor/vmx/encrypted.vmx similarity index 100% rename from tests/data/encrypted.vmx rename to tests/_data/descriptor/vmx/encrypted.vmx diff --git a/tests/data/expanding.hdd/DiskDescriptor.xml b/tests/_data/disk/hdd/expanding.hdd/DiskDescriptor.xml similarity index 100% rename from tests/data/expanding.hdd/DiskDescriptor.xml rename to tests/_data/disk/hdd/expanding.hdd/DiskDescriptor.xml diff --git a/tests/data/expanding.hdd/expanding.hdd b/tests/_data/disk/hdd/expanding.hdd/expanding.hdd similarity index 100% rename from tests/data/expanding.hdd/expanding.hdd rename to tests/_data/disk/hdd/expanding.hdd/expanding.hdd diff --git a/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/plain.hdd/DiskDescriptor.xml b/tests/_data/disk/hdd/plain.hdd/DiskDescriptor.xml similarity index 100% rename from 
tests/data/plain.hdd/DiskDescriptor.xml rename to tests/_data/disk/hdd/plain.hdd/DiskDescriptor.xml diff --git a/tests/data/plain.hdd/plain.hdd b/tests/_data/disk/hdd/plain.hdd/plain.hdd similarity index 100% rename from tests/data/plain.hdd/plain.hdd rename to tests/_data/disk/hdd/plain.hdd/plain.hdd diff --git a/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/split.hdd/DiskDescriptor.xml b/tests/_data/disk/hdd/split.hdd/DiskDescriptor.xml similarity index 100% rename from tests/data/split.hdd/DiskDescriptor.xml rename to tests/_data/disk/hdd/split.hdd/DiskDescriptor.xml diff --git a/tests/data/split.hdd/split.hdd b/tests/_data/disk/hdd/split.hdd/split.hdd similarity index 100% rename from tests/data/split.hdd/split.hdd rename to tests/_data/disk/hdd/split.hdd/split.hdd diff --git a/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git 
a/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/_data/disk/hdd/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz similarity index 100% rename from tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz rename to tests/_data/disk/hdd/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz diff --git a/tests/data/dynamic.vhd.gz b/tests/_data/disk/vhd/dynamic.vhd.gz similarity index 100% rename from tests/data/dynamic.vhd.gz rename to tests/_data/disk/vhd/dynamic.vhd.gz diff --git a/tests/data/fixed.vhd.gz b/tests/_data/disk/vhd/fixed.vhd.gz similarity index 100% rename from tests/data/fixed.vhd.gz rename to tests/_data/disk/vhd/fixed.vhd.gz diff --git a/tests/data/differencing.avhdx.gz 
b/tests/_data/disk/vhdx/differencing.avhdx.gz similarity index 100% rename from tests/data/differencing.avhdx.gz rename to tests/_data/disk/vhdx/differencing.avhdx.gz diff --git a/tests/data/dynamic.vhdx.gz b/tests/_data/disk/vhdx/dynamic.vhdx.gz similarity index 100% rename from tests/data/dynamic.vhdx.gz rename to tests/_data/disk/vhdx/dynamic.vhdx.gz diff --git a/tests/data/fixed.vhdx.gz b/tests/_data/disk/vhdx/fixed.vhdx.gz similarity index 100% rename from tests/data/fixed.vhdx.gz rename to tests/_data/disk/vhdx/fixed.vhdx.gz diff --git a/tests/data/sesparse.vmdk.gz b/tests/_data/disk/vmdk/sesparse.vmdk.gz similarity index 100% rename from tests/data/sesparse.vmdk.gz rename to tests/_data/disk/vmdk/sesparse.vmdk.gz diff --git a/tests/data/encryption.info b/tests/_data/util/envelope/encryption.info similarity index 100% rename from tests/data/encryption.info rename to tests/_data/util/envelope/encryption.info diff --git a/tests/data/local.tgz.ve b/tests/_data/util/envelope/local.tgz.ve similarity index 100% rename from tests/data/local.tgz.ve rename to tests/_data/util/envelope/local.tgz.ve diff --git a/tests/data/test.vgz b/tests/_data/util/vmtar/test.vgz similarity index 100% rename from tests/data/test.vgz rename to tests/_data/util/vmtar/test.vgz diff --git a/tests/docs/Makefile b/tests/_docs/Makefile similarity index 100% rename from tests/docs/Makefile rename to tests/_docs/Makefile diff --git a/tests/docs/conf.py b/tests/_docs/conf.py similarity index 96% rename from tests/docs/conf.py rename to tests/_docs/conf.py index 7ef62d3..dc56239 100644 --- a/tests/docs/conf.py +++ b/tests/_docs/conf.py @@ -1,3 +1,5 @@ +project = "dissect.hypervisor" + extensions = [ "autoapi.extension", "sphinx.ext.autodoc", diff --git a/tests/docs/index.rst b/tests/_docs/index.rst similarity index 100% rename from tests/docs/index.rst rename to tests/_docs/index.rst diff --git a/tests/conftest.py b/tests/conftest.py index 91e0069..7ae0635 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -26,74 +26,74 @@ def open_file_gz(name: str, mode: str = "rb") -> Iterator[BinaryIO]: @pytest.fixture def encrypted_vmx() -> Iterator[BinaryIO]: - yield from open_file("data/encrypted.vmx") + yield from open_file("_data/descriptor/vmx/encrypted.vmx") @pytest.fixture def vmcx() -> Iterator[BinaryIO]: - yield from open_file("data/test.vmcx") + yield from open_file("_data/descriptor/hyperv/test.vmcx") @pytest.fixture def vmrs() -> Iterator[BinaryIO]: - yield from open_file("data/test.VMRS") + yield from open_file("_data/descriptor/hyperv/test.VMRS") @pytest.fixture def fixed_vhd() -> Iterator[BinaryIO]: - yield from open_file_gz("data/fixed.vhd.gz") + yield from open_file_gz("_data/disk/vhd/fixed.vhd.gz") @pytest.fixture def dynamic_vhd() -> Iterator[BinaryIO]: - yield from open_file_gz("data/dynamic.vhd.gz") + yield from open_file_gz("_data/disk/vhd/dynamic.vhd.gz") @pytest.fixture def fixed_vhdx() -> Iterator[BinaryIO]: - yield from open_file_gz("data/fixed.vhdx.gz") + yield from open_file_gz("_data/disk/vhdx/fixed.vhdx.gz") @pytest.fixture def dynamic_vhdx() -> Iterator[BinaryIO]: - yield from open_file_gz("data/dynamic.vhdx.gz") + yield from open_file_gz("_data/disk/vhdx/dynamic.vhdx.gz") @pytest.fixture def differencing_vhdx() -> Iterator[BinaryIO]: - yield from open_file_gz("data/differencing.avhdx.gz") + yield from open_file_gz("_data/disk/vhdx/differencing.avhdx.gz") @pytest.fixture def sesparse_vmdk() -> Iterator[BinaryIO]: - yield from open_file_gz("data/sesparse.vmdk.gz") + yield from open_file_gz("_data/disk/vmdk/sesparse.vmdk.gz") @pytest.fixture def plain_hdd() -> Iterator[str]: - return absolute_path("data/plain.hdd") + return absolute_path("_data/disk/hdd/plain.hdd") @pytest.fixture def expanding_hdd() -> Iterator[str]: - return absolute_path("data/expanding.hdd") + return absolute_path("_data/disk/hdd/expanding.hdd") @pytest.fixture def split_hdd() -> Iterator[str]: - return absolute_path("data/split.hdd") + return 
absolute_path("_data/disk/hdd/split.hdd") @pytest.fixture def envelope() -> Iterator[BinaryIO]: - yield from open_file("data/local.tgz.ve") + yield from open_file("_data/util/envelope/local.tgz.ve") @pytest.fixture def keystore() -> Iterator[TextIO]: - yield from open_file("data/encryption.info", "r") + yield from open_file("_data/util/envelope/encryption.info", "r") @pytest.fixture def vgz() -> Iterator[BinaryIO]: - yield from open_file("data/test.vgz") + yield from open_file("_data/util/vmtar/test.vgz") diff --git a/tests/descriptor/__init__.py b/tests/descriptor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_hyperv.py b/tests/descriptor/test_hyperv.py similarity index 100% rename from tests/test_hyperv.py rename to tests/descriptor/test_hyperv.py diff --git a/tests/test_ovf.py b/tests/descriptor/test_ovf.py similarity index 100% rename from tests/test_ovf.py rename to tests/descriptor/test_ovf.py diff --git a/tests/test_pvs.py b/tests/descriptor/test_pvs.py similarity index 100% rename from tests/test_pvs.py rename to tests/descriptor/test_pvs.py diff --git a/tests/test_vbox.py b/tests/descriptor/test_vbox.py similarity index 100% rename from tests/test_vbox.py rename to tests/descriptor/test_vbox.py diff --git a/tests/test_vmx.py b/tests/descriptor/test_vmx.py similarity index 100% rename from tests/test_vmx.py rename to tests/descriptor/test_vmx.py diff --git a/tests/disk/__init__.py b/tests/disk/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_hdd.py b/tests/disk/test_hdd.py similarity index 100% rename from tests/test_hdd.py rename to tests/disk/test_hdd.py diff --git a/tests/test_vhd.py b/tests/disk/test_vhd.py similarity index 100% rename from tests/test_vhd.py rename to tests/disk/test_vhd.py diff --git a/tests/test_vhdx.py b/tests/disk/test_vhdx.py similarity index 100% rename from tests/test_vhdx.py rename to tests/disk/test_vhdx.py diff --git a/tests/test_vmdk.py b/tests/disk/test_vmdk.py 
similarity index 100%
rename from tests/test_vmdk.py
rename to tests/disk/test_vmdk.py
diff --git a/tests/test_vmtar.py b/tests/test_vmtar.py
deleted file mode 100644
index a3769fa..0000000
--- a/tests/test_vmtar.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from typing import BinaryIO
-
-from dissect.hypervisor.util import vmtar
-
-
-def test_vmtar(vgz: BinaryIO) -> None:
-    tar = vmtar.open(fileobj=vgz)
-
-    members = {member.name: member for member in tar.getmembers()}
-
-    # The test file has no textPgs/fixUpPgs
-    assert all(member.is_visor for member in members.values())
-    assert set(members.keys()) == {
-        "test",
-        "test/file1",
-        "test/file2",
-        "test/file3",
-        "test/subdir",
-        "test/subdir/file4",
-    }
-
-    assert tar.extractfile(members["test/file1"]).read() == (b"a" * 512) + b"\n"
-    assert tar.extractfile(members["test/file2"]).read() == (b"b" * 1024) + b"\n"
-    assert tar.extractfile(members["test/file3"]).read() == (b"c" * 2048) + b"\n"
-    assert tar.extractfile(members["test/subdir/file4"]).read() == (b"f" * 2048) + b"\n"
diff --git a/tests/util/__init__.py b/tests/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_envelope.py b/tests/util/test_envelope.py
similarity index 100%
rename from tests/test_envelope.py
rename to tests/util/test_envelope.py
diff --git a/tests/util/test_vmtar.py b/tests/util/test_vmtar.py
new file mode 100644
index 0000000..5fa359b
--- /dev/null
+++ b/tests/util/test_vmtar.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, BinaryIO
+
+from dissect.hypervisor.tools.vmtar import main as vmtar_main
+from dissect.hypervisor.util import vmtar
+from tests.conftest import absolute_path
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    import pytest
+
+
+def test_vmtar(vgz: BinaryIO) -> None:
+    tar = vmtar.open(fileobj=vgz)
+
+    members = {member.name: member for member in tar.getmembers()}
+
+    # The test file has no textPgs/fixUpPgs
+    assert all(member.is_visor for member in members.values())
+    assert set(members.keys()) == {
+        "test",
+        "test/file1",
+        "test/file2",
+        "test/file3",
+        "test/subdir",
+        "test/subdir/file4",
+    }
+
+    assert tar.extractfile(members["test/file1"]).read() == (b"a" * 512) + b"\n"
+    assert tar.extractfile(members["test/file2"]).read() == (b"b" * 1024) + b"\n"
+    assert tar.extractfile(members["test/file3"]).read() == (b"c" * 2048) + b"\n"
+    assert tar.extractfile(members["test/subdir/file4"]).read() == (b"f" * 2048) + b"\n"
+
+
+def test_vmtar_tool(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture) -> None:
+    vgz_path = absolute_path("_data/util/vmtar/test.vgz")
+
+    with monkeypatch.context() as m:
+        m.setattr("sys.argv", ["vmtar", "-l", str(vgz_path)])
+
+        vmtar_main()
+
+    out, _ = capsys.readouterr()
+    assert out.splitlines() == [
+        "test/ ",
+        "test/file3 ",
+        "test/file2 ",
+        "test/subdir/ ",
+        "test/subdir/file4 ",
+        "test/file1 ",
+    ]
+
+    with monkeypatch.context() as m:
+        m.setattr("sys.argv", ["vmtar", "-t", str(vgz_path)])
+
+        vmtar_main()
+
+    _, err = capsys.readouterr()
+    assert err.startswith("[