From 194952f9e77d679fbd9c9a1def7811a2bf32e587 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Mon, 16 Mar 2026 10:33:07 +0100 Subject: [PATCH 1/6] Add benchmark for `ls_cmd` using an ext4 test image of around 1000 files --- .../ext4/debian-trixie-bin-ext4.raw.gz | 3 ++ tests/conftest.py | 29 +++++++++++++++++++ tests/tools/test_shell.py | 27 +++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 tests/_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz diff --git a/tests/_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz b/tests/_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz new file mode 100644 index 0000000000..938a358272 --- /dev/null +++ b/tests/_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e42b237abc35dd5f06739bde2f1a21ca037aeec58cb47cc5c855dbfc10faefe +size 102265 diff --git a/tests/conftest.py b/tests/conftest.py index faf53376bd..a9de465f62 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import annotations +import gzip import importlib.util import pathlib import tempfile @@ -626,3 +627,31 @@ def guarded_keychain() -> Iterator[None]: keychain.KEYCHAIN.clear() yield keychain.KEYCHAIN.clear() + + +@pytest.fixture(scope="session") +def path_debian_ext4_raw(tmp_path_factory: pytest.TempPathFactory) -> Iterator[pathlib.Path]: + """Fixture that provides a path to a Debian Trixie ext4 raw image. + + The image only contains a /bin directory with ~1000 files and an /etc directory with some configuration files. + The rest of the filesystem is not included. + + The /bin files are all sparse (filled with zeros). + The files in /etc do contain data. + + The source image is stored compressed in the test data directory to save space. + Compressed size is 100kb and decompresses to 5mb. 
+ """ + tmp_path = tmp_path_factory.mktemp("data") + + raw_path = tmp_path / "debian-trixie-bin-ext4.raw" + with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: + raw_path.write_bytes(fh.read()) + + return raw_path + + +@pytest.fixture +def target_debian_ext4_raw(path_debian_ext4_raw: pathlib.Path) -> Target: + """Fixture that provides a Target for a Debian Trixie ext4 raw image.""" + return Target.open(path_debian_ext4_raw) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index 2875a88c95..70f442adf8 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -31,8 +31,11 @@ from collections.abc import Callable, Iterator from pathlib import Path + from pytest_benchmark.fixture import BenchmarkFixture + from dissect.target.target import Target + try: import pexpect import pexpect.expect @@ -603,3 +606,27 @@ def ansi_new_data(cls: pexpect.expect.Expecter, data: bytes) -> int | None: child.expect_exact("ubuntu:/$ ", timeout=5) child.sendline("exit") child.expect(pexpect.EOF, timeout=5) + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "args", + [ + pytest.param(""), # no flags + pytest.param("-l"), # long listing + ], +) +def test_benchmark_ls_bin( + target_debian_ext4_raw: Target, + benchmark: BenchmarkFixture, + args: str, + capsys: pytest.CaptureFixture, +) -> None: + """Benchmark ls command with different parameters with a /bin directory containing ~1000 files.""" + + def run_ls() -> None: + target_cli = TargetCli(target_debian_ext4_raw) + target_cli.onecmd(f"ls {args} /bin") + capsys.readouterr() + + benchmark(run_ls) From 8bf0bfbc77fc517cd002461b30a882b47083714b Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Mon, 16 Mar 2026 22:29:57 +0100 Subject: [PATCH 2/6] Move capsys.readouterr() outside the benchmark --- tests/tools/test_shell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index 
70f442adf8..4074185dc9 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -627,6 +627,6 @@ def test_benchmark_ls_bin( def run_ls() -> None: target_cli = TargetCli(target_debian_ext4_raw) target_cli.onecmd(f"ls {args} /bin") - capsys.readouterr() benchmark(run_ls) + capsys.readouterr() From d961eb0117911e3d56ab315fc6bb9f407fba2da4 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Tue, 17 Mar 2026 20:32:51 +0100 Subject: [PATCH 3/6] Remove test fixture and inline the gzip open in the test itself. --- tests/conftest.py | 29 ----------------------------- tests/tools/test_shell.py | 26 ++++++++++++++++++-------- 2 files changed, 18 insertions(+), 37 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a9de465f62..faf53376bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ from __future__ import annotations -import gzip import importlib.util import pathlib import tempfile @@ -627,31 +626,3 @@ def guarded_keychain() -> Iterator[None]: keychain.KEYCHAIN.clear() yield keychain.KEYCHAIN.clear() - - -@pytest.fixture(scope="session") -def path_debian_ext4_raw(tmp_path_factory: pytest.TempPathFactory) -> Iterator[pathlib.Path]: - """Fixture that provides a path to a Debian Trixie ext4 raw image. - - The image only contains a /bin directory with ~1000 files and an /etc directory with some configuration files. - The rest of the filesystem is not included. - - The /bin files are all sparse (filled with zeros). - The files in /etc do contain data. - - The source image is stored compressed in the test data directory to save space. - Compressed size is 100kb and decompresses to 5mb. 
- """ - tmp_path = tmp_path_factory.mktemp("data") - - raw_path = tmp_path / "debian-trixie-bin-ext4.raw" - with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: - raw_path.write_bytes(fh.read()) - - return raw_path - - -@pytest.fixture -def target_debian_ext4_raw(path_debian_ext4_raw: pathlib.Path) -> Target: - """Fixture that provides a Target for a Debian Trixie ext4 raw image.""" - return Target.open(path_debian_ext4_raw) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index 4074185dc9..e1d58e058a 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import gzip import os import pathlib import platform @@ -13,7 +14,9 @@ import pytest +from dissect.target.containers.raw import RawContainer from dissect.target.helpers.fsutil import TargetPath, normalize +from dissect.target.target import Target from dissect.target.tools.shell import ( DebugMode, ExtendedCmd, @@ -33,8 +36,6 @@ from pytest_benchmark.fixture import BenchmarkFixture - from dissect.target.target import Target - try: import pexpect @@ -617,16 +618,25 @@ def ansi_new_data(cls: pexpect.expect.Expecter, data: bytes) -> int | None: ], ) def test_benchmark_ls_bin( - target_debian_ext4_raw: Target, benchmark: BenchmarkFixture, args: str, capsys: pytest.CaptureFixture, ) -> None: """Benchmark ls command with different parameters with a /bin directory containing ~1000 files.""" + with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: + container = RawContainer(fh) + + t = Target() + t.disks.add(container) + t.apply() + + def run_ls() -> None: + target_cli = TargetCli(t) + target_cli.onecmd(f"ls {args} /bin") - def run_ls() -> None: - target_cli = TargetCli(target_debian_ext4_raw) - target_cli.onecmd(f"ls {args} /bin") + benchmark(run_ls) - benchmark(run_ls) - capsys.readouterr() + out, err = capsys.readouterr() + assert 
not err + assert "bash" in out + assert "zgrep" in out From ce2e5dab54f17f9a284781b1386614e9507d55e2 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Wed, 18 Mar 2026 19:49:45 +0100 Subject: [PATCH 4/6] Decompress gzip data into memory first to avoid seeking a gzip file --- tests/tools/test_shell.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index e1d58e058a..785769bc28 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -624,19 +624,21 @@ def test_benchmark_ls_bin( ) -> None: """Benchmark ls command with different parameters with a /bin directory containing ~1000 files.""" with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: - container = RawContainer(fh) + raw_image = fh.read() - t = Target() - t.disks.add(container) - t.apply() + container = RawContainer(BytesIO(raw_image)) - def run_ls() -> None: - target_cli = TargetCli(t) - target_cli.onecmd(f"ls {args} /bin") + t = Target() + t.disks.add(container) + t.apply() - benchmark(run_ls) + def run_ls() -> None: + target_cli = TargetCli(t) + target_cli.onecmd(f"ls {args} /bin") - out, err = capsys.readouterr() - assert not err - assert "bash" in out - assert "zgrep" in out + benchmark(run_ls) + + out, err = capsys.readouterr() + assert not err + assert "bash" in out + assert "zgrep" in out From af2224f51f6874238380ce2288a03c96ad6e8df6 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Wed, 18 Mar 2026 20:41:35 +0100 Subject: [PATCH 5/6] Adding back the image description to the test --- tests/tools/test_shell.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index 785769bc28..2f4f51d580 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -622,7 +622,17 @@ def test_benchmark_ls_bin( args: str, capsys: pytest.CaptureFixture, ) -> None: - 
"""Benchmark ls command with different parameters with a /bin directory containing ~1000 files.""" + """Benchmark ls command with different parameters with a /bin directory containing ~1000 files. + + The image only contains a /bin directory with ~1000 files and an /etc directory with some configuration files. + The rest of the filesystem is not included. + + The /bin files are all sparse (filled with zeros). + The files in /etc do contain data. + + The source image is stored compressed in the test data directory to save space. + Compressed size is 100kb and decompresses to 5mb. + """ with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: raw_image = fh.read() From b08cff43d93ecce1d6ba92e2b909b485e5843b3f Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Fri, 20 Mar 2026 18:33:05 +0100 Subject: [PATCH 6/6] Add notes about how to reproduce the test image --- tests/tools/test_shell.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/tools/test_shell.py b/tests/tools/test_shell.py index 2f4f51d580..c40985e2f6 100644 --- a/tests/tools/test_shell.py +++ b/tests/tools/test_shell.py @@ -632,6 +632,41 @@ def test_benchmark_ls_bin( The source image is stored compressed in the test data directory to save space. Compressed size is 100kb and decompresses to 5mb. + + How to reproduce the test image + =============================== + + source: debian-live-13.3.0-amd64-standard.iso + sha256: ee2b3d5f9bc67d801eefeddbd5698efbb0b35358724b7ed3db461be3f5e7ecd6 + extract /run/live/medium/filesystem.squashfs from the ISO + sha256 of filesystem.squashfs: 12a5feba804e23823917fa85a86814ec9953e2321c1bc4fc67c729d023ba115c + + Extract walkfs for /bin so we can recreate the same file structure with sparse files. 
+ Save some files from /etc to have some non-sparse files as well: + + $ target-shell filesystem.squashfs + localhost.localdomain:/$ walkfs --walkfs-path /bin > /tmp/walkfs-bin.records + localhost.localdomain:/$ save -o /tmp/etc /etc/debian_version /etc/group /etc/hostname + localhost.localdomain:/$ save -o /tmp/etc /etc/hosts /etc/os-release /etc/passwd /etc/shadow + + Create a raw image and format it with ext4, then mount it: + + $ qemu-img create -f raw debian-trixie-bin-ext4.raw 5M + $ sudo qemu-nbd --connect=/dev/nbd0 debian-trixie-bin-ext4.raw -f raw + $ sudo mkfs.ext4 -m 0 /dev/nbd0 + $ sudo mount /dev/nbd0 /mnt/debian-rw + + Use a small Python script to recreate the files (sparse) from walkfs input, preserving permissions and timestamps. + Copy the saved /etc files (in /tmp/etc) to the mount: + + $ python3 create-walkfs-sparse.py /tmp/walkfs-bin.records /mnt/debian-rw /tmp/etc + + Finally, gzip the raw image to save space: + + $ sync + $ sudo umount /mnt/debian-rw + $ sudo qemu-nbd --disconnect /dev/nbd0 + $ gzip debian-trixie-bin-ext4.raw """ with gzip.open(absolute_path("_data/filesystems/ext4/debian-trixie-bin-ext4.raw.gz"), "rb") as fh: raw_image = fh.read()