Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 3 additions & 72 deletions dissect/archive/wim.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from __future__ import annotations

import io
import struct
from functools import cached_property, lru_cache
from typing import TYPE_CHECKING, BinaryIO, Callable
from functools import cached_property
from typing import TYPE_CHECKING, BinaryIO

from dissect.util.stream import AlignedStream, BufferedStream, RelativeStream
from dissect.util.stream import BufferedStream, CompressedStream, RelativeStream
from dissect.util.ts import wintimestamp

from dissect.archive.c_wim import (
Expand Down Expand Up @@ -428,74 +427,6 @@ def relative(self) -> bool:
return self.info.Flags == SYMLINK_FLAG.RELATIVE


class CompressedStream(AlignedStream):
    """Stream that transparently decompresses a chunked, compressed resource.

    The resource begins at ``offset`` in ``fh`` with a table of little-endian chunk offsets,
    immediately followed by the compressed chunk data. The first chunk always starts at offset 0
    and is therefore not stored in the table. Every chunk except possibly the last one decompresses
    to ``chunk_size`` bytes.

    Args:
        fh: The file-like object to read the chunk table and compressed chunks from.
        offset: Offset in ``fh`` at which the chunk table starts.
        compressed_size: End offset of the last chunk, relative to the start of the chunk data.
        original_size: Total size of the decompressed data, which is also the size of this stream.
        decompressor: Callable that turns the raw bytes of one chunk into its decompressed bytes.
        chunk_size: Decompressed size of a single chunk.
    """

    def __init__(
        self,
        fh: BinaryIO,
        offset: int,
        compressed_size: int,
        original_size: int,
        decompressor: Callable[[bytes], bytes],
        chunk_size: int = DEFAULT_CHUNK_SIZE,
    ):
        self.fh = fh
        self.offset = offset
        self.compressed_size = compressed_size
        self.original_size = original_size
        self.decompressor = decompressor
        self.chunk_size = chunk_size

        # Read the chunk table in advance
        fh.seek(self.offset)
        # The table holds one entry less than the number of chunks, as the first chunk is implicit
        num_chunks = (original_size + self.chunk_size - 1) // self.chunk_size - 1
        if num_chunks <= 0:
            # Zero or one chunk: there is no table to read. This also covers an empty resource, for
            # which the calculation above yields -1 and would otherwise build an invalid struct format.
            self._chunks = (0,)
        else:
            # Entries are 64-bit when the decompressed size does not fit in 32 bits
            entry_size = "Q" if original_size > 0xFFFFFFFF else "I"
            pattern = f"<{num_chunks}{entry_size}"
            self._chunks = (0, *struct.unpack(pattern, fh.read(struct.calcsize(pattern))))

        # Chunk offsets are relative to the first byte after the chunk table
        self._data_offset = fh.tell()

        # Per-instance cache of recently decompressed chunks
        self._read_chunk = lru_cache(32)(self._read_chunk)
        super().__init__(self.original_size)

    def _read(self, offset: int, length: int) -> bytes:
        """Read ``length`` decompressed bytes starting at decompressed ``offset``.

        Returns fewer bytes than requested when the request runs past the last chunk.
        """
        result = []

        num_chunks = len(self._chunks)
        chunk, offset_in_chunk = divmod(offset, self.chunk_size)

        while length:
            if chunk >= num_chunks:
                # We somehow requested more data than we have runs for
                break

            chunk_offset = self._chunks[chunk]
            if chunk < num_chunks - 1:
                next_chunk_offset = self._chunks[chunk + 1]
                chunk_remaining = self.chunk_size - offset_in_chunk
            else:
                # The last chunk runs until the end of the compressed data and may be short
                next_chunk_offset = self.compressed_size
                chunk_remaining = (self.original_size - (chunk * self.chunk_size)) - offset_in_chunk

            read_length = min(chunk_remaining, length)

            buf = self._read_chunk(chunk_offset, next_chunk_offset - chunk_offset)
            result.append(buf[offset_in_chunk : offset_in_chunk + read_length])

            length -= read_length
            chunk += 1
            # Only the first chunk of a request can start mid-chunk; all following chunks are read
            # from their beginning
            offset_in_chunk = 0

        return b"".join(result)

    def _read_chunk(self, offset: int, size: int) -> bytes:
        """Read ``size`` compressed bytes at ``offset`` (relative to the chunk data) and decompress them."""
        self.fh.seek(self._data_offset + offset)
        buf = self.fh.read(size)
        return self.decompressor(buf)


def _ts_to_ns(ts: int) -> int:
"""Convert Windows timestamps to nanosecond timestamps."""
return (ts * 100) - 11644473600000000000
Expand Down
3 changes: 3 additions & 0 deletions tests/_data/uncompressed.wim.gz
Git LFS file not shown
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ def open_file_gz(name: str, mode: str = "rb") -> Iterator[BinaryIO]:
yield f


@pytest.fixture
def uncompressed_wim() -> Iterator[BinaryIO]:
    """Provide the ``_data/uncompressed.wim.gz`` test file as an open binary file object."""
    handle = open_file_gz("_data/uncompressed.wim.gz")
    yield from handle


@pytest.fixture
def lzms_wim() -> Iterator[BinaryIO]:
    """Provide the ``_data/lzms.wim.gz`` test file as an open binary file object."""
    handle = open_file_gz("_data/lzms.wim.gz")
    yield from handle


@pytest.fixture
def lzx_wim() -> Iterator[BinaryIO]:
    """Provide the ``_data/lzx.wim.gz`` test file as an open binary file object."""
    handle = open_file_gz("_data/lzx.wim.gz")
    yield from handle


@pytest.fixture
def basic_wim_4k() -> Iterator[BinaryIO]:
    """Provide the ``_data/basic4k.wim.gz`` test file as an open binary file object."""
    handle = open_file_gz("_data/basic4k.wim.gz")
    yield from handle
Expand Down
15 changes: 10 additions & 5 deletions tests/test_wim.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,24 @@
("basic_wim_8k", 0x2000),
("basic_wim_16k", 0x4000),
("basic_wim_32k", 0x8000),
("uncompressed_wim", 0),
],
)
def test_wim(fixture: BinaryIO, chunk_size: int, request: pytest.FixtureRequest) -> None:
value = request.getfixturevalue(fixture)
wim = WIM(value)

assert wim.header.CompressionSize == chunk_size

resource = next(iter(wim.resources.values()))
assert resource.open().chunk_size == chunk_size
if chunk_size:
resource = next(iter(wim.resources.values()))
assert resource.open().chunk_size == chunk_size

stream = CompressedStream(wim.fh, resource.offset, resource.size, resource.original_size, decompress, chunk_size)
assert resource.wim.header.CompressionSize == stream.chunk_size
assert resource.open().read() == stream.read()
stream = CompressedStream(
wim.fh, resource.offset, resource.size, resource.original_size, decompress, chunk_size
)
assert resource.wim.header.CompressionSize == stream.chunk_size
assert resource.open().read() == stream.read()

images = list(wim.images())
assert len(images) == 1
Expand Down
Loading