# Upper bound on simultaneously open segments; the least recently used one is evicted beyond this.
MAX_OPEN_SEGMENTS = 128


class AFF4:
    """AFF4 evidence container.

    Every segment is opened once during construction to record its URI and to merge
    its ``information.turtle`` metadata into one :class:`Information` view.

    Args:
        fh: A file-like object, ``pathlib.Path`` or a list of those representing the AFF4 segments.
    """

    def __init__(self, fh: BinaryIO | list[BinaryIO] | pathlib.Path | list[pathlib.Path]):
        self.fh = fh if isinstance(fh, list) else [fh]
        # Open segments and their LRU order are both keyed by index into ``self.fh``
        self._segments: dict[int, Segment] = {}
        self._segment_lru: list[int] = []
        # Segment URI -> index into ``self.fh``
        self._segment_map: dict[str, int] = {}

        all_information: dict[str, dict[str, ValueType]] = {}

        for idx in range(len(self.fh)):
            segment = self.segment(idx)

            self._segment_map[segment.uri] = idx
            for key, value in segment.information.items():
                # The same subject can appear in several segments; keep the entry with the most predicates
                if key in all_information and len(value) < len(all_information[key]):
                    continue
                all_information[key] = value

        self.information = Information(self, all_information)

    def segment(self, idx: int | str) -> Segment:
        """Open a segment by index or URI.

        Implements a simple LRU cache to limit the number of open segments.

        Args:
            idx: Index or URI of the segment to open.

        Returns:
            The opened :class:`Segment` object.

        Raises:
            KeyError: If ``idx`` is a URI that does not belong to any known segment.
        """
        if isinstance(idx, str):
            idx = self._segment_map[idx]

        # Poor mans LRU: a hit moves the index to the most recently used end
        if idx in self._segments:
            self._segment_lru.remove(idx)
            self._segment_lru.append(idx)
            return self._segments[idx]

        if len(self._segment_lru) >= MAX_OPEN_SEGMENTS:
            oldest_idx = self._segment_lru.pop(0)
            oldest_segment = self._segments.pop(oldest_idx)

            # Only close handles we opened ourselves: if the caller handed us a file-like
            # object it stays open, and a directory ``Path`` has nothing to close.
            if not hasattr(self.fh[oldest_idx], "read") and hasattr(oldest_segment.fh, "close"):
                oldest_segment.fh.close()

        fh = self.fh[idx]
        if not hasattr(fh, "read") and fh.is_file():
            fh = fh.open("rb")

        segment = Segment(self, fh)

        self._segments[idx] = segment
        self._segment_lru.append(idx)

        return segment

    def disks(self) -> list[DiskImage]:
        """List all disk images in the AFF4 evidence."""
        return list(self.information.find("DiskImage"))

    def files(self) -> list[FileImage]:
        """List all file images in the AFF4 evidence."""
        return list(self.information.find("FileImage"))


class Segment:
    """AFF4 segment.

    A file-like object is treated as a ZIP archive; anything else is used directly
    as a path-like object that the segment's files are resolved against.

    Args:
        aff4: The parent :class:`AFF4` object.
        fh: A file-like object or ``pathlib.Path`` representing the segment.
    """

    def __init__(self, aff4: AFF4, fh: BinaryIO | pathlib.Path):
        self.aff4 = aff4
        self.fh = fh
        self._zip = None

        if hasattr(self.fh, "read"):
            self._zip = zipfile.ZipFile(self.fh)
            self.path = zipfile.Path(self._zip)
        else:
            self.path = fh

    @cached_property
    def uri(self) -> str:
        """Return the URI of the segment.

        Read from ``container.description`` when present, otherwise from the ZIP
        archive comment (up to the first NUL byte).

        Raises:
            ValueError: If neither source provides a URI.
        """
        if (path := self.path.joinpath("container.description")).exists():
            return path.read_text()

        if self._zip is not None and self._zip.comment:
            # ``split`` returns a list; the URI is the part before the first NUL byte
            return self._zip.comment.split(b"\x00", 1)[0].decode()

        raise ValueError("No URI found in segment")

    @cached_property
    def version(self) -> dict[str, str]:
        """Return the version information of the segment.

        Parses the ``key=value`` lines of ``version.txt``; lines without ``=`` are ignored.

        Raises:
            ValueError: If the segment has no ``version.txt``.
        """
        if not (path := self.path.joinpath("version.txt")).exists():
            raise ValueError("No version.txt found in segment")

        result = {}
        with path.open("rt") as fh:
            for line in fh:
                if "=" in line:
                    key, _, value = line.strip().partition("=")
                    result[key] = value

        return result

    @cached_property
    def information(self) -> dict[str, ValueType]:
        """Return the parsed ``information.turtle`` of the segment.

        Raises:
            ValueError: If the segment has no ``information.turtle``.
        """
        if not (path := self.path.joinpath("information.turtle")).exists():
            raise ValueError("No information.turtle found in segment")

        with path.open("rt") as fh:
            return parse_turtle(fh)

    def get(self, path: str) -> pathlib.Path | zipfile.Path:
        """Resolve a path of a file in the segment.

        A path that starts with the segment URI has that prefix removed; any other
        path is URL-quoted (``quote_plus``) before it is joined to the segment root.

        Args:
            path: Path to the file in the segment.

        Returns:
            A :class:`Path` or :class:`zipfile.Path` object representing the file.
        """
        path = path.removeprefix(self.uri) if path.startswith(self.uri) else urllib.parse.quote_plus(path)
        return self.path.joinpath(path)
+ + Args: + ctx: The parent :class:`Information` object. + id: The ID of the object. + values: A dictionary mapping predicates to their values. + """ + + __type__ = None + __types__: ClassVar[dict[str, type[Object]]] = {} + + def __init_subclass__(cls): + if cls.__type__ is not None: + cls.__types__[cls.__type__] = cls + + def __init__(self, ctx: Information, id: str, values: dict[str, str]): + self.ctx = ctx + self.id = id + self.values = values + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id} type={self.type!r}>" + + def __getitem__(self, key: str) -> ValueType: + if (result := self.get(key)) is None: + raise KeyError(key) + return result + + @classmethod + def from_values(cls, ctx: Information, id: str, values: dict[str, str]) -> Object: + """Create an object from its values, instantiating the appropriate subclass based on its type.""" + if (type := values.get(f"{NS_RDF}type")) and ( + subcls := cls.__types__.get(type if isinstance(type, str) else type[0]) + ): + return subcls(ctx, id, values) + return cls(ctx, id, values) + + @property + def type(self) -> str | list[str] | None: + """Return the RDF type of the object.""" + return self.values.get(f"{NS_RDF}type") + + def _transform_value(self, value: ValueType) -> ValueType | Object | None: + """Transform a value into an ``Object`` if it's a reference.""" + if isinstance(value, str) and value.startswith("<") and value.endswith(">"): + value = value[1:-1] + return self.ctx.get(value) or UnresolvedObject(self.ctx, value, {}) + return value + + def get(self, predicate: str, *, prefix: str = NS_AFF4) -> ValueType | None: + """Get a property of the object.""" + if (result := self.values.get(f"{prefix}{predicate}")) is not None: + if isinstance(result, list): + result = [self._transform_value(r) for r in result] + result = self._transform_value(result) + return result + + +class UnresolvedObject(Object): + """AFF4 unresolved object. 
+ + Represents an object that could not be resolved from the AFF4 information. + """ + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id}>" + + def get(self, predicate: str, prefix: str = NS_AFF4) -> ValueType | None: + raise ValueError(f"Cannot get property {predicate} of unresolved object {self.id}") + + +class Map(Object): + """AFF4 Map object.""" + + __type__ = f"{NS_AFF4}Map" + + _ENTRY = struct.Struct(" str: + return f"<{self.__class__.__name__} {self.id} size={self.size}>" + + @property + def size(self) -> int: + """Return the size of the mapped stream.""" + return self["size"] + + @property + def map_gap_default_stream(self) -> str: + """Return the default gap stream type.""" + return self["mapGapDefaultStream"] + + @property + def dependent_stream(self) -> ImageStream | list[ImageStream]: + """Return the dependent stream(s).""" + return self["dependentStream"] + + @property + def target(self) -> Object: + """Return the target (parent) object.""" + return self["target"] + + @property + def stored(self) -> Object: + """Return the volume the stream is stored in.""" + return self["stored"] + + @property + def index(self) -> list[str]: + """Return the list of stream IDs in the map index.""" + segment = self.ctx.aff4.segment(self.stored.id) + return segment.get(self.id).joinpath("idx").read_text().splitlines() + + @property + def map(self) -> dict[int, tuple[int, int, int, int]]: + """Return the mapping of the stream.""" + segment = self.ctx.aff4.segment(self.stored.id) + + result = {} + with segment.get(self.id).joinpath("map").open("rb") as fh: + while buf := fh.read(self._ENTRY.size): + mapped, length, target, idx = self._ENTRY.unpack(buf) + result[mapped + length] = (mapped, length, target, idx) + + return result + + def open(self) -> MapStream: + """Open the mapped stream for reading.""" + return MapStream(self) + + +class ImageStream(Object): + """AFF4 ImageStream object.""" + + __type__ = f"{NS_AFF4}ImageStream" + + def 
__repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id} target={self.target.id} stored={self.stored.id}>" + + @property + def chunk_size(self) -> int: + """Return the chunk size of the image stream.""" + return self["chunkSize"] + + @property + def chunks_in_segment(self) -> int: + """Return the number of chunks in each segment.""" + return self["chunksInSegment"] + + @property + def compression_method(self) -> CompressionMethod: + """Return the compression method of the image stream.""" + return self["compressionMethod"] + + @property + def size(self) -> int: + """Return the size of the image stream.""" + return self["size"] + + @property + def target(self) -> Object: + """Return the target (parent) object.""" + return self["target"] + + @property + def stored(self) -> Object: + """Return the volume the stream is stored in.""" + return self["stored"] + + def open(self) -> BevyStream: + """Open the image stream for reading.""" + return BevyStream(self) + + +class Image(Object): + """AFF4 Image object.""" + + __type__ = f"{NS_AFF4}Image" + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id} hash={self.hash}>" + + @property + def hash(self) -> str | list[str]: + """Return the hash(es) of the image.""" + return self["hash"] + + @property + def size(self) -> int: + """Return the size of the image.""" + return self["size"] + + @property + def block_size(self) -> int: + """Return the block size of the image.""" + return self["blockSize"] + + +class ContiguousImage(Image): + """AFF4 ContiguousImage object.""" + + __type__ = f"{NS_AFF4}ContiguousImage" + + @property + def data_stream(self) -> Map | ImageStream: + """Return the data stream of the image.""" + return self["dataStream"] + + def open(self) -> BufferedStream: + """Open the image for reading.""" + return BufferedStream(self.data_stream.open(), size=self.size) + + +class DiskImage(ContiguousImage): + """AFF4 DiskImage object.""" + + __type__ = f"{NS_AFF4}DiskImage" + + 
+class FileImage(Image): + """AFF4 FileImage object.""" + + __type__ = f"{NS_AFF4}FileImage" + + @property + def birth_time(self) -> datetime.datetime: + """Return the birth time of the file.""" + return self["birthTime"] + + @property + def last_accessed(self) -> datetime.datetime: + """Return the last accessed time of the file.""" + return self["lastAccessed"] + + @property + def last_written(self) -> datetime.datetime: + """Return the last written time of the file.""" + return self["lastWritten"] + + @property + def record_changed(self) -> datetime.datetime: + """Return the record changed time of the file.""" + return self["recordChanged"] + + @property + def original_file_name(self) -> str: + """Return the original file name of the file.""" + return self["originalFileName"] + + @property + def stored(self) -> Object: + """Return the volume the file is stored in.""" + return self["stored"] + + +class ZipVolume(Object): + """AFF4 ZipVolume object.""" + + __type__ = f"{NS_AFF4}ZipVolume" + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id} creation_time={self.creation_time}>" + + @property + def contains(self) -> list[Object]: + """Return the contained objects.""" + return self["contains"] + + @property + def creation_time(self) -> datetime.datetime: + """Return the creation time of the volume.""" + return self["creationTime"] + + @property + def interface(self) -> str: + """Return the interface of the volume.""" + return self["interface"] + + @property + def stored(self) -> str: + """Return the storage location of the volume.""" + return self["stored"] + + +class CaseDetails(Object): + """AFF4 CaseDetails object.""" + + __type__ = f"{NS_AFF4}CaseDetails" + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} {self.id} case_name={self.case_name!r}>" + + @property + def case_description(self) -> str: + """Return the case description.""" + return self["caseDescription"] + + @property + def case_name(self) -> str: + """Return 
the case name.""" + return self["caseName"] + + @property + def examiner(self) -> str: + """Return the examiner name.""" + return self["examiner"] + + @property + def stored(self) -> Object: + """Return the storage location of the case details.""" + return self["stored"] + + @property + def target(self) -> Object: + """Return the target (parent) object.""" + return self["target"] + + +class CaseNotes(Object): + """AFF4 CaseNotes object.""" + + __type__ = f"{NS_AFF4}CaseNotes" + + def __repr__(self) -> str: + return ( + f"<{self.__class__.__name__} {self.id} " + f"case_number={self.case_number!r} evidence_number={self.evidence_number!r}>" + ) + + @property + def case_number(self) -> str: + """Return the case number.""" + return self["caseNumber"] + + @property + def evidence_number(self) -> str: + """Return the evidence number.""" + return self["evidenceNumber"] + + @property + def examiner(self) -> str: + """Return the examiner name.""" + return self["examiner"] + + @property + def notes(self) -> str: + """Return the case notes.""" + return self["notes"] + + @property + def timestamp(self) -> datetime.datetime: + """Return the timestamp of the case notes.""" + return self["timestamp"] + + @property + def stored(self) -> Object: + """Return the storage location of the case notes.""" + return self["stored"] + + @property + def target(self) -> Object: + """Return the target (parent) object.""" + return self["target"] + + +class TimeStamps(Object): + """AFF4 TimeStamps object.""" + + __type__ = f"{NS_AFF4}TimeStamps" + + def __repr__(self) -> str: + return ( + f"<{self.__class__.__name__} {self.id} " + f"operation={self.operation!r} start_time={self.start_time} end_time={self.end_time}>" + ) + + @property + def start_time(self) -> datetime.datetime: + """Return the start time of the operation.""" + return self["startTime"] + + @property + def end_time(self) -> datetime.datetime: + """Return the end time of the operation.""" + return self["endTime"] + + @property + def 
def _open_stream(ctx: Information, id: str) -> BinaryIO:
    """Open a stream by its AFF4 ID.

    The well-known symbolic IDs (``Zero``, ``UnknownData``, ``UnreadableData`` and
    ``SymbolicStreamXX``) map to a :class:`SymbolicStream`; any other ID is looked up
    in the AFF4 information and opened through the object's ``open()`` method.

    Args:
        ctx: The :class:`Information` object used to resolve ``id``.
        id: The AFF4 ID of the stream.

    Raises:
        ValueError: If the ID is unknown, the object cannot be opened, or opening yields nothing.
    """
    if id == f"{NS_AFF4}Zero":
        return SymbolicStream(b"\x00")
    if id == f"{NS_AFF4}UnknownData":
        return SymbolicStream(b"UNKNOWN")
    if id == f"{NS_AFF4}UnreadableData":
        return SymbolicStream(b"UNREADABLEDATA")
    if id.startswith(f"{NS_AFF4}SymbolicStream"):
        # The last two characters of the ID are the hex value of the repeated byte
        return SymbolicStream(bytes.fromhex(id[-2:]))

    # The lookup returns ``None`` for unknown IDs and may return objects without ``open()``;
    # report both as the documented ValueError instead of an AttributeError.
    opener = getattr(ctx.aff4.information.get(id), "open", None)
    stream = opener() if opener is not None else None

    if stream is None:
        raise ValueError(f"Could not open stream {id}")

    return stream


class MapStream(AlignedStream):
    """AFF4 stream implementation for ``Map`` objects."""

    def __init__(self, map: Map):
        self.map = map

        self.default_gap_stream = _open_stream(self.map.ctx, self.map.map_gap_default_stream)
        self.streams = [_open_stream(self.map.ctx, entry) for entry in self.map.index]
        # Read the mapping once and reuse it for the lookup table
        self.stream_map = self.map.map
        # Keys are the end offsets of the mapped extents; bisect needs them in ascending order
        self._lookup = sorted(self.stream_map)

        super().__init__(self.map.size)

    def _read(self, offset: int, length: int) -> bytes:
        result = []

        # First extent whose end offset lies beyond ``offset``
        idx = bisect_right(self._lookup, offset)
        while length > 0:
            if idx >= len(self._lookup):
                # No extent left: whatever remains is served by the gap stream
                result.append(self.default_gap_stream.read(length))
                break

            mapped_offset, mapped_length, target_offset, stream_idx = self.stream_map[self._lookup[idx]]

            if offset < mapped_offset:
                # Hole before the next extent
                read_size = min(length, mapped_offset - offset)
                result.append(self.default_gap_stream.read(read_size))
            else:
                offset_in_mapping = offset - mapped_offset
                read_size = min(length, mapped_length - offset_in_mapping)

                stream = self.streams[stream_idx]
                stream.seek(target_offset + offset_in_mapping)
                result.append(stream.read(read_size))

                idx += 1

            offset += read_size
            length -= read_size

        return b"".join(result)

    def close(self) -> None:
        # Every stream opened in __init__ is released here, including the gap stream
        for stream in self.streams:
            stream.close()
        self.default_gap_stream.close()
        super().close()


class SymbolicStream(AlignedStream):
    """AFF4 stream that returns a repeating pattern.

    Args:
        pattern: The non-empty byte pattern to repeat.

    Raises:
        ValueError: If ``pattern`` is empty.
    """

    def __init__(self, pattern: bytes):
        if not pattern:
            # An empty pattern would only fail later, as a ZeroDivisionError on the first read
            raise ValueError("Symbolic stream pattern must not be empty")

        self.pattern = pattern
        super().__init__(None)

    def _read(self, offset: int, length: int) -> bytes:
        # The pattern starts over at the beginning of every ``_read`` call; ``offset`` is not used
        mult, rem = divmod(length, len(self.pattern))
        return (self.pattern * mult) + self.pattern[:rem]
NS_XSD = "http://www.w3.org/2001/XMLSchema#"
NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
NS_AFF4 = "http://aff4.org/Schema#"


class CompressionMethod(Enum):
    """Compression methods, valued by the URI that identifies them."""

    ZLIB = "https://www.ietf.org/rfc/rfc1950.txt"
    DEFLATE = "https://tools.ietf.org/html/rfc1951"
    SNAPPY = "http://code.google.com/p/snappy/"
    SNAPPY2 = "https://github.com/google/snappy"
    LZ4 = "https://code.google.com/p/lz4/"
    STORED = "http://aff4.org/Schema#NullCompressor"


def parse_turtle(fh: TextIO) -> dict[str, dict[str, object]]:
    """Poor mans turtle parser. Save the turtles 🐢.

    Args:
        fh: A text file-like object containing turtle data.

    Returns:
        A dictionary mapping each subject to a dictionary of its predicates. A predicate
        maps to a single parsed value, or to a list when several objects are given.

    Raises:
        ValueError: If a predicate occurs twice for the same subject.
    """
    objects = {}
    prefixes = {}

    parts = []
    for line in fh:
        if not (line := line.strip()):
            continue

        # A statement can span several lines; buffer until the line that ends with "."
        if not line.endswith("."):
            parts.append(line)
            continue

        parts.append(line[:-1])
        full_statement = " ".join(parts)
        parts = []

        if full_statement.startswith("@prefix"):
            _, prefix, uri = full_statement.split(maxsplit=2)
            # Drop the surrounding angle brackets of the namespace URI
            prefixes[prefix] = uri.rstrip(".").strip()[1:-1]
            continue

        subject = None
        for statement in _iter_statements(full_statement, ";"):
            if not statement:
                # A ";" directly before the terminating "." leaves an empty statement
                continue

            if subject is None:
                subject, predicate, obj = statement.split(maxsplit=2)
                subject = _explode_prefix(subject.strip(), prefixes)

                if subject.startswith("<") and subject.endswith(">"):
                    subject = subject[1:-1]
            else:
                predicate, obj = statement.split(maxsplit=1)

            predicate = predicate.strip()
            obj = obj.strip()

            # "a" is shorthand for rdf:type wherever it is used as a predicate
            if predicate == "a":
                predicate = f"{NS_RDF}type"

            predicate = _explode_prefix(predicate, prefixes)

            values = [_parse_object(value, prefixes) for value in _iter_statements(obj, ",")]
            value = values[0] if len(values) == 1 else values

            properties = objects.setdefault(subject, {})
            if predicate in properties:
                raise ValueError(f"Duplicate predicate {predicate} for subject {subject}")

            properties[predicate] = value

    return objects


def _iter_statements(statement: str, delimiter: str) -> Iterator[str]:
    """Iterate over statements separated by a delimiter.

    Delimiters inside quoted literals or inside ``<...>`` URIs do not split. A backslash
    is dropped and the character following it is taken literally.

    Args:
        statement: The text to split.
        delimiter: The single-character delimiter.
    """
    current = []
    escape = False
    in_literal = False
    in_uri = False

    for c in statement:
        if c == "\\" and not escape:
            escape = True

        elif escape:
            escape = False
            current.append(c)

        elif c in "'\"":
            in_literal = not in_literal
            current.append(c)

        elif c in "<>" and not in_literal:
            # "<" opens a URI and ">" closes it
            in_uri = c == "<"
            current.append(c)

        elif c == delimiter and not in_literal and not in_uri:
            yield "".join(current).strip()
            current = []

        else:
            current.append(c)

    if current:
        yield "".join(current).strip()


# Converters for typed literals, keyed by the full datatype URI
_OBJECT_PARSERS = {
    f"{NS_XSD}int": int,
    f"{NS_XSD}long": int,
    f"{NS_XSD}integer": int,
    f"{NS_XSD}boolean": lambda v: v.lower() in ("true", "1"),
    f"{NS_XSD}hexBinary": bytes.fromhex,
    f"{NS_XSD}dateTime": datetime.datetime.fromisoformat,
}


def _parse_object(value: str, prefixes: dict[str, str]) -> str | int | bool | bytes | datetime.datetime:
    """Parse a turtle object value.

    Splits off an optional ``^^datatype`` suffix, expands prefixes, strips the double
    quotes of a literal and converts the value when the datatype has a known parser.

    Args:
        value: The raw object text of a statement.
        prefixes: Mapping of declared prefixes to their namespace URIs.
    """
    value, _, datatype = value.partition("^^")

    value = f"<{_explode_prefix(value, prefixes)}>" if value in prefixes else _explode_prefix(value, prefixes)
    datatype = _explode_prefix(datatype, prefixes)

    # The datatype may be written as a full <URI> instead of a prefixed name
    if datatype.startswith("<") and datatype.endswith(">"):
        datatype = datatype[1:-1]

    if value.startswith('"') and value.endswith('"'):
        value = value[1:-1]

    if parser := _OBJECT_PARSERS.get(datatype):
        return parser(value)

    return value


def _explode_prefix(value: str, prefixes: dict[str, str]) -> str:
    """Expand a prefixed turtle value to its full URI.

    The first declared prefix that ``value`` starts with is replaced by its URI; a value
    that matches no prefix is returned unchanged.
    """
    for prefix, uri in prefixes.items():
        if value.startswith(prefix):
            return value.replace(prefix, uri, 1)
    return value
sha256:a61419e6bf4bbe54f15680ad4041851d638e02b670bb47f77b947d468596163b +size 3197581 diff --git a/tests/_data/aff4/Base-Linear-ReadError.aff4 b/tests/_data/aff4/Base-Linear-ReadError.aff4 new file mode 100644 index 0000000..c0b70b8 --- /dev/null +++ b/tests/_data/aff4/Base-Linear-ReadError.aff4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1c2edd6bdf37f2efe9c6fa274dd3c100de3fc5152d8a1fd82fb61f41c68e12 +size 2143595 diff --git a/tests/_data/aff4/Base-Linear.aff4 b/tests/_data/aff4/Base-Linear.aff4 new file mode 100644 index 0000000..1559940 --- /dev/null +++ b/tests/_data/aff4/Base-Linear.aff4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcde3297ae95cd9df214bfb79821334628dad08f21ef38374a2c091481e391c0 +size 3177529 diff --git a/tests/_data/aff4/striped/Base-Linear_1.aff4 b/tests/_data/aff4/striped/Base-Linear_1.aff4 new file mode 100644 index 0000000..ea53f8b --- /dev/null +++ b/tests/_data/aff4/striped/Base-Linear_1.aff4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fea0e0b4c94fb7ce780a39129054fe869ee2fcfa77ee7c6ede4830b035c8c8 +size 1976635 diff --git a/tests/_data/aff4/striped/Base-Linear_2.aff4 b/tests/_data/aff4/striped/Base-Linear_2.aff4 new file mode 100644 index 0000000..9c1f116 --- /dev/null +++ b/tests/_data/aff4/striped/Base-Linear_2.aff4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d46baa88def85b784caf54a3a6c561e08019fbc22424a21f117d00b90c94505 +size 1331311 diff --git a/tests/conftest.py b/tests/conftest.py index 3cfa66f..f41cd28 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,6 @@ from __future__ import annotations from io import BytesIO -from pathlib import Path from typing import TYPE_CHECKING, BinaryIO import pytest @@ -11,10 +10,11 @@ if TYPE_CHECKING: from collections.abc import Iterator + from pathlib import Path def open_data(name: str) -> Iterator[BinaryIO]: - with (Path(__file__).parent / name).open("rb") as fh: + 
# NOTE(review): these tests previously called ``AFF4.images()``, which the ``AFF4`` class
# does not define; it exposes ``disks()`` and ``files()``. The fixtures are assumed to be
# disk images - confirm against the fixture metadata.


def test_aff4_linear() -> None:
    """Open a single-segment image and read through its map stream."""
    aff4 = AFF4(absolute_path("_data/aff4/Base-Linear.aff4"))

    segment = aff4.segment(0)
    assert segment.uri == "aff4://685e15cc-d0fb-4dbc-ba47-48117fc77044"
    assert segment.version == {"major": "1", "minor": "0", "tool": "Evimetry 2.2.0"}

    assert len(aff4.disks()) == 1

    image = aff4.disks()[0]
    map_stream = image.data_stream.open()

    assert hashlib.sha1(map_stream.streams[0].read()).hexdigest() == "fbac22cca549310bc5df03b7560afcf490995fbb"

    map_stream.seek(32768)
    assert map_stream.read(32768) == b"\x00" * 32768


def test_aff4_allocated() -> None:
    """Regions that were not acquired read back as the ``UNKNOWN`` pattern."""
    aff4 = AFF4(absolute_path("_data/aff4/Base-Allocated.aff4"))
    assert len(aff4.disks()) == 1

    image = aff4.disks()[0]
    stream = image.open()

    stream.seek(17825792)
    assert stream.read(518) == b"UNKNOWN" * 74
    stream.seek(82836992)
    assert stream.read(512) == (b"NOWNUNK" * (512 // 7)) + b"N"
    assert stream.read(8) == b"\x00" * 8


def test_aff4_read_error() -> None:
    """Regions with read errors read back as the ``UNREADABLEDATA`` pattern."""
    aff4 = AFF4(absolute_path("_data/aff4/Base-Linear-ReadError.aff4"))
    assert len(aff4.disks()) == 1

    image = aff4.disks()[0]
    stream = image.open()

    stream.seek(15728640)
    assert stream.read(65536) == b"UNREADABLEDATA" * (65536 // 14) + b"UN"


def test_aff4_exabyte_sparse() -> None:
    """Read from a very large sparse image, near the start and at a far offset."""
    aff4 = AFF4(absolute_path("_data/aff4/Base-ExabyteSparse.aff4"))
    assert len(aff4.disks()) == 1

    image = aff4.disks()[0]
    stream = image.open()

    stream.seek(1048576)
    assert stream.read(512) == b"\x00" * 512

    stream.seek(4611686018427387648)
    assert stream.read(512)[-2:] == b"\x55\xaa"


def test_aff4_striped() -> None:
    """Open an image that is split over two segment files."""
    aff4 = AFF4(
        [
            absolute_path("_data/aff4/striped/Base-Linear_1.aff4"),
            absolute_path("_data/aff4/striped/Base-Linear_2.aff4"),
        ]
    )
    assert len(aff4.disks()) == 1

    image = aff4.disks()[0]
    stream = image.open()

    stream.seek(0)
    assert hashlib.sha1(stream.read(512)).hexdigest() == "341427eedd4172fa61d85af7c8cf3c8d5a8656d5"

    stream.seek(15335424)
    assert hashlib.sha1(stream.read(512)).hexdigest() == "54cc2127dc9f536ead23c9bc898ffb73d528c7ed"