diff --git a/src/mdio/builder/template_registry.py b/src/mdio/builder/template_registry.py index d90d5cfd..6123ed2f 100644 --- a/src/mdio/builder/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -25,7 +25,10 @@ from mdio.builder.templates.seismic_2d_streamer_shot import Seismic2DStreamerShotGathersTemplate from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate +from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate +from mdio.builder.templates.seismic_3d_obn_single_component import Seismic3DObnSingleComponentGathersTemplate from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.templates.seismic_3d_shot_receiver_line import Seismic3DShotReceiverLineGathersTemplate from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate @@ -138,6 +141,13 @@ def _register_default_templates(self) -> None: self.register(Seismic3DStreamerShotGathersTemplate()) self.register(Seismic3DStreamerFieldRecordsTemplate()) + # OBN (Ocean Bottom Node) data + self.register(Seismic3DObnReceiverGathersTemplate()) + self.register(Seismic3DObnSingleComponentGathersTemplate()) + + # Land/OBC shot-receiver data + self.register(Seismic3DShotReceiverLineGathersTemplate()) + def get(self, template_name: str) -> AbstractDatasetTemplate: """Get an instance of a template from the registry by its name. diff --git a/src/mdio/builder/templates/seismic_3d_obn.py b/src/mdio/builder/templates/seismic_3d_obn.py new file mode 100644 index 00000000..d391ba57 --- /dev/null +++ b/src/mdio/builder/templates/seismic_3d_obn.py @@ -0,0 +1,98 @@ +"""Seismic3DObnReceiverGathersTemplate MDIO v1 dataset templates.""" + +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.base import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain + + +class Seismic3DObnReceiverGathersTemplate(AbstractDatasetTemplate): + """Seismic 3D OBN (Ocean Bottom Node) receiver gathers template.""" + + def __init__(self, data_domain: SeismicDataDomain = "time"): + super().__init__(data_domain=data_domain) + + self._spatial_dim_names = ("component", "receiver", "shot_line", "gun", "shot_point") + self._dim_names = (*self._spatial_dim_names, self._data_domain) + self._physical_coord_names = ( + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + ) + self._logical_coord_names = ("orig_field_record_num",) + self._var_chunk_shape = (4, 8, 1, 2, 8, 4096) + + @property + def _name(self) -> str: + return "ObnReceiverGathers3D" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyType": "3D", "gatherType": "common_receiver"} + + def _add_coordinates(self) -> None: + # Add dimension coordinates + self._builder.add_coordinate( + "component", + dimensions=("component",), + data_type=ScalarType.UINT8, + ) + self._builder.add_coordinate( + "receiver", + dimensions=("receiver",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "shot_line", + dimensions=("shot_line",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "gun", + dimensions=("gun",), + data_type=ScalarType.UINT8, + ) + self._builder.add_coordinate( + "shot_point", + dimensions=("shot_point",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + self._data_domain, + dimensions=(self._data_domain,), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._data_domain)), + ) + + # Add non-dimension coordinates + self._builder.add_coordinate( + "group_coord_x", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")), + ) + self._builder.add_coordinate( + "group_coord_y", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")), + ) + self._builder.add_coordinate( + "source_coord_x", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), + ) + self._builder.add_coordinate( + "source_coord_y", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), + ) + self._builder.add_coordinate( + "orig_field_record_num", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.UINT32, + ) diff --git a/src/mdio/builder/templates/seismic_3d_obn_single_component.py b/src/mdio/builder/templates/seismic_3d_obn_single_component.py new file mode 100644 index 00000000..491de27f --- /dev/null +++ b/src/mdio/builder/templates/seismic_3d_obn_single_component.py @@ -0,0 +1,93 @@ +"""Seismic3DObnSingleComponentGathersTemplate MDIO v1 dataset templates.""" + +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.base import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain + + +class Seismic3DObnSingleComponentGathersTemplate(AbstractDatasetTemplate): + """Seismic 3D OBN (Ocean Bottom Node) single-component gathers template.""" + + def __init__(self, data_domain: SeismicDataDomain = "time"): + super().__init__(data_domain=data_domain) + + self._spatial_dim_names = ("receiver", "shot_line", "gun", "shot_point") + self._dim_names = (*self._spatial_dim_names, self._data_domain) + self._physical_coord_names = ( + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + ) + self._logical_coord_names = ("orig_field_record_num",) + self._var_chunk_shape = (16, 1, 2, 16, 4096) + + @property + def _name(self) -> str: + return "ObnSingleComponentGathers3D" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyType": "3D", "gatherType": "common_receiver"} + + def _add_coordinates(self) -> None: + # Add dimension coordinates + self._builder.add_coordinate( + "receiver", + dimensions=("receiver",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "shot_line", + dimensions=("shot_line",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "gun", + dimensions=("gun",), + data_type=ScalarType.UINT8, + ) + self._builder.add_coordinate( + "shot_point", + dimensions=("shot_point",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + self._data_domain, + dimensions=(self._data_domain,), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._data_domain)), + ) + + # Add non-dimension coordinates + self._builder.add_coordinate( + "group_coord_x", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")), + ) + self._builder.add_coordinate( + "group_coord_y", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")), + ) + self._builder.add_coordinate( + "source_coord_x", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), + ) + self._builder.add_coordinate( + "source_coord_y", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), + ) + self._builder.add_coordinate( + "orig_field_record_num", + dimensions=("shot_line", "gun", "shot_point"), + data_type=ScalarType.UINT32, + ) diff --git a/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py new file mode 100644 index 00000000..5f5b5157 --- /dev/null +++ b/src/mdio/builder/templates/seismic_3d_shot_receiver_line.py @@ -0,0 +1,93 @@ +"""Seismic3DShotReceiverLineGathersTemplate MDIO v1 dataset templates.""" + +from typing import Any + +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.base import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain + + +class Seismic3DShotReceiverLineGathersTemplate(AbstractDatasetTemplate): + """Seismic 3D shot-ordered gathers with receiver lines template.""" + + def __init__(self, data_domain: SeismicDataDomain = "time"): + super().__init__(data_domain=data_domain) + + self._spatial_dim_names = ("shot_line", "shot_point", "receiver_line", "receiver") + self._dim_names = (*self._spatial_dim_names, self._data_domain) + self._physical_coord_names = ( + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + ) + self._logical_coord_names = ("orig_field_record_num",) + self._var_chunk_shape = (1, 32, 1, 32, 2048) + + @property + def _name(self) -> str: + return "ShotReceiverLineGathers3D" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyType": "3D", "gatherType": "common_source"} + + def _add_coordinates(self) -> None: + # Add dimension coordinates + self._builder.add_coordinate( + "shot_line", + dimensions=("shot_line",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "shot_point", + dimensions=("shot_point",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "receiver_line", + dimensions=("receiver_line",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + "receiver", + dimensions=("receiver",), + data_type=ScalarType.UINT32, + ) + self._builder.add_coordinate( + self._data_domain, + dimensions=(self._data_domain,), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self._data_domain)), + ) + + # Add non-dimension coordinates + self._builder.add_coordinate( + "source_coord_x", + dimensions=("shot_line", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), + ) + self._builder.add_coordinate( + "source_coord_y", + dimensions=("shot_line", "shot_point"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), + ) + self._builder.add_coordinate( + "group_coord_x", + dimensions=("receiver_line", "receiver"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_x")), + ) + self._builder.add_coordinate( + "group_coord_y", + dimensions=("receiver_line", "receiver"), + data_type=ScalarType.FLOAT64, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("group_coord_y")), + ) + self._builder.add_coordinate( + "orig_field_record_num", + dimensions=("shot_line", "shot_point"), + data_type=ScalarType.UINT32, + ) diff --git a/tests/unit/v1/templates/test_seismic_3d_obn.py b/tests/unit/v1/templates/test_seismic_3d_obn.py new file mode 100644 index 00000000..bae5b4c5 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_obn.py @@ -0,0 +1,217 @@ +"""Unit tests for Seismic3DObnReceiverGathersTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) + +# Typical OBN survey dimensions: 4 components, 500 receivers, 10 shot lines, 2 guns, 200 shot points, 4096 samples +DATASET_SIZE_MAP = {"component": 4, "receiver": 500, "shot_line": 10, "gun": 2, "shot_point": 200, "time": 4096} +DATASET_DTYPE_MAP = { + "component": "uint8", + "receiver": "uint32", + "shot_line": "uint32", + "gun": "uint8", + "shot_point": "uint32", + "time": "int32", +} +EXPECTED_COORDINATES = [ + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + "orig_field_record_num", +] + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 6 dim coords + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 14 variables + assert len(dataset.variables) == 14 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + for dim_name, dim_size in DATASET_SIZE_MAP.items(): + validate_variable( + dataset, + name=dim_name, + dims=[(dim_name, dim_size)], + coords=[dim_name], + dtype=ScalarType(DATASET_DTYPE_MAP[dim_name]), + ) + + # Verify receiver coordinate variables (indexed by receiver only) + for coord_name in ["group_coord_x", "group_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=[("receiver", DATASET_SIZE_MAP["receiver"])], + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify source coordinate variables (indexed by shot_line, gun, shot_point) + shot_dims = [(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["shot_line", "gun", "shot_point"]] + for coord_name in ["source_coord_x", "source_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=shot_dims, + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify orig_field_record_num coordinate + validate_variable( + dataset, + name="orig_field_record_num", + dims=shot_dims, + coords=["orig_field_record_num"], + dtype=ScalarType.UINT32, + ) + + +class TestSeismic3DObnReceiverGathersTemplate: + """Unit tests for Seismic3DObnReceiverGathersTemplate.""" + + def test_configuration(self) -> None: + """Test template configuration and attributes.""" + t = Seismic3DObnReceiverGathersTemplate(data_domain="time") + + # Template attributes + assert t.name == "ObnReceiverGathers3D" + assert t._dim_names == ("component", "receiver", "shot_line", "gun", "shot_point", "time") + assert t._physical_coord_names == ( + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + ) + assert t._logical_coord_names == ("orig_field_record_num",) + assert t._var_chunk_shape == (4, 8, 1, 2, 8, 4096) + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == () + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs == {"surveyType": "3D", "gatherType": "common_receiver"} + assert t.default_variable_name == "amplitude" + + def test_chunk_size_calculation(self) -> None: + """Test that chunk shape produces approximately 8 MiB chunks. + + The chunk shape (4, 8, 1, 2, 8, 4096) produces: + 4 * 8 * 1 * 2 * 8 * 4096 = 2,097,152 samples. + With float32 (4 bytes): 2,097,152 * 4 = 8,388,608 bytes = 8 MiB. + """ + t = Seismic3DObnReceiverGathersTemplate(data_domain="time") + + # Get the chunk shape + chunk_shape = t.full_chunk_shape + assert chunk_shape == (4, 8, 1, 2, 8, 4096) + + # Calculate the number of samples per chunk + samples_per_chunk = 1 + for dim_size in chunk_shape: + samples_per_chunk *= dim_size + + # With float32 (4 bytes per sample), calculate chunk size in bytes + bytes_per_chunk = samples_per_chunk * 4 + assert bytes_per_chunk == 8 * 1024 * 1024 # 8 MiB + + def test_isometric_chunking(self) -> None: + """Test that receiver and shot_point have balanced chunk sizes for isometric access.""" + t = Seismic3DObnReceiverGathersTemplate(data_domain="time") + + # Extract chunk sizes for receiver and shot_point + # Dimensions: (component, receiver, shot_line, gun, shot_point, time) + chunk_shape = t._var_chunk_shape + receiver_chunk = chunk_shape[1] # receiver + shot_point_chunk = chunk_shape[4] # shot_point + + # Both should be equal for isometric access between receiver and shot domains + assert receiver_chunk == shot_point_chunk == 8 + + def test_build_dataset(self, structured_headers: StructuredType) -> None: + """Test building a complete dataset with the template.""" + t = Seismic3DObnReceiverGathersTemplate(data_domain="time") + + # Add units + t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) + t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) + t.add_units({"time": UNITS_SECOND}) + + sizes = tuple(DATASET_SIZE_MAP.values()) + dataset = t.build_dataset("ObnSurvey3D", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "ObnSurvey3D" + assert dataset.metadata.attributes["surveyType"] == "3D" + assert dataset.metadata.attributes["gatherType"] == "common_receiver" + assert dataset.metadata.attributes["defaultVariableName"] == "amplitude" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic amplitude variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=list(DATASET_SIZE_MAP.items()), + coords=EXPECTED_COORDINATES, + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (4, 8, 1, 2, 8, 4096) + assert seismic.metadata.stats_v1 is None + + def test_depth_domain(self, structured_headers: StructuredType) -> None: + """Test building a dataset with depth domain.""" + t = Seismic3DObnReceiverGathersTemplate(data_domain="depth") + + assert t.trace_domain == "depth" + assert t._dim_names == ("component", "receiver", "shot_line", "gun", "shot_point", "depth") + + sizes = (4, 100, 5, 2, 50, 2048) # Smaller sizes for this test + dataset = t.build_dataset("ObnSurveyDepth", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "ObnSurveyDepth" + + # Verify depth dimension coordinate exists + depth_coord = next((v for v in dataset.variables if v.name == "depth"), None) + assert depth_coord is not None + assert depth_coord.dimensions[0].name == "depth" + assert depth_coord.dimensions[0].size == 2048 diff --git a/tests/unit/v1/templates/test_seismic_3d_obn_single_component.py b/tests/unit/v1/templates/test_seismic_3d_obn_single_component.py new file mode 100644 index 00000000..7aece2cf --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_obn_single_component.py @@ -0,0 +1,194 @@ +"""Unit tests for Seismic3DObnSingleComponentGathersTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_obn_single_component import Seismic3DObnSingleComponentGathersTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) + +DATASET_SIZE_MAP = {"receiver": 500, "shot_line": 10, "gun": 2, "shot_point": 200, "time": 4096} +DATASET_DTYPE_MAP = { + "receiver": "uint32", + "shot_line": "uint32", + "gun": "uint8", + "shot_point": "uint32", + "time": "int32", +} +EXPECTED_COORDINATES = [ + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + "orig_field_record_num", +] + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 5 dim coords + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 13 variables + assert len(dataset.variables) == 13 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + for dim_name, dim_size in DATASET_SIZE_MAP.items(): + validate_variable( + dataset, + name=dim_name, + dims=[(dim_name, dim_size)], + coords=[dim_name], + dtype=ScalarType(DATASET_DTYPE_MAP[dim_name]), + ) + + # Verify receiver coordinate variables + for coord_name in ["group_coord_x", "group_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=[("receiver", DATASET_SIZE_MAP["receiver"])], + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify source coordinate variables + shot_dims = [(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["shot_line", "gun", "shot_point"]] + for coord_name in ["source_coord_x", "source_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=shot_dims, + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DObnSingleComponentGathersTemplate: + """Unit tests for Seismic3DObnSingleComponentGathersTemplate.""" + + def test_configuration(self) -> None: + """Test template configuration and attributes.""" + t = Seismic3DObnSingleComponentGathersTemplate(data_domain="time") + + assert t.name == "ObnSingleComponentGathers3D" + assert t._dim_names == ("receiver", "shot_line", "gun", "shot_point", "time") + assert t._physical_coord_names == ( + "group_coord_x", + "group_coord_y", + "source_coord_x", + "source_coord_y", + ) + assert t._logical_coord_names == ("orig_field_record_num",) + assert t._var_chunk_shape == (16, 1, 2, 16, 4096) + + assert t._builder is None + assert t._dim_sizes == () + + attrs = t._load_dataset_attributes() + assert attrs == {"surveyType": "3D", "gatherType": "common_receiver"} + assert t.default_variable_name == "amplitude" + + def test_chunk_size_calculation(self) -> None: + """Test that chunk shape produces approximately 8 MiB chunks. + + The chunk shape (16, 1, 2, 16, 4096) produces: + 16 * 1 * 2 * 16 * 4096 = 2,097,152 samples. + With float32 (4 bytes): 2,097,152 * 4 = 8,388,608 bytes = 8 MiB. + """ + t = Seismic3DObnSingleComponentGathersTemplate(data_domain="time") + + chunk_shape = t.full_chunk_shape + assert chunk_shape == (16, 1, 2, 16, 4096) + + samples_per_chunk = 1 + for dim_size in chunk_shape: + samples_per_chunk *= dim_size + + bytes_per_chunk = samples_per_chunk * 4 + assert bytes_per_chunk == 8 * 1024 * 1024 # 8 MiB + + def test_isometric_chunking(self) -> None: + """Test that receiver and shot_point have balanced chunk sizes for isometric access.""" + t = Seismic3DObnSingleComponentGathersTemplate(data_domain="time") + + chunk_shape = t._var_chunk_shape + receiver_chunk = chunk_shape[0] # receiver + shot_point_chunk = chunk_shape[3] # shot_point + + assert receiver_chunk == shot_point_chunk == 16 + + def test_build_dataset(self, structured_headers: StructuredType) -> None: + """Test building a complete dataset with the template.""" + t = Seismic3DObnSingleComponentGathersTemplate(data_domain="time") + + t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) + t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) + t.add_units({"time": UNITS_SECOND}) + + sizes = tuple(DATASET_SIZE_MAP.values()) + dataset = t.build_dataset("ObnSurvey3D", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "ObnSurvey3D" + assert dataset.metadata.attributes["surveyType"] == "3D" + assert dataset.metadata.attributes["gatherType"] == "common_receiver" + assert dataset.metadata.attributes["defaultVariableName"] == "amplitude" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + seismic = validate_variable( + dataset, + name="amplitude", + dims=list(DATASET_SIZE_MAP.items()), + coords=EXPECTED_COORDINATES, + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (16, 1, 2, 16, 4096) + assert seismic.metadata.stats_v1 is None + + def test_depth_domain(self, structured_headers: StructuredType) -> None: + """Test building a dataset with depth domain.""" + t = Seismic3DObnSingleComponentGathersTemplate(data_domain="depth") + + assert t.trace_domain == "depth" + assert t._dim_names == ("receiver", "shot_line", "gun", "shot_point", "depth") + + sizes = (100, 5, 2, 50, 2048) + dataset = t.build_dataset("ObnSurveyDepth", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "ObnSurveyDepth" + + depth_coord = next((v for v in dataset.variables if v.name == "depth"), None) + assert depth_coord is not None + assert depth_coord.dimensions[0].name == "depth" + assert depth_coord.dimensions[0].size == 2048 diff --git a/tests/unit/v1/templates/test_seismic_3d_shot_receiver_line.py b/tests/unit/v1/templates/test_seismic_3d_shot_receiver_line.py new file mode 100644 index 00000000..34fab494 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_shot_receiver_line.py @@ -0,0 +1,201 @@ +"""Unit tests for Seismic3DShotReceiverLineGathersTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_shot_receiver_line import Seismic3DShotReceiverLineGathersTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) + +DATASET_SIZE_MAP = { + "shot_line": 10, + "shot_point": 200, + "receiver_line": 50, + "receiver": 100, + "time": 4096, +} +DATASET_DTYPE_MAP = { + "shot_line": "uint32", + "shot_point": "uint32", + "receiver_line": "uint32", + "receiver": "uint32", + "time": "int32", +} +EXPECTED_COORDINATES = [ + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + "orig_field_record_num", +] + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 5 dim coords + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 13 variables + assert len(dataset.variables) == 13 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != domain], + coords=EXPECTED_COORDINATES, + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + for dim_name, dim_size in DATASET_SIZE_MAP.items(): + validate_variable( + dataset, + name=dim_name, + dims=[(dim_name, dim_size)], + coords=[dim_name], + dtype=ScalarType(DATASET_DTYPE_MAP[dim_name]), + ) + + # Verify source coordinate variables + shot_dims = [(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["shot_line", "shot_point"]] + for coord_name in ["source_coord_x", "source_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=shot_dims, + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify receiver coordinate variables + receiver_dims = [(k, v) for k, v in DATASET_SIZE_MAP.items() if k in ["receiver_line", "receiver"]] + for coord_name in ["group_coord_x", "group_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=receiver_dims, + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DShotReceiverLineGathersTemplate: + """Unit tests for Seismic3DShotReceiverLineGathersTemplate.""" + + def test_configuration(self) -> None: + """Test template configuration and attributes.""" + t = Seismic3DShotReceiverLineGathersTemplate(data_domain="time") + + assert t.name == "ShotReceiverLineGathers3D" + assert t._dim_names == ("shot_line", "shot_point", "receiver_line", "receiver", "time") + assert t._physical_coord_names == ( + "source_coord_x", + "source_coord_y", + "group_coord_x", + "group_coord_y", + ) + assert t._logical_coord_names == ("orig_field_record_num",) + assert t._var_chunk_shape == (1, 32, 1, 32, 2048) + + assert t._builder is None + assert t._dim_sizes == () + + attrs = t._load_dataset_attributes() + assert attrs == {"surveyType": "3D", "gatherType": "common_source"} + assert t.default_variable_name == "amplitude" + + def test_chunk_size_calculation(self) -> None: + """Test that chunk shape produces approximately 8 MiB chunks. + + The chunk shape (1, 32, 1, 32, 2048) produces: + 1 * 32 * 1 * 32 * 2048 = 2,097,152 samples. + With float32 (4 bytes): 2,097,152 * 4 = 8,388,608 bytes = 8 MiB. + """ + t = Seismic3DShotReceiverLineGathersTemplate(data_domain="time") + + chunk_shape = t.full_chunk_shape + assert chunk_shape == (1, 32, 1, 32, 2048) + + samples_per_chunk = 1 + for dim_size in chunk_shape: + samples_per_chunk *= dim_size + + bytes_per_chunk = samples_per_chunk * 4 + assert bytes_per_chunk == 8 * 1024 * 1024 # 8 MiB + + def test_isometric_chunking(self) -> None: + """Test that shot_point and receiver have balanced chunk sizes for isometric access.""" + t = Seismic3DShotReceiverLineGathersTemplate(data_domain="time") + + chunk_shape = t._var_chunk_shape + shot_point_chunk = chunk_shape[1] # shot_point + receiver_chunk = chunk_shape[3] # receiver + + assert shot_point_chunk == receiver_chunk == 32 + + def test_build_dataset(self, structured_headers: StructuredType) -> None: + """Test building a complete dataset with the template.""" + t = Seismic3DShotReceiverLineGathersTemplate(data_domain="time") + + t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) + t.add_units({"group_coord_x": UNITS_METER, "group_coord_y": UNITS_METER}) + t.add_units({"time": UNITS_SECOND}) + + sizes = tuple(DATASET_SIZE_MAP.values()) + dataset = t.build_dataset("LandSurvey3D", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "LandSurvey3D" + assert dataset.metadata.attributes["surveyType"] == "3D" + assert dataset.metadata.attributes["gatherType"] == "common_source" + assert dataset.metadata.attributes["defaultVariableName"] == "amplitude" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + seismic = validate_variable( + dataset, + name="amplitude", + dims=list(DATASET_SIZE_MAP.items()), + coords=EXPECTED_COORDINATES, + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 32, 1, 32, 2048) + assert seismic.metadata.stats_v1 is None + + def test_depth_domain(self, structured_headers: StructuredType) -> None: + """Test building a dataset with depth domain.""" + t = Seismic3DShotReceiverLineGathersTemplate(data_domain="depth") + + assert t.trace_domain == "depth" + assert t._dim_names == ("shot_line", "shot_point", "receiver_line", "receiver", "depth") + + sizes = (5, 100, 20, 50, 2048) + dataset = t.build_dataset("LandSurveyDepth", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "LandSurveyDepth" + + depth_coord = next((v for v in dataset.variables if v.name == "depth"), None) + assert depth_coord is not None + assert depth_coord.dimensions[0].name == "depth" + assert depth_coord.dimensions[0].size == 2048 diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py index e0b641f0..c6ca4595 100644 --- a/tests/unit/v1/templates/test_template_registry.py +++ b/tests/unit/v1/templates/test_template_registry.py @@ -34,6 +34,9 @@ "StreamerShotGathers2D", "StreamerShotGathers3D", "StreamerFieldRecords3D", + "ObnReceiverGathers3D", + "ObnSingleComponentGathers3D", + "ShotReceiverLineGathers3D", ] @@ -240,7 +243,7 @@ def test_list_all_templates(self) -> None: registry.register(template2) templates = registry.list_all_templates() - assert len(templates) == 17 + 2 # 17 default + 2 custom + assert len(templates) == 20 + 2 # 20 default + 2 custom assert "Template_One" in templates assert "Template_Two" in templates @@ -250,7 +253,7 @@ def test_clear_templates(self) -> None: # Default templates are always installed templates = list_templates() - assert len(templates) == 17 + assert len(templates) == 20 # Add some templates template1 = MockDatasetTemplate("Template1") @@ -259,7 +262,7 @@ def test_clear_templates(self) -> None: registry.register(template1) registry.register(template2) - assert len(registry.list_all_templates()) == 17 + 2 # 17 default + 2 custom + assert len(registry.list_all_templates()) == 20 + 2 # 20 default + 2 custom # Clear all registry.clear() @@ -392,7 +395,7 @@ def test_list_templates_global(self) -> None: register_template(template2) templates = list_templates() - assert len(templates) == 19 # 17 default + 2 custom + assert len(templates) == 22 # 20 default + 2 custom assert "template1" in templates assert "template2" in templates @@ -435,7 +438,7 @@ def register_template_worker(template_id: int) -> None: assert len(errors) == 0 assert len(results) == 10 # Including default templates - assert len(registry.list_all_templates()) == 27 # 17 default + 10 registered + assert len(registry.list_all_templates()) == 30 # 20 default + 10 registered # Check all templates are registered for i in range(10):