From fe0d0a377479c7c8873b6671f312e6bb340cc370 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Thu, 8 Jan 2026 15:40:38 +1300 Subject: [PATCH 01/17] fix(merge_ts): revive hdf5 version of merge-ts --- pyproject.toml | 1 + workflow/scripts/merge_ts.py | 158 +++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 workflow/scripts/merge_ts.py diff --git a/pyproject.toml b/pyproject.toml index 7b706a7..0b1e250 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ generate-rupture-propagation = "workflow.scripts.generate_rupture_propagation:ap copy-domain-parameters = "workflow.scripts.copy_velocity_model_parameters:app" create-e3d-par = "workflow.scripts.create_e3d_par:app" generate-stoch = "workflow.scripts.generate_stoch:app" +merge-ts = "workflow.scripts.merge_ts:app" hf-sim = "workflow.scripts.hf_sim:app" bb-sim = "workflow.scripts.bb_sim:app" im-calc = "workflow.scripts.im_calc:app" diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py new file mode 100644 index 0000000..122b43d --- /dev/null +++ b/workflow/scripts/merge_ts.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +"""Merge EMOD3D Timeslices. + +Description +----------- +Merge the output timeslice files of EMOD3D. + +Inputs +------ +1. A directory containing EMOD3D timeslice files. + +Outputs +------- +1. A merged output timeslice file. + +Environment +----------- +Can be run in the cybershake container. Can also be run from your own computer using the `merge-ts` command which is installed after running `pip install workflow@git+https://github.com/ucgmsim/workflow`. + +Usage +----- +`merge_ts XYTS_DIRECTORY output.h5` + +For More Help +------------- +See the output of `merge-ts --help`. +""" + +from pathlib import Path +from typing import Annotated + +import numpy as np +import tqdm +import typer +import xarray as xr + +from qcore import cli, coordinates, xyts + +app = typer.Typer() + + +@cli.from_docstring(app) # type: ignore +def merge_ts_hdf5( + component_xyts_directory: Annotated[ + Path, + typer.Argument( + dir_okay=True, + file_okay=False, + exists=True, + readable=True, + ), + ], + output: Annotated[ + Path, + typer.Argument(dir_okay=False, writable=True), + ], + glob_pattern: str = "*xyts-*.e3d", + complevel: int = 4, +) -> None: + """Merge XYTS files. + + Parameters + ---------- + component_xyts_directory : Path + The input xyts directory containing files to merge. + output : Path + The output xyts file. + glob_pattern : str, optional + Set a custom glob pattern for merging the xyts files, by default "*xyts-*.e3d". + complevel : int, optional + Set the compression level for the output HDF5 file. Range + between 1-9 (9 being the highest level of compression). + Defaults to 4. + """ + component_xyts_files = sorted( + [ + xyts.XYTSFile( + xyts_file_path, proc_local_file=True, meta_only=True, round_dt=False + ) + for xyts_file_path in component_xyts_directory.glob(glob_pattern) + ], + key=lambda xyts_file: (xyts_file.y0, xyts_file.x0), + ) + top_left = component_xyts_files[0] + nt = top_left.nt + nx = top_left.nx + ny = top_left.ny + components = 3 + + xyts_proc_header_size = 72 + + waveform_data = np.empty((nt, ny, nx), dtype=np.uint16) + for xyts_file in tqdm.tqdm(component_xyts_files, unit="files"): + x0 = xyts_file.x0 + y0 = xyts_file.y0 + x1 = x0 + xyts_file.local_nx + y1 = y0 + xyts_file.local_ny + data = np.fromfile( + xyts_file.xyts_path, dtype=np.float32, offset=xyts_proc_header_size + ).reshape((nt, components, xyts_file.local_ny, xyts_file.local_nx)) + magnitude = np.linalg.norm(data, axis=1) / 0.1 + np.round(magnitude, out=magnitude) + waveform_data[:, y0:y1, x0:x1] = magnitude.astype(np.uint16) + + proj = coordinates.SphericalProjection( + mlon=top_left.mlon, mlat=top_left.mlat, mrot=top_left.mrot + ) + dx = top_left.hh + dt = top_left.dt + y, x = np.meshgrid( + np.arange(ny, dtype=np.float64), np.arange(nx, dtype=np.float64), indexing="ij" + ) + lat, lon = proj.inverse(x.flatten(), y.flatten()).T + lat = lat.reshape(y.shape) + lon = lon.reshape(y.shape) + time = np.arange(nt) * dt + dset = xr.Dataset( + { + "waveform": (("time", "y", "x"), waveform_data), + }, + coords={ + "time": ("time", time), + "y": ("y", np.arange(ny)), + "x": ("x", np.arange(nx)), + "latitude": (("y", "x"), lat), + "longitude": (("y", "x"), lon), + }, + attrs={ + "dx": dx, + "dy": dx, + "dt": dt, + "mlon": top_left.mlon, + "mlat": top_left.mlat, + "mrot": top_left.mrot, + }, + ) + + dset["waveform"].attrs.update( + { + "scale_factor": 0.1, + "add_offset": 0.0, + "units": "cm/s", + "_FillValue": -9999, + } + ) + + dset.to_netcdf( + output, + engine="h5netcdf", + encoding={ + "waveform": { + "dtype": "int16", + "compression": "zlib", + "complevel": complevel, + "shuffle": True, + } + }, + ) From f7ecbf7c2519e106ee06246e42d752ec3c4dadbc Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Thu, 8 Jan 2026 16:36:18 +1300 Subject: [PATCH 02/17] refactor(merge_ts): extract out logic into smaller functions --- workflow/scripts/merge_ts.py | 339 ++++++++++++++++++++++++++++------- 1 file changed, 273 insertions(+), 66 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 122b43d..65fc20d 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -26,10 +26,13 @@ See the output of `merge-ts --help`. """ +import dataclasses +from dataclasses import dataclass from pathlib import Path from typing import Annotated import numpy as np +import numpy.typing as npt import tqdm import typer import xarray as xr @@ -39,7 +42,255 @@ app = typer.Typer() -@cli.from_docstring(app) # type: ignore +def read_component_xyts_files( + xyts_directory: Path, glob_pattern: str +) -> list[xyts.XYTSFile]: + """Read XYTS headers from component XYTS directory. + + Parameters + ---------- + xyts_directory : Path + The directory containing e3d files. + glob_pattern : str + The glob pattern to search for xyts files. + + Returns + ------- + list[xyts.XYTSFile] + A list of XYTS files with parsed metadata. + """ + return [ + xyts.XYTSFile( + xyts_file_path, proc_local_file=True, meta_only=True, round_dt=False + ) + for xyts_file_path in xyts_directory.glob(glob_pattern) + ] + + +WaveformArray = npt.ndarray[tuple[int, int, int, int], np.float32] +QuantisedArray = npt.ndarray[tuple[int, int, int], np.uint16] +CoordinateArray = npt.ndarray[tuple[int, int], npt.float64] + + +@dataclass +class WaveformData: + """Waveform data object""" + + x_start: int + """Global x-start of waveform data.""" + x_end: int + """Global x-end of waveform data.""" + y_start: int + """Global y-start of waveform data.""" + y_end: int + """Global y-end of waveform data.""" + data: WaveformArray + """Waveform data.""" + + +def read_waveform_data(xyts_file: xyts.XYTSFile) -> WaveformData: + """Read waveform data from an XYTS file. + + Parameters + ---------- + xyts_file : xyts.XYTSFile + The XYTS file to read file. + + Returns + ------- + WaveformData + The extracted waveform data. + + Raises + ------ + ValueError + If the XYTS file is not a local XYTS file (output of EMOD3D). + Local XYTS files will have non-None ``local_nx`` and + ``local_ny`` attributes. + """ + nt = xyts_file.nt + components = len(xyts_file.comps) + ny = xyts_file.local_ny + nx = xyts_file.local_nx + if not (ny and nx): + raise ValueError( + "Encountered invalid XYTS component file (must have local ny and local nx both set)." + ) + x0 = xyts_file.x0 + y0 = xyts_file.y0 + x1 = x0 + nx + y1 = y0 + ny + xyts_proc_header_size = 72 + data = np.fromfile( + xyts_file.xyts_path, dtype=np.float32, offset=xyts_proc_header_size + ).reshape((nt, components, ny, nx)) + waveform_data = WaveformData(x_start=x0, y_start=y0, x_end=x1, y_end=y1, data=data) + return waveform_data + + +@dataclass +class Metadata: + """XYTS file metadata.""" + + nx: int + """Number of x gridpoints.""" + ny: int + """Number of y gridpoints.""" + nt: int + """Number of timesteps.""" + dx: float + """Spatial resolution.""" + dt: float + """Temporal resolution.""" + mlon: float + """Model origin longitude.""" + mlat: float + """Model origin latitude.""" + mrot: float + """Model rotation.""" + + +def extract_metadata(xyts_file: xyts.XYTSFile) -> Metadata: + """Extract metadata from an XYTS file. + + Parameters + ---------- + xyts_file : xyts.XYTSFile + The XYTS file to extract from. + + Returns + ------- + Metadata + The metadata extracted from the XYTS file. + """ + nt = xyts_file.nt + nx = xyts_file.nx + ny = xyts_file.ny + dx = xyts_file.hh + mlat = xyts_file.mlat + mlon = xyts_file.mlon + mrot = xyts_file.mrot + dt = xyts_file.dt + return Metadata( + dx=dx, + dt=dt, + mlon=mlon, + mlat=mlat, + mrot=mrot, + nx=nx, + ny=ny, + nt=nt, + ) + + +def xyts_lat_lon_coordinates( + metadata: Metadata, +) -> tuple[CoordinateArray, CoordinateArray]: + """Generate the lat/lon coordinates corresponding to a model. + + Parameters + ---------- + metadata : Metadata + The metadata describing a model (mrot, mlat, mlon, nx, ny, + dx). + + Returns + ------- + tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]] + A ``lat`` and ``lon`` meshgrid such that ``waveform_data[i, j]`` + has latitude ``lat[i, j]`` and longitude ``lon[i, j]``. + """ + proj = coordinates.SphericalProjection( + mlon=metadata.mlon, + mlat=metadata.mlat, + mrot=metadata.mrot, + ) + y, x = np.meshgrid( + np.arange(metadata.ny, dtype=np.float64), + np.arange(metadata.nx, dtype=np.float64), + indexing="ij", + ) + # dx = dy, so the following is ok. + # Shift gridpoints so that they are origin centred. + y -= metadata.ny * metadata.dx / 2 + x -= metadata.nx * metadata.dx / 2 + lat, lon = proj.inverse(x.flatten(), y.flatten()).T + lat = lat.reshape(y.shape) + lon = lon.reshape(y.shape) + return lat, lon + + +def create_xyts_dataset( + data: QuantisedArray, + lat: CoordinateArray, + lon: CoordinateArray, + time: npt.ndarray[int, np.float64], + metadata: Metadata, +) -> xr.Dataset: + """Create an XYTS dataset from given waveform data, coordinate meshgrid, time and metadata. + + Parameters + ---------- + data : npt.ndarray[tuple[int, int, int], np.uint16] + Quantised waveform data. + lat : npt.ndarray[tuple[int, int], np.float64] + Latitude meshgrid. + lon : npt.ndarray[tuple[int, int], np.float64] + Longitude meshgrid. + time : npt.ndarray[int, np.float64] + Time array. + metadata : Metadata + Metadata object. + + Returns + ------- + xr.Dataset + An xarray dataset with coordinates ``time``, ``y`` and ``x`` + indexing the waveform data, lat and lon arrays. Metadata + populates the attributes. + """ + dset = xr.Dataset( + { + "waveform": (("time", "y", "x"), data), + }, + coords={ + "time": ("time", time), + "y": ("y", np.arange(metadata.ny)), + "x": ("x", np.arange(metadata.nx)), + "latitude": (("y", "x"), lat), + "longitude": (("y", "x"), lon), + }, + attrs=dataclasses.asdict(Metadata), + ) + + return dset + + +def set_scale(dset: xr.Dataset, scale: float) -> None: + """Set dataset scale properties. + + This function sets the appropriate netcdf properties and units to + transparently read the uint16 quantised waveform values as 64-bit + floating point arrays. + + Parameters + ---------- + dset : xr.Dataset + Dataset to update. + scale : float + Scale for waveform quantisation. + """ + dset["waveform"].attrs.update( + { + "scale_factor": scale, + "add_offset": 0.0, + "units": "cm/s", + "_FillValue": -9999, + } + ) + + +@cli.from_docstring(app) def merge_ts_hdf5( component_xyts_directory: Annotated[ Path, @@ -55,6 +306,7 @@ def merge_ts_hdf5( typer.Argument(dir_okay=False, writable=True), ], glob_pattern: str = "*xyts-*.e3d", + scale: float = 0.1, complevel: int = 4, ) -> None: """Merge XYTS files. @@ -67,82 +319,37 @@ def merge_ts_hdf5( The output xyts file. glob_pattern : str, optional Set a custom glob pattern for merging the xyts files, by default "*xyts-*.e3d". + scale : float + Set the scale for quantising XYTS outputs. complevel : int, optional Set the compression level for the output HDF5 file. Range between 1-9 (9 being the highest level of compression). Defaults to 4. """ - component_xyts_files = sorted( - [ - xyts.XYTSFile( - xyts_file_path, proc_local_file=True, meta_only=True, round_dt=False - ) - for xyts_file_path in component_xyts_directory.glob(glob_pattern) - ], - key=lambda xyts_file: (xyts_file.y0, xyts_file.x0), + component_xyts_files = read_component_xyts_files( + component_xyts_directory, glob_pattern ) - top_left = component_xyts_files[0] - nt = top_left.nt - nx = top_left.nx - ny = top_left.ny - components = 3 - xyts_proc_header_size = 72 + # XYTS files contain certain repeated metadata, so we can extract + # a "sample" file for this common metadata. + sample_xyts_file = component_xyts_files[0] + metadata = extract_metadata(sample_xyts_file) - waveform_data = np.empty((nt, ny, nx), dtype=np.uint16) + waveform_data = np.empty((metadata.nt, metadata.ny, metadata.nx), dtype=np.uint16) for xyts_file in tqdm.tqdm(component_xyts_files, unit="files"): - x0 = xyts_file.x0 - y0 = xyts_file.y0 - x1 = x0 + xyts_file.local_nx - y1 = y0 + xyts_file.local_ny - data = np.fromfile( - xyts_file.xyts_path, dtype=np.float32, offset=xyts_proc_header_size - ).reshape((nt, components, xyts_file.local_ny, xyts_file.local_nx)) - magnitude = np.linalg.norm(data, axis=1) / 0.1 + local_data = read_waveform_data(xyts_file) + magnitude = np.linalg.norm(local_data.data, axis=1) / scale np.round(magnitude, out=magnitude) - waveform_data[:, y0:y1, x0:x1] = magnitude.astype(np.uint16) + waveform_data[ + :, + local_data.y_start : local_data.y_end, + local_data.x_start : local_data.x_end, + ] = magnitude.astype(np.uint16) - proj = coordinates.SphericalProjection( - mlon=top_left.mlon, mlat=top_left.mlat, mrot=top_left.mrot - ) - dx = top_left.hh - dt = top_left.dt - y, x = np.meshgrid( - np.arange(ny, dtype=np.float64), np.arange(nx, dtype=np.float64), indexing="ij" - ) - lat, lon = proj.inverse(x.flatten(), y.flatten()).T - lat = lat.reshape(y.shape) - lon = lon.reshape(y.shape) - time = np.arange(nt) * dt - dset = xr.Dataset( - { - "waveform": (("time", "y", "x"), waveform_data), - }, - coords={ - "time": ("time", time), - "y": ("y", np.arange(ny)), - "x": ("x", np.arange(nx)), - "latitude": (("y", "x"), lat), - "longitude": (("y", "x"), lon), - }, - attrs={ - "dx": dx, - "dy": dx, - "dt": dt, - "mlon": top_left.mlon, - "mlat": top_left.mlat, - "mrot": top_left.mrot, - }, - ) - - dset["waveform"].attrs.update( - { - "scale_factor": 0.1, - "add_offset": 0.0, - "units": "cm/s", - "_FillValue": -9999, - } - ) + lat, lon = xyts_lat_lon_coordinates(metadata) + time = np.arange(metadata.nt) * metadata.dt + dset = create_xyts_dataset(waveform_data, lat, lon, time, metadata) + set_scale(dset, scale) dset.to_netcdf( output, From d4a8da4d012fae3a349d02204ce7cf46d1d95caf Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Thu, 8 Jan 2026 16:59:47 +1300 Subject: [PATCH 03/17] fix: correct numpy types for arrays --- workflow/scripts/merge_ts.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 65fc20d..40ab13b 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -32,7 +32,6 @@ from typing import Annotated import numpy as np -import numpy.typing as npt import tqdm import typer import xarray as xr @@ -67,9 +66,10 @@ def read_component_xyts_files( ] -WaveformArray = npt.ndarray[tuple[int, int, int, int], np.float32] -QuantisedArray = npt.ndarray[tuple[int, int, int], np.uint16] -CoordinateArray = npt.ndarray[tuple[int, int], npt.float64] +WaveformArray = np.ndarray[tuple[int, int, int, int], np.dtype[np.float32]] +QuantisedArray = np.ndarray[tuple[int, int, int], np.dtype[np.uint16]] +CoordinateArray = np.ndarray[tuple[int, int], np.dtype[np.float64]] +TimeArray = np.ndarray[tuple[int], np.dtype[np.float64]] @dataclass @@ -224,7 +224,7 @@ def create_xyts_dataset( data: QuantisedArray, lat: CoordinateArray, lon: CoordinateArray, - time: npt.ndarray[int, np.float64], + time: TimeArray, metadata: Metadata, ) -> xr.Dataset: """Create an XYTS dataset from given waveform data, coordinate meshgrid, time and metadata. @@ -347,7 +347,7 @@ def merge_ts_hdf5( ] = magnitude.astype(np.uint16) lat, lon = xyts_lat_lon_coordinates(metadata) - time = np.arange(metadata.nt) * metadata.dt + time = np.arange(metadata.nt, dtype=np.float64) * metadata.dt dset = create_xyts_dataset(waveform_data, lat, lon, time, metadata) set_scale(dset, scale) From 09d4ae47547aa598ad9413af49acaa7e77ab6dd5 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Thu, 8 Jan 2026 17:00:08 +1300 Subject: [PATCH 04/17] fix: correct lat lon array logic --- workflow/scripts/merge_ts.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 40ab13b..4c3299f 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -138,8 +138,10 @@ class Metadata: """Number of y gridpoints.""" nt: int """Number of timesteps.""" + resolution: float + """Spatial resolution (of simulation).""" dx: float - """Spatial resolution.""" + """Spatial resolution (of XYTS file).""" dt: float """Temporal resolution.""" mlon: float @@ -166,20 +168,23 @@ def extract_metadata(xyts_file: xyts.XYTSFile) -> Metadata: nt = xyts_file.nt nx = xyts_file.nx ny = xyts_file.ny - dx = xyts_file.hh + resolution = xyts_file.hh + dx = xyts_file.dx mlat = xyts_file.mlat mlon = xyts_file.mlon mrot = xyts_file.mrot dt = xyts_file.dt + # The casts here convert from numpy to python types return Metadata( - dx=dx, - dt=dt, - mlon=mlon, - mlat=mlat, - mrot=mrot, - nx=nx, - ny=ny, - nt=nt, + resolution=float(resolution), + dx=float(dx), + dt=float(dt), + mlon=float(mlon), + mlat=float(mlat), + mrot=float(mrot), + nx=int(nx), + ny=int(ny), + nt=int(nt), ) @@ -212,8 +217,9 @@ def xyts_lat_lon_coordinates( ) # dx = dy, so the following is ok. # Shift gridpoints so that they are origin centred. - y -= metadata.ny * metadata.dx / 2 - x -= metadata.nx * metadata.dx / 2 + y = (y - metadata.ny / 2) * metadata.dx + x = (x - metadata.nx / 2) * metadata.dx + lat, lon = proj.inverse(x.flatten(), y.flatten()).T lat = lat.reshape(y.shape) lon = lon.reshape(y.shape) From cc493a1334874a4888cd192db29efc1303bf72d4 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Thu, 8 Jan 2026 17:00:18 +1300 Subject: [PATCH 05/17] fix: Metadata -> metadata --- workflow/scripts/merge_ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 4c3299f..e4b9ab8 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -266,7 +266,7 @@ def create_xyts_dataset( "latitude": (("y", "x"), lat), "longitude": (("y", "x"), lon), }, - attrs=dataclasses.asdict(Metadata), + attrs=dataclasses.asdict(metadata), ) return dset From 662a6a1b56a2864844a9646e65320cc6cc5aae12 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:19:28 +1300 Subject: [PATCH 06/17] refactor(merge-ts): extract module-level constant --- workflow/scripts/merge_ts.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index e4b9ab8..7b4ff3a 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -88,6 +88,9 @@ class WaveformData: """Waveform data.""" +XYTS_PROC_HEADER_SIZE = 72 + + def read_waveform_data(xyts_file: xyts.XYTSFile) -> WaveformData: """Read waveform data from an XYTS file. @@ -120,9 +123,9 @@ def read_waveform_data(xyts_file: xyts.XYTSFile) -> WaveformData: y0 = xyts_file.y0 x1 = x0 + nx y1 = y0 + ny - xyts_proc_header_size = 72 + data = np.fromfile( - xyts_file.xyts_path, dtype=np.float32, offset=xyts_proc_header_size + xyts_file.xyts_path, dtype=np.float32, offset=XYTS_PROC_HEADER_SIZE ).reshape((nt, components, ny, nx)) waveform_data = WaveformData(x_start=x0, y_start=y0, x_end=x1, y_end=y1, data=data) return waveform_data From f3c6b2e428b549be0e529d28138fb19b7a19dec5 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:19:53 +1300 Subject: [PATCH 07/17] docs(merge-ts): remove outdated npt.ndarray accessors --- workflow/scripts/merge_ts.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 7b4ff3a..2b81d0f 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -196,6 +196,9 @@ def xyts_lat_lon_coordinates( ) -> tuple[CoordinateArray, CoordinateArray]: """Generate the lat/lon coordinates corresponding to a model. + Generates a ``lat`` and ``lon`` meshgrid such that ``waveform_data[i, j]`` + has latitude ``lat[i, j]`` and longitude ``lon[i, j]``. + Parameters ---------- metadata : Metadata @@ -204,9 +207,10 @@ def xyts_lat_lon_coordinates( Returns ------- - tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]] - A ``lat`` and ``lon`` meshgrid such that ``waveform_data[i, j]`` - has latitude ``lat[i, j]`` and longitude ``lon[i, j]``. + lat : array of float64 + The latitude coordinate meshgrid. + lon : array of float64 + The longitude coordinate meshgrid. """ proj = coordinates.SphericalProjection( mlon=metadata.mlon, @@ -240,13 +244,13 @@ def create_xyts_dataset( Parameters ---------- - data : npt.ndarray[tuple[int, int, int], np.uint16] + data : (nt, ny, nx) array of uint16 Quantised waveform data. - lat : npt.ndarray[tuple[int, int], np.float64] + lat : (ny, nx) array of float64 Latitude meshgrid. - lon : npt.ndarray[tuple[int, int], np.float64] + lon : (ny, nx) array of float64 Longitude meshgrid. - time : npt.ndarray[int, np.float64] + time : (nt,) array of float64 Time array. metadata : Metadata Metadata object. From 9635a2e929052204bf981c3766f0a522f7551175 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:20:17 +1300 Subject: [PATCH 08/17] feat(merge-ts): validate metadata and array shapes before dataset creation --- workflow/scripts/merge_ts.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 2b81d0f..8a69a24 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -262,6 +262,22 @@ def create_xyts_dataset( indexing the waveform data, lat and lon arrays. Metadata populates the attributes. """ + (nt, ny, nx) = data.shape + if metadata.nx != nx or metadata.ny != ny or metadata.nt != nt: + raise ValueError( + f"Metadata does not match data, {metadata.nx=}, {metadata.ny=}, {metadata.nt=} but data {nx=}, {ny=}, {nt=}" + ) + elif lon.shape != (ny, nx): + raise ValueError( + f"Longitude shape incompatible, {lon.shape=} but {data.shape=}" + ) + elif lat.shape != (ny, nx): + raise ValueError(f"Latitude shape incompatible, {lat.shape=} but {data.shape=}") + elif time.shape != (nt,): + raise ValueError( + f"Time shape incompatible, expected {(nt,)} but found {time.shape=}" + ) + dset = xr.Dataset( { "waveform": (("time", "y", "x"), data), From abd9e0ef51eadc01535645eec3c782b2a4715b08 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:20:33 +1300 Subject: [PATCH 09/17] feat(merge-ts): raise FileNotFoundError when glob pattern does not match directory --- workflow/scripts/merge_ts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 8a69a24..5811b3e 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -358,6 +358,10 @@ def merge_ts_hdf5( component_xyts_files = read_component_xyts_files( component_xyts_directory, glob_pattern ) + if not component_xyts_files: + raise FileNotFoundError( + f"No files in '{component_xyts_directory}' match glob '{glob_pattern}'" + ) # XYTS files contain certain repeated metadata, so we can extract # a "sample" file for this common metadata. From a46183e9c82f9d73547aba4c6e3a75198a904f2e Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:21:27 +1300 Subject: [PATCH 10/17] docs(merge-ts): merge_ts -> merge-ts --- workflow/scripts/merge_ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 5811b3e..b194581 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -19,7 +19,7 @@ Usage ----- -`merge_ts XYTS_DIRECTORY output.h5` +`merge-ts XYTS_DIRECTORY output.h5` For More Help ------------- From 7b8a6bc450fd07bbb095de31b54e4ae539a96ca4 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:21:54 +1300 Subject: [PATCH 11/17] fix(merge-ts): correctly label dtype as uint16 --- workflow/scripts/merge_ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index b194581..88575dd 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -389,7 +389,7 @@ def merge_ts_hdf5( engine="h5netcdf", encoding={ "waveform": { - "dtype": "int16", + "dtype": "uint16", "compression": "zlib", "complevel": complevel, "shuffle": True, From 306ef2650f7324949c6565890ca0255d4571de4b Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:44:18 +1300 Subject: [PATCH 12/17] fix(merge-ts): use valid fill value --- workflow/scripts/merge_ts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 88575dd..c7dd04c 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -309,12 +309,14 @@ def set_scale(dset: xr.Dataset, scale: float) -> None: scale : float Scale for waveform quantisation. """ + bounds = np.iinfo(np.uint16) + max_bound = bounds.max dset["waveform"].attrs.update( { "scale_factor": scale, "add_offset": 0.0, "units": "cm/s", - "_FillValue": -9999, + "_FillValue": max_bound, # Reserve max bound for NaN values } ) From e5eae370b11f88f06fadb99bc6fb285fd70f89f7 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:44:43 +1300 Subject: [PATCH 13/17] refactor(merge-ts): extract more robust quantisation logic --- workflow/scripts/merge_ts.py | 38 +++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index c7dd04c..29c237b 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -321,6 +321,37 @@ def set_scale(dset: xr.Dataset, scale: float) -> None: ) +def quantise_array(waveform_data: WaveformArray, scale: float) -> QuantisedArray: + """Quantise a floating point waveform array. + + Quantisation consists of scaling ``waveform_data`` by ``scale``, + rounding, clipping and casting to uint16. + + Parameters + ---------- + waveform_data : WaveformArray + The input waveform array. It is assumed that + ``waveform_data[i, j] >= 0``. + scale : float + The scale to quantise. + + + Returns + ------- + QuantisedArray + A quantised version of ``waveform_data``. NaN values are set + to 2^16 - 1. Out of scale values are also set to 2^16 - 1. For + this reason, treat 2^16 - 1 as NaN or a fill-value. + """ + scaled = waveform_data / scale + bounds = np.iinfo(np.uint16) + max_bound = bounds.max + np.nan_to_num(scaled, nan=max_bound, copy=False) + np.clip(scaled, 0, max_bound, out=scaled) + np.round(scaled, out=scaled) + return scaled.astype(np.uint16) + + @cli.from_docstring(app) def merge_ts_hdf5( component_xyts_directory: Annotated[ @@ -371,15 +402,16 @@ def merge_ts_hdf5( metadata = extract_metadata(sample_xyts_file) waveform_data = np.empty((metadata.nt, metadata.ny, metadata.nx), dtype=np.uint16) + for xyts_file in tqdm.tqdm(component_xyts_files, unit="files"): local_data = read_waveform_data(xyts_file) - magnitude = np.linalg.norm(local_data.data, axis=1) / scale - np.round(magnitude, out=magnitude) + magnitude = np.linalg.norm(local_data.data, axis=1) + quantised = quantise_array(magnitude, scale) waveform_data[ :, local_data.y_start : local_data.y_end, local_data.x_start : local_data.x_end, - ] = magnitude.astype(np.uint16) + ] = quantised lat, lon = xyts_lat_lon_coordinates(metadata) time = np.arange(metadata.nt, dtype=np.float64) * metadata.dt From 0365d32397cf348b03648e87635d5d4a3efd8071 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:46:32 +1300 Subject: [PATCH 14/17] docs(merge-ts): apply PR suggestions Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- workflow/scripts/merge_ts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 29c237b..1a162d4 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -97,7 +97,7 @@ def read_waveform_data(xyts_file: xyts.XYTSFile) -> WaveformData: Parameters ---------- xyts_file : xyts.XYTSFile - The XYTS file to read file. + The XYTS file to read from. Returns ------- @@ -381,8 +381,8 @@ def merge_ts_hdf5( The output xyts file. glob_pattern : str, optional Set a custom glob pattern for merging the xyts files, by default "*xyts-*.e3d". - scale : float - Set the scale for quantising XYTS outputs. + scale : float, optional + Set the scale for quantising XYTS outputs. Defaults to 0.1. complevel : int, optional Set the compression level for the output HDF5 file. Range between 1-9 (9 being the highest level of compression). From f0c7538ae2d662b46e7ffb49d0596bdba30a072d Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 09:55:00 +1300 Subject: [PATCH 15/17] docs(merge-ts): improve quantisation docstring and reverse max_bound for NaN --- workflow/scripts/merge_ts.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index 29c237b..e5eb8c1 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -322,32 +322,31 @@ def set_scale(dset: xr.Dataset, scale: float) -> None: def quantise_array(waveform_data: WaveformArray, scale: float) -> QuantisedArray: - """Quantise a floating point waveform array. + r""" + Quantise a floating-point waveform array into 16-bit unsigned integers. - Quantisation consists of scaling ``waveform_data`` by ``scale``, - rounding, clipping and casting to uint16. + The transformation follows the formula: + $$output = \text{round}(\text{clip}(\frac{waveform\_data}{scale}, 0, 65534))$$ Parameters ---------- waveform_data : WaveformArray - The input waveform array. It is assumed that - ``waveform_data[i, j] >= 0``. + The input floating-point array. All values are expected to be >= 0. scale : float - The scale to quantise. - + The quantisation step size (resolution). For example, a scale of 0.1 + means the output represents increments of 0.1 from the input. Returns ------- - QuantisedArray - A quantised version of ``waveform_data``. NaN values are set - to 2^16 - 1. Out of scale values are also set to 2^16 - 1. For - this reason, treat 2^16 - 1 as NaN or a fill-value. + QuantisedArray (uint16) + The discrete representation of the waveform. Values are capped at + 65534 to reserve 65535 as a NaN indicator. """ scaled = waveform_data / scale bounds = np.iinfo(np.uint16) max_bound = bounds.max np.nan_to_num(scaled, nan=max_bound, copy=False) - np.clip(scaled, 0, max_bound, out=scaled) + np.clip(scaled, 0, max_bound - 1, out=scaled) np.round(scaled, out=scaled) return scaled.astype(np.uint16) From 06f520540e30ccab7015c0cb2d587e123f52e5c6 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 10:00:20 +1300 Subject: [PATCH 16/17] fix(merge-ts): add CLI validation for options --- workflow/scripts/merge_ts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index e5eb8c1..6f3a639 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -367,8 +367,8 @@ def merge_ts_hdf5( typer.Argument(dir_okay=False, writable=True), ], glob_pattern: str = "*xyts-*.e3d", - scale: float = 0.1, - complevel: int = 4, + scale: Annotated[float, typer.Option(min=0)] = 0.1, + complevel: Annotated[int, typer.Option(min=1, max=9)] = 4, ) -> None: """Merge XYTS files. From 7db7ba3e4d6ac36998abb42f1c8eb73a16807b17 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Fri, 9 Jan 2026 10:02:02 +1300 Subject: [PATCH 17/17] fix(merge-ts): use fill-value as initialisation --- workflow/scripts/merge_ts.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/merge_ts.py b/workflow/scripts/merge_ts.py index a1fbccc..63d2ebc 100644 --- a/workflow/scripts/merge_ts.py +++ b/workflow/scripts/merge_ts.py @@ -399,8 +399,11 @@ def merge_ts_hdf5( # a "sample" file for this common metadata. sample_xyts_file = component_xyts_files[0] metadata = extract_metadata(sample_xyts_file) - - waveform_data = np.empty((metadata.nt, metadata.ny, metadata.nx), dtype=np.uint16) + bounds = np.iinfo(np.uint16) + nan_value = bounds.max + waveform_data = np.full( + (metadata.nt, metadata.ny, metadata.nx), nan_value, dtype=np.uint16 + ) for xyts_file in tqdm.tqdm(component_xyts_files, unit="files"): local_data = read_waveform_data(xyts_file)