From 203588075c611180bc11c033fc3f65c0596464fe Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 6 Aug 2025 15:43:26 +0200 Subject: [PATCH 01/63] wip: first draft before testing. --- xdas/core/coordinates.py | 453 ++++++++++++++++++++++++++++++++++----- 1 file changed, 399 insertions(+), 54 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index cf638e7..2073533 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -345,6 +345,58 @@ def to_dict(self): def from_dict(cls, dct): return cls(**dct) + def format_index(self, idx, bounds="raise"): + idx = np.asarray(idx) + if not np.issubdtype(idx.dtype, np.integer): + raise IndexError("only integer are valid index") + idx = idx + (idx < 0) * len(self) + if bounds == "raise": + if np.any(idx < 0) or np.any(idx >= len(self)): + raise IndexError("index is out of bounds") + elif bounds == "clip": + idx = np.clip(idx, 0, len(self)) + return idx + + def format_index_slice(self, slc): + start = slc.start + stop = slc.stop + step = slc.step + if start is None: + start = 0 + if stop is None: + stop = len(self) + if step is None: + step = 1 + start = self.format_index(start, bounds="clip") + stop = self.format_index(stop, bounds="clip") + return slice(start, stop, step) + + def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): + if start is not None: + try: + start_index = self.get_indexer(start, method="bfill") + except KeyError: + start_index = len(self) + else: + start_index = None + if stop is not None: + try: + end_index = self.get_indexer(stop, method="ffill") + stop_index = end_index + 1 + except KeyError: + stop_index = 0 + else: + stop_index = None + if step is not None: + raise NotImplementedError("cannot use step yet") + if ( + (not endpoint) + and (stop is not None) + and (self[stop_index - 1].values == stop) + ): + stop_index -= 1 + return slice(start_index, stop_index) + class ScalarCoordinate(Coordinate): def __new__(cls, *args, **kwargs): @@ -705,18 +757,6 @@ def get_value(self, index): index = self.format_index(index) return forward(index, self.tie_indices, self.tie_values) - def format_index(self, idx, bounds="raise"): - idx = np.asarray(idx) - if not np.issubdtype(idx.dtype, np.integer): - raise IndexError("only integer are valid index") - idx = idx + (idx < 0) * len(self) - if bounds == "raise": - if np.any(idx < 0) or np.any(idx >= len(self)): - raise IndexError("index is out of bounds") - elif bounds == "clip": - idx = np.clip(idx, 0, len(self)) - return idx - def slice_index(self, index_slice): index_slice = self.format_index_slice(index_slice) start_index, stop_index, step_index = ( @@ -725,11 +765,13 @@ def slice_index(self, index_slice): index_slice.step, ) if stop_index - start_index <= 0: - return self.__class__(dict(tie_indices=[], tie_values=[])) + return self.__class__(dict(tie_indices=[], tie_values=[], dim=self.dim)) elif (stop_index - start_index) <= step_index: tie_indices = [0] tie_values = [self.get_value(start_index)] - return self.__class__(dict(tie_indices=tie_indices, tie_values=tie_values)) + return self.__class__( + dict(tie_indices=tie_indices, tie_values=tie_values, dim=self.dim) + ) else: end_index = stop_index - 1 start_value = self.get_value(start_index) @@ -752,20 +794,6 @@ def slice_index(self, index_slice): coord = coord.decimate(step_index) return coord - def format_index_slice(self, slc): - start = slc.start - stop = slc.stop - step = slc.step - if start is None: - start = 0 - if stop is None: - stop = len(self) - if step is 
None: - step = 1 - start = self.format_index(start, bounds="clip") - stop = self.format_index(stop, bounds="clip") - return slice(start, stop, step) - def get_indexer(self, value, method=None): if isinstance(value, str): value = np.datetime64(value) @@ -786,32 +814,6 @@ def get_indexer(self, value, method=None): raise e return indexer - def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): - if start is not None: - try: - start_index = self.get_indexer(start, method="bfill") - except KeyError: - start_index = len(self) - else: - start_index = None - if stop is not None: - try: - end_index = self.get_indexer(stop, method="ffill") - stop_index = end_index + 1 - except KeyError: - stop_index = 0 - else: - stop_index = None - if step is not None: - raise NotImplementedError("cannot use step yet") - if ( - (not endpoint) - and (stop is not None) - and (self[stop_index - 1].values == stop) - ): - stop_index -= 1 - return slice(start_index, stop_index) - def append(self, other): if not isinstance(other, self.__class__): raise TypeError(f"cannot append {type(other)} to {self.__class__}") @@ -959,6 +961,349 @@ def to_dict(self): return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} +class SampledCoordinate(Coordinate): + """ + A coordinate that is sampled at regular intervals. + + Parameters + ---------- + data : dict-like + The data of the coordinate. + dim : str, optional + The dimension name of the coordinate, by default None. + dtype : str or numpy.dtype, optional + The data type of the coordinate, by default None. + """ + + def __new__(cls, *args, **kwargs): + return object.__new__(cls) + + def __init__(self, data=None, dim=None, dtype=None): + if data is None: + data = {"tie_values": [], "tie_samples": [], "sampling_interval": None} + data, dim = parse(data, dim) + if not self.__class__.isvalid(data): + raise TypeError("`data` must be dict-like") + if not set(data) == {"tie_values", "tie_samples", "sampling_interval"}: + raise ValueError( + "keys `tie_values`, `tie_samples`, and `sampling_interval` must be provided" + ) + tie_values = np.asarray(data["tie_values"], dtype=dtype) + tie_samples = np.asarray(data["tie_samples"]) + sampling_interval = np.asarray(data["sampling_interval"]) + if not tie_values.ndim == 1: + raise ValueError("`tie_values` must be 1D") + if not tie_samples.ndim == 1: + raise ValueError("`tie_samples` must be 1D") + if not len(tie_values) == len(tie_samples): + raise ValueError("`tie_values` and `tie_samples` must have the same length") + if not ( + np.issubdtype(tie_values.dtype, np.number) + or np.issubdtype(tie_values.dtype, np.datetime64) + ): + raise ValueError("`tie_values` must have either numeric or datetime dtype") + if not self.empty: + if not np.issubdtype(tie_samples.dtype, np.integer): + raise ValueError("`tie_samples` must be integer-like") + if not np.all(tie_samples > 0): + raise ValueError("`tie_samples` must be positive integers") + if not np.isscalar(sampling_interval): + raise ValueError("`sampling_interval` must be a scalar value") + if np.issubdtype(sampling_interval.dtype, np.datetime64): + sampling_interval = sampling_interval.astype("timedelta64[ns]") + else: + sampling_interval = np.asarray(sampling_interval).astype(dtype) + tie_samples = tie_samples.astype(int) + self.data = dict( + tie_values=tie_values, + tie_samples=tie_samples, + sampling_interval=sampling_interval, + ) + self.dim = dim + + @staticmethod + def isvalid(data): + match data: + case { + "tie_values": _, + "tie_samples": _, + "sampling_interval": 
_, + }: + return True + case _: + return False + + def __len__(self): + if self.empty: + return 0 + else: + return sum(self.tie_samples) + + def __repr__(self): + if self.empty: + return "empty coordinate" + elif len(self) == 1: + return f"{self.tie_values[0]}" + else: + if np.issubdtype(self.dtype, np.floating): + return f"{self.start:.3f} to {self.end:.3f}" + elif np.issubdtype(self.dtype, np.datetime64): + self.start = format_datetime(self.start) + self.end = format_datetime(self.end) + return f"{self.start} to {self.end}" + else: + return f"{self.start} to {self.end}" + + def __getitem__(self, item): + if isinstance(item, slice): + return self.slice_index(item) + elif np.isscalar(item): + return ScalarCoordinate(self.get_value(item), None) + else: + return DenseCoordinate(self.get_value(item), self.dim) + + def __add__(self, other): + return self.__class__( + { + "tie_values": self.tie_values + other, + "tie_samples": self.tie_samples, + "sampling_interval": self.sampling_interval, + }, + self.dim, + ) + + def __sub__(self, other): + return self.__class__( + { + "tie_values": self.tie_values - other, + "tie_samples": self.tie_samples, + "sampling_interval": self.sampling_interval, + }, + self.dim, + ) + + def __array__(self, dtype=None): + out = self.values + if dtype is not None: + out = out.__array__(dtype) + return out + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + raise NotImplementedError + + def __array_function__(self, func, types, args, kwargs): + raise NotImplementedError + + @property + def tie_values(self): + return self.data["tie_values"] + + @property + def tie_samples(self): + return self.data["tie_samples"] + + @property + def sampling_interval(self): + return self.data["sampling_interval"] + + @property + def tie_indices(self): + return np.concatenate(([0], np.cumsum(self.tie_samples[:-1]))) + + @property + def empty(self): + return self.tie_values.shape == (0,) + + @property + def dtype(self): + return self.tie_values.dtype + + @property + def ndim(self): + return self.tie_values.ndim + + @property + def shape(self): + return (len(self),) + + @property + def indices(self): + if self.empty: + return np.array([], dtype="int") + else: + return np.arange(len(self)) + + @property + def values(self): + if self.empty: + return np.array([], dtype=self.dtype) + else: + return self.get_value(self.indices) + + @property + def start(self): + return self.tie_values[0] + + @property + def end(self): + return self.tie_values[-1] + self.sampling_interval * self.tie_samples[-1] + + def equals(self, other): + return ( + np.array_equal(self.tie_values, other.tie_values) + and np.array_equal(self.tie_samples, other.tie_samples) + and self.sampling_interval == other.sampling_interval + and self.dim == other.dim + and self.dtype == other.dtype + ) + + def get_value(self, index): + index = self.format_index(index) + if np.any(index < 0) or np.any(index >= len(self)): + raise IndexError("index is out of bounds") + reference = np.searchsorted(self.tie_indices, index) + return self.tie_values[reference] + ( + (index - self.tie_indices[reference]) * self.sampling_interval + ) + + def slice_index(self, index_slice): + index_slice = self.format_index_slice(index_slice) + start_index, stop_index, step_index = ( + index_slice.start, + index_slice.stop, + index_slice.step, + ) + if stop_index - start_index <= 0: + return self.__class__( + dict( + tie_values=[], + tie_samples=[], + sampling_interval=self.sampling_interval, + ), + self.dim, + ) + elif (stop_index - start_index) <= 
step_index: + tie_values = [self.get_value(start_index)] + tie_samples = [stop_index - start_index] + return self.__class__( + dict( + tie_values=tie_values, + tie_samples=tie_samples, + sampling_interval=self.sampling_interval, + ), + self.dim, + ) + else: + # keep tie values, number of samples and related tie indices contained in the slice + mask = (start_index < self.tie_indices) & (self.tie_indices <= stop_index) + tie_values = self.tie_values[mask] + tie_samples = self.tie_samples[mask] + tie_indices = self.tie_indices[mask] + + # insert the missing start value + start_value = self.get_value(start_index) + tie_values = np.concatenate([[start_value], self.tie_values[mask]]) + + # insert the missing start number of samples and adjust the end one + tie_samples = np.concatenate( + [[start_index - tie_indices[0]], tie_samples[mask]] + ) + tie_samples[-1] = stop_index - tie_indices[-1] + + # repack data and decimate if needed + data = { + "tie_values": tie_values, + "tie_samples": tie_samples, + "sampling_interval": self.sampling_interval, + } + coord = self.__class__(data, self.dim) + if step_index != 1: + coord = coord.decimate(step_index) + return coord + + def get_indexer(self, value, method=None): + if isinstance(value, str): + value = np.datetime64(value) + else: + value = np.asarray(value) + # Check that value lies within the coordinate value range (vectorized) + if np.any(value < self.start) or np.any(value > self.end): + raise KeyError("index not found") + if not is_strictly_increasing(self.tie_values): + raise ValueError("tie_values must be strictly increasing") + reference = np.searchsorted(self.tie_values, value) + offset = (value - self.tie_values[reference]) / self.sampling_interval + if method == "nearest": + offset = np.round(offset).astype(int) + elif method == "ffill": + offset = np.floor(offset).astype(int) + elif method == "bfill": + offset = np.ceil(offset).astype(int) + else: + raise ValueError("method must be one of 'nearest', 'ffill', or 'bfill'") + return self.tie_indices[reference] + offset + + def append(self, other): + if not isinstance(other, self.__class__): + raise TypeError(f"cannot append {type(other)} to {self.__class__}") + if not self.dim == other.dim: + raise ValueError("cannot append coordinate with different dimension") + if self.empty: + return other + if other.empty: + return self + if not self.dtype == other.dtype: + raise ValueError("cannot append coordinate with different dtype") + if not self.sampling_interval == other.sampling_interval: + raise ValueError( + "cannot append coordinate with different sampling intervals" + ) + tie_values = np.concatenate([self.tie_values, other.tie_values]) + tie_samples = np.concatenate([self.tie_samples, other.tie_samples + len(self)]) + return self.__class__( + { + "tie_values": tie_values, + "tie_samples": tie_samples, + "sampling_interval": self.sampling_interval, + }, + self.dim, + ) + + def decimate(self, q): + raise NotImplementedError("decimation is not implemented for SampledCoordinate") + + def simplify(self, tolerance=None): + raise NotImplementedError( + "simplification is not implemented for SampledCoordinate" + ) + + def get_discontinuities(self): + raise NotImplementedError( + "get_discontinuities is not implemented for SampledCoordinate" + ) + + def get_availabilities(self): + raise NotImplementedError( + "get_availabilities is not implemented for SampledCoordinate" + ) + + @classmethod + def from_array(cls, arr, dim=None, sampling_interval=None): + raise NotImplementedError("from_array is not 
implemented for SampledCoordinate") + + def to_dict(self): + tie_values = self.data["tie_values"] + tie_samples = self.data["tie_samples"] + if np.issubdtype(tie_values.dtype, np.datetime64): + tie_values = tie_values.astype(str) + data = { + "tie_values": tie_values.tolist(), + "tie_samples": tie_samples.tolist(), + "sampling_interval": self.sampling_interval, + } + return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} + + def parse(data, dim=None): if isinstance(data, tuple): if dim is None: From f9e456f14461581f9d6873587500a86b4bbd49a5 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 6 Aug 2025 15:48:22 +0200 Subject: [PATCH 02/63] Fix wrong edit in InterpCoordinate. --- xdas/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 2073533..1f71202 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -765,12 +765,12 @@ def slice_index(self, index_slice): index_slice.step, ) if stop_index - start_index <= 0: - return self.__class__(dict(tie_indices=[], tie_values=[], dim=self.dim)) + return self.__class__(dict(tie_indices=[], tie_values=[]), dim=self.dim) elif (stop_index - start_index) <= step_index: tie_indices = [0] tie_values = [self.get_value(start_index)] return self.__class__( - dict(tie_indices=tie_indices, tie_values=tie_values, dim=self.dim) + dict(tie_indices=tie_indices, tie_values=tie_values), dim=self.dim ) else: end_index = stop_index - 1 From a70ad5acf0f3fef61ba8a7cb4fbc32a5da6e7d6a Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 08:33:23 +0100 Subject: [PATCH 03/63] rename tie_samples -> tie_lengths --- xdas/core/coordinates.py | 70 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 1f71202..66edd5c 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -980,43 +980,43 @@ def __new__(cls, *args, **kwargs): def __init__(self, data=None, dim=None, dtype=None): if data is None: - data = {"tie_values": [], "tie_samples": [], "sampling_interval": None} + data = {"tie_values": [], "tie_lengths": [], "sampling_interval": None} data, dim = parse(data, dim) if not self.__class__.isvalid(data): raise TypeError("`data` must be dict-like") - if not set(data) == {"tie_values", "tie_samples", "sampling_interval"}: + if not set(data) == {"tie_values", "tie_lengths", "sampling_interval"}: raise ValueError( - "keys `tie_values`, `tie_samples`, and `sampling_interval` must be provided" + "keys `tie_values`, `tie_lengths`, and `sampling_interval` must be provided" ) tie_values = np.asarray(data["tie_values"], dtype=dtype) - tie_samples = np.asarray(data["tie_samples"]) + tie_lengths = np.asarray(data["tie_lengths"]) sampling_interval = np.asarray(data["sampling_interval"]) if not tie_values.ndim == 1: raise ValueError("`tie_values` must be 1D") - if not tie_samples.ndim == 1: - raise ValueError("`tie_samples` must be 1D") - if not len(tie_values) == len(tie_samples): - raise ValueError("`tie_values` and `tie_samples` must have the same length") + if not tie_lengths.ndim == 1: + raise ValueError("`tie_lengths` must be 1D") + if not len(tie_values) == len(tie_lengths): + raise ValueError("`tie_values` and `tie_lengths` must have the same length") if not ( np.issubdtype(tie_values.dtype, np.number) or np.issubdtype(tie_values.dtype, np.datetime64) ): raise ValueError("`tie_values` must have either numeric or 
datetime dtype") if not self.empty: - if not np.issubdtype(tie_samples.dtype, np.integer): - raise ValueError("`tie_samples` must be integer-like") - if not np.all(tie_samples > 0): - raise ValueError("`tie_samples` must be positive integers") + if not np.issubdtype(tie_lengths.dtype, np.integer): + raise ValueError("`tie_lengths` must be integer-like") + if not np.all(tie_lengths > 0): + raise ValueError("`tie_lengths` must be positive integers") if not np.isscalar(sampling_interval): raise ValueError("`sampling_interval` must be a scalar value") if np.issubdtype(sampling_interval.dtype, np.datetime64): sampling_interval = sampling_interval.astype("timedelta64[ns]") else: sampling_interval = np.asarray(sampling_interval).astype(dtype) - tie_samples = tie_samples.astype(int) + tie_lengths = tie_lengths.astype(int) self.data = dict( tie_values=tie_values, - tie_samples=tie_samples, + tie_lengths=tie_lengths, sampling_interval=sampling_interval, ) self.dim = dim @@ -1026,7 +1026,7 @@ def isvalid(data): match data: case { "tie_values": _, - "tie_samples": _, + "tie_lengths": _, "sampling_interval": _, }: return True @@ -1037,7 +1037,7 @@ def __len__(self): if self.empty: return 0 else: - return sum(self.tie_samples) + return sum(self.tie_lengths) def __repr__(self): if self.empty: @@ -1066,7 +1066,7 @@ def __add__(self, other): return self.__class__( { "tie_values": self.tie_values + other, - "tie_samples": self.tie_samples, + "tie_lengths": self.tie_lengths, "sampling_interval": self.sampling_interval, }, self.dim, @@ -1076,7 +1076,7 @@ def __sub__(self, other): return self.__class__( { "tie_values": self.tie_values - other, - "tie_samples": self.tie_samples, + "tie_lengths": self.tie_lengths, "sampling_interval": self.sampling_interval, }, self.dim, @@ -1099,8 +1099,8 @@ def tie_values(self): return self.data["tie_values"] @property - def tie_samples(self): - return self.data["tie_samples"] + def tie_lengths(self): + return self.data["tie_lengths"] @property def sampling_interval(self): @@ -1108,7 +1108,7 @@ def sampling_interval(self): @property def tie_indices(self): - return np.concatenate(([0], np.cumsum(self.tie_samples[:-1]))) + return np.concatenate(([0], np.cumsum(self.tie_lengths[:-1]))) @property def empty(self): @@ -1146,12 +1146,12 @@ def start(self): @property def end(self): - return self.tie_values[-1] + self.sampling_interval * self.tie_samples[-1] + return self.tie_values[-1] + self.sampling_interval * self.tie_lengths[-1] def equals(self, other): return ( np.array_equal(self.tie_values, other.tie_values) - and np.array_equal(self.tie_samples, other.tie_samples) + and np.array_equal(self.tie_lengths, other.tie_lengths) and self.sampling_interval == other.sampling_interval and self.dim == other.dim and self.dtype == other.dtype @@ -1177,18 +1177,18 @@ def slice_index(self, index_slice): return self.__class__( dict( tie_values=[], - tie_samples=[], + tie_lengths=[], sampling_interval=self.sampling_interval, ), self.dim, ) elif (stop_index - start_index) <= step_index: tie_values = [self.get_value(start_index)] - tie_samples = [stop_index - start_index] + tie_lengths = [stop_index - start_index] return self.__class__( dict( tie_values=tie_values, - tie_samples=tie_samples, + tie_lengths=tie_lengths, sampling_interval=self.sampling_interval, ), self.dim, @@ -1197,7 +1197,7 @@ def slice_index(self, index_slice): # keep tie values, number of samples and related tie indices contained in the slice mask = (start_index < self.tie_indices) & (self.tie_indices <= stop_index) tie_values 
= self.tie_values[mask]
-            tie_samples = self.tie_samples[mask]
+            tie_lengths = self.tie_lengths[mask]
             tie_indices = self.tie_indices[mask]
 
             # insert the missing start value
@@ -1205,15 +1205,15 @@ def slice_index(self, index_slice):
             tie_values = np.concatenate([[start_value], self.tie_values[mask]])
 
             # insert the missing start number of samples and adjust the end one
-            tie_samples = np.concatenate(
-                [[start_index - tie_indices[0]], tie_samples[mask]]
+            tie_lengths = np.concatenate(
+                [[start_index - tie_indices[0]], tie_lengths[mask]]
             )
-            tie_samples[-1] = stop_index - tie_indices[-1]
+            tie_lengths[-1] = stop_index - tie_indices[-1]
 
             # repack data and decimate if needed
             data = {
                 "tie_values": tie_values,
-                "tie_samples": tie_samples,
+                "tie_lengths": tie_lengths,
                 "sampling_interval": self.sampling_interval,
             }
             coord = self.__class__(data, self.dim)
@@ -1259,11 +1259,11 @@ def append(self, other):
                 "cannot append coordinate with different sampling intervals"
             )
         tie_values = np.concatenate([self.tie_values, other.tie_values])
-        tie_samples = np.concatenate([self.tie_samples, other.tie_samples + len(self)])
+        tie_lengths = np.concatenate([self.tie_lengths, other.tie_lengths + len(self)])
         return self.__class__(
             {
                 "tie_values": tie_values,
-                "tie_samples": tie_samples,
+                "tie_lengths": tie_lengths,
                 "sampling_interval": self.sampling_interval,
             },
             self.dim,
@@ -1293,12 +1293,12 @@ def from_array(cls, arr, dim=None, sampling_interval=None):
 
     def to_dict(self):
         tie_values = self.data["tie_values"]
-        tie_samples = self.data["tie_samples"]
+        tie_lengths = self.data["tie_lengths"]
         if np.issubdtype(tie_values.dtype, np.datetime64):
             tie_values = tie_values.astype(str)
         data = {
             "tie_values": tie_values.tolist(),
-            "tie_samples": tie_samples.tolist(),
+            "tie_lengths": tie_lengths.tolist(),
             "sampling_interval": self.sampling_interval,
         }
         return {"dim": self.dim, "data": data, "dtype": str(self.dtype)}

From e0147e8ac0d696415a5cd6fee2e3544ec7a5950e Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 17 Dec 2025 09:28:02 +0100
Subject: [PATCH 04/63] Improve SampledCoordinate __init__ parsing.
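
The first draft read `self.empty` before `self.data` was assigned and
blindly cast `sampling_interval`. The constructor now tracks emptiness
with a local flag, runs the dtype checks only on non-empty data, and
requires a timedelta64 `sampling_interval` whenever `tie_values` is
datetime64. A plain-numpy sketch of why the dtype pairing matters
(values are illustrative):

    import numpy as np

    tie_values = np.array(["2025-01-01T00:00:00"], dtype="datetime64[ns]")
    dt = np.timedelta64(10, "ms")
    tie_values[0] + 5 * dt  # datetime64 + n * timedelta64 is well defined
    tie_values[0] + 0.01    # raises: hence the new dtype check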
---
 xdas/core/coordinates.py | 43 +++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py
index e345729..2a89406 100644
--- a/xdas/core/coordinates.py
+++ b/xdas/core/coordinates.py
@@ -1034,41 +1034,52 @@ def __new__(cls, *args, **kwargs):
         return object.__new__(cls)
 
     def __init__(self, data=None, dim=None, dtype=None):
+        # empty
         if data is None:
             data = {"tie_values": [], "tie_lengths": [], "sampling_interval": None}
+            empty = True
+        else:
+            empty = False
+
+        # parse data
         data, dim = parse(data, dim)
         if not self.__class__.isvalid(data):
-            raise TypeError("`data` must be dict-like")
-        if not set(data) == {"tie_values", "tie_lengths", "sampling_interval"}:
-            raise ValueError(
-                "keys `tie_values`, `tie_lengths`, and `sampling_interval` must be provided"
+            raise TypeError(
+                "`data` must be dict-like and contain `tie_values`, `tie_lengths`, and "
+                "`sampling_interval`"
             )
         tie_values = np.asarray(data["tie_values"], dtype=dtype)
         tie_lengths = np.asarray(data["tie_lengths"])
         sampling_interval = np.asarray(data["sampling_interval"])
+
+        # check shapes
         if not tie_values.ndim == 1:
             raise ValueError("`tie_values` must be 1D")
         if not tie_lengths.ndim == 1:
             raise ValueError("`tie_lengths` must be 1D")
         if not len(tie_values) == len(tie_lengths):
             raise ValueError("`tie_values` and `tie_lengths` must have the same length")
-        if not (
-            np.issubdtype(tie_values.dtype, np.number)
-            or np.issubdtype(tie_values.dtype, np.datetime64)
-        ):
-            raise ValueError("`tie_values` must have either numeric or datetime dtype")
-        if not self.empty:
+
+        # check dtypes
+        if not empty:
+            if not (
+                np.issubdtype(tie_values.dtype, np.number)
+                or np.issubdtype(tie_values.dtype, np.datetime64)
+            ):
+                raise ValueError(
+                    "`tie_values` must have either numeric or datetime dtype"
+                )
             if not np.issubdtype(tie_lengths.dtype, np.integer):
                 raise ValueError("`tie_lengths` must be integer-like")
             if not np.all(tie_lengths > 0):
-                raise ValueError("`tie_lengths` must be positive integers")
+                raise ValueError("`tie_lengths` must be strictly positive integers")
             if not np.isscalar(sampling_interval):
                 raise ValueError("`sampling_interval` must be a scalar value")
-        if np.issubdtype(sampling_interval.dtype, np.datetime64):
-            sampling_interval = sampling_interval.astype("timedelta64[ns]")
-        else:
-            sampling_interval = np.asarray(sampling_interval).astype(dtype)
-        tie_lengths = tie_lengths.astype(int)
+            if np.issubdtype(tie_values.dtype, np.datetime64):
+                if not np.issubdtype(sampling_interval.dtype, np.timedelta64):
+                    raise ValueError(
+                        "`sampling_interval` must be timedelta64 for datetime64 `tie_values`"
+                    )
         self.data = dict(
             tie_values=tie_values,
             tie_lengths=tie_lengths,

From 8e8ff55737db2a1dc3757ef5b747ebff90ea2f4f Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 17 Dec 2025 09:28:41 +0100
Subject: [PATCH 05/63] Add get_sampling_interval support for
 SampledCoordinate.
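
`get_sampling_interval` previously had to estimate the interval from
the coordinate values (median of the tie deltas, or the end-to-end
slope). A SampledCoordinate stores the interval explicitly, so it is
returned as-is. Usage sketch (`da` stands for any data array whose
`time` coordinate is a SampledCoordinate):

    from xdas.core.coordinates import get_sampling_interval

    dt = get_sampling_interval(da, "time")  # reads coord.sampling_interval directly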
--- xdas/core/coordinates.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 2a89406..652f0a5 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1413,6 +1413,8 @@ def get_sampling_interval(da, dim, cast=True): num = num[mask] den = den[mask] d = np.median(num / den) + elif isinstance(coord, SampledCoordinate): + d = coord.sampling_interval else: d = (coord[-1].values - coord[0].values) / (len(coord) - 1) d = np.asarray(d) From edead467726675a3e589914cb1abcdbb45b63b8c Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 10:05:48 +0100 Subject: [PATCH 06/63] Refactor Coordinate to_netcdf. --- xdas/core/coordinates.py | 31 ++++++++++++++++++++++ xdas/core/dataarray.py | 55 ++++++++++++++-------------------------- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 652f0a5..57f35d9 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -362,6 +362,12 @@ def to_dataarray(self): def to_dict(self): raise NotImplementedError + def to_netcdf(self, ds, attrs): + ds = ds.assign_coords( + {self.name: (self.dim, self.values) if self.dim else self.values} + ) + return ds, attrs + @classmethod def from_dict(cls, dct): return cls(**dct) @@ -1015,6 +1021,31 @@ def to_dict(self): } return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} + def to_netcdf(self, ds, attrs): + mapping = f"{self.name}: {self.name}_indices {self.name}_values" + if "coordinate_interpolation" in attrs: + attrs["coordinate_interpolation"] += " " + mapping + else: + attrs["coordinate_interpolation"] = mapping + tie_indices = self.tie_indices + tie_values = ( + self.tie_values.astype("M8[ns]") + if np.issubdtype(self.tie_values.dtype, np.datetime64) + else self.tie_values + ) + interp_attrs = { + "interpolation_name": "linear", + "tie_points_mapping": f"{self.name}_points: {self.name}_indices {self.name}_values", + } + ds.update( + { + f"{self.name}_interpolation": ((), np.nan, interp_attrs), + f"{self.name}_indices": (f"{self.name}_points", tie_indices), + f"{self.name}_values": (f"{self.name}_points", tie_values), + } + ) + return ds, attrs + class SampledCoordinate(Coordinate): """ diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index 2dd762a..b63e759 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -874,36 +874,17 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): """ if virtual is None: virtual = isinstance(self.data, (VirtualArray, DaskArray)) + + # initialize ds = xr.Dataset(attrs={"Conventions": "CF-1.9"}) - mappings = [] - for name, coord in self.coords.items(): - if coord.isinterp(): - mappings.append(f"{name}: {name}_indices {name}_values") - tie_indices = coord.tie_indices - tie_values = ( - coord.tie_values.astype("M8[ns]") - if np.issubdtype(coord.tie_values.dtype, np.datetime64) - else coord.tie_values - ) - attrs = { - "interpolation_name": "linear", - "tie_points_mapping": f"{name}_points: {name}_indices {name}_values", - } - ds.update( - { - f"{name}_interpolation": ((), np.nan, attrs), - f"{name}_indices": (f"{name}_points", tie_indices), - f"{name}_values": (f"{name}_points", tie_values), - } - ) - else: - ds = ds.assign_coords( - {name: (coord.dim, coord.values) if coord.dim else coord.values} - ) - mapping = " ".join(mappings) - attrs = {} if self.attrs is None else self.attrs - attrs |= {"coordinate_interpolation": mapping} if mapping else attrs - name = 
"__values__" if self.name is None else self.name + variable_attrs = {} if self.attrs is None else self.attrs + variable_name = "__values__" if self.name is None else self.name + + # prepare metadata + for coord in self.coords.values(): + ds, variable_attrs = coord.to_netcdf(ds, variable_attrs) + + # write data with h5netcdf.File(fname, mode=mode) as file: if group is not None and group not in file: file.create_group(group) @@ -912,7 +893,7 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): if not virtual: encoding = {} if encoding is None else encoding variable = file.create_variable( - name, + variable_name, self.dims, self.dtype, data=self.values, @@ -922,15 +903,15 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): if encoding is not None: raise ValueError("cannot use `encoding` with in virtual mode") if isinstance(self.data, VirtualArray): - self.data.to_dataset(file._h5group, name) - variable = file._variable_cls(file, name, self.dims) - file._variables[name] = variable + self.data.to_dataset(file._h5group, variable_name) + variable = file._variable_cls(file, variable_name, self.dims) + file._variables[variable_name] = variable variable._attach_dim_scales() variable._attach_coords() variable._ensure_dim_id() elif isinstance(self.data, DaskArray): variable = file.create_variable( - name, + variable_name, self.dims, self.dtype, ) @@ -941,8 +922,10 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): raise ValueError( "can only use `virtual=True` with a virtual array as data" ) - if attrs: - variable.attrs.update(attrs) + if variable_attrs: + variable.attrs.update(variable_attrs) + + # add metadata ds.to_netcdf(fname, mode="a", group=group, engine="h5netcdf") @classmethod From dbf3d2ebf3c239f68b51906d3e636e5ae37dbe8f Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 11:00:55 +0100 Subject: [PATCH 07/63] Refactor DataArray.to_netcdf: extract create_variable for virtual backend. 
--- xdas/core/dataarray.py | 29 ++++++++++++++--------------- xdas/dask/__init__.py | 2 +- xdas/dask/core.py | 7 +++++++ xdas/virtual.py | 9 +++++++++ 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index b63e759..d2857bd 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -12,7 +12,7 @@ from dask.array import Array as DaskArray from numpy.lib.mixins import NDArrayOperatorsMixin -from ..dask.core import dumps, from_dict, loads, to_dict +from ..dask.core import create_variable, from_dict, loads, to_dict from ..virtual import VirtualArray, VirtualSource, _to_human from .coordinates import Coordinate, Coordinates, get_sampling_interval @@ -886,10 +886,15 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): # write data with h5netcdf.File(fname, mode=mode) as file: + # group if group is not None and group not in file: file.create_group(group) file = file if group is None else file[group] + + # dims file.dimensions.update(self.sizes) + + # variable if not virtual: encoding = {} if encoding is None else encoding variable = file.create_variable( @@ -903,29 +908,23 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): if encoding is not None: raise ValueError("cannot use `encoding` with in virtual mode") if isinstance(self.data, VirtualArray): - self.data.to_dataset(file._h5group, variable_name) - variable = file._variable_cls(file, variable_name, self.dims) - file._variables[variable_name] = variable - variable._attach_dim_scales() - variable._attach_coords() - variable._ensure_dim_id() - elif isinstance(self.data, DaskArray): - variable = file.create_variable( - variable_name, - self.dims, - self.dtype, + variable = self.data.create_variable( + file, variable_name, self.dims, self.dtype ) - variable.attrs.update( - {"__dask_array__": np.frombuffer(dumps(self.data), "uint8")} + elif isinstance(self.data, DaskArray): + variable = create_variable( + self.data, file, variable_name, self.dims, self.dtype ) else: raise ValueError( "can only use `virtual=True` with a virtual array as data" ) + + # attrs if variable_attrs: variable.attrs.update(variable_attrs) - # add metadata + # write metadata ds.to_netcdf(fname, mode="a", group=group, engine="h5netcdf") @classmethod diff --git a/xdas/dask/__init__.py b/xdas/dask/__init__.py index 60e4af0..4f612c9 100644 --- a/xdas/dask/__init__.py +++ b/xdas/dask/__init__.py @@ -1 +1 @@ -from .core import dumps, loads +from .core import create_variable, dumps, loads diff --git a/xdas/dask/core.py b/xdas/dask/core.py index 2397223..911145a 100644 --- a/xdas/dask/core.py +++ b/xdas/dask/core.py @@ -1,8 +1,15 @@ +import numpy as np from dask.array import Array from . 
import serial +def create_variable(arr, file, name, dims=None, dtype=None): + variable = file.create_variable(name, dims, dtype) + variable.attrs.update({"__dask_array__": np.frombuffer(dumps(arr), "uint8")}) + return variable + + def dumps(arr): """Serialize a dask array.""" return serial.dumps(to_dict(arr)) diff --git a/xdas/virtual.py b/xdas/virtual.py index e5ce9aa..92aba6b 100644 --- a/xdas/virtual.py +++ b/xdas/virtual.py @@ -49,6 +49,15 @@ def nbytes(self): else: return 0 + def create_variable(self, file, name, dims=None, dtype=None): + self.to_dataset(file._h5group, name) + variable = file._variable_cls(file, name, dims) + file._variables[name] = variable + variable._attach_dim_scales() + variable._attach_coords() + variable._ensure_dim_id() + return variable + class VirtualStack(VirtualArray): def __init__(self, sources=[], axis=0): From 32f7edfafa896756b24941067dd2bdf1ee04693b Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 11:49:18 +0100 Subject: [PATCH 08/63] Refactor DataArray.from_netcdf. --- xdas/core/dataarray.py | 64 +++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index d2857bd..4be0f54 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -944,55 +944,49 @@ def from_netcdf(cls, fname, group=None): DataArray The openend data array. """ + # read metadata with xr.open_dataset(fname, group=group, engine="h5netcdf") as ds: + # check file format if not ("Conventions" in ds.attrs and "CF" in ds.attrs["Conventions"]): raise TypeError( "file format not recognized. please provide the file format " "with the `engine` keyword argument" ) + + # identify the "main" data array if len(ds) == 1: name, da = next(iter(ds.items())) - coords = { - name: ( - ( - coord.dims[0], - ( - coord.values.astype("U") - if coord.dtype == np.dtype("O") - else coord.values - ), - ) - if coord.dims - else coord.values - ) - for name, coord in da.coords.items() - } else: - data_vars = [ - var - for var in ds.values() - if "coordinate_interpolation" in var.attrs - ] + data_vars = { + name: var + for name, var in ds.items() + if any("coordinate" in attr for attr in var.attrs) + } if len(data_vars) == 1: - da = data_vars[0] + name, da = next(iter(data_vars.items())) else: raise ValueError("several possible data arrays detected") - coords = { - name: ( + + # read regular coordinates + coords = { + name: ( + ( + coord.dims[0], ( - coord.dims[0], - ( - coord.values.astype("U") - if coord.dtype == np.dtype("O") - else coord.values - ), - ) - if coord.dims - else coord.values + coord.values.astype("U") + if coord.dtype == np.dtype("O") + else coord.values + ), ) - for name, coord in da.coords.items() - } - mapping = da.attrs.pop("coordinate_interpolation") + if coord.dims + else coord.values + ) + for name, coord in da.coords.items() + } + + # read advanced coordinates + mapping = da.attrs.pop("coordinate_interpolation", None) + if mapping is not None: matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) for match in matches: dim, indices, values = match From fb9ee294467fe2f3f88f1950aa2233329752e109 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 14:06:50 +0100 Subject: [PATCH 09/63] Refactor: use __subclasses__ when iterating on possible type of Coordinate. 
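
With dispatch driven by `__subclasses__`, a new coordinate flavor only
has to subclass `Coordinate` and provide an `isvalid` staticmethod;
`Coordinate.__new__` picks it up without touching the base class. The
pattern in miniature (standalone sketch, not the actual classes):

    class Base:
        def __new__(cls, data):
            for subcls in cls.__subclasses__():
                if subcls.isvalid(data):
                    return object.__new__(subcls)
            raise TypeError("could not parse `data`")

    class Dense(Base):
        @staticmethod
        def isvalid(data):
            return isinstance(data, list)

    type(Base([1, 2, 3]))  # <class 'Dense'>

Caveat: subclasses are probed in definition order, so the `isvalid`
implementations must be mutually exclusive (or ordered from most to
least specific).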
--- xdas/core/coordinates.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 57f35d9..ec864fa 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -250,14 +250,10 @@ def __new__(cls, data=None, dim=None, dtype=None): if data is None: raise TypeError("cannot infer coordinate type if no `data` is provided") data, dim = parse(data, dim) - if ScalarCoordinate.isvalid(data): - return object.__new__(ScalarCoordinate) - elif DenseCoordinate.isvalid(data): - return object.__new__(DenseCoordinate) - elif InterpCoordinate.isvalid(data): - return object.__new__(InterpCoordinate) - else: - raise TypeError("could not parse `data`") + for subcls in cls.__subclasses__(): + if subcls.isvalid(data): + return object.__new__(subcls) + raise TypeError("could not parse `data`") def __getitem__(self, item): data = self.data.__getitem__(item) From a9048a7ee5f8cfb0b17eebae06f5342255caca84 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 14:31:44 +0100 Subject: [PATCH 10/63] Refactoring + Coordinate.from_dataset --- xdas/core/coordinates.py | 197 ++++++++++++++++++++++++++------------- xdas/core/dataarray.py | 12 +-- 2 files changed, 136 insertions(+), 73 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index ec864fa..858b88f 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1,5 +1,6 @@ from copy import copy, deepcopy from functools import wraps +import re import numpy as np import pandas as pd @@ -212,6 +213,10 @@ def from_dict(cls, dct): dct["dims"], ) + @classmethod + def from_dataset(cls, ds, name): + return Coordinate.from_dataset(ds, name) + def copy(self, deep=True): if deep: func = deepcopy @@ -268,6 +273,9 @@ def __len__(self): def __repr__(self): return np.array2string(self.data, threshold=0, edgeitems=1) + def __reduce__(self): + return self.__class__, (self.data, self.dim), {"_parent": self.parent} + def __add__(self, other): return self.__class__(self.data + other, self.dim) @@ -310,6 +318,22 @@ def values(self): def empty(self): return len(self) == 0 + @property + def parent(self): + return getattr(self, "_parent", None) + + @property + def name(self): + if self.parent is None: + return self.dim + return next((name for name in self.parent if self.parent[name] is self), None) + + def isdim(self): + if self.parent is None or self.name is None: + return None + else: + return self.parent.isdim(self.name) + def equals(self, other): ... def to_index(self, item, method=None, endpoint=True): @@ -318,56 +342,6 @@ def to_index(self, item, method=None, endpoint=True): else: return self.get_indexer(item, method) - def isscalar(self): - return isinstance(self, ScalarCoordinate) - - def isdense(self): - return isinstance(self, DenseCoordinate) - - def isinterp(self): - return isinstance(self, InterpCoordinate) - - def append(self, other): - raise NotImplementedError(f"append is not implemented for {self.__class__}") - - def to_dataarray(self): - from .dataarray import DataArray # TODO: avoid defered import? 
- - if self.name is None: - raise ValueError("cannot convert unnamed coordinate to DataArray") - - if self.parent is None: - return DataArray( - self.values, - {self.dim: self}, - dims=[self.dim], - name=self.name, - ) - else: - return DataArray( - self.values, - { - name: coord - for name, coord in self.parent.items() - if coord.dim == self.dim - }, - dims=[self.dim], - name=self.name, - ) - - def to_dict(self): - raise NotImplementedError - - def to_netcdf(self, ds, attrs): - ds = ds.assign_coords( - {self.name: (self.dim, self.values) if self.dim else self.values} - ) - return ds, attrs - - @classmethod - def from_dict(cls, dct): - return cls(**dct) - def format_index(self, idx, bounds="raise"): idx = np.asarray(idx) if not np.issubdtype(idx.dtype, np.integer): @@ -420,24 +394,63 @@ def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): stop_index -= 1 return slice(start_index, stop_index) - def __reduce__(self): - return self.__class__, (self.data, self.dim), {"_parent": self.parent} + def isscalar(self): + return isinstance(self, ScalarCoordinate) - @property - def parent(self): - return getattr(self, "_parent", None) + def isdense(self): + return isinstance(self, DenseCoordinate) - @property - def name(self): - if self.parent is None: - return self.dim - return next((name for name in self.parent if self.parent[name] is self), None) + def isinterp(self): + return isinstance(self, InterpCoordinate) - def isdim(self): - if self.parent is None or self.name is None: - return None + def append(self, other): + raise NotImplementedError(f"append is not implemented for {self.__class__}") + + def to_dataarray(self): + from .dataarray import DataArray # TODO: avoid defered import? + + if self.name is None: + raise ValueError("cannot convert unnamed coordinate to DataArray") + + if self.parent is None: + return DataArray( + self.values, + {self.dim: self}, + dims=[self.dim], + name=self.name, + ) else: - return self.parent.isdim(self.name) + return DataArray( + self.values, + { + name: coord + for name, coord in self.parent.items() + if coord.dim == self.dim + }, + dims=[self.dim], + name=self.name, + ) + + def to_dict(self): + raise NotImplementedError + + @classmethod + def from_dict(cls, dct): + return cls(**dct) + + def to_dataset(self, ds, attrs): + ds = ds.assign_coords( + {self.name: (self.dim, self.values) if self.dim else self.values} + ) + return ds, attrs + + @classmethod + def from_dataset(cls, ds, name): + coords = {} + for subcls in cls.__subclasses__(): + if hasattr(subcls, "from_dataset"): + coords |= subcls.from_dataset(ds, name) + return coords class ScalarCoordinate(Coordinate): @@ -1017,7 +1030,7 @@ def to_dict(self): } return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - def to_netcdf(self, ds, attrs): + def to_dataset(self, ds, attrs): mapping = f"{self.name}: {self.name}_indices {self.name}_values" if "coordinate_interpolation" in attrs: attrs["coordinate_interpolation"] += " " + mapping @@ -1042,6 +1055,18 @@ def to_netcdf(self, ds, attrs): ) return ds, attrs + @classmethod + def from_dataset(cls, ds, name): + coords = {} + mapping = ds[name].attrs.pop("coordinate_interpolation", None) + if mapping is not None: + matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) + for match in matches: + dim, indices, values = match + data = {"tie_indices": ds[indices], "tie_values": ds[values]} + coords[dim] = Coordinate(data, dim) + return coords + class SampledCoordinate(Coordinate): """ @@ -1396,6 +1421,48 @@ def to_dict(self): } return {"dim": 
self.dim, "data": data, "dtype": str(self.dtype)}
 
+    def to_dataset(self, ds, attrs):
+        mapping = f"{self.name}: {self.name}_values {self.name}_lengths"
+        if "coordinate_sampling" in attrs:
+            attrs["coordinate_sampling"] += " " + mapping
+        else:
+            attrs["coordinate_sampling"] = mapping
+        tie_values = (
+            self.tie_values.astype("M8[ns]")
+            if np.issubdtype(self.tie_values.dtype, np.datetime64)
+            else self.tie_values
+        )
+        tie_lengths = self.tie_lengths
+        interp_attrs = {
+            "sampling_interval": self.sampling_interval,
+            "tie_points_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths",
+        }
+        ds.update(
+            {
+                f"{self.name}_sampling": ((), np.nan, interp_attrs),
+                f"{self.name}_values": (f"{self.name}_points", tie_values),
+                f"{self.name}_lengths": (f"{self.name}_points", tie_lengths),
+            }
+        )
+        return ds, attrs
+
+    @classmethod
+    def from_dataset(cls, dataset, name):
+        coords = {}
+        mapping = dataset[name].attrs.pop("coordinate_sampling", None)
+        if mapping is not None:
+            matches = re.findall(r"(\w+): (\w+) (\w+)", mapping)
+            for match in matches:
+                dim, values, lengths = match
+                sampling_interval = ...
+                data = {
+                    "tie_values": dataset[values],
+                    "tie_lengths": dataset[lengths],
+                    "sampling_interval": sampling_interval,
+                }
+                coords[dim] = Coordinate(data, dim)
+        return coords
+
 
 def parse(data, dim=None):
     if isinstance(data, tuple):
diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py
index 4be0f54..11e4bbb 100644
--- a/xdas/core/dataarray.py
+++ b/xdas/core/dataarray.py
@@ -882,7 +882,7 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None):
 
         # prepare metadata
         for coord in self.coords.values():
-            ds, variable_attrs = coord.to_netcdf(ds, variable_attrs)
+            ds, variable_attrs = coord.to_dataset(ds, variable_attrs)
 
         # write data
         with h5netcdf.File(fname, mode=mode) as file:
@@ -985,13 +985,9 @@ def from_netcdf(cls, fname, group=None):
             }
 
             # read advanced coordinates
-            mapping = da.attrs.pop("coordinate_interpolation", None)
-            if mapping is not None:
-                matches = re.findall(r"(\w+): (\w+) (\w+)", mapping)
-                for match in matches:
-                    dim, indices, values = match
-                    data = {"tie_indices": ds[indices], "tie_values": ds[values]}
-                    coords[dim] = Coordinate(data, dim)
+            coords |= Coordinates.from_dataset(ds, name)
+
+        # read data
         with h5py.File(fname) as file:
             if group:
                 file = file[group]

From c908381c4c4c871caa0f88c9555a8f96282dad67 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 17 Dec 2025 14:34:56 +0100
Subject: [PATCH 11/63] Refactor: treat regular and advanced coordinates with
 the same API.
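
Every Coordinate subclass now exposes the same pair of hooks:
`to_dataset(dataset, attrs)` appends whatever representation the
subclass needs (plain arrays, interpolation tie points, sampling tie
points) and `from_dataset(dataset, name)` recovers it. Readers and
writers reduce to a loop over coordinates, roughly (simplified sketch
of the call sites):

    # writing: each coordinate serializes itself into the xarray Dataset
    for coord in self.coords.values():
        ds, attrs = coord.to_dataset(ds, attrs)

    # reading: each subclass contributes the coordinates it recognizes
    coords = {}
    for subcls in Coordinate.__subclasses__():
        if hasattr(subcls, "from_dataset"):
            coords |= subcls.from_dataset(ds, name)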
--- xdas/core/coordinates.py | 18 ++++++++++++++++++ xdas/core/dataarray.py | 21 ++------------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 858b88f..8277896 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -645,6 +645,24 @@ def to_dict(self): data = self.data.tolist() return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} + @classmethod + def from_dataset(cls, ds, name): + return { + name: ( + ( + coord.dims[0], + ( + coord.values.astype("U") + if coord.dtype == np.dtype("O") + else coord.values + ), + ) + if coord.dims + else coord.values + ) + for name, coord in ds[name].coords.items() + } + class InterpCoordinate(Coordinate): """ diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index 11e4bbb..c5ca93c 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -967,25 +967,8 @@ def from_netcdf(cls, fname, group=None): else: raise ValueError("several possible data arrays detected") - # read regular coordinates - coords = { - name: ( - ( - coord.dims[0], - ( - coord.values.astype("U") - if coord.dtype == np.dtype("O") - else coord.values - ), - ) - if coord.dims - else coord.values - ) - for name, coord in da.coords.items() - } - - # read advanced coordinates - coords |= Coordinates.from_dataset(ds, name) + # read coordinates + coords = Coordinates.from_dataset(ds, name) # read data with h5py.File(fname) as file: From c2104a737dfa6cc87a5e9b735649286f8bf719d5 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:03:50 +0100 Subject: [PATCH 12/63] Refactor: in DataArray.from_netcdf, cleaner data retrieval. --- xdas/core/dataarray.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index c5ca93c..f9a7cf4 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -955,15 +955,15 @@ def from_netcdf(cls, fname, group=None): # identify the "main" data array if len(ds) == 1: - name, da = next(iter(ds.items())) + name = next(iter(ds.keys())) else: data_vars = { - name: var - for name, var in ds.items() + key: var + for key, var in ds.items() if any("coordinate" in attr for attr in var.attrs) } if len(data_vars) == 1: - name, da = next(iter(data_vars.items())) + name = next(iter(data_vars.keys())) else: raise ValueError("several possible data arrays detected") @@ -971,16 +971,23 @@ def from_netcdf(cls, fname, group=None): coords = Coordinates.from_dataset(ds, name) # read data - with h5py.File(fname) as file: - if group: - file = file[group] - name = "__values__" if da.name is None else da.name - variable = file[name] - if "__dask_array__" in variable.attrs: - data = loads(da.attrs.pop("__dask_array__")) - else: - data = VirtualSource(file[name]) - return cls(data, coords, da.dims, da.name, None if da.attrs == {} else da.attrs) + if "__dask_array__" in ds[name].attrs: + data = loads(ds[name].attrs.pop("__dask_array__")) + else: + with h5py.File(fname) as file: + if group: + file = file[group] + variable = file["__values__" if name is None else name] + data = VirtualSource(variable) + + # pack everything + return cls( + data, + coords, + ds[name].dims, + name, + None if ds[name].attrs == {} else ds[name].attrs, + ) def to_dict(self): """Convert the DataArray to a dictionary.""" From 0dfa3fc5a3adfe3ecd58b3d89cae0452f5369dee Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:07:13 +0100 Subject: [PATCH 13/63] 
Rename ds -> dataset. --- xdas/core/coordinates.py | 36 ++++++++++++++++++------------------ xdas/core/dataarray.py | 28 +++++++++++++++------------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 8277896..c3af298 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -214,8 +214,8 @@ def from_dict(cls, dct): ) @classmethod - def from_dataset(cls, ds, name): - return Coordinate.from_dataset(ds, name) + def from_dataset(cls, dataset, name): + return Coordinate.from_dataset(dataset, name) def copy(self, deep=True): if deep: @@ -438,18 +438,18 @@ def to_dict(self): def from_dict(cls, dct): return cls(**dct) - def to_dataset(self, ds, attrs): - ds = ds.assign_coords( + def to_dataset(self, dataset, attrs): + dataset = dataset.assign_coords( {self.name: (self.dim, self.values) if self.dim else self.values} ) - return ds, attrs + return dataset, attrs @classmethod - def from_dataset(cls, ds, name): + def from_dataset(cls, dataset, name): coords = {} for subcls in cls.__subclasses__(): if hasattr(subcls, "from_dataset"): - coords |= subcls.from_dataset(ds, name) + coords |= subcls.from_dataset(dataset, name) return coords @@ -646,7 +646,7 @@ def to_dict(self): return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} @classmethod - def from_dataset(cls, ds, name): + def from_dataset(cls, dataset, name): return { name: ( ( @@ -660,7 +660,7 @@ def from_dataset(cls, ds, name): if coord.dims else coord.values ) - for name, coord in ds[name].coords.items() + for name, coord in dataset[name].coords.items() } @@ -1048,7 +1048,7 @@ def to_dict(self): } return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - def to_dataset(self, ds, attrs): + def to_dataset(self, dataset, attrs): mapping = f"{self.name}: {self.name}_indices {self.name}_values" if "coordinate_interpolation" in attrs: attrs["coordinate_interpolation"] += " " + mapping @@ -1064,24 +1064,24 @@ def to_dataset(self, ds, attrs): "interpolation_name": "linear", "tie_points_mapping": f"{self.name}_points: {self.name}_indices {self.name}_values", } - ds.update( + dataset.update( { f"{self.name}_interpolation": ((), np.nan, interp_attrs), f"{self.name}_indices": (f"{self.name}_points", tie_indices), f"{self.name}_values": (f"{self.name}_points", tie_values), } ) - return ds, attrs + return dataset, attrs @classmethod - def from_dataset(cls, ds, name): + def from_dataset(cls, dataset, name): coords = {} - mapping = ds[name].attrs.pop("coordinate_interpolation", None) + mapping = dataset[name].attrs.pop("coordinate_interpolation", None) if mapping is not None: matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) for match in matches: dim, indices, values = match - data = {"tie_indices": ds[indices], "tie_values": ds[values]} + data = {"tie_indices": dataset[indices], "tie_values": dataset[values]} coords[dim] = Coordinate(data, dim) return coords @@ -1439,7 +1439,7 @@ def to_dict(self): } return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - def to_dataset(self, ds, attrs): + def to_dataset(self, dataset, attrs): mapping = f"{self.name}: {self.name}_values {self.name}_lengths" if "coordinate_sampling" in attrs: attrs["coordinate_sampling"] += " " + mapping @@ -1455,14 +1455,14 @@ def to_dataset(self, ds, attrs): "sampling_interval": self.sampling_interval, "tie_points_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", } - ds.update( + dataset.update( { f"{self.name}_sampling": ((), np.nan, interp_attrs), 
f"{self.name}_values": (f"{self.name}_points", tie_values), f"{self.name}_lengths": (f"{self.name}_points", tie_lengths), } ) - return ds, attrs + return dataset, attrs @classmethod def from_dataset(cls, dataset, name): diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index f9a7cf4..e3ba7a9 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -876,13 +876,13 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): virtual = isinstance(self.data, (VirtualArray, DaskArray)) # initialize - ds = xr.Dataset(attrs={"Conventions": "CF-1.9"}) + dataset = xr.Dataset(attrs={"Conventions": "CF-1.9"}) variable_attrs = {} if self.attrs is None else self.attrs variable_name = "__values__" if self.name is None else self.name # prepare metadata for coord in self.coords.values(): - ds, variable_attrs = coord.to_dataset(ds, variable_attrs) + dataset, variable_attrs = coord.to_dataset(dataset, variable_attrs) # write data with h5netcdf.File(fname, mode=mode) as file: @@ -925,7 +925,7 @@ def to_netcdf(self, fname, mode="w", group=None, virtual=None, encoding=None): variable.attrs.update(variable_attrs) # write metadata - ds.to_netcdf(fname, mode="a", group=group, engine="h5netcdf") + dataset.to_netcdf(fname, mode="a", group=group, engine="h5netcdf") @classmethod def from_netcdf(cls, fname, group=None): @@ -945,21 +945,23 @@ def from_netcdf(cls, fname, group=None): The openend data array. """ # read metadata - with xr.open_dataset(fname, group=group, engine="h5netcdf") as ds: + with xr.open_dataset(fname, group=group, engine="h5netcdf") as dataset: # check file format - if not ("Conventions" in ds.attrs and "CF" in ds.attrs["Conventions"]): + if not ( + "Conventions" in dataset.attrs and "CF" in dataset.attrs["Conventions"] + ): raise TypeError( "file format not recognized. 
please provide the file format " "with the `engine` keyword argument" ) # identify the "main" data array - if len(ds) == 1: - name = next(iter(ds.keys())) + if len(dataset) == 1: + name = next(iter(dataset.keys())) else: data_vars = { key: var - for key, var in ds.items() + for key, var in dataset.items() if any("coordinate" in attr for attr in var.attrs) } if len(data_vars) == 1: @@ -968,11 +970,11 @@ def from_netcdf(cls, fname, group=None): raise ValueError("several possible data arrays detected") # read coordinates - coords = Coordinates.from_dataset(ds, name) + coords = Coordinates.from_dataset(dataset, name) # read data - if "__dask_array__" in ds[name].attrs: - data = loads(ds[name].attrs.pop("__dask_array__")) + if "__dask_array__" in dataset[name].attrs: + data = loads(dataset[name].attrs.pop("__dask_array__")) else: with h5py.File(fname) as file: if group: @@ -984,9 +986,9 @@ def from_netcdf(cls, fname, group=None): return cls( data, coords, - ds[name].dims, + dataset[name].dims, name, - None if ds[name].attrs == {} else ds[name].attrs, + None if dataset[name].attrs == {} else dataset[name].attrs, ) def to_dict(self): From 5ca84454a1c3945ed917ed039cd1bf4003aec80b Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:47:18 +0100 Subject: [PATCH 14/63] Implement decimation method in SampledCoordinate class --- xdas/core/coordinates.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index c3af298..6fee083 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1406,7 +1406,14 @@ def append(self, other): ) def decimate(self, q): - raise NotImplementedError("decimation is not implemented for SampledCoordinate") + return self.__class__( + { + "tie_values": self.tie_values, + "tie_lengths": (self.tie_lengths + q - 1) // q, + "sampling_interval": self.sampling_interval * q, + }, + self.dim, + ) def simplify(self, tolerance=None): raise NotImplementedError( From 680bf9088ad665809356ff6100c80fbb7ca9a01b Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:47:33 +0100 Subject: [PATCH 15/63] Implement simplification method in SampledCoordinate class --- xdas/core/coordinates.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 6fee083..30eb0fe 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1416,8 +1416,22 @@ def decimate(self, q): ) def simplify(self, tolerance=None): - raise NotImplementedError( - "simplification is not implemented for SampledCoordinate" + tie_values = [self.tie_values[0]] + tie_lengths = [self.tie_lengths[0]] + for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]): + delta = value - (tie_values[-1] + self.sampling_interval * tie_lengths[-1]) + if np.abs(delta) <= tolerance: + tie_lengths[-1] += length + else: + tie_values.append(value) + tie_lengths.append(length) + return self.__class__( + { + "tie_values": np.array(tie_values), + "tie_lengths": np.array(tie_lengths), + "sampling_interval": self.sampling_interval, + }, + self.dim, ) def get_discontinuities(self): From 25ae329f4cb82c00c39091529ea754c7af87712b Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:48:24 +0100 Subject: [PATCH 16/63] Implement get_discontinuities method in SampledCoordinate class --- xdas/core/coordinates.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git 
a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 30eb0fe..ee1d0a3 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1435,9 +1435,34 @@ def simplify(self, tolerance=None): ) def get_discontinuities(self): - raise NotImplementedError( - "get_discontinuities is not implemented for SampledCoordinate" - ) + if self.empty: + return pd.DataFrame( + columns=[ + "start_index", + "end_index", + "start_value", + "end_value", + "delta", + "type", + ] + ) + records = [] + for index in self.tie_indices[: -1]: + start_index = index + end_index = index + 1 + start_value = self.get_value(index) + end_value = self.get_value(index + 1) + record = { + "start_index": start_index, + "end_index": end_index, + "start_value": start_value, + "end_value": end_value, + "delta": end_value - start_value, + "type": ("gap" if end_value > start_value else "overlap"), + } + records.append(record) + return pd.DataFrame.from_records(records) + def get_availabilities(self): raise NotImplementedError( From 258a5ae771fa713f8f794c95b19b63f6684cc5c1 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:48:53 +0100 Subject: [PATCH 17/63] Implement get_availabilities method in SampledCoordinate class --- xdas/core/coordinates.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index ee1d0a3..e712db0 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1465,9 +1465,34 @@ def get_discontinuities(self): def get_availabilities(self): - raise NotImplementedError( - "get_availabilities is not implemented for SampledCoordinate" - ) + if self.empty: + return pd.DataFrame( + columns=[ + "start_index", + "end_index", + "start_value", + "end_value", + "delta", + "type", + ] + ) + records = [] + for index, value, length in zip(self.tie_indices, self.tie_values, self.tie_indices): + start_index = index + end_index = index + length - 1 + start_value = value + end_value = value + self.sampling_interval * (length - 1) + records.append( + { + "start_index": start_index, + "end_index": end_index, + "start_value": start_value, + "end_value": end_value, + "delta": end_value - start_value, + "type": "data", + } + ) + return pd.DataFrame.from_records(records) @classmethod def from_array(cls, arr, dim=None, sampling_interval=None): From 492c2e57ef2b19a31b8c2fd9895c9cd7527b7bd3 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 17 Dec 2025 15:49:15 +0100 Subject: [PATCH 18/63] format code --- xdas/core/coordinates.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index e712db0..65e7c0a 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1447,8 +1447,8 @@ def get_discontinuities(self): ] ) records = [] - for index in self.tie_indices[: -1]: - start_index = index + for index in self.tie_indices[:-1]: + start_index = index end_index = index + 1 start_value = self.get_value(index) end_value = self.get_value(index + 1) @@ -1463,7 +1463,6 @@ def get_discontinuities(self): records.append(record) return pd.DataFrame.from_records(records) - def get_availabilities(self): if self.empty: return pd.DataFrame( @@ -1477,7 +1476,9 @@ def get_availabilities(self): ] ) records = [] - for index, value, length in zip(self.tie_indices, self.tie_values, self.tie_indices): + for index, value, length in zip( + self.tie_indices, self.tie_values, self.tie_indices + ): start_index = index end_index 
= index + length - 1 start_value = value From 0ecb7f84cef4b3c9870ac99ec4cb1f7ca86cce65 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 09:59:19 +0100 Subject: [PATCH 19/63] Fix and test SampledCoordinate slicing. --- tests/test_sampled_coordinate.py | 267 +++++++++++++++++++++++++++++++ xdas/core/coordinates.py | 143 ++++++++--------- 2 files changed, 330 insertions(+), 80 deletions(-) create mode 100644 tests/test_sampled_coordinate.py diff --git a/tests/test_sampled_coordinate.py b/tests/test_sampled_coordinate.py new file mode 100644 index 0000000..d914a4c --- /dev/null +++ b/tests/test_sampled_coordinate.py @@ -0,0 +1,267 @@ +import numpy as np +import pytest + +from xdas.core.coordinates import SampledCoordinate, ScalarCoordinate, DenseCoordinate + + +class TestSampledCoordinateBasics: + def test_isvalid(self): + assert SampledCoordinate.isvalid( + {"tie_values": [0.0], "tie_lengths": [1], "sampling_interval": 1.0} + ) + assert SampledCoordinate.isvalid( + { + "tie_values": [np.datetime64("2000-01-01T00:00:00")], + "tie_lengths": [1], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + assert not SampledCoordinate.isvalid({"tie_values": [0.0], "tie_lengths": [1]}) + assert not SampledCoordinate.isvalid({}) + + def test_init_and_empty(self): + empty = SampledCoordinate() + assert empty.empty + assert len(empty) == 0 + assert empty.dtype is not None + assert empty.shape == (0,) + assert empty.ndim == 1 + assert empty.values.size == 0 + + def test_init_validation_numeric(self): + # valid numeric + coord = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + assert len(coord) == 3 + assert coord.start == 0.0 + assert coord.end == 3.0 + + # mismatched lengths + with pytest.raises(ValueError): + SampledCoordinate( + { + "tie_values": [0.0, 10.0], + "tie_lengths": [3], + "sampling_interval": 1.0, + } + ) + # non-integer lengths + with pytest.raises(ValueError): + SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [1.5], "sampling_interval": 1.0} + ) + # non-positive lengths + with pytest.raises(ValueError): + SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [0], "sampling_interval": 1.0} + ) + # sampling interval must be scalar + with pytest.raises(ValueError): + SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": [1.0]} + ) + + def test_init_validation_datetime(self): + # valid datetime with timedelta sampling interval + t0 = np.datetime64("2000-01-01T00:00:00") + coord = SampledCoordinate( + { + "tie_values": [t0], + "tie_lengths": [2], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + assert coord.start == t0 + assert coord.end == t0 + np.timedelta64(2, "s") + + # invalid: datetime with numeric sampling interval + with pytest.raises(ValueError): + SampledCoordinate( + {"tie_values": [t0], "tie_lengths": [2], "sampling_interval": 1} + ) + + +class TestSampledCoordinateIndexing: + def make_coord(self): + # Two segments: [0,1,2] and [10,11] + return SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + + def test_len_indices_values(self): + coord = self.make_coord() + assert len(coord) == 5 + assert np.array_equal(coord.indices, np.arange(5)) + assert np.array_equal(coord.values, np.array([0.0, 1.0, 2.0, 10.0, 11.0])) + + def test_get_value_scalar_and_vector(self): + coord = self.make_coord() + # scalar + assert coord.get_value(0) == 0.0 + assert coord.get_value(1) == 1.0 + assert coord.get_value(2) == 2.0 + assert 
coord.get_value(3) == 10.0 + assert coord.get_value(4) == 11.0 + # vectorized + vals = coord.get_value([0, 2, 3, 4]) + assert np.array_equal(vals, np.array([0.0, 2.0, 10.0, 11.0])) + # bounds + with pytest.raises(IndexError): + coord.get_value(-6) + with pytest.raises(IndexError): + coord.get_value(5) + + def test_getitem(self): + coord = self.make_coord() + # scalar -> ScalarCoordinate + item = coord[1] + assert isinstance(item, ScalarCoordinate) + assert item.values == 1.0 + # slice -> SampledCoordinate or compatible + sub = coord[1:4] + assert isinstance(sub, SampledCoordinate) + # array -> DenseCoordinate of values + arr = coord[[0, 4]] + assert isinstance(arr, DenseCoordinate) + assert np.array_equal(arr.values, np.array([0.0, 11.0])) + + def test_repr(self): + # Just ensure it returns a string + coord = self.make_coord() + assert isinstance(repr(coord), str) + + +class TestSampledCoordinateSlicing: + def make_coord(self): + # Two segments: [0,1,2] and [10,11] + return SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + + def test_slice_within_segment(self): + coord = self.make_coord() + sliced = coord[0:2] + assert isinstance(sliced, SampledCoordinate) + assert len(sliced) == 2 + assert np.array_equal(sliced.values, np.array([0.0, 1.0])) + + def test_slice_cross_segments(self): + coord = self.make_coord() + sliced = coord[1:4] + assert isinstance(sliced, SampledCoordinate) + assert len(sliced) == 3 + assert np.array_equal(sliced.values, np.array([1.0, 2.0, 10.0])) + + def test_slice_full(self): + coord = self.make_coord() + sliced = coord[:] + assert sliced.equals(coord) + + +class TestSampledCoordinateAppend: + def test_append_two_coords(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate( + {"tie_values": [10.0], "tie_lengths": [2], "sampling_interval": 1.0} + ) + result = coord1.append(coord2) + assert len(result) == 5 + assert result.tie_values[0] == 0.0 + assert result.tie_values[1] == 10.0 + + def test_append_empty(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate() + assert coord1.append(coord2).equals(coord1) + assert coord2.append(coord1).equals(coord1) + + +class TestSampledCoordinateDecimate: + def test_decimate(self): + coord = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [10], "sampling_interval": 1.0} + ) + decimated = coord.decimate(2) + assert decimated.sampling_interval == 2.0 + assert decimated.tie_lengths[0] == 5 # (10 + 2 - 1) // 2 = 5 + + +class TestSampledCoordinateSimplify: + def test_simplify_continuous(self): + # Two continuous segments should merge + coord = SampledCoordinate( + { + "tie_values": [0.0, 3.0], + "tie_lengths": [3, 2], + "sampling_interval": 1.0, + } + ) + simplified = coord.simplify(tolerance=0.1) + # If continuous (end of first == start of second), should merge + assert len(simplified.tie_values) <= 2 + + +class TestSampledCoordinateGetIndexer: + def make_coord(self): + return SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + + def test_get_indexer_exact(self): + coord = self.make_coord() + idx = coord.get_indexer(0.0, method="nearest") + assert idx == 0 + idx = coord.get_indexer(10.0, method="nearest") + assert idx == 3 + + def test_get_indexer_nearest(self): + coord = self.make_coord() + idx = coord.get_indexer(0.5, method="nearest") + assert idx in 
[0, 1] + + def test_get_indexer_out_of_bounds(self): + coord = self.make_coord() + with pytest.raises(KeyError): + coord.get_indexer(100.0) + + +class TestSampledCoordinateArithmetic: + def test_add(self): + coord = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + result = coord + 10.0 + assert result.tie_values[0] == 10.0 + assert np.array_equal(result.values, np.array([10.0, 11.0, 12.0])) + + def test_sub(self): + coord = SampledCoordinate( + {"tie_values": [10.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + result = coord - 5.0 + assert result.tie_values[0] == 5.0 + assert np.array_equal(result.values, np.array([5.0, 6.0, 7.0])) + + +class TestSampledCoordinateSerialization: + def test_to_from_dict(self): + coord = SampledCoordinate( + { + "tie_values": [0.0, 10.0], + "tie_lengths": [3, 2], + "sampling_interval": 1.0, + }, + dim="time", + ) + d = coord.to_dict() + # round-trip via Coordinate factory + from xdas.core.coordinates import Coordinate + + back = Coordinate.from_dict(d) + assert isinstance(back, SampledCoordinate) + assert back.equals(coord) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 65e7c0a..fb00419 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -364,6 +364,8 @@ def format_index_slice(self, slc): stop = len(self) if step is None: step = 1 + if step <= 0: + raise NotImplementedError("negative or zero step when slicing is not supported yet") start = self.format_index(start, bounds="clip") stop = self.format_index(stop, bounds="clip") return slice(start, stop, step) @@ -1120,7 +1122,7 @@ def __init__(self, data=None, dim=None, dtype=None): ) tie_values = np.asarray(data["tie_values"], dtype=dtype) tie_lengths = np.asarray(data["tie_lengths"]) - sampling_interval = np.asarray(data["sampling_interval"]) + sampling_interval = data["sampling_interval"] # check shapes if not tie_values.ndim == 1: @@ -1146,17 +1148,35 @@ def __init__(self, data=None, dim=None, dtype=None): if not np.isscalar(sampling_interval): raise ValueError("`sampling_interval` must be a scalar value") if np.issubdtype(tie_values.dtype, np.datetime64): - if not np.issubdtype(sampling_interval.dtype, np.timedelta64): + if not np.issubdtype(np.asarray(sampling_interval).dtype, np.timedelta64): raise ValueError( "`sampling_interval` must be timedelta64 for datetime64 `tie_values`" ) - self.data = dict( - tie_values=tie_values, - tie_lengths=tie_lengths, - sampling_interval=sampling_interval, - ) + + # store data + self.data = { + "tie_values": tie_values, + "tie_lengths": tie_lengths, + "sampling_interval": sampling_interval, + } self.dim = dim + @property + def tie_values(self): + return self.data["tie_values"] + + @property + def tie_lengths(self): + return self.data["tie_lengths"] + + @property + def sampling_interval(self): + return self.data["sampling_interval"] + + @property + def dtype(self): + return self.tie_values.dtype + @staticmethod def isvalid(data): match data: @@ -1184,9 +1204,9 @@ def __repr__(self): if np.issubdtype(self.dtype, np.floating): return f"{self.start:.3f} to {self.end:.3f}" elif np.issubdtype(self.dtype, np.datetime64): - self.start = format_datetime(self.start) - self.end = format_datetime(self.end) - return f"{self.start} to {self.end}" + start_str = format_datetime(self.start) + end_str = format_datetime(self.end) + return f"{start_str} to {end_str}" else: return f"{self.start} to {self.end}" @@ -1230,18 +1250,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def 
__array_function__(self, func, types, args, kwargs): raise NotImplementedError - @property - def tie_values(self): - return self.data["tie_values"] - - @property - def tie_lengths(self): - return self.data["tie_lengths"] - - @property - def sampling_interval(self): - return self.data["sampling_interval"] - @property def tie_indices(self): return np.concatenate(([0], np.cumsum(self.tie_lengths[:-1]))) @@ -1250,10 +1258,6 @@ def tie_indices(self): def empty(self): return self.tie_values.shape == (0,) - @property - def dtype(self): - return self.tie_values.dtype - @property def ndim(self): return self.tie_values.ndim @@ -1297,65 +1301,40 @@ def get_value(self, index): index = self.format_index(index) if np.any(index < 0) or np.any(index >= len(self)): raise IndexError("index is out of bounds") - reference = np.searchsorted(self.tie_indices, index) + reference = np.searchsorted(self.tie_indices, index, side="right") - 1 return self.tie_values[reference] + ( (index - self.tie_indices[reference]) * self.sampling_interval ) def slice_index(self, index_slice): index_slice = self.format_index_slice(index_slice) - start_index, stop_index, step_index = ( - index_slice.start, - index_slice.stop, - index_slice.step, - ) - if stop_index - start_index <= 0: - return self.__class__( - dict( - tie_values=[], - tie_lengths=[], - sampling_interval=self.sampling_interval, - ), - self.dim, - ) - elif (stop_index - start_index) <= step_index: - tie_values = [self.get_value(start_index)] - tie_lengths = [stop_index - start_index] - return self.__class__( - dict( - tie_values=tie_values, - tie_lengths=tie_lengths, - sampling_interval=self.sampling_interval, - ), - self.dim, - ) - else: - # keep tie values, number of samples and related tie indices contained in the slice - mask = (start_index < self.tie_indices) & (self.tie_indices <= stop_index) - tie_values = self.tie_values[mask] - tie_lengths = self.tie_lengths[mask] - tie_indices = self.tie_indices[mask] - # insert the missing start value - start_value = self.get_value(start_index) - tie_values = np.concatenate([[start_value], self.tie_values[mask]]) + # get indices relative to tie points + relative_start_index = np.clip(index_slice.start - self.tie_indices, 0, self.tie_lengths) + relative_stop_index = np.clip(index_slice.stop - self.tie_indices, 0, self.tie_lengths) - # insert the missing start number of samples and adjust the end one - tie_lengths = np.concatenate( - [[start_index - tie_indices[0]], tie_lengths[mask]] - ) - tie_lengths[-1] = stop_index - tie_indices[-1] + # keep segments with data + mask = relative_start_index < relative_stop_index + + # compute new tie points ane lengths + tie_values = self.tie_values[mask] + relative_start_index[mask] * self.sampling_interval + tie_lengths = relative_stop_index[mask] - relative_start_index[mask] + + # adjust for step if needed + if index_slice.step == 1: + sampling_interval = self.sampling_interval + else: + tie_lengths = (self.tie_lengths + index_slice.step - 1) // index_slice.step, + sampling_interval = self.sampling_interval * index_slice.step + + # build new coordinate + data = { + "tie_values": tie_values, + "tie_lengths": tie_lengths, + "sampling_interval": sampling_interval, + } + return self.__class__(data, self.dim) - # repack data and decimate if needed - data = { - "tie_values": tie_values, - "tie_lengths": tie_lengths, - "sampling_interval": self.sampling_interval, - } - coord = self.__class__(data, self.dim) - if step_index != 1: - coord = coord.decimate(step_index) - return coord def 
get_indexer(self, value, method=None): if isinstance(value, str): @@ -1367,16 +1346,20 @@ def get_indexer(self, value, method=None): raise KeyError("index not found") if not is_strictly_increasing(self.tie_values): raise ValueError("tie_values must be strictly increasing") - reference = np.searchsorted(self.tie_values, value) + reference = np.searchsorted(self.tie_values, value, side="right") - 1 offset = (value - self.tie_values[reference]) / self.sampling_interval - if method == "nearest": + if method is None: + if np.any(offset % 1 != 0): + raise KeyError("index not found") + offset = offset.astype(int) + elif method == "nearest": offset = np.round(offset).astype(int) elif method == "ffill": offset = np.floor(offset).astype(int) elif method == "bfill": offset = np.ceil(offset).astype(int) else: - raise ValueError("method must be one of 'nearest', 'ffill', or 'bfill'") + raise ValueError("method must be one of `None`, 'nearest', 'ffill', or 'bfill'") return self.tie_indices[reference] + offset def append(self, other): @@ -1395,7 +1378,7 @@ def append(self, other): "cannot append coordinate with different sampling intervals" ) tie_values = np.concatenate([self.tie_values, other.tie_values]) - tie_lengths = np.concatenate([self.tie_lengths, other.tie_lengths + len(self)]) + tie_lengths = np.concatenate([self.tie_lengths, other.tie_lengths]) return self.__class__( { "tie_values": tie_values, From 57e554a96cb2d5a1d25985ae2b96d9ea56f06a14 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 10:01:23 +0100 Subject: [PATCH 20/63] minor refactoring. --- xdas/core/coordinates.py | 52 +++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index fb00419..916af7a 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -365,7 +365,9 @@ def format_index_slice(self, slc): if step is None: step = 1 if step <= 0: - raise NotImplementedError("negative or zero step when slicing is not supported yet") + raise NotImplementedError( + "negative or zero step when slicing is not supported yet" + ) start = self.format_index(start, bounds="clip") stop = self.format_index(stop, bounds="clip") return slice(start, stop, step) @@ -1148,7 +1150,9 @@ def __init__(self, data=None, dim=None, dtype=None): if not np.isscalar(sampling_interval): raise ValueError("`sampling_interval` must be a scalar value") if np.issubdtype(tie_values.dtype, np.datetime64): - if not np.issubdtype(np.asarray(sampling_interval).dtype, np.timedelta64): + if not np.issubdtype( + np.asarray(sampling_interval).dtype, np.timedelta64 + ): raise ValueError( "`sampling_interval` must be timedelta64 for datetime64 `tie_values`" ) @@ -1310,21 +1314,29 @@ def slice_index(self, index_slice): index_slice = self.format_index_slice(index_slice) # get indices relative to tie points - relative_start_index = np.clip(index_slice.start - self.tie_indices, 0, self.tie_lengths) - relative_stop_index = np.clip(index_slice.stop - self.tie_indices, 0, self.tie_lengths) + relative_start_index = np.clip( + index_slice.start - self.tie_indices, 0, self.tie_lengths + ) + relative_stop_index = np.clip( + index_slice.stop - self.tie_indices, 0, self.tie_lengths + ) # keep segments with data mask = relative_start_index < relative_stop_index # compute new tie points ane lengths - tie_values = self.tie_values[mask] + relative_start_index[mask] * self.sampling_interval + tie_values = ( + self.tie_values[mask] + relative_start_index[mask] * 
self.sampling_interval
+        )
+        tie_lengths = relative_stop_index[mask] - relative_start_index[mask]
+
+        # adjust for step if needed
+        if index_slice.step == 1:
+            sampling_interval = self.sampling_interval
+        else:
+            tie_lengths = (
+                (self.tie_lengths + index_slice.step - 1) // index_slice.step,
+            )
+            sampling_interval = self.sampling_interval * index_slice.step
+
+        # build new coordinate
+        data = {
+            "tie_values": tie_values,
+            "tie_lengths": tie_lengths,
+            "sampling_interval": sampling_interval,
+        }
+        return self.__class__(data, self.dim)
-
     def get_indexer(self, value, method=None):
         if isinstance(value, str):
             value = np.datetime64(value)
@@ -1348,18 +1359,21 @@ def get_indexer(self, value, method=None):
             raise ValueError("tie_values must be strictly increasing")
         reference = np.searchsorted(self.tie_values, value, side="right") - 1
         offset = (value - self.tie_values[reference]) / self.sampling_interval
-        if method is None:
-            if np.any(offset % 1 != 0):
-                raise KeyError("index not found")
-            offset = offset.astype(int)
-        elif method == "nearest":
-            offset = np.round(offset).astype(int)
-        elif method == "ffill":
-            offset = np.floor(offset).astype(int)
-        elif method == "bfill":
-            offset = np.ceil(offset).astype(int)
-        else:
-            raise ValueError("method must be one of `None`, 'nearest', 'ffill', or 'bfill'")
+        match method:
+            case None:
+                if np.any(offset % 1 != 0):
+                    raise KeyError("index not found")
+                offset = offset.astype(int)
+            case "nearest":
+                offset = np.round(offset).astype(int)
+            case "ffill":
+                offset = np.floor(offset).astype(int)
+            case "bfill":
+                offset = np.ceil(offset).astype(int)
+            case _:
+                raise ValueError(
+                    "method must be one of `None`, 'nearest', 'ffill', or 'bfill'"
+                )
         return self.tie_indices[reference] + offset

     def append(self, other):
From c310b6e6e38667d2cb6fb3a528a2af09e23311d8 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 11:17:14 +0100
Subject: [PATCH 21/63] Fix SampledCoordinate stepped slicing. Add tests.
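
The stepped branch of slice_index accidentally turned the recomputed tie
lengths into a one-element tuple (a stray trailing comma). Removing it makes
stepped slicing usable, and decimate(q) now simply delegates to self[::q] so
both entry points share a single implementation. A minimal sketch of the
behaviour the new tests pin down (a single 10-sample segment is assumed):

    coord = SampledCoordinate(
        {"tie_values": [0.0], "tie_lengths": [10], "sampling_interval": 1.0}
    )
    stepped = coord[::2]                   # keep every other sample
    assert stepped.sampling_interval == 2.0
    assert stepped.tie_lengths[0] == 5     # ceil(10 / 2)
    assert stepped.equals(coord.decimate(2))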
--- tests/test_sampled_coordinate.py | 95 ++++++++++++++++++++++++++++++++ xdas/core/coordinates.py | 15 ++--- 2 files changed, 99 insertions(+), 11 deletions(-) diff --git a/tests/test_sampled_coordinate.py b/tests/test_sampled_coordinate.py index d914a4c..610f5d4 100644 --- a/tests/test_sampled_coordinate.py +++ b/tests/test_sampled_coordinate.py @@ -2,6 +2,7 @@ import pytest from xdas.core.coordinates import SampledCoordinate, ScalarCoordinate, DenseCoordinate +import pandas as pd class TestSampledCoordinateBasics: @@ -132,6 +133,100 @@ def test_repr(self): assert isinstance(repr(coord), str) +class TestSampledCoordinateSliceEdgeCases: + def make_coord(self): + return SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + + def test_slice_negative_and_out_of_bounds(self): + coord = self.make_coord() + # negative slice indices + s = coord[-4:-1] + assert isinstance(s, SampledCoordinate) + # slice that extends beyond bounds should clip + s2 = coord[-10:10] + assert s2.equals(coord) + + def test_slice_step_decimate(self): + coord = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [10], "sampling_interval": 1.0} + ) + stepped = coord[::2] + decimated = coord.decimate(2) + assert isinstance(stepped, SampledCoordinate) + assert decimated.equals(stepped) + + +class TestSampledCoordinateAppendErrors: + def test_append_sampling_interval_mismatch(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate( + {"tie_values": [10.0], "tie_lengths": [2], "sampling_interval": 2.0} + ) + with pytest.raises(ValueError): + coord1.append(coord2) + + def test_append_dtype_mismatch(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate( + { + "tie_values": [np.datetime64("2000-01-01T00:00:00")], + "tie_lengths": [1], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + with pytest.raises(ValueError): + coord1.append(coord2) + + +class TestSampledCoordinateDiscontinuitiesAvailabilities: + def test_discontinuities_and_availabilities(self): + # tie_lengths set to create 2 segments + coord = SampledCoordinate( + {"tie_values": [0.0, 5.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + dis = coord.get_discontinuities() + avail = coord.get_availabilities() + # expect DataFrame with specific columns + for df in (dis, avail): + assert isinstance(df, pd.DataFrame) + assert set(df.columns) >= { + "start_index", + "end_index", + "start_value", + "end_value", + "delta", + "type", + } + # availabilities should list segments (2 segments -> 2 records) + assert len(avail) >= 1 + + +class TestSampledCoordinateToDatasetAndDict: + def test_to_dict_contains_expected_keys(self): + coord = SampledCoordinate( + { + "tie_values": [0.0, 10.0], + "tie_lengths": [3, 2], + "sampling_interval": 1.0, + }, + dim="time", + ) + d = coord.to_dict() + assert "dim" in d + assert "data" in d + assert set(d["data"].keys()) >= { + "tie_values", + "tie_lengths", + "sampling_interval", + } + + class TestSampledCoordinateSlicing: def make_coord(self): # Two segments: [0,1,2] and [10,11] diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index 916af7a..eb3ad69 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1313,6 +1313,8 @@ def get_value(self, index): def slice_index(self, index_slice): index_slice = self.format_index_slice(index_slice) + # TODO: optimize when start and/or stop 
are None
+
         # get indices relative to tie points
         relative_start_index = np.clip(
             index_slice.start - self.tie_indices, 0, self.tie_lengths
         )
@@ -1334,9 +1336,7 @@ def slice_index(self, index_slice):
         if index_slice.step == 1:
             sampling_interval = self.sampling_interval
         else:
-            tie_lengths = (
-                (self.tie_lengths + index_slice.step - 1) // index_slice.step,
-            )
+            tie_lengths = (self.tie_lengths + index_slice.step - 1) // index_slice.step
             sampling_interval = self.sampling_interval * index_slice.step

         # build new coordinate
@@ -1403,14 +1403,7 @@ def append(self, other):
         )

     def decimate(self, q):
-        return self.__class__(
-            {
-                "tie_values": self.tie_values,
-                "tie_lengths": (self.tie_lengths + q - 1) // q,
-                "sampling_interval": self.sampling_interval * q,
-            },
-            self.dim,
-        )
+        return self[::q]

     def simplify(self, tolerance=None):
         tie_values = [self.tie_values[0]]
From 9b36f358e3e2511a318e44821b942d4517e618ee Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 11:28:32 +0100
Subject: [PATCH 22/63] Fix bounds checks for SampledCoordinate. Add tests.
---
 tests/test_sampled_coordinate.py | 65 ++++++++++++++++++++++++++++++++
 xdas/core/coordinates.py         |  2 +-
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/tests/test_sampled_coordinate.py b/tests/test_sampled_coordinate.py
index 610f5d4..4645c3a 100644
--- a/tests/test_sampled_coordinate.py
+++ b/tests/test_sampled_coordinate.py
@@ -360,3 +360,68 @@ def test_to_from_dict(self):
         back = Coordinate.from_dict(d)
         assert isinstance(back, SampledCoordinate)
         assert back.equals(coord)
+
+
+class TestSampledCoordinateDatetime:
+    def make_dt_coord(self):
+        t0 = np.datetime64("2000-01-01T00:00:00")
+        return SampledCoordinate(
+            {
+                "tie_values": [t0, t0 + np.timedelta64(10, "s")],
+                "tie_lengths": [3, 2],
+                "sampling_interval": np.timedelta64(1, "s"),
+            }
+        )
+
+    def test_datetime_values_and_dtype(self):
+        coord = self.make_dt_coord()
+        assert np.issubdtype(coord.dtype, np.datetime64)
+        vals = coord.values
+        assert np.issubdtype(vals.dtype, np.datetime64)
+        assert vals[0] == np.datetime64("2000-01-01T00:00:00")
+        assert vals[3] == np.datetime64("2000-01-01T00:00:10")
+
+    def test_get_value_datetime(self):
+        coord = self.make_dt_coord()
+        assert coord.get_value(1) == np.datetime64("2000-01-01T00:00:01")
+        assert coord.get_value(4) == np.datetime64("2000-01-01T00:00:11")
+        with pytest.raises(IndexError):
+            coord.get_value(5)
+
+    def test_get_indexer_datetime_methods(self):
+        coord = self.make_dt_coord()
+        t = np.datetime64("2000-01-01T00:00:01.500")
+        # exact required when method=None -> should raise
+        with pytest.raises(KeyError):
+            coord.get_indexer(t)
+        # method variants
+        assert coord.get_indexer(t, method="nearest") in [1, 2]
+        assert coord.get_indexer(t, method="ffill") == 1
+        assert coord.get_indexer(t, method="bfill") == 2
+        # bounds
+        with pytest.raises(KeyError):
+            coord.get_indexer(np.datetime64("1999-12-31T23:59:59"))
+        with pytest.raises(KeyError):
+            coord.get_indexer(np.datetime64("2000-01-01T00:00:12"))
+
+    def test_start_end_properties_datetime(self):
+        coord = self.make_dt_coord()
+        assert coord.start == np.datetime64("2000-01-01T00:00:00")
+        # end is last tie_value + sampling_interval * last_length
+        assert coord.end == np.datetime64("2000-01-01T00:00:12")
+
+
+class TestSampledCoordinateIndexerEdgeCases:
+    def test_invalid_method_raises(self):
+        coord = SampledCoordinate(
+            {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0}
+        )
+        with pytest.raises(ValueError):
+            coord.get_indexer(0.0, method="bad")
+ + def test_non_increasing_tie_values_raises(self): + coord = SampledCoordinate( + {"tie_values": [2.0, 1.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + with pytest.raises(ValueError): + coord.get_indexer(2.0) diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates.py index eb3ad69..846b5b8 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates.py @@ -1353,7 +1353,7 @@ def get_indexer(self, value, method=None): else: value = np.asarray(value) # Check that value lies within the coordinate value range (vectorized) - if np.any(value < self.start) or np.any(value > self.end): + if np.any(value < self.start) or np.any(value >= self.end): raise KeyError("index not found") if not is_strictly_increasing(self.tie_values): raise ValueError("tie_values must be strictly increasing") From 61e71c77abaf11b4bd4e49a9116ce45d63529159 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 11:54:36 +0100 Subject: [PATCH 23/63] Refactoring: WIP one module per coordinate class. --- tests/test_sampled_coordinate.py | 4 +- xdas/core/coordinates/__init__.py | 10 + .../{coordinates.py => coordinates/core.py} | 475 +----------------- xdas/core/coordinates/interp.py | 454 +++++++++++++++++ 4 files changed, 490 insertions(+), 453 deletions(-) create mode 100644 xdas/core/coordinates/__init__.py rename xdas/core/{coordinates.py => coordinates/core.py} (70%) create mode 100644 xdas/core/coordinates/interp.py diff --git a/tests/test_sampled_coordinate.py b/tests/test_sampled_coordinate.py index 4645c3a..643485d 100644 --- a/tests/test_sampled_coordinate.py +++ b/tests/test_sampled_coordinate.py @@ -1,8 +1,8 @@ import numpy as np +import pandas as pd import pytest -from xdas.core.coordinates import SampledCoordinate, ScalarCoordinate, DenseCoordinate -import pandas as pd +from xdas.core.coordinates import DenseCoordinate, SampledCoordinate, ScalarCoordinate class TestSampledCoordinateBasics: diff --git a/xdas/core/coordinates/__init__.py b/xdas/core/coordinates/__init__.py new file mode 100644 index 0000000..49f1725 --- /dev/null +++ b/xdas/core/coordinates/__init__.py @@ -0,0 +1,10 @@ +from .core import ( + Coordinate, + Coordinates, + DefaultCoordinate, + DenseCoordinate, + SampledCoordinate, + ScalarCoordinate, + get_sampling_interval, +) +from .interp import InterpCoordinate diff --git a/xdas/core/coordinates.py b/xdas/core/coordinates/core.py similarity index 70% rename from xdas/core/coordinates.py rename to xdas/core/coordinates/core.py index 846b5b8..ec4dea1 100644 --- a/xdas/core/coordinates.py +++ b/xdas/core/coordinates/core.py @@ -1,10 +1,9 @@ +import re from copy import copy, deepcopy from functools import wraps -import re import numpy as np import pandas as pd -from xinterp import forward, inverse def wraps_first_last(func): @@ -399,19 +398,25 @@ def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): return slice(start_index, stop_index) def isscalar(self): - return isinstance(self, ScalarCoordinate) + return False + + def isdefault(self): + return False def isdense(self): - return isinstance(self, DenseCoordinate) + return False def isinterp(self): - return isinstance(self, InterpCoordinate) + return False + + def issampled(self): + return False def append(self, other): raise NotImplementedError(f"append is not implemented for {self.__class__}") def to_dataarray(self): - from .dataarray import DataArray # TODO: avoid defered import? + from ..dataarray import DataArray # TODO: avoid defered import? 
if self.name is None: raise ValueError("cannot convert unnamed coordinate to DataArray") @@ -485,6 +490,9 @@ def isvalid(data): data = np.asarray(data) return (data.dtype != np.dtype(object)) and (data.ndim == 0) + def isscalar(self): + return True + def equals(self, other): if isinstance(other, self.__class__): return self.data == other.data @@ -541,6 +549,9 @@ def isvalid(data): case _: return False + def isdefault(self): + return True + @property def empty(self): return bool(self.data["size"]) @@ -596,6 +607,9 @@ def isvalid(data): data = np.asarray(data) return (data.dtype != np.dtype(object)) and (data.ndim == 1) + def isdense(self): + return True + @property def index(self): return pd.Index(self.data) @@ -668,428 +682,6 @@ def from_dataset(cls, dataset, name): } -class InterpCoordinate(Coordinate): - """ - Array-like object used to represent piecewise evenly spaced coordinates using the - CF convention. - - The coordinate ticks are describes by the mean of tie points that are interpolated - when intermediate values are required. Coordinate objects provides label based - selections methods. - - Parameters - ---------- - tie_indices : sequence of integers - The indices of the tie points. Must include index 0 and be strictly increasing. - tie_values : sequence of float or datetime64 - The values of the tie points. Must be strictly increasing to enable label-based - selection. The len of `tie_indices` and `tie_values` sizes must match. - """ - - def __new__(cls, *args, **kwargs): - return object.__new__(cls) - - def __init__(self, data=None, dim=None, dtype=None): - if data is None: - data = {"tie_indices": [], "tie_values": []} - data, dim = parse(data, dim) - if not self.__class__.isvalid(data): - raise TypeError("`data` must be dict-like") - if not set(data) == {"tie_indices", "tie_values"}: - raise ValueError( - "both `tie_indices` and `tie_values` key should be provided" - ) - tie_indices = np.asarray(data["tie_indices"]) - tie_values = np.asarray(data["tie_values"], dtype=dtype) - if not tie_indices.ndim == 1: - raise ValueError("`tie_indices` must be 1D") - if not tie_values.ndim == 1: - raise ValueError("`tie_values` must be 1D") - if not len(tie_indices) == len(tie_values): - raise ValueError("`tie_indices` and `tie_values` must have the same length") - if not tie_indices.shape == (0,): - if not np.issubdtype(tie_indices.dtype, np.integer): - raise ValueError("`tie_indices` must be integer-like") - if not tie_indices[0] == 0: - raise ValueError("`tie_indices` must start with a zero") - if not is_strictly_increasing(tie_indices): - raise ValueError("`tie_indices` must be strictly increasing") - if not ( - np.issubdtype(tie_values.dtype, np.number) - or np.issubdtype(tie_values.dtype, np.datetime64) - ): - raise ValueError("`tie_values` must have either numeric or datetime dtype") - tie_indices = tie_indices.astype(int) - self.data = dict(tie_indices=tie_indices, tie_values=tie_values) - self.dim = dim - - @staticmethod - def isvalid(data): - match data: - case {"tie_indices": _, "tie_values": _}: - return True - case _: - return False - - def __len__(self): - if self.empty: - return 0 - else: - return self.tie_indices[-1] - self.tie_indices[0] + 1 - - def __repr__(self): - if len(self) == 0: - return "empty coordinate" - elif len(self) == 1: - return f"{self.tie_values[0]}" - else: - if np.issubdtype(self.dtype, np.floating): - return f"{self.tie_values[0]:.3f} to {self.tie_values[-1]:.3f}" - elif np.issubdtype(self.dtype, np.datetime64): - start = 
format_datetime(self.tie_values[0]) - end = format_datetime(self.tie_values[-1]) - return f"{start} to {end}" - else: - return f"{self.tie_values[0]} to {self.tie_values[-1]}" - - def __getitem__(self, item): - if isinstance(item, slice): - return self.slice_index(item) - elif np.isscalar(item): - return ScalarCoordinate(self.get_value(item), None) - else: - return DenseCoordinate(self.get_value(item), self.dim) - - def __add__(self, other): - return self.__class__( - {"tie_indices": self.tie_indices, "tie_values": self.tie_values + other}, - self.dim, - ) - - def __sub__(self, other): - return self.__class__( - {"tie_indices": self.tie_indices, "tie_values": self.tie_values - other}, - self.dim, - ) - - def __array__(self, dtype=None): - out = self.values - if dtype is not None: - out = out.__array__(dtype) - return out - - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError - - def __array_function__(self, func, types, args, kwargs): - raise NotImplementedError - - @property - def tie_indices(self): - return self.data["tie_indices"] - - @property - def tie_values(self): - return self.data["tie_values"] - - @property - def empty(self): - return self.tie_indices.shape == (0,) - - @property - def dtype(self): - return self.tie_values.dtype - - @property - def ndim(self): - return self.tie_values.ndim - - @property - def shape(self): - return (len(self),) - - @property - def indices(self): - if self.empty: - return np.array([], dtype="int") - else: - return np.arange(self.tie_indices[-1] + 1) - - @property - def values(self): - if self.empty: - return np.array([], dtype=self.dtype) - else: - return self.get_value(self.indices) - - def equals(self, other): - return ( - np.array_equal(self.tie_indices, other.tie_indices) - and np.array_equal(self.tie_values, other.tie_values) - and self.dim == other.dim - and self.dtype == other.dtype - ) - - def get_value(self, index): - index = self.format_index(index) - return forward(index, self.tie_indices, self.tie_values) - - def slice_index(self, index_slice): - index_slice = self.format_index_slice(index_slice) - start_index, stop_index, step_index = ( - index_slice.start, - index_slice.stop, - index_slice.step, - ) - if stop_index - start_index <= 0: - return self.__class__(dict(tie_indices=[], tie_values=[]), dim=self.dim) - elif (stop_index - start_index) <= step_index: - tie_indices = [0] - tie_values = [self.get_value(start_index)] - return self.__class__( - dict(tie_indices=tie_indices, tie_values=tie_values), dim=self.dim - ) - else: - end_index = stop_index - 1 - start_value = self.get_value(start_index) - end_value = self.get_value(end_index) - mask = (start_index < self.tie_indices) & (self.tie_indices < end_index) - tie_indices = np.insert( - self.tie_indices[mask], - (0, self.tie_indices[mask].size), - (start_index, end_index), - ) - tie_values = np.insert( - self.tie_values[mask], - (0, self.tie_values[mask].size), - (start_value, end_value), - ) - tie_indices -= tie_indices[0] - data = {"tie_indices": tie_indices, "tie_values": tie_values} - coord = self.__class__(data, self.dim) - if step_index != 1: - coord = coord.decimate(step_index) - return coord - - def get_indexer(self, value, method=None): - if isinstance(value, str): - value = np.datetime64(value) - else: - value = np.asarray(value) - try: - indexer = inverse(value, self.tie_indices, self.tie_values, method) - except ValueError as e: - if str(e) == "fp must be strictly increasing": - raise ValueError( - "overlaps were found in the 
coordinate. If this is due to some " - "jitter in the tie values, consider smoothing the coordinate by " - "including some tolerance. This can be done by " - "`da[dim] = da[dim].simplify(tolerance)`, or by specifying a " - "tolerance when opening multiple files." - ) - else: - raise e - return indexer - - def append(self, other): - if not isinstance(other, self.__class__): - raise TypeError(f"cannot append {type(other)} to {self.__class__}") - if not self.dim == other.dim: - raise ValueError("cannot append coordinate with different dimension") - if self.empty: - return other - if other.empty: - return self - if not self.dtype == other.dtype: - raise ValueError("cannot append coordinate with different dtype") - coord = self.__class__( - { - "tie_indices": np.append( - self.tie_indices, other.tie_indices + len(self) - ), - "tie_values": np.append(self.tie_values, other.tie_values), - }, - self.dim, - ) - return coord - - def decimate(self, q): - tie_indices = (self.tie_indices // q) * q - for k in range(1, len(tie_indices) - 1): - if tie_indices[k] == tie_indices[k - 1]: - tie_indices[k] += q - tie_values = [self.get_value(idx) for idx in tie_indices] - tie_indices //= q - return self.__class__( - dict(tie_indices=tie_indices, tie_values=tie_values), self.dim - ) - - def simplify(self, tolerance=None): - if tolerance is None: - if np.issubdtype(self.dtype, np.datetime64): - tolerance = np.timedelta64(0, "ns") - else: - tolerance = 0.0 - tie_indices, tie_values = douglas_peucker( - self.tie_indices, self.tie_values, tolerance - ) - return self.__class__( - dict(tie_indices=tie_indices, tie_values=tie_values), self.dim - ) - - def get_discontinuities(self): - """ - Returns a DataFrame containing information about the discontinuities. - - Returns - ------- - pandas.DataFrame - A DataFrame with the following columns: - - - start_index : int - The index where the discontinuity starts. - - end_index : int - The index where the discontinuity ends. - - start_value : float - The value at the start of the discontinuity. - - end_value : float - The value at the end of the discontinuity. - - delta : float - The difference between the end_value and start_value. - - type : str - The type of the discontinuity, either "gap" or "overlap". - - """ - (indices,) = np.nonzero(np.diff(self.tie_indices) == 1) - records = [] - for index in indices: - start_index = self.tie_indices[index] - end_index = self.tie_indices[index + 1] - start_value = self.tie_values[index] - end_value = self.tie_values[index + 1] - record = { - "start_index": start_index, - "end_index": end_index, - "start_value": start_value, - "end_value": end_value, - "delta": end_value - start_value, - "type": ("gap" if end_value > start_value else "overlap"), - } - records.append(record) - return pd.DataFrame.from_records(records) - - def get_availabilities(self): - """ - Returns a DataFrame containing information about the data availability. - - Returns - ------- - pandas.DataFrame - A DataFrame with the following columns: - - - start_index : int - The index where the discontinuity starts. - - end_index : int - The index where the discontinuity ends. - - start_value : float - The value at the start of the discontinuity. - - end_value : float - The value at the end of the discontinuity. - - delta : float - The difference between the end_value and start_value. - - type : str - The type of the discontinuity, always "data". 
- - """ - if self.empty: - return pd.DataFrame( - columns=[ - "start_index", - "end_index", - "start_value", - "end_value", - "delta", - "type", - ] - ) - (cut_pos,) = np.nonzero(np.diff(self.tie_indices) == 1) - # start each segment after the previous cut (or at 0) - starts = np.concatenate(([0], cut_pos + 1)) - # end each segment at the cut position (or at n-1 for the last) - ends = np.concatenate((cut_pos, [len(self.tie_indices) - 1])) - records = [] - for s, e in zip(starts, ends): - start_index = self.tie_indices[s] - end_index = self.tie_indices[e] - start_value = self.tie_values[s] - end_value = self.tie_values[e] - records.append( - { - "start_index": start_index, - "end_index": end_index, - "start_value": start_value, - "end_value": end_value, - "delta": end_value - start_value, - "type": "data", - } - ) - return pd.DataFrame.from_records(records) - - @classmethod - def from_array(cls, arr, dim=None, tolerance=None): - return cls( - {"tie_indices": np.arange(len(arr)), "tie_values": arr}, dim - ).simplify(tolerance) - - def to_dict(self): - tie_indices = self.data["tie_indices"] - tie_values = self.data["tie_values"] - if np.issubdtype(tie_values.dtype, np.datetime64): - tie_values = tie_values.astype(str) - data = { - "tie_indices": tie_indices.tolist(), - "tie_values": tie_values.tolist(), - } - return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - - def to_dataset(self, dataset, attrs): - mapping = f"{self.name}: {self.name}_indices {self.name}_values" - if "coordinate_interpolation" in attrs: - attrs["coordinate_interpolation"] += " " + mapping - else: - attrs["coordinate_interpolation"] = mapping - tie_indices = self.tie_indices - tie_values = ( - self.tie_values.astype("M8[ns]") - if np.issubdtype(self.tie_values.dtype, np.datetime64) - else self.tie_values - ) - interp_attrs = { - "interpolation_name": "linear", - "tie_points_mapping": f"{self.name}_points: {self.name}_indices {self.name}_values", - } - dataset.update( - { - f"{self.name}_interpolation": ((), np.nan, interp_attrs), - f"{self.name}_indices": (f"{self.name}_points", tie_indices), - f"{self.name}_values": (f"{self.name}_points", tie_values), - } - ) - return dataset, attrs - - @classmethod - def from_dataset(cls, dataset, name): - coords = {} - mapping = dataset[name].attrs.pop("coordinate_interpolation", None) - if mapping is not None: - matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) - for match in matches: - dim, indices, values = match - data = {"tie_indices": dataset[indices], "tie_values": dataset[values]} - coords[dim] = Coordinate(data, dim) - return coords - - class SampledCoordinate(Coordinate): """ A coordinate that is sampled at regular intervals. 
@@ -1193,6 +785,9 @@ def isvalid(data): case _: return False + def issampled(self): + return True + def __len__(self): if self.empty: return 0 @@ -1580,7 +1175,7 @@ def get_sampling_interval(da, dim, cast=True): "cannot compute sample spacing on a dimension with less than 2 points" ) coord = da[dim] - if isinstance(coord, InterpCoordinate): + if coord.isinterp(): num = np.diff(coord.tie_values) den = np.diff(coord.tie_indices) mask = den != 1 @@ -1604,28 +1199,6 @@ def is_strictly_increasing(x): return np.all(np.diff(x) > 0) -def douglas_peucker(x, y, epsilon): - mask = np.ones(len(x), dtype=bool) - stack = [(0, len(x))] - while stack: - start, stop = stack.pop() - ysimple = forward( - x[start:stop], - x[[start, stop - 1]], - y[[start, stop - 1]], - ) - d = np.abs(y[start:stop] - ysimple) - index = np.argmax(d) - dmax = d[index] - index += start - if dmax > epsilon: - stack.append([start, index + 1]) - stack.append([index, stop]) - else: - mask[start + 1 : stop - 1] = False - return x[mask], y[mask] - - def format_datetime(x): string = str(x) if "." in string: diff --git a/xdas/core/coordinates/interp.py b/xdas/core/coordinates/interp.py new file mode 100644 index 0000000..5e60be3 --- /dev/null +++ b/xdas/core/coordinates/interp.py @@ -0,0 +1,454 @@ +import re + +import numpy as np +import pandas as pd +from xinterp import forward, inverse + +from .core import Coordinate, format_datetime, is_strictly_increasing, parse + + +class InterpCoordinate(Coordinate): + """ + Array-like object used to represent piecewise evenly spaced coordinates using the + CF convention. + + The coordinate ticks are describes by the mean of tie points that are interpolated + when intermediate values are required. Coordinate objects provides label based + selections methods. + + Parameters + ---------- + tie_indices : sequence of integers + The indices of the tie points. Must include index 0 and be strictly increasing. + tie_values : sequence of float or datetime64 + The values of the tie points. Must be strictly increasing to enable label-based + selection. The len of `tie_indices` and `tie_values` sizes must match. 
+ """ + + def __new__(cls, *args, **kwargs): + return object.__new__(cls) + + def __init__(self, data=None, dim=None, dtype=None): + if data is None: + data = {"tie_indices": [], "tie_values": []} + data, dim = parse(data, dim) + if not self.__class__.isvalid(data): + raise TypeError("`data` must be dict-like") + if not set(data) == {"tie_indices", "tie_values"}: + raise ValueError( + "both `tie_indices` and `tie_values` key should be provided" + ) + tie_indices = np.asarray(data["tie_indices"]) + tie_values = np.asarray(data["tie_values"], dtype=dtype) + if not tie_indices.ndim == 1: + raise ValueError("`tie_indices` must be 1D") + if not tie_values.ndim == 1: + raise ValueError("`tie_values` must be 1D") + if not len(tie_indices) == len(tie_values): + raise ValueError("`tie_indices` and `tie_values` must have the same length") + if not tie_indices.shape == (0,): + if not np.issubdtype(tie_indices.dtype, np.integer): + raise ValueError("`tie_indices` must be integer-like") + if not tie_indices[0] == 0: + raise ValueError("`tie_indices` must start with a zero") + if not is_strictly_increasing(tie_indices): + raise ValueError("`tie_indices` must be strictly increasing") + if not ( + np.issubdtype(tie_values.dtype, np.number) + or np.issubdtype(tie_values.dtype, np.datetime64) + ): + raise ValueError("`tie_values` must have either numeric or datetime dtype") + tie_indices = tie_indices.astype(int) + self.data = dict(tie_indices=tie_indices, tie_values=tie_values) + self.dim = dim + + @staticmethod + def isvalid(data): + match data: + case {"tie_indices": _, "tie_values": _}: + return True + case _: + return False + + def isinterp(self): + return True + + def __len__(self): + if self.empty: + return 0 + else: + return self.tie_indices[-1] - self.tie_indices[0] + 1 + + def __repr__(self): + if len(self) == 0: + return "empty coordinate" + elif len(self) == 1: + return f"{self.tie_values[0]}" + else: + if np.issubdtype(self.dtype, np.floating): + return f"{self.tie_values[0]:.3f} to {self.tie_values[-1]:.3f}" + elif np.issubdtype(self.dtype, np.datetime64): + start = format_datetime(self.tie_values[0]) + end = format_datetime(self.tie_values[-1]) + return f"{start} to {end}" + else: + return f"{self.tie_values[0]} to {self.tie_values[-1]}" + + def __getitem__(self, item): + if isinstance(item, slice): + return self.slice_index(item) + elif np.isscalar(item): + return Coordinate(self.get_value(item), None) + else: + return Coordinate(self.get_value(item), self.dim) + + def __add__(self, other): + return self.__class__( + {"tie_indices": self.tie_indices, "tie_values": self.tie_values + other}, + self.dim, + ) + + def __sub__(self, other): + return self.__class__( + {"tie_indices": self.tie_indices, "tie_values": self.tie_values - other}, + self.dim, + ) + + def __array__(self, dtype=None): + out = self.values + if dtype is not None: + out = out.__array__(dtype) + return out + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + raise NotImplementedError + + def __array_function__(self, func, types, args, kwargs): + raise NotImplementedError + + @property + def tie_indices(self): + return self.data["tie_indices"] + + @property + def tie_values(self): + return self.data["tie_values"] + + @property + def empty(self): + return self.tie_indices.shape == (0,) + + @property + def dtype(self): + return self.tie_values.dtype + + @property + def ndim(self): + return self.tie_values.ndim + + @property + def shape(self): + return (len(self),) + + @property + def indices(self): + if self.empty: + 
return np.array([], dtype="int") + else: + return np.arange(self.tie_indices[-1] + 1) + + @property + def values(self): + if self.empty: + return np.array([], dtype=self.dtype) + else: + return self.get_value(self.indices) + + def equals(self, other): + return ( + np.array_equal(self.tie_indices, other.tie_indices) + and np.array_equal(self.tie_values, other.tie_values) + and self.dim == other.dim + and self.dtype == other.dtype + ) + + def get_value(self, index): + index = self.format_index(index) + return forward(index, self.tie_indices, self.tie_values) + + def slice_index(self, index_slice): + index_slice = self.format_index_slice(index_slice) + start_index, stop_index, step_index = ( + index_slice.start, + index_slice.stop, + index_slice.step, + ) + if stop_index - start_index <= 0: + return self.__class__(dict(tie_indices=[], tie_values=[]), dim=self.dim) + elif (stop_index - start_index) <= step_index: + tie_indices = [0] + tie_values = [self.get_value(start_index)] + return self.__class__( + dict(tie_indices=tie_indices, tie_values=tie_values), dim=self.dim + ) + else: + end_index = stop_index - 1 + start_value = self.get_value(start_index) + end_value = self.get_value(end_index) + mask = (start_index < self.tie_indices) & (self.tie_indices < end_index) + tie_indices = np.insert( + self.tie_indices[mask], + (0, self.tie_indices[mask].size), + (start_index, end_index), + ) + tie_values = np.insert( + self.tie_values[mask], + (0, self.tie_values[mask].size), + (start_value, end_value), + ) + tie_indices -= tie_indices[0] + data = {"tie_indices": tie_indices, "tie_values": tie_values} + coord = self.__class__(data, self.dim) + if step_index != 1: + coord = coord.decimate(step_index) + return coord + + def get_indexer(self, value, method=None): + if isinstance(value, str): + value = np.datetime64(value) + else: + value = np.asarray(value) + try: + indexer = inverse(value, self.tie_indices, self.tie_values, method) + except ValueError as e: + if str(e) == "fp must be strictly increasing": + raise ValueError( + "overlaps were found in the coordinate. If this is due to some " + "jitter in the tie values, consider smoothing the coordinate by " + "including some tolerance. This can be done by " + "`da[dim] = da[dim].simplify(tolerance)`, or by specifying a " + "tolerance when opening multiple files." 
+ ) + else: + raise e + return indexer + + def append(self, other): + if not isinstance(other, self.__class__): + raise TypeError(f"cannot append {type(other)} to {self.__class__}") + if not self.dim == other.dim: + raise ValueError("cannot append coordinate with different dimension") + if self.empty: + return other + if other.empty: + return self + if not self.dtype == other.dtype: + raise ValueError("cannot append coordinate with different dtype") + coord = self.__class__( + { + "tie_indices": np.append( + self.tie_indices, other.tie_indices + len(self) + ), + "tie_values": np.append(self.tie_values, other.tie_values), + }, + self.dim, + ) + return coord + + def decimate(self, q): + tie_indices = (self.tie_indices // q) * q + for k in range(1, len(tie_indices) - 1): + if tie_indices[k] == tie_indices[k - 1]: + tie_indices[k] += q + tie_values = [self.get_value(idx) for idx in tie_indices] + tie_indices //= q + return self.__class__( + dict(tie_indices=tie_indices, tie_values=tie_values), self.dim + ) + + def simplify(self, tolerance=None): + if tolerance is None: + if np.issubdtype(self.dtype, np.datetime64): + tolerance = np.timedelta64(0, "ns") + else: + tolerance = 0.0 + tie_indices, tie_values = douglas_peucker( + self.tie_indices, self.tie_values, tolerance + ) + return self.__class__( + dict(tie_indices=tie_indices, tie_values=tie_values), self.dim + ) + + def get_discontinuities(self): + """ + Returns a DataFrame containing information about the discontinuities. + + Returns + ------- + pandas.DataFrame + A DataFrame with the following columns: + + - start_index : int + The index where the discontinuity starts. + - end_index : int + The index where the discontinuity ends. + - start_value : float + The value at the start of the discontinuity. + - end_value : float + The value at the end of the discontinuity. + - delta : float + The difference between the end_value and start_value. + - type : str + The type of the discontinuity, either "gap" or "overlap". + + """ + (indices,) = np.nonzero(np.diff(self.tie_indices) == 1) + records = [] + for index in indices: + start_index = self.tie_indices[index] + end_index = self.tie_indices[index + 1] + start_value = self.tie_values[index] + end_value = self.tie_values[index + 1] + record = { + "start_index": start_index, + "end_index": end_index, + "start_value": start_value, + "end_value": end_value, + "delta": end_value - start_value, + "type": ("gap" if end_value > start_value else "overlap"), + } + records.append(record) + return pd.DataFrame.from_records(records) + + def get_availabilities(self): + """ + Returns a DataFrame containing information about the data availability. + + Returns + ------- + pandas.DataFrame + A DataFrame with the following columns: + + - start_index : int + The index where the discontinuity starts. + - end_index : int + The index where the discontinuity ends. + - start_value : float + The value at the start of the discontinuity. + - end_value : float + The value at the end of the discontinuity. + - delta : float + The difference between the end_value and start_value. + - type : str + The type of the discontinuity, always "data". 
+
+ """
+ if self.empty:
+ return pd.DataFrame(
+ columns=[
+ "start_index",
+ "end_index",
+ "start_value",
+ "end_value",
+ "delta",
+ "type",
+ ]
+ )
+ (cut_pos,) = np.nonzero(np.diff(self.tie_indices) == 1)
+ # start each segment after the previous cut (or at 0)
+ starts = np.concatenate(([0], cut_pos + 1))
+ # end each segment at the cut position (or at n-1 for the last)
+ ends = np.concatenate((cut_pos, [len(self.tie_indices) - 1]))
+ records = []
+ for s, e in zip(starts, ends):
+ start_index = self.tie_indices[s]
+ end_index = self.tie_indices[e]
+ start_value = self.tie_values[s]
+ end_value = self.tie_values[e]
+ records.append(
+ {
+ "start_index": start_index,
+ "end_index": end_index,
+ "start_value": start_value,
+ "end_value": end_value,
+ "delta": end_value - start_value,
+ "type": "data",
+ }
+ )
+ return pd.DataFrame.from_records(records)
+
+ @classmethod
+ def from_array(cls, arr, dim=None, tolerance=None):
+ return cls(
+ {"tie_indices": np.arange(len(arr)), "tie_values": arr}, dim
+ ).simplify(tolerance)
+
+ def to_dict(self):
+ tie_indices = self.data["tie_indices"]
+ tie_values = self.data["tie_values"]
+ if np.issubdtype(tie_values.dtype, np.datetime64):
+ tie_values = tie_values.astype(str)
+ data = {
+ "tie_indices": tie_indices.tolist(),
+ "tie_values": tie_values.tolist(),
+ }
+ return {"dim": self.dim, "data": data, "dtype": str(self.dtype)}
+
+ def to_dataset(self, dataset, attrs):
+ mapping = f"{self.name}: {self.name}_indices {self.name}_values"
+ if "coordinate_interpolation" in attrs:
+ attrs["coordinate_interpolation"] += " " + mapping
+ else:
+ attrs["coordinate_interpolation"] = mapping
+ tie_indices = self.tie_indices
+ tie_values = (
+ self.tie_values.astype("M8[ns]")
+ if np.issubdtype(self.tie_values.dtype, np.datetime64)
+ else self.tie_values
+ )
+ interp_attrs = {
+ "interpolation_name": "linear",
+ "tie_points_mapping": f"{self.name}_points: {self.name}_indices {self.name}_values",
+ }
+ dataset.update(
+ {
+ f"{self.name}_interpolation": ((), np.nan, interp_attrs),
+ f"{self.name}_indices": (f"{self.name}_points", tie_indices),
+ f"{self.name}_values": (f"{self.name}_points", tie_values),
+ }
+ )
+ return dataset, attrs
+
+ @classmethod
+ def from_dataset(cls, dataset, name):
+ coords = {}
+ mapping = dataset[name].attrs.pop("coordinate_interpolation", None)
+ if mapping is not None:
+ matches = re.findall(r"(\w+): (\w+) (\w+)", mapping)
+ for match in matches:
+ dim, indices, values = match
+ data = {"tie_indices": dataset[indices], "tie_values": dataset[values]}
+ coords[dim] = Coordinate(data, dim)
+ return coords
+
+
+def douglas_peucker(x, y, epsilon):
+ mask = np.ones(len(x), dtype=bool)
+ stack = [(0, len(x))]
+ while stack:
+ start, stop = stack.pop()
+ ysimple = forward(
+ x[start:stop],
+ x[[start, stop - 1]],
+ y[[start, stop - 1]],
+ )
+ d = np.abs(y[start:stop] - ysimple)
+ index = np.argmax(d)
+ dmax = d[index]
+ index += start
+ if dmax > epsilon:
+ stack.append([start, index + 1])
+ stack.append([index, stop])
+ else:
+ mask[start + 1 : stop - 1] = False
+ return x[mask], y[mask]

From d9c82d204b91ffddbb6cc24f238ff9f3f24ffe4a Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 11:58:31 +0100
Subject: [PATCH 24/63] Refactoring: separate sampled module for SampledCoordinate.
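
For context, a minimal usage sketch of the class being moved (values are
illustrative; the import path is the one exported by the package __init__
in this patch):

    from xdas.core.coordinates import SampledCoordinate

    # Two evenly sampled segments (5 and 3 samples) separated by a gap.
    coord = SampledCoordinate(
        {"tie_values": [0.0, 100.0], "tie_lengths": [5, 3], "sampling_interval": 2.0},
        dim="time",
    )
    assert len(coord) == 8  # 5 + 3 samples in total
    assert coord.get_value(5) == 100.0  # first sample of the second segment
    assert coord.get_indexer(104.0) == 7  # 100.0 + 2 * 2.0 lands on sample 7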
--- xdas/core/coordinates/__init__.py | 2 +- xdas/core/coordinates/core.py | 459 +---------------------------- xdas/core/coordinates/sampled.py | 463 ++++++++++++++++++++++++++++++ 3 files changed, 465 insertions(+), 459 deletions(-) create mode 100644 xdas/core/coordinates/sampled.py diff --git a/xdas/core/coordinates/__init__.py b/xdas/core/coordinates/__init__.py index 49f1725..d910530 100644 --- a/xdas/core/coordinates/__init__.py +++ b/xdas/core/coordinates/__init__.py @@ -3,8 +3,8 @@ Coordinates, DefaultCoordinate, DenseCoordinate, - SampledCoordinate, ScalarCoordinate, get_sampling_interval, ) from .interp import InterpCoordinate +from .sampled import SampledCoordinate diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py index ec4dea1..5dc0559 100644 --- a/xdas/core/coordinates/core.py +++ b/xdas/core/coordinates/core.py @@ -682,463 +682,6 @@ def from_dataset(cls, dataset, name): } -class SampledCoordinate(Coordinate): - """ - A coordinate that is sampled at regular intervals. - - Parameters - ---------- - data : dict-like - The data of the coordinate. - dim : str, optional - The dimension name of the coordinate, by default None. - dtype : str or numpy.dtype, optional - The data type of the coordinate, by default None. - """ - - def __new__(cls, *args, **kwargs): - return object.__new__(cls) - - def __init__(self, data=None, dim=None, dtype=None): - # empty - if data is None: - data = {"tie_values": [], "tie_lengths": [], "sampling_interval": None} - empty = True - else: - empty = False - - # parse data - data, dim = parse(data, dim) - if not self.__class__.isvalid(data): - raise TypeError( - "`data` must be dict-like and contain `tie_values`, `tie_lengths`, and " - "`sampling_interval`" - ) - tie_values = np.asarray(data["tie_values"], dtype=dtype) - tie_lengths = np.asarray(data["tie_lengths"]) - sampling_interval = data["sampling_interval"] - - # check shapes - if not tie_values.ndim == 1: - raise ValueError("`tie_values` must be 1D") - if not tie_lengths.ndim == 1: - raise ValueError("`tie_lengths` must be 1D") - if not len(tie_values) == len(tie_lengths): - raise ValueError("`tie_values` and `tie_lengths` must have the same length") - - # check dtypes - if not empty: - if not ( - np.issubdtype(tie_values.dtype, np.number) - or np.issubdtype(tie_values.dtype, np.datetime64) - ): - raise ValueError( - "`tie_values` must have either numeric or datetime dtype" - ) - if not np.issubdtype(tie_lengths.dtype, np.integer): - raise ValueError("`tie_lengths` must be integer-like") - if not np.all(tie_lengths > 0): - raise ValueError("`tie_lengths` must be strictly positive integers") - if not np.isscalar(sampling_interval): - raise ValueError("`sampling_interval` must be a scalar value") - if np.issubdtype(tie_values.dtype, np.datetime64): - if not np.issubdtype( - np.asarray(sampling_interval).dtype, np.timedelta64 - ): - raise ValueError( - "`sampling_interval` must be timedelta64 for datetime64 `tie_values`" - ) - - # store data - self.data = { - "tie_values": tie_values, - "tie_lengths": tie_lengths, - "sampling_interval": sampling_interval, - } - self.dim = dim - - @property - def tie_values(self): - return self.data["tie_values"] - - @property - def tie_lengths(self): - return self.data["tie_lengths"] - - @property - def sampling_interval(self): - return self.data["sampling_interval"] - - @property - def dtype(self): - return self.tie_values.dtype - - @staticmethod - def isvalid(data): - match data: - case { - "tie_values": _, - "tie_lengths": _, - 
"sampling_interval": _, - }: - return True - case _: - return False - - def issampled(self): - return True - - def __len__(self): - if self.empty: - return 0 - else: - return sum(self.tie_lengths) - - def __repr__(self): - if self.empty: - return "empty coordinate" - elif len(self) == 1: - return f"{self.tie_values[0]}" - else: - if np.issubdtype(self.dtype, np.floating): - return f"{self.start:.3f} to {self.end:.3f}" - elif np.issubdtype(self.dtype, np.datetime64): - start_str = format_datetime(self.start) - end_str = format_datetime(self.end) - return f"{start_str} to {end_str}" - else: - return f"{self.start} to {self.end}" - - def __getitem__(self, item): - if isinstance(item, slice): - return self.slice_index(item) - elif np.isscalar(item): - return ScalarCoordinate(self.get_value(item), None) - else: - return DenseCoordinate(self.get_value(item), self.dim) - - def __add__(self, other): - return self.__class__( - { - "tie_values": self.tie_values + other, - "tie_lengths": self.tie_lengths, - "sampling_interval": self.sampling_interval, - }, - self.dim, - ) - - def __sub__(self, other): - return self.__class__( - { - "tie_values": self.tie_values - other, - "tie_lengths": self.tie_lengths, - "sampling_interval": self.sampling_interval, - }, - self.dim, - ) - - def __array__(self, dtype=None): - out = self.values - if dtype is not None: - out = out.__array__(dtype) - return out - - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - raise NotImplementedError - - def __array_function__(self, func, types, args, kwargs): - raise NotImplementedError - - @property - def tie_indices(self): - return np.concatenate(([0], np.cumsum(self.tie_lengths[:-1]))) - - @property - def empty(self): - return self.tie_values.shape == (0,) - - @property - def ndim(self): - return self.tie_values.ndim - - @property - def shape(self): - return (len(self),) - - @property - def indices(self): - if self.empty: - return np.array([], dtype="int") - else: - return np.arange(len(self)) - - @property - def values(self): - if self.empty: - return np.array([], dtype=self.dtype) - else: - return self.get_value(self.indices) - - @property - def start(self): - return self.tie_values[0] - - @property - def end(self): - return self.tie_values[-1] + self.sampling_interval * self.tie_lengths[-1] - - def equals(self, other): - return ( - np.array_equal(self.tie_values, other.tie_values) - and np.array_equal(self.tie_lengths, other.tie_lengths) - and self.sampling_interval == other.sampling_interval - and self.dim == other.dim - and self.dtype == other.dtype - ) - - def get_value(self, index): - index = self.format_index(index) - if np.any(index < 0) or np.any(index >= len(self)): - raise IndexError("index is out of bounds") - reference = np.searchsorted(self.tie_indices, index, side="right") - 1 - return self.tie_values[reference] + ( - (index - self.tie_indices[reference]) * self.sampling_interval - ) - - def slice_index(self, index_slice): - index_slice = self.format_index_slice(index_slice) - - # TODO: optimize when start and/or stop are None - - # get indices relative to tie points - relative_start_index = np.clip( - index_slice.start - self.tie_indices, 0, self.tie_lengths - ) - relative_stop_index = np.clip( - index_slice.stop - self.tie_indices, 0, self.tie_lengths - ) - - # keep segments with data - mask = relative_start_index < relative_stop_index - - # compute new tie points ane lengths - tie_values = ( - self.tie_values[mask] + relative_start_index[mask] * self.sampling_interval - ) - tie_lengths = 
relative_stop_index[mask] - relative_start_index[mask] - - # adjust for step if needed - if index_slice.step == 1: - sampling_interval = self.sampling_interval - else: - tie_lengths = (self.tie_lengths + index_slice.step - 1) // index_slice.step - sampling_interval = self.sampling_interval * index_slice.step - - # build new coordinate - data = { - "tie_values": tie_values, - "tie_lengths": tie_lengths, - "sampling_interval": sampling_interval, - } - return self.__class__(data, self.dim) - - def get_indexer(self, value, method=None): - if isinstance(value, str): - value = np.datetime64(value) - else: - value = np.asarray(value) - # Check that value lies within the coordinate value range (vectorized) - if np.any(value < self.start) or np.any(value >= self.end): - raise KeyError("index not found") - if not is_strictly_increasing(self.tie_values): - raise ValueError("tie_values must be strictly increasing") - reference = np.searchsorted(self.tie_values, value, side="right") - 1 - offset = (value - self.tie_values[reference]) / self.sampling_interval - match method: - case None: - if np.any(offset % 1 != 0): - raise KeyError("index not found") - offset = offset.astype(int) - case "nearest": - offset = np.round(offset).astype(int) - case "ffill": - offset = np.floor(offset).astype(int) - case "bfill": - offset = np.ceil(offset).astype(int) - case _: - raise ValueError( - "method must be one of `None`, 'nearest', 'ffill', or 'bfill'" - ) - return self.tie_indices[reference] + offset - - def append(self, other): - if not isinstance(other, self.__class__): - raise TypeError(f"cannot append {type(other)} to {self.__class__}") - if not self.dim == other.dim: - raise ValueError("cannot append coordinate with different dimension") - if self.empty: - return other - if other.empty: - return self - if not self.dtype == other.dtype: - raise ValueError("cannot append coordinate with different dtype") - if not self.sampling_interval == other.sampling_interval: - raise ValueError( - "cannot append coordinate with different sampling intervals" - ) - tie_values = np.concatenate([self.tie_values, other.tie_values]) - tie_lengths = np.concatenate([self.tie_lengths, other.tie_lengths]) - return self.__class__( - { - "tie_values": tie_values, - "tie_lengths": tie_lengths, - "sampling_interval": self.sampling_interval, - }, - self.dim, - ) - - def decimate(self, q): - return self[::q] - - def simplify(self, tolerance=None): - tie_values = [self.tie_values[0]] - tie_lengths = [self.tie_lengths[0]] - for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]): - delta = value - (tie_values[-1] + self.sampling_interval * tie_lengths[-1]) - if np.abs(delta) <= tolerance: - tie_lengths[-1] += length - else: - tie_values.append(value) - tie_lengths.append(length) - return self.__class__( - { - "tie_values": np.array(tie_values), - "tie_lengths": np.array(tie_lengths), - "sampling_interval": self.sampling_interval, - }, - self.dim, - ) - - def get_discontinuities(self): - if self.empty: - return pd.DataFrame( - columns=[ - "start_index", - "end_index", - "start_value", - "end_value", - "delta", - "type", - ] - ) - records = [] - for index in self.tie_indices[:-1]: - start_index = index - end_index = index + 1 - start_value = self.get_value(index) - end_value = self.get_value(index + 1) - record = { - "start_index": start_index, - "end_index": end_index, - "start_value": start_value, - "end_value": end_value, - "delta": end_value - start_value, - "type": ("gap" if end_value > start_value else "overlap"), - } - 
records.append(record) - return pd.DataFrame.from_records(records) - - def get_availabilities(self): - if self.empty: - return pd.DataFrame( - columns=[ - "start_index", - "end_index", - "start_value", - "end_value", - "delta", - "type", - ] - ) - records = [] - for index, value, length in zip( - self.tie_indices, self.tie_values, self.tie_indices - ): - start_index = index - end_index = index + length - 1 - start_value = value - end_value = value + self.sampling_interval * (length - 1) - records.append( - { - "start_index": start_index, - "end_index": end_index, - "start_value": start_value, - "end_value": end_value, - "delta": end_value - start_value, - "type": "data", - } - ) - return pd.DataFrame.from_records(records) - - @classmethod - def from_array(cls, arr, dim=None, sampling_interval=None): - raise NotImplementedError("from_array is not implemented for SampledCoordinate") - - def to_dict(self): - tie_values = self.data["tie_values"] - tie_lengths = self.data["tie_lengths"] - if np.issubdtype(tie_values.dtype, np.datetime64): - tie_values = tie_values.astype(str) - data = { - "tie_values": tie_values.tolist(), - "tie_lengths": tie_lengths.tolist(), - "sampling_interval": self.sampling_interval, - } - return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - - def to_dataset(self, dataset, attrs): - mapping = f"{self.name}: {self.name}_values {self.name}_lengths" - if "coordinate_sampling" in attrs: - attrs["coordinate_sampling"] += " " + mapping - else: - attrs["coordinate_sampling"] = mapping - tie_values = ( - self.tie_values.astype("M8[ns]") - if np.issubdtype(self.tie_values.dtype, np.datetime64) - else self.tie_values - ) - tie_lengths = self.tie_lengths - interp_attrs = { - "sampling_interval": self.sampling_interval, - "tie_points_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", - } - dataset.update( - { - f"{self.name}_sampling": ((), np.nan, interp_attrs), - f"{self.name}_values": (f"{self.name}_points", tie_values), - f"{self.name}_lengths": (f"{self.name}_points", tie_lengths), - } - ) - return dataset, attrs - - @classmethod - def from_dataset(cls, dataset, name): - coords = {} - mapping = dataset[name].attrs.pop("coordinate_sampling", None) - if mapping is not None: - matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) - for match in matches: - dim, values, lengths = match - sampling_interval = ... - data = { - "tie_values": dataset[values], - "tie_lengths": dataset[lengths], - "sampling_interval": sampling_interval, - } - coords[dim] = Coordinate(data, dim) - return coords - - def parse(data, dim=None): if isinstance(data, tuple): if dim is None: @@ -1182,7 +725,7 @@ def get_sampling_interval(da, dim, cast=True): num = num[mask] den = den[mask] d = np.median(num / den) - elif isinstance(coord, SampledCoordinate): + elif coord.issampled(): d = coord.sampling_interval else: d = (coord[-1].values - coord[0].values) / (len(coord) - 1) diff --git a/xdas/core/coordinates/sampled.py b/xdas/core/coordinates/sampled.py new file mode 100644 index 0000000..70b2e0a --- /dev/null +++ b/xdas/core/coordinates/sampled.py @@ -0,0 +1,463 @@ +import re + +import numpy as np +import pandas as pd + +from .core import Coordinate, format_datetime, is_strictly_increasing, parse + + +class SampledCoordinate(Coordinate): + """ + A coordinate that is sampled at regular intervals. + + Parameters + ---------- + data : dict-like + The data of the coordinate. + dim : str, optional + The dimension name of the coordinate, by default None. 
+ dtype : str or numpy.dtype, optional + The data type of the coordinate, by default None. + """ + + def __new__(cls, *args, **kwargs): + return object.__new__(cls) + + def __init__(self, data=None, dim=None, dtype=None): + # empty + if data is None: + data = {"tie_values": [], "tie_lengths": [], "sampling_interval": None} + empty = True + else: + empty = False + + # parse data + data, dim = parse(data, dim) + if not self.__class__.isvalid(data): + raise TypeError( + "`data` must be dict-like and contain `tie_values`, `tie_lengths`, and " + "`sampling_interval`" + ) + tie_values = np.asarray(data["tie_values"], dtype=dtype) + tie_lengths = np.asarray(data["tie_lengths"]) + sampling_interval = data["sampling_interval"] + + # check shapes + if not tie_values.ndim == 1: + raise ValueError("`tie_values` must be 1D") + if not tie_lengths.ndim == 1: + raise ValueError("`tie_lengths` must be 1D") + if not len(tie_values) == len(tie_lengths): + raise ValueError("`tie_values` and `tie_lengths` must have the same length") + + # check dtypes + if not empty: + if not ( + np.issubdtype(tie_values.dtype, np.number) + or np.issubdtype(tie_values.dtype, np.datetime64) + ): + raise ValueError( + "`tie_values` must have either numeric or datetime dtype" + ) + if not np.issubdtype(tie_lengths.dtype, np.integer): + raise ValueError("`tie_lengths` must be integer-like") + if not np.all(tie_lengths > 0): + raise ValueError("`tie_lengths` must be strictly positive integers") + if not np.isscalar(sampling_interval): + raise ValueError("`sampling_interval` must be a scalar value") + if np.issubdtype(tie_values.dtype, np.datetime64): + if not np.issubdtype( + np.asarray(sampling_interval).dtype, np.timedelta64 + ): + raise ValueError( + "`sampling_interval` must be timedelta64 for datetime64 `tie_values`" + ) + + # store data + self.data = { + "tie_values": tie_values, + "tie_lengths": tie_lengths, + "sampling_interval": sampling_interval, + } + self.dim = dim + + @property + def tie_values(self): + return self.data["tie_values"] + + @property + def tie_lengths(self): + return self.data["tie_lengths"] + + @property + def sampling_interval(self): + return self.data["sampling_interval"] + + @property + def dtype(self): + return self.tie_values.dtype + + @staticmethod + def isvalid(data): + match data: + case { + "tie_values": _, + "tie_lengths": _, + "sampling_interval": _, + }: + return True + case _: + return False + + def issampled(self): + return True + + def __len__(self): + if self.empty: + return 0 + else: + return sum(self.tie_lengths) + + def __repr__(self): + if self.empty: + return "empty coordinate" + elif len(self) == 1: + return f"{self.tie_values[0]}" + else: + if np.issubdtype(self.dtype, np.floating): + return f"{self.start:.3f} to {self.end:.3f}" + elif np.issubdtype(self.dtype, np.datetime64): + start_str = format_datetime(self.start) + end_str = format_datetime(self.end) + return f"{start_str} to {end_str}" + else: + return f"{self.start} to {self.end}" + + def __getitem__(self, item): + if isinstance(item, slice): + return self.slice_index(item) + elif np.isscalar(item): + return Coordinate(self.get_value(item), None) + else: + return Coordinate(self.get_value(item), self.dim) + + def __add__(self, other): + return self.__class__( + { + "tie_values": self.tie_values + other, + "tie_lengths": self.tie_lengths, + "sampling_interval": self.sampling_interval, + }, + self.dim, + ) + + def __sub__(self, other): + return self.__class__( + { + "tie_values": self.tie_values - other, + "tie_lengths": 
self.tie_lengths,
+ "sampling_interval": self.sampling_interval,
+ },
+ self.dim,
+ )
+
+ def __array__(self, dtype=None):
+ out = self.values
+ if dtype is not None:
+ out = out.__array__(dtype)
+ return out
+
+ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+ raise NotImplementedError
+
+ def __array_function__(self, func, types, args, kwargs):
+ raise NotImplementedError
+
+ @property
+ def tie_indices(self):
+ return np.concatenate(([0], np.cumsum(self.tie_lengths[:-1])))
+
+ @property
+ def empty(self):
+ return self.tie_values.shape == (0,)
+
+ @property
+ def ndim(self):
+ return self.tie_values.ndim
+
+ @property
+ def shape(self):
+ return (len(self),)
+
+ @property
+ def indices(self):
+ if self.empty:
+ return np.array([], dtype="int")
+ else:
+ return np.arange(len(self))
+
+ @property
+ def values(self):
+ if self.empty:
+ return np.array([], dtype=self.dtype)
+ else:
+ return self.get_value(self.indices)
+
+ @property
+ def start(self):
+ return self.tie_values[0]
+
+ @property
+ def end(self):
+ return self.tie_values[-1] + self.sampling_interval * self.tie_lengths[-1]
+
+ def equals(self, other):
+ return (
+ np.array_equal(self.tie_values, other.tie_values)
+ and np.array_equal(self.tie_lengths, other.tie_lengths)
+ and self.sampling_interval == other.sampling_interval
+ and self.dim == other.dim
+ and self.dtype == other.dtype
+ )
+
+ def get_value(self, index):
+ index = self.format_index(index)
+ if np.any(index < 0) or np.any(index >= len(self)):
+ raise IndexError("index is out of bounds")
+ reference = np.searchsorted(self.tie_indices, index, side="right") - 1
+ return self.tie_values[reference] + (
+ (index - self.tie_indices[reference]) * self.sampling_interval
+ )
+
+ def slice_index(self, index_slice):
+ index_slice = self.format_index_slice(index_slice)
+
+ # TODO: optimize when start and/or stop are None
+
+ # get indices relative to tie points
+ relative_start_index = np.clip(
+ index_slice.start - self.tie_indices, 0, self.tie_lengths
+ )
+ relative_stop_index = np.clip(
+ index_slice.stop - self.tie_indices, 0, self.tie_lengths
+ )
+
+ # keep segments with data
+ mask = relative_start_index < relative_stop_index
+
+ # compute new tie points and lengths
+ tie_values = (
+ self.tie_values[mask] + relative_start_index[mask] * self.sampling_interval
+ )
+ tie_lengths = relative_stop_index[mask] - relative_start_index[mask]
+
+ # adjust for step if needed
+ if index_slice.step == 1:
+ sampling_interval = self.sampling_interval
+ else:
+ tie_lengths = (tie_lengths + index_slice.step - 1) // index_slice.step
+ sampling_interval = self.sampling_interval * index_slice.step
+
+ # build new coordinate
+ data = {
+ "tie_values": tie_values,
+ "tie_lengths": tie_lengths,
+ "sampling_interval": sampling_interval,
+ }
+ return self.__class__(data, self.dim)
+
+ def get_indexer(self, value, method=None):
+ if isinstance(value, str):
+ value = np.datetime64(value)
+ else:
+ value = np.asarray(value)
+ # Check that value lies within the coordinate value range (vectorized)
+ if np.any(value < self.start) or np.any(value >= self.end):
+ raise KeyError("index not found")
+ if not is_strictly_increasing(self.tie_values):
+ raise ValueError("tie_values must be strictly increasing")
+ reference = np.searchsorted(self.tie_values, value, side="right") - 1
+ offset = (value - self.tie_values[reference]) / self.sampling_interval
+ match method:
+ case None:
+ if np.any(offset % 1 != 0):
+ raise KeyError("index not found")
+ offset = offset.astype(int)
+ case "nearest":
+ offset = np.round(offset).astype(int)
+ case "ffill":
+ offset = np.floor(offset).astype(int)
+ case "bfill":
+ offset = np.ceil(offset).astype(int)
+ case _:
+ raise ValueError(
+ "method must be one of `None`, 'nearest', 'ffill', or 'bfill'"
+ )
+ return self.tie_indices[reference] + offset
+
+ def append(self, other):
+ if not isinstance(other, self.__class__):
+ raise TypeError(f"cannot append {type(other)} to {self.__class__}")
+ if not self.dim == other.dim:
+ raise ValueError("cannot append coordinate with different dimension")
+ if self.empty:
+ return other
+ if other.empty:
+ return self
+ if not self.dtype == other.dtype:
+ raise ValueError("cannot append coordinate with different dtype")
+ if not self.sampling_interval == other.sampling_interval:
+ raise ValueError(
+ "cannot append coordinate with different sampling intervals"
+ )
+ tie_values = np.concatenate([self.tie_values, other.tie_values])
+ tie_lengths = np.concatenate([self.tie_lengths, other.tie_lengths])
+ return self.__class__(
+ {
+ "tie_values": tie_values,
+ "tie_lengths": tie_lengths,
+ "sampling_interval": self.sampling_interval,
+ },
+ self.dim,
+ )
+
+ def decimate(self, q):
+ return self[::q]
+
+ def simplify(self, tolerance=None):
+ if tolerance is None:
+ # default to a zero tolerance of the proper type (float or timedelta)
+ tolerance = 0 * self.sampling_interval
+ tie_values = [self.tie_values[0]]
+ tie_lengths = [self.tie_lengths[0]]
+ for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):
+ delta = value - (tie_values[-1] + self.sampling_interval * tie_lengths[-1])
+ if np.abs(delta) <= tolerance:
+ tie_lengths[-1] += length
+ else:
+ tie_values.append(value)
+ tie_lengths.append(length)
+ return self.__class__(
+ {
+ "tie_values": np.array(tie_values),
+ "tie_lengths": np.array(tie_lengths),
+ "sampling_interval": self.sampling_interval,
+ },
+ self.dim,
+ )
+
+ def get_discontinuities(self):
+ if self.empty:
+ return pd.DataFrame(
+ columns=[
+ "start_index",
+ "end_index",
+ "start_value",
+ "end_value",
+ "delta",
+ "type",
+ ]
+ )
+ records = []
+ # discontinuities sit between the last sample of a segment and the
+ # first sample of the next one (segment starts are tie_indices[1:])
+ for index in self.tie_indices[1:]:
+ start_index = index - 1
+ end_index = index
+ start_value = self.get_value(index - 1)
+ end_value = self.get_value(index)
+ record = {
+ "start_index": start_index,
+ "end_index": end_index,
+ "start_value": start_value,
+ "end_value": end_value,
+ "delta": end_value - start_value,
+ "type": ("gap" if end_value > start_value else "overlap"),
+ }
+ records.append(record)
+ return pd.DataFrame.from_records(records)
+
+ def get_availabilities(self):
+ if self.empty:
+ return pd.DataFrame(
+ columns=[
+ "start_index",
+ "end_index",
+ "start_value",
+ "end_value",
+ "delta",
+ "type",
+ ]
+ )
+ records = []
+ for index, value, length in zip(
+ self.tie_indices, self.tie_values, self.tie_lengths
+ ):
+ start_index = index
+ end_index = index + length - 1
+ start_value = value
+ end_value = value + self.sampling_interval * (length - 1)
+ records.append(
+ {
+ "start_index": start_index,
+ "end_index": end_index,
+ "start_value": start_value,
+ "end_value": end_value,
+ "delta": end_value - start_value,
+ "type": "data",
+ }
+ )
+ return pd.DataFrame.from_records(records)
+
+ @classmethod
+ def from_array(cls, arr, dim=None, sampling_interval=None):
+ raise NotImplementedError("from_array is not implemented for SampledCoordinate")
+
+ def to_dict(self):
+ tie_values = self.data["tie_values"]
+ tie_lengths = self.data["tie_lengths"]
+ if np.issubdtype(tie_values.dtype, np.datetime64):
+ tie_values = tie_values.astype(str)
+ data = {
+ "tie_values": tie_values.tolist(),
+ "tie_lengths": tie_lengths.tolist(),
+ "sampling_interval":
self.sampling_interval, + } + return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} + + def to_dataset(self, dataset, attrs): + mapping = f"{self.name}: {self.name}_values {self.name}_lengths" + if "coordinate_sampling" in attrs: + attrs["coordinate_sampling"] += " " + mapping + else: + attrs["coordinate_sampling"] = mapping + tie_values = ( + self.tie_values.astype("M8[ns]") + if np.issubdtype(self.tie_values.dtype, np.datetime64) + else self.tie_values + ) + tie_lengths = self.tie_lengths + interp_attrs = { + "sampling_interval": self.sampling_interval, + "tie_points_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", + } + dataset.update( + { + f"{self.name}_sampling": ((), np.nan, interp_attrs), + f"{self.name}_values": (f"{self.name}_points", tie_values), + f"{self.name}_lengths": (f"{self.name}_points", tie_lengths), + } + ) + return dataset, attrs + + @classmethod + def from_dataset(cls, dataset, name): + coords = {} + mapping = dataset[name].attrs.pop("coordinate_sampling", None) + if mapping is not None: + matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) + for match in matches: + dim, values, lengths = match + sampling_interval = ... + data = { + "tie_values": dataset[values], + "tie_lengths": dataset[lengths], + "sampling_interval": sampling_interval, + } + coords[dim] = Coordinate(data, dim) + return coords From 87bc375ac5ee1e0d50837259704ae8889f4edd0e Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 12:03:22 +0100 Subject: [PATCH 25/63] Refactoring: separate dense module for DenseCoordinate. --- xdas/core/coordinates/__init__.py | 2 +- xdas/core/coordinates/core.py | 95 ------------------------------ xdas/core/coordinates/dense.py | 97 +++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 96 deletions(-) create mode 100644 xdas/core/coordinates/dense.py diff --git a/xdas/core/coordinates/__init__.py b/xdas/core/coordinates/__init__.py index d910530..37dbff5 100644 --- a/xdas/core/coordinates/__init__.py +++ b/xdas/core/coordinates/__init__.py @@ -2,9 +2,9 @@ Coordinate, Coordinates, DefaultCoordinate, - DenseCoordinate, ScalarCoordinate, get_sampling_interval, ) +from .dense import DenseCoordinate from .interp import InterpCoordinate from .sampled import SampledCoordinate diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py index 5dc0559..48e8e7b 100644 --- a/xdas/core/coordinates/core.py +++ b/xdas/core/coordinates/core.py @@ -1,9 +1,7 @@ -import re from copy import copy, deepcopy from functools import wraps import numpy as np -import pandas as pd def wraps_first_last(func): @@ -589,99 +587,6 @@ def to_dict(self): return {"dim": self.dim, "data": self.data.tolist(), "dtype": str(self.dtype)} -class DenseCoordinate(Coordinate): - def __new__(cls, *args, **kwargs): - return object.__new__(cls) - - def __init__(self, data=None, dim=None, dtype=None): - if data is None: - data = [] - data, dim = parse(data, dim) - if not self.isvalid(data): - raise TypeError("`data` must be array-like") - self.data = np.asarray(data, dtype=dtype) - self.dim = dim - - @staticmethod - def isvalid(data): - data = np.asarray(data) - return (data.dtype != np.dtype(object)) and (data.ndim == 1) - - def isdense(self): - return True - - @property - def index(self): - return pd.Index(self.data) - - def equals(self, other): - if isinstance(other, self.__class__): - return ( - np.array_equal(self.data, other.data) - and self.dim == other.dim - and self.dtype == other.dtype - ) - else: - return False - - def 
get_indexer(self, value, method=None): - if np.isscalar(value): - out = self.index.get_indexer([value], method).item() - else: - out = self.index.get_indexer(value, method) - if np.any(out == -1): - raise KeyError("index not found") - return out - - def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): - slc = self.index.slice_indexer(start, stop, step) - if ( - (not endpoint) - and (stop is not None) - and (self[slc.stop - 1].values == stop) - ): - slc = slice(slc.start, slc.stop - 1, slc.step) - return slc - - def append(self, other): - if not isinstance(other, self.__class__): - raise TypeError(f"cannot append {type(other)} to {self.__class__}") - if not self.dim == other.dim: - raise ValueError("cannot append coordinate with different dimension") - if self.empty: - return other - if other.empty: - return self - if not self.dtype == other.dtype: - raise ValueError("cannot append coordinate with different dtype") - return self.__class__(np.concatenate([self.data, other.data]), self.dim) - - def to_dict(self): - if np.issubdtype(self.dtype, np.datetime64): - data = self.data.astype(str).tolist() - else: - data = self.data.tolist() - return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - - @classmethod - def from_dataset(cls, dataset, name): - return { - name: ( - ( - coord.dims[0], - ( - coord.values.astype("U") - if coord.dtype == np.dtype("O") - else coord.values - ), - ) - if coord.dims - else coord.values - ) - for name, coord in dataset[name].coords.items() - } - - def parse(data, dim=None): if isinstance(data, tuple): if dim is None: diff --git a/xdas/core/coordinates/dense.py b/xdas/core/coordinates/dense.py new file mode 100644 index 0000000..9ebef73 --- /dev/null +++ b/xdas/core/coordinates/dense.py @@ -0,0 +1,97 @@ +import numpy as np +import pandas as pd + +from .core import Coordinate, parse + + +class DenseCoordinate(Coordinate): + def __new__(cls, *args, **kwargs): + return object.__new__(cls) + + def __init__(self, data=None, dim=None, dtype=None): + if data is None: + data = [] + data, dim = parse(data, dim) + if not self.isvalid(data): + raise TypeError("`data` must be array-like") + self.data = np.asarray(data, dtype=dtype) + self.dim = dim + + @staticmethod + def isvalid(data): + data = np.asarray(data) + return (data.dtype != np.dtype(object)) and (data.ndim == 1) + + def isdense(self): + return True + + @property + def index(self): + return pd.Index(self.data) + + def equals(self, other): + if isinstance(other, self.__class__): + return ( + np.array_equal(self.data, other.data) + and self.dim == other.dim + and self.dtype == other.dtype + ) + else: + return False + + def get_indexer(self, value, method=None): + if np.isscalar(value): + out = self.index.get_indexer([value], method).item() + else: + out = self.index.get_indexer(value, method) + if np.any(out == -1): + raise KeyError("index not found") + return out + + def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): + slc = self.index.slice_indexer(start, stop, step) + if ( + (not endpoint) + and (stop is not None) + and (self[slc.stop - 1].values == stop) + ): + slc = slice(slc.start, slc.stop - 1, slc.step) + return slc + + def append(self, other): + if not isinstance(other, self.__class__): + raise TypeError(f"cannot append {type(other)} to {self.__class__}") + if not self.dim == other.dim: + raise ValueError("cannot append coordinate with different dimension") + if self.empty: + return other + if other.empty: + return self + if not self.dtype == other.dtype: + 
raise ValueError("cannot append coordinate with different dtype") + return self.__class__(np.concatenate([self.data, other.data]), self.dim) + + def to_dict(self): + if np.issubdtype(self.dtype, np.datetime64): + data = self.data.astype(str).tolist() + else: + data = self.data.tolist() + return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} + + @classmethod + def from_dataset(cls, dataset, name): + return { + name: ( + ( + coord.dims[0], + ( + coord.values.astype("U") + if coord.dtype == np.dtype("O") + else coord.values + ), + ) + if coord.dims + else coord.values + ) + for name, coord in dataset[name].coords.items() + } From e9d1923bf8eb2ab16f20313e98a765b85ab740f7 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 12:15:00 +0100 Subject: [PATCH 26/63] Refactoring: separate scalar module for ScalarCoordinate. --- xdas/core/coordinates/__init__.py | 2 +- xdas/core/coordinates/core.py | 50 ++---------------------------- xdas/core/coordinates/scalar.py | 51 +++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 49 deletions(-) create mode 100644 xdas/core/coordinates/scalar.py diff --git a/xdas/core/coordinates/__init__.py b/xdas/core/coordinates/__init__.py index 37dbff5..c6a3ca1 100644 --- a/xdas/core/coordinates/__init__.py +++ b/xdas/core/coordinates/__init__.py @@ -2,9 +2,9 @@ Coordinate, Coordinates, DefaultCoordinate, - ScalarCoordinate, get_sampling_interval, ) from .dense import DenseCoordinate from .interp import InterpCoordinate from .sampled import SampledCoordinate +from .scalar import ScalarCoordinate diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py index 48e8e7b..64c3e98 100644 --- a/xdas/core/coordinates/core.py +++ b/xdas/core/coordinates/core.py @@ -258,6 +258,8 @@ def __new__(cls, data=None, dim=None, dtype=None): raise TypeError("could not parse `data`") def __getitem__(self, item): + from .scalar import ScalarCoordinate + data = self.data.__getitem__(item) if ScalarCoordinate.isvalid(data): return ScalarCoordinate(data) @@ -460,54 +462,6 @@ def from_dataset(cls, dataset, name): return coords -class ScalarCoordinate(Coordinate): - def __new__(cls, *args, **kwargs): - return object.__new__(cls) - - def __init__(self, data=None, dim=None, dtype=None): - if data is None: - raise TypeError("scalar coordinate cannot be empty, please provide a value") - data, dim = parse(data, dim) - if dim is not None: - raise ValueError("a scalar coordinate cannot be a dim") - if not self.__class__.isvalid(data): - raise TypeError("`data` must be scalar-like") - self.data = np.asarray(data, dtype=dtype) - - @property - def dim(self): - return None - - @dim.setter - def dim(self, value): - if value is not None: - raise ValueError("A scalar coordinate cannot have a `dim` other that None") - - @staticmethod - def isvalid(data): - data = np.asarray(data) - return (data.dtype != np.dtype(object)) and (data.ndim == 0) - - def isscalar(self): - return True - - def equals(self, other): - if isinstance(other, self.__class__): - return self.data == other.data - else: - return False - - def to_index(self, item, method=None, endpoint=True): - raise NotImplementedError("cannot get index of scalar coordinate") - - def to_dict(self): - if np.issubdtype(self.dtype, np.datetime64): - data = self.data.astype(str).item() - else: - data = self.data.item() - return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} - - class DefaultCoordinate(Coordinate): def __new__(cls, *args, **kwargs): return object.__new__(cls) diff --git 
a/xdas/core/coordinates/scalar.py b/xdas/core/coordinates/scalar.py
new file mode 100644
index 0000000..ec30f66
--- /dev/null
+++ b/xdas/core/coordinates/scalar.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+from .core import Coordinate, parse
+
+
+class ScalarCoordinate(Coordinate):
+ def __new__(cls, *args, **kwargs):
+ return object.__new__(cls)
+
+ def __init__(self, data=None, dim=None, dtype=None):
+ if data is None:
+ raise TypeError("scalar coordinate cannot be empty, please provide a value")
+ data, dim = parse(data, dim)
+ if dim is not None:
+ raise ValueError("a scalar coordinate cannot be a dim")
+ if not self.__class__.isvalid(data):
+ raise TypeError("`data` must be scalar-like")
+ self.data = np.asarray(data, dtype=dtype)
+
+ @property
+ def dim(self):
+ return None
+
+ @dim.setter
+ def dim(self, value):
+ if value is not None:
+ raise ValueError("A scalar coordinate cannot have a `dim` other than None")
+
+ @staticmethod
+ def isvalid(data):
+ data = np.asarray(data)
+ return (data.dtype != np.dtype(object)) and (data.ndim == 0)
+
+ def isscalar(self):
+ return True
+
+ def equals(self, other):
+ if isinstance(other, self.__class__):
+ return self.data == other.data
+ else:
+ return False
+
+ def to_index(self, item, method=None, endpoint=True):
+ raise NotImplementedError("cannot get index of scalar coordinate")
+
+ def to_dict(self):
+ if np.issubdtype(self.dtype, np.datetime64):
+ data = self.data.astype(str).item()
+ else:
+ data = self.data.item()
+ return {"dim": self.dim, "data": data, "dtype": str(self.dtype)}

From e695ab93b0db9940c6429f06ea1fb515610c2aa9 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 12:18:29 +0100
Subject: [PATCH 27/63] Refactoring: separate default module for DefaultCoordinate.
---
 xdas/core/coordinates/__init__.py | 2 +-
 xdas/core/coordinates/core.py | 79 -----------------------------
 xdas/core/coordinates/default.py | 84 +++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 80 deletions(-)
 create mode 100644 xdas/core/coordinates/default.py

diff --git a/xdas/core/coordinates/__init__.py b/xdas/core/coordinates/__init__.py
index c6a3ca1..09f1735 100644
--- a/xdas/core/coordinates/__init__.py
+++ b/xdas/core/coordinates/__init__.py
@@ -1,9 +1,9 @@
 from .core import (
 Coordinate,
 Coordinates,
- DefaultCoordinate,
 get_sampling_interval,
 )
+from .default import DefaultCoordinate
 from .dense import DenseCoordinate
 from .interp import InterpCoordinate
 from .sampled import SampledCoordinate
diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py
index 64c3e98..a7c011b 100644
--- a/xdas/core/coordinates/core.py
+++ b/xdas/core/coordinates/core.py
@@ -462,85 +462,6 @@ def from_dataset(cls, dataset, name):
 return coords
 
 
-class DefaultCoordinate(Coordinate):
- def __new__(cls, *args, **kwargs):
- return object.__new__(cls)
-
- def __init__(self, data=None, dim=None, dtype=None):
- if data is None:
- data = {"size": 0}
- data, dim = parse(data, dim)
- if not self.isvalid(data):
- raise TypeError("`data` must be a mapping {'size': }")
- if dtype is not None:
- raise ValueError("`dtype` is not supported for DefaultCoordinate")
- self.data = data
- self.dim = dim
-
- def __len__(self):
- if self.data["size"] is None:
- return 0
- else:
- return self.data["size"]
-
- def __getitem__(self, item):
- data = self.__array__()[item]
- if ScalarCoordinate.isvalid(data):
- return ScalarCoordinate(data)
- else:
- return Coordinate(data, self.dim)
-
- def __array__(self, dtype=None):
- return np.arange(self.data["size"], dtype=dtype)
-
- @staticmethod
- def isvalid(data):
- match data:
- case {"size": None | int(_)}:
- return True
- case _:
- return False
-
- def isdefault(self):
- return True
-
- @property
- def empty(self):
- return bool(self.data["size"])
-
- @property
- def dtype(self):
- return np.int64
-
- @property
- def ndim(self):
- return 1
-
- @property
- def shape(self):
- return (len(self),)
-
- def equals(self, other):
- if isinstance(other, self.__class__):
- return self.data["size"] == other.data["size"]
-
- def get_indexer(self, value, method=None):
- return value
-
- def slice_indexer(self, start=None, stop=None, step=None, endpoint=True):
- return slice(start, stop, step)
-
- def append(self, other):
- if not isinstance(other, self.__class__):
- raise TypeError(f"cannot append {type(other)} to {self.__class__}")
- if not self.dim == other.dim:
- raise ValueError("cannot append coordinate with different dimension")
- return self.__class__({"size": len(self) + len(other)}, self.dim)
-
- def to_dict(self):
- return {"dim": self.dim, "data": self.data.tolist(), "dtype": str(self.dtype)}
-
-
 def parse(data, dim=None):
 if isinstance(data, tuple):
 if dim is None:
diff --git a/xdas/core/coordinates/default.py b/xdas/core/coordinates/default.py
new file mode 100644
index 0000000..960db62
--- /dev/null
+++ b/xdas/core/coordinates/default.py
@@ -0,0 +1,84 @@
+import numpy as np
+
+from .core import Coordinate, parse
+
+
+class DefaultCoordinate(Coordinate):
+ def __new__(cls, *args, **kwargs):
+ return object.__new__(cls)
+
+ def __init__(self, data=None, dim=None, dtype=None):
+ if data is None:
+ data = {"size": 0}
+ data, dim = parse(data, dim)
+ if not self.isvalid(data):
+ raise TypeError("`data` must be a mapping {'size': }")
+ if dtype is not None:
+ raise ValueError("`dtype` is not supported for DefaultCoordinate")
+ self.data = data
+ self.dim = dim
+
+ def __len__(self):
+ if self.data["size"] is None:
+ return 0
+ else:
+ return self.data["size"]
+
+ def __getitem__(self, item):
+ from .scalar import ScalarCoordinate
+
+ data = self.__array__()[item]
+ if ScalarCoordinate.isvalid(data):
+ return ScalarCoordinate(data)
+ else:
+ return Coordinate(data, self.dim)
+
+ def __array__(self, dtype=None):
+ return np.arange(self.data["size"], dtype=dtype)
+
+ @staticmethod
+ def isvalid(data):
+ match data:
+ case {"size": None | int(_)}:
+ return True
+ case _:
+ return False
+
+ def isdefault(self):
+ return True
+
+ @property
+ def empty(self):
+ return len(self) == 0
+
+ @property
+ def dtype(self):
+ return np.int64
+
+ @property
+ def ndim(self):
+ return 1
+
+ @property
+ def shape(self):
+ return (len(self),)
+
+ def equals(self, other):
+ if isinstance(other, self.__class__):
+ return self.data["size"] == other.data["size"]
+ return False
+
+ def get_indexer(self, value, method=None):
+ return value
+
+ def slice_indexer(self, start=None, stop=None, step=None, endpoint=True):
+ return slice(start, stop, step)
+
+ def append(self, other):
+ if not isinstance(other, self.__class__):
+ raise TypeError(f"cannot append {type(other)} to {self.__class__}")
+ if not self.dim == other.dim:
+ raise ValueError("cannot append coordinate with different dimension")
+ return self.__class__({"size": len(self) + len(other)}, self.dim)
+
+ def to_dict(self):
+ return {"dim": self.dim, "data": dict(self.data), "dtype": str(self.dtype)}

From b2c16cc49567ea9a15f67467119819387628eea1 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 12:23:29 +0100
Subject: [PATCH 28/63] Add Coordinate.isscalar.
---
 xdas/core/coordinates/core.py | 13 +++++++------
 xdas/core/coordinates/default.py | 9 +++------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py
index a7c011b..967dbc5 100644
--- a/xdas/core/coordinates/core.py
+++ b/xdas/core/coordinates/core.py
@@ -258,13 +258,9 @@ def __new__(cls, data=None, dim=None, dtype=None):
 raise TypeError("could not parse `data`")
 
 def __getitem__(self, item):
- from .scalar import ScalarCoordinate
-
 data = self.data.__getitem__(item)
- if ScalarCoordinate.isvalid(data):
- return ScalarCoordinate(data)
- else:
- return Coordinate(data, self.dim)
+ dim = None if isscalar(data) else self.dim
+ return Coordinate(data, dim)
 
 def __len__(self):
 return self.data.__len__()
@@ -515,6 +511,11 @@ def get_sampling_interval(da, dim, cast=True):
 return d
 
 
+def isscalar(data):
+ data = np.asarray(data)
+ return (data.dtype != np.dtype(object)) and (data.ndim == 0)
+
+
 def is_strictly_increasing(x):
 if np.issubdtype(x.dtype, np.datetime64):
 return np.all(np.diff(x) > np.timedelta64(0, "ns"))
diff --git a/xdas/core/coordinates/default.py b/xdas/core/coordinates/default.py
index 960db62..d52c9d9 100644
--- a/xdas/core/coordinates/default.py
+++ b/xdas/core/coordinates/default.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from .core import Coordinate, parse
+from .core import Coordinate, isscalar, parse
 
 
 class DefaultCoordinate(Coordinate):
@@ -25,13 +25,10 @@ def __len__(self):
 return self.data["size"]
 
 def __getitem__(self, item):
- from .scalar import ScalarCoordinate
 data = self.__array__()[item]
- if ScalarCoordinate.isvalid(data):
- return ScalarCoordinate(data)
- else:
- return Coordinate(data, self.dim)
+ dim = None if isscalar(data) else self.dim
+ return Coordinate(data, dim)

From 5a9146bdec16ed0d1adb31c30cf32a833a7f4596 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Thu, 18 Dec 2025 15:26:18 +0100
Subject: [PATCH 29/63] Refactor get_sampling_interval: now a method of each
 Coordinate subclass.
---
 xdas/core/coordinates/core.py | 31 +++++++++++--------------------
 xdas/core/coordinates/default.py | 4 +++-
 xdas/core/coordinates/interp.py | 13 +++++++++++++
 xdas/core/coordinates/sampled.py | 3 +++
 xdas/core/coordinates/scalar.py | 3 +++
 xdas/core/routines.py | 4 ++--
 6 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/xdas/core/coordinates/core.py b/xdas/core/coordinates/core.py
index 967dbc5..f70c36c 100644
--- a/xdas/core/coordinates/core.py
+++ b/xdas/core/coordinates/core.py
@@ -323,6 +323,15 @@ def name(self):
 return self.dim
 return next((name for name in self.parent if self.parent[name] is self), None)
 
+ def get_sampling_interval(self, cast=True):
+ if len(self) < 2:
+ return None
+ delta = (self[-1].values - self[0].values) / (len(self) - 1)
+ delta = np.asarray(delta)  # TODO: why?
+ if cast and np.issubdtype(delta.dtype, np.timedelta64):
+ delta = delta / np.timedelta64(1, "s")
+ return delta
+
 def isdim(self):
 if self.parent is None or self.name is None:
 return None
@@ -488,27 +497,9 @@ def get_sampling_interval(da, dim, cast=True):
 -------
 float
 The sample spacing.
+ """ - if da.sizes[dim] < 2: - raise ValueError( - "cannot compute sample spacing on a dimension with less than 2 points" - ) - coord = da[dim] - if coord.isinterp(): - num = np.diff(coord.tie_values) - den = np.diff(coord.tie_indices) - mask = den != 1 - num = num[mask] - den = den[mask] - d = np.median(num / den) - elif coord.issampled(): - d = coord.sampling_interval - else: - d = (coord[-1].values - coord[0].values) / (len(coord) - 1) - d = np.asarray(d) - if cast and np.issubdtype(d.dtype, np.timedelta64): - d = d / np.timedelta64(1, "s") - return d + return da[dim].get_sampling_interval(cast=cast) def isscalar(data): diff --git a/xdas/core/coordinates/default.py b/xdas/core/coordinates/default.py index d52c9d9..8b11220 100644 --- a/xdas/core/coordinates/default.py +++ b/xdas/core/coordinates/default.py @@ -25,7 +25,6 @@ def __len__(self): return self.data["size"] def __getitem__(self, item): - data = self.__array__()[item] dim = None if isscalar(data) else self.dim return Coordinate(data, dim) @@ -60,6 +59,9 @@ def ndim(self): def shape(self): return (len(self),) + def get_sampling_interval(self, cast=True): + return 1 + def equals(self, other): if isinstance(other, self.__class__): return self.data["size"] == other.data["size"] diff --git a/xdas/core/coordinates/interp.py b/xdas/core/coordinates/interp.py index 5e60be3..6a3f7f6 100644 --- a/xdas/core/coordinates/interp.py +++ b/xdas/core/coordinates/interp.py @@ -164,6 +164,19 @@ def values(self): else: return self.get_value(self.indices) + def get_sampling_interval(self, cast=True): + if len(self) < 2: + return None + num = np.diff(self.tie_values) + den = np.diff(self.tie_indices) + mask = den != 1 + num = num[mask] + den = den[mask] + delta = np.median(num / den) + if cast and np.issubdtype(delta.dtype, np.timedelta64): + delta = delta / np.timedelta64(1, "s") + return delta + def equals(self, other): return ( np.array_equal(self.tie_indices, other.tie_indices) diff --git a/xdas/core/coordinates/sampled.py b/xdas/core/coordinates/sampled.py index 70b2e0a..28fead8 100644 --- a/xdas/core/coordinates/sampled.py +++ b/xdas/core/coordinates/sampled.py @@ -112,6 +112,9 @@ def isvalid(data): def issampled(self): return True + def get_sampling_interval(self, cast=True): + return self.sampling_interval + def __len__(self): if self.empty: return 0 diff --git a/xdas/core/coordinates/scalar.py b/xdas/core/coordinates/scalar.py index ec30f66..f0cda93 100644 --- a/xdas/core/coordinates/scalar.py +++ b/xdas/core/coordinates/scalar.py @@ -26,6 +26,9 @@ def dim(self, value): if value is not None: raise ValueError("A scalar coordinate cannot have a `dim` other that None") + def get_sampling_interval(self, cast=True): + return None + @staticmethod def isvalid(data): data = np.asarray(data) diff --git a/xdas/core/routines.py b/xdas/core/routines.py index bc9927f..187c88d 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -612,9 +612,9 @@ def initialize(self, da): if self.dim in self.dims else da.coords.drop_coords(self.dim) ) - try: + if self.dim in da.coords: self.delta = get_sampling_interval(da, self.dim) - except (ValueError, KeyError): + else: self.delta = None self.dtype = da.dtype From bcddaf15187d282359c9c36b5dfc1b36e7addc61 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 16:14:17 +0100 Subject: [PATCH 30/63] Massive paths refactoring. 
--- tests/{ => coordinates}/test_coordinates.py | 88 +----------------- .../test_sampled.py} | 9 +- tests/coordinates/test_scalar.py | 90 +++++++++++++++++++ tests/test_core.py | 83 ++++++++--------- tests/test_dataarray.py | 85 +++++++++--------- tests/test_datacollection.py | 32 +++---- tests/test_processing.py | 26 +++--- tests/test_routines.py | 90 ++++++++++--------- tests/test_virtual.py | 10 +-- tests/test_xarray.py | 14 +-- xdas/__init__.py | 7 +- xdas/atoms/ml.py | 2 +- xdas/atoms/signal.py | 2 +- xdas/{core => }/coordinates/__init__.py | 0 xdas/{core => }/coordinates/core.py | 2 +- xdas/{core => }/coordinates/default.py | 0 xdas/{core => }/coordinates/dense.py | 0 xdas/{core => }/coordinates/interp.py | 0 xdas/{core => }/coordinates/sampled.py | 0 xdas/{core => }/coordinates/scalar.py | 0 xdas/core/dataarray.py | 4 +- xdas/core/routines.py | 3 +- xdas/fft.py | 2 +- xdas/io/__init__.py | 1 - xdas/io/asn.py | 3 +- xdas/io/miniseed.py | 2 +- xdas/processing/__init__.py | 1 - xdas/signal.py | 2 +- xdas/spectral.py | 2 +- xdas/trigger.py | 2 +- 30 files changed, 281 insertions(+), 281 deletions(-) rename tests/{ => coordinates}/test_coordinates.py (89%) rename tests/{test_sampled_coordinate.py => coordinates/test_sampled.py} (99%) create mode 100644 tests/coordinates/test_scalar.py rename xdas/{core => }/coordinates/__init__.py (100%) rename xdas/{core => }/coordinates/core.py (99%) rename xdas/{core => }/coordinates/default.py (100%) rename xdas/{core => }/coordinates/dense.py (100%) rename xdas/{core => }/coordinates/interp.py (100%) rename xdas/{core => }/coordinates/sampled.py (100%) rename xdas/{core => }/coordinates/scalar.py (100%) diff --git a/tests/test_coordinates.py b/tests/coordinates/test_coordinates.py similarity index 89% rename from tests/test_coordinates.py rename to tests/coordinates/test_coordinates.py index 0b295fa..2a3166a 100644 --- a/tests/test_coordinates.py +++ b/tests/coordinates/test_coordinates.py @@ -3,93 +3,7 @@ import pytest import xdas -from xdas.core.coordinates import DenseCoordinate, InterpCoordinate, ScalarCoordinate - - -class TestScalarCoordinate: - valid = [ - 1, - np.array(1), - 1.0, - np.array(1.0), - "label", - np.array("label"), - np.datetime64(1, "s"), - ] - invalid = [[1], np.array([1]), {"key": "value"}] - - def test_isvalid(self): - for data in self.valid: - assert ScalarCoordinate.isvalid(data) - for data in self.invalid: - assert not ScalarCoordinate.isvalid(data) - - def test_init(self): - coord = ScalarCoordinate(1) - assert coord.data == 1 - assert coord.dim is None - coord = ScalarCoordinate(1, None) - assert coord.dim is None - with pytest.raises(ValueError): - ScalarCoordinate(1, "dim") - for data in self.valid: - assert ScalarCoordinate(data).data == np.array(data) - for data in self.invalid: - with pytest.raises(TypeError): - ScalarCoordinate(data) - - def test_getitem(self): - assert ScalarCoordinate(1)[...].equals(ScalarCoordinate(1)) - with pytest.raises(IndexError): - ScalarCoordinate(1)[:] - with pytest.raises(IndexError): - ScalarCoordinate(1)[0] - - def test_len(self): - with pytest.raises(TypeError): - len(ScalarCoordinate(1)) - - def test_repr(self): - for data in self.valid: - assert ScalarCoordinate(data).__repr__() == np.array2string( - np.asarray(data), threshold=0, edgeitems=1 - ) - - def test_array(self): - for data in self.valid: - assert ScalarCoordinate(data).__array__() == np.array(data) - - def test_dtype(self): - for data in self.valid: - assert ScalarCoordinate(data).dtype == np.array(data).dtype - - def 
test_values(self): - for data in self.valid: - assert ScalarCoordinate(data).values == np.array(data) - - def test_equals(self): - for data in self.valid: - coord = ScalarCoordinate(data) - assert coord.equals(coord) - assert ScalarCoordinate(1).equals(ScalarCoordinate(np.array(1))) - - def test_to_index(self): - with pytest.raises(NotImplementedError): - ScalarCoordinate(1).to_index("item") - - def test_isinstance(self): - assert ScalarCoordinate(1).isscalar() - assert not ScalarCoordinate(1).isdense() - assert not ScalarCoordinate(1).isinterp() - - def test_to_from_dict(self): - for data in self.valid: - coord = ScalarCoordinate(data) - assert ScalarCoordinate.from_dict(coord.to_dict()).equals(coord) - - def test_empty(self): - with pytest.raises(TypeError, match="cannot be empty"): - ScalarCoordinate() +from xdas.coordinates import DenseCoordinate, InterpCoordinate, ScalarCoordinate class TestDenseCoordinate: diff --git a/tests/test_sampled_coordinate.py b/tests/coordinates/test_sampled.py similarity index 99% rename from tests/test_sampled_coordinate.py rename to tests/coordinates/test_sampled.py index 643485d..14ea457 100644 --- a/tests/test_sampled_coordinate.py +++ b/tests/coordinates/test_sampled.py @@ -2,7 +2,12 @@ import pandas as pd import pytest -from xdas.core.coordinates import DenseCoordinate, SampledCoordinate, ScalarCoordinate +from xdas.coordinates import ( + Coordinate, + DenseCoordinate, + SampledCoordinate, + ScalarCoordinate, +) class TestSampledCoordinateBasics: @@ -355,8 +360,6 @@ def test_to_from_dict(self): ) d = coord.to_dict() # round-trip via Coordinate factory - from xdas.core.coordinates import Coordinate - back = Coordinate.from_dict(d) assert isinstance(back, SampledCoordinate) assert back.equals(coord) diff --git a/tests/coordinates/test_scalar.py b/tests/coordinates/test_scalar.py new file mode 100644 index 0000000..9060f60 --- /dev/null +++ b/tests/coordinates/test_scalar.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from xdas.coordinates import ScalarCoordinate + + +class TestScalarCoordinate: + valid = [ + 1, + np.array(1), + 1.0, + np.array(1.0), + "label", + np.array("label"), + np.datetime64(1, "s"), + ] + invalid = [[1], np.array([1]), {"key": "value"}] + + def test_isvalid(self): + for data in self.valid: + assert ScalarCoordinate.isvalid(data) + for data in self.invalid: + assert not ScalarCoordinate.isvalid(data) + + def test_init(self): + coord = ScalarCoordinate(1) + assert coord.data == 1 + assert coord.dim is None + coord = ScalarCoordinate(1, None) + assert coord.dim is None + with pytest.raises(ValueError): + ScalarCoordinate(1, "dim") + for data in self.valid: + assert ScalarCoordinate(data).data == np.array(data) + for data in self.invalid: + with pytest.raises(TypeError): + ScalarCoordinate(data) + + def test_getitem(self): + assert ScalarCoordinate(1)[...].equals(ScalarCoordinate(1)) + with pytest.raises(IndexError): + ScalarCoordinate(1)[:] + with pytest.raises(IndexError): + ScalarCoordinate(1)[0] + + def test_len(self): + with pytest.raises(TypeError): + len(ScalarCoordinate(1)) + + def test_repr(self): + for data in self.valid: + assert ScalarCoordinate(data).__repr__() == np.array2string( + np.asarray(data), threshold=0, edgeitems=1 + ) + + def test_array(self): + for data in self.valid: + assert ScalarCoordinate(data).__array__() == np.array(data) + + def test_dtype(self): + for data in self.valid: + assert ScalarCoordinate(data).dtype == np.array(data).dtype + + def test_values(self): + for data in self.valid: + 
assert ScalarCoordinate(data).values == np.array(data) + + def test_equals(self): + for data in self.valid: + coord = ScalarCoordinate(data) + assert coord.equals(coord) + assert ScalarCoordinate(1).equals(ScalarCoordinate(np.array(1))) + + def test_to_index(self): + with pytest.raises(NotImplementedError): + ScalarCoordinate(1).to_index("item") + + def test_isinstance(self): + assert ScalarCoordinate(1).isscalar() + assert not ScalarCoordinate(1).isdense() + assert not ScalarCoordinate(1).isinterp() + + def test_to_from_dict(self): + for data in self.valid: + coord = ScalarCoordinate(data) + assert ScalarCoordinate.from_dict(coord.to_dict()).equals(coord) + + def test_empty(self): + with pytest.raises(TypeError, match="cannot be empty"): + ScalarCoordinate() diff --git a/tests/test_core.py b/tests/test_core.py index aeb5c2d..51c33b2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,7 +4,8 @@ import numpy as np import pytest -import xdas +import xdas as xd +from xdas.coordinates import InterpCoordinate from xdas.synthetics import wavelet_wavefronts from xdas.virtual import VirtualStack @@ -20,7 +21,7 @@ def generate(self, datetime): else: t = {"tie_indices": [0, shape[0] - 1], "tie_values": [0, 3.0 - 1 / 100]} s = {"tie_indices": [0, shape[1] - 1], "tie_values": [0, 990.0]} - return xdas.DataArray( + return xd.DataArray( data=np.random.randn(*shape), coords={ "time": t, @@ -39,7 +40,7 @@ def test_open_mfdatatree(self): for idx, da in enumerate(wavelet_wavefronts(nchunk=3), start=1): da.to_netcdf(os.path.join(dirname, f"{idx:03d}.nc")) da = wavelet_wavefronts() - dc = xdas.open_mfdatatree( + dc = xd.open_mfdatatree( os.path.join(dirpath, "{node}", "00[acquisition].nc") ) assert list(dc.keys()) == keys @@ -51,10 +52,10 @@ def test_open_mfdataarray(self): wavelet_wavefronts().to_netcdf(os.path.join(dirpath, "sample.nc")) for idx, da in enumerate(wavelet_wavefronts(nchunk=3), start=1): da.to_netcdf(os.path.join(dirpath, f"{idx:03}.nc")) - da_monolithic = xdas.open_dataarray(os.path.join(dirpath, "sample.nc")) - da_chunked = xdas.open_mfdataarray(os.path.join(dirpath, "00*.nc")) + da_monolithic = xd.open_dataarray(os.path.join(dirpath, "sample.nc")) + da_chunked = xd.open_mfdataarray(os.path.join(dirpath, "00*.nc")) assert da_monolithic.equals(da_chunked) - da_chunked = xdas.open_mfdataarray( + da_chunked = xd.open_mfdataarray( [ os.path.join(dirpath, fname) for fname in ["001.nc", "002.nc", "003.nc"] @@ -62,9 +63,9 @@ def test_open_mfdataarray(self): ) assert da_monolithic.equals(da_chunked) with pytest.raises(FileNotFoundError): - xdas.open_mfdataarray("not_existing_files_*.nc") + xd.open_mfdataarray("not_existing_files_*.nc") with pytest.raises(FileNotFoundError): - xdas.open_mfdataarray(["not_existing_file.nc"]) + xd.open_mfdataarray(["not_existing_file.nc"]) def test_open_mfdataarray_grouping(self): with TemporaryDirectory() as dirpath: @@ -90,7 +91,7 @@ def test_open_mfdataarray_grouping(self): for da in wavelet_wavefronts(**acq): da.to_netcdf(os.path.join(dirpath, f"{count:03d}.nc")) count += 1 - dc = xdas.open_mfdataarray(os.path.join(dirpath, "*.nc")) + dc = xd.open_mfdataarray(os.path.join(dirpath, "*.nc")) assert len(dc) == 3 for da, acq in zip(dc, acqs): acq |= {"nchunk": None} @@ -108,28 +109,28 @@ def test_concatenate(self): }, "distance": da1["distance"], } - expected = xdas.DataArray(data, coords) - result = xdas.concatenate([da1, da2]) + expected = xd.DataArray(data, coords) + result = xd.concatenate([da1, da2]) assert result.equals(expected) # concatenate an empty 
data array - result = xdas.concatenate([da1, da2.isel(time=slice(0, 0))]) + result = xd.concatenate([da1, da2.isel(time=slice(0, 0))]) assert result.equals(da1) # concat of sources and stacks with TemporaryDirectory() as tmp_path: da1.to_netcdf(os.path.join(tmp_path, "da1.nc")) da2.to_netcdf(os.path.join(tmp_path, "da2.nc")) - da1 = xdas.open_dataarray(os.path.join(tmp_path, "da1.nc")) - da2 = xdas.open_dataarray(os.path.join(tmp_path, "da2.nc")) - result = xdas.concatenate([da1, da2]) + da1 = xd.open_dataarray(os.path.join(tmp_path, "da1.nc")) + da2 = xd.open_dataarray(os.path.join(tmp_path, "da2.nc")) + result = xd.concatenate([da1, da2]) assert isinstance(result.data, VirtualStack) assert result.equals(expected) da1.data = VirtualStack([da1.data]) da2.data = VirtualStack([da2.data]) - result = xdas.concatenate([da1, da2]) + result = xd.concatenate([da1, da2]) assert isinstance(result.data, VirtualStack) assert result.equals(expected) # concat of 3D data arrays with unsorted coords: - da1 = xdas.DataArray( + da1 = xd.DataArray( data=np.zeros((5, 4, 3)), coords={ "phase": ["A", "B", "C"], @@ -138,7 +139,7 @@ def test_concatenate(self): }, dims=("time", "distance", "phase"), ) - da2 = xdas.DataArray( + da2 = xd.DataArray( data=np.ones((7, 4, 3)), coords={ "phase": ["A", "B", "C"], @@ -147,7 +148,7 @@ def test_concatenate(self): }, dims=("time", "distance", "phase"), ) - expected = xdas.DataArray( + expected = xd.DataArray( data=np.concatenate((np.zeros((5, 4, 3)), np.ones((7, 4, 3))), axis=0), coords={ "time": {"tie_indices": [0, 11], "tie_values": [0, 11]}, @@ -155,9 +156,9 @@ def test_concatenate(self): "phase": ["A", "B", "C"], }, ) - assert xdas.concatenate((da1, da2), dim="time").equals(expected) + assert xd.concatenate((da1, da2), dim="time").equals(expected) # concat dense coordinates - da1 = xdas.DataArray( + da1 = xd.DataArray( data=np.zeros((5, 4, 3)), coords={ "phase": ["A", "B", "C"], @@ -166,7 +167,7 @@ def test_concatenate(self): }, dims=("time", "distance", "phase"), ) - da2 = xdas.DataArray( + da2 = xd.DataArray( data=np.ones((7, 4, 3)), coords={ "phase": ["A", "B", "C"], @@ -175,7 +176,7 @@ def test_concatenate(self): }, dims=("time", "distance", "phase"), ) - expected = xdas.DataArray( + expected = xd.DataArray( data=np.concatenate((np.zeros((5, 4, 3)), np.ones((7, 4, 3))), axis=0), coords={ "phase": ["A", "B", "C"], @@ -184,34 +185,34 @@ def test_concatenate(self): }, dims=("time", "distance", "phase"), ) - assert xdas.concatenate((da1, da2), dim="time").equals(expected) + assert xd.concatenate((da1, da2), dim="time").equals(expected) # stack da = wavelet_wavefronts() objs = [obj for obj in da] - result = xdas.concatenate(objs, dim="time") - result["time"] = xdas.InterpCoordinate.from_array(result["time"].values) + result = xd.concatenate(objs, dim="time") + result["time"] = InterpCoordinate.from_array(result["time"].values) assert result.equals(da) objs = [obj.drop_coords("time") for obj in da] - result = xdas.concatenate(objs, dim="time") + result = xd.concatenate(objs, dim="time") assert result.equals(da.drop_coords("time")) def test_open_dataarray(self): with pytest.raises(FileNotFoundError): - xdas.open_dataarray("not_existing_file.nc") + xd.open_dataarray("not_existing_file.nc") def test_open_datacollection(self): with pytest.raises(FileNotFoundError): - xdas.open_datacollection("not_existing_file.nc") + xd.open_datacollection("not_existing_file.nc") def test_asdataarray(self): da = self.generate(False) - out = xdas.asdataarray(da.to_xarray()) + out = 
xd.asdataarray(da.to_xarray()) assert np.array_equal(out.data, da.data) for dim in da.dims: assert np.array_equal(out[dim].values, da[dim].values) def test_split(self): - da = xdas.DataArray( + da = xd.DataArray( np.ones(30), { "time": { @@ -220,22 +221,22 @@ def test_split(self): }, }, ) - assert xdas.concatenate(xdas.split(da)).equals(da) - assert xdas.split(da, tolerance=20.0)[0].equals(da) + assert xd.concatenate(xd.split(da)).equals(da) + assert xd.split(da, tolerance=20.0)[0].equals(da) def test_chunk(self): da = wavelet_wavefronts() - assert xdas.concatenate(xdas.split(da, 3)).equals(da) + assert xd.concatenate(xd.split(da, 3)).equals(da) def test_align(self): - da1 = xdas.DataArray(np.arange(2), {"x": [0, 1]}) - da2 = xdas.DataArray(np.arange(3), {"y": [2, 3, 4]}) - da1, da2 = xdas.align(da1, da2) + da1 = xd.DataArray(np.arange(2), {"x": [0, 1]}) + da2 = xd.DataArray(np.arange(3), {"y": [2, 3, 4]}) + da1, da2 = xd.align(da1, da2) assert da1.sizes == {"x": 2, "y": 1} assert da2.sizes == {"x": 1, "y": 3} - da3 = xdas.DataArray(np.arange(4).reshape(2, 2), {"x": [0, 1], "y": [2, 3]}) + da3 = xd.DataArray(np.arange(4).reshape(2, 2), {"x": [0, 1], "y": [2, 3]}) with pytest.raises(ValueError, match="incompatible sizes"): - xdas.align(da1, da2, da3) - da3 = xdas.DataArray(np.arange(6).reshape(2, 3), {"x": [1, 2], "y": [2, 3, 4]}) + xd.align(da1, da2, da3) + da3 = xd.DataArray(np.arange(6).reshape(2, 3), {"x": [1, 2], "y": [2, 3, 4]}) with pytest.raises(ValueError, match="differs from one data array to another"): - xdas.align(da1, da2, da3) + xd.align(da1, da2, da3) diff --git a/tests/test_dataarray.py b/tests/test_dataarray.py index a0a9c0b..4a251f7 100644 --- a/tests/test_dataarray.py +++ b/tests/test_dataarray.py @@ -7,9 +7,8 @@ import numpy as np import pytest -import xdas -from xdas.core.coordinates import Coordinates, DenseCoordinate, InterpCoordinate -from xdas.core.dataarray import DataArray +import xdas as xd +from xdas.coordinates import Coordinates, DenseCoordinate, InterpCoordinate from xdas.synthetics import wavelet_wavefronts @@ -20,12 +19,12 @@ def generate(self, dense=False): else: coords = {"dim": {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}} data = 0.1 * np.arange(9) - da = xdas.DataArray(data, coords) + da = xd.DataArray(data, coords) return da def test_init_without_coords(self): data = np.arange(2 * 3 * 5).reshape(2, 3, 5) - da = xdas.DataArray(data) + da = xd.DataArray(data) assert np.array_equal(da.data, data) assert da.dims == ("dim_0", "dim_1", "dim_2") assert da.coords == {} @@ -53,30 +52,30 @@ def test_init_and_properties(self): assert da.dtype == np.float64 da = self.generate(dense=True) assert isinstance(da["dim"], DenseCoordinate) - da = DataArray() + da = xd.DataArray() assert np.array_equal(da.values, np.array(np.nan), equal_nan=True) assert da.coords == {} assert da.dims == tuple() - da = DataArray([[]]) + da = xd.DataArray([[]]) assert da.dims == ("dim_0", "dim_1") assert da.ndim == 2 - da = DataArray(1) + da = xd.DataArray(1) assert da.dims == tuple() assert da.ndim == 0 def test_raises_on_data_and_coords_mismatch(self): with pytest.raises(ValueError, match="different number of dimensions"): - DataArray(np.zeros(3), dims=("time", "distance")) + xd.DataArray(np.zeros(3), dims=("time", "distance")) with pytest.raises( ValueError, match="inferred number of dimensions 2 from `coords` does not match `data` dimensionality of 1", ): - DataArray(np.zeros(3), coords={"time": [1], "distance": [1]}) + xd.DataArray(np.zeros(3), coords={"time": [1], "distance": 
[1]})
         with pytest.raises(ValueError, match="conflicting sizes for dimension"):
-            DataArray(np.zeros((2, 3)), coords={"time": [1, 2], "distance": [1, 2]})
+            xd.DataArray(np.zeros((2, 3)), coords={"time": [1, 2], "distance": [1, 2]})
 
     def test_coords_setter(self):
-        da = xdas.DataArray(np.arange(3 * 11).reshape(3, 11))
+        da = xd.DataArray(np.arange(3 * 11).reshape(3, 11))
         da["dim_0"] = [1, 2, 4]
         da["dim_1"] = {"tie_indices": [0, 10], "tie_values": [0.0, 100.0]}
         da["dim_0"] = [1, 2, 3]
@@ -163,7 +162,7 @@ def test_sel(self):
         assert "distance" not in result.coords
 
     def test_better_error_when_sel_with_overlaps(self):
-        da = DataArray(
+        da = xd.DataArray(
             np.arange(80).reshape(20, 4),
             {
                 "time": {
@@ -195,7 +194,7 @@ def test_to_xarray(self):
         result = da.to_xarray()
         assert np.array_equal(result.values, da.values)
         assert np.array_equal(result["dim"].values, da["dim"].values)
-        da = da.sel(dim=slice(1000, 2000))  # empty dataarray
+        da = da.sel(dim=slice(1000, 2000))  # empty data array
         result = da.to_xarray()
         assert np.array_equal(result.values, da.values)
         assert np.array_equal(result["dim"].values, da["dim"].values)
@@ -203,7 +202,7 @@ def test_from_xarray(self):
         da = self.generate()
         da = da.to_xarray()
-        result = DataArray.from_xarray(da)
+        result = xd.DataArray.from_xarray(da)
         assert np.array_equal(result.values, da.values)
         assert np.array_equal(result["dim"].values, da["dim"].values)
 
@@ -219,7 +218,7 @@ def test_stream(self):
         assert st[0].stats.npts == da.sizes["time"]
         assert np.datetime64(st[0].stats.starttime.datetime) == da["time"][0].values
         assert np.datetime64(st[0].stats.endtime.datetime) == da["time"][-1].values
-        result = DataArray.from_stream(st)
+        result = xd.DataArray.from_stream(st)
         assert np.array_equal(result.values.T, da.values)
         assert result.sizes == {
             "channel": da.sizes["distance"],
@@ -231,10 +230,10 @@ def test_dense_str(self):
         coord = [f"D{k}" for k in range(9)]
         coords = Coordinates({"dim": coord})
         data = 0.1 * np.arange(9)
-        DataArray(data, coords)
+        xd.DataArray(data, coords)
 
     def test_single_index_selection(self):
-        da = DataArray(
+        da = xd.DataArray(
             np.arange(12).reshape(3, 4),
             {
                 "time": {"tie_values": [0.0, 1.0], "tie_indices": [0, 2]},
@@ -244,7 +243,7 @@ def test_single_index_selection(self):
         da_getitem = da[1]
         da_isel = da.isel(time=1)
         da_sel = da.sel(time=0.5)
-        da_expected = DataArray(
+        da_expected = xd.DataArray(
             np.array([4, 5, 6, 7]),
             {"time": (None, 0.5), "distance": [0.0, 10.0, 20.0, 30.0]},
         )
@@ -254,7 +253,7 @@ def test_single_index_selection(self):
         da_getitem = da[:, 1]
         da_isel = da.isel(distance=1)
         da_sel = da.sel(distance=10.0)
-        da_expected = DataArray(
+        da_expected = xd.DataArray(
             np.array([1, 5, 9]),
             {
                 "time": {"tie_values": [0.0, 1.0], "tie_indices": [0, 2]},
@@ -266,7 +265,7 @@ def test_single_index_selection(self):
         assert da_sel.equals(da_expected)
 
     def test_assign_coords(self):
-        da = DataArray(
+        da = xd.DataArray(
             data=np.zeros(3),
             coords={"time": np.array([3, 4, 5])},
         )
@@ -277,7 +276,7 @@ def test_assign_coords(self):
         assert np.array_equal(result["relative_time"].values, [0, 1, 2])
 
     def test_swap_dims(self):
-        da = DataArray(
+        da = xd.DataArray(
             data=[0, 1],
             coords={"x": ["a", "b"], "y": ("x", [0, 1])},
         )
@@ -294,7 +293,7 @@ def test_swap_dims(self):
         da.swap_dims({"z": "x"})
 
     def test_to_xarray_non_dimensional(self):
-        da = DataArray(
+        da = xd.DataArray(
             data=np.zeros(3),
             coords={
                 "time": np.array([3, 4, 5]),
@@ -308,7 +307,7 @@ def test_to_xarray_non_dimensional(self):
         assert result.dims == da.dims
 
     def 
test_netcdf_non_dimensional(self): - da = DataArray( + da = xd.DataArray( data=np.zeros(3), coords={ "time": np.array([3, 4, 5]), @@ -318,16 +317,16 @@ def test_netcdf_non_dimensional(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") da.to_netcdf(path) - result = xdas.open_dataarray(path) + result = xd.open_dataarray(path) assert result.equals(da) with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "da.nc") da = wavelet_wavefronts().assign_coords(lon=("distance", np.arange(401))) da.to_netcdf(path) - tmp = xdas.open_dataarray(path) + tmp = xd.open_dataarray(path) path = path = os.path.join(dirpath, "vds.nc") tmp.to_netcdf(path) - result = xdas.open_dataarray(path) + result = xd.open_dataarray(path) assert result.equals(da) def test_transpose(self): @@ -345,16 +344,16 @@ def test_transpose(self): da.transpose("space", "frequency") def test_expand_dims(self): - da = DataArray([1.0, 2.0, 3.0], {"x": [0, 1, 2]}) + da = xd.DataArray([1.0, 2.0, 3.0], {"x": [0, 1, 2]}) result = da.expand_dims("y", 0) assert result.dims == ("y", "x") assert result.shape == (1, 3) - da = DataArray([1.0, 2.0, 3.0], {"x": [0, 1, 2], "y": 0}, dims=("x",)) + da = xd.DataArray([1.0, 2.0, 3.0], {"x": [0, 1, 2], "y": 0}, dims=("x",)) result = da.expand_dims("y") assert result.dims == ("y", "x") assert result.shape == (1, 3) - assert result["y"].equals(xdas.Coordinate([0], dim="y")) + assert result["y"].equals(xd.Coordinate([0], dim="y")) def test_io(self): # both coords interpolated @@ -362,7 +361,7 @@ def test_io(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") da.to_netcdf(path) - da_recovered = DataArray.from_netcdf(path) + da_recovered = xd.DataArray.from_netcdf(path) assert da.equals(da_recovered) # mixed interpolated and dense @@ -370,7 +369,7 @@ def test_io(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") da.to_netcdf(path) - da_recovered = DataArray.from_netcdf(path) + da_recovered = xd.DataArray.from_netcdf(path) assert da.equals(da_recovered) # only dense coords @@ -378,11 +377,11 @@ def test_io(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") da.to_netcdf(path) - da_recovered = DataArray.from_netcdf(path) + da_recovered = xd.DataArray.from_netcdf(path) assert da.equals(da_recovered) def test_io_with_zfp_compression(self): - da = DataArray(np.random.rand(101, 101)) + da = xd.DataArray(np.random.rand(101, 101)) with TemporaryDirectory() as tmpdir: tmpfile_uncompressed = os.path.join(tmpdir, "uncompressed.nc") da.to_netcdf(tmpfile_uncompressed) @@ -398,7 +397,7 @@ def test_io_with_zfp_compression(self): chunk_compressed_size = os.path.getsize(tmpfile_chunk_compressed) assert chunk_compressed_size < uncompressed_size assert compressed_size < chunk_compressed_size - _da = DataArray.from_netcdf(tmpfile_compressed) + _da = xd.DataArray.from_netcdf(tmpfile_compressed) assert np.abs(da - _da).max().values < 0.001 def test_io_dask(self): @@ -414,7 +413,7 @@ def test_io_dask(self): for chunk in chunks ] data = dask.array.concatenate(chunks, axis=1) - expected = DataArray( + expected = xd.DataArray( data, coords={"time": np.arange(3), "distance": np.arange(10)}, attrs={"version": "1.0"}, @@ -422,7 +421,7 @@ def test_io_dask(self): ) fname = os.path.join(tmpdir, "tmp.nc") expected.to_netcdf(fname) - result = xdas.open_dataarray(fname) + result = xd.open_dataarray(fname) assert isinstance(result.data, dask.array.Array) assert np.array_equal(expected.values, result.values) 
assert expected.dtype == result.dtype @@ -432,16 +431,16 @@ def test_io_dask(self): assert expected.attrs == result.attrs def test_io_non_dimensional(self): - expected = DataArray(coords={"dim": 0}, dims=()) + expected = xd.DataArray(coords={"dim": 0}, dims=()) with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") expected.to_netcdf(path) - result = DataArray.from_netcdf(path) + result = xd.DataArray.from_netcdf(path) assert expected.equals(result) def test_io_attrs(self): attrs = {"description": "test"} - da = DataArray( + da = xd.DataArray( np.arange(3), coords={"time": np.array([3, 4, 5])}, attrs=attrs, @@ -449,13 +448,13 @@ def test_io_attrs(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") da.to_netcdf(path) - result = DataArray.from_netcdf(path) + result = xd.DataArray.from_netcdf(path) assert result.attrs == attrs assert result.equals(da) - da = xdas.open_dataarray(path) + da = xd.open_dataarray(path) path = os.path.join(dirpath, "vds.nc") da.to_netcdf(path) - result = xdas.open_dataarray(path) + result = xd.open_dataarray(path) assert result.attrs == attrs assert result.equals(da) diff --git a/tests/test_datacollection.py b/tests/test_datacollection.py index 1f61b64..7ebd2c0 100644 --- a/tests/test_datacollection.py +++ b/tests/test_datacollection.py @@ -4,7 +4,7 @@ import h5py import pytest -import xdas +import xdas as xd import xdas.signal as xs from xdas.core.datacollection import get_depth from xdas.synthetics import wavelet_wavefronts @@ -12,10 +12,10 @@ class TestDataCollection: def nest(self, da): - return xdas.DataCollection( + return xd.DataCollection( { - "das1": xdas.DataCollection([da, da], "acquisition"), - "das2": xdas.DataCollection([da, da, da], "acquisition"), + "das1": xd.DataCollection([da, da], "acquisition"), + "das2": xd.DataCollection([da, da, da], "acquisition"), }, "instrument", ) @@ -30,12 +30,12 @@ def test_init(self): "das2": ("acquisition", [da, da, da]), }, ) - result = xdas.DataCollection(data) + result = xd.DataCollection(data) assert result.equals(dc) def test_io(self): da = wavelet_wavefronts() - dc = xdas.DataCollection( + dc = xd.DataCollection( { "das1": da, "das2": da, @@ -45,27 +45,27 @@ def test_io(self): with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") dc.to_netcdf(path) - result = xdas.DataCollection.from_netcdf(path) + result = xd.DataCollection.from_netcdf(path) assert result.equals(dc) - dc = xdas.DataCollection([da, da], "instrument") + dc = xd.DataCollection([da, da], "instrument") with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") dc.to_netcdf(path) - result = xdas.DataCollection.from_netcdf(path) + result = xd.DataCollection.from_netcdf(path) assert result.equals(dc) - dc = xdas.DataCollection( + dc = xd.DataCollection( { - "das1": xdas.DataCollection([da, da], "acquisition"), - "das2": xdas.DataCollection([da, da, da], "acquisition"), + "das1": xd.DataCollection([da, da], "acquisition"), + "das2": xd.DataCollection([da, da, da], "acquisition"), }, "instrument", ) with TemporaryDirectory() as dirpath: path = os.path.join(dirpath, "tmp.nc") dc.to_netcdf(path) - result = xdas.DataCollection.from_netcdf(path) + result = xd.DataCollection.from_netcdf(path) assert result.equals(dc) - result = xdas.open_datacollection(path) + result = xd.open_datacollection(path) assert result.equals(dc) def test_depth_counter(self): @@ -108,9 +108,9 @@ def test_query(self): da = wavelet_wavefronts() dc = self.nest(da) result = 
dc.query(instrument="das1", acquisition=0) - expected = xdas.DataCollection( + expected = xd.DataCollection( { - "das1": xdas.DataCollection([da], "acquisition"), + "das1": xd.DataCollection([da], "acquisition"), }, "instrument", ) diff --git a/tests/test_processing.py b/tests/test_processing.py index 3acfb62..5f72976 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -10,10 +10,10 @@ import pandas as pd import scipy.signal as sp -import xdas +import xdas as xd import xdas.processing as xp from xdas.atoms import Partial, Sequential -from xdas.processing.core import ( +from xdas.processing import ( DataArrayLoader, DataArrayWriter, DataFrameWriter, @@ -31,7 +31,7 @@ def test_stateful(self): with tempfile.TemporaryDirectory() as tempdir: # generate test dataarray wavelet_wavefronts().to_netcdf(os.path.join(tempdir, "sample.nc")) - da = xdas.open_dataarray(os.path.join(tempdir, "sample.nc")) + da = xd.open_dataarray(os.path.join(tempdir, "sample.nc")) # declare processing sequence sos = sp.iirfilter(4, 0.1, btype="lowpass", output="sos") @@ -189,20 +189,20 @@ def publish(): result.append(packet) if n == len(packets): break - return xdas.concatenate(result) + return xd.concatenate(result) def test_publish_and_subscribe(self): - expected = xdas.synthetics.dummy() - packets = xdas.split(expected, 10) - address = f"tcp://localhost:{xdas.io.get_free_port()}" + expected = xd.synthetics.dummy() + packets = xd.split(expected, 10) + address = f"tcp://localhost:{xd.io.get_free_port()}" result = self._publish_and_subscribe(packets, address) assert result.equals(expected) def test_encoding(self): - expected = xdas.synthetics.dummy() - packets = xdas.split(expected, 10) - address = f"tcp://localhost:{xdas.io.get_free_port()}" + expected = xd.synthetics.dummy() + packets = xd.split(expected, 10) + address = f"tcp://localhost:{xd.io.get_free_port()}" encoding = {"chunks": (10, 10), **hdf5plugin.Zfp(accuracy=1e-6)} result = self._publish_and_subscribe(packets, address, encoding=encoding) @@ -221,7 +221,7 @@ def test_without_gap(self): endtime = starttime + np.timedelta64(10, "ms") * (data.shape[0] - 1) distance = 5.0 * np.arange(data.shape[1]) - da = xdas.DataArray( + da = xd.DataArray( data=data, coords={ "time": { @@ -275,7 +275,7 @@ def test_without_gap(self): def test_with_gap(self): with tempfile.TemporaryDirectory() as tempdir: - da = xdas.DataArray( + da = xd.DataArray( data=np.random.randint( low=-1000, high=1000, size=(900, 10), dtype=np.int32 ), @@ -350,7 +350,7 @@ def test_flat(self): endtime = starttime + np.timedelta64(10, "ms") * (data.shape[0] - 1) distance = 5.0 * np.arange(data.shape[1]) - da = xdas.DataArray( + da = xd.DataArray( data=data, coords={ "time": { diff --git a/tests/test_routines.py b/tests/test_routines.py index 32d1d4d..d74e00a 100644 --- a/tests/test_routines.py +++ b/tests/test_routines.py @@ -2,9 +2,7 @@ import pytest import xdas as xd -from xdas.core.coordinates import Coordinates -from xdas.core.dataarray import DataArray -from xdas.core.routines import Bag, CompatibilityError, combine_by_coords +from xdas.core.routines import Bag, CompatibilityError class TestBag: @@ -14,30 +12,30 @@ def test_bag_initialization(self): assert bag.objs == [] def test_bag_append_initializes(self): - da = DataArray( + da = xd.DataArray( np.random.rand(10, 5), {"time": np.arange(10), "space": np.arange(5)} ) bag = Bag(dim="time") bag.append(da) assert len(bag.objs) == 1 assert bag.objs[0] is da - assert bag.subcoords.equals(Coordinates({"space": np.arange(5)})) + assert 
bag.subcoords.equals(xd.Coordinates({"space": np.arange(5)})) assert bag.subshape == (5,) assert bag.dims == ("time", "space") assert bag.delta def test_bag_append_compatible(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 5), dims=("time", "space")) + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) bag = Bag(dim="time") bag.append(da1) bag.append(da2) assert len(bag.objs) == 2 assert bag.objs[1] is da2 - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), {"time": np.arange(10), "space": np.arange(5)} ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), {"time": np.arange(10, 20), "space": np.arange(5)} ) bag = Bag(dim="time") @@ -47,36 +45,38 @@ def test_bag_append_compatible(self): assert bag.objs[1] is da2 def test_bag_append_incompatible_dims(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 5), dims=("space", "time")) + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 5), dims=("space", "time")) bag = Bag(dim="time") bag.append(da1) with pytest.raises(CompatibilityError): bag.append(da2) def test_bag_append_incompatible_shape(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 6), dims=("time", "space")) + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 6), dims=("time", "space")) bag = Bag(dim="time") bag.append(da1) with pytest.raises(CompatibilityError): bag.append(da2) def test_bag_append_incompatible_dtype(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.randint(0, 10, size=(10, 5)), dims=("time", "space")) + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray( + np.random.randint(0, 10, size=(10, 5)), dims=("time", "space") + ) bag = Bag(dim="time") bag.append(da1) with pytest.raises(CompatibilityError): bag.append(da2) def test_bag_append_incompatible_coords(self): - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"space": np.arange(5)}, ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"space": np.arange(5) + 1}, @@ -87,12 +87,12 @@ def test_bag_append_incompatible_coords(self): bag.append(da2) def test_bag_append_incompatible_sampling_interval(self): - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"time": np.arange(10)}, ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"time": np.arange(10) * 2}, @@ -106,91 +106,93 @@ def test_bag_append_incompatible_sampling_interval(self): class TestCombineByCoords: def test_basic(self): # without coords - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - combined = combine_by_coords([da1, da2], dim="time", squeeze=True) + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + combined = xd.combine_by_coords([da1, da2], dim="time", squeeze=True) assert combined.shape == (20, 5) # with coords - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), coords={"time": np.arange(10), "space": np.arange(5)}, ) - da2 
= DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), coords={"time": np.arange(10, 20), "space": np.arange(5)}, ) - combined = combine_by_coords([da1, da2], dim="time", squeeze=True) + combined = xd.combine_by_coords([da1, da2], dim="time", squeeze=True) assert combined.shape == (20, 5) def test_incompatible_shape(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 6), dims=("time", "space")) - dc = combine_by_coords([da1, da2], dim="time") + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 6), dims=("time", "space")) + dc = xd.combine_by_coords([da1, da2], dim="time") assert len(dc) == 2 assert dc[0].equals(da1) assert dc[1].equals(da2) def test_incompatible_dims(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.rand(10, 5), dims=("space", "time")) - dc = combine_by_coords([da1, da2], dim="time") + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray(np.random.rand(10, 5), dims=("space", "time")) + dc = xd.combine_by_coords([da1, da2], dim="time") assert len(dc) == 2 assert dc[0].equals(da1) assert dc[1].equals(da2) def test_incompatible_dtype(self): - da1 = DataArray(np.random.rand(10, 5), dims=("time", "space")) - da2 = DataArray(np.random.randint(0, 10, size=(10, 5)), dims=("time", "space")) - dc = combine_by_coords([da1, da2], dim="time") + da1 = xd.DataArray(np.random.rand(10, 5), dims=("time", "space")) + da2 = xd.DataArray( + np.random.randint(0, 10, size=(10, 5)), dims=("time", "space") + ) + dc = xd.combine_by_coords([da1, da2], dim="time") assert len(dc) == 2 assert dc[0].equals(da1) assert dc[1].equals(da2) def test_incompatible_coords(self): - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"space": np.arange(5)}, ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"space": np.arange(5) + 1}, ) - dc = combine_by_coords([da1, da2], dim="time") + dc = xd.combine_by_coords([da1, da2], dim="time") assert len(dc) == 2 assert dc[0].equals(da1) assert dc[1].equals(da2) def test_incompatible_sampling_interval(self): - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"time": np.arange(10)}, ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10, 5), dims=("time", "space"), coords={"time": np.arange(10) * 2}, ) - dc = combine_by_coords([da1, da2], dim="time") + dc = xd.combine_by_coords([da1, da2], dim="time") assert len(dc) == 2 assert dc[0].equals(da1) assert dc[1].equals(da2) def test_expand_scalar_coordinate(self): - da1 = DataArray( + da1 = xd.DataArray( np.random.rand(10), dims=("time",), coords={"time": np.arange(10), "space": 0}, ) - da2 = DataArray( + da2 = xd.DataArray( np.random.rand(10), dims=("time",), coords={"time": np.arange(10), "space": 1}, ) - dc = combine_by_coords([da1, da2], dim="space", squeeze=True) + dc = xd.combine_by_coords([da1, da2], dim="space", squeeze=True) assert dc.shape == (2, 10) assert dc.dims == ("space", "time") assert dc.coords["space"].values.tolist() == [0, 1] @@ -198,7 +200,7 @@ def test_expand_scalar_coordinate(self): class TestOpenMFDataArray: def test_warn_on_corrupted_files(self, tmp_path): - expected = DataArray( + expected = xd.DataArray( np.random.rand(10, 5), coords={ "time": np.arange(10), diff --git a/tests/test_virtual.py b/tests/test_virtual.py index 4e3e2a0..94b80b3 100644 --- 
a/tests/test_virtual.py +++ b/tests/test_virtual.py @@ -5,7 +5,7 @@ import numpy as np import pytest -import xdas +import xdas as xd from xdas.synthetics import wavelet_wavefronts from xdas.virtual import ( Selection, @@ -22,11 +22,11 @@ class TestFunctional: # TODO: move elsewhere def test_all(self): with tempfile.TemporaryDirectory() as dirpath: expected = wavelet_wavefronts() - chunks = xdas.split(expected, 3) + chunks = xd.split(expected, 3) for index, chunk in enumerate(chunks, start=1): chunk.to_netcdf(os.path.join(dirpath, f"{index:03d}.nc")) - da = xdas.open_dataarray(os.path.join(dirpath, "002.nc")) + da = xd.open_dataarray(os.path.join(dirpath, "002.nc")) datasource = da.data assert np.allclose(np.asarray(datasource[0]), da.load().values[0]) assert np.allclose(np.asarray(datasource[0][1]), da.load().values[0][1]) @@ -68,9 +68,9 @@ def test_dtypes(self, tmp_path): np.complex128, ) for dtype in dtypes: - expected = xdas.DataArray(np.zeros((3, 5), dtype=dtype)) + expected = xd.DataArray(np.zeros((3, 5), dtype=dtype)) expected.to_netcdf(tmp_path / "data.nc") - result = xdas.open_dataarray(tmp_path / "data.nc") + result = xd.open_dataarray(tmp_path / "data.nc") assert result.equals(expected) diff --git a/tests/test_xarray.py b/tests/test_xarray.py index 382a4fe..9e87a62 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -1,7 +1,7 @@ import numpy as np +import xdas as xd import xdas.core.methods as xm -from xdas.core.dataarray import DataArray from xdas.synthetics import wavelet_wavefronts @@ -15,19 +15,19 @@ def test_returns_dataarray(self): "quantile", ]: result = func(da, 0.5) - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) result = getattr(da, name)(0.5) - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) elif name == "diff": result = func(da, "time") - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) result = getattr(da, name)("time") - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) else: result = func(da) - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) result = getattr(da, name)() - assert isinstance(result, DataArray) + assert isinstance(result, xd.DataArray) def test_mean(self): da = wavelet_wavefronts() diff --git a/xdas/__init__.py b/xdas/__init__.py index e9a85cb..44220eb 100644 --- a/xdas/__init__.py +++ b/xdas/__init__.py @@ -1,11 +1,6 @@ -from . 
import atoms, config, fft, io, parallel, processing, signal, synthetics, virtual
-from .core import coordinates, dataarray, datacollection, methods, numpy, routines
-from .core.coordinates import (
+from .coordinates import (
     Coordinate,
     Coordinates,
-    DenseCoordinate,
-    InterpCoordinate,
-    ScalarCoordinate,
     get_sampling_interval,
 )
 from .core.dataarray import DataArray
diff --git a/xdas/atoms/ml.py b/xdas/atoms/ml.py
index f3fc4fa..5add9e2 100644
--- a/xdas/atoms/ml.py
+++ b/xdas/atoms/ml.py
@@ -2,9 +2,9 @@
 
 import numpy as np
 
-from ..atoms import Atom, State
 from ..core.dataarray import DataArray
 from ..core.routines import concatenate
+from .core import Atom, State
 
 
 class LazyModule:
diff --git a/xdas/atoms/signal.py b/xdas/atoms/signal.py
index 7378a7d..21f4027 100644
--- a/xdas/atoms/signal.py
+++ b/xdas/atoms/signal.py
@@ -3,7 +3,7 @@
 import numpy as np
 import scipy.signal as sp
 
-from ..core.coordinates import Coordinate, get_sampling_interval
+from ..coordinates.core import Coordinate, get_sampling_interval
 from ..core.dataarray import DataArray
 from ..core.routines import concatenate, split
 from ..parallel import parallelize
diff --git a/xdas/core/coordinates/__init__.py b/xdas/coordinates/__init__.py
similarity index 100%
rename from xdas/core/coordinates/__init__.py
rename to xdas/coordinates/__init__.py
diff --git a/xdas/core/coordinates/core.py b/xdas/coordinates/core.py
similarity index 99%
rename from xdas/core/coordinates/core.py
rename to xdas/coordinates/core.py
index f70c36c..d9e6813 100644
--- a/xdas/core/coordinates/core.py
+++ b/xdas/coordinates/core.py
@@ -421,7 +421,7 @@ def append(self, other):
         raise NotImplementedError(f"append is not implemented for {self.__class__}")
 
     def to_dataarray(self):
-        from ..dataarray import DataArray  # TODO: avoid defered import?
+        from ..core.dataarray import DataArray  # TODO: avoid deferred import?
if self.name is None: raise ValueError("cannot convert unnamed coordinate to DataArray") diff --git a/xdas/core/coordinates/default.py b/xdas/coordinates/default.py similarity index 100% rename from xdas/core/coordinates/default.py rename to xdas/coordinates/default.py diff --git a/xdas/core/coordinates/dense.py b/xdas/coordinates/dense.py similarity index 100% rename from xdas/core/coordinates/dense.py rename to xdas/coordinates/dense.py diff --git a/xdas/core/coordinates/interp.py b/xdas/coordinates/interp.py similarity index 100% rename from xdas/core/coordinates/interp.py rename to xdas/coordinates/interp.py diff --git a/xdas/core/coordinates/sampled.py b/xdas/coordinates/sampled.py similarity index 100% rename from xdas/core/coordinates/sampled.py rename to xdas/coordinates/sampled.py diff --git a/xdas/core/coordinates/scalar.py b/xdas/coordinates/scalar.py similarity index 100% rename from xdas/core/coordinates/scalar.py rename to xdas/coordinates/scalar.py diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py index e3ba7a9..468aa4d 100644 --- a/xdas/core/dataarray.py +++ b/xdas/core/dataarray.py @@ -1,6 +1,4 @@ import copy -import json -import re import warnings from functools import partial @@ -12,9 +10,9 @@ from dask.array import Array as DaskArray from numpy.lib.mixins import NDArrayOperatorsMixin +from ..coordinates import Coordinates, get_sampling_interval from ..dask.core import create_variable, from_dict, loads, to_dict from ..virtual import VirtualArray, VirtualSource, _to_human -from .coordinates import Coordinate, Coordinates, get_sampling_interval HANDLED_NUMPY_FUNCTIONS = {} HANDLED_METHODS = {} diff --git a/xdas/core/routines.py b/xdas/core/routines.py index 187c88d..f34c4b8 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -11,8 +11,9 @@ import xarray as xr from tqdm import tqdm +from ..coordinates.core import Coordinates, get_sampling_interval +from ..coordinates.interp import InterpCoordinate from ..virtual import VirtualSource, VirtualStack -from .coordinates import Coordinates, InterpCoordinate, get_sampling_interval from .dataarray import DataArray from .datacollection import DataCollection, DataMapping, DataSequence diff --git a/xdas/fft.py b/xdas/fft.py index 5d6ca31..bb47bb8 100644 --- a/xdas/fft.py +++ b/xdas/fft.py @@ -1,7 +1,7 @@ import numpy as np from .atoms.core import atomized -from .core.coordinates import get_sampling_interval +from .coordinates.core import get_sampling_interval from .core.dataarray import DataArray from .parallel import parallelize diff --git a/xdas/io/__init__.py b/xdas/io/__init__.py index 54f6f22..926a1a9 100644 --- a/xdas/io/__init__.py +++ b/xdas/io/__init__.py @@ -1,2 +1 @@ -from . 
import apsensing, asn, febus, miniseed, optasense, silixa, sintela, terra15 from .core import get_free_port diff --git a/xdas/io/asn.py b/xdas/io/asn.py index 7dc4234..44da7a5 100644 --- a/xdas/io/asn.py +++ b/xdas/io/asn.py @@ -4,8 +4,7 @@ import numpy as np import zmq -from xdas.core.coordinates import get_sampling_interval - +from ..coordinates.core import get_sampling_interval from ..core.dataarray import DataArray from ..virtual import VirtualSource diff --git a/xdas/io/miniseed.py b/xdas/io/miniseed.py index 37e7461..6a2d469 100644 --- a/xdas/io/miniseed.py +++ b/xdas/io/miniseed.py @@ -2,7 +2,7 @@ import numpy as np import obspy -from ..core.coordinates import Coordinate, Coordinates +from ..coordinates.core import Coordinate, Coordinates from ..core.dataarray import DataArray diff --git a/xdas/processing/__init__.py b/xdas/processing/__init__.py index b8ffae4..2cbe1d7 100644 --- a/xdas/processing/__init__.py +++ b/xdas/processing/__init__.py @@ -1,4 +1,3 @@ -from . import monitor from .core import ( DataArrayLoader, DataArrayWriter, diff --git a/xdas/signal.py b/xdas/signal.py index 5d15552..fefb4d5 100644 --- a/xdas/signal.py +++ b/xdas/signal.py @@ -2,7 +2,7 @@ import scipy.signal as sp from .atoms.core import atomized -from .core.coordinates import Coordinate, get_sampling_interval +from .coordinates.core import Coordinate, get_sampling_interval from .core.dataarray import DataArray from .parallel import parallelize from .spectral import stft diff --git a/xdas/spectral.py b/xdas/spectral.py index 44485ad..c4a6e27 100644 --- a/xdas/spectral.py +++ b/xdas/spectral.py @@ -2,7 +2,7 @@ from scipy.fft import fft, fftfreq, fftshift, rfft, rfftfreq from scipy.signal import get_window -from .core.coordinates import get_sampling_interval +from .coordinates.core import get_sampling_interval from .core.dataarray import DataArray from .parallel import parallelize diff --git a/xdas/trigger.py b/xdas/trigger.py index b0a62ee..02142b6 100644 --- a/xdas/trigger.py +++ b/xdas/trigger.py @@ -3,7 +3,7 @@ from numba import njit from .atoms.core import Atom, State, atomized -from .core.coordinates import Coordinate +from .coordinates.core import Coordinate class Trigger(Atom): From 1c51119b3d175b56bba52105f54e882404a33bde Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 16:19:06 +0100 Subject: [PATCH 31/63] Refactor: put dense and interp coord tests in separate files. 
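
A minimal sketch of what the split test modules exercise (illustrative only;
the import path is the one introduced earlier in this series, and the
assertions mirror the tests being moved below):

    from xdas.coordinates import DenseCoordinate, InterpCoordinate

    # dense coordinates store explicit values along a dimension
    coord = DenseCoordinate([1, 2, 3], "dim")
    assert coord.get_indexer(2) == 1  # value 2 sits at position 1

    # interpolated coordinates are defined by tie points
    coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]})
    assert coord.get_indexer(900.0) == 8
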
--- tests/coordinates/test_coordinates.py | 512 ++------------------------ tests/coordinates/test_dense.py | 137 +++++++ tests/coordinates/test_interp.py | 318 ++++++++++++++++ 3 files changed, 487 insertions(+), 480 deletions(-) create mode 100644 tests/coordinates/test_dense.py create mode 100644 tests/coordinates/test_interp.py diff --git a/tests/coordinates/test_coordinates.py b/tests/coordinates/test_coordinates.py index 2a3166a..d0825bd 100644 --- a/tests/coordinates/test_coordinates.py +++ b/tests/coordinates/test_coordinates.py @@ -1,516 +1,68 @@ import numpy as np -import pandas as pd import pytest -import xdas -from xdas.coordinates import DenseCoordinate, InterpCoordinate, ScalarCoordinate - - -class TestDenseCoordinate: - valid = [ - [1, 2, 3], - np.array([1, 2, 3]), - [1.0, 2.0, 3.0], - np.array([1.0, 2.0, 3.0]), - ["a", "b", "c"], - np.array(["a", "b", "c"]), - np.array([1, 2, 3], dtype="datetime64[s]"), - ] - invalid = [ - 1, - np.array(1), - 1.0, - np.array(1.0), - "label", - np.array("label"), - np.datetime64(1, "s"), - {"key": "value"}, - ] - - def test_isvalid(self): - for data in self.valid: - assert DenseCoordinate.isvalid(data) - for data in self.invalid: - assert not DenseCoordinate.isvalid(data) - - def test_init(self): - coord = DenseCoordinate([1, 2, 3]) - assert np.array_equiv(coord.data, [1, 2, 3]) - assert coord.dim is None - coord = DenseCoordinate([1, 2, 3], "dim") - assert coord.dim == "dim" - for data in self.valid: - assert np.array_equiv(DenseCoordinate(data).data, data) - for data in self.invalid: - with pytest.raises(TypeError): - DenseCoordinate(data) - - def test_getitem(self): - assert np.array_equiv(DenseCoordinate([1, 2, 3])[...].values, [1, 2, 3]) - assert isinstance(DenseCoordinate([1, 2, 3])[...], DenseCoordinate) - assert np.array_equiv(DenseCoordinate([1, 2, 3])[:].values, [1, 2, 3]) - assert isinstance(DenseCoordinate([1, 2, 3])[:], DenseCoordinate) - assert np.array_equiv(DenseCoordinate([1, 2, 3])[1].values, 2) - assert isinstance(DenseCoordinate([1, 2, 3])[1], ScalarCoordinate) - assert np.array_equiv(DenseCoordinate([1, 2, 3])[1:].values, [2, 3]) - assert isinstance(DenseCoordinate([1, 2, 3])[1:], DenseCoordinate) - - def test_len(self): - for data in self.valid: - assert len(DenseCoordinate(data)) == 3 - - def test_repr(self): - for data in self.valid: - assert DenseCoordinate(data).__repr__() == np.array2string( - np.asarray(data), threshold=0, edgeitems=1 - ) - - def test_array(self): - for data in self.valid: - assert np.array_equiv(DenseCoordinate(data).__array__(), data) - - def test_dtype(self): - for data in self.valid: - assert DenseCoordinate(data).dtype == np.array(data).dtype - - def test_values(self): - for data in self.valid: - assert np.array_equiv(DenseCoordinate(data).values, data) - - def test_index(self): - for data in self.valid: - assert DenseCoordinate(data).index.equals(pd.Index(data)) - - def test_equals(self): - for data in self.valid: - coord = DenseCoordinate(data) - assert coord.equals(coord) - assert DenseCoordinate([1, 2, 3]).equals(DenseCoordinate([1, 2, 3])) - - def test_isinstance(self): - assert not DenseCoordinate([1, 2, 3]).isscalar() - assert DenseCoordinate([1, 2, 3]).isdense() - assert not DenseCoordinate([1, 2, 3]).isinterp() - - def test_get_indexer(self): - assert DenseCoordinate([1, 2, 3]).get_indexer(2) == 1 - assert np.array_equiv(DenseCoordinate([1, 2, 3]).get_indexer([2, 3]), [1, 2]) - assert DenseCoordinate([1, 2, 3]).get_indexer(2.1, method="nearest") == 1 - assert DenseCoordinate([1, 2, 
3]).get_indexer(2.1, method="ffill") == 1 - assert DenseCoordinate([1, 2, 3]).get_indexer(2.1, method="bfill") == 2 - - def test_get_slice_indexer(self): - assert np.array_equiv( - DenseCoordinate([1, 2, 3]).slice_indexer(start=2), slice(1, 3) - ) - - def test_to_index(self): - assert DenseCoordinate([1, 2, 3]).to_index(2) == 1 - assert np.array_equiv(DenseCoordinate([1, 2, 3]).to_index([2, 3]), [1, 2]) - assert np.array_equiv( - DenseCoordinate([1, 2, 3]).to_index(slice(2, None)), slice(1, 3) - ) - - def test_to_from_dict(self): - for data in self.valid: - coord = DenseCoordinate(data) - assert DenseCoordinate.from_dict(coord.to_dict()).equals(coord) - - def test_empty(self): - coord = DenseCoordinate() - assert coord.empty - - def test_append(self): - coord0 = DenseCoordinate() - coord1 = DenseCoordinate([1, 2, 3]) - coord2 = DenseCoordinate([4, 5, 6]) - - result = coord1.append(coord2) - expected = DenseCoordinate([1, 2, 3, 4, 5, 6]) - assert result.equals(expected) - - result = coord2.append(coord1) - expected = DenseCoordinate([4, 5, 6, 1, 2, 3]) - assert result.equals(expected) - - assert coord0.append(coord0).empty - assert coord0.append(coord1).equals(coord1) - assert coord1.append(coord0).equals(coord1) - - -class TestInterpCoordinate: - valid = [ - {"tie_indices": [], "tie_values": []}, - {"tie_indices": [0], "tie_values": [100.0]}, - {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}, - {"tie_indices": [0, 8], "tie_values": [100, 900]}, - { - "tie_indices": [0, 8], - "tie_values": [ - np.datetime64("2000-01-01T00:00:00"), - np.datetime64("2000-01-01T00:00:08"), - ], - }, - {"tie_indices": np.array([0, 8], dtype="int16"), "tie_values": [100.0, 900.0]}, - ] - invalid = [ - 1, - np.array(1), - 1.0, - np.array(1.0), - "label", - np.array("label"), - np.datetime64(1, "s"), - [1, 2, 3], - np.array([1, 2, 3]), - [1.0, 2.0, 3.0], - np.array([1.0, 2.0, 3.0]), - ["a", "b", "c"], - np.array(["a", "b", "c"]), - np.array([1, 2, 3], dtype="datetime64[s]"), - {"key": "value"}, - ] - error = [ - {"tie_indices": 0, "tie_values": [100.0]}, - {"tie_indices": [0], "tie_values": 100.0}, - {"tie_indices": [0, 7, 8], "tie_values": [100.0, 900.0]}, - {"tie_indices": [0.0, 8.0], "tie_values": [100.0, 900.0]}, - {"tie_indices": [1, 9], "tie_values": [100.0, 900.0]}, - {"tie_indices": [8, 0], "tie_values": [100.0, 900.0]}, - {"tie_indices": [8, 0], "tie_values": ["a", "b"]}, - ] - - def test_isvalid(self): - for data in self.valid: - assert InterpCoordinate.isvalid(data) - for data in self.invalid: - assert not InterpCoordinate.isvalid(data) - - def test_init(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert np.array_equiv(coord.data["tie_indices"], [0, 8]) - assert np.array_equiv(coord.data["tie_values"], [100.0, 900.0]) - assert coord.dim is None - coord = InterpCoordinate( - {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}, "dim" - ) - assert coord.dim == "dim" - for data in self.valid: - coord = InterpCoordinate(data) - assert np.array_equiv(coord.data["tie_indices"], data["tie_indices"]) - assert np.array_equiv(coord.data["tie_values"], data["tie_values"]) - for data in self.invalid: - with pytest.raises(TypeError): - InterpCoordinate(data) - for data in self.error: - with pytest.raises(ValueError): - InterpCoordinate(data) - - def test_len(self): - assert ( - len(InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]})) - == 9 - ) - assert len(InterpCoordinate(dict(tie_indices=[], tie_values=[]))) == 0 - - 
@pytest.mark.parametrize("valid_input", valid) - def test_repr(self, valid_input): - coord = InterpCoordinate(data=valid_input) - my_coord = repr(coord) - assert isinstance(my_coord, str) - - def test_equals(self): - coord1 = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - coord2 = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord1.equals(coord2) - - def test_getitem(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert isinstance(coord[0], ScalarCoordinate) - assert coord[0].values == 100.0 - assert coord[4].values == 500.0 - assert coord[8].values == 900.0 - assert coord[-1].values == 900.0 - assert coord[-2].values == 800.0 - assert np.allclose(coord[[1, 2, 3]].values, [200.0, 300.0, 400.0]) - with pytest.raises(IndexError): - coord[9] - coord[-9] - assert coord[0:2].equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 200.0])) - ) - assert coord[:].equals(coord) - assert coord[6:3].equals(InterpCoordinate(dict(tie_indices=[], tie_values=[]))) - assert coord[1:2].equals( - InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) - ) - assert coord[-3:-1].equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[700.0, 800.0])) - ) - - def test_setitem(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - with pytest.raises(TypeError): - coord[1] = 0 - coord[:] = 0 - - def test_asarray(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert np.allclose(np.asarray(coord), coord.values) - - def test_empty(self): - assert not InterpCoordinate( - {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]} - ).empty - assert InterpCoordinate(dict(tie_indices=[], tie_values=[])).empty - - def test_dtype(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.dtype == np.float64 - - def test_ndim(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.ndim == 1 - assert isinstance(coord.ndim, int) - - def test_shape(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.shape == (9,) - - def test_format_index(self): - # TODO - pass - - def test_format_index_slice(self): - # TODO - pass - - def test_get_value(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.get_value(0) == 100.0 - assert coord.get_value(4) == 500.0 - assert coord.get_value(8) == 900.0 - assert coord.get_value(-1) == 900.0 - assert coord.get_value(-9) == 100.0 - assert np.allclose(coord.get_value([1, 2, 3, -2]), [200.0, 300.0, 400.0, 800.0]) - with pytest.raises(IndexError): - coord.get_value(-10) - coord.get_value(9) - coord.get_value(0.5) - starttime = np.datetime64("2000-01-01T00:00:00") - endtime = np.datetime64("2000-01-01T00:00:08") - coord = InterpCoordinate( - dict(tie_indices=[0, 8], tie_values=[starttime, endtime]) - ) - assert coord.get_value(0) == starttime - assert coord.get_value(4) == np.datetime64("2000-01-01T00:00:04") - assert coord.get_value(8) == endtime - assert coord.get_value(-1) == endtime - assert coord.get_value(-9) == starttime - - def test_get_index(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.get_indexer(100.0) == 0 - assert coord.get_indexer(900.0) == 8 - assert coord.get_indexer(0.0, "nearest") == 0 - assert coord.get_indexer(1000.0, 
"nearest") == 8 - assert coord.get_indexer(125.0, "nearest") == 0 - assert coord.get_indexer(175.0, "nearest") == 1 - assert coord.get_indexer(175.0, "ffill") == 0 - assert coord.get_indexer(200.0, "ffill") == 1 - assert coord.get_indexer(200.0, "bfill") == 1 - assert coord.get_indexer(125.0, "bfill") == 1 - assert np.all(np.equal(coord.get_indexer([100.0, 900.0]), [0, 8])) - with pytest.raises(KeyError): - assert coord.get_indexer(0.0) == 0 - assert coord.get_indexer(1000.0) == 8 - assert coord.get_indexer(150.0) == 0 - assert coord.get_indexer(1000.0, "bfill") == 8 - assert coord.get_indexer(0.0, "ffill") == 0 - - starttime = np.datetime64("2000-01-01T00:00:00") - endtime = np.datetime64("2000-01-01T00:00:08") - coord = InterpCoordinate( - dict(tie_indices=[0, 8], tie_values=[starttime, endtime]) - ) - assert coord.get_indexer(starttime) == 0 - assert coord.get_indexer(endtime) == 8 - assert coord.get_indexer(str(starttime)) == 0 - assert coord.get_indexer(str(endtime)) == 8 - assert coord.get_indexer("2000-01-01T00:00:04.1", "nearest") == 4 - - def test_indices(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert np.all(np.equal(coord.indices, np.arange(9))) - - def test_values(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert np.allclose(coord.values, np.arange(100.0, 1000.0, 100.0)) - - def test_get_index_slice(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.slice_indexer(100.0, 200.0) == slice(0, 2) - assert coord.slice_indexer(150.0, 250.0) == slice(1, 2) - assert coord.slice_indexer(300.0, 500.0) == slice(2, 5) - assert coord.slice_indexer(0.0, 500.0) == slice(0, 5) - assert coord.slice_indexer(125.0, 175.0) == slice(1, 1) - assert coord.slice_indexer(0.0, 50.0) == slice(0, 0) - assert coord.slice_indexer(1000.0, 1100.0) == slice(9, 9) - assert coord.slice_indexer(1000.0, 500.0) == slice(9, 5) - assert coord.slice_indexer(None, None) == slice(None, None) - - def test_slice_index(self): - coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) - assert coord.slice_index(slice(0, 2)).equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 200.0])) - ) - assert coord.slice_index(slice(7, None)).equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[800.0, 900.0])) - ) - assert coord.slice_index(slice(None, None)).equals(coord) - assert coord.slice_index(slice(0, 0)).equals( - InterpCoordinate(dict(tie_indices=[], tie_values=[])) - ) - assert coord.slice_index(slice(4, 2)).equals( - InterpCoordinate(dict(tie_indices=[], tie_values=[])) - ) - assert coord.slice_index(slice(9, 9)).equals( - InterpCoordinate(dict(tie_indices=[], tie_values=[])) - ) - assert coord.slice_index(slice(3, 3)).equals( - InterpCoordinate(dict(tie_indices=[], tie_values=[])) - ) - assert coord.slice_index(slice(0, -1)).equals( - InterpCoordinate(dict(tie_indices=[0, 7], tie_values=[100.0, 800.0])) - ) - assert coord.slice_index(slice(0, -2)).equals( - InterpCoordinate(dict(tie_indices=[0, 6], tie_values=[100.0, 700.0])) - ) - assert coord.slice_index(slice(-2, None)).equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[800.0, 900.0])) - ) - assert coord.slice_index(slice(1, 2)).equals( - InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) - ) - assert coord.slice_index(slice(1, 3, 2)).equals( - InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) - ) - assert coord.slice_index(slice(None, None, 
2)).equals( - InterpCoordinate(dict(tie_indices=[0, 4], tie_values=[100.0, 900.0])) - ) - assert coord.slice_index(slice(None, None, 3)).equals( - InterpCoordinate(dict(tie_indices=[0, 2], tie_values=[100.0, 700.0])) - ) - assert coord.slice_index(slice(None, None, 4)).equals( - InterpCoordinate(dict(tie_indices=[0, 2], tie_values=[100.0, 900.0])) - ) - assert coord.slice_index(slice(None, None, 5)).equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 600.0])) - ) - assert coord.slice_index(slice(2, 7, 3)).equals( - InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[300.0, 600.0])) - ) - - def test_to_index(self): - # TODO - pass - - def test_simplify(self): - xp = np.sort(np.random.choice(10000, 1000, replace=False)) - xp[0] = 0 - xp[-1] = 10000 - yp = xp + (np.random.rand(1000) - 0.5) - coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp}) - assert len(coord.simplify(1.0).tie_indices) == 2 - - def test_singleton(self): - coord = InterpCoordinate({"tie_indices": [0], "tie_values": [1.0]}) - assert coord[0].values == 1.0 - - def test_to_from_dict(self): - for data in self.valid: - coord = InterpCoordinate(data) - assert InterpCoordinate.from_dict(coord.to_dict()).equals(coord) - - def test_append(self): - coord0 = InterpCoordinate() - coord1 = InterpCoordinate({"tie_indices": [0, 2], "tie_values": [0, 20]}) - coord2 = InterpCoordinate({"tie_indices": [0, 2], "tie_values": [30, 50]}) - - result = coord1.append(coord2).simplify() - expected = InterpCoordinate({"tie_indices": [0, 5], "tie_values": [0, 50]}) - assert result.equals(expected) - - result = coord2.append(coord1).simplify() - expected = InterpCoordinate( - {"tie_indices": [0, 2, 3, 5], "tie_values": [30, 50, 0, 20]} - ) - assert result.equals(expected) - - assert coord0.append(coord0).empty - assert coord0.append(coord1).equals(coord1) - assert coord1.append(coord0).equals(coord1) +import xdas as xd class TestCoordinate: def test_new(self): - assert xdas.Coordinate(1).isscalar() - assert xdas.Coordinate([1]).isdense() - assert xdas.Coordinate({"tie_values": [], "tie_indices": []}).isinterp() - coord = xdas.Coordinate(xdas.Coordinate([1]), "dim") + assert xd.Coordinate(1).isscalar() + assert xd.Coordinate([1]).isdense() + assert xd.Coordinate({"tie_values": [], "tie_indices": []}).isinterp() + coord = xd.Coordinate(xd.Coordinate([1]), "dim") assert coord.isdense() assert coord.dim == "dim" def test_to_dataarray(self): - coord = xdas.Coordinate([1, 2, 3], "dim") + coord = xd.Coordinate([1, 2, 3], "dim") result = coord.to_dataarray() - expected = xdas.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") + expected = xd.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") assert result.equals(expected) def test_empty(self): with pytest.raises(TypeError, match="cannot infer coordinate type"): - xdas.Coordinate() + xd.Coordinate() def test_isdim(self): - coord = xdas.Coordinate([1, 2, 3]) + coord = xd.Coordinate([1, 2, 3]) assert coord.isdim() is None - coord = xdas.Coordinate([1, 2, 3], "dim") + coord = xd.Coordinate([1, 2, 3], "dim") assert coord.isdim() is None - coords = xdas.Coordinates({"dim": coord}) + coords = xd.Coordinates({"dim": coord}) assert coords["dim"].isdim() - coords = xdas.Coordinates({"other_dim": coord}) + coords = xd.Coordinates({"other_dim": coord}) assert not coords["other_dim"].isdim() def test_name(self): - coord = xdas.Coordinate([1, 2, 3]) + coord = xd.Coordinate([1, 2, 3]) assert coord.name is None - coord = xdas.Coordinate([1, 2, 3], "dim") + coord = xd.Coordinate([1, 2, 3], 
"dim") assert coord.name == "dim" - coords = xdas.Coordinates({"dim": coord}) + coords = xd.Coordinates({"dim": coord}) assert coords["dim"].name == "dim" - coords = xdas.Coordinates({"other_dim": coord}) + coords = xd.Coordinates({"other_dim": coord}) assert coords["other_dim"].name == "other_dim" def test_to_dataarray(self): - coord = xdas.Coordinate([1, 2, 3]) + coord = xd.Coordinate([1, 2, 3]) with pytest.raises(ValueError, match="unnamed coordinate"): coord.to_dataarray() - coord = xdas.Coordinate([1, 2, 3], "dim") + coord = xd.Coordinate([1, 2, 3], "dim") result = coord.to_dataarray() - expected = xdas.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") + expected = xd.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") assert result.equals(expected) - coords = xdas.Coordinates({"dim": coord}) + coords = xd.Coordinates({"dim": coord}) result = coords["dim"].to_dataarray() assert result.equals(expected) - coords = xdas.Coordinates({"other_dim": coord}) + coords = xd.Coordinates({"other_dim": coord}) result = coords["other_dim"].to_dataarray() - expected = xdas.DataArray( + expected = xd.DataArray( [1, 2, 3], coords={"other_dim": coord}, dims=["dim"], name="other_dim" ) assert result.equals(expected) coords["dim"] = [4, 5, 6] result = coords["dim"].to_dataarray() - expected = xdas.DataArray( + expected = xd.DataArray( [4, 5, 6], coords={"dim": [4, 5, 6], "other_dim": ("dim", [1, 2, 3])}, dims=["dim"], @@ -518,7 +70,7 @@ def test_to_dataarray(self): ) assert result.equals(expected) result = coords["other_dim"].to_dataarray() - expected = xdas.DataArray( + expected = xd.DataArray( [1, 2, 3], coords={"dim": [4, 5, 6], "other_dim": ("dim", [1, 2, 3])}, dims=["dim"], @@ -529,7 +81,7 @@ def test_to_dataarray(self): class TestCoordinates: def test_init(self): - coords = xdas.Coordinates( + coords = xd.Coordinates( {"dim": ("dim", {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]})} ) coord = coords["dim"] @@ -537,12 +89,12 @@ def test_init(self): assert np.allclose(coord.tie_indices, [0, 8]) assert np.allclose(coord.tie_values, [100.0, 900.0]) assert coords.isdim("dim") - coords = xdas.Coordinates({"dim": [1.0, 2.0, 3.0]}) + coords = xd.Coordinates({"dim": [1.0, 2.0, 3.0]}) coord = coords["dim"] assert coord.isdense() assert np.allclose(coord.values, [1.0, 2.0, 3.0]) assert coords.isdim("dim") - coords = xdas.Coordinates( + coords = xd.Coordinates( { "dim_0": ( "dim_0", @@ -556,17 +108,17 @@ def test_init(self): ) assert coords.isdim("dim_0") assert not coords.isdim("dim_1") - coords = xdas.Coordinates() + coords = xd.Coordinates() assert coords == dict() assert coords.dims == tuple() def test_first_last(self): - coords = xdas.Coordinates({"dim_0": [1.0, 2.0, 3.0], "dim_1": [1.0, 2.0, 3.0]}) + coords = xd.Coordinates({"dim_0": [1.0, 2.0, 3.0], "dim_1": [1.0, 2.0, 3.0]}) assert coords["first"].dim == "dim_0" assert coords["last"].dim == "dim_1" def test_setitem(self): - coords = xdas.Coordinates() + coords = xd.Coordinates() coords["dim_0"] = [1, 2, 4] assert coords.dims == ("dim_0",) coords["dim_1"] = {"tie_indices": [0, 10], "tie_values": [0.0, 100.0]} @@ -591,5 +143,5 @@ def test_to_from_dict(self): "channel": ("distance", ["DAS01", "DAS02", "DAS03"]), "interrogator": (None, "SRN"), } - coords = xdas.Coordinates(coords) - assert xdas.Coordinates.from_dict(coords.to_dict()).equals(coords) + coords = xd.Coordinates(coords) + assert xd.Coordinates.from_dict(coords.to_dict()).equals(coords) diff --git a/tests/coordinates/test_dense.py b/tests/coordinates/test_dense.py new file mode 100644 
index 0000000..0524bac --- /dev/null +++ b/tests/coordinates/test_dense.py @@ -0,0 +1,137 @@ +import numpy as np +import pandas as pd +import pytest + +from xdas.coordinates import DenseCoordinate, ScalarCoordinate + + +class TestDenseCoordinate: + valid = [ + [1, 2, 3], + np.array([1, 2, 3]), + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0]), + ["a", "b", "c"], + np.array(["a", "b", "c"]), + np.array([1, 2, 3], dtype="datetime64[s]"), + ] + invalid = [ + 1, + np.array(1), + 1.0, + np.array(1.0), + "label", + np.array("label"), + np.datetime64(1, "s"), + {"key": "value"}, + ] + + def test_isvalid(self): + for data in self.valid: + assert DenseCoordinate.isvalid(data) + for data in self.invalid: + assert not DenseCoordinate.isvalid(data) + + def test_init(self): + coord = DenseCoordinate([1, 2, 3]) + assert np.array_equiv(coord.data, [1, 2, 3]) + assert coord.dim is None + coord = DenseCoordinate([1, 2, 3], "dim") + assert coord.dim == "dim" + for data in self.valid: + assert np.array_equiv(DenseCoordinate(data).data, data) + for data in self.invalid: + with pytest.raises(TypeError): + DenseCoordinate(data) + + def test_getitem(self): + assert np.array_equiv(DenseCoordinate([1, 2, 3])[...].values, [1, 2, 3]) + assert isinstance(DenseCoordinate([1, 2, 3])[...], DenseCoordinate) + assert np.array_equiv(DenseCoordinate([1, 2, 3])[:].values, [1, 2, 3]) + assert isinstance(DenseCoordinate([1, 2, 3])[:], DenseCoordinate) + assert np.array_equiv(DenseCoordinate([1, 2, 3])[1].values, 2) + assert isinstance(DenseCoordinate([1, 2, 3])[1], ScalarCoordinate) + assert np.array_equiv(DenseCoordinate([1, 2, 3])[1:].values, [2, 3]) + assert isinstance(DenseCoordinate([1, 2, 3])[1:], DenseCoordinate) + + def test_len(self): + for data in self.valid: + assert len(DenseCoordinate(data)) == 3 + + def test_repr(self): + for data in self.valid: + assert DenseCoordinate(data).__repr__() == np.array2string( + np.asarray(data), threshold=0, edgeitems=1 + ) + + def test_array(self): + for data in self.valid: + assert np.array_equiv(DenseCoordinate(data).__array__(), data) + + def test_dtype(self): + for data in self.valid: + assert DenseCoordinate(data).dtype == np.array(data).dtype + + def test_values(self): + for data in self.valid: + assert np.array_equiv(DenseCoordinate(data).values, data) + + def test_index(self): + for data in self.valid: + assert DenseCoordinate(data).index.equals(pd.Index(data)) + + def test_equals(self): + for data in self.valid: + coord = DenseCoordinate(data) + assert coord.equals(coord) + assert DenseCoordinate([1, 2, 3]).equals(DenseCoordinate([1, 2, 3])) + + def test_isinstance(self): + assert not DenseCoordinate([1, 2, 3]).isscalar() + assert DenseCoordinate([1, 2, 3]).isdense() + assert not DenseCoordinate([1, 2, 3]).isinterp() + + def test_get_indexer(self): + assert DenseCoordinate([1, 2, 3]).get_indexer(2) == 1 + assert np.array_equiv(DenseCoordinate([1, 2, 3]).get_indexer([2, 3]), [1, 2]) + assert DenseCoordinate([1, 2, 3]).get_indexer(2.1, method="nearest") == 1 + assert DenseCoordinate([1, 2, 3]).get_indexer(2.1, method="ffill") == 1 + assert DenseCoordinate([1, 2, 3]).get_indexer(2.1, method="bfill") == 2 + + def test_get_slice_indexer(self): + assert np.array_equiv( + DenseCoordinate([1, 2, 3]).slice_indexer(start=2), slice(1, 3) + ) + + def test_to_index(self): + assert DenseCoordinate([1, 2, 3]).to_index(2) == 1 + assert np.array_equiv(DenseCoordinate([1, 2, 3]).to_index([2, 3]), [1, 2]) + assert np.array_equiv( + DenseCoordinate([1, 2, 3]).to_index(slice(2, None)), slice(1, 
3) + ) + + def test_to_from_dict(self): + for data in self.valid: + coord = DenseCoordinate(data) + assert DenseCoordinate.from_dict(coord.to_dict()).equals(coord) + + def test_empty(self): + coord = DenseCoordinate() + assert coord.empty + + def test_append(self): + coord0 = DenseCoordinate() + coord1 = DenseCoordinate([1, 2, 3]) + coord2 = DenseCoordinate([4, 5, 6]) + + result = coord1.append(coord2) + expected = DenseCoordinate([1, 2, 3, 4, 5, 6]) + assert result.equals(expected) + + result = coord2.append(coord1) + expected = DenseCoordinate([4, 5, 6, 1, 2, 3]) + assert result.equals(expected) + + assert coord0.append(coord0).empty + assert coord0.append(coord1).equals(coord1) + assert coord1.append(coord0).equals(coord1) diff --git a/tests/coordinates/test_interp.py b/tests/coordinates/test_interp.py new file mode 100644 index 0000000..95b1d77 --- /dev/null +++ b/tests/coordinates/test_interp.py @@ -0,0 +1,318 @@ +import numpy as np +import pytest + +from xdas.coordinates import InterpCoordinate, ScalarCoordinate + + +class TestInterpCoordinate: + valid = [ + {"tie_indices": [], "tie_values": []}, + {"tie_indices": [0], "tie_values": [100.0]}, + {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}, + {"tie_indices": [0, 8], "tie_values": [100, 900]}, + { + "tie_indices": [0, 8], + "tie_values": [ + np.datetime64("2000-01-01T00:00:00"), + np.datetime64("2000-01-01T00:00:08"), + ], + }, + {"tie_indices": np.array([0, 8], dtype="int16"), "tie_values": [100.0, 900.0]}, + ] + invalid = [ + 1, + np.array(1), + 1.0, + np.array(1.0), + "label", + np.array("label"), + np.datetime64(1, "s"), + [1, 2, 3], + np.array([1, 2, 3]), + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0]), + ["a", "b", "c"], + np.array(["a", "b", "c"]), + np.array([1, 2, 3], dtype="datetime64[s]"), + {"key": "value"}, + ] + error = [ + {"tie_indices": 0, "tie_values": [100.0]}, + {"tie_indices": [0], "tie_values": 100.0}, + {"tie_indices": [0, 7, 8], "tie_values": [100.0, 900.0]}, + {"tie_indices": [0.0, 8.0], "tie_values": [100.0, 900.0]}, + {"tie_indices": [1, 9], "tie_values": [100.0, 900.0]}, + {"tie_indices": [8, 0], "tie_values": [100.0, 900.0]}, + {"tie_indices": [8, 0], "tie_values": ["a", "b"]}, + ] + + def test_isvalid(self): + for data in self.valid: + assert InterpCoordinate.isvalid(data) + for data in self.invalid: + assert not InterpCoordinate.isvalid(data) + + def test_init(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert np.array_equiv(coord.data["tie_indices"], [0, 8]) + assert np.array_equiv(coord.data["tie_values"], [100.0, 900.0]) + assert coord.dim is None + coord = InterpCoordinate( + {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}, "dim" + ) + assert coord.dim == "dim" + for data in self.valid: + coord = InterpCoordinate(data) + assert np.array_equiv(coord.data["tie_indices"], data["tie_indices"]) + assert np.array_equiv(coord.data["tie_values"], data["tie_values"]) + for data in self.invalid: + with pytest.raises(TypeError): + InterpCoordinate(data) + for data in self.error: + with pytest.raises(ValueError): + InterpCoordinate(data) + + def test_len(self): + assert ( + len(InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]})) + == 9 + ) + assert len(InterpCoordinate(dict(tie_indices=[], tie_values=[]))) == 0 + + @pytest.mark.parametrize("valid_input", valid) + def test_repr(self, valid_input): + coord = InterpCoordinate(data=valid_input) + my_coord = repr(coord) + assert isinstance(my_coord, str) + + def test_equals(self): + 
coord1 = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + coord2 = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord1.equals(coord2) + + def test_getitem(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert isinstance(coord[0], ScalarCoordinate) + assert coord[0].values == 100.0 + assert coord[4].values == 500.0 + assert coord[8].values == 900.0 + assert coord[-1].values == 900.0 + assert coord[-2].values == 800.0 + assert np.allclose(coord[[1, 2, 3]].values, [200.0, 300.0, 400.0]) + with pytest.raises(IndexError): + coord[9] + coord[-9] + assert coord[0:2].equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 200.0])) + ) + assert coord[:].equals(coord) + assert coord[6:3].equals(InterpCoordinate(dict(tie_indices=[], tie_values=[]))) + assert coord[1:2].equals( + InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) + ) + assert coord[-3:-1].equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[700.0, 800.0])) + ) + + def test_setitem(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + with pytest.raises(TypeError): + coord[1] = 0 + coord[:] = 0 + + def test_asarray(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert np.allclose(np.asarray(coord), coord.values) + + def test_empty(self): + assert not InterpCoordinate( + {"tie_indices": [0, 8], "tie_values": [100.0, 900.0]} + ).empty + assert InterpCoordinate(dict(tie_indices=[], tie_values=[])).empty + + def test_dtype(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.dtype == np.float64 + + def test_ndim(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.ndim == 1 + assert isinstance(coord.ndim, int) + + def test_shape(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.shape == (9,) + + def test_format_index(self): + # TODO + pass + + def test_format_index_slice(self): + # TODO + pass + + def test_get_value(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.get_value(0) == 100.0 + assert coord.get_value(4) == 500.0 + assert coord.get_value(8) == 900.0 + assert coord.get_value(-1) == 900.0 + assert coord.get_value(-9) == 100.0 + assert np.allclose(coord.get_value([1, 2, 3, -2]), [200.0, 300.0, 400.0, 800.0]) + with pytest.raises(IndexError): + coord.get_value(-10) + coord.get_value(9) + coord.get_value(0.5) + starttime = np.datetime64("2000-01-01T00:00:00") + endtime = np.datetime64("2000-01-01T00:00:08") + coord = InterpCoordinate( + dict(tie_indices=[0, 8], tie_values=[starttime, endtime]) + ) + assert coord.get_value(0) == starttime + assert coord.get_value(4) == np.datetime64("2000-01-01T00:00:04") + assert coord.get_value(8) == endtime + assert coord.get_value(-1) == endtime + assert coord.get_value(-9) == starttime + + def test_get_index(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.get_indexer(100.0) == 0 + assert coord.get_indexer(900.0) == 8 + assert coord.get_indexer(0.0, "nearest") == 0 + assert coord.get_indexer(1000.0, "nearest") == 8 + assert coord.get_indexer(125.0, "nearest") == 0 + assert coord.get_indexer(175.0, "nearest") == 1 + assert coord.get_indexer(175.0, "ffill") == 0 + assert coord.get_indexer(200.0, "ffill") == 1 + assert 
coord.get_indexer(200.0, "bfill") == 1 + assert coord.get_indexer(125.0, "bfill") == 1 + assert np.all(np.equal(coord.get_indexer([100.0, 900.0]), [0, 8])) + with pytest.raises(KeyError): + assert coord.get_indexer(0.0) == 0 + assert coord.get_indexer(1000.0) == 8 + assert coord.get_indexer(150.0) == 0 + assert coord.get_indexer(1000.0, "bfill") == 8 + assert coord.get_indexer(0.0, "ffill") == 0 + + starttime = np.datetime64("2000-01-01T00:00:00") + endtime = np.datetime64("2000-01-01T00:00:08") + coord = InterpCoordinate( + dict(tie_indices=[0, 8], tie_values=[starttime, endtime]) + ) + assert coord.get_indexer(starttime) == 0 + assert coord.get_indexer(endtime) == 8 + assert coord.get_indexer(str(starttime)) == 0 + assert coord.get_indexer(str(endtime)) == 8 + assert coord.get_indexer("2000-01-01T00:00:04.1", "nearest") == 4 + + def test_indices(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert np.all(np.equal(coord.indices, np.arange(9))) + + def test_values(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert np.allclose(coord.values, np.arange(100.0, 1000.0, 100.0)) + + def test_get_index_slice(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.slice_indexer(100.0, 200.0) == slice(0, 2) + assert coord.slice_indexer(150.0, 250.0) == slice(1, 2) + assert coord.slice_indexer(300.0, 500.0) == slice(2, 5) + assert coord.slice_indexer(0.0, 500.0) == slice(0, 5) + assert coord.slice_indexer(125.0, 175.0) == slice(1, 1) + assert coord.slice_indexer(0.0, 50.0) == slice(0, 0) + assert coord.slice_indexer(1000.0, 1100.0) == slice(9, 9) + assert coord.slice_indexer(1000.0, 500.0) == slice(9, 5) + assert coord.slice_indexer(None, None) == slice(None, None) + + def test_slice_index(self): + coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) + assert coord.slice_index(slice(0, 2)).equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 200.0])) + ) + assert coord.slice_index(slice(7, None)).equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[800.0, 900.0])) + ) + assert coord.slice_index(slice(None, None)).equals(coord) + assert coord.slice_index(slice(0, 0)).equals( + InterpCoordinate(dict(tie_indices=[], tie_values=[])) + ) + assert coord.slice_index(slice(4, 2)).equals( + InterpCoordinate(dict(tie_indices=[], tie_values=[])) + ) + assert coord.slice_index(slice(9, 9)).equals( + InterpCoordinate(dict(tie_indices=[], tie_values=[])) + ) + assert coord.slice_index(slice(3, 3)).equals( + InterpCoordinate(dict(tie_indices=[], tie_values=[])) + ) + assert coord.slice_index(slice(0, -1)).equals( + InterpCoordinate(dict(tie_indices=[0, 7], tie_values=[100.0, 800.0])) + ) + assert coord.slice_index(slice(0, -2)).equals( + InterpCoordinate(dict(tie_indices=[0, 6], tie_values=[100.0, 700.0])) + ) + assert coord.slice_index(slice(-2, None)).equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[800.0, 900.0])) + ) + assert coord.slice_index(slice(1, 2)).equals( + InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) + ) + assert coord.slice_index(slice(1, 3, 2)).equals( + InterpCoordinate(dict(tie_indices=[0], tie_values=[200.0])) + ) + assert coord.slice_index(slice(None, None, 2)).equals( + InterpCoordinate(dict(tie_indices=[0, 4], tie_values=[100.0, 900.0])) + ) + assert coord.slice_index(slice(None, None, 3)).equals( + InterpCoordinate(dict(tie_indices=[0, 2], tie_values=[100.0, 700.0])) + ) 
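+        # A step decimates the tie points: the end tie survives only when its
+        # index is reachable from the start with that step, hence 700.0 above
+        # for step 3 (8 is not a multiple of 3) but 900.0 below for step 4.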
+ assert coord.slice_index(slice(None, None, 4)).equals( + InterpCoordinate(dict(tie_indices=[0, 2], tie_values=[100.0, 900.0])) + ) + assert coord.slice_index(slice(None, None, 5)).equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[100.0, 600.0])) + ) + assert coord.slice_index(slice(2, 7, 3)).equals( + InterpCoordinate(dict(tie_indices=[0, 1], tie_values=[300.0, 600.0])) + ) + + def test_to_index(self): + # TODO + pass + + def test_simplify(self): + xp = np.sort(np.random.choice(10000, 1000, replace=False)) + xp[0] = 0 + xp[-1] = 10000 + yp = xp + (np.random.rand(1000) - 0.5) + coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp}) + assert len(coord.simplify(1.0).tie_indices) == 2 + + def test_singleton(self): + coord = InterpCoordinate({"tie_indices": [0], "tie_values": [1.0]}) + assert coord[0].values == 1.0 + + def test_to_from_dict(self): + for data in self.valid: + coord = InterpCoordinate(data) + assert InterpCoordinate.from_dict(coord.to_dict()).equals(coord) + + def test_append(self): + coord0 = InterpCoordinate() + coord1 = InterpCoordinate({"tie_indices": [0, 2], "tie_values": [0, 20]}) + coord2 = InterpCoordinate({"tie_indices": [0, 2], "tie_values": [30, 50]}) + + result = coord1.append(coord2).simplify() + expected = InterpCoordinate({"tie_indices": [0, 5], "tie_values": [0, 50]}) + assert result.equals(expected) + + result = coord2.append(coord1).simplify() + expected = InterpCoordinate( + {"tie_indices": [0, 2, 3, 5], "tie_values": [30, 50, 0, 20]} + ) + assert result.equals(expected) + + assert coord0.append(coord0).empty + assert coord0.append(coord1).equals(coord1) + assert coord1.append(coord0).equals(coord1) From b5e0132c1319a0622d08ca20ed883606d861065e Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 16:19:33 +0100 Subject: [PATCH 32/63] Refactoring: merge tests with same name. --- tests/coordinates/test_coordinates.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/coordinates/test_coordinates.py b/tests/coordinates/test_coordinates.py index d0825bd..9f87e33 100644 --- a/tests/coordinates/test_coordinates.py +++ b/tests/coordinates/test_coordinates.py @@ -13,12 +13,6 @@ def test_new(self): assert coord.isdense() assert coord.dim == "dim" - def test_to_dataarray(self): - coord = xd.Coordinate([1, 2, 3], "dim") - result = coord.to_dataarray() - expected = xd.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") - assert result.equals(expected) - def test_empty(self): with pytest.raises(TypeError, match="cannot infer coordinate type"): xd.Coordinate() @@ -44,6 +38,10 @@ def test_name(self): assert coords["other_dim"].name == "other_dim" def test_to_dataarray(self): + coord = xd.Coordinate([1, 2, 3], "dim") + result = coord.to_dataarray() + expected = xd.DataArray([1, 2, 3], {"dim": [1, 2, 3]}, name="dim") + assert result.equals(expected) coord = xd.Coordinate([1, 2, 3]) with pytest.raises(ValueError, match="unnamed coordinate"): coord.to_dataarray() From 09d837d5c55856666b73436842aa72c3cf6f10ab Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 18 Dec 2025 16:30:27 +0100 Subject: [PATCH 33/63] add Coordinate.get_div_points for more generic xdas.split. 
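In an InterpCoordinate a discontinuity shows up as two consecutive tie
indices, so the cut point is the second tie of each such pair, bracketed
by 0 and len(self). A minimal sketch of the extraction with plain numpy,
on assumed tie indices rather than the class itself:

    import numpy as np

    tie_indices = np.array([0, 4, 5, 9])  # ties 4 and 5 are adjacent: a gap
    (points,) = np.nonzero(np.diff(tie_indices, prepend=[0]) == 1)
    div_points = np.concatenate(([0], tie_indices[points], [tie_indices[-1] + 1]))
    print(div_points)  # [ 0  5 10] -> xdas.split slices [0:5] and [5:10]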
--- xdas/coordinates/core.py | 5 +++++ xdas/coordinates/interp.py | 6 ++++++ xdas/core/routines.py | 12 +----------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index d9e6813..f8d0fa5 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -420,6 +420,11 @@ def issampled(self): def append(self, other): raise NotImplementedError(f"append is not implemented for {self.__class__}") + def get_div_points(self): + raise NotImplementedError( + f"get_div_points is not implemented for {self.__class__}" + ) + def to_dataarray(self): from ..core.dataarray import DataArray # TODO: avoid defered import? diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index 6a3f7f6..d12a859 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -292,6 +292,12 @@ def simplify(self, tolerance=None): dict(tie_indices=tie_indices, tie_values=tie_values), self.dim ) + def get_div_points(self): + (points,) = np.nonzero(np.diff(self.tie_indices, prepend=[0]) == 1) + div_points = [self.tie_indices[point] for point in points] + div_points = [0] + div_points + [len(self)] + return div_points + def get_discontinuities(self): """ Returns a DataFrame containing information about the discontinuities. diff --git a/xdas/core/routines.py b/xdas/core/routines.py index f34c4b8..de810c8 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -12,7 +12,6 @@ from tqdm import tqdm from ..coordinates.core import Coordinates, get_sampling_interval -from ..coordinates.interp import InterpCoordinate from ..virtual import VirtualSource, VirtualStack from .dataarray import DataArray from .datacollection import DataCollection, DataMapping, DataSequence @@ -779,16 +778,7 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None if isinstance(indices_or_sections, str) and ( indices_or_sections == "discontinuities" ): - if isinstance(da[dim], InterpCoordinate): - coord = da[dim].simplify(tolerance) - (points,) = np.nonzero(np.diff(coord.tie_indices, prepend=[0]) == 1) - div_points = [coord.tie_indices[point] for point in points] - div_points = [0] + div_points + [da.sizes[dim]] - else: - raise TypeError( - "discontinuities can only be found on dimension that have as type " - "`InterpCoordinate`." - ) + div_points = da[dim].simplify(tolerance).get_div_points() elif isinstance(indices_or_sections, int): nsamples = da.sizes[dim] nchunk = indices_or_sections From fa7d5bbece114140ea5485e3f87ca0c69ef38d6c Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Fri, 19 Dec 2025 12:26:30 +0100 Subject: [PATCH 34/63] WIP: make split on discontinuities work on any coord type. 
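Each coordinate flavour now reports its own div points, and `tolerance`
discards jumps too small to count as discontinuities. For a dense
coordinate this reduces to thresholding the first difference; a minimal
sketch with assumed values:

    import numpy as np

    data = np.array([0.0, 1.0, 2.0, 10.0, 11.0])
    tolerance = 5.0
    (jumps,) = np.nonzero(np.abs(np.diff(data)) >= tolerance)
    div_points = np.concatenate(([0], jumps + 1, [len(data)]))
    print(div_points)  # [0 3 5] -> chunks [0.0, 1.0, 2.0] and [10.0, 11.0]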
--- xdas/coordinates/core.py | 2 +- xdas/coordinates/dense.py | 11 +++++++++++ xdas/coordinates/interp.py | 7 +++++-- xdas/coordinates/sampled.py | 10 ++++++++++ xdas/core/routines.py | 2 +- 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index f8d0fa5..cd999da 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -420,7 +420,7 @@ def issampled(self): def append(self, other): raise NotImplementedError(f"append is not implemented for {self.__class__}") - def get_div_points(self): + def get_div_points(self, tolerance=None): raise NotImplementedError( f"get_div_points is not implemented for {self.__class__}" ) diff --git a/xdas/coordinates/dense.py b/xdas/coordinates/dense.py index 9ebef73..ff4d355 100644 --- a/xdas/coordinates/dense.py +++ b/xdas/coordinates/dense.py @@ -71,6 +71,17 @@ def append(self, other): raise ValueError("cannot append coordinate with different dtype") return self.__class__(np.concatenate([self.data, other.data]), self.dim) + def get_div_points(self, tolerance=None): + deltas = np.diff(self.data) + if tolerance is not None: + div_points = np.nonzero(np.abs(deltas) >= tolerance)[0] + 1 + else: + raise NotImplementedError( + "get_div_points without tolerance is not implemented for DenseCoordinate" + ) + div_points = np.concatenate(([0], div_points, [len(self)])) + return div_points + def to_dict(self): if np.issubdtype(self.dtype, np.datetime64): data = self.data.astype(str).tolist() diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index d12a859..40e8588 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -292,11 +292,14 @@ def simplify(self, tolerance=None): dict(tie_indices=tie_indices, tie_values=tie_values), self.dim ) - def get_div_points(self): + def get_div_points(self, tolerance=None): (points,) = np.nonzero(np.diff(self.tie_indices, prepend=[0]) == 1) + deltas = self.tie_values[points] - self.tie_values[points - 1] + if tolerance is not None: + points = points[np.abs(deltas) >= tolerance] div_points = [self.tie_indices[point] for point in points] div_points = [0] + div_points + [len(self)] - return div_points + return np.array(div_points) def get_discontinuities(self): """ diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index 28fead8..40ac11e 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -346,6 +346,16 @@ def simplify(self, tolerance=None): self.dim, ) + def get_div_points(self, tolerance=None): + div_points = self.tie_indices[1:] + if tolerance is not None: + deltas = self.tie_values[1:] - ( + self.tie_values[:-1] + self.sampling_interval * self.tie_lengths[:-1] + ) + div_points = div_points[np.abs(deltas) >= tolerance] + div_points = np.concatenate(([0], div_points, [len(self)])) + return div_points + def get_discontinuities(self): if self.empty: return pd.DataFrame( diff --git a/xdas/core/routines.py b/xdas/core/routines.py index de810c8..0893392 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -778,7 +778,7 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None if isinstance(indices_or_sections, str) and ( indices_or_sections == "discontinuities" ): - div_points = da[dim].simplify(tolerance).get_div_points() + div_points = da[dim].get_div_points(tolerance) elif isinstance(indices_or_sections, int): nsamples = da.sizes[dim] nchunk = indices_or_sections From ec256a4327b0acb235e00109a15492244cc67eee Mon Sep 17 00:00:00 2001 From: 
Alister Trabattoni
Date: Fri, 19 Dec 2025 15:03:30 +0100
Subject: [PATCH 35/63] Add `Coordinate.get_split_indices` and move `get_availabilities` and `get_discontinuities` as a generic method in `Coordinate`.
---
 xdas/coordinates/core.py | 113 +++++++++++++++++++++++++++++++++++-
 xdas/coordinates/interp.py | 113 +++---------------------------------
 xdas/coordinates/sampled.py | 70 ++--------------------
 xdas/core/routines.py | 13 ++---
 4 files changed, 129 insertions(+), 180 deletions(-)
diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py
index cd999da..a159a3a 100644
--- a/xdas/coordinates/core.py
+++ b/xdas/coordinates/core.py
@@ -1,7 +1,9 @@
 from copy import copy, deepcopy
 from functools import wraps
+from itertools import pairwise
 import numpy as np
+import pandas as pd
 def wraps_first_last(func):
@@ -420,11 +422,118 @@ def issampled(self):
 def append(self, other):
 raise NotImplementedError(f"append is not implemented for {self.__class__}")
- def get_div_points(self, tolerance=None):
+ def get_split_indices(self, tolerance=None):
 raise NotImplementedError(
- f"get_div_points is not implemented for {self.__class__}"
+ f"get_split_indices is not implemented for {self.__class__}"
 )
+ def get_discontinuities(self, tolerance=None):
+ """
+ Returns a DataFrame containing information about the discontinuities.
+
+ Returns
+ -------
+ pandas.DataFrame
+ A DataFrame with the following columns:
+
+ - start_index : int
+ The index where the discontinuity starts.
+ - end_index : int
+ The index where the discontinuity ends.
+ - start_value : float
+ The value at the start of the discontinuity.
+ - end_value : float
+ The value at the end of the discontinuity.
+ - delta : float
+ The difference between the end_value and start_value.
+ - type : str
+ The type of the discontinuity, either "gap" or "overlap".
+
+ """
+ if self.empty:
+ return pd.DataFrame(
+ columns=[
+ "start_index",
+ "end_index",
+ "start_value",
+ "end_value",
+ "delta",
+ "type",
+ ]
+ )
+ indices = self.get_split_indices(tolerance)
+ records = []
+ for index in indices:
+ start_index = index - 1
+ end_index = index
+ start_value = self.get_value(index - 1)
+ end_value = self.get_value(index)
+ delta = end_value - start_value
+ if tolerance is not None and np.abs(delta) < tolerance:
+ continue
+ record = {
+ "start_index": start_index,
+ "end_index": end_index,
+ "start_value": start_value,
+ "end_value": end_value,
+ "delta": delta,
+ "type": ("gap" if end_value > start_value else "overlap"),
+ }
+ records.append(record)
+ return pd.DataFrame.from_records(records)
+
+ def get_availabilities(self):
+ """
+ Returns a DataFrame containing information about the data availability.
+
+ Returns
+ -------
+ pandas.DataFrame
+ A DataFrame with the following columns:
+
+ - start_index : int
+ The index where the data segment starts.
+ - end_index : int
+ The index where the data segment ends.
+ - start_value : float
+ The value at the start of the segment.
+ - end_value : float
+ The value at the end of the segment.
+ - delta : float
+ The difference between the end_value and start_value.
+ - type : str
+ The type of the segment, always "data".
+ + """ + if self.empty: + return pd.DataFrame( + columns=[ + "start_index", + "end_index", + "start_value", + "end_value", + "delta", + "type", + ] + ) + indices = np.concatenate([[0], self.get_split_indices(), [len(self)]]) + records = [] + for start_index, stop_index in pairwise(indices): + end_index = stop_index - 1 + start_value = self.get_value(start_index) + end_value = self.get_value(end_index) + records.append( + { + "start_index": start_index, + "end_index": end_index, + "start_value": start_value, + "end_value": end_value, + "delta": end_value - start_value, + "type": "data", + } + ) + return pd.DataFrame.from_records(records) + def to_dataarray(self): from ..core.dataarray import DataArray # TODO: avoid defered import? diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index 40e8588..d795849 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -292,112 +292,15 @@ def simplify(self, tolerance=None): dict(tie_indices=tie_indices, tie_values=tie_values), self.dim ) - def get_div_points(self, tolerance=None): - (points,) = np.nonzero(np.diff(self.tie_indices, prepend=[0]) == 1) - deltas = self.tie_values[points] - self.tie_values[points - 1] - if tolerance is not None: - points = points[np.abs(deltas) >= tolerance] - div_points = [self.tie_indices[point] for point in points] - div_points = [0] + div_points + [len(self)] - return np.array(div_points) - - def get_discontinuities(self): - """ - Returns a DataFrame containing information about the discontinuities. - - Returns - ------- - pandas.DataFrame - A DataFrame with the following columns: - - - start_index : int - The index where the discontinuity starts. - - end_index : int - The index where the discontinuity ends. - - start_value : float - The value at the start of the discontinuity. - - end_value : float - The value at the end of the discontinuity. - - delta : float - The difference between the end_value and start_value. - - type : str - The type of the discontinuity, either "gap" or "overlap". - - """ + def get_split_indices(self, tolerance=None): (indices,) = np.nonzero(np.diff(self.tie_indices) == 1) - records = [] - for index in indices: - start_index = self.tie_indices[index] - end_index = self.tie_indices[index + 1] - start_value = self.tie_values[index] - end_value = self.tie_values[index + 1] - record = { - "start_index": start_index, - "end_index": end_index, - "start_value": start_value, - "end_value": end_value, - "delta": end_value - start_value, - "type": ("gap" if end_value > start_value else "overlap"), - } - records.append(record) - return pd.DataFrame.from_records(records) - - def get_availabilities(self): - """ - Returns a DataFrame containing information about the data availability. - - Returns - ------- - pandas.DataFrame - A DataFrame with the following columns: - - - start_index : int - The index where the discontinuity starts. - - end_index : int - The index where the discontinuity ends. - - start_value : float - The value at the start of the discontinuity. - - end_value : float - The value at the end of the discontinuity. - - delta : float - The difference between the end_value and start_value. - - type : str - The type of the discontinuity, always "data". 
-
- """
- if self.empty:
- return pd.DataFrame(
- columns=[
- "start_index",
- "end_index",
- "start_value",
- "end_value",
- "delta",
- "type",
- ]
- )
- (cut_pos,) = np.nonzero(np.diff(self.tie_indices) == 1)
- # start each segment after the previous cut (or at 0)
- starts = np.concatenate(([0], cut_pos + 1))
- # end each segment at the cut position (or at n-1 for the last)
- ends = np.concatenate((cut_pos, [len(self.tie_indices) - 1]))
- records = []
- for s, e in zip(starts, ends):
- start_index = self.tie_indices[s]
- end_index = self.tie_indices[e]
- start_value = self.tie_values[s]
- end_value = self.tie_values[e]
- records.append(
- {
- "start_index": start_index,
- "end_index": end_index,
- "start_value": start_value,
- "end_value": end_value,
- "delta": end_value - start_value,
- "type": "data",
- }
- )
- return pd.DataFrame.from_records(records)
+ indices += 1
+ if tolerance is not None:
+ deltas = self.tie_values[indices] - self.tie_values[indices - 1]
+ indices = indices[np.abs(deltas) >= tolerance]
+ return np.array(
+ [self.tie_indices[index] for index in indices], dtype=self.tie_indices.dtype
+ )
 @classmethod
 def from_array(cls, arr, dim=None, tolerance=None):
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 40ac11e..d7677c9 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -346,76 +346,14 @@ def simplify(self, tolerance=None):
 self.dim,
 )
- def get_div_points(self, tolerance=None):
- div_points = self.tie_indices[1:]
+ def get_split_indices(self, tolerance=None):
+ indices = self.tie_indices[1:]
 if tolerance is not None:
 deltas = self.tie_values[1:] - (
 self.tie_values[:-1] + self.sampling_interval * self.tie_lengths[:-1]
 )
- div_points = div_points[np.abs(deltas) >= tolerance]
- div_points = np.concatenate(([0], div_points, [len(self)]))
- return div_points
-
- def get_discontinuities(self):
- if self.empty:
- return pd.DataFrame(
- columns=[
- "start_index",
- "end_index",
- "start_value",
- "end_value",
- "delta",
- "type",
- ]
- )
- records = []
- for index in self.tie_indices[:-1]:
- start_index = index
- end_index = index + 1
- start_value = self.get_value(index)
- end_value = self.get_value(index + 1)
- record = {
- "start_index": start_index,
- "end_index": end_index,
- "start_value": start_value,
- "end_value": end_value,
- "delta": end_value - start_value,
- "type": ("gap" if end_value > start_value else "overlap"),
- }
- records.append(record)
- return pd.DataFrame.from_records(records)
-
- def get_availabilities(self):
- if self.empty:
- return pd.DataFrame(
- columns=[
- "start_index",
- "end_index",
- "start_value",
- "end_value",
- "delta",
- "type",
- ]
- )
- records = []
- for index, value, length in zip(
- self.tie_indices, self.tie_values, self.tie_indices
- ):
- start_index = index
- end_index = index + length - 1
- start_value = value
- end_value = value + self.sampling_interval * (length - 1)
- records.append(
- {
- "start_index": start_index,
- "end_index": end_index,
- "start_value": start_value,
- "end_value": end_value,
- "delta": end_value - start_value,
- "type": "data",
- }
- )
- return pd.DataFrame.from_records(records)
+ indices = indices[np.abs(deltas) >= tolerance]
+ return indices
 @classmethod
 def from_array(cls, arr, dim=None, sampling_interval=None):
diff --git a/xdas/core/routines.py b/xdas/core/routines.py
index 0893392..c3a75dc 100644
--- a/xdas/core/routines.py
+++ b/xdas/core/routines.py
@@ -4,6 +4,7 @@
 from collections import defaultdict
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from glob import glob
+from itertools import pairwise
 import numpy as np
 import pandas as pd
@@ -778,8 +779,9 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None
 if isinstance(indices_or_sections, str) and (
 indices_or_sections == "discontinuities"
 ):
- div_points = da[dim].get_div_points(tolerance)
- elif isinstance(indices_or_sections, int):
+ indices_or_sections = da[dim].get_split_indices(tolerance)
+
+ if isinstance(indices_or_sections, int):
 nsamples = da.sizes[dim]
 nchunk = indices_or_sections
 if nchunk <= 0:
@@ -790,12 +792,9 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None
 chunks = extras * [chunk_size + 1] + (nchunk - extras) * [chunk_size]
 div_points = np.cumsum([0] + chunks, dtype=np.int64)
 else:
- div_points = [0] + indices_or_sections + [da.sizes[dim]]
+ div_points = np.concatenate([[0], indices_or_sections, [da.sizes[dim]]])
 return DataCollection(
- [
- da.isel({dim: slice(div_points[idx], div_points[idx + 1])})
- for idx in range(len(div_points) - 1)
- ]
+ [da.isel({dim: slice(start, stop)}) for start, stop in pairwise(div_points)]
 )
From 44c4b2484fc25372d97623d1d8a085ec085e404c Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 19 Dec 2025 16:36:19 +0100
Subject: [PATCH 36/63] Fix SampledCoordinate tolerance and cast.
---
 xdas/coordinates/sampled.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index d7677c9..ab89f12 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -113,7 +113,10 @@ def issampled(self):
 return True
 def get_sampling_interval(self, cast=True):
- return self.sampling_interval
+ delta = self.sampling_interval
+ if cast and np.issubdtype(delta.dtype, np.timedelta64):
+ delta = delta / np.timedelta64(1, "s")
+ return delta
 def __len__(self):
 if self.empty:
@@ -328,6 +331,8 @@ def decimate(self, q):
 return self[::q]
 def simplify(self, tolerance=None):
+ if tolerance is None:
+ tolerance = np.array(0, dtype=self.sampling_interval.dtype)
 tie_values = [self.tie_values[0]]
 tie_lengths = [self.tie_lengths[0]]
 for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):
From c07519116f629c190162742c77fcd4306cfc5fe4 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Sat, 20 Dec 2025 18:40:56 +0100
Subject: [PATCH 37/63] Add Coordinate[type].from_block.
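Subclasses now register themselves under a short name so that
`Coordinate["dense"]`, `Coordinate["interpolated"]` and
`Coordinate["sampled"]` resolve to the right class, and `from_block`
gives them a common constructor taking a start value, a size and a step.
The registration mechanism in miniature (illustrative names, not the
xdas classes):

    class Base:
        _registry = {}

        def __init_subclass__(cls, *, name=None, **kwargs):
            super().__init_subclass__(**kwargs)
            if name is not None:
                Base._registry[name] = cls

        def __class_getitem__(cls, item):
            return cls._registry[item]

    class Dense(Base, name="dense"):
        pass

    assert Base["dense"] is Dense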
--- xdas/coordinates/core.py | 14 ++++++++++++++ xdas/coordinates/default.py | 2 +- xdas/coordinates/dense.py | 7 ++++++- xdas/coordinates/interp.py | 12 +++++++++++- xdas/coordinates/sampled.py | 22 ++++++++++++++++------ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index a159a3a..484a8fc 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -250,6 +250,16 @@ def _assign_parent(self, parent): class Coordinate: + _registry = {} + + def __init_subclass__(cls, *, name=None, **kwargs): + super().__init_subclass__(**kwargs) + if name is not None: + Coordinate._registry[name] = cls + + def __class_getitem__(cls, item): + return cls._registry[item] + def __new__(cls, data=None, dim=None, dtype=None): if data is None: raise TypeError("cannot infer coordinate type if no `data` is provided") @@ -580,6 +590,10 @@ def from_dataset(cls, dataset, name): coords |= subcls.from_dataset(dataset, name) return coords + @classmethod + def from_block(cls, start, size, step, dim=None, dtype=None): + raise NotImplementedError + def parse(data, dim=None): if isinstance(data, tuple): diff --git a/xdas/coordinates/default.py b/xdas/coordinates/default.py index 8b11220..8023479 100644 --- a/xdas/coordinates/default.py +++ b/xdas/coordinates/default.py @@ -3,7 +3,7 @@ from .core import Coordinate, isscalar, parse -class DefaultCoordinate(Coordinate): +class DefaultCoordinate(Coordinate, name="default"): def __new__(cls, *args, **kwargs): return object.__new__(cls) diff --git a/xdas/coordinates/dense.py b/xdas/coordinates/dense.py index ff4d355..698bdb2 100644 --- a/xdas/coordinates/dense.py +++ b/xdas/coordinates/dense.py @@ -4,7 +4,7 @@ from .core import Coordinate, parse -class DenseCoordinate(Coordinate): +class DenseCoordinate(Coordinate, name="dense"): def __new__(cls, *args, **kwargs): return object.__new__(cls) @@ -106,3 +106,8 @@ def from_dataset(cls, dataset, name): ) for name, coord in dataset[name].coords.items() } + + @classmethod + def from_block(cls, start, size, step, dim=None, dtype=None): + data = start + step * np.arange(size) + return cls(data, dim=dim, dtype=dtype) diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index d795849..e95124d 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -7,7 +7,7 @@ from .core import Coordinate, format_datetime, is_strictly_increasing, parse -class InterpCoordinate(Coordinate): +class InterpCoordinate(Coordinate, name="interpolated"): """ Array-like object used to represent piecewise evenly spaced coordinates using the CF convention. @@ -356,6 +356,16 @@ def from_dataset(cls, dataset, name): coords[dim] = Coordinate(data, dim) return coords + @classmethod + def from_block(cls, start, size, step, dim=None, dtype=None): + return cls( + { + "tie_indices": [0, size - 1], + "tie_values": [start, start + step * (size - 1)], + }, + dim=dim, + ) + def douglas_peucker(x, y, epsilon): mask = np.ones(len(x), dtype=bool) diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index ab89f12..3fc4876 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -1,12 +1,11 @@ import re import numpy as np -import pandas as pd from .core import Coordinate, format_datetime, is_strictly_increasing, parse -class SampledCoordinate(Coordinate): +class SampledCoordinate(Coordinate, name="sampled"): """ A coordinate that is sampled at regular intervals. 
@@ -142,10 +141,10 @@ def __repr__(self): def __getitem__(self, item): if isinstance(item, slice): return self.slice_index(item) - elif np.isscalar(item): - return Coordinate(self.get_value(item), None) else: - return Coordinate(self.get_value(item), self.dim) + return Coordinate( + self.get_value(item), None if np.isscalar(item) else self.dim + ) def __add__(self, other): return self.__class__( @@ -280,7 +279,9 @@ def get_indexer(self, value, method=None): # Check that value lies within the coordinate value range (vectorized) if np.any(value < self.start) or np.any(value >= self.end): raise KeyError("index not found") - if not is_strictly_increasing(self.tie_values): + if not is_strictly_increasing( + self.tie_values + ): # TODO: make it work even in this case raise ValueError("tie_values must be strictly increasing") reference = np.searchsorted(self.tie_values, value, side="right") - 1 offset = (value - self.tie_values[reference]) / self.sampling_interval @@ -417,3 +418,12 @@ def from_dataset(cls, dataset, name): } coords[dim] = Coordinate(data, dim) return coords + + @classmethod + def from_block(cls, start, size, step, dim=None, dtype=None): + data = { + "tie_values": [start], + "tie_lengths": [size], + "sampling_interval": step, + } + return cls(data, dim=dim, dtype=dtype) From 902dc662c1789702deb4827c49c227e5a17d52ce Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Sat, 20 Dec 2025 18:57:25 +0100 Subject: [PATCH 38/63] WIP: add `ctype` kwargs to read functions for Coordinate type selection. --- xdas/io/apsensing.py | 9 +++++---- xdas/io/asn.py | 8 ++++---- xdas/io/febus.py | 20 +++++++++++-------- xdas/io/miniseed.py | 46 ++++++++++++++++---------------------------- xdas/io/optasense.py | 10 +++++++--- xdas/io/silixa.py | 14 ++++++-------- xdas/io/sintela.py | 10 +++++++--- xdas/io/terra15.py | 9 +++++---- 8 files changed, 63 insertions(+), 63 deletions(-) diff --git a/xdas/io/apsensing.py b/xdas/io/apsensing.py index be03a7f..937b165 100644 --- a/xdas/io/apsensing.py +++ b/xdas/io/apsensing.py @@ -1,11 +1,12 @@ import h5py import numpy as np +from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource -def read(fname): +def read(fname, ctype="interpolated"): with h5py.File(fname, "r") as file: t0 = file["Metadata"]["Timestamp"][()].item().decode() fs = file["DAQ"]["RepetitionFrequency"][()].item() @@ -18,6 +19,6 @@ def read(fname): t0 = np.datetime64(t0) dt = np.timedelta64(round(1e9 / fs), "ns") nt, nd = data.shape - t = {"tie_indices": [0, nt - 1], "tie_values": [t0, t0 + (nt - 1) * dt]} - d = {"tie_indices": [0, nd - 1], "tie_values": [0.0, (nd - 1) * dx]} - return DataArray(data, {"time": t, "distance": d}) + time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance") + return DataArray(data, {"time": time, "distance": distance}) diff --git a/xdas/io/asn.py b/xdas/io/asn.py index 44da7a5..3e93536 100644 --- a/xdas/io/asn.py +++ b/xdas/io/asn.py @@ -4,12 +4,12 @@ import numpy as np import zmq -from ..coordinates.core import get_sampling_interval +from ..coordinates.core import Coordinate, get_sampling_interval from ..core.dataarray import DataArray from ..virtual import VirtualSource -def read(fname): +def read(fname, ctype="interpolated"): with h5py.File(fname, "r") as file: header = file["header"] t0 = np.datetime64(round(header["time"][()] * 1e9), "ns") @@ -17,8 +17,8 @@ def read(fname): dx = header["dx"][()] * 
np.median(np.diff(header["channels"])) data = VirtualSource(file["data"]) nt, nx = data.shape - time = {"tie_indices": [0, nt - 1], "tie_values": [t0, t0 + (nt - 1) * dt]} - distance = {"tie_indices": [0, nx - 1], "tie_values": [0.0, (nx - 1) * dx]} + time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype].from_block(0.0, nx, dx, dim="distance") return DataArray(data, {"time": time, "distance": distance}) diff --git a/xdas/io/febus.py b/xdas/io/febus.py index 3488352..747b174 100644 --- a/xdas/io/febus.py +++ b/xdas/io/febus.py @@ -3,12 +3,13 @@ import h5py import numpy as np +from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..core.routines import concatenate from ..virtual import VirtualSource -def read(fname, overlaps=None, offset=None): +def read(fname, overlaps=None, offset=None, ctype="interpolated"): """ Open a Febus file into a xdas DataArray object. @@ -87,16 +88,19 @@ def read(fname, overlaps=None, offset=None): dt, dx = delta _, nt, nx = chunks.shape + dt = np.rint(1e6 * dt).astype("m8[us]").astype("m8[ns]") + dc = [] for t0, chunk in zip(times, chunks): - time = { - "tie_indices": [0, nt - 1], - "tie_values": np.rint(1e6 * np.array([t0, t0 + (nt - 1) * dt])) - .astype("M8[us]") - .astype("M8[ns]"), - } - distance = {"tie_indices": [0, nx - 1], "tie_values": [0.0, (nx - 1) * dx]} + + t0 = np.rint(1e6 * t0).astype("M8[us]").astype("M8[ns]") + time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype].from_block(0.0, nx, dx, dim="distance") da = DataArray(chunk, {"time": time, "distance": distance}, name=name) dc.append(da) return concatenate(dc, "time") + + +def _to_datetime64(data): + return diff --git a/xdas/io/miniseed.py b/xdas/io/miniseed.py index 6a2d469..375e25d 100644 --- a/xdas/io/miniseed.py +++ b/xdas/io/miniseed.py @@ -6,15 +6,15 @@ from ..core.dataarray import DataArray -def read(fname, ignore_last_sample=False): - shape, dtype, coords, method = read_header(fname, ignore_last_sample) +def read(fname, ignore_last_sample=False, ctype="interpolated"): + shape, dtype, coords, method = read_header(fname, ignore_last_sample, ctype) data = dask.array.from_delayed( dask.delayed(read_data)(fname, method, ignore_last_sample), shape, dtype ) return DataArray(data, coords) -def read_header(path, ignore_last_sample): +def read_header(path, ignore_last_sample, ctype): st = obspy.read(path, headonly=True) dtype = uniquifiy(tr.data.dtype for tr in st) @@ -33,16 +33,20 @@ def read_header(path, ignore_last_sample): tmp_st = st.select(channel=channels[0]) for n, tr in enumerate(tmp_st): if n == 0: - time = get_time_coord(tr, ignore_last_sample=False) + time = get_time_coord(tr, ignore_last_sample=False, ctype=ctype) elif n == len(tmp_st) - 1: - time = time.append(get_time_coord(tr, ignore_last_sample)) + time = time.append(get_time_coord(tr, ignore_last_sample, ctype=ctype)) else: - time = time.append(get_time_coord(tr, ignore_last_sample=False)) + time = time.append( + get_time_coord(tr, ignore_last_sample=False, ctype=ctype) + ) else: method = "synchronized" - time = get_time_coord(st[0], ignore_last_sample) + time = get_time_coord(st[0], ignore_last_sample, ctype) - if not all(get_time_coord(tr, ignore_last_sample).equals(time) for tr in st): + if not all( + get_time_coord(tr, ignore_last_sample, ctype).equals(time) for tr in st + ): raise ValueError("All traces must be synchronized") network = uniquifiy(tr.stats.network for tr in st) @@ -85,27 +89,11 @@ def read_data(path, method, 
ignore_last_sample): return np.array(data) -def get_time_coord(tr, ignore_last_sample): - if ignore_last_sample: - return Coordinate( - { - "tie_indices": [0, tr.stats.npts - 2], - "tie_values": [ - np.datetime64(tr.stats.starttime), - np.datetime64(tr.stats.endtime - tr.stats.delta), - ], - } - ) - else: - return Coordinate( - { - "tie_indices": [0, tr.stats.npts - 1], - "tie_values": [ - np.datetime64(tr.stats.starttime), - np.datetime64(tr.stats.endtime), - ], - } - ) +def get_time_coord(tr, ignore_last_sample, ctype): + t0 = np.datetime64(tr.stats.starttime) + dt = np.rint(1e6 * tr.stats.delta).astype("m8[us]").astype("m8[ns]") + nt = tr.stats.npts - int(ignore_last_sample) + return Coordinate[ctype].from_block(t0, nt, dt, dim="time") def uniquifiy(seq): diff --git a/xdas/io/optasense.py b/xdas/io/optasense.py index 97d9208..16e175c 100644 --- a/xdas/io/optasense.py +++ b/xdas/io/optasense.py @@ -1,11 +1,12 @@ import h5py import numpy as np +from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource -def read(fname): +def read(fname, ctype="interpolated"): with h5py.File(fname, "r") as file: acquisition = file["Acquisition"] dx = acquisition.attrs["SpatialSamplingInterval"] @@ -14,6 +15,9 @@ def read(fname): tend = np.datetime64(rawdata.attrs["PartEndTime"][:-1]) data = VirtualSource(rawdata) nd, nt = data.shape - time = {"tie_indices": [0, nt - 1], "tie_values": [tstart, tend]} - distance = {"tie_indices": [0, nd - 1], "tie_values": [0.0, (nd - 1) * dx]} + time = { + "tie_indices": [0, nt - 1], + "tie_values": [tstart, tend], + } # TODO: use from_block + distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance") return DataArray(data, {"distance": distance, "time": time}) diff --git a/xdas/io/silixa.py b/xdas/io/silixa.py index f795583..ae97867 100644 --- a/xdas/io/silixa.py +++ b/xdas/io/silixa.py @@ -1,31 +1,29 @@ import dask import numpy as np +from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from .tdms import TdmsReader -def read(fname): - shape, dtype, coords = read_header(fname) +def read(fname, ctype="interpolated"): + shape, dtype, coords = read_header(fname, ctype) data = dask.array.from_delayed(dask.delayed(read_data)(fname), shape, dtype) return DataArray(data, coords) -def read_header(fname): +def read_header(fname, ctype): with TdmsReader(fname) as tdms: props = tdms.get_properties() shape = tdms.channel_length, tdms.fileinfo["n_channels"] dtype = tdms._data_type t0 = np.datetime64(props["GPSTimeStamp"]) dt = np.timedelta64(round(1e9 / props["SamplingFrequency[Hz]"]), "ns") - time = { - "tie_indices": [0, shape[0] - 1], - "tie_values": [t0, t0 + dt * (shape[0] - 1)], - } + time = Coordinate[ctype].from_block(t0, shape[0], dt, dim="time") distance = { "tie_indices": [0, shape[1] - 1], "tie_values": [props["Start Distance (m)"], props["Stop Distance (m)"]], - } + } # TODO: use from_block coords = {"time": time, "distance": distance} return shape, dtype, coords diff --git a/xdas/io/sintela.py b/xdas/io/sintela.py index ba6fc86..605f50f 100644 --- a/xdas/io/sintela.py +++ b/xdas/io/sintela.py @@ -1,11 +1,12 @@ import h5py import numpy as np +from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource -def read(fname): +def read(fname, ctype="interpolated"): with h5py.File(fname, "r") as file: acquisition = file["Acquisition"] dx = acquisition.attrs["SpatialSamplingInterval"] @@ -14,6 +15,9 @@ def read(fname): tend = 
np.datetime64(rawdata.attrs["PartEndTime"].decode().split("+")[0])
 data = VirtualSource(rawdata)
 nt, nd = data.shape
- time = {"tie_indices": [0, nt - 1], "tie_values": [tstart, tend]}
- distance = {"tie_indices": [0, nd - 1], "tie_values": [0.0, (nd - 1) * dx]}
+ time = {
+ "tie_indices": [0, nt - 1],
+ "tie_values": [tstart, tend],
+ } # TODO: use from_block
+ distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance")
 return DataArray(data, {"time": time, "distance": distance})
diff --git a/xdas/io/terra15.py b/xdas/io/terra15.py
index 7c969ac..e6cf242 100644
--- a/xdas/io/terra15.py
+++ b/xdas/io/terra15.py
@@ -3,11 +3,12 @@
 import h5py
 import numpy as np
+from ..coordinates.core import Coordinate
 from ..core.dataarray import DataArray
 from ..virtual import VirtualSource
-def read(fname, tz=timezone.utc):
+def read(fname, tz=timezone.utc, ctype="interpolated"):
 with h5py.File(fname, "r") as file:
 ti = np.datetime64(
 datetime.fromtimestamp(file["data_product"]["gps_time"][0], tz=tz)
@@ -19,6 +20,6 @@ def read(fname, tz=timezone.utc):
 dx = file.attrs["dx"]
 data = VirtualSource(file["data_product"]["data"])
 nt, nd = data.shape
- t = {"tie_indices": [0, nt - 1], "tie_values": [ti, tf]}
- d = {"tie_indices": [0, nd - 1], "tie_values": [d0, d0 + (nd - 1) * dx]}
- return DataArray(data, {"time": t, "distance": d})
+ time = {"tie_indices": [0, nt - 1], "tie_values": [ti, tf]} # TODO: use from_block
+ distance = Coordinate[ctype].from_block(d0, nd, dx, dim="distance")
+ return DataArray(data, {"time": time, "distance": distance})
From c9b3358445856666b73436842aa72c3cf6f10ab Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 24 Dec 2025 17:23:19 +0100
Subject: [PATCH 39/63] Fixing stepped slicing in SampledCoordinate and add tests.
---
 tests/coordinates/test_sampled.py | 46 +++++++++++++++++++++++++++++--
 xdas/coordinates/core.py | 2 +-
 xdas/coordinates/sampled.py | 42 ++++++++++++++--------------
 3 files changed, 65 insertions(+), 25 deletions(-)
diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py
index 14ea457..26a2482 100644
--- a/tests/coordinates/test_sampled.py
+++ b/tests/coordinates/test_sampled.py
@@ -109,14 +109,26 @@ def test_get_value_scalar_and_vector(self):
 assert coord.get_value(2) == 2.0
 assert coord.get_value(3) == 10.0
 assert coord.get_value(4) == 11.0
+ # negative index
+ assert coord.get_value(-1) == 11.0
+ assert coord.get_value(-2) == 10.0
+ assert coord.get_value(-3) == 2.0
+ assert coord.get_value(-4) == 1.0
+ assert coord.get_value(-5) == 0.0
 # vectorized
- vals = coord.get_value([0, 2, 3, 4])
- assert np.array_equal(vals, np.array([0.0, 2.0, 10.0, 11.0]))
+ vals = coord.get_value([0, 1, 2, 3, 4, -5, -4, -3, -2, -1])
+ assert np.array_equal(
+ vals, np.array([0.0, 1.0, 2.0, 10.0, 11.0, 0.0, 1.0, 2.0, 10.0, 11.0])
+ )
 # bounds
 with pytest.raises(IndexError):
 coord.get_value(-6)
 with pytest.raises(IndexError):
 coord.get_value(5)
+ with pytest.raises(IndexError):
+ coord.get_value([0, 5])
+ with pytest.raises(IndexError):
+ coord.get_value([-6, 0])
 def test_getitem(self):
 coord = self.make_coord()
@@ -127,6 +139,36 @@ def test_getitem(self):
 # slice -> SampledCoordinate or compatible
 sub = coord[1:4]
 assert isinstance(sub, SampledCoordinate)
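+ # the fixture samples are [0, 1, 2, 10, 11], so 1:4 crosses the
+ # boundary between the two tie segments and keeps [1.0, 2.0, 10.0]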
bound indexing + none_start = coord[None:3] + assert isinstance(none_start, SampledCoordinate) + assert np.array_equal(none_start.values, np.array([0.0, 1.0, 2.0])) + none_end = coord[2:None] + assert isinstance(none_end, SampledCoordinate) + assert np.array_equal(none_end.values, np.array([2.0, 10.0, 11.0])) + # step slice -> SampledCoordinate + step = coord[::2] + assert isinstance(step, SampledCoordinate) + assert np.array_equal(step.values, np.array([0.0, 2.0, 11.0])) + # step slice with start/stop + step_ss = coord[1:5:2] + assert isinstance(step_ss, SampledCoordinate) + assert np.array_equal(step_ss.values, np.array([1.0, 10.0])) + # negative step slice with start/stop + step_ss_neg = coord[-4:-1:2] + assert isinstance(step_ss_neg, SampledCoordinate) + assert np.array_equal(step_ss_neg.values, np.array([1.0, 10.0])) + # negative step slice -> raise NotImplementedError + with pytest.raises(NotImplementedError): + coord[::-1] # array -> DenseCoordinate of values arr = coord[[0, 4]] assert isinstance(arr, DenseCoordinate) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index 484a8fc..eb107c0 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -370,7 +370,7 @@ def format_index(self, idx, bounds="raise"): idx = np.clip(idx, 0, len(self)) return idx - def format_index_slice(self, slc): + def format_index_slice(self, slc): # TODO: use slice.indices instead start = slc.start stop = slc.stop step = slc.step diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index 3fc4876..82284b1 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -235,33 +235,31 @@ def get_value(self, index): ) def slice_index(self, index_slice): - index_slice = self.format_index_slice(index_slice) + # normalize slice + start, stop, step = index_slice.indices(len(self)) - # TODO: optimize when start and/or stop are None + if step < 0: + raise NotImplementedError("negative slice step is not implemented") - # get indices relative to tie points - relative_start_index = np.clip( - index_slice.start - self.tie_indices, 0, self.tie_lengths - ) - relative_stop_index = np.clip( - index_slice.stop - self.tie_indices, 0, self.tie_lengths - ) + # align stop + stop += (start - stop) % step # TODO: check for negative step - # keep segments with data - mask = relative_start_index < relative_stop_index + # get relative start and stop within each tie + q, r = np.divmod(start - self.tie_indices, step) + lo = np.maximum(q, 0) * step + r - # compute new tie points ane lengths - tie_values = ( - self.tie_values[mask] + relative_start_index[mask] * self.sampling_interval - ) - tie_lengths = relative_stop_index[mask] - relative_start_index[mask] + q, r = np.divmod(self.tie_indices + self.tie_lengths - stop, step) + hi = self.tie_lengths - np.maximum(q, 0) * step + r - # adjust for step if needed - if index_slice.step == 1: - sampling_interval = self.sampling_interval - else: - tie_lengths = (self.tie_lengths + index_slice.step - 1) // index_slice.step - sampling_interval = self.sampling_interval * index_slice.step + # filter empty segments + mask = hi > lo + lo = lo[mask] + hi = hi[mask] + + # compute new tie values, tie lengths and sampling interval + tie_values = self.tie_values[mask] + lo * self.sampling_interval + tie_lengths = (hi - lo) // step + sampling_interval = self.sampling_interval * step # build new coordinate data = { From be45e77cd58033746afc70873b529a0b5d941ea8 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 24 Dec 2025 17:34:37 +0100 
Subject: [PATCH 40/63] Remove unused format_index_slice method and related test. Use slice.indices instead. --- docs/api/xdas.md | 1 - tests/coordinates/test_interp.py | 4 ---- tests/coordinates/test_sampled.py | 1 + xdas/coordinates/core.py | 18 ------------------ xdas/coordinates/interp.py | 9 +++------ xdas/coordinates/sampled.py | 4 +--- 6 files changed, 5 insertions(+), 32 deletions(-) diff --git a/docs/api/xdas.md b/docs/api/xdas.md index 3fcfd50..118b079 100644 --- a/docs/api/xdas.md +++ b/docs/api/xdas.md @@ -318,7 +318,6 @@ Methods InterpCoordinate.get_value InterpCoordinate.format_index InterpCoordinate.slice_index - InterpCoordinate.format_index_slice InterpCoordinate.get_indexer InterpCoordinate.slice_indexer InterpCoordinate.decimate diff --git a/tests/coordinates/test_interp.py b/tests/coordinates/test_interp.py index 95b1d77..f9ef27a 100644 --- a/tests/coordinates/test_interp.py +++ b/tests/coordinates/test_interp.py @@ -147,10 +147,6 @@ def test_format_index(self): # TODO pass - def test_format_index_slice(self): - # TODO - pass - def test_get_value(self): coord = InterpCoordinate({"tie_indices": [0, 8], "tie_values": [100.0, 900.0]}) assert coord.get_value(0) == 100.0 diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py index 26a2482..9ee05dd 100644 --- a/tests/coordinates/test_sampled.py +++ b/tests/coordinates/test_sampled.py @@ -191,6 +191,7 @@ def test_slice_negative_and_out_of_bounds(self): # negative slice indices s = coord[-4:-1] assert isinstance(s, SampledCoordinate) + assert np.array_equal(s.values, np.array([1.0, 2.0, 10.0])) # slice that extends beyond bounds should clip s2 = coord[-10:10] assert s2.equals(coord) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index eb107c0..6ee921b 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -370,24 +370,6 @@ def format_index(self, idx, bounds="raise"): idx = np.clip(idx, 0, len(self)) return idx - def format_index_slice(self, slc): # TODO: use slice.indices instead - start = slc.start - stop = slc.stop - step = slc.step - if start is None: - start = 0 - if stop is None: - stop = len(self) - if step is None: - step = 1 - if step <= 0: - raise NotImplementedError( - "negative or zero step when slicing is not supported yet" - ) - start = self.format_index(start, bounds="clip") - stop = self.format_index(stop, bounds="clip") - return slice(start, stop, step) - def slice_indexer(self, start=None, stop=None, step=None, endpoint=True): if start is not None: try: diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py index e95124d..661c4d0 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -190,12 +190,9 @@ def get_value(self, index): return forward(index, self.tie_indices, self.tie_values) def slice_index(self, index_slice): - index_slice = self.format_index_slice(index_slice) - start_index, stop_index, step_index = ( - index_slice.start, - index_slice.stop, - index_slice.step, - ) + start_index, stop_index, step_index = index_slice.indices(len(self)) + if step_index < 0: + raise NotImplementedError("negative slice step is not implemented") if stop_index - start_index <= 0: return self.__class__(dict(tie_indices=[], tie_values=[]), dim=self.dim) elif (stop_index - start_index) <= step_index: diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index 82284b1..8945755 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -226,9 +226,7 @@ def equals(self, other): ) def 
get_value(self, index): - index = self.format_index(index) - if np.any(index < 0) or np.any(index >= len(self)): - raise IndexError("index is out of bounds") + index = self.format_index(index, bounds="raise") reference = np.searchsorted(self.tie_indices, index, side="right") - 1 return self.tie_values[reference] + ( (index - self.tie_indices[reference]) * self.sampling_interval From 6f9716d0c88c02faa607e3e7c379f3b6bb1eb397 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 7 Jan 2026 14:49:35 +0100 Subject: [PATCH 41/63] Refactor coordinates method order. --- xdas/coordinates/default.py | 59 ++++++++++++++---------- xdas/coordinates/dense.py | 13 ++++-- xdas/coordinates/interp.py | 89 +++++++++++++++++++----------------- xdas/coordinates/sampled.py | 90 ++++++++++++++++++------------------- xdas/coordinates/scalar.py | 6 +-- 5 files changed, 142 insertions(+), 115 deletions(-) diff --git a/xdas/coordinates/default.py b/xdas/coordinates/default.py index 8023479..df24191 100644 --- a/xdas/coordinates/default.py +++ b/xdas/coordinates/default.py @@ -8,16 +8,47 @@ def __new__(cls, *args, **kwargs): return object.__new__(cls) def __init__(self, data=None, dim=None, dtype=None): + # empty if data is None: data = {"size": 0} + + # parse data data, dim = parse(data, dim) if not self.isvalid(data): raise TypeError("`data` must be a mapping {'size': }") + + # check dtype if dtype is not None: raise ValueError("`dtype` is not supported for DefaultCoordinate") + + # store data self.data = data self.dim = dim + @property + def empty(self): + return bool(self.data["size"]) + + @property + def dtype(self): + return np.int64 + + @property + def ndim(self): + return 1 + + @property + def shape(self): + return (len(self),) + + @staticmethod + def isvalid(data): + match data: + case {"size": None | int(_)}: + return True + case _: + return False + def __len__(self): if self.data["size"] is None: return 0 @@ -32,33 +63,15 @@ def __getitem__(self, item): def __array__(self, dtype=None): return np.arange(self.data["size"], dtype=dtype) - @staticmethod - def isvalid(data): - match data: - case {"size": None | int(_)}: - return True - case _: - return False + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + raise NotImplementedError + + def __array_function__(self, func, types, args, kwargs): + raise NotImplementedError def isdefault(self): return True - @property - def empty(self): - return bool(self.data["size"]) - - @property - def dtype(self): - return np.int64 - - @property - def ndim(self): - return 1 - - @property - def shape(self): - return (len(self),) - def get_sampling_interval(self, cast=True): return 1 diff --git a/xdas/coordinates/dense.py b/xdas/coordinates/dense.py index 698bdb2..ff8c535 100644 --- a/xdas/coordinates/dense.py +++ b/xdas/coordinates/dense.py @@ -9,14 +9,23 @@ def __new__(cls, *args, **kwargs): return object.__new__(cls) def __init__(self, data=None, dim=None, dtype=None): + # empty if data is None: data = [] + + # parse data data, dim = parse(data, dim) if not self.isvalid(data): raise TypeError("`data` must be array-like") + + # store data self.data = np.asarray(data, dtype=dtype) self.dim = dim + @property + def index(self): + return pd.Index(self.data) + @staticmethod def isvalid(data): data = np.asarray(data) @@ -25,10 +34,6 @@ def isvalid(data): def isdense(self): return True - @property - def index(self): - return pd.Index(self.data) - def equals(self, other): if isinstance(other, self.__class__): return ( diff --git a/xdas/coordinates/interp.py 
b/xdas/coordinates/interp.py index 661c4d0..741e4b4 100644 --- a/xdas/coordinates/interp.py +++ b/xdas/coordinates/interp.py @@ -29,8 +29,11 @@ def __new__(cls, *args, **kwargs): return object.__new__(cls) def __init__(self, data=None, dim=None, dtype=None): + # empty if data is None: data = {"tie_indices": [], "tie_values": []} + + # parse data data, dim = parse(data, dim) if not self.__class__.isvalid(data): raise TypeError("`data` must be dict-like") @@ -40,12 +43,16 @@ def __init__(self, data=None, dim=None, dtype=None): ) tie_indices = np.asarray(data["tie_indices"]) tie_values = np.asarray(data["tie_values"], dtype=dtype) + + # check shapes if not tie_indices.ndim == 1: raise ValueError("`tie_indices` must be 1D") if not tie_values.ndim == 1: raise ValueError("`tie_values` must be 1D") if not len(tie_indices) == len(tie_values): raise ValueError("`tie_indices` and `tie_values` must have the same length") + + # check dtypes if not tie_indices.shape == (0,): if not np.issubdtype(tie_indices.dtype, np.integer): raise ValueError("`tie_indices` must be integer-like") @@ -58,10 +65,50 @@ def __init__(self, data=None, dim=None, dtype=None): or np.issubdtype(tie_values.dtype, np.datetime64) ): raise ValueError("`tie_values` must have either numeric or datetime dtype") + + # store data tie_indices = tie_indices.astype(int) self.data = dict(tie_indices=tie_indices, tie_values=tie_values) self.dim = dim + @property + def tie_indices(self): + return self.data["tie_indices"] + + @property + def tie_values(self): + return self.data["tie_values"] + + @property + def dtype(self): + return self.tie_values.dtype + + @property + def empty(self): + return self.tie_indices.shape == (0,) + + @property + def ndim(self): + return self.tie_values.ndim + + @property + def shape(self): + return (len(self),) + + @property + def indices(self): + if self.empty: + return np.array([], dtype="int") + else: + return np.arange(self.tie_indices[-1] + 1) + + @property + def values(self): + if self.empty: + return np.array([], dtype=self.dtype) + else: + return self.get_value(self.indices) + @staticmethod def isvalid(data): match data: @@ -70,9 +117,6 @@ def isvalid(data): case _: return False - def isinterp(self): - return True - def __len__(self): if self.empty: return 0 @@ -126,43 +170,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def __array_function__(self, func, types, args, kwargs): raise NotImplementedError - @property - def tie_indices(self): - return self.data["tie_indices"] - - @property - def tie_values(self): - return self.data["tie_values"] - - @property - def empty(self): - return self.tie_indices.shape == (0,) - - @property - def dtype(self): - return self.tie_values.dtype - - @property - def ndim(self): - return self.tie_values.ndim - - @property - def shape(self): - return (len(self),) - - @property - def indices(self): - if self.empty: - return np.array([], dtype="int") - else: - return np.arange(self.tie_indices[-1] + 1) - - @property - def values(self): - if self.empty: - return np.array([], dtype=self.dtype) - else: - return self.get_value(self.indices) + def isinterp(self): + return True def get_sampling_interval(self, cast=True): if len(self) < 2: diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index 8945755..05d3253 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -96,6 +96,44 @@ def sampling_interval(self): def dtype(self): return self.tie_values.dtype + @property + def tie_indices(self): + return np.concatenate(([0], 
np.cumsum(self.tie_lengths[:-1]))) + + @property + def empty(self): + return self.tie_values.shape == (0,) + + @property + def ndim(self): + return self.tie_values.ndim + + @property + def shape(self): + return (len(self),) + + @property + def indices(self): + if self.empty: + return np.array([], dtype="int") + else: + return np.arange(len(self)) + + @property + def values(self): + if self.empty: + return np.array([], dtype=self.dtype) + else: + return self.get_value(self.indices) + + @property + def start(self): + return self.tie_values[0] + + @property + def end(self): + return self.tie_values[-1] + self.sampling_interval * self.tie_lengths[-1] + @staticmethod def isvalid(data): match data: @@ -108,15 +146,6 @@ def isvalid(data): case _: return False - def issampled(self): - return True - - def get_sampling_interval(self, cast=True): - delta = self.sampling_interval - if cast and np.issubdtype(delta.dtype, np.timedelta64): - delta = delta / np.timedelta64(1, "s") - return delta - def __len__(self): if self.empty: return 0 @@ -178,43 +207,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def __array_function__(self, func, types, args, kwargs): raise NotImplementedError - @property - def tie_indices(self): - return np.concatenate(([0], np.cumsum(self.tie_lengths[:-1]))) - - @property - def empty(self): - return self.tie_values.shape == (0,) - - @property - def ndim(self): - return self.tie_values.ndim - - @property - def shape(self): - return (len(self),) - - @property - def indices(self): - if self.empty: - return np.array([], dtype="int") - else: - return np.arange(len(self)) - - @property - def values(self): - if self.empty: - return np.array([], dtype=self.dtype) - else: - return self.get_value(self.indices) - - @property - def start(self): - return self.tie_values[0] + def issampled(self): + return True - @property - def end(self): - return self.tie_values[-1] + self.sampling_interval * self.tie_lengths[-1] + def get_sampling_interval(self, cast=True): + delta = self.sampling_interval + if cast and np.issubdtype(delta.dtype, np.timedelta64): + delta = delta / np.timedelta64(1, "s") + return delta def equals(self, other): return ( diff --git a/xdas/coordinates/scalar.py b/xdas/coordinates/scalar.py index f0cda93..8a97da4 100644 --- a/xdas/coordinates/scalar.py +++ b/xdas/coordinates/scalar.py @@ -26,9 +26,6 @@ def dim(self, value): if value is not None: raise ValueError("A scalar coordinate cannot have a `dim` other that None") - def get_sampling_interval(self, cast=True): - return None - @staticmethod def isvalid(data): data = np.asarray(data) @@ -37,6 +34,9 @@ def isvalid(data): def isscalar(self): return True + def get_sampling_interval(self, cast=True): + return None + def equals(self, other): if isinstance(other, self.__class__): return self.data == other.data From 5d733d460a56b4f5c338895bdfc17a1019d300a0 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 7 Jan 2026 16:55:39 +0100 Subject: [PATCH 42/63] Fix and test get_indexer. 
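A rough sketch of the lookup semantics this patch pins down (the two-segment coordinate below is hypothetical and the import path is assumed; the expected results are taken from the new tests):

    from xdas.coordinates import SampledCoordinate

    # two segments sampled at 1.0: values [0, 1, 2] and [10, 11]
    coord = SampledCoordinate(
        {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0}
    )
    coord.get_indexer(0.0)                     # 0: exact sample
    coord.get_indexer(10.0)                    # 3: first sample of the second segment
    coord.get_indexer(0.6, method="nearest")   # 1: rounds to the closest sample
    coord.get_indexer(1.5)                     # KeyError: off-sample and method=None

Values that fall in a gap are resolved against the closest segment; values that fall where two segments overlap are ambiguous and raise a KeyError.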
---
 tests/coordinates/test_sampled.py | 27 +++++++++++++++++++++
 xdas/coordinates/sampled.py       | 40 +++++++++++++++++++++++++++----
 2 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py
index 9ee05dd..4004220 100644
--- a/tests/coordinates/test_sampled.py
+++ b/tests/coordinates/test_sampled.py
@@ -206,6 +206,33 @@ def test_slice_step_decimate(self):
         assert decimated.equals(stepped)
 
 
+class TestSampledCoordinateValueBasedIndexing:
+    def make_coord(self):
+        return SampledCoordinate(
+            {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0}
+        )  # two segments: [0, 1, 2] and [10, 11]
+
+    def test_get_indexer_exact(self):
+        coord = self.make_coord()
+        assert coord.get_indexer(0.0, method=None) == 0
+        assert coord.get_indexer(10.0, method=None) == 3
+        with pytest.raises(KeyError):
+            coord.get_indexer(1.5, method=None)
+        with pytest.raises(KeyError):
+            coord.get_indexer(5.0, method=None)
+
+    def test_get_indexer_nearest(self):
+        coord = self.make_coord()
+        vals = [0.4, 0.6, 10.4, 10.6, -10.0, 20.0, 6.4, 6.6, 6.5]
+        expected = [0, 1, 3, 4, 0, 4, 2, 3, 3]
+        for v, e in zip(vals, expected):
+            idx = coord.get_indexer(v, method="nearest")
+            print(f"Value: {v}, Index: {idx}, Expected: {e}")
+            assert idx == e
+        idxs = coord.get_indexer(vals, method="nearest")
+        assert np.array_equal(idxs, np.array(expected))
+
+
 class TestSampledCoordinateAppendErrors:
     def test_append_sampling_interval_mismatch(self):
         coord1 = SampledCoordinate(
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 05d3253..7f4baa6 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -272,26 +272,58 @@ def get_indexer(self, value, method=None):
             value = np.datetime64(value)
         else:
             value = np.asarray(value)
-        # Check that value lies within the coordinate value range (vectorized)
-        if np.any(value < self.start) or np.any(value >= self.end):
-            raise KeyError("index not found")
         if not is_strictly_increasing(
             self.tie_values
         ):  # TODO: make it work even in this case
             raise ValueError("tie_values must be strictly increasing")
+
+        # find preceding tie point
         reference = np.searchsorted(self.tie_values, value, side="right") - 1
+        reference = np.maximum(reference, 0)
+
+        # overlaps
+        before = np.maximum(reference - 1, 0)
+        ends = (
+            self.tie_values[before] + self.tie_lengths[before] * self.sampling_interval
+        )
+        if np.any((reference > 0) & (value < ends)):
+            raise KeyError("value is in an overlap region")
+
+        # gap
+        after = np.minimum(reference + 1, len(self.tie_values) - 1)
+        ends = (
+            self.tie_values[reference]
+            + self.tie_lengths[reference] * self.sampling_interval
+        )
+        mask = (reference < len(self.tie_values) - 1) & (
+            value - ends >= self.tie_values[after] - value
+        )
+        reference = np.where(mask, after, reference)
+
         offset = (value - self.tie_values[reference]) / self.sampling_interval
+
         match method:
             case None:
-                if np.any(offset % 1 != 0):
+                if np.any(
+                    (offset % 1 != 0)
+                    | (offset < 0)
+                    | (offset >= self.tie_lengths[reference])
+                ):
                     raise KeyError("index not found")
                 offset = offset.astype(int)
             case "nearest":
                 offset = np.round(offset).astype(int)
+                offset = np.clip(offset, 0, self.tie_lengths[reference] - 1)
             case "ffill":
                 offset = np.floor(offset).astype(int)
+                if np.any(offset > self.tie_lengths[reference] - 1):
+                    raise KeyError("index not found")
+                offset = np.maximum(offset, 0)
             case "bfill":
                 offset = np.ceil(offset).astype(int)
+                if np.any(offset < 0):
+                    raise KeyError("index not 
found") + offset = np.minimum(offset, self.tie_lengths[reference] - 1) case _: raise ValueError( "method must be one of `None`, 'nearest', 'ffill', or 'bfill'" From 98215a405bef7be77aad5f7178ef63cdedc6bffd Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 8 Jan 2026 11:17:55 +0100 Subject: [PATCH 43/63] Fix & Test SampledCoordinate.get_indexer. --- tests/coordinates/test_sampled.py | 36 ++++++++++++++++++++++++++--- xdas/coordinates/sampled.py | 38 ++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py index 4004220..c90dcb3 100644 --- a/tests/coordinates/test_sampled.py +++ b/tests/coordinates/test_sampled.py @@ -223,15 +223,45 @@ def test_get_indexer_exact(self): def test_get_indexer_nearest(self): coord = self.make_coord() - vals = [0.4, 0.6, 10.4, 10.6, -10.0, 20.0, 6.4, 6.6, 6.5] - expected = [0, 1, 3, 4, 0, 4, 2, 3, 3] + vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, -10.0, 20.0, 5.9, 6.0, 6.1] + expected = [0, 0, 1, 1, 3, 4, 0, 4, 2, 3, 3] for v, e in zip(vals, expected): idx = coord.get_indexer(v, method="nearest") - print(f"Value: {v}, Index: {idx}, Expected: {e}") assert idx == e + # vectorized idxs = coord.get_indexer(vals, method="nearest") assert np.array_equal(idxs, np.array(expected)) + def test_get_indexer_ffill(self): + coord = self.make_coord() + vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, 20.0, 5.9, 6.0, 6.1] + expected = [0, 0, 0, 1, 3, 3, 4, 2, 2, 2] + for v, e in zip(vals, expected): + idx = coord.get_indexer(v, method="ffill") + assert idx == e + with pytest.raises(KeyError): + coord.get_indexer(-10.0, method="ffill") + # vectorized + idxs = coord.get_indexer(vals, method="ffill") + assert np.array_equal(idxs, np.array(expected)) + with pytest.raises(KeyError): + coord.get_indexer([-10.0, 0.0], method="ffill") + + def test_get_indexer_bfill(self): + coord = self.make_coord() + vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, -10.0, 5.9, 6.0, 6.1] + expected = [0, 1, 1, 1, 4, 4, 0, 3, 3, 3] + for v, e in zip(vals, expected): + idx = coord.get_indexer(v, method="bfill") + assert idx == e + with pytest.raises(KeyError): + coord.get_indexer(20.0, method="bfill") + # vectorized + idxs = coord.get_indexer(vals, method="bfill") + assert np.array_equal(idxs, np.array(expected)) + with pytest.raises(KeyError): + coord.get_indexer([11.0, 20.0], method="bfill") + class TestSampledCoordinateAppendErrors: def test_append_sampling_interval_mismatch(self): diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index 7f4baa6..e55df06 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -283,22 +283,34 @@ def get_indexer(self, value, method=None): # overlaps before = np.maximum(reference - 1, 0) - ends = ( - self.tie_values[before] + self.tie_lengths[before] * self.sampling_interval + end = ( + self.tie_values[before] + + (self.tie_lengths[before] - 1) * self.sampling_interval ) - if np.any((reference > 0) & (value < ends)): + if np.any((reference > 0) & (value < end)): raise KeyError("value is in an overlap region") # gap after = np.minimum(reference + 1, len(self.tie_values) - 1) - ends = ( + end = ( self.tie_values[reference] - + self.tie_lengths[reference] * self.sampling_interval + + (self.tie_lengths[reference] - 1) * self.sampling_interval ) - mask = (reference < len(self.tie_values) - 1) & ( - value - ends >= self.tie_values[after] - value - ) - reference = np.where(mask, after, reference) + match method: + case "nearest": + mask = 
(reference < len(self.tie_values) - 1) & ( + value - end >= self.tie_values[after] - value + ) + reference = np.where(mask, after, reference) + case "bfill": + mask = (reference < len(self.tie_values) - 1) & (value >= end) + reference = np.where(mask, after, reference) + case "ffill" | None: + pass + case _: + raise ValueError( + "method must be one of `None`, 'nearest', 'ffill', or 'bfill'" + ) offset = (value - self.tie_values[reference]) / self.sampling_interval @@ -316,14 +328,14 @@ def get_indexer(self, value, method=None): offset = np.clip(offset, 0, self.tie_lengths[reference] - 1) case "ffill": offset = np.floor(offset).astype(int) - if np.any(offset > self.tie_lengths[reference] - 1): + if np.any(offset < 0): raise KeyError("index not found") - offset = np.maximum(offset, 0) + offset = np.minimum(offset, self.tie_lengths[reference] - 1) case "bfill": offset = np.ceil(offset).astype(int) - if np.any(offset < 0): + if np.any(offset > self.tie_lengths[reference] - 1): raise KeyError("index not found") - offset = np.minimum(offset, self.tie_lengths[reference] - 1) + offset = np.maximum(offset, 0) case _: raise ValueError( "method must be one of `None`, 'nearest', 'ffill', or 'bfill'" From 0539b0009443b44b467ab4b5fd83cec3cf24bd6c Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Thu, 8 Jan 2026 11:36:03 +0100 Subject: [PATCH 44/63] Add datetime tests to SampledCoordinate.get_indexer. --- tests/coordinates/test_sampled.py | 73 +++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py index c90dcb3..6c770b8 100644 --- a/tests/coordinates/test_sampled.py +++ b/tests/coordinates/test_sampled.py @@ -212,7 +212,18 @@ def make_coord(self): {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} ) # two segments: [0, 1, 2] and [10, 11] + def make_coord_datetime(self): + t0 = np.datetime64("2000-01-01T00:00:00") + return SampledCoordinate( + { + "tie_values": [t0, t0 + np.timedelta64(10, "s")], + "tie_lengths": [3, 2], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + def test_get_indexer_exact(self): + # float coord = self.make_coord() assert coord.get_indexer(0.0, method=None) == 0 assert coord.get_indexer(10.0, method=None) == 3 @@ -221,10 +232,34 @@ def test_get_indexer_exact(self): with pytest.raises(KeyError): coord.get_indexer(5.0, method=None) + # datetime + coord = self.make_coord_datetime() + t0 = coord[0].values + assert coord.get_indexer(t0, method=None) == 0 + assert coord.get_indexer(t0 + np.timedelta64(10, "s"), method=None) == 3 + with pytest.raises(KeyError): + coord.get_indexer(t0 + np.timedelta64(1500, "ms"), method=None) + with pytest.raises(KeyError): + coord.get_indexer(t0 + np.timedelta64(5, "s"), method=None) + def test_get_indexer_nearest(self): + # float coord = self.make_coord() vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, -10.0, 20.0, 5.9, 6.0, 6.1] expected = [0, 0, 1, 1, 3, 4, 0, 4, 2, 3, 3] + # scalar + for v, e in zip(vals, expected): + idx = coord.get_indexer(v, method="nearest") + assert idx == e + # vectorized + idxs = coord.get_indexer(vals, method="nearest") + assert np.array_equal(idxs, np.array(expected)) + + # datetime + coord = self.make_coord_datetime() + t0 = coord[0].values + vals = t0 + np.rint(1000 * np.array(vals)).astype("timedelta64[ms]") + # scalar for v, e in zip(vals, expected): idx = coord.get_indexer(v, method="nearest") assert idx == e @@ -233,9 +268,11 @@ def test_get_indexer_nearest(self): assert np.array_equal(idxs, 
np.array(expected)) def test_get_indexer_ffill(self): + # float coord = self.make_coord() vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, 20.0, 5.9, 6.0, 6.1] expected = [0, 0, 0, 1, 3, 3, 4, 2, 2, 2] + # scalar for v, e in zip(vals, expected): idx = coord.get_indexer(v, method="ffill") assert idx == e @@ -247,10 +284,30 @@ def test_get_indexer_ffill(self): with pytest.raises(KeyError): coord.get_indexer([-10.0, 0.0], method="ffill") + # datetime + coord = self.make_coord_datetime() + t0 = coord[0].values + vals = t0 + np.rint(1000 * np.array(vals)).astype("timedelta64[ms]") + print(vals) + # scalar + for v, e in zip(vals, expected): + idx = coord.get_indexer(v, method="ffill") + print(f"v={v}, idx={idx}, expected={e}") + assert idx == e + with pytest.raises(KeyError): + coord.get_indexer(t0 - np.timedelta64(10, "s"), method="ffill") + # vectorized + idxs = coord.get_indexer(vals, method="ffill") + assert np.array_equal(idxs, np.array(expected)) + with pytest.raises(KeyError): + coord.get_indexer([t0 - np.timedelta64(10, "s"), t0], method="ffill") + def test_get_indexer_bfill(self): + # float coord = self.make_coord() vals = [0.0, 0.4, 0.6, 1.0, 10.4, 10.6, -10.0, 5.9, 6.0, 6.1] expected = [0, 1, 1, 1, 4, 4, 0, 3, 3, 3] + # scalar for v, e in zip(vals, expected): idx = coord.get_indexer(v, method="bfill") assert idx == e @@ -262,6 +319,22 @@ def test_get_indexer_bfill(self): with pytest.raises(KeyError): coord.get_indexer([11.0, 20.0], method="bfill") + # datetime + coord = self.make_coord_datetime() + t0 = coord[0].values + vals = t0 + np.rint(1000 * np.array(vals)).astype("timedelta64[ms]") + # scalar + for v, e in zip(vals, expected): + idx = coord.get_indexer(v, method="bfill") + assert idx == e + with pytest.raises(KeyError): + coord.get_indexer(t0 + np.timedelta64(20, "s"), method="bfill") + # vectorized + idxs = coord.get_indexer(vals, method="bfill") + assert np.array_equal(idxs, np.array(expected)) + with pytest.raises(KeyError): + coord.get_indexer([t0, t0 + np.timedelta64(20, "s")], method="bfill") + class TestSampledCoordinateAppendErrors: def test_append_sampling_interval_mismatch(self): From c21746e3ceac1bbda8ad92518a4d5616417a8b8b Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 14 Jan 2026 15:49:10 +0100 Subject: [PATCH 45/63] WIP write and read sampled coordinates. 
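The intended round trip, roughly (this mirrors the new test_to_dataset_and_back; `da` stands for any DataArray carrying sampled coordinates):

    import xarray as xr
    import xdas as xd

    dataset = xr.Dataset()
    variable_attrs = {}
    # each coordinate serializes itself into tie-point variables plus attrs
    for coord in da.coords.values():
        dataset, variable_attrs = coord.to_dataset(dataset, variable_attrs)
    dataset["data"] = xr.DataArray(attrs=variable_attrs)

    # the attrs alone are enough to rebuild the coordinates on read
    coords = xd.Coordinates.from_dataset(dataset, "data")
    assert coords.equals(da.coords)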
--- tests/coordinates/test_sampled.py | 126 ++++++++++++++++++++++++------ xdas/coordinates/core.py | 2 +- xdas/coordinates/sampled.py | 27 ++++--- 3 files changed, 118 insertions(+), 37 deletions(-) diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py index 6c770b8..125f4a6 100644 --- a/tests/coordinates/test_sampled.py +++ b/tests/coordinates/test_sampled.py @@ -1,7 +1,10 @@ +import tempfile + import numpy as np import pandas as pd import pytest +import xdas as xd from xdas.coordinates import ( Coordinate, DenseCoordinate, @@ -288,11 +291,9 @@ def test_get_indexer_ffill(self): coord = self.make_coord_datetime() t0 = coord[0].values vals = t0 + np.rint(1000 * np.array(vals)).astype("timedelta64[ms]") - print(vals) # scalar for v, e in zip(vals, expected): idx = coord.get_indexer(v, method="ffill") - print(f"v={v}, idx={idx}, expected={e}") assert idx == e with pytest.raises(KeyError): coord.get_indexer(t0 - np.timedelta64(10, "s"), method="ffill") @@ -336,7 +337,56 @@ def test_get_indexer_bfill(self): coord.get_indexer([t0, t0 + np.timedelta64(20, "s")], method="bfill") -class TestSampledCoordinateAppendErrors: +class TestSampledCoordinateAppend: + def test_append_two_coords(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate( + {"tie_values": [10.0], "tie_lengths": [2], "sampling_interval": 1.0} + ) + expected = SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + result = coord1.append(coord2) + assert result.equals(expected) + + def test_append_two_datetime_coords(self): + coord1 = SampledCoordinate( + { + "tie_values": [np.datetime64("2000-01-01T00:00:00")], + "tie_lengths": [3], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + coord2 = SampledCoordinate( + { + "tie_values": [np.datetime64("2000-01-01T00:00:10")], + "tie_lengths": [2], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + expected = SampledCoordinate( + { + "tie_values": [ + np.datetime64("2000-01-01T00:00:00"), + np.datetime64("2000-01-01T00:00:10"), + ], + "tie_lengths": [3, 2], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + result = coord1.append(coord2) + assert result.equals(expected) + + def test_append_empty(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = SampledCoordinate() + assert coord1.append(coord2).equals(coord1) + assert coord2.append(coord1).equals(coord1) + def test_append_sampling_interval_mismatch(self): coord1 = SampledCoordinate( {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} @@ -432,28 +482,6 @@ def test_slice_full(self): assert sliced.equals(coord) -class TestSampledCoordinateAppend: - def test_append_two_coords(self): - coord1 = SampledCoordinate( - {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} - ) - coord2 = SampledCoordinate( - {"tie_values": [10.0], "tie_lengths": [2], "sampling_interval": 1.0} - ) - result = coord1.append(coord2) - assert len(result) == 5 - assert result.tie_values[0] == 0.0 - assert result.tie_values[1] == 10.0 - - def test_append_empty(self): - coord1 = SampledCoordinate( - {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} - ) - coord2 = SampledCoordinate() - assert coord1.append(coord2).equals(coord1) - assert coord2.append(coord1).equals(coord1) - - class TestSampledCoordinateDecimate: def test_decimate(self): coord = SampledCoordinate( @@ -601,3 
+629,51 @@ def test_non_increasing_tie_values_raises(self): ) with pytest.raises(ValueError): coord.get_indexer(2.0) + + +class TestSampledCoordinateToNetCDF: + def make_dataarray(self): + return xd.DataArray( + np.random.rand(20, 30), + { + "time": { + "tie_values": [ + np.datetime64("2000-01-01T00:00:00.000000000"), + np.datetime64("2000-01-01T00:00:10.000000000"), + ], + "tie_lengths": [5, 15], + "sampling_interval": np.timedelta64(1_000_000_000, "ns").astype( + "timedelta64[ns]" + ), + }, + "distance": { + "tie_values": [0.0], + "tie_lengths": [30], + "sampling_interval": 1.0, + }, + }, + ) + + def test_to_dataset_and_back(self): + import xarray as xr + + da = self.make_dataarray() + dataset = xr.Dataset() + variable_attrs = {} + + # prepare metadata + for coord in da.coords.values(): + dataset, variable_attrs = coord.to_dataset(dataset, variable_attrs) + + dataset["data"] = xr.DataArray(attrs=variable_attrs) + coords = xd.Coordinates.from_dataset(dataset, "data") + + assert coords.equals(da.coords) + + def test_to_netcdf_and_back(self): + expected = self.make_dataarray() + + with tempfile.NamedTemporaryFile(suffix=".nc", delete=False) as file: + expected.to_netcdf(file.name) + result = xd.open_dataarray(file.name) + assert result.equals(expected) diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py index 6ee921b..4154467 100644 --- a/xdas/coordinates/core.py +++ b/xdas/coordinates/core.py @@ -214,7 +214,7 @@ def from_dict(cls, dct): @classmethod def from_dataset(cls, dataset, name): - return Coordinate.from_dataset(dataset, name) + return cls(Coordinate.from_dataset(dataset, name)) def copy(self, deep=True): if deep: diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index e55df06..f49b0ba 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -49,8 +49,9 @@ def __init__(self, data=None, dim=None, dtype=None): if not len(tie_values) == len(tie_lengths): raise ValueError("`tie_values` and `tie_lengths` must have the same length") - # check dtypes + # check dtypes and values if not empty: + # tie_values if not ( np.issubdtype(tie_values.dtype, np.number) or np.issubdtype(tie_values.dtype, np.datetime64) @@ -58,10 +59,14 @@ def __init__(self, data=None, dim=None, dtype=None): raise ValueError( "`tie_values` must have either numeric or datetime dtype" ) + + # tie_lengths if not np.issubdtype(tie_lengths.dtype, np.integer): raise ValueError("`tie_lengths` must be integer-like") if not np.all(tie_lengths > 0): raise ValueError("`tie_lengths` must be strictly positive integers") + + # sampling_interval if not np.isscalar(sampling_interval): raise ValueError("`sampling_interval` must be a scalar value") if np.issubdtype(tie_values.dtype, np.datetime64): @@ -418,7 +423,9 @@ def to_dict(self): return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} def to_dataset(self, dataset, attrs): - mapping = f"{self.name}: {self.name}_values {self.name}_lengths" + mapping = ( + f"{self.name}: {self.name}_values {self.name}_lengths {self.name}_sampling" + ) if "coordinate_sampling" in attrs: attrs["coordinate_sampling"] += " " + mapping else: @@ -430,12 +437,11 @@ def to_dataset(self, dataset, attrs): ) tie_lengths = self.tie_lengths interp_attrs = { - "sampling_interval": self.sampling_interval, - "tie_points_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", + "tie_point_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", } dataset.update( { - f"{self.name}_sampling": ((), np.nan, interp_attrs), + 
f"{self.name}_sampling": ((), self.sampling_interval, interp_attrs), f"{self.name}_values": (f"{self.name}_points", tie_values), f"{self.name}_lengths": (f"{self.name}_points", tie_lengths), } @@ -447,14 +453,13 @@ def from_dataset(cls, dataset, name): coords = {} mapping = dataset[name].attrs.pop("coordinate_sampling", None) if mapping is not None: - matches = re.findall(r"(\w+): (\w+) (\w+)", mapping) + matches = re.findall(r"(\w+): (\w+) (\w+) (\w+)", mapping) for match in matches: - dim, values, lengths = match - sampling_interval = ... + dim, values, lengths, sampling = match data = { - "tie_values": dataset[values], - "tie_lengths": dataset[lengths], - "sampling_interval": sampling_interval, + "tie_values": dataset[values].values, + "tie_lengths": dataset[lengths].values, + "sampling_interval": dataset[sampling].values[()], } coords[dim] = Coordinate(data, dim) return coords From 71a14e47e64b53147582092d00aaeb320bb67c38 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 14 Jan 2026 16:14:17 +0100 Subject: [PATCH 46/63] Fix CF conventions for SampledCoordinate. --- xdas/coordinates/sampled.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index f49b0ba..9853a28 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -423,9 +423,7 @@ def to_dict(self): return {"dim": self.dim, "data": data, "dtype": str(self.dtype)} def to_dataset(self, dataset, attrs): - mapping = ( - f"{self.name}: {self.name}_values {self.name}_lengths {self.name}_sampling" - ) + mapping = f"{self.name}: {self.name}_sampling" if "coordinate_sampling" in attrs: attrs["coordinate_sampling"] += " " + mapping else: @@ -437,7 +435,7 @@ def to_dataset(self, dataset, attrs): ) tie_lengths = self.tie_lengths interp_attrs = { - "tie_point_mapping": f"{self.name}_points: {self.name}_values {self.name}_lengths", + "tie_point_mapping": f"{self.dim}: {self.name}_values {self.name}_lengths", } dataset.update( { @@ -453,15 +451,18 @@ def from_dataset(cls, dataset, name): coords = {} mapping = dataset[name].attrs.pop("coordinate_sampling", None) if mapping is not None: - matches = re.findall(r"(\w+): (\w+) (\w+) (\w+)", mapping) + matches = re.findall(r"(\w+): (\w+)", mapping) for match in matches: - dim, values, lengths, sampling = match + name, sampling = match + dim, values, lengths = re.match( + r"(\w+): (\w+) (\w+)", dataset[sampling].attrs["tie_point_mapping"] + ).groups() data = { "tie_values": dataset[values].values, "tie_lengths": dataset[lengths].values, "sampling_interval": dataset[sampling].values[()], } - coords[dim] = Coordinate(data, dim) + coords[name] = Coordinate(data, dim) return coords @classmethod From 999c1d2e59f0f3863648bef1f8f12c12679e15ff Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 14 Jan 2026 17:00:03 +0100 Subject: [PATCH 47/63] Improve SampledCoordinate.simlify testing. 
---
 tests/coordinates/test_sampled.py | 43 ++++++++++++++++++++++++++++---
 xdas/coordinates/sampled.py       |  3 ++-
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py
index 125f4a6..c0f02b7 100644
--- a/tests/coordinates/test_sampled.py
+++ b/tests/coordinates/test_sampled.py
@@ -502,9 +502,46 @@ def test_simplify_continuous(self):
                 "sampling_interval": 1.0,
             }
         )
-        simplified = coord.simplify(tolerance=0.1)
-        # If continuous (end of first == start of second), should merge
-        assert len(simplified.tie_values) <= 2
+        result = coord.simplify()
+        expected = SampledCoordinate(
+            {"tie_values": [0.0], "tie_lengths": [5], "sampling_interval": 1.0}
+        )
+        assert result.equals(expected)
+
+    def test_simplify_with_tolerance(self):
+        # Two nearly continuous segments should merge with tolerance
+        coord = SampledCoordinate(
+            {
+                "tie_values": [0.0, 3.1],
+                "tie_lengths": [3, 2],
+                "sampling_interval": 1.0,
+            }
+        )
+        result = coord.simplify(tolerance=0.2)
+        expected = SampledCoordinate(
+            {"tie_values": [0.0], "tie_lengths": [5], "sampling_interval": 1.0}
+        )
+        assert result.equals(expected)
+        # more advanced test
+        coord = SampledCoordinate(
+            {
+                "tie_values": 10 * np.arange(100) + np.random.rand(100) * 0.2 - 0.1,
+                "tie_lengths": 10 * np.ones(100, dtype=int),
+                "sampling_interval": 1.0,
+            }
+        )
+        result = coord.simplify(tolerance=0.2)
+        assert len(result.tie_values) == 1
+        # extra test
+        coord = SampledCoordinate(
+            {
+                "tie_values": 10 * np.arange(100) + np.random.rand(100) * 0.2 - 0.1,
+                "tie_lengths": 10 * np.ones(100, dtype=int),
+                "sampling_interval": 1.0,
+            }
+        )
+        result = coord.simplify(tolerance=0.1)
+        assert np.all(np.abs(result.values - coord.values) <= 0.1)
 
 
 class TestSampledCoordinateGetIndexer:
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 9853a28..cc02a4e 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -69,6 +69,7 @@ def __init__(self, data=None, dim=None, dtype=None):
             # sampling_interval
             if not np.isscalar(sampling_interval):
                 raise ValueError("`sampling_interval` must be a scalar value")
+            sampling_interval = np.asarray(sampling_interval)[()]  # ensure numpy scalar
             if np.issubdtype(tie_values.dtype, np.datetime64):
                 if not np.issubdtype(
                     np.asarray(sampling_interval).dtype, np.timedelta64
@@ -378,7 +379,7 @@ def decimate(self, q):
 
     def simplify(self, tolerance=None):
         if tolerance is None:
-            tolerance = np.array(0, dtype=self.sampling_interval.dtype)
+            tolerance = np.array(0, dtype=self.sampling_interval.dtype)[()]
         tie_values = [self.tie_values[0]]
         tie_lengths = [self.tie_lengths[0]]
         for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):

From 795c251a2fd3aca61ca86f3db774e470533fddf0 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 14 Jan 2026 18:11:30 +0100
Subject: [PATCH 48/63] Achieve 100% test coverage for sampled.
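One behavioural fix rides along with the coverage work: get_split_indices used to keep the segment boundaries whose gap stays within tolerance; it now returns the indices where the gap exceeds it, per the new tests:

    coord = SampledCoordinate(
        {
            "tie_values": [0.0, 3.1, 10.0],
            "tie_lengths": [3, 2, 2],
            "sampling_interval": 1.0,
        }
    )
    coord.get_split_indices(tolerance=0.2)  # array([5]): only the jump to 10.0 counts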
--- tests/coordinates/test_sampled.py | 190 +++++++++++++++++++++++++++++- xdas/coordinates/sampled.py | 10 +- 2 files changed, 190 insertions(+), 10 deletions(-) diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py index c0f02b7..82d083d 100644 --- a/tests/coordinates/test_sampled.py +++ b/tests/coordinates/test_sampled.py @@ -36,6 +36,7 @@ def test_init_and_empty(self): assert empty.shape == (0,) assert empty.ndim == 1 assert empty.values.size == 0 + assert empty.indices.size == 0 def test_init_validation_numeric(self): # valid numeric @@ -45,6 +46,8 @@ def test_init_validation_numeric(self): assert len(coord) == 3 assert coord.start == 0.0 assert coord.end == 3.0 + assert coord.issampled() + coord.get_sampling_interval() == 1.0 # mismatched lengths with pytest.raises(ValueError): @@ -71,6 +74,12 @@ def test_init_validation_numeric(self): {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": [1.0]} ) + # non-numeric tie_values + with pytest.raises(ValueError): + SampledCoordinate( + {"tie_values": ["a"], "tie_lengths": [3], "sampling_interval": 1.0} + ) + def test_init_validation_datetime(self): # valid datetime with timedelta sampling interval t0 = np.datetime64("2000-01-01T00:00:00") @@ -83,6 +92,8 @@ def test_init_validation_datetime(self): ) assert coord.start == t0 assert coord.end == t0 + np.timedelta64(2, "s") + assert coord.get_sampling_interval() == 1 + assert coord.get_sampling_interval(cast=False) == np.timedelta64(1, "s") # invalid: datetime with numeric sampling interval with pytest.raises(ValueError): @@ -90,6 +101,34 @@ def test_init_validation_datetime(self): {"tie_values": [t0], "tie_lengths": [2], "sampling_interval": 1} ) + def test_invalid_data(self): + # lack of required keys + with pytest.raises(ValueError): + SampledCoordinate({"tie_values": [0.0], "tie_lengths": [3]}) + with pytest.raises(ValueError): + SampledCoordinate({"tie_lengths": [3], "sampling_interval": 1.0}) + with pytest.raises(ValueError): + SampledCoordinate({"tie_values": [0.0], "sampling_interval": 1.0}) + + def test_invalid_shapes(self): + # tie_values and tie_lengths must be 1D + with pytest.raises(ValueError): + SampledCoordinate( + { + "tie_values": [[0.0, 10.0]], + "tie_lengths": [3, 2], + "sampling_interval": 1.0, + } + ) + with pytest.raises(ValueError): + SampledCoordinate( + { + "tie_values": [0.0, 10.0], + "tie_lengths": [[3], [2]], + "sampling_interval": 1.0, + } + ) + class TestSampledCoordinateIndexing: def make_coord(self): @@ -133,6 +172,13 @@ def test_get_value_scalar_and_vector(self): with pytest.raises(IndexError): coord.get_value([-6, 0]) + def test_values(self): + coord = self.make_coord() + expected = np.array([0.0, 1.0, 2.0, 10.0, 11.0]) + assert np.array_equal(coord.values, expected) + assert np.array_equal(coord.__array__(), expected) + assert np.array_equal(coord.__array__(dtype=expected.dtype), expected) + def test_getitem(self): coord = self.make_coord() # scalar -> ScalarCoordinate @@ -176,11 +222,36 @@ def test_getitem(self): arr = coord[[0, 4]] assert isinstance(arr, DenseCoordinate) assert np.array_equal(arr.values, np.array([0.0, 11.0])) + # negative step is not implemented yet + with pytest.raises(NotImplementedError): + coord[4:0:-1] def test_repr(self): - # Just ensure it returns a string - coord = self.make_coord() - assert isinstance(repr(coord), str) + # floating coord + floating = self.make_coord() + assert isinstance(repr(floating), str) + # integer coord + integer = SampledCoordinate( + {"tie_values": [0], "tie_lengths": 
[3], "sampling_interval": 1} + ) + assert isinstance(repr(integer), str) + # empty coord + empty = SampledCoordinate() + assert repr(empty) == "empty coordinate" + # singleton + singleton = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [1], "sampling_interval": 1.0} + ) + assert isinstance(repr(singleton), str) + # numeric coord + datetime = SampledCoordinate( + { + "tie_values": [np.datetime64("2000-01-01T00:00:00")], + "tie_lengths": [3], + "sampling_interval": np.timedelta64(1, "s"), + } + ) + assert isinstance(repr(datetime), str) class TestSampledCoordinateSliceEdgeCases: @@ -336,6 +407,31 @@ def test_get_indexer_bfill(self): with pytest.raises(KeyError): coord.get_indexer([t0, t0 + np.timedelta64(20, "s")], method="bfill") + def test_get_indexer_overlap(self): + coord = SampledCoordinate( + {"tie_values": [0.0, 2.0], "tie_lengths": [3, 3], "sampling_interval": 1.0} + ) # segments: [0,1,2] and [2,3,4] + assert coord.get_indexer(1.0) == 1 + assert coord.get_indexer(3.0) == 4 + with pytest.raises(KeyError): + coord.get_indexer(2.0) + coord = SampledCoordinate( + {"tie_values": [0.0, 2.0], "tie_lengths": [5, 5], "sampling_interval": 1.0} + ) # segments: [0,1,2,3,4] and [2,3,4,5,6] + assert coord.get_indexer(1.0) == 1 + assert coord.get_indexer(6.0) == 9 + with pytest.raises(KeyError): + coord.get_indexer(2.0) + with pytest.raises(KeyError): + coord.get_indexer(2.5, method="nearest") + with pytest.raises(KeyError): + coord.get_indexer(4.0) + + def test_get_indexer_invalid_method(self): + coord = self.make_coord() + with pytest.raises(ValueError): + coord.get_indexer(0.0, method="invalid") + class TestSampledCoordinateAppend: def test_append_two_coords(self): @@ -411,6 +507,26 @@ def test_append_dtype_mismatch(self): with pytest.raises(ValueError): coord1.append(coord2) + def test_append_type_mismatch(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + coord2 = DenseCoordinate(np.array([10.0, 11.0])) + with pytest.raises(TypeError): + coord1.append(coord2) + + def test_append_dimension_mismatch(self): + coord1 = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0}, + dim="time", + ) + coord2 = SampledCoordinate( + {"tie_values": [10.0], "tie_lengths": [2], "sampling_interval": 1.0}, + dim="depth", + ) + with pytest.raises(ValueError): + coord1.append(coord2) + class TestSampledCoordinateDiscontinuitiesAvailabilities: def test_discontinuities_and_availabilities(self): @@ -454,6 +570,25 @@ def test_to_dict_contains_expected_keys(self): "sampling_interval", } + def test_to_dict_with_datetime(self): + t0 = np.datetime64("2000-01-01T00:00:00") + coord = SampledCoordinate( + { + "tie_values": [t0, t0 + np.timedelta64(10, "s")], + "tie_lengths": [3, 2], + "sampling_interval": np.timedelta64(1, "s"), + }, + dim="time", + ) + d = coord.to_dict() + assert "dim" in d + assert "data" in d + assert set(d["data"].keys()) >= { + "tie_values", + "tie_lengths", + "sampling_interval", + } + class TestSampledCoordinateSlicing: def make_coord(self): @@ -644,6 +779,11 @@ def test_get_indexer_datetime_methods(self): coord.get_indexer(np.datetime64("1999-12-31T23:59:59")) with pytest.raises(KeyError): coord.get_indexer(np.datetime64("2000-01-01T00:00:12")) + # string input + assert coord.get_indexer("2000-01-01T00:00:01.500", method="nearest") in [1, 2] + # invalid method + with pytest.raises(ValueError): + coord.get_indexer(t, method="bad") def test_start_end_properties_datetime(self): coord = 
self.make_dt_coord() @@ -714,3 +854,47 @@ def test_to_netcdf_and_back(self): expected.to_netcdf(file.name) result = xd.open_dataarray(file.name) assert result.equals(expected) + + +class TestGetSplitIndices: + def test_get_split_indices_no_tolerance(self): + coord = SampledCoordinate( + {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0} + ) + div_points = coord.get_split_indices() + expected = np.array([3]) # indices where segments end + assert np.array_equal(div_points, expected) + + def test_get_split_indices_with_tolerance(self): + coord = SampledCoordinate( + { + "tie_values": [0.0, 3.1, 10.0], + "tie_lengths": [3, 2, 2], + "sampling_interval": 1.0, + } + ) + div_points = coord.get_split_indices(tolerance=0.2) + expected = np.array([5]) # only the second gap exceeds tolerance + assert np.array_equal(div_points, expected) + + +class TestFromBlock: + def test_from_block(self): + result = SampledCoordinate.from_block(start=0.0, size=5, step=1.0) + expected = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [5], "sampling_interval": 1.0} + ) + assert result.equals(expected) + + +class TestNotImplementedMethods: + def test_raises(self): + coord = SampledCoordinate( + {"tie_values": [0.0], "tie_lengths": [3], "sampling_interval": 1.0} + ) + with pytest.raises(NotImplementedError): + coord.__array_ufunc__(None, None) + with pytest.raises(NotImplementedError): + coord.__array_function__(None, None, None, None) + with pytest.raises(NotImplementedError): + coord.from_array(None) diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py index cc02a4e..7afd7cd 100644 --- a/xdas/coordinates/sampled.py +++ b/xdas/coordinates/sampled.py @@ -33,7 +33,7 @@ def __init__(self, data=None, dim=None, dtype=None): # parse data data, dim = parse(data, dim) if not self.__class__.isvalid(data): - raise TypeError( + raise ValueError( "`data` must be dict-like and contain `tie_values`, `tie_lengths`, and " "`sampling_interval`" ) @@ -293,7 +293,7 @@ def get_indexer(self, value, method=None): self.tie_values[before] + (self.tie_lengths[before] - 1) * self.sampling_interval ) - if np.any((reference > 0) & (value < end)): + if np.any((reference > 0) & (value <= end)): raise KeyError("value is in an overlap region") # gap @@ -342,10 +342,6 @@ def get_indexer(self, value, method=None): if np.any(offset > self.tie_lengths[reference] - 1): raise KeyError("index not found") offset = np.maximum(offset, 0) - case _: - raise ValueError( - "method must be one of `None`, 'nearest', 'ffill', or 'bfill'" - ) return self.tie_indices[reference] + offset def append(self, other): @@ -404,7 +400,7 @@ def get_split_indices(self, tolerance=None): deltas = self.tie_values[1:] - ( self.tie_values[:-1] + self.sampling_interval * self.tie_lengths[:-1] ) - indices = indices[np.abs(deltas) <= tolerance] + indices = indices[np.abs(deltas) > tolerance] return indices @classmethod From a1bd76a42790d6e4b374d731c55ec0669a4e7d4d Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 14 Jan 2026 18:58:27 +0100 Subject: [PATCH 49/63] Fix xdas.io.__init__.py --- xdas/io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xdas/io/__init__.py b/xdas/io/__init__.py index 926a1a9..54f6f22 100644 --- a/xdas/io/__init__.py +++ b/xdas/io/__init__.py @@ -1 +1,2 @@ +from . 
import apsensing, asn, febus, miniseed, optasense, silixa, sintela, terra15
 from .core import get_free_port

From de02a6c2b39f6b976d4841ddf9d32eb47d114010 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 14 Jan 2026 18:58:48 +0100
Subject: [PATCH 50/63] Remove unintentionally added file

---
 .coverage | Bin 53248 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .coverage

diff --git a/.coverage b/.coverage
deleted file mode 100644
index 1f297399c8e31876e1f07878b2cc2ed5d864599b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 53248
[base85-encoded binary payload of the deleted .coverage file omitted]

From a2eb4a1160b81358fd936c5860281a81b33a9714 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 14 Jan 2026 19:32:32 +0100
Subject: [PATCH 51/63] Allow per dim ctype for coordinates.
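All readers now funnel their `ctype` argument through the new parse_ctype helper; its contract, sketched from the added code ("sampled" is used here as an assumed registered coordinate-type name):

    from xdas.io.core import parse_ctype

    parse_ctype(None)                 # {"time": "interpolated", "distance": "interpolated"}
    parse_ctype("sampled")            # {"time": "sampled", "distance": "sampled"}
    parse_ctype({"time": "sampled"})  # {"time": "sampled", "distance": "interpolated"}
    parse_ctype(42)                   # ValueError

Each reader then consumes the per-dimension dict as Coordinate[ctype["time"]] and Coordinate[ctype["distance"]].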
--- xdas/io/apsensing.py | 8 +++++--- xdas/io/asn.py | 12 +++++++----- xdas/io/core.py | 23 +++++++++++++++++++++++ xdas/io/febus.py | 8 +++++--- xdas/io/optasense.py | 6 ++++-- xdas/io/silixa.py | 6 ++++-- xdas/io/sintela.py | 6 ++++-- xdas/io/terra15.py | 8 +++++--- 8 files changed, 57 insertions(+), 20 deletions(-) diff --git a/xdas/io/apsensing.py b/xdas/io/apsensing.py index 937b165..fca5089 100644 --- a/xdas/io/apsensing.py +++ b/xdas/io/apsensing.py @@ -4,9 +4,11 @@ from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, ctype="interpolated"): +def read(fname, ctype=None): + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: t0 = file["Metadata"]["Timestamp"][()].item().decode() fs = file["DAQ"]["RepetitionFrequency"][()].item() @@ -19,6 +21,6 @@ def read(fname, ctype="interpolated"): t0 = np.datetime64(t0) dt = np.timedelta64(round(1e9 / fs), "ns") nt, nd = data.shape - time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") - distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance") + time = Coordinate[ctype["time"]].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype["distance"]].from_block(0.0, nd, dx, dim="distance") return DataArray(data, {"time": time, "distance": distance}) diff --git a/xdas/io/asn.py b/xdas/io/asn.py index 3e93536..5ed5bb5 100644 --- a/xdas/io/asn.py +++ b/xdas/io/asn.py @@ -7,9 +7,11 @@ from ..coordinates.core import Coordinate, get_sampling_interval from ..core.dataarray import DataArray from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, ctype="interpolated"): +def read(fname, ctype=None): + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: header = file["header"] t0 = np.datetime64(round(header["time"][()] * 1e9), "ns") @@ -17,8 +19,8 @@ def read(fname, ctype="interpolated"): dx = header["dx"][()] * np.median(np.diff(header["channels"])) data = VirtualSource(file["data"]) nt, nx = data.shape - time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") - distance = Coordinate[ctype].from_block(0.0, nx, dx, dim="distance") + time = Coordinate[ctype["time"]].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype["distance"]].from_block(0.0, nx, dx, dim="distance") return DataArray(data, {"time": time, "distance": distance}) @@ -105,7 +107,7 @@ def _update_header(self, message): roiTable = header["roiTable"][0] di = (roiTable["roiStart"] // roiTable["roiDec"]) * header["dx"] de = (roiTable["roiEnd"] // roiTable["roiDec"]) * header["dx"] - self.distance = { + self.distance = { # TODO: use from_block "tie_indices": [0, header["nChannels"] - 1], "tie_values": [di, de], } @@ -114,7 +116,7 @@ def _update_header(self, message): def _unpack(self, message): t0 = np.frombuffer(message[:8], "datetime64[ns]").reshape(()) data = np.frombuffer(message[8:], self.dtype).reshape(self.shape) - time = { + time = { # TODO: use from_block "tie_indices": [0, self.shape[0] - 1], "tie_values": [t0, t0 + (self.shape[0] - 1) * self.delta], } diff --git a/xdas/io/core.py b/xdas/io/core.py index a4171a7..806740a 100644 --- a/xdas/io/core.py +++ b/xdas/io/core.py @@ -18,3 +18,26 @@ def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) return s.getsockname()[1] + + +def parse_ctype(ctype): + if ctype is None: + ctype = { + "time": "interpolated", + "distance": "interpolated", + } + elif isinstance(ctype, str): + ctype = { + "time": ctype, + 
"distance": ctype, + } + elif isinstance(ctype, dict): + ctype = { + "time": ctype.get("time", "interpolated"), + "distance": ctype.get("distance", "interpolated"), + } + else: + raise ValueError( + "ctype must be None, str, or dict with 'time' and/or 'distance' keys" + ) + return ctype diff --git a/xdas/io/febus.py b/xdas/io/febus.py index 747b174..e1c2f84 100644 --- a/xdas/io/febus.py +++ b/xdas/io/febus.py @@ -7,9 +7,10 @@ from ..core.dataarray import DataArray from ..core.routines import concatenate from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, overlaps=None, offset=None, ctype="interpolated"): +def read(fname, overlaps=None, offset=None, ctype=None): """ Open a Febus file into a xdas DataArray object. @@ -41,6 +42,7 @@ def read(fname, overlaps=None, offset=None, ctype="interpolated"): A data array containing the data from the Febus file. """ + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: (device_name,) = list(file.keys()) source = file[device_name]["Source1"] @@ -94,8 +96,8 @@ def read(fname, overlaps=None, offset=None, ctype="interpolated"): for t0, chunk in zip(times, chunks): t0 = np.rint(1e6 * t0).astype("M8[us]").astype("M8[ns]") - time = Coordinate[ctype].from_block(t0, nt, dt, dim="time") - distance = Coordinate[ctype].from_block(0.0, nx, dx, dim="distance") + time = Coordinate[ctype["time"]].from_block(t0, nt, dt, dim="time") + distance = Coordinate[ctype["distance"]].from_block(0.0, nx, dx, dim="distance") da = DataArray(chunk, {"time": time, "distance": distance}, name=name) dc.append(da) diff --git a/xdas/io/optasense.py b/xdas/io/optasense.py index 16e175c..f4f34bf 100644 --- a/xdas/io/optasense.py +++ b/xdas/io/optasense.py @@ -4,9 +4,11 @@ from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, ctype="interpolated"): +def read(fname, ctype=None): + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: acquisition = file["Acquisition"] dx = acquisition.attrs["SpatialSamplingInterval"] @@ -19,5 +21,5 @@ def read(fname, ctype="interpolated"): "tie_indices": [0, nt - 1], "tie_values": [tstart, tend], } # TODO: use from_block - distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance") + distance = Coordinate[ctype["distance"]].from_block(0.0, nd, dx, dim="distance") return DataArray(data, {"distance": distance, "time": time}) diff --git a/xdas/io/silixa.py b/xdas/io/silixa.py index ae97867..2c64e68 100644 --- a/xdas/io/silixa.py +++ b/xdas/io/silixa.py @@ -3,10 +3,12 @@ from ..coordinates.core import Coordinate from ..core.dataarray import DataArray +from .core import parse_ctype from .tdms import TdmsReader -def read(fname, ctype="interpolated"): +def read(fname, ctype=None): + ctype = parse_ctype(ctype) shape, dtype, coords = read_header(fname, ctype) data = dask.array.from_delayed(dask.delayed(read_data)(fname), shape, dtype) return DataArray(data, coords) @@ -19,7 +21,7 @@ def read_header(fname, ctype): dtype = tdms._data_type t0 = np.datetime64(props["GPSTimeStamp"]) dt = np.timedelta64(round(1e9 / props["SamplingFrequency[Hz]"]), "ns") - time = Coordinate[ctype].from_block(t0, shape[0], dt, dim="time") + time = Coordinate[ctype["time"]].from_block(t0, shape[0], dt, dim="time") distance = { "tie_indices": [0, shape[1] - 1], "tie_values": [props["Start Distance (m)"], props["Stop Distance (m)"]], diff --git a/xdas/io/sintela.py b/xdas/io/sintela.py index 605f50f..6090282 100644 --- 
a/xdas/io/sintela.py +++ b/xdas/io/sintela.py @@ -4,9 +4,11 @@ from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, ctype="interpolated"): +def read(fname, ctype=None): + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: acquisition = file["Acquisition"] dx = acquisition.attrs["SpatialSamplingInterval"] @@ -19,5 +21,5 @@ def read(fname, ctype="interpolated"): "tie_indices": [0, nt - 1], "tie_values": [tstart, tend], } # TODO: use from_block - distance = Coordinate[ctype].from_block(0.0, nd, dx, dim="distance") + distance = Coordinate[ctype["distance"]].from_block(0.0, nd, dx, dim="distance") return DataArray(data, {"time": time, "distance": distance}) diff --git a/xdas/io/terra15.py b/xdas/io/terra15.py index e6cf242..e9a539e 100644 --- a/xdas/io/terra15.py +++ b/xdas/io/terra15.py @@ -6,9 +6,11 @@ from ..coordinates.core import Coordinate from ..core.dataarray import DataArray from ..virtual import VirtualSource +from .core import parse_ctype -def read(fname, tz=timezone.utc, ctype="interpolated"): +def read(fname, ctype=None, tz=timezone.utc): + ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: ti = np.datetime64( datetime.fromtimestamp(file["data_product"]["gps_time"][0], tz=tz) @@ -21,5 +23,5 @@ def read(fname, tz=timezone.utc, ctype="interpolated"): data = VirtualSource(file["data_product"]["data"]) nt, nd = data.shape time = {"tie_indices": [0, nt - 1], "tie_values": [ti, tf]} # TODO: use from_block - ctype = Coordinate[ctype].from_block(d0, nd, dx, dim="distance") - return DataArray(data, {"time": time, "distance": ctype}) + distance = Coordinate[ctype["distance"]].from_block(d0, nd, dx, dim="distance") + return DataArray(data, {"time": time, "distance": distance}) From 0ee8f3150b03855d82b584d64d24facdc12b7b22 Mon Sep 17 00:00:00 2001 From: Alister Trabattoni Date: Wed, 14 Jan 2026 19:35:39 +0100 Subject: [PATCH 52/63] Add python 3.14 for testing in github actions. 
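[Annotation on the I/O patch above] The `parse_ctype` helper gives every reader the same `ctype` contract: `None`, a single string, or a per-dimension dict. A standalone sketch of that contract — the function body is copied from the diff above, not imported from xdas, and the "sampled" flavour name is an assumption taken from the later coordinate patches:

```python
def parse_ctype(ctype):
    # None -> both dimensions default to "interpolated".
    if ctype is None:
        return {"time": "interpolated", "distance": "interpolated"}
    # A bare string applies to both dimensions.
    if isinstance(ctype, str):
        return {"time": ctype, "distance": ctype}
    # A dict may override one or both dimensions.
    if isinstance(ctype, dict):
        return {
            "time": ctype.get("time", "interpolated"),
            "distance": ctype.get("distance", "interpolated"),
        }
    raise ValueError(
        "ctype must be None, str, or dict with 'time' and/or 'distance' keys"
    )


assert parse_ctype(None) == {"time": "interpolated", "distance": "interpolated"}
assert parse_ctype("sampled") == {"time": "sampled", "distance": "sampled"}
assert parse_ctype({"time": "sampled"}) == {
    "time": "sampled",
    "distance": "interpolated",
}
```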
---
 .github/workflows/code-coverage.yaml | 2 +-
 .github/workflows/tests.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code-coverage.yaml b/.github/workflows/code-coverage.yaml
index 0d31cd9..1451a47 100644
--- a/.github/workflows/code-coverage.yaml
+++ b/.github/workflows/code-coverage.yaml
@@ -13,7 +13,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: 3.13
+          python-version: 3.14

      - name: Install dependencies
        run: |
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index c3e631f..7a4e350 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]

     steps:
      - uses: actions/checkout@v4

From 2ed500c86693e0f25e96e9ce25232a443b7ed37e Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 14 Jan 2026 21:16:41 +0100
Subject: [PATCH 53/63] Fix sampling interval handling for older xarray versions

---
 xdas/coordinates/sampled.py | 14 ++++++++++++++
 xdas/core/dataarray.py | 9 ++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 7afd7cd..0a8d8b5 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -459,6 +459,20 @@ def from_dataset(cls, dataset, name):
             "tie_lengths": dataset[lengths].values,
             "sampling_interval": dataset[sampling].values[()],
         }
+
+        # TODO: remove when dropping support for python 3.10
+        import xarray
+
+        if (
+            xarray.__version__ < "2025.7"
+            and "dtype" in dataset[sampling].attrs
+            and "units" in dataset[sampling].attrs
+        ):
+            data["sampling_interval"] = np.array(
+                data["sampling_interval"],
+                dtype=dataset[sampling].attrs["dtype"],
+            )[()]
+
         coords[name] = Coordinate(data, dim)
     return coords
diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py
index 468aa4d..d769493 100644
--- a/xdas/core/dataarray.py
+++ b/xdas/core/dataarray.py
@@ -943,7 +943,14 @@ def from_netcdf(cls, fname, group=None):
             The openend data array.
         """
         # read metadata
-        with xr.open_dataset(fname, group=group, engine="h5netcdf") as dataset:
+        with xr.open_dataset(
+            fname,
+            group=group,
+            engine="h5netcdf",
+            decode_timedelta=(
+                xr.__version__ >= "2025.7"
+            ),  # TODO: remove when dropping support for python 3.10
+        ) as dataset:
             # check file format
             if not (
                 "Conventions" in dataset.attrs and "CF" in dataset.attrs["Conventions"]

From b1e3dface4c1757cd6bed5da0c121791c5ba8aaf Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Wed, 14 Jan 2026 21:45:19 +0100
Subject: [PATCH 54/63] Manual timedelta handling.

---
 xdas/coordinates/sampled.py | 36 +++++++++++++++++++++++++++---------
 xdas/core/dataarray.py | 7 +------
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 0a8d8b5..a7a37e0 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -5,6 +5,17 @@
 from .core import Coordinate, format_datetime, is_strictly_increasing, parse

+
+CODE_TO_UNITS = {
+    "h": "hours",
+    "m": "minutes",
+    "s": "seconds",
+    "ms": "milliseconds",
+    "us": "microseconds",
+    "ns": "nanoseconds",
+}
+UNITS_TO_CODE = {v: k for k, v in CODE_TO_UNITS.items()}
+
+
 class SampledCoordinate(Coordinate, name="sampled"):
     """
     A coordinate that is sampled at regular intervals.
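[Annotation] Before the `to_dataset`/`from_dataset` hunks below, note what the two tables enable: a `timedelta64` sampling interval can be flattened into an integer count plus a CF-style units string and rebuilt losslessly. A minimal round-trip sketch using only NumPy (the interval value is illustrative):

```python
import numpy as np

CODE_TO_UNITS = {"h": "hours", "m": "minutes", "s": "seconds",
                 "ms": "milliseconds", "us": "microseconds", "ns": "nanoseconds"}
UNITS_TO_CODE = {value: key for key, value in CODE_TO_UNITS.items()}

sampling_interval = np.timedelta64(2000, "us")  # hypothetical 2 ms interval

# Encode: np.datetime_data() yields the dtype's unit code and count multiplier.
code, count = np.datetime_data(sampling_interval.dtype)  # ("us", 1)
units = CODE_TO_UNITS[code]
raw = count * sampling_interval.astype(int)  # plain integer: 2000

# Decode: rebuild the timedelta64 from the stored integer and units string.
restored = np.timedelta64(raw, UNITS_TO_CODE[units])
assert restored == sampling_interval
```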
@@ -434,9 +445,19 @@ def to_dataset(self, dataset, attrs):
         interp_attrs = {
             "tie_point_mapping": f"{self.dim}: {self.name}_values {self.name}_lengths",
         }
+
+        # timedelta
+        if np.issubdtype(self.sampling_interval.dtype, np.timedelta64):
+            code, count = np.datetime_data(self.sampling_interval.dtype)
+            interp_attrs["dtype"] = "timedelta64[ns]"
+            interp_attrs["units"] = CODE_TO_UNITS[code]
+            sampling_interval = count * self.sampling_interval.astype(int)
+        else:
+            sampling_interval = self.sampling_interval
+
         dataset.update(
             {
-                f"{self.name}_sampling": ((), self.sampling_interval, interp_attrs),
+                f"{self.name}_sampling": ((), sampling_interval, interp_attrs),
                 f"{self.name}_values": (f"{self.name}_points", tie_values),
                 f"{self.name}_lengths": (f"{self.name}_points", tie_lengths),
             }
@@ -460,18 +481,15 @@ def from_dataset(cls, dataset, name):
             "sampling_interval": dataset[sampling].values[()],
         }

-        # TODO: remove when dropping support for python 3.10
-        import xarray
-
+        # timedelta
         if (
-            xarray.__version__ < "2025.7"
-            and "dtype" in dataset[sampling].attrs
+            "dtype" in dataset[sampling].attrs
             and "units" in dataset[sampling].attrs
         ):
-            data["sampling_interval"] = np.array(
+            data["sampling_interval"] = np.timedelta64(
                 data["sampling_interval"],
-                dtype=dataset[sampling].attrs["dtype"],
-            )[()]
+                UNITS_TO_CODE[dataset[sampling].attrs.pop("units")],
+            ).astype(dataset[sampling].attrs.pop("dtype"))

         coords[name] = Coordinate(data, dim)
     return coords
diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py
index d769493..c8efebc 100644
--- a/xdas/core/dataarray.py
+++ b/xdas/core/dataarray.py
@@ -944,12 +944,7 @@ def from_netcdf(cls, fname, group=None):
         """
         # read metadata
         with xr.open_dataset(
-            fname,
-            group=group,
-            engine="h5netcdf",
-            decode_timedelta=(
-                xr.__version__ >= "2025.7"
-            ),  # TODO: remove when dropping support for python 3.10
+            fname, group=group, engine="h5netcdf", decode_timedelta=False
         ) as dataset:
             # check file format
             if not (

From 24e4afab9cf11e59d9f7e918fbdbc8e47c5302f5 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 18:33:53 +0100
Subject: [PATCH 55/63] Add release notes for SampledCoordinate feature

---
 docs/release-notes.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/release-notes.md b/docs/release-notes.md
index 0dd5b92..d52e3f0 100644
--- a/docs/release-notes.md
+++ b/docs/release-notes.md
@@ -1,5 +1,8 @@
 # Release notes

+## 0.X.X
+- Add SampledCoordinate for more SEED-like coordinates (@atrabattoni).
+
 ## 0.2.4
 - Add StreamWriter to write long time series to miniSEED (@marbail).
 - Fix OptaSense engine wrong axis attribution (@smouellet).

From 3f24463926f1ce64f2d4f6c1fb1436492430944d Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 18:34:48 +0100
Subject: [PATCH 56/63] Fix xdas.__init__.py.

---
 xdas/__init__.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/xdas/__init__.py b/xdas/__init__.py
index 44220eb..9326541 100644
--- a/xdas/__init__.py
+++ b/xdas/__init__.py
@@ -1,8 +1,21 @@
+from . import (
+    atoms,
+    config,
+    coordinates,
+    fft,
+    io,
+    parallel,
+    processing,
+    signal,
+    synthetics,
+    virtual,
+)
 from .coordinates import (
     Coordinate,
     Coordinates,
     get_sampling_interval,
 )
+from .core import dataarray, datacollection, methods, numpy, routines
 from .core.dataarray import DataArray
 from .core.datacollection import DataCollection, DataMapping, DataSequence
 from .core.methods import *

From d85b6e4921eeed8cfe321836f1fb4e2374761acc Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 18:35:06 +0100
Subject: [PATCH 57/63] Remove unnecessary blank line in sampled.py

---
 xdas/coordinates/sampled.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index a7a37e0..34485ea 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -4,7 +4,6 @@
 from .core import Coordinate, format_datetime, is_strictly_increasing, parse

-
 CODE_TO_UNITS = {
     "h": "hours",
     "m": "minutes",

From 035fd83d6c820ae085486bcf679ebc8f02774376 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 18:35:49 +0100
Subject: [PATCH 58/63] Quick update of docs API.

---
 docs/api/coordinates.md | 216 ++++++++++++++++++++++++++++++++++
 docs/api/index.md | 4 +-
 docs/api/xdas.md | 158 -----------------------------
 3 files changed, 219 insertions(+), 159 deletions(-)
 create mode 100644 docs/api/coordinates.md

diff --git a/docs/api/coordinates.md b/docs/api/coordinates.md
new file mode 100644
index 0000000..e94b157
--- /dev/null
+++ b/docs/api/coordinates.md
@@ -0,0 +1,216 @@
+```{eval-rst}
+.. currentmodule:: xdas.coordinates
+```
+# xdas.coordinates
+
+## Coordinates
+
+Constructor
+
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    Coordinates
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    Coordinates.isdim
+    Coordinates.get_query
+    Coordinates.to_index
+    Coordinates.equals
+    Coordinates.to_dict
+    Coordinates.copy
+    Coordinates.drop_dims
+    Coordinates.drop_coords
+```
+
+### Coordinate
+
+Constructor
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    Coordinate
+```
+
+Attributes
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    Coordinate.dtype
+    Coordinate.ndim
+    Coordinate.shape
+    Coordinate.values
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    Coordinate.to_index
+    Coordinate.isscalar
+    Coordinate.isdense
+    Coordinate.isinterp
+```
+
+
+### ScalarCoordinate
+
+Constructor
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    ScalarCoordinate
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    ScalarCoordinate.isvalid
+    ScalarCoordinate.equals
+    ScalarCoordinate.to_index
+    ScalarCoordinate.to_dict
+```
+
+### DenseCoordinate
+
+Constructor
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    DenseCoordinate
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    DenseCoordinate.isvalid
+    DenseCoordinate.index
+    DenseCoordinate.get_indexer
+    DenseCoordinate.slice_indexer
+    DenseCoordinate.to_dict
+```
+
+### InterpCoordinate
+
+Constructor
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    InterpCoordinate
+```
+
+Attributes
+
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    InterpCoordinate.tie_indices
+    InterpCoordinate.tie_values
+    InterpCoordinate.empty
+    InterpCoordinate.dtype
+    InterpCoordinate.ndim
+    InterpCoordinate.shape
+    InterpCoordinate.indices
+    InterpCoordinate.values
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    InterpCoordinate.isvalid
+    InterpCoordinate.equals
+    InterpCoordinate.get_value
+    InterpCoordinate.format_index
+    InterpCoordinate.slice_index
+    InterpCoordinate.get_indexer
+    InterpCoordinate.slice_indexer
+    InterpCoordinate.decimate
+    InterpCoordinate.simplify
+    InterpCoordinate.get_discontinuities
+    InterpCoordinate.from_array
+    InterpCoordinate.to_dict
+```
+
+
+### SampledCoordinate
+
+Constructor
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    SampledCoordinate
+```
+
+Attributes
+
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    SampledCoordinate.tie_values
+    SampledCoordinate.tie_lengths
+    SampledCoordinate.tie_indices
+    SampledCoordinate.sampling_interval
+    SampledCoordinate.empty
+    SampledCoordinate.dtype
+    SampledCoordinate.ndim
+    SampledCoordinate.shape
+    SampledCoordinate.indices
+    SampledCoordinate.values
+```
+
+Methods
+
+```{eval-rst}
+.. autosummary::
+    :toctree: ../_autosummary
+
+    SampledCoordinate.isvalid
+    SampledCoordinate.equals
+    SampledCoordinate.get_sampling_interval
+    SampledCoordinate.get_value
+    SampledCoordinate.slice_index
+    SampledCoordinate.get_indexer
+    SampledCoordinate.slice_indexer
+    SampledCoordinate.append
+    SampledCoordinate.decimate
+    SampledCoordinate.simplify
+    SampledCoordinate.get_split_indices
+    SampledCoordinate.from_array
+    SampledCoordinate.to_dict
+    SampledCoordinate.from_block
+```
\ No newline at end of file
diff --git a/docs/api/index.md b/docs/api/index.md
index 3f2d665..d16f304 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -5,9 +5,11 @@
 xdas
 atoms
-io
+coordinates
 fft
+io
 parallel
+picking
 processing
 signal
 synthetics
diff --git a/docs/api/xdas.md b/docs/api/xdas.md
index 118b079..be198ce 100644
--- a/docs/api/xdas.md
+++ b/docs/api/xdas.md
@@ -168,161 +168,3 @@ Methods
     DataSequence.map
 ```

-### Coordinates
-
-Constructor
-
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    Coordinates
-```
-
-Methods
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    Coordinates.isdim
-    Coordinates.get_query
-    Coordinates.to_index
-    Coordinates.equals
-    Coordinates.to_dict
-    Coordinates.copy
-    Coordinates.drop_dims
-    Coordinates.drop_coords
-```
-
-### Coordinate
-
-Constructor
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    Coordinate
-```
-
-Attributes
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    Coordinate.dtype
-    Coordinate.ndim
-    Coordinate.shape
-    Coordinate.values
-```
-
-Methods
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    Coordinate.to_index
-    Coordinate.isscalar
-    Coordinate.isdense
-    Coordinate.isinterp
-```
-
-
-### ScalarCoordinate
-
-Constructor
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    ScalarCoordinate
-```
-
-Methods
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    ScalarCoordinate.isvalid
-    ScalarCoordinate.equals
-    ScalarCoordinate.to_index
-    ScalarCoordinate.to_dict
-```
-
-### DenseCoordinate
-
-Constructor
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    DenseCoordinate
-```
-
-Methods
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    DenseCoordinate.isvalid
-    DenseCoordinate.index
-    DenseCoordinate.get_indexer
-    DenseCoordinate.slice_indexer
-    DenseCoordinate.to_dict
-```
-
-### InterpCoordinate
-
-Constructor
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    InterpCoordinate
-```
-
-Attributes
-
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    InterpCoordinate.tie_indices
-    InterpCoordinate.tie_values
-    InterpCoordinate.empty
-    InterpCoordinate.dtype
-    InterpCoordinate.ndim
-    InterpCoordinate.shape
-    InterpCoordinate.indices
-    InterpCoordinate.values
-```
-
-Methods
-
-```{eval-rst}
-.. autosummary::
-    :toctree: ../_autosummary
-
-    InterpCoordinate.isvalid
-    InterpCoordinate.equals
-    InterpCoordinate.get_value
-    InterpCoordinate.format_index
-    InterpCoordinate.slice_index
-    InterpCoordinate.get_indexer
-    InterpCoordinate.slice_indexer
-    InterpCoordinate.decimate
-    InterpCoordinate.simplify
-    InterpCoordinate.get_discontinuities
-    InterpCoordinate.from_array
-    InterpCoordinate.to_dict
-```
\ No newline at end of file

From 6c2b21da388cb6d94dcc44fe2971240a3172dd51 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 18:44:29 +0100
Subject: [PATCH 59/63] WIP: Making a coordinates docs tree.

---
 docs/getting-started.md | 2 +-
 docs/user-guide/coordinates/index.md | 3 +++
 docs/user-guide/{ => coordinates}/interpolated-coordinates.md | 4 ++--
 .../{data-structure => data-structures}/dataarray.md | 4 ++--
 .../{data-structure => data-structures}/datacollection.md | 0
 docs/user-guide/{data-structure => data-structures}/index.md | 2 +-
 docs/user-guide/index.md | 4 ++--
 docs/user-guide/virtual-datasets.md | 2 +-
 8 files changed, 12 insertions(+), 9 deletions(-)
 create mode 100644 docs/user-guide/coordinates/index.md
 rename docs/user-guide/{ => coordinates}/interpolated-coordinates.md (98%)
 rename docs/user-guide/{data-structure => data-structures}/dataarray.md (95%)
 rename docs/user-guide/{data-structure => data-structures}/datacollection.md (100%)
 rename docs/user-guide/{data-structure => data-structures}/index.md (98%)

diff --git a/docs/getting-started.md b/docs/getting-started.md
index edff32e..f717052 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -65,7 +65,7 @@ da
 Xdas only loads the metadata from each file and returns a {py:class}`~xdas.DataArray` object. This object has mainly two attributes. First a `data` attribute that contain the data. Here a {py:class}`~xdas.VirtualStack` object that is a pointer to the different files we opened. Second, a `coords` attribute that contains the metadata related to how the space and the time are sampled. Here both dimensions are labeled using {py:class}`~xdas.InterpCoordinate` objects. Those allow to concisely store the time and space information, including potential gaps and overlaps. See the [](user-guide/interpolated-coordinates) section for more information.

-Note that if you want to create a single data collection object for multiple acquisitions (i.e. different instruments or several acquisition with different parameters), you can use the [DataCollection](user-guide/data-structure/datacollection) structure.
+Note that if you want to create a single data collection object for multiple acquisitions (i.e. different instruments or several acquisition with different parameters), you can use the [DataCollection](user-guide/data-structures/datacollection) structure.
 ```{note}
 For Febus users, converting native files into Xdas NetCDF format generally improves I/O operations and reduce the amount of data by a factor two. This can be done by looping over Febus files and running: `xdas.open_dataarray("path_to_febus_file.h5", engine="febus").to_netcdf("path_to_xdas_file.nc", virtual=False)`. The converted files can then be linked as described above.
 ```
diff --git a/docs/user-guide/coordinates/index.md b/docs/user-guide/coordinates/index.md
new file mode 100644
index 0000000..1277089
--- /dev/null
+++ b/docs/user-guide/coordinates/index.md
@@ -0,0 +1,3 @@
+# Coordinates
+
+TODO
\ No newline at end of file
diff --git a/docs/user-guide/interpolated-coordinates.md b/docs/user-guide/coordinates/interpolated-coordinates.md
similarity index 98%
rename from docs/user-guide/interpolated-coordinates.md
rename to docs/user-guide/coordinates/interpolated-coordinates.md
index d4821f8..6ae7aee 100644
--- a/docs/user-guide/interpolated-coordinates.md
+++ b/docs/user-guide/coordinates/interpolated-coordinates.md
@@ -59,7 +59,7 @@ is to `simplify` the coordinates, increasing the tolerance such that the
 overlap disappear.
 ```

-# Gaps and Overlaps
+## Gaps and Overlaps

 Gaps and Overlaps can be easily identified based on the tie point positions, and
 extracted with:
@@ -79,7 +79,7 @@ coord = coord.simplify(tolerance=0.0)
 coord
 ```

-# Temporal Coordinates
+## Temporal Coordinates

 The main use of coordinates in *xdas* is to deal with long time series. By
 default *xdas* uses `"datetime64[us]"` dtype. Microseconds are used because to perform
diff --git a/docs/user-guide/data-structure/dataarray.md b/docs/user-guide/data-structures/dataarray.md
similarity index 95%
rename from docs/user-guide/data-structure/dataarray.md
rename to docs/user-guide/data-structures/dataarray.md
index 7d7a62e..9e48ff4 100644
--- a/docs/user-guide/data-structure/dataarray.md
+++ b/docs/user-guide/data-structures/dataarray.md
@@ -16,7 +16,7 @@ os.chdir("../../_data")

 {py:class}`~xdas.DataArray` is the base class to load and manipulate big datasets to in *xdas*. It is mainly composed of two attributes:

 - `data`: any N-dimensional array-like object. Compared to *xarray* `xdas.DataArray` are more permissive to the kinds of array-like objects that can be used. In particular, [virtual arrays](../virtual-datasets) can be used.
-- `coords`: a dict-like container of coordinates. As opposed to *xarray*, which uses dense arrays to label each point, *xdas* also implements [interpolated coordinates](../interpolated-coordinates) that provides an efficient representation of evenly spaced data (gracefully handling gaps and small sampling variations).
+- `coords`: a dict-like container of coordinates. As opposed to *xarray*, which uses dense arrays to label each point, *xdas* also implements [interpolated coordinates](../coordinates/interpolated-coordinates) that provides an efficient representation of evenly spaced data (gracefully handling gaps and small sampling variations).

 ![](/_static/dataarray.svg)
@@ -30,7 +30,7 @@ In the following examples, we use only one `DataArray`, if you have several `Dat

 ## Creating a DataArray

-The user can wrap together an n-dimensional array and some related coordinates. See the related description of how to create coordinates [here](../interpolated-coordinates.md). For example:
+The user can wrap together an n-dimensional array and some related coordinates. See the related description of how to create coordinates [here](../coordinates/interpolated-coordinates.md). For example:

 ```{code-cell}
diff --git a/docs/user-guide/data-structure/datacollection.md b/docs/user-guide/data-structures/datacollection.md
similarity index 100%
rename from docs/user-guide/data-structure/datacollection.md
rename to docs/user-guide/data-structures/datacollection.md
diff --git a/docs/user-guide/data-structure/index.md b/docs/user-guide/data-structures/index.md
similarity index 98%
rename from docs/user-guide/data-structure/index.md
rename to docs/user-guide/data-structures/index.md
index 3745bc9..bc15826 100644
--- a/docs/user-guide/data-structure/index.md
+++ b/docs/user-guide/data-structures/index.md
@@ -1,4 +1,4 @@
-# Data Structure
+# Data Structures

 Xdas leverages two main data structures.
diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md
index 4ab4214..b832334 100644
--- a/docs/user-guide/index.md
+++ b/docs/user-guide/index.md
@@ -3,10 +3,10 @@
 ```{toctree}
 :maxdepth: 1

-data-structure/index
+data-structures/index
+coordinates/index
 data-formats
 virtual-datasets
-interpolated-coordinates
 miniseed
 convert-displacement
 atoms
diff --git a/docs/user-guide/virtual-datasets.md b/docs/user-guide/virtual-datasets.md
index 752b14a..f271d99 100644
--- a/docs/user-guide/virtual-datasets.md
+++ b/docs/user-guide/virtual-datasets.md
@@ -49,7 +49,7 @@ To handle individual files, multiple files, and virtual datasets, *xdas* offers
 | {py:func}`xdas.open_mfdatatree` | {py:class}`~xdas.DataCollection` | Open a directory tree of files, organizing data in a data collection. |
 | {py:func}`xdas.open_datacollection` | {py:class}`~xdas.DataCollection` | Open a (virtual) collection. |

-Please refer to the [](data-structure/datacollection.md) section for the functions that return a data collection.
+Please refer to the [](data-structures/datacollection.md) section for the functions that return a data collection.

 ## Linking multi-file datasets

From 3f06a9e7bb519e116b45fa90c060bd5d58f8c3a6 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 19:31:42 +0100
Subject: [PATCH 60/63] Add some doc about coordinate system.

---
 docs/user-guide/coordinates/index.md | 44 ++++++++++++++++++++++++++++-
 docs/user-guide/coordinates/sampled-coordinates.md | 9 ++++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 docs/user-guide/coordinates/sampled-coordinates.md

diff --git a/docs/user-guide/coordinates/index.md b/docs/user-guide/coordinates/index.md
index 1277089..8efd8dc 100644
--- a/docs/user-guide/coordinates/index.md
+++ b/docs/user-guide/coordinates/index.md
@@ -1,3 +1,45 @@
+---
+file_format: mystnb
+kernelspec:
+  name: python3
+---
+
 # Coordinates

-TODO
\ No newline at end of file
+
+{py:class}`~xdas.DataArray` is the base class in *xdas*. It is mainly composed of an N-dimensional array and of a set of {py:class}`~xdas.Coordinate` objects that are gathered in a {py:class}`~xdas.Coordinates` dict-like object that can be accessed by the `DataArray.coords` attribute. Xdas comes with several flavours of {py:class}`~xdas.Coordinate` objects.
+
+| Type | Description | `data` |
+|:---|:---|:---:|
+| {py:class}`~xdas.coordinates.ScalarCoordinate` | Used to label 0D dimensions | `{"value": any}` |
+| {py:class}`~xdas.coordinates.DefaultCoordinate` | Each value is equal to its index | `{"size": int}` |
+| {py:class}`~xdas.coordinates.DenseCoordinate` | Each index is mapped to a given value | `array-like[any]` |
+| {py:class}`~xdas.coordinates.InterpCoordinate` | Values are interpolated linearly between tie points | `{"tie_indices": array-like[int], "tie_values": array-like[any]}` |
+| {py:class}`~xdas.coordinates.SampledCoordinate` | Values are given as a multiple of a fixed sampling interval and several start values | `{"tie_values": array-like[any], "tie_indices": array-like[int], "sampling_interval": any}` |
+
+## Per type information
+
+```{toctree}
+:maxdepth: 1
+
+interpolated-coordinates
+sampled-coordinates
+```
+
+<!--
\ No newline at end of file
diff --git a/docs/user-guide/coordinates/sampled-coordinates.md b/docs/user-guide/coordinates/sampled-coordinates.md
new file mode 100644
index 0000000..e02500f
--- /dev/null
+++ b/docs/user-guide/coordinates/sampled-coordinates.md
@@ -0,0 +1,9 @@
+---
+file_format: mystnb
+kernelspec:
+  name: python3
+---
+
+# Sampled Coordinates
+
+TODO
\ No newline at end of file

From c3e6798fc0b6e53b688eaff1080c0e325e08f72e Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 19:34:28 +0100
Subject: [PATCH 61/63] Update coordinate documentation to include 'name' column in type descriptions

---
 docs/user-guide/coordinates/index.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/user-guide/coordinates/index.md b/docs/user-guide/coordinates/index.md
index 8efd8dc..5968924 100644
--- a/docs/user-guide/coordinates/index.md
+++ b/docs/user-guide/coordinates/index.md
@@ -9,13 +9,13 @@
 {py:class}`~xdas.DataArray` is the base class in *xdas*. It is mainly composed of an N-dimensional array and of a set of {py:class}`~xdas.Coordinate` objects that are gathered in a {py:class}`~xdas.Coordinates` dict-like object that can be accessed by the `DataArray.coords` attribute. Xdas comes with several flavours of {py:class}`~xdas.Coordinate` objects.
-| Type | Description | `data` |
-|:---|:---|:---:|
-| {py:class}`~xdas.coordinates.ScalarCoordinate` | Used to label 0D dimensions | `{"value": any}` |
-| {py:class}`~xdas.coordinates.DefaultCoordinate` | Each value is equal to its index | `{"size": int}` |
-| {py:class}`~xdas.coordinates.DenseCoordinate` | Each index is mapped to a given value | `array-like[any]` |
-| {py:class}`~xdas.coordinates.InterpCoordinate` | Values are interpolated linearly between tie points | `{"tie_indices": array-like[int], "tie_values": array-like[any]}` |
-| {py:class}`~xdas.coordinates.SampledCoordinate` | Values are given as a multiple of a fixed sampling interval and several start values | `{"tie_values": array-like[any], "tie_indices": array-like[int], "sampling_interval": any}` |
+| Type | Description | `name` | `data` |
+|:---|:---|:---:|:---:|
+| {py:class}`~xdas.coordinates.ScalarCoordinate` | Used to label 0D dimensions | `scalar` | `{"value": any}` |
+| {py:class}`~xdas.coordinates.DefaultCoordinate` | Each value is equal to its index | `default` | `{"size": int}` |
+| {py:class}`~xdas.coordinates.DenseCoordinate` | Each index is mapped to a given value | `dense` | `array-like[any]` |
+| {py:class}`~xdas.coordinates.InterpCoordinate` | Values are interpolated linearly between tie points | `interpolated` | `{"tie_indices": array-like[int], "tie_values": array-like[any]}` |
+| {py:class}`~xdas.coordinates.SampledCoordinate` | Values are given as a multiple of a fixed sampling interval and several start values | `sampled` | `{"tie_values": array-like[any], "tie_indices": array-like[int], "sampling_interval": any}` |

From 556ee949ea5dc1214dad620697232909f45c84fb Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Fri, 16 Jan 2026 19:37:09 +0100
Subject: [PATCH 62/63] Fix typo in coordinates documentation and clarify information location

---
 docs/user-guide/coordinates/index.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/user-guide/coordinates/index.md b/docs/user-guide/coordinates/index.md
index 5968924..62cb7a4 100644
--- a/docs/user-guide/coordinates/index.md
+++ b/docs/user-guide/coordinates/index.md
@@ -17,6 +17,8 @@
 | {py:class}`~xdas.coordinates.InterpCoordinate` | Values are interpolated linearly between tie points | `interpolated` | `{"tie_indices": array-like[int], "tie_values": array-like[any]}` |
 | {py:class}`~xdas.coordinates.SampledCoordinate` | Values are given as a multiple of a fixed sampling interval and several start values | `sampled` | `{"tie_values": array-like[any], "tie_indices": array-like[int], "sampling_interval": any}` |

+In the current state of the documentation, most of the coordinate information can be found in the [Interpolated Coordinate](interpolated-coordinates) page.
+
 ## Per type information

From 0ebb613c59f44dcb9c5a515d8c1900fa935996cb Mon Sep 17 00:00:00 2001
From: Alister Trabattoni
Date: Tue, 20 Jan 2026 18:33:58 +0100
Subject: [PATCH 63/63] Minor formatting.
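[Annotation] To make the coordinate table documented in the patches above concrete, here is the rough shape of the `data` payloads for the two tie-point flavours. This is a sketch only: key names follow the table, the values are invented for illustration, and whether the dispatching `Coordinate` constructor accepts these exact dicts depends on the final API.

```python
import numpy as np

# InterpCoordinate payload: values interpolated linearly between tie points.
interp_data = {
    "tie_indices": [0, 999],      # first and last sample index
    "tie_values": [0.0, 9990.0],  # e.g. distance in meters
}

# SampledCoordinate payload: start value(s) plus one fixed sampling interval
# (key names taken from the table above; values are hypothetical).
sampled_data = {
    "tie_values": np.array(["2024-01-01T00:00:00"], dtype="datetime64[ns]"),
    "tie_indices": np.array([0]),
    "sampling_interval": np.timedelta64(10, "ms"),
}
```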
---
 xdas/__init__.py | 6 +-----
 xdas/coordinates/__init__.py | 6 +-----
 xdas/core/dataarray.py | 3 ++-
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/xdas/__init__.py b/xdas/__init__.py
index 9326541..e98f9d1 100644
--- a/xdas/__init__.py
+++ b/xdas/__init__.py
@@ -10,11 +10,7 @@
     synthetics,
     virtual,
 )
-from .coordinates import (
-    Coordinate,
-    Coordinates,
-    get_sampling_interval,
-)
+from .coordinates import Coordinate, Coordinates, get_sampling_interval
 from .core import dataarray, datacollection, methods, numpy, routines
 from .core.dataarray import DataArray
 from .core.datacollection import DataCollection, DataMapping, DataSequence
diff --git a/xdas/coordinates/__init__.py b/xdas/coordinates/__init__.py
index 09f1735..f7eaeae 100644
--- a/xdas/coordinates/__init__.py
+++ b/xdas/coordinates/__init__.py
@@ -1,8 +1,4 @@
-from .core import (
-    Coordinate,
-    Coordinates,
-    get_sampling_interval,
-)
+from .core import Coordinate, Coordinates, get_sampling_interval
 from .default import DefaultCoordinate
 from .dense import DenseCoordinate
 from .interp import InterpCoordinate
diff --git a/xdas/core/dataarray.py b/xdas/core/dataarray.py
index c8efebc..5100cee 100644
--- a/xdas/core/dataarray.py
+++ b/xdas/core/dataarray.py
@@ -129,7 +129,8 @@ def __array__(self, dtype=None):
         return self.data.__array__(dtype)

     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        from .routines import broadcast_coords, broadcast_to  # TODO: circular import
+        from .routines import broadcast_coords  # TODO: circular import
+        from .routines import broadcast_to

         if not method == "__call__":
             return NotImplemented
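[Annotation] A closing note on the import split in `__array_ufunc__` above: keeping the import inside the function body is the usual workaround for a module-level cycle, because the statement only executes at call time, once both modules are fully initialized. A generic, self-contained illustration of that call-time binding (not xdas code):

```python
def magnitude(x):
    # Deferred import: `math` is bound when magnitude() first runs, not when
    # the enclosing module is imported -- the same trick that lets
    # dataarray.py pull broadcast helpers from routines.py despite the cycle.
    import math

    return math.sqrt(x * x)


print(magnitude(-3.0))  # 3.0
```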