diff --git a/strax/chunk.py b/strax/chunk.py
index bb6bf84a3..9a4f40e37 100644
--- a/strax/chunk.py
+++ b/strax/chunk.py
@@ -1,6 +1,7 @@
 import typing as ty
 
 import numpy as np
+import pandas as pd
 import numba
 
 import strax
@@ -27,6 +28,7 @@ class Chunk:
 
     data: np.ndarray
     target_size_mb: int
+    _index: ty.Optional[pd.IntervalIndex] = None
 
     def __init__(self,
                  *,
@@ -112,6 +114,19 @@ def nbytes(self):
     @property
     def duration(self):
         return self.end - self.start
+
+    @property
+    def index(self):
+        """pd.IntervalIndex over data, from 'time' to strax.endtime(data).
+
+        Built on first access and cached in _index.
+        NOTE(review): nothing visible in this patch resets _index if data
+        is replaced afterwards -- confirm callers never swap data.
+        """
+        if self._index is None:
+            self._index = pd.IntervalIndex.from_arrays(
+                self.data['time'], strax.endtime(self.data))
+        return self._index
 
     @property
     def is_superrun(self):
@@ -292,6 +307,28 @@ def concatenate(cls, chunks):
             data=np.concatenate([c.data for c in chunks]),
             target_size_mb=max([c.target_size_mb for c in chunks]))
 
+    def overlaps(self, start, end=None):
+        """Return the data that overlaps the interval (start, end].
+
+        Args:
+            start: interval start time, or a pd.Interval to query directly.
+            end (optional): interval end time. Ignored when start is a
+                pd.Interval. Defaults to None.
+
+        Raises:
+            ValueError: if end is not given and start is not a pd.Interval.
+
+        Returns:
+            np.ndarray: the elements of data whose interval in self.index
+                overlaps the requested interval.
+        """
+        if isinstance(start, pd.Interval):
+            interval = start
+        elif end is not None:
+            interval = pd.Interval(start, end)
+        else:
+            raise ValueError("Must supply interval of start and end times.")
+        return self.data[self.index.overlaps(interval)]
 
 @export
 def continuity_check(chunk_iter):