From 8c96c3ebccd3e9dcd076508603d1de61cd8b9960 Mon Sep 17 00:00:00 2001 From: Yossi Mosbacher Date: Sat, 28 Aug 2021 21:45:10 +0300 Subject: [PATCH 1/2] cached interval index for chunks --- strax/chunk.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/strax/chunk.py b/strax/chunk.py index bb6bf84a3..d7680c6ff 100644 --- a/strax/chunk.py +++ b/strax/chunk.py @@ -1,6 +1,7 @@ import typing as ty import numpy as np +import pandas as pd import numba import strax @@ -17,7 +18,7 @@ class Chunk: data_type: str data_kind: str dtype: np.dtype - + # run_id is not superfluous to track: # this could change during the run in superruns (in the future) run_id: str @@ -27,6 +28,7 @@ class Chunk: data: np.ndarray target_size_mb: int + _index: pd.IntervalIndex = None def __init__(self, *, @@ -112,6 +114,12 @@ def nbytes(self): @property def duration(self): return self.end - self.start + + @property + def index(self): + if self._index is None: + self._index = pd.IntervalIndex.from_arrays(self.data['time'], strax.endtime(self.data)) + return self._index @property def is_superrun(self): From 471ebc3719251404692516273de0f8929e00b82b Mon Sep 17 00:00:00 2001 From: Yossi Mosbacher Date: Sat, 28 Aug 2021 21:58:35 +0300 Subject: [PATCH 2/2] overlaps method for chunks --- strax/chunk.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/strax/chunk.py b/strax/chunk.py index d7680c6ff..9a4f40e37 100644 --- a/strax/chunk.py +++ b/strax/chunk.py @@ -300,6 +300,28 @@ def concatenate(cls, chunks): data=np.concatenate([c.data for c in chunks]), target_size_mb=max([c.target_size_mb for c in chunks])) + def overlaps(self, start,end=None): + """ + Return data that overlaps the interval (start, end] + + Args: + start ([type]): interval start time or pd.Interval + end ([type], optional): interval end time. Defaults to None. + + Raises: + ValueError: if end is not given and start is not an interval. 
+ + Returns: + [type]: array of overlapping data + """ + + if isinstance(start, pd.Interval): + dt = start + elif end is not None: + dt = pd.Interval(start,end) + else: + raise ValueError("Must supply interval of start and end times.") + return self.data[self.index.overlaps(dt)] @export def continuity_check(chunk_iter):