Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions docs/user-guide/overview.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,6 @@ If not, the [`match()`](`modelskill.match`) function can be used to match the ob

If the observations and model results are not in the same data source (e.g. a dfs0 file),
they will need to be defined and then matched in space and time with the `match()` function.
In simple cases, observations and model results can be defined directly in the `match()` function:

```{python}
import modelskill as ms
cmp = ms.match("../data/obs.dfs0", "../data/model.dfs0",
obs_item="obs_WL", mod_item="WL",
gtype='point')
```

But in most cases, the observations and model results will need to be defined separately first.


### Define observations
Expand Down
71 changes: 25 additions & 46 deletions notebooks/Simple_timeseries_compare.ipynb

Large diffs are not rendered by default.

136 changes: 25 additions & 111 deletions src/modelskill/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,27 @@

from modelskill.model.point import PointModelResult

from . import Quantity, __version__, model_result
from . import Quantity, __version__
from .comparison import Comparer, ComparerCollection
from .model._base import Alignable
from .model.dfsu import DfsuModelResult
from .model.dummy import DummyModelResult
from .model.grid import GridModelResult
from .model.track import TrackModelResult
from .obs import Observation, PointObservation, TrackObservation, observation
from .obs import Observation, PointObservation, TrackObservation
from .timeseries import TimeSeries
from .types import Period

TimeDeltaTypes = Union[float, int, np.timedelta64, pd.Timedelta, timedelta]
IdxOrNameTypes = Optional[Union[int, str]]
GeometryTypes = Optional[Literal["point", "track", "unstructured", "grid"]]
MRTypes = Union[
PointModelResult,
GridModelResult,
DfsuModelResult,
TrackModelResult,
DummyModelResult,
]
MRInputType = Union[
str,
Path,
Expand All @@ -48,11 +55,9 @@
xr.Dataset,
xr.DataArray,
TimeSeries,
GridModelResult,
DfsuModelResult,
TrackModelResult,
DummyModelResult,
MRTypes,
]
ObsTypes = Union[PointObservation, TrackObservation]
ObsInputType = Union[
str,
Path,
Expand All @@ -61,7 +66,7 @@
mikeio.Dfs0,
pd.DataFrame,
pd.Series,
Observation,
ObsTypes,
]

T = TypeVar("T", bound="TimeSeries")
Expand Down Expand Up @@ -164,12 +169,9 @@ def from_matched(

@overload
def match(
obs: Observation,
mod: Union[MRInputType, Sequence[MRInputType]],
obs: ObsTypes,
mod: MRTypes | Sequence[MRTypes],
*,
obs_item: Optional[IdxOrNameTypes] = None,
mod_item: Optional[IdxOrNameTypes] = None,
gtype: Optional[GeometryTypes] = None,
max_model_gap: Optional[float] = None,
spatial_method: Optional[str] = None,
obs_no_overlap: Literal["ignore", "error", "warn"] = "error",
Expand All @@ -178,12 +180,9 @@ def match(

@overload
def match(
obs: Iterable[Observation],
mod: Union[MRInputType, Sequence[MRInputType]],
obs: Iterable[ObsTypes],
mod: MRTypes | Sequence[MRTypes],
*,
obs_item: Optional[IdxOrNameTypes] = None,
mod_item: Optional[IdxOrNameTypes] = None,
gtype: Optional[GeometryTypes] = None,
max_model_gap: Optional[float] = None,
spatial_method: Optional[str] = None,
obs_no_overlap: Literal["ignore", "error", "warn"] = "error",
Expand All @@ -194,9 +193,6 @@ def match(
obs,
mod,
*,
obs_item=None,
mod_item=None,
gtype=None,
max_model_gap=None,
spatial_method: Optional[str] = None,
obs_no_overlap: Literal["ignore", "error", "warn"] = "error",
Expand All @@ -212,17 +208,10 @@ def match(

Parameters
----------
obs : (str, Path, pd.DataFrame, Observation, Sequence[Observation])
obs : (Observation, Sequence[Observation])
Observation(s) to be compared
mod : (str, Path, pd.DataFrame, ModelResult, Sequence[ModelResult])
mod : (ModelResult, Sequence[ModelResult])
Model result(s) to be compared
obs_item : int or str, optional
observation item if obs is a file/dataframe, by default None
mod_item : (int, str), optional
model item if mod is a file/dataframe, by default None
gtype : (str, optional)
Geometry type of the model result (if mod is a file/dataframe).
If not specified, it will be guessed.
max_model_gap : (float, optional)
Maximum time gap (s) in the model result (e.g. for event-based
model results), by default None
Expand Down Expand Up @@ -251,9 +240,6 @@ def match(
return _match_single_obs(
obs,
mod,
obs_item=obs_item,
mod_item=mod_item,
gtype=gtype,
max_model_gap=max_model_gap,
spatial_method=spatial_method,
obs_no_overlap=obs_no_overlap,
Expand Down Expand Up @@ -284,9 +270,6 @@ def match(
_match_single_obs(
o,
mod,
obs_item=obs_item,
mod_item=mod_item,
gtype=gtype,
max_model_gap=max_model_gap,
spatial_method=spatial_method,
obs_no_overlap=obs_no_overlap,
Expand All @@ -300,46 +283,40 @@ def match(


def _match_single_obs(
obs: ObsInputType,
mod: Union[MRInputType, Sequence[MRInputType]],
obs: ObsTypes,
mod: MRTypes | Sequence[MRTypes],
*,
obs_item: Optional[int | str] = None,
mod_item: Optional[int | str] = None,
gtype: Optional[GeometryTypes] = None,
max_model_gap: Optional[float] = None,
spatial_method: Optional[str] = None,
obs_no_overlap: Literal["ignore", "error", "warn"] = "error",
) -> Optional[Comparer]:
observation = _parse_single_obs(obs, obs_item, gtype=gtype)

if isinstance(mod, get_args(MRInputType)):
models: list = [mod]
else:
models = mod # type: ignore

model_results = [_parse_single_model(m, item=mod_item, gtype=gtype) for m in models]
names = [m.name for m in model_results]
names = [m.name for m in models]
if len(names) != len(set(names)):
raise ValueError(f"Duplicate model names found: {names}")

raw_mod_data = {
m.name: (
m.extract(observation, spatial_method=spatial_method)
m.extract(obs, spatial_method=spatial_method)
if isinstance(m, (DfsuModelResult, GridModelResult, DummyModelResult))
else m
)
for m in model_results
for m in models
}

matched_data = match_space_time(
observation=observation,
observation=obs,
raw_mod_data=raw_mod_data,
max_model_gap=max_model_gap,
obs_no_overlap=obs_no_overlap,
)
if matched_data is None:
return None
matched_data.attrs["weight"] = observation.weight
matched_data.attrs["weight"] = obs.weight

# TODO where does this line belong?
matched_data.attrs["modelskill_version"] = __version__
Expand Down Expand Up @@ -420,66 +397,3 @@ def mo_kind(k: str) -> bool:
data = data.dropna(dim="time", subset=mo_cols)

return data


def _parse_single_obs(
    obs: ObsInputType,
    obs_item: Optional[int | str],
    gtype: Optional[GeometryTypes],
) -> PointObservation | TrackObservation:
    """Return `obs` as a point or track observation.

    Parameters
    ----------
    obs : ObsInputType
        Either a ready-made PointObservation/TrackObservation, or raw input
        that is handed to the `observation` factory.
    obs_item : int or str, optional
        Item forwarded to the factory. Must be None when `obs` is already
        a PointObservation/TrackObservation.
    gtype : str, optional
        Geometry type forwarded to the factory. Not used when `obs` is
        already a PointObservation/TrackObservation.

    Returns
    -------
    PointObservation or TrackObservation
        `obs` itself if it already is one, otherwise the factory result.

    Raises
    ------
    ValueError
        If `obs_item` is given together with an observation object.
    """
    if not isinstance(obs, (PointObservation, TrackObservation)):
        # observation factory can only handle track and point
        return observation(obs, item=obs_item, gtype=gtype)  # type: ignore

    # An observation object already carries its item; a second one is an error.
    if obs_item is not None:
        raise ValueError(
            "obs_item argument not allowed if obs is an modelskill.Observation type"
        )
    return obs


def _parse_single_model(
    mod: MRInputType,
    item: Optional[IdxOrNameTypes] = None,
    gtype: Optional[GeometryTypes] = None,
) -> (
    PointModelResult
    | TrackModelResult
    | GridModelResult
    | DfsuModelResult
    | DummyModelResult
):
    """Return `mod` as a model result object.

    Parameters
    ----------
    mod : MRInputType
        Either raw input (path, DataFrame, xarray or mikeio object) that is
        handed to the `model_result` factory, or an existing model result.
    item : int or str, optional
        Item forwarded to the factory. Must be None when `mod` is not one
        of the raw input types.
    gtype : str, optional
        Geometry type forwarded to the factory. Not used when `mod` is not
        one of the raw input types.

    Returns
    -------
    PointModelResult, TrackModelResult, GridModelResult, DfsuModelResult
    or DummyModelResult
        The factory result for raw input, otherwise `mod` itself.

    Raises
    ------
    ValueError
        If the factory rejects the raw input (the factory error is chained
        as the cause), or if `item` is given for a non-raw `mod`.
    AssertionError
        If `mod` is neither a raw input type nor one of the model result
        classes listed above.
    """
    raw_input_types = (
        str,
        Path,
        pd.DataFrame,
        xr.Dataset,
        xr.DataArray,
        mikeio.Dfs0,
        mikeio.Dataset,
        mikeio.DataArray,
        mikeio.dfsu.Dfsu2DH,
    )
    if isinstance(mod, raw_input_types):
        try:
            return model_result(mod, item=item, gtype=gtype)
        except ValueError as e:
            # Chain explicitly so the traceback shows the factory error as
            # the direct cause rather than an incidental error in handling.
            raise ValueError(
                f"Could not compare. Unknown model result type {type(mod)}. {str(e)}"
            ) from e

    # Not raw input: `mod` should already be a model result, which carries
    # its own item.
    if item is not None:
        raise ValueError("item argument not allowed if mod is a ModelResult type")
    assert isinstance(
        mod,
        (
            PointModelResult,
            TrackModelResult,
            GridModelResult,
            DfsuModelResult,
            DummyModelResult,
        ),
    )
    return mod
40 changes: 16 additions & 24 deletions tests/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ def test_match_dataarray(o1, o3):

# Using a mikeio.DataArray instead of a Dfs file, makes it possible to select a subset of data

cc = ms.match([o1, o3], da)
cc = ms.match([o1, o3], ms.DfsuModelResult(da))
assert cc.n_models == 1
assert cc["c2"].n_points == 41

da2 = mikeio.read(fn, area=[0, 2, 52, 54], time=slice("2017-10-28 00:00", None))[
0
] # Spatio/temporal subset

cc2 = ms.match([o1, o3], da2)
cc2 = ms.match([o1, o3], ms.DfsuModelResult(da2))
assert cc2["c2"].n_points == 19


Expand Down Expand Up @@ -231,22 +231,21 @@ def test_small_multi_model_shifted_time_match():
# observation has four timesteps, but only three of them are in the Simple model and three in the NotSimple model
# the number of overlapping points for all three datasets are 2, but three if we look at the models individually

with pytest.warns(UserWarning):
cmp1 = ms.match(obs=obs, mod=mod)
cmp1 = ms.match(obs=obs, mod=mod)
assert cmp1.n_points == 3
cmp1 = ms.match(obs=ms.PointObservation(obs), mod=ms.PointModelResult(mod))
cmp1 = ms.match(obs=ms.PointObservation(obs), mod=ms.PointModelResult(mod))
assert cmp1.n_points == 3

cmp2 = ms.match(obs=obs, mod=mod2)
assert cmp2.n_points == 3
cmp2 = ms.match(obs=ms.PointObservation(obs), mod=ms.PointModelResult(mod2))
assert cmp2.n_points == 3

mcmp = ms.match(
obs=obs,
mod=[
ms.PointModelResult(mod, name="foo"),
ms.PointModelResult(mod2, name="bar"),
],
)
assert mcmp.n_points == 2
mcmp = ms.match(
obs=ms.PointObservation(obs),
mod=[
ms.PointModelResult(mod, name="foo"),
ms.PointModelResult(mod2, name="bar"),
],
)
assert mcmp.n_points == 2


def test_matched_data_single_model():
Expand Down Expand Up @@ -400,7 +399,7 @@ def test_save_comparercollection(o1, o3, tmp_path):
fn = "tests/testdata/SW/HKZN_local_2017_DutchCoast.dfsu"
da = mikeio.read(fn, time=slice("2017-10-28 00:00", None))[0]

cc = ms.match([o1, o3], da)
cc = ms.match([o1, o3], ms.DfsuModelResult(da))

fn = tmp_path / "cc.msk"
cc.save(fn)
Expand All @@ -427,13 +426,6 @@ def test_wind_directions():
assert df.loc["obs", "c_rmse"] == pytest.approx(1.322875655532)


def test_specifying_mod_item_not_allowed_twice(o1, mr1):
    """`match` raises ValueError when `mod_item` is passed with a model result."""
    # item was already specified in the construction of the DfsuModelResult
    expect_item_error = pytest.raises(ValueError, match="item")
    with expect_item_error:
        ms.match(obs=o1, mod=mr1, mod_item=1)


def test_obs_and_mod_can_not_have_same_aux_item_names():
obs_df = pd.DataFrame(
{"wl": [1.0, 2.0, 3.0], "wind_speed": [1.0, 2.0, 3.0]},
Expand Down
Loading