From 4c2af26c693776af2d89bcdb4b655345cbf4fa4b Mon Sep 17 00:00:00 2001 From: larsevj Date: Tue, 23 Dec 2025 11:55:44 +0100 Subject: [PATCH 1/2] Add initial pandas 3 support --- pyproject.toml | 2 +- src/subscript/fmuobs/parsers.py | 21 ++++++++++----------- src/subscript/fmuobs/writers.py | 12 ++++-------- tests/test_check_swatinit.py | 4 +++- tests/test_csv2ofmvol.py | 4 +++- tests/test_fmuobs.py | 1 + tests/test_fmuobs_parsers.py | 2 ++ tests/test_fmuobs_writers.py | 1 + tests/test_ofmvol2csv.py | 8 ++++++-- 9 files changed, 31 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d86a3f5f3..6679daf6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "matplotlib", "numpy", "opm>=2023.04", - "pandas", + "pandas >= 2", "pydantic", "pyscal", "pyyaml", diff --git a/src/subscript/fmuobs/parsers.py b/src/subscript/fmuobs/parsers.py index 1321db43b..fd7044f56 100644 --- a/src/subscript/fmuobs/parsers.py +++ b/src/subscript/fmuobs/parsers.py @@ -5,7 +5,6 @@ import re from pathlib import Path -import numpy as np import pandas as pd from subscript import getLogger @@ -431,20 +430,20 @@ def compute_date_from_days( to this starttime, and converted to DATE. Returns: - pd.DataFrame. DATE column is always of type datetime64 + pd.DataFrame. DATE column is always datetime-like + (datetime64 unit depends on pandas) """ assert isinstance(dframe, pd.DataFrame) - if starttime and "DAYS" in dframe: - if "DATE" not in dframe: - dframe["DATE"] = np.nan - start = pd.to_datetime(starttime) - date_needed_rows = ~dframe["DAYS"].isna() & dframe["DATE"].isna() - dframe["DATE"] = pd.to_datetime(dframe["DATE"]) - dframe.loc[date_needed_rows, "DATE"] = start + pd.to_timedelta( - dframe.loc[date_needed_rows, "DAYS"], "d" - ) + if "DATE" in dframe: dframe["DATE"] = pd.to_datetime(dframe["DATE"]) + + if starttime and "DAYS" in dframe: + computed = pd.to_datetime(starttime) + pd.to_timedelta(dframe["DAYS"], unit="D") + dframe["DATE"] = ( + dframe["DATE"].fillna(computed) if "DATE" in dframe else computed + ) + return dframe diff --git a/src/subscript/fmuobs/writers.py b/src/subscript/fmuobs/writers.py index c3d7d7c0a..a52f71a1d 100644 --- a/src/subscript/fmuobs/writers.py +++ b/src/subscript/fmuobs/writers.py @@ -292,14 +292,10 @@ def convert_dframe_date_to_str(dframe: pd.DataFrame) -> pd.DataFrame: pd.DataFrame: DATE as a string type """ if "DATE" in dframe: - with pd.option_context("future.no_silent_downcasting", True): - dframe = dframe.copy() - dframe["DATE"] = ( - dframe["DATE"] - .astype(str) - .replace(["NaT", "NaN", "nan"], np.nan) - .infer_objects(copy=False) - ) + dframe = dframe.copy() + dframe["DATE"] = ( + dframe["DATE"].astype(str).replace(["NaT", "NaN", "nan"], np.nan) + ) return dframe diff --git a/tests/test_check_swatinit.py b/tests/test_check_swatinit.py index 8cef86cf4..3e5d6d6cd 100644 --- a/tests/test_check_swatinit.py +++ b/tests/test_check_swatinit.py @@ -558,7 +558,9 @@ def test_eqlnum2(tmp_path, mocker): def test_reorder_dframe_for_nonnans(inputrows, expected): """Test that rows with less NaNs will be prioritized through the reorder function""" pd.testing.assert_frame_equal( - reorder_dframe_for_nonnans(pd.DataFrame(inputrows)), pd.DataFrame(expected) + reorder_dframe_for_nonnans(pd.DataFrame(inputrows)), + pd.DataFrame(expected), + check_column_type=False, ) diff --git a/tests/test_csv2ofmvol.py b/tests/test_csv2ofmvol.py index cfdb0dd9e..d9744641d 100644 --- a/tests/test_csv2ofmvol.py +++ b/tests/test_csv2ofmvol.py @@ -279,7 +279,9 @@ def test_df2vol(dframe, expected_lines): else: # (bogus columns in dframe must be ignored) pd.testing.assert_frame_equal( - dframe[backagain_df.columns].fillna(value=0.0), backagain_df + dframe[backagain_df.columns].fillna(value=0.0), + backagain_df, + check_index_type=False, ) diff --git a/tests/test_fmuobs.py b/tests/test_fmuobs.py index 8a4125e72..c8ddf5bc5 100644 --- a/tests/test_fmuobs.py +++ b/tests/test_fmuobs.py @@ -198,6 +198,7 @@ def test_roundtrip_yaml(filename, readonly_testdata_dir): yaml_roundtrip_dframe.sort_index(axis="columns").sort_values("LABEL"), dframe.sort_index(axis="columns").sort_values("LABEL"), check_like=True, + check_dtype=False, ) diff --git a/tests/test_fmuobs_parsers.py b/tests/test_fmuobs_parsers.py index fc96f1e63..b824dc5de 100644 --- a/tests/test_fmuobs_parsers.py +++ b/tests/test_fmuobs_parsers.py @@ -508,11 +508,13 @@ def test_ertobs2df_starttime(string, expected): pd.testing.assert_frame_equal( ertobs2df(string, starttime="2020-01-01").sort_index(axis=1), expected.sort_index(axis=1), + check_dtype=False, ) # Test again with datetime object passed, not string: pd.testing.assert_frame_equal( ertobs2df(string, starttime=datetime.date(2020, 1, 1)).sort_index(axis=1), expected.sort_index(axis=1), + check_dtype=False, ) diff --git a/tests/test_fmuobs_writers.py b/tests/test_fmuobs_writers.py index 1aa21bfc5..a61e99457 100644 --- a/tests/test_fmuobs_writers.py +++ b/tests/test_fmuobs_writers.py @@ -533,6 +533,7 @@ def test_convert_dframe_date_to_str(dframe, expected_dframe): pd.testing.assert_frame_equal( convert_dframe_date_to_str(dframe), expected_dframe, + check_dtype=False, ) diff --git a/tests/test_ofmvol2csv.py b/tests/test_ofmvol2csv.py index 15f8c1364..3581a8943 100644 --- a/tests/test_ofmvol2csv.py +++ b/tests/test_ofmvol2csv.py @@ -270,7 +270,7 @@ def test_parse_well(inputlines, expected): inputlines = ofmvol2csv.cleanse_ofm_lines(inputlines) colnames = ofmvol2csv.extract_columnnames(inputlines) dframe = ofmvol2csv.parse_well(inputlines[1:], colnames) - pd.testing.assert_frame_equal(dframe, expected) + pd.testing.assert_frame_equal(dframe, expected, check_index_type=False) @pytest.mark.parametrize( @@ -362,7 +362,11 @@ def test_process_volstr(inputlines, expected): expected["DATE"] = pd.to_datetime(expected["DATE"]) expected = expected.set_index(["WELL", "DATE"]) dframe = ofmvol2csv.process_volstr("\n".join(inputlines)) - pd.testing.assert_frame_equal(dframe, expected) + pd.testing.assert_frame_equal( + dframe, + expected, + check_index_type=False, + ) @pytest.mark.parametrize( From b19f02eaad77e09d06292a1fe8c2a78b4ad59a7a Mon Sep 17 00:00:00 2001 From: larsevj Date: Thu, 15 Jan 2026 15:54:27 +0100 Subject: [PATCH 2/2] Test with both pandas 2 and 3 --- .github/workflows/subscript.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/subscript.yml b/.github/workflows/subscript.yml index 627725eba..a13711abf 100644 --- a/.github/workflows/subscript.yml +++ b/.github/workflows/subscript.yml @@ -26,9 +26,17 @@ jobs: strategy: matrix: python-version: ["3.11", "3.12", "3.13"] + pandas-version: [""] + include: + - python-version: "3.13" + pandas-version: ">=2,<3" + - python-version: "3.13" + pandas-version: ">=3.0.0rc2" + exclude: + - python-version: "3.13" + pandas-version: "" steps: - - name: Install Ubuntu dependencies run: | sudo apt-get update @@ -63,6 +71,10 @@ jobs: run: | uv pip install ".[tests, docs]" + - name: Pin pandas version + if: matrix.pandas-version + run: uv pip install "pandas${{ matrix.pandas-version }}" + - name: Log OPM-Flow version run: | flow --version