59 commits
0f4a768
remove pvlib restriction
martin-springer Nov 4, 2025
bf74f61
run tests with pvlib 0.13.1
martin-springer Nov 4, 2025
b76a855
re-run TA NSRDB notebook
martin-springer Jan 30, 2026
2427d6d
update changelog
martin-springer Jan 30, 2026
2b68e14
restrict pandas to <3.0.0
martin-springer Jan 30, 2026
bf00799
add nbval workflow filterwarnings
martin-springer Jan 30, 2026
f7140c9
update changelog
martin-springer Jan 30, 2026
e9cbd50
try limiting numpy version for eager tests
martin-springer Jan 30, 2026
bd94a2e
update python versions in test matrix
martin-springer Jan 30, 2026
fdd1f3d
increase pandas requirement to 2.2.3 for python 3.13 compatibility
martin-springer Jan 30, 2026
e8b42f0
increase scipy version for py 3.13 compatibility
martin-springer Jan 30, 2026
6b8440a
update changelog
martin-springer Jan 30, 2026
97efaa4
increase h5py requirement for py 3.13 compatiblity
martin-springer Jan 30, 2026
11f9b4c
update changelog
martin-springer Jan 30, 2026
c2dc17e
increase scikit-learn version for py 3.13 compatibility
martin-springer Jan 30, 2026
bdd8230
update changelog
martin-springer Jan 30, 2026
febf3c3
update plotly for py 3.13 compatibility
martin-springer Jan 30, 2026
177ee05
update setuptools-scm for py 3.13 support
martin-springer Jan 30, 2026
747e2a7
update six to support py 3.13
martin-springer Jan 30, 2026
366f257
update statsmodels for py 3.13 support
martin-springer Jan 30, 2026
d573f79
update threadpoolctl for py 3.13 support
martin-springer Jan 30, 2026
0305a0b
update tomli for py 3.13 support
martin-springer Jan 30, 2026
18bd82d
update typing_extensions for py 3.13
martin-springer Jan 30, 2026
1ed42ba
update urllib3 for py 3.13
martin-springer Jan 30, 2026
f86813c
update xgboost for py 3.13
martin-springer Jan 30, 2026
4d0c25b
update fonttools for py 3.13
martin-springer Jan 30, 2026
7d22561
update idna for py 3.13 support
martin-springer Jan 30, 2026
3c05063
update joblib for py 3.13
martin-springer Jan 30, 2026
5456eb0
updated kiwisolver for py 3.13
martin-springer Jan 30, 2026
d0765c0
updated matplotlib for py 3.13
martin-springer Jan 30, 2026
0599a42
update packaging for py 3.13
martin-springer Jan 30, 2026
4be18fb
update patsy for py 3.13 support
martin-springer Jan 30, 2026
5458392
update Pillow for py 3.13
martin-springer Jan 30, 2026
b9b811d
update pyparsing for py 3.13
martin-springer Jan 30, 2026
b7f14b9
update pytz for py 3.13
martin-springer Jan 30, 2026
23aec01
update changelog
martin-springer Jan 30, 2026
8679ddc
try new scikit-learn version
martin-springer Jan 30, 2026
dc9f209
scikit-learn version 1.7.2 for py 3.13 and xgboost compatibility
martin-springer Jan 30, 2026
cc11b5f
update changelog
martin-springer Jan 30, 2026
b56bd28
bump urllib3 to satisfy dependabot
martin-springer Jan 30, 2026
8308d66
update changelog
martin-springer Jan 30, 2026
4f0ac4e
set pv power or energy for clip filter
martin-springer Feb 3, 2026
814329f
add sub-hourly check to TA before applying clip_filter
martin-springer Feb 3, 2026
1fad660
add test for sub-hourly check in TA clip_filter
martin-springer Feb 3, 2026
35a5e10
update changelog
martin-springer Feb 3, 2026
06a70f7
fix linting
martin-springer Feb 3, 2026
f516d4e
disable clip_filter in existing tests that use daily aggregated data
martin-springer Feb 3, 2026
bead254
fix numpy compatibility
martin-springer Feb 3, 2026
fe5985d
degradation pandas 3.0.0 compatibility
martin-springer Feb 4, 2026
434a751
soiling pandas 3.0.0 compatibility
martin-springer Feb 4, 2026
0987572
update requirements in setup.py
martin-springer Feb 4, 2026
a95634c
remove deprecated public functions
martin-springer Feb 4, 2026
402787e
xgboost > 3.x compatibility
martin-springer Feb 4, 2026
d686447
re-work normalization for pandas 3.0 compatibility
martin-springer Feb 4, 2026
793bdef
update changelog
martin-springer Feb 4, 2026
e107469
fix scikit-learn requirement
martin-springer Feb 4, 2026
87d2b63
simplify tz handling in _interpolate_series
martin-springer Feb 4, 2026
a839710
Merge branch 'development' into fix-numpy-and-pandas-compatibility
martin-springer Feb 4, 2026
fd434f9
resolve merge conflicts
martin-springer Feb 4, 2026
2 changes: 0 additions & 2 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
@@ -133,8 +133,6 @@ Normalization
normalize_with_expected_power
normalize_with_pvwatts
pvwatts_dc_power
delta_index
check_series_frequency


Aggregation
32 changes: 32 additions & 0 deletions docs/sphinx/source/changelog/pending.rst
@@ -2,8 +2,13 @@ Requirements
------------
* Removed pvlib version restrictions in setup.py. Previously "pvlib >= 0.11.0, <0.12.0", now "pvlib".
* Updated pvlib version in requirements.txt from 0.11.0 to 0.14.0
* Removed pandas upper version restriction in setup.py. Now "pandas >= 1.4.4" to support pandas 3.0.
* Removed numpy upper version restriction in setup.py. Now "numpy >= 1.22.4" to support numpy 2.x.
* Updated pandas version in requirements.txt from 2.2.2 to 2.2.3 for python 3.13 compatibility.
* Updated scipy version in requirements.txt from 1.13.1 to 1.14.1 for python 3.13 compatibility.
* Updated h5py version in requirements.txt from 3.11.0 to 3.12.0 for python 3.13 compatibility.
@@ -29,6 +34,33 @@ Requirements
* Updated pytz version in requirements.txt from 2024.1 to 2025.2 for python 3.13 compatibility.


Deprecations
------------
* Removed deprecated ``normalization.delta_index`` function (deprecated in v2.0.0).
The private ``_delta_index`` helper remains available for internal use.
* Removed deprecated ``normalization.check_series_frequency`` function (deprecated in v2.0.0).
The private ``_check_series_frequency`` helper remains available for internal use.


Bug Fixes
---------
* Fixed pandas 3.0 compatibility in ``normalization.py`` by using ``.total_seconds()``
instead of ``.view('int64')`` with hardcoded nanosecond divisors. Pandas 3.0 changed
the default datetime resolution from nanoseconds (``datetime64[ns]``) to microseconds
(``datetime64[us]``). Affected functions: ``_delta_index``, ``_t_step_nanoseconds``,
``_aggregate``, ``_interpolate_series``.
* Fixed datetime resolution preservation in ``normalization.interpolate()`` to ensure
output maintains the same resolution as input (e.g., ``datetime64[us]``).
* Fixed numpy 2.x compatibility in ``soiling.py`` by using ``.item()`` and explicit
indexing to extract scalar values from numpy arrays, as implicit array-to-scalar
conversion is deprecated.
* Fixed xgboost 3.x compatibility in ``filtering.xgboost_clip_filter()`` by using
``xgb.DMatrix`` with explicit feature names for model prediction.
* Fixed pandas 4.0 deprecation warnings by changing lowercase ``'d'`` to uppercase
``'D'`` in Timedelta strings and using ``axis=`` keyword argument for DataFrame
aggregation methods.


Enhancements
------------
* Modified ``TrendAnalysis._filter()`` to allow ``clip_filter`` to use ``pv_energy``
4 changes: 2 additions & 2 deletions rdtools/degradation.py
@@ -261,7 +261,7 @@ def degradation_year_on_year(energy_normalized, recenter=True,
# Auto center
if recenter:
start = energy_normalized.index[0]
oneyear = start + pd.Timedelta('364d')
oneyear = start + pd.Timedelta('364D')
renorm = utilities.robust_median(energy_normalized[start:oneyear])
else:
renorm = 1.0
@@ -280,7 +280,7 @@
tolerance=pd.Timedelta('8D')
)

df['time_diff_years'] = (df.dt - df.dt_right) / pd.Timedelta('365d')
df['time_diff_years'] = (df.dt - df.dt_right) / pd.Timedelta('365D')
df['yoy'] = 100.0 * (df.energy - df.energy_right) / (df.time_diff_years)
df.index = df.dt

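As a small illustration of the uppercase-``'D'`` convention adopted in this diff (the values below are illustrative, not taken from the library):

```python
import pandas as pd

# Newer pandas releases deprecate lowercase 'd' as a day unit in Timedelta
# strings in favor of uppercase 'D'; the arithmetic is unchanged.
one_year = pd.Timedelta("365D")
window = pd.Timedelta("8D")
print(one_year / window)  # 45.625
```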
45 changes: 23 additions & 22 deletions rdtools/filtering.py
@@ -846,30 +846,31 @@ def xgboost_clip_filter(power_ac, mounting_type="fixed"):
power_ac_df["mounting_config"] == "fixed", "mounting_config_bool"
] = 0
# Subset the dataframe to only include model inputs
power_ac_df = power_ac_df[
[
"first_order_derivative_backward",
"first_order_derivative_forward",
"first_order_derivative_backward_rolling_avg",
"first_order_derivative_forward_rolling_avg",
"sampling_frequency",
"mounting_config_bool",
"scaled_value",
"rolling_average",
"daily_max",
"percent_daily_max",
"deriv_max",
"deriv_backward_rolling_stdev",
"deriv_backward_rolling_mean",
"deriv_backward_rolling_median",
"deriv_backward_rolling_min",
"deriv_backward_rolling_max",
]
].dropna()
feature_cols = [
"first_order_derivative_backward",
"first_order_derivative_forward",
"first_order_derivative_backward_rolling_avg",
"first_order_derivative_forward_rolling_avg",
"sampling_frequency",
"mounting_config_bool",
"scaled_value",
"rolling_average",
"daily_max",
"percent_daily_max",
"deriv_max",
"deriv_backward_rolling_stdev",
"deriv_backward_rolling_mean",
"deriv_backward_rolling_median",
"deriv_backward_rolling_min",
"deriv_backward_rolling_max",
]
power_ac_df = power_ac_df[feature_cols].dropna()
# Run the power_ac_df dataframe through the XGBoost ML model,
# and return boolean outputs
# and return boolean outputs. Use DMatrix with explicit feature names
# for xgboost 3.x compatibility.
dmatrix = xgb.DMatrix(power_ac_df, feature_names=feature_cols)
xgb_predictions = pd.Series(
xgboost_clipping_model.predict(power_ac_df).astype(bool)
(xgboost_clipping_model.get_booster().predict(dmatrix) > 0.5).astype(bool)
)
# Add datetime as an index
xgb_predictions.index = power_ac_df.index
127 changes: 64 additions & 63 deletions rdtools/normalization.py
@@ -4,7 +4,6 @@
import numpy as np
from scipy.optimize import minimize
import warnings
from rdtools._deprecation import deprecated


class ConvergenceError(Exception):
@@ -175,44 +174,6 @@ def normalize_with_pvwatts(energy, pvwatts_kws):
return energy_normalized, insolation


def _delta_index(series):
'''
Takes a pandas series with a DatetimeIndex as input and
returns (time step sizes, average time step size) in hours

Parameters
----------
series : pandas.Series
A pandas timeseries

Returns
-------
deltas : pandas.Series
A timeseries representing the timestep sizes of ``series``
mean : float
The average timestep
'''

if series.index.freq is None:
# If there is no frequency information, explicitly calculate interval
# sizes. Length of each interval calculated by using 'int64' to convert
# to nanoseconds.
hours = pd.Series(series.index.view('int64') / (10.0**9 * 3600.0))
hours.index = series.index
deltas = hours.diff()
else:
# If there is frequency information, pandas shift can be used to gain
# a meaningful interval for the first element of the timeseries
# Length of each interval calculated by using 'int64' to convert to
# nanoseconds.
deltas = (series.index - series.index.shift(-1)).view('int64') / \
(10.0**9 * 3600.0)
return deltas, np.mean(deltas[~np.isnan(deltas)])


delta_index = deprecated('2.0.0', removal='3.0.0')(_delta_index)


def irradiance_rescale(irrad, irrad_sim, max_iterations=100,
method='iterative', convergence_threshold=1e-6):
'''
@@ -335,17 +296,46 @@ def _check_series_frequency(series, series_description):
return freq


check_series_frequency = deprecated('2.0.0', removal='3.0.0')(_check_series_frequency)
def _delta_index(series):
'''
Takes a pandas series with a DatetimeIndex as input and
returns (time step sizes, average time step size) in hours.

Parameters
----------
series : pandas.Series
A pandas timeseries

Returns
-------
deltas : pandas.Series
A timeseries representing the timestep sizes of ``series``
mean : float
The average timestep
'''
# Use total_seconds() for resolution-agnostic calculation (pandas 3.0+)
if series.index.freq is None:
# If there is no frequency information, explicitly calculate interval sizes
deltas = pd.Series(series.index).diff().dt.total_seconds() / 3600.0
deltas.index = series.index
else:
# If there is frequency information, pandas shift can be used to gain
# a meaningful interval for the first element of the timeseries
deltas = pd.Series(
(series.index - series.index.shift(-1)).total_seconds() / 3600.0,
index=series.index
)
return deltas, deltas.mean()
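
The frequency-aware branch above relies on ``TimedeltaIndex.total_seconds()``; a minimal standalone sketch of that calculation (illustrative index, not RdTools data):

```python
import pandas as pd

# With freq set on the index, shifting by -1 period and subtracting yields
# one uniform step per element, including the first; total_seconds() avoids
# any assumption about the underlying datetime resolution.
idx = pd.date_range("2024-01-01", periods=3, freq="h")
deltas_hours = (idx - idx.shift(-1)).total_seconds() / 3600.0
print(deltas_hours.tolist())  # [1.0, 1.0, 1.0]
```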


def _t_step_nanoseconds(time_series):
'''
return a series of right labeled differences in the index of time_series
in nanoseconds
'''
t_steps = np.diff(time_series.index.view('int64')).astype('float')
t_steps = np.insert(t_steps, 0, np.nan)
t_steps = pd.Series(index=time_series.index, data=t_steps)
# Use total_seconds() for resolution-agnostic calculation (pandas 3.0+)
t_steps = pd.Series(time_series.index).diff().dt.total_seconds() * 1e9
t_steps.index = time_series.index
return t_steps


@@ -485,26 +475,30 @@ def _aggregate(time_series, target_frequency, max_timedelta, series_type):
union_index = time_series.index.union(output_dummy.index)
time_series = time_series.dropna()

# Return NaN series if no valid data remains after dropna
if len(time_series) == 0:
return pd.Series(np.nan, index=output_dummy.index)

values = time_series.values

# Identify gaps (including from nans) bigger than max_time_delta
timestamps = time_series.index.view('int64')
timestamps = pd.Series(timestamps, index=time_series.index)
t_diffs = timestamps.diff()
# Use total_seconds() for resolution-agnostic calculation (pandas 3.0+)
t_diffs = pd.Series(time_series.index).diff().dt.total_seconds() * 1e9
t_diffs.index = time_series.index
# Keep track of the gap size but with refilled NaNs and new
# timestamps from target freq
t_diffs = t_diffs.reindex(union_index, method='bfill')

max_interval_nanoseconds = max_timedelta.total_seconds() * 10.0**9
max_interval_nanoseconds = max_timedelta.total_seconds() * 1e9

gap_mask = t_diffs > max_interval_nanoseconds
if time_series.index[0] != union_index[0]:
# mask leading NaNs
gap_mask[:time_series.index[0]] = True

time_series = time_series.reindex(union_index)
t_diffs = np.diff(time_series.index.view('int64'))
t_diffs_hours = t_diffs / 10**9 / 3600.0
# Use total_seconds() for resolution-agnostic calculation
t_diffs_hours = pd.Series(time_series.index).diff().dt.total_seconds().values[1:] / 3600.0
if series_type == 'instantaneous':
# interpolate with trapz sum
time_series = time_series.interpolate(method='time')
@@ -574,39 +568,41 @@ def _interpolate_series(time_series, target_index, max_timedelta=None,
df = pd.DataFrame(time_series)
df = df.dropna()

# convert to integer index and calculate the size of gaps in input
timestamps = df.index.view("int64").copy()
# convert to numeric index (seconds since epoch) for interpolation
# Use total_seconds() for resolution-agnostic calculation (pandas 3.0+)
epoch = pd.Timestamp('1970-01-01', tz=df.index.tz)
timestamps = (df.index - epoch).total_seconds().values
df["timestamp"] = timestamps
df["gapsize_ns"] = df["timestamp"].diff()
df["gapsize_s"] = df["timestamp"].diff()
df.index = timestamps

valid_indput_index = df.index.copy()
valid_input_index = df.index.copy()

if max_timedelta is None:
max_interval_nanoseconds = 2 * df['gapsize_ns'].median()
max_interval_seconds = 2 * df['gapsize_s'].median()
else:
max_interval_nanoseconds = max_timedelta.total_seconds() * 10.0**9
max_interval_seconds = max_timedelta.total_seconds()

fraction_excluded = (df['gapsize_ns'] > max_interval_nanoseconds).mean()
fraction_excluded = (df['gapsize_s'] > max_interval_seconds).mean()
if fraction_excluded > warning_threshold:
warnings.warn("Fraction of excluded data "
f"({100*fraction_excluded:0.02f}%) "
"exceeded threshold",
UserWarning)

# put data on index that includes both original and target indicies
target_timestamps = pd.Index(target_index.view('int64'))
# put data on index that includes both original and target indices
target_timestamps = pd.Index((target_index - epoch).total_seconds())
union_index = df.index.append(target_timestamps)
union_index = union_index.drop_duplicates(keep='first')
df = df.reindex(union_index)
df = df.sort_index()

# calculate the gap size in the original data (timestamps)
df['gapsize_ns'] = df['gapsize_ns'].bfill()
df.loc[valid_indput_index, 'gapsize_ns'] = 0
df['gapsize_s'] = df['gapsize_s'].bfill()
df.loc[valid_input_index, 'gapsize_s'] = 0

# perform the interpolation when the max gap size criterion is satisfied
df_valid = df[df['gapsize_ns'] <= max_interval_nanoseconds].copy()
df_valid = df[df['gapsize_s'] <= max_interval_seconds].copy()
df_valid['interpolated_data'] = \
df_valid['data'].interpolate(method='index')

@@ -615,8 +611,8 @@
out = pd.Series(df['interpolated_data'])
out = out.loc[target_timestamps]
out.name = original_name
out.index = pd.to_datetime(out.index, utc=True).tz_convert(target_index.tz)
out = out.reindex(target_index)
# Convert seconds back to datetime, matching target_index
out.index = target_index

return out

@@ -665,6 +661,11 @@ def interpolate(time_series, target, max_timedelta=None, warning_threshold=0.1):
target_index = pd.date_range(time_series.index.min(),
time_series.index.max(),
freq=target)
# Preserve the input series' datetime resolution (e.g., 'us' vs 'ns')
if hasattr(time_series.index, 'unit'):
input_unit = time_series.index.unit
if hasattr(target_index, 'unit') and target_index.unit != input_unit:
target_index = target_index.as_unit(input_unit)

if (time_series.index.tz is None) ^ (target_index.tz is None):
raise ValueError('Either time_series or target is time-zone aware but '
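
The resolution-preservation step added to ``interpolate()`` can be sketched as follows (a minimal example assuming pandas >= 2.0, where ``DatetimeIndex.unit`` and ``as_unit`` are available):

```python
import pandas as pd

# A microsecond-resolution input index; date_range may default to
# nanoseconds, so align the generated target index with the input's unit.
source = pd.date_range("2024-01-01", periods=3, freq="h").as_unit("us")
target = pd.date_range(source.min(), source.max(), freq="30min")
if hasattr(target, "unit") and target.unit != source.unit:
    target = target.as_unit(source.unit)
print(target.unit)  # us
```

The ``hasattr`` guard mirrors the diff's approach of staying compatible with older pandas versions whose indexes lack a ``unit`` attribute.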