diff --git a/upstream_delineator/delineator_utils/consolidate.py b/upstream_delineator/delineator_utils/consolidate.py index 86ba8a2..4fac25a 100644 --- a/upstream_delineator/delineator_utils/consolidate.py +++ b/upstream_delineator/delineator_utils/consolidate.py @@ -27,7 +27,7 @@ def show_area_stats(G: nx.Graph) -> None: :param G: :return: None """ - areas = [data['area'] for node, data in G.nodes(data=True)] + areas = [data['area'] for node, data in G.nodes(data=True) if 'area' in data] # Step 3: Calculate statistics using NumPy n = len(areas) diff --git a/upstream_delineator/delineator_utils/delineate.py b/upstream_delineator/delineator_utils/delineate.py index 75504e5..45d9380 100644 --- a/upstream_delineator/delineator_utils/delineate.py +++ b/upstream_delineator/delineator_utils/delineate.py @@ -41,6 +41,7 @@ save_network, validate, write_geodata, + ensure_int, ) # Shapely throws a bunch of FutureWarnings. Safe to ignore for now, as long as we @@ -243,7 +244,7 @@ def addnode(B: list, node_id): assert len(terminal_node_df) == 1, "Should only have one outlet per watershed" terminal_node_id = terminal_node_df.index[0] # The terminal comid is the unit catchment that contains (overlaps) the outlet point - terminal_comid = terminal_node_df['COMID'].iat[0] + terminal_comid = int(terminal_node_df['COMID'].iat[0]) # Let upstream_comids be the list of unit catchments (and river reaches) that are in the basin upstream_comids = [] @@ -255,7 +256,7 @@ def addnode(B: list, node_id): # of the outlet, and therefore we cannot process them to get expected results. 
gage_list = gages_gdf.index.tolist() for id in gage_list: - comid = gages_gdf.at[id, 'COMID'] + comid = int(gages_gdf.at[id, 'COMID']) if comid not in upstream_comids: gages_gdf.drop(id, inplace=True) raise Warning(f"The point with id = {id} is not contained in the watershed of the first point.") @@ -294,11 +295,12 @@ def addnode(B: list, node_id): # For now, put the split polygon geometry into a field in `gages_gdf` gages_gdf['polygon'] = None - gages_gdf['polygon_area'] = 0 + gages_gdf['polygon_area'] = 0.0 # Iterate over the gages, and run `split_catchment()` for every gage for gage_id in gages_gdf.index: - comid = gages_gdf.at[gage_id, 'COMID'] + gage_id = int(gage_id) + comid = int(gages_gdf.at[gage_id, 'COMID']) lat = gages_gdf.at[gage_id, 'lat'] lng = gages_gdf.at[gage_id, 'lng'] catchment_poly = subbasins_gdf.loc[comid, 'geometry'] @@ -319,7 +321,7 @@ def addnode(B: list, node_id): # Now, we no longer need the downstream portion of the terminal unit catchment # so remove its row from the subbasins GeoDataFrame subbasins_gdf = subbasins_gdf.drop(terminal_comid) - subbasins_gdf.at[terminal_node_id, 'nextdown'] = 0 + subbasins_gdf.at[terminal_node_id, 'nextdown'] = int(0) # Create a NETWORK GRAPH of the river basin. 
if config.get("VERBOSE"): print("Creating Network GRAPH") @@ -402,19 +404,20 @@ def addnode(B: list, node_id): subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: close_holes(p, 0)) subbasins_gdf.reset_index(inplace=True) subbasins_gdf.rename(columns={'target': 'comid'}, inplace=True) + subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int) subbasins_gdf.set_index('comid', inplace=True) # After the dissolve operation, no way to preserve correct information in column 'nextdown' # But it is present in the Graph, so update the GeoDataFrame subbasins_gdf with that information # Add column `nextdownid` and the stream orders based on data in the graph - subbasins_gdf['nextdown'] = 0 + subbasins_gdf['nextdown'] = int(0) for idx in subbasins_gdf.index: try: nextdown = list(G.successors(idx))[0] except Exception: nextdown = 0 - subbasins_gdf.at[idx, 'nextdown'] = nextdown + subbasins_gdf.at[idx, 'nextdown'] = int(nextdown) # Round all the areas to four decimals (just for appearances) subbasins_gdf['unitarea'] = subbasins_gdf['unitarea'].round(1) @@ -435,6 +438,7 @@ def addnode(B: list, node_id): myrivers_gdf = myrivers_gdf.dissolve(by="target", aggfunc=agg) myrivers_gdf.reset_index(inplace=True) myrivers_gdf.rename(columns={'target': 'comid'}, inplace=True) + myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int) myrivers_gdf.set_index('comid', inplace=True) # We can now delete the river reach segment that belonged to the downstream portion of the @@ -446,11 +450,12 @@ def addnode(B: list, node_id): # Add the fields `nextdown` and the stream orders to the rivers. 
for idx in myrivers_gdf.index: + idx = int(idx) try: nextdown = list(G.successors(idx))[0] except Exception: nextdown = 0 - myrivers_gdf.at[idx, 'nextdown'] = nextdown + myrivers_gdf.at[idx, 'nextdown'] = int(nextdown) myrivers_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order'] myrivers_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order'] @@ -460,6 +465,7 @@ def addnode(B: list, node_id): subbasins_gdf['shreve_order'] = 0 for idx in subbasins_gdf.index: + idx = int(idx) subbasins_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order'] subbasins_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order'] @@ -478,8 +484,10 @@ def addnode(B: list, node_id): subbasins_gdf.reset_index(inplace=True) subbasins_gdf.rename(columns={'index': 'comid'}, inplace=True) + subbasins_gdf['comid'] = subbasins_gdf['comid'].astype(int) myrivers_gdf.reset_index(inplace=True) myrivers_gdf.rename(columns={'index': 'comid'}, inplace=True) + myrivers_gdf['comid'] = myrivers_gdf['comid'].astype(int) # The rivers data will no longer be accurate, so drop these columns cols = ["lengthdir", "sinuosity", "slope", "uparea", "order", "strmDrop_t", "slope_taud", "NextDownID", @@ -527,7 +535,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd selected_rows.drop(columns=['COMID'], inplace=True) # Add the column `nextdown` to our temporary GeoDataFrame selected_rows to prevent type problems below - selected_rows['nextdown'] = -999 + selected_rows['nextdown'] = int(-999) # Here is where we add the newly-created split catchments corresponding to our outlet points. 
subbasins_gdf = pd.concat([subbasins_gdf, selected_rows]) @@ -537,9 +545,9 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd # Add the column `nextdown`; we will populate it below for node, comid in new_nodes.items(): - rows = subbasins_gdf['nextdown'] == comid - subbasins_gdf.loc[rows, 'nextdown'] = node - subbasins_gdf.at[node, 'nextdown'] = comid + rows = subbasins_gdf['nextdown'] == int(comid) + subbasins_gdf.loc[rows, 'nextdown'] = int(node) + subbasins_gdf.at[node, 'nextdown'] = int(comid) # Subtract the polygon geometry of the split catchment from its parent unit catchment comid_poly = subbasins_gdf.loc[comid, 'geometry'] @@ -574,7 +582,7 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd # This is the same as largest area to smallest area gages_set.sort_values(by='unitarea', inplace=True) # This one-liner fills in the correct network connection information for nested outlets - gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(comid) + gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(int(comid)) gages_set.sort_values(by='unitarea', ascending=False, inplace=True) subbasins_gdf = pd.concat([subbasins_gdf, gages_set]) @@ -633,10 +641,10 @@ def update_split_catchment_geo(gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gd # After the last nested gage, we need to fix the connection info for # any rows that previously had the unit catchment with comid as its 'nextdown' - rows = subbasins_gdf['nextdown'] == comid + rows = subbasins_gdf['nextdown'] == int(comid) indices = list(rows.index[rows]) indices.remove(first_node) - subbasins_gdf.loc[indices, 'nextdown'] = last_node + subbasins_gdf.loc[indices, 'nextdown'] = int(last_node) return subbasins_gdf @@ -660,6 +668,9 @@ def make_gages_gdf(input_csv: str, csv_dtypes: dict =None) -> gpd.GeoDataFrame: # Check that the CSV file includes at a minimum: id, lat, lng and that all values are appropriate validate(gages_df) 
+ # Convert id to integer to ensure consistent node IDs in graph + gages_df['id'] = gages_df['id'].apply(ensure_int) + # When a row's id matches its outlet_id, it is an outlet gages_df["is_outlet"] = gages_df["outlet_id"] == gages_df["id"] diff --git a/upstream_delineator/delineator_utils/graph_tools.py b/upstream_delineator/delineator_utils/graph_tools.py index cf82a7e..1f83892 100644 --- a/upstream_delineator/delineator_utils/graph_tools.py +++ b/upstream_delineator/delineator_utils/graph_tools.py @@ -17,6 +17,10 @@ def make_river_network(df: DataFrame, terminal_node=None) -> nx.DiGraph: # Populate the graph's nodes and edges. for node_id, nextdown in df['nextdown'].items(): + # Ensure node_id and nextdown are integers + node_id = int(node_id) + nextdown = int(nextdown) + # Add node with comid as node ID G.add_node(node_id) G.nodes[node_id]['area'] = df.at[node_id, 'unitarea'] @@ -118,6 +122,10 @@ def insert_node(G: nx.DiGraph, node, comid) -> nx.DiGraph: into a unit catchment that is a "leaf" node, or one with a Strahler order 1 or into a "stem" node, one with Strahler order > 1. """ + + # Ensure both node and comid are integers + node = int(node) + comid = int(comid) # Get the Strahler order of the node we're inserting into. order = G.nodes[comid]['strahler_order'] diff --git a/upstream_delineator/delineator_utils/util.py b/upstream_delineator/delineator_utils/util.py index 5cdc4cb..815323d 100644 --- a/upstream_delineator/delineator_utils/util.py +++ b/upstream_delineator/delineator_utils/util.py @@ -3,7 +3,7 @@ import pickle import re import warnings -from functools import cache, partial +from functools import cache from typing import Union import requests @@ -34,6 +34,30 @@ simpledec = re.compile(r"\d*\.\d+") +def ensure_int(value) -> int: + """ + Safely convert a value to an integer. + Handles string representations of numbers as well as numeric types. + + Args: + value: Value to convert to int (can be str, float, int, etc.)
+ + Returns: + int: The converted integer value + + Raises: + ValueError: If the value cannot be converted to a valid integer + """ + try: + if isinstance(value, str): + # Try to convert string to float first (handles "1.0" -> 1) + return int(float(value)) + else: + return int(value) + except (ValueError, TypeError) as e: + raise ValueError(f"Cannot convert '{value}' to int: {e}") + + def get_largest(input_poly: Union[MultiPolygon, Polygon]) -> Polygon: """ Converts a Shapely MultiPolygon to a Shapely Polygon @@ -200,17 +224,17 @@ def calc_area(poly: Polygon) -> float: if poly.is_empty: return 0 - projected_poly = shapely.ops.transform( - partial( - pyproj.transform, - pyproj.Proj(init=PROJ_WGS84), - pyproj.Proj( - proj='aea', - lat_1=poly.bounds[1], - lat_2=poly.bounds[3] - ) - ), - poly) + aea_proj = pyproj.Proj( + proj='aea', + lat_1=poly.bounds[1], + lat_2=poly.bounds[3] + ) + transformer = pyproj.Transformer.from_proj( + pyproj.Proj(init=PROJ_WGS84), + aea_proj, + always_xy=True + ) + projected_poly = shapely.ops.transform(transformer.transform, poly) # Get the area in m^2 return projected_poly.area / 1e6 @@ -229,17 +253,17 @@ def calc_length(line: LineString) -> float: if line.is_empty: return 0 - projected_line = shapely.ops.transform( - partial( - pyproj.transform, - pyproj.Proj(init=PROJ_WGS84), - pyproj.Proj( - proj='aea', - lat_1=line.bounds[1], - lat_2=line.bounds[3] - ) - ), - line) + aea_proj = pyproj.Proj( + proj='aea', + lat_1=line.bounds[1], + lat_2=line.bounds[3] + ) + transformer = pyproj.Transformer.from_proj( + pyproj.Proj(init=PROJ_WGS84), + aea_proj, + always_xy=True + ) + projected_line = shapely.ops.transform(transformer.transform, line) # Get the area in m^2 return projected_line.length / 1e3