From 4beb86ec3b8b815ce835a1b596c2a9bdbff87c83 Mon Sep 17 00:00:00 2001 From: henrykironde Date: Thu, 18 Dec 2025 01:12:49 -0500 Subject: [PATCH 1/6] Fix: Handle no-data values in TIF files during prediction Previously, when reading TIF files with rasterio, no-data values were not masked before normalization. This caused invalid values (e.g., -9999 normalized to -39.2) to be passed to the model, potentially affecting prediction accuracy. --- src/deepforest/datasets/cropmodel.py | 3 +++ src/deepforest/datasets/prediction.py | 3 ++- src/deepforest/model.py | 1 + src/deepforest/utilities.py | 23 +++++++++++++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/deepforest/datasets/cropmodel.py b/src/deepforest/datasets/cropmodel.py index 7ae0e0399..64c3ea9af 100644 --- a/src/deepforest/datasets/cropmodel.py +++ b/src/deepforest/datasets/cropmodel.py @@ -12,6 +12,8 @@ from torch.utils.data import Dataset from torchvision import transforms +from deepforest.utilities import read_raster_window + def bounding_box_transform(augmentations=None, resize=None): """Create transform pipeline for bounding box data. 
@@ -122,6 +124,7 @@ def __getitem__(self, idx): width = int(max(1, xmax - xmin)) height = int(max(1, ymax - ymin)) box = self.src.read(window=Window(col_off, row_off, width, height)) + box = read_raster_window(box, nodata_value=self.src.nodata) box = np.rollaxis(box, 0, 3) if self.transform: diff --git a/src/deepforest/datasets/prediction.py b/src/deepforest/datasets/prediction.py index f366be5c2..5e4356de3 100644 --- a/src/deepforest/datasets/prediction.py +++ b/src/deepforest/datasets/prediction.py @@ -13,7 +13,7 @@ from torch.utils.data import Dataset, default_collate from deepforest import preprocess -from deepforest.utilities import format_geometry +from deepforest.utilities import format_geometry, read_raster_window # Base prediction class @@ -491,6 +491,7 @@ def get_crop(self, idx): window = self.windows[idx] with rio.open(self.path) as src: window_data = src.read(window=Window(window.x, window.y, window.w, window.h)) + window_data = read_raster_window(window_data, nodata_value=src.nodata) # Convert to torch tensor and rearrange dimensions window_data = torch.from_numpy(window_data).float() # Convert to torch tensor diff --git a/src/deepforest/model.py b/src/deepforest/model.py index 95dd45196..6aba112e8 100644 --- a/src/deepforest/model.py +++ b/src/deepforest/model.py @@ -297,6 +297,7 @@ def write_crops(self, root_dir, images, boxes, labels, savedir): # Crop the image using the square box coordinates img = src.read(window=((int(ymin), int(ymax)), (int(xmin), int(xmax)))) + img = utilities.read_raster_window(img, nodata_value=src.nodata) # Save the cropped image as a PNG file using opencv image_basename = os.path.splitext(os.path.basename(image))[0] img_path = os.path.join(savedir, label, f"{image_basename}_{index}.png") diff --git a/src/deepforest/utilities.py b/src/deepforest/utilities.py index 7d81acbb5..edea655fd 100644 --- a/src/deepforest/utilities.py +++ b/src/deepforest/utilities.py @@ -82,6 +82,28 @@ def load_config( return config +def 
read_raster_window(data, nodata_value=None, masked=True): + """Apply no-data value masking to raster data. + + This function masks no-data values to 0, ensuring consistent handling + across all DeepForest components. + + Args: + data: numpy.ndarray with shape (bands, height, width) containing raster data + nodata_value: The no-data value to mask. If None, no masking is applied. + masked: If True, mask no-data values to 0. Default True. + + Returns: + numpy.ndarray: Raster data with shape (bands, height, width). No-data values + are set to 0 if masked=True and nodata_value is not None. + """ + if masked and nodata_value is not None: + nodata_mask = np.any(data == nodata_value, axis=0) + data[:, nodata_mask] = 0 + + return data + + class DownloadProgressBar(tqdm): """Download progress bar class.""" @@ -587,6 +609,7 @@ def crop_raster(bounds, rgb_path=None, savedir=None, filename=None, driver="GTif left, bottom, right, top, transform=src.transform ) ) + img = read_raster_window(img, nodata_value=src.nodata) cropped_transform = rasterio.windows.transform( rasterio.windows.from_bounds( left, bottom, right, top, transform=src.transform From 8f74a1e3ace7bb69ad59da972b4a3b062389f7cb Mon Sep 17 00:00:00 2001 From: henrykironde Date: Mon, 22 Dec 2025 01:47:14 -0500 Subject: [PATCH 2/6] Refactor nodata masking to use rasterio's dataset_mask API - Rename read_raster_window to apply_nodata_mask - Combine src.read() with masking in single function - Use rasterio's dataset_mask() for efficient masking - Update all call sites to new signature --- src/deepforest/datasets/cropmodel.py | 5 ++-- src/deepforest/datasets/prediction.py | 7 ++--- src/deepforest/model.py | 5 ++-- src/deepforest/utilities.py | 38 ++++++++++++++++----------- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/deepforest/datasets/cropmodel.py b/src/deepforest/datasets/cropmodel.py index 64c3ea9af..fa32fda32 100644 --- a/src/deepforest/datasets/cropmodel.py +++ 
b/src/deepforest/datasets/cropmodel.py @@ -12,7 +12,7 @@ from torch.utils.data import Dataset from torchvision import transforms -from deepforest.utilities import read_raster_window +from deepforest.utilities import apply_nodata_mask def bounding_box_transform(augmentations=None, resize=None): @@ -123,8 +123,7 @@ def __getitem__(self, idx): row_off = int(ymin) width = int(max(1, xmax - xmin)) height = int(max(1, ymax - ymin)) - box = self.src.read(window=Window(col_off, row_off, width, height)) - box = read_raster_window(box, nodata_value=self.src.nodata) + box = apply_nodata_mask(self.src, Window(col_off, row_off, width, height)) box = np.rollaxis(box, 0, 3) if self.transform: diff --git a/src/deepforest/datasets/prediction.py b/src/deepforest/datasets/prediction.py index 5e4356de3..05ca3a9f5 100644 --- a/src/deepforest/datasets/prediction.py +++ b/src/deepforest/datasets/prediction.py @@ -13,7 +13,7 @@ from torch.utils.data import Dataset, default_collate from deepforest import preprocess -from deepforest.utilities import format_geometry, read_raster_window +from deepforest.utilities import apply_nodata_mask, format_geometry # Base prediction class @@ -490,8 +490,9 @@ def window_list(self): def get_crop(self, idx): window = self.windows[idx] with rio.open(self.path) as src: - window_data = src.read(window=Window(window.x, window.y, window.w, window.h)) - window_data = read_raster_window(window_data, nodata_value=src.nodata) + window_data = apply_nodata_mask( + src, Window(window.x, window.y, window.w, window.h) + ) # Convert to torch tensor and rearrange dimensions window_data = torch.from_numpy(window_data).float() # Convert to torch tensor diff --git a/src/deepforest/model.py b/src/deepforest/model.py index 6aba112e8..0f74a3b9e 100644 --- a/src/deepforest/model.py +++ b/src/deepforest/model.py @@ -296,8 +296,9 @@ def write_crops(self, root_dir, images, boxes, labels, savedir): xmin, ymin, xmax, ymax = square_box # Crop the image using the square box coordinates 
- img = src.read(window=((int(ymin), int(ymax)), (int(xmin), int(xmax)))) - img = utilities.read_raster_window(img, nodata_value=src.nodata) + img = utilities.apply_nodata_mask( + src, ((int(ymin), int(ymax)), (int(xmin), int(xmax))) + ) # Save the cropped image as a PNG file using opencv image_basename = os.path.splitext(os.path.basename(image))[0] img_path = os.path.join(savedir, label, f"{image_basename}_{index}.png") diff --git a/src/deepforest/utilities.py b/src/deepforest/utilities.py index edea655fd..1b2537fd7 100644 --- a/src/deepforest/utilities.py +++ b/src/deepforest/utilities.py @@ -82,24 +82,32 @@ def load_config( return config -def read_raster_window(data, nodata_value=None, masked=True): - """Apply no-data value masking to raster data. +def apply_nodata_mask(src, window): + """Read raster window and apply no-data value masking. - This function masks no-data values to 0, ensuring consistent handling - across all DeepForest components. + This function reads a window from a rasterio dataset and masks no-data + values to 0, ensuring consistent handling across all DeepForest components. + Uses rasterio's built-in dataset mask for efficient masking. Args: - data: numpy.ndarray with shape (bands, height, width) containing raster data - nodata_value: The no-data value to mask. If None, no masking is applied. - masked: If True, mask no-data values to 0. Default True. + src: rasterio.DatasetReader opened in 'r' mode + window: rasterio.windows.Window or tuple defining the window to read Returns: numpy.ndarray: Raster data with shape (bands, height, width). No-data values - are set to 0 if masked=True and nodata_value is not None. + are set to 0. 
""" - if masked and nodata_value is not None: - nodata_mask = np.any(data == nodata_value, axis=0) - data[:, nodata_mask] = 0 + data = src.read(window=window) + + # Use rasterio's dataset_mask to get the mask (True = valid, False = nodata) + if src.nodata is not None: + mask = src.dataset_mask(window=window) + # Invert mask: True where nodata, False where valid + nodata_mask = ~mask + # Set nodata pixels to 0 for all bands + # Apply 2D mask to each band: data shape is (bands, height, width) + for band_idx in range(data.shape[0]): + data[band_idx, nodata_mask] = 0 return data @@ -604,12 +612,10 @@ def crop_raster(bounds, rgb_path=None, savedir=None, filename=None, driver="GTif driver = "PNG" else: # Read projected data using rasterio and crop - img = src.read( - window=rasterio.windows.from_bounds( - left, bottom, right, top, transform=src.transform - ) + window = rasterio.windows.from_bounds( + left, bottom, right, top, transform=src.transform ) - img = read_raster_window(img, nodata_value=src.nodata) + img = apply_nodata_mask(src, window) cropped_transform = rasterio.windows.transform( rasterio.windows.from_bounds( left, bottom, right, top, transform=src.transform From a4a22d35067b57007c128c286fc03122e0c06a1d Mon Sep 17 00:00:00 2001 From: henrykironde Date: Wed, 24 Dec 2025 22:53:19 -0500 Subject: [PATCH 3/6] Avoid out-of-bounds errors --- src/deepforest/datasets/cropmodel.py | 5 ++++- src/deepforest/utilities.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/deepforest/datasets/cropmodel.py b/src/deepforest/datasets/cropmodel.py index fa32fda32..dd851d9e4 100644 --- a/src/deepforest/datasets/cropmodel.py +++ b/src/deepforest/datasets/cropmodel.py @@ -123,7 +123,10 @@ def __getitem__(self, idx): row_off = int(ymin) width = int(max(1, xmax - xmin)) height = int(max(1, ymax - ymin)) - box = apply_nodata_mask(self.src, Window(col_off, row_off, width, height)) + # Clip window to image bounds to avoid out-of-bounds 
errors + window = Window(col_off, row_off, width, height) + window = window.intersection(Window(0, 0, self._image_width, self._image_height)) + box = apply_nodata_mask(self.src, window) box = np.rollaxis(box, 0, 3) if self.transform: diff --git a/src/deepforest/utilities.py b/src/deepforest/utilities.py index 1b2537fd7..e20516002 100644 --- a/src/deepforest/utilities.py +++ b/src/deepforest/utilities.py @@ -10,6 +10,7 @@ import xmltodict from omegaconf import DictConfig, OmegaConf from PIL import Image +from rasterio.windows import Window from tqdm import tqdm from deepforest import _ROOT @@ -97,17 +98,40 @@ def apply_nodata_mask(src, window): numpy.ndarray: Raster data with shape (bands, height, width). No-data values are set to 0. """ + # Clip window to image bounds before reading to ensure consistent dimensions + if isinstance(window, Window): + full_window = Window(0, 0, src.width, src.height) + try: + window = window.intersection(full_window) + except rasterio.errors.WindowError as exc: + # Window is completely outside image bounds + raise ValueError( + f"Window {window} is completely outside image bounds " + f"(width={src.width}, height={src.height})" + ) from exc + data = src.read(window=window) # Use rasterio's dataset_mask to get the mask (True = valid, False = nodata) if src.nodata is not None: mask = src.dataset_mask(window=window) - # Invert mask: True where nodata, False where valid + expected_height, expected_width = data.shape[1], data.shape[2] + if mask.shape[0] > expected_height: + mask = mask[:expected_height, :] + if mask.shape[1] > expected_width: + mask = mask[:, :expected_width] + if mask.shape != (expected_height, expected_width): + return data nodata_mask = ~mask + assert nodata_mask.shape == (expected_height, expected_width), ( + f"nodata_mask shape {nodata_mask.shape} != expected {(expected_height, expected_width)}" + ) # Set nodata pixels to 0 for all bands # Apply 2D mask to each band: data shape is (bands, height, width) + # Use 
np.where to safely apply mask without indexing issues for band_idx in range(data.shape[0]): - data[band_idx, nodata_mask] = 0 + # np.where(condition, x, y): where condition is True, use x (0), else use y (data) + data[band_idx] = np.where(nodata_mask, 0, data[band_idx]) return data From 8df58ebcbec74c1d1dfa2c384fe9c4c479977513 Mon Sep 17 00:00:00 2001 From: henrykironde Date: Wed, 7 Jan 2026 15:00:51 -0500 Subject: [PATCH 4/6] rephrasing apply_nodata_mask docstring --- src/deepforest/utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/deepforest/utilities.py b/src/deepforest/utilities.py index e20516002..ee9daa366 100644 --- a/src/deepforest/utilities.py +++ b/src/deepforest/utilities.py @@ -87,8 +87,8 @@ def apply_nodata_mask(src, window): """Read raster window and apply no-data value masking. This function reads a window from a rasterio dataset and masks no-data - values to 0, ensuring consistent handling across all DeepForest components. - Uses rasterio's built-in dataset mask for efficient masking. + values to 0 for more consistent predictions. If no nodata value is set, + the data are returned unmodified. 
Args: src: rasterio.DatasetReader opened in 'r' mode From 2ec2e7d19525d6e44236081b046523a448711eab Mon Sep 17 00:00:00 2001 From: Henry Senyondo Date: Thu, 15 Jan 2026 13:05:02 -0500 Subject: [PATCH 5/6] Test empty files --- tests/test_model_prediction.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/test_model_prediction.py diff --git a/tests/test_model_prediction.py b/tests/test_model_prediction.py new file mode 100644 index 000000000..ba0699801 --- /dev/null +++ b/tests/test_model_prediction.py @@ -0,0 +1,31 @@ +import numpy as np +import pandas as pd +import pytest + +from deepforest import main + + +@pytest.mark.parametrize( + "model_name", + [ + "weecology/deepforest-bird", + # "weecology/deepforest-everglades-bird-species-detector", + # "weecology/deepforest-tree", + # "weecology/deepforest-livestock", + # "weecology/cropmodel-deadtrees", + ], +) +def test_white_image_predict_tile_no_predictions_bird_model(model_name): + """All-white image should yield no detections with various models.""" + m = main.deepforest() + m.create_trainer() + m.load_model(model_name) + # Create a white image (uint8 RGB) + white = np.full((2048, 2048, 3), 255, dtype=np.uint8) + res = m.predict_tile( + image=white, + patch_size=128, + patch_overlap=0.0, + iou_threshold=m.config.nms_thresh, + ) + assert (res is None) or (isinstance(res, pd.DataFrame) and res.empty) From 11e2793e19d9a507464d213c5cd3987a00a7252b Mon Sep 17 00:00:00 2001 From: henry senyondo Date: Thu, 15 Jan 2026 16:37:16 -0500 Subject: [PATCH 6/6] Update assertion for model prediction results --- tests/test_model_prediction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_model_prediction.py b/tests/test_model_prediction.py index ba0699801..cc3b8eda2 100644 --- a/tests/test_model_prediction.py +++ b/tests/test_model_prediction.py @@ -28,4 +28,5 @@ def test_white_image_predict_tile_no_predictions_bird_model(model_name): patch_overlap=0.0, 
iou_threshold=m.config.nms_thresh, ) - assert (res is None) or (isinstance(res, pd.DataFrame) and res.empty) + assert len(res) == 0 + #assert (res is None) or (isinstance(res, pd.DataFrame) and res.empty)