From e05b65615f980fbf9120b1b333c74df85b2da2c5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 27 Feb 2026 12:24:02 -0800 Subject: [PATCH 1/2] Adding average calculations to zonal_statistics. RE:#370 --- HISTORY.rst | 3 +++ src/pygeoprocessing/geoprocessing.py | 21 ++++++++++++++++++--- tests/test_geoprocessing.py | 21 ++++++++++++++++++++- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index dd70a57e..ef3d9331 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,9 @@ Unreleased Changes References to the old name and website domain have been updated to reflect this change. https://github.com/natcap/pygeoprocessing/issues/458 +* The average value of valid pixels is now computed in + ``pygeoprocessing.zonal_statistics`` for each feature. + https://github.com/natcap/pygeoprocessing/issues/370 2.4.10 (2026-01-13) ------------------- diff --git a/src/pygeoprocessing/geoprocessing.py b/src/pygeoprocessing/geoprocessing.py index 5af4bef7..cc4a45eb 100644 --- a/src/pygeoprocessing/geoprocessing.py +++ b/src/pygeoprocessing/geoprocessing.py @@ -37,7 +37,8 @@ from .geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS from .geoprocessing_core import DEFAULT_OSR_AXIS_MAPPING_STRATEGY from .geoprocessing_core import INT8_CREATION_OPTIONS -from .utils import GDALUseExceptions, gdal_use_exceptions +from .utils import gdal_use_exceptions +from .utils import GDALUseExceptions # This is used to efficiently pass data to the raster stats worker if available if sys.version_info >= (3, 8): @@ -1590,6 +1591,7 @@ def zonal_statistics( - 'max': maximum valid pixel value - 'sum': sum of valid pixel values - 'count': number of valid pixels + - 'average': the mean value of valid pixels - 'nodata_count': number of nodata pixels - 'value_counts': number of pixels having each unique value @@ -1637,7 +1639,7 @@ def zonal_statistics( If `base_raster_path_band` is a tuple, the return value is a nested dictionary of stats for that raster band. Top-level keys are the aggregate feature FIDs. Each nested FID dictionary then contains - statistics about that feature: 'min', 'max', 'sum', 'count', + statistics about that feature: 'min', 'max', 'sum', 'count', 'average', 'nodata_count', and optionally 'value_counts'. Example:: { 0: { @@ -1645,6 +1647,7 @@ def zonal_statistics( 'max': 14, 'sum': 42, 'count': 8, + 'average': 5.25, 'nodata_count': 1, 'value_counts': { 2: 5, @@ -1719,7 +1722,8 @@ def zonal_statistics( # don't overlap any valid pixels. def default_aggregate_dict(): default_aggregate_dict = { - 'min': None, 'max': None, 'count': 0, 'nodata_count': 0, 'sum': 0} + 'min': None, 'max': None, 'count': 0, 'nodata_count': 0, 'sum': 0, + 'average': None} if include_value_counts: default_aggregate_dict['value_counts'] = collections.Counter() return default_aggregate_dict @@ -1906,6 +1910,10 @@ def default_aggregate_dict(): feature_data.max(), aggregate_stats_list[i][fid]['max']) aggregate_stats_list[i][fid]['sum'] += feature_data.sum() aggregate_stats_list[i][fid]['count'] += feature_data.size + aggregate_stats_list[i][fid]['average'] = ( + aggregate_stats_list[i][fid]['sum'] / + aggregate_stats_list[i][fid]['count'] + ) if include_value_counts: # .update() here is operating on a Counter, so values are # ADDED, not replaced. @@ -1976,6 +1984,7 @@ def default_aggregate_dict(): aggregate_stats_list[i][unset_fid]['min'] = 0 aggregate_stats_list[i][unset_fid]['max'] = 0 aggregate_stats_list[i][unset_fid]['sum'] = 0 + aggregate_stats_list[i][unset_fid]['average'] = 0 else: aggregate_stats_list[i][unset_fid]['min'] = numpy.min( unset_fid_block) @@ -1986,6 +1995,12 @@ def default_aggregate_dict(): aggregate_stats_list[i][unset_fid]['count'] = unset_fid_block.size aggregate_stats_list[i][unset_fid]['nodata_count'] = numpy.sum( unset_fid_nodata_mask) + if aggregate_stats_list[i][unset_fid]['count'] == 0: + aggregate_stats_list[i][unset_fid]['average'] = None + else: + aggregate_stats_list[i][unset_fid]['average'] = ( + aggregate_stats_list[i][unset_fid]['sum'] / + aggregate_stats_list[i][unset_fid]['count']) if include_value_counts: # .update() here is operating on a Counter, so values are ADDED, # not replaced. diff --git a/tests/test_geoprocessing.py b/tests/test_geoprocessing.py index 75a4522b..eb67394f 100644 --- a/tests/test_geoprocessing.py +++ b/tests/test_geoprocessing.py @@ -32,10 +32,10 @@ from pygeoprocessing.geoprocessing_core import DEFAULT_CREATION_OPTIONS from pygeoprocessing.geoprocessing_core import \ DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS -from pygeoprocessing.utils import gdal_use_exceptions from pygeoprocessing.geoprocessing_core import INT8_CREATION_OPTIONS from pygeoprocessing.geoprocessing_core import \ INT8_GTIFF_CREATION_TUPLE_OPTIONS +from pygeoprocessing.utils import gdal_use_exceptions _DEFAULT_ORIGIN = (444720, 3751320) _DEFAULT_PIXEL_SIZE = (30, -30) @@ -129,6 +129,7 @@ class TestGeoprocessing(unittest.TestCase): def setUp(self): """Create a temporary workspace that's deleted later.""" + self.maxDiff = None self.workspace_dir = tempfile.mkdtemp() def tearDown(self): @@ -1156,11 +1157,13 @@ def test_zonal_stats_multiple_rasters(self): 'max': 1, 'min': 1, 'nodata_count': 0, + 'average': 1, 'sum': 81}, 1: { 'count': 1, 'max': 1, 'min': 1, + 'average': 1, 'nodata_count': 0, 'sum': 1} }, { @@ -1168,12 +1171,14 @@ def test_zonal_stats_multiple_rasters(self): 'count': 81, 'max': 2, 'min': 2, + 'average': 2, 'nodata_count': 0, 'sum': 162}, 1: { 'count': 1, 'max': 2, 'min': 2, + 'average': 2, 'nodata_count': 0, 'sum': 2} }] @@ -1318,16 +1323,19 @@ def test_zonal_statistics(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 81.0}, 1: { 'count': 1, 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 1.0}, 2: { 'min': None, 'max': None, + 'average': None, 'count': 0, 'nodata_count': 0, 'sum': 0.0}} @@ -1408,17 +1416,20 @@ def test_zonal_statistics_3d_polygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 81.0}, 1: { 'count': 1, 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 1.0}, 2: { 'min': None, 'max': None, 'count': 0, + 'average': None, 'nodata_count': 0, 'sum': 0.0}} self.assertEqual(result, expected_result) @@ -1462,6 +1473,7 @@ def test_zonal_statistics_multipolygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 2 }}) @@ -1536,6 +1548,7 @@ def test_zonal_statistics_3d_multipolygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 2 }}) @@ -1596,6 +1609,7 @@ def test_zonal_statistics_value_counts(self): 'min': 1.0, 'nodata_count': 0, 'sum': 81.0, + 'average': 1, 'value_counts': {1.0: 81}}, 1: { 'count': 1, @@ -1603,6 +1617,7 @@ def test_zonal_statistics_value_counts(self): 'min': 1.0, 'nodata_count': 0, 'sum': 1.0, + 'average': 1, 'value_counts': {1.0: 1}}, 2: { 'min': None, @@ -1610,6 +1625,7 @@ def test_zonal_statistics_value_counts(self): 'count': 0, 'nodata_count': 0, 'sum': 0.0, + 'average': None, 'value_counts': {}} } self.assertEqual(result, expected_result) @@ -1651,6 +1667,7 @@ def test_zonal_statistics_nodata(self): 'max': None, 'min': None, 'nodata_count': 81, + 'average': None, 'sum': 0.0}} self.assertEqual(result, expected_result) @@ -1688,6 +1705,7 @@ def test_zonal_statistics_nodata_is_zero(self): 'max': 1, 'min': 1, 'nodata_count': 2, + 'average': 1.0, 'sum': 2.0}} self.assertEqual(result, expected_result) @@ -1725,6 +1743,7 @@ def test_zonal_statistics_named_layer(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 81.0}} self.assertEqual(result, expected_result) From 743aec53a0d355e5e4f780acaa96b588b6961b41 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 27 Feb 2026 12:26:16 -0800 Subject: [PATCH 2/2] Forgot to remove self.maxDiff=None. RE:#370 --- tests/test_geoprocessing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_geoprocessing.py b/tests/test_geoprocessing.py index eb67394f..c3b11b2d 100644 --- a/tests/test_geoprocessing.py +++ b/tests/test_geoprocessing.py @@ -129,7 +129,6 @@ class TestGeoprocessing(unittest.TestCase): def setUp(self): """Create a temporary workspace that's deleted later.""" - self.maxDiff = None self.workspace_dir = tempfile.mkdtemp() def tearDown(self):