diff --git a/HISTORY.rst b/HISTORY.rst index 10f97c3a..2fc61c9f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,9 @@ Unreleased Changes References to the old name and website domain have been updated to reflect this change. https://github.com/natcap/pygeoprocessing/issues/458 +* The average value of valid pixels is now computed in + ``pygeoprocessing.zonal_statistics`` for each feature. + https://github.com/natcap/pygeoprocessing/issues/370 * Handling the case where a floating-point raster passed to ``pygeoprocessing.reclassify_raster`` may have a NaN nodata value. https://github.com/natcap/pygeoprocessing/issues/454 diff --git a/src/pygeoprocessing/geoprocessing.py b/src/pygeoprocessing/geoprocessing.py index 95132f34..13d35edc 100644 --- a/src/pygeoprocessing/geoprocessing.py +++ b/src/pygeoprocessing/geoprocessing.py @@ -1591,6 +1591,7 @@ def zonal_statistics( - 'max': maximum valid pixel value - 'sum': sum of valid pixel values - 'count': number of valid pixels + - 'average': the mean value of valid pixels - 'nodata_count': number of nodata pixels - 'value_counts': number of pixels having each unique value @@ -1638,7 +1639,7 @@ def zonal_statistics( If `base_raster_path_band` is a tuple, the return value is a nested dictionary of stats for that raster band. Top-level keys are the aggregate feature FIDs. Each nested FID dictionary then contains - statistics about that feature: 'min', 'max', 'sum', 'count', + statistics about that feature: 'min', 'max', 'sum', 'count', 'average', 'nodata_count', and optionally 'value_counts'. Example:: { 0: { @@ -1646,6 +1647,7 @@ def zonal_statistics( 'max': 14, 'sum': 42, 'count': 8, + 'average': 5.25, 'nodata_count': 1, 'value_counts': { 2: 5, @@ -1720,7 +1722,8 @@ def zonal_statistics( # don't overlap any valid pixels. def default_aggregate_dict(): default_aggregate_dict = { - 'min': None, 'max': None, 'count': 0, 'nodata_count': 0, 'sum': 0} + 'min': None, 'max': None, 'count': 0, 'nodata_count': 0, 'sum': 0, + 'average': None} if include_value_counts: default_aggregate_dict['value_counts'] = collections.Counter() return default_aggregate_dict @@ -1907,6 +1910,10 @@ def default_aggregate_dict(): feature_data.max(), aggregate_stats_list[i][fid]['max']) aggregate_stats_list[i][fid]['sum'] += feature_data.sum() aggregate_stats_list[i][fid]['count'] += feature_data.size + aggregate_stats_list[i][fid]['average'] = ( + aggregate_stats_list[i][fid]['sum'] / + aggregate_stats_list[i][fid]['count'] + ) if include_value_counts: # .update() here is operating on a Counter, so values are # ADDED, not replaced. @@ -1977,6 +1984,7 @@ def default_aggregate_dict(): aggregate_stats_list[i][unset_fid]['min'] = 0 aggregate_stats_list[i][unset_fid]['max'] = 0 aggregate_stats_list[i][unset_fid]['sum'] = 0 + aggregate_stats_list[i][unset_fid]['average'] = 0 else: aggregate_stats_list[i][unset_fid]['min'] = numpy.min( unset_fid_block) @@ -1987,6 +1995,12 @@ def default_aggregate_dict(): aggregate_stats_list[i][unset_fid]['count'] = unset_fid_block.size aggregate_stats_list[i][unset_fid]['nodata_count'] = numpy.sum( unset_fid_nodata_mask) + if aggregate_stats_list[i][unset_fid]['count'] == 0: + aggregate_stats_list[i][unset_fid]['average'] = None + else: + aggregate_stats_list[i][unset_fid]['average'] = ( + aggregate_stats_list[i][unset_fid]['sum'] / + aggregate_stats_list[i][unset_fid]['count']) if include_value_counts: # .update() here is operating on a Counter, so values are ADDED, # not replaced. diff --git a/tests/test_geoprocessing.py b/tests/test_geoprocessing.py index aee982f0..0567ef12 100644 --- a/tests/test_geoprocessing.py +++ b/tests/test_geoprocessing.py @@ -1183,11 +1183,13 @@ def test_zonal_stats_multiple_rasters(self): 'max': 1, 'min': 1, 'nodata_count': 0, + 'average': 1, 'sum': 81}, 1: { 'count': 1, 'max': 1, 'min': 1, + 'average': 1, 'nodata_count': 0, 'sum': 1} }, { @@ -1195,12 +1197,14 @@ def test_zonal_stats_multiple_rasters(self): 'count': 81, 'max': 2, 'min': 2, + 'average': 2, 'nodata_count': 0, 'sum': 162}, 1: { 'count': 1, 'max': 2, 'min': 2, + 'average': 2, 'nodata_count': 0, 'sum': 2} }] @@ -1345,16 +1349,19 @@ def test_zonal_statistics(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 81.0}, 1: { 'count': 1, 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 1.0}, 2: { 'min': None, 'max': None, + 'average': None, 'count': 0, 'nodata_count': 0, 'sum': 0.0}} @@ -1435,17 +1442,20 @@ def test_zonal_statistics_3d_polygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 81.0}, 1: { 'count': 1, 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 1.0}, 2: { 'min': None, 'max': None, 'count': 0, + 'average': None, 'nodata_count': 0, 'sum': 0.0}} self.assertEqual(result, expected_result) @@ -1489,6 +1499,7 @@ def test_zonal_statistics_multipolygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 2 }}) @@ -1563,6 +1574,7 @@ def test_zonal_statistics_3d_multipolygon(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1, 'sum': 2 }}) @@ -1623,6 +1635,7 @@ def test_zonal_statistics_value_counts(self): 'min': 1.0, 'nodata_count': 0, 'sum': 81.0, + 'average': 1, 'value_counts': {1.0: 81}}, 1: { 'count': 1, @@ -1630,6 +1643,7 @@ def test_zonal_statistics_value_counts(self): 'min': 1.0, 'nodata_count': 0, 'sum': 1.0, + 'average': 1, 'value_counts': {1.0: 1}}, 2: { 'min': None, @@ -1637,6 +1651,7 @@ def test_zonal_statistics_value_counts(self): 'count': 0, 'nodata_count': 0, 'sum': 0.0, + 'average': None, 'value_counts': {}} } self.assertEqual(result, expected_result) @@ -1678,6 +1693,7 @@ def test_zonal_statistics_nodata(self): 'max': None, 'min': None, 'nodata_count': 81, + 'average': None, 'sum': 0.0}} self.assertEqual(result, expected_result) @@ -1715,6 +1731,7 @@ def test_zonal_statistics_nodata_is_zero(self): 'max': 1, 'min': 1, 'nodata_count': 2, + 'average': 1.0, 'sum': 2.0}} self.assertEqual(result, expected_result) @@ -1752,6 +1769,7 @@ def test_zonal_statistics_named_layer(self): 'max': 1.0, 'min': 1.0, 'nodata_count': 0, + 'average': 1.0, 'sum': 81.0}} self.assertEqual(result, expected_result)