eii_analysis/eii_analysis.py at main · springinnovate/eii_analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
"""
EI Analysis


Introduction
Purpose
Discover, summarize and compare different indices or components of ecosystem integrity (EI) within the SOKNOT region
Assess which are most sensitive to change, and reflect our understanding on the ground of how (and where) the system has been changing
Compare WCMC's Ecosystem Integrity Index (EII) with other indices recently calculated for SOKNOT (e.g., WRI's Global Pasture Watch data, the Protected Area Isolation Index), and derive conclusions about relative usability and utility of each
Make recommendations to donor offices and field teams about how to move forward with representing integrity in SOKNOT: the sensitivity of different metrics to detecting change in the places where we work and whether these would be sufficient to represent the impact of our work
"""

import logging
import math
import os
import sys

from ecoshard import geoprocessing
from ecoshard import taskgraph
from osgeo import gdal
from statsmodels.tsa.stattools import acf
import numba
import numpy as np
import pymannkendall as mk

# Cap GDAL's raster block cache at 2**27 (134,217,728; i.e. 128 MiB if the
# value is interpreted as bytes -- NOTE(review): confirm against GDAL docs).
gdal.SetCacheMax(2**27)


# Root logger: DEBUG level, written to stdout, with timestamp, logger name and
# the calling function/line number included in every record.
logging.basicConfig(
    level=logging.DEBUG,
    format=(
        "%(asctime)s (%(relativeCreated)d) %(levelname)s %(name)s"
        " [%(funcName)s:%(lineno)d] %(message)s"
    ),
    stream=sys.stdout,
)
LOGGER = logging.getLogger(__name__)
# Raise the threshold on third-party loggers so their DEBUG output does not
# drown out this script's own messages.
logging.getLogger("ecoshard.taskgraph").setLevel(logging.INFO)
logging.getLogger("rasterio").setLevel(logging.WARNING)
logging.getLogger("fiona").setLevel(logging.WARNING)
logging.getLogger("matplotlib.font_manager").setLevel(logging.WARNING)
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("PIL").setLevel(logging.WARNING)
# Directory that receives the TaskGraph state plus every aligned, masked and
# result raster; it is created as a side effect of importing this module.
WORKSPACE_DIR = "workspace_eii_analysis"
os.makedirs(WORKSPACE_DIR, exist_ok=True)

# Project-area vector; main() passes it to the alignment step both as a
# bounding-box input and as the mask vector.
SOKNOT_VECTOR_PATH = (
    "../data_platform/Conservation_Activities/SOKNOT/SOKNOT_ProjectAreas.gpkg"
)

# EII - Ecosystem Integrity Index
# NOTE(review): not referenced by any code visible in this file.
EII_RASTER_PATH = "../data_platform/Nature/eii_soknot/eii_soknot_ts_2020.tif"

# Per-year Global Pasture Watch rasters; "{year}" is substituted via
# str.format. In main() the "savgol" raster is compared against MASK_VALUES
# to select pixels, and the "grass" raster supplies the values that are kept.
GPW_SAVGOL_RASTER_PATTERN = r"Z:\data_platform\Nature\global_pasture_watch_rasters\gpw_grassland_rf.savgol.bthr_c_30m_{year}0101_{year}1231_go_epsg.4326_v1.tif"
GPW_GRASS_RASTER_PATTERN = r"Z:\data_platform\Nature\global_pasture_watch_rasters\gpw_gpp.grass_lue.model_m_30m_s_{year}0101_{year}1231_go_epsg.4326_v1.tif"
GRASS_NODATA = 65000  # this just appears to be a weird value that's nodata
# Years analysed: 2000 through 2019 inclusive (range() excludes the stop).
YEAR_RANGE = range(2000, 2020)

# Values of the savgol raster to analyse; main() runs the whole analysis once
# per value and prefixes the output file names with it.
MASK_VALUES = [1, 2]


def _mask_op(value_array, mask_array, mask_value, nodata):
    """Keep values where the mask matches ``mask_value``, else use ``nodata``.

    Args:
        value_array: array of values to pass through.
        mask_array: array compared element-wise against ``mask_value``.
        mask_value: the mask value that marks pixels to keep.
        nodata: fill value used wherever the mask does not match.

    Returns:
        Array holding ``value_array`` where ``mask_array == mask_value`` and
        ``nodata`` everywhere else.
    """
    keep = mask_array == mask_value
    return np.where(keep, value_array, nodata)


def pixel_acf(nlags):
    """Build a per-pixel autocorrelation function for a fixed number of lags.

    Args:
        nlags: highest lag to evaluate.

    Returns:
        A callable taking a 1-D series. It returns an array of ``nlags + 1``
        NaNs when the series contains any NaN; otherwise it returns the
        result of statsmodels' ``acf(series, nlags=nlags, fft=False)``
        (autocorrelation at lag 0, lag 1, ..., lag ``nlags``).
    """

    def _pixel_acf(series):
        # A series with gaps is not evaluated; the whole pixel becomes NaN.
        has_gap = np.isnan(series).any()
        if has_gap:
            return np.full(nlags + 1, np.nan)
        return acf(series, nlags=nlags, fft=False)

    return _pixel_acf


@numba.njit(inline="always")
def _normal_cdf(x):
    """Standard normal CDF Φ(x), written with ``math.erf`` so numba can compile it."""
    scaled = x / math.sqrt(2.0)
    return 0.5 * (1.0 + math.erf(scaled))


@numba.njit
def _fast_acf(series, nlags):
    """Autocorrelation of a 1-D series for lags ``0..nlags`` (numba-compiled).

    Both the variance and every lagged autocovariance are divided by the full
    series length. Lag 0 is always reported as 1.0; every other lag is NaN
    when the series has zero variance.

    Args:
        series: 1-D numeric array.
        nlags: highest lag to evaluate.

    Returns:
        float32 array of length ``nlags + 1``.
    """
    count = series.size
    centered = series - np.mean(series)
    variance = np.sum(centered ** 2) / count
    result = np.empty(nlags + 1, np.float32)
    result[0] = 1.0  # lag 0 is 1 by definition
    for shift in range(1, nlags + 1):
        if variance != 0:
            autocov = np.sum(centered[:-shift] * centered[shift:]) / count
            result[shift] = autocov / variance
        else:
            result[shift] = np.nan
    return result


@numba.njit
def _mann_kendall_sen(series):
    """Return (Sen slope, two-sided Mann-Kendall p-value) for one pixel series.

    NaN entries are dropped before the test. The variance of the S statistic
    includes the standard correction for tied values, so series with repeated
    values do not get an overstated variance (and hence biased p-values).

    NOTE: pairwise slopes are divided by the index distance in the NaN-free
    series, so dropping NaNs compresses the time axis. This is exact only for
    gap-free series.

    Args:
        series: 1-D float array; NaNs are allowed.

    Returns:
        Tuple ``(slope, p)``. Both are NaN when fewer than two non-NaN values
        remain. ``p`` is 1.0 when the S statistic is zero (for example, a
        constant series).
    """
    # drop NaNs
    clean = series[~np.isnan(series)]
    n = clean.size
    if n < 2:
        return math.nan, math.nan

    # Mann-Kendall S statistic and all pairwise (Sen) slopes
    m = n * (n - 1) // 2
    slopes = np.empty(m, np.float32)
    s_val = 0.0
    k = 0
    for i in range(n - 1):
        for j in range(i + 1, n):
            diff = clean[j] - clean[i]
            slopes[k] = diff / (j - i)
            if diff > 0:
                s_val += 1.0
            elif diff < 0:
                s_val -= 1.0
            k += 1
    slopes.sort()
    # Sen slope is the median of the pairwise slopes.
    slope = slopes[m // 2] if m & 1 else 0.5 * (slopes[m // 2 - 1] + slopes[m // 2])

    # Tie correction: each group of t equal values removes t(t-1)(2t+5) from
    # the variance numerator. Groups are found as runs in the sorted values.
    ordered = np.sort(clean)
    tie_term = 0.0
    run = 1
    for idx in range(1, n):
        if ordered[idx] == ordered[idx - 1]:
            run += 1
        else:
            if run > 1:
                tie_term += run * (run - 1) * (2 * run + 5)
            run = 1
    if run > 1:
        tie_term += run * (run - 1) * (2 * run + 5)

    var_s = (n * (n - 1) * (2 * n + 5) - tie_term) / 18.0

    if s_val == 0:
        # No net trend; this also covers the all-tied case where var_s is 0.
        z = 0.0
    else:
        if var_s <= 0:
            # Defensive: a non-zero S implies positive variance.
            return slope, math.nan
        # continuity correction: shrink |S| by one before standardising
        z = (s_val - math.copysign(1.0, s_val)) / math.sqrt(var_s)
    p = 2.0 * (1.0 - _normal_cdf(abs(z)))  # two-sided

    return slope, p


@numba.njit(parallel=True)
def compute_trends_and_acf(stack, nlags):
    """Compute per-pixel Sen slope, Mann-Kendall p-value and autocorrelation.

    Rows are processed in parallel with ``numba.prange``. A pixel is evaluated
    only when its series has at least two non-NaN values; every other pixel
    stays NaN in all outputs.

    Args:
        stack: 3-D array indexed as (year, row, col).
        nlags: highest autocorrelation lag to compute.

    Returns:
        Tuple ``(slope, p_value, acf)`` of float32 arrays with shapes
        (rows, cols), (rows, cols) and (nlags + 1, rows, cols).
    """
    n_years, n_rows, n_cols = stack.shape
    slope_out = np.full((n_rows, n_cols), np.nan, np.float32)
    p_out = np.full((n_rows, n_cols), np.nan, np.float32)
    acf_out = np.full((nlags + 1, n_rows, n_cols), np.nan, np.float32)

    for row in numba.prange(n_rows):
        for col in range(n_cols):
            pixel_series = stack[:, row, col]
            valid_count = np.sum(~np.isnan(pixel_series))
            if valid_count >= 2:
                pixel_slope, pixel_p = _mann_kendall_sen(pixel_series)
                slope_out[row, col] = pixel_slope
                p_out[row, col] = pixel_p
                acf_out[:, row, col] = _fast_acf(pixel_series, nlags)
    return slope_out, p_out, acf_out


def _schedule_mask_tasks(task_graph, mask_value, align_task_lookup):
    """Schedule the align and mask tasks for every year of one mask value.

    Args:
        task_graph: TaskGraph that receives the tasks.
        mask_value: savgol raster value selecting the pixels to keep.
        align_task_lookup: dict mapping year -> align task; it is updated in
            place so each year is aligned only once across mask values.

    Returns:
        List of ``(year, mask_task, masked_grass_raster_path)`` tuples in
        ``YEAR_RANGE`` order.
    """
    mask_task_list = []
    for year in YEAR_RANGE:
        LOGGER.info(f"analyze {year}")
        base_raster_path_list = [
            GPW_GRASS_RASTER_PATTERN.format(year=year),
            GPW_SAVGOL_RASTER_PATTERN.format(year=year),
        ]
        gpw_raster_info = geoprocessing.get_raster_info(base_raster_path_list[0])
        aligned_raster_path_list = [
            os.path.join(WORKSPACE_DIR, f"aligned_{os.path.basename(path)}")
            for path in base_raster_path_list
        ]

        if year not in align_task_lookup:
            align_task_lookup[year] = task_graph.add_task(
                func=geoprocessing.align_and_resize_raster_stack,
                args=(
                    base_raster_path_list,
                    aligned_raster_path_list,
                    ["nearest", "nearest"],
                    gpw_raster_info["pixel_size"],
                    "intersection",
                ),
                kwargs={
                    "base_vector_path_list": [SOKNOT_VECTOR_PATH],
                    "target_projection_wkt": gpw_raster_info["projection_wkt"],
                    "vector_mask_options": {
                        "mask_vector_path": SOKNOT_VECTOR_PATH,
                    },
                },
                target_path_list=aligned_raster_path_list,
                task_name=f"align for {year}",
            )
        align_task = align_task_lookup[year]

        base_name, extension = os.path.splitext(aligned_raster_path_list[0])
        masked_grass_raster_path = f"{base_name}_masked_by_{mask_value}{extension}"
        gpw_nodata = gpw_raster_info["nodata"][0]
        mask_task = task_graph.add_task(
            func=geoprocessing.raster_calculator,
            args=(
                [
                    (aligned_raster_path_list[0], 1),
                    (aligned_raster_path_list[1], 1),
                    (mask_value, "raw"),
                    (gpw_nodata, "raw"),
                ],
                _mask_op,
                masked_grass_raster_path,
                gdal.GDT_Float32,
                gpw_nodata,
            ),
            dependent_task_list=[align_task],
            target_path_list=[masked_grass_raster_path],
            task_name=f"mask {year} by {mask_value}",
        )
        mask_task_list.append((year, mask_task, masked_grass_raster_path))
    return mask_task_list


def _load_masked_stack(mask_task_list):
    """Wait for the mask tasks and load their rasters as one (year, row, col) stack.

    A pixel is valid only if, in every year, its value is greater than zero
    and not equal to ``GRASS_NODATA``. All other pixels are set to NaN for
    every year.
    """
    array_list = []
    final_mask = None
    for year, mask_task, masked_grass_raster_path in mask_task_list:
        mask_task.join()
        LOGGER.info(f"reading arrays for {year}")
        local_grass_array = gdal.OpenEx(masked_grass_raster_path).ReadAsArray()
        LOGGER.info(f"masking arrays for {year}")
        array_list.append(local_grass_array)
        mask_array = (local_grass_array > 0) & (local_grass_array != GRASS_NODATA)
        if final_mask is None:
            final_mask = mask_array.copy()
        else:
            final_mask &= mask_array  # intersection: valid for all years

    LOGGER.info("stack into one array")
    stack = np.stack(array_list, axis=0)  # shape: (years, rows, cols)
    # np.stack copied the data; drop the per-year arrays to lower peak memory.
    del array_list

    LOGGER.info("apply mask")
    stack[:, ~final_mask] = np.nan
    return stack


def _write_array_to_raster(raster_path, array, nodata):
    """Write ``array`` to band 1 of an existing raster, storing NaN as ``nodata``.

    The dataset is flushed and released before returning so the pixels are on
    disk when the call completes.

    Raises:
        RuntimeError: if the raster cannot be opened for update.
    """
    raster = gdal.OpenEx(raster_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    if raster is None:
        raise RuntimeError(f"could not open {raster_path} for update")
    band = raster.GetRasterBand(1)
    # NaN is not the raster's declared nodata value, so convert before writing.
    band.WriteArray(np.where(np.isnan(array), nodata, array).astype(array.dtype))
    band.FlushCache()
    band = None
    raster = None


def main():
    """Compute per-pixel grass trend rasters for each value in ``MASK_VALUES``.

    For every mask value, the yearly GPW grass rasters are aligned, masked by
    the savgol raster, stacked across ``YEAR_RANGE``, and passed to
    ``compute_trends_and_acf``. The Sen slope and Mann-Kendall p-value arrays
    are written to ``{mask_value}mask_slope_change_dir.tif`` and
    ``{mask_value}mask_p_vals.tif`` in ``WORKSPACE_DIR``. The autocorrelation
    result is computed but not written anywhere.

    If any input raster is missing, every missing path is logged as an error
    and the function returns before scheduling any work.
    """
    LOGGER.info("starting")

    # Validate inputs before reading raster info or creating worker processes.
    missing_path_list = [
        path
        for year in YEAR_RANGE
        for path in (
            GPW_GRASS_RASTER_PATTERN.format(year=year),
            GPW_SAVGOL_RASTER_PATTERN.format(year=year),
        )
        if not os.path.exists(path)
    ]
    if missing_path_list:
        for path in missing_path_list:
            LOGGER.error(f"{path} not found")
        return

    target_nodata = -9999
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, 20, 10)
    try:
        align_task_lookup = {}
        for mask_value in MASK_VALUES:
            mask_task_list = _schedule_mask_tasks(
                task_graph, mask_value, align_task_lookup
            )
            stack = _load_masked_stack(mask_task_list)

            nlags = 1
            LOGGER.info("apply pixel_acf")
            sen_slope, p_values, acf_stack = compute_trends_and_acf(stack, nlags)

            # The last year's masked raster is the template for the outputs.
            base_raster_path = mask_task_list[-1][2]
            target_slope_path = os.path.join(
                WORKSPACE_DIR, f"{mask_value}mask_slope_change_dir.tif"
            )
            target_p_path = os.path.join(
                WORKSPACE_DIR, f"{mask_value}mask_p_vals.tif"
            )
            for target_path, result_array in [
                (target_p_path, p_values),
                (target_slope_path, sen_slope),
            ]:
                geoprocessing.new_raster_from_base(
                    base_raster_path,
                    target_path,
                    gdal.GDT_Float32,
                    [target_nodata],
                )
                _write_array_to_raster(target_path, result_array, target_nodata)
    finally:
        # Always shut the graph down so worker processes are not left behind.
        task_graph.join()
        task_graph.close()


if __name__ == "__main__":
    main()