Commit aa48db2

Merge pull request #3 from DeepExtremes/jl110

Jl110

2 parents 2e9de51 + 7f99170

48 files changed: 7769 additions & 2095 deletions

Manifest.toml

Lines changed: 1271 additions & 936 deletions
Large diff not rendered.

Project.toml

Lines changed: 7 additions & 28 deletions
```diff
@@ -1,52 +1,31 @@
-name = "ExtremeEvents"
-uuid = "ace4fa40-4b0d-4fab-a937-802c9281da09"
-authors = ["Mel Wey <45990429+melwey@users.noreply.github.com> and contributors"]
-version = "0.2.0"
-
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-DataConvenience = "3b531cbf-ee43-4e67-8118-dca2c9372f86"
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 DiskArrayTools = "fcd2136c-9f69-4db6-97e5-f31981721d63"
 DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
-EarthDataLab = "359177bc-a543-11e8-11b7-bb015dba3358"
-FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
+GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6"
 ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
 ImageMorphology = "787d08f9-d448-5407-9aad-5290dd7ab264"
-IndexedTables = "6deec6e2-d858-57c5-ab9b-e6ca5bd20e43"
 JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
-Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
+MannKendall = "1761a1e1-e032-445b-870e-f3b23132a90a"
 NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
 OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e"
-OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338"
-ParallelUtilities = "fad6cfc8-4f83-11e9-06cc-151124046ad0"
-Pipe = "b98c9c47-44ae-5843-9183-064241ee97a0"
-PlotThemes = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a"
-PlotUtils = "995b91a9-d308-5afd-9ec6-746e21dbc043"
-PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae"
+QuantileRegressions = "c6596682-b856-542f-9fff-31404643e889"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 RollingFunctions = "b0e4dd01-7b14-53d8-9b45-175a3e362653"
 SlurmClusterManager = "c82cd089-7bf7-41d7-976b-6b5d413cbe0a"
 SphericalConvolutions = "7186af5b-8736-480f-81ff-8eb527665490"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
+StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 WeightedOnlineStats = "bbac0a1f-7c9d-5672-960b-c6ca726e5d5d"
 YAXArrayBase = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
 YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
 Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
-
-[compat]
-julia = "1.8"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
```

README.md

Lines changed: 169 additions & 2 deletions
The previous two-line README (`# ExtremeEvents`: "Extreme events detection for DeepExtremes project.") is replaced by the workflow description below.

# Workflow for building and analysing Dheed v3

Dheed v3 is an ERA5-based global database of dry and hot extreme events from 1950 to 2022, developed in the context of the ESA-funded project [DeepExtremes](https://eo4society.esa.int/projects/deep-extremes/).

The workflow runs in Julia 1.10.0, except for the consolidation of the data cubes, which is run in Python.

Most steps of the workflow were run on the [MPI BGC-Jena](https://bgc-jena.mpg.de) cluster. The input data are hourly ERA5 data retrieved from the [Copernicus Climate Data Store](https://cds.climate.copernicus.eu/) and stored on a local server as NetCDF files. Some derived variables were calculated prior to the processing presented here.
## Preprocessing

1. Rechunk ERA5 data, aggregating from hourly to daily: t2m (min, mean, max), tp (sum), ssrd (sum).

```bash
cd SlurmScripts
sbatch rechunk_data.slurm
```

*Output*: ERA5Cube.zarr
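For intuition, here is a minimal sketch of the hourly-to-daily reduction for a single pixel; the toy data and the `daily` helper are illustrative only, not the code behind `rechunk_data.slurm`:

```julia
using Statistics  # for mean

# reduce an hourly series (length a multiple of 24) to one value per day
function daily(x::AbstractVector, reducer)
    d = reshape(x, 24, :)                 # one column per day
    vec(mapslices(reducer, d; dims = 1))  # collapse the 24 hours of each day
end

hourly_t2m = rand(24 * 365)               # toy hourly series for one pixel
t2m_min  = daily(hourly_t2m, minimum)
t2m_mean = daily(hourly_t2m, mean)
t2m_max  = daily(hourly_t2m, maximum)
# tp and ssrd are reduced with `sum` instead
```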
2. Compute PET on hourly data (t2m, tp, u10, v10, sp, snr (calc), vpd_cf (calc)) and aggregate to daily.

```bash
cd SlurmScripts
sbatch compute_pet.slurm
```

*Output*: PET yearly cubes
3. Rechunk PET and add to ERA5Cube

The daily PET is rechunked to match the chunk size of the ERA5Cube and added to it.

```bash
cd SlurmScripts
sbatch rechunk_pet.slurm
```

*Output*: updated ERA5Cube.zarr
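A minimal YAXArrays sketch of this rechunking idea, with hypothetical paths and illustrative chunk sizes (the actual step is performed by the script submitted above):

```julia
using YAXArrays, Zarr

pet = Cube("PET.zarr")  # hypothetical path to the daily PET cube

# rewrite with chunk sizes matching the ERA5Cube (the sizes here are made up)
pet_rechunked = setchunks(pet, (lon = 60, lat = 60, time = 5844))
savecube(pet_rechunked, "PET_rechunked.zarr")
```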
4. Consolidate metadata of ERA5Cube

Metadata from the different variables in the ERA5Cube are consolidated, so as to reduce the number of read operations on the backend store.

```bash
PATH2CUBE="path/to/ERA5Cube.zarr"  # placeholder path
python -c "import zarr; g = zarr.open_group(\"${PATH2CUBE}\"); zarr.consolidate_metadata(g.store)"
```

*Output*: consolidated ERA5Cube.zarr
5. Compute PEI

The precipitation evaporation index (PEI) is a moving average of the water balance between daily potential evapotranspiration and precipitation. The moving window is 30, 90 or 180 days.

```bash
cd SlurmScripts
sbatch compute_pei.slurm
```

*Output*: PEICube.zarr
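A sketch of that definition for one pixel, assuming the water balance is precipitation minus PET and using `runmean` from RollingFunctions (already in Project.toml); the production code is driven by the SLURM script above:

```julia
using RollingFunctions

# moving average of the daily water balance over a given window
pei(tp, pet, window) = runmean(tp .- pet, window)

tp, pet = rand(730), rand(730)   # toy daily series for one pixel
pei_30  = pei(tp, pet, 30)
pei_90  = pei(tp, pet, 90)
pei_180 = pei(tp, pet, 180)
```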
## Processing

### Temporal analysis

The time series of the four indicators (t2mmax, PEI_30, PEI_90 and PEI_180) are rank-transformed to values between 0 and 1. No convolutional spatial filter is run on the results to smooth the extent of the extreme events.

```bash
cd SlurmScripts
sbatch smooth_events.slurm
```

*Output*: tmax_ranked.zarr and pei_ranks.zarr
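A sketch of the rank transform, assuming tied values share a rank and ranks are rescaled by the series length:

```julia
using StatsBase  # already in Project.toml

# map a series to (0, 1] by its tied ranks
ranktransform(x::AbstractVector) = tiedrank(x) ./ length(x)

r = ranktransform(randn(1_000))
# t2mmax extremes sit near 1; dry extremes sit in the opposite tail of the
# ranked PEI indicators
```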
### Compute extremes

A peak-over-threshold test is applied to the rescaled indicators, and the results are combined into a byte integer (Int8), with one bit per indicator and an extra bit encoding non-extremes. The first bit (little end) encodes the maximum temperature extremes.

```bash
cd SlurmScripts
sbatch compute_events.slurm
```

*Output*: EventCube.zarr
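A sketch of the byte encoding: the little-end bit for t2mmax follows the text above, while the exact positions of the PEI and non-extreme bits are assumptions made for illustration:

```julia
# combine four indicator exceedances into one Int8
function encode(hot::Bool, pei30::Bool, pei90::Bool, pei180::Bool)
    ev = Int8(0)
    hot    && (ev |= Int8(1))         # bit 1 (little end): t2mmax extreme
    pei30  && (ev |= Int8(2))         # assumed: bit 2 = PEI_30 extreme
    pei90  && (ev |= Int8(4))         # assumed: bit 3 = PEI_90 extreme
    pei180 && (ev |= Int8(8))         # assumed: bit 4 = PEI_180 extreme
    ev == Int8(0) && (ev = Int8(16))  # assumed: extra bit flags "no extreme"
    return ev
end

encode(true, false, true, false)  # t2mmax + PEI_90 extreme -> Int8(5)
```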
### Label extreme events

Unique labels are assigned to blobs of co-occurrent hot and dry extremes, i.e. where values are odd (t2mmax extremes) and larger than one (PEI extremes), connected in space and time (a toy sketch of this criterion follows the output list below). A filter is applied before running the connected-component analysis: temperature extremes must last at least three consecutive days.

*Note*: this will reduce the total number of tmax extremes in the cube...

Because the connected-component analysis requires loading the full cube into memory and the algorithm is greedy, the analysis was split into seven tasks, each covering 13 years, with a three-year overlap between successive periods.

```bash
cd SlurmScripts
sbatch label_events.slurm
```

*Output*:
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_1950_1962.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_1960_1972.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_1970_1982.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_1980_1992.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_1990_2002.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_2000_2012.zarr
- labelcube_ranked_pot0.01_ne0.1_cmp_S1_T3_2010_2022.zarr
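As referenced above, here is a toy sketch of the labelling criterion and the connected-component step, using `label_components` from ImageMorphology (in Project.toml); the three-day persistence filter and the 13-year slicing are omitted:

```julia
using ImageMorphology  # provides label_components

# co-occurrent hot and dry extreme: odd value (t2mmax bit set)
# AND value > 1 (at least one PEI bit set)
hotdry(ev::Integer) = isodd(ev) && ev > 1

events = rand(Int8(0):Int8(31), 60, 60, 120)  # toy (lon, lat, time) block
mask   = hotdry.(events)                      # Bool cube of co-occurrences
labels = label_components(mask)               # connected in space and time
```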
The labels are then merged into a single mergedlabels cube with `scripts/merge_labels.jl`.

*Output*:
- mergedlabels.zarr
### Compute statistics

Statistics for all labelled events are computed and gathered in a single table.

```bash
cd SlurmScripts
sbatch stats_extremes_merged.slurm
```

*Output*:
- MergedEventStats_landonly.csv
## Figures and Postprocessing

### fig01_workflow.png

Flowchart designed in PowerPoint.

### fig02_workflow-plot-33.png

Example of the dry and hot extreme event detection workflow over the 2003 summer heatwave in Europe.

See `scripts/fig4dheed.jl`

### fig03: timeseries

Time series of the indicators for two contrasting locations: Jena (timeseries_11.59_50.92_2012_2023.png) and Niamey (timeseries_2.1254_13.5116_2012_2023.png).

See `scripts/plot_t2mmax.jl`

### fig04 - fig06: Trends -- Annual global/continental summary

Trends in annual global/continental indicators.

Compute annual statistics of the indicators, globally and by continent.

See `scripts/plot_ind_annual.jl`

Compute annual statistics of the EventCube at global and continental scale, by summing all extremes by type.

See `scripts/hist_EventType.jl`

Plot the results of the EventCube analysis.

```bash
julia --project="ExtremeEvents.toml" plot_EventType.jl
```

### fig07: events_stats_ranked_pot0.01_ne0.1_cmp_S1_T3_2010_2022_landonly_1970.png

Extract the largest and longest events from `MergedEventStats_landonly` with `largest_labels.jl` and plot statistics with `plot_stats.jl`.

### fig08: largest_ranked_pot0.01_ne0.1_cmp_S1_T3_2010_2022_landonly_1970.png

A map of the spatial footprint of the largest events is generated with `plot_stats.jl`.

### fig09: Validation

Compare MergedEventStats_landonly with a table of reported events compiled *a priori*, using `SanityCheck.jl`.

### fig10: Lytton.png

Extract time series at single locations with `plot_city.jl`.
SlurmScripts/add2cube.py

Lines changed: 12 additions & 0 deletions
```python
import xarray as xr

def add2cube(oldcube, newcube):
    # open the cube holding the new time steps ...
    ds = xr.open_zarr(newcube)
    # ... and append it to the existing store along the time dimension
    ds.to_zarr(
        oldcube,
        mode = "a",
        append_dim = "time",
        consolidated = True,
    )
```
SlurmScripts/compute_pei_add.slurm

Lines changed: 19 additions & 0 deletions
```bash
#!/bin/bash
#SBATCH --job-name pei_jl
#SBATCH -o ./pei-%A.out
#SBATCH -p work
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=10GB
#SBATCH --time=2:00:00
#SBATCH --mail-type FAIL,END

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# compute PEI for 2023 (the two arguments are presumably start and end year)
julia --project=../ ../scripts/compute_pei_add.jl 2023 2023

# append the 2023 slice to the existing PEI cube via add2cube.py
OLDCUBE="/Net/Groups/BGI/scratch/mweynants/DeepExtremes/v3/PEICube.zarr"
NEWCUBE="/Net/Groups/BGI/scratch/mweynants/DeepExtremes/v3/PEICube_2023.zarr"
srun /Net/Groups/BGI/scratch/mweynants/miniconda3/bin/python -c "import add2cube as ac; ac.add2cube(\"${OLDCUBE}\", \"${NEWCUBE}\")"

echo "Success?"
```

SlurmScripts/compute_pet.slurm

Lines changed: 13 additions & 0 deletions
```bash
#!/bin/bash
#SBATCH --job-name pet_jl10
#SBATCH -o ./pet-%A.out
#SBATCH -p work
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=10GB
#SBATCH --time=2-00

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK}

julia --project=../ ../scripts/compute_pet.jl 2023
```
Lines changed: 19 additions & 0 deletions
```bash
#!/bin/bash
PATH2CUBE="/Net/Groups/BGI/scratch/mweynants/DeepExtremes/v3/ERA5Cube_2023.zarr"
python -c "import zarr; g = zarr.open_group(\"${PATH2CUBE}\"); zarr.consolidate_metadata(g.store)"

# # manually edit .zmetadata: modify per dimensions :Time => :time
# # find
# ".zattrs": {
#   "_ARRAY_DIMENSIONS": [
#     "Time",
# # replace by
# ".zattrs": {
#   "_ARRAY_DIMENSIONS": [
#     "time",
# # in ./time/.zattrs, replace Time => time
# # in ./pet/.zattrs
# rm -rf /Net/Groups/BGI/scratch/mweynants/DeepExtremes/v3/ERA5Cube.zarr/Time
# rm -rf /Net/Groups/BGI/scratch/mweynants/DeepExtremes/v3/ERA5Cube.zarr/layer
# # consolidate metadata
# python -c "import zarr; g = zarr.open_group(\"${PATH2CUBE}\"); zarr.consolidate_metadata(g.store)"
```

SlurmScripts/largest_labels.slurm

Lines changed: 14 additions & 0 deletions
```bash
#!/bin/bash
#SBATCH --job-name jl_llbls
#SBATCH -o ./llbls-%A.out
#SBATCH --mail-type FAIL,END
#SBATCH -p work
#SBATCH --ntasks=8
#SBATCH --nodes=1
#SBATCH --cpus-per-task=8
#SBATCH --mem-per-cpu=7GB
#SBATCH --time=5-00

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK}

julia --project=../ ../scripts/largest_labels.jl
```

SlurmScripts/qdoy.slurm

Lines changed: 13 additions & 0 deletions
```bash
#!/bin/bash
#SBATCH --job-name julia_qdoy
#SBATCH -o ./detect-%A.out
#SBATCH -p work
#SBATCH --ntasks=10
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=20GB
#SBATCH --time=36:00:00

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK}

julia --project=../ ../scripts/qdoy.jl
```

SlurmScripts/rechunk_tp.slurm

Lines changed: 14 additions & 0 deletions
```bash
#!/bin/bash
#SBATCH --job-name rchk_tp
#SBATCH -o ./rechunk-htp-%A.out
#SBATCH --partition work
#SBATCH --mail-type FAIL,END
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=40GB
#SBATCH --time=5-00
#SBATCH --nodes=1

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK}

julia --project=../ ../scripts/Rechunk_htp.jl
```
