From 558b03ee29f7ea483613de597268075e4bfa2a02 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Wed, 12 Nov 2025 21:54:49 +1100 Subject: [PATCH 1/9] WIP lucie integration into package --- .../lucie/src/lucie/__init__.py | 2 + .../{ => src/lucie}/dataset_generator.py | 0 .../lucie/inference.py} | 4 +- .../{ => src/lucie}/torch_harmonics_local.py | 6 +- .../{LUCIE_train.py => src/lucie/train.py} | 178 ++++++++++-------- 5 files changed, 109 insertions(+), 81 deletions(-) create mode 100644 packages/bundled_models/lucie/src/lucie/__init__.py rename packages/bundled_models/lucie/{ => src/lucie}/dataset_generator.py (100%) rename packages/bundled_models/lucie/{LUCIE_inference.py => src/lucie/inference.py} (99%) rename packages/bundled_models/lucie/{ => src/lucie}/torch_harmonics_local.py (99%) rename packages/bundled_models/lucie/{LUCIE_train.py => src/lucie/train.py} (55%) diff --git a/packages/bundled_models/lucie/src/lucie/__init__.py b/packages/bundled_models/lucie/src/lucie/__init__.py new file mode 100644 index 00000000..95a13f6c --- /dev/null +++ b/packages/bundled_models/lucie/src/lucie/__init__.py @@ -0,0 +1,2 @@ +from lucie import train +from lucie import torch_harmonics_local diff --git a/packages/bundled_models/lucie/dataset_generator.py b/packages/bundled_models/lucie/src/lucie/dataset_generator.py similarity index 100% rename from packages/bundled_models/lucie/dataset_generator.py rename to packages/bundled_models/lucie/src/lucie/dataset_generator.py diff --git a/packages/bundled_models/lucie/LUCIE_inference.py b/packages/bundled_models/lucie/src/lucie/inference.py similarity index 99% rename from packages/bundled_models/lucie/LUCIE_inference.py rename to packages/bundled_models/lucie/src/lucie/inference.py index a36c102f..1c345ebe 100644 --- a/packages/bundled_models/lucie/LUCIE_inference.py +++ b/packages/bundled_models/lucie/src/lucie/inference.py @@ -34,7 +34,7 @@ import torch -from torch_harmonics_local import * +from 
lucie.torch_harmonics_local import * device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -42,7 +42,7 @@ torch.cuda.set_device(0) -def inference( +def infer( model, steps, initial_frame, forcing, initial_forcing_idx, prog_means, prog_stds, diag_means, diag_stds, diff_stds ): inf_data = [] diff --git a/packages/bundled_models/lucie/torch_harmonics_local.py b/packages/bundled_models/lucie/src/lucie/torch_harmonics_local.py similarity index 99% rename from packages/bundled_models/lucie/torch_harmonics_local.py rename to packages/bundled_models/lucie/src/lucie/torch_harmonics_local.py index 18024be2..e7b4e08e 100644 --- a/packages/bundled_models/lucie/torch_harmonics_local.py +++ b/packages/bundled_models/lucie/src/lucie/torch_harmonics_local.py @@ -11,7 +11,7 @@ # from torch_harmonics import * import torch.nn.functional as F import torch.fft -from torch.cuda import amp +from torch import amp # was from torch.cuda import amp import math import logging @@ -1158,7 +1158,7 @@ def forward(self, x): # pragma: no cover x = x.float() B, C, H, W = x.shape - with amp.autocast(enabled=False): + with amp.autocast(str(device), enabled=False): x = self.forward_transform(x) if self.scale_residual: x = x.contiguous() @@ -1179,7 +1179,7 @@ def forward(self, x): # pragma: no cover # x = self._contract(x, self.weight, separable=self.separable, operator_type=self.operator_type) # x = x.contiguous() - with amp.autocast(enabled=False): + with amp.autocast(str(device), enabled=False): x = self.inverse_transform(x) if hasattr(self, "bias"): diff --git a/packages/bundled_models/lucie/LUCIE_train.py b/packages/bundled_models/lucie/src/lucie/train.py similarity index 55% rename from packages/bundled_models/lucie/LUCIE_train.py rename to packages/bundled_models/lucie/src/lucie/train.py index dc185d69..e7d769ef 100644 --- a/packages/bundled_models/lucie/LUCIE_train.py +++ b/packages/bundled_models/lucie/src/lucie/train.py @@ -35,18 +35,18 @@ from torch.utils.data import 
TensorDataset, DataLoader -from torch_harmonics_local import * +from lucie.torch_harmonics_local import * from torch.optim.lr_scheduler import CosineAnnealingLR -from LUCIE_inference import inference +from lucie import inference -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -if torch.cuda.is_available(): - torch.cuda.set_device(0) - -def integrate_grid(ugrid, dimensionless=False, polar_opt=0): +def integrate_grid( + ugrid, + nlon, + quad_weights, + dimensionless=False, polar_opt=0): dlon = 2 * torch.pi / nlon radius = 1 if dimensionless else radius @@ -59,10 +59,10 @@ def integrate_grid(ugrid, dimensionless=False, polar_opt=0): return out -def l2loss_sphere(prd, tar, relative=False, squared=True): - loss = integrate_grid((prd - tar) ** 2, dimensionless=True).sum(dim=-1) +def l2loss_sphere(prd, tar, nlon, quad_weights, relative=False, squared=True): + loss = integrate_grid((prd - tar) ** 2, nlon, quad_weights, dimensionless=True).sum(dim=-1) if relative: - loss = loss / integrate_grid(tar**2, dimensionless=True).sum(dim=-1) + loss = loss / integrate_grid(tar**2, nlon, quad_weights, dimensionless=True).sum(dim=-1) if not squared: loss = torch.sqrt(loss) @@ -72,21 +72,31 @@ def l2loss_sphere(prd, tar, relative=False, squared=True): def train_model( + device, model, train_loader, val_loader, optimizer, + nlon=96, scheduler=None, nepochs=20, + quad_weights=None, nfuture=0, num_examples=256, num_valid=8, reg_rate=0, -): + ): + ''' + Train your own weights for the LUCIE model + ''' infer_bias = 1e80 recall_count = 0 + + print("Starting Training") for epoch in tqdm(range(nepochs)): + + if epoch < 149: if scheduler is not None: scheduler.step() @@ -97,6 +107,7 @@ def train_model( optimizer.zero_grad() model.train() + batch_num = 0 for inp, tar in train_loader: batch_num += 1 @@ -106,7 +117,7 @@ def train_model( tar = tar.to(device) prd = model(inp) - loss_delta = l2loss_sphere(prd[:, :5, :, :], tar[:, :5, :, :], relative=True) + loss_delta = 
l2loss_sphere(prd[:, :5, :, :], tar[:, :5, :, :], nlon, quad_weights, relative=True) loss_tp = torch.mean((prd[:, 5:, :, :] - tar[:, 5:, :, :]) ** 2) loss = loss_delta + loss_tp / tar.shape[1] @@ -127,7 +138,7 @@ def train_model( if epoch % 10 == 0: rollout_steps = 2920 rollout = torch.tensor( - inference( + inference.infer( model, rollout_steps, data_inp[0:1].to(device), @@ -156,66 +167,81 @@ def train_model( break -data = load_data("era5_T30_regridded.npz")[..., :6] -true_clim = torch.tensor(np.mean(data, axis=0)).to(device).permute(2, 0, 1) - -data = np.load("era5_T30_preprocessed.npz") # standardized data with mean and stds generated from dataset_generator.py -data_inp = torch.tensor(data["data_inp"], dtype=torch.float32) # input data -data_tar = torch.tensor(data["data_tar"], dtype=torch.float32) -raw_means = torch.tensor(data["raw_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) -raw_stds = torch.tensor(data["raw_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) -prog_means = raw_means[:, :5] -prog_stds = raw_stds[:, :5] -diag_means = torch.tensor(data["diag_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) -diag_stds = torch.tensor(data["diag_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) -diff_means = torch.tensor(data["diff_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) -diff_stds = torch.tensor(data["diff_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) - -ntrain = 16000 -nval = 100 - -train_set = TensorDataset(data_inp[:ntrain], data_tar[:ntrain]) -val_set = TensorDataset(data_inp[ntrain : ntrain + nval], data_tar[ntrain : ntrain + nval]) - -train_loader = DataLoader(train_set, batch_size=16, shuffle=True) -val_loader = DataLoader(val_set, batch_size=4, shuffle=False) - - -grid = "legendre-gauss" -nlat = 48 -nlon = 96 -hard_thresholding_fraction = 0.9 -lmax = ceil(nlat / 1) -mmax = lmax -modes_lat = int(nlat * hard_thresholding_fraction) -modes_lon = int(nlon // 2 * 
hard_thresholding_fraction) -modes_lat = modes_lon = min(modes_lat, modes_lon) -sht = RealSHT(nlat, nlon, lmax=modes_lat, mmax=modes_lon, grid=grid, csphase=False) -radius = 6.37122e6 -cost, quad_weights = legendre_gauss_weights(nlat, -1, 1) -quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(device) - -model = SphericalFourierNeuralOperatorNet( - params={}, - spectral_transform="sht", - filter_type="linear", - operator_type="dhconv", - img_shape=(48, 96), - num_layers=8, - in_chans=7, - out_chans=6, - scale_factor=1, - embed_dim=72, - activation_function="silu", - big_skip=True, - pos_embed="latlon", - use_mlp=True, - normalization_layer="instance_norm", - hard_thresholding_fraction=hard_thresholding_fraction, - mlp_ratio=2.0, -).to(device) - -optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0) -scheduler = CosineAnnealingLR(optimizer, T_max=150, eta_min=1e-5) -train_model(model, train_loader, val_loader, optimizer, scheduler=scheduler, nepochs=500) -torch.save(model.state_dict(), "model.pth") + +def load_data_and_train( + device, + regridded_data, + preprocessed_data, + *, + ntrain: int | None = 16000, + nval: int | None = 100): + ''' + + args: + unprocessed_data + reprocessed_data: dictionary or numpy collection containing 'diagn_means', 'diag_stds', 'diff_means' and 'diff_stds' + + ''' + + regridded_data = regridded_data[..., :6] + true_clim = torch.tensor(np.mean(regridded_data, axis=0)).to(device).permute(2, 0, 1) + + data = preprocessed_data + data_inp = torch.tensor(data["data_inp"], dtype=torch.float32) # input data + data_tar = torch.tensor(data["data_tar"], dtype=torch.float32) + raw_means = torch.tensor(data["raw_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) + raw_stds = torch.tensor(data["raw_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) + prog_means = raw_means[:, :5] + prog_stds = raw_stds[:, :5] + diag_means = torch.tensor(data["diag_means"], dtype=torch.float32).reshape(1, -1, 1, 
1).to(device) + diag_stds = torch.tensor(data["diag_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) + diff_means = torch.tensor(data["diff_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) + diff_stds = torch.tensor(data["diff_stds"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) + + train_set = TensorDataset(data_inp[:ntrain], data_tar[:ntrain]) + val_set = TensorDataset(data_inp[ntrain : ntrain + nval], data_tar[ntrain : ntrain + nval]) + + train_loader = DataLoader(train_set, batch_size=16, shuffle=True) + val_loader = DataLoader(val_set, batch_size=4, shuffle=False) + + + grid = "legendre-gauss" + nlat = 48 + nlon = 96 + hard_thresholding_fraction = 0.9 + lmax = ceil(nlat / 1) + mmax = lmax + modes_lat = int(nlat * hard_thresholding_fraction) + modes_lon = int(nlon // 2 * hard_thresholding_fraction) + modes_lat = modes_lon = min(modes_lat, modes_lon) + sht = RealSHT(nlat, nlon, lmax=modes_lat, mmax=modes_lon, grid=grid, csphase=False) + radius = 6.37122e6 + cost, quad_weights = legendre_gauss_weights(nlat, -1, 1) + quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(torch.float32).to(device) # mps only supports float32, todo only do this if mps + print('a') + + model = SphericalFourierNeuralOperatorNet( + params={}, + spectral_transform="sht", + filter_type="linear", + operator_type="dhconv", + img_shape=(48, 96), + num_layers=8, + in_chans=7, + out_chans=6, + scale_factor=1, + embed_dim=72, + activation_function="silu", + big_skip=True, + pos_embed="latlon", + use_mlp=True, + normalization_layer="instance_norm", + hard_thresholding_fraction=hard_thresholding_fraction, + mlp_ratio=2.0, + ).to(device) + + print('b') + optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0) + scheduler = CosineAnnealingLR(optimizer, T_max=150, eta_min=1e-5) + train_model(device, model, train_loader, val_loader, optimizer, nlon=nlon, quad_weights=quad_weights, scheduler=scheduler, nepochs=500) + 
torch.save(model.state_dict(), "model.pth") From 84507941e4dcbb1ac7beda9e01846460cec72697 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Wed, 12 Nov 2025 22:41:42 +1100 Subject: [PATCH 2/9] Lucie model is installable Basic notebook is functional Still need to migrate data into an accessor and pipeline pattern --- notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 234 ++++++++++++++++++ packages/bundled_models/lucie/pyproject.toml | 58 +++++ .../bundled_models/lucie/src/lucie/train.py | 39 ++- 3 files changed, 323 insertions(+), 8 deletions(-) create mode 100644 notebooks/tutorial/LUCIE/LUCIE-Training.ipynb create mode 100644 packages/bundled_models/lucie/pyproject.toml diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb new file mode 100644 index 00000000..a48302fc --- /dev/null +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "508c446b-21c6-447e-a119-a6a16d78b6e0", + "metadata": {}, + "outputs": [], + "source": [ + "import lucie\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f69a338a-ff4e-465f-a664-cd76630baa52", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b81e2c08-bf62-49fc-9090-0595cbfd24ab", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else device)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4180dd8c-ff64-466b-b3bc-9771b2053a57", + "metadata": {}, + "outputs": [], + "source": [ + "regridded_path = Path.home() / 'dev/data/lucie' / 'era5_T30_regridded.npz'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": 
"7f5ca64a-87c8-4cae-a2e3-3a4788066a73", + "metadata": {}, + "outputs": [], + "source": [ + "regridded_data = lucie.train.load_data(regridded_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b53d4754-4303-4325-801a-afa626aac582", + "metadata": {}, + "outputs": [], + "source": [ + "preprocessed_path = Path.home() / 'dev/data/lucie' / 'era5_T30_preprocessed.npz'\n", + "preprocessed_data = np.load(preprocessed_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "22148013-b8d6-40c7-8c11-9f8545295b85", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Training\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/5 [00:00=3.11, <3.14" +keywords = ["lucie"] +maintainers = [ + {name = "Tennessee Leeuwenburg", email = "tennessee.leeuwenburg@bom.gov.au"} +] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + 'pyearthtools.training[lightning]>=0.5.0', + 'pyearthtools.zoo>=0.5.0', + 'pyearthtools.data>=0.5.0', + 'pyearthtools.pipeline>=0.5.0', + 'torch_optimizer', + 'timm', +] + + +[project.urls] +homepage = "https://pyearthtools.readthedocs.io/" +documentation = "https://pyearthtools.readthedocs.io/" +repository = "https://github.com/ACCESS-Community-Hub/PyEarthTools" + +[project.entry-points."pyearthtools.zoo.model"] +Global_FCNXT = "lucie.registered_model:LucieRM" + +[tool.isort] +profile = "black" + +[tool.black] +line-length = 120 + +[tool.mypy] +warn_return_any = true +warn_unused_configs = true + +[[tool.mypy.overrides]] +ignore_missing_imports = true + +[tool.hatch.version] +path = "src/pyearthtools/pipeline/__init__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/pyearthtools/"] diff --git 
a/packages/bundled_models/lucie/src/lucie/train.py b/packages/bundled_models/lucie/src/lucie/train.py index e7d769ef..58374883 100644 --- a/packages/bundled_models/lucie/src/lucie/train.py +++ b/packages/bundled_models/lucie/src/lucie/train.py @@ -77,10 +77,17 @@ def train_model( train_loader, val_loader, optimizer, + data_inp=None, + prog_means=None, + prog_stds=None, + diag_means=None, + diag_stds=None, + diff_stds=None, nlon=96, scheduler=None, nepochs=20, quad_weights=None, + true_clim=None, nfuture=0, num_examples=256, num_valid=8, @@ -93,6 +100,8 @@ def train_model( infer_bias = 1e80 recall_count = 0 + debug_sample_limit = 5 + print("Starting Training") for epoch in tqdm(range(nepochs)): @@ -109,10 +118,17 @@ def train_model( model.train() batch_num = 0 + + zz = 0 + for inp, tar in train_loader: batch_num += 1 loss = 0 + zz += 1 + if zz > debug_sample_limit: + break + inp = inp.to(device) tar = tar.to(device) prd = model(inp) @@ -135,8 +151,9 @@ def train_model( loss.backward() optimizer.step() - if epoch % 10 == 0: - rollout_steps = 2920 + if epoch % 1 == 0: + # rollout_steps = 2920 + rollout_steps = 50 rollout = torch.tensor( inference.infer( model, @@ -186,7 +203,7 @@ def load_data_and_train( regridded_data = regridded_data[..., :6] true_clim = torch.tensor(np.mean(regridded_data, axis=0)).to(device).permute(2, 0, 1) - data = preprocessed_data + data = preprocessed_data # dictionary-like numpy array data_inp = torch.tensor(data["data_inp"], dtype=torch.float32) # input data data_tar = torch.tensor(data["data_tar"], dtype=torch.float32) raw_means = torch.tensor(data["raw_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) @@ -214,11 +231,10 @@ def load_data_and_train( modes_lat = int(nlat * hard_thresholding_fraction) modes_lon = int(nlon // 2 * hard_thresholding_fraction) modes_lat = modes_lon = min(modes_lat, modes_lon) - sht = RealSHT(nlat, nlon, lmax=modes_lat, mmax=modes_lon, grid=grid, csphase=False) + # sht = RealSHT(nlat, nlon, 
lmax=modes_lat, mmax=modes_lon, grid=grid, csphase=False) radius = 6.37122e6 - cost, quad_weights = legendre_gauss_weights(nlat, -1, 1) + _cost, quad_weights = legendre_gauss_weights(nlat, -1, 1) quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(torch.float32).to(device) # mps only supports float32, todo only do this if mps - print('a') model = SphericalFourierNeuralOperatorNet( params={}, @@ -240,8 +256,15 @@ def load_data_and_train( mlp_ratio=2.0, ).to(device) - print('b') optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0) scheduler = CosineAnnealingLR(optimizer, T_max=150, eta_min=1e-5) - train_model(device, model, train_loader, val_loader, optimizer, nlon=nlon, quad_weights=quad_weights, scheduler=scheduler, nepochs=500) + train_model(device, model, train_loader, val_loader, optimizer, + prog_means=prog_means, + prog_stds=prog_stds, + diag_means=diag_means, + diag_stds=diag_stds, + diff_stds=diff_stds, + true_clim=true_clim, + # data_inp=data_inp, nlon=nlon, quad_weights=quad_weights, scheduler=scheduler, nepochs=500) + data_inp=data_inp, nlon=nlon, quad_weights=quad_weights, scheduler=scheduler, nepochs=5) torch.save(model.state_dict(), "model.pth") From be000f5eb57eda8c6dc6efeadb784a12d6594756 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 13 Nov 2025 09:40:27 +1100 Subject: [PATCH 3/9] Initial training notebook executes and produces a model weights file without error --- notebooks/Gallery.ipynb | 3 +- notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 149 +++++++----------- .../bundled_models/lucie/src/lucie/train.py | 23 ++- 3 files changed, 72 insertions(+), 103 deletions(-) diff --git a/notebooks/Gallery.ipynb b/notebooks/Gallery.ipynb index 47a615ce..3f3e5dbb 100644 --- a/notebooks/Gallery.ipynb +++ b/notebooks/Gallery.ipynb @@ -40,7 +40,8 @@ "| **Simplified weather model** | Train a reduced-size weather model on a standard GPU with fetchable dataset | ![Image showing FourCastMini prediction 
outputs](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_FourCastMini_Demo_18_1.png) | [Train and run a simplified global weather model (low hardware and data requirements)](./tutorial/FourCastMini_Demo.ipynb) | 18 Aug 2025 |\n", "| **MLX Demo** | Shows how to integrate PyEarthTools with a non-PyTorch framework (Apple MLX) optimised for M-series chips | ![Image showing weather model outputs from MLX demo](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_MLX-Demo-Custom-Arch_13_1.png) | [MLX Framework Example](./tutorial/MLX-Demo-Custom-Arch.ipynb) | 8 Jun 2025 | \n", "| **Convolutional Neural Net on ERA5** | Shows all steps to train a CNN on ERA5, running on CPU or a standard GPU | ![Image showing weather model outputs](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_CNN-Model-Training_44_1.png) | [End-to-end CNN Training Example](./tutorial/CNN-Model-Training.ipynb) | 25 Aug 2025 |\n", - "| **Radar Visualisation** | Shows how to visualise radar data as a time-series, in 2D and in 3D | ![Image showing a top down view of radar data](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_RadarVisualisation_10_1.png) | [Radar Visualisation](./RadarVisualisation.ipynb) | 23 Aug 2025 |\n" + "| **Radar Visualisation** | Shows how to visualise radar data as a time-series, in 2D and in 3D | ![Image showing a top down view of radar data](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_RadarVisualisation_10_1.png) | [Radar Visualisation](./RadarVisualisation.ipynb) | 23 Aug 2025 |\n", + "| **LLUCIE Climate Model** | Train a climate model | (no image) | [LUCIE-Training](./tutorial/LUCIE/LUCIE-Training.ipynb) | 23 Aug 2025 |\n" ] }, { diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb index a48302fc..f0bb209f 100644 --- a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ 
-1,9 +1,37 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "a3575c36-ed8d-4bae-ab90-aefe441949f9", + "metadata": {}, + "source": [ + "# Training the LUCIE model\n", + "\n", + "LUCIE is a climate model developed by Haiwen Guan, Troy Arcomano, Ashesh Chattopadhyay and Romit Maulik (2024). See their preprint at https://arxiv.org/html/2405.16297v1 and the archive of their trainind data, code and results here https://zenodo.org/records/14829609.\n", + "\n", + "The code in PyEarthTools was based on their code repository at https://github.com/ISCLPennState/LUCIE, which is made available under the MIT license (see the PyEarthTools NOTICE file for full information on this point)\n", + "\n", + "LUCIE is a model which is of interest to climate researchers due to its long-term stability for rollouts for many decades. This model is licensed in a compatible fashion, so we are able to provide a bundled, customised version of LUCIE which can be used within the PyEarthTools framework, integrated with its data pipelines and configurable to work flexibly.\n", + "\n", + "We have only just begun the process of this integration, and so for now the model does not make extensive use of the PyEarthTools classes. This is expected to change fairly quickly, and as this happens, this notebook will be updated.
However, in the interests of providing the bundled version to the community as soon as possible for those already seeking to work with the model, we present it in a \"work in progress\" fashion.\n", + "\n", + "The intention is to:\n", + " - [done] Supply the source code to train and run the model in PyEarthTools\n", + " - [done] Validate that the model can train without obvious code-level errors\n", + " - Validate inference and reproduce the training results to ensure the trained model is valid\n", + " - Support library updates and other changes\n", + " - Support multiple ML backends beyond CUDA\n", + " - Support connection to multiple data sources through PET data accessors\n", + " - Move the normalisation into a PET pipeline so it can be easily modified and experimented with\n", + "\n", + "If you would like to know more, or get involved with this work, please [let us know on the issue tracker](https://github.com/ACCESS-Community-Hub/PyEarthTools/issues/211)\n", + "\n" + ] + }, { "cell_type": "code", - "execution_count": 1, - "id": "508c446b-21c6-447e-a119-a6a16d78b6e0", + "execution_count": 10, + "id": "e5068eca-cfcc-4dec-bf88-8b1fb870dc3b", "metadata": {}, "outputs": [], "source": [ @@ -13,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 11, "id": "f69a338a-ff4e-465f-a664-cd76630baa52", "metadata": {}, "outputs": [], @@ -24,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 12, "id": "b81e2c08-bf62-49fc-9090-0595cbfd24ab", "metadata": {}, "outputs": [], @@ -35,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 13, "id": "4180dd8c-ff64-466b-b3bc-9771b2053a57", "metadata": {}, "outputs": [], @@ -45,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "id": "7f5ca64a-87c8-4cae-a2e3-3a4788066a73", "metadata": {}, "outputs": [], @@ -55,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": 
"b53d4754-4303-4325-801a-afa626aac582", "metadata": {}, "outputs": [], @@ -66,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "id": "22148013-b8d6-40c7-8c11-9f8545295b85", "metadata": {}, "outputs": [ @@ -81,36 +109,12 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/5 [00:00 Date: Thu, 13 Nov 2025 12:01:31 +1100 Subject: [PATCH 4/9] Update pyproject toml Update notebook to use test settings for training by default --- notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 2 +- packages/bundled_models/lucie/pyproject.toml | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb index f0bb209f..7bfe50e4 100644 --- a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ -7,7 +7,7 @@ "source": [ "# Training the LUCIE model\n", "\n", - "LUCIE is a climate model developed by Haiwen Guan, Troy Arcomano, Ashesh Chattopadhyay and Romit Maulik (2024). See their preprint at https://arxiv.org/html/2405.16297v1 and the archive of their trainind data, code and results here https://zenodo.org/records/14829609.\n", + "LUCIE is a climate model developed by Haiwen Guan, Troy Arcomano, Ashesh Chattopadhyay and Romit Maulik (2024). 
See their preprint at https://doi.org/10.48550/arXiv.2405.16297 and the archive of their trainind data, code and results here https://zenodo.org/records/14829609.\n", "\n", "The code in PyEarthTools was based on their code repository at https://github.com/ISCLPennState/LUCIE, which is made available under the MIT license (see the PyEarthTools NOTICE file for full information on this point)\n", "\n", diff --git a/packages/bundled_models/lucie/pyproject.toml b/packages/bundled_models/lucie/pyproject.toml index 68ada0b7..34fbb687 100644 --- a/packages/bundled_models/lucie/pyproject.toml +++ b/packages/bundled_models/lucie/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "pyearthtools-bundled-lucie" version = "0.6.0" -description = "FourCastNeXt Bundled Model" +description = "LUCIE Bundled Model" readme = "README.md" requires-python = ">=3.11, <3.14" keywords = ["lucie"] @@ -21,10 +21,10 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - 'pyearthtools.training[lightning]>=0.5.0', - 'pyearthtools.zoo>=0.5.0', - 'pyearthtools.data>=0.5.0', - 'pyearthtools.pipeline>=0.5.0', + 'pyearthtools.training[lightning]>=0.5.1', + 'pyearthtools.zoo>=0.5.1', + 'pyearthtools.data>=0.5.1', + 'pyearthtools.pipeline>=0.5.1', 'torch_optimizer', 'timm', ] @@ -35,9 +35,6 @@ homepage = "https://pyearthtools.readthedocs.io/" documentation = "https://pyearthtools.readthedocs.io/" repository = "https://github.com/ACCESS-Community-Hub/PyEarthTools" -[project.entry-points."pyearthtools.zoo.model"] -Global_FCNXT = "lucie.registered_model:LucieRM" - [tool.isort] profile = "black" @@ -52,7 +49,7 @@ warn_unused_configs = true ignore_missing_imports = true [tool.hatch.version] -path = "src/pyearthtools/pipeline/__init__.py" +path = "src/lucie/__init__.py" [tool.hatch.build.targets.wheel] -packages = ["src/pyearthtools/"] +packages = ["src/lucie/"] From c28fa204c287987924cd4219dc6e018d38c6cb16 Mon Sep 17 00:00:00 2001 From: Tennessee 
Leeuwenburg Date: Thu, 13 Nov 2025 16:26:53 +1100 Subject: [PATCH 5/9] Code reformatting --- notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 35 ++++++++++--------- .../bundled_models/lucie/src/lucie/train.py | 34 +++++++++--------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb index 7bfe50e4..2bab43e5 100644 --- a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "id": "e5068eca-cfcc-4dec-bf88-8b1fb870dc3b", "metadata": {}, "outputs": [], @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "id": "f69a338a-ff4e-465f-a664-cd76630baa52", "metadata": {}, "outputs": [], @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "id": "b81e2c08-bf62-49fc-9090-0595cbfd24ab", "metadata": {}, "outputs": [], @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "id": "4180dd8c-ff64-466b-b3bc-9771b2053a57", "metadata": {}, "outputs": [], @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "id": "7f5ca64a-87c8-4cae-a2e3-3a4788066a73", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "id": "b53d4754-4303-4325-801a-afa626aac582", "metadata": {}, "outputs": [], @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "id": "22148013-b8d6-40c7-8c11-9f8545295b85", "metadata": {}, "outputs": [ @@ -111,10 +111,11 @@ "text": [ " 0%| | 0/2 [00:00 Date: Thu, 13 Nov 2025 19:52:13 +1100 Subject: [PATCH 6/9] Update Gallery spelling of LUCIE Add commit hash basis for LUCIE and improve README for LUCIE --- notebooks/Gallery.ipynb | 2 +- packages/bundled_models/lucie/README.md | 10 
+++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/notebooks/Gallery.ipynb b/notebooks/Gallery.ipynb index 3f3e5dbb..19eb9779 100644 --- a/notebooks/Gallery.ipynb +++ b/notebooks/Gallery.ipynb @@ -41,7 +41,7 @@ "| **MLX Demo** | Shows how to integrate PyEarthTools with a non-PyTorch framework (Apple MLX) optimised for M-series chips | ![Image showing weather model outputs from MLX demo](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_MLX-Demo-Custom-Arch_13_1.png) | [MLX Framework Example](./tutorial/MLX-Demo-Custom-Arch.ipynb) | 8 Jun 2025 | \n", "| **Convolutional Neural Net on ERA5** | Shows all steps to train a CNN on ERA5, running on CPU or a standard GPU | ![Image showing weather model outputs](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_CNN-Model-Training_44_1.png) | [End-to-end CNN Training Example](./tutorial/CNN-Model-Training.ipynb) | 25 Aug 2025 |\n", "| **Radar Visualisation** | Shows how to visualise radar data as a time-series, in 2D and in 3D | ![Image showing a top down view of radar data](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_RadarVisualisation_10_1.png) | [Radar Visualisation](./RadarVisualisation.ipynb) | 23 Aug 2025 |\n", - "| **LLUCIE Climate Model** | Train a climate model | (no image) | [LUCIE-Training](./tutorial/LUCIE/LUCIE-Training.ipynb) | 23 Aug 2025 |\n" + "| **LUCIE Climate Model** | Train a climate model | (no image) | [LUCIE-Training](./tutorial/LUCIE/LUCIE-Training.ipynb) | 23 Aug 2025 |\n" ] }, { diff --git a/packages/bundled_models/lucie/README.md b/packages/bundled_models/lucie/README.md index 3eebfac5..6581acbe 100644 --- a/packages/bundled_models/lucie/README.md +++ b/packages/bundled_models/lucie/README.md @@ -1,12 +1,16 @@ # LUCIE: Lightweight Uncoupled ClImate Emulator -Please note - this is a fork of https://github.com/ISCLPennState/LUCIE which has been adapted included in PyEarthTools for the purposes of maintenance, 
compatbility and to supply an integrated approach to using the LUCIE model within the PyEarthTools framework. +Please note - this is an adaptation of https://github.com/ISCLPennState/LUCIE which has been modified for inclusion in PyEarthTools for the purposes of maintenance, compatibility and to supply an integrated approach to using the LUCIE model within the PyEarthTools framework. + +This code was copied from the LUCIE repository from commit hash 19a1d6ebe844f49893f92e8b377ebdca8f6aa0e6 (Jul 9th, 2025). --- ## Paper & Data -- [arXiv Preprint: arxiv.org/abs/2405.16297](https://arxiv.org/abs/2405.16297) +These are the links for the original paper, code and data published by the LUCIE authors. The code was published to Zenodo under a Creative Commons license but the license in their github repository was MIT to allow improved code re-use. + +- [arXiv Preprint: https://doi.org/10.48550/arXiv.2405.16297](https://doi.org/10.48550/arXiv.2405.16297) - [Zenodo Archive: zenodo.org/records/15164648](https://zenodo.org/records/15164648) --- @@ -22,4 +26,4 @@ This repository prvides the following: 5. The data generator file that precprocesses the regridded ERA5 data. ## Note -Please refer to the zenodo link for the regridded ERA5 data. The link also includes the preprocessed data from the data generator file. +Please refer to the LUCIE Zenodo link for the regridded ERA5 data. The link also includes the preprocessed data from the data generator file. 
From 9278e1f0745bbe1264eb1822a9d9cf26dc4d778a Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 13 Nov 2025 19:58:31 +1100 Subject: [PATCH 7/9] Update arxiv and zenodo links in the README and tutorial for LUCIE --- notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 2 +- packages/bundled_models/lucie/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb index 2bab43e5..82f84149 100644 --- a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ -7,7 +7,7 @@ "source": [ "# Training the LUCIE model\n", "\n", - "LUCIE is a climate model developed by Haiwen Guan, Troy Arcomano, Ashesh Chattopadhyay and Romit Maulik (2024). See their preprint at https://doi.org/10.48550/arXiv.2405.16297 and the archive of their trainind data, code and results here https://zenodo.org/records/14829609.\n", + "LUCIE is a climate model developed by Haiwen Guan, Troy Arcomano, Ashesh Chattopadhyay and Romit Maulik (2024). See their preprint at https://doi.org/10.48550/arXiv.2405.16297 and the archive of their training data, code and results here https://doi.org/10.5281/zenodo.15164648.\n", "\n", "The code in PyEarthTools was based on their code repository at https://github.com/ISCLPennState/LUCIE, which is made available under the MIT license (see the PyEarthTools NOTICE file for full information on this point)\n", "\n", diff --git a/packages/bundled_models/lucie/README.md b/packages/bundled_models/lucie/README.md index 6581acbe..dca21111 100644 --- a/packages/bundled_models/lucie/README.md +++ b/packages/bundled_models/lucie/README.md @@ -11,7 +11,7 @@ This code was copied from the LUCIE repository from commit hash 19a1d6ebe844f498 These are the links for the original paper, code and data published by the LUCIE authors. 
The code was published to Zenodo under a Creative Commons license but the license in their github repository was MIT to allow improved code re-use. - [arXiv Preprint: https://doi.org/10.48550/arXiv.2405.16297](https://doi.org/10.48550/arXiv.2405.16297) -- [Zenodo Archive: zenodo.org/records/15164648](https://zenodo.org/records/15164648) +- [Zenodo Archive: https://doi.org/10.5281/zenodo.15164648](https://doi.org/10.5281/zenodo.15164648) --- From 7b275322ca7488aec96da352196ae981091c9b2f Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 13 Nov 2025 21:04:22 +1100 Subject: [PATCH 8/9] Add LUCIE inference notebook demonstrating model outputs from the trained model --- .../tutorial/LUCIE/LUCIE-Inference.ipynb | 147 ++++++++++++++++++ notebooks/tutorial/LUCIE/LUCIE-Training.ipynb | 2 + .../lucie/src/lucie/inference.py | 31 ++-- .../bundled_models/lucie/src/lucie/train.py | 7 +- 4 files changed, 173 insertions(+), 14 deletions(-) create mode 100644 notebooks/tutorial/LUCIE/LUCIE-Inference.ipynb diff --git a/notebooks/tutorial/LUCIE/LUCIE-Inference.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Inference.ipynb new file mode 100644 index 00000000..b8be428a --- /dev/null +++ b/notebooks/tutorial/LUCIE/LUCIE-Inference.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "175e2165-f568-48e2-bedb-1245603b1ab5", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import lucie\n", + "import lucie.inference\n", + "from pathlib import Path\n", + "import numpy as np\n", + "import xarray as xr" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bc8460bd-8691-403f-8cbb-dbeb4e39875a", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"mps\" if torch.backends.mps.is_available() else \"cpu\")\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else device)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": 
"e5000929-4fec-4b6c-82c1-a5769287aa38", + "metadata": {}, + "outputs": [], + "source": [ + "regridded_path = Path.home() / 'dev/data/lucie' / 'era5_T30_regridded.npz'\n", + "regridded_data = lucie.train.load_data(regridded_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3facc4a4-ec3f-4fc8-be29-c244ec4268e2", + "metadata": {}, + "outputs": [], + "source": [ + "preprocessed_path = Path.home() / 'dev/data/lucie' / 'era5_T30_preprocessed.npz'\n", + "preprocessed_data = np.load(preprocessed_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fb5d5b70-681c-4cc4-b973-7b259bb6e81d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1min 49s, sys: 30.3 s, total: 2min 19s\n", + "Wall time: 1min 54s\n" + ] + } + ], + "source": [ + "%%time\n", + "%%capture\n", + "\n", + "# Note - these timings were obtained on a laptop, not on a high-performance GPU.\n", + "\n", + "predictions = lucie.inference.load_data_and_predict(device, regridded_data, preprocessed_data,model_weights_pth='model.pth')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9f9e517e-a6e6-4cff-bc82-fe0cff6c89ec", + "metadata": {}, + "outputs": [], + "source": [ + "da = xr.DataArray(predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e0f847a4-edd0-4dc4-9a2c-278c5df6127e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Please note - this image was generated from only a few samples of training and does not represent the final model\n", + "da[5][0].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25598079-8d9b-4893-a807-cfe1c50d35b8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb index 82f84149..8aaf970a 100644 --- a/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb +++ b/notebooks/tutorial/LUCIE/LUCIE-Training.ipynb @@ -15,6 +15,8 @@ "\n", "We have only just begun the process of this integration, and so for now the model does not make extensive use of the PyEarthTools classes. This is expected to change fairly quickly, and as this happens, this notebook will be updated. However, in the interests of providing the bundled version to the community as soon as possible for those already seeking to work with the model, we present it in a \"work in progress\" fashion.\n", "\n", + "You need to manually download the original published dataset from Zenodo, and update the paths in this notebook to point to them. The initial focus will be on reproducing the paper fairly closely using the same data and only slightly modified code (changes to support more devices and updates for compatibility), true enough to the original. 
Subsequently, we will develop the code further to be adaptable to new data sources.\n", + "\n", "The intention is to:\n", " - [done] Supply the source code to train and run the model in PyEarthTools\n", " - [done] Validate that the model can train without obvious code-level errors\n", diff --git a/packages/bundled_models/lucie/src/lucie/inference.py b/packages/bundled_models/lucie/src/lucie/inference.py index 1c345ebe..93dcd756 100644 --- a/packages/bundled_models/lucie/src/lucie/inference.py +++ b/packages/bundled_models/lucie/src/lucie/inference.py @@ -28,6 +28,7 @@ # import torch_harmonics.distributed as thd # from torch_harmonics import * +# from torch._C import float32 import torch.fft from tqdm import tqdm @@ -42,8 +43,9 @@ torch.cuda.set_device(0) -def infer( - model, steps, initial_frame, forcing, initial_forcing_idx, prog_means, prog_stds, diag_means, diag_stds, diff_stds +def infer(device, + model, steps, initial_frame, forcing, initial_forcing_idx, + prog_means, prog_stds, diag_means, diag_stds, diff_stds ): inf_data = [] model.eval() @@ -76,14 +78,17 @@ def infer( return inf_data -if __name__ == "__main__": +def load_data_and_predict( + device, + regridded_data, + preprocessed_data, # standardised data generated by dataset_generator.py + model_weights_pth='model.pth', + ): - data = load_data("era5_T30_regridded.npz")[..., :6] - true_clim = torch.tensor(np.mean(data, axis=0)).to(device).permute(2, 0, 1) + regridded_data = regridded_data[..., :6] + true_clim = torch.tensor(np.mean(regridded_data, axis=0)).to(device).permute(2, 0, 1) - data = np.load( - "era5_T30_preprocessed.npz" - ) # standardized data with mean and stds generated from dataset_generator.py + data = preprocessed_data # dictionary-like numpy array data_inp = torch.tensor(data["data_inp"], dtype=torch.float32) # input data data_tar = torch.tensor(data["data_tar"], dtype=torch.float32) raw_means = torch.tensor(data["raw_means"], dtype=torch.float32).reshape(1, -1, 1, 1).to(device) @@ -107,7 
+112,8 @@ def infer( sht = RealSHT(nlat, nlon, lmax=modes_lat, mmax=modes_lon, grid=grid, csphase=False) radius = 6.37122e6 cost, quad_weights = legendre_gauss_weights(nlat, -1, 1) - quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(device) + quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(torch.float32).to(device) + # quad_weights = (torch.as_tensor(quad_weights).reshape(-1, 1)).to(device) model = SphericalFourierNeuralOperatorNet( params={}, @@ -129,7 +135,7 @@ def infer( mlp_ratio=2.0, ).to(device) - path = torch.load("regular_8x72_fftreg_baseline.pth") + path = torch.load(model_weights_pth) model.load_state_dict(path) forcing = data_inp[:1460, -2:] # repeating tisr and constant oro @@ -137,7 +143,8 @@ def infer( rollout_step = 14600 initial_frame_idx = 16000 + 100 forcing_initial_idx = (16000 + 100) % 1460 + 1 - rollout = inference( + rollout = infer( + device, model, rollout_step, data_inp[initial_frame_idx].unsqueeze(0).to(device), @@ -149,3 +156,5 @@ def infer( diag_stds, diff_stds, ) + + return rollout diff --git a/packages/bundled_models/lucie/src/lucie/train.py b/packages/bundled_models/lucie/src/lucie/train.py index bc3c527f..623ae39f 100644 --- a/packages/bundled_models/lucie/src/lucie/train.py +++ b/packages/bundled_models/lucie/src/lucie/train.py @@ -145,11 +145,12 @@ def train_model( loss.backward() optimizer.step() - if epoch % 1 == 0: - # rollout_steps = 2920 - rollout_steps = 50 + if epoch % 10 == 0: + rollout_steps = 2920 # Per paper + # rollout_steps = 50 # Testing rollout = torch.tensor( inference.infer( + device, model, rollout_steps, data_inp[0:1].to(device), From 6fd0176c692783dafca4a838de7bd217183773c6 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 13 Nov 2025 21:10:24 +1100 Subject: [PATCH 9/9] Link LUCIE inference notebook into the gallery --- notebooks/Gallery.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/notebooks/Gallery.ipynb b/notebooks/Gallery.ipynb 
index 19eb9779..644e323c 100644 --- a/notebooks/Gallery.ipynb +++ b/notebooks/Gallery.ipynb @@ -35,13 +35,14 @@ "\n", "These tutorials can be run on a 4GB GPU using relatively low volumes of data (3-10GB). They will also work in HPC environments.\n", "\n", - "| Title | Description | Image | Notebooks | Last Tested |\n", + "| Topic | Description | Image | Notebooks | Last Tested |\n", "|-------|--------------|-------|-------------|-------------|\n", "| **Simplified weather model** | Train a reduced-size weather model on a standard GPU with fetchable dataset | ![Image showing FourCastMini prediction outputs](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_FourCastMini_Demo_18_1.png) | [Train and run a simplified global weather model (low hardware and data requirements)](./tutorial/FourCastMini_Demo.ipynb) | 18 Aug 2025 |\n", "| **MLX Demo** | Shows how to integrate PyEarthTools with a non-PyTorch framework (Apple MLX) optimised for M-series chips | ![Image showing weather model outputs from MLX demo](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_MLX-Demo-Custom-Arch_13_1.png) | [MLX Framework Example](./tutorial/MLX-Demo-Custom-Arch.ipynb) | 8 Jun 2025 | \n", "| **Convolutional Neural Net on ERA5** | Shows all steps to train a CNN on ERA5, running on CPU or a standard GPU | ![Image showing weather model outputs](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_CNN-Model-Training_44_1.png) | [End-to-end CNN Training Example](./tutorial/CNN-Model-Training.ipynb) | 25 Aug 2025 |\n", "| **Radar Visualisation** | Shows how to visualise radar data as a time-series, in 2D and in 3D | ![Image showing a top down view of radar data](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_RadarVisualisation_10_1.png) | [Radar Visualisation](./RadarVisualisation.ipynb) | 23 Aug 2025 |\n", - "| **LUCIE Climate Model** | Train a climate model | (no image) | 
[LUCIE-Training](./tutorial/LUCIE/LUCIE-Training.ipynb) | 23 Aug 2025 |\n" + "| **LUCIE Climate Model** | Train a climate model | (no image) | [LUCIE-Training](./tutorial/LUCIE/LUCIE-Training.ipynb) | 13 Nov 2025 |\n", + "| **LUCIE Climate Model** | Make predictions from a climate model | (no image) | [LUCIE-Inference](./tutorial/LUCIE/LUCIE-Inference.ipynb) | 13 Nov 2025 |\n" ] }, {