From d4e23437edf6fd4092b046d83253b2cdfff928bb Mon Sep 17 00:00:00 2001 From: Pierre Navaro Date: Thu, 23 Dec 2021 10:21:21 +0100 Subject: [PATCH 1/6] Update AnDA_analog_forecasting.py Remove numpy.repeat calls and use numpy.newaxis instead --- AnDA_codes/AnDA_analog_forecasting.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/AnDA_codes/AnDA_analog_forecasting.py b/AnDA_codes/AnDA_analog_forecasting.py index cd9af2f..e8303e8 100644 --- a/AnDA_codes/AnDA_analog_forecasting.py +++ b/AnDA_codes/AnDA_analog_forecasting.py @@ -62,20 +62,20 @@ def AnDA_analog_forecasting(x, AF): xf_tmp[:,i_var] = AF.catalog.successors[np.ix_(index_knn[i_N,:],i_var)] # weighted mean and covariance - xf_mean[i_N,i_var] = np.sum(xf_tmp[:,i_var]*np.repeat(weights[i_N,:][np.newaxis].T,len(i_var),1),0) - E_xf = (xf_tmp[:,i_var]-np.repeat(xf_mean[i_N,i_var][np.newaxis],AF.k,0)).T - cov_xf = 1.0/(1.0-np.sum(np.power(weights[i_N,:],2)))*np.dot(np.repeat(weights[i_N,:][np.newaxis],len(i_var),0)*E_xf,E_xf.T) + xf_mean[i_N,i_var] = np.sum(xf_tmp[:,i_var]*weights[i_N,:,np.newaxis],0) + E_xf = (xf_tmp[:,i_var]-xf_mean[np.newaxis,i_N,i_var]).T + cov_xf = 1.0/(1.0-np.sum(np.power(weights[i_N,:],2)))*np.dot(weights[np.newaxis,i_N,:]*E_xf,E_xf.T) # method "locally-incremental" elif (AF.regression == 'increment'): # compute the analog forecasts - xf_tmp[:,i_var] = np.repeat(x[i_N,i_var][np.newaxis],AF.k,0) + AF.catalog.successors[np.ix_(index_knn[i_N,:],i_var)]-AF.catalog.analogs[np.ix_(index_knn[i_N,:],i_var)] + xf_tmp[:,i_var] = x[np.newaxis,i_N,i_var] + AF.catalog.successors[np.ix_(index_knn[i_N,:],i_var)]-AF.catalog.analogs[np.ix_(index_knn[i_N,:],i_var)] # weighted mean and covariance - xf_mean[i_N,i_var] = np.sum(xf_tmp[:,i_var]*np.repeat(weights[i_N,:][np.newaxis].T,len(i_var),1),0) - E_xf = (xf_tmp[:,i_var]-np.repeat(xf_mean[i_N,i_var][np.newaxis],AF.k,0)).T - cov_xf = 1.0/(1-np.sum(np.power(weights[i_N,:],2)))*np.dot(np.repeat(weights[i_N,:][np.newaxis],len(i_var),0)*E_xf,E_xf.T) + xf_mean[i_N,i_var] = np.sum(xf_tmp[:,i_var]*weights[i_N,:,np.newaxis],0) + E_xf = (xf_tmp[:,i_var]-xf_mean[np.newaxis,i_N,i_var]).T + cov_xf = 1.0/(1-np.sum(np.power(weights[i_N,:],2)))*np.dot(weights[np.newaxis,i_N,:]*E_xf,E_xf.T) # method "locally-linear" elif (AF.regression == 'local_linear'): From f564a9a376c4c3a6908321d21b67419ecf00db97 Mon Sep 17 00:00:00 2001 From: Pierre Navaro Date: Thu, 23 Dec 2021 14:59:45 +0100 Subject: [PATCH 2/6] Update test_AnDA.ipynb Suppress the warning for matplotlib in the notebook --- test_AnDA.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_AnDA.ipynb b/test_AnDA.ipynb index bdeac6e..4983973 100644 --- a/test_AnDA.ipynb +++ b/test_AnDA.ipynb @@ -286,8 +286,7 @@ "### COMPARISON BETWEEN CLASSICAL AND ANALOG DATA ASSIMILATION\n", "\n", "# plot\n", - "fig=plt.figure()\n", - "ax=fig.gca(projection='3d')\n", + "ax = plt.figure().add_subplot(projection='3d')\n", "line1,=ax.plot(xt.values[:,0],xt.values[:,1],xt.values[:,2],'k')\n", "line2,=ax.plot(x_hat_analog.values[:,0],x_hat_analog.values[:,1],x_hat_analog.values[:,2],'r')\n", "line3,=ax.plot(x_hat_classical.values[:,0],x_hat_classical.values[:,1],x_hat_classical.values[:,2],'b')\n", From c934f8438736b8cf358f39c9240680b3576a0dd4 Mon Sep 17 00:00:00 2001 From: Pierre Navaro Date: Thu, 23 Dec 2021 15:50:06 +0100 Subject: [PATCH 3/6] Update AnDA_model_forecasting.py --- AnDA_codes/AnDA_model_forecasting.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/AnDA_codes/AnDA_model_forecasting.py b/AnDA_codes/AnDA_model_forecasting.py index 6c5ae49..3300771 100644 --- a/AnDA_codes/AnDA_model_forecasting.py +++ b/AnDA_codes/AnDA_model_forecasting.py @@ -18,19 +18,18 @@ def AnDA_model_forecasting(x,GD): # initializations N, n = x.shape; xf = np.zeros([N,n]); - xf_mean = np.zeros([N,n]); - if (GD.model == 'Lorenz_63'): - for i_N in range(0,N): - S = odeint(AnDA_Lorenz_63,x[i_N,:],np.arange(0,GD.dt_integration+0.000001,GD.dt_integration),args=(GD.parameters.sigma,GD.parameters.rho,GD.parameters.beta)); - xf[i_N,:] = S[-1,:]; + fmodel = {} - elif (GD.model == 'Lorenz_96'): - for i_N in range(0,N): - S = odeint(AnDA_Lorenz_96,x[i_N,:],np.arange(0,GD.dt_integration+0.000001,GD.dt_integration),args=(GD.parameters.F,GD.parameters.J)); - xf[i_N,:] = S[-1,:]; + fmodel['Lorenz_63'] = lambda x0: odeint(AnDA_Lorenz_63,x0,(0,GD.dt_integration), + args=(GD.parameters.sigma,GD.parameters.rho,GD.parameters.beta))[-1] + + fmodel['Lorenz_96'] = lambda x0: odeint(AnDA_Lorenz_96,x0,(0,GD.dt_integration), + args=(GD.parameters.F,GD.parameters.J))[-1] + + xf = np.apply_along_axis(fmodel[GD.model], 1, x) + xf_mean = np.copy(xf) - xf_mean = xf; return xf, xf_mean From b0f80e07ba395ed05736bd56e074c6120772e916 Mon Sep 17 00:00:00 2001 From: Pierre Navaro Date: Thu, 23 Dec 2021 16:24:14 +0100 Subject: [PATCH 4/6] Use RandomState --- AnDA_codes/AnDA_analog_forecasting.py | 8 +++++--- AnDA_codes/AnDA_data_assimilation.py | 18 ++++++++++-------- AnDA_codes/AnDA_generate_data.py | 23 ++++++++++------------- AnDA_codes/AnDA_stat_functions.py | 8 ++++---- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/AnDA_codes/AnDA_analog_forecasting.py b/AnDA_codes/AnDA_analog_forecasting.py index e8303e8..16c23ae 100644 --- a/AnDA_codes/AnDA_analog_forecasting.py +++ b/AnDA_codes/AnDA_analog_forecasting.py @@ -13,7 +13,7 @@ from AnDA_codes.AnDA_stat_functions import mk_stochastic, sample_discrete from numpy.linalg import pinv -def AnDA_analog_forecasting(x, AF): +def AnDA_analog_forecasting(x, AF, seed=1): """ Apply the analog method on catalog of historical data to generate forecasts. """ # initializations @@ -22,6 +22,8 @@ def AnDA_analog_forecasting(x, AF): xf_mean = np.zeros([N,n]) stop_condition = 0 i_var = np.array([0]) + + rng = np.random.RandomState(seed) # local or global analog forecasting while (stop_condition !=1): @@ -147,12 +149,12 @@ def AnDA_analog_forecasting(x, AF): # Gaussian sampling if (AF.sampling =='gaussian'): # random sampling from the multivariate Gaussian distribution - xf[i_N,i_var] = np.random.multivariate_normal(xf_mean[i_N,i_var],cov_xf) + xf[i_N,i_var] = rng.multivariate_normal(xf_mean[i_N,i_var],cov_xf) # Multinomial sampling elif (AF.sampling =='multinomial'): # random sampling from the multinomial distribution of the weights - i_good = sample_discrete(weights[i_N,:],1,1) + i_good = sample_discrete(rng, weights[i_N,:],1,1) xf[i_N,i_var] = xf_tmp[i_good,i_var] # error diff --git a/AnDA_codes/AnDA_data_assimilation.py b/AnDA_codes/AnDA_data_assimilation.py index 7a28b87..7c0d375 100644 --- a/AnDA_codes/AnDA_data_assimilation.py +++ b/AnDA_codes/AnDA_data_assimilation.py @@ -13,12 +13,14 @@ from AnDA_codes.AnDA_stat_functions import resampleMultinomial, inv_using_SVD from tqdm import tqdm -def AnDA_data_assimilation(yo, DA): +def AnDA_data_assimilation(yo, DA, seed=1): """ Apply stochastic and sequential data assimilation technics using model forecasting or analog forecasting. """ + rng = np.random.RandomState(seed) + # dimensions n = len(DA.xb) T = yo.values.shape[0] @@ -45,7 +47,7 @@ class x_hat: for k in tqdm(range(0,T)): # update step (compute forecasts) if k==0: - xf = np.random.multivariate_normal(DA.xb, DA.B, DA.N) + xf = rng.multivariate_normal(DA.xb, DA.B, DA.N) else: xf, m_xa_part_tmp = DA.m(x_hat.part[k-1,:,:]) m_xa_part[k,:,:] = m_xa_part_tmp @@ -55,7 +57,7 @@ class x_hat: # analysis step (correct forecasts with observations) i_var_obs = np.where(~np.isnan(yo.values[k,:]))[0] if (len(i_var_obs)>0): - eps = np.random.multivariate_normal(np.zeros(len(i_var_obs)),DA.R[np.ix_(i_var_obs,i_var_obs)],DA.N) + eps = rng.multivariate_normal(np.zeros(len(i_var_obs)),DA.R[np.ix_(i_var_obs,i_var_obs)],DA.N) yf = np.dot(DA.H[i_var_obs,:],xf.T).T SIGMA = np.dot(np.dot(DA.H[i_var_obs,:],Pf[k,:,:]),DA.H[i_var_obs,:].T)+DA.R[np.ix_(i_var_obs,i_var_obs)] SIGMA_INV = np.linalg.inv(SIGMA) @@ -94,7 +96,7 @@ class x_hat: k_count = 0 m_xa_traj = [] weights_tmp = np.zeros(DA.N) - xf = np.random.multivariate_normal(DA.xb, DA.B, DA.N) + xf = rng.multivariate_normal(DA.xb, DA.B, DA.N) i_var_obs = np.where(~np.isnan(yo.values[k,:]))[0] if (len(i_var_obs)>0): # weights @@ -103,7 +105,7 @@ class x_hat: # normalization weights_tmp = weights_tmp/np.sum(weights_tmp) # resampling - indic = resampleMultinomial(weights_tmp) + indic = resampleMultinomial(rng, weights_tmp) x_hat.part[k,:,:] = xf[indic,:] weights_tmp_indic = weights_tmp[indic]/sum(weights_tmp[indic]) x_hat.values[k,:] = sum(xf[indic,:]*weights_tmp_indic[np.newaxis].T,0) @@ -113,12 +115,12 @@ class x_hat: # weights weights_tmp = 1.0/N # resampling - indic = resampleMultinomial(weights_tmp) + indic = resampleMultinomial(rng, weights_tmp) x_hat.weights[k,:] = weights_tmp_indic for k in tqdm(range(1,T)): # update step (compute forecasts) and add small Gaussian noise - xf, tej = DA.m(x_hat.part[k-1,:,:]) +np.random.multivariate_normal(np.zeros(xf.shape[1]),DA.B/100.0,xf.shape[0]) + xf, tej = DA.m(x_hat.part[k-1,:,:]) + rng.multivariate_normal(np.zeros(xf.shape[1]),DA.B/100.0,xf.shape[0]) if (k_countGD.dt_obs: - print('Error: GD.dt_obs must be bigger than GD.dt_states'); + raise ValueError('GD.dt_obs must be bigger than GD.dt_states'); if (np.mod(GD.dt_obs,GD.dt_states)!=0): - print('Error: GD.dt_obs must be a multiple of GD.dt_states'); + raise ValueError('GD.dt_obs must be a multiple of GD.dt_states'); # use this to generate the same data for different simulations - np.random.seed(1); + rng = np.random.RandomState(seed); if (GD.model == 'Lorenz_63'): - # 5 time steps (to be in the attractor space) + # Got to time = 5 to be in the attractor space x0 = np.array([8.0,0.0,30.0]); S = odeint(AnDA_Lorenz_63,x0,np.arange(0,5+0.000001,GD.dt_integration),args=(GD.parameters.sigma,GD.parameters.rho,GD.parameters.beta)); x0 = S[S.shape[0]-1,:]; @@ -51,7 +51,7 @@ class catalog: xt.values = S[t_xt,:]; # generate partial/noisy observations (yo) - eps = np.random.multivariate_normal(np.zeros(3),GD.sigma2_obs*np.eye(3,3),T_test); + eps = rng.multivariate_normal(np.zeros(3),GD.sigma2_obs*np.eye(3,3),T_test); yo_tmp = S[t_xt,:]+eps[t_xt,:]; t_yo = np.arange(0,T_test,GD.dt_obs); i_t_obs = np.where((np.in1d(t_xt,t_yo))==True)[0]; @@ -62,7 +62,7 @@ class catalog: #generate catalog S = odeint(AnDA_Lorenz_63,S[S.shape[0]-1,:],np.arange(0.01,GD.nb_loop_train+0.000001,GD.dt_integration),args=(GD.parameters.sigma,GD.parameters.rho,GD.parameters.beta)); T_train = S.shape[0]; - eta = np.random.multivariate_normal(np.zeros(3),GD.sigma2_catalog*np.eye(3,3),T_train); + eta = rng.multivariate_normal(np.zeros(3),GD.sigma2_catalog*np.eye(3,3),T_train); catalog_tmp = S+eta; catalog.analogs = catalog_tmp[0:-GD.dt_states,:]; catalog.successors = catalog_tmp[GD.dt_states:,:] @@ -70,7 +70,7 @@ class catalog: elif (GD.model == 'Lorenz_96'): - # 5 time steps (to be in the attractor space) + # Got to time = 5 to be in the attractor space x0 = GD.parameters.F*np.ones(GD.parameters.J); x0[np.int(np.around(GD.parameters.J/2))] = x0[np.int(np.around(GD.parameters.J/2))] + 0.01; S = odeint(AnDA_Lorenz_96,x0,np.arange(0,5+0.000001,GD.dt_integration),args=(GD.parameters.F,GD.parameters.J)); @@ -84,7 +84,7 @@ class catalog: xt.values = S[t_xt,:]; # generate partial/noisy observations (yo) - eps = np.random.multivariate_normal(np.zeros(GD.parameters.J),GD.sigma2_obs*np.eye(GD.parameters.J),T_test); + eps = rng.multivariate_normal(np.zeros(GD.parameters.J),GD.sigma2_obs*np.eye(GD.parameters.J),T_test); yo_tmp = S[t_xt,:]+eps[t_xt,:]; t_yo = np.arange(0,T_test,GD.dt_obs); i_t_obs = np.where((np.in1d(t_xt,t_yo))==True)[0]; @@ -95,15 +95,12 @@ class catalog: # generate catalog S = odeint(AnDA_Lorenz_96,S[S.shape[0]-1,:],np.arange(0.01,GD.nb_loop_train+0.000001,GD.dt_integration),args=(GD.parameters.F,GD.parameters.J)); T_train = S.shape[0]; - eta = np.random.multivariate_normal(np.zeros(GD.parameters.J),GD.sigma2_catalog*np.eye(GD.parameters.J,GD.parameters.J),T_train); + eta = rng.multivariate_normal(np.zeros(GD.parameters.J),GD.sigma2_catalog*np.eye(GD.parameters.J,GD.parameters.J),T_train); catalog_tmp = S+eta; catalog.analogs = catalog_tmp[0:-GD.dt_states,:]; catalog.successors = catalog_tmp[GD.dt_states:,:] catalog.source = GD.parameters; - # reinitialize random generator number - np.random.seed() - return catalog, xt, yo; diff --git a/AnDA_codes/AnDA_stat_functions.py b/AnDA_codes/AnDA_stat_functions.py index 9a1d5c6..1efbd41 100644 --- a/AnDA_codes/AnDA_stat_functions.py +++ b/AnDA_codes/AnDA_stat_functions.py @@ -41,19 +41,19 @@ def mk_stochastic(T): T = T/normaliser.astype(float); return T; -def sample_discrete(prob, r, c): +def sample_discrete(rng, prob, r, c): """ Sampling from a non-uniform distribution. """ # this speedup is due to Peter Acklam cumprob = np.cumsum(prob); n = len(cumprob); - R = np.random.rand(r,c); + R = rng.rand(r,c); M = np.zeros([r,c]); for i in range(0,n-1): M = M+1*(R>cumprob[i]); return int(M) -def resampleMultinomial(w): +def resampleMultinomial(rng, w): """ Multinomial resampler. """ M = np.max(w.shape); @@ -62,7 +62,7 @@ def resampleMultinomial(w): i = 0; indx = []; while (i<=(M-1)): - sampl = np.random.rand(1,1); + sampl = rng.rand(1,1); j = 0; while (Q[j] Date: Sat, 25 Dec 2021 11:02:45 +0100 Subject: [PATCH 5/6] put a seed in analog forecasting function was not a good idea --- AnDA_codes/AnDA_analog_forecasting.py | 8 +++----- AnDA_codes/AnDA_stat_functions.py | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/AnDA_codes/AnDA_analog_forecasting.py b/AnDA_codes/AnDA_analog_forecasting.py index 16c23ae..0772914 100644 --- a/AnDA_codes/AnDA_analog_forecasting.py +++ b/AnDA_codes/AnDA_analog_forecasting.py @@ -13,7 +13,7 @@ from AnDA_codes.AnDA_stat_functions import mk_stochastic, sample_discrete from numpy.linalg import pinv -def AnDA_analog_forecasting(x, AF, seed=1): +def AnDA_analog_forecasting(x, AF): """ Apply the analog method on catalog of historical data to generate forecasts. """ # initializations @@ -23,8 +23,6 @@ def AnDA_analog_forecasting(x, AF, seed=1): stop_condition = 0 i_var = np.array([0]) - rng = np.random.RandomState(seed) - # local or global analog forecasting while (stop_condition !=1): @@ -149,12 +147,12 @@ def AnDA_analog_forecasting(x, AF, seed=1): # Gaussian sampling if (AF.sampling =='gaussian'): # random sampling from the multivariate Gaussian distribution - xf[i_N,i_var] = rng.multivariate_normal(xf_mean[i_N,i_var],cov_xf) + xf[i_N,i_var] = np.random.multivariate_normal(xf_mean[i_N,i_var],cov_xf) # Multinomial sampling elif (AF.sampling =='multinomial'): # random sampling from the multinomial distribution of the weights - i_good = sample_discrete(rng, weights[i_N,:],1,1) + i_good = sample_discrete(weights[i_N,:],1,1) xf[i_N,i_var] = xf_tmp[i_good,i_var] # error diff --git a/AnDA_codes/AnDA_stat_functions.py b/AnDA_codes/AnDA_stat_functions.py index 1efbd41..5795433 100644 --- a/AnDA_codes/AnDA_stat_functions.py +++ b/AnDA_codes/AnDA_stat_functions.py @@ -41,13 +41,13 @@ def mk_stochastic(T): T = T/normaliser.astype(float); return T; -def sample_discrete(rng, prob, r, c): +def sample_discrete(prob, r, c): """ Sampling from a non-uniform distribution. """ # this speedup is due to Peter Acklam cumprob = np.cumsum(prob); n = len(cumprob); - R = rng.rand(r,c); + R = np.random.rand(r,c); M = np.zeros([r,c]); for i in range(0,n-1): M = M+1*(R>cumprob[i]); From 68ec640e08e03814984b8df45191adc8f8ef2481 Mon Sep 17 00:00:00 2001 From: Pierre Navaro Date: Thu, 6 Jan 2022 10:09:14 +0100 Subject: [PATCH 6/6] Update AnDA_generate_data.py --- AnDA_codes/AnDA_generate_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AnDA_codes/AnDA_generate_data.py b/AnDA_codes/AnDA_generate_data.py index e7c91d0..28f7cb2 100644 --- a/AnDA_codes/AnDA_generate_data.py +++ b/AnDA_codes/AnDA_generate_data.py @@ -38,7 +38,7 @@ class catalog: if (GD.model == 'Lorenz_63'): - # Got to time = 5 to be in the attractor space + # Go to time = 5 to be in the attractor space x0 = np.array([8.0,0.0,30.0]); S = odeint(AnDA_Lorenz_63,x0,np.arange(0,5+0.000001,GD.dt_integration),args=(GD.parameters.sigma,GD.parameters.rho,GD.parameters.beta)); x0 = S[S.shape[0]-1,:]; @@ -70,7 +70,7 @@ class catalog: elif (GD.model == 'Lorenz_96'): - # Got to time = 5 to be in the attractor space + # Go to time = 5 to be in the attractor space x0 = GD.parameters.F*np.ones(GD.parameters.J); x0[np.int(np.around(GD.parameters.J/2))] = x0[np.int(np.around(GD.parameters.J/2))] + 0.01; S = odeint(AnDA_Lorenz_96,x0,np.arange(0,5+0.000001,GD.dt_integration),args=(GD.parameters.F,GD.parameters.J));