diff --git a/.github/workflows/draft_pdf.yml b/.github/workflows/draft_pdf.yml index db558c2..a349eba 100644 --- a/.github/workflows/draft_pdf.yml +++ b/.github/workflows/draft_pdf.yml @@ -14,10 +14,10 @@ jobs: # This should be the path to the paper within your repo. paper-path: paper/paper.md - name: Upload - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: paper # This is the output path where Pandoc will write the compiled # PDF. Note, this should be the same directory as the input # paper.md - path: paper/paper.pdf \ No newline at end of file + path: paper/paper.pdf diff --git a/pynm/models/gamlss.py b/pynm/models/gamlss.py index daf2cc1..7fd1158 100644 --- a/pynm/models/gamlss.py +++ b/pynm/models/gamlss.py @@ -176,9 +176,10 @@ def fit(self,train_data): family={self.family}, data=train_data, method={self.method})''') - - def predict(self,test_data,what='mu'): - """Predict from fitted gamlss model. + + + def predict(self, test_data,score, what='mu'): + """Predict from fitted gamlss model and retrieve AIC and BIC. Parameters ---------- @@ -189,6 +190,45 @@ def predict(self,test_data,what='mu'): """ ro.globalenv['model'] = self.model ro.globalenv['test_data'] = test_data + params = r(f'''predictAll(model,newdata=test_data)''') + + + ro.globalenv['params'] = params + + percentiles = r(f'p{self.family}(test_data${score}, params$mu, params$sigma, params$nu, params$tau)') + ro.globalenv['percentiles'] = percentiles + + mu_pred = r(f'''predict(model, newdata=test_data, parameter="{what}")''') + #sigma_pred = r(f'''predict(model, newdata=test_data, parameter="{what}", se.fit=TRUE)''') + sigma_pred= r(f'''predict(model, newdata=test_data, parameter="sigma", type="response")''') + res = r(f'''resid(model)''') + + + quantiles = r(f'q{self.family}(pnorm(1), params$mu,params$sigma, params$nu,params$tau)') + ro.globalenv['quantiles'] = quantiles + + sigma = r(f'''params$sigma''') + tau = r(f'''params$tau''') + nu = r(f'''params$nu''') + z_scores = r(f'''qnorm(percentiles)''') + + # Compute AIC and BIC + aic = r('AIC(model)') + bic = r('BIC(model)') + if self.family == 'GA': + q_lower = r(f'q{self.family}(0.025, params$mu, params$sigma)') + q_upper = r(f'q{self.family}(0.975, params$mu, params$sigma)') + else: + q_lower = r(f'q{self.family}(0.025, params$mu, params$sigma, params$nu, params$tau)') + q_upper = r(f'q{self.family}(0.975, params$mu, params$sigma, params$nu, params$tau)') + prediction_interval = ro.r['data.frame'](lower=q_lower, upper=q_upper) + + return mu_pred,sigma_pred, res, z_scores, q_lower, q_upper, sigma, tau, nu, aic, bic + + def summary(self): + """Generate and return the summary of the fitted GAMLSS model.""" + ro.globalenv['fitted_model'] = self.model # Store the fitted model in the R global environment + model_summary = r('summary(fitted_model)') # Use R's summary function on the fitted model - res = r(f'''predict(model,newdata=test_data,parameter="{what}")''') - return res + print(model_summary) # Print the summary to the console + return model_summary # Return the summary object for further use or inspection diff --git a/pynm/pynm.py b/pynm/pynm.py index f11de5e..1a1bc86 100644 --- a/pynm/pynm.py +++ b/pynm/pynm.py @@ -160,6 +160,9 @@ def __init__(self, data, score, group, confounds, self.RMSE_GAMLSS = None self.SMSE_GAMLSS = None self.MSLL_GAMLSS = None + self.AIC_GAMLSS = None + self.BIC_GAMLSS = None + if seed is not None: np.random.seed(seed) @@ -733,7 +736,9 @@ def _svgp_normative_model(self,conf_mat,score,ctr_mask,nu=2.5,length_scale=1,len y_pred = means.numpy() y_true = score residuals = (y_true - y_pred).astype(float) - + + self.AIC_GAMLSS=aic + self.BIC_GAMLSS=bic self.data['GP_pred'] = y_pred self.data['GP_sigma'] = sigma.numpy() self.data['GP_residuals'] = residuals @@ -744,7 +749,7 @@ def _svgp_normative_model(self,conf_mat,score,ctr_mask,nu=2.5,length_scale=1,len self.MSLL_GP = msll - def gamlss_normative_model(self,mu=None,sigma=None,nu=None,tau=None,family='SHASHo2',method='RS',cv_folds=1): + def gamlss_normative_model(self,mu=None,sigma=None,nu=None,tau=None,train_col=None,family='SHASHo2',method='RS',cv_folds=1): """Compute GAMLSS normative model. Parameters @@ -785,13 +790,13 @@ def gamlss_normative_model(self,mu=None,sigma=None,nu=None,tau=None,family='SHAS nan_cols = ['LOESS_pred','LOESS_residuals','LOESS_z','LOESS_rank','LOESS_sigma', 'Centiles_pred','Centiles_residuals','Centiles_z','Centiles','Centiles_rank','Centiles_sigma', 'Centiles_95','Centiles_5','Centiles_32','Centiles_68'] - gamlss_data = self.data[[c for c in self.data.columns if c not in nan_cols]] - + gamlss_data = self.data[[c for c in train_col if c not in nan_cols]] + if cv_folds == 1: gamlss.fit(gamlss_data[ctr_mask]) - mu_pred = gamlss.predict(gamlss_data,what='mu') - sigma_pred = gamlss.predict(gamlss_data,what='sigma') + mu_pred,sigma_pred, res,z_scores, q_lower, q_upper, sigma, tau, nu, aic, bic = gamlss.predict(gamlss_data,self.score,what='mu') + # For MSLL y_train_mean = np.mean(self.data[self.score].values[ctr_mask]) @@ -818,8 +823,8 @@ def gamlss_normative_model(self,mu=None,sigma=None,nu=None,tau=None,family='SHAS gamlss.fit(X_train) - cv_mu_pred = gamlss.predict(X_test,what='mu') - cv_sigma_pred = gamlss.predict(X_test,what='sigma') + cv_mu_pred,cv_sigma_pred, res, z_scores, q_lower, q_upper, sigma, tau, nu, aic, bic = gamlss.predict(X_test) + r = RMSE(X_test[self.score].values,cv_mu_pred) s = SMSE(X_test[self.score].values,cv_mu_pred) @@ -840,14 +845,18 @@ def gamlss_normative_model(self,mu=None,sigma=None,nu=None,tau=None,family='SHAS mu_pred = gamlss.predict(gamlss_data,what='mu') sigma_pred = gamlss.predict(gamlss_data,what='sigma') - self.data['GAMLSS_pred'] = mu_pred + self.data['GAMLSS_pred'] = mu_pred self.data['GAMLSS_sigma'] = sigma_pred - self.data['GAMLSS_residuals'] = self.data[self.score] - self.data['GAMLSS_pred'] - self.data['GAMLSS_z'] = self.data['GAMLSS_residuals']/self.data['GAMLSS_sigma'] + self.data['GAMLSS_z'] = z_scores + self.data['GAMLSS_lower_error'] = q_lower + self.data['GAMLSS_upper_error'] = q_upper self.RMSE_GAMLSS = rmse self.SMSE_GAMLSS = smse self.MSLL_GAMLSS = msll + + self.AIC_GAMLSS = aic + self.BIC_GAMLSS = bic def report(self): """ Prints the values of each metric (SMSE, RMSE, MSLL) for the models that have been run.