-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paths.sirus_guided_example.R
More file actions
126 lines (102 loc) · 4.93 KB
/
s.sirus_guided_example.R
File metadata and controls
126 lines (102 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Guided example for running S-SIRUS
# Authors: L. Patelli, N. Golini, R. Ignaccolo, M. Cameletti
# Last update: 09/08/2024
# For details see the companion paper: https://arxiv.org/abs/2408.05537
rm(list = ls())
# The following packages are required
library(RandomForestsGLS)
library(tidyverse)
library(glmnet)
library(BRISC)
library(sirus)
# Set the working directory to your folder
# from where all the following R files will be sourced
# Load the necessary functions ---------------------------------
source("s.sirus.R") # This script contains the main S-SIRUS functions
source("s.sirus_rfgls.R")
source("s.sirus_formula.R")
source("s.sirus_RcppExports.R")
source("s.sirus_utility.R")
# Load the simulated data ---------------------------------
# See Scenario A in the companion paper
load("SimulatedData.RData")
str(data)
# Save the response variable in a separate vector
y = data$y
data$y = NULL
# In the following code the first 400 observations
# are used as training data and the last 100 observations
# as test data (as in the companion paper).
# Please, note this setting is quite LONG to run
# For a quick overview of the results,
# use a smaller number of observations
# (e.g., 100 training data and 25 test data)
n_tr = 400 # number of training data
n_te = 100 # number of test data
seed = 1234
# S-SIRUS cross-validation --------------------------------------------------------------------------------
# This is required for tuning the p0 parameter
s.sirus.cv_output <- s.sirus.cv(data = data[1:n_tr,],
y = y[1:n_tr],
incl.coords = FALSE,
type = "reg",
nfold = 2, # number of K CV folds
ncv = 2, # number of CV repetitions
num.rule.max = 25,
q = 10, # for continuous predictors discretization
num.trees.step = 1000, # b: number of trees per step
alpha = 0.05, #
max.depth = 2,
seed = seed)
# This function returns the 2 CV plots:
# error and stability versus the number of rules when p0 varies.
s.sirus.plot.cv(s.sirus.cv_output)
# S-SIRUS fit ----------------------------------------------------------------------------
# This is required for the fit of the S-SIRUS rules
s.sirus.fit.p0_output = s.sirus.fit(data = data[1:n_tr,],
y = y[1:n_tr],
incl.coords = FALSE,
type = "reg",
max.depth = 2,
p0 = s.sirus.cv_output$p0.stab,
q = 10,
num.trees.step = 1000, # b: number of trees per step
seed = seed)
# This function returns the S-SIRUS list of rules
# with the corresponding weights
s.sirus.print(s.sirus.fit.p0_output, digits = 3)
# S-SIRUS predictions --------------------------------------------------------------------
# This is required for predicting the response variable
# in any new location using S-SIRUS-RK
# Large Scale (LS) prediction for the training data
# (coordinates are not provided as predictors)
LS_hat_s.sirus_tr = s.sirus.predict(s.sirus.fit.p0_output,
data[1:n_tr,-c(1:2)]) #omit coordinates
# Compute residuals
residuals_s.sirus_tr = y[1:n_tr] - LS_hat_s.sirus_tr
# Prepare the coordinates matrix separately for training and test data
coords_tr = as.matrix(data[1:n_tr, 1:2])
coords_te = as.matrix(data[(n_tr+1):(n_tr+n_te), 1:2])
# Estimation of the spatial parameters
# using the training data residuals and BRISC functions
set.seed(seed)
est_theta_tr_res = BRISC_estimation(coords_tr,
x = matrix(1, nrow(coords_tr), 1),
y = residuals_s.sirus_tr,
verbose = FALSE)
est_theta_tr_res$Theta
# Small scale prediction for the test data
# using BRISC functions
set.seed(seed)
pred_smallscale_te <- BRISC_prediction(est_theta_tr_res,
coords_te,
X.0 = matrix(1, nrow(coords_te), 1),
verbose = FALSE)
length(pred_smallscale_te$prediction)
# Large Scale (LS) prediction for the test data
# (coordinates are not provided as predictors)
LS_hat_s.sirus_te = s.sirus.predict(s.sirus.fit.p0_output,
data[(n_tr+1):(n_tr+n_te),-c(1:2)])
# Response variable prediction for the test data
# as sum of the large scale and the small scale
y_hat_s.sirus_te = LS_hat_s.sirus_te + pred_smallscale_te$prediction