Comparison between smt and scikit-learn Gaussian Process with fixed Noise

Hello,

I am trying to compare smt and scikit-learn in Gaussian Process with fixed Noise. I've noticed that the predicted variance from the smt model is much larger than the one from the scikit-learn model. I understand that optimisers for hyperparameters are different, but I am still wondering if there are any other factors which have caused the difference. I have attached my code in the end for your information. Thank you very much in advance.

**Truth Function:**
y = x * np.sin(x)

**Noise on output:**
normal(0.0, 0.5)

**GP kernal function**
squar_exp (in smt)
RBF (in scikit-learn)
_I believe these are the same but please correct me if I am wrong_

**Package versions:**
Python 3.12.1
scikit-learn==1.7.2
scipy==1.12.0
smt==2.9.5
numpy==1.26.4

<img width="1049" height="877" alt="Image" src="https://github.com/user-attachments/assets/aae02591-b5bd-4bf7-9f75-220bbc79f4a3" />



```
import numpy as np
import matplotlib.pyplot as plt
from smt.surrogate_models import KRG

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessRegressor


# truth
x = np.linspace(start=0, stop=10, num=1_000).reshape(-1, 1)
y = np.squeeze(x * np.sin(x))

# training data
n_train = 5 # number of points for training
rng = np.random.RandomState(1)
training_indices = rng.choice(np.arange(y.size), size=n_train, replace=False)
xt, yt0 = x[training_indices], y[training_indices]

# add noise to training points
noise_std = 0.5
yt = yt0+ rng.normal(loc=0.0, scale=noise_std, size=yt0.shape)


# smt models
# noise-free Kriging model
smt_noise_free = KRG(theta0=[1.0], theta_bounds=[1e-2, 1e2], n_start=10) 
smt_noise_free.set_training_values(xt, yt)
smt_noise_free.train()

# noisy Kriging model with fixed variance
smt_noise_fixed = KRG(theta0=[1.0], theta_bounds=[1e-2, 1e2], noise0=[noise_std**2], n_start=10) 
smt_noise_fixed.set_training_values(xt, yt)
smt_noise_fixed.train()


# scikit learn models
kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))

# noise-free Kriging model
skl_noise_free = GaussianProcessRegressor(
    kernel=kernel, n_restarts_optimizer=10
)
skl_noise_free.fit(xt.reshape(-1, 1), yt.reshape(-1, 1))

# noisy Kriging model with fixed variance
skl_noise_fixed = GaussianProcessRegressor(
    kernel=kernel, alpha=noise_std**2, n_restarts_optimizer=10
)
skl_noise_fixed.fit(xt.reshape(-1, 1), yt.reshape(-1, 1))


# predictions
y_noise_free_smt = smt_noise_free.predict_values(x) # predictive mean
var_noise_free_smt = smt_noise_free.predict_variances(x) # predictive variance

y_noise_fixed_smt= smt_noise_fixed.predict_values(x) # predictive mean
var_noise_fixed_smt = smt_noise_fixed.predict_variances(x) # predictive variance

y_noise_free_skl, std_noise_free_skl = skl_noise_free.predict(x, return_std=True)
y_noise_fixed_skl, std_noise_fixed_skl = skl_noise_fixed.predict(x, return_std=True)



# plotting predictions +- 3 std confidence intervals
plt.rcParams['figure.figsize'] = [12, 10]
fig, axes = plt.subplots(2, 2)

# noise free smt model
axes[0,0].fill_between(np.ravel(x),
                     np.ravel(y_noise_free_smt-3*np.sqrt(var_noise_free_smt)),
                     np.ravel(y_noise_free_smt+3*np.sqrt(var_noise_free_smt)),
                     alpha=0.2, label='3-sd confidence intervals')
axes[0,0].scatter(xt, yt, label="training data")
axes[0,0].plot(x, y_noise_free_smt, label='pred')
axes[0,0].plot(x, y, label='truth')
axes[0,0].set_title('noise-free smt Kriging model')
axes[0,0].legend(loc=0)
axes[0,0].set_xlabel(r'$x$')
axes[0,0].set_ylabel(r'$y$')

# noise fixed smt model
axes[0,1].fill_between(np.ravel(x),
                     np.ravel(y_noise_fixed_smt-3*np.sqrt(var_noise_fixed_smt)),
                     np.ravel(y_noise_fixed_smt+3*np.sqrt(var_noise_fixed_smt)),
                     alpha=0.2, label='3-sd confidence intervals')
axes[0,1].scatter(xt, yt, label="training data")
axes[0,1].plot(x, y_noise_fixed_smt, label='pred')
axes[0,1].plot(x, y, label='truth')
axes[0,1].set_title('noise-fixed smt Kriging model')
axes[0,1].set_xlabel(r'$x$')
axes[0,1].set_ylabel(r'$y$')


# noise free scikit learn model
axes[1,0].fill_between(np.ravel(x),
                     np.ravel(y_noise_free_skl-3*std_noise_free_skl),
                     np.ravel(y_noise_free_skl+3*std_noise_free_skl),
                     alpha=0.2, label='3-sd confidence intervals')
axes[1,0].scatter(xt, yt, label="training data")
axes[1,0].plot(x, y_noise_free_skl, label='pred')
axes[1,0].plot(x, y, label='truth')
axes[1,0].set_title('noise-free scikit learn Kriging model')
axes[1,0].set_xlabel(r'$x$')
axes[1,0].set_ylabel(r'$y$')

# noise fixed scikit learn model
axes[1,1].fill_between(np.ravel(x),
                     np.ravel(y_noise_fixed_skl-3*std_noise_fixed_skl),
                     np.ravel(y_noise_fixed_skl+3*std_noise_fixed_skl),
                     alpha=0.2, label='3-sd confidence intervals')
axes[1,1].scatter(xt, yt, label="training data")
axes[1,1].plot(x, y_noise_fixed_skl, label='pred')
axes[1,1].plot(x, y, label='truth')
axes[1,1].set_title('noise-fixed scikit learn Kriging model')
axes[1,1].set_xlabel(r'$x$')
axes[1,1].set_ylabel(r'$y$')
plt.setp(axes, xlim=[0, 10], ylim=[-10, 20])

plt.show()

```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Comparison between smt and scikit-learn Gaussian Process with fixed Noise #757

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Comparison between smt and scikit-learn Gaussian Process with fixed Noise #757

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions