From 88de8ef86fa14cad3c34c0d4c77dbbd5ec669365 Mon Sep 17 00:00:00 2001 From: Rutgervdspek Date: Fri, 21 Nov 2025 13:02:47 +0100 Subject: [PATCH] updated new functions and tests --- NAMESPACE | 6 +- NEWS.md | 11 ++ R/{CCovEst.R => CondCovEst.R} | 154 +++++++++++++----------- R/{CEllGenEst.R => CondEllGenEst.R} | 80 +++++++----- R/{CMeanEst.R => CondMeanEst.R} | 12 +- R/EllDistrEst.R | 40 ++++-- man/{CCovEst.Rd => CondCovEst.Rd} | 33 +++-- man/{CEllGenEst.Rd => CondEllGenEst.Rd} | 36 +++--- man/{CMeanEst.Rd => CondMeanEst.Rd} | 15 +-- tests/testthat/test-CondCovEst.R | 101 ++++++++++++++++ tests/testthat/test-CondEllGenEst.R | 90 ++++++++++++++ tests/testthat/test-CondMeanEst.R | 58 +++++++++ 12 files changed, 488 insertions(+), 148 deletions(-) rename R/{CCovEst.R => CondCovEst.R} (56%) rename R/{CEllGenEst.R => CondEllGenEst.R} (73%) rename R/{CMeanEst.R => CondMeanEst.R} (87%) rename man/{CCovEst.Rd => CondCovEst.Rd} (81%) rename man/{CEllGenEst.Rd => CondEllGenEst.Rd} (85%) rename man/{CMeanEst.Rd => CondMeanEst.Rd} (80%) create mode 100644 tests/testthat/test-CondCovEst.R create mode 100644 tests/testthat/test-CondEllGenEst.R create mode 100644 tests/testthat/test-CondMeanEst.R diff --git a/NAMESPACE b/NAMESPACE index 09b4ef7..30b5dfe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(CCovEst) -export(CEllGenEst) -export(CMeanEst) +export(CondCovEst) +export(CondEllGenEst) +export(CondMeanEst) export(Convert_g1_To_Fg1) export(Convert_g1_To_Qg1) export(Convert_g1_To_f1) diff --git a/NEWS.md b/NEWS.md index 8803f8f..ea9c25a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +* New functions: + + - `CondMeanEst`: nonparametric estimation of the conditional mean. + (#2, thanks to Rutger van der Spek) + + - `CondCovEst`: nonparametric estimation of the conditional covariance matrix. + (#2, thanks to Rutger van der Spek) + + - `CondEllGenEst`: nonparametric estimation of the conditional density generator + of an elliptical distribution. + (#2, thanks to Rutger van der Spek) # ElliptCopulas 0.1.4 diff --git a/R/CCovEst.R b/R/CondCovEst.R similarity index 56% rename from R/CCovEst.R rename to R/CondCovEst.R index 4d451a1..ce2bc0a 100644 --- a/R/CCovEst.R +++ b/R/CondCovEst.R @@ -16,7 +16,7 @@ #' The supported estimators are: #' #' \describe{ -#' \item{type = 1}{ +#' \item{type = "`grid_mean`"}{ #' Covariance estimator using the conditional mean evaluated at the grid point: #' \deqn{ #' \widehat{\Sigma}_n(z) @@ -26,7 +26,7 @@ #' } #' } #' -#' \item{type = 2}{ +#' \item{type = "`obs_mean`"}{ #' Covariance estimator using the conditional mean evaluated at each observation: #' \deqn{ #' \widetilde{\Sigma}_n(z) @@ -36,7 +36,7 @@ #' } #' } #' -#' \item{type = 3}{ +#' \item{type = "`pairwise`"}{ #' Pairwise covariance estimator: #' \deqn{ #' \widecheck{\Sigma}_n(z) @@ -63,12 +63,21 @@ #' #' @template param-Kernel #' -#' @param type integer in \{1,2,3\} indicating which estimator to compute. +#' @param type indicates which estimator to use. Possible choices are "`grid_mean`", +#' "`obs_mean`" and "`pairwise`". #' #' @return An array of dimension \eqn{d \times d \times \code{length(gridZ)}}, #' \eqn{\widehat{\Sigma}_n(z)} containing the estimated conditional covariance #' matrices of the \eqn{d}-dimensional random variable \eqn{X} at each point of `gridZ`. #' +#' @details +#' Computational complexity: +#' \itemize{ +#' \item `grid_mean`: O(n * d^2 * \code{length(gridZ))}. +#' \item `obs_mean`: O(n * d^2 * \code{length(gridZ))}. +#' \item `pairwise`: O(n^2 * d^2 * \code{length(gridZ))}. +#' } +#' #' @examples #' # Comparison between the estimated and true conditional covariance #' @@ -81,7 +90,7 @@ #' gridZ = seq(-2, 2, length.out = 50) #' h = 0.2 #' -#' Sigma_est = CCovEst(X, Z, gridZ, h, type = 1) +#' Sigma_est = CondCovEst(X, Z, gridZ, h, type = 1) #' cov_X1X2 = sapply(1:length(gridZ), function(i) Sigma_est[1,2,i]) #' true_cov = 0.3 * gridZ #' @@ -93,8 +102,8 @@ #' #' @export #' -CCovEst <- function(dataMatrix, observedZ, gridZ, h , Kernel = "epanechnikov", - type = 1) +CondCovEst <- function(dataMatrix, observedZ, gridZ, h , Kernel = "epanechnikov", + type = "grid_mean") { d = ncol( dataMatrix ) n = nrow( dataMatrix ) @@ -108,89 +117,96 @@ CCovEst <- function(dataMatrix, observedZ, gridZ, h , Kernel = "epanechnikov", class = "DifferentLengthsError") ) } - if(type == 1){ - meanEst = CMeanEst( - dataMatrix = dataMatrix, observedZ = observedZ, - gridZ = gridZ, h = h, - Kernel = Kernel) + estimate = switch(type, + + grid_mean = { + + meanEst = CondMeanEst( + dataMatrix = dataMatrix, observedZ = observedZ, + gridZ = gridZ, h = h, + Kernel = Kernel) - matrixWeights = matrix(data = NA, nrow = n, ncol = nz) + matrixWeights = matrix(data = NA, nrow = n, ncol = nz) - for(i in 1:nz){ - matrixWeights[,i] = computeWeights( - vectorZ = observedZ,h = h, - pointZ = gridZ[i], Kernel = Kernel, - normalization = TRUE) - } + for(i in 1:nz){ + matrixWeights[,i] = computeWeights( + vectorZ = observedZ,h = h, + pointZ = gridZ[i], Kernel = Kernel, + normalization = TRUE) + } - estimate = array(data = NA, dim = c(d,d,nz)) + estimate = array(data = NA, dim = c(d,d,nz)) - for(i in 1:nz){ - S = matrix(0,d,d) - for(j in 1:n){ - diff = dataMatrix[j,] - meanEst[,i] - S = S + matrixWeights[j,i] * (diff %*% t(diff)) + for(i in 1:nz){ + diffMat = t(t(dataMatrix) - meanEst[,i]) + W = diag(matrixWeights[,i]) + estimate[,,i] = t(diffMat) %*% W %*% diffMat } - estimate[,,i] = S - } - } + estimate + }, - if(type == 2){ + obs_mean = { - matrixWeights = matrix(data = NA, nrow = n, ncol = nz) + matrixWeights = matrix(data = NA, nrow = n, ncol = nz) - for(i in 1:nz){ - matrixWeights[,i] = computeWeights( - vectorZ = observedZ, h = h, - pointZ = gridZ[i], Kernel = Kernel, - normalization = TRUE) - } + for(i in 1:nz){ + matrixWeights[,i] = computeWeights( + vectorZ = observedZ, h = h, + pointZ = gridZ[i], Kernel = Kernel, + normalization = TRUE) + } - estimate = array(data = NA, dim = c(d,d,nz)) + estimate = array(data = NA, dim = c(d,d,nz)) - for(i in 1:nz){ - meanEst = CMeanEst( + meanEst = CondMeanEst( dataMatrix = dataMatrix, observedZ = observedZ, - gridZ = observedZ[i], h = h, + gridZ = observedZ, h = h, Kernel = Kernel) - S = matrix(0,d,d) - for(j in 1:n){ - diff = dataMatrix[j,] - meanEst - S = S + matrixWeights[j,i] * (diff %*% t(diff)) + + diffMat = dataMatrix - t(meanEst) + + for(i in 1:nz){ + diffMat = dataMatrix - t(meanEst) + estimate[,,i] = t(diffMat) %*% (diffMat * matrixWeights[,i]) } - estimate[,,i] = S - } - } - if(type == 3){ + estimate + }, - matrixWeights = matrix(data = NA, nrow = n, ncol = nz) + pairwise = { - for(i in 1:nz){ - matrixWeights[,i] = computeWeights( - vectorZ = observedZ, h = h, - pointZ = gridZ[i], Kernel = Kernel, - normalization = FALSE) - } + matrixWeights = matrix(data = NA, nrow = n, ncol = nz) - estimate = array(data = NA, dim = c(d,d,nz)) + for(i in 1:nz){ + matrixWeights[,i] = computeWeights( + vectorZ = observedZ, h = h, + pointZ = gridZ[i], Kernel = Kernel, + normalization = FALSE) + } - for(i in 1:nz){ - S = matrix(0,d,d) - denom = 0 - for(j in 1:(n-1)){ - for(k in (j+1):n){ - w = matrixWeights[j,i] * matrixWeights[k,i] - diff = dataMatrix[j,] - dataMatrix[k,] - S = S + w * (diff %*% t(diff)) - denom = denom + w + estimate = array(data = NA, dim = c(d,d,nz)) + + for(i in 1:nz){ + S = matrix(0,d,d) + denom = 0 + for(j in 1:(n-1)){ + for(k in (j+1):n){ + w = matrixWeights[j,i] * matrixWeights[k,i] + diff = dataMatrix[j,] - dataMatrix[k,] + S = S + w * (diff %*% t(diff)) + denom = denom + w + } } + S = S / (2* denom) + estimate[,,i] = S } - S = S/ (2* denom) - estimate[,,i] = S - } - } + + estimate + }, + + stop("Invalid type. Must be one of 'grid_mean', 'obs_mean', or 'pairwise'.") + ) return(estimate) } diff --git a/R/CEllGenEst.R b/R/CondEllGenEst.R similarity index 73% rename from R/CEllGenEst.R rename to R/CondEllGenEst.R index d16e48e..bf6f307 100644 --- a/R/CEllGenEst.R +++ b/R/CondEllGenEst.R @@ -63,8 +63,8 @@ #' \eqn{\psi(\xi)}. If not specified, the Silverman's rule of thumb is used. #' #' @return A matrix of size \eqn{\code{length(grid)} \times -#' \code{length(gridZ)}}, \eqn{\widehat{g}_{n}(\xi \mid z)} containing the estimated conditional density -#' generator at each \eqn{\xi} and \eqn{z}. +#' \code{length(gridZ)}}, \eqn{\widehat{g}_{n}(\xi \mid z)} containing the +#' estimated conditional density generator at each \eqn{\xi} and \eqn{z}. #' #' @references Liebscher, E. (2005). A semiparametric density estimator based on #' elliptical distributions. Journal of Multivariate Analysis, 92(1), 205-222. @@ -79,20 +79,25 @@ #' data_X = matrix(NA, n, d) #' for (i in 1:n) { #' mean_i = rep(Z[i], d) -#' sigma_i = matrix(c(Z[i], Z[i]/2, Z[i]/3, -#' Z[i]/2, Z[i], Z[i]/2, -#' Z[i]/3, Z[i]/2, Z[i]), nrow = d) -#' data_X[i,] = mvtnorm::rmvnorm(1, mean = mean_i, sigma = sigma_i) +#' sigma_i = matrix(c(Z[i], Z[i]/2, Z[i]/3, +#' Z[i]/2, Z[i], Z[i]/2, +#' Z[i]/3, Z[i]/2, Z[i]), nrow = d) +#' data_X[i,] = EllDistrSim( +#' n = 1, d = d, +#' A = chol(sigma_i), +#' mu = mean_i, +#' density_R2 = function(x) { (2*pi)^(-d/2) * exp(-x/2) } +#' ) #' } #' #' h <- 1.06 * sd(Z) * n^(-1/5) #' gridZ <- c(0.2, 0.8) #' -#' mu_est = CMeanEst(data_X, Z, gridZ, h) -#' Sigma_est = CCovEst(data_X, Z, gridZ, h, type = 1) +#' mu_est = CondMeanEst(data_X, Z, gridZ, h) +#' Sigma_est = CondCovEst(data_X, Z, gridZ, h, type = 1) #' #' grid = seq(0, 8, length.out = 80) -#' g_est = CEllGenEst( +#' g_est = CondEllGenEst( #' dataMatrix = data_X, #' observedZ = Z, #' mu = mu_est, @@ -118,10 +123,11 @@ #' #' @export #' -CEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, - Kernel = "epanechnikov", a = 1, h_psi = NULL) +CondEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, + Kernel = "epanechnikov", a = 1, h_psi = NULL, + mpfr = FALSE, precBits = 100, dopb = TRUE) { - kernelFun = ElliptCopulas:::getKernel(Kernel = Kernel) + kernelFun = getKernel(Kernel = Kernel) d = ncol( dataMatrix ) n = nrow( dataMatrix ) nz = length( gridZ ) @@ -161,15 +167,33 @@ CEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, )) } + if(mpfr) { + if (!requireNamespace("Rmpfr")){ + stop("`Rmpfr` package should be installed to use the option `mpfr = TRUE`.") + } + + a = Rmpfr::mpfr(a, precBits = precBits) + d = Rmpfr::mpfr(d, precBits = precBits) + grid = Rmpfr::mpfr(grid, precBits = precBits) + } + R = matrix(data = NA, nrow = n, ncol = nz) psiR = matrix(data = NA, nrow = n, ncol = nz) - for(i in 1:n){ - for(j in 1:nz){ - R[i,j] = t(dataMatrix[i,] - mu[,j]) %*% solve(sigma[,,j]) %*% - (dataMatrix[i,] - mu[,j]) - psiR[i,j] = -a + (a ^ (d/2) + ( R[i,j] ) ^ (d/2) ) ^ (2/d) + if(dopb){ pb = pbapply::startpb(max = nz*n + nz*n + nz*n1 + n1*nz) } + counter = 0 + + for(i in 1:nz){ + if(mpfr){ + X_centered = t(t(dataMatrix) - mu[,i]) + R[,i] = rowSums((X_centered %*% solve(sigma[,,i])) * X_centered) + psiR[,i] = as.numeric(-a + (a^(d/2) + Rmpfr::mpfr(R[,i], precBits)^(d/2))^(2/d)) + } else { + X_centered = t(t(dataMatrix) - mu[,i]) + R[,i] = rowSums((X_centered %*% solve(sigma[,,i])) * X_centered) + psiR[,i] = -a + (a^(d/2) + R[,i]^(d/2))^(2/d) } + if(dopb){ counter = counter + n; pbapply::setpb(pb, counter) } } matrixWeights = matrix(data = NA, nrow = n, ncol = nz) @@ -179,11 +203,12 @@ CEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, vectorZ = observedZ, h = h, pointZ = gridZ[i], Kernel = Kernel, normalization = TRUE) + if(dopb){ counter = counter + n; pbapply::setpb(pb, counter) } } qEst = matrix(data = NA, nrow = n1, ncol = nz) - psiGrid = -a + (a ^ (d/2) + ( grid ) ^ (d/2) ) ^ (2/d) + psiGrid = as.numeric(-a + (a ^ (d/2) + ( grid ) ^ (d/2) ) ^ (2/d)) if(is.null(h_psi)) { psiR_pooled = as.vector(psiR) @@ -192,13 +217,12 @@ CEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, for(i in 1:nz){ for(j in 1:n1){ - S = 0 - for(k in 1:n){ - S = S + matrixWeights[k,i] / (h_psi) * - ( kernelFun( (psiGrid[j] - psiR[k,i]) / h_psi ) + - kernelFun( (psiGrid[j] + psiR[k,i]) / h_psi ) ) - } - qEst[j,i] = S + qEst[j,i] = sum( + matrixWeights[,i] / h_psi * + (kernelFun((psiGrid[j] - psiR[,i]) / h_psi) + + kernelFun((psiGrid[j] + psiR[,i]) / h_psi)) + ) + if(dopb){ counter = counter + 1; pbapply::setpb(pb, counter) } } } @@ -208,10 +232,10 @@ CEllGenEst <- function(dataMatrix, observedZ, mu, sigma, gridZ, grid, h, for(i in 1:n1){ psiPGrid = grid[i]^(d/2 - 1) * (a ^ (d/2) + grid[i]^(d/2)) ^ (2/d - 1) - for(j in 1:nz){ - gEst[i,j] = 1/s_d * grid[i]^(-d/2 + 1) * psiPGrid * qEst[i,j] - } + gEst[i,] = as.numeric(1/s_d * grid[i]^(-d/2 + 1) * psiPGrid * qEst[i,]) + if(dopb){ counter = counter + nz; pbapply::setpb(pb, counter) } } + if(dopb){ pbapply::closepb(pb) } return(gEst) } diff --git a/R/CMeanEst.R b/R/CondMeanEst.R similarity index 87% rename from R/CMeanEst.R rename to R/CondMeanEst.R index cdd39da..beab034 100644 --- a/R/CMeanEst.R +++ b/R/CondMeanEst.R @@ -14,9 +14,9 @@ #' w_{n,i}(z) #' = #' \frac{ K\!\left( \frac{Z_i - z}{h} \right) } -#' { \sum_{j=1}^n K\!\left( \frac{Z_j - z}{h} \right) } . +#' { \sum_{j=1}^n K\!\left( \frac{Z_j - z}{h} \right) }. #' } -#' +#' Here, \eqn{K} is the kernel function, and \eqn{h > 0} the bandwidth. #' #' @param dataMatrix a matrix of size \eqn{n \times d} containing the \eqn{n} #' observations of the \eqn{d}-dimensional response variable \eqn{X}. The pairs @@ -44,10 +44,10 @@ #' X = matrix(2 * Z + rnorm(n), ncol = d) #' gridZ = seq(-2, 2, length.out = 50) #' h = 0.3 -#' CMean_estimates = CMeanEst(X, Z, gridZ, h) +#' CondMean_estimates = CondMeanEst(X, Z, gridZ, h) #' #' true_mean = 2 * gridZ -#' plot(gridZ, CMean_estimates, type = "l", col = "blue", lwd = 2, +#' plot(gridZ, CondMean_estimates, type = "l", col = "blue", lwd = 2, #' ylab = "Conditional Mean", xlab = "Z") #' lines(gridZ, true_mean, col = "red", lwd = 2, lty = 2) #' legend("topleft", legend = c("Estimated", "True"), col = c("blue", "red"), @@ -55,7 +55,7 @@ #' #' @export #' -CMeanEst <- function(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov") +CondMeanEst <- function(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov") { d = ncol( dataMatrix ) n = nrow( dataMatrix ) @@ -90,7 +90,7 @@ CMeanEst <- function(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov") computeWeights <- function(vectorZ, h, pointZ, Kernel = "epanechnikov", normalization = TRUE) { n = length(vectorZ) - kernelFun = ElliptCopulas:::getKernel(Kernel = Kernel) + kernelFun = getKernel(Kernel = Kernel) u = (vectorZ - pointZ) / h w_raw = kernelFun(u) diff --git a/R/EllDistrEst.R b/R/EllDistrEst.R index bdd9022..e6c681d 100644 --- a/R/EllDistrEst.R +++ b/R/EllDistrEst.R @@ -152,14 +152,22 @@ EllDistrEst <- function(X, mu = 0, Sigma_m1 = diag(d), if (length(a) == 1){ vector_Y = rep(NA , n) - for (i in 1:n) { + if (mpfr) { # The matrix product is the expensive part (in high dimensions) # and should not use the mpfr library. # (mpfr is only used in the exponentiation, after) - vector_Y[i] = as.numeric( - -a + (a ^ (d/2) + ( (X[i,] - mu) %*% Sigma_m1 %*% (X[i,] - mu) ) - ^ (d/2) ) ^ (2/d) ) - if (dopb){ pbapply::setpb(pb, i) } + for (i in 1:n) { + x_quad = Rmpfr::mpfr(as.numeric((X[i,] - mu) %*% Sigma_m1 %*% (X[i,] - mu)), + precBits) + vector_Y[i] = as.numeric( -a + (a^(d/2) + (x_quad)^(d/2))^(2/d) ) + if (dopb){ pbapply::setpb(pb, i) } + } + } else { + for (i in 1:n) { + x_quad = as.numeric((X[i,]-mu) %*% Sigma_m1 %*% (X[i,] - mu)) + vector_Y[i] = as.numeric( -a + (a^(d/2) + (x_quad) ^(d/2))^(2/d) ) + if (dopb){ pbapply::setpb(pb, i) } + } } for (i1 in 1:n1){ @@ -168,7 +176,7 @@ EllDistrEst <- function(X, mu = 0, Sigma_m1 = diag(d), psiPZ = z^(d/2 - 1) * (a ^ (d/2) + z^(d/2)) ^ (2/d - 1) # This should use mean.default() (not the mpfr version) to save computation time. h_ny = (1/h[i1]) * base::mean( kernelFun((psiZ - vector_Y)/h[i1]) + - kernelFun((psiZ + vector_Y)/h[i1]) ) + kernelFun((psiZ + vector_Y)/h[i1]) ) gn_z = 1/s_d * z^(-d/2 + 1) * psiPZ * h_ny grid_g[i1] = as.numeric(gn_z) @@ -179,14 +187,22 @@ EllDistrEst <- function(X, mu = 0, Sigma_m1 = diag(d), matrix_Y = matrix(nrow = n1, ncol = n) - for (i in 1:n) { + if (mpfr) { # The matrix product is the expensive part (in high dimensions) # and should not use the mpfr library. # (mpfr is only used in the exponentiation, after) - matrix_Y[, i] = as.numeric( - -a + (a ^ (d/2) + c( (X[i,] - mu) %*% Sigma_m1 %*% (X[i,] - mu) ) - ^ (d/2) ) ^ (2/d) ) - if (dopb){ pbapply::setpb(pb, i) } + for (i in 1:n) { + x_quad = Rmpfr::mpfr(as.numeric((X[i,] - mu) %*% Sigma_m1 %*% (X[i,] - mu)), + precBits) + matrix_Y[, i] = as.numeric( -a + (a^(d/2) + (x_quad)^(d/2))^(2/d) ) + if (dopb){ pbapply::setpb(pb, i) } + } + } else { + for (i in 1:n) { + x_quad = as.numeric((X[i,]-mu) %*% Sigma_m1 %*% (X[i,]-mu)) + matrix_Y[, i] = -a + (a^(d/2) + (x_quad)^(d/2))^(2/d) + if (dopb){ pbapply::setpb(pb, i) } + } } for (i1 in 1:n1){ @@ -195,7 +211,7 @@ EllDistrEst <- function(X, mu = 0, Sigma_m1 = diag(d), psiPZ = z^(d/2 - 1) * (a[i1] ^ (d/2) + z^(d/2)) ^ (2/d - 1) # This should use mean.default() (not the mpfr version) to save computation time. h_ny = (1/h[i1]) * base::mean( kernelFun((psiZ - matrix_Y[i1, ])/h[i1]) + - kernelFun((psiZ + matrix_Y[i1, ])/h[i1]) ) + kernelFun((psiZ + matrix_Y[i1, ])/h[i1]) ) gn_z = 1/s_d * z^(-d/2 + 1) * psiPZ * h_ny grid_g[i1] = as.numeric(gn_z) diff --git a/man/CCovEst.Rd b/man/CondCovEst.Rd similarity index 81% rename from man/CCovEst.Rd rename to man/CondCovEst.Rd index a1dc915..4303587 100644 --- a/man/CCovEst.Rd +++ b/man/CondCovEst.Rd @@ -1,10 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CCovEst.R -\name{CCovEst} -\alias{CCovEst} +% Please edit documentation in R/CondCovEst.R +\name{CondCovEst} +\alias{CondCovEst} \title{Conditional Covariance Estimator} \usage{ -CCovEst(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov", type = 1) +CondCovEst( + dataMatrix, + observedZ, + gridZ, + h, + Kernel = "epanechnikov", + type = "grid_mean" +) } \arguments{ \item{dataMatrix}{a matrix of size \eqn{n \times d} containing the \eqn{n} @@ -22,7 +29,8 @@ variable \eqn{Z}.} \item{Kernel}{name of the kernel. Possible choices are \code{"gaussian"}, \code{"epanechnikov"}, \code{"triangular"}.} -\item{type}{integer in \{1,2,3\} indicating which estimator to compute.} +\item{type}{indicates which estimator to use. Possible choices are "\code{grid_mean}", +"\code{obs_mean}" and "\code{pairwise}".} } \value{ An array of dimension \eqn{d \times d \times \code{length(gridZ)}}, @@ -47,7 +55,7 @@ where \eqn{K} is the chosen kernel and \eqn{h} is the bandwidth. The supported estimators are: \describe{ -\item{type = 1}{ +\item{type = "\code{grid_mean}"}{ Covariance estimator using the conditional mean evaluated at the grid point: \deqn{ \widehat{\Sigma}_n(z) @@ -57,7 +65,7 @@ Covariance estimator using the conditional mean evaluated at the grid point: } } -\item{type = 2}{ +\item{type = "\code{obs_mean}"}{ Covariance estimator using the conditional mean evaluated at each observation: \deqn{ \widetilde{\Sigma}_n(z) @@ -67,7 +75,7 @@ Covariance estimator using the conditional mean evaluated at each observation: } } -\item{type = 3}{ +\item{type = "\code{pairwise}"}{ Pairwise covariance estimator: \deqn{ \widecheck{\Sigma}_n(z) @@ -78,6 +86,13 @@ Pairwise covariance estimator: } } } + +Computational complexity: +\itemize{ +\item \code{grid_mean}: O(n * d^2 * \code{length(gridZ))}. +\item \code{obs_mean}: O(n * d^2 * \code{length(gridZ))}. +\item \code{pairwise}: O(n^2 * d^2 * \code{length(gridZ))}. +} } \examples{ # Comparison between the estimated and true conditional covariance @@ -91,7 +106,7 @@ X = cbind(X1, X2) gridZ = seq(-2, 2, length.out = 50) h = 0.2 -Sigma_est = CCovEst(X, Z, gridZ, h, type = 1) +Sigma_est = CondCovEst(X, Z, gridZ, h, type = 1) cov_X1X2 = sapply(1:length(gridZ), function(i) Sigma_est[1,2,i]) true_cov = 0.3 * gridZ diff --git a/man/CEllGenEst.Rd b/man/CondEllGenEst.Rd similarity index 85% rename from man/CEllGenEst.Rd rename to man/CondEllGenEst.Rd index c73cf78..0c941d4 100644 --- a/man/CEllGenEst.Rd +++ b/man/CondEllGenEst.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CEllGenEst.R -\name{CEllGenEst} -\alias{CEllGenEst} +% Please edit documentation in R/CondEllGenEst.R +\name{CondEllGenEst} +\alias{CondEllGenEst} \title{Conditional Density Generator Estimator for Elliptical Distributions} \usage{ -CEllGenEst( +CondEllGenEst( dataMatrix, observedZ, mu, @@ -14,7 +14,10 @@ CEllGenEst( h, Kernel = "epanechnikov", a = 1, - h_psi = NULL + h_psi = NULL, + mpfr = FALSE, + precBits = 100, + dopb = TRUE ) } \arguments{ @@ -49,8 +52,8 @@ evaluated.} } \value{ A matrix of size \eqn{\code{length(grid)} \times - \code{length(gridZ)}}, \eqn{\widehat{g}_{n}(\xi \mid z)} containing the estimated conditional density -generator at each \eqn{\xi} and \eqn{z}. + \code{length(gridZ)}}, \eqn{\widehat{g}_{n}(\xi \mid z)} containing the +estimated conditional density generator at each \eqn{\xi} and \eqn{z}. } \description{ This function estimates the conditional density generator \eqn{g_z} of a @@ -98,20 +101,25 @@ Z = runif(n) data_X = matrix(NA, n, d) for (i in 1:n) { mean_i = rep(Z[i], d) - sigma_i = matrix(c(Z[i], Z[i]/2, Z[i]/3, - Z[i]/2, Z[i], Z[i]/2, - Z[i]/3, Z[i]/2, Z[i]), nrow = d) - data_X[i,] = mvtnorm::rmvnorm(1, mean = mean_i, sigma = sigma_i) + sigma_i = matrix(c(Z[i], Z[i]/2, Z[i]/3, + Z[i]/2, Z[i], Z[i]/2, + Z[i]/3, Z[i]/2, Z[i]), nrow = d) + data_X[i,] = EllDistrSim( + n = 1, d = d, + A = chol(sigma_i), + mu = mean_i, + density_R2 = function(x) { (2*pi)^(-d/2) * exp(-x/2) } + ) } h <- 1.06 * sd(Z) * n^(-1/5) gridZ <- c(0.2, 0.8) -mu_est = CMeanEst(data_X, Z, gridZ, h) -Sigma_est = CCovEst(data_X, Z, gridZ, h, type = 1) +mu_est = CondMeanEst(data_X, Z, gridZ, h) +Sigma_est = CondCovEst(data_X, Z, gridZ, h, type = 1) grid = seq(0, 8, length.out = 80) -g_est = CEllGenEst( +g_est = CondEllGenEst( dataMatrix = data_X, observedZ = Z, mu = mu_est, diff --git a/man/CMeanEst.Rd b/man/CondMeanEst.Rd similarity index 80% rename from man/CMeanEst.Rd rename to man/CondMeanEst.Rd index bea0a52..8b953db 100644 --- a/man/CMeanEst.Rd +++ b/man/CondMeanEst.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CMeanEst.R -\name{CMeanEst} -\alias{CMeanEst} +% Please edit documentation in R/CondMeanEst.R +\name{CondMeanEst} +\alias{CondMeanEst} \title{Conditional Mean Estimator} \usage{ -CMeanEst(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov") +CondMeanEst(dataMatrix, observedZ, gridZ, h, Kernel = "epanechnikov") } \arguments{ \item{dataMatrix}{a matrix of size \eqn{n \times d} containing the \eqn{n} @@ -43,8 +43,9 @@ where the normalized kernel weights are w_{n,i}(z) = \frac{ K\!\left( \frac{Z_i - z}{h} \right) } - { \sum_{j=1}^n K\!\left( \frac{Z_j - z}{h} \right) } . + { \sum_{j=1}^n K\!\left( \frac{Z_j - z}{h} \right) }. } +Here, \eqn{K} is the kernel function, and \eqn{h > 0} the bandwidth. } \examples{ # Comparison between the estimated and true conditional mean @@ -54,10 +55,10 @@ Z = runif(n, -2, 2) X = matrix(2 * Z + rnorm(n), ncol = d) gridZ = seq(-2, 2, length.out = 50) h = 0.3 -CMean_estimates = CMeanEst(X, Z, gridZ, h) +CondMean_estimates = CondMeanEst(X, Z, gridZ, h) true_mean = 2 * gridZ -plot(gridZ, CMean_estimates, type = "l", col = "blue", lwd = 2, +plot(gridZ, CondMean_estimates, type = "l", col = "blue", lwd = 2, ylab = "Conditional Mean", xlab = "Z") lines(gridZ, true_mean, col = "red", lwd = 2, lty = 2) legend("topleft", legend = c("Estimated", "True"), col = c("blue", "red"), diff --git a/tests/testthat/test-CondCovEst.R b/tests/testthat/test-CondCovEst.R new file mode 100644 index 0000000..a528516 --- /dev/null +++ b/tests/testthat/test-CondCovEst.R @@ -0,0 +1,101 @@ +test_that("CondCovEst throws error for mismatched lengths", { + X = matrix(1:6, nrow = 3) + Z = c(1, 2) + gridZ = 1 + h = 0.5 + + expect_error( + CondCovEst(X, Z, gridZ, h), + class = "DifferentLengthsError" + ) +}) + + +test_that("CondCovEst returns correct dimensions", { + X = matrix(rnorm(20), nrow = 10, ncol = 2) + Z = seq(0, 1, length.out = 10) + gridZ = seq(0, 1, length.out = 4) + h = 0.5 + + est = CondCovEst(X, Z, gridZ, h) + + expect_equal(dim(est), c(2, 2, 4)) +}) + + +test_that("grid_mean estimator matches manual covariance computation in simple case", { + n = 50 + Z = seq(0, 1, length.out = n) + noise = rnorm(n, sd = 0.5) + X = matrix(Z + noise, ncol = 1) + gridZ = 0.5 + h = 0.3 + + est = CondCovEst(X, Z, gridZ, h, type = "grid_mean") + + w = computeWeights(Z, h, gridZ) + mu = sum(w * X) + manual = sum(w * (X - mu)^2) + + expect_equal(as.numeric(est[,,1]), manual, tolerance = 1e-6) +}) + + +test_that("obs_mean estimator computes covariance using observation-wise means", { + n = 30 + Z = seq(0, 1, length.out = n) + X = matrix(rnorm(n), ncol = 1) + gridZ = 0.6 + h = 0.3 + + est = CondCovEst(X, Z, gridZ, h, type = "obs_mean") + + w = computeWeights(Z, h, gridZ) + mu_obs = CondMeanEst(X, Z, Z, h) + residuals = X - t(mu_obs) + manual = sum(w * (residuals)^2) + + expect_equal(as.numeric(est[,,1]), manual, tolerance = 1e-6) +}) + + +test_that("pairwise estimator returns symmetric matrices", { + n = 20 + Z = seq(0, 1, length.out = n) + X = cbind(rnorm(n), rnorm(n)) + gridZ = seq(0, 1, length.out = 3) + h = 0.3 + + est = CondCovEst(X, Z, gridZ, h, type = "pairwise") + + for (i in 1:3) { + expect_true(isSymmetric(est[,,i])) + } +}) + + +test_that("pairwise estimator matches manual computation in simple case", { + n = 10 + Z = seq(0, 1, length.out = n) + X = cbind(rnorm(n), rnorm(n)) + gridZ = 0.5 + h = 0.4 + + est = CondCovEst(X, Z, gridZ, h, type = "pairwise") + + w = computeWeights(Z, h, gridZ, normalization = FALSE) + denom = 0 + S = matrix(0, 2, 2) + + for (i in 1:(n-1)) { + for (j in (i+1):n) { + wij = w[i] * w[j] + diff = X[i,] - X[j,] + S = S + wij * (diff %*% t(diff)) + denom = denom + wij + } + } + manual = S / (2 * denom) + + expect_equal(est[,,1], manual, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-CondEllGenEst.R b/tests/testthat/test-CondEllGenEst.R new file mode 100644 index 0000000..a35933f --- /dev/null +++ b/tests/testthat/test-CondEllGenEst.R @@ -0,0 +1,90 @@ +test_that("CondEllGenEst throws error for mismatched lengths of Z", { + dataMatrix = matrix(rnorm(20), nrow = 10, ncol = 2) + observedZ = rnorm(9) # wrong length + mu = matrix(0, nrow = 2, ncol = 1) + sigma = array(diag(2), dim = c(2, 2, 1)) + + expect_error( + CondEllGenEst( + dataMatrix = dataMatrix, + observedZ = observedZ, + mu = mu, sigma = sigma, + gridZ = 0.5, grid = seq(0, 3, length.out = 10), + h = 1 + ), + class = "DifferentLengthsError" + ) +}) + +test_that("CondEllGenEst throws error for incompatible mu dimensions", { + dataMatrix = matrix(rnorm(20), nrow = 10, ncol = 2) + observedZ = rnorm(10) + mu = matrix(0, nrow = 2, ncol = 2) # Should be 1 column + sigma = array(diag(2), dim = c(2, 2, 1)) + + expect_error( + CondEllGenEst( + dataMatrix = dataMatrix, + observedZ = observedZ, + mu = mu, sigma = sigma, + gridZ = 0.5, grid = seq(0, 3, length.out = 10), + h = 1 + ), + class = "DifferentLengthsError" + ) +}) + +test_that("CondEllGenEst returns matrix with correct dimensions", { + n = 30 + d = 2 + + set.seed(1) + dataMatrix = matrix(rnorm(n*d), nrow = n) + observedZ = runif(n) + + gridZ = c(0.2, 0.8) + grid = seq(0, 3, length.out = 15) + + mu = matrix(c(0.2, 0.8, 0.2, 0.8), nrow = d) # 2x2 + sigma = array(0, dim = c(d, d, 2)) + sigma[,,1] = diag(2) + sigma[,,2] = diag(2) + + gEst = CondEllGenEst( + dataMatrix = dataMatrix, + observedZ = observedZ, + mu = mu, sigma = sigma, + gridZ = gridZ, grid = grid, + h = 0.3 + ) + + expect_true(is.matrix(gEst)) + expect_equal(dim(gEst), c(length(grid), length(gridZ))) +}) + +test_that("CondEllGenEst produces finite, non-negative values for simple Gaussian data", { + n = 40 + d = 2 + set.seed(123) + + Z = runif(n) + dataMatrix = cbind(rnorm(n, mean = Z), rnorm(n, mean = Z)) + + gridZ = c(0.3, 0.7) + grid = seq(0, 4, length.out = 20) + + mu = rbind(gridZ, gridZ) + sigma = array(0, dim = c(2,2,2)) + sigma[,,] = diag(2) + + gEst = CondEllGenEst( + dataMatrix = dataMatrix, + observedZ = Z, + mu = mu, sigma = sigma, + gridZ = gridZ, grid = grid, + h = 0.2 + ) + + expect_true(all(is.finite(gEst))) + expect_true(all(gEst >= 0)) +}) diff --git a/tests/testthat/test-CondMeanEst.R b/tests/testthat/test-CondMeanEst.R new file mode 100644 index 0000000..129aaee --- /dev/null +++ b/tests/testthat/test-CondMeanEst.R @@ -0,0 +1,58 @@ +test_that("CondMeanEst throws error for mismatched lengths", { + X = matrix(1:6, nrow = 3) + Z = c(1, 2) + gridZ = 1 + h = 0.5 + + expect_error( + CondMeanEst(X, Z, gridZ, h), + class = "DifferentLengthsError" + ) +}) + +test_that("computeWeights returns correct normalized weights", { + Z = c(0, 1) + h = 1 + point = 0 + + w = computeWeights(Z, h, point, Kernel = "epanechnikov") + + expect_equal(sum(w), 1) + expect_false(any(is.na(w))) +}) + +test_that("CondMeanEst computes correct mean in simple case", { + Z = c(0, 1, 2) + X = matrix(Z, ncol = 1) + grid = 1 + h = 1 + + est = CondMeanEst(X, Z, grid, h) + + w = computeWeights(Z, h, 1, Kernel = "epanechnikov") + manual = sum(w * Z) + + expect_equal(as.numeric(est), manual) +}) + +test_that("CondMeanEst output has correct dimensions", { + X = matrix(rnorm(20), nrow = 10, ncol = 2) + Z = runif(10) + gridZ = seq(0, 1, length.out = 5) + h = 0.5 + + est = CondMeanEst(X, Z, gridZ, h) + + expect_equal(dim(est), c(2, 5)) +}) + +test_that("computeWeights warns when all weights are zero", { + Z = c(0, 0) + point = 10 + h = 0.1 + + expect_warning( + computeWeights(Z, h, point, Kernel = "epanechnikov"), + "All kernel weights are zero" + ) +})