Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
13b1b99
fda.package
igargikaushik Dec 22, 2023
99fa683
fda2.0
igargikaushik Dec 22, 2023
4c87f3c
GeneDta1.0
igargikaushik Dec 24, 2023
5296296
Rename GeneDta1.0 to GeneData1.0
igargikaushik Dec 24, 2023
1c090c4
Create geneData2.0
igargikaushik Dec 24, 2023
aad95cc
Update GeneData1.0
igargikaushik Dec 25, 2023
f3a3e09
README.md
igargikaushik Dec 25, 2023
351474e
Update GeneData1.0
igargikaushik Dec 26, 2023
a85412b
geneData2.0
igargikaushik Dec 26, 2023
d7761ef
Merge pull request #1 from igargikaushik/igargikaushik-patch-2
igargikaushik Dec 26, 2023
45c209f
listFiles
igargikaushik Dec 26, 2023
be3e357
data.sas
igargikaushik Dec 26, 2023
9802fc4
data3.0.sas
igargikaushik Dec 26, 2023
8b211d1
Create data4.0.sas
igargikaushik Dec 26, 2023
7681714
Merge pull request #7 from igargikaushik/igargikaushik-4
igargikaushik Dec 26, 2023
18f0a7d
Create data5.0.sas
igargikaushik Dec 26, 2023
83f53d2
Merge pull request #8 from igargikaushik/igargikaushik-6
igargikaushik Dec 26, 2023
955e4fe
data6.0.sas
igargikaushik Dec 26, 2023
048b2f0
Merge pull request #9 from igargikaushik/igargikaushik-7
igargikaushik Dec 26, 2023
67c0669
data7.0.sas
igargikaushik Dec 26, 2023
c09a5fa
Merge pull request #10 from igargikaushik/igargikaushik-8
igargikaushik Dec 26, 2023
fd46020
Merge pull request #6 from igargikaushik/igargikaushik-3
igargikaushik Dec 31, 2023
21ff9d5
Merge pull request #4 from igargikaushik/igargikaushik
igargikaushik Mar 17, 2024
1da8b76
Merge pull request #5 from igargikaushik/igargikaushik-1
igargikaushik Mar 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions Assignments/fda.package
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
install.packages("fda")
library(fda)
#provide directory of the RData
setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2017/R")

# Use the scalar-on-function functional linear model to find the effect of
# the daily tempreture curve on the annual precipitations.
# x_i(t): the daily tempreture curve for the i-th city
# y_i : the annual precipitation for the i-th city;

# obtain the annual precipitation for 35 cities
annualprec = log10(apply(daily$precav,2,sum))
(ny = length(annualprec))
# Define the 65 Fourier basis functions
tempbasis65 = create.fourier.basis(c(0,365),65)

# Smooth the daily temperature data to obtain a smooth temperature curve
tempSmooth65 = smooth.basis(day.5, daily$tempav, tempbasis65)
tempfd65 = tempSmooth65$fd


templist = vector("list",2)
templist[[1]] = rep(1,35)
templist[[2]] = tempfd65



# create a constant basis for the intercept
conbasis = create.constant.basis(c(0,365))
plot(conbasis)
# Define the small number of basis functions for beta(t)
# We do not use roughness penalty here
nbasis = 11
betabasis5 = create.fourier.basis(c(0,365),nbasis)
betalist1 = vector("list",2)
betalist1[[1]] = conbasis
betalist1[[2]] = betabasis5

# fit the functional linear model
fRegressList1 = fRegress(annualprec,templist,betalist1)

betaestlist1 = fRegressList1$betaestlist
length(betaestlist1)
# betaestlist1 has two elements. The first element is the intercept
# The second element is the slope beta(t)

# obtain beta(t)
tempbetafd1 = betaestlist1[[2]]$fd

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(tempbetafd1, xlab="Day", ylab="Beta for temperature")

# obtain the intercept alpha
coef(betaestlist1[[1]])

mean(annualprec)

# fitted value yhat
annualprechat1 = fRegressList1$yhatfdobj
# fitted residual
annualprecres1 = annualprec - annualprechat1
# sum squared residuals
SSE1 = sum(annualprecres1^2)

# sum squared residuals for the null model y = alpha + \epsilon
SSE0 = sum((annualprec - mean(annualprec))^2)

# F test for the overall effect of x_i(t)
# H0: y = alpha + \epsilon
# H1: y = alpha + \int [beta(t)x(t)]dt + epsilon
(Fratio = ((SSE0-SSE1)/(nbasis-1)/(SSE1/(ny-nbasis))))

# 95% quantile of F(4,30)
qf(0.95,nbasis-1,ny-nbasis)

# Fratio >>qf(0.95,nbasis-1,ny-nbasis)
# indicating that x_i(t) has a significant effect on y_i

# calculate the p-value
1-pf(Fratio,nbasis-1,ny-nbasis)
# p-value is 1.7*10^(-8) indicating that x_i(t) has a significant effect on y_i



# Penalized Estimation

#Using the harmonic acceleration differential operator
# to define roughness penalty on beta(t)
Lcoef = c(0,(2*pi/365)^2,0)
harmaccelLfd = vec2Lfd(Lcoef, c(0,365))

# We use 35 Fourier basis functions to represent beta(t)
betabasis35 = create.fourier.basis(c(0, 365), 35)

# Choosing Smoothing Parameters using cross-validation

loglam = seq(5,15,0.5)
nlam = length(loglam)
SSE.CV = rep(NA,nlam)
for (ilam in 1:nlam) {
print(paste("log lambda =", loglam[ilam]))
lambda = 10^(loglam[ilam])
betalisti = betalist1
betalisti[[2]] = fdPar(betabasis35, harmaccelLfd, lambda)
fRegi = fRegress.CV(annualprec, templist, betalisti)
SSE.CV[ilam] = fRegi$SSE.CV
}
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(loglam, SSE.CV, type="b", lwd=4,
xlab="log smoothing parameter lambda",
ylab="Cross-validation score", cex.lab=4,cex.axis=4)

# Choose lambda which minimize SSE.CV
lambda = 10^12.5
betafdPar. = fdPar(betabasis35, harmaccelLfd, lambda)

betalist2 = betalist1
betalist2[[2]] = betafdPar.
# the first element of betalist2 is still the constant for the intercept

# do functional linear model
annPrecTemp = fRegress(annualprec, templist, betalist2)

# get beta(t)
betaestlist2 = annPrecTemp$betaestlist

# get fitted value yhat
annualprechat2 = annPrecTemp$yhatfdobj

# get the effective degrees of freedom
print(annPrecTemp$df)

# do the F test
# test the overall effect of x_i(t) on y_i
(SSE2 = sum((annualprec-annualprechat2)^2))
(Fratio2 = ((SSE0-SSE2)/(annPrecTemp$df-1))/(SSE2/(ny-annPrecTemp$df)))
c(Fratio,Fratio2)
# Fratio2 > Fratio

# 95% quantile
qf(0.95,annPrecTemp$df-1,ny-annPrecTemp$df)

# p-value
1-pf(Fratio2,annPrecTemp$df-1,ny-annPrecTemp$df)

# plot beta(t)
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(betaestlist2[[2]]$fd, xlab="Days",ylab="beta(t)",lwd=4,cex.lab=4,cex.axis=4)

# plot yhat vs. y
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(annualprechat2, annualprec, lwd=4,cex.lab=4,cex.axis=4)
abline(lm(annualprec~annualprechat2), lty='dashed', lwd=4)
abline(0,1,lty=1, lwd=4,col="red")





# Confidence Intervals

# fitted residuals
resid = annualprec - annualprechat2

# estimate sigma^2
SigmaE. = sum(resid^2)/(35-annPrecTemp$df)
SigmaE = SigmaE.*diag(rep(1,35))

# for smoothing temperature
y2cMap = tempSmooth65$y2cMap

# obtain point-wise standard error for beta(t)
stderrList = fRegress.stderr(annPrecTemp, y2cMap, SigmaE)

betafdPar = betaestlist2[[2]]
betafd = betafdPar$fd
betastderrList = stderrList$betastderrlist
betastderrfd = betastderrList[[2]]

quartz()
plot(betafd, xlab="Day", ylab="Temperature Reg. Coeff.",
ylim=c(-6e-4,1.2e-03), lwd=4,cex.lab=4,cex.axis=4)
lines(betafd+2*betastderrfd, lty=2, lwd=4)
lines(betafd-2*betastderrfd, lty=2, lwd=4)
122 changes: 122 additions & 0 deletions Assignments/fda2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
library(fda)
#provide directory of the RData
setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2018/R")

daybasis365 <- create.fourier.basis(c(0, 365), 365)

# define the roughness penalty on the fitted function
# Lfdobj=int2Lfd(2) is to use the second derivative to
# define the rought penalty

fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=1e4)
y = CanadianWeather$dailyAv[,'Vancouver','Temperature.C']

# Do the penalized spline smoothing
# This function returns a fd object
precfd = smooth.basis(1:365,y,fdParobj)

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
# plot the data
plot(1:365,y,col=2,xlab='day',ylab='precipitation',main='Vancouver',
cex.lab=1.5,cex.axis=1.5)
# plot the fitted curve
lines(precfd$fd,lwd=2,col=4)

# basis matrix
bvals = eval.basis(1:365,daybasis365)
# chat = y2cMap%*% y
# infMat is the smoothing matrix
infMat = bvals%*%precfd$y2cMap
dim(infMat)
matplot(infMat[,c(20,180,300)],type='l',ylab='influence',xlab='day',
main='Observations 20, 180, 300',cex.lab=1.5,cex.axis=1.5,lwd=2)

# define the harmonic acceleration differential operator
# L = D^3 f(t) + 0 * D^2 f(t) + w^2 * D^f(t) + 0 * f(t)
# c(0,(2*pi/365)^2,0) is the vector of coefficients to
# the lower derivatives
harmLfd = vec2Lfd(c(0,(2*pi/365)^2,0), c(0, 365))

# evaluate of the fitted curve under the harmonic acceleration differential operator
harmfdvals = eval.fd(1:365,precfd$fd,harmLfd)

# The y-axis is Lf(t)
plot(1:365,harmfdvals,type='l',lwd=2,col=4,xlab='day',ylab='Harmonic Acceleration',
main ='Vancouver',cex.lab=1.5,cex.axis=1.5)


# Show how gcv varies with the values of lambda;

lambdas = exp(seq(-1,22,by=1))

gcvs = rep(0,length(lambdas))
dfs = rep(0,length(lambdas))
ocvs = rep(0,length(lambdas))
errs = rep(0,length(lambdas)) # SSE

for(i in 1:length(lambdas)){
fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
precfd = smooth.basis(1:365,y,fdParobj)

dfs[i] = precfd$df
gcvs[i] = precfd$gcv
errs[i] = precfd$SSE

# this is the smoothing matrix
hatmat = bvals%*%precfd$y2cMap

ocvs[i] = mean( (y-eval.fd(1:365,precfd$fd))^2/(1-diag(hatmat))^2 )


quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
title = paste('ln(lambda) = ',log(lambdas[i]),sep='')
plot(1:365,y,col=2,xlab='day',ylab='Precipitation',main=title,
cex.lab=1.5,cex.axis=1.5)
lines(precfd$fd,lwd=2,col=4)
readline(prompt="Press [enter] to continue")
}


par(mfrow=c(2,2))
l = log(lambdas)
plot(l,dfs,type='l',col=2,xlab='log lambda',ylab='df',cex.lab=1.5,cex.axis=1.5)
plot(l,errs,type='l',col=2,xlab='log lambda',ylab='SSE',cex.lab=1.5,cex.axis=1.5)
plot(l,ocvs,type='l',col=2,xlab='log lambda',ylab='OCV',cex.lab=1.5,cex.axis=1.5)
plot(l,gcvs,type='l',col=2,xlab='log lambda',ylab='GCV',cex.lab=1.5,cex.axis=1.5)

gcvs
i = 10
fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
precfd = smooth.basis(1:365,y,fdParobj)

par(mfrow=c(1,1))
plot(1:365,y,col=2,xlab='day',ylab='precipitation',cex.lab=1.5,cex.axis=1.5,
main="Vancouver")
lines(precfd$fd,col=4,lwd=2)


# fitted value
yhat = eval.fd(1:365,precfd$fd)
# estimate sigma
ny = length(y)
sig2 = precfd$SSE/(ny-precfd$df)
### Now a couple of probes

y2cMap = precfd$y2cMap

dfvals = eval.fd(1:365,precfd$fd,1)
dbvals = eval.basis(1:365,daybasis365,Lfdobj=1)



dfvar = diag(dbvals%*%y2cMap%*%t(y2cMap)%*%t(dbvals))*sig2

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(1:365,dfvals,type='l',col=4,lwd=2,xlab='day',ylab='D precipitation',
cex.lab=1.5,cex.axis=1.5,ylim=c(-0.2,0.2))
lines(1:365,dfvals+2*sqrt(dfvar),lty=2,lwd=2,col=4)
lines(1:365,dfvals-2*sqrt(dfvar),lty=2,lwd=2,col=4)
abline(h=0,col=2)
1 change: 1 addition & 0 deletions GeneData1.0

Large diffs are not rendered by default.

13 changes: 0 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1 @@
# FDAworkshop
Materials for Functional Data Analysis Workshop


Recorded Lecture
https://www.dropbox.com/sh/gg8slxqsicg0zrl/AADWzz3aYDcRVK8mco1WOxv8a?dl=0

Sumit your first assignment here
https://goo.gl/forms/7js5JbZo4aXavlZd2

Sumit your second assignment here
https://goo.gl/forms/T4hdcLbBMkEQ7GR62

Please put your R codes and graphs in one zipped file. Please name your zipped files with your first name_Assignment1.zip
Loading