Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions Assignments/fda.package
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
install.packages("fda")
library(fda)
#provide directory of the RData
setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2017/R")

# Use the scalar-on-function functional linear model to find the effect of
# the daily tempreture curve on the annual precipitations.
# x_i(t): the daily tempreture curve for the i-th city
# y_i : the annual precipitation for the i-th city;

# obtain the annual precipitation for 35 cities
annualprec = log10(apply(daily$precav,2,sum))
(ny = length(annualprec))
# Define the 65 Fourier basis functions
tempbasis65 = create.fourier.basis(c(0,365),65)

# Smooth the daily temperature data to obtain a smooth temperature curve
tempSmooth65 = smooth.basis(day.5, daily$tempav, tempbasis65)
tempfd65 = tempSmooth65$fd


templist = vector("list",2)
templist[[1]] = rep(1,35)
templist[[2]] = tempfd65



# create a constant basis for the intercept
conbasis = create.constant.basis(c(0,365))
plot(conbasis)
# Define the small number of basis functions for beta(t)
# We do not use roughness penalty here
nbasis = 11
betabasis5 = create.fourier.basis(c(0,365),nbasis)
betalist1 = vector("list",2)
betalist1[[1]] = conbasis
betalist1[[2]] = betabasis5

# fit the functional linear model
fRegressList1 = fRegress(annualprec,templist,betalist1)

betaestlist1 = fRegressList1$betaestlist
length(betaestlist1)
# betaestlist1 has two elements. The first element is the intercept
# The second element is the slope beta(t)

# obtain beta(t)
tempbetafd1 = betaestlist1[[2]]$fd

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(tempbetafd1, xlab="Day", ylab="Beta for temperature")

# obtain the intercept alpha
coef(betaestlist1[[1]])

mean(annualprec)

# fitted value yhat
annualprechat1 = fRegressList1$yhatfdobj
# fitted residual
annualprecres1 = annualprec - annualprechat1
# sum squared residuals
SSE1 = sum(annualprecres1^2)

# sum squared residuals for the null model y = alpha + \epsilon
SSE0 = sum((annualprec - mean(annualprec))^2)

# F test for the overall effect of x_i(t)
# H0: y = alpha + \epsilon
# H1: y = alpha + \int [beta(t)x(t)]dt + epsilon
(Fratio = ((SSE0-SSE1)/(nbasis-1)/(SSE1/(ny-nbasis))))

# 95% quantile of F(4,30)
qf(0.95,nbasis-1,ny-nbasis)

# Fratio >>qf(0.95,nbasis-1,ny-nbasis)
# indicating that x_i(t) has a significant effect on y_i

# calculate the p-value
1-pf(Fratio,nbasis-1,ny-nbasis)
# p-value is 1.7*10^(-8) indicating that x_i(t) has a significant effect on y_i



# Penalized Estimation

#Using the harmonic acceleration differential operator
# to define roughness penalty on beta(t)
Lcoef = c(0,(2*pi/365)^2,0)
harmaccelLfd = vec2Lfd(Lcoef, c(0,365))

# We use 35 Fourier basis functions to represent beta(t)
betabasis35 = create.fourier.basis(c(0, 365), 35)

# Choosing Smoothing Parameters using cross-validation

loglam = seq(5,15,0.5)
nlam = length(loglam)
SSE.CV = rep(NA,nlam)
for (ilam in 1:nlam) {
print(paste("log lambda =", loglam[ilam]))
lambda = 10^(loglam[ilam])
betalisti = betalist1
betalisti[[2]] = fdPar(betabasis35, harmaccelLfd, lambda)
fRegi = fRegress.CV(annualprec, templist, betalisti)
SSE.CV[ilam] = fRegi$SSE.CV
}
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(loglam, SSE.CV, type="b", lwd=4,
xlab="log smoothing parameter lambda",
ylab="Cross-validation score", cex.lab=4,cex.axis=4)

# Choose lambda which minimize SSE.CV
lambda = 10^12.5
betafdPar. = fdPar(betabasis35, harmaccelLfd, lambda)

betalist2 = betalist1
betalist2[[2]] = betafdPar.
# the first element of betalist2 is still the constant for the intercept

# do functional linear model
annPrecTemp = fRegress(annualprec, templist, betalist2)

# get beta(t)
betaestlist2 = annPrecTemp$betaestlist

# get fitted value yhat
annualprechat2 = annPrecTemp$yhatfdobj

# get the effective degrees of freedom
print(annPrecTemp$df)

# do the F test
# test the overall effect of x_i(t) on y_i
(SSE2 = sum((annualprec-annualprechat2)^2))
(Fratio2 = ((SSE0-SSE2)/(annPrecTemp$df-1))/(SSE2/(ny-annPrecTemp$df)))
c(Fratio,Fratio2)
# Fratio2 > Fratio

# 95% quantile
qf(0.95,annPrecTemp$df-1,ny-annPrecTemp$df)

# p-value
1-pf(Fratio2,annPrecTemp$df-1,ny-annPrecTemp$df)

# plot beta(t)
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(betaestlist2[[2]]$fd, xlab="Days",ylab="beta(t)",lwd=4,cex.lab=4,cex.axis=4)

# plot yhat vs. y
quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(annualprechat2, annualprec, lwd=4,cex.lab=4,cex.axis=4)
abline(lm(annualprec~annualprechat2), lty='dashed', lwd=4)
abline(0,1,lty=1, lwd=4,col="red")





# Confidence Intervals

# fitted residuals
resid = annualprec - annualprechat2

# estimate sigma^2
SigmaE. = sum(resid^2)/(35-annPrecTemp$df)
SigmaE = SigmaE.*diag(rep(1,35))

# for smoothing temperature
y2cMap = tempSmooth65$y2cMap

# obtain point-wise standard error for beta(t)
stderrList = fRegress.stderr(annPrecTemp, y2cMap, SigmaE)

betafdPar = betaestlist2[[2]]
betafd = betafdPar$fd
betastderrList = stderrList$betastderrlist
betastderrfd = betastderrList[[2]]

quartz()
plot(betafd, xlab="Day", ylab="Temperature Reg. Coeff.",
ylim=c(-6e-4,1.2e-03), lwd=4,cex.lab=4,cex.axis=4)
lines(betafd+2*betastderrfd, lty=2, lwd=4)
lines(betafd-2*betastderrfd, lty=2, lwd=4)
122 changes: 122 additions & 0 deletions Assignments/fda2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
library(fda)
#provide directory of the RData
setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2018/R")

daybasis365 <- create.fourier.basis(c(0, 365), 365)

# define the roughness penalty on the fitted function
# Lfdobj=int2Lfd(2) is to use the second derivative to
# define the rought penalty

fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=1e4)
y = CanadianWeather$dailyAv[,'Vancouver','Temperature.C']

# Do the penalized spline smoothing
# This function returns a fd object
precfd = smooth.basis(1:365,y,fdParobj)

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
# plot the data
plot(1:365,y,col=2,xlab='day',ylab='precipitation',main='Vancouver',
cex.lab=1.5,cex.axis=1.5)
# plot the fitted curve
lines(precfd$fd,lwd=2,col=4)

# basis matrix
bvals = eval.basis(1:365,daybasis365)
# chat = y2cMap%*% y
# infMat is the smoothing matrix
infMat = bvals%*%precfd$y2cMap
dim(infMat)
matplot(infMat[,c(20,180,300)],type='l',ylab='influence',xlab='day',
main='Observations 20, 180, 300',cex.lab=1.5,cex.axis=1.5,lwd=2)

# define the harmonic acceleration differential operator
# L = D^3 f(t) + 0 * D^2 f(t) + w^2 * D^f(t) + 0 * f(t)
# c(0,(2*pi/365)^2,0) is the vector of coefficients to
# the lower derivatives
harmLfd = vec2Lfd(c(0,(2*pi/365)^2,0), c(0, 365))

# evaluate of the fitted curve under the harmonic acceleration differential operator
harmfdvals = eval.fd(1:365,precfd$fd,harmLfd)

# The y-axis is Lf(t)
plot(1:365,harmfdvals,type='l',lwd=2,col=4,xlab='day',ylab='Harmonic Acceleration',
main ='Vancouver',cex.lab=1.5,cex.axis=1.5)


# Show how gcv varies with the values of lambda;

lambdas = exp(seq(-1,22,by=1))

gcvs = rep(0,length(lambdas))
dfs = rep(0,length(lambdas))
ocvs = rep(0,length(lambdas))
errs = rep(0,length(lambdas)) # SSE

for(i in 1:length(lambdas)){
fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
precfd = smooth.basis(1:365,y,fdParobj)

dfs[i] = precfd$df
gcvs[i] = precfd$gcv
errs[i] = precfd$SSE

# this is the smoothing matrix
hatmat = bvals%*%precfd$y2cMap

ocvs[i] = mean( (y-eval.fd(1:365,precfd$fd))^2/(1-diag(hatmat))^2 )


quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
title = paste('ln(lambda) = ',log(lambdas[i]),sep='')
plot(1:365,y,col=2,xlab='day',ylab='Precipitation',main=title,
cex.lab=1.5,cex.axis=1.5)
lines(precfd$fd,lwd=2,col=4)
readline(prompt="Press [enter] to continue")
}


par(mfrow=c(2,2))
l = log(lambdas)
plot(l,dfs,type='l',col=2,xlab='log lambda',ylab='df',cex.lab=1.5,cex.axis=1.5)
plot(l,errs,type='l',col=2,xlab='log lambda',ylab='SSE',cex.lab=1.5,cex.axis=1.5)
plot(l,ocvs,type='l',col=2,xlab='log lambda',ylab='OCV',cex.lab=1.5,cex.axis=1.5)
plot(l,gcvs,type='l',col=2,xlab='log lambda',ylab='GCV',cex.lab=1.5,cex.axis=1.5)

gcvs
i = 10
fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
precfd = smooth.basis(1:365,y,fdParobj)

par(mfrow=c(1,1))
plot(1:365,y,col=2,xlab='day',ylab='precipitation',cex.lab=1.5,cex.axis=1.5,
main="Vancouver")
lines(precfd$fd,col=4,lwd=2)


# fitted value
yhat = eval.fd(1:365,precfd$fd)
# estimate sigma
ny = length(y)
sig2 = precfd$SSE/(ny-precfd$df)
### Now a couple of probes

y2cMap = precfd$y2cMap

dfvals = eval.fd(1:365,precfd$fd,1)
dbvals = eval.basis(1:365,daybasis365,Lfdobj=1)



dfvar = diag(dbvals%*%y2cMap%*%t(y2cMap)%*%t(dbvals))*sig2

quartz()
par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
plot(1:365,dfvals,type='l',col=4,lwd=2,xlab='day',ylab='D precipitation',
cex.lab=1.5,cex.axis=1.5,ylim=c(-0.2,0.2))
lines(1:365,dfvals+2*sqrt(dfvar),lty=2,lwd=2,col=4)
lines(1:365,dfvals-2*sqrt(dfvar),lty=2,lwd=2,col=4)
abline(h=0,col=2)
1 change: 1 addition & 0 deletions GeneData1.0

Large diffs are not rendered by default.

13 changes: 0 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1 @@
# FDAworkshop
Materials for Functional Data Analysis Workshop


Recorded Lecture
https://www.dropbox.com/sh/gg8slxqsicg0zrl/AADWzz3aYDcRVK8mco1WOxv8a?dl=0

Sumit your first assignment here
https://goo.gl/forms/7js5JbZo4aXavlZd2

Sumit your second assignment here
https://goo.gl/forms/T4hdcLbBMkEQ7GR62

Please put your R codes and graphs in one zipped file. Please name your zipped files with your first name_Assignment1.zip
59 changes: 59 additions & 0 deletions data4.0.sas
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
libname sdtm "C:\review\sdtm";

/*statistics avail@:
https://github.com/igargikaushik/FDAworkshop/new/master */
data fda;
length tsparmcd $10;
tsparmcd='ACTSUB'; output;
tsparmcd='ADAPT'; output;
tsparmcd='ADDON'; output;
tsparmcd='AGEMAX'; output;
tsparmcd='AGEMIN'; output;
tsparmcd='COMPTRT'; output;
tsparmcd='DCUTDESC'; output;
tsparmcd='DCUTDTC'; output;
tsparmcd='EXTTIND'; output;
tsparmcd='FCNTRY'; output;
tsparmcd='HLTSUBJI'; output;
tsparmcd='LENGTH'; output;
tsparmcd='NARMS'; output;
tsparmcd='NCOHORT'; output;
tsparmcd='OBJPRIM'; output;
tsparmcd='OBJSEC'; output;
tsparmcd='OUTMSPRI'; output;
tsparmcd='PDPSTIND'; output;
tsparmcd='PDSTIND'; output;
tsparmcd='PIPIND'; output;
tsparmcd='PLANSUB'; output;
tsparmcd='RDIND'; output;
tsparmcd='REGID'; output;
tsparmcd='SENDTC'; output;
tsparmcd='SEXPOP'; output;
tsparmcd='SPONSOR'; output;
tsparmcd='SDTMVER'; output;
tsparmcd='SDTIGVER'; output;
tsparmcd='STOPRULE'; output;
tsparmcd='SSTDTC'; output;
tsparmcd='STYPE'; output;
tsparmcd='TBLIND'; output;
tsparmcd='TCNTRL'; output;
tsparmcd='THERAREA'; output;
tsparmcd='TITLE'; output;
tsparmcd='TPHASE'; output;
tsparmcd='TTYPE'; output;
run;

proc sql;
create table actual as
select distinct tsparmcd from sdtm.ts;

proc sort data=fda; by tsparmcd;

data data4;
merge fda(in=a) actual(in=b);
by tsparmcd;
if a and not b;

proc print data=data4;
title " TS parameters desired by FDA but not in the TS domain";
run;
12 changes: 12 additions & 0 deletions data5.0.sas
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
libname sdtm "C:\review\sdtm";
proc contents data=sdtm._all_ out=out1 noprint;


proc sort data=out1;
by memname aryan name;
run;

proc print data=out1(keep=memname aryan name label);
id memname;
by memname;
run;
Loading