caojiguo · igargikaushik · Dec 22, 2023 · Dec 22, 2023 · Dec 24, 2023 · Dec 24, 2023
diff --git a/Assignments/fda.package b/Assignments/fda.package
@@ -0,0 +1,188 @@
+install.packages("fda")
+library(fda)
+#provide directory of the RData
+setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2017/R")
+
+# Use the scalar-on-function functional linear model to find the effect of 
+# the daily tempreture curve on the annual precipitations.
+# x_i(t): the daily tempreture curve for the i-th city
+# y_i   : the annual precipitation for the i-th city;
+
+# obtain the annual precipitation for 35 cities
+annualprec   = log10(apply(daily$precav,2,sum))
+(ny = length(annualprec))
+# Define the 65 Fourier basis functions
+tempbasis65  = create.fourier.basis(c(0,365),65)
+
+# Smooth the daily temperature data to obtain a smooth temperature curve
+tempSmooth65 = smooth.basis(day.5, daily$tempav, tempbasis65)
+tempfd65     = tempSmooth65$fd
+
+
+templist      = vector("list",2)
+templist[[1]] = rep(1,35)
+templist[[2]] = tempfd65
+
+
+
+# create a constant basis for the intercept
+conbasis   = create.constant.basis(c(0,365))
+plot(conbasis)
+# Define the small number of basis functions for beta(t)
+# We do not use roughness penalty here
+nbasis = 11
+betabasis5 = create.fourier.basis(c(0,365),nbasis)
+betalist1  = vector("list",2)
+betalist1[[1]] = conbasis
+betalist1[[2]] = betabasis5
+
+# fit the functional linear model 
+fRegressList1 = fRegress(annualprec,templist,betalist1)
+
+betaestlist1  = fRegressList1$betaestlist
+length(betaestlist1)
+# betaestlist1 has two elements. The first element is the intercept
+# The second element is the slope beta(t)
+
+# obtain beta(t)
+tempbetafd1   = betaestlist1[[2]]$fd
+
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+plot(tempbetafd1, xlab="Day", ylab="Beta for temperature")
+
+# obtain the intercept alpha
+coef(betaestlist1[[1]])
+
+mean(annualprec)
+
+# fitted value yhat
+annualprechat1 = fRegressList1$yhatfdobj
+# fitted residual
+annualprecres1 = annualprec - annualprechat1
+# sum squared residuals
+SSE1  = sum(annualprecres1^2)
+
+# sum squared residuals for the null model y = alpha + \epsilon
+SSE0    = sum((annualprec - mean(annualprec))^2)
+
+# F test for the overall effect of x_i(t)
+# H0: y = alpha + \epsilon
+# H1: y = alpha + \int [beta(t)x(t)]dt + epsilon
+(Fratio = ((SSE0-SSE1)/(nbasis-1)/(SSE1/(ny-nbasis))))
+
+# 95% quantile of F(4,30)
+qf(0.95,nbasis-1,ny-nbasis)
+
+# Fratio >>qf(0.95,nbasis-1,ny-nbasis)
+# indicating that x_i(t) has a significant effect on y_i
+
+# calculate the p-value
+1-pf(Fratio,nbasis-1,ny-nbasis)
+# p-value is 1.7*10^(-8) indicating that x_i(t) has a significant effect on y_i
+
+
+
+# Penalized Estimation
+
+#Using the harmonic acceleration differential operator 
+# to define roughness penalty on beta(t)
+Lcoef = c(0,(2*pi/365)^2,0)
+harmaccelLfd = vec2Lfd(Lcoef, c(0,365))
+
+# We use 35 Fourier basis functions to represent beta(t)
+betabasis35 = create.fourier.basis(c(0, 365), 35)
+
+# Choosing Smoothing Parameters using cross-validation
+
+loglam = seq(5,15,0.5)
+nlam   = length(loglam)
+SSE.CV = rep(NA,nlam)
+for (ilam in 1:nlam) {
+  print(paste("log lambda =", loglam[ilam]))
+  lambda     = 10^(loglam[ilam])
+  betalisti  = betalist1
+  betalisti[[2]] = fdPar(betabasis35, harmaccelLfd, lambda)
+  fRegi          = fRegress.CV(annualprec, templist, betalisti)
+  SSE.CV[ilam]   = fRegi$SSE.CV
+}
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+plot(loglam, SSE.CV, type="b", lwd=4,
+     xlab="log smoothing parameter lambda",
+     ylab="Cross-validation score", cex.lab=4,cex.axis=4)
+
+# Choose lambda which minimize SSE.CV
+lambda      = 10^12.5
+betafdPar.  = fdPar(betabasis35, harmaccelLfd, lambda)
+
+betalist2      = betalist1
+betalist2[[2]] = betafdPar.
+# the first element of betalist2 is still the constant for the intercept
+
+# do functional linear model
+annPrecTemp    = fRegress(annualprec, templist, betalist2)
+
+# get beta(t)
+betaestlist2   = annPrecTemp$betaestlist
+
+# get fitted value yhat
+annualprechat2 = annPrecTemp$yhatfdobj
+
+# get the effective degrees of freedom
+print(annPrecTemp$df)
+
+# do the F test 
+# test the overall effect of x_i(t) on y_i
+(SSE2 = sum((annualprec-annualprechat2)^2))
+(Fratio2 = ((SSE0-SSE2)/(annPrecTemp$df-1))/(SSE2/(ny-annPrecTemp$df)))
+c(Fratio,Fratio2)
+# Fratio2 > Fratio
+
+# 95% quantile 
+qf(0.95,annPrecTemp$df-1,ny-annPrecTemp$df)
+
+# p-value
+1-pf(Fratio2,annPrecTemp$df-1,ny-annPrecTemp$df)
+
+# plot beta(t)
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+plot(betaestlist2[[2]]$fd, xlab="Days",ylab="beta(t)",lwd=4,cex.lab=4,cex.axis=4)
+
+# plot yhat vs. y
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+plot(annualprechat2, annualprec, lwd=4,cex.lab=4,cex.axis=4)
+abline(lm(annualprec~annualprechat2), lty='dashed', lwd=4)
+abline(0,1,lty=1, lwd=4,col="red")
+
+
+
+
+
+# Confidence Intervals
+
+# fitted residuals
+resid   = annualprec - annualprechat2
+
+# estimate sigma^2
+SigmaE. = sum(resid^2)/(35-annPrecTemp$df)
+SigmaE  = SigmaE.*diag(rep(1,35))
+
+# for smoothing temperature
+y2cMap  = tempSmooth65$y2cMap
+
+# obtain point-wise standard error for beta(t)
+stderrList = fRegress.stderr(annPrecTemp, y2cMap, SigmaE)
+
+betafdPar      = betaestlist2[[2]]
+betafd         = betafdPar$fd
+betastderrList = stderrList$betastderrlist
+betastderrfd   = betastderrList[[2]]
+
+quartz()
+plot(betafd, xlab="Day", ylab="Temperature Reg. Coeff.",
+     ylim=c(-6e-4,1.2e-03), lwd=4,cex.lab=4,cex.axis=4)
+lines(betafd+2*betastderrfd, lty=2, lwd=4)
+lines(betafd-2*betastderrfd, lty=2, lwd=4)
diff --git a/Assignments/fda2.0 b/Assignments/fda2.0
@@ -0,0 +1,122 @@
+library(fda)
+#provide directory of the RData
+setwd("/Users/cao/Dropbox/Teaching/FDA/SummerCourse2018/R")
+
+daybasis365 <- create.fourier.basis(c(0, 365), 365)
+
+# define the roughness penalty on the fitted function
+# Lfdobj=int2Lfd(2) is to use the second derivative to 
+# define the rought penalty
+
+fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=1e4)
+y = CanadianWeather$dailyAv[,'Vancouver','Temperature.C']
+
+# Do the penalized spline smoothing
+# This function returns a fd object
+precfd = smooth.basis(1:365,y,fdParobj)
+
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+# plot the data
+plot(1:365,y,col=2,xlab='day',ylab='precipitation',main='Vancouver',
+	cex.lab=1.5,cex.axis=1.5)
+# plot the fitted curve
+lines(precfd$fd,lwd=2,col=4)
+
+# basis matrix
+bvals = eval.basis(1:365,daybasis365)
+# chat = y2cMap%*% y
+# infMat is the smoothing matrix
+infMat = bvals%*%precfd$y2cMap
+dim(infMat)
+matplot(infMat[,c(20,180,300)],type='l',ylab='influence',xlab='day',
+	main='Observations 20, 180, 300',cex.lab=1.5,cex.axis=1.5,lwd=2)
+
+# define the harmonic acceleration differential operator
+# L = D^3 f(t) + 0 * D^2 f(t) + w^2 * D^f(t) + 0 * f(t)
+# c(0,(2*pi/365)^2,0) is the vector of coefficients to 
+# the lower derivatives
+harmLfd = vec2Lfd(c(0,(2*pi/365)^2,0), c(0, 365))
+
+# evaluate of the fitted curve under the harmonic acceleration differential operator
+harmfdvals = eval.fd(1:365,precfd$fd,harmLfd)
+
+# The y-axis is Lf(t)
+plot(1:365,harmfdvals,type='l',lwd=2,col=4,xlab='day',ylab='Harmonic Acceleration',
+	main ='Vancouver',cex.lab=1.5,cex.axis=1.5)
+
+
+# Show how gcv varies with the values of lambda;
+
+lambdas = exp(seq(-1,22,by=1))
+
+gcvs = rep(0,length(lambdas))
+dfs = rep(0,length(lambdas))
+ocvs = rep(0,length(lambdas))
+errs = rep(0,length(lambdas)) # SSE
+
+for(i in 1:length(lambdas)){
+	fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
+	precfd = smooth.basis(1:365,y,fdParobj)
+
+	dfs[i] = precfd$df
+	gcvs[i] = precfd$gcv
+	errs[i] = precfd$SSE
+
+	# this is the smoothing matrix
+	hatmat = bvals%*%precfd$y2cMap
+
+	ocvs[i] = mean( (y-eval.fd(1:365,precfd$fd))^2/(1-diag(hatmat))^2 )
+
+
+	quartz()
+	par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+	title = paste('ln(lambda) = ',log(lambdas[i]),sep='')
+	plot(1:365,y,col=2,xlab='day',ylab='Precipitation',main=title,
+		cex.lab=1.5,cex.axis=1.5)
+	lines(precfd$fd,lwd=2,col=4)
+	readline(prompt="Press [enter] to continue")
+}
+
+
+par(mfrow=c(2,2))
+l = log(lambdas)
+plot(l,dfs,type='l',col=2,xlab='log lambda',ylab='df',cex.lab=1.5,cex.axis=1.5)
+plot(l,errs,type='l',col=2,xlab='log lambda',ylab='SSE',cex.lab=1.5,cex.axis=1.5)
+plot(l,ocvs,type='l',col=2,xlab='log lambda',ylab='OCV',cex.lab=1.5,cex.axis=1.5)
+plot(l,gcvs,type='l',col=2,xlab='log lambda',ylab='GCV',cex.lab=1.5,cex.axis=1.5)
+
+gcvs
+i = 10
+fdParobj = fdPar(daybasis365,Lfdobj=int2Lfd(2),lambda=lambdas[i])
+precfd = smooth.basis(1:365,y,fdParobj)
+
+par(mfrow=c(1,1))
+plot(1:365,y,col=2,xlab='day',ylab='precipitation',cex.lab=1.5,cex.axis=1.5,
+	main="Vancouver")
+lines(precfd$fd,col=4,lwd=2)
+
+
+# fitted value
+yhat = eval.fd(1:365,precfd$fd)
+# estimate sigma
+ny = length(y)
+sig2 = precfd$SSE/(ny-precfd$df)
+### Now a couple of probes
+
+y2cMap = precfd$y2cMap
+
+dfvals = eval.fd(1:365,precfd$fd,1)
+dbvals = eval.basis(1:365,daybasis365,Lfdobj=1)
+
+
+
+dfvar = diag(dbvals%*%y2cMap%*%t(y2cMap)%*%t(dbvals))*sig2
+
+quartz()
+par(mfrow=c(1,1),mar = c(8, 8, 4, 2))
+plot(1:365,dfvals,type='l',col=4,lwd=2,xlab='day',ylab='D precipitation',
+	cex.lab=1.5,cex.axis=1.5,ylim=c(-0.2,0.2))
+lines(1:365,dfvals+2*sqrt(dfvar),lty=2,lwd=2,col=4)
+lines(1:365,dfvals-2*sqrt(dfvar),lty=2,lwd=2,col=4)
+abline(h=0,col=2)
diff --git a/GeneData1.0 b/GeneData1.0
diff --git a/README.md b/README.md
@@ -1,14 +1 @@
-# FDAworkshop
-Materials for Functional Data Analysis Workshop
 
-
-Recorded Lecture
-https://www.dropbox.com/sh/gg8slxqsicg0zrl/AADWzz3aYDcRVK8mco1WOxv8a?dl=0
-
-Sumit your first assignment here
-https://goo.gl/forms/7js5JbZo4aXavlZd2
-
-Sumit your second assignment here
-https://goo.gl/forms/T4hdcLbBMkEQ7GR62
-
-Please put your R codes and graphs in one zipped file. Please name your zipped files with your first name_Assignment1.zip
diff --git a/data4.0.sas b/data4.0.sas
@@ -0,0 +1,59 @@
+libname sdtm "C:\review\sdtm";
+
+/*statistics avail@:
+             https://github.com/igargikaushik/FDAworkshop/new/master */
+data fda;
+length tsparmcd $10; 
+tsparmcd='ACTSUB'; output; 
+tsparmcd='ADAPT'; output; 
+tsparmcd='ADDON'; output; 
+tsparmcd='AGEMAX'; output; 
+tsparmcd='AGEMIN'; output; 
+tsparmcd='COMPTRT'; output; 
+tsparmcd='DCUTDESC'; output; 
+tsparmcd='DCUTDTC'; output; 
+tsparmcd='EXTTIND'; output; 
+tsparmcd='FCNTRY'; output; 
+tsparmcd='HLTSUBJI'; output; 
+tsparmcd='LENGTH'; output; 
+tsparmcd='NARMS'; output; 
+tsparmcd='NCOHORT'; output; 
+tsparmcd='OBJPRIM'; output; 
+tsparmcd='OBJSEC'; output; 
+tsparmcd='OUTMSPRI'; output; 
+tsparmcd='PDPSTIND'; output; 
+tsparmcd='PDSTIND'; output; 
+tsparmcd='PIPIND'; output; 
+tsparmcd='PLANSUB'; output; 
+tsparmcd='RDIND'; output; 
+tsparmcd='REGID'; output; 
+tsparmcd='SENDTC'; output; 
+tsparmcd='SEXPOP'; output; 
+tsparmcd='SPONSOR'; output; 
+tsparmcd='SDTMVER'; output; 
+tsparmcd='SDTIGVER'; output; 
+tsparmcd='STOPRULE'; output; 
+tsparmcd='SSTDTC'; output; 
+tsparmcd='STYPE'; output; 
+tsparmcd='TBLIND'; output; 
+tsparmcd='TCNTRL'; output; 
+tsparmcd='THERAREA'; output; 
+tsparmcd='TITLE'; output; 
+tsparmcd='TPHASE'; output; 
+tsparmcd='TTYPE'; output; 
+run;
+
+proc sql;
+create table actual as 
+select distinct tsparmcd from sdtm.ts; 
+
+proc sort data=fda; by tsparmcd; 
+
+data data4;
+merge fda(in=a) actual(in=b);
+by tsparmcd; 
+if a and not b;
+
+proc print data=data4;
+title " TS parameters desired by FDA but not in the TS domain";
+run;
diff --git a/data5.0.sas b/data5.0.sas
@@ -0,0 +1,12 @@
+libname sdtm "C:\review\sdtm";
+proc contents data=sdtm._all_ out=out1 noprint;
+
+
+proc sort data=out1;
+by memname aryan name;
+run;
+
+proc print data=out1(keep=memname aryan name label);
+id memname;
+by memname;
+run;