From 1bcb1a5b60ad1d01afa1a45b895486dbb021c942 Mon Sep 17 00:00:00 2001
From: Aihua Lin <61153831+aihualin@users.noreply.github.com>
Date: Thu, 14 Oct 2021 15:35:15 +0200
Subject: [PATCH 1/7] step2_effective_sample_size.m

---
 step2_effective_sample_size.m | 48 +++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 step2_effective_sample_size.m

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
new file mode 100644
index 0000000..c8d204e
--- /dev/null
+++ b/step2_effective_sample_size.m
@@ -0,0 +1,48 @@
+clear all;
+close all;
+clc;
+
+n_cohort=250;
+n_sample=randi([50 70],1,n_cohort);
+n_sample_t=sum(n_sample);
+index_cohort=cumsum(n_sample);
+
+mu=0;
+sigma=10;
+x_t=normrnd(mu,sigma,[n_sample_t,1]);
+beta=0.06;
+eps=rand(n_sample_t,1);
+y_t=x_t*beta+eps;
+rat=var(x_t*beta)/var(y_t);
+
+cov=0:2000:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates
+n_cov=size(cov,2)
+for i=1:n_cohort 
+    x_i=x_t(1:index_cohort(i));
+    y_i=y_t(1:index_cohort(i));
+    y_i=y_i/norm(y_i);% normalize
+    for j=1:n_cov
+        mc=normrnd(mu,sigma,[index_cohort(i),j]);
+        % generate effect size of covariates
+        r=linspace(0.1,0.2,j)';
+        beta_es=mc\y_i;%(y-eps);
+        y_res=y_i-mc*beta_es; % residulize
+        
+        [coef, t(i,j)] = nancorr(y_res, x_i);
+    end
+end
+z2=t.*t;
+  
+figure
+for j1=1:n_cov
+    plot(index_cohort-cov(j1),z2(:,j1),'LineWidth',2)
+    %plot(index_cohort,z2(:,j1),'LineWidth',2)
+    legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; 
+    hold on
+end
+xlabel('Sample size')
+ylabel('Z^2')
+%xlim([-index_cohort(end) index_cohort(end)])
+xlim([0 index_cohort(end)])
+legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
+

From 323a1df7ab8fbb97beeb9396482fd40f6594da40 Mon Sep 17 00:00:00 2001
From: Aihua Lin <61153831+aihualin@users.noreply.github.com>
Date: Thu, 14 Oct 2021 15:46:36 +0200
Subject: [PATCH 2/7] Update step2_effective_sample_size.m

---
 step2_effective_sample_size.m | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
index c8d204e..9d9bd65 100644
--- a/step2_effective_sample_size.m
+++ b/step2_effective_sample_size.m
@@ -2,7 +2,7 @@
 close all;
 clc;
 
-n_cohort=250;
+n_cohort=45;
 n_sample=randi([50 70],1,n_cohort);
 n_sample_t=sum(n_sample);
 index_cohort=cumsum(n_sample);
@@ -15,16 +15,16 @@
 y_t=x_t*beta+eps;
 rat=var(x_t*beta)/var(y_t);
 
-cov=0:2000:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates
+cov=0:200:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates
 n_cov=size(cov,2)
 for i=1:n_cohort 
     x_i=x_t(1:index_cohort(i));
     y_i=y_t(1:index_cohort(i));
     y_i=y_i/norm(y_i);% normalize
     for j=1:n_cov
-        mc=normrnd(mu,sigma,[index_cohort(i),j]);
+        mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]);
         % generate effect size of covariates
-        r=linspace(0.1,0.2,j)';
+        r=linspace(0.1,0.2,cov(j))';
         beta_es=mc\y_i;%(y-eps);
         y_res=y_i-mc*beta_es; % residulize
         
@@ -35,14 +35,15 @@
   
 figure
 for j1=1:n_cov
-    plot(index_cohort-cov(j1),z2(:,j1),'LineWidth',2)
+    plot(index_cohort,z2(:,j1),'LineWidth',2)
     %plot(index_cohort,z2(:,j1),'LineWidth',2)
     legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; 
     hold on
 end
 xlabel('Sample size')
 ylabel('Z^2')
-%xlim([-index_cohort(end) index_cohort(end)])
+xlim([-index_cohort(end) index_cohort(end)])
 xlim([0 index_cohort(end)])
 legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
 
+

From f727d75f0e28953a606e1ae135ecd498e7c21d43 Mon Sep 17 00:00:00 2001
From: Aihua Lin <61153831+aihualin@users.noreply.github.com>
Date: Thu, 14 Oct 2021 15:56:52 +0200
Subject: [PATCH 3/7] Update step2_effective_sample_size.m

---
 step2_effective_sample_size.m | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
index 9d9bd65..4ea6296 100644
--- a/step2_effective_sample_size.m
+++ b/step2_effective_sample_size.m
@@ -2,8 +2,8 @@
 close all;
 clc;
 
-n_cohort=45;
-n_sample=randi([50 70],1,n_cohort);
+n_cohort=11;
+n_sample=randi([250 270],1,n_cohort);
 n_sample_t=sum(n_sample);
 index_cohort=cumsum(n_sample);
 
@@ -15,8 +15,9 @@
 y_t=x_t*beta+eps;
 rat=var(x_t*beta)/var(y_t);
 
-cov=0:200:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates
-n_cov=size(cov,2)
+cov=0:500:index_cohort(end);% number of covariates
+n_cov=size(cov,2)% loop for different number of covariates
+
 for i=1:n_cohort 
     x_i=x_t(1:index_cohort(i));
     y_i=y_t(1:index_cohort(i));
@@ -43,7 +44,8 @@
 xlabel('Sample size')
 ylabel('Z^2')
 xlim([-index_cohort(end) index_cohort(end)])
-xlim([0 index_cohort(end)])
+xlim([index_cohort(1) index_cohort(end)])
 legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
 
 
+

From 16add001b1e784481ea21fb7b801cbb5bbb13c38 Mon Sep 17 00:00:00 2001
From: Aihua Lin <61153831+aihualin@users.noreply.github.com>
Date: Thu, 14 Oct 2021 17:56:09 +0200
Subject: [PATCH 4/7] Update step2_effective_sample_size.m

---
 step2_effective_sample_size.m | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
index 4ea6296..bd138f8 100644
--- a/step2_effective_sample_size.m
+++ b/step2_effective_sample_size.m
@@ -47,5 +47,22 @@
 xlim([index_cohort(1) index_cohort(end)])
 legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
 
+%z_eff=z2/beta;
+z_eff=index_cohort(end)*z2./z2(:,1)
+
+
+%% effective sample size
+figure
+X = cov
+
+Y = floor(z_eff(n_cohort,:))
+
+plot(X,Y,'k-s')
+xlim([0 cov(end)*1.1])
+ylim([0 Y(1)*1.1])
+strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)')));
+text(X,Y,strValues,'VerticalAlignment','bottom');
+xlabel('Number of covariates')
+ylabel('Effective sample size')
 
 

From bc27abfa934371448bac1d448119c1a870504548 Mon Sep 17 00:00:00 2001
From: Aihua Lin <61153831+aihualin@users.noreply.github.com>
Date: Thu, 14 Oct 2021 19:58:56 +0200
Subject: [PATCH 5/7] Update step2_effective_sample_size.m

---
 step2_effective_sample_size.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
index bd138f8..e6308ca 100644
--- a/step2_effective_sample_size.m
+++ b/step2_effective_sample_size.m
@@ -47,7 +47,7 @@
 xlim([index_cohort(1) index_cohort(end)])
 legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
 
-%z_eff=z2/beta;
+%z_eff=z2/coe;
 z_eff=index_cohort(end)*z2./z2(:,1)
 
 

From 1d5a08948e6ac5219f45d05ed2513eb6a2cf5ddf Mon Sep 17 00:00:00 2001
From: Oleksandr Frei <oleksanf.uio.no@Oleksandrs-MacBook-Air.local>
Date: Fri, 15 Oct 2021 23:34:23 +0200
Subject: [PATCH 6/7] compare pre-regularization with multiple regression

---
 step2_effective_sample_size.m      | 68 ------------------------
 step2_effective_sample_size_alex.m | 84 ++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 68 deletions(-)
 delete mode 100644 step2_effective_sample_size.m
 create mode 100644 step2_effective_sample_size_alex.m

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
deleted file mode 100644
index e6308ca..0000000
--- a/step2_effective_sample_size.m
+++ /dev/null
@@ -1,68 +0,0 @@
-clear all;
-close all;
-clc;
-
-n_cohort=11;
-n_sample=randi([250 270],1,n_cohort);
-n_sample_t=sum(n_sample);
-index_cohort=cumsum(n_sample);
-
-mu=0;
-sigma=10;
-x_t=normrnd(mu,sigma,[n_sample_t,1]);
-beta=0.06;
-eps=rand(n_sample_t,1);
-y_t=x_t*beta+eps;
-rat=var(x_t*beta)/var(y_t);
-
-cov=0:500:index_cohort(end);% number of covariates
-n_cov=size(cov,2)% loop for different number of covariates
-
-for i=1:n_cohort 
-    x_i=x_t(1:index_cohort(i));
-    y_i=y_t(1:index_cohort(i));
-    y_i=y_i/norm(y_i);% normalize
-    for j=1:n_cov
-        mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]);
-        % generate effect size of covariates
-        r=linspace(0.1,0.2,cov(j))';
-        beta_es=mc\y_i;%(y-eps);
-        y_res=y_i-mc*beta_es; % residulize
-        
-        [coef, t(i,j)] = nancorr(y_res, x_i);
-    end
-end
-z2=t.*t;
-  
-figure
-for j1=1:n_cov
-    plot(index_cohort,z2(:,j1),'LineWidth',2)
-    %plot(index_cohort,z2(:,j1),'LineWidth',2)
-    legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; 
-    hold on
-end
-xlabel('Sample size')
-ylabel('Z^2')
-xlim([-index_cohort(end) index_cohort(end)])
-xlim([index_cohort(1) index_cohort(end)])
-legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
-
-%z_eff=z2/coe;
-z_eff=index_cohort(end)*z2./z2(:,1)
-
-
-%% effective sample size
-figure
-X = cov
-
-Y = floor(z_eff(n_cohort,:))
-
-plot(X,Y,'k-s')
-xlim([0 cov(end)*1.1])
-ylim([0 Y(1)*1.1])
-strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)')));
-text(X,Y,strValues,'VerticalAlignment','bottom');
-xlabel('Number of covariates')
-ylabel('Effective sample size')
-
-
diff --git a/step2_effective_sample_size_alex.m b/step2_effective_sample_size_alex.m
new file mode 100644
index 0000000..905576e
--- /dev/null
+++ b/step2_effective_sample_size_alex.m
@@ -0,0 +1,84 @@
+
+mu = 0;
+sigma = 1;
+beta = 0.4;
+
+n_sample_t = 5002;   % _t means 'total'
+n_cov_t = 300;
+cov_size=0:100:n_cov_t;              % number of covariates
+cohort_size=2:1000:n_sample_t;
+
+x_t = normrnd(mu, sigma, [n_sample_t, 1]);
+c_t = normrnd(mu, sigma, [n_sample_t, n_cov_t]);
+
+eps = rand(n_sample_t, 1);
+y_t = x_t*beta + eps;
+rat = var(x_t*beta)/var(y_t);fprintf('variance: %f\n',rat)
+y_t=y_t/norm(y_t);                   % normalize
+
+t_preres = nan(length(cohort_size), length(cov_size));
+t_multreg = nan(length(cohort_size), length(cov_size));
+
+effN_preres = nan(length(cohort_size), length(cov_size));
+effN_multreg = nan(length(cohort_size), length(cov_size));
+
+%rand('seed', 123)
+
+for i=1:length(cohort_size) 
+    fprintf('subjects: %f\n',cohort_size(i))
+    x = x_t(1:cohort_size(i));
+    y = y_t(1:cohort_size(i));
+    for j = 1:length(cov_size)
+        fprintf('covariates: %f\n',cov_size(j))
+        c = c_t(1:cohort_size(i), 1:cov_size(j));
+        y_res   = y - c*(c\y); % pre-residulize
+        [coef, t_preres(i,j)] = nancorr(y_res, x);
+    
+        if (cov_size(j) ~= 0) && (cov_size(j)+2 >= cohort_size(i)), continue; end
+       
+        % try multiple regression (instead of pre-residualization)
+        % https://stats.stackexchange.com/questions/27916/standard-errors-for-multiple-regression-coefficients
+        X = [x c];
+        hat_beta = X \ y;
+        s2=var(y - X * hat_beta);
+        %Sigma = s2 * inv(X' * X); se = sqrt(Sigma(1,1)); 
+        b1 = zeros(size(X, 2), 1); b1(1)=1; a=(X' * X \ b1); se = sqrt(s2 * a(1)); 
+        t_multreg(i, j) = hat_beta(1) / se;
+    end
+end
+z2_preres=t_preres.*t_preres;
+z2_multreg=t_multreg.*t_multreg;
+
+for j = 1:length(cov_size)
+    effN_multreg(:, j) =  z2_multreg(:, j) ./ z2_multreg(:, 1) .* cohort_size';
+    effN_preres(:, j) =  z2_preres(:, j) ./ z2_preres(:, 1) .* cohort_size';
+end
+
+figure(1); clf; hold on;legendInfo={};
+for j=1:length(cov_size)
+    plot(cohort_size, z2_preres(:,j), 'LineWidth', 2)
+    legendInfo{j} = ['No.cov= ' num2str(cov_size(j))]; 
+end
+set(gca,'ColorOrderIndex',1)
+for j=1:length(cov_size)
+    plot(cohort_size, z2_multreg(:,j), '--', 'LineWidth', 2)
+end
+xlabel('Sample size')
+ylabel('Z^2')
+legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
+title({'solid lines: pre-regularization' 'dashed lines: multiple regression'})
+
+figure(2); clf; hold on; legendInfo={};
+for i=1:length(cohort_size)
+    plot(cov_size, effN_preres(i, 1) - effN_preres(i,:), 'LineWidth', 2)
+    legendInfo{i} = ['No.subj= ' num2str(cohort_size(i))]; 
+end
+set(gca,'ColorOrderIndex',1)
+for i=1:length(cohort_size)
+    plot(cov_size, effN_multreg(i,1) - effN_multreg(i,:), '--', 'LineWidth', 2)
+end
+xlabel('No.cov')
+ylabel('totalN - effN (delta)')
+legend(legendInfo, 'location', 'northwest')
+
+

From 1e1d3591f5d257452bdf11c611dc13c08d02725c Mon Sep 17 00:00:00 2001
From: Oleksandr Frei <oleksanf.uio.no@Oleksandrs-MacBook-Air.local>
Date: Fri, 15 Oct 2021 23:52:33 +0200
Subject: [PATCH 7/7] restore the file

---
 step2_effective_sample_size.m | 68 +++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 step2_effective_sample_size.m

diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m
new file mode 100644
index 0000000..e6308ca
--- /dev/null
+++ b/step2_effective_sample_size.m
@@ -0,0 +1,68 @@
+clear all;
+close all;
+clc;
+
+n_cohort=11;
+n_sample=randi([250 270],1,n_cohort);
+n_sample_t=sum(n_sample);
+index_cohort=cumsum(n_sample);
+
+mu=0;
+sigma=10;
+x_t=normrnd(mu,sigma,[n_sample_t,1]);
+beta=0.06;
+eps=rand(n_sample_t,1);
+y_t=x_t*beta+eps;
+rat=var(x_t*beta)/var(y_t);
+
+cov=0:500:index_cohort(end);% number of covariates
+n_cov=size(cov,2)% loop for different number of covariates
+
+for i=1:n_cohort 
+    x_i=x_t(1:index_cohort(i));
+    y_i=y_t(1:index_cohort(i));
+    y_i=y_i/norm(y_i);% normalize
+    for j=1:n_cov
+        mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]);
+        % generate effect size of covariates
+        r=linspace(0.1,0.2,cov(j))';
+        beta_es=mc\y_i;%(y-eps);
+        y_res=y_i-mc*beta_es; % residulize
+        
+        [coef, t(i,j)] = nancorr(y_res, x_i);
+    end
+end
+z2=t.*t;
+  
+figure
+for j1=1:n_cov
+    plot(index_cohort,z2(:,j1),'LineWidth',2)
+    %plot(index_cohort,z2(:,j1),'LineWidth',2)
+    legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; 
+    hold on
+end
+xlabel('Sample size')
+ylabel('Z^2')
+xlim([-index_cohort(end) index_cohort(end)])
+xlim([index_cohort(1) index_cohort(end)])
+legend(legendInfo,'Position',[0.7 0.2 0.1 0.2])
+
+%z_eff=z2/coe;
+z_eff=index_cohort(end)*z2./z2(:,1)
+
+
+%% effective sample size
+figure
+X = cov
+
+Y = floor(z_eff(n_cohort,:))
+
+plot(X,Y,'k-s')
+xlim([0 cov(end)*1.1])
+ylim([0 Y(1)*1.1])
+strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)')));
+text(X,Y,strValues,'VerticalAlignment','bottom');
+xlabel('Number of covariates')
+ylabel('Effective sample size')
+
+