From 1bcb1a5b60ad1d01afa1a45b895486dbb021c942 Mon Sep 17 00:00:00 2001 From: Aihua Lin <61153831+aihualin@users.noreply.github.com> Date: Thu, 14 Oct 2021 15:35:15 +0200 Subject: [PATCH 1/7] step2_effective_sample_size.m --- step2_effective_sample_size.m | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 step2_effective_sample_size.m diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m new file mode 100644 index 0000000..c8d204e --- /dev/null +++ b/step2_effective_sample_size.m @@ -0,0 +1,48 @@ +clear all; +close all; +clc; + +n_cohort=250; +n_sample=randi([50 70],1,n_cohort); +n_sample_t=sum(n_sample); +index_cohort=cumsum(n_sample); + +mu=0; +sigma=10; +x_t=normrnd(mu,sigma,[n_sample_t,1]); +beta=0.06; +eps=rand(n_sample_t,1); +y_t=x_t*beta+eps; +rat=var(x_t*beta)/var(y_t); + +cov=0:2000:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates +n_cov=size(cov,2) +for i=1:n_cohort + x_i=x_t(1:index_cohort(i)); + y_i=y_t(1:index_cohort(i)); + y_i=y_i/norm(y_i);% normalize + for j=1:n_cov + mc=normrnd(mu,sigma,[index_cohort(i),j]); + % generate effect size of covariates + r=linspace(0.1,0.2,j)'; + beta_es=mc\y_i;%(y-eps); + y_res=y_i-mc*beta_es; % residulize + + [coef, t(i,j)] = nancorr(y_res, x_i); + end +end +z2=t.*t; + +figure +for j1=1:n_cov + plot(index_cohort-cov(j1),z2(:,j1),'LineWidth',2) + %plot(index_cohort,z2(:,j1),'LineWidth',2) + legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; + hold on +end +xlabel('Sample size') +ylabel('Z^2') +%xlim([-index_cohort(end) index_cohort(end)]) +xlim([0 index_cohort(end)]) +legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) + From 323a1df7ab8fbb97beeb9396482fd40f6594da40 Mon Sep 17 00:00:00 2001 From: Aihua Lin <61153831+aihualin@users.noreply.github.com> Date: Thu, 14 Oct 2021 15:46:36 +0200 Subject: [PATCH 2/7] Update step2_effective_sample_size.m --- step2_effective_sample_size.m | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m index c8d204e..9d9bd65 100644 --- a/step2_effective_sample_size.m +++ b/step2_effective_sample_size.m @@ -2,7 +2,7 @@ close all; clc; -n_cohort=250; +n_cohort=45; n_sample=randi([50 70],1,n_cohort); n_sample_t=sum(n_sample); index_cohort=cumsum(n_sample); @@ -15,16 +15,16 @@ y_t=x_t*beta+eps; rat=var(x_t*beta)/var(y_t); -cov=0:2000:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates +cov=0:200:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates n_cov=size(cov,2) for i=1:n_cohort x_i=x_t(1:index_cohort(i)); y_i=y_t(1:index_cohort(i)); y_i=y_i/norm(y_i);% normalize for j=1:n_cov - mc=normrnd(mu,sigma,[index_cohort(i),j]); + mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]); % generate effect size of covariates - r=linspace(0.1,0.2,j)'; + r=linspace(0.1,0.2,cov(j))'; beta_es=mc\y_i;%(y-eps); y_res=y_i-mc*beta_es; % residulize @@ -35,14 +35,15 @@ figure for j1=1:n_cov - plot(index_cohort-cov(j1),z2(:,j1),'LineWidth',2) + plot(index_cohort,z2(:,j1),'LineWidth',2) %plot(index_cohort,z2(:,j1),'LineWidth',2) legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; hold on end xlabel('Sample size') ylabel('Z^2') -%xlim([-index_cohort(end) index_cohort(end)]) +xlim([-index_cohort(end) index_cohort(end)]) xlim([0 index_cohort(end)]) legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) + From f727d75f0e28953a606e1ae135ecd498e7c21d43 Mon Sep 17 00:00:00 2001 From: Aihua Lin <61153831+aihualin@users.noreply.github.com> Date: Thu, 14 Oct 2021 15:56:52 +0200 Subject: [PATCH 3/7] Update step2_effective_sample_size.m --- step2_effective_sample_size.m | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m index 9d9bd65..4ea6296 100644 --- a/step2_effective_sample_size.m +++ b/step2_effective_sample_size.m @@ -2,8 +2,8 @@ close all; clc; -n_cohort=45; -n_sample=randi([50 70],1,n_cohort); +n_cohort=11; +n_sample=randi([250 270],1,n_cohort); n_sample_t=sum(n_sample); index_cohort=cumsum(n_sample); @@ -15,8 +15,9 @@ y_t=x_t*beta+eps; rat=var(x_t*beta)/var(y_t); -cov=0:200:index_cohort(end);%sort(randperm(55,n_cov));% number of covariates -n_cov=size(cov,2) +cov=0:500:index_cohort(end);% number of covariates +n_cov=size(cov,2)% loop for different number of covariates + for i=1:n_cohort x_i=x_t(1:index_cohort(i)); y_i=y_t(1:index_cohort(i)); @@ -43,7 +44,8 @@ xlabel('Sample size') ylabel('Z^2') xlim([-index_cohort(end) index_cohort(end)]) -xlim([0 index_cohort(end)]) +xlim([index_cohort(1) index_cohort(end)]) legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) + From 16add001b1e784481ea21fb7b801cbb5bbb13c38 Mon Sep 17 00:00:00 2001 From: Aihua Lin <61153831+aihualin@users.noreply.github.com> Date: Thu, 14 Oct 2021 17:56:09 +0200 Subject: [PATCH 4/7] Update step2_effective_sample_size.m --- step2_effective_sample_size.m | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m index 4ea6296..bd138f8 100644 --- a/step2_effective_sample_size.m +++ b/step2_effective_sample_size.m @@ -47,5 +47,22 @@ xlim([index_cohort(1) index_cohort(end)]) legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) +%z_eff=z2/beta; +z_eff=index_cohort(end)*z2./z2(:,1) + + +%% effective sample size +figure +X = cov + +Y = floor(z_eff(n_cohort,:)) + +plot(X,Y,'k-s') +xlim([0 cov(end)*1.1]) +ylim([0 Y(1)*1.1]) +strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)'))); +text(X,Y,strValues,'VerticalAlignment','bottom'); +xlabel('Number of covariates') +ylabel('Effective sample size') From bc27abfa934371448bac1d448119c1a870504548 Mon Sep 17 00:00:00 2001 From: Aihua Lin <61153831+aihualin@users.noreply.github.com> Date: Thu, 14 Oct 2021 19:58:56 +0200 Subject: [PATCH 5/7] Update step2_effective_sample_size.m --- step2_effective_sample_size.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m index bd138f8..e6308ca 100644 --- a/step2_effective_sample_size.m +++ b/step2_effective_sample_size.m @@ -47,7 +47,7 @@ xlim([index_cohort(1) index_cohort(end)]) legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) -%z_eff=z2/beta; +%z_eff=z2/coe; z_eff=index_cohort(end)*z2./z2(:,1) From 1d5a08948e6ac5219f45d05ed2513eb6a2cf5ddf Mon Sep 17 00:00:00 2001 From: Oleksandr Frei Date: Fri, 15 Oct 2021 23:34:23 +0200 Subject: [PATCH 6/7] compare pre-regularization with multiple regression --- step2_effective_sample_size.m | 68 ------------------------ step2_effective_sample_size_alex.m | 84 ++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 68 deletions(-) delete mode 100644 step2_effective_sample_size.m create mode 100644 step2_effective_sample_size_alex.m diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m deleted file mode 100644 index e6308ca..0000000 --- a/step2_effective_sample_size.m +++ /dev/null @@ -1,68 +0,0 @@ -clear all; -close all; -clc; - -n_cohort=11; -n_sample=randi([250 270],1,n_cohort); -n_sample_t=sum(n_sample); -index_cohort=cumsum(n_sample); - -mu=0; -sigma=10; -x_t=normrnd(mu,sigma,[n_sample_t,1]); -beta=0.06; -eps=rand(n_sample_t,1); -y_t=x_t*beta+eps; -rat=var(x_t*beta)/var(y_t); - -cov=0:500:index_cohort(end);% number of covariates -n_cov=size(cov,2)% loop for different number of covariates - -for i=1:n_cohort - x_i=x_t(1:index_cohort(i)); - y_i=y_t(1:index_cohort(i)); - y_i=y_i/norm(y_i);% normalize - for j=1:n_cov - mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]); - % generate effect size of covariates - r=linspace(0.1,0.2,cov(j))'; - beta_es=mc\y_i;%(y-eps); - y_res=y_i-mc*beta_es; % residulize - - [coef, t(i,j)] = nancorr(y_res, x_i); - end -end -z2=t.*t; - -figure -for j1=1:n_cov - plot(index_cohort,z2(:,j1),'LineWidth',2) - %plot(index_cohort,z2(:,j1),'LineWidth',2) - legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; - hold on -end -xlabel('Sample size') -ylabel('Z^2') -xlim([-index_cohort(end) index_cohort(end)]) -xlim([index_cohort(1) index_cohort(end)]) -legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) - -%z_eff=z2/coe; -z_eff=index_cohort(end)*z2./z2(:,1) - - -%% effective sample size -figure -X = cov - -Y = floor(z_eff(n_cohort,:)) - -plot(X,Y,'k-s') -xlim([0 cov(end)*1.1]) -ylim([0 Y(1)*1.1]) -strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)'))); -text(X,Y,strValues,'VerticalAlignment','bottom'); -xlabel('Number of covariates') -ylabel('Effective sample size') - - diff --git a/step2_effective_sample_size_alex.m b/step2_effective_sample_size_alex.m new file mode 100644 index 0000000..905576e --- /dev/null +++ b/step2_effective_sample_size_alex.m @@ -0,0 +1,84 @@ + +mu = 0; +sigma = 1; +beta = 0.4; + +n_sample_t = 5002; % _t means 'total' +n_cov_t = 300; +cov_size=0:100:n_cov_t; % number of covariates +cohort_size=2:1000:n_sample_t; + +x_t = normrnd(mu, sigma, [n_sample_t, 1]); +c_t = normrnd(mu, sigma, [n_sample_t, n_cov_t]); + +eps = rand(n_sample_t, 1); +y_t = x_t*beta + eps; +rat = var(x_t*beta)/var(y_t);fprintf('variance: %f\n',rat) +y_t=y_t/norm(y_t); % normalize + +t_preres = nan(length(cohort_size), length(cov_size)); +t_multreg = nan(length(cohort_size), length(cov_size)); + +effN_preres = nan(length(cohort_size), length(cov_size)); +effN_multreg = nan(length(cohort_size), length(cov_size)); + +%rand('seed', 123) + +for i=1:length(cohort_size) + fprintf('subjects: %f\n',cohort_size(i)) + x = x_t(1:cohort_size(i)); + y = y_t(1:cohort_size(i)); + for j = 1:length(cov_size) + fprintf('covariates: %f\n',cov_size(j)) + c = c_t(1:cohort_size(i), 1:cov_size(j)); + y_res = y - c*(c\y); % pre-residulize + [coef, t_preres(i,j)] = nancorr(y_res, x); + + if (cov_size(j) ~= 0) && (cov_size(j)+2 >= cohort_size(i)), continue; end + + % try multiple regression (instead of pre-residualization) + % https://stats.stackexchange.com/questions/27916/standard-errors-for-multiple-regression-coefficients + X = [x c]; + hat_beta = X \ y; + s2=var(y - X * hat_beta); + %Sigma = s2 * inv(X' * X); se = sqrt(Sigma(1,1)); + b1 = zeros(size(X, 2), 1); b1(1)=1; a=(X' * X \ b1); se = sqrt(s2 * a(1)); + t_multreg(i, j) = hat_beta(1) / se; + end +end +z2_preres=t_preres.*t_preres; +z2_multreg=t_multreg.*t_multreg; + +for j = 1:length(cov_size) + effN_multreg(:, j) = z2_multreg(:, j) ./ z2_multreg(:, 1) .* cohort_size'; + effN_preres(:, j) = z2_preres(:, j) ./ z2_preres(:, 1) .* cohort_size'; +end + +figure(1); clf; hold on;legendInfo={}; +for j=1:length(cov_size) + plot(cohort_size, z2_preres(:,j), 'LineWidth', 2) + legendInfo{j} = ['No.cov= ' num2str(cov_size(j))]; +end +set(gca,'ColorOrderIndex',1) +for j=1:length(cov_size) + plot(cohort_size, z2_multreg(:,j), '--', 'LineWidth', 2) +end +xlabel('Sample size') +ylabel('Z^2') +legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) +title({'solid lines: pre-regularization' 'dashed lines: multiple regression'}) + +figure(2); clf; hold on; legendInfo={}; +for i=1:length(cohort_size) + plot(cov_size, effN_preres(i, 1) - effN_preres(i,:), 'LineWidth', 2) + legendInfo{i} = ['No.subj= ' num2str(cohort_size(i))]; +end +set(gca,'ColorOrderIndex',1) +for i=1:length(cohort_size) + plot(cov_size, effN_multreg(i,1) - effN_multreg(i,:), '--', 'LineWidth', 2) +end +xlabel('No.cov') +ylabel('totalN - effN (delta)') +legend(legendInfo, 'location', 'northwest') + + From 1e1d3591f5d257452bdf11c611dc13c08d02725c Mon Sep 17 00:00:00 2001 From: Oleksandr Frei Date: Fri, 15 Oct 2021 23:52:33 +0200 Subject: [PATCH 7/7] restore the file --- step2_effective_sample_size.m | 68 +++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 step2_effective_sample_size.m diff --git a/step2_effective_sample_size.m b/step2_effective_sample_size.m new file mode 100644 index 0000000..e6308ca --- /dev/null +++ b/step2_effective_sample_size.m @@ -0,0 +1,68 @@ +clear all; +close all; +clc; + +n_cohort=11; +n_sample=randi([250 270],1,n_cohort); +n_sample_t=sum(n_sample); +index_cohort=cumsum(n_sample); + +mu=0; +sigma=10; +x_t=normrnd(mu,sigma,[n_sample_t,1]); +beta=0.06; +eps=rand(n_sample_t,1); +y_t=x_t*beta+eps; +rat=var(x_t*beta)/var(y_t); + +cov=0:500:index_cohort(end);% number of covariates +n_cov=size(cov,2)% loop for different number of covariates + +for i=1:n_cohort + x_i=x_t(1:index_cohort(i)); + y_i=y_t(1:index_cohort(i)); + y_i=y_i/norm(y_i);% normalize + for j=1:n_cov + mc=normrnd(mu,sigma,[index_cohort(i),cov(j)]); + % generate effect size of covariates + r=linspace(0.1,0.2,cov(j))'; + beta_es=mc\y_i;%(y-eps); + y_res=y_i-mc*beta_es; % residulize + + [coef, t(i,j)] = nancorr(y_res, x_i); + end +end +z2=t.*t; + +figure +for j1=1:n_cov + plot(index_cohort,z2(:,j1),'LineWidth',2) + %plot(index_cohort,z2(:,j1),'LineWidth',2) + legendInfo{j1} = ['No.cov= ' num2str(cov(j1))]; + hold on +end +xlabel('Sample size') +ylabel('Z^2') +xlim([-index_cohort(end) index_cohort(end)]) +xlim([index_cohort(1) index_cohort(end)]) +legend(legendInfo,'Position',[0.7 0.2 0.1 0.2]) + +%z_eff=z2/coe; +z_eff=index_cohort(end)*z2./z2(:,1) + + +%% effective sample size +figure +X = cov + +Y = floor(z_eff(n_cohort,:)) + +plot(X,Y,'k-s') +xlim([0 cov(end)*1.1]) +ylim([0 Y(1)*1.1]) +strValues = strtrim(cellstr(num2str([X(:) Y(:)],'(%d,%d)'))); +text(X,Y,strValues,'VerticalAlignment','bottom'); +xlabel('Number of covariates') +ylabel('Effective sample size') + +