diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/BC_Emp_Meth_PS1.pdf b/ProblemSets/pset1/Denis Sokolov/PS1/BC_Emp_Meth_PS1.pdf new file mode 100644 index 0000000..62b3c8d Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/BC_Emp_Meth_PS1.pdf differ diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m new file mode 100644 index 0000000..ad209bc --- /dev/null +++ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m @@ -0,0 +1,39 @@ +function MSE = CVfun(data,bw,k,num) +% bw - bandwidth of the Epanechnikov kernel density fit +% k - # of folds the data is split into for CV +% num - index of the selected fold (NOTE(review): described as the test fold, but the code below fits on this fold and tests on the rest - confirm) + +l = length(data); +ind1Train = ceil((l/k)*(num-1))+1; % first index of fold num (NOTE(review): when l is not a multiple of k, this ceil-based start can skip an observation between consecutive folds - confirm) +ind2Train = floor((l/k)*num); % last index of fold num +train = data(ind1Train:ind2Train); %train data (fold num only) +test = data(setdiff(1:l,ind1Train:ind2Train)); %test data (all remaining observations) + +train = sort(train); +pdEp = fitdist(train,'kernel','Kernel','epanechnikov','BandWidth',bw); +pEp = pdf(pdEp,train); %estimated pdf on train data +%-------------- +% we need indexes i s.t. 
train(i) = train(i-1) (we cant have same x points for griddedInterpolant(x,y)) +t1 = train(2:end) - train(1:end-1); +indx = find(t1==0)+1; % indexes of repeated values (each equals its predecessor in sorted train) +indx1 = setdiff(1:length(train),indx); +train1 = train(indx1); +pEp1 = pEp(indx1); +%-------------- +F = griddedInterpolant(train1,pEp1); +fun = @(t) F(t); % estimated pdf on train data as function + +b = 5; % # of histogram bins for MSE + +[N,edges] = histcounts(test,b); +edges = [min(train) edges(2:end-1) max(train)]; % integration limits for MSE: inner bin edges from test, outer limits from the train range +testN = sum(N); % # of test obs +N = N/testN; % N as fractions of test obs + +MSE = 0; +for i = 1:b + q = integral(fun, edges(i), edges(i+1)); + MSE = MSE + (q - N(i))^2; % squared gap between the integrated pdf on bin i and the test share in bin i +end + +end \ No newline at end of file diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m~ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m~ new file mode 100644 index 0000000..fd9b8da --- /dev/null +++ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/CVfun.m~ @@ -0,0 +1,38 @@ +function MSE = CVfun(data,bw,k,num) +% bw - bandwidth of the Epanechnikov kernel density fit +% k - # of data clusters for CV +% num - number of the cluster for test data (NOTE(review): not used in this version; the last 1/k of data is always the test set) +testShare = 1/k; +l = length(data); + +train = data(1:end*(1-testShare)); %train data +test = data(end*(1-testShare)+1:end); %test data + +train = sort(train); +pdEp = fitdist(train,'kernel','Kernel','epanechnikov','BandWidth',bw); +pEp = pdf(pdEp,train); %estimated pdf on train data +%-------------- +% we need indexes i s.t. 
train(i) = train(i-1) (we cant have same x points for griddedInterpolant(x,y)) +t1 = train(2:end) - train(1:end-1); +indx = find(t1==0)+1; % indexes of repeated values (each equals its predecessor in sorted train) +indx1 = setdiff(1:length(train),indx); +train1 = train(indx1); +pEp1 = pEp(indx1); +%-------------- +F = griddedInterpolant(train1,pEp1); +fun = @(t) F(t); % estimated pdf on train data as function + +b = 9; % # of histogram bins for MSE + +[N,edges] = histcounts(test,b); +edges = [min(train) edges(2:end-1) max(train)]; % integration limits for MSE: inner bin edges from test, outer limits from the train range +testN = sum(N); % # of test obs +N = N/testN; % N as fractions of test obs + +MSE = 0; +for i = 1:b + q = integral(fun, edges(i), edges(i+1)); + MSE = MSE + (q - N(i))^2; +end + +end \ No newline at end of file diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m new file mode 100644 index 0000000..37ed3c1 --- /dev/null +++ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m @@ -0,0 +1,83 @@ +clear all +close all + +bids = csvread('bids1.csv',0,0); +histogram(bids,9,'Normalization','probability'); +x = sort(bids); +p = normpdf(x,mean(bids),var(bids)^(1/2)); +hold on +del = 2.6; % common divisor applied to every pdf curve before plotting (NOTE(review): magic constant, presumably chosen to match the probability-normalized histogram scale - confirm) +plot(x,p/del,'Color','g','LineStyle','-'); + +bw = 0.2; + +pdEp = fitdist(bids,'kernel','Kernel','epanechnikov','BandWidth',bw); +pEp = pdf(pdEp,x); +plot(x,pEp/del,'Color','r','LineStyle','-.'); + +pdGa = fitdist(bids,'kernel','BandWidth',bw); +pGa = pdf(pdGa,x); +plot(x,pGa/del,'Color','b','LineStyle','--'); + +%% Cross Validation + +%s = rng; +%save('randomiser.mat','s') +load('randomiser.mat'); % loads the saved RNG state s so the shuffle below is reproducible +rng(s); +bidsRand = bids(randperm(length(bids))); +k = 10; % # of folds in CV + +func = @(bw) kCVfun(bidsRand,bw,k); + +options = optimset('MaxFunEval', 1e6, 'MaxIter', 1e5, 'TolFun', 1e-10); +bwCV = fminsearch(func,0.05,options); % bandwidth minimizing the k-fold criterion, search started at 0.05 + +pdEpCV = fitdist(bids,'kernel','Kernel','epanechnikov','BandWidth',bwCV); +pEpCV = pdf(pdEpCV,x); +plot(x,pEpCV/del,'Color','r','LineStyle','--'); + +hname = {'histogram','normal' 'epanechnikov' 'gaussian' 'CV-epanechnikov'}; 
+legend(hname); + +title('Bids PDF') +xlabel('Bid') +ylabel('pdf') +%% Valuations + +n = 3; +gB = pEpCV; %PDF of the bid distribution (CV-bandwidth Epanechnikov estimate evaluated on sorted bids x) +GB = cumtrapz(x,gB); %CDF of the bid distribution via cumulative trapezoidal integration of gB over x +v = x + GB./((n-1)*gB); % NOTE(review): looks like the first-price auction bid inversion with n bidders - confirm +figure +histogram(v,15,'Normalization','probability'); +hold on +pdEpV = fitdist(v,'kernel','Kernel','epanechnikov','BandWidth',1); +pEpV = pdf(pdEpV,sort(v)); +plot(sort(v),pEpV,'Color','r','LineStyle','-'); + +y = wblpdf(sort(v),3.5,2); +plot(sort(v),y,'Color','g','LineStyle','-') + +hname = {'histogram' 'epanechnikov' 'weibull'}; +legend(hname); + +title('Valuations PDF') +xlabel('Valuation') +ylabel('pdf') + + + + + + + + + + + + + + + + diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m~ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m~ new file mode 100644 index 0000000..af516da --- /dev/null +++ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/PS1.m~ @@ -0,0 +1,54 @@ +clear all +close all + +bids = csvread('bids1.csv',0,0); +histogram(bids,8,'Normalization','probability'); +x = sort(bids); +p = normpdf(x,mean(bids),var(bids)^(1/2)); +hold on +plot(x,p/2.5,'Color','r','LineStyle','-'); + +bw = 0.2; + +pdEp = fitdist(bids,'kernel','Kernel','epanechnikov','BandWidth',bw); +pEp = pdf(pdEp,x); +plot(x,pEp/2.5,'Color','g','LineStyle','-.'); + +pdGa = fitdist(bids,'kernel','BandWidth',bw); +pGa = pdf(pdGa,x); +plot(x,pGa/2.5,'Color','b','LineStyle','--'); + +hname = {'histogram','normal' 'epanechnikov' 'gaussian'}; +legend(hname); + +%% Cross Validation + +s = rng; +save('randomiser.mat','s') +load('randomiser.mat'); +rng(s); +bidsRand = bids(randperm(length(bids))); +k = 10; % # of folds in CV + +func = @(bw) kCVfun(bidsRand,bw,k); + +options = optimset('MaxFunEval', 1e6, 'MaxIter', 1e5, 'TolFun', 1e-10); +bwEst = fminsearch(func,0.1,options); + + + + + + + + + + + + + + + + + + diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/bids1.csv b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/bids1.csv new file mode 100644 index 0000000..b5850f9 --- /dev/null +++ 
b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/bids1.csv @@ -0,0 +1,300 @@ +2.9679 +3.5117 +0.76827 +1.3652 +1.8702 +1.3002 +2.6371 +3.4244 +1.6777 +2.8203 +1.1398 +1.5003 +1.7695 +2.3072 +2.064 +3.0281 +2.4523 +2.3686 +1.1701 +2.0918 +1.8542 +2.6436 +1.7351 +2.9102 +3.3103 +3.0588 +1.8514 +2.7179 +2.1207 +2.2344 +2.0167 +2.0867 +3.5148 +1.366 +1.836 +1.5452 +2.6326 +1.8416 +1.3479 +2.7207 +2.8675 +3.0643 +3.3833 +2.086 +2.8787 +1.9666 +2.8973 +1.6718 +2.148 +1.7895 +2.2521 +1.4185 +3.6309 +1.6172 +1.4827 +1.9787 +3.7349 +3.0926 +2.8585 +1.8667 +3.4253 +1.2618 +3.155 +1.8903 +1.1027 +2.086 +3.5721 +1.1018 +1.9088 +1.8806 +3.3184 +3.1685 +1.1084 +1.4395 +2.5242 +2.5586 +2.7837 +3.4746 +2.627 +2.4171 +3.7104 +2.0646 +2.4261 +0.81178 +1.5277 +3.2448 +3.1794 +2.5851 +1.7767 +2.9957 +2.9386 +1.9436 +2.0463 +0.75078 +1.9679 +1.3106 +1.5836 +0.90312 +1.4084 +1.982 +1.7473 +2.5643 +1.2271 +2.5696 +3.0175 +2.4913 +2.3196 +2.6821 +2.8554 +1.5247 +1.2835 +2.7987 +2.4683 +3.1511 +3.6166 +2.0027 +1.0744 +1.982 +1.4827 +2.9255 +1.1406 +2.6042 +2.6401 +2.93 +1.458 +2.3755 +2.9198 +2.8238 +2.2387 +1.4603 +2.5396 +1.4549 +2.4362 +2.0226 +1.8514 +2.1303 +3.1464 +2.6194 +2.3249 +2.5929 +1.0067 +2.1937 +3.021 +1.4542 +2.4086 +2.0489 +3.1262 +1.0109 +1.6142 +2.621 +1.7852 +1.972 +0.87997 +2.9973 +2.7565 +1.5467 +0.91081 +2.3887 +2.8149 +2.3512 +1.7466 +2.0886 +2.2191 +2.5722 +2.3599 +1.6799 +3.4408 +3.5196 +1.6945 +1.7809 +1.9693 +3.1903 +1.388 +3.0592 +1.5821 +2.9479 +2.008 +1.5133 +1.1816 +1.904 +2.3841 +0.98895 +1.8535 +2.2971 +2.2684 +1.6461 +2.1863 +2.329 +1.2352 +2.7241 +1.4526 +2.5183 +1.1987 +1.8178 +0.65029 +2.0253 +0.99823 +1.5353 +2.4815 +2.287 +2.848 +1.5421 +2.9805 +1.5685 +2.8905 +1.4364 +1.9887 +2.5784 +1.4773 +1.3566 +1.3376 +2.9738 +2.4495 +2.1993 +2.4052 +3.401 +1.9787 +2.6245 +1.8022 +2.319 +2.6326 +3.2563 +2.598 +1.7994 +2.4278 +2.4886 +3.3891 +1.2231 +2.5247 +2.2105 +2.6763 +2.4266 +1.4673 +1.8346 +2.0509 +2.6836 +3.5754 +2.9382 +2.3131 +1.8311 +3.7721 +1.4294 
+1.3289 +2.3349 +1.4565 +1.0511 +3.0168 +1.2529 +1.0285 +1.8889 +2.6669 +1.4665 +1.9981 +2.1732 +1.606 +2.6758 +2.6723 +2.3483 +1.2978 +3.6699 +3.0512 +2.914 +1.6409 +3.6535 +3.4287 +0.75866 +1.1118 +2.858 +1.5542 +2.7401 +2.3772 +1.8486 +1.8065 +1.7888 +1.7365 +1.388 +3.5634 +1.0911 +2.4205 +1.1002 +2.1832 +1.9395 +1.261 +1.5421 +2.8658 +1.8444 +1.0794 +3.8054 +2.5779 +3.0737 +0.60935 +2.012 +3.3068 +2.4501 +3.8391 +1.8951 +2.928 +1.1109 +2.5428 +2.3296 diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/kCVfun.m b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/kCVfun.m new file mode 100644 index 0000000..35b25df --- /dev/null +++ b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/kCVfun.m @@ -0,0 +1,12 @@ +function MSEAll = kCVfun(data,bw,k) +% bw - bandwidth, passed through unchanged to CVfun +% k - # of folds for CV; MSEAll is the average of the CVfun result over num = 1..k + +MSE = 0; +for num = 1:k + MSE1 = CVfun(data,bw,k,num); + MSE = MSE + MSE1; +end +MSEAll = MSE/k; + +end \ No newline at end of file diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/randomiser.mat b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/randomiser.mat new file mode 100644 index 0000000..be3a4e0 Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/Matlab/randomiser.mat differ diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Plots/bids.jpg b/ProblemSets/pset1/Denis Sokolov/PS1/Plots/bids.jpg new file mode 100644 index 0000000..f2cacb4 Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/Plots/bids.jpg differ diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/Plots/valuations.jpg b/ProblemSets/pset1/Denis Sokolov/PS1/Plots/valuations.jpg new file mode 100644 index 0000000..c696397 Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/Plots/valuations.jpg differ diff --git a/ProblemSets/pset1/Denis Sokolov/PS1/SokolovPS1.zip b/ProblemSets/pset1/Denis Sokolov/PS1/SokolovPS1.zip new file mode 100644 index 0000000..726f803 Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/SokolovPS1.zip differ diff --git a/ProblemSets/pset1/Denis 
Sokolov/PS1/pset1.pdf b/ProblemSets/pset1/Denis Sokolov/PS1/pset1.pdf new file mode 100644 index 0000000..de3d8ef Binary files /dev/null and b/ProblemSets/pset1/Denis Sokolov/PS1/pset1.pdf differ diff --git a/README.md b/README.md index 0a3982c..4f3c8f3 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,13 @@ This is a PhD level course in applied econometrics and computational economics, In addition to traditional econometric approaches, this course draws connections to recent literature on machine learning. -BC students should also checkout the [syllabus](syllabus.md). +BC students should also check out the [syllabus](syllabus.md). Please interact with the course by forking this repo. I will expect each student to open a pull request during the semester, either to correct an error in the slides/homework or just to add some info to a slide. + +There are many great resources for getting started with Git/GitHub. Here is a very incomplete list: +1. Rich Sweeney's RA tips: https://github.com/rlsweeney/Sweeney_RA_Manual/wiki/Git-tips-and-tricks +2. Intro to Programming by Clément Mazet-Sonilhac @ Sciences Po: https://github.com/CMS27/IP2019/blob/master/Lectures/IP_Git_S46.pdf +3. A more general guide to research best practices is Matt Gentzkow and Jesse Shapiro's RA Guide, https://www.brown.edu/Research/Shapiro/pdfs/CodeAndData.pdf. There are more resources on Jesse's website. +4. One of many Git cheat sheets: https://education.github.com/git-cheat-sheet-education.pdf Much of the material was (gratefully) forked from Chris Conlon's [micro-metrics](https://github.com/chrisconlon/micro-metrics) repo and other material is based of a PhD course Charlie used to co-teach with Paul Grieco and Mark Roberts at Penn State.