From e5be7c8334455300645e27dadfabf863791b1b29 Mon Sep 17 00:00:00 2001 From: Linqi Zhang Date: Mon, 27 Jan 2020 22:45:21 -0500 Subject: [PATCH] Submit ps1 --- ProblemSets/pset1/PS1_Linqi.html | 569 +++++++++++++++++++++++++++++++ ProblemSets/pset1/PS1_Linqi.rmd | 177 ++++++++++ 2 files changed, 746 insertions(+) create mode 100644 ProblemSets/pset1/PS1_Linqi.html create mode 100644 ProblemSets/pset1/PS1_Linqi.rmd diff --git a/ProblemSets/pset1/PS1_Linqi.html b/ProblemSets/pset1/PS1_Linqi.html new file mode 100644 index 0000000..10009d2 --- /dev/null +++ b/ProblemSets/pset1/PS1_Linqi.html @@ -0,0 +1,569 @@ + + + + + + + + + + + + + + +PS1_Report + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Load the simulated bids

+

Simulated bids data description: a First Price Sealed Bid (FPSB) auction with three bidders.

+
library(tidyverse)
+library(fitdistrplus) # to use function fitdist
+library(spatstat) # to use function CDF
+
+dt <- read_csv("bids1.csv", col_names = c("bids"))
+ggplot (data = dt, aes(dt$bids)) + geom_histogram(binwidth = 0.25)
+

+
+
+

1.Estimate the density of bids

+
+
a. an assumed normal distribution
+
fit.normal <- fitdist(dt$bids, "norm") 
+denscomp(fit.normal)
+

+
+
+

b. a Gaussian kernel

+
fhat.1b <- density(dt$bids, bw="nrd0", adjust = 1,
+        kernel = "gaussian")
+plot(fhat.1b)
+

+
+
+

c. an Epanechnikov kernel

+

In the general case, Silverman’s plug-in bandwidth takes the form \(1.159*\delta_k*min(s,IQ/1.34)*n^{-1/5}\), where \(\delta_k\) is the canonical bandwidth of the chosen kernel function. It is approximately 0.7764 for the Gaussian kernel and 1.7188 for the Epanechnikov kernel. For the Epanechnikov kernel, Silverman’s plug-in bandwidth is therefore \(1.99*min(s,IQ/1.34)*n^{-1/5}\). In the density() function, when bw=nrd0, it uses Silverman’s plug-in bandwidth for the Gaussian kernel, whose constant term is \(1.159*\delta_k \approx 0.9\). So we need to convert it to the Epanechnikov bandwidth by setting adjust = 1.99/0.9 = 2.2.

+
fhat.1c <- density(dt$bids, bw="nrd0", adjust = 2.2,
+                   kernel = "epanechnikov") 
+plot(fhat.1c)
+

+
+
+
+

2. Use cross validation to select the optimal bandwidth

+

The LSCV criterion function is given by: \(\int_{-\infty}^{\infty} \hat f(x)^2 \, dx - \frac{2}{n}\sum_i \hat f_{-i}(X_i)\), where \(\hat f_{-i}\) is the leave-one-out density estimate that omits observation \(i\).

+
bids <- dt$bids
+X = bids
+G <- function(h){
+  fhat=Vectorize(function(x) density(X,from=x,to=x,n=1,bw=h,kernel = "epanechnikov")$y)
+  fhati=Vectorize(function(i) density(X[-i],from=X[i],to=X[i],n=1,bw=h,kernel = "epanechnikov")$y)
+  F=fhati(1:length(X))
+  return(integrate(function(x) fhat(x)^2,-Inf,Inf)$value-2*mean(F))  
+}
+vx=seq(.15,.5,by=.01)
+vy=Vectorize(G)(vx)
+df=data.frame(vx,vy)
+
+qplot(vx,vy,geom="line",data=df)
+

+
h.cv<- optimize(G,interval=c(.1,1))
+
+
+

3. Compare the estimated density functions

+
data <- as.data.frame(bids)
+ggplot(data,aes(bids))  + geom_histogram(aes(y = stat(density))) +
+  geom_line(stat = "density",bw = "nrd0", aes(col = 'Gaussian-Plug in')) +
+  geom_line(stat = "density", bw = "nrd0", adjust = 2.2,
+            kernel = "epanechnikov", aes(col = 'Epanechnikov-Plug in')) + 
+  geom_line(stat = "density", bw = h.cv$minimum,
+            kernel = "epanechnikov", aes(col = 'Epanechnikov-LSCV')) +
+  geom_line(stat = 'function', fun = dnorm, 
+            args = as.list(fit.normal$estimate), aes(col = 'Normal')) +
+  scale_color_manual(name = 'Kernel and Bandwidth Choice', 
+                     values = c('darkviolet', 'seagreen', 'darkturquoise','darkgoldenrod3'))
+

+

From the plots we can see that the Gaussian kernel with Silverman’s plug-in bandwidth fits the data best.

+
+
+

4. Recover the valuation implied for each bid

+

\(\hat v = b + \frac{\hat G_B (b)}{(n-1)\hat g_B(b)},\) where \(n=3\) is the number of bidders, \(G()\) is the CDF of the bids, and \(g()\) is the density of the bids.

+
n = 3
+fhat.4 <- function(x) density(dt$bids, from=x, to=x, n=1, bw=h.cv$minimum, adjust = 1,
+                   kernel = "epanechnikov")$y
+fhat.4.pdf <- density(dt$bids, bw=h.cv$minimum, adjust = 1,
+                   kernel = "epanechnikov")
+Fhat.4 <- CDF(fhat.4.pdf)
+vhat <- bids + Fhat.4(bids) / ((n-1)*sapply(bids,fhat.4))
+
+
+

5. Estimate the density of valuations

+
vhat_pdf <- density(vhat, bw="nrd0", adjust = 2.2,
+                   kernel = "epanechnikov") 
+plot(vhat_pdf)
+

+
+
+

6. Guess the DGP

+

The estimated distribution of valuations is strictly positive, bell-shaped and right-skewed. It looks like a log-normal distribution with \(\mu=1, \sigma=0.5\).

+
data.6 <- as.data.frame(vhat)
+ggplot(data.6,aes(vhat))  + geom_histogram(aes(y = stat(density)), bins=22) +
+  geom_line(stat = "density", bw = "nrd0", adjust = 2.2,
+            kernel = "epanechnikov", aes(col = 'epanechnikov, adjust=2.2')) +
+  geom_line(stat = 'function', fun = dlnorm, args = list(meanlog = 1, sdlog = 0.5),
+            aes(col = 'hypothetic lognormal')) +
+  geom_line(stat = "density", bw = "nrd0", adjust = 1,
+            kernel = "epanechnikov", aes(col = 'epanechnikov, adjust=1')) +
+  geom_line(stat = "density", bw = "nrd0", adjust = 1,
+            kernel = "gaussian", aes(col = 'gaussian')) +  
+  scale_color_manual(name = 'Density', 
+                     values = c('darkviolet', 'seagreen', 'darkturquoise','darkgoldenrod3'))
+

+
+
+

7. Estimate the impact of adding an additional bidder

+

Step 1: Approximate the recovered distribution of values \(\hat v\) using a log normal.

+
fit.lognormal <- fitdist(vhat, "lnorm") 
+mu <- fit.lognormal$estimate[1]
+sigma <- fit.lognormal$estimate[2]
+

Step 2: Use the estimated log-normal parameters from above to recompute the bid function. (Here I assume the reservation price is 0.)

+
fun.bid <- function(valuation, bidder, mu, sigma) {
+  Fval = function(v) plnorm(v, meanlog = mu, sdlog = sigma)
+  b = valuation - (1/ Fval(valuation)^(bidder - 1))*
+    integrate(function(x) Fval(x)^(bidder - 1), 0, valuation)$value 
+  return(b)
+}
+

Step 3: Simulate \(M = 100\) sets of \(L=100\) auction pairs (with 3 and 4 bidders), drawing \(v\) from the recovered set of values \(\hat v\) with replacement. (In addition to drawing \(v\) from the \(\hat v\) obtained in Q5, I also tried drawing from the approximating log-normal distribution.)

+
L <- 100
+M <- 100
+set.seed(420)
+fun.win <- function(bidder,bs="FALSE"){
+  if (bs=="TRUE") {
+    simVal <- sample(vhat, bidder*L, replace = TRUE)
+    simBid <- mapply(fun.bid, simVal, bidder, mu, sigma)
+  }
+  else {
+    simVal <- rlnorm(bidder*L, meanlog = mu, sdlog = sigma)
+    simBid <- mapply(fun.bid, simVal, bidder, mu, sigma)
+  }
+  return(matrix(simBid,ncol=100))
+}
+# when bs is set to true, v are drawn from vhat
+simBid3 <- replicate(M,fun.win(3, bs="TRUE")) 
+simBid4 <- replicate(M,fun.win(4, bs="TRUE"))
+

Step 4: For each \(m \in M\), compute \(\theta_m\), the expected difference in the winning bid when going from 3 to 4 bidders.

+
win3 <- colMeans(apply(simBid3,2:3,max))
+win4 <- colMeans(apply(simBid4,2:3,max))
+theta <- win4 - win3
+

Step 5: Report 95% confidence intervals on \(\theta\)

+
fit.normal.theta <- fitdist(theta, "norm")
+mu.theta <- fit.normal.theta$estimate[1]
+sig.theta <- fit.normal.theta$estimate[2]
+error <- qnorm(0.975)*sig.theta/sqrt(length(theta))
+left <- mu.theta - error
+right <- mu.theta + error
+cat("The 95% confidence interval is [",left, "," ,right,"].")
+
## The 95% confidence interval is [ 0.3950057 , 0.4272277 ].
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/ProblemSets/pset1/PS1_Linqi.rmd b/ProblemSets/pset1/PS1_Linqi.rmd new file mode 100644 index 0000000..2849ed8 --- /dev/null +++ b/ProblemSets/pset1/PS1_Linqi.rmd @@ -0,0 +1,177 @@ +--- +title: "PS1_Report" +author: "Linqi Zhang" +date: "1/24/2020" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +### Load the simulated bids + +Simulated bids data description: a First Price Sealed Bid (FPSB) auction with three bidders. +```{r, message = FALSE, warning = FALSE, echo=TRUE} +library(tidyverse) +library(fitdistrplus) # to use function fitdist +library(spatstat) # to use function CDF + +dt <- read_csv("bids1.csv", col_names = c("bids")) +ggplot (data = dt, aes(dt$bids)) + geom_histogram(binwidth = 0.25) +``` + +### 1.Estimate the density of bids + +##### a. an assumed normal distribution +```{r, message = FALSE, warning = FALSE} +fit.normal <- fitdist(dt$bids, "norm") +denscomp(fit.normal) +``` + +#### b. a Gaussian kernel +```{r} +fhat.1b <- density(dt$bids, bw="nrd0", adjust = 1, + kernel = "gaussian") +plot(fhat.1b) +``` + +#### c. an Epanechnikov kernel +In general case, the Silverman's plug-in bandwidth takes the formula $1.159*\delta_k*min(s,IQ/1.34)*n^{1/5}$, where $\delta_k$ is the canonical bandwidth of the chosen kernel function. It is approximately 0.7764 for gaussian, and 1.7188 for epanechnikov. +For epanechnikov, the Silverman's plug-in bandwidth is $1.99*min(s,IQ/1.34)*n^{1/5}$. +In the density() function, when bw=nrd0, it uses the Silverman's plug in bandwidth for gaussian kernel, where constant term is $1.159*\delta_k = 0.9$. So we need to transform it to epanechnikov by setting adjust = 1.99/0.9 = 2.2. +```{r} +fhat.1c <- density(dt$bids, bw="nrd0", adjust = 2.2, + kernel = "epanechnikov") +plot(fhat.1c) +``` + +### 2. 
Use cross validation to select the optimal bandwidth +The LSCV criterion function is given by: $\int_{-\infty}^{\infty} \hat f(x)^2 - \frac{2}{n}\sum_i \hat f_{-i}(X_i)$. +```{r} +bids <- dt$bids +X = bids +G <- function(h){ + fhat=Vectorize(function(x) density(X,from=x,to=x,n=1,bw=h,kernel = "epanechnikov")$y) + fhati=Vectorize(function(i) density(X[-i],from=X[i],to=X[i],n=1,bw=h,kernel = "epanechnikov")$y) + F=fhati(1:length(X)) + return(integrate(function(x) fhat(x)^2,-Inf,Inf)$value-2*mean(F)) +} +vx=seq(.15,.5,by=.01) +vy=Vectorize(G)(vx) +df=data.frame(vx,vy) + +qplot(vx,vy,geom="line",data=df) +h.cv<- optimize(G,interval=c(.1,1)) +``` + +### 3. Compare the estimated density functions +```{r, message = FALSE, warning = FALSE} +data <- as.data.frame(bids) +ggplot(data,aes(bids)) + geom_histogram(aes(y = stat(density))) + + geom_line(stat = "density",bw = "nrd0", aes(col = 'Gaussian-Plug in')) + + geom_line(stat = "density", bw = "nrd0", adjust = 2.2, + kernel = "epanechnikov", aes(col = 'Epanechnikov-Plug in')) + + geom_line(stat = "density", bw = h.cv$minimum, + kernel = "epanechnikov", aes(col = 'Epanechnikov-LSCV')) + + geom_line(stat = 'function', fun = dnorm, + args = as.list(fit.normal$estimate), aes(col = 'Normal')) + + scale_color_manual(name = 'Kernel and Bandwidth Choice', + values = c('darkviolet', 'seagreen', 'darkturquoise','darkgoldenrod3')) +``` + +From the plots we can see the Gaussian kernel with Silverman's plug-in bandwidth fit the data best. + +### 4. Recover the valuation implied for each bid +$\hat v = b + \frac{\hat G_B (b)}{(n-1)\hat g_B(b)},$ where $n=3$ is the number of bidders, $G()$ is the CDF of the bids, and $g()$ is the density of the bids. 
+```{r} +n = 3 +fhat.4 <- function(x) density(dt$bids, from=x, to=x, n=1, bw=h.cv$minimum, adjust = 1, + kernel = "epanechnikov")$y +fhat.4.pdf <- density(dt$bids, bw=h.cv$minimum, adjust = 1, + kernel = "epanechnikov") +Fhat.4 <- CDF(fhat.4.pdf) +vhat <- bids + Fhat.4(bids) / ((n-1)*sapply(bids,fhat.4)) +``` + +### 5. Estimate the density of valuations +```{r} +vhat_pdf <- density(vhat, bw="nrd0", adjust = 2.2, + kernel = "epanechnikov") +plot(vhat_pdf) +``` + +### 6. Guess the DGP +The estimated distribution of valuations is strictly positive, bell-shaped and right-skewed. It looks like a log-normal distribution with $\mu=1, \sigma=0.5$. +```{r, message = FALSE, warning = FALSE} +data.6 <- as.data.frame(vhat) +ggplot(data.6,aes(vhat)) + geom_histogram(aes(y = stat(density)), bins=22) + + geom_line(stat = "density", bw = "nrd0", adjust = 2.2, + kernel = "epanechnikov", aes(col = 'epanechnikov, adjust=2.2')) + + geom_line(stat = 'function', fun = dlnorm, args = list(meanlog = 1, sdlog = 0.5), + aes(col = 'hypothetic lognormal')) + + geom_line(stat = "density", bw = "nrd0", adjust = 1, + kernel = "epanechnikov", aes(col = 'epanechnikov, adjust=1')) + + geom_line(stat = "density", bw = "nrd0", adjust = 1, + kernel = "gaussian", aes(col = 'gaussian')) + + scale_color_manual(name = 'Density', + values = c('darkviolet', 'seagreen', 'darkturquoise','darkgoldenrod3')) +``` + +### 7. Estimate the impact of adding an additional bidder +Step 1: Approximate the recovered distribution of values $\hat v$ using a log normal. +```{r, message = FALSE, warning = FALSE} +fit.lognormal <- fitdist(vhat, "lnorm") +mu <- fit.lognormal$estimate[1] +sigma <- fit.lognormal$estimate[2] +``` + +Step 2: Use the above estimated log normal parameters to recompute the bid function. 
(here I assume the reservation price being 0) +```{r} +fun.bid <- function(valuation, bidder, mu, sigma) { + Fval = function(v) plnorm(v, meanlog = mu, sdlog = sigma) + b = valuation - (1/ Fval(valuation)^(bidder - 1))* + integrate(function(x) Fval(x)^(bidder - 1), 0, valuation)$value + return(b) +} +``` + +Step 3: Simulate $M = 100$ sets of $L=100$ auction pairs (with 3 and 4 bidders), drawing $v$ from the recovered set of values $\hat v$ with replacement. (in addition to drawing $v$ from $\hat v$ we obtained in Q5, I also tried drawing from the approximate log normal distribution.) +```{r} +L <- 100 +M <- 100 +set.seed(420) +fun.win <- function(bidder,bs="FALSE"){ + if (bs=="TRUE") { + simVal <- sample(vhat, bidder*L, replace = TRUE) + simBid <- mapply(fun.bid, simVal, bidder, mu, sigma) + } + else { + simVal <- rlnorm(bidder*L, meanlog = mu, sdlog = sigma) + simBid <- mapply(fun.bid, simVal, bidder, mu, sigma) + } + return(matrix(simBid,ncol=100)) +} +# when bs is set to true, v are drawn from vhat +simBid3 <- replicate(M,fun.win(3, bs="TRUE")) +simBid4 <- replicate(M,fun.win(4, bs="TRUE")) +``` + +Step 4: For each $m \in M$, compute $\theta_m$ equals the expected difference in the winning bid going from 3 to 4 bidders. +```{r} +win3 <- colMeans(apply(simBid3,2:3,max)) +win4 <- colMeans(apply(simBid4,2:3,max)) +theta <- win4 - win3 +``` + +Step 5: Report 95% confidence intervals on $\theta$ +```{r, message = FALSE, warning = FALSE} +fit.normal.theta <- fitdist(theta, "norm") +mu.theta <- fit.normal.theta$estimate[1] +sig.theta <- fit.normal.theta$estimate[2] +error <- qnorm(0.975)*sig.theta/sqrt(length(theta)) +left <- mu.theta - error +right <- mu.theta + error +cat("The 95% confidence interval is [",left, "," ,right,"].") +``` +