%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Cheatsheet
% LaTeX Template
% Version 1.0 (12/12/15)
%
% This template has been downloaded from:
% http://www.LaTeXTemplates.com
%
% Original author:
% Michael Müller (https://github.com/cmichi/latex-template-collection) with
% extensive modifications by Vel (vel@LaTeXTemplates.com)
%
% License:
% The MIT License (see included LICENSE file)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%----------------------------------------------------------------------------------------
% PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
%----------------------------------------------------------------------------------------
\documentclass[11pt]{scrartcl} % 11pt font size
\usepackage[utf8]{inputenc} % Required for inputting international characters
\usepackage[T1]{fontenc} % Output font encoding for international characters
\usepackage[margin=0pt, landscape]{geometry} % Page margins and orientation
\usepackage{graphicx} % Required for including images
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{parskip}
\usepackage{color} % Required for color customization
\definecolor{mygray}{gray}{.75} % Custom color
\usepackage{url} % Required for the \url command to easily display URLs
\usepackage[ % This block contains information used to annotate the PDF
colorlinks=false,
pdftitle={Cheatsheet},
pdfauthor={Aaron Hillegass},
pdfsubject={Compilation of useful shortcuts}
]{hyperref}
\setlength{\unitlength}{1mm} % Set the length that numerical units are measured in
\setlength{\parindent}{0pt} % Stop paragraph indentation
\newcommand{\sepdots}{\ \dotfill{}\ } % Dotted separator (a new name; redefining amsmath's \dots would break ellipses in math mode)
\newcommand{\command}[2]{#1~\dotfill{}~#2\\} % Custom command for adding a shortcut
\newcommand{\sectiontitle}[1]{\paragraph{#1} \ \\} % Custom command for subsection titles
%----------------------------------------------------------------------------------------
\begin{document}
\begin{picture}(297,210) % Create a container for the page content
%----------------------------------------------------------------------------------------
% FIRST COLUMN SPECIFICATION
%----------------------------------------------------------------------------------------
\put(5,205){ % Divide the page
\begin{minipage}[t]{92mm} % Create a box to house text
%----------------------------------------------------------------------------------------
% HEADING ONE
%----------------------------------------------------------------------------------------
\paragraph{Regression} Let's say a given system has $p$ inputs and one output. Looking at the historical inputs $\{ x_1, \ldots, x_n \}$ and the corresponding outputs $\{ y_1, \ldots, y_n \}$, we would like to guess what $y_i$ will be for a new $x_i$.
\paragraph{Simple Linear Regression} In simple linear regression, there is only one input and our guess of $y_i$ will be given by the following:
\begin{equation*}
y_i = \beta_0 + \beta_1 x_i + \epsilon_i
\end{equation*}
Where $E(\epsilon_i) = 0$ and the variance of $\epsilon$ is $\sigma^2$.
The mean of the observed inputs is
\begin{equation*}
\bar{x} = \frac{\sum_{i=1}^{n} x_i}{n}
\end{equation*}
The mean of the observed outputs is
\begin{equation*}
\bar{y} = \frac{\sum_{i=1}^{n} y_i}{n}
\end{equation*}
\paragraph{Least Squares Estimate (SLR)}
\begin{equation*}
S_{xy} = \sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})
\end{equation*}
\begin{equation*}
S_{xx} = \sum_{i=1}^{n} (x_i - \bar{x})^2
\end{equation*}
\begin{equation*}
\hat{\beta_1} = \frac{S_{xy}}{S_{xx}}
\end{equation*}
Using $\hat{\beta_1}$, you can estimate $\beta_0$:
\begin{equation*}
\hat{\beta_0} = \bar{y} - \hat{\beta_1}\bar{x}
\end{equation*}
The prediction for $x_i$ is
\begin{equation*}
\hat{y_i} = \hat{\beta_0} + \hat{\beta_1}x_i
\end{equation*}
The prediction error (or residual) is
\begin{equation*}
\hat{\epsilon_i} = y_i - \hat{y_i}
\end{equation*}
\end{minipage} % End the first column of text
} % End the first division of the page
%----------------------------------------------------------------------------------------
% SECOND COLUMN SPECIFICATION
%----------------------------------------------------------------------------------------
\put(100,205){ % Divide the page
\begin{minipage}[t]{92mm} % Create a box to house text
\paragraph{ANOVA}
\begin{equation*}
SST = S_{yy} = \sum_{i=1}^{n} (y_i - \bar{y})^2 = SSR + SSE
\end{equation*}
(SST has $n-1$ degrees of freedom) Note that
\begin{equation*}
SSR = \hat{\beta_1} S_{xy} = \sum_{i=1}^n \left( \hat{y_i} - \bar{y}\right) ^2 = SST - SSE
\end{equation*}
(SSR has 1 degree of freedom) and
\begin{equation*}
SSE = \sum_{i=1}^{n} \hat{\epsilon_i}^2 = SST - SSR
\end{equation*}
(SSE has $n -2$ degrees of freedom).
\begin{equation*}
MSR = \frac{SSR}{df(SSR)}
\end{equation*}
\begin{equation*}
\hat{\sigma}^2 = MSE = \frac{SSE}{df(SSE)}
\end{equation*}
\begin{equation*}
F = \frac{MSR}{MSE}
\end{equation*}
which follows Snedecor's F-distribution with $df_1 = df(SSR)$ and $df_2 = df(SSE)$. The p-value is the tail probability of the observed F-statistic. Anything smaller than 0.05 is pretty good.
\begin{equation*}
R^2 = \frac{SSR}{SST} = 1 - \frac{SSE}{SST} = \hat{\beta_1}^2 \frac{S_{xx}}{S_{yy}} = \hat{\beta_1}\frac{S_{xy}}{S_{yy}}
\end{equation*}
\paragraph{Quality of Parameters}
The standard error of our estimate of $\hat{\beta_1}$ is
\begin{equation*}
s.e.(\hat{\beta_1}) = \sqrt{\frac{\hat{\sigma}^2 }{S_{xx}}}
\end{equation*}
The T-statistic for $\hat{\beta_1}$:
\begin{equation*}
\frac{\hat{\beta_1}}{s.e.(\hat{\beta_1})}
\end{equation*}
which follows Student's distribution with $df = n - 2$. The p-value is the tail probability of the observed t-statistic. Once again, anything smaller than 0.05 is pretty good.
\end{minipage} % End the second column of text
} % End the second division of the page
%----------------------------------------------------------------------------------------
% THIRD COLUMN SPECIFICATION
%----------------------------------------------------------------------------------------
\put(195,205){ % Divide the page
\begin{minipage}[t]{94mm} % Create a box to house text
\paragraph{Confidence Interval of Expectation} The prediction of the mean response at $x = x_0$ is given by
\begin{equation*}
E(Y) = \hat{y_0} = \hat{\beta_0} + \hat{\beta_1} x_0
\end{equation*}
The standard error of the prediction of $E(Y)$ at $x_0$ is given by
\begin{equation*}
s.e.(prediction) = \sqrt{\hat{\sigma}^2 \left( \frac{1}{n} + \frac{(x_0 - \bar{x})^2}{S_{xx}}\right)}
\end{equation*}
Thus, the $100(1-\alpha)\%$ confidence interval of $E(Y)$ at $x = x_0$ is
\begin{equation*}
\hat{y_0} \pm \left( t_{\alpha/2,\, df=n-2}\right) \left(s.e.(prediction) \right)
\end{equation*}
\paragraph{Confidence Interval of New Observation} The prediction is the same. But the standard error is bigger:
\begin{equation*}
s.e.(prediction) = \sqrt{\hat{\sigma}^2 \left( 1 + \frac{1}{n} + \frac{(x_0 - \bar{x})^2}{S_{xx}}\right)}
\end{equation*}
The confidence interval is calculated the same as above using the t-distribution.
\paragraph{Adjusted $R^2$}
\begin{equation*}
R_{adj}^2 = 1 - \left(1 - R^2\right)\frac{n - 1}{n - k - 1}
\end{equation*}
where $k$ is the number of predictor (explanatory) variables.
\paragraph{Variance Inflation Factor}
For a variable $X_j$ that is suspected of being correlated with other variables, we remove it if its VIF is greater than 5.
\begin{equation*}
VIF(X_j) = \frac{1}{1 - R_{j}^2}
\end{equation*}
where $R_j^2$ is the $R^2$ obtained by regressing $X_j$ on all of the other predictor variables.
\paragraph{MLR in matrices} Let $X$ be the matrix where the inputs for each sample are a row and the first item in the row is 1. Let $Y$ be the column vector of outputs. Let $\beta$ be the column vector of coefficients. Let $\epsilon$ be the column vector of residuals.
\begin{equation*}
Y = X\beta + \epsilon
\end{equation*}
%----------------------------------------------------------------------------------------
\end{minipage} % End the third column of text
} % End the third division of the page
\end{picture} % End the container for the entire page
%----------------------------------------------------------------------------------------
\end{document}