\mainmatter%
\setcounter{page}{1}
\lectureseries[\course]{\course}
\auth[\lecAuth]{Lecturer: \lecAuth\\ Scribe: \scribe}
\date{October 8, 2009}
\setaddress%
% the following hack starts the lecture numbering at 5
\setcounter{lecture}{4}
\setcounter{chapter}{4}
\lecture{Estimating Covariance and Spectral Functions}
\section{Covariance and Spectral Functions}
The covariance and spectral density functions are defined as
\begin{definition}{Covariance and Spectral Density} \\
Auto-covariance function
\begin{align}
\label{eq:autocov}
R_u(\tau) = \bar{E}\{u(t)u(t-\tau)\}
\end{align}
Cross-covariance function
\begin{align}
\label{eq:xcov}
R_{yu}(\tau) = \bar{E}\{y(t)u(t-\tau)\}
\end{align}
Auto-spectral density function
\begin{align}
\label{eq:autospec}
\Phi_u(\w) = \sum_{\tau=-\infty}^\infty R_u(\tau)e^{-j\w\tau} = \mathcal{F}\{R_u(\tau)\}
\end{align}
Cross-spectral density function
\begin{align}
\label{eq:xspec}
\Phi_{yu}(\w) = \mathcal{F}\{R_{yu}(\tau)\}
\end{align}
\end{definition}
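For example, if $\{u(t)\}$ is zero-mean white noise with variance $\lambda$, then $R_u(\tau)=\lambda$ for $\tau=0$ and zero otherwise, so the spectrum is flat: $\Phi_u(\w)=\lambda$ for all $\w$.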
\subsection{Applications}
Suppose a system is given by $y(t) = G_0(q)u(t)+v(t)$ and assume that $u\perp v$.
Then
\begin{equation*}
R_{yu}(\tau) = g_0(\tau)
\end{equation*}
when the input sequence $\{u(t)\}$ is white noise with unit variance. This says that we can get the impulse response, or Markov, parameters from the cross-covariance of the input and output.
We can also relate the quantities in (\ref{eq:autocov})--(\ref{eq:xspec}) as
\begin{align*}
R_{yu}(\tau) &= G_0(q)R_u(\tau) \\
\Phi_{yu}(\w) &= G_0(e^{j\w})\Phi_u(\w) \\
\Phi_y(\w) &= |G_0(e^{j\w})|^2\Phi_u(\w) + \Phi_v(\w)
\end{align*}
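These relations are easy to check numerically. The following \textsc{Matlab} sketch (the sample size \texttt{N} and the FIR system \texttt{g0} are hypothetical choices) verifies that the cross-covariance of a unit-variance white noise input with the output reproduces the impulse response:
\begin{verbatim}
% Sketch: with unit-variance white noise input and no output noise,
% R_yu(tau) approximates the impulse response g_0(tau).
N  = 1e5;                          % number of samples (assumed)
g0 = [1 0.5 0.25];                 % hypothetical impulse response
u  = randn(N, 1);                  % white noise, unit variance
y  = filter(g0, 1, u);             % y(t) = G_0(q) u(t)
[Ryu, lags] = xcorr(y, u, 5, 'biased');
disp(Ryu(lags >= 0 & lags <= 2)')  % approximately [1 0.5 0.25]
\end{verbatim}
Note the argument order: with \textsc{Matlab}'s \texttt{xcorr} convention, \texttt{xcorr(y, u)} at lag $\tau$ corresponds to $\bar{E}\{y(t)u(t-\tau)\}$.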
\subsection{Properties}
Notice that $R_u(\tau)=R_u(-\tau)$: the product $u(t)u(t-\tau)$ is unchanged when $\tau$ is replaced by $-\tau$ (after shifting $t$ by $\tau$), so the auto-covariance function is symmetric about $\tau=0$.
As a consequence, the auto-spectral density function is real-valued, $\Phi_u(\w) = \mathcal{F}\{R_u(\tau)\}\in\mathbb{R}$: the Fourier transform of a real, symmetric sequence is always real.
The cross-covariance function is \textit{not} symmetric, $R_{yu}(\tau)\neq R_{yu}(-\tau)$.
The cross-spectral density function is complex-valued in general,
\begin{equation*}
\Phi_{yu}(\w) = \mathcal{F}\{R_{yu}(\tau)\} = \underbrace{G_0(e^{j\w})}_{\in\mathbb{C}}\,\underbrace{\Phi_u(\w)}_{\in\mathbb{R}} \in\mathbb{C}
\end{equation*}
Let $y(t) = G_0(q)u(t)+v(t)$, where $\{u(t)\}$ is white noise and $u\perp v$.
This defines a causal system.
In a causal system $R_{yu}(\tau)=0$ for $\tau<0$.
This means that the system output does not react to an input \textit{before} the input is applied.
Recall that $R_{yu}(\tau)=g_0(\tau)$ because the input is unit-variance white noise.
The $g_0(\tau)$ values represent the impulse response, and they \textit{must} be zero for negative $\tau$, which would correspond to input that has not yet been applied.
\section{Finite-time Estimates}
In the textbook, Ljung calls this non-parametric identification.
The goal is to find estimates of (\ref{eq:autocov})--(\ref{eq:xspec}).
We have to estimate these quantities because their definitions assume an infinite number of samples, while in practice only a finite number is available.
\subsection{Correlation analysis}
If the input $\{u(t)\}$ is white noise with variance $\lambda$, then
\begin{align*}
R_{yu}(\tau) = G_0(q)R_u(\tau), \qquad R_u(\tau) &= \begin{cases} \lambda, & \tau=0 \\ 0, & \tau\neq 0 \end{cases} \\
\Rightarrow\quad R_{yu}(\tau) = \lambda g_0(\tau) \quad &\Rightarrow\quad g_0(\tau) = \frac{R_{yu}(\tau)}{\lambda}
\end{align*}
Therefore, we need to find estimates for $R_{yu}(\tau)$ and $R_u(\tau)$ to find the impulse response coefficients.
\subsection{Estimation of Covariance Functions}
From (\ref{eq:autocov}) we have
\begin{equation*}
R_u(\tau) = \bar{E}\{u(t)u(t-\tau)\} = \lim_{N\to\infty}\frac{1}{N}\sum_{t=1}^N E\{u(t)u(t-\tau)\}
\end{equation*}
A proposed form of the estimate is
\begin{equation*}
\hat{R}_u^N(\tau) = \frac{1}{N}\sum_{t=1}^N u(t)u(t-\tau)
\end{equation*}
The idea is to drop the expectation operator, since the underlying distribution is not known, and to hope that averaging enough samples yields a good estimate of the true covariance.
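A direct implementation of this proposed estimator could look as follows (a minimal sketch; \texttt{u} is an assumed vector of input samples, and terms with $t-\tau\leq 0$ are treated as zero, so the sum effectively runs from $t=\tau+1$ to $N$):
\begin{verbatim}
% Sketch: biased autocovariance estimate Ru_hat(tau) for tau >= 0,
% i.e. (1/N) * sum_{t=tau+1}^{N} u(t) u(t-tau).
N      = length(u);
maxlag = 20;                       % assumed largest lag of interest
Ru_hat = zeros(maxlag + 1, 1);
for tau = 0:maxlag
    Ru_hat(tau + 1) = sum(u(tau+1:N) .* u(1:N-tau)) / N;
end
\end{verbatim}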
We would like this proposed covariance estimate to have the property
\begin{equation*}
\lim_{N\to\infty} \ruhat = R_u(\tau) \text{~w.p.~} 1
\end{equation*}
If that is the case then it is called a \textit{consistent} estimate.
The result is actually
\begin{align}
\label{eq:autocovest}
E\{\ruhat\} = \frac{N-|\tau|}{N}R_u(\tau)
\end{align}
where $\frac{N-|\tau|}{N}$ is a weighting factor with the property that $\lim_{N\to\infty}E\{\ruhat\}=R_u(\tau)$.
Notice that the proposed estimate is biased by the weighting term, but it is asymptotically unbiased.
Also, the estimate gets better as more data are sampled.
\subsection{Unbiased Estimate of $R_u(\tau)$}
To get an unbiased estimate of $R_u(\tau)$ we use
\begin{equation*}
\hat{\hat{R}}_u^N(\tau) = \frac{1}{N-|\tau|}\sum_{t=1}^N u(t)u(t-\tau)
\end{equation*}
Only $N-|\tau|$ product terms are actually available in the sum.
In the end, we will use the biased estimate anyway; the math showing why comes later.
Roughly, when only a few samples contribute to the estimate at a given lag we do not trust it very much, and the biased estimate captures that by weighting large lags toward zero.
As the time-shift $\tau$ gets larger, Figure~\ref{fig:05timeShift} shows that fewer samples are available for calculating the covariance.
Figure~\ref{fig:05triWindow} shows the triangular window that represents the weighting factor and shows how the weighting factor forces the estimate to zero for large values of $\tau$.
In \textsc{Matlab} the function \texttt{xcorr} can be used to find the covariance estimate via \texttt{xcorr(u, u, 'biased')}.
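For example (a sketch; since \texttt{xcorr} computes a correlation rather than a covariance, \texttt{u} is assumed to be zero-mean, otherwise subtract the mean first or use \texttt{xcov}):
\begin{verbatim}
% Sketch: biased and unbiased autocovariance estimates via xcorr
% (Signal Processing Toolbox); lags run from -maxlag to maxlag.
maxlag = 20;
[Ru_b,  lags] = xcorr(u, u, maxlag, 'biased');    % 1/N scaling
[Ru_ub, ~]    = xcorr(u, u, maxlag, 'unbiased');  % 1/(N-|tau|) scaling
\end{verbatim}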
\begin{figure}[ht!]
\centering
\includegraphics[width=.6\textwidth]{images/05timeShift}
\caption{Effect of time-shifting a signal.}%
\label{fig:05timeShift}
\end{figure}
\begin{figure}[ht!]
\centering
\includegraphics[width=.3\textwidth]{images/05triWindow}
\caption{Triangular window from weighting factor.}%
\label{fig:05triWindow}
\end{figure}
\subsection{Covariance of Estimator}
\begin{align*}
E\{u(t)u(t-\tau)\} &= \begin{cases} \lambda, & \tau=0 \\ R_u(\tau), & \tau\neq 0 \end{cases} \\
\text{cov}\{\ruhat,\hat{R}_u^N(\tau+k)\} &\sim \frac{1}{N}\sum_{m=-\infty}^\infty \left[ R_u(m)R_u(m+k) + R_u(m+\tau+k)R_u(m-\tau) \right]
\end{align*}
Let $k=0$, then
\begin{align*}
\text{cov}\{\ruhat,\ruhat\} &\sim \frac{1}{N}\underbrace{\sum_{m=-\infty}^\infty \left[ R_u^2(m) + R_u(m+\tau)R_u(m-\tau) \right]}_C
\end{align*}
Note that since $\{u(t)\}$ is quasi-stationary, $R_u(\tau)\to0$ as $|\tau|\to\infty$, so the sum $C$ above is finite.
This leads to
\begin{align*}
&\text{cov}\{\ruhat,\ruhat\} \sim \mathcal{O}(\tfrac{1}{N}) < \tfrac{C}{N} \\
&\lim_{N\to\infty}\text{cov}\{\ruhat,\ruhat\} = 0
\end{align*}
This means that the variance goes to zero as $N\to\infty$, so the estimate converges to $R_u(\tau)$ with probability $1$.
See Figure~\ref{fig:05estDist}.
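The $\mathcal{O}(1/N)$ decay of the variance can also be checked empirically with a small Monte Carlo experiment (a sketch; the number of runs \texttt{M} is an arbitrary choice). Each tenfold increase in $N$ should cut the variance by roughly a factor of ten:
\begin{verbatim}
% Sketch: the variance of Ru_hat(0) decays like 1/N.
M = 500;                           % Monte Carlo runs (assumed)
for N = [100 1000 10000]
    est = zeros(M, 1);
    for m = 1:M
        u = randn(N, 1);           % unit-variance white noise
        est(m) = sum(u.^2) / N;    % Ru_hat(0)
    end
    fprintf('N = %5d: var = %.2e\n', N, var(est));
end
\end{verbatim}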
\begin{figure}[ht!]
\centering
\includegraphics[width=.4\textwidth]{images/05estDist}
\caption{Distributions of estimators as $N\to\infty$.}%
\label{fig:05estDist}
\end{figure}
\begin{example}
\begin{align*}
\ruhat &= \begin{cases} \frac{1}{N}\sum_{t=1}^N u(t)u(t-\tau), & |\tau|\leq N-1 \\ 0, & |\tau|\geq N \end{cases} \\
&\qquad[\text{use DTFT}] \\
\sum_{\tau=-\infty}^\infty\ruhat e^{-j\w\tau} &= \phiuhat = \frac{1}{N}\left|U_N(\w)\right|^2 \triangleq \text{~periodogram} \\
&\qquad[\text{use DTFT}] \\
U_N(\w) &= \sum_{t=0}^{N-1}u(t)e^{-j\w t}
\end{align*}
Later it will be shown why $\phiuhat$ is \textit{not} a good estimate.
However, now we can compute $\ruhat$ using (\ref{eq:autocovest}) or
\begin{align*}
\mathcal{F}\{u(t)\} &= U_N(\w) \\
\frac{1}{N}\mathcal{F}^{-1}\{U_N(\w)\overline{U_N(\w)}\} &= \ruhat
\end{align*}
The benefit of this last representation is that Fourier transforms can be \textit{very} fast to compute using digital systems and this is important when there are a large number of estimates to find.
$\lozenge$
\end{example}
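To illustrate the transform route from the example above, here is a minimal \textsc{Matlab} sketch of computing $\ruhat$ through the FFT; zero-padding to length $2N$ avoids the circular wrap-around of the DFT:
\begin{verbatim}
% Sketch: biased autocovariance estimate via the FFT.
N  = length(u);
U  = fft(u, 2*N);                  % zero-padded DFT of u
Ru = real(ifft(U .* conj(U))) / N; % (1/N) F^{-1}{ U conj(U) }
Ru = Ru(1:N);                      % Ru(k+1) = Ru_hat(k), k = 0..N-1
\end{verbatim}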
\subsection{Cross-covariance Functions}
This analysis is very similar to that for the auto-covariance functions.
From (\ref{eq:xcov}) we have
\begin{align*}
R_{yu}(\tau) = \bar{E}\{y(t)u(t-\tau)\} &= \lim_{N\to\infty}\frac{1}{N}\sum_{t=1}^N E\{y(t)u(t-\tau)\} \\
\ryuhat &= \frac{1}{N}\sum_{t=1}^N y(t)u(t-\tau) \\
E\{\ryuhat\} &= \frac{N-|\tau|}{N}R_{yu}(\tau) = R_{yu}(\tau) + \mathcal{O}\left(\frac{1}{N}\right) \\
\lim_{N\to\infty}\ryuhat &= R_{yu}(\tau) \text{~w.p.~} 1
\end{align*}
The last equality implies that there is no variance in the estimate when an infinite number of samples are available.
It is a stronger statement than
\begin{equation*}
\lim_{N\to\infty}E\{\ryuhat\} = R_{yu}(\tau)
\end{equation*}
which has a variance that could be large.
In \textsc{Matlab} the cross-covariance between two signals can be found using \texttt{xcorr(y, u, 'unbiased')}; note that with \textsc{Matlab}'s convention the output is the first argument and the input the second.
We can also find the cross-covariance using fast Fourier transforms by using
\begin{align*}
\mathcal{F}\{u(t)\} &= U_N(\w) \\
\mathcal{F}\{y(t)\} &= Y_N(\w) \\
\frac{1}{N}\mathcal{F}^{-1}\{Y_N(\w)\overline{U_N(\w)}\} &= \ryuhat
\end{align*}
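In code this mirrors the autocovariance computation above (a sketch under the same zero-padding assumption):
\begin{verbatim}
% Sketch: biased cross-covariance estimate via the FFT.
N   = length(u);
U   = fft(u, 2*N);
Y   = fft(y, 2*N);
Ryu = real(ifft(Y .* conj(U))) / N;  % (1/N) F^{-1}{ Y conj(U) }
Ryu = Ryu(1:N);                      % Ryu(k+1) = Ryu_hat(k)
\end{verbatim}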
\subsection{Applications}
Using the system $y(t) = G_0(q)u(t)+v(t)$ we have
\begin{align*}
\frac{1}{N}\sum_{t=1}^N y(t)u(t-\tau) &= G_0(q)\frac{1}{N}\sum_{t=1}^N u(t)u(t-\tau) + \frac{1}{N}\sum_{t=1}^N v(t)u(t-\tau) \\
\ryuhat &= G_0(q)\ruhat + \hat{R}_{vu}^N(\tau)
\end{align*}
If $u\perp v$ then $R_{vu}(\tau)=0$, but for finite $N$ we have $\hat{R}_{vu}^N(\tau)\neq 0$ because the noise $v(t)$ does not average out completely over a finite record.
Also, if $\{u(t)\}$ is white noise then $R_u(\tau)$ will be a $\delta$-function, but $\ruhat$ will not be a perfect $\delta$-function.
See Figure~\ref{fig:05xcov}.
For a unit-variance white noise input this leads to
\begin{align*}
\ryuhat &= \hat{g}_0(\tau) \\
\lim_{N\to\infty}E\{\ryuhat\} &= g_0(\tau)
\end{align*}
Even if $v(t)=0$, $\ryuhat=\hat{g}_0(\tau)$ is \textit{still} only an estimate: $\ruhat$ is not a perfect $\delta$-function, so we do not recover the impulse response exactly, although it typically gives a close estimate of the parameters $g_0(\tau)$.
\begin{figure}[ht!]
\centering
\includegraphics[width=.5\textwidth]{images/05xcov}
\caption{Estimated covariance when the input is not a perfect $\delta$-function.}%
\label{fig:05xcov}
\end{figure}
\section{Finite Impulse Response}
Assume $G_0(q) = \sum_{k=0}^n g_0(k)q^{-k}$.
This means that $G_0(q)$ is a finite impulse response (FIR) model because only $n+1$ terms are kept, not an infinite number of them.
When $\{u(t)\}$ is \textit{not} white noise, how do we estimate the parameters $g_0(k)$? We cannot use $\hat{g}_0(k) = \hat{R}_{yu}^N(k)$ because $R_u(\tau)\neq \delta_d(\tau)$, where $\delta_d(\tau)$ is a $\delta$-function.
However, we can use
\begin{equation*}
R_{yu}(\tau) = G_0(q)R_u(\tau) = \sum_{k=0}^n g_0(k)R_u(\tau-k)
\end{equation*}
Writing out the first few values of $\tau$ gives
\begin{align*}
R_{yu}(0) &= g_0(0)R_u(0) + g_0(1)R_u(-1) + g_0(2)R_u(-2) + \cdots + g_0(n)R_u(-n) \\
&= g_0(0)R_u(0) + g_0(1)R_u(1) + g_0(2)R_u(2) + \cdots + g_0(n)R_u(n) \\
R_{yu}(1) &= g_0(0)R_u(1) + g_0(1)R_u(0) + g_0(2)R_u(-1) + \cdots + g_0(n)R_u(1-n) \\
&= g_0(0)R_u(1) + g_0(1)R_u(0) + g_0(2)R_u(1) + \cdots + g_0(n)R_u(n-1)
\end{align*}
where we use the symmetry property of $R_u(k)$.
Writing this in matrix form gives
\begin{align*}
\left[\begin{array}{c}
R_{yu}(0) \\ R_{yu}(1) \\ R_{yu}(2) \\ \vdots \\ R_{yu}(n)
\end{array}\right] =
\left[\begin{array}{c c c c c}
R_u(0) & R_u(1) & R_u(2) & \cdots & R_u(n) \\
R_u(1) & R_u(0) & R_u(1) & \cdots & R_u(n-1) \\
R_u(2) & R_u(1) & R_u(0) & \cdots & R_u(n-2) \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
R_u(n) & R_u(n-1) & R_u(n-2) & \cdots & R_u(0)
\end{array}\right]
\left[\begin{array}{c}
g_0(0) \\ g_0(1) \\ g_0(2) \\ \vdots \\ g_0(n)
\end{array}\right]
\end{align*}
This can be rewritten as
\begin{equation*}
\mathbf{R}_{yu} = \mathbf{R}_u\mathbf{\theta}
\end{equation*}
The matrix $\mathbf{R}_u$ is a symmetric Toeplitz matrix.
When working with a finite number of samples we replace all of the values in the vectors and matrix with their estimated values and rearrange to get
\begin{align*}
\hat{\mathbf{R}}_{yu} &= \hat{\mathbf{R}}_u\hat{\mathbf{\theta}} \\
\hat{\mathbf{\theta}} &= \hat{\mathbf{R}}_u^{-1}\hat{\mathbf{R}}_{yu}
\end{align*}
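A sketch of the complete procedure (the data vectors \texttt{u} and \texttt{y} and the FIR order \texttt{n} are assumed):
\begin{verbatim}
% Sketch: FIR estimation by correlation analysis, solving
% Ru_hat * theta = Ryu_hat for the Markov parameters.
n = 10;                            % assumed FIR order
[Ru,  lr] = xcorr(u, u, n, 'biased');
[Ryu, ly] = xcorr(y, u, n, 'biased');
Ru_mat  = toeplitz(Ru(lr >= 0));   % symmetric Toeplitz matrix
Ryu_vec = Ryu(ly >= 0);
theta   = Ru_mat \ Ryu_vec;        % estimates of g_0(0), ..., g_0(n)
\end{verbatim}
Solving with the backslash operator is numerically preferable to forming $\hat{\mathbf{R}}_u^{-1}$ explicitly.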
This works even when the input is not white.
However, if the input is white then $\mathbf{R}_u$ is a diagonal matrix.
Furthermore, if the white input has unit variance ($\lambda=1$) then $\mathbf{R}_u=I$.
Notice that $\hat{\mathbf{R}}_u$ only depends on the input sequence $\{u(t)\}$.
The invertibility of $\hat{\mathbf{R}}_u$ is a property of $\{u(t)\}$; an input for which it holds is said to provide ``persistence of excitation.''
It is the responsibility of the engineer to create the input signal $\{u(t)\}$ such that $\hat{\mathbf{R}}_u$ is invertible.
\begin{example}
Suppose that the input signal is $u(t)=\sin(\w t)$.
Then we will only have $\text{rank}\{\hat{\mathbf{R}}_u\}=2$, so we would only be able to estimate the first two parameters, $g_0(0)$ and $g_0(1)$.
This is because a sinusoid is described by only two parameters: the frequency and amplitude, or the frequency and phase shift.
If two sinusoids at different frequencies are used as input then we could estimate four parameters.
Note that as $N\to\infty$ the estimates $\hat{g}_0(k)$ converge to the true values $g_0(k)$.
$\lozenge$
\end{example}
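A quick numerical check of the rank claim in the example above (a sketch; the tolerance passed to \texttt{rank} is chosen larger than the $\mathcal{O}(1/N)$ estimation error in $\ruhat$):
\begin{verbatim}
% Sketch: the covariance Toeplitz matrix built from a single
% sinusoid has numerical rank 2.
N = 1e4;  w = 0.3;  n = 10;        % assumed values
u = sin(w * (1:N)');
[Ru, lags] = xcorr(u, u, n, 'biased');
Ru_mat = toeplitz(Ru(lags >= 0));
disp(rank(Ru_mat, 1e-2))           % displays 2
\end{verbatim}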