% graphical interpretation of Lagrange Multipliers
% Function E(x,y) that is minimized, a long-wavelength sinusoid
% gradient list, for plotting purposes
% a b b b c c d d d, e e e e
gi = [40, 30, 40, 25, 40, 10, 20, 10, 15, 4, 5, 8, 12]';
gj = [40, 30, 25, 40, 10, 40, 10, 24, 18, 16, 10, 6, 3]';
axis( [x(1), x(N), y(1), y(M)] );
imagesc( [x(1), x(N)], [y(1), y(M)], E );
% plot direction of gradient at selected points
g = [dEdx(i,j), dEdy(i,j)]';
plot( x(i), y(j), 'wo', 'LineWidth', 3 );
plot( [x(i),x(i)+sn*g(1)]', [y(j),y(j)+sn*g(2)]', 'w-', 'LineWidth', 3 );
% constraint c(x,y)=0 is a parametric curve [xc(s), yc(s)]
% where the parameter s varies from 0 to 1 along curve
% I specify the curve, not the constraint itself
% line connecting endpoints
% embelish line into a curve
yc = yc - 0.05*exp( -((s-0.1).^2)/(2*0.1*0.1) );
yc = yc - 0.03*exp( -((s-0.2).^2)/(2*0.1*0.1) );
plot( xc, yc', 'k-', 'LineWidth', 3);
% plot arrows on one side of curve
p = [xc(k), yc(k)]'; % point on curve
t = [dxc(k), dyc(k)]'; % tangent to curve
n = [t(2), -t(1)]; % normal to curve
plot( [p(1),p(1)+sn*n(1)]', [p(2),p(2)+sn*n(2)]', '-', 'LineWidth',2, 'Color', [0.7,0.7,0.7] );
% tabulate the deviation between n and gradE/|gradE| along the curve
e = zeros(Ns-1,1); % deviation
for k=(1:Ns-1) % tabulate deviation
p = [xc(k), yc(k)]'; % point on curve
i = floor(p(1)/Dx)+1; % pixel index in x of this point
j = floor(p(2)/Dy)+1; % pixel index in y (assuming grid spacing Dy)
t = [dxc(k), dyc(k)]'; % tangent to curve
n = [t(2), -t(1)]; % normal to curve
g = [dEdx(i,j), dEdy(i,j)]'; % gradient at this point
g = g/sqrt(g'*g); % direction of gradient
e(k) = (n(1)-g(1))^2 + (n(2)-g(2))^2; % deviation
ie(k) = i; % back-pointer i(k)
je(k) = j; % back-pointer j(k)
% find and plot point where gradE and n are anti-parallel
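% a minimal sketch of the search assumed here (kopt and the use of
% min are illustrative): the extremum of the deviation e(k) marks the
% point where the normal and the gradient direction line up, and the
% back-pointers ie, je recover its grid indices
[~, kopt] = min(e); % smallest deviation along the curve
i = ie(kopt); % grid index in x of the constrained minimum
j = je(kopt); % grid index in y of the constrained minimum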
plot( x(i), y(j), 'ko', 'LineWidth', 4 );
fprintf('Caption: Graphical interpretation of Lagrange multipliers.\n');
Caption: Graphical interpretation of Lagrange multipliers.
fprintf('Function E(x,y) being minimized (colors), gradient of E (white\n');
Function E(x,y) being minimized (colors), gradient of E (white
fprintf('bars, with circle on downslope end), constraint surface (black curve)\n');
bars, with circle on downslope end), constraint surface (black curve)
fprintf('and its normals (grey bars), point on constraint surface where\n');
and its normals (grey bars), point on constraint surface where
fprintf('E is minimized (black circle). Gradient and normals are parallel\n');
E is minimized (black circle). Gradient and normals are parallel
fprintf('at this point\n');
% plot deviation along curve
axis( [0, 1, 0, max(e) ] );
plot( s(1:Ns-1), e, 'k-', 'LineWidth', 3 );
fprintf('Caption: Deviation between curve normal and gradient direction\n');
Caption: Deviation between curve normal and gradient direction
fprintf('as a function of arc-length s along constraint curve\n');
as a function of arc-length s along constraint curve
% Example of complex least squares
% the model is a constant plus two complex exponentials of known frequency
w1 = 10.0; % angular frequency 1
w2 = 25.0; % angular frequency 2
f1 = ones(N,1); % constant
f2 = exp(complex(0.0,1.0)*w1*t); % first complex exponential
f3 = exp(complex(0.0,1.0)*w2*t); % second complex exponential
mtrue = [complex(1,2); complex(3,4); complex(5,6)];
G = [f1,f2,f3]; % data kernel
dtrue = G*mtrue; % true data
sd = 1.0; % standard deviation of data
% observed data is true data plus random noise
dobs = dtrue + random('Normal',0.0,sd/sqrt(2),N,1)+complex(0.0,1.0)*random('Normal',0.0,sd/sqrt(2),N,1);
% usual least squares formula applies because in MATLAB, G' is the
% Hermitian transpose of the complex matrix G
mest = (G'*G)\(G'*dobs); % estimated model parameters
dpre = G*mest; % predicted data
plot(t,real(dpre),'k-','LineWidth',2);
plot(t,real(dobs),'k*','LineWidth',1);
plot(t,imag(dpre),'r-','LineWidth',2);
plot(t,imag(dobs),'r*','LineWidth',2);
fprintf('Caption: data, with real part in black and imaginary part in red\n');
Caption: data, with real part in black and imaginary part in red
fprintf('observed (stars), predicted (lines)\n');
observed (stars), predicted (lines)
fprintf('1: true %.2f+%.2fi est %.2f+%.2fi\n', real(mtrue(1)), imag(mtrue(1)), real(mest(1)), imag(mest(1)));
1: true 1.00+2.00i est 1.06+1.99i
fprintf('2: true %.2f+%.2fi est %.2f+%.2fi\n', real(mtrue(2)), imag(mtrue(2)), real(mest(2)), imag(mest(2)));
2: true 3.00+4.00i est 3.04+3.94i
fprintf('3: true %.2f+%.2fi est %.2f+%.2fi\n', real(mtrue(3)), imag(mtrue(3)), real(mest(3)), imag(mest(3)));
3: true 5.00+6.00i est 5.07+5.96i
% inverse of a "resized matrix"
% example of updating the inverse of a symmetric matrix
% as rows/cols are continually deleted from/added to it, a process that
% can be useful in a moving window analysis
N=500; % size of test matrix A
N1=2; % size of section deleted/added on each iteration
NITER=1000; % total number of iterations
NCORR1=200; % correct after NCORR1 iterations
NCORR2=1; % number of correction iterations
fprintf('Matrix A is %d by %d with %d rows/cols updated\n',N,N,N1);
Matrix A is 500 by 500 with 2 rows/cols updated
E = zeros(NITER,1); % Error
% the test matrix is a 2D Gaussian autocovariance matrix
% with positions (xp, yp), plus a little added to main diagonal
% as might be encountered in a Gaussian Process Regression problem
xp = random('Uniform',0,N,N,1);
yp = random('Uniform',0,N,N,1);
s = N/5; % scale length of autocovariance
s2 = s^2; % its square, used in the covariance formula below
A(i,j) = exp(-0.5 * ((xp(i)-xp(j))^2 + (yp(i)-yp(j))^2 ) / s2 );
s2d = 0.1; % add to main diagonal
A = (A+A')/2; % make sure A is exactly symmetric
AI = inv(A); % inverse of A
% delete phase, using Woodbury Formula
ZTRPIRTZ = (ZTRPIRTZ+ZTRPIRTZ')/2;
% at this point, A is reduced to Y
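% a minimal sketch of the delete-phase identity used above (B11, B12,
% B22, YI are illustrative names; rows/cols 1:N2, with N2 = N - N1,
% are retained): if A and its known inverse AI are partitioned
% conformably as A = [Y, Z'; Z, W] and AI = [B11, B12; B12', B22],
% then inv(Y) = B11 - B12*inv(B22)*B12', so no large inversion is needed
B11 = AI(1:N2,1:N2); % block of AI over the retained rows/cols
B12 = AI(1:N2,N2+1:N); % off-diagonal block
B22 = AI(N2+1:N,N2+1:N); % small (N1 by N1) block over the deleted rows/cols
YI = B11 - B12*(B22\(B12')); % inverse of the retained block Y
YI = (YI+YI')/2; % keep it exactly symmetric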
% randomly choose x,y position of new points
xpnew = random('Uniform',0,N,N,1);
ypnew = random('Uniform',0,N,N,1);
xp = [ xp(1:N2,1); xpnew(N2+1:N,1) ];
yp = [ yp(1:N2,1); ypnew(N2+1:N,1) ];
% recompute the complete A for test purposes only
Anew(i,j) = exp(-0.5 * ((xp(i)-xp(j))^2 + (yp(i)-yp(j))^2 ) / s2 );
Anew = Anew + s2d * eye(N);
% add based on bordering method
X = Y; % the existing part of the matrix
Y = Anew(N2+1:N,N2+1:N); % the new sections added
Z = Anew(N2+1:N,1:N2); % the new sections added
ZXIZT = (ZXIZT+ZXIZT')/2;
YMZMIZT = (YMZMIZT+YMZMIZT')/2;
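% a minimal sketch of the bordering update assumed here (S, SI, B11,
% B12 are illustrative names): with Anew = [X, Z'; Z, Y], the Schur
% complement S = Y - Z*XI*Z' gives the updated inverse without any
% large matrix inversion
XI = YI; % inverse of the retained block, from the delete-phase sketch above
S = Y - Z*XI*(Z'); % small (N1 by N1) Schur complement
S = (S+S')/2; % keep it exactly symmetric
SI = inv(S);
B11 = XI + XI*(Z')*SI*Z*XI; % top-left block of the updated inverse
B12 = -XI*(Z')*SI; % off-diagonal block
AI = [B11, B12; B12', SI]; % updated inverse of the resized matrix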
if( mod(itt,NCORR1) == 0 )
% rms error of the inverse
E(itt) = sqrt(e'*e)/N; % note len(e) is N^2
axis( [0, NITER-1, -15, 0] );
plot( n, log10(E), 'k-', 'LineWidth', 2);
for i = (0:NCORR1:NITER-1)
plot([i,i]', [-15,-5]', 'r:', 'LineWidth', 1);
xlabel('iteration number, i');
ylabel('log10 rms error, E(i)');
fprintf('Caption: Root mean square error of A*AI-I as a function of\n');
Caption: Root mean square error of A*AI-I as a function of
fprintf('iteration number (black curve), with correction points (red)\n');
iteration number (black curve), with correction points (red)
% This code parallels "Method Summary 1, Generalized Least Squares"
% a global variable is necessary to use the biconjugate gradient solver
% Step 1: State the problem in words.
% This is a data smoothing problem where the model
% parameters are a discretized version of a function
% m(x) (at equal increments Dx) and the data are noisy
% measurements of the same model function.
% set up the auxiliary variable, x
% Step 2: Organize the problem in standard form
% number of model parameters = number of data = Nx
% and the data kernel is the identity matrix, G=I
% this is a synthetic test, so there is no real data
% the true model is a "ringy" function peaked at x=0
% the observed data is the true model plus random noise
mtrue=Atrue*exp(-2*pi*abs(x)/(b*Ltrue)).*cos(2*pi*x/Ltrue);
sigmad = 0.03; % noise level of the observations
dobs = dtrue+random('Normal',0,sigmad,N,1);
No=floor(7*N/8); % create an outlier in the data
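% a minimal sketch of the outlier itself (the offset of 0.5 is illustrative)
dobs(No) = dobs(No) + 0.5; % spurious offset at one point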
% When you examine the plot, you should take notice of:
% the overall shape of the curve
% the existence of an outlier toward the right hand side
axis( [xmin, xmax, -2, 2] );
plot( x, dobs, 'ko', 'LineWidth', 2 );
fprintf('Caption: Observed data d(x)\n');
Caption: Observed data d(x)
% I'm going to remove the outlier "manually"
% by interpolating nearby points
dobs(No) = 0.5*(dobs(No-1)+dobs(No+1));
% Step 4: Establish the accuracy of the data
% In this case, we know the sigmad, because it
% was specified when the synthetic data were
% created. I'm going to use this "correct" value
% Step 5: State the prior information in words.
% We assume that the function is smooth, meaning
% that its second derivative is small
% Step 6: Organize the prior information in standard
% form, H is a second-difference operator, h=0. There
% are two fewer rows in H than there are model parameters
% use row-column-value method
% H = spdiags([z, -2*z, z], [0, 1, 2], K, M);
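% a minimal sketch of the row-column-value construction assumed here,
% filling Hr, Hc, Hv with the [1, -2, 1]/Dx^2 second-difference stencil
Dx2i = 1/(Dx^2); % inverse squared sample spacing
Hr = zeros(3*K,1); Hc = zeros(3*K,1); Hv = zeros(3*K,1);
ne = 0; % element counter
for k = (1:K)
    Hr(ne+1)=k; Hc(ne+1)=k;   Hv(ne+1)=Dx2i;
    Hr(ne+2)=k; Hc(ne+2)=k+1; Hv(ne+2)=-2*Dx2i;
    Hr(ne+3)=k; Hc(ne+3)=k+2; Hv(ne+3)=Dx2i;
    ne = ne+3;
end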
H = sparse( Hr, Hc, Hv, K, M);
% Step 7: Establish the accuracy of the a priori information
% Suppose that I know that the function is roughly sinusoidal
% with an amplitude of about A and a wavelength of about L.
% Then if the function were a cosine wave m = A cos(2 pi x / L)
% its second derivative would be d2m/dx2 = -A (2 pi/ L)^2
% cos(2 pi x / L). The variance of the second derivative
% would therefore be about 0.5 A^2 (2 pi/ L)^4 (since the
% average value of cos^2 is 0.5). So I'm going to use this
% amount as an estimate of the variance of the prior information;
% that is, the second derivative is zero +/- sigmah.
sigma2h = 0.5 * (A^2) * ((2*pi/L)^4);
% Step 8: Estimate model parameter and their covariance
% set up the matrix F and vector f
% F=spalloc(N+K,M,3*(N+K));
% use row-column-value method
k=1; % this is a trick to count rows of F
for i=(1:N) % the G, dobs part
for i=(1:K) % The H, h=0 part
% F(k,i+1) = -2*Dx2i/sigmah;
Fv(Nel) = -2.0*Dx2i/sigmah;
% F(k,i+2) = Dx2i/sigmah;
gdaFsparse = sparse( Fr, Fc, Fv, N+K, M);
% solve Fm=f by biconjugate gradients
mest = bicg( @gda_FTFmul, FTf, 1e-5, 4*(N+K) );
bicg converged at iteration 10 to a solution with relative residual 4.7e-06.
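% a minimal sketch (in its own file, gda_FTFmul.m) of the multiply
% function assumed by bicg above; the actual gda_FTFmul may differ.
% It applies F'F to a vector without forming F'F explicitly; since
% F'F is symmetric, the transpose flag can be ignored.
function y = gda_FTFmul(v, transp_flag)
global gdaFsparse;
t = gdaFsparse*v; % t = F v
y = (gdaFsparse')*t; % y = F' (F v)
end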
% note that it fits the observations pretty well
axis( [xmin, xmax, -2, 2] );
plot( x, mest, 'r-', 'LineWidth', 3 );
plot( x, dobs, 'ko', 'LineWidth', 2 );
% Step 9: (see a little further, below)
% Step 10: Confidence interval calculation
% The covariance matrix is [cov m] = inv(F'*F)
% However, we do not necessarily need to compute the complete
% matrix. The code here solves for the k-th column, say ck,
% (or row, since it is symmetric). The idea is to solve the equation
% (F'F) ck = s, where s is a vector that is all zeros except
% a 1 in row k. Then ck is the k-th column of inv(F'*F)
varlist=[1:floor(N/20):N]; % list of values of x at which
% to calculate error bars
s = zeros(M,1); s(k) = 1.0; % all zeros except a 1 in row k
ck = bicg( @gda_FTFmul, s, 1e-5, 4*M ); % k-th column of inv(F'F)
% variance of estimated model parameter k is the k-th element of ck
sigmamest = sqrt( ck(k) );
% 95% confidence interval
mlow = mest(k)-2*sigmamest;
mhigh = mest(k)+2*sigmamest;
plot( [x(k), x(k)], [mlow, mhigh], 'b-', 'LineWidth', 3 );
end
[bicg converged at iteration 12 or 13 for each of the 21 solves above, with relative residuals between 3.1e-06 and 1e-05]
fprintf('Caption: Observed data d(x) (black circles),\n');
Caption: Observed data d(x) (black circles),
fprintf('estimated model m(x) (red curve) and 95 percent\n');
estimated model m(x) (red curve) and 95 percent
fprintf('confidence bounds (blue)\n');
% Note that the error bars are a small fraction of the
% overall range of the function, meaning that we haven't
% smoothed the function completely away!
% Step 9: Examine the model resolution
% The resolution matrix is RG = GMG * G
% with GMG = inv(F'F) G' inv(covd)
% However, as with covariance, one does not need to
% compute every row. This code just does a few rows.
% Note that the plot shows that the resolution is
% peaked along the main diagonal, but that it has a
% finite width of about 1 (meaning that there is some
% smoothing, but that's what we wanted) and a little
% tiny bit of negative sidelobes (which is not so
% good, but unavoidable with 2nd derivative smoothing).
reslist=[1:floor(N/9):N]; % do these rows
axis( [xmin, xmax, xmin-(xmax-xmin)/Nres, xmax+(xmax-xmin)/Nres] );
vard = sigma2d * ones(N,1); % variance of data
% (the code can handle the case where it varies)
% compute k-th row of the resolution matrix
% first: compute column of inverse of F'F
% note F'F symmetric, so column is also row
ck = bicg( @gda_FTFmul, s, 1e-5, 4*M );
% second: row of generalized inverse GMG(k,:)=gi
gi = ((ck')*(G'))./(vard');
% third: row of the resolution matrix, R(k,:) = GMG(k,:) * G
r_row = gi*G; % k-th row of the resolution matrix
sc = -((xmax-xmin)/Nres)/max(r_row);
plot( x, sc*r_row'+x(k), 'k-', 'LineWidth', 3 );
end
[bicg converged at iteration 12 or 13 for each of the 10 solves above, with relative residuals between 3.4e-06 and 1e-05]
fprintf('Caption: Model resolution matrix\n');
Caption: Model resolution matrix
% Step 11: Examine the individual errors
% Note that the prediction error is of
% uniform size of about unity, indicating
% that our a priori data variance is about
% right. It is also of similar size across
% the plot, indicating that our assumption
% that the error is uniform is about right,
% too. On the other hand, the error
% in a priori information is a little less
% than unity and not uniform across the
% plot, indicating that our assumptions on
% the size and uniformity of sigmah are not quite right.
axis( [xmin, xmax, -2, 2] );
plot( [xmin, xmax]', [0, 0]', 'b:', 'LineWidth', 2);
plot( x, e/sigmad, 'ko', 'LineWidth', 3 );
fprintf('Caption: Normalized prediction error.\n');
Caption: Normalized prediction error.
axis( [xmin, xmax, -2, 2] );
plot( [xmin, xmax]', [0, 0]', 'b:', 'LineWidth', 2);
plot( x(2:N-1), l/sigmah, 'ko', 'LineWidth', 3 );
fprintf('Caption: Normalized error in prior information.\n');
Caption: Normalized error in prior information.
% Step 12: Examine the total error phiest
% in order to address the null hypothesis
% that departure from the expected value of
% nu is due to random error.
% In the plot, total error phiest is usually
% close to the minimum bound (sometimes on
% one side, sometimes on the other), indicating
% that one of the two variances, sigma2d or
% sigma2h is overestimated. (Our estimate of
% sigma2h is probably too large. Multiplying
% it by 0.75 produces a better result).
phiest = (f-fpre)'*(f-fpre);
axis( [0, 1.1*pmax, 0, 1] );
plot( [p1, p1]', [0, 0.2]', 'r-', 'LineWidth', 2);
plot( [p2, p2]', [0, 0.2]', 'r-', 'LineWidth', 2);
plot( [phiest, phiest]', [0, 0.4]', 'k-', 'LineWidth', 3);
fprintf('Caption: Total error phi (black) and confidence bounds (red)\n');
Caption: Total error phi (black) and confidence bounds (red)
% Step 13: Two different models, A and B?
% We have only one model, so this step is not relevant
% This code parallels "Method Summary 2, Grid Search".
% a function f(t) is composed of the sum of two
% sinusoids of unknown amplitude and frequency
% d(t) = A1 cos(2 pi f1 t) + B1 sin(2 pi f1 t)
%      + A2 cos(2 pi f2 t) + B2 sin(2 pi f2 t)
% true data and synthetic observed data
dtrue = A1true*cos(pi2*f1true*t)+B1true*sin(pi2*f1true*t);
dtrue = dtrue+A2true*cos(pi2*f2true*t)+B2true*sin(pi2*f2true*t);
dobs = dtrue+random('Normal',0,sigmad, N, 1 );
axis( [t(1), t(end), -2, 2] );
plot( t, dobs, 'ko', 'LineWidth', 2 );
plot( t, dobs, 'k-', 'LineWidth', 2 );
% Step 1. This problem is non-linear in the frequencies
% [f1, f2] but linear in the amplitudes [A1, B1, A2, B2].
% The strategy is to grid search over [f1, f2], computing
% [A1, B1, A2, B2] at each grid node via least squares.
% Step 2. By visual inspection of the data, the frequencies
% of variation are about 10 Hz. So we grid search in the
% (5-15) Hz band to be sure we enclose [f1, f2]. The
% data are a fairly slowly varying function of frequency,
% so a 201 by 201 grid should be sufficient. Note that
% the problem is symmetrical in f1 and f2, so we could
% just search the f2>f1 part of the error surface E(f1,f2).
% However, coding this case seems more trouble than it's
% worth. We do have to be careful not to do the "f1 exactly
% equal to f2" case, because the least squares data kernel
% is redundant in this case. I avoid this problem by
% making the f1 and f2 grid points slightly different.
Df1=(f1max-f1min)/(Nf1-1);
f1list = f1min + Df1*[0:Nf1-1]';
Df2=(f2max-f2min)/(Nf2-1);
f2list = f2min + Df2*[0:Nf2-1]';
for if1 = [1:Nf1] % loop over values of f1
for if2 = [1:Nf2] % loop over values of f2
f1 = f1list(if1); % set f1
f2 = f2list(if2); % set f2
% create data kernel for m=[A1, B1, A2, B2] for fixed [f1, f2]
G = [ cos(pi2*f1*t), sin(pi2*f1*t), cos(pi2*f2*t), sin(pi2*f2*t) ];
mest = (G'*G)\(G'*dobs); % least squares solution
dpre = G*mest; % predicted data
e = dobs - dpre; % prediction error
E(if1,if2) = e'*e; % tabulate overall error
end % next f2
end % next f1
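% a minimal sketch of locating the grid minimum before the quadratic
% refinement below (names kbest, if1best, if2best are illustrative)
[~, kbest] = min(E(:)); % smallest error on the grid
[if1best, if2best] = ind2sub([Nf1,Nf2], kbest); % its grid indices
f1best = f1list(if1best); f2best = f2list(if2best); % coarse estimates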
% refine minimum using quadratic approximation. Note that I am
% using the prior variance of the data in the calculation of covariance
[ f1est, f2est, E0, covf1f2, status ] = gda_Esurface( f2list, f1list, E, sigmad2 );
% recompute m=[A1, B1, A2, B2] for the refined [f1, f2]
G = [ cos(pi2*f1est*t), sin(pi2*f1est*t), cos(pi2*f2est*t), sin(pi2*f2est*t) ];
sigma2dest = Efinal/(N-M); % posterior variance in the data
% recompute the covariance of frequencies based on the posterior variance
[ f1est, f2est, E0, covf1f2, status ] = gda_Esurface( f1list, f2list, E, sigma2dest );
sigmaf1 = sqrt( covf1f2(1,1) );
sigmaf2 = sqrt( covf1f2(2,2) );
% write out the frequencies and their 95% confidence intervals
fprintf('f1: %.3f +/- %.3f (95%%)\n', f1est, 2*sigmaf1 );
f1: 11.396 +/- 0.042 (95%)
fprintf('f2: %.3f +/- %.3f (95%%)\n', f2est, 2*sigmaf2 );
f2: 7.131 +/- 0.016 (95%)
% Use the standard least squares estimate of the covariance of the
% amplitudes (using the posterior variance of the data)
covm = sigma2dest * inv(G'*G);
A1est = mest(1); B1est=mest(2); A2est = mest(3); B2est=mest(4);
sigmaA1 = sqrt( covm(1,1) );
sigmaB1 = sqrt( covm(2,2) );
sigmaA2 = sqrt( covm(3,3) );
sigmaB2 = sqrt( covm(4,4) );
% write out the amplitudes and their 95% confidence intervals
fprintf('A1: %.3f +/- %.3f (95%%)\n', A1est, 2*sigmaA1 );
A1: 0.201 +/- 0.029 (95%)
fprintf('B1: %.3f +/- %.3f (95%%)\n', B1est, 2*sigmaB1 );
B1: 0.311 +/- 0.029 (95%)
fprintf('A2: %.3f +/- %.3f (95%%)\n', A2est, 2*sigmaA2 );
A2: 0.887 +/- 0.029 (95%)
fprintf('B2: %.3f +/- %.3f (95%%)\n', B2est, 2*sigmaB2 );
B2: 0.512 +/- 0.029 (95%)
% plot the predicted data
plot( t, dpre, 'r-', 'LineWidth', 3 );
fprintf('Caption: Observed data (black) and predicted data (red)\n');
Caption: Observed data (black) and predicted data (red)
% plot of error surface and estimated frequencies
axis( [f1min, f1max, f2min, f2max] );
% plot the error surface. Note the stripes on the image
% where f1 or f2 is near 7 Hz. This
% frequency has the dominant amplitude, so when one
% of the frequencies is about 7 Hz, the prediction will
% fit the data to a reasonable degree. As expected, the
% error surface is symmetrical about the f1=f2 line. We
% anticipated this behavior (see above).
imagesc( [f2min, f2max], [f1min, f1max], E );
xlabel('frequency f2 (Hz)');
ylabel('frequency f1 (Hz)');
% plot estimated frequencies and their 95% confidence intervals
plot( [f2est-2*sigmaf2, f2est+2*sigmaf2]', [f1est,f1est]', 'w-', 'LineWidth', 3 );
plot( [f2est, f2est]', [f1est-2*sigmaf1,f1est+2*sigmaf1]', 'w-', 'LineWidth', 3 );
fprintf('Caption: Error surface (colors) with estimated frequencies\n');
Caption: Error surface (colors) with estimated frequencies
fprintf('and their 95 percent confidence intervals (white)\n');
and their 95 percent confidence intervals (white)
% This code parallels "Method Summary 3, Nonlinear Least Squares"
% a global variable is necessary to use the biconjugate gradient solver
% Step 1: State the problem in words.
% This is a simple modification of the data smoothing
% problem solved in the example for Methods Summary 1, Least
% Squares. As before, the model parameters are
% a discretized version of a function m(x) (at equal
% increments Dx). But now the data are nonlinear
% functions of each model parameter, d(i) = erf( m(i) ).
% The error function erf(z) is sigmoidal in shape. Near
% the origin, erf(z)~z, but for large |z| it asymptotes
% to +/- 1. It thus squeezes the larger model parameters
% into a small range. The derivative is d erf(z) / dz
% = (2/sqrt(pi)) exp(-z^2)
% set up the auxiliary variable, x
% Step 2: Organize the problem in standard form
% number of model parameters = number of data = Nx
% and the data kernel is the identity matrix, G=I
% this is a synthetic test, so there is no real data
% the true model is a "ringy" function peaked at x=0
% the observed data is the true model plus random noise
mtrue=Atrue*exp(-2*pi*abs(x)/(b*Ltrue)).*cos(2*pi*x/Ltrue);
sigmad = 0.1; % noise level of the observations
dobs = dtrue+random('Normal',0,sigmad,N,1);
% Establish the accuracy of the data
% In this case, we know the data has accuracy
% sigmad, because it was specified when the
% synthetic data were created. I'm going to use this "correct" value.
% When you examine the plot, you should take notice of:
% the overall shape of the curve
% the shape of the central peak, which is rather flat
% (which is due to the nonlinearity; information about
% the height of the central peak is being lost, and
% that will limit the success of the inversion).
axis( [xmin, xmax, -1, 1] );
plot( x, dobs, 'ko', 'LineWidth', 2 );
fprintf('Caption: observed data d(x)\n');
Caption: observed data d(x)
% State the prior information in words.
% We assume that the function is smooth, meaning
% that its second derivative is small
% Organize the prior information in standard
% form, H is a second-difference operator, h=0. There
% are two fewer rows in H than there are model parameters
% use the row-column-value method to make the sparse matrix H
% an alternative would be the single command
% H = spdiags([z, -2*z, z], [0, 1, 2], K, M);
H = sparse(Hr,Hc,Hv,K,M);
% Establish the accuracy of the a priori information
% Suppose that I know that the function is roughly sinusoidal
% with an amplitude of about A and a wavelength of about L.
% Then if the function were a cosine wave m = A cos(2 pi x / L)
% its second derivative would be d2m/dx2 = -A (2 pi/ L)^2
% cos(2 pi x / L). The variance of the second derivative
% would therefore be about 0.5 A^2 (2 pi/ L)^4 (since the
% average value of cos^2 is 0.5). So I'm going to use this
% amount as an estimate of the variance of the prior information;
% that is, the second derivative is zero +/- sigmah.
sigma2h = 0.5 * (A^2) * ((2*pi/L)^4);
% Step 3: Decide upon a reasonable trial solution
% Step 4: Linearize the data equation
Niter = 100; % Maximum number of iterations
% perturbations in data and prior information
% The data kernel G is diagonal
dddm = (2/sqrt(pi)) * exp(-mk.^2);
% Use row-col-value method, but an
% alternative would be the command
% G = spdiags(dddm, 0, N, M);
Gr = zeros(MAXELEMENTS,1);
Gc = zeros(MAXELEMENTS,1);
Gv = zeros(MAXELEMENTS,1);
% Step 5: Form the combined equation
% Set up the matrix F and vector f
% Use row-col-value method
Fr = zeros(MAXELEMENTS,1);
Fc = zeros(MAXELEMENTS,1);
Fv = zeros(MAXELEMENTS,1);
k=1; % this is a trick to count rows of F
for i=(1:N) % the G, dobs part
% gdaFsparse(k,i) = dddm(i)/sigmad;
for i=(1:K) % The H, h=0 part
% gdaFsparse(k,i) = Dx2i/sigmah;
% gdaFsparse(k,i+1) = -2*Dx2i/sigmah;
% gdaFsparse(k,i+2) = Dx2i/sigmah;
gdaFsparse=sparse(Fr,Fc,Fv,N+K,M);
% Step 6: Iteratively improve the solution
% solve F Dm = f by biconjugate gradients
Dm = bicg( @gda_FTFmul, FTf, 1e-5, 4*(N+K) );
% terminate for very small Dm
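% a minimal sketch of the update and termination test assumed here
% (the tolerance of 1e-6 is illustrative)
mk = mk + Dm; % improved solution
if( sqrt(Dm'*Dm) < 1e-6*sqrt(mk'*mk) ) % negligible change
    break;
end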
end % next iteration
bicg converged at iteration 31 to a solution with relative residual 7.6e-06.
bicg converged at iteration 53 to a solution with relative residual 7.7e-06.
bicg converged at iteration 69 to a solution with relative residual 8.3e-06.
bicg converged at iteration 71 to a solution with relative residual 8.9e-06.
bicg converged at iteration 66 to a solution with relative residual 9.6e-06.
% Step 8: Examine the results
% note that the solution is not able to fit
% the true central peak very well
axis( [xmin, xmax, -5, 5] );
plot( x, mtrue, 'k-', 'LineWidth', 3 );
plot( x, mest, 'r-', 'LineWidth', 2 );
% Confidence interval calculation
% The covariance matrix is [cov m] = inv(F'*F)
% However, we do not necessarily need to compute the complete
% matrix. The code here solves for the k-th column, say ck,
% (or row, since it is symmetric). The idea is to solve the equation
% (F'F) ck = s, where s is a vector that is all zeros except
% a 1 in row k. Then ck is the k-th column of inv(F'*F)
varlist=[1:floor(N/20):N]; % list of values of x at which
% to calculate error bars
s = zeros(M,1); s(k) = 1.0; % all zeros except a 1 in row k
ck = bicg( @gda_FTFmul, s, 1e-5, 4*M ); % k-th column of inv(F'F)
% variance of estimated model parameter k is the k-th element of ck
sigmamest = sqrt( ck(k) );
% 95% confidence interval
mlow = mest(k)-2*sigmamest;
mhigh = mest(k)+2*sigmamest;
plot( [x(k), x(k)], [mlow, mhigh], 'b-', 'LineWidth', 3 );
end
[bicg converged in 36 to 71 iterations for each of the 21 solves above, with relative residuals between 6.8e-06 and 9.7e-06]
fprintf('Caption: True model (black), estimated model (red) and 95 percent confidence error bars (blue)\n');
Caption: True model (black), estimated model (red) and 95 percent confidence error bars (blue)
% Note that the error bars scale with the size of
% the solution. That's due to the erf() nonlinearity,
% which makes distinguishing an m of, say, 2 from an m of, say, 3 difficult.
% Step 9: Examine the model resolution
% The resolution matrix is RG = GMG * G
% with GMG = inv(F'F) G' inv(covd)
% However, as with covariance, one does not need to
% compute every row. This code just does a few rows.
% Note that the plot shows that the resolution is
% mostly peaked along the main diagonal.
reslist=[1:floor(N/9):N]; % do these rows
axis( [xmin, xmax, xmin-(xmax-xmin)/Nres, xmax+(xmax-xmin)/Nres] );
vard = sigma2d * ones(N,1); % variance of data
% (the code can handle the case where it varies)
% compute k-th row of the resolution matrix
% first: compute column of inverse of F'F
% note F'F symmetric, so column is also row
ck = bicg( @gda_FTFmul, s, 1e-5, 4*M );
% second: row of generalized inverse GMG(k,:)=gi
gi = ((ck')*(G'))./(vard');
% third: row of the resolution matrix, R(k,:) = GMG(k,:) * G
r_row = gi*G; % k-th row of the resolution matrix
sc = -((xmax-xmin)/Nres)/max(r_row);
plot( x, sc*r_row'+x(k), 'k-', 'LineWidth', 3 );
end
[bicg converged in 33 to 71 iterations for each of the 10 solves above, with relative residuals between 7.7e-06 and 9.9e-06]
fprintf('Caption: resolution matrix\n');
Caption: resolution matrix
% Examine the individual errors
% Note that the normalized prediction error is
% much larger than unity, and not uniform with x,
% indicating that the model is not fitting
% the data very well. The model fits the
% prior information a little better, since
% the size of the normalized error is closer to
% unity. However, it is not uniform in x.
% Our assumptions on the size and uniformity of
% sigmah are not quite right.
axis( [xmin, xmax, -10, 10] );
plot( [xmin, xmax]', [0, 0]', 'b:', 'LineWidth', 2);
plot( x, e/sigmad, 'ko', 'LineWidth', 3 );
fprintf('Caption: Normalized prediction error\n');
Caption: Normalized prediction error
axis( [xmin, xmax, -2, 2] );
plot( [xmin, xmax]', [0, 0]', 'b:', 'LineWidth', 2);
plot( x(2:N-1), l/sigmah, 'ko', 'LineWidth', 3 );
fprintf('Caption: Normalized error in prior information\n');
Caption: Normalized error in prior information
% Step 12: Examine the total error phiest
% in order to address the null hypothesis
% that departure from the expected value of
% nu is due to random error.
% In the plot, total error phiest is way
% above the maximum bound, indicating
% that the high total error is unlikely to be
% due to random variation, but rather due to some
% other cause, such as a poor theory.
phiest = (f-fpre)'*(f-fpre);
axis( [0, 1.1*pmax, 0, 1] );
plot( [p1, p1]', [0, 0.2]', 'r-', 'LineWidth', 2);
plot( [p2, p2]', [0, 0.2]', 'r-', 'LineWidth', 2);
plot( [phiest, phiest]', [0, 0.4]', 'k-', 'LineWidth', 3);
fprintf('Caption: total error phi (black) and confidence bounds (red)\n');
Caption: total error phi (black) and confidence bounds (red)
% We have only one model, so an F-test is not relevant
% This code parallels "Method Summary 4, MCMC Inversion"
sigmad = 2.0; % data error
sigmap = 0.2; % neighborhood
mtrue = [0.6, 0.4]'; % true solution
mstart = [0.62, 0.42]'; % starting solution
c1 = 1.00; % constant in d(m)
c2 = 1.04; % constant in d(m)
Nr = 100000; % number of realizations
Nb = 10000; % discard burn in
N = 101; % number of data
% Step 1: Organize the problem in standard form
% This is fundamentally a curve-fitting problem of the form d=g(m)
% the curve is the following function. I am using a quasi-function
% coding technique, repeating the formula inline to avoid defining a
% function, though one really should use a function for this synthetic data
myd = sin(4*pi*(c1+(mym(1)^2)*x))+sin(4*pi*(c2+(mym(2)^2)*x));
% Step 2: Decide whether MCMC is appropriate
% Note that this problem is highly non-unique
% if (m1,m2) minimizes the error, then so do (-m1,m2), (m1,-m2) and (-m1,-m2)
% furthermore, when c1=c2, m1 and m2 can be interchanged without changing the error
% thus a single estimate mest made using Newton's method is not going to be much use
% we really want to learn something about the form of the non-uniqueness
% instead, I examine the global attributes of the solution, and especially
% how polar angle and radius vary
% synthetic observed data
dobs = dtrue + random('Normal',0.0,sigmad,N,1);
% Step 3: Examine the data
% Plot data. I have intentionally made the noise level very high
plot(x,dtrue,'k-','LineWidth',2);
fprintf('Caption: True data (curve), observed data(circles)\n');
Caption: True data (curve), observed data(circles)
% Step 4: Establish the accuracy of the data
% The data are synthetic, so I know the variance sigmad2
% For plotting purposes, I am exhaustively computing the error
% surface E(m). That's unnecessary for MCMC, and can only be done
% when the number M of model parameters is small
m1 = mmin+(mmax-mmin)*[0:Nm-1]'/(Nm-1);
m2 = mmin+(mmax-mmin)*[0:Nm-1]'/(Nm-1);
myd = sin(4*pi*(c1+(mym(1)^2)*x))+sin(4*pi*(c2+(mym(2)^2)*x));
% Step 5: Choose a likelihood pdf describing the behavior of the data
% I'm using a Normal PDF with variance sigmad2
% Step 6: State the prior information
% the model is near m1=m2=0, with large variance
% Step 7: State the accuracy of the prior information
% Step 8: Choose a prior pdf describing the behavior of the prior model
%Step 11: choose length and first value of the Markov chain of models
mr1 = zeros(Nr,1); % markov chain of m1's
mr2 = zeros(Nr,1); % markov chain of m2's
mr1(1)=mstart(1); % starting link of the chain (for m1)
mr2(1)=mstart(2); % starting link of the chain (for m2)
myd = sin(4*pi*(c1+(mym(1)^2)*x))+sin(4*pi*(c2+(mym(2)^2)*x));
e = dobs-dpre; % prediction error
% Step 9: Form the likelihood, the logarithm of the posterior pdf
L = -0.5*e'*e/sigmad2-0.5*l'*l/sigmam2;
% Step 12: Build the Markov chain using the Metropolis-Hastings algorithm:
% Step 10: Choose a pdf for finding a proposed successor
mp1 = random('Normal',mr1(i-1),sigmap);
mp2 = random('Normal',mr2(i-1),sigmap);
myd = sin(4*pi*(c1+(mym(1)^2)*x))+sin(4*pi*(c2+(mym(2)^2)*x));
Lp = -0.5*ep'*ep/sigmad2-0.5*lp'*lp/sigmam2;
% add to end of markov chain
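% a minimal sketch of the Metropolis-Hastings acceptance rule assumed
% here: accept the proposal with probability min(1, exp(Lp-L))
if( log(random('Uniform',0,1)) < (Lp-L) )
    mr1(i) = mp1; mr2(i) = mp2; % accept the proposed model
    L = Lp; % its log-posterior becomes the current one
else
    mr1(i) = mr1(i-1); mr2(i) = mr2(i-1); % repeat the previous link
end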
% discard beginning of chain to allow for burn-in
% Step 13: Assess the ensemble solution
% plot solution ensemble on error surface
axis( [mmin,mmax,mmin,mmax] );
imagesc([mmin,mmax],[mmin,mmax],E);
fprintf('Caption: Error surface (colors) with ensemble solution (white dots) superimposed\n');
Caption: Error surface (colors) with ensemble solution (white dots) superimposed
% compute radius and polar angle of solution
th(i) = (180.0/pi)*atan2(mr1(i),mr2(i));
r(i) = sqrt(mr1(i)^2+mr2(i)^2);
% histogram of polar angle
bins = binmin + (binmax-binmin)*[0:Nbins]'/(Nbins-1);
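% a minimal sketch of the histogram computation assumed here
h = hist(th, bins); % count of ensemble members per angle bin
hmax = max(h); % for scaling the plot axis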
axis( [binmin,binmax,0.0,1.1*hmax] );
plot(bins,h,'k-','LineWidth',2);
fprintf('Caption: Histogram of polar angle of members of ensemble solution\n');
Caption: Histogram of polar angle of members of ensemble solution
bins = binmin + (binmax-binmin)*[0:Nbins]'/(Nbins-1);
axis( [binmin,binmax,0.0,1.1*hmax] );
plot(bins,h,'k-','LineWidth',2);
fprintf('Caption: Histogram of radius of members of ensemble solution\n');
Caption: Histogram of radius of members of ensemble solution
% mean radius as a function of polar angle
axis( [-180,180,0.0,2.0] );
k = find( (th>=i) & (th<(i+1)) ); % ensemble members in this 1 degree bin
meanr = mean(r(k)); % their mean radius
plot(i,meanr,'ko','LineWidth',2);
fprintf('Caption: Mean radius vs polar angle (1 deg bins)\n');
Caption: Mean radius vs polar angle (1 deg bins)
% This code parallels "Method Summary 5,
% Bootstrap Confidence Intervals"
% This example relies on syntheic data created by the
% gdama17_10 script. That script makes two
% true pulses p1(t) and p2(t) that have an amplitude
% spectral density of exactly
% s1(f)=A1*exp(-pi*abs(f)*tstar1) and
% s2(f)=A2*exp(-pi*abs(f)*tstar2)
% with tstar1=0.1 and tstar2=0.03. However, random noise
% is added to the pulses before computing their spectrum,
% so the observed pulses do not have exactly this spectrum.
% We calculate the 95% confidence of m2 = pi*(tstar2-tstar1) by
% bootstrapping and compare it to the estimate obtained via Newton's method
% the data file 'pulses.txt' has three columns,
% frequency, s1(f) and s2(f)
% Step 1: Identify the parameter for which confidence intervals are needed
% The parameter is m2 = pi*(tstar2-tstar1)
% Step 2: Identify the data and a method of estimating m2 from the data
% the data are the spectral ratios, d=s2(f)/s1(f)
% we use linearized least squares to estimate m2
% true solution, known since this is a synthetic example
D = load('../data/pulses.txt');
logdobs = logs2obs-logs1obs;
% linearized fit, to get starting value for Newton's method
logmls = (G'*G)\(G'*logdobs);
dprels = exp(logmls(1)+logmls(2)*f);
mls = [exp(logmls(1)), logmls(2) ]';
% step 3: reference solution via Newton's method
% di = gi(m1,m2) = m1 exp( m2 f )
dev = max(abs(mest-mlast));
% estimate of covariance of m2 via Newton's method
% it can be compared with the bootstrap result
sigmam2 = sqrt(covm(2,2));
fprintf('true solution: %.4f %.4f\n',mtrue(1),mtrue(2));
true solution: 1.0000 -0.0628
fprintf('linearized solution: %.4f %.4f\n',mls(1),mls(2));
linearized solution: 0.9663 -0.0547
fprintf('iterative solution: %.4f %.4f (%d iter)\n',mest(1),mest(2),i);
iterative solution: 1.0186 -0.0588 (4 iter)
axis( [f(1), f(end), 0, 1.1*dmax] );
plot( f, dprels, 'r-', 'LineWidth', 2 );
plot( f, dpre, 'g-', 'LineWidth', 2 );
plot( f, dobs, 'ko', 'LineWidth', 2 );
ylabel('d = s2(f)/s1(f)');
fprintf('Caption: spectral ratio of two pulses, linearized estimate (red),\n');
Caption: spectral ratio of two pulses, linearized estimate (red),
fprintf('iterative estimate (green), observed (circles)\n');
iterative estimate (green), observed (circles)
axis( [f(1), f(end), dmax-7, dmax+1] );
plot( f, logdprels, 'r-', 'LineWidth', 2 );
plot( f, logdpre, 'g-', 'LineWidth', 2 );
plot( f, logdobs, 'ko', 'LineWidth', 2 );
ylabel('d = log(s2(f))-log(s1(f))');
title('log spectral ratio of two pulses');
% plot whose horizontal axis is m2
% the true value of m2 is the vertical blue bar
% the iterative least squares estimate of m2 is the vertical red bar
% and its 95% confidence is the horizontal red bar
axis( [0.04, 0.08, 0, 1] );
plot( [-mtrue(2), -mtrue(2)]', [0, 0.4], 'b-', 'LineWidth', 3 );
plot( [-mest(2), -mest(2)]', [0, 0.35], 'r-', 'LineWidth', 3 );
plot( [-mest(2)-2*sigmam2, -mest(2)+2*sigmam2]', [0.3, 0.3], 'r-', 'LineWidth', 3 );
% now use the bootstrap method to compute an empirical
% probability density function of Dtstar and plot it
Nrs = 10000; % number of realizations
m2 = zeros(Nrs,1); % tabulate the m2 of each realization
for ir = (1:Nrs) % loop over realizations
% randomly resample the data, with replacement
j = unidrnd(N,N,1); % N random integer indices in 1..N
frs = f(j); % resampled frequencies
drs = dobs(j); % resampled data
% solution by Newton's method
Gk = [ x, mrs(1)*frs.*x];
dev = max(abs(mrs-mlast));
m2(ir) = -mrs(2); % tabulate minus m2
% Now construct a histogram of the m2 values
% and scale it to an empirical probability density function
Nbin = 100; % number of bins in histogram
binmin = 0.04; binmax = 0.08; % range of histogram (matching the plot axis)
Dbin = (binmax-binmin)/(Nbin-1);
bins = binmin + Dbin*[0:Nbin-1]';
p = hist(m2,bins); % histogram = empirical pdf p(q)
p = p/(Dbin*sum(p)); % normalize to unit area
pmax = max(p); % find maximum for plotting purposes
plot( bins, 0.5*p/pmax, 'k-', 'LineWidth', 3 );
P = Dbin*cumsum(p); % integrate to an empirical cumulative distribution P(q)
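% a minimal sketch of extracting the 95 percent confidence interval
% from the empirical cumulative distribution (qL, qR as used below)
iL = find(P>=0.025,1); qL = bins(iL); % left edge, 2.5th percentile
iR = find(P>=0.975,1); qR = bins(iR); % right edge, 97.5th percentile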
% use the cumulative distribution to find the left and right
% edges of the 95% confidence interval and plot them as
% green bars. The upshot is that they agree pretty well with
% the least squares estimate (red horizontal bar). Because of
% the tendency of the logarithm to preferentially amplify
% small values, the pdf is centered a little
% to the left of the true Dtstar value. However, the displacement
% is small compared to the width of the confidence interval.
plot( [qL, qL]', [0, 0.2]', 'g-', 'LineWidth', 2 );
plot( [qR, qR]', [0, 0.2]', 'g-', 'LineWidth', 2 );
fprintf('Caption: Decay rate parameter m(2): true (blue), iterative estimate\n');
Caption: Decay rate parameter m(2): true (blue), iterative estimate
fprintf('and its 95 percent confidence (red), bootstrap histogram (black curve)\n');
and its 95 percent confidence (red), bootstrap histogram (black curve)
fprintf('and its 95 percent confidence (green).\n');
and its 95 percent confidence (green).
fprintf('linearized estimate of sigma-m2 %.4f\n',sigmam2);
linearized estimate of sigma-m2 0.0021
fprintf('bootstrap estimate of sigma-m2 %.4f\n',(qR-qL)/4);
bootstrap estimate of sigma-m2 0.0024
fprintf('The Newton method estimate of sigma-m2 agrees well with the bootstrap estimate\n');
The Newton method estimate of sigma-m2 agrees well with the bootstrap estimate
% This code parallels "Method Summary 6, Factor Analysis".
% This is a hypothetical scenario with synthetic data, only,
% in which the object is to determine the source(s) of a pollutant
% from observations of its concentration in dust collected
% on a 2D grid of sampling sites. The pollutant is an element
% with four isotopes. The presumption is that each source
% (two factories and a natural background source) has a distinct
% isotopic pattern and that a dust sample at any given location
% is a mixture of these patterns. The pollutant emitted from the
% factories is concentrated near the factories; the natural
% pollutant is spread more uniformly across the area.
% This section creates the synthetic data.
% The factories will be numbered 1 and 2; the natural source 3
% factory locations, both as indices and (x,y) positions
ix1=3; x1=x(ix1); iy1=3; y1=y(iy1);
ix2=17; x2=x(ix2); iy2=17; y2=y(iy2);
% the factors are the fraction of the 4 isotopes for each source
% note that isotope 1 is present in a smaller
% fraction than the other three. However, the
% measurement technique for determining it is also better.
f1 = [0.0104, 0.2084, 0.1762, 0.6050]';
f2 = [0.0165, 0.2748, 0.2365, 0.4722]';
f3 = [0.0140, 0.2410, 0.2210, 0.5240]';
% We now build the sample matrix S(i,j). It gives
% the amount of isotope j observed at station i,
% measured, say, in picograms per square meter per day
% of dust deposited on a surface exposed to the air.
N = Nx*Ny; % number of samples
M = 4; % number of isotopes
Strue = zeros( N, M ); % true (noise free) samples matrix
Sobs = zeros( N, M ); % observed (noisy) sample matrix
jofixiy =zeros(Nx,Ny); % sample number as function of (ix,iy)
R1 = sqrt( (x(ix)-x1)^2 + (y(iy)-y1)^2 ); % distance to factory 1
R2 = sqrt( (x(ix)-x2)^2 + (y(iy)-y2)^2 ); % distance to factory 2
C1 = 800*(1/((R1/10)+1))+random('uniform',0,v,1); % loading 1 declines with R1
C2 = 200*(1/((R2/10)+1))+random('uniform',0,v,1); % loading 2 declines with R2
C3 = 10+random('uniform',0,v,1); % loading 3 is more-or-less constant
Strue(j,:) = C1*f1' + C2*f2' + C3*f3';
n = random('Normal', 0, sigmaS, 1, M );
Sobs(j,:) = Strue(j,:) + n./w;
% Step 1: State the problem in words
% A dust sample is assumed to contain four isotopes
% of a toxic chemical element. The pollutant gets
% into the dust from factories (which are spatially
% localized) and natural sources (which are spatially
% distributed). A factor is the pattern of isotopes
% associated with a particular source. A sample is
% a mixture of the factors, where the loading represents
% the mass per unit area per unit time of the pollutant
% deposited from each source. In our scenario, the wind
% is physically mixing the dust particles, so that
% each sample site receives some dust from each source.
% Step 2: Organize the data as a sample matrix S
% The matrix Sobs(i,j) is already in the correct form
% (isotope j at station i).
% Step 3: Establish weights that reflect the importance
% of the elements. The first isotope is known to have been
% measured with much better (say 10x better) accuracy than
% the others, so it is given a proportionally larger weight.
% Step 4: Perform singular value decomposition and
% form the factor matrix F and loading matrix C
[U,SIGMA,V] = svd(Sobs*diag(w),0);
Fpre = V'*diag(1./w); % factors
Cpre = U*SIGMA; % loadings
% Step 5: Determine the number P of important factors
% Plot the diagonal of SIGMA as a function of row index i
% and choose P to include all rows with "large" SIGMA(i,i)
% Since there are only M=4 singular values, we opt to print them all
fprintf('Singular values\n');
fprintf('%f %f %f %f\n', SIGMA(1,1), SIGMA(2,2), SIGMA(3,3), SIGMA(4,4) );
6665.694340 134.651059 0.318151 0.201155
% In this run, diag(SIGMA) = [6666, 135, 0.32, 0.20];
% the last two singular values are very much smaller than the first two
% Step 6: Reduce the number of factors from M to P
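% a minimal sketch of the truncation assumed here, keeping the first
% P factors (FpreP and SpreP are used below; CpreP is illustrative)
P = 3; % number of factors retained
FpreP = Fpre(1:P,:); % first P factors
CpreP = Cpre(:,1:P); % their loadings
SpreP = CpreP*FpreP; % reconstructed (denoised) sample matrix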
% Step 7: Predict the data and examine the error
fprintf('RMS error of Sobs - SpreP\n');
RMS error of Sobs - SpreP
fprintf(' isotope %d is %f (mean is %f)\n', i, rmsi, mi );
end
isotope 1 is 0.000875 (mean is 5.828983)
isotope 2 is 0.003867 (mean is 110.752145)
isotope 3 is 0.003027 (mean is 94.233782)
isotope 4 is 0.000710 (mean is 286.792442)
% The rms errors of the isotopes (printed above) seem
% acceptably small, given the overall size of the
% corresponding isotope means in the sample matrix
% Step 8: Interpret the factors and their loadings
% print out the factors, normalizing them so the isotopes in each factor sum to unity
fprintf('factor %d: %f %f %f %f\n', i, FpreP(i,1)/norm, FpreP(i,2)/norm, FpreP(i,3)/norm, FpreP(i,4)/norm);
end
factor 1: 0.011629 0.221646 0.188523 0.578202
factor 2: 0.072062 0.877075 0.789386 -0.738523
factor 3: -0.024313 -5.881991 6.829341 0.076962
% it's not so clear to me that these factors are helpful,
% because they do not have a 1:1 correspondence to sources
% A map of the factor loadings shows a clear
% spatial pattern with two peaks, which we can assume
% to be the locations of two factories that are
% emitting the pollutant.
gda_draw(' ', map1,'caption C1',' ',map2,'caption C2',' ',map3,'caption C3');
fprintf('Caption: Maps of the first three factor loadings\n');
Caption: Maps of the first three factor loadings
% measuring the locations of the two sources by eye gives indices
% measuring a point far from either source gives index
% An informative quantity is the sample closest to
% each of the point sources, and furthest away from both.
% We use SpreP and not Sobs, because we hope the former has
% rejected some of the noise present in the latter
s1 = SpreP( jofixiy(kx1,ky1), :);
s2 = SpreP( jofixiy(kx2,ky2), :);
s3 = SpreP( jofixiy(kx3,ky3), :);
% these give some sense of the composition of the sources
fprintf('Samples at three chosen points\n');
Samples at three chosen points
fprintf('s1 %f %f %f %f\n', s1(1), s1(2), s1(3), s1(4) );
s1 0.010909 0.213858 0.181341 0.593891
fprintf('s2 %f %f %f %f\n', s2(1), s2(2), s2(3), s2(4) );
s2 0.013025 0.236843 0.202341 0.547791
fprintf('s3 %f %f %f %f\n', s3(1), s3(2), s3(3), s3(4) );
s3 0.011713 0.222514 0.189418 0.576354
% perhaps an even better estimate of sources 1 and 2
% is constructed by subtracting the background 3
% We use SpreP and not Sobs, because we hope the former has
% rejected some of the noise present in the latter
p1 = SpreP( jofixiy(kx1,ky1), :) - SpreP( jofixiy(kx3,ky3), :);
p2 = SpreP( jofixiy(kx2,ky2), :) - SpreP( jofixiy(kx3,ky3), :);
fprintf('Best estimate of the two point sources\n');
Best estimate of the two point sources
fprintf('p1 %f %f %f %f\n', p1(1), p1(2), p1(3), p1(4) );
p1 0.010291 0.207207 0.175135 0.607368
fprintf('p2 %f %f %f %f\n', p2(1), p2(2), p2(3), p2(4) );
p2 0.018159 0.292915 0.252912 0.436014
% makes data for the example for
% Method Summary 5, Bootstrap Confidence Intervals
% note: filename is currently mypulses.txt; change back to pulses.txt
% to overwrite the data used in the analysis script
f = Df*[0:Nf-1,-(Nf-2:-1:1)]';
p1t = exp(-pi*abs(f)*tstar1);
p1true = circshift(p1true,Nt/2);
p1obs = p1true+random('Normal',0,sigmap,Nt,1);
p2t = exp(-pi*abs(f)*tstar2);
p2true = circshift(p2true,Nt/2);
p2obs = p2true+random('Normal',0,sigmap,Nt,1);
% minimum phase pulses with this spectrum
[z,p1obsmp] = rceps(p1obs);
[z,p2obsmp] = rceps(p2obs);
% discard near-zero imaginary part
axis( [t(1), t(end), -0.1*pmax, 1.1*pmax] );
plot( t, p1obsmp, 'k-', 'LineWidth', 2 );
plot( t, p2obsmp, 'r-', 'LineWidth', 2 );
fprintf('Caption: time series 1 (black) and time series 2 (red)\n');
Caption: time series 1 (black) and time series 2 (red)
s1obsmp = abs(Dt*fft(p1obsmp));
logs1obsmp = log(abs(s1obsmp));
s2obsmp = abs(Dt*fft(p2obsmp));
logs2obsmp = log(abs(s2obsmp));
Nfs = floor(3*Nf/4); % max freq saved
% plot amplitude spectra of time series
smax = max([s1obsmp;s2obsmp]);
axis( [0, fny, 0, smax+1] );
plot( f(1:Nfs), s1obsmp(1:Nfs), 'k-', 'LineWidth', 2 );
plot( f(1:Nfs), s2obsmp(1:Nfs), 'r-', 'LineWidth', 2 );
fprintf('Caption: Plot of spectral amplitude of time series 1 (black) and time series 2 (red)\n');
Caption: Plot of spectral amplitude of time series 1 (black) and time series 2 (red)
% plot log amplitude spectra of time series
smax = max([logs1obsmp;logs2obsmp]);
axis( [0, fny, smax-7, smax+1] );
plot( f(1:Nfs), logs1obsmp(1:Nfs), 'k-', 'LineWidth', 2 );
plot( f(1:Nfs), logs2obsmp(1:Nfs), 'r-', 'LineWidth', 2 );
fprintf('Caption: Log plot of spectral amplitude of time series 1 (black) and time series 2 (red)\n');
Caption: Log plot of spectral amplitude of time series 1 (black) and time series 2 (red)
dlmwrite('../data/mypulses.txt',[f(1:Nfs),s1obsmp(1:Nfs), s2obsmp(1:Nfs)],'delimiter','\t');