Live Script edama_04
This Live Script supports Chapter 4 of Environmental Data Analysis with MATLAB or PYTHON, Third Edition, by WIlliam Menke
The script naming system has the form edama_CC_SS, where CC is the chapter number and SS is the script number.
% edama_04_01: data kernel for straight-line linear model
% di = m1 + m2 *xi
clear all; % clears previously-defined variables
% performing a clear at the top of a LiveSript is a good idea
% standrd set-up for an x-axis
N = 10;
Dx=1.0;
x = Dx*[0:N-1]';
% manually set the columns of G
M=2; % number of columns
G=zeros(N,M); % create G of all zeroes
G(:,1)=1; % column 1 of ones
G(:,2)=x; % column 2 of x's
G
% eda0ma_4_02: data kernel for quadratic curve linear model
% di = m1 + m2*xi + m3*xi
% standrd set-up for an x-axis
N = 10;
Dx=1.0;
x = Dx*[0:N-1]';
% manually set the columns of G
M=3; % number of columns
G=zeros(N,M); % create G of all zeroes
G(:,1)=1; % column 1 of ones
G(:,2)=x; % column 2 of x's
G(:,3)=x.^2; % column 3 of x-squared
G
% edama_04_03: data kernel for polynomial linear model
% di = m1 + m2 *xi + m2 * x^2 + ...
% standard setup for x-axis
N = 32;
Dx=1/(N-1);
x = Dx*[0:N-1]'; % x defined on 0 to 1
M=N; % G is square
G=zeros(N,M); % create a G containing zeroes
G(:,1) = 1; % handle first column individually
for i = [2:M] % loop over remaining columns
G(:,i) = x .^ (i-1); % column is x raised to (i-1) power
end
eda_draw(G,'caption G');
% edama_04_04: data kernel for Fourier sum linear model
% standard setup for x-axis
N = 32;
Dx=1/(N-1);
x = Dx*[0:N-1]'; % x defined on 0 to 1
xmax=x(N);
M=N-2; % G not square, since I omit the lambda=infinity case here
% wavelengths lambda
Mo2=M/2; % variable equal to M/2
lambda= 2*xmax ./ [1:Mo2]';
G=zeros(N,M); % create G of zeroes
for i = [1:Mo2] % loop over (cos, sin) pairs
ic = 2*i-1; % column for cos( ... lambda(i) )
is = 2*i; % column for sin( ... lambda(i) )
G(:,ic) = cos( 2*pi*x/lambda(i) ); % set cosine column
G(:,is) = sin( 2*pi*x/lambda(i) ); % set sine column
end
eda_draw(G,'caption G');
% edama_04_05: data kernel viewed as a concatenation of columns
% standard setup for x-axis
N = 32;
Dx=1/(N-1);
x = Dx*[0:N-1]'; % x defined on 0 to 1
xmax=x(N);
M=N-2; % G not square, since I omit the lambda=infinity case here
Mo2=M/2; % variable for M/2
% largest wavelenth is 2*xmax
% subsequent wavelengths get smaller
lambda= 2*xmax ./ [1:Mo2]';
G=zeros(N,M); % create G of zeroes
for i = [1:Mo2] % loop over (cos, sin) pairs
ic = 2*i-1; % column for cos( ... lambda(i) )
is = 2*i; % column for sin( ... lambda(i) )
G(:,ic) = cos( 2*pi*x/lambda(i) ); % set cosine column
G(:,is) = sin( 2*pi*x/lambda(i) ); % set sine column
end
% foor display purposes, break out columns of G into vectors gi
g1 = G(:,1);
g2 = G(:,2);
g3 = G(:,3);
g4 = G(:,4);
gm = G(:,M);
eda_draw(G,'caption Gm',' ',g1,'caption g1',' ',g2,'caption g2',' ',g3,'caption g3',' ',g4,'caption g4', '...',' ',gm,'caption gM');
% edama_04_06: data kernels for weigthed average linear model
% note: this implementation requires an odd number of weights
% bud does not require them to be symmetric
N = 32;
M=N; % G is square
% three point average
w = [1, 2, 1]'; % three point weighted average
La = length(w); % length of w
Lw = floor(La/2)+1; % position middle weight
n = sum(w); % sum of weights
w = w/n; % normalized weights
wf = flipud(w); % weights, flipped upside-down
c = zeros(N,1); % initialize left column
r = zeros(M,1); % initialize top row
c(1:Lw)=wf(Lw:La); % copy weights to left column
r(1:Lw)=w(Lw:La); % copy weight to top row
G = toeplitz(c,r); % create data kernel
G3 = G; % save for plotting puposes
% give point average
w = [1, 2, 3, 2, 1]'; % three point weighted average
La = length(w); % length of w
Lw = floor(La/2)+1; % position middle weight
n = sum(w); % sum of weights
w = w/n; % normalized weights
wf = flipud(w); % weights, flipped upside-down
c = zeros(N,1); % initialize left column
r = zeros(M,1); % initialize top row
c(1:Lw)=wf(Lw:La); % copy weights to left column
r(1:Lw)=w(Lw:La); % copy weight to top row
G = toeplitz(c,r); % create data kernel
G5 = G; % save for plotting puposes
% nine point average
w = [1, 2, 3, 4, 5, 4, 3, 2, 1]'; % three point weighted average
La = length(w); % length of w
Lw = floor(La/2)+1; % position middle weight
n = sum(w); % sum of weights
w = w/n; % normalized weights
wf = flipud(w); % weights, flipped upside-down
c = zeros(N,1); % initialize left column
r = zeros(M,1); % initialize top row
c(1:Lw)=wf(Lw:La); % copy weights to left column
r(1:Lw)=w(Lw:La); % copy weight to top row
G = toeplitz(c,r); % create data kernel
G9= G; % save for plotting puposes
eda_draw(' ',G3,'caption G-3',' ',G5,'caption G-5',' ',G9,'caption G-9');
% eda04_07: data plot with error bars
% reads in noisy, linear data from file
% and computes and plots prediction error for a particular
% choice of intercept and slope, m1 and m2
% model paramerters are intercept and slope
M=2;
mest=[1.1, 2.3]'; % model estimate
% load (x,dobs) data from file
% made with Excel using 1+2*x+0.5*(rand()-1)
T = load('../Data/linedata01.txt');
% now independently name the two columns of data
x = T(:,1); % auxiliary varible
dobs = T(:,2); % observed data
N = length(dobs);
% plot data as black circles
figure(1);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis([-6 6 -15 15]);
plot(x,dobs,'ko','LineWidth',2);
xlabel('x');
ylabel('d');
titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );
title(titlestr);
% data kernel for straight-line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=x;
% compute predicted data and errors
dpre = G*mest; % predicted data
e=dobs-dpre; % individual errors
E = e'*e; % total error
% compute and plot predicted data
plot(x,dpre,'k-','LineWidth',2);
% compute and plot error
for i = [1:N]
y1 = dobs(i);
y2 = dobs(i)-e(i);
plot( [x(i), x(i)], [y1, y2], 'k:' ,'LineWidth',2 );
end
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis([-6 6 -5 5]);
plot(x,e,'ko','LineWidth',2);
plot([-6,6]',[0,0]','k:','LineWidth',1);
for i = [1:N]
plot( [x(i), x(i)], [0, e(i)], 'k:' ,'LineWidth',2 );
end
xlabel('x');
ylabel('e');
fprintf('error E = %.2f\n',E);
% edama_04_08: grid search for intercept and slope
% of trend line fitted to noisy data
% load (x,dobs) data from file
T = load('../Data/linedata01.txt');
% now independently name the two columns of data
x = T(:,1);
dobs = T(:,2);
N = length(dobs);
M = 2;
% data kernel for straight line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=x;
% define (m1, m2) grid
L1=101; L2=101;
m1min=0; m1max=4;
m2min=0; m2max=4;
m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);
m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);
% evaluate error at each grid point
E=zeros(L1,L2);
for i = [1:L1] % loop over m1's
for j = [1:L2] % loop over m2's
mest = [ m1(i), m2(j) ]'; % put m's into vector form
dpre = G*mest; % predicted data
e = dobs-dpre; % prediction error
E(i,j) = e'*e; % total error
end
end
% best estimate is the one with smallest error
% find the (i,j) for which E(i,j) is minimum
[Etemp, k] = min(E);
[Emin, j] = min(Etemp);
i=k(j);
% estimated model parameters
m1est = m1(i);
m2est = m2(j);
mest = [m1est,m2est]';
% note: display log of error for better range of grey tones
eda_draw(' ',log(E),'caption E(m1,m2)');
dpre = G*mest; % predicted data
e = dobs-dpre; % individual errors
E = e'*e; % total error
fprintf('Emin: %.2f\n', Emin );
fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );
fprintf('m1est: %.2f m2est: %.2f\n', m1est, m2est );
% plot results
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis([-6 6 -15 15]);
plot(x,dobs,'ko','LineWidth',2);
for i = [1:N]
plot( [x(i), x(i)], [dobs(i), dpre(i)], 'k:' ,'LineWidth',2 );
end
plot(x,dpre,'k-','LineWidth',2);
xlabel('x');
ylabel('d');
titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );
title(titlestr);
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis([-6 6 -5 5]);
plot(x,e,'ko','LineWidth',2);
plot([-6,6]', [0,0]','k-','LineWidth',1);
for i = [1:N]
plot( [x(i), x(i)], [0, e(i)], 'k:' ,'LineWidth',2 );
end
xlabel('x');
ylabel('e');
% eda04_09: grid search with shifted x values
% load (x,dobs) data from file
T = load('../Data/linedata01.txt');
% now independently name the two columns of data
x = T(:,1);
dobs = T(:,2);
N = length(dobs);
M = 2;
% this code does a grod search three times
% Step 1: shift x values
xplus=x+2;
% unshifted: d = m1' + m2' * x
% shifted: d = m1 + m2 * (x+2)
% = (m1+ 2*m2) + m2*x
% so m1' = (m1+ 2*m2) and m2'=m2
% data kerner for straight line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=xplus;
% define the (m1, m2) grid
L1=100; L2=100;
m1min=-10; m1max=10;
m2min=-10; m2max=10;
m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);
m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);
% evaluate the error at each grid point
Eplus=zeros(L1,L2);
for i = [1:L1] % loop over m1's
for j = [1:L2] % loop over m2's
mest = [ m1(i), m2(j) ]'; % put m's into vector form
dpre = G*mest; % predicted data
e = dobs-dpre; % individual errors
Eplus(i,j) = e'*e; % total error
end
end
% best estimate is the one with smallest error
% find the (i,j) for which E(i,j) is minimum
[Etemp, k] = min(Eplus);
[Emin, j] = min(Etemp);
i=k(j);
% estmated model parameters
m1est = m1(i);
m2est = m2(j);
fprintf('shift: x+2' );
fprintf('Emin: %.2f\n', Emin );
fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );
fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );
% Step 2: no shift
x0=x;
% define (m1, m2) grid
L1=100; L2=100;
m1min=-10; m1max=10;
m2min=-10; m2max=10;
m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);
m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);
% data kerner for straight line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=x0;
% evaluate error at each grid point
E0=zeros(L1,L2);
for i = [1:L1] % loop over m1's
for j = [1:L2] % loop over m2's
mest = [ m1(i), m2(j) ]'; % put m's into vector form
dpre = G*mest; % predicted data
e = dobs-dpre; % prediction error
E0(i,j) = e'*e; % total error
end
end
% search grid for minimum E
[Etemp, k] = min(E0);
[Emin, j] = min(Etemp);
i=k(j);
m1est = m1(i);
m2est = m2(j);
fprintf('no shift' );
fprintf('Emin: %.2f\n', Emin );
fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );
fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );
% Step 3: shift x values the other way
xminus=x-2;
% define (m1, m2) grid
L1=100; L2=100;
m1min=-10; m1max=10;
m2min=-10; m2max=10;
m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);
m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);
% data kerner for straight line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=xminus;
% evaluate error at each grid point
Eminus=zeros(L1,L2);
for i = [1:L1] % loop over m1's
for j = [1:L2] % loop over m2's
mest = [ m1(i), m2(j) ]'; % put m's into vector form
dpre = G*mest; % predicted data
e = dobs-dpre; % prediction error
Eminus(i,j) = e'*e; % total error
end
end
% search grid for minimum E
[Etemp, k] = min(Eminus);
[Emin, j] = min(Etemp);
i=k(j);
m1est = m1(i);
m2est = m2(j);
fprintf('shift: x-2' );
fprintf('Emin: %.2f\n', Emin );
fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );
fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );
eda_draw(' ',log(Eplus),'caption E+', ' ',log(E0),'caption E0', ' ',log(Eminus),'caption E-');
% edama_04_10: least-squares solution
% for straight line linear model
% reads in noisy, linear data from file
% and computes and plots predicted data
% using least-squares estimate of intercept
% and slope, m1 and m2
% load file of noisy data
T = load('../Data/linedata01.txt');
% now independently name the two columns of data
x = T(:,1);
dobs = T(:,2);
N = length(dobs);
M=2;
% data kernel for straight line case
G=zeros(N,M);
G(:,1)=1;
G(:,2)=x;
% standard least squares solution
GTG = G'*G;
mest = GTG\(G'*dobs); % least squares estimate of model parameters
% quantities derived from least squares solution
dpre = G*mest; % predicted data
e=dobs-dpre; % individual errors
E = e'*e; % total error
sigmad2 = E/(N-M); % posterior variance of data
Cm = sigmad2*inv(GTG); % covariance of model parameters
sigmam1 = sqrt( Cm(1,1) ); % sqrt variance of model par 1
sigmam2 = sqrt( Cm(2,2) ); % sqrt variance of model par 2
fprintf('total error E: %.2f\n', E);
fprintf("intercept %.2f +/- %.2f (%%95)\n", mest(1), 2*sigmam1);
fprintf("slope t %.2f +/- %.2f (%%95)\n", mest(2), 2*sigmam2);
% plot data as black circles
figure(1);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
axis([-6 6 -15 15]);
hold on;
plot(x,dobs,'ko','LineWidth',2);
plot(x,dpre,'k-','LineWidth',2);
for i = [1:N]
plot([x(i),x(i)]', [dobs(i),dpre(i)]', 'k:');
end
xlabel('x');
ylabel('d');
titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );
title(titlestr);
subplot(2,1,2);
set(gca,'LineWidth',2);
axis([-6 6 -5 5]);
hold on;
plot(x,e,'ko','LineWidth',2);
plot([-6,6]', [0,0]','k-','LineWidth',1);
for i = [1:N]
plot([x(i),x(i)]', [0,e(i)]', 'k:');
end
xlabel('x');
ylabel('e');
% eda04_11: trend of Black Rock Forest temperature data
% read the Black Rock Forest Temperature data
Draw=load('../Data/brf_temp.txt');
traw=Draw(:,1); % time (auxiliary variable)
draw=Draw(:,2); % temperature (data)
Nraw=length(draw);
% select only good data
% find() returns a list of indices of data that pass the test
k = find( (draw~=0) & (draw>-40) & (draw<38) );
% select these indices from the raw data
t=traw(k);
d=draw(k);
N=length(d); % length of good data
% set up the data kernel
Ty=365.25; % number of days per year
M=4; % number of model parameters
G=zeros(N,M); % initialize data kernel to zero
G(:,1)=1; % constant in time, m1
G(:,2)=t; % linear in time, with slope m2
G(:,3)=cos(2*pi*t/Ty); % cosine in time, period of 1 year, m3
G(:,4)=sin(2*pi*t/Ty); % sine in time, period of 1 year, m4
% predict data
GTG = G'*G;
mest = GTG\(G'*d); % standard least squares
dpre = G*mest; % predicted data
e = d - dpre; % individual errors
E = e'*e; % total error
% plot the data
figure(1);
clf;
subplot(3,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, 5000, -40, 40] );
plot(t,d,'k-','LineWidth',2);
xlabel('time, days');
ylabel('obs temp, C');
% plot prediction
subplot(3,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, 5000, -40, 40] );
plot(t,dpre,'k-','LineWidth',2);;
xlabel('time, days');
ylabel('pre temp, C')
% plot error
subplot(3,1,3);
set(gca,'LineWidth',2);
hold on;
axis( [0, 5000, -40, 40] );
plot(t,e,'k-','LineWidth',2);
xlabel('time, days');
ylabel('error, C')
% confidence interval for slope
sigmad2A = (0.01)^2; % prior error, 0.01 deg C
sigmad2B = E/(N-M); % posterior error, from fit
GTGI = inv(GTG);
CmA = sigmad2A*GTGI; % covariance of model parameters, using prior error in data
CmB = sigmad2B*GTGI; % covariance of model parameters, using posterior error in data
slope = Ty*mest(2); % slope, converted to units of deg C per year
errslopeA = Ty*sqrt(CmA(2,2)); % error in slope (prior case) in deg C per year
errslopeB = Ty*sqrt(CmB(2,2)); % error in slope (posterior case) in deg C per year
fprintf("using prior error: slope %e +/- %e deg C per year (95%%)\n", slope, 2*errslopeA );
fprintf("using posterior error: slope %e +/- %e deg C per year (95%%)\n", slope, 2*errslopeB );