Untitled

Live Script edama_04

This Live Script supports Chapter 4 of Environmental Data Analysis with MATLAB or PYTHON, Third Edition, by WIlliam Menke

The script naming system has the form edama_CC_SS, where CC is the chapter number and SS is the script number.

% edama_04_01: data kernel for straight-line linear model
% di = m1 + m2 *xi
clear all;  % clears previously-defined variables
% performing a clear at the top of a LiveSript is a good idea
% standrd set-up for an x-axis
N = 10;
Dx=1.0;
x = Dx*[0:N-1]';
% manually set the columns of G
M=2;            % number of columns
G=zeros(N,M);   % create G of all zeroes
G(:,1)=1;       % column 1 of ones
G(:,2)=x;       % column 2 of x's
G
G = 
     1     0
     1     1
     1     2
     1     3
     1     4
     1     5
     1     6
     1     7
     1     8
     1     9

% eda0ma_4_02: data kernel for quadratic curve linear model
% di = m1 + m2*xi + m3*xi
% standrd set-up for an x-axis
N = 10;
Dx=1.0;
x = Dx*[0:N-1]';
% manually set the columns of G
M=3;             % number of columns
G=zeros(N,M);    % create G of all zeroes
G(:,1)=1;        % column 1 of ones
G(:,2)=x;        % column 2 of x's
G(:,3)=x.^2;     % column 3 of x-squared
G
G = 
     1     0     0
     1     1     1
     1     2     4
     1     3     9
     1     4    16
     1     5    25
     1     6    36
     1     7    49
     1     8    64
     1     9    81

% edama_04_03: data kernel for polynomial linear model

% di = m1 + m2 *xi + m2 * x^2 + ...

% standard setup for x-axis

N = 32;

Dx=1/(N-1);

x = Dx*[0:N-1]'; % x defined on 0 to 1

M=N; % G is square

G=zeros(N,M); % create a G containing zeroes

G(:,1) = 1; % handle first column individually

for i = [2:M] % loop over remaining columns

G(:,i) = x .^ (i-1); % column is x raised to (i-1) power

end

eda_draw(G,'caption G');

% edama_04_04: data kernel for Fourier sum linear model

% standard setup for x-axis

N = 32;

Dx=1/(N-1);

x = Dx*[0:N-1]'; % x defined on 0 to 1

xmax=x(N);

M=N-2; % G not square, since I omit the lambda=infinity case here

% wavelengths lambda

Mo2=M/2; % variable equal to M/2

lambda= 2*xmax ./ [1:Mo2]';

G=zeros(N,M); % create G of zeroes

for i = [1:Mo2] % loop over (cos, sin) pairs

ic = 2*i-1; % column for cos( ... lambda(i) )

is = 2*i; % column for sin( ... lambda(i) )

G(:,ic) = cos( 2*pi*x/lambda(i) ); % set cosine column

G(:,is) = sin( 2*pi*x/lambda(i) ); % set sine column

end

eda_draw(G,'caption G');

% edama_04_05: data kernel viewed as a concatenation of columns

% standard setup for x-axis

N = 32;

Dx=1/(N-1);

x = Dx*[0:N-1]'; % x defined on 0 to 1

xmax=x(N);

M=N-2; % G not square, since I omit the lambda=infinity case here

Mo2=M/2; % variable for M/2

% largest wavelenth is 2*xmax

% subsequent wavelengths get smaller

lambda= 2*xmax ./ [1:Mo2]';

G=zeros(N,M); % create G of zeroes

for i = [1:Mo2] % loop over (cos, sin) pairs

ic = 2*i-1; % column for cos( ... lambda(i) )

is = 2*i; % column for sin( ... lambda(i) )

G(:,ic) = cos( 2*pi*x/lambda(i) ); % set cosine column

G(:,is) = sin( 2*pi*x/lambda(i) ); % set sine column

end

% foor display purposes, break out columns of G into vectors gi

g1 = G(:,1);

g2 = G(:,2);

g3 = G(:,3);

g4 = G(:,4);

gm = G(:,M);

eda_draw(G,'caption Gm',' ',g1,'caption g1',' ',g2,'caption g2',' ',g3,'caption g3',' ',g4,'caption g4', '...',' ',gm,'caption gM');

% edama_04_06: data kernels for weigthed average linear model

% note: this implementation requires an odd number of weights

% bud does not require them to be symmetric

N = 32;

M=N; % G is square

% three point average

w = [1, 2, 1]'; % three point weighted average

La = length(w); % length of w

Lw = floor(La/2)+1; % position middle weight

n = sum(w); % sum of weights

w = w/n; % normalized weights

wf = flipud(w); % weights, flipped upside-down

c = zeros(N,1); % initialize left column

r = zeros(M,1); % initialize top row

c(1:Lw)=wf(Lw:La); % copy weights to left column

r(1:Lw)=w(Lw:La); % copy weight to top row

G = toeplitz(c,r); % create data kernel

G3 = G; % save for plotting puposes

% give point average

w = [1, 2, 3, 2, 1]'; % three point weighted average

La = length(w); % length of w

Lw = floor(La/2)+1; % position middle weight

n = sum(w); % sum of weights

w = w/n; % normalized weights

wf = flipud(w); % weights, flipped upside-down

c = zeros(N,1); % initialize left column

r = zeros(M,1); % initialize top row

c(1:Lw)=wf(Lw:La); % copy weights to left column

r(1:Lw)=w(Lw:La); % copy weight to top row

G = toeplitz(c,r); % create data kernel

G5 = G; % save for plotting puposes

% nine point average

w = [1, 2, 3, 4, 5, 4, 3, 2, 1]'; % three point weighted average

La = length(w); % length of w

Lw = floor(La/2)+1; % position middle weight

n = sum(w); % sum of weights

w = w/n; % normalized weights

wf = flipud(w); % weights, flipped upside-down

c = zeros(N,1); % initialize left column

r = zeros(M,1); % initialize top row

c(1:Lw)=wf(Lw:La); % copy weights to left column

r(1:Lw)=w(Lw:La); % copy weight to top row

G = toeplitz(c,r); % create data kernel

G9= G; % save for plotting puposes

eda_draw(' ',G3,'caption G-3',' ',G5,'caption G-5',' ',G9,'caption G-9');

% eda04_07: data plot with error bars

% reads in noisy, linear data from file

% and computes and plots prediction error for a particular

% choice of intercept and slope, m1 and m2

% model paramerters are intercept and slope

M=2;

mest=[1.1, 2.3]'; % model estimate

% load (x,dobs) data from file

% made with Excel using 1+2*x+0.5*(rand()-1)

T = load('../Data/linedata01.txt');

% now independently name the two columns of data

x = T(:,1); % auxiliary varible

dobs = T(:,2); % observed data

N = length(dobs);

% plot data as black circles

figure(1);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

set(gca,'LineWidth',2);

set(gca,'FontSize',14);

axis([-6 6 -15 15]);

plot(x,dobs,'ko','LineWidth',2);

xlabel('x');

ylabel('d');

titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );

title(titlestr);

% data kernel for straight-line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=x;

% compute predicted data and errors

dpre = G*mest; % predicted data

e=dobs-dpre; % individual errors

E = e'*e; % total error

% compute and plot predicted data

plot(x,dpre,'k-','LineWidth',2);

% compute and plot error

for i = [1:N]

y1 = dobs(i);

y2 = dobs(i)-e(i);

plot( [x(i), x(i)], [y1, y2], 'k:' ,'LineWidth',2 );

end

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

set(gca,'LineWidth',2);

set(gca,'FontSize',14);

axis([-6 6 -5 5]);

plot(x,e,'ko','LineWidth',2);

plot([-6,6]',[0,0]','k:','LineWidth',1);

for i = [1:N]

plot( [x(i), x(i)], [0, e(i)], 'k:' ,'LineWidth',2 );

end

xlabel('x');

ylabel('e');

fprintf('error E = %.2f\n',E);

error E = 32.42

% edama_04_08: grid search for intercept and slope

% of trend line fitted to noisy data

% load (x,dobs) data from file

T = load('../Data/linedata01.txt');

% now independently name the two columns of data

x = T(:,1);

dobs = T(:,2);

N = length(dobs);

M = 2;

% data kernel for straight line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=x;

% define (m1, m2) grid

L1=101; L2=101;

m1min=0; m1max=4;

m2min=0; m2max=4;

m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);

m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);

% evaluate error at each grid point

E=zeros(L1,L2);

for i = [1:L1] % loop over m1's

for j = [1:L2] % loop over m2's

mest = [ m1(i), m2(j) ]'; % put m's into vector form

dpre = G*mest; % predicted data

e = dobs-dpre; % prediction error

E(i,j) = e'*e; % total error

end

% best estimate is the one with smallest error

% find the (i,j) for which E(i,j) is minimum

[Etemp, k] = min(E);

[Emin, j] = min(Etemp);

i=k(j);

% estimated model parameters

m1est = m1(i);

m2est = m2(j);

mest = [m1est,m2est]';

% note: display log of error for better range of grey tones

eda_draw(' ',log(E),'caption E(m1,m2)');

dpre = G*mest; % predicted data

e = dobs-dpre; % individual errors

E = e'*e; % total error

fprintf('Emin: %.2f\n', Emin );

Emin: 23.50

fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );

m1true:  1.00   m2true:   2.00

fprintf('m1est: %.2f m2est: %.2f\n', m1est, m2est );

m1est:   0.96   m2est:    2.00

% plot results

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

set(gca,'LineWidth',2);

set(gca,'FontSize',14);

axis([-6 6 -15 15]);

plot(x,dobs,'ko','LineWidth',2);

for i = [1:N]

plot( [x(i), x(i)], [dobs(i), dpre(i)], 'k:' ,'LineWidth',2 );

end

plot(x,dpre,'k-','LineWidth',2);

xlabel('x');

ylabel('d');

titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );

title(titlestr);

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

set(gca,'LineWidth',2);

set(gca,'FontSize',14);

axis([-6 6 -5 5]);

plot(x,e,'ko','LineWidth',2);

plot([-6,6]', [0,0]','k-','LineWidth',1);

for i = [1:N]

plot( [x(i), x(i)], [0, e(i)], 'k:' ,'LineWidth',2 );

end

xlabel('x');

ylabel('e');

% eda04_09: grid search with shifted x values

% load (x,dobs) data from file

T = load('../Data/linedata01.txt');

% now independently name the two columns of data

x = T(:,1);

dobs = T(:,2);

N = length(dobs);

M = 2;

% this code does a grod search three times

% Step 1: shift x values

xplus=x+2;

% unshifted: d = m1' + m2' * x

% shifted: d = m1 + m2 * (x+2)

% = (m1+ 2*m2) + m2*x

% so m1' = (m1+ 2*m2) and m2'=m2

% data kerner for straight line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=xplus;

% define the (m1, m2) grid

L1=100; L2=100;

m1min=-10; m1max=10;

m2min=-10; m2max=10;

m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);

m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);

% evaluate the error at each grid point

Eplus=zeros(L1,L2);

for i = [1:L1] % loop over m1's

for j = [1:L2] % loop over m2's

mest = [ m1(i), m2(j) ]'; % put m's into vector form

dpre = G*mest; % predicted data

e = dobs-dpre; % individual errors

Eplus(i,j) = e'*e; % total error

end

% best estimate is the one with smallest error

% find the (i,j) for which E(i,j) is minimum

[Etemp, k] = min(Eplus);

[Emin, j] = min(Etemp);

i=k(j);

% estmated model parameters

m1est = m1(i);

m2est = m2(j);

fprintf('shift: x+2' );

shift: x+2

fprintf('Emin: %.2f\n', Emin );

Emin: 24.56

fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );

m1true:  1.00   m2true:   2.00

fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );

m1est:   0.91   m2est:    1.92

% Step 2: no shift

x0=x;

% define (m1, m2) grid

L1=100; L2=100;

m1min=-10; m1max=10;

m2min=-10; m2max=10;

m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);

m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);

% data kerner for straight line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=x0;

% evaluate error at each grid point

E0=zeros(L1,L2);

for i = [1:L1] % loop over m1's

for j = [1:L2] % loop over m2's

mest = [ m1(i), m2(j) ]'; % put m's into vector form

dpre = G*mest; % predicted data

e = dobs-dpre; % prediction error

E0(i,j) = e'*e; % total error

end

% search grid for minimum E

[Etemp, k] = min(E0);

[Emin, j] = min(Etemp);

i=k(j);

m1est = m1(i);

m2est = m2(j);

fprintf('no shift' );

no shift

fprintf('Emin: %.2f\n', Emin );

Emin: 24.56

fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );

m1true:  1.00   m2true:   2.00

fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );

m1est:   4.75   m2est:    1.92

% Step 3: shift x values the other way

xminus=x-2;

% define (m1, m2) grid

L1=100; L2=100;

m1min=-10; m1max=10;

m2min=-10; m2max=10;

m1=m1min+(m1max-m1min)*[0:L1-1]'/(L1-1);

m2=m2min+(m2max-m2min)*[0:L2-1]'/(L2-1);

% data kerner for straight line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=xminus;

% evaluate error at each grid point

Eminus=zeros(L1,L2);

for i = [1:L1] % loop over m1's

for j = [1:L2] % loop over m2's

mest = [ m1(i), m2(j) ]'; % put m's into vector form

dpre = G*mest; % predicted data

e = dobs-dpre; % prediction error

Eminus(i,j) = e'*e; % total error

end

% search grid for minimum E

[Etemp, k] = min(Eminus);

[Emin, j] = min(Etemp);

i=k(j);

m1est = m1(i);

m2est = m2(j);

fprintf('shift: x-2' );

shift: x-2

fprintf('Emin: %.2f\n', Emin );

Emin: 24.56

fprintf('m1true: %.2f m2true: %.2f\n', 1, 2 );

m1true:  1.00   m2true:   2.00

fprintf('m1est: %.2f m2est: %.2f\n', m1est+2*m2est, m2est );

m1est:   8.59   m2est:    1.92

eda_draw(' ',log(Eplus),'caption E+', ' ',log(E0),'caption E0', ' ',log(Eminus),'caption E-');

% edama_04_10: least-squares solution

% for straight line linear model

% reads in noisy, linear data from file

% and computes and plots predicted data

% using least-squares estimate of intercept

% and slope, m1 and m2

% load file of noisy data

T = load('../Data/linedata01.txt');

% now independently name the two columns of data

x = T(:,1);

dobs = T(:,2);

N = length(dobs);

M=2;

% data kernel for straight line case

G=zeros(N,M);

G(:,1)=1;

G(:,2)=x;

% standard least squares solution

GTG = G'*G;

mest = GTG\(G'*dobs); % least squares estimate of model parameters

% quantities derived from least squares solution

dpre = G*mest; % predicted data

e=dobs-dpre; % individual errors

E = e'*e; % total error

sigmad2 = E/(N-M); % posterior variance of data

Cm = sigmad2*inv(GTG); % covariance of model parameters

sigmam1 = sqrt( Cm(1,1) ); % sqrt variance of model par 1

sigmam2 = sqrt( Cm(2,2) ); % sqrt variance of model par 2

fprintf('total error E: %.2f\n', E);

total error E: 23.46

fprintf("intercept %.2f +/- %.2f (%%95)\n", mest(1), 2*sigmam1);

intercept 0.94 +/- 0.97 (%95)

fprintf("slope t %.2f +/- %.2f (%%95)\n", mest(2), 2*sigmam2);

slope     t 2.02 +/- 0.31 (%95)

% plot data as black circles

figure(1);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

axis([-6 6 -15 15]);

hold on;

plot(x,dobs,'ko','LineWidth',2);

plot(x,dpre,'k-','LineWidth',2);

for i = [1:N]

plot([x(i),x(i)]', [dobs(i),dpre(i)]', 'k:');

end

xlabel('x');

ylabel('d');

titlestr = sprintf('intercept %.2f slope %.2f', mest(1), mest(2) );

title(titlestr);

subplot(2,1,2);

set(gca,'LineWidth',2);

axis([-6 6 -5 5]);

hold on;

plot(x,e,'ko','LineWidth',2);

plot([-6,6]', [0,0]','k-','LineWidth',1);

for i = [1:N]

plot([x(i),x(i)]', [0,e(i)]', 'k:');

end

xlabel('x');

ylabel('e');

% eda04_11: trend of Black Rock Forest temperature data

% read the Black Rock Forest Temperature data

Draw=load('../Data/brf_temp.txt');

traw=Draw(:,1); % time (auxiliary variable)

draw=Draw(:,2); % temperature (data)

Nraw=length(draw);

% select only good data

% find() returns a list of indices of data that pass the test

k = find( (draw~=0) & (draw>-40) & (draw<38) );

% select these indices from the raw data

t=traw(k);

d=draw(k);

N=length(d); % length of good data

% set up the data kernel

Ty=365.25; % number of days per year

M=4; % number of model parameters

G=zeros(N,M); % initialize data kernel to zero

G(:,1)=1; % constant in time, m1

G(:,2)=t; % linear in time, with slope m2

G(:,3)=cos(2*pi*t/Ty); % cosine in time, period of 1 year, m3

G(:,4)=sin(2*pi*t/Ty); % sine in time, period of 1 year, m4

% predict data

GTG = G'*G;

mest = GTG\(G'*d); % standard least squares

dpre = G*mest; % predicted data

e = d - dpre; % individual errors

E = e'*e; % total error

% plot the data

figure(1);

clf;

subplot(3,1,1);

set(gca,'LineWidth',2);

hold on;

axis( [0, 5000, -40, 40] );

plot(t,d,'k-','LineWidth',2);

xlabel('time, days');

ylabel('obs temp, C');

% plot prediction

subplot(3,1,2);

set(gca,'LineWidth',2);

hold on;

axis( [0, 5000, -40, 40] );

plot(t,dpre,'k-','LineWidth',2);;

xlabel('time, days');

ylabel('pre temp, C')

% plot error

subplot(3,1,3);

set(gca,'LineWidth',2);

hold on;

axis( [0, 5000, -40, 40] );

plot(t,e,'k-','LineWidth',2);

xlabel('time, days');

ylabel('error, C')

% confidence interval for slope

sigmad2A = (0.01)^2; % prior error, 0.01 deg C

sigmad2B = E/(N-M); % posterior error, from fit

GTGI = inv(GTG);

CmA = sigmad2A*GTGI; % covariance of model parameters, using prior error in data

CmB = sigmad2B*GTGI; % covariance of model parameters, using posterior error in data

slope = Ty*mest(2); % slope, converted to units of deg C per year

errslopeA = Ty*sqrt(CmA(2,2)); % error in slope (prior case) in deg C per year

errslopeB = Ty*sqrt(CmB(2,2)); % error in slope (posterior case) in deg C per year

fprintf("using prior error: slope %e +/- %e deg C per year (95%%)\n", slope, 2*errslopeA );

using prior error:     slope -3.163630e-02 +/- 1.626957e-05 deg C per year (95%)

fprintf("using posterior error: slope %e +/- %e deg C per year (95%%)\n", slope, 2*errslopeB );

using posterior error: slope -3.163630e-02 +/- 9.143627e-03 deg C per year (95%)