Live Script edama_08
This Live Script supports Chapter 8 of Environmental Data Analysis with MATLAB or PYTHON, Third Edition, by WIlliam Menke
The script naming system has the form edama_CC_SS, where CC is the chapter number and SS is the script number.
% eda08_01: factors shown on terniary diagram
clear all;
% positions of vertices of terneary diagram
a = [0,0]';
b = [1,0]';
c = [0.5,0.5*sqrt(3)]';
f1 = [ 0.15, 0.25, 0.60]'; f1 = f1/sum(f1); % factor 1
f2 = [ 0.25, 0.45, 0.30]'; f2 = f2/sum(f2); % factor 2
% terniary diagram
figure(1);
clf;
axis( [0, 1, 0, 1] );
axis equal;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
axis off;
hold on;
% bounding triangle
plot( [a(1), b(1)], [a(2), b(2)], 'k-');
plot( [b(1), c(1)], [b(2), c(2)], 'k-');
plot( [c(1), a(1)], [c(2), a(2)], 'k-');
text( a(1)-0.05, a(2), 'A');
text( b(1)+0.05, b(2), 'B');
text( c(1), c(2)+0.05, 'C');
% first set of grid lines
L=10;
for j = [1:L-1]
for k = [1:2]
v1 = j/L;
v2 = (1-v1)*(k-1);
v3 = 1 - (v1+v2);
x(:,k)=v1*a+v2*b+v3*c;
end
plot( [x(1,1), x(1,2)], [x(2,1), x(2,2)], 'k:' );
end
% second set of grid lines
L=10;
for j = [1:L-1]
for k = [1:2]
v3 = j/L;
v1 = (1-v3)*(k-1);
v2 = 1 - (v1+v3);
x(:,k)=v1*a+v2*b+v3*c;
end
plot( [x(1,1), x(1,2)], [x(2,1), x(2,2)], 'k:' );
end
% third set of grid lines
L=10;
for j = [1:L-1]
for k = [1:2]
v2 = j/L;
v1 = (1-v2)*(k-1);
v3 = 1 - (v1+v2);
x(:,k)=v1*a+v2*b+v3*c;
end
plot( [x(1,1), x(1,2)], [x(2,1), x(2,2)], 'k:' );
end
% plot factors
y = f1(1)*a+f1(2)*b+f1(3)*c;
plot( y(1), y(2), 'k*' );
text( y(1), y(2)+0.05, 'f1' );
y = f2(1)*a+f2(2)*b+f2(3)*c;
plot( y(1), y(2), 'k*' );
text( y(1), y(2)-0.05, 'f2' );
% create and plot samples lying between f1 and f2
N=10;
for j=[1:N-1] % loop over samples
% sample s(i) is a linear mix of factors f1 and f2
% with factor loadings c1(i) and c2(i); that is,
% s(i) = c1(i)*f1 + c2(i)*f2
c1=j/N;
c2=1-c1;
s = c1*f1 + c2*f2;
y = s(1)*a+s(2)*b+s(3)*c;
plot( y(1), y(2), 'ko' );
end
% eda08_02: factors shown on terniary diagram
% this is edama_08_01 w/o the grid lines and w/o annotation of factors
% positions of vertices of terneary diagram
a = [0,0]';
b = [1,0]';
c = [0.5,0.5*sqrt(3)]';
f1 = [ 0.15, 0.25, 0.60]'; f1 = f1/sum(f1); % factor 1
f2 = [ 0.25, 0.45, 0.30]'; f2 = f2/sum(f2); % factor 2
% terniary diagram
figure(1);
clf;
axis( [0, 1, 0, 1] );
axis equal;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
axis off;
hold on;
% bounding triangle
plot( [a(1), b(1)], [a(2), b(2)], 'k-');
plot( [b(1), c(1)], [b(2), c(2)], 'k-');
plot( [c(1), a(1)], [c(2), a(2)], 'k-');
text( a(1)-0.05, a(2), 'A');
text( b(1)+0.05, b(2), 'B');
text( c(1), c(2)+0.05, 'C');
% plot factors
y = f1(1)*a+f1(2)*b+f1(3)*c;
plot( y(1), y(2), 'k*' );
y = f2(1)*a+f2(2)*b+f2(3)*c;
plot( y(1), y(2), 'k*' );
% create and plot samples lying between f1 and f2
N=10;
for j=[1:N-1] % loop over samples
% sample s(i) is a linear mix of factors f1 and f2
% with factor loadings c1(i) and c2(i); that is,
% s(i) = c1(i)*f1 + c2(i)*f2
c1=j/N;
c2=1-c1;
s = c1*f1 + c2*f2;
y = s(1)*a+s(2)*b+s(3)*c;
plot( y(1), y(2), 'ko' );
end
% eda08_03: alternative factors shown on terniary diagram
% this is different choice of factors than eda08_02
% positions of vertices of terneary diagram
a = [0,0]';
b = [1,0]';
c = [0.5,0.5*sqrt(3)]';
f1 = [ 0.15, 0.25, 0.60]'; f1 = f1/sum(f1); % factor 1
f2 = [ 0.25, 0.45, 0.30]'; f2 = f2/sum(f2); % factor 2
f1a = 0.5*(f1+f2); % factor 1a
f2a = f1+0.5*(f1-f2); f2 = f2/sum(f2); % factor 2a
% terniary diagram
figure(1);
clf;
axis( [0, 1, 0, 1] );
axis equal;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
axis off;
hold on;
% bounding triangle
plot( [a(1), b(1)], [a(2), b(2)], 'k-');
plot( [b(1), c(1)], [b(2), c(2)], 'k-');
plot( [c(1), a(1)], [c(2), a(2)], 'k-');
text( a(1)-0.05, a(2), 'A');
text( b(1)+0.05, b(2), 'B');
text( c(1), c(2)+0.05, 'C');
% plot factors
y = f1a(1)*a+f1a(2)*b+f1a(3)*c;
plot( y(1), y(2), 'k*' );
y = f2a(1)*a+f2a(2)*b+f2a(3)*c;
plot( y(1), y(2), 'k*' );
% create and plot samples lying between f1 and f2
N=10;
for j=[1:N-1] % loop ovr samples
% sample s(i) is a linear mix of factors f1 and f2
% with factor loadings c1(i) and c2(i); that is,
% s(i) = c1(i)*f1 + c2(i)*f2
c1=j/N;
c2=1-c1;
s = c1*f1 + c2*f2;
y = s(1)*a+s(2)*b+s(3)*c;
plot( y(1), y(2), 'ko' );
end
% edama_08_04: factor analysis of Atlantic Rocks dataset
% using singular value decomposition
% load data from file
D = load('../Data/rocks.txt');
% break out individual chemical species
sio2 = D(:,1); % SiO2
tio2 = D(:,2); % TiO2
als03 = D(:,3); % Al203
feot = D(:,4); % FeO-total
mgo = D(:,5); % MgO
cao = D(:,6); % CaO
na20 = D(:,7); % Na2O
k20 = D(:,8); % K2O
Ns = size(D);
N = Ns(1);
M = Ns(2);
% compute factors and factor loadings using
% singular value decompostion
S=D; % sample matrix
[U, SIGMA, V] = svd(S,0); % svd
sigma = diag(SIGMA); % singular values
Ns = length(sigma); % number of singular values
F = V'; % factors
C = U*SIGMA; % loadings
% plot singular values
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
plot( [1:Ns], sigma, 'k-', 'LineWidth', 2 );
plot( [1:Ns], sigma, 'ko', 'LineWidth', 2 );
title('singular values, s(i)');
xlabel('index, i');
ylabel('s(i)');
% display first five factors
for j = [1:5]
f1=F(j,:);
fprintf('factor %d\n', j);
fprintf('SiO2 %.4f\n', f1(1));
fprintf('TiO2 %.4f\n', f1(2));
fprintf('Al203 %.4f\n', f1(3));
fprintf('FeO-total %.4f\n', f1(4));
fprintf('MgO %.4f\n', f1(5));
fprintf('CaO %.4f\n', f1(6));
fprintf('Na2O %.4f\n', f1(7));
fprintf('K2O %.4f\n', f1(8));
fprintf('\n');
end
cmin=(-50);
cmax=50;
figure(2);
clf;
set(gca, 'LineWidth', 2);
Dc = (cmax-cmin)/50;
axis( [cmin-Dc,cmax+Dc,cmin-Dc,cmax+Dc,cmin-Dc,cmax+Dc] );
hold on
plot3( C(:,2), C(:,3), C(:,4), 'k.', 'LineWidth', 2 );
plot3( [cmin,cmin], [cmin,cmin], [cmin,cmax], 'k-', 'LineWidth', 2 );
plot3( [cmin,cmin], [cmin,cmax], [cmin,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmin,cmax], [cmin,cmin], [cmin,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmax], [cmax,cmax], [cmax,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmax], [cmax,cmin], [cmax,cmax], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmin], [cmax,cmax], [cmax,cmax], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmin], [cmin,cmin], [cmax,cmax], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmax], [cmin,cmin], [cmax,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmin,cmin], [cmax,cmin], [cmax,cmax], 'k-', 'LineWidth', 2 );
plot3( [cmin,cmin], [cmax,cmax], [cmax,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmax], [cmax,cmin], [cmin,cmin], 'k-', 'LineWidth', 2 );
plot3( [cmax,cmin], [cmax,cmax], [cmin,cmin], 'k-', 'LineWidth', 2 );
xlabel('C(2)');
ylabel('C(3)');
zlabel('C(4)');
% edama_08_05: varimax procedure applied to 2 factors
% define two non-spiky factors, fs and ft
M=4;
fA = [ 1, 1, 1, 1 ]';
fB = [ 1, -1, 1, -1 ]';
fA = fA / sqrt( fA'*fA ); % normalize to unit length
fB = fB / sqrt( fB'*fB ); % normalize to unit length
% standard varimax procedure to determine rotation angle q
u = fA.^2 - fB.^2;
v = 2* fA .* fB;
A = 2*M*u'*v;
B = sum(u)*sum(v);
top = A - B;
C = M*(u'*u-v'*v);
D = (sum(u)^2)-(sum(v)^2);
bot = C - D;
q = 0.25 * atan2(top,bot);
% rotate factors
cq = cos(q);
sq = sin(q);
fAp = cq*fA + sq*fB;
fBp = -sq*fA + cq*fB;
% display results
fA
fB
fAp
fBp
% edama_08_06: factor analysis on Atlabtic Rocks dataset
% using singular value decomposition
% and the varimax procedure
% load data from file
S = load('../Data/rocks.txt');
% break out chemical species
sio2 = S(:,1); % SiO2
tio2 = S(:,2); % TiO2
als03 = S(:,3); % Al203
feot = S(:,4); % FeO-total
mgo = S(:,5); % MgO
cao = S(:,6); % CaO
na20 = S(:,7); % Na2O
k20 = S(:,8); % K2O
Ns = size(S);
N = Ns(1);
M = Ns(2);
%singular value decompostion
[U, SIGMA, V] = svd(S,0);
% keep only first five singular values
P=5; % keeo 5 factors
F = V(:,1:P)'; % the 5 factors
C = U(:,1:P)*SIGMA(1:P,1:P); % their loadings
SP=C*F; % reconstruction of S with P factors
% Varimax rotatuo of factors 2,3,4,5
% initial rotated factor matrix, FP, abd rotation matrix, MR
MR=eye(P,P);
FP=F;
% spike these factors using the varimax procedure
k = [2, 3, 4, 5]';
Nk = length(k);
for iter = [1:3] % three iterations almost always sufficient
for ii = [1:Nk] % first factor of pair
for jj = [ii+1:Nk] % second factor of pair
% spike factors i and j
i=k(ii);
j=k(jj);
% copy factors from matrix to vectors
fA = FP(i,:)';
fB = FP(j,:)';
% standard varimax procedure to determine rotation angle q
u = fA.^2 - fB.^2;
v = 2* fA .* fB;
AA = 2*M*u'*v;
BB = sum(u)*sum(v);
top = AA - BB;
CC = M*(u'*u-v'*v);
DD = (sum(u)^2)-(sum(v)^2);
bot = CC - DD;
q = 0.25 * atan2(top,bot);
cq = cos(q);
sq = sin(q);
fAp = cq*fA + sq*fB;
fBp = -sq*fA + cq*fB;
% put back into factor matrix, FP
FP(i,:) = fAp';
FP(j,:) = fBp';
% accumulate rotation
mA = MR(i,:)';
mB = MR(j,:)';
mAp = cq*mA + sq*mB;
mBp = -sq*mA + cq*mB;
MR(i,:) = mAp';
MR(j,:) = mBp';
end
end
end
CP=C*MR'; % new factor loadings
SPP=CP*FP; % reconstruction
% check error of reconstruction
Smax = max(max(abs(S)));
Emax1 = max(max(abs(S - SP)));
Emax2 = max(max(abs(S - SPP)));
fprintf('max relative error in S-CF: %e\n', Emax1/Smax);
fprintf('max relative error in S-CP*FP: %e\n', Emax2/Smax);
fprintf('\n\n\n', Emax2/Smax);
% display first five factors
for j = [1:5]
f1=FP(j,:);
fprintf('factor %d\n', j);
fprintf('SiO2 %.2f\n', f1(1));
fprintf('TiO2 %.2f\n', f1(2));
fprintf('Al203 %.2f', f1(3));
fprintf('FeO-total %.2f\n', f1(4));
fprintf('MgO %.2f\n', f1(5));
fprintf('CaO %.2f\n', f1(6));
fprintf('Na2O %.2f\n', f1(7));
fprintf('K2O %f.2\n', f1(8))
fprintf('\n');
end
eda_draw(' ', abs(F(2,:))', 'caption f2', abs(F(3,:))', 'caption f3', abs(F(4,:))', 'caption f4', abs(F(5,:))', 'caption f5', ...
' becomes ', ...
abs(FP(2,:))', 'caption f2p', abs(FP(3,:))', 'caption f3p', abs(FP(4,:))', 'caption f4p', abs(FP(5,:))', 'caption f5p');
% edama_08_07: example of weighting in factor analysis
%
clear all;
% Part 1: Make synthetic chemical data with a wide range of concentrations
% typical analytic errors
% [Fe Cu Zn Pb As Ag Au]
se = [0.010000, 0.010000, 0.010000, 0.001000, 0.001000, 0.0000010, 0.0000001 ]';
% 7 elements, three factors
M = 7;
P = 3;
% [Fe Cu Zn Pb As Ag Au]
v1 = [0.200000, 0.004000, 0.010000, 0.010000, 0.001000, 0.0000100, 0.0000010 ]';
v2 = [0.050000, 0.180000, 0.180000, 0.030000, 0.000200, 0.0000400, 0.0000005 ]';
v3 = [0.100000, 0.080000, 0.110000, 0.010000, 0.000700, 0.0000100, 0.0000001 ]';
F = [v1, v2, v3]';
% randomly chosen loadings
N = 10000;
c1 = random('uniform',0,1,N,1);
c2 = random('uniform',0,1,N,1);
c3 = 1-(c1+c2);
C = [c1, c2, c3];
% true sample matrix
Strue = C*F;
% observed sample matrix is true plus Normally-distributed random noise
for i=[1:M]
Sobs(:,i) = Strue(:,i) + random('Normal',0,se(i),N,1);
end
Nbins = 100;
Fe_bins = 5*[0:Nbins-1]'/(Nbins-1);
Au_bins = 5*[0:Nbins-1]'/(Nbins-1);
% Part two factor analysis with no weighting
[U1,SIGMA1,V1] = svd(Sobs,0);
Fpre1 = V1'; % factors
Cpre1 = U1*SIGMA1; % loadings
% P=2 significant factors
Fpre1P = Fpre1(1:P,:);
Cpre1P = Cpre1(:,1:P);
Spre1P = Cpre1P*Fpre1P;
figure(1);
clf;
% error
e1 = abs((Strue-Spre1P));
Fe1hist = hist(e1(:,1)./se(1),Fe_bins);
Au1hist = hist(e1(:,M)./se(M),Au_bins);
subplot(1,2,1);
hold on;
set(gca,'LineWidth',2);
axis( [Fe_bins(1), Fe_bins(end), 0, 1] );
plot( Fe_bins, Fe1hist/max(Fe1hist), 'k-', 'LineWidth', 2 );
xlabel('error/se');
ylabel('count');
subplot(1,2,2);
set(gca,'LineWidth',2);
hold on;
axis( [Au_bins(1), Au_bins(end), 0, 1] );
plot( Au_bins, Au1hist/max(Au1hist), 'k-', 'LineWidth', 2 );
xlabel('error/se');
ylabel('count');
% Part three factor analysis with weighting by analytic precision
w = 1./se;
[U2,SIGMA2,V2] = svd(Sobs*diag(w),0);
Fpre2 = V2'*diag(1./w); % factors
Cpre2 = U2*SIGMA2; % loadings
% factors
Fpre2P = Fpre2(1:P,:);
Cpre2P = Cpre2(:,1:P);
Spre2P = Cpre2P*Fpre2P;
% error
e2 = abs((Strue-Spre2P));
Fe2hist = hist(e2(:,1)./se(1),Fe_bins);
Au2hist = hist(e2(:,M)./se(M),Au_bins);
subplot(1,2,1);
plot( Fe_bins, Fe2hist/max(Fe2hist), '-', 'LineWidth', 2, 'Color', [0.8,0.8,0.8] );
subplot(1,2,2);
plot( Au_bins, Au2hist/max(Au2hist), '-', 'LineWidth', 2, 'Color', [0.8,0.8,0.8] );
% eda08_08
% factor analysis on Mid Atlantic Ridge Rocks dataset
% Q-mode factors with geographical plots
% load data from file
D = load('../Data/rocks_with_lat_lon.txt');
% break out chemical species and lat/lons
sio2 = D(:,1); % SiO2
tio2 = D(:,2); % TiO2
als03 = D(:,3); % Al203
feot = D(:,4); % FeO-total
mgo = D(:,5); % MgO
cao = D(:,6); % CaO
na20 = D(:,7); % Na2O
k20 = D(:,8); % K2O
lat = D(:,9); % lat
lon = D(:,10); % lon
Ns = size(D);
N = Ns(1);
M = Ns(2)-2;
% compute factors and factor loadings using singular value decompostion
S=D(:,1:M); % sample matrix
[U, SIGMA, V] = svd(S',0); % singular value decomposition
sigma = diag(SIGMA); % singular values
Ns = length(sigma); % number of singular values
F = V'; % factors
C = U*SIGMA; % loadings
P = 4; % keep P=4 factors
FP = F(1:P,:); % the four factors
CP = C(:,1:P); % loadings of these factors
% map bounds
left = -60;
right = 10;
bottom = -70;
top = 70;
% plot factor 2 on map, with colors scales by factor values
figure(1);
clf;
subplot(1,3,1);
set(gca,'LineWidth',2);
hold on;
axis( [left, right, bottom, top] );
nf = 2;
fmin = mean(FP(nf,:))-2*std(FP(nf,:));
fmax = mean(FP(nf,:))+2*std(FP(nf,:));
fmin = -2*std(FP(nf,:));
fmax = 2*std(FP(nf,:));
for i=[1:N]
h = 3;
c = (FP(nf,i)-fmin)/(fmax-fmin);
if( c<0 )
c=0;
elseif ( c>1)
c=1;
end
rectangle('Position',[lon(i), lat(i), h, h], 'FaceColor', [c,c,c]);
end
xlabel('lon, deg');
ylabel('lat, deg');
title('Factor 2');
% plot factor 3 on map, with colors scales by factor values
subplot(1,3,2);
set(gca,'LineWidth',2);
hold on;
axis( [left, right, bottom, top] );
nf = 3;
fmin = -2*std(FP(nf,:));
fmax = 2*std(FP(nf,:));
for i=[1:N]
h = 3;
c = (FP(nf,i)-fmin)/(fmax-fmin);
if( c<0 )
c=0;
elseif ( c>1)
c=1;
end
rectangle('Position',[lon(i), lat(i), h, h], 'FaceColor', [c,c,c]);
end
xlabel('lon, deg');
ylabel('lat, deg');
title('Factor 3');
% plot factor 4 on map, with colors scales by factor values
subplot(1,3,3);
set(gca,'LineWidth',2);
hold on;
axis( [left, right, bottom, top] );
nf = 4;
fmin = -2*std(FP(nf,:));
fmax = 2*std(FP(nf,:));
for i=[1:N]
h = 3;
c = (FP(nf,i)-fmin)/(fmax-fmin);
if( c<0 )
c=0;
elseif ( c>1)
c=1;
end
rectangle('Position',[lon(i), lat(i), h, h], 'FaceColor', [c,c,c]);
end
xlabel('lon, deg');
ylabel('lat, deg');
title('Factor 4');
% plot coastlines on maps using digitized coastline from file
for myfig = [1:3]
subplot(1,3,myfig);
coastfile = '../Data/coastdata.txt';
fd = fopen(coastfile);
N = 0;
for i=[1:100000]
line = fgetl(fd);
if ( ~ischar(line) )
break;
end
if( line(1) == '>' )
if( N == 0 )
continue;
end
plot( mylon, mylat, 'k.', 'LineWidth', 2 );
N=0;
else
c = sscanf(line,'%f%f');
N=N+1;
mylat(N) = c(2);
mylon(N) = c(1);
end
end
fclose(fd);
end
% edama_08_09: plot CAC Pacific Sea Surface Temperature (SST)
% load the data file
D=load('../Data/cac_sst.txt');
% Set up sizes of arrays. See the info file for a description
% of how the data file is organized. The data are monthly,
% and we want to plot a big grid of smallish images, one for
% each month
[I, J] = size(D);
NBLOCK=85;
NLON = NBLOCK-1;
NLAT = J-1;
IMAGES = floor(I/NBLOCK);
Dt = 1/12;
% arrays for SST, date, lats, lons
SST = zeros(NLON,NLAT,IMAGES);
thedate = zeros(IMAGES,1); % in month.year format
theyear = zeros(IMAGES,1); % year extracted from thedate
themonth = zeros(IMAGES,1); % month extracted from thedate
lats=zeros(NLAT,1);
lons=zeros(NLON,1);
% cut up data into SST arrays
lats = D(1,2:J)';
lons = D(2:NBLOCK,1);
for j=[1:IMAGES]
k1 = 2+(j-1)*NBLOCK;
k2 = k1+NLON-1;
thedate(j)=D(k1-1,1);
themonth(j)=floor(thedate(j));
theyear(j)=floor(10000*(thedate(j)-themonth(j))+0.1);
SST(:,:,j) = D(k1:k2,2:J);
end
% some definitions related to plotting
MONTHS=12;
YEARS=ceil(IMAGES/12);
YBLOCKSIZE = 6;
YBLOCKS = ceil(YEARS/YBLOCKSIZE);
% plor data, 12 months by six years of maps per figure
for yb = [1:YBLOCKS]
figure(yb);
clf;
bw=0.9*(256-linspace(0,255,256)')/256;
colormap([bw,bw,bw]);
for m = [1:MONTHS]
for y = [1:YBLOCKSIZE]
j = m+MONTHS*(y-1)+MONTHS*YBLOCKSIZE*(yb-1);
k = y + (m-1)*YBLOCKSIZE;
if( j<=IMAGES)
subplot(MONTHS, YBLOCKSIZE, k);
hold on;
axis equal;
axis ij;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
MAP=flipud(SST(:,:,j)'); % reorient so it plots as map
range=max(max(MAP))-min(min(MAP));
if( range==0 )
range=1;
end
imagesc( (MAP-min(min(MAP)))/range );
axis tight;
if( y==1 )
ylabel(sprintf('%d',themonth(j)));
end
if( m==12 )
xlabel(sprintf('%d',theyear(j)));
end
end
end
end
end
% edama_08_10: plot singular values of CAC Pacific SST dataset
% load the data from file
D=load('../Data/cac_sst.txt');
% set up sizes of arrays. Se the info file for information
% on how the data file is organized.
[I, J] = size(D);
NBLOCK=85;
NLON = NBLOCK-1;
NLAT = J-1;
IMAGES = floor(I/NBLOCK);
Dt = 1/12;
% arrays for SST, date, lats, lons
SST = zeros(NLON,NLAT,IMAGES);
thedate = zeros(IMAGES,1); % in month.year format
theyear = zeros(IMAGES,1); % year extracted from thedate
themonth = zeros(IMAGES,1); % month extracted from thedate
lats=zeros(NLAT,1);
lons=zeros(NLON,1);
% cut up data into SST arrays
lats = D(1,2:J)';
lons = D(2:NBLOCK,1);
for j=[1:IMAGES]
k1 = 2+(j-1)*NBLOCK;
k2 = k1+NLON-1;
thedate(j)=D(k1-1,1);
themonth(j)=floor(thedate(j));
theyear(j)=floor(10000*(thedate(j)-themonth(j))+0.1);
SST(:,:,j) = D(k1:k2,2:J);
end
% some definitions related to plotting
MONTHS=12;
YEARS=ceil(IMAGES/12);
YBLOCKSIZE = 6;
YBLOCKS = ceil(YEARS/YBLOCKSIZE);
% fold out so each SST image is a row in the sample matrix
SSTV = zeros( IMAGES, NLAT*NLON );
for j=[1:IMAGES]
for p = [1:NLAT]
for q = [1:NLON]
k = p + (q-1)*NLAT;
SSTV(j,k) = SST( q, p, j );
end
end
end
% singular value decomposition
[U,S,V] = svd(SSTV,0);
s=diag(S);
Ns=length(s);
F=V'; % factors
C=U*S; % loadings
% plot singular values
figure(1);
clf;
plot( [1:Ns]', s, 'k-');
hold on;
xlabel('index, i');
ylabel('s(i)');
title('singular values, s(i)');
% edama_08_11: EOF's of CAC Pacific SST dataset
% plot EOF's and corresponding loading timeseries
% load the data from file
D=load('../Data/cac_sst.txt');
% set up sizes of arrays. See the info file for information
% on how the data file is organized
[I, J] = size(D);
NBLOCK=85;
NLON = NBLOCK-1;
NLAT = J-1;
IMAGES = floor(I/NBLOCK);
Dt = 1/12;
% arrays for SST, date, lats, lons
SST = zeros(NLON,NLAT,IMAGES);
thedate = zeros(IMAGES,1); % in month.year format
theyear = zeros(IMAGES,1); % year extracted from thedate
themonth = zeros(IMAGES,1); % month extracted from thedate
lats=zeros(NLAT,1);
lons=zeros(NLON,1);
% cut up data into SST arrays
lats = D(1,2:J)';
lons = D(2:NBLOCK,1);
for j=[1:IMAGES]
k1 = 2+(j-1)*NBLOCK;
k2 = k1+NLON-1;
thedate(j)=D(k1-1,1);
themonth(j)=floor(thedate(j));
theyear(j)=floor(10000*(thedate(j)-themonth(j))+0.1);
SST(:,:,j) = D(k1:k2,2:J);
end
% some definitions related to plotting
MONTHS=12;
YEARS=ceil(IMAGES/12);
YBLOCKSIZE = 6;
YBLOCKS = ceil(YEARS/YBLOCKSIZE);
% fold out SST image into rows of the sample matrix
SSTV = zeros( IMAGES, NLAT*NLON );
for j=[1:IMAGES]
for p = [1:NLAT]
for q = [1:NLON]
k = p + (q-1)*NLAT;
SSTV(j,k) = SST( q, p, j );
end
end
end
% singular value decomposition
[U,S,V] = svd(SSTV,0);
s=diag(S);
Ns=length(s);
F=V'; % EOFs (factors)
C=U*S; % loadings
% fold EOFs back into images
Nf = 12;
SSTF = zeros(NLON,NLAT,Nf);
for j=[1:Nf]
for p = [1:NLAT]
for q = [1:NLON]
k = p + (q-1)*NLAT;
SSTF( q, p, j ) = F(j,k);
end
end
end
% plot first Nf EOF's
figure(1);
clf;
bw=0.9*(256-linspace(0,255,256)')/256;
colormap([bw,bw,bw]);
for f = [1:Nf]
subplot(3, 4, f);
axis equal;
hold on;
axis ij;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
MAP=flipud(SSTF(:,:,f)'); % reorient so it plots as map
range=max(max(MAP))-min(min(MAP));
if( range==0 )
range=1;
end
imagesc( (MAP-min(min(MAP)))/range );
axis tight;
title(sprintf('EOF %d',f));
end
% plot corresponding amplitude timeseries
figure(2);
clf;
for f = [1:Nf]
subplot(3, 4, f);
plot( theyear(1)+[1:IMAGES]'/12, C(:,f), 'k-' );
hold on;
axis( [theyear(1), theyear(IMAGES)+1, min(C(:,1)), max(C(:,1)) ] );
title(sprintf('C(%d,t)',f));
end
% eda08_12: approximate images using most-significant EOF's
% using CAC Pacific Sea Surface Temperature (SST) dataset
% load the data from file
D=load('../Data/cac_sst.txt');
% set up sizes of arrays. See the infor file for information
% on how the data file is organized
[I, J] = size(D);
NBLOCK=85;
NLON = NBLOCK-1;
NLAT = J-1;
IMAGES = floor(I/NBLOCK);
Dt = 1/12;
% arrays for SST, date, lats, lons
SST = zeros(NLON,NLAT,IMAGES);
thedate = zeros(IMAGES,1); % in month.year format
theyear = zeros(IMAGES,1); % year extracted from thedate
themonth = zeros(IMAGES,1); % month extracted from thedate
lats=zeros(NLAT,1);
lons=zeros(NLON,1);
% cut up data into SST arrays
lats = D(1,2:J)';
lons = D(2:NBLOCK,1);
for j=[1:IMAGES]
k1 = 2+(j-1)*NBLOCK;
k2 = k1+NLON-1;
thedate(j)=D(k1-1,1);
themonth(j)=floor(thedate(j));
theyear(j)=floor(10000*(thedate(j)-themonth(j))+0.1);
SST(:,:,j) = D(k1:k2,2:J);
end
% some definitions related to plotting
MONTHS=12;
YEARS=ceil(IMAGES/12);
YBLOCKSIZE = 6;
YBLOCKS = ceil(YEARS/YBLOCKSIZE);
% fold out EOF's, each into one row of sample matrix
SSTV = zeros( IMAGES, NLAT*NLON );
for j=[1:IMAGES]
for p = [1:NLAT]
for q = [1:NLON]
k = p + (q-1)*NLAT;
SSTV(j,k) = SST( q, p, j );
end
end
end
% singular value decomposition
[U,S,V] = svd(SSTV,0);
% reconstruct with only Nf EOF's
F=V'; % EOS's (factors)
C=U*S; % loadings
Nf = 5; % use only Nf=5 EOFs
% pprox rebuild of sample matrix with only Nf EOFs
SSTR = C(:,1:Nf)*F(1:Nf,:);
% fold EOF's back into images
SSTRF = zeros(NLON,NLAT,IMAGES);
for j=[1:IMAGES]
for p = [1:NLAT]
for q = [1:NLON]
k = p + (q-1)*NLAT;
SSTRF( q, p, j ) = SSTR(j,k);
end
end
end
% plot approximate SST data
for yb = [1:YBLOCKS]
figure(yb);
clf;
bw=0.9*(256-linspace(0,255,256)')/256;
colormap([bw,bw,bw]);
for m = [1:MONTHS]
for y = [1:YBLOCKSIZE]
j = m+MONTHS*(y-1)+MONTHS*YBLOCKSIZE*(yb-1);
k = y + (m-1)*YBLOCKSIZE;
if( j<=IMAGES)
subplot(MONTHS, YBLOCKSIZE, k);
hold on;
axis equal;
axis ij;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
MAP=flipud(SSTRF(:,:,j)'); % reorient so it plots as map
range=max(max(MAP))-min(min(MAP));
if( range==0 )
range=1;
end
imagesc( (MAP-min(min(MAP)))/range );
axis tight;
if( y==1 )
ylabel(sprintf('%d',themonth(j)));
end
if( m==12 )
xlabel(sprintf('%d',theyear(j)));
end
end
end
end
end
% eda08_13, create a dataset for cluster analysis
% Note: In order to prevent ooverwriting of the originally
% distributed dataset, filenames have been changed from
% "../Data/threeclusters.txt" to "../Data/mythreeclusters.txt"
% "../Data/threeclustermeans.txt" to "../Data/threeclustermeans.txt"
% change them back if you want ...
clear all;
K=3; % three clusters
% 100 samples per cluster
N1=100;
N2=100;
N3=100;
N=N1+N2+N3;
M=2; % two elements
% means set by hand
mu = zeros(K,M);
mu(1,1) = 1.0;
mu(1,2) = 2.0;
mu(2,1) = 3.0;
mu(2,2) = 4.0;
mu(3,1) = 4.0;
mu(3,2) = 1.0;
% sample matrix randomly assigned
S = zeros(N,M);
sc = 0.5;
S(1:N1,1) = random( 'Normal', mu(1,1), sc, N1,1);
S(1:N1,2) = random( 'Normal',mu(1,2), sc, N1,1);
S(N1+1:N1+N2,1) = random( 'Normal',mu(2,1), sc, N2,1);
S(N1+1:N1+N2,2) = random( 'Normal',mu(2,2), sc, N2,1);
S(N1+N2+1:N,1) = random( 'Normal',mu(3,1), sc, N3,1);
S(N1+N2+1:N,2) = random( 'Normal',mu(3,2), sc, N3,1);
% cluster membership
k1 = ones(N1,1);
k2 = 2*ones(N2,1);
k3 = 2*ones(N3,1);
myk=[k1;k2;k3];
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ko', 'LineWidth', 1 );
end
end
plot( mu(1:K,1), mu(1:K,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
dlmwrite('../Data/mythreeclusters.txt',[S,myk], 'delimiter', '\t');
dlmwrite('../Data/mythreeclustermeans.txt', mu, 'delimiter', '\t');
% edama_08_14, cluster analysis w/ prior mean initialization
% load the data
K = 3;
D = load('../Data/threeclusters.txt');
[N, M] = size(D);
M = M-1;
S = D(1:N,1:2);
myk = D(1:N,3);
mu = load('../Data/threeclustermeans.txt');
% initial means are the prior means
initmu = zeros(K,M);
initmu(1,1) = 1.2;
initmu(1,2) = 2.2;
initmu(2,1) = 3.4;
initmu(2,2) = 4.4;
initmu(3,1) = 4.1;
initmu(3,2) = 1.1;
% k-mean analysis
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
disp('cluster means');
disp(mymu);
disp('mean sample distance');
disp(mean(sqrt(mydist2)));
disp('cluster counts');
disp(mycount);
disp('mean inter-cluster distance');
disp(mean(sqrt(myKdist2)));
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
plot( initmu(1:K,1), initmu(1:K,2), 'r^', 'LineWidth', 2 );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ko', 'LineWidth', 1 );
end
end
plot( mymu(1:K,1), mymu(1:K,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
% edama_08_15, cluster analysis w/ forgy initialization
% load the data
K = 3;
D = load('../Data/threeclusters.txt');
[N, M] = size(D);
M = M-1;
S = D(1:N,1:2);
myk = D(1:N,3);
mu = load('../Data/threeclustermeans.txt');
% initial means are randomly-chosen samples
initmu = eda_forgy_mean(S,K);
% k-mean analysis
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
disp('cluster means');
disp(mymu);
disp('mean sample distance');
disp(mean(sqrt(mydist2)));
disp('cluster counts');
disp(mycount);
disp('mean inter-cluster distance');
disp(mean(sqrt(myKdist2)));
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
plot( initmu(1:K,1), initmu(1:K,2), 'r^', 'LineWidth', 2 );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ko', 'LineWidth', 1 );
end
end
plot( mymu(1:K,1), mymu(1:K,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
% edama_08_16, cluster analysis w/ random-partition initialization
% load the data
K = 3;
D = load('../Data/threeclusters.txt');
[N, M] = size(D);
M = M-1;
S = D(1:N,1:2);
myk = D(1:N,3);
mu = load('../Data/threeclustermeans.txt');
% initial means are means randomly-chosen groups of samples
initmu = eda_random_partition_mean(S,K);
% k-mean analysis
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
disp('cluster means');
disp(mymu);
disp('mean sample distance');
disp(mean(sqrt(mydist2)));
disp('cluster counts');
disp(mycount);
disp('mean inter-cluster distance');
disp(mean(sqrt(myKdist2)));
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
plot( initmu(1:K,1), initmu(1:K,2), 'r^', 'LineWidth', 2 );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ko', 'LineWidth', 1 );
end
end
plot( mymu(1:K,1), mymu(1:K,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
% edama_08_17, cluster analysis w/ Bradley-Fayyad initialization
% load the data
K = 3;
D = load('../Data/threeclusters.txt');
[N, M] = size(D);
M = M-1;
S = D(1:N,1:2);
myk = D(1:N,3);
mu = load('../Data/threeclustermeans.txt');
% initial means are means randomly-chosen groups of samples
% bradley-fayyad parameters
Nsets = 100; % number of small sets
fraction = 0.05; % size of small sets = fraction of N
initmu = eda_bradley_fayyad_mean(S,K,Nsets,fraction);
% k-mean analysis
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
disp('cluster means');
disp(mymu);
disp('mean sample distance');
disp(mean(sqrt(mydist2)));
disp('cluster counts');
disp(mycount);
disp('mean inter-cluster distance');
disp(mean(sqrt(myKdist2)));
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
plot( initmu(1:K,1), initmu(1:K,2), 'r^', 'LineWidth', 2 );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ko', 'LineWidth', 1 );
end
end
plot( mymu(1:K,1), mymu(1:K,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
% edama_08_18, more clusters than actually occur
% load the data
D = load('../Data/threeclusters.txt');
[N, M] = size(D);
M = M-1;
S = D(1:N,1:2);
myk = D(1:N,3);
mu = load('../Data/threeclustermeans.txt');
% six clusters
K6=6;
% initial means are randomly-chosen samples
initmu = eda_forgy_mean(S,K6);
% k-mean analysis
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
disp('cluster means');
disp(mymu);
disp('mean sample distance');
disp(mean(sqrt(mydist2)));
disp('cluster counts');
disp(mycount);
disp('mean inter-cluster distance');
disp(mean(sqrt(myKdist2)));
disp('inter-cluster distances');
disp(sqrt(myKdist2));
figure(1);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, 5, 0, 5] );
for i=[1:N]
if myk(i)==1
plot( S(i,1), S(i,2), 'k.', 'LineWidth', 1 );
elseif myk(i)==2
plot( S(i,1), S(i,2), 'k+', 'LineWidth', 1 );
elseif myk(i)==3
plot( S(i,1), S(i,2), 'k^', 'LineWidth', 1 );
elseif myk(i)==4
plot( S(i,1), S(i,2), 'kx', 'LineWidth', 1 );
elseif myk(i)==5
plot( S(i,1), S(i,2), 'kv', 'LineWidth', 1 );
else
plot( S(i,1), S(i,2), 'ks', 'LineWidth', 1 );
end
end
plot( mymu(1:K6,1), mymu(1:K6,2), 'b^', 'LineWidth', 2 );
xlabel('x');
ylabel('y');
% edama_08_19, clusters in the Atlantic Rocks dataset
% read data
S = load('../Data/rocks.txt');
[N, M] = size(S);
% examine mean-sample distances for sequence of number
% of clusters
KMAX = 10;
Klist = zeros(KMAX,1);
Dlist = zeros(KMAX,1);
fprintf('K\tmean_dist\n');
for K=[1:KMAX]
initmu = eda_forgy_mean(S,K);
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(S, initmu);
Klist(K)=K;
Dlist(K)=mean(sqrt(mydist2));
fprintf('%d\t%.3f\n', K, Dlist(K));
end
% plot of distances
figure(1);
clf;
hold on;
axis( [0, KMAX, 0, 3] );
plot( Klist, Dlist, 'k-', 'LineWidth', 2 );
plot( Klist, Dlist, 'ko', 'LineWidth', 2 );
xlabel('K');
ylabel('mean Rij');
% edama_08_20, print 8 clusters in the Atlantic Rocks dataset
% read the data
S = load('../Data/rocks.txt');
[N, M] = size(S);
K=8; % 8 clusters
% k-means analysis
initmu = eda_forgy_mean(S,K); % forgy initial means
[mymu, myk, mydist, mycount, myKdist] = eda_kmeans(S, initmu);
% There is a problem printing text from LiveScripts using a
% sequence of fprintf()'s. Line feeds are not supressed when
% a newline is omitted. Here is a quick fixup: all the text
% is copied to a string, then rinted at once
str = '';
% print table of cluster means
names = {'SiO2', 'TiO2', 'Al203', 'FeO-t', 'MgO', 'CaO', 'Na2O', 'K2O' };
str=[str,'cluster'];
for j=[1:M]
str=[str, sprintf('\t%s', char(names(j))) ];
end
str=[str, sprintf('\tcount\n') ];
for i=[1:K]
str= [ str, sprintf('%d',i) ];
for j=[1:M]
str=[ str, sprintf('\t%.2f', mymu(i,j)) ];
end
str=[ str, sprintf('\t%d\n', mycount(i)) ];
end
fprintf('%s',str);
fprintf('\n\n',str);
% edama_08_21, 3d plot of clusters in the Atlantic Rocks dataset
% read the data
S = load('../Data/rocks.txt');
[N, M] = size(S);
names = {'SiO2', 'TiO2', 'Al203', 'FeO-t', 'MgO', 'CaO', 'Na2O', 'K2O' };
m1 = 3; % Al203
xmin = 0.0;
xmax = 50.0;
m2 = 5; % MgO
ymin = 0.0;
ymax = 50.0;
m3 = 6; % Ca0
zmin = 0.0;
zmax = 20.0;
K=8; % 8 clusters
% k-means analysis
initmu = eda_forgy_mean(S,K); % forgy initial means
%[mymu, myk, mydist, mycount, myKdist] = eda_kmeans(S, initmu);
figure(2);
clf;
hold on;
set(gca, 'LineWidth', 2);
Dx = (xmax-xmin)/50;
Dy = (ymax-ymin)/50;
Dz = (zmax-zmin)/50;
axis( [xmin-Dx, xmax+Dx, ymin-Dy, ymax+Dy, zmin-Dz, zmax+Dz] );
hold on
plot3( S(:,m1), S(:,m2), S(:,m3), 'k.', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymin,ymin], [zmin,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymin,ymax], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmax], [ymin,ymin], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymax], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymax,ymax], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymin,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymin,ymin], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymax,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymax,ymax], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymin], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymax,ymax], [zmin,zmin], 'k-', 'LineWidth', 2 );
xlabel(char(names(m1)));
ylabel(char(names(m2)));
zlabel(char(names(m3)));
view(97,6.5);
figure(3);
clf;
hold on;
set(gca, 'LineWidth', 2);
Dx = (xmax-xmin)/50;
Dy = (ymax-ymin)/50;
Dz = (zmax-zmin)/50;
axis( [xmin-Dx, xmax+Dx, ymin-Dy, ymax+Dy, zmin-Dz, zmax+Dz] );
hold on
plot3( mymu(:,m1), mymu(:,m2), mymu(:,m3), 'ko', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymin,ymin], [zmin,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymin,ymax], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmax], [ymin,ymin], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymax], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymax,ymax], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymin,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymin,ymin], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymax,ymin], [zmax,zmax], 'k-', 'LineWidth', 2 );
plot3( [xmin,xmin], [ymax,ymax], [zmax,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmax], [ymax,ymin], [zmin,zmin], 'k-', 'LineWidth', 2 );
plot3( [xmax,xmin], [ymax,ymax], [zmin,zmin], 'k-', 'LineWidth', 2 );
xlabel(char(names(m1)));
ylabel(char(names(m2)));
zlabel(char(names(m3)));
view(97,6.5);
% edama_08_22, Script to support Cribsheet 8.2 on EOF's
% standard set-up of v-axis
M=100;
Dv = 1.0;
vmin = 0.0;
vmax = vmin + Dv*(M-1);
v = Dv*[0:M-1]';
% standard set-up of u-axis
N=1000;
Du = 1.0;
umin = 0.0;
umax = umin + Du*(N-1);
u = Du*[0:N-1]';
% Part 1: make synthetic data
% four true patterns
fv1 = mod(v,10)/10;
v0 = (vmin+vmax)/2;
s2 = (vmax-vmin)/5;
fv2 = exp(-0.5*((v-v0).^2)/s2);
v1 = (vmin+vmax)/4;
v2 = 3*(vmin+vmax)/4;
s2 = (vmax-vmin)/10;
fv3 = exp(-0.5*((v-v1).^2)/s2)-exp(-0.5*((v-v2).^2)/s2);
L = vmax-vmin;
n=10;
fv4 = cos(n*pi*v/L);
% plot true patterns
disp('true matterns');
figure(1);
clf;
subplot(4,1,1);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [vmin, vmax, -max(abs(fv1)), max(abs(fv1))]);
plot( v, fv1, 'k-', 'LineWidth', 2);
xlabel('v');
ylabel('fv1');
subplot(4,1,2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [vmin, vmax, -max(abs(fv2)), max(abs(fv2))]);
plot( v, fv2, 'k-', 'LineWidth', 2);
xlabel('v');
ylabel('fv2');
subplot(4,1,3);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [vmin, vmax, -max(abs(fv3)), max(abs(fv3))]);
plot( v, fv3, 'k-', 'LineWidth', 2);
xlabel('v');
ylabel('fv3');
subplot(4,1,4);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [vmin, vmax, -max(abs(fv4)), max(abs(fv4))]);
plot( v, fv4, 'k-', 'LineWidth', 2);
xlabel('v');
ylabel('fv4');
% true v dependencies of amplitude of patterns
L = umax-umin;
cu1 = (u-umin)/L;
cu2 = ((u-umin).^0.25)/(L^0.25);
n=10;
cu3 = cos(n*pi*u/L);
cu4 = (100-mod(u,100))/100;
% plot true amplitudes
disp('ampltudes of true patterns');
figure(2);
clf;
subplot(4,1,1);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [umin, umax, -max(abs(cu1)), max(abs(cu1))]);
plot( u, cu1, 'k-', 'LineWidth', 2);
xlabel('u');
ylabel('cu1');
subplot(4,1,2);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [umin, umax, -max(abs(cu2)), max(abs(cu2))]);
plot( u, cu2, 'k-', 'LineWidth', 2);
xlabel('u');
ylabel('cu1');
subplot(4,1,3);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [umin, umax, -max(abs(cu3)), max(abs(cu3))]);
plot( u, cu3, 'k-', 'LineWidth', 2);
xlabel('u');
ylabel('cu3');
subplot(4,1,4);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [umin, umax, -max(abs(cu4)), max(abs(cu4))]);
plot( u, cu4, 'k-', 'LineWidth', 2);
xlabel('u');
ylabel('cu4');
S = [cu1, cu2, cu3, cu4] * [fv1'; fv2'; fv3'; fv4'];
Smax = max(max(abs(S)));
S = S + random('Normal',0,(0.1*Smax),N,M);
figure(3);
clf;
hold on;
colormap('gray');
axis([0, M, 0, N] );
imagesc(flipud(S));
xlabel('v');
ylabel('u');
title('S');
% Part 2: singular value decompsition
% F is a matrix of the EOF's and
% C is a matrix of their amplitudes
[U1,SIGMA,V1] = svd(S,0);
F = V1'; % factors
C = U1*SIGMA; % loadings
sigma = diag(SIGMA);
figure(4);
clf;
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
axis( [0, M, 0, max(sigma)]);
plot( [1:M]', sigma, 'k-', 'LineWidth', 1);
plot( [1:M]', sigma, 'k*', 'LineWidth', 2);
xlabel('i');
ylabel('sigma i');
title('singular values');
% Part 3: selection of number P of significant EOS's
% On the basis of the last plot, P=4
P = 4;
FP = F(1:P,:);
CP = C(:,1:P);
SP = CP*FP;
figure(5);
clf;
hold on;
colormap('gray');
axis([0, M, 0, N] );
imagesc(flipud(SP));
xlabel('v');
ylabel('u');
title('SP');
% Part 3: plot EOF's as a function of v
% and their amplitudes as a function of u
% plot EOF's
disp('EOFs');
figure(6);
clf;
for i=[1:P]
subplot(P,1,i);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
fvi=FP(i,:)';
axis( [vmin, vmax, -max(abs(fvi)), max(abs(fvi))])
plot( v, fvi, 'k-', 'LineWidth', 2);
xlabel('v');
ylabel(sprintf('fv%d',i));
end
% plot amplitudes
disp('ampltudes of EOFs');
figure(7);
clf;
for i=[1:P]
subplot(P,1,i);
hold on;
set(gca,'LineWidth',2);
set(gca,'FontSize',14);
cui=CP(:,i);
axis( [umin, umax, -max(abs(cui)), max(abs(cui))]);
plot( u, cui, 'k-', 'LineWidth', 2);
xlabel('u');
ylabel(sprintf('cu%d',i));
end
fprintf('Note that the factors do not match the original patterns,');
fprintf('although they do bear some resemblence to them.');
fprintf('That''s because any linear combination of factors will do.');
fprintf('Note also that fv4 and cu4 are very noisy. That''s because the');
fprintf('p=4 singular value is close to the noise level.');