Untitled

Live Script edama_12

This Live Script supports Chapter 12 of Environmental Data Analysis with MATLAB or PYTHON, Third Edition, by WIlliam Menke

The script naming system has the form edama_CC_SS, where CC is the chapter number and SS is the script number.

% edama_12_01, p.d.f. of square of a random variable

clear all;

fprintf('edama_12_01, p.d.f. of square of a random variable\n');

edama_12_01, p.d.f. of square of a random variable

% standard set-up of e-axis

L=101;

emin=0.01;

emax=3;

De=(emax-emin)/(L-1);

e=emin+De*[0:L-1]';

% Nornal p.d.f. for p(e), for e>=0

pe = sqrt(2/pi)*exp(-0.5*e.^2);

% integrate to get area under p(e) (should be 1)

Ae = De*sum(pe);

% stadard setup of E axis

Emin=0.01;

Emax=3;

DE=(Emax-Emin)/(L-1);

E=Emin+DE*[0:L-1]';

% formula for p(E)

pE = sqrt(1./(2*pi*E)).*exp(-0.5*E);

% integrate to get area under p(E) (should be 1)

AE = DE*sum(pE);

% plot

eda_draw(' ',pe,'caption p(e)',' ',pE,'caption p(E)');

fprintf('area under p.s.f.s e: %.2f, E: %.2f\n', Ae, AE);

area under p.s.f.s e: 1.00, E: 0.91

% edama_12_02: chi-squared p.d.f.

clear all;

fprintf('edama_12_02: chi-squared p.d.f.\n');

edama_12_02: chi-squared p.d.f.

% standard set-up of X2-axis

L=1001;

X2min=0;

X2max=10;

DX2=(X2max-X2min)/(L-1);

X2=X2min+DX2*[0:L-1]';

figure(1);

clf;

set(gca,'LineWidth',2);

hold on;

axis( [X2min, X2max, 0, 1] );

for N = [1:5] % loop over degrees of freedom N

pX2 = chi2pdf(X2,N); % chi-squared p.d.f.

plot(X2,pX2,'k-','LineWidth',2);

end

% edama_12_03: student t p.d.f.

clear all;

fprintf('edama_12_03: student t p.s.f.\n');

edama_12_03: student t p.s.f.

% standard set-up of t-axis

L=1001;

tmin=-5;

tmax=5;

Dt=(tmax-tmin)/(L-1);

t=tmin+Dt*[0:L-1]';

figure(1);

clf;

set(gca,'LineWidth',2);

hold on;

axis( [tmin, tmax, 0, 0.5] );

% plot p(t,N) for several values of N

for N = [1:5] % loop over degrees of freedom N

pt = tpdf(t,N);

plot(t,pt,'k-','LineWidth',2);

end

% edama_12_04: F p.d.f.

clear all;

fprintf('edama_12_04: F p.d.f.\n');

edama_12_04: F p.d.f.

% standard set-up of F-axis

L=1001;

Fmin=0;

Fmax=5;

DF=(Fmax-Fmin)/(L-1);

F=Fmin+DF*[0:L-1]';

figure(1);

clf;

set(gca,'LineWidth',2);

hold on;

axis( [Fmin, Fmax, 0, 1] );

i=1;

for M = [2, 5, 25, 50] % loop over degrees of freedom M

subplot(4,1,i);

i=i+1;

set(gca,'LineWidth',2);

hold on;

for N = [2, 5, 25, 50] % loop over degrees of freedom N

pF = fpdf(F,N,M);

plot(F,pF,'k-','LineWidth',2);

end

% edama_12_05: simulated particle size data example
% note: original filenames ../Data/testA.txt and ../Data/testB.txt
% changed to ../Data/mytestA.txt and ../Data/mytestB.txt
% so that this script will not overwrite the originals
clear all;
fprintf('edama_12_05: simulated particle size data example\n');
edama_12_05: simulated particle size data example
N=25; % number of data
d=100; % mean
s2dtrue=1; % variance
sdtrue=sqrt(s2dtrue);
% calibration test 1
dobs1=random('Normal',d,sdtrue,N,1);
dlmwrite('../Data/mytestA.txt',dobs1,'\t');
fprintf('wrote file ../Data/mytestA.txt\n');
wrote file ../Data/mytestA.txt
% calibration test 2
dobs2=random('Normal',d,sdtrue,N,1);
dlmwrite('../Data/mytestB.txt',dobs2,'\t');
fprintf('wrote file ../Data/mytestB.txt\n');
wrote file ../Data/mytestB.txt

% edama_12_06: Z and chi-squared tests of particle size data
clear all;
fprintf('edama_12_06: statistics of partical size data set\n');
edama_12_06: statistics of partical size data set
% Z distribution, two-sided test
Z=0.278;
Pi=normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1);
Po=1-(normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1));
fprintf('Z distribution, two sided test:\n');
Z distribution, two sided test:
fprintf('  The probability that Z is inside the interval %.4f to %.4f\n',-Z, Z);
  The probability that Z is inside the interval -0.2780 to 0.2780
fprintf('    is %.4f\n',Pi);
    is 0.2190
fprintf('  The probability that Z is outside the interval %.4f to %.4f\n',-Z, Z);
  The probability that Z is outside the interval -0.2780 to 0.2780
fprintf('    is %.4f\n',Po);
    is 0.7810
% Chi-squared distribution, one-sided test
X2=21.9;
N=25;
Pi=chi2cdf(X2,N);
Po=1-chi2cdf(X2,N);
fprintf('Chi-squared distribution with %d degrees of freedom:\n',N);
Chi-squared distribution with 25 degrees of freedom:
fprintf('   The Probability that X2 is inside the interval 0 to %.4f\n',X2);
   The Probability that X2 is inside the interval 0 to 21.9000
fprintf('     is %.4f\n', Pi);
     is 0.3585
fprintf('   The Probability that X2 is outside the interval 0 to %.4f\n',X2);
   The Probability that X2 is outside the interval 0 to 21.9000
fprintf('     is %.4f\n', Po);
     is 0.3585

% edama_12_07, statistical tests of particle size data
dobsA=load('../Data/testA.txt');
dtrueA = 100.0; % known size of calibrations standard
% calculate useful statistics
fprintf('file testA.txt\n');
file testA.txt
NA=length(dobsA);
fprintf('N %d\n', NA );
N 25
dbartrueA = dtrueA;
fprintf('true mean %.4f', dbartrueA );
true mean 100.0000
dbarestA  = sum(dobsA)/NA;
fprintf('estimated mean %.4f\n', dbarestA );
estimated mean 100.1610
sd2trueA=1;
fprintf('true variance %.4f\n', sd2trueA );
true variance 1.0000
sdtrueA=sqrt(sd2trueA);
fprintf('true root-variance %.4f\n', sdtrueA );
true root-variance 1.0000
sd2estA=((dobsA-dtrueA)'*(dobsA-dtrueA))/NA;
fprintf('estimated variance (given true data) %.4f\n', sd2estA );
estimated variance (given true data) 0.8964
sdestA=sqrt(sd2estA);
fprintf('estimated root-variance (given true data) %.4f\n', sdestA );
estimated root-variance (given true data) 0.9468
sd2estpA=((dobsA-dbarestA)'*(dobsA-dbarestA))/(NA-1);
fprintf('estimated variance (given estimated mean) %.4f\n', sd2estpA );
estimated variance (given estimated mean) 0.9068
sdestpA=sqrt(sd2estpA);
fprintf('estimated root-variance (given estimated mean) %.4f\n', sdestpA );
estimated root-variance (given estimated mean) 0.9522
ZA = (dbarestA-dbartrueA)/(sdtrueA/sqrt(NA));
fprintf('Z %.4f\n', ZA );
Z 0.8052
PZA = 1 - (normcdf(abs(ZA),0,1)-normcdf(-abs(ZA),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(ZA), PZA );
Probability of |Z| exceeding 0.8052 is 0.4207
chi2A = ((dobsA-dtrueA)'/sdtrueA) * ((dobsA-dtrueA)/sdtrueA);
fprintf('chi-squared %.4f\n', chi2A );
chi-squared 22.4110
Pchi2A = 1 - chi2cdf(chi2A,NA);
fprintf('Probability of chi2 exceeding %.4f is %.4f\n', chi2A, Pchi2A );
Probability of chi2 exceeding 22.4110 is 0.6119
tA = (dbarestA-dbartrueA)/(sdestA/sqrt(NA));
fprintf('t %.4f\n', tA );
t 0.8504
PtA = 1 - (tcdf(abs(tA),NA)-tcdf(-abs(tA),NA));
fprintf('Probability of |t| exceeding %f is %.4f\n', abs(tA), PtA );
Probability of |t| exceeding 0.850439 is 0.4032
fprintf('\n');
% load second dataset
dobsB=load('../Data/testB.txt');
dtrueB=100; % known size of calibration standard
% calculate useful statistics
fprintf('file testB.txt');
file testB.txt
NB=length(dobsB);
fprintf('N %d', NB );
N 25
dbartrueB = dtrueB;
fprintf('true mean %.4f\n', dbartrueB );
true mean 100.0000
dbarestB  = sum(dobsB)/NB;
fprintf('estimated mean %.4f\n', dbarestB );
estimated mean 99.7177
sd2trueB=1;
fprintf('true variance %.4f\n', sd2trueB );
true variance 1.0000
sdtrueB=sqrt(sd2trueB);
fprintf('true root-variance %.4f\n', sdtrueB );
true root-variance 1.0000
sd2estB=((dobsB-dtrueB)'*(dobsB-dtrueB))/NB;
fprintf('estimated variance (given true data) %.4f\n', sd2estB );
estimated variance (given true data) 0.5854
sdestB=sqrt(sd2estB);
fprintf('estimated root-variance (given true data) %.4f\n', sdestB );
estimated root-variance (given true data) 0.7651
sd2estpB=((dobsB-dbarestB)'*(dobsB-dbarestB))/(NB-1);
fprintf('estimated variance (given estimated mean) %.4f\n', sd2estpB );
estimated variance (given estimated mean) 0.5267
sdestpB=sqrt(sd2estpB);
fprintf('estimated root-variance (given estimated mean) %.4f\n', sdestpB );
estimated root-variance (given estimated mean) 0.7258
ZB = (dbarestB-dbartrueB)/(sdtrueB/sqrt(NB));
fprintf('Z %.4f\n', ZB );
Z -1.4116
PZB = 1 - (normcdf(abs(ZB),0,1)-normcdf(-abs(ZB),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(ZB), PZB );
Probability of |Z| exceeding 1.4116 is 0.1581
chi2B = ((dobsB-dtrueB)'/sdtrueB) * ((dobsB-dtrueB)/sdtrueB);
fprintf('chi-squared %.4f\n', chi2B );
chi-squared 14.6340
Pchi2B = 1 - chi2cdf(chi2B,NB);
fprintf('Probability of chi2 exceeding %.4f is %.4f\n', chi2B, Pchi2B );
Probability of chi2 exceeding 14.6340 is 0.9495
tB = (dbarestB-dbartrueB)/(sdestB/sqrt(NB));
fprintf('t %f', tB );
t -1.845018
PtB = 1 - (tcdf(abs(tB),NB)-tcdf(-abs(tB),NB));
fprintf('Probability of |t| exceeding %.4f is %.4f\n', abs(tB), PtB );
Probability of |t| exceeding 1.8450 is 0.0769
disp(' ');
 
% are the means of the two tests significantly different ?
fprintf('difference between two means with Z-test');
difference between two means with Z-test
A = dbarestA - dbarestB;
B = sqrt(sd2trueA/NA+sd2estpB/NB);
Z = A/B;
fprintf('Z %.4f\n', Z );
Z 1.7941
% are the means of the two tests significantly different ?
PZ = 1 - (normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(Z), PZ );
Probability of |Z| exceeding 1.7941 is 0.0728
disp(' ');
 
fprintf('difference between two means with t-test\n');
difference between two means with t-test
A = dbarestA - dbarestB;
B = sqrt(sd2estpA/NA+sd2estpB/NB);
t = A/B;
C = (sd2estpA/NA+sd2estpB/NB)^2;
D = ((sd2estpA/NA)^2)/(NA-1) + ((sd2estpB/NB)^2)/(NB-1);
M=round(C/D);
fprintf('t %.4f degrees of freedom %d\n', t, M );
t 1.8515 degrees of freedom 45
% are the means of the two tests significantly different ?
Pt = 1 - (tcdf(abs(t),M)-tcdf(-abs(t),M));
fprintf('Probability of |t| exceeding %.4f is %.4f\n', abs(t), Pt );
Probability of |t| exceeding 1.8515 is 0.0707
fprintf('\n');
% are the variances of the two tests significantly different ?
F = (chi2A/NA) / (chi2B/NB);
if( F<1 )
    F=1/F;
end
fprintf('1/F %.4f F %.4f\n', 1/F, F );
1/F 0.6530 F 1.5314
PF = 1 - (fcdf(F,NA,NB)-fcdf(1/F,NA,NB));
fprintf('Probability of F outside interval %.4f %.4f is %.4f\n', 1/F, F, PF );
Probability of F outside interval 0.6530 1.5314 is 0.2933

% edama_12_08: F-test to assess difference in fit

% illustration of F-test to assess difference in fit of two models

% using a small fragment of Black Rock Forest temperature data

clear all;

fprintf('edama_12_08: F-test to assess difference in fit\n');

edama_12_08: F-test to assess difference in fit

% load data from file

d = load('../Data/brf_fragment.txt');

N = length(d);

t = 10+[0:N-1]'; % hours from start of orignal record

figure(1);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

plot(t,d,'ko','LineWidth',2);

% fit 1, straight line

M=2;

G=zeros(N,M);

G(:,1)=1;

G(:,2)=t;

mestA=(G'*G)\(G'*d);

dpreA = G*mestA;

plot(t,dpreA,'k-','LineWidth',2);

title('linear fit');

xlabel('time t, hours');

ylabel('d(i)');

EA = (d-dpreA)'*(d-dpreA);

vA = N-M;

fprintf('linear error %.4f, degrees of freedom %d\n', EA, vA);

linear error 31.5222, degrees of freedom 49

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

plot(t,d,'ko','LineWidth',2);

% fit 2, cubic line

M=4;

G=zeros(N,M);

G(:,1)=1;

G(:,2)=t;

G(:,3)=t.*t;

G(:,4)=t.^3;

mestB=(G'*G)\(G'*d);

dpreB = G*mestB;

plot(t,dpreB,'k-','LineWidth',2);

title('cubic fit');

xlabel('time t, hours');

ylabel('d(i)');

EB = (d-dpreB)'*(d-dpreB);

vB = N-M;

fprintf('cubic error %.4f, degrees of freedom %d\n', EB, vB);

cubic error 27.1846, degrees of freedom 47

fprintf('improvement in error %.4f\n', (EA-EB)/EA);

improvement in error 0.1376

F = (EA/vA) / (EB/vB);

fprintf('1/F %.4f F %.4f\n', 1/F, F);

1/F 0.8991 F 1.1122

if( F<1 )

F=1/F

end

P = 1 - (fcdf(F,vA,vB)-fcdf(1/F,vA,vB));

fprintf('P(F<%.4f or F>%.4f) = %.4f\n', 1/F, F, P);

P(F<0.8991 or F>1.1122) = 0.7141

if( P>0.05)

fprintf('Null hypothesis cannot be excluded to 95% confidence\n')

else

fprintf('Null hypothesis can be excluded to 95% confidence\n')

end

Null hypothesis cannot be excluded to 95

% edama_12_09: variance of power spectral density (untapered)

% case of untapered random timeseries

clear all;

fprintf('edama_12_09: variance of power spectral density (untapered)\n');

edama_12_09: variance of power spectral density (untapered)

% create an uncorrelated random timeseries

N=2^10;

sd2true=64.0;

sdtrue=sqrt(sd2true);

d = random('Normal',0,sdtrue,N,1);

% generic time/frequency set up

Dt=0.025;

T = N*Dt;

t=Dt*[0:N-1]';

fmax=1/(2.0*Dt);

Df=fmax/(N/2);

f=Df*[0:N/2,-N/2+1:-1]';

Nf=N/2+1;

Dw=2*pi*Df;

w=Dw*[0:N/2,-N/2+1:-1]';

Nw=Nf;

% constant window function

W = ones(N,1);

d = W.*d;

% window changes variance of signal by ff

ff = sum(W.*W)/N;

% true and estimated variance of d

sd2est=std(d)^2;

fprintf('true data variance %.4f\n', ff*sd2true);

true data variance 64.0000

fprintf('estimated data variance %.4f\n', sd2est);

estimated data variance 62.2814

% compute power spectral density

fftraw = fft(d); % raw DFT output

dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only

s2 = (2/T) * (abs(dbar).^2); % power spectral density

f=f(1:Nf);

w=w(1:Nf);

% statistics of power spectral density

s2meanest = mean(s2);

s2varest=std(s2)^2;

s2sigmaest=sqrt(s2varest);

sd2psd=Nf*Df*s2meanest;

fprintf('variance of data estimated from PSD %.4f\n', sd2psd);

variance of data estimated from PSD 62.4121

fprintf('\n');

% true mean and variance

p=2; % degrees of freedom

c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor

s2meantrue = p*c;

s2vartrue = 2*p*c^2;

disp('power spectral density');

power spectral density

fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);

true mean 3.1938 variance 10.2001

fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);

estimated mean 3.1145 variance 10.0289

fprintf('\n');

% plot data

figure(1)

clf;

set(gca,'LineWidth',2);

hold on;

plot(t,d,'k-','LineWidth',2);

plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );

plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

% plot PSD

figure(2)

clf;

set(gca,'LineWidth',2);

hold on;

plot(f,s2,'k-','LineWidth',2);

% plot mean on spectra

plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);

% histogram of values

Nhist=100;

[s2hist, bins] = hist(s2,Nhist);

% plot histogram

figure(3)

clf;

set(gca,'LineWidth',2);

hold on;

axis( [min(bins), max(bins), 0, max(s2hist)] );

plot(bins,s2hist,'k-','LineWidth',2);

plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);

% plot true chi-squared distribution

Db=(bins(2)-bins(1))/c;

s2histtrue=Nf*Db*chi2pdf(bins/c,p);

plot(bins,s2histtrue,'k-','LineWidth',2);

% plot 95% confidence level on spectra

figure(2);

pv=0.95;

cv = chi2inv(0.95,p);

fprintf('%.4f confidence at %.4f',pv,cv*c);

0.9500 confidence at 9.5677

percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);

fprintf('%.4f percent of points below %.4f confidence\n',percent,pv);

95.9064 percent of points below 0.9500 confidence

disp(' ');

plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);

figure(3);

plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);

% edama_12_10: variance of power spectral density (tapered)

% case of random timeseries tapered with Hamming window

clear all;

fprintf('edama_12_09: variance of power spectral density (tapered)\n');

edama_12_09: variance of power spectral density (tapered)

% create an uncorrelated random timeseries

N=2^10;

sd2true=64.0;

sdtrue=sqrt(sd2true);

d = random('Normal',0,sdtrue,N,1);

% generic time/frequency set up

Dt=0.025;

T = N*Dt;

t=Dt*[0:N-1]';

fmax=1/(2.0*Dt);

Df=fmax/(N/2);

f=Df*[0:N/2,-N/2+1:-1]';

Nf=N/2+1;

Dw=2*pi*Df;

w=Dw*[0:N/2,-N/2+1:-1]';

Nw=Nf;

% Hamming window function

W = 0.54 - 0.46*cos(2*pi*[0:N-1]'/(N-1));

d = W.*d;

% window changes variance of signal by ff

ff = sum(W.*W)/N;

% true and estimated variance of d

sd2est=std(d)^2;

fprintf('true data variance %.4f\n', ff*sd2true);

true data variance 25.4092

fprintf('estimated data variance %.4f\n', sd2est);

estimated data variance 28.2656

% compute power spectral density

fftraw = fft(d); % raw DFT output

dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only

s2 = (2/T) * (abs(dbar).^2); % power spectral density

f=f(1:Nf);

w=w(1:Nf);

% statistics of power spectral density

s2meanest = mean(s2);

s2varest=std(s2)^2;

s2sigmaest=sqrt(s2varest);

sd2psd=Nf*Df*s2meanest;

fprintf('variance of data estimated from PSD %.4f', sd2psd);

variance of data estimated from PSD 28.2713

fprintf('\n');

% true mean and variance

p=2; % degrees of freedom

c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor

s2meantrue = p*c;

s2vartrue = 2*p*c^2;

disp('power spectral density');

power spectral density

fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);

true mean 1.2680 variance 1.6078

fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);

estimated mean 1.4108 variance 1.9641

disp(' ');

% plot data

figure(1)

clf;

set(gca,'LineWidth',2);

hold on;

plot(t,d,'k-','LineWidth',2);

plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );

plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

% plot PSD

figure(2)

clf;

set(gca,'LineWidth',2);

hold on;

plot(f,s2,'k-','LineWidth',2);

% plot mean on spectra

plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);

% histogram of values

Nhist=100;

[s2hist, bins] = hist(s2,Nhist);

% plot histogram

figure(3)

clf;

set(gca,'LineWidth',2);

hold on;

axis( [min(bins), max(bins), 0, max(s2hist)] );

plot(bins,s2hist,'k-','LineWidth',2);

plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);

% plot true chi-squared distribution

Db=(bins(2)-bins(1))/c;

s2histtrue=Nf*Db*chi2pdf(bins/c,p);

plot(bins,s2histtrue,'k-','LineWidth',2);

% plot 95% confidence level on spectra

figure(2);

pv=0.95;

cv = chi2inv(0.95,p);

fprintf('%.4f confidence at %.4f\n',pv,cv*c);

0.9500 confidence at 3.7985

percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);

fprintf('%.4f percent of points below %.4f confidence\n',percent,pv);

92.9825 percent of points below 0.9500 confidence

fprintf('\n');

plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);

figure(3);

plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);

% probability that PSD exceeds maximum observed value

speak = max(s2);

ppeak = chi2cdf(speak/c,p);

fprintf('largest spectral peak: %e\n',speak);

largest spectral peak: 9.602763e+00

fprintf('probability of largest peak: %.8f\n',ppeak);

probability of largest peak: 0.99948599

fprintf(' probability of PSD exceeding maximum observed value %.8f\n', 1-ppeak);

   probability of PSD exceeding maximum observed value 0.00051401

fprintf('Nf probability of PSD exceeding maximum observed value %.8f\n', 1-(ppeak^Nf));

Nf probability of PSD exceeding maximum observed value 0.23183836

% edama_12_11: confidence of spectral peak

% for random timeseries plus a small amplitude sinusoid

clear all;

fprintf('edama_12_11: confidence of spectral peak\n');

edama_12_11: confidence of spectral peak

% create an uncorrelated random timeseries

N=2^10;

sd2true=64.0;

sdtrue=sqrt(sd2true);

d = random('Normal',0,sdtrue,N,1);

% generic time set up

Dt=0.025;

T = N*Dt;

t=Dt*[0:N-1]';

% generic frequency set up

fmax=1/(2.0*Dt);

Df=fmax/(N/2);

f=Df*[0:N/2,-N/2+1:-1]';

Nf=N/2+1;

Dw=2*pi*Df;

w=Dw*[0:N/2,-N/2+1:-1]';

Nw=Nf;

% low amplitude periodic signal buried in the noise

d = d + 0.25*sdtrue*cos(2*pi*(fmax/4)*t);

% Hamming window function

W = 0.54 - 0.46*cos(2*pi*[0:N-1]'/(N-1));

d = W.*d;

% window changes variance of signal by ff

ff = sum(W.*W)/N;

% true and estimated variance of d

sd2est=std(d)^2;

fprintf('true data variance %.4f\n', ff*sd2true);

true data variance 25.4092

fprintf('estimated data variance %.4f\n', sd2est);

estimated data variance 27.1697

% compute power spectral density

fftraw = fft(d); % raw DFT output

dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only

s2 = (2/T) * (abs(dbar).^2); % power spectral density

f=f(1:Nf);

w=w(1:Nf);

% statistics of power spectral density

s2meanest = mean(s2);

s2varest=std(s2)^2;

s2sigmaest=sqrt(s2varest);

sd2psd=Nf*Df*s2meanest;

fprintf('variance of data estimated from PSD %.4f\n', sd2psd);

variance of data estimated from PSD 27.1775

fprintf('\n');

% true mean and variance

p=2; % degrees of freedom

c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor

s2meantrue = p*c;

s2vartrue = 2*p*c^2;

disp('power spectral density\n');

power spectral density\n

fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);

true mean 1.2680 variance 1.6078

fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);

estimated mean 1.3562 variance 1.9392

fprintf('\n');

% plot data

figure(1)

clf;

set(gca,'LineWidth',2);

hold on;

plot(t,d,'k-','LineWidth',2);

plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );

plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );

% plot PSD

figure(2)

clf;

set(gca,'LineWidth',2);

hold on;

plot(f,s2,'k-','LineWidth',2);

% plot mean on spectra

plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);

% histogram of values

Nhist=100;

[s2hist, bins] = hist(s2,Nhist);

% plot histogram

figure(3)

clf;

set(gca,'LineWidth',2);

hold on;

axis( [min(bins), max(bins), 0, max(s2hist)] );

plot(bins,s2hist,'k-','LineWidth',2);

plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);

% plot true chi-squared distribution

Db=(bins(2)-bins(1))/c;

s2histtrue=Nf*Db*chi2pdf(bins/c,p);

plot(bins,s2histtrue,'k-','LineWidth',2);

% plot 95% confidence level on spectra

figure(2);

pv=0.95;

cv = chi2inv(0.95,p);

fprintf('%f confidence at %.4f\n',pv,cv*c);

0.950000 confidence at 3.7985

percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);

fprintf('%f percent of points below %.4f confidence\n',percent,pv);

93.762183 percent of points below 0.9500 confidence

fprintf('\n');

plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);

figure(3);

plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);

% probability that PSD exceeds maximum observed value

speak = max(s2);

ppeak = chi2cdf(speak/c,p);

fprintf('amplitude of largest peak %e\n', speak);

amplitude of largest peak 1.110185e+01

fprintf('probability of largest peak %.8f\n', ppeak);_

probability of largest peak 0.99984241

probability of PSD exceeding maximum observed value 0.00015759

Nf probability of PSD exceeding maximum observed value 0.07766711

edama_12_12, p.s.d. of AR1 process

Dt=0.6;

T = N*Dt;

t=Dt*[0:N-1]';

% standard setup of frequency axis, f

fmax=1/(2.0*Dt);

Df=fmax/(N/2);

f=Df*[0:N/2,-N/2+1:-1]';

Nf=N/2+1;

Dw=2*pi*Df;

w=Dw*[0:N/2,-N/2+1:-1]';

Nw=Nf;

% delete nonnegative frequencies from axes

f=f(1:Nf);

w=w(1:Nf);

% plot of spectra with different beta's

betalist = [0.25, 0.5, 0.75]';

fprintf('p.s.d. of AR1 process with these betas');

p.s.d. of AR1 process with these betas

betalist'

ans =

    0.2500    0.5000    0.7500

figure(1);

clf;

hold on;

axis( [0, fmax, 0, 1/(1+(betalist(end)^2)-2*betalist(end)) ] );

for beta=betalist'

denom = (1 + (beta^2) - 2*beta*cos(2*pi*0.5*f/fmax) );

plot( f, 1./denom, 'k-', 'LineWidth', 2 );

end

xlabel('f');

ylabel('|u(f)|^2');

% edama_12_13, 95% confidence limit of AR1 process

clear all;

fprintf('edama_12_13, 95%% confidence limit of AR1 process\n');

edama_12_13, 95% confidence limit of AR1 process

% tunable constants

beta = 0.60; % beta of AR1 process

sigma = 100; % sigma of x

N=1024;

tmaxplot=120;

ADDSINUSOID=0; % add sinusoid to random timeseries on 1

EXACT95=0; % Exact confidence on 1, (mean+2sigma) approx on 0

% standard set up a time axis, t

Dt=0.6;

T = N*Dt;

t=Dt*[0:N-1]';

% standard setup of frequency axis, f

fmax=1/(2.0*Dt);

Df=fmax/(N/2);

f=Df*[0:N/2,-N/2+1:-1]';

Nf=N/2+1;

Dw=2*pi*Df;

w=Dw*[0:N/2,-N/2+1:-1]';

Nw=Nf;

% uncorrelated random time series x(t)

x = random('Normal',0,sigma,N,1);

% AR1 process y(t)

y = zeros(N,1);

y(1) = x(1);

for i=[2:N]

y(i) = beta*y(i-1) + x(i);

end

if( ADDSINUSOID )

sigmay2 = var(y);

sigmay = sqrt(sigmay2);

Ac=0.25*sigmay;

fc=fmax/4;

y = y + Ac*cos(2*pi*fc*t);

fprintf('A small amplitude sinusoid of amplitude %.2f and frequency %.2f has been added\n',Ac,fc)

end

% plot entire time series

fprintf('plot time series x(t) and y(t)\n');

plot time series x(t) and y(t)

figure(1);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

axis( [0, Dt*(N-1), min(x), max(x) ] );

plot( t, x, 'k-', 'LineWidth', 2 );

xlabel('t');

ylabel('x(t)');

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

axis( [0, Dt*(N-1), min(y), max(y) ] );

plot( t, y, 'k-', 'LineWidth', 2 );

xlabel('t');

ylabel('y(t)');

% plot enlarged portion of time series

fprintf('plot enlarged portions of time series x(t) and y(t)\n');

plot enlarged portions of time series x(t) and y(t)

figure(2);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

axis( [0, tmaxplot, min(x), max(x) ] );

plot( t, x, 'k-', 'LineWidth', 2 );

xlabel('t');

ylabel('x(t)');

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

axis( [0, tmaxplot, min(y), max(y) ] );

plot( t, y, 'k-', 'LineWidth', 2 );

xlabel('t');

ylabel('y(t)');

% compute power spectral density

xfftraw = fft(x); % raw DFT output

xbar = Dt*xfftraw(1:Nf); % Fourier transform, + freqs only

xs2 = (2/T) * (abs(xbar).^2); % power spectral density

xs2=xs2(1:Nf);

yfftraw = fft(y); % raw DFT output

ybar = Dt*yfftraw(1:Nf); % Fourier transform, + freqs only

ys2 = (2/T) * (abs(ybar).^2); % power spectral density

% delete nonnegative frequencies from axes

f=f(1:Nf);

w=w(1:Nf);

% plot of spectra

fprintf('plot power spretral density, |x(f)|^2 and |y(f)|^2\n');

plot power spretral density, |x(f)|^2 and |y(f)|^2

figure(3);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

axis( [0, max(f), 0, max(xs2) ] );

plot( f, xs2, 'k-', 'LineWidth', 2 );

xlabel('f');

ylabel('|x(f)|^2');

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

axis( [0, max(f), 0, max(ys2) ] );

plot( f, ys2, 'k-', 'LineWidth', 2 );

xlabel('f');

ylabel('|y(f)|^2');

% plot of spectra, onto which other stiff will be overlain

fprintf('plot power spretral density, |x(f)|^2 and |y(f)|^2\n');

plot power spretral density, |x(f)|^2 and |y(f)|^2

fprintf('with other stuff overlain\n');

with other stuff overlain

figure(4);

clf;

subplot(2,1,1);

set(gca,'LineWidth',2);

hold on;

axis( [0, max(f), 0, max(xs2) ] );

plot( f, xs2, 'k-', 'LineWidth', 2 );

xlabel('f');

ylabel('|x(f)|^2');

subplot(2,1,2);

set(gca,'LineWidth',2);

hold on;

axis( [0, max(f), 0, max(ys2) ] );

plot( f, ys2, 'k-', 'LineWidth', 2 );

xlabel('f');

ylabel('|y(f)|^2');

Nr = 1000;

Nr95 = 950;

Nr50 = 500;

S = zeros(Nf, Nr );

% repeat for Nr realizations

for ir=[1:Nr]

% uncorrelated time series

x = random('Normal',0,sigma,N,1);

% build AR1 time sereis

y = zeros(N,1);

y(1) = x(1);

for i=[2:N]

y(i) = beta*y(i-1) + x(i);

end

yfftraw = fft(y); % FFT of y

ybar = Dt*yfftraw(1:Nf); % FT of nonnegative f's

ys2 = (2/T) * (abs(ybar).^2); % p.s.d.

S(:,ir)=ys2(:); % save p.s.d.

end

% statistics of the ensemble of p.s.d.'s

S95 = zeros(Nf,1); % 95 percent level

Smean = zeros(Nf,1); % mean

for ifr=[1:Nf] % loop over frequencies

v = S(ifr,:)'; % the p.s.d. at one frequency

v = sort(v); % sort the values

S95(ifr)=v(Nr95); % 95% less than this value

Smean(ifr)=mean(v); % mean

end

plot( f, S95, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );

plot( f, Smean, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );

% theoretical spectrum

p=2; % degrees of freedom

% in this code, the window function is just w(i)=1

ff = 1; % variance of window function

c = ( ff*(sigma^2) ) / (2*Nf*Df); % scaling constant

xs2meantrue = p*c; % mean of p.s.d. of x

xs2vartrue = 2*p*c^2; % variance of p.s.d. of x

fprintf('scaling constant c: %e\n',c);

scaling constant c: 5.988304e+03

fprintf('pc: %e\n',xs2meantrue);

pc: 1.197661e+04

fprintf('2pc^2: %e\n',xs2vartrue);

2pc^2: 1.434391e+08

% the AR1 filter v(f)

denom = (1 + (beta^2) - 2*beta*cos(pi*f/fmax) );

% mean of p.s.d. of y

ys2mean = xs2meantrue ./ denom;

% variance of p.s.d. of y

if( EXACT95 )

C95 = chi2inv(0.95,p);

ys295 = C95*(c ./ denom);

fprintf('Exact 95%% confidence level used\n');

else

ys295 = ys2mean + 2*sqrt(xs2vartrue) ./ denom;

fprintf('Approximate 95%% confidence level used\n');

end

Approximate 95% confidence level used

plot( f, ys2mean, 'k--', 'LineWidth', 2 );

plot( f, ys295, 'k-', 'LineWidth', 2 );

% edama_12_14: bootstrap statistics of slope

% in least squares fit of a straight line to a small

% fragment of Black Rock Forest temperature data

clear all;

fprintf('edama_12_14: bootstrap statistics of slope\n');

edama_12_14: bootstrap statistics of slope

% load data and set up time axis

dorig = load('../Data/brf_fragment.txt');

N = length(dorig);

Dt=1.0;

torig = 10*Dt+Dt*[0:N-1]'; % hours from start of orignal record

% least-squares slope and its variance of original data

M=2;

G=zeros(N,M);

G(:,1)=1;

G(:,2)=torig;

mest=(G'*G)\(G'*dorig);

slopeorig=mest(2);

% usual error estimate based on propagagtion

% of posterior estimate of sigma2 of data into

% a sigma2 of slope

e = dorig-G*mest;

E = e'*e;

sigma2dorig = E/N;

Cmorig = sigma2dorig * inv(G'*G);

sigma2morig = Cmorig(2,2);

% number of realizations;

Nr = 10000;

slope = zeros(Nr,1);

for p = [1:Nr] % loop over realizations

% resample

rowindex=unidrnd(N,N,1);

t = torig(rowindex);

d = dorig(rowindex);

% straight line fit

M=2;

G=zeros(N,M);

G(:,1)=1;

G(:,2)=t;

mest=(G'*G)\(G'*d);

slope(p)=mest(2);

end

% compute histogram of slopes and normalize

% normalize to a probability density distribution

% with Dt*sum(pbootstrap)=1;

Nbins = 100;

[shist, bins]=hist(slope, Nbins);

Db = bins(2)-bins(1);

pbootstrap = shist / (Db*sum(shist));

% compute Normal distribution of slopes

% using parameters estimated from original data

pnormal = (1/sqrt(2*pi*sigma2morig)) * exp(-((bins-slopeorig).^2)/(2*sigma2morig));

figure(1);

clf;

set(gca,'LineWidth',2);

hold on;

axis( [bins(1), bins(Nbins), 0, 1.2*max(pbootstrap)]);

plot(bins,pbootstrap,'k-','LineWidth',2);

plot(bins,pnormal,'k-','LineWidth',2);

xlabel('slope, b');

ylabel('p(b)');

% mean (expectation) and variance of bootstrap distibution

mb = Db*sum(bins.*pbootstrap);

vb = Db*sum(((bins-mb).^2).*pbootstrap);

% 5% and 95% confidence of bootstrap distibution

Pbootstrap = Db*cumsum(pbootstrap);

ilo = find(Pbootstrap>=0.025,1);

ihi = find(Pbootstrap>=0.975,1);

blo = bins(ilo);

bhi = bins(ihi);

fprintf('standard method: slope %.4f +/- %.4f (2 sigma)\n', slopeorig, 2*sqrt(sigma2morig));

standard method:   slope 0.5252 +/- 0.0150 (2 sigma)

fprintf('bootstrap method: slope %.4f +/- %.4f (2 sigma)\n', mb, 2*sqrt(vb));

bootstrap method:  slope 0.5250 +/- 0.0173 (2 sigma)

fprintf('bootstrap method: slope between %.4f and %.4f (95 percent confidence)\n', blo, bhi);

bootstrap method:  slope between 0.5079 and 0.5424 (95 percent confidence)

% edama_12_15: bootstrap statistics using Atlantic Rock data set

% Atlantic Rocks dataset

% bootstrap error analysis for

% CaO/Na2O ratio of varimax factor 2

% note CaO/Na2O are elements 6/7

clear all;

fprintf('eda12_15: bootstrap statistics using Atlantic Rock data set\n');

eda12_15: bootstrap statistics using Atlantic Rock data set

% load data

Draw = load('../Data/rocks.txt');

% 1 SiO2

% 2 TiO2

% 3 Al203

% 4 FeO-total

% 5 MgO

% 6 CaO

% 7 Na2O

% 8 K2O

Ns = size(Draw);

N = Ns(1);

M = Ns(2);

% number of realizations

Nr=1000;

ratio = zeros(Nr,1);

for ir = [1:Nr]

% randomply sample Draw

D = zeros(N,M);

D = Draw(unidrnd(N,N,1),:);

% compute factors and factor loadings using singular value decompostion

[U, S, V] = svd(D,0);

% keep only first five singular values

P=5;

F = V(:,1:P)';

C = U(:,1:P)*S(1:P,1:P);

% initial rotated factor matrix, FP, and rotation matrix, MR

MR=eye(P,P);

FP=F;

% spike these factors using the varimax procedure

k = [2, 3, 4, 5]';

Nk = length(k);

for iter = [1:3]

for ii = [1:Nk]

for jj = [ii+1:Nk]

% spike factors i and j

i=k(ii);

j=k(jj);

% copy factors from matrix to vectors

fA = FP(i,:)';

fB = FP(j,:)';

% standard varimax procedure to determine rotation angle q

u = fA.^2 - fB.^2;

v = 2* fA .* fB;

A = 2*M*u'*v;

B = sum(u)*sum(v);

top = A - B;

C = M*(u'*u-v'*v);

D = (sum(u)^2)-(sum(v)^2);

bot = C - D;

q = 0.25 * atan2(top,bot);

cq = cos(q);

sq = sin(q);

fAp = cq*fA + sq*fB;

fBp = -sq*fA + cq*fB;

% put back into factor matrix, FP

FP(i,:) = fAp';

FP(j,:) = fBp';

% accumulate rotation

mA = MR(i,:)';

mB = MR(j,:)';

mAp = cq*mA + sq*mB;

mBp = -sq*mA + cq*mB;

MR(i,:) = mAp';

MR(j,:) = mBp';

end

% new factor loadings

CP=C*MR';

% ratio of elemnts 6 and 7 of factor 2

ratio(ir)=FP(2,6)/FP(2,7);

end

% compute histogram, convert to probability density distribution

Nb=100;

[rhist,bins] = hist(ratio,Nb);

Db=bins(2)-bins(1);

pbootstrap=rhist/(Db*sum(rhist));

% mean, variance

mr = Db * sum( bins .* pbootstrap );

vr = Db * sum( ((bins-mr).^2) .* pbootstrap );

disp('CaO/Na2O ratio of Factor 2');

CaO/Na2O ratio of Factor 2

fprintf('mean %f variance %.4f\n',mr,vr);

mean 0.485850 variance 0.0002

% confidence intervals

Pbootstrap=Db*cumsum(pbootstrap);

ilo = find( Pbootstrap>=0.025, 1 );

ihi = find( Pbootstrap>=0.975, 1 );

rlo = bins(ilo);

rhi = bins(ihi);

fprintf('%.4f < ratio < %.4f (95 percent confidence)\n',rlo,rhi);

0.4591 < ratio < 0.5128 (95 percent confidence)

figure(1);

clf;

set(gca,'LineWidth',2);

hold on;

axis( [bins(1), bins(Nb), 0, max(pbootstrap) ] );

plot( bins, pbootstrap, 'k-', 'LineWidth', 2);

plot( [mr, mr], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);

plot( [rhi, rhi], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);

plot( [rlo, rlo], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);

xlabel('CaO/Na2O ratio, r');

ylabel('p(r)');