Live Script edama_12
This Live Script supports Chapter 12 of Environmental Data Analysis with MATLAB or PYTHON, Third Edition, by WIlliam Menke
The script naming system has the form edama_CC_SS, where CC is the chapter number and SS is the script number.
% edama_12_01, p.d.f. of square of a random variable
clear all;
fprintf('edama_12_01, p.d.f. of square of a random variable\n');
edama_12_01, p.d.f. of square of a random variable
% standard set-up of e-axis
L=101;
emin=0.01;
emax=3;
De=(emax-emin)/(L-1);
e=emin+De*[0:L-1]';
% Nornal p.d.f. for p(e), for e>=0
pe = sqrt(2/pi)*exp(-0.5*e.^2);
% integrate to get area under p(e) (should be 1)
Ae = De*sum(pe);
% stadard setup of E axis
Emin=0.01;
Emax=3;
DE=(Emax-Emin)/(L-1);
E=Emin+DE*[0:L-1]';
% formula for p(E)
pE = sqrt(1./(2*pi*E)).*exp(-0.5*E);
% integrate to get area under p(E) (should be 1)
AE = DE*sum(pE);
% plot
eda_draw(' ',pe,'caption p(e)',' ',pE,'caption p(E)');
fprintf('area under p.s.f.s e: %.2f, E: %.2f\n', Ae, AE);
area under p.s.f.s e: 1.00, E: 0.91
% edama_12_02: chi-squared p.d.f.
clear all;
fprintf('edama_12_02: chi-squared p.d.f.\n');
edama_12_02: chi-squared p.d.f.
% standard set-up of X2-axis
L=1001;
X2min=0;
X2max=10;
DX2=(X2max-X2min)/(L-1);
X2=X2min+DX2*[0:L-1]';
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
axis( [X2min, X2max, 0, 1] );
for N = [1:5] % loop over degrees of freedom N
pX2 = chi2pdf(X2,N); % chi-squared p.d.f.
plot(X2,pX2,'k-','LineWidth',2);
end
% edama_12_03: student t p.d.f.
clear all;
fprintf('edama_12_03: student t p.s.f.\n');
edama_12_03: student t p.s.f.
% standard set-up of t-axis
L=1001;
tmin=-5;
tmax=5;
Dt=(tmax-tmin)/(L-1);
t=tmin+Dt*[0:L-1]';
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
axis( [tmin, tmax, 0, 0.5] );
% plot p(t,N) for several values of N
for N = [1:5] % loop over degrees of freedom N
pt = tpdf(t,N);
plot(t,pt,'k-','LineWidth',2);
end
% edama_12_04: F p.d.f.
clear all;
fprintf('edama_12_04: F p.d.f.\n');
edama_12_04: F p.d.f.
% standard set-up of F-axis
L=1001;
Fmin=0;
Fmax=5;
DF=(Fmax-Fmin)/(L-1);
F=Fmin+DF*[0:L-1]';
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
axis( [Fmin, Fmax, 0, 1] );
i=1;
for M = [2, 5, 25, 50] % loop over degrees of freedom M
subplot(4,1,i);
i=i+1;
set(gca,'LineWidth',2);
hold on;
for N = [2, 5, 25, 50] % loop over degrees of freedom N
pF = fpdf(F,N,M);
plot(F,pF,'k-','LineWidth',2);
end
end
% edama_12_05: simulated particle size data example
% note: original filenames ../Data/testA.txt and ../Data/testB.txt
% changed to ../Data/mytestA.txt and ../Data/mytestB.txt
% so that this script will not overwrite the originals
clear all;
fprintf('edama_12_05: simulated particle size data example\n');
edama_12_05: simulated particle size data example
N=25; % number of data
d=100; % mean
s2dtrue=1; % variance
sdtrue=sqrt(s2dtrue);
% calibration test 1
dobs1=random('Normal',d,sdtrue,N,1);
dlmwrite('../Data/mytestA.txt',dobs1,'\t');
fprintf('wrote file ../Data/mytestA.txt\n');
wrote file ../Data/mytestA.txt
% calibration test 2
dobs2=random('Normal',d,sdtrue,N,1);
dlmwrite('../Data/mytestB.txt',dobs2,'\t');
fprintf('wrote file ../Data/mytestB.txt\n');
wrote file ../Data/mytestB.txt
% edama_12_06: Z and chi-squared tests of particle size data
clear all;
fprintf('edama_12_06: statistics of partical size data set\n');
edama_12_06: statistics of partical size data set
% Z distribution, two-sided test
Z=0.278;
Pi=normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1);
Po=1-(normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1));
fprintf('Z distribution, two sided test:\n');
Z distribution, two sided test:
fprintf(' The probability that Z is inside the interval %.4f to %.4f\n',-Z, Z);
The probability that Z is inside the interval -0.2780 to 0.2780
fprintf(' is %.4f\n',Pi);
is 0.2190
fprintf(' The probability that Z is outside the interval %.4f to %.4f\n',-Z, Z);
The probability that Z is outside the interval -0.2780 to 0.2780
fprintf(' is %.4f\n',Po);
is 0.7810
% Chi-squared distribution, one-sided test
X2=21.9;
N=25;
Pi=chi2cdf(X2,N);
Po=1-chi2cdf(X2,N);
fprintf('Chi-squared distribution with %d degrees of freedom:\n',N);
Chi-squared distribution with 25 degrees of freedom:
fprintf(' The Probability that X2 is inside the interval 0 to %.4f\n',X2);
The Probability that X2 is inside the interval 0 to 21.9000
fprintf(' is %.4f\n', Pi);
is 0.3585
fprintf(' The Probability that X2 is outside the interval 0 to %.4f\n',X2);
The Probability that X2 is outside the interval 0 to 21.9000
fprintf(' is %.4f\n', Po);
is 0.3585
% edama_12_07, statistical tests of particle size data
dobsA=load('../Data/testA.txt');
dtrueA = 100.0; % known size of calibrations standard
% calculate useful statistics
fprintf('file testA.txt\n');
file testA.txt
NA=length(dobsA);
fprintf('N %d\n', NA );
N 25
dbartrueA = dtrueA;
fprintf('true mean %.4f', dbartrueA );
true mean 100.0000
dbarestA = sum(dobsA)/NA;
fprintf('estimated mean %.4f\n', dbarestA );
estimated mean 100.1610
sd2trueA=1;
fprintf('true variance %.4f\n', sd2trueA );
true variance 1.0000
sdtrueA=sqrt(sd2trueA);
fprintf('true root-variance %.4f\n', sdtrueA );
true root-variance 1.0000
sd2estA=((dobsA-dtrueA)'*(dobsA-dtrueA))/NA;
fprintf('estimated variance (given true data) %.4f\n', sd2estA );
estimated variance (given true data) 0.8964
sdestA=sqrt(sd2estA);
fprintf('estimated root-variance (given true data) %.4f\n', sdestA );
estimated root-variance (given true data) 0.9468
sd2estpA=((dobsA-dbarestA)'*(dobsA-dbarestA))/(NA-1);
fprintf('estimated variance (given estimated mean) %.4f\n', sd2estpA );
estimated variance (given estimated mean) 0.9068
sdestpA=sqrt(sd2estpA);
fprintf('estimated root-variance (given estimated mean) %.4f\n', sdestpA );
estimated root-variance (given estimated mean) 0.9522
ZA = (dbarestA-dbartrueA)/(sdtrueA/sqrt(NA));
fprintf('Z %.4f\n', ZA );
Z 0.8052
PZA = 1 - (normcdf(abs(ZA),0,1)-normcdf(-abs(ZA),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(ZA), PZA );
Probability of |Z| exceeding 0.8052 is 0.4207
chi2A = ((dobsA-dtrueA)'/sdtrueA) * ((dobsA-dtrueA)/sdtrueA);
fprintf('chi-squared %.4f\n', chi2A );
chi-squared 22.4110
Pchi2A = 1 - chi2cdf(chi2A,NA);
fprintf('Probability of chi2 exceeding %.4f is %.4f\n', chi2A, Pchi2A );
Probability of chi2 exceeding 22.4110 is 0.6119
tA = (dbarestA-dbartrueA)/(sdestA/sqrt(NA));
fprintf('t %.4f\n', tA );
t 0.8504
PtA = 1 - (tcdf(abs(tA),NA)-tcdf(-abs(tA),NA));
fprintf('Probability of |t| exceeding %f is %.4f\n', abs(tA), PtA );
Probability of |t| exceeding 0.850439 is 0.4032
fprintf('\n');
% load second dataset
dobsB=load('../Data/testB.txt');
dtrueB=100; % known size of calibration standard
% calculate useful statistics
fprintf('file testB.txt');
file testB.txt
NB=length(dobsB);
fprintf('N %d', NB );
N 25
dbartrueB = dtrueB;
fprintf('true mean %.4f\n', dbartrueB );
true mean 100.0000
dbarestB = sum(dobsB)/NB;
fprintf('estimated mean %.4f\n', dbarestB );
estimated mean 99.7177
sd2trueB=1;
fprintf('true variance %.4f\n', sd2trueB );
true variance 1.0000
sdtrueB=sqrt(sd2trueB);
fprintf('true root-variance %.4f\n', sdtrueB );
true root-variance 1.0000
sd2estB=((dobsB-dtrueB)'*(dobsB-dtrueB))/NB;
fprintf('estimated variance (given true data) %.4f\n', sd2estB );
estimated variance (given true data) 0.5854
sdestB=sqrt(sd2estB);
fprintf('estimated root-variance (given true data) %.4f\n', sdestB );
estimated root-variance (given true data) 0.7651
sd2estpB=((dobsB-dbarestB)'*(dobsB-dbarestB))/(NB-1);
fprintf('estimated variance (given estimated mean) %.4f\n', sd2estpB );
estimated variance (given estimated mean) 0.5267
sdestpB=sqrt(sd2estpB);
fprintf('estimated root-variance (given estimated mean) %.4f\n', sdestpB );
estimated root-variance (given estimated mean) 0.7258
ZB = (dbarestB-dbartrueB)/(sdtrueB/sqrt(NB));
fprintf('Z %.4f\n', ZB );
Z -1.4116
PZB = 1 - (normcdf(abs(ZB),0,1)-normcdf(-abs(ZB),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(ZB), PZB );
Probability of |Z| exceeding 1.4116 is 0.1581
chi2B = ((dobsB-dtrueB)'/sdtrueB) * ((dobsB-dtrueB)/sdtrueB);
fprintf('chi-squared %.4f\n', chi2B );
chi-squared 14.6340
Pchi2B = 1 - chi2cdf(chi2B,NB);
fprintf('Probability of chi2 exceeding %.4f is %.4f\n', chi2B, Pchi2B );
Probability of chi2 exceeding 14.6340 is 0.9495
tB = (dbarestB-dbartrueB)/(sdestB/sqrt(NB));
fprintf('t %f', tB );
t -1.845018
PtB = 1 - (tcdf(abs(tB),NB)-tcdf(-abs(tB),NB));
fprintf('Probability of |t| exceeding %.4f is %.4f\n', abs(tB), PtB );
Probability of |t| exceeding 1.8450 is 0.0769
disp(' ');
% are the means of the two tests significantly different ?
fprintf('difference between two means with Z-test');
difference between two means with Z-test
A = dbarestA - dbarestB;
B = sqrt(sd2trueA/NA+sd2estpB/NB);
Z = A/B;
fprintf('Z %.4f\n', Z );
Z 1.7941
% are the means of the two tests significantly different ?
PZ = 1 - (normcdf(abs(Z),0,1)-normcdf(-abs(Z),0,1));
fprintf('Probability of |Z| exceeding %.4f is %.4f\n', abs(Z), PZ );
Probability of |Z| exceeding 1.7941 is 0.0728
disp(' ');
fprintf('difference between two means with t-test\n');
difference between two means with t-test
A = dbarestA - dbarestB;
B = sqrt(sd2estpA/NA+sd2estpB/NB);
t = A/B;
C = (sd2estpA/NA+sd2estpB/NB)^2;
D = ((sd2estpA/NA)^2)/(NA-1) + ((sd2estpB/NB)^2)/(NB-1);
M=round(C/D);
fprintf('t %.4f degrees of freedom %d\n', t, M );
t 1.8515 degrees of freedom 45
% are the means of the two tests significantly different ?
Pt = 1 - (tcdf(abs(t),M)-tcdf(-abs(t),M));
fprintf('Probability of |t| exceeding %.4f is %.4f\n', abs(t), Pt );
Probability of |t| exceeding 1.8515 is 0.0707
fprintf('\n');
% are the variances of the two tests significantly different ?
F = (chi2A/NA) / (chi2B/NB);
if( F<1 )
F=1/F;
end
fprintf('1/F %.4f F %.4f\n', 1/F, F );
1/F 0.6530 F 1.5314
PF = 1 - (fcdf(F,NA,NB)-fcdf(1/F,NA,NB));
fprintf('Probability of F outside interval %.4f %.4f is %.4f\n', 1/F, F, PF );
Probability of F outside interval 0.6530 1.5314 is 0.2933
% edama_12_08: F-test to assess difference in fit
% illustration of F-test to assess difference in fit of two models
% using a small fragment of Black Rock Forest temperature data
clear all;
fprintf('edama_12_08: F-test to assess difference in fit\n');
edama_12_08: F-test to assess difference in fit
% load data from file
d = load('../Data/brf_fragment.txt');
N = length(d);
t = 10+[0:N-1]'; % hours from start of orignal record
figure(1);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
plot(t,d,'ko','LineWidth',2);
% fit 1, straight line
M=2;
G=zeros(N,M);
G(:,1)=1;
G(:,2)=t;
mestA=(G'*G)\(G'*d);
dpreA = G*mestA;
plot(t,dpreA,'k-','LineWidth',2);
title('linear fit');
xlabel('time t, hours');
ylabel('d(i)');
EA = (d-dpreA)'*(d-dpreA);
vA = N-M;
fprintf('linear error %.4f, degrees of freedom %d\n', EA, vA);
linear error 31.5222, degrees of freedom 49
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
plot(t,d,'ko','LineWidth',2);
% fit 2, cubic line
M=4;
G=zeros(N,M);
G(:,1)=1;
G(:,2)=t;
G(:,3)=t.*t;
G(:,4)=t.^3;
mestB=(G'*G)\(G'*d);
dpreB = G*mestB;
plot(t,dpreB,'k-','LineWidth',2);
title('cubic fit');
xlabel('time t, hours');
ylabel('d(i)');
EB = (d-dpreB)'*(d-dpreB);
vB = N-M;
fprintf('cubic error %.4f, degrees of freedom %d\n', EB, vB);
cubic error 27.1846, degrees of freedom 47
fprintf('improvement in error %.4f\n', (EA-EB)/EA);
improvement in error 0.1376
F = (EA/vA) / (EB/vB);
fprintf('1/F %.4f F %.4f\n', 1/F, F);
1/F 0.8991 F 1.1122
if( F<1 )
F=1/F
end
P = 1 - (fcdf(F,vA,vB)-fcdf(1/F,vA,vB));
fprintf('P(F<%.4f or F>%.4f) = %.4f\n', 1/F, F, P);
P(F<0.8991 or F>1.1122) = 0.7141
if( P>0.05)
fprintf('Null hypothesis cannot be excluded to 95% confidence\n')
else
fprintf('Null hypothesis can be excluded to 95% confidence\n')
end
Null hypothesis cannot be excluded to 95
% edama_12_09: variance of power spectral density (untapered)
% case of untapered random timeseries
clear all;
fprintf('edama_12_09: variance of power spectral density (untapered)\n');
edama_12_09: variance of power spectral density (untapered)
% create an uncorrelated random timeseries
N=2^10;
sd2true=64.0;
sdtrue=sqrt(sd2true);
d = random('Normal',0,sdtrue,N,1);
% generic time/frequency set up
Dt=0.025;
T = N*Dt;
t=Dt*[0:N-1]';
fmax=1/(2.0*Dt);
Df=fmax/(N/2);
f=Df*[0:N/2,-N/2+1:-1]';
Nf=N/2+1;
Dw=2*pi*Df;
w=Dw*[0:N/2,-N/2+1:-1]';
Nw=Nf;
% constant window function
W = ones(N,1);
d = W.*d;
% window changes variance of signal by ff
ff = sum(W.*W)/N;
% true and estimated variance of d
sd2est=std(d)^2;
fprintf('true data variance %.4f\n', ff*sd2true);
true data variance 64.0000
fprintf('estimated data variance %.4f\n', sd2est);
estimated data variance 62.2814
% compute power spectral density
fftraw = fft(d); % raw DFT output
dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only
s2 = (2/T) * (abs(dbar).^2); % power spectral density
f=f(1:Nf);
w=w(1:Nf);
% statistics of power spectral density
s2meanest = mean(s2);
s2varest=std(s2)^2;
s2sigmaest=sqrt(s2varest);
sd2psd=Nf*Df*s2meanest;
fprintf('variance of data estimated from PSD %.4f\n', sd2psd);
variance of data estimated from PSD 62.4121
fprintf('\n');
% true mean and variance
p=2; % degrees of freedom
c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor
s2meantrue = p*c;
s2vartrue = 2*p*c^2;
disp('power spectral density');
power spectral density
fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);
true mean 3.1938 variance 10.2001
fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);
estimated mean 3.1145 variance 10.0289
fprintf('\n');
% plot data
figure(1)
clf;
set(gca,'LineWidth',2);
hold on;
plot(t,d,'k-','LineWidth',2);
plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );
plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
% plot PSD
figure(2)
clf;
set(gca,'LineWidth',2);
hold on;
plot(f,s2,'k-','LineWidth',2);
% plot mean on spectra
plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);
% histogram of values
Nhist=100;
[s2hist, bins] = hist(s2,Nhist);
% plot histogram
figure(3)
clf;
set(gca,'LineWidth',2);
hold on;
axis( [min(bins), max(bins), 0, max(s2hist)] );
plot(bins,s2hist,'k-','LineWidth',2);
plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);
% plot true chi-squared distribution
Db=(bins(2)-bins(1))/c;
s2histtrue=Nf*Db*chi2pdf(bins/c,p);
plot(bins,s2histtrue,'k-','LineWidth',2);
% plot 95% confidence level on spectra
figure(2);
pv=0.95;
cv = chi2inv(0.95,p);
fprintf('%.4f confidence at %.4f',pv,cv*c);
0.9500 confidence at 9.5677
percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);
fprintf('%.4f percent of points below %.4f confidence\n',percent,pv);
95.9064 percent of points below 0.9500 confidence
disp(' ');
plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);
figure(3);
plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);
% edama_12_10: variance of power spectral density (tapered)
% case of random timeseries tapered with Hamming window
clear all;
fprintf('edama_12_09: variance of power spectral density (tapered)\n');
edama_12_09: variance of power spectral density (tapered)
% create an uncorrelated random timeseries
N=2^10;
sd2true=64.0;
sdtrue=sqrt(sd2true);
d = random('Normal',0,sdtrue,N,1);
% generic time/frequency set up
Dt=0.025;
T = N*Dt;
t=Dt*[0:N-1]';
fmax=1/(2.0*Dt);
Df=fmax/(N/2);
f=Df*[0:N/2,-N/2+1:-1]';
Nf=N/2+1;
Dw=2*pi*Df;
w=Dw*[0:N/2,-N/2+1:-1]';
Nw=Nf;
% Hamming window function
W = 0.54 - 0.46*cos(2*pi*[0:N-1]'/(N-1));
d = W.*d;
% window changes variance of signal by ff
ff = sum(W.*W)/N;
% true and estimated variance of d
sd2est=std(d)^2;
fprintf('true data variance %.4f\n', ff*sd2true);
true data variance 25.4092
fprintf('estimated data variance %.4f\n', sd2est);
estimated data variance 28.2656
% compute power spectral density
fftraw = fft(d); % raw DFT output
dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only
s2 = (2/T) * (abs(dbar).^2); % power spectral density
f=f(1:Nf);
w=w(1:Nf);
% statistics of power spectral density
s2meanest = mean(s2);
s2varest=std(s2)^2;
s2sigmaest=sqrt(s2varest);
sd2psd=Nf*Df*s2meanest;
fprintf('variance of data estimated from PSD %.4f', sd2psd);
variance of data estimated from PSD 28.2713
fprintf('\n');
% true mean and variance
p=2; % degrees of freedom
c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor
s2meantrue = p*c;
s2vartrue = 2*p*c^2;
disp('power spectral density');
power spectral density
fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);
true mean 1.2680 variance 1.6078
fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);
estimated mean 1.4108 variance 1.9641
disp(' ');
% plot data
figure(1)
clf;
set(gca,'LineWidth',2);
hold on;
plot(t,d,'k-','LineWidth',2);
plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );
plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
% plot PSD
figure(2)
clf;
set(gca,'LineWidth',2);
hold on;
plot(f,s2,'k-','LineWidth',2);
% plot mean on spectra
plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);
% histogram of values
Nhist=100;
[s2hist, bins] = hist(s2,Nhist);
% plot histogram
figure(3)
clf;
set(gca,'LineWidth',2);
hold on;
axis( [min(bins), max(bins), 0, max(s2hist)] );
plot(bins,s2hist,'k-','LineWidth',2);
plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);
% plot true chi-squared distribution
Db=(bins(2)-bins(1))/c;
s2histtrue=Nf*Db*chi2pdf(bins/c,p);
plot(bins,s2histtrue,'k-','LineWidth',2);
% plot 95% confidence level on spectra
figure(2);
pv=0.95;
cv = chi2inv(0.95,p);
fprintf('%.4f confidence at %.4f\n',pv,cv*c);
0.9500 confidence at 3.7985
percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);
fprintf('%.4f percent of points below %.4f confidence\n',percent,pv);
92.9825 percent of points below 0.9500 confidence
fprintf('\n');
plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);
figure(3);
plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);
% probability that PSD exceeds maximum observed value
speak = max(s2);
ppeak = chi2cdf(speak/c,p);
fprintf('largest spectral peak: %e\n',speak);
largest spectral peak: 9.602763e+00
fprintf('probability of largest peak: %.8f\n',ppeak);
probability of largest peak: 0.99948599
fprintf(' probability of PSD exceeding maximum observed value %.8f\n', 1-ppeak);
probability of PSD exceeding maximum observed value 0.00051401
fprintf('Nf probability of PSD exceeding maximum observed value %.8f\n', 1-(ppeak^Nf));
Nf probability of PSD exceeding maximum observed value 0.23183836
% edama_12_11: confidence of spectral peak
% for random timeseries plus a small amplitude sinusoid
clear all;
fprintf('edama_12_11: confidence of spectral peak\n');
edama_12_11: confidence of spectral peak
% create an uncorrelated random timeseries
N=2^10;
sd2true=64.0;
sdtrue=sqrt(sd2true);
d = random('Normal',0,sdtrue,N,1);
% generic time set up
Dt=0.025;
T = N*Dt;
t=Dt*[0:N-1]';
% generic frequency set up
fmax=1/(2.0*Dt);
Df=fmax/(N/2);
f=Df*[0:N/2,-N/2+1:-1]';
Nf=N/2+1;
Dw=2*pi*Df;
w=Dw*[0:N/2,-N/2+1:-1]';
Nw=Nf;
% low amplitude periodic signal buried in the noise
d = d + 0.25*sdtrue*cos(2*pi*(fmax/4)*t);
% Hamming window function
W = 0.54 - 0.46*cos(2*pi*[0:N-1]'/(N-1));
d = W.*d;
% window changes variance of signal by ff
ff = sum(W.*W)/N;
% true and estimated variance of d
sd2est=std(d)^2;
fprintf('true data variance %.4f\n', ff*sd2true);
true data variance 25.4092
fprintf('estimated data variance %.4f\n', sd2est);
estimated data variance 27.1697
% compute power spectral density
fftraw = fft(d); % raw DFT output
dbar = Dt*fftraw(1:Nf); % Fourier transform, + freqs only
s2 = (2/T) * (abs(dbar).^2); % power spectral density
f=f(1:Nf);
w=w(1:Nf);
% statistics of power spectral density
s2meanest = mean(s2);
s2varest=std(s2)^2;
s2sigmaest=sqrt(s2varest);
sd2psd=Nf*Df*s2meanest;
fprintf('variance of data estimated from PSD %.4f\n', sd2psd);
variance of data estimated from PSD 27.1775
fprintf('\n');
% true mean and variance
p=2; % degrees of freedom
c = ( ff*sd2true ) / (2*Nf*Df); % normalization factor
s2meantrue = p*c;
s2vartrue = 2*p*c^2;
disp('power spectral density\n');
power spectral density\n
fprintf('true mean %.4f variance %.4f\n',s2meantrue,s2vartrue);
true mean 1.2680 variance 1.6078
fprintf('estimated mean %.4f variance %.4f\n',s2meanest,s2varest);
estimated mean 1.3562 variance 1.9392
fprintf('\n');
% plot data
figure(1)
clf;
set(gca,'LineWidth',2);
hold on;
plot(t,d,'k-','LineWidth',2);
plot([0, T]', [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
plot([0, T]', [0, 0], 'k-', 'LineWidth', 1 );
plot([0, T]', [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-', 'LineWidth', 2 );
% plot PSD
figure(2)
clf;
set(gca,'LineWidth',2);
hold on;
plot(f,s2,'k-','LineWidth',2);
% plot mean on spectra
plot([0, fmax], [s2meantrue, s2meantrue],'k-','LineWidth',2);
% histogram of values
Nhist=100;
[s2hist, bins] = hist(s2,Nhist);
% plot histogram
figure(3)
clf;
set(gca,'LineWidth',2);
hold on;
axis( [min(bins), max(bins), 0, max(s2hist)] );
plot(bins,s2hist,'k-','LineWidth',2);
plot([s2meantrue, s2meantrue], [0, max(s2hist)],'k-','LineWidth',2);
% plot true chi-squared distribution
Db=(bins(2)-bins(1))/c;
s2histtrue=Nf*Db*chi2pdf(bins/c,p);
plot(bins,s2histtrue,'k-','LineWidth',2);
% plot 95% confidence level on spectra
figure(2);
pv=0.95;
cv = chi2inv(0.95,p);
fprintf('%f confidence at %.4f\n',pv,cv*c);
0.950000 confidence at 3.7985
percent=100 * sum(s2hist(find(bins<=cv*c))) / sum(s2hist);
fprintf('%f percent of points below %.4f confidence\n',percent,pv);
93.762183 percent of points below 0.9500 confidence
fprintf('\n');
plot([f(1), f(Nf)], [cv*c, cv*c],'k-','LineWidth',2);
figure(3);
plot([cv*c, cv*c], [0, max(s2hist)],'k-','LineWidth',2);
% probability that PSD exceeds maximum observed value
speak = max(s2);
ppeak = chi2cdf(speak/c,p);
fprintf('amplitude of largest peak %e\n', speak);
amplitude of largest peak 1.110185e+01
fprintf('probability of largest peak %.8f\n', ppeak);_
probability of largest peak 0.99984241
probability of PSD exceeding maximum observed value 0.00015759
Nf probability of PSD exceeding maximum observed value 0.07766711
edama_12_12, p.s.d. of AR1 process
Dt=0.6;
T = N*Dt;
t=Dt*[0:N-1]';
% standard setup of frequency axis, f
fmax=1/(2.0*Dt);
Df=fmax/(N/2);
f=Df*[0:N/2,-N/2+1:-1]';
Nf=N/2+1;
Dw=2*pi*Df;
w=Dw*[0:N/2,-N/2+1:-1]';
Nw=Nf;
% delete nonnegative frequencies from axes
f=f(1:Nf);
w=w(1:Nf);
% plot of spectra with different beta's
betalist = [0.25, 0.5, 0.75]';
fprintf('p.s.d. of AR1 process with these betas');
p.s.d. of AR1 process with these betas
betalist'
ans =
0.2500 0.5000 0.7500
figure(1);
clf;
hold on;
axis( [0, fmax, 0, 1/(1+(betalist(end)^2)-2*betalist(end)) ] );
for beta=betalist'
denom = (1 + (beta^2) - 2*beta*cos(2*pi*0.5*f/fmax) );
plot( f, 1./denom, 'k-', 'LineWidth', 2 );
end
xlabel('f');
ylabel('|u(f)|^2');
% edama_12_13, 95% confidence limit of AR1 process
clear all;
fprintf('edama_12_13, 95%% confidence limit of AR1 process\n');
edama_12_13, 95% confidence limit of AR1 process
% tunable constants
beta = 0.60; % beta of AR1 process
sigma = 100; % sigma of x
N=1024;
tmaxplot=120;
ADDSINUSOID=0; % add sinusoid to random timeseries on 1
EXACT95=0; % Exact confidence on 1, (mean+2sigma) approx on 0
% standard set up a time axis, t
Dt=0.6;
T = N*Dt;
t=Dt*[0:N-1]';
% standard setup of frequency axis, f
fmax=1/(2.0*Dt);
Df=fmax/(N/2);
f=Df*[0:N/2,-N/2+1:-1]';
Nf=N/2+1;
Dw=2*pi*Df;
w=Dw*[0:N/2,-N/2+1:-1]';
Nw=Nf;
% uncorrelated random time series x(t)
x = random('Normal',0,sigma,N,1);
% AR1 process y(t)
y = zeros(N,1);
y(1) = x(1);
for i=[2:N]
y(i) = beta*y(i-1) + x(i);
end
if( ADDSINUSOID )
sigmay2 = var(y);
sigmay = sqrt(sigmay2);
Ac=0.25*sigmay;
fc=fmax/4;
y = y + Ac*cos(2*pi*fc*t);
fprintf('A small amplitude sinusoid of amplitude %.2f and frequency %.2f has been added\n',Ac,fc)
end
% plot entire time series
fprintf('plot time series x(t) and y(t)\n');
plot time series x(t) and y(t)
figure(1);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, Dt*(N-1), min(x), max(x) ] );
plot( t, x, 'k-', 'LineWidth', 2 );
xlabel('t');
ylabel('x(t)');
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, Dt*(N-1), min(y), max(y) ] );
plot( t, y, 'k-', 'LineWidth', 2 );
xlabel('t');
ylabel('y(t)');
% plot enlarged portion of time series
fprintf('plot enlarged portions of time series x(t) and y(t)\n');
plot enlarged portions of time series x(t) and y(t)
figure(2);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, tmaxplot, min(x), max(x) ] );
plot( t, x, 'k-', 'LineWidth', 2 );
xlabel('t');
ylabel('x(t)');
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, tmaxplot, min(y), max(y) ] );
plot( t, y, 'k-', 'LineWidth', 2 );
xlabel('t');
ylabel('y(t)');
% compute power spectral density
xfftraw = fft(x); % raw DFT output
xbar = Dt*xfftraw(1:Nf); % Fourier transform, + freqs only
xs2 = (2/T) * (abs(xbar).^2); % power spectral density
xs2=xs2(1:Nf);
yfftraw = fft(y); % raw DFT output
ybar = Dt*yfftraw(1:Nf); % Fourier transform, + freqs only
ys2 = (2/T) * (abs(ybar).^2); % power spectral density
% delete nonnegative frequencies from axes
f=f(1:Nf);
w=w(1:Nf);
% plot of spectra
fprintf('plot power spretral density, |x(f)|^2 and |y(f)|^2\n');
plot power spretral density, |x(f)|^2 and |y(f)|^2
figure(3);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, max(f), 0, max(xs2) ] );
plot( f, xs2, 'k-', 'LineWidth', 2 );
xlabel('f');
ylabel('|x(f)|^2');
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, max(f), 0, max(ys2) ] );
plot( f, ys2, 'k-', 'LineWidth', 2 );
xlabel('f');
ylabel('|y(f)|^2');
% plot of spectra, onto which other stiff will be overlain
fprintf('plot power spretral density, |x(f)|^2 and |y(f)|^2\n');
plot power spretral density, |x(f)|^2 and |y(f)|^2
fprintf('with other stuff overlain\n');
with other stuff overlain
figure(4);
clf;
subplot(2,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, max(f), 0, max(xs2) ] );
plot( f, xs2, 'k-', 'LineWidth', 2 );
xlabel('f');
ylabel('|x(f)|^2');
subplot(2,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, max(f), 0, max(ys2) ] );
plot( f, ys2, 'k-', 'LineWidth', 2 );
xlabel('f');
ylabel('|y(f)|^2');
Nr = 1000;
Nr95 = 950;
Nr50 = 500;
S = zeros(Nf, Nr );
% repeat for Nr realizations
for ir=[1:Nr]
% uncorrelated time series
x = random('Normal',0,sigma,N,1);
% build AR1 time sereis
y = zeros(N,1);
y(1) = x(1);
for i=[2:N]
y(i) = beta*y(i-1) + x(i);
end
yfftraw = fft(y); % FFT of y
ybar = Dt*yfftraw(1:Nf); % FT of nonnegative f's
ys2 = (2/T) * (abs(ybar).^2); % p.s.d.
S(:,ir)=ys2(:); % save p.s.d.
end
% statistics of the ensemble of p.s.d.'s
S95 = zeros(Nf,1); % 95 percent level
Smean = zeros(Nf,1); % mean
for ifr=[1:Nf] % loop over frequencies
v = S(ifr,:)'; % the p.s.d. at one frequency
v = sort(v); % sort the values
S95(ifr)=v(Nr95); % 95% less than this value
Smean(ifr)=mean(v); % mean
end
plot( f, S95, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( f, Smean, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
% theoretical spectrum
p=2; % degrees of freedom
% in this code, the window function is just w(i)=1
ff = 1; % variance of window function
c = ( ff*(sigma^2) ) / (2*Nf*Df); % scaling constant
xs2meantrue = p*c; % mean of p.s.d. of x
xs2vartrue = 2*p*c^2; % variance of p.s.d. of x
fprintf('scaling constant c: %e\n',c);
scaling constant c: 5.988304e+03
fprintf('pc: %e\n',xs2meantrue);
pc: 1.197661e+04
fprintf('2pc^2: %e\n',xs2vartrue);
2pc^2: 1.434391e+08
% the AR1 filter v(f)
denom = (1 + (beta^2) - 2*beta*cos(pi*f/fmax) );
% mean of p.s.d. of y
ys2mean = xs2meantrue ./ denom;
% variance of p.s.d. of y
if( EXACT95 )
C95 = chi2inv(0.95,p);
ys295 = C95*(c ./ denom);
fprintf('Exact 95%% confidence level used\n');
else
ys295 = ys2mean + 2*sqrt(xs2vartrue) ./ denom;
fprintf('Approximate 95%% confidence level used\n');
end
Approximate 95% confidence level used
plot( f, ys2mean, 'k--', 'LineWidth', 2 );
plot( f, ys295, 'k-', 'LineWidth', 2 );
% edama_12_14: bootstrap statistics of slope
% in least squares fit of a straight line to a small
% fragment of Black Rock Forest temperature data
clear all;
fprintf('edama_12_14: bootstrap statistics of slope\n');
edama_12_14: bootstrap statistics of slope
% load data and set up time axis
dorig = load('../Data/brf_fragment.txt');
N = length(dorig);
Dt=1.0;
torig = 10*Dt+Dt*[0:N-1]'; % hours from start of orignal record
% least-squares slope and its variance of original data
M=2;
G=zeros(N,M);
G(:,1)=1;
G(:,2)=torig;
mest=(G'*G)\(G'*dorig);
slopeorig=mest(2);
% usual error estimate based on propagagtion
% of posterior estimate of sigma2 of data into
% a sigma2 of slope
e = dorig-G*mest;
E = e'*e;
sigma2dorig = E/N;
Cmorig = sigma2dorig * inv(G'*G);
sigma2morig = Cmorig(2,2);
% number of realizations;
Nr = 10000;
slope = zeros(Nr,1);
for p = [1:Nr] % loop over realizations
% resample
rowindex=unidrnd(N,N,1);
t = torig(rowindex);
d = dorig(rowindex);
% straight line fit
M=2;
G=zeros(N,M);
G(:,1)=1;
G(:,2)=t;
mest=(G'*G)\(G'*d);
slope(p)=mest(2);
end
% compute histogram of slopes and normalize
% normalize to a probability density distribution
% with Dt*sum(pbootstrap)=1;
Nbins = 100;
[shist, bins]=hist(slope, Nbins);
Db = bins(2)-bins(1);
pbootstrap = shist / (Db*sum(shist));
% compute Normal distribution of slopes
% using parameters estimated from original data
pnormal = (1/sqrt(2*pi*sigma2morig)) * exp(-((bins-slopeorig).^2)/(2*sigma2morig));
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
axis( [bins(1), bins(Nbins), 0, 1.2*max(pbootstrap)]);
plot(bins,pbootstrap,'k-','LineWidth',2);
plot(bins,pnormal,'k-','LineWidth',2);
xlabel('slope, b');
ylabel('p(b)');
% mean (expectation) and variance of bootstrap distibution
mb = Db*sum(bins.*pbootstrap);
vb = Db*sum(((bins-mb).^2).*pbootstrap);
% 5% and 95% confidence of bootstrap distibution
Pbootstrap = Db*cumsum(pbootstrap);
ilo = find(Pbootstrap>=0.025,1);
ihi = find(Pbootstrap>=0.975,1);
blo = bins(ilo);
bhi = bins(ihi);
fprintf('standard method: slope %.4f +/- %.4f (2 sigma)\n', slopeorig, 2*sqrt(sigma2morig));
standard method: slope 0.5252 +/- 0.0150 (2 sigma)
fprintf('bootstrap method: slope %.4f +/- %.4f (2 sigma)\n', mb, 2*sqrt(vb));
bootstrap method: slope 0.5250 +/- 0.0173 (2 sigma)
fprintf('bootstrap method: slope between %.4f and %.4f (95 percent confidence)\n', blo, bhi);
bootstrap method: slope between 0.5079 and 0.5424 (95 percent confidence)
% edama_12_15: bootstrap statistics using Atlantic Rock data set
% Atlantic Rocks dataset
% bootstrap error analysis for
% CaO/Na2O ratio of varimax factor 2
% note CaO/Na2O are elements 6/7
clear all;
fprintf('eda12_15: bootstrap statistics using Atlantic Rock data set\n');
eda12_15: bootstrap statistics using Atlantic Rock data set
% load data
Draw = load('../Data/rocks.txt');
% 1 SiO2
% 2 TiO2
% 3 Al203
% 4 FeO-total
% 5 MgO
% 6 CaO
% 7 Na2O
% 8 K2O
Ns = size(Draw);
N = Ns(1);
M = Ns(2);
% number of realizations
Nr=1000;
ratio = zeros(Nr,1);
for ir = [1:Nr]
% randomply sample Draw
D = zeros(N,M);
D = Draw(unidrnd(N,N,1),:);
% compute factors and factor loadings using singular value decompostion
[U, S, V] = svd(D,0);
% keep only first five singular values
P=5;
F = V(:,1:P)';
C = U(:,1:P)*S(1:P,1:P);
% initial rotated factor matrix, FP, and rotation matrix, MR
MR=eye(P,P);
FP=F;
% spike these factors using the varimax procedure
k = [2, 3, 4, 5]';
Nk = length(k);
for iter = [1:3]
for ii = [1:Nk]
for jj = [ii+1:Nk]
% spike factors i and j
i=k(ii);
j=k(jj);
% copy factors from matrix to vectors
fA = FP(i,:)';
fB = FP(j,:)';
% standard varimax procedure to determine rotation angle q
u = fA.^2 - fB.^2;
v = 2* fA .* fB;
A = 2*M*u'*v;
B = sum(u)*sum(v);
top = A - B;
C = M*(u'*u-v'*v);
D = (sum(u)^2)-(sum(v)^2);
bot = C - D;
q = 0.25 * atan2(top,bot);
cq = cos(q);
sq = sin(q);
fAp = cq*fA + sq*fB;
fBp = -sq*fA + cq*fB;
% put back into factor matrix, FP
FP(i,:) = fAp';
FP(j,:) = fBp';
% accumulate rotation
mA = MR(i,:)';
mB = MR(j,:)';
mAp = cq*mA + sq*mB;
mBp = -sq*mA + cq*mB;
MR(i,:) = mAp';
MR(j,:) = mBp';
end
end
end
% new factor loadings
CP=C*MR';
% ratio of elemnts 6 and 7 of factor 2
ratio(ir)=FP(2,6)/FP(2,7);
end
% compute histogram, convert to probability density distribution
Nb=100;
[rhist,bins] = hist(ratio,Nb);
Db=bins(2)-bins(1);
pbootstrap=rhist/(Db*sum(rhist));
% mean, variance
mr = Db * sum( bins .* pbootstrap );
vr = Db * sum( ((bins-mr).^2) .* pbootstrap );
disp('CaO/Na2O ratio of Factor 2');
CaO/Na2O ratio of Factor 2
fprintf('mean %f variance %.4f\n',mr,vr);
mean 0.485850 variance 0.0002
% confidence intervals
Pbootstrap=Db*cumsum(pbootstrap);
ilo = find( Pbootstrap>=0.025, 1 );
ihi = find( Pbootstrap>=0.975, 1 );
rlo = bins(ilo);
rhi = bins(ihi);
fprintf('%.4f < ratio < %.4f (95 percent confidence)\n',rlo,rhi);
0.4591 < ratio < 0.5128 (95 percent confidence)
figure(1);
clf;
set(gca,'LineWidth',2);
hold on;
axis( [bins(1), bins(Nb), 0, max(pbootstrap) ] );
plot( bins, pbootstrap, 'k-', 'LineWidth', 2);
plot( [mr, mr], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);
plot( [rhi, rhi], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);
plot( [rlo, rlo], [0, 0.05*max(pbootstrap)], 'k-', 'LineWidth', 2);
xlabel('CaO/Na2O ratio, r');
ylabel('p(r)');