In [1]:
# edapy_12_00 clear all variables and import various modules

# History
# 2022/06/26 -  checked with most recent Python & etc. software
# 2024/05/26 -  Patches to accommodate new versions of numpy, scipy, matplotlib
#               patched dot product to return scalar value, and similar issues
#                  associated with deprecation of the equivalence of a 1x1 matrix and a scalar

%reset -f
import os
from datetime import date
from math import exp, pi, sin, cos, tan, sqrt, floor, ceil, log, atan2
import numpy as np
import scipy.sparse.linalg as las
import scipy.interpolate as ip
import scipy.spatial as sp
from scipy import sparse
import scipy.linalg as la
import scipy.signal as sg
import scipy.stats as stats
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap

# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, ..., each of which may be
# a number, a list of numbers, a tuple of numbers,
# an N-by-0 np array or an N-by-1 np array. The function
# also ensures that, if v is an np array, c is a
# copy of it, as contrasted to a view of it.
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many arguments,
# whereas c=eda_cvec1(v1) takes just one argument.
# I recommend always using eda_cvec(v1, v2, ...)
def eda_cvec(*argv):
    t = int;
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        Nt = Nt + N;
        if( N==0 ):
            continue; # skip vector of zero length
        if (t==int) and isinstance(v[0,0],float):
            t=float;
        elif isinstance(v[0,0],complex):
            t=complex;
    w = np.zeros((Nt,1),dtype=t);
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        w[Nt:Nt+N,0] = v[0:N,0];
        Nt = Nt + N;
    return w;

# function to make a numpy N-by-1 column vector
# c=eda_cvec1(v) from an entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and an N-by-1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also ensures that, if v is an np array, c is a
# copy of it, as contrasted to a view of it.
# This version takes just one input argument,
# whereas c=eda_cvec(v1,v2,...) concatenates
# many arguments.
def eda_cvec1(v):
    if isinstance(v, int) or isinstance(v, np.int32):
        w = np.zeros((1,1),dtype=int);
        w[0,0] = v;
        return w;
    elif isinstance(v, float):
        w = np.zeros((1,1),dtype=float);
        w[0,0] = v;
        return w;
    elif isinstance(v, complex):
        w = np.zeros((1,1),dtype=complex);
        w[0,0] = v;
        return w;
    elif isinstance(v, np.ndarray):
        s = np.shape(v);
        if len(s) == 1:
            return np.copy(np.reshape(v,(s[0],1)));
        else:
            [r,c]=s;
            if( c==1 ):
                return(np.copy(v));
            elif(r==1):
                return(np.copy(v.T));
            else:
                raise TypeError("eda_cvec: %d by %d ndarray not allowed" % (r, c));
    elif isinstance(v, list):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: list contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    elif isinstance(v, tuple):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: tuple contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    else:
        raise TypeError("eda_cvec: %s not supported" % type(v));

# eda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that they appear,
# except that strings starting with 'title ' are plotted
# under the subsequent matrix or vector
# always returns a status of 1
def eda_draw(*argv):
    bw = np.zeros((256,4));
    v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
    bw[:,0] = v;
    bw[:,1] = v;
    bw[:,2] = v;
    bw[:,3] = np.ones(256);
    bwcmap = ListedColormap(bw);
    # size of plot
    W = 16;
    H = 4;
    fig1 = plt.figure(1);
    # figsize width and height in inches
    fig1.set_size_inches(W,H);
    ax1 = plt.subplot(1,1,1);
    plt.axis([0, W, -H/2, H/2]);
    plt.axis('off');
    LM = W/6;    # matrix width and height
    LV = W/40;   # vector width
    FS = 0.12;    # character width
    TO = 0.4;    # title vertical offset
    SP = 0.2;    # space between objects
    LS = 0.2;    # leading space
    p = LS; # starting x-position
    istitle=0; # flags presence of a title
    for a in argv:
        if isinstance(a,np.ndarray):
            sh = np.shape(a);
            if len(sh) == 1:  # conversion to nx1 array
                n = sh[0];
                m = 1;
                ap = a;
                a = np.zeros((n,1));
                a[:,0] = ap;
            else:
                n = sh[0];
                m = sh[1];
            if m==1:
                pold=p;
                left=p;
                right=p+LV;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LV;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
            else:
                pold=p;
                left=p;
                right=p+LM;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LM;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
        elif isinstance(a,str):
            ns = len(a);
            istitle=0;
            if( ns>=6 ):
                if 'title ' in a[0:6]:
                    istitle=1;
                    titlestr=a[6:];
            if( istitle != 1):
                plt.text(p,0,a);
                p = p + ns*FS + SP;
    plt.show();
    return 1;
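A minimal usage sketch (not one of the original cells) showing how eda_cvec, defined above, builds column vectors from mixed inputs and promotes types by context; the shapes and dtypes in the comments are what the function produces as written.

In [ ]:
# sketch: eda_cvec accepts scalars, lists, tuples and arrays and
# concatenates them into a single N-by-1 column vector
import numpy as np
a = eda_cvec(1, 2.0, [3, 4]);     # one float present, so all promoted to float
print(np.shape(a), a.dtype);      # (4, 1) float64
b = eda_cvec(np.array([5, 6]));   # a copy, not a view, of the input array
print(np.shape(b), b.dtype);      # (2, 1) int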
In [3]:
# edapy_12_01: p.d.f. of square of a random variable

# standard set-up of e-axis
L=101;       # length L
emin=0.01;   # start value; avoid zero
emax=3.00;   # end value
De=(emax-emin)/(L-1); # sampling interval
e = eda_cvec( emin+De*np.linspace(0,L-1,L)); # vector e

# Normal p.d.f. for p(e), for e>=0
pe = sqrt(2/pi)*np.exp(-0.5*np.power(e,2));

Ae = De*np.sum(pe); # area under p.d.f.

# standard setup of E axis
Emin=0.01;   # start value, avoid zero
Emax=3;      # end value
DE=(Emax-Emin)/(L-1);  # sampling interval
E = eda_cvec(Emin+DE*np.linspace(0,L-1,L)); # vector E

pE = np.sqrt(np.reciprocal(2*pi*E)) * np.exp(-0.5*E); # chi2 p.d.f.
AE = DE*np.sum(pE); # area under p.d.f.

eda_draw(' ',pe,'caption p(e)',' ',pE,'caption p(E)');

print("area under p(e): %.4f" % (Ae) );
print("area under p(E): %.4f" % (AE) );
[figure: eda_draw rendering of the vectors p(e) and p(E)]
area under p(e): 1.0014
area under p(E): 0.9090
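As a cross-check (a sketch, not an original cell): the p.d.f. p(E) above is the chi-squared p.d.f. with one degree of freedom, so the closed form should match scipy.stats.chi2 to round-off.

In [ ]:
# sketch: p(E) = (2*pi*E)^(-1/2) exp(-E/2) is chi-squared with 1 degree
# of freedom; the maximum discrepancy should be at round-off level
import numpy as np
from math import pi
from scipy import stats
E = np.linspace(0.01, 3.00, 101);
pE_closed = np.sqrt(np.reciprocal(2*pi*E)) * np.exp(-0.5*E);
pE_scipy  = stats.chi2.pdf(E, 1);
print(np.max(np.abs(pE_closed - pE_scipy)));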
In [4]:
# edapy_12_02: chi-squared p.d.f.

# standard set-up of X2-axis
L=1001;                    # length
X2min=0.0;                 # start value
X2max=10.0;                # end value
DX2=(X2max-X2min)/(L-1);   # sampling
X2 = eda_cvec(np.linspace(X2min,X2max,L)); # vector X2

# plot setup
fig1 = plt.figure(1,figsize=(10,6));
plt.axis([X2min, X2max, 0, 1]);
plt.xlabel('X2');
plt.ylabel('p(X2)');
plt.title('chi-squared pdf');

# plot p(X2,N) p.d.f for several values of N
for N in range(1,6):  # loop over degrees of freedom N
    pX2 = eda_cvec(stats.chi2.pdf(X2,N)); # chi-squared p.d.f.
    plt.plot(X2,pX2,'k-');
[figure: chi-squared p.d.f. for N = 1 to 5]
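A brief sketch (assuming only scipy.stats, as imported above) of why the curves broaden and shift right as N increases: the chi-squared distribution with N degrees of freedom has mean N and variance 2N.

In [ ]:
# sketch: mean and variance of the chi-squared p.d.f. are N and 2N
from scipy import stats
for N in range(1, 6):
    mean, var = stats.chi2.stats(N, moments='mv');
    print(N, mean, var);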
In [5]:
# edapy_12_03: student t pdf

# standard set-up of t-axis
L=1001;             # length
tmin=-5.0;          # start value
tmax=5.0;           # end value
Dt=(tmax-tmin)/(L-1); # sampling interval
t = eda_cvec(np.linspace(tmin,tmax,L));

# plot setup
fig1 = plt.figure(1,figsize=(10,6));
plt.axis([tmin, tmax, 0, 0.5]);
plt.xlabel('x');
plt.ylabel('p(x)');
plt.title("Student's t pdf");

# plot p(t,N) for several values of N
pt = np.zeros((L,1))
for N in range(1,6):
    pt = eda_cvec(stats.t.pdf(t,N)); # t p.d.f.
    plt.plot(t,pt,'k-');
[figure: Student's t p.d.f. for N = 1 to 5]
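A sketch (not an original cell) of the limiting behavior visible in the plot: as N grows, Student's t approaches the standard Normal, so its tail probabilities converge to the Normal ones.

In [ ]:
# sketch: tail probability P(t > 2) approaches the Normal value as N grows
from scipy import stats
for N in [1, 5, 25, 100]:
    print(N, stats.t.sf(2.0, N));
print('normal', stats.norm.sf(2.0));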
In [6]:
# edapy_12_04: F pdf

# make a vector F
L=1001;                # length
Fmin=0.0;              # start value
Fmax=5.0;              # end value
DF=(Fmax-Fmin)/(L-1);  # sampling interval
F=eda_cvec(np.linspace(Fmin,Fmax,L));

# plot setup
fig1 = plt.figure(1,figsize=(12,12));

# plot of p(F,N,M) has several frames, M constant for each frame
i = 1;
for M in  [2, 5, 25, 50]:
    ax1 = plt.subplot(4,1,i);
    plt.axis([Fmin, Fmax, 0, 2]);
    plt.xlabel('F');
    plt.ylabel("p(F,%d,.)" % (M) );
    if(i==1):
        plt.title('F pdf for M, N in [2, 5, 25, 50]' );
    i=i+1;
    for N in  [2, 5, 25, 50]:
        pF = eda_cvec(stats.f.pdf(F,N,M));
        plt.plot(F,pF,'k-');
[figure: F p.d.f., one frame per M in [2, 5, 25, 50], curves for N in [2, 5, 25, 50]]
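A sketch of one property visible in the frames above: the mean of the F distribution with (N, M) degrees of freedom is M/(M-2) for M > 2, independent of N, so the curves concentrate near unity as M grows.

In [ ]:
# sketch: mean of F(N,M) is M/(M-2) for M>2, regardless of N
from scipy import stats
for M in [5, 25, 50]:
    for N in [2, 50]:
        print(N, M, float(stats.f.stats(N, M, moments='m')), M/(M-2));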
In [7]:
# edapy_12_05: simulated particle size data example

# note: to prevent this code from overwriting the existing
# files testA.txt and testB.txt, I have changed the filenames
# to mytestA.txt and mytestB.txt

N=25;
d=100;

s2dtrue=1;
sdtrue=sqrt(s2dtrue);

# calibration test 1
dobs1 = np.random.normal(d,sdtrue,(N,1));
np.savetxt("../Data/mytestA.txt",dobs1, delimiter="\t");

# calibration test 2
dobs2 = np.random.normal(d,sdtrue,(N,1));
np.savetxt("../Data/mytestB.txt",dobs2, delimiter="\t");
In [8]:
# edapy_12_06: Z and chi-squared tests of particle size data

# Z distribution, two-sided test
Z=0.278;
Pi = stats.norm.cdf(abs(Z),0.0,1.0)-stats.norm.cdf(-abs(Z),0.0,1.0);
Po = 1.0-(stats.norm.cdf(abs(Z),0.0,1.0)-stats.norm.cdf(-abs(Z),0.0,1.0));
print("Z distribution, with Z=%.4f, two sided test:" % (Z) );
print("  The probability that Z is inside  the interval %.4f to %.4f" % (-Z, Z) );
print("    is %.4f" % (Pi) );
print("  The probability that Z is outside the interval %.4f to %.4f" % (-Z, Z));
print("    is %.4f" % (Po) );

# Chi-squared distribution, one-sided test
X2=21.9;
N=25;
Pi=stats.chi2.cdf(X2,N);
Po=1.0-stats.chi2.cdf(X2,N);
print("Chi-squared distribution with %d degrees of freedom, one sized test:" % (N) );
print("   The Probability that X2 is inside the interval 0 to %.4f" % (X2) );
print("     is %.4f" % (Pi) );
print("   The Probability that X2 is outside the interval 0 to %.4f" % (X2) );
print("     is %.4f" % (Po));
Z distribution, with Z=0.2780, two sided test:
  The probability that Z is inside  the interval -0.2780 to 0.2780
    is 0.2190
  The probability that Z is outside the interval -0.2780 to 0.2780
    is 0.7810
Chi-squared distribution with 25 degrees of freedom, one sided test:
   The Probability that X2 is inside the interval 0 to 21.9000
     is 0.3585
   The Probability that X2 is outside the interval 0 to 21.9000
     is 0.6415
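A compact equivalent (a sketch, not an original cell): the outside probabilities above can be written with scipy's survival function, which avoids subtracting two c.d.f. values.

In [ ]:
# sketch: for a symmetric two-sided test, Po = 2*sf(|Z|); for the
# one-sided chi-squared test, Po = sf(X2, N)
from scipy import stats
Z = 0.278;
print(2.0*stats.norm.sf(abs(Z)));   # same as Po = 0.7810 above
X2 = 21.9; N = 25;
print(stats.chi2.sf(X2, N));        # same as Po = 0.6415 above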
In [12]:
# edapy_12_07: statistical tests of particle size data

# Part A: first dataset

# load data
D=np.genfromtxt('../Data/testA.txt', delimiter='\t');
s = np.shape(D);
NA = s[0];
dobsA=eda_cvec(D);

dtrueA = 100.0;     # known size of calibration standard
dbartrueA = dtrueA; # known mean of measurement
sd2trueA=1.0;       # known variance of measurement
sdtrueA=1.0;        # known root-variance of measurement

# calculate useful statistics
print('file testA.txt');

# number of observations
print("N is %d" % (NA) );

dbartrueA = dtrueA;
print("true mean %.4f" % (dbartrueA) );

dbarestA = np.sum(dobsA)/NA;
print("estimated mean %.4f" % (dbarestA) );

print("true variance %.4f" % (sd2trueA) );
print("true root-variance %.4f" % (sdtrueA) );

Ddsq = np.matmul( (dobsA-dbartrueA).T, (dobsA-dbartrueA)); Ddsq=Ddsq[0,0];
sd2estA = Ddsq / NA;
print("estimated variance (given true mean) %.4f" % sd2estA );

sdestA = sqrt(sd2estA);
print("estimated root-variance (given true mean) %.4f" % (sdestA) );

dobsAsq = np.matmul( (dobsA-dbarestA).T, (dobsA-dbarestA)); dobsAsq=dobsAsq[0,0];
sd2estpA = dobsAsq / (NA-1);
print("estimated variance (given estimated mean) %.4f" % (sd2estpA) );

sdestpA=sqrt(sd2estpA);
print("estimated root-variance (given estimated mean) %.4f" % (sdestpA));

ZA = (dbarestA-dbartrueA)/(sdtrueA/sqrt(NA));
print("Z is %.4f" % (ZA) );

PZA = 1 - (stats.norm.cdf(abs(ZA),0.0,1.0)-stats.norm.cdf(-abs(ZA),0.0,1.0));
print("Probability of |Z| exceeding %.4f is %.4f" % (abs(ZA), PZA) );

chi2A = np.matmul( ((dobsA-dtrueA).T/sdtrueA), ((dobsA-dtrueA)/sdtrueA) ); chi2A=chi2A[0,0];
print("chi-squared is %.4f" % (chi2A) );

Pchi2A = 1 - stats.chi2.cdf(chi2A,NA);
print("Probability of chi2 exceeding %.4f is %.4f" % (chi2A, Pchi2A) );

tA = (dbarestA-dbartrueA)/(sdestA/sqrt(NA));
print("t is %.4f" % (tA) );

PtA = 1.0 - (stats.t.cdf(abs(tA),NA)-
             stats.t.cdf(-abs(tA),NA));
print("Probability of |t| exceeding %.4f is %.4f" % (abs(tA), PtA) );

print(' ');

# Part B: second dataset

# load data
D=np.genfromtxt('../Data/testB.txt', delimiter='\t');
s = np.shape(D);
NB = s[0];
dobsB=eda_cvec(D);

dtrueB = 100.0;           # known size of calibration standard
dbartrueB = dtrueB;       # known mean
sd2trueB = 1.0;           # known variance of measurement
sdtrueB = sqrt(sd2trueB); # known root-variance of measurement

# calculate useful statistics
print('file testB.txt');

# number of observations
print("N is %d" % (NB) );

print("true mean %.4f" % (dbartrueB) );

dbarestB = np.sum(dobsB)/NB;
print("estimated mean %.4f" % (dbarestB) );

print("true variance %.4f" % (sd2trueB) );
print("true root-variance %.4f" % (sdtrueB) );

Ddsq = np.matmul( (dobsB-dbartrueB).T, (dobsB-dbartrueB)); Ddsq=Ddsq[0,0];
sd2estB = Ddsq / (NB);
print("estimated variance (given true mean) %.4f" % sd2estB );

sdestB = sqrt(sd2estB);
print("estimated root-variance (given true mean) %.4f" % (sdestB) );

Ddsq = np.matmul( (dobsB-dbarestB).T, (dobsB-dbarestB)); Ddsq=Ddsq[0,0];
sd2estpB = Ddsq / (NB-1);
print("estimated variance (given estimated mean) %.4f" % (sd2estpB) );

sdestpB=sqrt(sd2estpB);
print("estimated root-variance (given estimated mean) %.4f" % (sdestpB));

ZB = (dbarestB-dbartrueB)/(sdtrueB/sqrt(NB));
print("Z is %.4f" % (ZB) );

PZB = 1 - (stats.norm.cdf(abs(ZB),0.0,1.0)-stats.norm.cdf(-abs(ZB),0.0,1.0));
print("Probability of |Z| exceeding %.4f is %.4f" % (abs(ZB), PZB) );

chi2B = np.matmul( ((dobsB-dtrueB).T/sdtrueB), ((dobsB-dtrueB)/sdtrueB) ); chi2B=chi2B[0,0];
print("chi-squared is %.4f" % (chi2B) );

Pchi2B = 1 - stats.chi2.cdf(chi2B,NB);
print("Probability of chi2 exceeding %.4f is %.4f" % (chi2B, Pchi2B) );

tB = (dbarestB-dbartrueB)/(sdestB/sqrt(NB));
print("t is %.4f" % (tB) );

PtB = 1.0 - ( stats.t.cdf(abs(tB),NB) - stats.t.cdf(-abs(tB),NB) );
print("Probability of |t| exceeding %.4f is %.4f" % (abs(tB), PtB) );

print(' ');

# Part C: comparison of the two datasets

# are the means of the two tests significantly different ?

print("significance of difference between two means, using Z-test");
A = dbarestA - dbarestB;
B = sqrt(sd2trueA/NA+sd2estpB/NB);
Z = A/B;
print("Z is %.4f" % (Z) );

# are the means of the two tests significantly different ?
PZ = 1 - (stats.norm.cdf(abs(Z),0,1)-stats.norm.cdf(-abs(Z),0,1));
print("Probability of |Z| exceeding %.4f is %.4f" % (abs(Z), PZ) );
print(' ');

print("difference between two means with t-test");
A = dbarestA - dbarestB;
B = sqrt(sd2estpA/NA+sd2estpB/NB);
t = A/B;
C = (sd2estpA/NA+sd2estpB/NB)**2;
D = ((sd2estpA/NA)**2)/(NA-1) + ((sd2estpB/NB)**2)/(NB-1);
M = floor(C/D+0.5);
print("t is %.4f and has %d degrees of freedom" % (t, M) );

# are the means of the two tests significantly different ?
Pt = 1 - (stats.t.cdf(abs(t),M)-stats.t.cdf(-abs(t),M));
print("Probability of |t| exceeding %.4f is %.4f" % (abs(t), Pt) );
print(' ');

# are the variances of the two tests significantly different ?
F = (chi2A/NA) / (chi2B/NB);
if( F<1 ):
    F=1/F;
PF = 1 - (stats.f.cdf(F,NA,NB)-stats.f.cdf(1/F,NA,NB));
print("1/F is %.4f and F is %.4f" % (1/F, F) );
print("Probability of F outside interval (%.4f,%.4f) is %.4f" % (1/F, F, PF) );
file testA.txt
N is 25
true mean 100.0000
estimated mean 100.1620
true variance 1.0000
true root-variance 1.0000
estimated variance (given true mean) 0.8108
estimated root-variance (given true mean) 0.9004
estimated variance (given estimated mean) 0.8172
estimated root-variance (given estimated mean) 0.9040
Z is 0.8098
Probability of |Z| exceeding 0.8098 is 0.4181
chi-squared is 20.2689
Probability of chi2 exceeding 20.2689 is 0.7326
t is 0.8994
Probability of |t| exceeding 0.8994 is 0.3770
 
file testB.txt
N is 25
true mean 100.0000
estimated mean 99.7455
true variance 1.0000
true root-variance 1.0000
estimated variance (given true mean) 1.1273
estimated root-variance (given true mean) 1.0618
estimated variance (given estimated mean) 1.1068
estimated root-variance (given estimated mean) 1.0521
Z is -1.2727
Probability of |Z| exceeding 1.2727 is 0.2031
chi-squared is 28.1837
Probability of chi2 exceeding 28.1837 is 0.2995
t is -1.1987
Probability of |t| exceeding 1.1987 is 0.2419
 
significance of difference between two means, using Z-test
Z is 1.4348
Probability of |Z| exceeding 1.4348 is 0.1514
 
difference between two means with t-test
t is 1.5014 and has 47 degrees of freedom
Probability of |t| exceeding 1.5014 is 0.1400
 
1/F is 0.7192 and F is 1.3905
Probability of F outside interval (0.7192,1.3905) is 0.4156
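A cross-check sketch (reusing dobsA and dobsB loaded in the cell above): scipy's two-sample t-test with unequal variances implements the same Welch construction as the hand-computed t and M, so its p-value should be close to the 0.1400 printed above.

In [ ]:
# sketch: Welch two-sample t-test, reusing dobsA and dobsB from above
from scipy import stats
tstat, pval = stats.ttest_ind(dobsA.ravel(), dobsB.ravel(), equal_var=False);
print(tstat, pval);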
In [14]:
# edapy_12_08: F-test to assess difference in fit
# illustration of F-test to assess difference in fit of two models
# using a small fragment of Black Rock Forest temperature data

# load data from file
D=np.genfromtxt('../Data/brf_fragment.txt', delimiter='\t');
s = np.shape(D);
N = s[0];
t = eda_cvec(10.0+np.linspace(0,N-1,N));  # hours from start of original record
dobs = eda_cvec(D);

# plot setup
fig1 = plt.figure(1,figsize=(10,6));
plt.plot(t,dobs,'ko');
plt.title('linear fit');
plt.xlabel('time t, hours');
plt.ylabel('d(i)');

# fit 1, straight line
M=2;   # number of model parameters
G=np.zeros((N,M));  # set up data kernel
G[:,0]=np.ones((N,1)).ravel();
G[:,1]=t.ravel();
mestA=la.solve( np.matmul(G.T,G), np.matmul(G.T, dobs) ); # least squares fit
dpreA = np.matmul( G, mestA ); # predicted data
plt.plot(t,dpreA,'k--');
EA = np.matmul( (dobs-dpreA).T, (dobs-dpreA) );  EA=EA[0,0]; # total error
vA = N-M; # degrees of freedom
plt.show();

# plot setup
fig2 = plt.figure(2,figsize=(10,6));
plt.plot(t,dobs,'ko');
plt.title('cubic fit');
plt.xlabel('time t, hours');
plt.ylabel('d(i)');

# fit 2, cubic fit
M=4; # number of model parameters
G=np.zeros((N,M));  # set up data kernel
G[:,0]=np.ones((N,1)).ravel();
G[:,1]=t.ravel();
G[:,2]=np.power(t,2).ravel();
G[:,3]=np.power(t,3).ravel();
mestB=la.solve( np.matmul(G.T,G), np.matmul(G.T, dobs) ); # least squares solution
dpreB = np.matmul( G, mestB ); # predicted data
plt.plot(t,dpreB,'k--');
EB = np.matmul( (dobs-dpreB).T, (dobs-dpreB) ); EB=EB[0,0] # total error
vB = N-M; # degrees of freedom
plt.show();

print("linear error %.2f, degrees of freedom %d" % (EA, vA) );
print("cubic  error %.2f, degrees of freedom %d" % (EB, vB) );

print("improvement in error %.2f" % ((EA-EB)/EA) );
F = (EA/vA) / (EB/vB);
print("1/F %.4f F %.4f" % (1/F,F) );

if( F<1 ):
    F=1/F

P = 1 - (stats.f.cdf(F,vA,vB)-stats.f.cdf(1/F,vA,vB));
print("P(F<%.4f or F>%.4f) = %f" % (1/F, F, P) );

if( P>0.05):
    print('Null hypothesis cannot be excluded to 95% confidence')
else:
    print('Null hypothesis can be excluded to 95% confidence')
[figure: linear fit to the temperature fragment]
[figure: cubic fit to the temperature fragment]
linear error 31.52, degrees of freedom 49
cubic  error 27.18, degrees of freedom 47
improvement in error 0.14
1/F 0.8991 F 1.1122
P(F<0.8991 or F>1.1122) = 0.714073
Null hypothesis cannot be excluded to 95% confidence
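An alternative worth noting (a sketch, not the method used above): for nested models such as the line and the cubic, a commonly used statistic is the ratio of the error reduction to the residual variance, tested one-sided; it reuses EA, EB, vA, vB from the cell above.

In [ ]:
# sketch: nested-model F test, reusing EA, EB, vA, vB from the cell above
from scipy import stats
Fn = ((EA - EB)/(vA - vB)) / (EB/vB);
Pn = stats.f.sf(Fn, vA - vB, vB);   # one-sided p-value
print(Fn, Pn);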
In [15]:
# edapy_12_09: variance of power spectral density (untapered)
# case of untapered random timeseries

# generic time set up of t-axis
N=floor(2**10);  # length of time vector
Dt=0.025;        # sampling interval
T = N*Dt;        # duration
t = eda_cvec(Dt*np.linspace(0,N-1,N)); # time vector

# create an uncorrelated random timeseries
sd2true=64.0;          # variance
sdtrue=sqrt(sd2true);  # standard deviation
draw = np.random.normal(0,sdtrue,(N,1)); # time series of random data

# generic frequency set up
No2 = floor(N/2);   # integer version of N/2
fmax=1/(2.0*Dt);    # Nyquist frequency
Df=fmax/No2;        # frequency sampling
Nf=No2+1;           # number of non-negative frequencies
# full (pos and neg) frequency vector
f=eda_cvec(Df*np.concatenate((np.linspace(0,No2,Nf),np.linspace(-No2+1,-1,No2-1)),axis=0));
Dw=2.0*pi*Df;       # angular frequency sampling
w=2.0*pi*f;         # full (pos and neg) angular frequency vector
Nw=Nf;              # number of non-negative angular frequencies
fpos=eda_cvec(Df * np.linspace(0,No2,Nf)); # vector of non-negative frequencies
wpos=2.0*pi*fpos;   # vector of non-negative angular frequencies
                    
# constant window function
W = np.ones((N,1));
d = np.multiply( W, draw );


# window changes variance of signal by ff
ff = np.sum(np.multiply(W,W))/N;

# true and estimated variance of d
sd2est=np.std(d)**2;  # estimated variance
print("true      data variance %.4f" % (ff*sd2true) );
print("estimated data variance %.4f" % (sd2est));
    
# compute power spectral density
fftraw = np.fft.fft(d,axis=0);                # Fourier transform
dbar = eda_cvec(Dt*fftraw[0:Nf,0]);           # keep non-negative frequencies only
s2 = (2/T) * np.power(np.abs(dbar),2);        # power spectral density

# statistics of power spectral density
s2meanest = np.mean(s2);        # mean of p.s.d.
s2varest=np.std(s2)**2;         # variance of p.s.d.
s2sigmaest=sqrt(s2varest);      # standard deviation
sd2psd=Nf*Df*s2meanest;         # variance of data estimated from PSD 
print("variance of data estimated from PSD %.2f" % (sd2psd));
print(' ');

# true mean and variance
p=2; # degrees of freedom
c = (ff*sd2true ) / (2*Nf*Df);  # normalization factor
s2meantrue = p*c;
s2vartrue = 2*p*(c**2);
print("power spectral density");
print("true mean %.4f variance %.4f" % (s2meantrue,s2vartrue));
print("estimated mean %.4f variance %.4f" % (s2meanest,s2varest));
print(' ');


# plot data
fig1 = plt.figure(1,figsize=(10,6));
damax = np.amax(d);
plt.axis([0, T, -1.5*damax, 1.5*damax])
plt.plot(t,d,'k-');
plt.xlabel('time t (s)');
plt.ylabel('d(t)');
plt.title('random time series');
plt.plot([0, T], [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-' );
plt.plot([0, T], [0, 0], 'k-', lw=1 );
plt.plot([0, T], [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-' );

# plot PSD
fig2 = plt.figure(2,figsize=(10,6));
plt.xlabel('frequency f (Hz)');
plt.ylabel('ss(f)');
plt.title('power spectral density');
plt.plot(fpos,s2,'k-');

# plot mean on spectra
plt.plot([0, fmax], [s2meantrue, s2meantrue],'k-');
    
# histogram of values
Nhist=100;            # number of bins in histogram
s2min = np.min(s2);   # minimum bin value
s2max = np.max(s2);   # maximum bin value
ct, ed = np.histogram(s2,Nhist,(s2min,s2max)); # histogram
Nc = len(ct);         # length of count vector ct
Ne = len(ed);         # length of edges vector ed
s2hist = eda_cvec(ct);                          # counts as a vector   
centers = eda_cvec(0.5*(ed[1:Ne]+ed[0:Ne-1]));  # bin centers as a vector
    
# chi-squared pdf
Db=(centers[1,0]-centers[0,0])/c;              # sampling divided by c
s2histtrue=Nf*Db*stats.chi2.pdf(centers/c,p);  # normalized chi-squared p.d.f.

# plot 95% confidence level on spectra
pv=0.95;
cv = stats.chi2.ppf(pv,p);
plt.plot([fpos[0,0], fpos[Nf-1,0]], [cv*c, cv*c],'k-');
plt.show();
print("%.4f confidence level is at s2 = %.4f" % (pv,cv*c) );
percent = 100 * np.sum( s2hist[centers<=cv*c]) / np.sum(s2hist);
print("%.4f percent of points are below %.4f confidence" % (percent,pv));
print(' ');

# plot histograms
fig3 = plt.figure(3,figsize=(10,6));
plt.axis([0, np.max(centers), 0, np.max(s2hist) ])
plt.xlabel('s2');
plt.ylabel('p(s2)');
plt.title('histogram of power spectral density');
plt.plot(centers,s2hist,'k-');
plt.plot(centers,s2histtrue,'k-');
plt.plot([cv*c, cv*c], [0, 0.1*np.max(s2hist)],'k:');
plt.show();
true      data variance 64.0000
estimated data variance 60.7897
variance of data estimated from PSD 60.80
 
power spectral density
true mean 3.1938 variance 10.2001
estimated mean 3.0338 variance 9.0016
 
[figure: random time series with +/- 2 sigma bounds]
[figure: power spectral density with mean and 95% confidence level]
0.9500 confidence level is at s2 = 9.5677
96.2963 percent of points are below 0.9500 confidence
 
[figure: histogram of p.s.d. values with chi-squared prediction]
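A sketch of the scaling used above: each spectral estimate behaves as c times a chi-squared variable with p = 2 degrees of freedom, so any confidence level follows from the inverse c.d.f.; reusing c from the cell above reproduces the 9.5677 level.

In [ ]:
# sketch: the 95% level of the p.s.d. is chi2.ppf(0.95, 2) * c,
# reusing the normalization factor c from the cell above
from scipy import stats
print(stats.chi2.ppf(0.95, 2) * c);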
In [16]:
# edapy_12_10: variance of power spectral density (tapered)
# case of random timeseries tapered with Hamming window

# generic time set up
N=floor(2**10);  # number of points in time series
Dt=0.025;        # sampling interval
T = N*Dt;        # duration
t = eda_cvec(Dt*np.linspace(0,N-1,N)); # time vector

# create an uncorrelated random timeseries
sd2true=64.0;          # variance
sdtrue=sqrt(sd2true);  # std dev
draw = np.random.normal(0,sdtrue,(N,1)); # time series of random data

# generic frequency set up
No2 = floor(N/2);   # integer version of N/2
fmax=1/(2.0*Dt);    # Nyquist frequency
Df=fmax/No2;        # frequency sampling
Nf=No2+1;           # number of non-negative frequencies
# full (pos and neg) frequency vector
f=eda_cvec(Df*np.concatenate((np.linspace(0,No2,Nf),np.linspace(-No2+1,-1,No2-1)),axis=0));
Dw=2.0*pi*Df;       # angular frequency sampling
w=2.0*pi*f;         # full (pos and neg) angular frequency vector
Nw=Nf;              # number of non-negative angular frequencies
fpos=eda_cvec(Df * np.linspace(0,No2,Nf)); # vector of non-negative frequencies
wpos=2.0*pi*fpos;   # vector of non-negative angular frequencies                              

#  Hamming window function
W = eda_cvec( 0.54 - 0.46*np.cos(2*pi*np.linspace(0,N-1,N)/(N-1)) );
d = np.multiply( W, draw );
# window changes variance of signal by ff

ff = np.sum(np.multiply(W,W))/N;

# true and estimated variance of d
sd2est=np.std(d)**2;
print("true      data variance %.4f" % (ff*sd2true) );
print("estimated data variance %.4f" % (sd2est));

# compute power spectral density
fftraw = np.zeros((N,1), dtype=complex);
fftraw = np.fft.fft(d,axis=0);                # Fourier transform
dbar = eda_cvec(Dt*fftraw[0:Nf,0]);           # keep non-negative frequencies, only
s2 = (2/T) * np.power(np.abs(dbar),2);        # power spectral density

# statistics of power spectral density
s2meanest = np.mean(s2);                      # mean
s2varest=np.std(s2)**2;                       # variance
s2sigmaest=sqrt(s2varest);                    # standard deviation
sd2psd=Nf*Df*s2meanest;                       # variance of data estimated from PSD
print("variance of data estimated from PSD %.2f" % (sd2psd));
print(' ');

# true mean and variance
p=2; # degrees of freedom
c = ( ff*sd2true ) / (2*Nf*Df);  # normalization factor
s2meantrue = p*c;                # mean of chi-squared p.d.f., with normalization
s2vartrue = 2*p*(c**2);          # variance of chi-squared p.d.f., with normalization
print("power spectral density");
print("true mean %.4f variance %.4f" % (s2meantrue,s2vartrue));
print("estimated mean %.4f variance %.4f" % (s2meanest,s2varest));
print(' ');

# plot data
fig1 = plt.figure(1,figsize=(10,6));
damax = np.amax(d);
plt.axis([0, T, -1.5*damax, 1.5*damax])
plt.plot(t,d,'k-');
plt.xlabel('time t (s)');
plt.ylabel('d(t)');
plt.title('random time series');
plt.plot([0, T], [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-' );
plt.plot([0, T], [0, 0], 'k-', lw=1 );
plt.plot([0, T], [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-' );

# plot p.s.d.
fig2 = plt.figure(2,figsize=(10,6));
plt.xlabel('frequency f (Hz)');
plt.ylabel('ss(f)');
plt.title('power spectral density');
plt.plot(fpos,s2,'k-');

# plot mean on spectra
plt.plot([0, fmax], [s2meantrue, s2meantrue],'k-');

# histogram of values
Nhist=100;              # number of bins in histogram
s2min = np.min(s2);     # start bin
s2max = np.max(s2);     # end bin
ct, ed = np.histogram(s2,Nhist,(s2min,s2max)); # histogram
Nc = len(ct);           # number of counts ct
Ne = len(ed);           # number of edges ed
s2hist = eda_cvec(ct);  # counts as a vector
centers = eda_cvec(0.5*(ed[1:Ne]+ed[0:Ne-1])); # bin centers as a vector

# chi-squared pdf
Db=(centers[1,0]-centers[0,0])/c;               # normalization factor
s2histtrue=Nf*Db*stats.chi2.pdf(centers/c,p);   # true histogram from chi-squared p.d.f.

# plot 95% confidence level on spectra
pv=0.95;
cv = stats.chi2.ppf(pv,p);
plt.plot([fpos[0,0], fpos[Nf-1,0]], [cv*c, cv*c],'k-');
plt.show();
print("%.4f confidence level is at s2 = %.4f" % (pv,cv*c) );
percent = 100 * np.sum( s2hist[centers<=cv*c]) / np.sum(s2hist);
print("%.4f percent of points are below %.4f confidence" % (percent,pv));
print(' ');

fig3 = plt.figure(3,figsize=(10,6));
plt.axis([0, np.max(centers), 0, np.max(s2hist) ])
plt.xlabel('s2');
plt.ylabel('p(s2)');
plt.title('histogram of power spectral density');
plt.plot(centers,s2hist,'k-');
plt.plot(centers,s2histtrue,'k-');
plt.plot([cv*c, cv*c], [0, 0.1*np.max(s2hist)],'k:');
plt.show();
true      data variance 25.4092
estimated data variance 24.3849
variance of data estimated from PSD 24.62
 
power spectral density
true mean 1.2680 variance 1.6078
estimated mean 1.2286 variance 1.4417
 
[figure: Hamming-tapered random time series with +/- 2 sigma bounds]
[figure: power spectral density with mean and 95% confidence level]
0.9500 confidence level is at s2 = 3.7985
96.1014 percent of points are below 0.9500 confidence
 
[figure: histogram of p.s.d. values with chi-squared prediction]
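A sketch of where the factor ff comes from: for a long Hamming taper, the mean squared window value approaches the closed form 0.54**2 + 0.46**2/2, which is about 0.3974, consistent with the true data variance of 25.4092 (= ff*64) printed above.

In [ ]:
# sketch: closed-form power factor of the Hamming window
ff_closed = 0.54**2 + 0.46**2/2;
print(ff_closed, ff_closed*64.0);   # approx 0.3974 and 25.43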
In [17]:
# edapy_12_11, confidence of spectral peak
# for random noise plus a small amplitude sinusoid

# generic time set up
N=floor(2**10);  # number of points in time series
Dt=0.025;        # sampling interval
T = N*Dt;        # duration
t = eda_cvec(Dt*np.linspace(0,N-1,N)); # time vector

# generic frequency set up
No2 = floor(N/2);   # integer version of N/2
fmax=1/(2.0*Dt);    # Nyquist frequency
Df=fmax/No2;        # frequency sampling
Nf=No2+1;           # number of non-negative frequencies
# full (pos and neg) frequency vector
f=eda_cvec(Df*np.concatenate((np.linspace(0,No2,Nf),np.linspace(-No2+1,-1,No2-1)),axis=0));
Dw=2.0*pi*Df;       # angular frequency sampling
w=2.0*pi*f;         # full (pos and neg) angular frequency vector
Nw=Nf;              # number of non-negative angular frequencies
fpos=eda_cvec(Df * np.linspace(0,No2,Nf)); # vector of non-negative frequencies
wpos=2.0*pi*fpos;   # vector of non-negative angular frequencies 

# low amplitude periodic signal buried in the noise
sd2true=64.0;          # variance
sdtrue=sqrt(sd2true);  # standard deviation
# data = signal + noise
draw = 0.25*sdtrue*np.cos(2*pi*(fmax/4)*t) + np.random.normal(0,sdtrue,(N,1));                

#  Hamming window function
W = eda_cvec(0.54 - 0.46*np.cos(2*pi*np.linspace(0,N-1,N)/(N-1)));
d = np.multiply( W, draw );

# window changes variance of signal by ff
ff = np.sum(np.multiply(W,W))/N;

# true and estimated variance of d
sd2est=np.std(d)**2;
print("true      data variance %.4f" % (ff*sd2true) );
print("estimated data variance %.4f" % (sd2est));

# compute power spectral density
fftraw = np.fft.fft(d,axis=0);         # Fourier transform
dbar = eda_cvec(Dt*fftraw[0:Nf,0]);    # keep non-negative frequencies, only
s2 = (2/T) * np.power(np.abs(dbar),2); # power spectral density

# statistics of power spectral density
s2meanest = np.mean(s2);     # mean
s2varest=np.std(s2)**2;      # variance
s2sigmaest=sqrt(s2varest);   # standard deviation
sd2psd=Nf*Df*s2meanest;      # variance of data estimated from PSD
print("variance of data estimated from PSD %.2f" % (sd2psd));
print(' ');

# true mean and variance
p=2; # degrees of freedom
c = ( ff*sd2true ) / (2*Nf*Df);  # normalization factor
s2meantrue = p*c;                # true mean of normalized chi-squared distribution
s2vartrue = 2*p*(c**2);          # true variance of normalized chi-squared distribution
print("power spectral density");
print("true mean %.4f variance %.4f" % (s2meantrue,s2vartrue));
print("estimated mean %.4f variance %.4f" % (s2meanest,s2varest));
print(' ');

# plot data
fig1 = plt.figure(1,figsize=(10,6));
damax = np.amax(d);
plt.axis([0, T, -1.5*damax, 1.5*damax])
plt.plot(t,d,'k-');
plt.xlabel('time t (s)');
plt.ylabel('d(t)');
plt.title('random time series');
plt.plot([0, T], [-2*sdtrue*sqrt(ff), -2*sdtrue*sqrt(ff)], 'k-' );
plt.plot([0, T], [0, 0], 'k-', lw=1 );
plt.plot([0, T], [2*sdtrue*sqrt(ff), 2*sdtrue*sqrt(ff)], 'k-' );

# plot PSD
fig2 = plt.figure(2,figsize=(10,6));
plt.xlabel('frequency f (Hz)');
plt.ylabel('ss(f)');
plt.title('power spectral density');
plt.plot(fpos,s2,'k-');

# plot mean on spectra
plt.plot([0, fmax], [s2meantrue, s2meantrue],'k-');

# histogram of values
Nhist=100;            # number of bins in histogram
s2min = np.min(s2);   # start bin
s2max = np.max(s2);   # end bin
ct, ed = np.histogram(s2,Nhist,(s2min,s2max)); # histogram
Nc = len(ct);         # number of counts ct
Ne = len(ed);         # number of edges ed
s2hist = eda_cvec(ct);                         # counts as a vector
centers = eda_cvec(0.5*(ed[1:Ne]+ed[0:Ne-1])); # bin centers as a vector

# chi-squared pdf
Db=(centers[1,0]-centers[0,0])/c;
# true histogram derived from normalized chi-squared distribution
s2histtrue=Nf*Db*stats.chi2.pdf(centers/c,p);  

# plot 95% confidence level on spectra
pv=0.95;
cv = stats.chi2.ppf(pv,p);
plt.plot([fpos[0,0], fpos[Nf-1,0]], [cv*c, cv*c],'k-');
plt.show();
print("%.4f confidence level is at s2 = %.4f" % (pv,cv*c) );
percent = 100 * np.sum( s2hist[centers<=cv*c]) / np.sum(s2hist);
print("%.4f percent of points are below %.4f confidence" % (percent,pv));
print(' ');

fig3 = plt.figure(3,figsize=(10,6));
plt.axis([0, np.max(centers), 0, np.max(s2hist) ])
plt.xlabel('s2');
plt.ylabel('p(s2)');
plt.title('histogram of power spectral density');
plt.plot(centers,s2hist,'k-');
plt.plot(centers,s2histtrue,'k-');
plt.plot([s2meantrue, s2meantrue], [0, 0.1*np.max(s2hist)],'k:');
plt.plot([cv*c, cv*c], [0, 0.1*np.max(s2hist)],'k:');
plt.show();

# probability that PSD exceeds maximum observed value
speak = np.max(s2);
print("largest spectral peak: %e" % (speak) );
ppeak = stats.chi2.cdf(speak/c,p);
print("probability of laregst peak %.8f" % (ppeak) );
print("probability of PSD exceeding maximum observed value %.8f" % (1-ppeak) );
print("Nf probability of PSD exceeding maximum observed value %.8f" % (1-(ppeak**Nf)) );
true      data variance 25.4092
estimated data variance 26.8266
variance of data estimated from PSD 26.93
 
power spectral density
true mean 1.2680 variance 1.6078
estimated mean 1.3439 variance 1.5975
 
[figure: noise-plus-sinusoid time series with +/- 2 sigma bounds]
[figure: power spectral density with mean and 95% confidence level]
0.9500 confidence level is at s2 = 3.7985
93.9571 percent of points are below 0.9500 confidence
 
[figure: histogram of p.s.d. values with chi-squared prediction]
largest spectral peak: 8.923954e+00
probability of largest peak 0.99912205
probability of PSD exceeding maximum observed value 0.00087795
Nf probability of PSD exceeding maximum observed value 0.36274558
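A sketch of the last correction above: raising ppeak to the power Nf treats the Nf spectral estimates as independent; for a small exceedance probability q = 1 - ppeak, the result is close to the union bound Nf*q.

In [ ]:
# sketch: exact independence correction vs. union bound, reusing
# ppeak and Nf from the cell above
q = 1.0 - ppeak;
print(1.0 - (1.0-q)**Nf, Nf*q);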
In [18]:
# edapy_12_12, psd of AR1 process

# tunable constants
beta = 0.60;     # beta of AR1 process
sigma = 100;     # sigma of x
N=1024;          # length of time series

# standard set up a time axis, t
Dt=0.6;
T = N*Dt;
t= eda_cvec(Dt*np.linspace(0,N-1,N)); 

# standard setup of frequency axis, f
No2 = floor(N/2);   # integer version of N/2
fmax=1/(2.0*Dt);    # Nyquist frequency
Df=fmax/No2;        # frequency sampling
Nf=No2+1;           # number of non-negative frequencies
# full (pos and neg) frequency vector
f=eda_cvec(Df*np.concatenate((np.linspace(0,No2,Nf),np.linspace(-No2+1,-1,No2-1)),axis=0));
Dw=2.0*pi*Df;       # angular frequency sampling
w=2.0*pi*f;         # full (pos and neg) angular frequency vector
Nw=Nf;              # number of non-negative angular frequencies
fpos=eda_cvec(Df * np.linspace(0,No2,Nf)); # vector of non-negative frequencies
wpos=2.0*pi*fpos;   # vector of non-negative angular frequencies 

# keep only the non-negative frequencies on the axes
f=f[0:Nf,0:1];
w=w[0:Nf,0:1];

# plot of spectra with different beta's
betalist = eda_cvec( 0.25, 0.5, 0.75);
NB,i = np.shape(betalist);
print('p.s.d. of AR1 process with these betas');
print(betalist.T);

fig1 = plt.figure(1,figsize=(10,6));
plt.axis( [0, fmax, 0, 1.0/(1.0+betalist[NB-1,0]**2-2.0*betalist[NB-1,0]) ]  );  # y-max is the peak value 1/(1-beta)^2
plt.xlabel('f');
plt.ylabel('1/|v(f)|^2');

for beta in betalist:
    denom = (1 + (beta**2) - 2*beta*np.cos(2*pi*0.5*f/fmax) );
    plt.plot( f, np.divide(1,denom), 'k-', lw=2 );
    
p.s.d. of AR1 process with these betas
[[0.25 0.5  0.75]]
[figure: 1/|v(f)|^2 versus f for beta = 0.25, 0.5, 0.75]
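A sketch connecting the curves above to filter theory: the AR1 recursion y(i) = beta*y(i-1) + x(i) is an IIR filter with transfer function 1/(1 - beta*exp(-i*omega)), so scipy.signal.freqz should reproduce the 1/(1 + beta**2 - 2*beta*cos(omega)) shape plotted above.

In [ ]:
# sketch: AR1 squared transfer function from scipy.signal.freqz
import numpy as np
import scipy.signal as sg
beta = 0.5;
wv, H = sg.freqz(b=[1.0], a=[1.0, -beta], worN=8);  # wv in rad/sample
print(np.power(np.abs(H),2));
print(np.reciprocal(1.0 + beta**2 - 2.0*beta*np.cos(wv)));  # same values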
In [19]:
# edapy_12_13, 95% confidence limit of AR1 process

# tunable constants
beta =    0.6;   # beta of AR1 process
sigma = 100.0;   # sigma of x
N=1024;          # length of time series
tmaxplot=120.0;  # max time on enlarged plot
ADDSINUSOID=0;   # if 1, add a sinusoid to the random timeseries
EXACT95=0;       # if 1, exact confidence level; if 0, the (mean+2sigma) approximation

# standard set up a time axis, t
Dt=0.6;
T = N*Dt;
t=eda_cvec(Dt*np.linspace(0,N-1,N)); 

# standard setup of frequency axis, f
No2 = floor(N/2);   # integer version of N/2
fmax=1/(2.0*Dt);    # Nyquist frequency
Df=fmax/No2;        # frequency sampling
Nf=No2+1;           # number of non-negative frequencies
# full (pos and neg) frequency vector
f=eda_cvec(Df*np.concatenate((np.linspace(0,No2,Nf),np.linspace(-No2+1,-1,No2-1)),axis=0));
Dw=2.0*pi*Df;       # angular frequency sampling
w=2.0*pi*f;         # full (pos and neg) angular frequency vector
Nw=Nf;              # number of non-negative angular frequencies
fpos=eda_cvec(Df * np.linspace(0,No2,Nf)); # vector of non-negative frequencies
wpos=2.0*pi*fpos;   # vector of non-negative angular frequencies 

# uncorrelated random time series x(t)
x = np.random.normal(loc=0,scale=sigma,size=(N,1));

# AR1 process y(t)
y = np.zeros((N,1));
y[0,0] = x[0,0];
for i in range(1,N):
    y[i,0] = beta*y[i-1,0] + x[i,0];

if( ADDSINUSOID ):
    sigmay2 = np.var(y);
    sigmay = sqrt(sigmay2);
    Ac=0.25*sigmay;
    fc=fmax/4.0;
    y = y + Ac*np.cos(2.0*pi*fc*t);
    print("A small amplitude sinusoid of amplitude %.2f and frequency %.2f has been added" % (Ac,fc));

# plot entire time series
print('plot time series x(t) and y(t)');
fig1 = plt.figure(1,figsize=(10,6));

ax1 = plt.subplot(2,1,1);
plt.axis( [0, Dt*(N-1), np.min(x), np.max(x) ] );
plt.plot( t, x, 'k-', lw=2 );
plt.xlabel('t');
plt.ylabel('x(t)');
ax1 = plt.subplot(2,1,2);
plt.axis( [0, Dt*(N-1), np.min(y), np.max(y) ] );
plt.plot( t, y, 'k-', lw=2 );
plt.xlabel('t');
plt.ylabel('y(t)');
plt.show();

# plot enlarged portion of time series
print('plot enlarged portions of time series x(t) and y(t)');
fig2 = plt.figure(2,figsize=(10,6));
ax1 = plt.subplot(2,1,1);
plt.axis( [0, tmaxplot, np.min(x), np.max(x) ] );
plt.plot( t, x, 'k-', lw=2 );
plt.xlabel('t');
plt.ylabel('x(t)');
ax1 = plt.subplot(2,1,2);
plt.axis( [0, tmaxplot, np.min(y), np.max(y) ] );
plt.plot( t, y, 'k-', lw=2 );
plt.xlabel('t');
plt.ylabel('y(t)');
plt.show();

# compute power spectral density
xfftraw = np.fft.fft(x,axis=0);         # raw DFT output
xbar = Dt*xfftraw[0:Nf,0:1];            # Fourier transform, + freqs only
xs2 = (2/T)*np.power(np.abs(xbar),2);   # power spectral density
xs2=xs2[0:Nf,0:1];                      # nonnegative f's only
yfftraw = np.fft.fft(y,axis=0);         # raw DFT output
ybar = Dt*yfftraw[0:Nf,0:1];            # Fourier transform, + freqs only
ys2 = (2/T) * np.power(abs(ybar),2);    # power spectral density

# keep only the non-negative frequencies on the axes
f=f[0:Nf];
w=w[0:Nf];

# plot of spectra
print('plot power spectral density, |x(f)|^2 and |y(f)|^2');
fig3 = plt.figure(3,figsize=(10,6));
ax1 = plt.subplot(2,1,1);
plt.axis( [0, np.max(f), 0, np.max(xs2) ] );
plt.plot( f, xs2, 'k-', lw=2 );
plt.xlabel('f');
plt.ylabel('|x(f)|**2');
ax1 = plt.subplot(2,1,2);
plt.axis( [0, np.max(f), 0, np.max(ys2) ] );
plt.plot( f, ys2, 'k-', lw=2 );
plt.xlabel('f');
plt.ylabel('|y(f)|**2');
plt.show();

# plot of spectra, onto which other stuff will be overlain
print('plot power spectral density, |x(f)|**2 and |y(f)|**2');
print('with other stuff overlain');
fig4 = plt.figure(4,figsize=(10,6));
ax1 = plt.subplot(2,1,1);
plt.axis( [0, np.max(f), 0, np.max(xs2) ] );
plt.plot( f, xs2, 'k-', lw=2 );
plt.xlabel('f');
plt.ylabel('|x(f)|**2');
ax1 = plt.subplot(2,1,2);
plt.axis( [0, np.max(f), 0, np.max(ys2) ] );
plt.plot( f, ys2, 'k-', lw=2 );
plt.xlabel('f');
plt.ylabel('|y(f)|**2');

Nr = 1000;
Nr95 = 950;
Nr50 = 500;
S = np.zeros((Nf,Nr));

# repeat for Nr realizations
for ir in range(Nr):
    # uncorrelated time series
    x = np.random.normal(loc=0.0,scale=sigma,size=(N,1));
    # build AR1 time series
    y = np.zeros((N,1));
    y[0,0] = x[0,0];
    for i in range(1,N):
        y[i,0] = beta*y[i-1,0] + x[i,0];
    yfftraw = np.fft.fft(y,axis=0);  # FFT of y
    ybar = Dt*yfftraw[0:Nf,0:1]; # FT of nonnegative f's
    ys2 = (2/T) * np.power(np.abs(ybar),2); # p.s.d.
    S[0:Nf,ir:ir+1]=ys2[0:Nf,0:1]; # save p.s.d.

# statistics of the ensemble of p.s.d.'s
S95 = np.zeros((Nf,1));   # 95 percent level
Smean = np.zeros((Nf,1)); # mean
for ifr in range(Nf):   # loop over frequencies
    v = S[ifr:ifr+1,0:Nr].T; # the p.s.d. at one frequency
    v = eda_cvec(np.sort(v,axis=0)); # sort the values
    S95[ifr,0]=v[Nr95,0];  # 95% less than this value
    Smean[ifr,0]=np.mean(v); # mean

plt.plot( f, S95,   '-', lw=5, color=[0.8,0.8,0.8] );
plt.plot( f, Smean, '-', lw=5, color=[0.8,0.8,0.8] );

# theoretical spectrum
p=2;    # degrees of freedom
# in this code, the window function is just w(i)=1
ff = 1.0; # variance of window function
c = ( ff*(sigma**2) ) / (2*Nf*Df);  # scaling constant
xs2meantrue = p*c;        # mean of p.s.d. of x
xs2vartrue = 2*p*(c**2);  # variance of p.s.d. of x
print("scaling constant c: %e" % (c) );
print("pc: %e" % (xs2meantrue) );
print("2pc**2: %e" % (xs2vartrue) );
# the AR1 filter v(f)
denom = (1.0 + (beta**2) - 2.0*beta*np.cos(pi*f/fmax) );
# mean of p.s.d. of y
ys2mean =  xs2meantrue*np.reciprocal(denom);
# variance of p.s.d. of y
if( EXACT95 ):
    C95 = stats.chi2.ppf(0.95,p);
    ys295 = C95*c*np.reciprocal(denom);
    print('Exact 95% confidence level used');
else:
    ys295 = ys2mean + 2.0*sqrt(xs2vartrue)*np.reciprocal(denom);
    print('Approximate 95% confidence level used');
plt.plot( f, ys2mean, 'k--', lw=2 );
plt.plot( f, ys295, 'k-', lw=2 );
plot time series x(t) and y(t)
[figure: time series x(t) and y(t)]
plot enlarged portions of time series x(t) and y(t)
[figure: enlarged portions of x(t) and y(t)]
plot power spectral density, |x(f)|^2 and |y(f)|^2
[figure: power spectral densities |x(f)|**2 and |y(f)|**2]
plot power spectral density, |x(f)|**2 and |y(f)|**2
with other stuff overlain
scaling constant c: 5.988304e+03
pc: 1.197661e+04
2pc**2: 1.434391e+08
Approximate 95% confidence level used
[figure: |y(f)|**2 with ensemble mean/95% levels and theoretical curves overlain]
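A side note (a sketch, not an original cell): the sort-and-index construction of S95 above is equivalent, up to the interpolation convention, to np.percentile along the realization axis.

In [ ]:
# sketch: percentile shortcut for the ensemble statistics, reusing S from above
import numpy as np
S95b = np.percentile(S, 95.0, axis=1);   # about the same as S95 above
Smeanb = np.mean(S, axis=1);             # same as Smean above
print(S95b[0:5]);
print(Smeanb[0:5]);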
In [23]:
# edapy_12_14: bootstrap statistics of slope
# in least squares fit of a straight line to a small
# fragment of Black Rock Forest temperature data

# load data and set up time axis
D=np.genfromtxt('../Data/brf_fragment.txt', delimiter='\t');
s = np.shape(D);
N = s[0];  # number of data
torig = eda_cvec(10+np.linspace(0,N-1,N));  # time vector, hours from start of original record
dorig = eda_cvec(D);                        # data vector

# least-squares slope and its variance of original data
M=2;                    # number of model parameters
G=np.zeros((N,M));      # data kernel setup
G[:,0]=np.ones((N,));
G[:,1]=torig.ravel();
GTG = np.matmul(G.T,G); # G_transpose time G
mest = la.solve( GTG, np.matmul(G.T,dorig) ); # least squares solution
slopeorig=mest[1,0];    # break out estimated slope
    
# usual error estimate based on propagation
# of the posterior estimate of sigma2 of the data into
# a sigma2 of the slope
e = dorig - np.matmul(G,mest);  # individual errors
E = np.matmul(e.T,e);           # total error
sigma2dorig = E/N;              # posterior variance of data
Cmorig = sigma2dorig * la.inv(GTG);  # posterior covariance of model parameters
sigma2morig = Cmorig[1,1];      # break out variance of slope

# number of realizations;
Nr = 10000;
slope = np.zeros((Nr,1));    # vector of bootstrapped slopes

# loop over realizations
for p in range(Nr):
    # resample
    # N random integers, each between 0 and N-1
    rowindex=np.random.randint(N,size=N); 
    # random resampling of time with duplication
    t = eda_cvec(torig[rowindex,0]);
    # random resampling of data with duplication
    d = eda_cvec(dorig[rowindex,0]);
    # straight line fit
    M=2;                     # number of model parameters
    G=np.zeros((N,M));       # data kernel setup
    G[:,0]=np.ones((N,));
    G[:,1]=t.ravel();
    GTG = np.matmul(G.T,G);  # G-transpose times G
    mest=la.solve(GTG,np.matmul(G.T,d)); # least sq soln
    slope[p,0]=mest[1,0];    # save slope in vector

# compute histogram of slopes and normalize
# to a probability density function
# with Ds*sum(pbootstrap)=1

Nhist=1000;      # number of bins
slopemin = 0.2;  # starting bin
slopemax = 0.8;  # ending bin
ct, ed = np.histogram(slope,Nhist,(slopemin,slopemax)); # histogram
Nc = len(ct);    # length of counts ct
Ne = len(ed);    # length of edges ed
slopehist = eda_cvec(ct); # counts as a vector
# bin centers as a vector
centers = eda_cvec(0.5*(ed[1:Ne]+ed[0:Ne-1]));
# approximate pdf via normalized histogram
Ds = centers[1,0] - centers[0,0];  # bin sampling
norm = Ds*np.sum(slopehist);       # normalization factor
pbootstrap =slopehist/norm;        # p.d.f. from histogram

# compute Normal distribution of slopes
# using parameters estimated from original data
norm = (1/sqrt(2*pi*sigma2morig));
pnormal = norm * np.exp(-np.power((centers-slopeorig),2)/(2*sigma2morig));

# plot pdf
fig1 = plt.figure(1,figsize=(10,6));
pdfmax = np.max(pbootstrap);
plt.axis([0.4, 0.6, 0, 1.5*pdfmax]);
plt.plot(centers,pbootstrap,'k-');
plt.plot(centers,pnormal,'k:');
plt.xlabel('slope b');
plt.ylabel('p(b)');

# mean (expectation) and variance of bootstrap distribution
# approximate integral for mean as sum
mb = Ds*np.sum( np.multiply( centers, pbootstrap) );
# approximate integral for variance as sum
vb = Ds*np.sum( np.multiply( np.power(centers-mb,2) , pbootstrap) ); 

# 2.5% and 97.5% points of the
# cumulative probability function
Pbootstrap = eda_cvec(Ds*np.cumsum(pbootstrap));
# find 0.025 cumulative probability
ilovec = np.where(Pbootstrap>=0.025); 
# np.where() returns a tuple of index arrays,
# but what we want, ilo, is the first element
ilox = ilovec[0];
ilo = ilox[0];
blo = centers[ilo,0];
# find 0.975 cumulative probability
ihivec = np.where(Pbootstrap>=0.975); 
ihix = ihivec[0];
ihi = ihix[0];
bhi = centers[ihi,0];

plt.plot( [blo, blo], [0, 0.2*pdfmax], 'k-'  );
plt.plot( [bhi, bhi], [0, 0.2*pdfmax], 'k-'  );
plt.plot( [mb, mb], [0, 0.3*pdfmax], 'k-'  );
plt.show();

print("standard method:   slope %.4f +/- %.4f (2 sigma)" % (slopeorig, 2*sqrt(sigma2morig)));
print("bootstrap method:  slope %.4f +/- %.4f (2 sigma)" % (mb, 2*sqrt(vb)))
print("bootstrap method:  slope between %.4f and %.4f (95 percent confidence)" % (blo, bhi));
[figure: bootstrap p.d.f. of the slope, Normal comparison, and confidence bounds]
standard method:   slope 0.5252 +/- 0.0150 (2 sigma)
bootstrap method:  slope 0.5251 +/- 0.0175 (2 sigma)
bootstrap method:  slope between 0.5087 and 0.5423 (95 percent confidence)
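A shortcut sketch (reusing the vector of bootstrap replicates, slope, from above): the mean, two-sigma spread, and confidence bounds can be read directly from the replicates with np.percentile, without building the histogram first.

In [ ]:
# sketch: bootstrap summary directly from the replicates, reusing slope
import numpy as np
print(np.mean(slope), 2.0*np.std(slope));   # mean and 2-sigma
print(np.percentile(slope, [2.5, 97.5]));   # 95 percent bounds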
In [26]:
# edapy_12_15: bootstrap statistics using the Atlantic Rock dataset
# bootstrap error analysis for the
# CaO/Na2O ratio of varimax factor 2
# note: CaO and Na2O are elements 6 and 7 (MATLAB numbering)

# load data
Draw = np.genfromtxt('../Data/rocks.txt', delimiter='\t')
N, M = np.shape(Draw);

# number of realizations
Nr=10000;
ratio = np.zeros((Nr,1));

for ir in range(Nr):

    # randomly resample
    rowindex=np.random.randint(N,size=N);  # N random integers, each between 0 and N-1
    D = np.zeros((N,M));
    D[:,:] = Draw[rowindex,:];

    # compute factors and factor loadings using singular value decomposition
    [U, s, VT] = la.svd(D,full_matrices=False);
    sh = np.shape(s);
    Ns = sh[0];

    # keep only first few singular values
    P=5;
    F = np.copy(VT[0:P,:]);
    C = np.matmul( U[:,0:P], np.diag(s[0:P]));
    
    # spike these factors using the varimax procedure
    k = [1, 2, 3, 4];
    Nk = len(k);

    # initial rotated factor matrix, FP, and rotation matrix, MR
    MR=np.identity(P);
    FP=np.copy(F);
    
    fA = np.zeros((M,1));
    fB = np.zeros((M,1));
    fAp = np.zeros((M,1));
    fBp = np.zeros((M,1));
    mA = np.zeros((P,1));
    mB = np.zeros((P,1));
    mAp = np.zeros((P,1));
    mBp = np.zeros((P,1));
    for iter in range(3):
        for ii in range(Nk):
            for jj in range(ii+1,Nk):
                # spike factors i and j
                i=k[ii];
                j=k[jj];
                # copy factors from matrix to vectors
                fA[:,0] = np.copy(FP[i,:]);
                fB[:,0] = np.copy(FP[j,:]);
                # standard varimax procedure to determine rotation angle q
                u = np.power(fA,2) - np.power(fB,2);
                v = 2.0 * np.multiply(fA, fB);
                AA = 2*M*np.matmul(u.T,v); AA=AA[0,0];
                BB = np.sum(u)*np.sum(v);
                top = AA - BB;
                CC = M*(np.matmul(u.T,u)-np.matmul(v.T,v)); CC=CC[0,0];
                DD = (np.sum(u)**2)-(np.sum(v)**2);
                bot =  CC - DD;
                q = 0.25 * atan2(top,bot);
                # rotate factors
                cq = cos(q);
                sq = sin(q);
                fAp =  cq*fA + sq*fB;
                fBp = -sq*fA + cq*fB;
                # put back into factor matrix, FP
                FP[i,:] = fAp.ravel();
                FP[j,:] = fBp.ravel();
                # accumulate rotation
                mA[:,0] = np.copy(MR[i,:]);
                mB[:,0] = np.copy(MR[j,:]);
                mAp =  cq*mA + sq*mB;
                mBp = -sq*mA + cq*mB;
                MR[i,:] = mAp.ravel();
                MR[j,:] = mBp.ravel();

    # new factor loadings
    CP = np.matmul(C,MR.T);
    
    # MATLAB: ratio of elements 6 and 7 of factor 2
    # Python: ratio of elements 5 and 6 of factor 1
    ratio[ir,0]=FP[1,5]/FP[1,6];

# make histogram of ratios
Nhist=500;       # number of bins
ratiomin = 0.2;  # start bin
ratiomax = 0.8;  # end bin
ct, ed = np.histogram(ratio,Nhist,(ratiomin,ratiomax)); # histogram
Nc = len(ct);    # number of counts ct
Ne = len(ed);    # number of edges ed
ratiohist = eda_cvec(ct);  # counts as a vector
centers = eda_cvec(0.5*(ed[1:Ne]+ed[0:Ne-1])); # bin centers as a vector

# approximate pdf via normalized histogram
Dr = centers[1,0] - centers[0,0];   # bin spacing
norm = Dr*np.sum(ratiohist);        # normalization of p.d.f.
pbootstrap = ratiohist/norm;        # p.d.f. estimated from histogram

# mean (expectation) and variance of bootstrap distribution
mr = Dr*np.sum( np.multiply( centers, pbootstrap) );
vr = Dr*np.sum( np.multiply( np.power(centers-mr,2) , pbootstrap) );
print("CaO/Na2O ratio of Factor 2");
print("mean %.6f variance %.6f" % (mr,vr) );

# 2.5% and 97.5% confidence of bootstrap distribution
Pbootstrap = eda_cvec(Dr*np.cumsum(pbootstrap));

# 2.5% confidence
ilovec = np.where( (Pbootstrap>=0.025).ravel() );
# np.where() returns a tuple of index arrays; we want the first element, ilo
ilox = ilovec[0];
ilo = ilox[0];
blo = centers[ilo,0];

# 97.5% confidence
ihivec = np.where(Pbootstrap>=0.975);
# np.where() returns a tuple of index arrays; we want the first element, ihi
ihix = ihivec[0];
ihi = ihix[0];
bhi = centers[ihi,0];

# plot pdf
fig1 = plt.figure(1,figsize=(10,6));
pdfmax = np.max(pbootstrap);
plt.axis([0.4, 0.6, 0, 1.5*pdfmax]);
plt.plot(centers,pbootstrap,'k-');
plt.xlabel('CaO/Na2O ratio, r');
plt.ylabel('p(r)');

plt.plot( [blo, blo], [0, 0.2*pdfmax], 'k-'  );
plt.plot( [bhi, bhi], [0, 0.2*pdfmax], 'k-'  );
plt.plot( [mr, mr], [0, 0.3*pdfmax], 'k-'  );
plt.show();

print("bootstrap method:  ratio %.4f +/- %.4f (2 sigma)" % (mr, 2*sqrt(vr)))
print("bootstrap method:  ratio between %.4f and %.4f (95 percent confidence)" % (blo, bhi));
CaO/Na2O ratio of Factor 2
mean 0.485676 variance 0.000194
[figure: bootstrap p.d.f. of the CaO/Na2O ratio with confidence bounds]
bootstrap method:  ratio 0.4857 +/- 0.0279 (2 sigma)
bootstrap method:  ratio between 0.4586 and 0.5138 (95 percent confidence)
In [ ]: