In [1]:
# gdapy17_00 clears all variables and imports various modules
# you must run this cell first, before running any of the others
# note that these examples use inverse theory techniques that
# are only covered in later chapters of the book
# History:
# 2023/04/12 - Created by W. Menke
# 2024/05/23 - Patches to accommodate new versions of numpy, scipy, matplotlib
# patched dot product to return scalar value
# las.bicg() changed tol keyword to rtol
# las.bicg() cast q[0] to float array
# A reset deletes any previously-created variables from memory
%reset -f
# import various packages
import numpy as np # matrices & etc
from matplotlib import pyplot as plt # general plotting
from matplotlib import patches # plotting shapes
from datetime import date # date and time arithmetic
from math import exp, pi, sin, cos, tan, sqrt, floor, log10, nan, atan2 # math functions
from scipy.special import erf
import scipy.linalg as la # linear algebra functions
import scipy.signal as sg # signal processing
import os # operating system
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import scipy.signal as sg
import scipy.stats as st
import scipy.sparse.linalg as las
from scipy import sparse
import matplotlib
# function to make a numpy N-by-1 column vector
# c=gda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, ..., including a number,
# a list of numbers, a tuple of numbers, a length-N 1D np array
# and an N-by-1 np array. The function
# also ensures that, if v is an np array, c is
# a copy, as contrasted to a view, of it.
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many arguments,
# whereas c=gda_cvec1(v1) takes just one argument.
# I recommend always using gda_cvec(v1, v2, ...)
def gda_cvec(*argv):
t = int;
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
Nt = Nt + N;
if( N==0 ):
continue; # skip vector of zero length
if (t==int) and isinstance(v[0,0],float):
t=float;
elif isinstance(v[0,0],complex):
t=complex;
w = np.zeros((Nt,1),dtype=t);
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
#w[Nt:Nt+N,0] = v[0:N,0];
w[Nt:Nt+N,0:1] = v; # patch 20230418 was #w[Nt:Nt+N,0] = v[0:N,0];
Nt = Nt + N;
return w;
# function to make a numpy N-by-1 column vector
# c=gda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, a length-N 1D np array and an N-by-1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also ensures that, if v is an np array, c is
# a copy, as contrasted to a view, of it.
# This version takes just one input argument,
# whereas c=gda_cvec(v1,v2,...) concatenates
# many arguments.
def gda_cvec1(v):
if isinstance(v, int) or isinstance(v, np.int32):
w = np.zeros((1,1),dtype=int);
w[0,0] = v;
return w;
elif isinstance(v, float):
w = np.zeros((1,1),dtype=float);
w[0,0] = v;
return w;
elif isinstance(v, complex):
w = np.zeros((1,1),dtype=complex);
w[0,0] = v;
return w;
elif isinstance(v, np.ndarray):
s = np.shape(v);
if len(s) == 1:
return np.copy(np.reshape(v,(s[0],1)));
else:
[r,c]=s;
if( c==1 ):
return(np.copy(v));
elif(r==1):
return(np.copy(v.T));
else:
raise TypeError("gda_cvec: %d by %d ndarray not allowed" % (r, c));
elif isinstance(v, list):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: list contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(v); # patch v -> np.array(v)
return w;
elif isinstance(v, tuple):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: tuple contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(list(v)); # patch v -> np.array(list(v));
return w;
else:
raise TypeError("gda_cvec: %s not supported" % type(v));
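# brief usage examples (illustrative only; not used elsewhere in this notebook):
# gda_cvec(1, [2.0, 3.0], np.array([4.0, 5.0])) returns a 5-by-1 float column vector
# gda_cvec1(np.array([1.0, 2.0, 3.0])) returns a 3-by-1 copy (not a view) of the input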
# gda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that they appear,
# except that strings starting with 'title ' are plotted
# under the subsequent matrix or vector
# always returns a status of 1
def gda_draw(*argv):
DOCOLOR=True;
if( DOCOLOR ):
bwcmap = matplotlib.colormaps['jet'];
else:
bw = np.zeros((256,4));
v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
bw[:,0] = v;
bw[:,1] = v;
bw[:,2] = v;
bw[:,3] = np.ones(256);
bwcmap = ListedColormap(bw);
# size of plot
W = 16;
H = 4;
fig1 = plt.figure(1);
# figsize width and height in inches
fig1.set_size_inches(W,H);
ax1 = plt.subplot(1,1,1);
plt.axis([0, W, -H/2, H/2]);
plt.axis('off');
LM = W/6; # matrix width and height
LV = W/40; # vector width
FS = 0.12; # character width
TO = 0.4; # title vertical offset
SP = 0.2; # space between objects
LS = 0.2; # leading space
p = LS; # starting x-position
istitle=0; # flags presence of a title
for a in argv:
if isinstance(a,np.ndarray):
sh = np.shape(a);
if len(sh) == 1: # conversion to nx1 array
n = sh[0];
m = 1;
ap = a;
a = np.zeros((n,1));
a[:,0] = ap;
else:
n = sh[0];
m = sh[1];
if m==1:
pold=p;
left=p;
right=p+LV;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LV;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
else:
pold=p;
left=p;
right=p+LM;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LM;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
elif isinstance(a,str):
ns = len(a);
istitle=0;
if( ns>=6 ):
if 'title ' in a[0:6]:
istitle=1;
titlestr=a[6:];
if( istitle != 1):
plt.text(p,0,a);
p = p + ns*FS + SP;
plt.show();
return 1;
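# illustrative call (a sketch only; G0, m0 and d0 stand for any small matrices/vectors):
# gda_draw("title G", G0, "title m", m0, "=", "title d", d0);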
# bandpass filter, used in the seismological example, but handy
# in a variety of settings involving time series
def gda_chebyshevfilt(d, Dt, flow, fhigh):
# (dout,u,v)=gda_chebyshevfilt(d, Dt, flow, fhigh);
# chebyshev IIR bandpass filter
# d - input array of data
# Dt - sampling interval
# flow - low pass frequency, Hz
# fhigh - high pass frequency, Hz
# dout - output array of data
# u - the numerator filter
# v - the denominator filter
# these filters can be used again via dout=sg.lfilter(u.ravel(),v.ravel(),din);
# make sure input timeseries is a column vector
s = np.shape(d);
N = s[0];
if(N==1):
dd = np.zeros((N,1));
dd[:,0] = d;
else:
dd=d;
# sampling rate
rate=1/Dt;
# ripple parameter, set to ten percent
ripple=0.1;
# normalise frequency
fl=2.0*flow/rate;
fh=2.0*fhigh/rate;
# center frequency
cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );
# bandwidth
bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );
# ripple parameter factor
rpf = sqrt((sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple));
a = 0.5*(rpf-1.0/rpf);
b = 0.5*(rpf+1.0/rpf);
u=np.zeros((5,1));
v=np.zeros((5,1));
theta = 3*pi/4;
sr = a * cos(theta);
si = b * sin(theta);
es = sqrt(sr*sr+si*si);
tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
v[0,0] = 1.0;
v[1,0] = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
v[2,0] = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
v[3,0] = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
v[4,0] = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
tmp = 4.0*es*es*bw*bw/tmp;
u[0,0] = tmp;
u[1,0] = 0.0;
u[2,0] = -2.0*tmp;
u[3,0] = 0.0;
u[4,0] = tmp;
dout = sg.lfilter(u.ravel(),v.ravel(),dd.ravel());
return (gda_cvec(dout),gda_cvec(u),gda_cvec(v));
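# usage sketch (assumes d0 is a column vector of samples at interval Dt):
# dout, u, v = gda_chebyshevfilt(d0, Dt, 1.0, 5.0);  # band-pass between 1 and 5 Hz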
# function to perform the multiplication FT (F v)
def gda_FTFmul(v):
# this function is used by the biconjugate gradient solver to
# solve the generalized least squares problem Fm=f.
# Note that "F" must be called "gdaFsparse".
global gdaFsparse;
s = np.shape(v);
if(len(s)==1):
vv = np.zeros((s[0],1));
vv[:,0] = v;
else:
vv=v;
# weirdly, "*" multiplies sparse matrices just fine
temp = gdaFsparse*vv;
return gdaFsparse.transpose()*temp;
# function to perform the multiplication FT f
def gda_FTFrhs(f):
# this function is used by the biconjugate gradient solver to
# solve the generalized least squares problem Fm=f.
# Note that "F" must be called "gdaFsparse".
global gdaFsparse;
return gdaFsparse.transpose()*f;
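# typical use of the two functions above with scipy's biconjugate-gradient
# solver (a sketch only; the actual calls appear later in this notebook):
# LO = las.LinearOperator(shape=(M,M), matvec=gda_FTFmul, rmatvec=gda_FTFmul);
# q = las.bicg(LO, gda_FTFrhs(f), rtol=1e-12, maxiter=3*(L+M));
# mest = gda_cvec(q[0].astype(float));
# gda_matrixmin(E) returns the row index, column index and value of the
# smallest element of the matrix E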
def gda_matrixmin(E):
E1 = np.min(E,axis=0);
k = np.argmin(E,axis=0);
Emin = np.min(E1);
ic = np.argmin(E1);
ir = k[ic];
return ir, ic, Emin;
def gda_Esurface( xc, yr, E, sigmad2 ):
# gda_Esurface: analysis of 2D error surface
# input:
# E: matrix of errors xc increases with columns, yr with rows
# xc, yr: corresponding vectors of (xc, yr) values; must be evenly-spaced
# sigmad2: variance of data that went into E=e'*e with e=dobs-dpre
# output:
# x0, y0: location of minimum in E
# E0: value of E at minimum
# covxy: covariance matrix
# status: 1 on success, 0 on fail
# method: quadratic approximation
# default return values
x0=0.0;
y0=0.0;
E0=0.0;
covxy=np.zeros((2,2));
status=0;
Nx,i = np.shape(xc);
Ny,i = np.shape(yr);
Dx = xc[1,0]-xc[0,0];
Dy = yr[1,0]-yr[0,0];
ir,ic,Emin = gda_matrixmin(E)
if( (ir<1) or (ir>(Ny-2)) or (ic<1) or (ic>(Nx-2)) ):
# minimum not in interior
return x0, y0, E0, covxy, status;
# quadratic model with 9 data
# E = m1 + m2*x + m3*y + m4*x*x/2 + m5*x*y + m6*y*y/2
# dE/dx = m2 + m4*x + m5*y
# dE/dy = m3 + m5*x + m6*y
# at minimum
# 0 = d1 + D2 [x,y]' so [x,y] = -inv(D2)*d1;
x = Dx*gda_cvec(-1, 0, 1, -1, 0, 1, -1, 0, 1);
y = Dy*gda_cvec(-1, -1, -1, 0, 0, 0, 1, 1, 1);
d = gda_cvec( E[ir-1,ic-1], E[ir-1,ic], E[ir-1,ic+1],
E[ir,ic-1], E[ir,ic], E[ir,ic+1],
E[ir+1,ic-1], E[ir+1,ic], E[ir+1,ic+1]);
G = np.concatenate((np.ones((9,1)),x,y,
0.5*np.multiply(x,x),np.multiply(x,y),0.5*np.multiply(y,y)),axis=1);
GTG = np.matmul(G.T,G);
GTd = np.matmul(G.T,d);
m = la.solve(GTG,GTd);
d1 = gda_cvec(m[1,0], m[2,0] );
D2 = np.array( [[m[3,0], m[4,0]], [m[4,0], m[5,0]]] );
invD2 = la.inv(D2);
v = -np.matmul(invD2,d1);
x0 = xc[ic,0]+v[0,0];
y0 = yr[ir,0]+v[1,0];
#E0= m(1) + m(2)*v(1) + m(3)*v(2) + m(4)*v(1)*v(1)/2 + m(5)*v(1)*v(2) + m(6)*v(2)*v(2)/2;
E0 = m[0,0] + m[1,0]*v[0,0] + m[2,0]*v[1,0] + m[3,0]*v[0,0]*v[0,0]/2.0 + m[4,0]*v[0,0]*v[1,0] + m[5,0]*v[1,0]*v[1,0]/2.0;
covxy = (sigmad2)*2.0*invD2;
status=1;
return x0, y0, E0, covxy, status;
In [6]:
# gdapy17_01
# graphical interpretation of Lagrange Multipliers
print("gdapy17_01");
# (x,y) grid
N=51;
M=51;
Dx=0.01;
Dy=0.01;
x=gda_cvec( Dx*np.linspace(0,N-1,N) );
y=gda_cvec( Dy*np.linspace(0,M-1,M) );
# Function E(x,y) that is minimized, a long-wavelength sinusoid
E=-np.matmul(np.sin(x),np.sin(y).T);
# gradient of E (note tensor product)
dEdx = -np.matmul(np.cos(x),np.sin(y).T);
dEdy = -np.matmul(np.sin(x),np.cos(y).T);
# gradient list, for plotting purposes
gs = 0.1; # scale factor
# a b b b c c d d d, e e e e
gi = gda_cvec( 40, 30, 40, 25, 40, 10, 20, 10, 15, 4, 5, 8, 12)-1;
gj = gda_cvec( 40, 30, 25, 40, 10, 40, 10, 24, 18, 16, 10, 6, 3)-1;
Ng,i = np.shape(gi);
# plot E(x,y)
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [x[0,0], x[N-1,0], y[0,0], y[M-1,0] ] );
left=x[0,0];
right=x[N-1,0];
bottom=y[M-1,0];
top=y[0,0];
vm=np.min(E);
plt.imshow( E, cmap=mycmap, vmin=vm, vmax=0.0, extent=(left,right,bottom,top) );
plt.colorbar();
plt.gca().invert_yaxis();
plt.xlabel("x");
plt.ylabel("y");
# plot direction of gradient at selected points
sn = 0.04;
for k in range(Ng):
i = gi[k,0];
j = gj[k,0];
g = gda_cvec( dEdx[i,j], dEdy[i,j] );
g = g/np.sqrt(np.matmul(g.T,g));
plt.plot( x[i,0], y[j,0], "wo", lw=3 );
plt.plot( gda_cvec(x[i,0],x[i,0]+sn*g[0,0]), gda_cvec(y[j,0],y[j,0]+sn*g[1,0]), "w-", lw=3);
# constraint c(x,y)=0 is a parametric curve [xc(s), yc(s)]
# where the parameter s varies from 0 to 1 along curve
# I specify the curve, not the constraint itself
Ns = 101;
s = gda_cvec(np.linspace(0,Ns-1,Ns))/(Ns-1);
# endpoints
cx1 = 0.00;
cy1 = 0.38;
cx2 = 0.25;
cy2 = 0.00;
# line connecting endpoints
xc = (1-s)*cx1 + s*cx2;
yc = (1-s)*cy1 + s*cy2;
# embellish line into a curve
yc = yc - 0.1*np.sin(pi*s);
yc = yc - 0.05*np.exp( -np.power(s-0.1,2)/(2*0.1*0.1) );
yc = yc - 0.03*np.exp( -np.power(s-0.2,2)/(2*0.1*0.1) );
plt.plot( xc, yc, "k-", lw=3);
# plot arrows on one side of curve
dxc = gda_cvec(np.diff(xc,axis=0)); # dx/ds
dyc = gda_cvec(np.diff(yc,axis=0)); # dy/ds
for k in range(0,Ns-1,7):
p = gda_cvec( xc[k,0], yc[k,0] ); # point on curve
t = gda_cvec( dxc[k,0], dyc[k,0] ); # tangent to curve
tsq = np.matmul(t.T,t); tsq=tsq[0,0];
t = t / sqrt(tsq);
n = gda_cvec( t[1,0], -t[0,0] ); # normal to curve
sn = 0.03;
plt.plot( gda_cvec(p[0,0],p[0,0]+sn*n[0,0]), gda_cvec(p[1,0],p[1,0]+sn*n[1,0]), "-", lw=2, color=[0.7,0.7,0.7] );
# tabulate the deviation between n and gradE/|gradE| along
# the constraint curve
e = np.zeros((Ns-1,1)); # deviation
ie = np.zeros((Ns-1,1),dtype=int);
je = np.zeros((Ns-1,1),dtype=int);
for k in range(Ns-1): # tabulate deviation
p = gda_cvec(xc[k,0], yc[k,0] ); # point on curve
i = int(floor(p[0,0]/Dx)); # pixel of this point
j = int(floor(p[1,0]/Dy));
t = gda_cvec(dxc[k,0], dyc[k,0]); # tangent to curve
tsq = np.matmul(t.T,t); tsq=tsq[0,0];
t = t / sqrt(tsq);
n = gda_cvec(t[1,0], -t[0,0]); # normal to curve
g = gda_cvec(dEdx[i,j], dEdy[i,j]); # gradient at this point
gsq = np.matmul(g.T,g); gsq=gsq[0,0];
g = g/sqrt(gsq); # direction of gradient
eij = (n[0,0]-g[0,0])**2 + (n[1,0]-g[1,0])**2; # deviation
e[k,0] = eij;
ie[k,0] = i; # back-pointer i(k)
je[k,0] = j; # back-pointer j(k)
# find and plot point where gradE and n are anti-parallel
emin = np.min(e);
k = np.argmin(e);
i = ie[k,0];
j = je[k,0];
plt.plot( x[i,0], y[j,0], "ko", lw=4 );
plt.show();
print("Caption: Graphical interpretation of Laplace multipliers.");
print("Function E(x,y) being minimized (colors), gradient of E (white");
print("bar)s, with circle on downslope end), constraint surface (black curve)");
print("and its normals (grey bars), point on constraint surface where");
print("E is minimized (black circle). Gradient and normals are parallel");
print("at this point");
# plot deviation along curve
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [0, 1, 0, np.max(e) ] );
plt.plot( s[0:Ns-1,0:1], e, "k-", lw=3 );
plt.xlabel("s");
plt.ylabel("E");
plt.show();
print("Caption: Quantity E(x(s),y(s)) being minimized");
print("as a function of arc-length s along constraint curve");
gdapy17_01
Caption: Graphical interpretation of Lagrange multipliers. Function E(x,y) being minimized (colors), gradient of E (white bars, with circle on downslope end), constraint surface (black curve) and its normals (grey bars), point on constraint surface where E is minimized (black circle). Gradient and normals are parallel at this point
Caption: Quantity E(x(s),y(s)) being minimized as a function of arc-length s along constraint curve
In [7]:
# gdapy17_02
# Example of complex least squares
# the model is a constant plus two complex exponentials of
# different frequencies
print("gdapy17_02");
N = 200;
Dt = 0.01;
t = gda_cvec(Dt*np.linspace(0,N-1,N));
tmin = t[0,0];
tmax = t[N-1,0];
w1 = 10.0; # angular frequency 1
w2 = 25.0; # angular frequency 2
f1 = np.ones((N,1)); # constant
f2 = np.exp(complex(0.0,1.0)*w1*t); # first complex exponential
f3 = np.exp(complex(0.0,1.0)*w2*t); # second complex exponential
mtrue = gda_cvec( complex(1,2), complex(3,4), complex(5,6) );
G = np.concatenate( (f1,f2,f3), axis=1 );
dtrue = np.matmul(G,mtrue); # true data
sd = 1.0; # standard deviation of data
# observed data is true data plus random noise
nr=np.random.normal(loc=0.0,scale=sd/sqrt(2),size=(N,1))
ni=np.random.normal(loc=0.0,scale=sd/sqrt(2),size=(N,1))*complex(0.0,1.0);
dobs = dtrue + nr + ni;
# usual least squares formula; in MATLAB, G' is the Hermitian
# transpose of a complex matrix G, so np.conjugate(G.T) is used here
GHG = np.matmul(np.conjugate(G.T),G);
GHd = np.matmul(np.conjugate(G.T),dobs);
mest = la.solve(GHG,GHd);
dpre = np.matmul(G,mest); # predicted data
# plot data
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.plot(t,np.real(dpre),"k-",lw=2);
plt.plot(t,np.real(dobs),"k*",lw=2);
plt.plot(t,np.imag(dpre),"r-",lw=2);
plt.plot(t,np.imag(dobs),"r*",lw=2);
plt.xlabel("t");
plt.ylabel("d");
plt.show();
print("Caption: data, with real part in black and imaginary part in red");
print("observed (stars), predicted (lines).");
# print solution
print(" ");
print("solution:");
print("1: true %.2f+%.2fi est %.2f+%.2fi" % (np.real(mtrue[0,0]), np.imag(mtrue[0,0]), np.real(mest[0,0]), np.imag(mest[0,0])));
print("2: true %.2f+%.2fi est %.2f+%.2fi" % (np.real(mtrue[1,0]), np.imag(mtrue[1,0]), np.real(mest[1,0]), np.imag(mest[1,0])));
print("3: true %.2f+%.2fi est %.2f+%.2fi" % (np.real(mtrue[2,0]), np.imag(mtrue[2,0]), np.real(mest[2,0]), np.imag(mest[2,0])));
gdapy17_02
Caption: data, with real part in black and imaginary part in red observed (stars), predicted (lines). solution: 1: true 1.00+2.00i est 1.02+2.04i 2: true 3.00+4.00i est 2.94+3.94i 3: true 5.00+6.00i est 4.98+6.11i
In [9]:
# gdapy17_03
# inverse of a "resized matrix"
# example of updating the inverse of a symmetric matrix
# as rows/cols are continually deleted/added to it, a process that
# can be useful in a moving window analysis
N=500; # size of test matrix A
N1=2; # size of section deleted/added on each iteration
NITER=1000; # total number of iterations
NCORR1=200; # correct after NCORR1 iterations
NCORR2=1; # number of correction iterations
print("gdapy17_03");
print("Matrix A is %d by %d with %d rows/cols updated" % (N,N,N1) );
E = np.zeros((NITER,1)); # Error
# the test matrix is a 2D Gaussian autocovariance matrix
# with positions (xp, yp), plus a little added to the main diagonal
# as might be encountered in a Gaussian Process Regression problem
xp = np.random.uniform(low=0.0,high=N,size=(N,1));
yp = np.random.uniform(low=0.0,high=N,size=(N,1));
A = np.zeros((N,N));
s = N/5; # scale length of autocovariance
s2 = s**2;
for i in range(N):
for j in range(N):
A[i,j] = exp(-0.5 * ((xp[i,0]-xp[j,0])**2 + (yp[i,0]-yp[j,0])**2 ) / s2 );
s2d = 0.1; # add to main diagonal
A = A + s2d * np.identity(N);
A = (A+A.T)/2.0; # make sure A is exactly symmetric
AI = la.inv(A); # inverse of A
for itt in range(NITER):
# delete phase, using Woodbury Formula
X = A[0:N1,0:N1];
Y = A[N1:N,N1:N];
Z = A[N1:N,0:N1];
P = AI[0:N1,0:N1];
Q = AI[N1:N,N1:N];
R = AI[N1:N,0:N1];
PI = la.inv(P);
PI = (PI+PI.T)/2.0;
ZTR = np.matmul(Z.T,R);
RTZ = ZTR.T;
ZTQZ = np.matmul(np.matmul(Z.T,Q),Z);
ZTQZ = (ZTQZ+ZTQZ.T)/2.0;
ZTRPIRTZ = np.matmul(np.matmul(ZTR,PI),RTZ);
ZTRPIRTZ = (ZTRPIRTZ+ZTRPIRTZ.T)/2.0;
C = ZTRPIRTZ - ZTQZ;
C = (C+C.T)/2.0;
D = -la.inv(PI-C);
D = (D+D.T)/2.0;
T = -np.matmul(Q,Z);
RTT = np.matmul(R,T.T);
F1 = RTT+RTT.T;
F2 = np.matmul(np.matmul(R,C),R.T);
F3 = np.matmul(np.matmul(T,D),T.T);
FS = F1 + F2 + F3;
FS = (FS+FS.T)/2.0;
YI = Q - FS;
# at this point, A is reduced to Y
# AI is reduced to YI
# reset A, AI to Y, YI
A = Y;
AI = YI;
N = N-N1;
# add phase
N = N+N1;
N2 = N-N1;
# randomly choose x,y position of new points
xpnew = np.random.uniform(low=0.0,high=N,size=(N,1));
ypnew = np.random.uniform(low=0.0,high=N,size=(N,1));
xp = gda_cvec( xp[0:N2,0:1], xpnew[N2:N,0:1] );
yp = gda_cvec( yp[0:N2,0:1], ypnew[N2:N,0:1] );
# recompute the complete A for test purposes only
# (coded more efficiently than above because its inside a loop)
W = np.ones((1,N));
Mx = np.matmul(xp,W);
Dx = Mx-Mx.T;
My = np.matmul(yp,W);
Dy = My-My.T;
Dx2pDy2 = np.power(Dx,2)+np.power(Dy,2);
Anew = np.exp(-0.5*Dx2pDy2/s2 );
Anew = Anew + s2d * np.identity(N);
Anew = (Anew+Anew.T)/2.0;
# add based on bordering method
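# (for reference: with A = [[X, Z^T], [Z, Y]] and XI = inv(X) already known,
#  the block-inverse identities used below are QQ = inv(Y - Z XI Z^T),
#  RR = -QQ Z XI, PP = XI + XI Z^T QQ Z XI, and AI = [[PP, RR^T], [RR, QQ]])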
X = Y; # the existing part of the matrix
Y = Anew[N2:N,N2:N]; # the new sections added
Z = Anew[N2:N,0:N2]; # the new sections added
A = np.concatenate((np.concatenate((X,Z.T),axis=1),np.concatenate((Z,Y),axis=1)),axis=0);
XI = YI;
ZXI = np.matmul(Z,XI);
ZXIZT = np.matmul(ZXI,Z.T);
ZXIZT = (ZXIZT+ZXIZT.T)/2.0;
YMZMIZT = Y - ZXIZT;
YMZMIZT = (YMZMIZT+YMZMIZT.T)/2.0;
QQ = la.inv(YMZMIZT);
QQ = (QQ+QQ.T)/2.0;
# Matlab: RR = -YMZMIZT\ZXI;
RR = -la.solve(YMZMIZT,ZXI);
PP = XI + np.matmul(np.matmul(ZXI.T,QQ),ZXI);
PP = (PP+PP.T)/2.0;
AI = np.concatenate((np.concatenate((PP,RR.T),axis=1),np.concatenate((RR,QQ),axis=1)),axis=0);
AI = (AI+AI.T)/2.0;
Nr, Nc = np.shape(A);
# correct every so often
if( (itt%NCORR1) == 0 ):
for k in range(NCORR2):
AI = 2*AI - np.matmul(np.matmul(AI,A),AI);
AI = (AI+AI.T)/2.0;
# maximum error of inverse
Ae = np.matmul(A,AI)-np.identity(N);
e = gda_cvec(Ae.ravel());
esq = np.matmul(e.T,e); esq=esq[0,0];
E[itt,0] = sqrt(esq)/N; # note len(e) is N**2
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [0, NITER-1, -15, 0] );
n = gda_cvec(np.linspace(1,NITER,NITER));
plt.plot( n, np.log10(E), "k-", lw=2);
for i in range(0,NITER-1,NCORR1):
plt.plot(gda_cvec(i,i), gda_cvec(-15,-5), "r:", lw=1);
plt.xlabel('iteration number, i');
plt.ylabel('log10 rms error, E(i)');
plt.show();
print("Caption: Root mean square error of A*AI-I as a function of");
print("iteration number (black curve), with correction points (red)");
gdapy17_03 Matrix A is 500 by 500 with 2 rows/cols updated
Caption: Root mean square error of A*AI-I as a function of iteration number (black curve), with correction points (red)
In [11]:
# gdapy17_04
# This code parallels "Method Summary 1, Generalized Least Squares"
print("gdapy7_04");
# necessary to use variable gdaFsparse for biconjugate gradient solver
# Step 1: State the problem in words.
# This is a data smoothing problem where the model
# parameters are a discretized version of a function
# m(x) (at equal increments Dx) and the data are noisy
# measurements of the same model function.
# set up the auxiliary variable, x
Nx=101;
xmin = -10;
xmax = 10;
xL = (xmax-xmin);
Dx = xL/(Nx-1);
Dx2i = 1/(Dx**2);
x = gda_cvec(xmin + Dx*np.linspace(0,Nx-1,Nx));
# Step 2: Organize the problem in standard form
# number of model parameters = number of data = Nx
# and the data kernel is the identity matrix, G=I
N=Nx;
M=N;
# use row-col-value method, which is maybe a bit
# awkward, for something as simple as an identity
# matrix
NELEMENTS = N+10;
Gr = np.zeros((NELEMENTS,1),dtype=int);
Gc = np.zeros((NELEMENTS,1),dtype=int);
Gv = np.zeros((NELEMENTS,1));
Nel = 0;
for i in range(N):
Gr[Nel,0]=i;
Gc[Nel,0]=i;
Gv[Nel,0]=1.0;
Nel=Nel+1;
# truncate
Gr=Gr[0:Nel,0:1];
Gc=Gc[0:Nel,0:1];
Gv=Gv[0:Nel,0:1];
gdaGsparse=sparse.coo_matrix((Gv.ravel(),(Gr.ravel(),Gc.ravel())),shape=(N,M));
# this is a synthetic test, so there is no real data
# the true model is a "ringy" function peaked at x=0
# the observed data is the true model plus random noise
Atrue = 1.9; # amplitude
Ltrue=6.21; # wavelength
b=4.0; # decay parameter
mtrue=Atrue*np.multiply(np.exp(-2*pi*np.abs(x)/(b*Ltrue)),np.cos(2*pi*x/Ltrue));
dtrue = mtrue;
sigmad = 0.03; # noise level of the observations
sigma2d = sigmad**2;
dobs = dtrue+np.random.normal(loc=0.0,scale=sigmad,size=(N,1));
No=int(floor(7*N/8)); # create an outlier in the data
dobs[No,0] = 0.3;
# Step 3: Plot the data
# When you examine the plot, you should take notice of:
# the overall shape of the curve
# the noise level
# the existence of an outlier toward the right hand side
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -2.0, 2.0] );
plt.plot( x, dobs, "ko", lw=2);
plt.xlabel("x");
plt.ylabel("d(x)");
plt.show();
print("Caption: Observed data d(x)");
# I'm going to remove the outlier "manually"
# by interpolating nearby points
dobs[No,0] = 0.5*(dobs[No-1,0]+dobs[No+1,0]);
# Step 4: Establish the accuracy of the data
# In this case, we know the sigmad, because it
# was specified when the synthetic data were
# created. I'm going to use this "correct" value
# Step 5: State the prior information in words.
# We assume that the function is smooth, meaning
# that its second derivative is small
# Step 6: Organize the prior information in standard
# form; H is a second difference operator, h=0. There
# are two fewer rows in H than there are model parameters
K=M-2;
z = Dx2i*np.ones((M,1));
# use row-column-value method
NELEMENT = 3*K+10;
Hr = np.zeros((NELEMENT,1),dtype=int);
Hc = np.zeros((NELEMENT,1),dtype=int);
Hv = np.zeros((NELEMENT,1));
Nel=0; # element counter
for i in range(K):
Hr[Nel,0]=i;
Hc[Nel,0]=i;
Hv[Nel,0]=z[i,0];
Nel=Nel+1;
Hr[Nel,0]=i;
Hc[Nel,0]=i+1;
Hv[Nel,0]=-2.0*z[i,0];
Nel=Nel+1;
Hr[Nel,0]=i;
Hc[Nel,0]=i+2;
Hv[Nel,0]=z[i,0];
Nel=Nel+1;
h = np.zeros((K,1));
# truncate
Hr = Hr[0:Nel,0:1];
Hc = Hc[0:Nel,0:1];
Hv = Hv[0:Nel,0:1];
gdaHsparse=sparse.coo_matrix((Hv.ravel(),(Hr.ravel(),Hc.ravel())),shape=(K,M));
# Step 7: Establish the accuracy of the a priori information
# Suppose that I know that the function is roughly sinusoidal
# with an amplitude of about A and a wavelength of about L.
# Then if the function were a cosine wave m = A cos(2 pi x / L)
# its second derivative would be d2m/dx2 = -A (2 pi/ L)^2
# cos(2 pi x / L). The variance of the second derivative
# would therefore be about 0.5 A^2 (2 pi/ L)^4 (since the
# average value of cos^2 is 0.5). So I'm going to use this
# amount as an estimate of the variance of the prior information;
# that is, the second derivative is zero +/- sigmah.
A = 2.0;
L = 6.0;
sigma2h = 0.5 * (A**2) * ((2*pi/L)**4);
sigmah = sqrt(sigma2h);
# Step 8: Estimate model parameter and their covariance
# set up the matrix F and vector f
# F=spalloc(N+K,M,3*(N+K));
# use row-column-value method
Fr = np.concatenate( (Gr,Hr+N), axis=0 );
Fc = np.concatenate( (Gc,Hc), axis=0 );
Fv = np.concatenate( (Gv/sigmad,Hv/sigmah), axis=0 );
f = np.concatenate( (dobs/sigmad,h/sigmah), axis=0 );
fsave = np.copy(f);
L = N+K;
gdaFsparse=sparse.coo_matrix((Fv.ravel(),(Fr.ravel(),Fc.ravel())),shape=(L,M));
del Gr; del Gc; del Gv;
del Hr; del Hc; del Hv;
del Fr; del Fc; del Fv;
# define linear operator needed for biconjugate gradient solver
LO=las.LinearOperator(shape=(M,M),matvec=gda_FTFmul,rmatvec=gda_FTFmul);
# solve using conjugate gradient algorithm
mest = np.zeros((M,1));
tol = 1e-12; # tolerance
maxit = 3*(L+M); # maximum number of iterations allowed
# solve Fm=f by biconjugate gradients
FTf=gda_FTFrhs(f)
q=las.bicg(LO,FTf,rtol=tol,maxiter=maxit);
mest = gda_cvec(q[0].astype(float));
# plot the solution
# note that it fits the observations pretty well
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -2, 2] );
plt.plot( x, mest, "r-", lw=3 );
plt.plot( x, dobs, "ko", lw=2 );
plt.xlabel("x");
plt.ylabel("d(x) and m(x)");
# Step 9: (see a little further, below)
# Step 10: Confidence interval calculation
# The covariance matrix is [cov m] = inv(F'*F)
# However, we do not necessarily need to compute the complete
# matrix. The code here solves for the k-th column, say ck,
# (or row, since it is symmetric). The idea is to solve the equation
# (F'F) ck = s, where s is a vector that is all zeros except
# a 1 in row k. Then ck is the k-th column of inv(F'*F)
varlist=np.arange(0,N,int(floor(N/20))); # list of values of x at which
# to calculate error bars
for k in varlist.tolist():
# solve (F'F) ck = s
s = np.zeros((M,1));
s[k,0]=1.0;
q=las.bicg(LO,s,rtol=tol,maxiter=maxit);
ck =gda_cvec(q[0].astype(float));
# variance of estimated model parameters is k-th element of ck
sigmamest = sqrt(ck[k,0]);
# 95% confidence interval
mlow = mest[k,0]-2*sigmamest;
mhigh = mest[k,0]+2*sigmamest;
# plot error bars
plt.plot( gda_cvec(x[k,0],x[k,0]), gda_cvec(mlow, mhigh), "b-", lw=3 );
plt.show();
print("Caption: Observed data d(x) (circle), solution m(x) (red) 95 percent confidence bars (blue)");
# Note that the error bars are a small fraction of the
# overall range of the function, meaning that we haven't
# smoothed the function completely away!
# Step 9: Examine the model resolution
# The resolution matrix is RG = GMG * G
# with GMG = inv(F'F) G' inv(covd)
# However, as with covariance, one does not need to
# compute every row. This code just does a few rows.
# Note that the plot shows that the resolution is
# peaked along the main diagonal, but that it has a
# finite width of about 1, meaning that there is some
# smoothing (but that's what we wanted), and a little
# tiny bit of negative sidelobes (which is not so
# good, but unavoidable with 2nd derivative smoothing).
reslist=np.arange(0,N,int(floor(N/9)));
Nres=len(reslist.tolist());
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, xmin-(xmax-xmin)/Nres, xmax+(xmax-xmin)/Nres] );
plt.xlabel("x");
plt.ylabel("x");
ax1.invert_yaxis();
vard = sigma2d * np.ones((N,1)); # variance of data
# (the code can handle the case where it varies)
for k in reslist.tolist():
# compute k-th row of the resolution matrix
# first: compute column of inverse of F'F
# note F'F symmetric, so column is also row
s = np.zeros((M,1));
s[k,0]=1.0;
q=las.bicg(LO,s,rtol=tol,maxiter=maxit);
ck=gda_cvec(q[0].astype(float));
# second: row of generalized inverse GMG(k,:)=gi
# MATLAB: gi = ((ck')*(G'))./(vard');
gi = np.divide( (ck.T*gdaGsparse.T),vard.T); # ok to use *, as one matrix is sparse
# third: row of the resolution matrix
r_row = gi*gdaGsparse; # ok to use *, as one matrix is sparse
sc = -((xmax-xmin)/Nres)/np.max(r_row);
plt.plot( x, sc*r_row.T+x[k,0], "k-", lw=3 );
plt.show();
print("Caption: Model resolution matrix");
# Step 11: Examine the individual errors
# Note that the prediction error is of
# uniform size of about unity, indicating
# that our a priori data variance is about
# right. It is also of similar size across
# the plot, indicating that our assumption
# that the error is uniform is about right,
# too. On the other hand, the error
# in the a priori information is a little less
# than unity and not uniform across the
# plot, indicating that our assumptions on
# the size and uniformity of sigmah are not
# quite right.
# Plot prediction error
dpre = gdaGsparse*mest;
e = dobs-dpre;
fig3 = plt.figure(3,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -2.0, 2.0] );
plt.plot( gda_cvec(xmin, xmax), gda_cvec(0, 0), "b:", lw=2);
plt.plot( x, e/sigmad, "ko", lw=3 );
plt.xlabel("x");
plt.ylabel("e/sigmad");
plt.show();
print("Caption: Normalized prediction error");
# plot a priori error
hpre = gdaHsparse*mest;
l = h-hpre;
fig4 = plt.figure(4,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -2.0, 2.0] );
plt.plot( gda_cvec(xmin, xmax), gda_cvec(0, 0), "b:", lw=2);
plt.plot( x[1:N-1,0:1], l/sigmah, "ko", lw=3 );
plt.xlabel("x");
plt.ylabel("l/sigmah");
plt.show();
print("Caption: Normalized error in prior information");
# Step 12: Examine the total error phiest
# in order to address the null hypothesis
# that departure from the expected value of
# nu is due to random error.
# In the plot, total error phiest is usually
# close to the minimum bound (sometimes on
# one side, sometimes on the other), indicating
# that one of the two variances, sigma2d or
# sigma2h is overestimated. (Our estimate of
# sigma2h is probably too large. Multiplying
# it by 0.75 produces a better result).
fpre = gdaFsparse*mest;
phiest = np.matmul((f-fpre).T,(f-fpre)); phiest=phiest[0,0];
nu = N+K-M;
p1 = nu - 2.0*sqrt(2*nu);
p2 = nu + 2.0*sqrt(2*nu);
fig5 = plt.figure(5,figsize=(12,5));
ax1=plt.subplot(1,1,1);
pmax = max(phiest,p2);
plt.axis( [0, 1.1*pmax, 0.0, 1.0] );
plt.plot( gda_cvec(p1, p1), gda_cvec(0, 0.2), "r-", lw=2);
plt.plot( gda_cvec(p2, p2), gda_cvec(0, 0.2), "r-", lw=2);
plt.plot( gda_cvec(phiest, phiest), gda_cvec(0, 0.4), "k-", lw=3);
plt.xlabel("phi");
plt.show();
print("Caption: Total error phi (black) and confidence bounds (red)");
# Step 13: Two different models, A and B?
# We have only one model, so this step is not relevant
gdapy17_04
Caption: Observed data d(x)
Caption: Observed data d(x) (circle), solution m(x) (red) 95 percent confidence bars (blue)
Caption: Model resolution matrix
Caption: Normalized prediction error
Caption: Normalized error in prior information
Caption: Total error phi (black) and confidence bounds (red)
In [12]:
# gdapy17_05
# This code parallels "Method Summary 2, Grid Search".
print("gdapy17_05");
# make synthetic data
# a function f(t) is composed of the sum of two
# sinusoids of unknown amplitude and frequency
# d(t) = A1 cos(2 pi f1 t) + B1 sin(2 pi f1 t)
#      + A2 cos(2 pi f2 t) + B2 sin(2 pi f2 t)
# true values
A1true = 0.9;
B1true = 0.5;
A2true = 0.2;
B2true = 0.3;
f1true = 7.1;
f2true = 11.4;
# time axis
N = 101;
Dt = 0.01;
t = gda_cvec( Dt*np.linspace(0,N-1,N) );
# true data and synthetic observed data
pi2 = 2*pi;
dtrue = A1true*np.cos(pi2*f1true*t)+B1true*np.sin(pi2*f1true*t);
dtrue = dtrue+A2true*np.cos(pi2*f2true*t)+B2true*np.sin(pi2*f2true*t);
sigmad = 0.1;
sigmad2 = sigmad**2;
dobs = dtrue+np.random.normal(loc=0.0,scale=sigmad,size=(N,1) );
# plot observed data
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [t[0,0], t[N-1,0], -2.0, 2.0] );
plt.plot( t, dobs, "ko", lw=2 );
plt.plot( t, dobs, "k-", lw=2 );
plt.xlabel("time t (s)");
plt.ylabel("d(t)");
# Step 1. This problem is non-linear in the frequencies
# [f1, f2] but linear in the amplitudes [A1, B1, A2, B2].
# The strategy is to grid search over [f1, f2], computing
# [A1, B1, A2, B2] at each grid node via least squares.
# Step 2. By visual inspection of the data, the frequencies
# of variation are about 10 Hz. So we grid search in the
# (5-15) Hz band to be sure we enclose [f1, f2]. The
# data are a fairly slowly varying function of frequency,
# so a 201 by 201 grid should be sufficient. Note that
# the problem is symmetrical in f1 and f2, so we could
# just search the f2>f1 part of the error surface E(f1,f2).
# However, coding this case seems more trouble than it's
# worth. We do have to be careful not to do the "f1 exactly
# equal to f2" case, because the least squares data kernel
# is redundant in this case. I avoid this problem by
# making the f1 and f2 grid points slightly different.
Nf1 = 201;
f1min=5.03;
f1max=15.0;
Df1=(f1max-f1min)/(Nf1-1);
f1list = gda_cvec( f1min + Df1*np.linspace(0,Nf1-1,Nf1) );
Nf2 = 201;
f2min=5.05;
f2max=15.0;
Df2=(f1max-f1min)/(Nf2-1);
f2list = gda_cvec( f2min + Df2*np.linspace(0,Nf2-1,Nf2) );
# Step 3: The Grid Search
E = np.zeros( (Nf1, Nf2) );
M = 6;
for if1 in range(Nf1): # loop over values of f1
for if2 in range(Nf2): # loop over values of f2
f1 = f1list[if1,0]; # set [f1, f2]
f2 = f2list[if2,0];
# create data kernel for m=[A1, B1, A2, B2] for fixed [f1, f2]
G = np.concatenate((np.cos(pi2*f1*t),np.sin(pi2*f1*t),
np.cos(pi2*f2*t),np.sin(pi2*f2*t)), axis=1 );
GTG = np.matmul(G.T,G);
GTd = np.matmul(G.T,dobs);
mest = la.solve(GTG,GTd); # least squares solution
dpre = np.matmul(G,mest); # predicted data
e = dobs - dpre; # prediction error
myE = np.matmul(e.T,e); myE=myE[0,0];
E[if1,if2] = myE; # tabulate overall error
# refine minimum using quadratic approximation. Note that I am
# using the prior variance of the data in the calculation of covariance
f1est, f2est, E0, covf1f2, status = gda_Esurface( f2list, f1list, E, sigmad2 );
# recompute m=[A1, B1, A2, B2] for the refined [f1, f2]
G = np.concatenate((np.cos(pi2*f1est*t),np.sin(pi2*f1est*t),
np.cos(pi2*f2est*t),np.sin(pi2*f2est*t)), axis=1 );
GTG = np.matmul(G.T,G);
GTd = np.matmul(G.T,dobs);
mest = la.solve(GTG,GTd); # least squares solution
dpre = np.matmul(G,mest);
e = dobs - dpre;
Efinal = np.matmul(e.T,e); Efinal=Efinal[0,0];
sigma2dest = Efinal/(N-M); # posterior variance in the data
# recompute the covariance of frequencies based on the posterior variance
f1est, f2est, E0, covf1f2, status = gda_Esurface( f1list, f2list, E, sigma2dest );
sigmaf1 = sqrt( covf1f2[0,0] );
sigmaf2 = sqrt( covf1f2[1,1] );
# write out the frequencies and their 95% confidence intervals
print("f1: %.3f +/- %.3f (95%%)" % (f1est, 2.0*sigmaf1) );
print("f2: %.3f +/- %.3f (95%%)" % (f2est, 2.0*sigmaf2) );
# Use the standard least squares estimate of the covariance of the
# amplitudes (using the posterior variance of the data)
covm = sigma2dest * la.inv(GTG);
A1est = mest[0,0]; B1est=mest[1,0]; A2est = mest[2,0]; B2est=mest[3,0];
sigmaA1 = sqrt( covm[0,0] );
sigmaB1 = sqrt( covm[1,1] );
sigmaA2 = sqrt( covm[2,2] );
sigmaB2 = sqrt( covm[3,3] );
# write out the amplitudes and their 95% confidence intervals
print("A1: %.3f +/- %.3f (95%%)" % (A1est, 2.0*sigmaA1) );
print("B1: %.3f +/- %.3f (95%%)" % (B1est, 2.0*sigmaB1) );
print("A2: %.3f +/- %.3f (95%%)" % (A2est, 2.0*sigmaA2) );
print("B2: %.3f +/- %.3f (95%%)" % (B2est, 2.0*sigmaB2) );
# plot the predicted data
plt.plot( t, dpre, "r-", lw=3 );
plt.show();
print("Caption: Observed data (black) and predicted data (red)");
# plot the error surface. Note the horizontal stripes
# on the image parallel to an f1 or f2 of 7 Hz. This
# frequency has the dominant amplitude, so when one
# of the frequencies is about 7 Hz, the prediction will
# fit the data to a reasonable degree. As expected, the
# error surface is symmetrical about the f1=f2 line. We
# anticipated this behavior (see above).
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [f1min, f1max, f2min, f2max] );
left=f2min; right=f2max;
top=f1min; bottom=f1max;
plt.imshow( E, cmap=mycmap, vmin=np.min(E), vmax=np.max(E), extent=(left,right,bottom,top) );
plt.colorbar();
plt.xlabel("frequency f2 (Hz)");
plt.ylabel("frequency f1 (Hz)");
# plot estimated frequencies and their 95% confidence intervals
plt.plot( gda_cvec(f2est-sigmaf2,f2est+sigmaf2), gda_cvec(f1est,f1est), "w-", lw=3 );
plt.plot( gda_cvec(f2est,f2est), gda_cvec(f1est-sigmaf1,f1est+sigmaf1), "w-", lw=3 );
plt.show();
print("Caption: Error surface (colors) with estimated frequencies")
print("and their 95 percent confidence intervals (white)");
gdapy17_05 f1: 11.362 +/- 0.041 (95%) f2: 7.130 +/- 0.015 (95%) A1: 0.254 +/- 0.028 (95%) B1: 0.286 +/- 0.028 (95%) A2: 0.916 +/- 0.028 (95%) B2: 0.557 +/- 0.028 (95%)
Caption: Observed data (black) and predicted data (red)
Caption: Error surface (colors) with estimated frequencies and their 95 percent confidence intervals (white)
In [14]:
# gdapy17_06
# This code parallels "Method Summary 3, Nonlinear Least Squares"
# global gdaFsparse necessary to use biconjugate gradient solver
print("gdapy17_06");
# Step 1: State the problem in words.
# This is a simple modification of the data smoothing
# problem solved in the example for Methods Summary 1, Least
# Squares. As before, the model parameters are
# a discretized version of a function m(x) (at equal
# increments Dx). But now the data are nonlinear
# functions of each model parameter, d(i) = erf( m(i) ).
# The error function erf(z) is sigmoidal in shape. Near
# the origin, erf(z) ~ (2/sqrt(pi)) z, but for large |z| it asymptotes
# to +/- 1. It thus squeezes the larger model parameters
# into a small range. The derivative is d erf(z) / dz
# = (2/sqrt(pi)) exp(-z^2)
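# quick numerical check of the derivative formula (a sketch, not used below):
# z0 = 0.3; dz = 1.0e-6;
# (erf(z0+dz)-erf(z0))/dz is approximately (2.0/sqrt(pi))*exp(-z0**2)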
# set up the auxiliary variable, x
Nx=101;
xmin = -10.0;
xmax = 10.0;
xL = (xmax-xmin);
Dx = xL/(Nx-1);
Dx2i = 1.0/(Dx**2);
x = gda_cvec( xmin + Dx*np.linspace(0,Nx-1,Nx) );
# Step 2: Organize the problem in standard form
# number of model parameters = number of data = Nx
# and the data kernel is the identity matrix, G=I
N=Nx;
M=N;
# this is a synthetic test, so there is no real data
# the true model is a "ringy" function peaked at x=0
# the observed data is the true model plus random noise
Atrue = 1.9; # amplitude
Ltrue=6.21; # wavelength
b=4.0; # decay parameter
mtrue=Atrue*np.multiply(np.exp(-2.0*pi*np.abs(x)/(b*Ltrue)),np.cos(2*pi*x/Ltrue));
dtrue = erf(mtrue);
sigmad = 0.1; # noise level of the observations
sigma2d = sigmad**2;
dobs = dtrue+np.random.normal(loc=0.0,scale=sigmad,size=(N,1));
# Establish the accuracy of the data
# In this case, we know the data has accuracy
# sigmad, because it was specified when the
# synthetic data were created. I'm going to use
# this "correct" value
# Plot the data
# When you examine the plot, you should take notice of:
# the overall shape of the curve
# the noise level
# the shape of the central peak, which is rather flat
# (which is due to the nonlinearity; information about
# the height of the central peak is being lost, and
# that will limit the success of the inversion).
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -1, 1] );
plt.plot( x, dobs, "ko", lw=2 );
plt.xlabel("x");
plt.ylabel("d(x)");
plt.show();
print("Caption: observed data d(x)");
# State the prior information in words.
# We assume that the function is smooth, meaning
# that its second derivative is small
# Organize the prior information in standard
# form; H is a second difference operator, h=0. There
# are two fewer rows in H than there are model parameters
K=M-2;
z = Dx2i*np.ones((M,1));
# use the row-column-value method to make the sparse matrix H
# (an alternative would be a single call such as sparse.diags)
NELEMENTS = 3*M+10;
Hr = np.zeros((NELEMENTS,1),dtype=int);
Hc = np.zeros((NELEMENTS,1),dtype=int);
Hv= np.zeros((NELEMENTS,1));
Nel=0;
for i in range(K):
Hr[Nel,0]= i;
Hc[Nel,0]= i;
Hv[Nel,0]= z[i,0];
Nel = Nel+1;
Hr[Nel,0]= i;
Hc[Nel,0]= i+1;
Hv[Nel,0]= -2*z[i,0];
Nel = Nel+1;
Hr[Nel,0]= i;
Hc[Nel,0]= i+2;
Hv[Nel,0]= z[i,0];
Nel = Nel+1;
# delete extra rows
Hr = Hr[0:Nel,0:1];
Hc = Hc[0:Nel,0:1];
Hv = Hv[0:Nel,0:1];
gdaHsparse=sparse.coo_matrix((Hv.ravel(),(Hr.ravel(),Hc.ravel())),shape=(K,M));
# right hand side
h = np.zeros((K,1));
# Establish the accuracy of the a priori information
# Suppose that I know that the function is roughly sinusoidal
# with an amplitude of about A and a wavelength of about L.
# Then if the function were a cosine wave m = A cos(2 pi x / L)
# its second derivative would be d2m/dx2 = -A (2 pi/ L)^2
# cos(2 pi x / L). The variance of the second derivative
# would therefore be about 0.5 A^2 (2 pi/ L)^4 (since the
# average value of cos^2 is 0.5). So I'm going to use this
# amount as an estimate of the variance of the prior information;
# that is, the second derivative is zero +/- sigmah.
A = 2.0;
L = 6.0;
sigma2h = 0.5 * (A**2) * ((2*pi/L)**4);
sigmah = sqrt(sigma2h);
# Step 3: Decide upon a reasonable trial solution
# Let's try zeros
mk = np.zeros((M,1));
# Step 4: Linearize the data equation
# iterations
Niter = 100; # Maximum number of iterations
Niter = 2; # Maximum number of iterations (overrides the value above)
for iter in range(Niter):
# perturbations in data and prior information
Dd = dobs - erf(mk);
Dh = h - gdaHsparse*mk # matrix is sparse, so * OK;
# The data kernel G is diagonal
dddm = (2/sqrt(pi)) * np.exp(-np.power(mk,2));
# Use row-col-value method, but an
# alternative would be the command
# G = spdiags(dddm, 0, N, M);
MAXELEMENTS = max(N,M);
Nel=0;
Gr = np.zeros((MAXELEMENTS,1),dtype=int);
Gc = np.zeros((MAXELEMENTS,1),dtype=int);
Gv = np.zeros((MAXELEMENTS,1));
for i in range(N):
Gr[Nel,0] = i;
Gc[Nel,0] = i;
Gv[Nel,0] = dddm[i,0];
Nel=Nel+1;
# delete unused rows
Gr = Gr[0:Nel,0:1];
Gc = Gc[0:Nel,0:1];
Gv = Gv[0:Nel,0:1];
# make sparse matrix
gdaGsparse=sparse.coo_matrix((Gv.ravel(),(Gr.ravel(),Gc.ravel())),shape=(N,M));
# Step 5: Form the combined equation
# Set up the matrix F and vector f
L=N+K;
# Use row-col-value method
Fr = np.concatenate( (Gr,Hr+N), axis=0);
Fc = np.concatenate( (Gc,Hc), axis=0);
Fv = np.concatenate( (Gv/sigmad,Hv/sigmah), axis=0);
gdaFsparse=sparse.coo_matrix((Fv.ravel(),(Fr.ravel(),Fc.ravel())),shape=(L,M));
# delete element vectors
del Fr; del Fc; del Fv;
# right hand side vector
f=np.concatenate((Dd/sigmad,Dh/sigmah), axis=0 );
# Step 6: Iteratively improve the solution
# solve F Dm = f by biconjugate gradients
# define linear operator needed for biconjugate gradient solver
LO=las.LinearOperator(shape=(M,M),matvec=gda_FTFmul,rmatvec=gda_FTFmul);
tol = 1e-12; # tolerance
maxit = 3*(L+M); # maximum number of iterations allowed
FTf = gda_FTFrhs(f);
q=las.bicg(LO,FTf,rtol=tol,maxiter=maxit);
Dm = gda_cvec(q[0].astype(float));
# update solution
mk = mk + Dm;
# Step 7: Stop iterating
# terminate for very small Dm
Dmsq = np.matmul(Dm.T,Dm); Dmsq=Dmsq[0,0];
Sm = Dmsq/M;
Smlimit = 1E-6;
if( Sm < Smlimit ):
break;
mest = mk;
# Step 8: Examine the results
# plot the solution
# note that the solution is not able to fit
# the true central peak very well
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -5.0, 5.0] );
plt.plot( x, mtrue, "k-", lw=3 );
plt.plot( x, mest, "r-", lw=2 );
plt.xlabel("x");
plt.ylabel("m(x)");
# Confidence interval calulation
# The covariance matrix is [cov m] = inv(F'*F)
# However, we do not necessarily need to compute the complete
# matrix. The code here solves for the k-th column, say ck,
# (or row, since it is symmetric). The idea is to solve the equation
# (F'F) ck = s, where s is a vector that is all zeros except
# a 1 in row k. Then ck is the k-th column of inv(F'*F)
varlist=np.arange(0,N,int(floor(N/20))); # do these rows
for k in varlist.tolist():
# solve (F'F) ck = s
s = np.zeros((M,1));
s[k,0]=1.0;
q=las.bicg(LO,s,rtol=tol,maxiter=maxit);
ck = gda_cvec(q[0].astype(float));
# variance of estimated model parameters is k-th element of ck
sigmamest = sqrt(ck[k,0]);
# 95% confidence interval
mlow = mest[k,0]-2*sigmamest;
mhigh = mest[k,0]+2*sigmamest;
# plot error bars
plt.plot( gda_cvec(x[k,0], x[k,0]), gda_cvec(mlow, mhigh), "b-", lw=3 );
plt.show();
print("Caption: True model (black), estimated model (red) and 95 percent confidence error bars (blue)");
# Note that the error bars scale with the size of
# the solution. That's due to the erf() nonlinearity,
# which makes distinguishing an m of, say, 2 from an m of, say, 3 difficult.
# Step 9: Examine the model resolution
# The resolution matrix is RG = GMG * G
# with GMG = inv(F'F) G' inv(covd)
# However, as with covariance, one does not need to
# compute every row. This code just does a few rows.
# Note that the plot shows that the resolution is
# mostly peaked along the main diagonal.
reslist=np.arange(0,N,int(floor(N/9))); # do these rows
Nres=len(reslist.tolist());
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, xmin-(xmax-xmin)/Nres, xmax+(xmax-xmin)/Nres] );
ax1.invert_yaxis();
plt.xlabel('x');
plt.ylabel('x');
vard = sigma2d * np.ones((N,1)); # variance of data
# (the code can handle the case where it varies)
for k in reslist.tolist():
# compute k-th row of the resolution matrix
# first: compute column of inverse of F'F
# note F'F symmetric, so column is also row
s = np.zeros((M,1));
s[k,0]=1.0;
q=las.bicg(LO,s,rtol=tol,maxiter=maxit);
ck = gda_cvec(q[0].astype(float));
# second: row of generalized inverse GMG(k,:)=gi
# MATLAB: gi = ((ck')*(G'))./(vard');
gi = np.divide( (ck.T*gdaGsparse.T),vard.T); # ok to use *, as one matrix is sparse
# third: row of the resolution matrix
r_row = gi*gdaGsparse; # ok to use *, as one matrix is sparse
sc = -((xmax-xmin)/Nres)/np.max(r_row);
plt.plot( x, sc*r_row.T+x[k,0], "k-", lw=3 );
plt.show();
print("Caption: resolution matrix");
# Examine the individual errors
# Note that the normalized prediction error is
# much larger than unity, and not uniform with x,
# indicating that the model is not fitting
# the data very well. The model fits the
# prior information a little better, since
# the size of the normalized error is closer to
# unity. However, it is not uniform in x.
# Our assumptions on the size and uniformity of
# sigmah are not quite right.
# Plot prediction error
dpre = gdaGsparse*mest;
e = dobs-dpre;
fig3 = plt.figure(3,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [xmin, xmax, -10.0, 10.0] );
plt.plot( gda_cvec(xmin, xmax), gda_cvec(0.0, 0.0), "b:", lw=2);
plt.plot( x, e/sigmad, "ko", lw=3 );
plt.xlabel("x");
plt.ylabel("e/sigmad");
plt.show();
print("Caption: Normalized prediction error");
# plot a priori error
fig4 = plt.figure(4,figsize=(12,5));
ax1=plt.subplot(1,1,1);
hpre = gdaHsparse*mest;
l = h-hpre;
plt.axis( [xmin, xmax, -2.0, 2.0] );
plt.plot( gda_cvec(xmin, xmax), gda_cvec(0, 0), "b:", lw=2);
plt.plot( x[1:N-1], l/sigmah, "ko", lw=3 );
plt.xlabel("x");
plt.ylabel("l/sigmah");
plt.show();
print("Caption: Normalized error in prior information");
# Step 12: Examine the total error phiest
# in order to address the null hypothesis
# that departure from the expected value of
# nu is due to random error.
# In the plot, the total error phiest is well
# above the maximum bound, indicating
# that the high total error is unlikely to be
# due to random variation (but rather for some other
# reason, like a poor theory).
fpre = gdaFsparse*mest;
phiest = np.matmul((f-fpre).T,(f-fpre)); phiest=phiest[0,0];
nu = N+K-M;
p1 = nu - 2.0*sqrt(2.0*nu);
p2 = nu + 2.0*sqrt(2.0*nu);
fig4 = plt.figure(4,figsize=(12,5));
ax1=plt.subplot(1,1,1);
pmax = max([phiest,p2]);
plt.axis( [0.0, 1.1*pmax, 0.0, 1.0] );
plt.plot( gda_cvec(p1, p1), gda_cvec(0, 0.2), "r-", lw=2);
plt.plot( gda_cvec(p2, p2), gda_cvec(0, 0.2), "r-", lw=2);
plt.plot( gda_cvec(phiest, phiest), gda_cvec(0, 0.4), "k-", lw=3);
plt.xlabel('phi');
plt.show();
print("Caption: total error phi (black) and confidence bounds (red)");
print("The prediction error is much higher than expected, because the");
print("data is not fit well near the origin, because the prior information");
print("of smoothness is incorrect there. The actual model has a kink");
print("at the origin.")
# Step 13: F-Test
# We have only one model, so a F-Test is not relevant
gdapy17_06
Caption: observed data d(x)
Caption: True model (black), estimated model (red) and 95 percent confidence error bars (blue)
Caption: resolution matrix
Caption: Normalized prediction error
Caption: Normalized error in prior information
Caption: total error phi (black) and confidence bounds (red) The prediction error is much higher than expected, because the data is not fit well near the origin, because the prior information of smoothness is incorrect there. The actual model has a kink at the origin.
In [15]:
# gdapy17_07
# This code parallels "Method Summary 4, MCMC Inversion"
print("gdapy17_07");
# tunable parameters
sigmad = 2.0; # data error
sigmap = 0.2; # neighborhood
mtrue = gda_cvec(0.6, 0.4); # true solution
mstart = gda_cvec(0.62, 0.42); # starting solution
c1 = 1.00; # constant in d(m)
c2 = 1.04; # constant in d(m)
Nr = 100000; # number of realizations
Nb = 10000; # discard burn in
N = 101; # number of data
# Step 1: Organize the problem in standard form
# This is fundamentally a curve-fitting problem of the form d=g(m)
# x-axis
xmax = 1.0;
x = gda_cvec(np.linspace(0,xmax,N));
# the curve is the following function. I am using a quasi-function
# coding technique to avoid using a function, as we have not used
# functions in the book
# really should use a function for this synthetic data
mym = mtrue;
myd = np.sin(4*pi*(c1+(mym[0,0]**2)*x))+np.sin(4*pi*(c2+(mym[1,0]**2)*x));
dtrue=myd;
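# (a minimal function form consistent with the expression above, shown only as a
#  sketch; the name gda_twosine is hypothetical and the quasi-function pattern
#  is kept below to match the rest of the script)
# def gda_twosine(m, x, c1, c2):
#     return np.sin(4*pi*(c1+(m[0,0]**2)*x)) + np.sin(4*pi*(c2+(m[1,0]**2)*x));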
# Step 2: Decide whether MCMC is appropriate
# Note that this problem is highly non-unique
# if (m1,m2) minimizes the error, then so do (-m1,m2), (m1,-m2) and (-m1,-m2)
# furthermore, when c1=c2, m1 and m2 can be interchanged without changing the error
# thus a single estimate mest made using Newton's method is not going to be of much use
# we really want to learn something about the form of the non-uniqueness
# instead, I examine the global attributes of the solution, and especially
# how polar angle and radius vary
# synthetic observed data
sigmad2 = sigmad**2;
dobs = dtrue + np.random.normal(loc=0.0,scale=sigmad,size=(N,1));
# Step 3: Examine the data
# Plot data. I have intentionally made the noise level very high
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.plot(x,dtrue,"k-",lw=2);
plt.plot(x,dobs,"ko");
plt.xlabel("x");
plt.ylabel("d(x)");
plt.show();
print("Caption: True data (curve), observed data(circles)");
# Step 4: Establish the accuracy of the data
# The data synthetic, so I know the variance sigmad2
# For plotting purposes, I am exhaustively computing the error
# surface E(m). That's unnecessary for MCMC, and can only be done
# when the number M of model parameters is small
Nm = 201;
mmin=-1.0;
mmax=1.0;
m1 = gda_cvec(mmin+(mmax-mmin)*np.linspace(0,Nm-1,Nm)/(Nm-1));
m2 = gda_cvec(mmin+(mmax-mmin)*np.linspace(0,Nm-1,Nm)/(Nm-1));
E = np.zeros((Nm,Nm));
for i in range(Nm):
for j in range(Nm):
mym = gda_cvec(m1[i,0], m2[j,0]);
myd = np.sin(4*pi*(c1+(mym[0,0]**2)*x))+np.sin(4*pi*(c2+(mym[1,0]**2)*x));
dpre = myd;
e = dobs-dpre;
esq = np.matmul(e.T,e); esq=esq[0,0];
E[i,j]=esq/sigmad2;
Emax = np.max(E);
# Step 5: Choose a likelihood pdf describing the behavior of the data
# I'm using a Normal PDF with variance sigma2d
# Step 6: State the prior information
# the model is near m1=m2=0, with large variance
mprior = gda_cvec(0.0,0.0);
# Step 7: State the accuracy of the prior information
sigmam = 10.0;
sigmam2=sigmam**2;
# Step 8: Choose a prior pdf describing the behavior of the prior model
# I'm using a normal pdf
# Step 11: choose length and first value of the Markov chain of models
# its length is Nr
mr1 = np.zeros((Nr,1)); # markov chain of m1's
mr2 = np.zeros((Nr,1)); # markov chain of m2's
mr1[0,0]=mstart[0,0]; # starting link of the chain (for m1)
mr2[0,0]=mstart[1,0]; # starting link of the chain (for m2)
# evaluate function g(m)
mym=gda_cvec(mr1[0,0],mr2[0,0]);
myd = np.sin(4*pi*(c1+(mym[0,0]**2)*x))+np.sin(4*pi*(c2+(mym[1,0]**2)*x));
dpre = myd;
e = dobs-dpre; # prediction error
l = mprior-mym;
# Step 9: Form the likelihood, the logarithm of the posterior pdf
esq = np.matmul(e.T,e); esq=esq[0,0];
lsq = np.matmul(l.T,l); lsq=lsq[0,0];
L = -0.5*esq/sigmad2-0.5*lsq/sigmam2;
# Step 12: Build the Markov chain using the Metropolis-Hastings algorithm:
for i in range(1,Nr):
# Step 10: Choose a pdf for finding a proposed successor
# proposed successor
mp1 = np.random.normal(loc=mr1[i-1,0],scale=sigmap);
mp2 = np.random.normal(loc=mr2[i-1,0],scale=sigmap);
mym=gda_cvec(mp1,mp2);
myd = np.sin(4*pi*(c1+(mym[0,0]**2)*x))+np.sin(4*pi*(c2+(mym[1,0]**2)*x));
dp = myd;
ep = dobs-dp;
epsq = np.matmul(ep.T,ep); epsq=epsq[0,0];
lp = mprior-mym;
lpsq = np.matmul(lp.T,lp); lpsq=lpsq[0,0];
Lp = -0.5*epsq/sigmad2-0.5*lpsq/sigmam2;
# add to end of markov chain
if( Lp > L ):
mr1[i,0] = mp1;
mr2[i,0] = mp2;
L=Lp;
else:
alpha = exp(Lp-L);
r=np.random.uniform(low=0.0,high=1.0);
if(alpha>r):
mr1[i,0] = mp1;
mr2[i,0] = mp2;
L=Lp;
else:
mr1[i,0] = mr1[i-1,0];
mr2[i,0] = mr2[i-1,0];
# discard beginning of chain to allow for burn-in
mr1 = mr1[Nb:Nr,0:1];
mr2 = mr2[Nb:Nr,0:1];
Nr,i = np.shape(mr1);
# Step 13: Assess the ensemble solution
# plot solution ensemble on error surface
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [mmin,mmax,mmin,mmax] );
left=mmin;
right=mmax;
bottom=mmin;
top=mmax;
plt.imshow( E, cmap=mycmap, vmin=np.min(E), vmax=np.max(E), extent=(left,right,bottom,top) );
plt.xlabel('m2');
plt.ylabel('m1');
plt.colorbar();
plt.plot(mr2,mr1,'w.');
plt.show();
print("Caption: Error surface (colors) with ensemble solution (white dots) superimposed");
# compute radius and polar angle of solution
th = np.zeros((Nr,1));
r = np.zeros((Nr,1));
for i in range(Nr):
th[i,0] = (180.0/pi)*atan2(mr1[i,0],mr2[i,0]);
r[i,0] = sqrt(mr1[i,0]**2+mr2[i,0]**2);
# histogram of polar angle
Lb = 360; # number of bins in histogram
h, e = np.histogram(th,Lb,(-180,179.0)); # create histogram
Nb = len(h); # lengths of histogram
Ne = len(e); # length of edges
h = gda_cvec(h); # vector of counts
edges = gda_cvec( e[0:Ne-1] ); # vector of edges
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers
Db = bins[1,0]-bins[0,0];
hmax=np.max(h);
fig3 = plt.figure(3,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [bins[0,0],bins[Nb-1,0],0.0,1.1*hmax] );
plt.plot(bins,h,"k-",lw=2);
plt.xlabel('theta');
plt.ylabel('counts');
plt.show();
print("Caption: Histogram of polar angle theta (deg)");
# histogram of radius
Lb = 100; # number of bins in histogram
h, e = np.histogram(r,Lb,(0.0,2.0)); # create histogram
Nb = len(h); # lengths of histogram
Ne = len(e); # length of edges
h = gda_cvec(h); # vector of counts
edges = gda_cvec( e[0:Ne-1] ); # vector of edges
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers
Db = bins[1,0]-bins[0,0];
hmax=np.max(h);
fig4 = plt.figure(4,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [bins[0,0],bins[Nb-1,0],0.0,1.1*hmax] );
plt.plot(bins,h,"k-",lw=2);
plt.xlabel('radius');
plt.ylabel('counts');
plt.show();
print("Caption: Histogram of radius r");
# mean radius as a function of polar angle
fig4 = plt.figure(4,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [-180,180,0.0,2.0] );
plt.xlabel('theta (deg)');
plt.ylabel('mean radius r');
for i in range(-180,179):
p = np.where( (th>=i) & (th<(i+1)) );
k=p[0];
if( len(k.tolist()) == 0 ):
continue;
meanr = np.mean(r[k]);
plt.plot(i,meanr,"ko",lw=2);
plt.show();
print("Caption: Mean radius vs polar angle (1 deg bins)");
gdapy17_07
Caption: True data (curve), observed data(circles)
Caption: Error surface (colors) with ensemble solution (white dots) superimposed
Caption: Histogram of polar angle theta (deg)
Caption: Histogram of radius r
Caption: Mean radius vs polar angle (1 deg bins)
In [16]:
# gdapy17_08
# This code parallels "Method Summary 5,
# Bootstrap Confidence Intervals"
print("gdapy17_08");
# Background:
# This example relies on synthetic data created by the
# gdapy17_10 script. That script makes two
# true pulses p1(t) and p2(t) that have an amplitude
# spectral density of exactly
# s1(f)=A1*exp(-pi*abs(f)*tstar1) and
# s2(f)=A2*exp(-pi*abs(f)*tstar2)
# with tstar1=0.01 and tstar2=0.03. However, random noise
# is added to the pulses before computing their spectrum,
# so the observed pulses do not have exactly this spectrum.
# We calculate the 95% confidence interval of m2 = pi*(tstar2-tstar1) by
# bootstrapping and compare it to the estimate obtained via Newton's method
#
# the data file 'pulses.txt' has three columns,
# frequency, s1(f) and s2(f)
# Step 1: Identify the parameter for which confidence intervals are needed
# The parameter is m2 = pi*(tstar2-tstar1)
# Step 2: Identify the data and a method of estimating m2 from the data
# the data are the spectral ratios, d = s2(f)/s1(f)
# we use linearized least squares, refined by Newton's method, to estimate m2
# true solution, known since this is a synthetic example
mtrue = gda_cvec(1.0, -pi*0.02);
# load data
D = np.genfromtxt("../Data/pulses.txt", delimiter="\t");
N, i = np.shape(D);
f = D[0:N,0:1];
s1obs = D[0:N,1:2];
s2obs = D[0:N,2:3];
logs1obs = np.log(s1obs);
logs2obs = np.log(s2obs);
# spectral ratio
dobs = np.divide(s2obs,s1obs);
logdobs = logs2obs-logs1obs;
N,i= np.shape(dobs);
# linearized fit, to get starting value for Newton's method
# di = m1 exp( m2 f )
# logdi = logm1 + m2 f
M = 2;
G = np.concatenate( (np.ones((N,1)), f), axis=1 );
GTG = np.matmul(G.T,G);
GTd = np.matmul(G.T,logdobs);
logmls = la.solve(GTG,GTd);
logdprels = np.matmul(G,logmls);
dprels = np.exp(logmls[0,0]+logmls[1,0]*f);
mls = gda_cvec( np.exp(logmls[0,0]), logmls[1,0] );
# Step 3: reference solution via Newton's method
# di = gi(m1,m2) = m1 exp( m2 f )
# ddi/m1 = exp( m2 f )
# ddi/m2 = f exp( m2 f )
mest = mls;
mlast = mest;
for i in range(100):
x = np.exp(mest[1,0]*f);
Gk = np.concatenate( (x, mest[0,0]*np.multiply(f,x)), axis=1);
Dd = dobs - mest[0,0]*x;
GTG = np.matmul(Gk.T,Gk);
GTd = np.matmul(Gk.T,Dd);
Dm = la.solve(GTG, GTd);
mest = mest + Dm;
dev = np.max(np.abs(mest-mlast));
if(dev<1E-6):
break;
else:
mlast=mest;
x = np.exp(mest[1,0]*f);
# predicted data
dpre = mest[0,0]*x;
logdpre = np.log(dpre);
# estimate of covariance of m2 via Newton's method
# it can be compared with the bootstrap result
e = dobs - dpre;
sigmad2 = np.matmul(e.T,e)/(N-M);
covm = sigmad2*la.inv(GTG);
sigmam2 = sqrt(covm[1,1]);
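# Aside (illustrative cross-check, not part of the original script): under the same
# constant-variance assumption, scipy.optimize.curve_fit returns essentially the same
# estimate and parameter covariance; a minimal sketch (the _chk names are hypothetical):
from scipy.optimize import curve_fit
popt_chk, pcov_chk = curve_fit(lambda ff,a,b: a*np.exp(b*ff), f.ravel(), dobs.ravel(),
                               p0=(mls[0,0],mls[1,0]));
# popt_chk should be close to mest, and sqrt(pcov_chk[1,1]) close to sigmam2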
print("true solution: %.4f %.4f" % (mtrue[0,0],mtrue[1,0]) );
print("linearzed solution: %.4f %.4f" % (mls[0,0],mls[1,0]) );
print("iterative solution: %.4f %.4f (%d iter)" % (mest[0,0],mest[1,0],i) );
# plot data
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
dmax = np.max(dobs);
plt.axis( [f[0,0], f[N-1,0], 0.0, 1.1*dmax] );
plt.plot( f, dprels, "r-", lw=2 );
plt.plot( f, dpre, "g-", lw=2 );
plt.plot( f, dobs, "ko", lw=2 );
plt.xlabel("f (Hz)");
plt.ylabel("d = s2(f)/s1(f)");
plt.show();
print("Caption: spectral ratio of two pulses, linearized estimate (red),");
print("iterative estimate (green), observed (circles)");
# plot data
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
dmax = np.max(logdobs);
plt.axis( [f[0,0], f[N-1,0], dmax-7, dmax+1] );
plt.plot( f, logdprels, "r-", lw=2 );
plt.plot( f, logdpre, "g-", lw=2 );
plt.plot( f, logdobs, "ko", lw=2 );
plt.xlabel("f (Hz)");
plt.ylabel("d = log(s2(f))-log(s1(f))");
plt.show();
print("Caption: log spectral ratio of two pulses");
# plot with m2 on the horizontal axis
# the true value of m2 is the vertical blue bar
# the iterative least squares estimate of m2 is the vertical red bar
# and its 95% confidence interval is the horizontal red bar
fig3 = plt.figure(3,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [0.04, 0.08, 0, 1] );
plt.plot( gda_cvec(-mtrue[1,0], -mtrue[1,0]), gda_cvec(0.0, 0.4), "b-", lw=3 );
plt.plot( gda_cvec(-mest[1,0], -mest[1,0]), gda_cvec(0, 0.35), "r-", lw=3 );
plt.plot( gda_cvec(-mest[1,0]-2*sigmam2, -mest[1,0]+2*sigmam2), gda_cvec(0.3, 0.3), "r-", lw=3 );
plt.xlabel("m(2)");
# now use the bootstrap method to compute an empirical
# probability density function of Dtstar and plot it
Nrs = 10000; # number of realizations
m2 = np.zeros((Nrs,1)); # tabulate the m2 of each realization
for ir in range(Nrs): # loop over realizations
# randomly resample the data
j = np.random.randint( low=0, high=N, size=(N,) );
frs = f[j,0:1]; # resampled frequencies
drs = dobs[j,0:1]; # resampled data
# solution by Newton's method
mrs = mls;
mlast = mrs;
for i in range(100):
x = np.exp(mrs[1,0]*frs);
Gk = np.concatenate( (x, mrs[0,0]*np.multiply(frs,x)), axis=1 );
Dd = drs - mrs[0,0]*x;
GTG = np.matmul(Gk.T,Gk);
GTd = np.matmul(Gk.T,Dd);
Dm = la.solve(GTG,GTd);
mrs = mrs + Dm;
dev = np.max(np.abs(mrs-mlast));
if(dev<1E-6):
break;
else:
mlast=mrs;
m2[ir,0] = -mrs[1,0]; # tabulate minus m2
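# Aside (illustrative, not part of the original script): the index draw above samples
# the (f,d) pairs with replacement; an equivalent draw uses np.random.choice, e.g.
j_alt = np.random.choice(N, size=N, replace=True);  # indices 0..N-1, with replacement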
# Now construct a histogram of the m2 values
# and scale it to an empirical probability density function
# histogram of m2
Lb = 100; # number of bins in histogram
binmin = 0.04; # range of histogram
binmax = 0.08;
h, e = np.histogram(m2,Lb,(binmin,binmax)); # create histogram
Nb = len(h); # length of histogram
Ne = len(e); # length of edges
p = gda_cvec(h); # vector of counts
edges = gda_cvec( e[0:Ne-1] ); # vector of edges
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers
Db = bins[1,0]-bins[0,0];
hmax=np.max(h);
# convert to an empirical pdf
p = p/(Db*np.sum(p)); # normalize to unit area
pmax = np.max(p); # find maximum for plotting purposes
plt.plot( bins, 0.5*p/pmax, "k-", lw=3 );
P = Db*np.cumsum(p); # integrate to an empirical cumulative distribution P(q)
# use the cumulative distribution to find the left and right
# edges of the 95% confidence interval and plot them as
# green bars. The upshot is that they agree pretty well with
# the least squares estimate (red horizontal bar). Because of
# the tendency of the logarithm to preferentially amplify
# small values, the pdf is centered a little
# to the left of the true Dtstar value. However, the displacement
# is small compared to the width of the confidence interval.
z = np.where( P>0.025 );
k = z[0];
il = k[0];
qL = bins[il,0];
z = np.where( P>0.975);
k = z[0];
ir = k[0];
qR = bins[ir,0];
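# Aside (illustrative, not part of the original script): the same 2.5% and 97.5%
# bounds can be read directly from the bootstrap replicates with np.quantile;
# a minimal sketch (the _chk names are hypothetical):
qL_chk = np.quantile(m2, 0.025);
qR_chk = np.quantile(m2, 0.975);
# qL_chk and qR_chk should be close to the qL and qR found from the binned cdf above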
plt.plot( gda_cvec(qL, qL), gda_cvec(0, 0.2), "g-", lw=2 );
plt.plot( gda_cvec(qR, qR), gda_cvec(0, 0.2), "g-", lw=2 );
plt.show();
print("Caption: Decay rate parameter m(2): true (blue), iterative estimate");
print("and its 95 percent confidence (red), bootstrsp histograam (black curve)");
print("and its 95 percent confidence (green).");
print(" ");
print("linearized estimate of sigma-m2 %.4f" % (sigmam2) );
print("bootstrap estimate of sigma-m2 %.4f" % ((qR-qL)/4) );
print("The Newton method estimate of sigma-m2 agrees well with the bootstap estimate.");
gdapy17_08
true solution: 1.0000 -0.0628
linearized solution: 0.9663 -0.0547
iterative solution: 1.0186 -0.0588 (3 iter)
Caption: spectral ratio of two pulses, linearized estimate (red), iterative estimate (green), observed (circles)
Caption: log spectral ratio of two pulses
Caption: Decay rate parameter m(2): true (blue), iterative estimate and its 95 percent confidence (red), bootstrap histogram (black curve) and its 95 percent confidence (green).
linearized estimate of sigma-m2 0.0021
bootstrap estimate of sigma-m2 0.0024
The Newton method estimate of sigma-m2 agrees well with the bootstrap estimate.
In [17]:
# gdapy17_09
# This code parallels "Method Summary 6, Factor Analysis".
print("gdapy17_09");
# This is a hypothetical scenario with synthetic data, only,
# in which the object is to determine the source(s) of a pollutant
# from observations of its concentration in dust collected
# on a 2D grid of sampling sites. The pollutant is an element
# with four isotopes. The presumption is that each source
# (two factories and a natural background source) has a distinct
# isotopic pattern and that a dust sample at any given location
# is a mixture of these patterns. The pollutant emitted from the
# factories is concentrated near the factories; the natural
# pollutant is spread more uniformly across the area.
# This section creates the synthetic data.
# sample locations
Nx=20;
Ny=20;
x = gda_cvec( np.linspace(1.0,Nx,Nx) );
y = gda_cvec( np.linspace(1.0,Ny,Ny) );
# The factories will be numbered 1 and 2; the natural source 3
# factory locations, both as indices and (x,y) positions
ix1=2; x1=x[ix1,0]; iy1=2; y1=y[iy1,0];
ix2=16; x2=x[ix2,0]; iy2=16; y2=y[iy2,0];
# the factors are the fraction of the 4 isotopes for each source
# note that isotope 1 is present in a smaller
# fraction than the other three. However, the
# measurement technique for determining it is also better.
f1 = gda_cvec(0.0104, 0.2084, 0.1762, 0.6050);
f2 = gda_cvec(0.0165, 0.2748, 0.2365, 0.4722);
f3 = gda_cvec(0.0140, 0.2410, 0.2210, 0.5240);
# weight the first element more, as it has smaller values
w = gda_cvec(10.0, 1.0, 1.0, 1.0);
W = np.diag(w.ravel());
WI = np.diag(np.reciprocal(w).ravel());
# We now build the sample matrix S(i,j). It gives
# the amount of isotope j observed at station i,
# measured, say, in picograms per square meter per day
# of dust deposited on a surface exposed to the
# atmosphere
N = Nx*Ny; # number of samples
M = 4; # number of isotopes
Strue = np.zeros((N, M)); # true (noise free) samples matrix
Sobs = np.zeros((N, M)); # observed (noisy) sample matrix
jofixiy = np.zeros((Nx,Ny),dtype=int); # sample number as function of (ix,iy)
j=0;
v=0.1;
sigmaS = 0.01;
for ix in range(Nx):
for iy in range(Ny):
R1 = sqrt( (x[ix,0]-x1)**2 + (y[iy,0]-y1)**2 ); # distance to factory 1
R2 = sqrt( (x[ix,0]-x2)**2 + (y[iy,0]-y2)**2 ); # distance to factory 2
C1 = 800.0*(1/((R1/10)+1))+np.random.uniform(low=0.0,high=v); # loading 1 declines with R1
C2 = 200.0*(1/((R2/10)+1))+np.random.uniform(low=0.0,high=v); # loading 2 declines with R2
C3 = 10.0+np.random.uniform(low=0.0,high=v); # loading 3 is more-or-less constant
Strue[j:j+1,0:M] = C1*f1.T + C2*f2.T + C3*f3.T;
n = np.random.normal(loc=0.0, scale=sigmaS, size=(M,1) );
Sobs[j:j+1,0:M] = Strue[j:j+1,0:M] + np.divide(n,w).T; # add noise to data
jofixiy[ix,iy]=j; # lookup table for folding vector in matrix
j=j+1;
# Step 1: State the problem in words
# A dust sample is assumed to contain four isotopes
# of a toxic chemical element. The pollutant gets
# into the dust from factories (which are spatially
# localized) and natural sources (which are spatially
# distributed). A factor is the pattern of isotopes
# associated with a particular source. A sample is
# a mixture of the factors, where the loading represents
# the mass per unit area per unit time of the pollutant
# deposited from each source. In our scenario, the wind
# is physically mixing the dust particles, so that
# each sample site receives some dust from each source.
# Step 2: Organize the data as a sample matrix S
# The matrix Sobs(i,j) is already in the correct form
# (isotope j at station i).
# Step 3: Establish weights that reflect the importance
# of the elements. The first isotope is known to have been
# measured with much better (say 10x better) accuracy than
# the others.
# We defined the weight vector w above, chosen to weight
# the first element (which has smaller values) more.
# Step 4: Perform singular value decomposition and
# form the factor matrix F and loading matrix C
# S W = U SIGMA VT so S = (U SIGMA) (VT WI) = C F
U,lam,VT = la.svd( np.matmul(Sobs,W), full_matrices=False);
LAM = np.diag(lam);
Fpre = np.matmul(VT,WI); # factors
Cpre = np.matmul(U,LAM); # loadings
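# Aside (illustrative check, not part of the original script): before truncation the
# factorization is exact, since C F = U LAM VT WI = (Sobs W) WI = Sobs; a minimal
# sketch of the check (the variable name recon_err is hypothetical):
recon_err = np.max(np.abs(Sobs - np.matmul(Cpre,Fpre)));  # should be near machine precision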
# Step 5: Determine the number P of important factors
# Plot the diagonal of SIGMA as a function of row index i
# and choose P to include all rows with "large" SIGMA(i,i)
# Since there are only M=4 singular values, we opt to
# print them out:
print("Singular values");
print("%f %f %f %f" % (lam[0], lam[1], lam[2], lam[3]) );
print(" ");
# SIGMA(4,4) is much smaller than the rest, so we will ignore it
# Step 6: Reduce the number of factors from M to P
P = 3;
FpreP = Fpre[0:P,0:M];
CpreP = Cpre[0:N,0:P];
SpreP = np.matmul(CpreP,FpreP);
# Step 7: Predict the data and examine the error
# We print out the error
print("RMS error of Sobs - SpreP");
for i in range(M):
dobs = Sobs[0:N,i:i+1];
dpre = SpreP[0:N,i:i+1];
e = dobs-dpre;
esq = np.matmul(e.T,e); esq=esq[0,0];
rmsi = sqrt(esq/N);
mi = np.mean(dobs);
print(" isotope %d is %f (mean is %f)" % (i, rmsi, mi) );
print(" ");
# The rms errors of the isotopes seem acceptably
# small, given the overall size of the isotope
# values in the sample matrix
# Step 8: Interpret the factors and their loadings
# print out the factors, normalizing them so the isotopes in
# a factor sum to 1
for i in range(P):
norm=np.sum(FpreP[i,:]);
print("factor %d: %f %f %f %f" % (i, FpreP[i,0]/norm, FpreP[i,1]/norm, FpreP[i,2]/norm, FpreP[i,3]/norm) );
print(" ");
# it is not so clear to me that these factors are helpful,
# because they do not have a 1:1 correspondence to sources.
# A map of the factor loadings shows a clear
# spatial pattern with two peaks, which we can assume
# to be the locations of two factories that are
# emitting the pollutant.
map1 = np.zeros((Nx,Ny));
map2 = np.zeros((Nx,Ny));
map3 = np.zeros((Nx,Ny));
for ix in range(Nx):
for iy in range(Ny):
j = jofixiy[ix,iy];
map1[ix,iy]=Cpre[j,0];
map2[ix,iy]=Cpre[j,1];
map3[ix,iy]=Cpre[j,2];
gda_draw("title C1",map1,"title C2",map2,"title C3",map3);
print("Caption: Maps of the first three factor loadings");
print(" ");
# measuring the locations of the two sources by eye gives indices
kx1 = 2; ky1=2;
kx2 = 16; ky2=16;
# measuring a point far from either source gives index
kx3 = 0; ky3=19;
# An informative quantity is the sample closest to
# each of the point sources, and furthest away from both.
# We use SpreP and not Sobs, because we hope the former has
# rejected some of the noise present in the latter
j1 = jofixiy[kx1,ky1]; s1 = SpreP[j1:j1+1,0:M].T;
j2 = jofixiy[kx2,ky2]; s2 = SpreP[j2:j2+1,0:M].T;
j3 = jofixiy[kx3,ky3]; s3 = SpreP[j3:j3+1,0:M].T;
s1 = s1/np.sum(s1);
s2 = s2/np.sum(s2);
s3 = s3/np.sum(s3);
# these give some sense of the composition of the sources
print("Samples at the (eyeballed) sources");
print("s1 %f %f %f %f" % (s1[0,0], s1[1,0], s1[2,0], s1[3,0]) );
print("s2 %f %f %f %f" % (s2[0,0], s2[1,0], s2[2,0], s2[3,0]) );
print("Samples far from the sources");
print("s3 %f %f %f %f" % (s3[0,0], s3[1,0], s3[2,0], s3[3,0]) );
print(" ");
# perhaps an even better estimate of sources 1 and 2
# is constructed by subtracting the background 3
# We use SpreP and not Sobs, because we hope the former has
# rejected some of the noise present in the latter
p1 = SpreP[j1:j1+1,0:M].T - SpreP[j3:j3+1,0:M].T;
p2 = SpreP[j2:j2+1,0:M].T - SpreP[j3:j3+1,0:M].T;
p1 = p1/np.sum(p1);
p2 = p2/np.sum(p2);
print("Best estimate of the two point sources");
print("s1 %f %f %f %f" % (p1[0,0], p1[1,0], p1[2,0], p1[3,0]) );
print("s2 %f %f %f %f" % (p2[0,0], p2[1,0], p2[2,0], p2[3,0]) );
gdapy17_09
Singular values
6665.628947 134.667794 0.309205 0.189121
RMS error of Sobs - SpreP
 isotope 0 is 0.000821 (mean is 5.828872)
 isotope 1 is 0.003873 (mean is 110.751212)
 isotope 2 is 0.002566 (mean is 94.232482)
 isotope 3 is 0.000670 (mean is 286.790103)
factor 0: 0.011629 0.221646 0.188523 0.578203
factor 1: 0.072084 0.877077 0.789414 -0.738574
factor 2: -0.046239 -4.850384 5.852510 0.044114
Caption: Maps of the first three factor loadings
Samples at the (eyeballed) sources
s1 0.010908 0.213844 0.181337 0.593912
s2 0.013027 0.236840 0.202369 0.547764
Samples far from the sources
s3 0.011713 0.222535 0.189386 0.576366
Best estimate of the two point sources
s1 0.010289 0.207166 0.175151 0.607394
s2 0.018171 0.292834 0.253185 0.435811
In [18]:
# gdapy17_10
# makes data for example for
# Methods Summary 3, Bootstrap Confidence Intervals
# note: the filename is currently mypulses.txt; change back to pulses.txt
# to overwrite the data used in the analysis script
# note: the MATLAB version uses a minimum-phase time series, but I have
# been unable to duplicate MATLAB's rceps() function, so this
# version uses a zero-phase time series. This doesn't matter (much) for
# the purpose of constructing a test dataset
print("gdapy17_10");
# noise level
sigmap = 0.5;
Nt = 32; # must be even
Dt = 0.01;
t = gda_cvec(Dt*np.linspace(0,Nt-1,Nt));
Nf = int(Nt/2) + 1;
fny = 1.0/(2.0*Dt);
Df = fny/(Nt/2.0);
fp = gda_cvec(Df*np.linspace(0,Nf-1,Nf));
fn = gda_cvec(-np.flipud(fp[1:Nf-1,0]));
f = gda_cvec(fp,fn);
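# Aside (illustrative, not part of the original script): numpy's own frequency vector
# np.fft.fftfreq gives the same values except for the sign of the Nyquist term;
# since only abs(f) is used below, either convention yields the same spectrum:
f_chk = gda_cvec(np.fft.fftfreq(Nt, d=Dt));  # 0..fny-Df, then -fny..-Df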
# construct two pulses
tstar1 = 0.01;
p1t = np.exp(-pi*np.abs(f)*tstar1);
p1true = np.real(np.fft.ifft(p1t,axis=0)/Dt);
p1true = np.roll(p1true,int(Nt/2)-1);
tstar2 = 0.03;
p2t = np.exp(-pi*np.abs(f)*tstar2);
p2true = np.real(np.fft.ifft(p2t,axis=0)/Dt);
p2true = np.roll(p2true,int(Nt/2)-1);
# add noise
p1obs = p1true+np.random.normal(loc=0,scale=sigmap,size=(Nt,1));
p2obs = p2true+np.random.normal(loc=0,scale=sigmap,size=(Nt,1));
# observed psd
p1test = np.abs(Dt*np.fft.fft(p1obs,axis=0));
p2test = np.abs(Dt*np.fft.fft(p2obs,axis=0));
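# Aside (illustrative check, not part of the original script): p1true was built as the
# inverse FFT of the prescribed spectrum p1t, and the circular shift applied by np.roll
# changes only the phase, so the forward transform recovers the amplitude spectrum:
p1chk = np.abs(Dt*np.fft.fft(p1true,axis=0));  # should equal p1t to within roundoff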
# plot original time series
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
pmax = np.max(np.abs(p1true));
plt.axis( [0, t[Nt-1,0], -0.1*pmax, 1.1*pmax] );
plt.plot( t, p1true, "k-", lw=2 );
plt.plot( t, p2true, "r-", lw=2 );
plt.plot( t, p1obs, "k.", lw=2 );
plt.plot( t, p2obs, "r.", lw=2 );
plt.xlabel("t (s)");
plt.ylabel("p");
plt.show();
print("Caption: True timseries (lines), observed timeseries (dots)");
# plot asd
fig2 = plt.figure(2,figsize=(12,5));
ax1=plt.subplot(1,1,1);
pmax = np.max(np.abs(p1t));
plt.axis( [0, fny, -0.1*pmax, 1.1*pmax] );
plt.plot( f[0:Nf,0:1], p1t[0:Nf,0:1], "k-", lw=2 );
plt.plot( f[0:Nf,0:1], p2t[0:Nf,0:1], "r-", lw=2 );
plt.plot( f[0:Nf,0:1], p1test[0:Nf,0:1], "ko", lw=2 );
plt.plot( f[0:Nf,0:1], p2test[0:Nf,0:1], "ro", lw=2 );
plt.xlabel("f (Hz)");
plt.ylabel("p-tilde");
plt.show();
print("Caption: true asd (dots), observed asd (circles)");
# plot log asd
fig3 = plt.figure(3,figsize=(12,5));
ax1=plt.subplot(1,1,1);
pmax = np.log(np.max(np.abs(p1t)));
plt.axis( [0, fny, pmax-7, pmax] );
plt.plot( f[0:Nf,0:1], np.log(p1t[0:Nf,0:1]), "k-", lw=2 );
plt.plot( f[0:Nf,0:1], np.log(p2t[0:Nf,0:1]), "r-", lw=2 );
plt.plot( f[0:Nf,0:1], np.log(p1test[0:Nf,0:1]), "ko", lw=2 );
plt.plot( f[0:Nf,0:1], np.log(p2test[0:Nf,0:1]), "ro", lw=2 );
plt.xlabel("f (Hz)");
plt.ylabel("log p-tilde");
plt.show();
print("Caption: true log asd (dots), observed asd (circles)");
Nfs = int(floor(3*Nf/4)); # max freq saved
# concatenate frequency, asd's into a matrix C
C = np.concatenate( (f[0:Nfs,0:1],p1test[0:Nfs,0:1],p2test[0:Nfs,0:1]),axis=1);
# save matrix to file of tab-separated values
np.savetxt("../Data/mypulses.txt",C,delimiter="\t");
gdapy17_10
Caption: True time series (lines), observed time series (dots)
Caption: true asd (lines), observed asd (circles)
Caption: true log asd (lines), observed log asd (circles)
In [ ]: