In [11]:
# gdapy03_00 clears all variables and import various modules
# you must run this cell first, before running any of others
# note that these examples use inverse theory techniques that
# are only covered in later chapters of the book
# History:
# 2022/11/14 - Created by W. Menke, using third edition MATLAB script as template
# 2024/05/23 - Patches to accomodate new verions of numpy, scipy, matplotlib
# patches dot product to return scalar value
# A reset deletes any previously-created variables from memory
%reset -f
# import various packages
import numpy as np # matrices & etc
from matplotlib import pyplot as plt # general plotting
from matplotlib import patches # plotting shapes
from datetime import date # date and time arithmetic
from math import exp, pi, sin, cos, tan, sqrt, floor # math functions
import scipy.linalg as la # linear algebra functions
import os # operating system
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import scipy.signal as sg
import scipy.stats as st
import matplotlib
# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def gda_cvec(*argv):
t = int;
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
Nt = Nt + N;
if( N==0 ):
continue; # skip vector of zero length
if (t==int) and isinstance(v[0,0],float):
t=float;
elif isinstance(v[0,0],complex):
t=complex;
w = np.zeros((Nt,1),dtype=t);
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
w[Nt:Nt+N,0:1] = v; # patch 20230418 was #w[Nt:Nt+N,0] = v[0:N,0];
Nt = Nt + N;
return w;
# function to make a numpy N-by-1 column vector
# c=gda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=gda_cvec(v1,v2,...) concatenates
# many argiments.
def gda_cvec1(v):
if isinstance(v, int) or isinstance(v, np.int32):
w = np.zeros((1,1),dtype=int);
w[0,0] = v;
return w;
elif isinstance(v, float):
w = np.zeros((1,1),dtype=float);
w[0,0] = v;
return w;
elif isinstance(v, complex):
w = np.zeros((1,1),dtype=complex);
w[0,0] = v;
return w;
elif isinstance(v, np.ndarray):
s = np.shape(v);
if len(s) == 1:
return np.copy(np.reshape(v,(s[0],1)));
else:
[r,c]=s;
if( c==1 ):
return(np.copy(v));
elif(r==1):
return(np.copy(v.T));
else:
raise TypeError("gda_cvec: %d by %d ndarray not allowed" % (r, c));
elif isinstance(v, list):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: list contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(v); # patch v -> np.array(v)
return w;
elif isinstance(v, tuple):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: tuple contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(list(v)); # patch v -> np.array(list(v));
return w;
else:
raise TypeError("gda_cvec: %s not supported" % type(v));
# gda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def gda_draw(*argv):
DOCOLOR=True;
if( DOCOLOR ):
bwcmap = matplotlib.colormaps['jet'];
else:
bw = np.zeros((256,4));
v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
bw[:,0] = v;
bw[:,1] = v;
bw[:,2] = v;
bw[:,3] = np.ones(256);
bwcmap = ListedColormap(bw);
# size of plot
W = 16;
H = 4;
fig1 = plt.figure(1);
# figsize width and height in inches
fig1.set_size_inches(W,H);
ax1 = plt.subplot(1,1,1);
plt.axis([0, W, -H/2, H/2]);
plt.axis('off');
LM = W/6; # matrix width and heoght
LV = W/40; # vector width
FS = 0.12; # character width
TO = 0.4; # title vertical offset
SP = 0.2; # space between objects
LS = 0.2; # leading space
p = LS; # starting x-position
istitle=0; # flags presence of a title
for a in argv:
if isinstance(a,np.ndarray):
sh = np.shape(a);
if len(sh) == 1: # conversion to nx1 array
n = sh[0];
m = 1;
ap = a;
a = np.zeros((n,1));
a[:,0] = ap;
else:
n = sh[0];
m = sh[1];
if m==1:
pold=p;
left=p;
right=p+LV;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LV;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
else:
pold=p;
left=p;
right=p+LM;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LM;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
elif isinstance(a,str):
ns = len(a);
istitle=0;
if( ns>=6 ):
if 'title ' in a[0:6]:
istitle=1;
titlestr=a[6:];
if( istitle != 1):
plt.text(p,0,a);
p = p + ns*FS + SP;
plt.show();
return 1;
# bandpass filter, used in seismological example, but hand
# in a variety of settings involving time series
def gda_chebyshevfilt(d, Dt, flow, fhigh):
# (dout,u,v)=gda_chebyshevfilt(d, Dt, flow, fhigh);
# chebyshev IIR bandpass filter
# d - input array of data
# Dt - sampling interval
# flow - low pass frequency, Hz
# fhigh - high pass frequency, Hz
# dout - output array of data
# u - the numerator filter
# v - the denominator filter
# these filters can be used again using dout=filter(u,v,din);
# make sure input timeseries is a column vector
s = np.shape(d);
N = s[0];
if(N==1):
dd = np.zeros((N,1));
dd[:,0] = d;
else:
dd=d;
# sampling rate
rate=1/Dt;
# ripple parameter, set to ten percent
ripple=0.1;
# normalise frequency
fl=2.0*flow/rate;
fh=2.0*fhigh/rate;
# center frequency
cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );
# bandwidth
bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );
# ripple parameter factor
rpf = sqrt((sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple));
a = 0.5*(rpf-1.0/rpf);
b = 0.5*(rpf+1.0/rpf);
u=np.zeros((5,1));
v=np.zeros((5,1));
theta = 3*pi/4;
sr = a * cos(theta);
si = b * sin(theta);
es = sqrt(sr*sr+si*si);
tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
v[0,0] = 1.0;
v[1,0] = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
v[2,0] = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
v[3,0] = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
v[4,0] = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
tmp = 4.0*es*es*bw*bw/tmp;
u[0,0] = tmp;
u[1,0] = 0.0;
u[2,0] = -2.0*tmp;
u[3,0] = 0.0;
u[4,0] = tmp;
dout = sg.lfilter(u.ravel(),v.ravel(),dd.ravel());
return (gda_cvec(dout),gda_cvec(u),gda_cvec(v));
In [12]:
# gdapy03_01
# pdf and histogram
# axes
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# Normal pdf
sd = 1.0;
sd2 = sd**2;
dbar = 5.0;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2)/(sqrt(2*pi)*sd);
norm = Dd*sum(p);
p = p/norm;
# realizations of a Normal pdf
M=200;
r=np.random.normal(loc=dbar,scale=sd,size=(M,1));
# histogram
Lb = 26; # number of bins in histogram
h, e = np.histogram(r,Lb,(dmin,dmax)); # create histogram
Nb = len(h); # lengths of histogram
Ne = len(e); # length of edges
h = gda_cvec(h); # vector of counts
edges = gda_cvec( e[0:Ne-1] ); # vector of edges
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers
Db = bins[1,0]-bins[0,0];
hmax=np.max(h);
# plot the data and the predicted line
fig1 = plt.figure(1,figsize=(12,5)); # smallish figure
# plot pdf
ax1 = plt.subplot(1, 3, 1);
plt.axis( [dmin, dmax, 0, 0.5 ] );
plt.plot(d,p,"r-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
# plot histogram
ax2 = plt.subplot(1, 3, 2);
plt.axis( [dmin, dmax, 0, hmax ] );
# improvise bar chart
for i in range(Nb):
tb = gda_cvec( bins[i,0]-Db/2, bins[i,0]-Db/2, bins[i,0]+Db/2, bins[i,0]+Db/2 );
th = gda_cvec( 0.0, h[i,0], h[i,0], 0.0 );
plt.plot(tb,th,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("counts");
# convert histogram to an approximate pdf
norm = Db*sum(h);
h=h/norm;
# plot pdf and histogram superimposed
ax3 = plt.subplot(1, 3, 3);
plt.axis( [dmin, dmax, 0.0, 0.5 ] );
# improvise bar chart
for i in range(Nb):
tb = gda_cvec( bins[i,0]-Db/2, bins[i,0]-Db/2, bins[i,0]+Db/2, bins[i,0]+Db/2 );
th = gda_cvec( 0.0, h[i,0], h[i,0], 0.0 );
plt.plot(tb,th,"b-",lw=3);
plt.plot(d,p,"r-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
plt.show();
In [13]:
# gdapy03_02
# plot of a Normal pdf
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# Normal pdf
sd = 1.0;
sd2 = sd**2;
dbar = 5.0;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2)/(sqrt(2*pi)*sd);
# plot pdf
fig1 = plt.figure(1,figsize=(12,5)); # smallish figure
ax1 = plt.subplot(1, 1, 1);
plt.axis( [dmin, dmax, 0, 0.5 ] );
plt.plot(d,p,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
plt.show();
In [14]:
# gdapy03_03
# operations on a probability distributions
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# Normal pdf
sd = 1.0;
sd2 = sd**2;
dbar = 5.0;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2)/(sqrt(2*pi)*sd);
# plot pdf
fig1 = plt.figure(1,figsize=(12,5)); # smallish figur
ax1 = plt.subplot(1, 1, 1);
plt.axis( [dmin, dmax, 0, np.max(p) ] );
plt.plot(d,p,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
plt.show();
# total probability
Ptotal = Dd * np.sum(p);
print("total probabilty %.2f" % (Ptotal) );
# cumulative probability
P = gda_cvec(Dd*np.cumsum(p));
# plot cdf
fig2 = plt.figure(2,figsize=(12,5)); # smallish figur
ax1 = plt.subplot(1, 1, 1);
plt.axis( [dmin, dmax, 0, 1.1 ] );
plt.plot(d,P,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("P(d)");
plt.show();
total probabilty 1.00
In [15]:
# gdapy03_04
# calculaion of mode and mean for a skewed pdf
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# construct a pdf of a complicated shape
dbar = 0.0;
sd = 3.0;
sd2 = sd**2;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2);
p = np.multiply(d,p);
dbar = 1;
sd = 0.3;
sd2 = sd**2;
p = p + 4.0*np.exp(-0.5*np.power((d-dbar),2)/sd2);
norm = Dd*sum(p); # normalize pdf to unit area
p = p/norm;
# maximum liklihood point
pmax = np.max(p);
imax = np.argmax(p);
dml = d[imax,0];
# mean
dbar = Dd*np.sum(np.multiply(d,p));
# plot pdf
fig1 = plt.figure(1,figsize=(12,5)); # smallish figure
ax1 = plt.subplot(1, 1, 1);
plt.axis( [dmin, dmax, 0, np.max(p) ] );
plt.plot(d,p,"b-",lw=3);
plt.plot( gda_cvec(dml, dml), gda_cvec(0, 0.05), "k-", lw=2 );
plt.plot( gda_cvec(dbar, dbar), gda_cvec(0, 0.05), "k-", lw=2 );
plt.xlabel("d");
plt.ylabel("p(d)");
plt.show();
In [16]:
# gdapy03_05
# calculaion of variance
top=1;
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# two Normal pdfs with different variances
# Normal pdf 1
dbar = 5.0;
sd = 0.5;
sd2 = sd**2;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2)/(sqrt(2.0*pi)*sd);
norm = Dd*np.sum(p);
# calculate mean (expectation) and variance
Ed = Dd * np.sum(np.multiply(d,p)); # expectation
q = np.power((d-Ed),2); # qudratic
qp = np.multiply(q,p); # product
sd2est = Dd * np.sum(qp); # variance
sdest = sqrt(sd2est); # sqrt variance
# Normal pdf B
dbarB = 5.0;
sdB = 1.5;
sdB2 = sdB**2;
pB = np.exp(-0.5*np.power((d-dbarB),2)/sdB2)/(sqrt(2.0*pi)*sdB);
norm1 = Dd*np.sum(pB);
# calculate mean (expectation) and variance
EdB = Dd * np.sum(np.multiply(d,pB)); # expectation
qB = np.power((d-EdB),2); # qudratic
qpB = np.multiply(qB,pB); # product
sdB2est = Dd * np.sum(qpB); # variance
sdBest = sqrt(sdB2est); # sqrt variance
print("mean value (expected value) 1: true: %.2f estimated: %.2f" % (dbar, Ed));
print("mean value (expected value) 2: true: %.2f estimated: %.2f" % (dbarB, EdB));
print(" ");
print("std dev 1: true: %.2f estimated: %.2f" % (sd, sdest));
print("std dev 2: true: %.2f estimated: %.2f" % (sdB, sdBest));
# plot
fig1 = plt.figure(1,figsize=(12,5)); # smallish figure
ax1 = plt.subplot(2, 3, 1);
plt.axis( [dmin, dmax, 0, 10*top ] );
plt.plot(d,q,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("q(d)");
ax2 = plt.subplot(2, 3, 2);
plt.axis( [dmin, dmax, 0, top ] );
plt.plot(d,p,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
ax3 = plt.subplot(2, 3, 3);
plt.axis( [dmin, dmax, 0, top ] );
plt.plot(d,qp,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("q(d)*p(d)");
ax4 = plt.subplot(2, 3, 4);
plt.axis( [dmin, dmax, 0, 10*top ] );
plt.plot(d,qB,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("q(d)");
ax5 = plt.subplot(2, 3, 5);
plt.axis( [dmin, dmax, 0, top ] );
plt.plot(d,pB,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
ax6 = plt.subplot(2, 3, 6);
plt.axis( [dmin, dmax, 0, top ] );
plt.plot(d,qpB,"k-",lw=3);
plt.xlabel("d");
plt.ylabel("q(d)*p(d)");
plt.show();
mean value (expected value) 1: true: 5.00 estimated: 5.00 mean value (expected value) 2: true: 5.00 estimated: 5.00 std dev 1: true: 0.50 estimated: 0.50 std dev 2: true: 1.50 estimated: 1.49
In [17]:
# gdapy03_06
# 2D Normal distribution, uncorrelated
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# p(d1,p2)
d1bar = 5.0;
d2bar = 5.0;
sd1 = 1.5;
sd2 = 0.5;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
norm = 2*pi*sqrt(la.det(C));
CI=la.inv(C);
P=np.zeros((N,N)); # define p1 to increase along rows, p2 along cols
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P[i,j] = exp(myarg)/norm;
A = (Dd**2)*np.sum(P);
fig2 = plt.figure(2,figsize=(12,5)); # plot pdf
ax1 = plt.subplot(1, 1, 1);
cmap = matplotlib.colormaps['jet'];
Pmax=np.max(P);
# P[10,40]=Pmax; defect to check plot orientation
plt.imshow( np.flipud(P), cmap=cmap, vmin=0.0, vmax=Pmax, extent=(dmin,dmax,dmin,dmax) );
plt.colorbar();
plt.xlabel("p2");
plt.ylabel("p1");
plt.gca().invert_yaxis();
plt.show();
In [19]:
# gdapy03_07
# 2D Normal distribution, correlated
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# p(d1,p2)
d1bar = 5.0;
d2bar = 5.0;
sd1 = 1.5;
sd2 = 0.5;
mvcov=0.4;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mvcov;
C[1,0]=mvcov;
norm = 2*pi*sqrt(la.det(C));
CI=la.inv(C);
P=np.zeros((N,N)); # define p1 to increase along rows, p2 along cols
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P[i,j] = exp(myarg)/norm;
A = (Dd**2)*np.sum(P);
fig2 = plt.figure(2,figsize=(12,5)); # plot pdf
ax1 = plt.subplot(1, 1, 1);
cmap = matplotlib.colormaps['jet'];
Pmax=np.max(P);
# P[10,40]=Pmax; defect to check plot orientation
plt.imshow( np.flipud(P), cmap=cmap, vmin=0.0, vmax=Pmax, extent=(dmin,dmax,dmin,dmax) );
plt.colorbar();
plt.xlabel("p2");
plt.ylabel("p1");
plt.gca().invert_yaxis();
plt.show();
In [22]:
# gdapy03_08
# 2D Normal distribution, uncorrelated, + correlation, - correlation
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# uncorrelated
d1bar = 5.0;
d2bar = 5.0;
sd1 = 1.25;
sd2 = 0.75;
mycov=0.0;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mycov;
C[1,0]=mycov;
norm = 2*pi*sqrt(la.det(C));
CI=la.inv(C);
P0=np.zeros((N,N)); # define p1 to increase along rows, p2 along cols
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = 0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P0[i,j] = exp(-myarg)/norm;
# positive correlation
d1bar = 5.0;
d2bar = 5.0;
sd1 = 1.25;
sd2 = 0.75;
mvcov=0.5;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mvcov;
C[1,0]=mvcov;
norm = 2*pi*sqrt(la.det(C));
CI=la.inv(C);
Pp=np.zeros((N,N)); # define p1 to increase along rows, p2 along cols
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
Pp[i,j] = exp(myarg)/norm;
# negative correlation
d1bar = 5.0;
d2bar = 5.0;
sd1 = 1.25;
sd2 = 0.75;
mycov=-0.5;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mycov;
C[1,0]=mycov;
norm = 2*pi*sqrt(la.det(C));
CI=la.inv(C);
Pn=np.zeros((N,N)); # define p1 to increase along rows, p2 along cols
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
Pn[i,j] = exp(myarg)/norm;
fig2 = plt.figure(2,figsize=(12,5)); # plot pdf's
ax1 = plt.subplot(1, 3, 1);
cmap = matplotlib.colormaps['jet'];
Pmax=np.max(P0);
plt.imshow( np.flipud(P0), cmap=cmap, vmin=0.0, vmax=Pmax, extent=(dmin,dmax,dmin,dmax) );
plt.colorbar(shrink=0.5);
plt.xlabel("p2");
plt.ylabel("p1");
plt.gca().invert_yaxis();
ax2 = plt.subplot(1, 3, 2);
cmap = matplotlib.colormaps['jet'];
plt.imshow( np.flipud(Pp), cmap=cmap, vmin=0.0, vmax=Pmax, extent=(dmin,dmax,dmin,dmax) );
plt.colorbar(shrink=0.5);
plt.xlabel("p2");
plt.ylabel("p1");
plt.gca().invert_yaxis();
ax3 = plt.subplot(1, 3, 3);
cmap = matplotlib.colormaps['jet'];
plt.imshow( np.flipud(Pn), cmap=cmap, vmin=0.0, vmax=Pmax, extent=(dmin,dmax,dmin,dmax) );
plt.colorbar(shrink=0.5);
plt.xlabel("p2");
plt.ylabel("p1");
plt.gca().invert_yaxis();
plt.show();
In [23]:
# gdapy03_09
# 1D uniform p.d.f., p(d)=constant, transformed to p(m) with m(d)=d^2
# note that m=sqrt(d) and that dm/dd=0.5/sqrt(d)
# d-axis, avoid d=0
Dd = 0.01;
N = 101;
d = gda_cvec(np.linspace(Dd,N*Dd,N));
dmin=d[0,0];
dmax=d[N-1,0];
# d-axis, avoid m=0
Dm = 0.01;
M = 101;
m = gda_cvec(np.linspace(Dm,M*Dm,M));
mmin=m[0,0];
mmax=m[M-1,0];
# uniform, p(d)
pd = np.ones((N,1));
# transform to p(m)
J = np.abs(np.divide(0.5,np.sqrt(d)));
pm = np.multiply(pd,J);
fig2 = plt.figure(2,figsize=(12,5)); # plot pdf's
# plot p(d);
ax1 = plt.subplot(1, 2, 1);
plt.axis( [0, 1, 0, 2] );
plt.plot(d,pd,"r-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
# plot p(m);
ax1 = plt.subplot(1, 2, 2);
plt.axis( [0, 1, 0, 2] );
plt.plot(m,pm,"b-",lw=3);
plt.xlabel("m");
plt.ylabel("p(m)");
In [24]:
# gdapy03_10
# two Normal curves of different variance
# d-axis
N = 101;
dmin=-5.0;
dmax=5.0;
Dd = (dmax-dmin)/(N-1);
d = dmin+gda_cvec(np.linspace(0.0,Dd*(N-1),N));
# narrow Normal p.d.f.
sd = 1.0;
sd2 = sd**2;
dbar = 0.0;
p = np.exp(-0.5*np.power((d-dbar),2)/sd2)/(sqrt(2*pi)*sd);
# make realiations of this pdf
Nr = 1000;
dr = np.random.normal(loc=dbar,scale=sd,size=(Nr,1));
# sample mean and std dev of the realizations
dbarest = np.mean(dr);
sigmaest = np.std(dr);
print("pdf a: true mean %.2f estimated mean %.2f" % (dbar,dbarest) );
print("pdf a: true stddev %.2f estimated stddev %.2f" % (sd,sigmaest) );
# wide Normal p.d.f.
sdb = 2.0;
sdb2 = sdb**2;
dbarb = 0.0;
pb = np.exp(-0.5*np.power((d-dbarb),2)/sdb2)/(sqrt(2*pi)*sdb);
# make realiations of this pdf
Nr = 1000;
drb = np.random.normal(loc=dbarb,scale=sdb,size=(Nr,1));
# sample mean and std dev of the realizations
dbarbest = np.mean(drb);
sigmabest = np.std(drb);
print("pdf a: true mean %.2f estimated mean %.2f" % (dbar,dbarest) );
print("pdf b: true stddev %.2f estimated stddev %.2f" % (sdb,sigmabest) );
# plot pdf's
fig2 = plt.figure(2,figsize=(12,5)); # plot pdf's
ax1 = plt.subplot(1, 1, 1);
plt.axis( [dmin, dmax, 0.0, 0.5] );
plt.plot(d,p,"r-",lw=3);
plt.plot(d,pb,"b-",lw=3);
plt.plot(gda_cvec(dbar,dbar),gda_cvec(0.0,0.1),"r-",lw=2);
plt.plot(gda_cvec(dbarb,dbarb),gda_cvec(0.0,0.1),"b-",lw=2);
plt.plot(gda_cvec(dbarest,dbarest),gda_cvec(0.0,0.1),"r:",lw=2);
plt.plot(gda_cvec(dbarbest,dbarbest),gda_cvec(0.0,0.1),"b:",lw=2);
plt.xlabel("d");
plt.ylabel("p(d)");
plt.show();
# uncorrelated 2D pdf is product of pa and pb
# assuming pa(d1) and pb(d2)
pab = np.matmul( p, pb.T );
gda_draw("title p(d1,d2)",pab);
# true covariance is zero
covab=0.0;
# estimated covariance is
S = np.concatenate((dr,drb), axis=1);
Cest = np.cov(S.T);
print("Covariance of p(d1m,d2): true %.4f, estimated %.4f" % (covab, Cest[0,1]) );
pdf a: true mean 0.00 estimated mean -0.02 pdf a: true stddev 1.00 estimated stddev 0.99 pdf a: true mean 0.00 estimated mean -0.02 pdf b: true stddev 2.00 estimated stddev 1.95
Covariance of p(d1m,d2): true 0.0000, estimated 0.0431
In [27]:
# gdams03_11
# product of two 2D Normal distributions
# d-axis
Dd = 0.1;
N = 101;
d = gda_cvec(np.linspace(0.0,Dd*(N-1),N));
dmin=d[0,0];
dmax=d[N-1,0];
# P1
d1bar = 4.0;
d2bar = 6.0;
sd1 = 1.25;
sd2 = 0.75;
mycov = 0.5;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mycov;
C[1,0]=mycov;
norm = 2.0*pi*sqrt(la.det(C));
CI=la.inv(C);
P1=np.zeros((N,N));
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P1[i,j] = exp(myarg)/norm;
# save these parameters
C1=C;
C1I=CI;
DBAR1=gda_cvec(d1bar, d2bar);
# P2
d1bar = 6.0;
d2bar = 4.0;
sd1 = 0.75;
sd2 = 1.25;
mycov = -0.5;
C=np.zeros((2,2));
C[0,0]=sd1**2;
C[1,1]=sd2**2;
C[0,1]=mycov;
C[1,0]=mycov;
norm = 2.0*pi*sqrt(la.det(C));
CI=la.inv(C);
P2=np.zeros((N,N));
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d2bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P2[i,j] = exp(myarg)/norm;
# save these parameters
C2=C;
C2I=CI;
DBAR2=gda_cvec(d1bar, d2bar);
P1P2 = np.multiply(P1,P2);
norm = (Dd**2)*np.sum(P1P2);
# from analytic formula
C3 = la.inv( C1I + C2I );
DBAR3 = np.matmul( C3, (np.matmul(C1I,DBAR1) + np.matmul(C2I,DBAR2)) );
d1bar = DBAR3[0,0];
d2bar = DBAR3[1,0];
C=C3;
norm = 2.0*pi*sqrt(la.det(C));
CI=la.inv(C);
P3=np.zeros((N,N));
for i in range(N):
for j in range(N):
dd = gda_cvec( dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar );
myarg = -0.5*np.matmul(np.matmul(dd.T,CI),dd); myarg=myarg[0,0];
P3[i,j] = exp(myarg)/norm;
gda_draw(' ',P1,' ',P2,' ',P1P2);
# plot the analytic one, too, to check results
# gda_draw(' ',P1,' ',P2,' ',P1P2,' ',P3);
In [28]:
# gdama03_12
# example of a Pierson's chi-squared test
# setup for plots
fig1 = plt.figure(2,figsize=(12,5));
# Part 1: Correct Distribution
print("Part 1: Correct Distribution");
# make some normally-distributed random data
Ndata = 200;
dbar = 5;
sigmad = 1;
drandom = np.random.normal(loc=dbar,scale=sigmad,size=(Ndata,1));
# estimate mean and standard deviation of d's
dbarest = np.mean(drandom);
sigmadest = np.std(drandom);
print("mean: true %.2f estimated %.2f" % (dbar, dbarest) );
# histogram
dmin = 0.0; # starting d
dmax = 10.0; # ending d
Lb = 40; # number of bins
dhist, e = np.histogram(drandom,Lb,(dmin,dmax)); # create histogram
Nbin = len(dhist); # lengths of histogram
Ne = len(e); # length of edges
dhist = gda_cvec(dhist); # convert h to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
hmax=np.max(dhist); # maximum counts
# normalize to unit area
norm = sum(dhist);
pdest = dhist / norm;
# d-axis, for plotting purposes
d = gda_cvec(np.linspace(bins[0,0],bins[Nbin-1],Nbin));
# theoretical distribution
pdtrue = st.norm.cdf(d+Db/2,loc=dbarest,scale=sigmadest)-st.norm.cdf(d-Db/2,loc=dbarest,scale=sigmadest);
# plot
ax1 = plt.subplot(1, 2, 1);
plt.plot(d,pdest,"r-",lw=3);
plt.plot(d,pdtrue,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
# compute chi squared statistic
x2est = Ndata*np.sum( np.divide( np.power(pdest-pdtrue,2), pdtrue ));
K = Nbin-3;
# compute P( x2 >= x2est ) = 1 - P(x2<x2est);
P = 1-st.chi2.cdf( x2est, K );
print("K %d chi-squared-est %.4f P(x2>=x2est) %.4f" % (K, x2est, P) );
# Part 2: Inorrect Distribution
print("Part 1: Incorrect Distribution");
# estimate mean and standard deviation of d's, and then make them incorrect
dbarest = np.mean(drandom)-0.5;
sigmadest = np.std(drandom)*1.5;
print("mean: true %.2f estimated %.2f" % (dbar, dbarest) );
# histogram
dmin = 0; # starting d
dmax = 10; # ending d
Lb = 40; # number of bins
dhist, e = np.histogram(drandom,Lb,(dmin,dmax)); # create histogram
Nbin = len(dhist); # lengths of histogram
Ne = len(e); # length of edges
dhist = gda_cvec(dhist); # convert h to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
hmax=np.max(dhist); # maximum counts
# normalize to unit area
norm = sum(dhist);
pdest = dhist / norm;
# d-axis, for plottin purposes
d = gda_cvec(np.linspace(bins[0,0],bins[Nbin-1],Nbin));
# theoretical distribution
pdtrue = st.norm.cdf(d+Db/2,loc=dbarest,scale=sigmadest)-st.norm.cdf(d-Db/2,loc=dbarest,scale=sigmadest);
# plot
ax1 = plt.subplot(1, 2, 2);
plt.plot(d,pdest,"r-",lw=3);
plt.plot(d,pdtrue,"b-",lw=3);
plt.xlabel("d");
plt.ylabel("p(d)");
# compute chi squared statistic
x2est = Ndata*np.sum( np.divide( np.power(pdest-pdtrue,2), pdtrue ));
K = Nbin-3;
# compute P( x2 >= x2est ) = 1 - P(x2<x2est);
P = 1-st.chi2.cdf( x2est, K );
print("K %d chi-squared-est %.4f P(x2>=x2est) %.4f" % (K, x2est, P) );
Part 1: Correct Distribution mean: true 5.00 estimated 4.91 K 37 chi-squared-est 59.7177 P(x2>=x2est) 0.0104 Part 1: Incorrect Distribution mean: true 5.00 estimated 4.41 K 37 chi-squared-est 94.1955 P(x2>=x2est) 0.0000
In [29]:
# gdama03_13
# Illustrate a computing conditional distributions
# from a joint distribution. Supports Figure 2.15.
# set up vectors da and d2
# d-axes, for plottin purposes
L=40;
Dd = 1.0;
d1 = gda_cvec(np.linspace(0.0,Dd*(L-1),L));
d2 = gda_cvec(np.linspace(0.0,Dd*(L-1),L));
# make twoD normal pdf p(d1,d2)
d1bar=15;
d2bar=25;
s1=7;
s12=s1**2;
s2=8;
s22=s1**2;
norm=1.0/(2*pi*s1*s2);
p1=np.exp(-np.power(d1-d1bar,2)/(2*s1*s1)); # unnormalized 1D pdf in d1
p2=np.exp(-np.power(d2-d2bar,2)/(2*s2*s2)); # unnormalized 1D pdf in d1
P=norm*np.matmul(p1,p2.T); # 2D pdf via tensor product
# sum along columns, which integrates P along d2 to get p1=p(d1)
p1 = Dd*np.sum(P,axis=1,keepdims=True);
# sum along rows, which integrates P along d1 to get p2=p(d2)
# but transpoe to column-vector
p2 = Dd*np.sum(P,axis=0,keepdims=True).T;
# conditional distribution P1g2 = P(d1|d2) = P(d1,d2)/p2
P1g2 = np.divide( P, np.matmul(np.ones((L,1)),p2.T) );
# conditional distribution P2g1 = P(d2|d1) = P(d1,d2)/p1
P2g1 = np.divide( P, np.matmul(p1,np.ones((L,1)).T) );
# use simple drawing function that encapsulates all the graphics
gda_draw("title p(d1,d2)",P,"title p(d1|d2)",P1g2,'title p(d2|d1)',P2g1,);
In [30]:
# gdapy03_14
# Realization of a Gaussian disrribution, calculated several ways
# generate Normal random numbers with these mean and variance
N=1000;
mbar = 5.0;
sigma = 1.0;
# Method 1: by transformation of a uniform distribution
m = np.random.normal(loc=mbar,scale=sigma,size=(N,1));
# Method 2: by transformation of a uniform distribution
duniform = np.random.uniform(low=0.0,high=1.0,size=(N,1));
m2 = st.norm.ppf(duniform,loc=mbar,scale=sigma);
# histogram 1
mmin = 0.0; # starting d
mmax = 10.0; # ending d
Lb = 100; # number of bins
h1, e = np.histogram(m,Lb,(mmin,mmax)); # create histogram
Nbin = len(h1); # lengths of histogram
Ne = len(e); # length of edges
h1 = gda_cvec(h1); # convert h1 to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
h1max=np.max(h1); # maximum counts
h1 = h1 / (Db*np.sum(h1));
# histogram 2
h2, e = np.histogram(m2,Lb,(mmin,mmax)); # create histogram
Nbin = len(h2); # lengths of histogram
Ne = len(e); # length of edges
h2 = gda_cvec(h2); # convert h2 to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins =edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
h2max=np.max(h2); # maximum counts
h2 = h2 / (Db*np.sum(h2));
# normal pdf
p = st.norm.pdf(bins, loc=mbar, scale=sigma);
# plot
fig1 = plt.figure(2,figsize=(12,5));
ax1 = plt.subplot(1, 1, 1);
plt.axis( [mmin, mmax, 0, 0.5] );
plt.plot( bins, h1, "r-", lw=3 );
plt.plot( bins, h2, "b-", lw=3 );
plt.plot( bins, p, "k-", lw=3 );
plt.xlabel("m");
plt.ylabel("p(m)");
In [31]:
# gdapy03_15
# create realizations of an exponential p.d.f. in two ways,
# transformation of a uniform distribution, and the Metropolis
# algoritm. In this example, the p.d.f. is
# p(d) = c*exp(-d)/c); for d>0
c = 2.0;
# d-axis
dmin = -10.0;
dmax = 10.0;
# the usual transformation rule is p(d) = p(m(d)) |dm/dd|
# suppose that p(m) is uniform over m=(-1,1) with amplitude 0.5
# handle the absolute value sign by breaking into two parts,
# Part 1: m>0,
# (1/c)*exp(-d/c)=(+/-)dm/dd. Choose the + sign, in order
# to map m=0 with d=0 and m=1 to d=infinity. Then
# m =(integral)(1/c)*exp(-d/c)dd+constant. Choose constant=1
# so m=1-exp(-d/c). Inverting gives d=-c*ln((1-m))
# Part 2: m<0
# similar calculation gives d=-c*ln((1+m))
# so overall d=-sgn(m)*c*ln((1-abs(m)))
# transform realizations of a uniform distribution to p(d)
M=5000;
rm=np.random.uniform(low=-1.0,high=1.0,size=(M,1));
rd=np.multiply(-np.sign(rm), c*np.log((1.0-np.abs(rm))));
# histogram
Lb = 40; # number of bins
h1, e = np.histogram(rd,Lb,(dmin,dmax)); # create histogram
Nbin = len(h1); # lengths of histogram
Ne = len(e); # length of edges
h1 = gda_cvec(h1); # convert h1 to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
h1 = h1 / (Db*np.sum(h1)); # normalize to empirical pdf
h1max=np.max(h1); # maximum value of pdf
# evaluate exponential distribution
pexp = (0.5/c)*np.exp(-np.abs(bins)/c);
# plot
fig1 = plt.figure(2,figsize=(12,5));
ax1 = plt.subplot(1, 2, 1);
plt.axis( [dmin, dmax, 0, 1.1*h1max] );
# improvides bar chart
for i in range(Nbin):
tb = gda_cvec( bins[i,0]-Db/2.0, bins[i,0]-Db/2.0, bins[i,0]+Db/2.0, bins[i,0]+Db/2.0 );
th = gda_cvec( 0.0, h1[i,0], h1[i,0], 0.0 );
plt.plot(tb,th,"b-",lw=3);
plt.plot( bins, pexp, "r-", lw=3 );
plt.xlabel("d");
plt.ylabel("p(d)");
# Use Metropolis to sample pdf
Niter=5000;
rd = np.zeros((Niter,1));
prd = np.zeros((Niter,1));
rd[0,0] = 0.0;
prd[0,0] = (1.0/c)*exp(-abs(rd[0,0]/c));
s = 1.0; # scale parameter, defines neighborhood
for k in range(1,Niter):
# old realization
rdo = rd[k-1,0];
prdo = prd[k-1,0];
rdn = np.random.normal(loc=rdo,scale=s);
prdn = (0.5/c)*exp(-abs(rdn)/c);
# test parameter, ratio of probabilities
a = prdn/prdo;
# acceptance test
if( a>1.0 ):
rd[k,0] = rdn;
prd[k,0] = prdn;
else:
r = np.random.uniform(low=0.0,high=1.0);
if( a>r ):
rd[k,0] = rdn;
prd[k,0] = prdn;
else:
rd[k,0] = rdo;
prd[k,0] = prdo;
# histogram
h1, e = np.histogram(rd,Lb,(dmin,dmax)); # create histogram
Nbin = len(h1); # lengths of histogram
Ne = len(e); # length of edges
h2 = gda_cvec(h1); # convert h1 to column-vector
edges = gda_cvec(e[0:Ne-1]); # convert e to column-vector
bins = edges + 0.5*(edges[1,0]-edges[0,0]); # centers of bins
Db = bins[1,0]-bins[0,0]; # size of bins
# convert histogram to a empirical pdf
h2 = h2 / (Db*np.sum(h2)); # normalize to empirical pdf
h2max=np.max(h2); # maximum value of pdf
# plot histogram of Metropolis and true pdf
ax2 = plt.subplot(1, 2, 2);
plt.axis( [dmin, dmax, 0, 1.1*h1max] );
# improvides bar chart
for i in range(Nbin):
tb = gda_cvec( bins[i,0]-Db/2.0, bins[i,0]-Db/2.0, bins[i,0]+Db/2.0, bins[i,0]+Db/2.0 );
th = gda_cvec( 0.0, h2[i,0], h2[i,0], 0.0 );
plt.plot(tb,th,"b-",lw=3);
plt.plot( bins, pexp, "r-", lw=3 );
plt.xlabel("d");
plt.ylabel("p(d)");
In [ ]: