In [2]:
# gdapy13_00 clears all variables and import various modules
# you must run this cell first, before running any of others
# note that these examples use inverse theory techniques that
# are only covered in later chapters of the book
# History:
# 2023/04/29 - Created by W. Menke, from MATLAB version
# 2024/05/23 - Patches to accomodate new verions of numpy, scipy, matplotlib
# patched dot product to return scalar value, and
# similar issues associated with new disequivalence of scalar and 1x1 array
# A reset deletes any previously-created variables from memory
%reset -f
# import various packages
import numpy as np # matrices & etc
from matplotlib import pyplot as plt # general plotting
from matplotlib import patches # plotting shapes
from datetime import date # date and time arithmetic
from math import exp, pi, sin, cos, tan, atan2, sqrt, floor, log10, nan # math functions
import scipy.linalg as la # linear algebra functions
import os # operating system
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import scipy.signal as sg
import scipy.stats as st
import scipy.sparse.linalg as las
from scipy import sparse
from scipy import optimize as opt
import matplotlib
# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def gda_cvec(*argv):
t = int;
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
Nt = Nt + N;
if( N==0 ):
continue; # skip vector of zero length
if (t==int) and isinstance(v[0,0],float):
t=float;
elif isinstance(v[0,0],complex):
t=complex;
w = np.zeros((Nt,1),dtype=t);
Nt = 0;
for a in argv:
v = gda_cvec1(a);
N,M = np.shape(v);
w[Nt:Nt+N,0:1] = v; # patch 20230418 was #w[Nt:Nt+N,0] = v[0:N,0];
Nt = Nt + N;
return w;
# function to make a numpy N-by-1 column vector
# c=gda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=gda_cvec(v1,v2,...) concatenates
# many argiments.
def gda_cvec1(v):
if isinstance(v, int) or isinstance(v, np.int32):
w = np.zeros((1,1),dtype=int);
w[0,0] = v;
return w;
elif isinstance(v, float):
w = np.zeros((1,1),dtype=float);
w[0,0] = v;
return w;
elif isinstance(v, complex):
w = np.zeros((1,1),dtype=complex);
w[0,0] = v;
return w;
elif isinstance(v, np.ndarray):
s = np.shape(v);
if len(s) == 1:
return np.copy(np.reshape(v,(s[0],1)));
else:
[r,c]=s;
if( c==1 ):
return(np.copy(v));
elif(r==1):
return(np.copy(v.T));
else:
raise TypeError("gda_cvec: %d by %d ndarray not allowed" % (r, c));
elif isinstance(v, list):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: list contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(v); # patch v -> np.array(v)
return w;
elif isinstance(v, tuple):
r = len(v);
t = int;
for vi in v:
if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
pass;
elif isinstance(vi,float):
t=float;
elif isinstance(vi,complex):
t=complex;
break;
else:
raise TypeError("gda_cvec: tuple contains unsupported type %s" % type(vi));
w = np.zeros((r,1),dtype=t);
w[:,0] = np.array(list(v)); # patch v -> np.array(list(v));
return w;
else:
raise TypeError("gda_cvec: %s not supported" % type(v));
# gda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def gda_draw(*argv):
DOCOLOR=True;
if( DOCOLOR ):
bwcmap = matplotlib.colormaps['jet'];
else:
bw = np.zeros((256,4));
v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
bw[:,0] = v;
bw[:,1] = v;
bw[:,2] = v;
bw[:,3] = np.ones(256);
bwcmap = ListedColormap(bw);
# size of plot
W = 16;
H = 4;
fig1 = plt.figure(1);
# figsize width and height in inches
fig1.set_size_inches(W,H);
ax1 = plt.subplot(1,1,1);
plt.axis([0, W, -H/2, H/2]);
plt.axis('off');
LM = W/6; # matrix width and heoght
LV = W/40; # vector width
FS = 0.12; # character width
TO = 0.4; # title vertical offset
SP = 0.2; # space between objects
LS = 0.2; # leading space
p = LS; # starting x-position
istitle=0; # flags presence of a title
for a in argv:
if isinstance(a,np.ndarray):
sh = np.shape(a);
if len(sh) == 1: # conversion to nx1 array
n = sh[0];
m = 1;
ap = a;
a = np.zeros((n,1));
a[:,0] = ap;
else:
n = sh[0];
m = sh[1];
if m==1:
pold=p;
left=p;
right=p+LV;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LV;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
else:
pold=p;
left=p;
right=p+LM;
bottom=-LM/2;
top=LM/2;
plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
p = p+LM;
pm = (p+pold)/2;
if istitle:
plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
istitle=0;
p = p+SP;
elif isinstance(a,str):
ns = len(a);
istitle=0;
if( ns>=6 ):
if 'title ' in a[0:6]:
istitle=1;
titlestr=a[6:];
if( istitle != 1):
plt.text(p,0,a);
p = p + ns*FS + SP;
plt.show();
return 1;
# bandpass filter, used in seismological example, but hand
# in a variety of settings involving time series
def gda_chebyshevfilt(d, Dt, flow, fhigh):
# (dout,u,v)=gda_chebyshevfilt(d, Dt, flow, fhigh);
# chebyshev IIR bandpass filter
# d - input array of data
# Dt - sampling interval
# flow - low pass frequency, Hz
# fhigh - high pass frequency, Hz
# dout - output array of data
# u - the numerator filter
# v - the denominator filter
# these filters can be used again using dout=filter(u,v,din);
# make sure input timeseries is a column vector
s = np.shape(d);
N = s[0];
if(N==1):
dd = np.zeros((N,1));
dd[:,0] = d;
else:
dd=d;
# sampling rate
rate=1/Dt;
# ripple parameter, set to ten percent
ripple=0.1;
# normalise frequency
fl=2.0*flow/rate;
fh=2.0*fhigh/rate;
# center frequency
cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );
# bandwidth
bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );
# ripple parameter factor
rpf = sqrt((sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple));
a = 0.5*(rpf-1.0/rpf);
b = 0.5*(rpf+1.0/rpf);
u=np.zeros((5,1));
v=np.zeros((5,1));
theta = 3*pi/4;
sr = a * cos(theta);
si = b * sin(theta);
es = sqrt(sr*sr+si*si);
tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
v[0,0] = 1.0;
v[1,0] = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
v[2,0] = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
v[3,0] = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
v[4,0] = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
tmp = 4.0*es*es*bw*bw/tmp;
u[0,0] = tmp;
u[1,0] = 0.0;
u[2,0] = -2.0*tmp;
u[3,0] = 0.0;
u[4,0] = tmp;
dout = sg.lfilter(u.ravel(),v.ravel(),dd.ravel());
return (gda_cvec(dout),gda_cvec(u),gda_cvec(v));
# function to perform the multiplication FT (F v)
def gdaFTFmul(v):
# this function is used by bicongugate gradient to
# solve the generalized least squares problem Fm=F.
# Note that "F" must be called "gdaFsparse".
global gdaFsparse;
s = np.shape(v);
if(len(s)==1):
vv = np.zeros((s[0],1));
vv[:,0] = v;
else:
vv=v;
# weirdly, "*" multiplies sparse matrices just fine
temp = gdaFsparse*vv;
return gdaFsparse.transpose()*temp;
def gdaarrow(ax,rbar,mys,myl):
# pretty crazy way to draw an arrowhead!
rbarsq = np.matmul(rbar.T,rbar); rbarsq=rbarsq[0,0];
tangent = rbar/sqrt(rbarsq);
per1 = np.cross( tangent, gda_cvec(0.0, 0.0, 1.0), axis=0 );
pre1sq=np.matmul(per1.T,per1); pre1sq=pre1sq[0,0];
per1 = per1/sqrt(pre1sq);
per2 = np.cross( tangent, per1, axis=0 );
per2sq = np.matmul(per2.T,per2); per2sq=per2sq[0,0];
per2 = per2/sqrt(per2sq);
L = 0.05;
px=gda_cvec(0.0,rbar[0,0]);
py=gda_cvec(0.0,rbar[1,0]);
pz=gda_cvec(0.0,rbar[2,0]);
ax.plot3D( px.ravel(), py.ravel(), pz.ravel(), mys, lw=myl );
v1 = rbar - L*tangent + 0.25*L*per1;
v2 = rbar - L*tangent - 0.25*L*per1;
v3 = rbar - L*tangent + 0.25*L*per2;
v4 = rbar - L*tangent - 0.25*L*per2;
x0=gda_cvec(rbar[0,0], v1[0,0]);
y0=gda_cvec(rbar[1,0], v1[1,0]);
z0=gda_cvec(rbar[2,0], v1[2,0]);
ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
x0=gda_cvec(rbar[0,0], v2[0,0]);
y0=gda_cvec(rbar[1,0], v2[1,0]);
z0=gda_cvec(rbar[2,0], v2[2,0]);
ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
x0=gda_cvec(rbar[0,0], v3[0,0]);
y0=gda_cvec(rbar[1,0], v3[1,0]);
z0=gda_cvec(rbar[2,0], v3[2,0]);
ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
x0=gda_cvec(rbar[0,0], v4[0,0]);
y0=gda_cvec(rbar[1,0], v4[1,0]);
z0=gda_cvec(rbar[2,0], v4[2,0]);
ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
return 1;
def gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax):
ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmin,zmax).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymin,ymax).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmin,xmax).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
return 1;
In [17]:
# gdapy13_01
# mixing of endmembers, visualized as vectors in 3D space
print("gdapy13_01");
xmin = 0.0;
xmax = 1.0;
ymin = 0.0;
ymax = 1.0;
zmin = 0.0;
zmax = 1.0;
# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8)); # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
# factors
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;
gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);
# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;
gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);
plt.show();
print("Caption: Samples (black arrows) are linear combinations of end-members (red arrows).");
gdapy13_01
Caption: Samples (black arrows) are linear combinations of end-members (red arrows).
In [18]:
# gdapy13_02
# mixing of endmembers, visualized as vectors in 3D space
print("gdapy13_02");
xmin = 0.0;
xmax = 1.0;
ymin = 0.0;
ymax = 1.0;
zmin = 0.0;
zmax = 1.0;
# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8)); # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
# factors
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;
gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);
# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;
gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);
USESVD=1;
if( USESVD ):
# use SVD. The minus signs are just to get the vectors
# in quadrants that look good in the plot
S=np.zeros((4,3));
S[0:1,0:3]=s1.T;
S[1:2,0:3]=s2.T;
S[2:3,0:3]=s3.T;
S[3:4,0:3]=s4.T;
U,lam,VT = la.svd(S,full_matrices=True);
V = VT.T;
v1 = -V[0:3,0:1];
v2 = -V[0:3,1:2];
v3 = -V[0:3,2:3];
else:
# just use the mean vector, and two vectors perpendicular
# to it, one of which is in the F1-F2 plane
v1 = f1+f2;
v1sq = np.matmul(v1.T,v1); v1sq=v1sq[0,0];
norm = sqrt(v1sq);
v1=v1/norm;
v3 = gda_cvec(np.cross(f1.ravel(),f2.ravel()));
v3sq = np.matmul(v3.T,v3); v3sq=v3sq[0,0];
norm = sqrt(v3sq);
v3=v3/norm;
v2 = gda_cvec(np.cross(v1.ravel(),v3.ravel()));
v2sq = np.matmul(v2.T,v2); v2sq=v2sq[0,0];
norm = sqrt(v2sq);
v2=v2/norm;
gdaarrow(ax,v1,"b-",2);
gdaarrow(ax,v2,"b-",2);
gdaarrow(ax,v3,"b-",2);
plt.show();
print("Caption: Samples (black arrows) are linear combinations of factors (red arrows)");
print("Two factors (solid blue) lie in the plane of the samples, one (blue dashed) is normal to it.");
gdapy13_02
Caption: Samples (black arrows) are linear combinations of factors (red arrows) Two factors (solid blue) lie in the plane of the samples, one (blue dashed) is normal to it.
In [5]:
# gdapy13_03
# mixing of endmembers, visualized as vectors in 3D space
print("gdapy13_03");
xmin = 0.0;
xmax = 1.0;
ymin = 0.0;
ymax = 1.0;
zmin = 0.0;
zmax = 1.0;
# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8)); # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
# factors for case 1
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;
gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);
# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;
gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);
plt.show();
print("Caption: Samples (black arrows) and factors (red arrows).");
# factors for Case 2
f1a = gda_cvec( 1.0, 0.2, 0.8 );
f1asq = np.matmul(f1a.T,f1a); f1asq=f1asq[0,0];
norm = 1.2*sqrt(f1asq);
f1a=f1a/norm;
f2a = gda_cvec(0.2, 1, 0.8);
f2asq = np.matmul(f2a.T,f2a); f2asq=f2asq[0,0];
norm = 0.8*sqrt(f2asq);
f2a=f2a/norm;
f1 = f1a+0.2*f2a;
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
f2 = f2a + 0.2*f1a;
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;
# setup for 3D figure
fig2 = plt.figure(2,figsize=(12,8)); # smallish figure
ax2 = fig2.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax2.axes.set_xlim3d(left=-1.0, right=xmax);
ax2.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax2.axes.set_zlim3d(bottom=-1.0, top=zmax);
# 3D box
gdabox(ax2,xmin,xmax,ymin,ymax,zmin,zmax);
gdaarrow(ax2,s1,"k-",2);
gdaarrow(ax2,s2,"k-",2);
gdaarrow(ax2,s3,"k-",2);
gdaarrow(ax2,s4,"k-",2);
gdaarrow(ax2,f1,"r-",3);
gdaarrow(ax2,f2,"r-",3);
plt.show();
print("Caption: Samples (black arrows) and an alternative set of factors (red arrows).");
gdapy13_03
Caption: Samples (black arrows) and factors (red arrows).
Caption: Samples (black arrows) and an alternative set of factors (red arrows).
In [20]:
# gdapy13_04
# factor analysis on Atlantic Rocks dataset
# using singular value decomposition
print("gdapy13_04");
# load data
D = np.genfromtxt("../Data/rocks.txt", delimiter="\t");
N, M = np.shape(D);
sio2 = D[0:N,0:1]; # SiO2
tio2 = D[0:N,1:2]; # TiO2
als03 = D[0:N,2:3]; # Al203
feot = D[0:N,3:4]; # FeO-total
mgo = D[0:N,4:5]; # MgO
cao = D[0:N,5:6]; # CaO
na20 = D[0:N,6:7]; # Na2O
k20 = D[0:N,7:8]; # K2O
# compute factors and factor loadings using singular value decompostion
S=D;
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));
# make shortened factor matrix with P factors
# and corresponding loading matrix
P=5;
FP = np.copy(F[0:P,0:M]);
CP = np.copy(C[0:N,0:P]);
# plot singular values
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [ 1, Ns, 0, np.max(lam) ] );
w = gda_cvec(np.linspace(1,Ns,Ns));
plt.plot( w, lam, "k-", lw=2 );
plt.plot( w, lam, "ko", lw=2 );
plt.xlabel("index, i");
plt.ylabel("lambda(i)");
plt.show();
print("Caption: Singular-values.");
print(" ");
# display first five factors
for j in range(5):
f1=F[j:j+1,0:M].T;
print("factor %d" % (j) );
print("SiO2 %f" % (f1[0,0]) );
print("TiO2 %f" % (f1[1,0]) );
print("Al203 %f" % (f1[2,0]) );
print("FeO-total %f" % (f1[3,0]) );
print("MgO %f" % (f1[4,0]) );
print("CaO %f" % (f1[5,0]) );
print("Na2O %f" % (f1[6,0]) );
print("k2O %f" % (f1[7,0]) );
print(" ");
# plot loadings of factors (f2,f3,f4) in 3D (c2,c3,c4) space
cmin=(-50.0);
cmax=50.0;
# setup for 3D figure
fig2 = plt.figure(2,figsize=(12,8)); # smallish figure
ax2 = fig2.add_subplot(111, projection='3d');
ax2.axes.set_xlim3d(left=cmin, right=cmax);
ax2.axes.set_ylim3d(bottom=cmin, top=cmax);
ax2.axes.set_zlim3d(bottom=cmin, top=cmax);
myelev=15.0;
myazim=125.0;
ax2.view_init(elev=myelev, azim=myazim);
ax2.set_xlabel("C1");
ax2.set_ylabel("C2");
ax2.set_zlabel("C3");
# 3D box
gdabox(ax2,cmin,cmax,cmin,cmax,cmin,cmax);
# plot loadings
ax2.plot3D( C[0:N,1], C[0:N,2], C[0:N,3], 'k.', lw=2 );
plt.show();
print("Caption: Scatter plot of the loadings of factors 2, 3, and 4.");
gdapy13_04
Caption: Singular-values. factor 0 SiO2 -0.908829 TiO2 -0.024638 Al203 -0.275168 FeO-total -0.177851 MgO -0.141341 CaO -0.209989 Na2O -0.044611 k2O -0.003430 factor 1 SiO2 0.007684 TiO2 -0.037474 Al203 -0.301583 FeO-total -0.018421 MgO 0.923193 CaO -0.226917 Na2O -0.058457 k2O -0.007204 factor 2 SiO2 -0.161689 TiO2 -0.126343 Al203 0.567828 FeO-total -0.659205 MgO 0.255748 CaO 0.365682 Na2O -0.041738 k2O -0.006464 factor 3 SiO2 0.209819 TiO2 0.151367 Al203 0.176021 FeO-total -0.427461 MgO -0.118643 CaO -0.780043 Na2O 0.302367 k2O 0.073403 factor 4 SiO2 0.309495 TiO2 -0.100476 Al203 -0.670083 FeO-total -0.585155 MgO -0.195193 CaO 0.207980 Na2O -0.145318 k2O 0.015035
Caption: Scatter plot of the loadings of factors 2, 3, and 4.
In [21]:
# gdapy13_05
# factor analysis on Atlantic Rocks dataset
# using singular value decomposition
# and the varimax procedure
print("gdapy13_05");
# load data
D = np.genfromtxt("../Data/rocks.txt", delimiter="\t");
N, M = np.shape(D);
sio2 = D[0:N,0:1]; # SiO2
tio2 = D[0:N,1:2]; # TiO2
als03 = D[0:N,2:3]; # Al203
feot = D[0:N,3:4]; # FeO-total
mgo = D[0:N,4:5]; # MgO
cao = D[0:N,5:6]; # CaO
na20 = D[0:N,6:7]; # Na2O
k20 = D[0:N,7:8]; # K2O
# compute factors and factor loadings using singular value decompostion
S=D;
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));
# keep only first five singular values
P=5;
F = F[0:P,0:M];
C = C[0:N,0:P];
# initial rotated factor matrix, FP, abd rotation matrix, MR
MR=np.identity(P);
FP=np.copy(F);
# spike these factors using the varimax procedure
k = gda_cvec(1, 2, 3, 4);
Nk, i = np.shape(k);
# varimax is an iterative procedure, but converges very rapidly,
# so do only 3 iterations. Within each iteration, one applies
# the procedure to every pair of factors.
for iter in range(3): # iterations
for ii in range(Nk): # pairs of factors
for jj in range(ii+1,Nk):
# spike factors i and j
i=k[ii,0];
j=k[jj,0];
# copy factors from matrix to vectors
fA = FP[i:i+1,0:M].T;
fB = FP[j:j+1,0:M].T;
# standard varimax procedure to determine rotation angle q
u = np.power(fA,2) - np.power(fB,2);
v = 2.0* np.multiply(fA,fB);
A = 2.0*M*np.matmul(u.T,v); A=A[0,0];
B = np.sum(u)*np.sum(v);
top = A - B;
CC = M*(np.matmul(u.T,u)-np.matmul(v.T,v)); CC=CC[0,0];
DD = (np.sum(u)**2)-(np.sum(v)**2);
bot = CC - DD;
q = 0.25 * atan2(top,bot);
cq = cos(q);
sq = sin(q);
fAp = cq*fA + sq*fB;
fBp = -sq*fA + cq*fB;
# put back into factor matrix, FP
FP[i:i+1,0:M] = fAp.T;
FP[j:j+1,0:M] = fBp.T;
# accumulate rotation
mA = MR[i:i+1,0:M].T;
mB = MR[j:j+1,0:M].T;
mAp = cq*mA + sq*mB;
mBp = -sq*mA + cq*mB;
MR[i:i+1,0:M] = mAp.T;
MR[j:j+1,0:M] = mBp.T;
# new factor loadings
CP=np.matmul(C,MR.T);
# check that rotations correctly implemented
S0 = np.matmul(C,F);
SP = np.matmul(CP,FP);
e = np.max(np.abs(SP-S0));
print("max difference beween CP*FP and CPR*FPR: %e" % (e) );
print(" ");
# display first five factors
print("Rotated Factors" );
for j in range(5):
f1=FP[j:j+1,0:M].T;
print("factor %d" % (j) );
print("SiO2 %f" % (f1[0,0]) );
print("TiO2 %f" % (f1[1,0]) );
print("Al203 %f" % (f1[2,0]) );
print("FeO-total %f" % (f1[3,0]) );
print("MgO %f" % (f1[4,0]) );
print("CaO %f" % (f1[5,0]) );
print("Na2O %f" % (f1[6,0]) );
print("k2O %f" % (f1[7,0]) );
print(" ");
q = k[0,0]; r=q+1; a=np.abs(F[q:r,0:M]).T; ar=np.abs(FP[q:r,0:M]).T;
q = k[1,0]; r=q+1; b=np.abs(F[q:r,0:M]).T; br=np.abs(FP[q:r,0:M]).T;
q = k[2,0]; r=q+1; c=np.abs(F[q:r,0:M]).T; cr=np.abs(FP[q:r,0:M]).T;
q = k[3,0]; r=q+1; d=np.abs(F[q:r,0:M]).T; dr=np.abs(FP[q:r,0:M]).T;
gda_draw("title f1",a, "title f2",b, "title f3",c, "title f4",d, " ", "title f1r",ar, "title f2r",br, "title f3r",cr, "title f4r",dr);
gdapy13_05 max difference beween CP*FP and CPR*FPR: 2.131628e-14 Rotated Factors factor 0 SiO2 -0.908829 TiO2 -0.024638 Al203 -0.275168 FeO-total -0.177851 MgO -0.141341 CaO -0.209989 Na2O -0.044611 k2O -0.003430 factor 1 SiO2 -0.131621 TiO2 -0.070685 Al203 -0.014311 FeO-total -0.011084 MgO 0.984305 CaO -0.038976 Na2O -0.080453 k2O -0.022437 factor 2 SiO2 0.178651 TiO2 -0.077399 Al203 0.029953 FeO-total -0.979373 MgO 0.010557 CaO 0.014822 Na2O 0.016830 k2O 0.037555 factor 3 SiO2 0.182928 TiO2 0.195179 Al203 0.004503 FeO-total 0.012028 MgO 0.028210 CaO -0.913888 Na2O 0.297483 k2O 0.061593 factor 4 SiO2 0.288635 TiO2 -0.035952 Al203 -0.944592 FeO-total 0.024113 MgO 0.010209 CaO -0.002672 Na2O -0.149835 k2O 0.000397
In [22]:
# gdapy13_06
# factor analysis with simple shapes
# that allows these shapes to be classified
print("gdapy13_06");
# define samples. These are simple shapes
# that might represent cross-sections through
# a mountain range
N=15;
M=11;
S=np.zeros((N,M));
S[0:1,0:M] = gda_cvec(0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0).T;
S[1:2,0:M] = gda_cvec(0, 2, 3, 4, 5, 6, 5, 4, 3, 1, 0).T;
S[2:3,0:M] = gda_cvec(0, 1, 2, 4, 5, 6, 5, 4, 2, 1, 0).T;
S[3:4,0:M] = gda_cvec(0, 1, 3, 5, 5, 4.5, 4, 3.5, 3, 2.5, 0).T;
S[4:5,0:M] = np.fliplr(S[3:4,0:M]);
S[5:6,0:M] = gda_cvec(0, 6, 6, 6, 5, 5, 5, 3.5, 2, 1, 0).T;
S[6:7,0:M] = np.fliplr(S[5:6,0:M]);
S[7:8,0:M] = gda_cvec(0, 0.25, 0.5, 1, 3, 6, 3, 1, 0.5, 0.25, 0).T;
S[8:9,0:M] = gda_cvec(0, 1, 2.5, 3, 4, 6, 4, 3, 2.5, 1, 0).T;
S[9:10,0:M] = gda_cvec(0, 1, 2, 4, 6, 4, 2, 1, 1, 0.5, 0).T;
S[10:11,0:M] = np.fliplr(S[9:10,0:M]);
S[11:12,0:M] = gda_cvec(0, 0, 0.5, 0, 1, 1, 2, 4, 6, 3, 0).T;
S[12:13,0:M] = np.fliplr(S[11:12,0:M]);
S[13:14,0:M] = gda_cvec(0, 0, 0.5, 1, 3, 6, 5.5, 5.5, 4.5, 3.5, 0).T;
S[14:15,0:M] = np.fliplr(S[13:14,0:M]);
# plot samples
fig1 = plt.figure(1,figsize=(15,5));
for i in range(N):
plt.subplot(5,3,i+1);
plt.axis( [0, M-1, 0, 7] );
plt.plot( np.linspace(0,M-1,M), S[i,0:M], "k-", lw=2 );
plt.plot( np.linspace(0,M-1,M), S[i,0:M], "k.", lw=2 );
plt.xlabel("i");
plt.ylabel("s(%d)" % (i));
plt.show();
print("Caption: Mountain profiles 1 through 15.");
print(" ");
# factor analysis
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));
print("First 3 singular values: %f %f %f" % (lam[0,0], lam[1,0], lam[2,0]) );
# plot first 3 factors. SIgn flip is just for aesthetic plot
fig2 = plt.figure(2,figsize=(15,5));
for i in range(3):
plt.subplot(1,3,i+1);
plt.axis( [0, 10, -1, 1] );
plt.plot( np.linspace(0,M-1,M), -F[i,0:M], "k-", 'LineWidth', 2 );
plt.plot( np.linspace(0,M-1,M), -F[i,0:M], "k.", 'LineWidth', 2 );
plt.plot( np.linspace(0,M-1,M), np.zeros((M,)), "k:", 'LineWidth', 2 );
plt.xlabel("i");
plt.ylabel("F{%d}" % (i) );
plt.show();
print("Caption: First three EOFs (factors).");
# plot of the profiles arranged by loadings 2 and 3
fig3 = plt.figure(3,figsize=(15,5));
ax1=plt.subplot(1,1,1);
plt.axis( [-8, 8, -8, 8] );
plt.plot( gda_cvec(-8, 8), gda_cvec(0, 0), "k:", lw=2);
plt.plot( gda_cvec(0, 0), gda_cvec(-8, 8), 'k:', lw=2);
for i in range(N):
# loadings
c2 = C[i,1];
c3 = C[i,2];
# demean the sample and rescale
scale1 = 0.4;
x = gda_cvec(np.linspace(0,M-1,M));
s = S[i:i+1,0:M].T;
s = s - np.mean(s);
s = s*scale1;
smax = np.max(s);
ismax = np.argmax(s);
xmax = x[ismax,0];
xbar = np.mean(x);
x = x-xbar;
xmax = xmax-xbar;
x = x*scale1;
xmax = xmax*scale1;
# offset by scaled version of loadings
x = x+c2;
s = s+c3;
plt.fill( x, s, facecolor=[0, 0.5, 1.0,0.5] );
plt.plot( x, s, "k-", lw=2);
plt.plot( x, s, "k.", lw=2);
plt.plot( gda_cvec( x[0,0], x[M-1,0]), gda_cvec( s[0,0], s[M-1,0]), "k-", lw=2 );
plt.text( c2+xmax, c3, ("%d" % (i)) );
plt.xlabel("Factor loading, C(i,2)");
plt.ylabel("Factor loading, C(i,3}");
plt.show();
print("Caption: Mountsin profiles (blue) arranged according to loadings of");
print("EOFs (factors) 2 and 3");
gdapy13_06
Caption: Mountain profiles 1 through 15. First 3 singular values: 38.356675 12.688680 7.490978
Caption: First three EOFs (factors).
Caption: Mountsin profiles (blue) arranged according to loadings of EOFs (factors) 2 and 3
In [23]:
# gdapy13_07
# Synthetic EOF example using 2D images
# that are a mis of spatial patterns with
# time varying amplitudes
print("gdapy13_07");
# time-axis
Nt=25;
dt=1.0;
t = gda_cvec(dt*np.linspace(0,Nt-1,Nt));
tmax = t[Nt-1,0];
# space Nx by Nx grid
Nx=20;
Nx2=Nx*Nx;
dx=1.0;
L = dx*(Nx-1);
x = gda_cvec(dx*np.linspace(0,Nx-1,Nx));
xmin = x[0,0];
xmax=x[Nx-1,0];
# cook up data, Nt images, each Nx by Nx
# Each image contains three spatial modes
# (patterns) of variation
# define the modes
m0 = np.zeros((Nx,Nx));
m1 = np.zeros((Nx,Nx));
m2 = np.zeros((Nx,Nx));
mn = 0.1 * np.random.normal(loc=0.0,scale=1,size=(Nx,Nx));
for p in range(Nx):
for q in range(Nx):
m0[p,q] = sin(pi*x[p,0]/L)*sin(pi*x[q,0]/L);
m1[p,q] = sin(pi*x[p,0]/L)*sin(2*pi*x[q,0]/L);
m2[p,q] = sin(2*pi*x[p,0]/L)*sin(3*pi*x[q,0]/L);
# build data with modes with these amplitudes at different times
# These loadings were hand-tuned to look aesthetic
c0 = gda_cvec(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1 );
c1 = gda_cvec(1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5 );
c2 = gda_cvec(0, 0, 0, 1, 2, 3, 2, 1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 1, 2, 3, 2, 1 );
# data is sum of modes with prescribed coefficients, plus random noise
fig1 = plt.figure(1,figsize=(12,15));
D = np.zeros( (Nx, Nx, Nt) );
for pp in range(Nt):
D[0:Nx,0:Nx,pp] = c0[pp,0]*m0 + c1[pp,0]*m1 + c2[pp,0]*m2 + 0.7*np.random.normal(loc=0,scale=1,size=(Nx,Nx));
plt.subplot(5,5,pp+1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [0, 20, 0, 20] );
left=xmin;
right=xmax;
bottom=xmin;
top=xmax;
dmax = 5.0;
plt.imshow( D[0:Nx,0:Nx,pp], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
plt.title("%d" % (pp));
plt.show();
print("Caption: Set of 25 sequential images (time shown above eacch image).");
# rearrange Nx by Nx image Nx*Nx as row vector
N = Nt;
M = Nx*Nx;
S=np.zeros((N,M));
for r in range(Nt):
for pp in range(Nx):
for qq in range(Nx):
s = Nx*pp+qq;
S[r,s] = D[pp,qq,r];
# test of code to rebuild Nx by Nx image froom Nx*Nx row vector
CHECK=0;
if( CHECK ):
D2 = np.zeros( (Nx, Nx, Nt) );
for r in range(Nt):
for s in range(Nx2):
pp = int(floor(s/Nx));
qq = s - Nx*pp;
D2[pp,qq,r] = S[r,s];
e=np.max(np.abs(D-D2));
print("Error check, error in folding %e" % (e) );
# basic SVD analysis
U,lam,VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));
S2 = np.matmul(C,F);
if( CHECK ):
e=np.max(np.abs(S-S2));
print("Error check, error in factorization %e" % (e) );
# select largest eigenvalues, reconstruct data from them
p=3;
print("p: %d" % (p) );
print(" ");
Up=U[0:N,0:p];
lamp=lam[0:p,0:1];
Cp = np.matmul(Up,np.diag(lamp.ravel()));
Fp = VT[0:p,0:M];
Sp = np.matmul(Cp,Fp);
if( CHECK ):
e=np.max(np.abs(S2-Sp));
print("Error check, error in rection to p factors %e" % (e) );
# plot singular-values
fig2 = plt.figure(2,figsize=(12,6));
plt.subplot(1,1,1);
w = gda_cvec(np.linspace(1,N,N));
plt.plot(w, lam, "k-", lw=3);
plt.plot(w, lam, "ko", lw=3);
plt.plot(w[0:3,0:1], lam[0:3,0:1], "ro", lw=3);
plt.xlabel("i");
plt.ylabel("L(i)");
plt.show();
print("Caption: Singular-values.");
F3 = np.zeros( (Nx, Nx, p) );
for r in range(p):
for s in range(Nx2):
pp = int(floor(s/Nx));
qq = s - Nx*pp;
F3[pp,qq,r] = Fp[r,s];
# plot EOF's
fig3 = plt.figure(3,figsize=(12,15));
for pp in range(p):
plt.subplot(1,p,pp+1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [0, 20, 0, 20] );
left=xmin;
right=xmax;
bottom=xmin;
top=xmax;
dmax = np.max(np.abs(F3[0:Nx,0:Nx,pp]));
plt.imshow( F3[0:Nx,0:Nx,pp], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
plt.title("%d" %p);
plt.show();
print("Caption: First 3 EOF's.");
# plot Loadings
fig4 = plt.figure(4,figsize=(12,15));
for pp in range(p):
cpp = Cp[0:N,pp:pp+1];
dmax=np.max(np.abs(cpp));
plt.subplot(p,1,pp+1);
plt.axis( [0, tmax, -dmax, dmax] );
plt.plot( t, cpp, 'k-', lw=2);
plt.xlabel("time t");
plt.ylabel("C(t,i)");
plt.title("%d" % (p) );
plt.show();
print("Caption: First 3 factor loadings.");
D3 = np.zeros( (Nx, Nx, Nt) );
for r in range(Nt):
for s in range(Nx2):
pp = int(floor(s/Nx));
qq = s - Nx*pp;
D3[pp,qq,r] = Sp[r,s];
# plot reconstructed data
fig5 = plt.figure(5,figsize=(12,15));
for p in range(Nt):
plt.subplot(5,5,p+1);
mycmap=matplotlib.colormaps['jet'];
plt.axis( [0, 20, 0, 20] );
left=xmin;
right=xmax;
bottom=xmin;
top=xmax;
dmax = 5.0;
plt.imshow( D3[0:Nx,0:Nx,p], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
plt.title("%d" %p);
plt.show();
print("Caption: Set of 25 sequential reconstructed images, using only p factors (time shown above each image).");
gdapy13_07
Caption: Set of 25 sequential images (time shown above eacch image). p: 3
Caption: Singular-values.
Caption: First 3 EOF's.
Caption: First 3 factor loadings.
Caption: Set of 25 sequential reconstructed images, using only p factors (time shown above each image).
In [ ]: