# gdapy13_00 clears all variables and import various modules
# you must run this cell first, before running any of others

# note that these examples use inverse theory techniques that
# are only covered in later chapters of the book

# History:
# 2023/04/29 - Created by W. Menke, from MATLAB version
# 2024/05/23 -  Patches to accomodate new verions of numpy, scipy, matplotlib
#               patched dot product to return scalar value, and
#               similar issues associated with new disequivalence of scalar and 1x1 array

# A reset deletes any previously-created variables from memory
%reset -f

# import various packages

import numpy as np                    # matrices & etc
from matplotlib import pyplot as plt  # general plotting 
from matplotlib import patches        # plotting shapes
from datetime import date             # date and time arithmetic
from math import exp, pi, sin, cos, tan, atan2, sqrt, floor, log10, nan   # math functions
import scipy.linalg as la             # linear algebra functions
import os                             # operating system
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import scipy.signal as sg
import scipy.stats as st
import scipy.sparse.linalg as las
from scipy import sparse
from scipy import optimize as opt
import matplotlib

# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def gda_cvec(*argv):
    t = int;
    Nt = 0;
    for a in argv:
        v = gda_cvec1(a);
        N,M = np.shape(v);
        Nt = Nt + N;
        if( N==0 ):
            continue; # skip vector of zero length
        if (t==int) and isinstance(v[0,0],float):
            t=float;
        elif isinstance(v[0,0],complex):
            t=complex;
    w = np.zeros((Nt,1),dtype=t);
    Nt = 0;
    for a in argv:
        v = gda_cvec1(a);
        N,M = np.shape(v);
        w[Nt:Nt+N,0:1] = v;  # patch 20230418 was #w[Nt:Nt+N,0] = v[0:N,0];
        Nt = Nt + N;
    return w;

# function to make a numpy N-by-1 column vector
# c=gda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=gda_cvec(v1,v2,...) concatenates
# many argiments.
def gda_cvec1(v):
    if isinstance(v, int) or isinstance(v, np.int32):
        w = np.zeros((1,1),dtype=int);
        w[0,0] = v;
        return w;
    elif isinstance(v, float):
        w = np.zeros((1,1),dtype=float);
        w[0,0] = v;
        return w;
    elif isinstance(v, complex):
        w = np.zeros((1,1),dtype=complex);
        w[0,0] = v;
        return w;
    elif isinstance(v, np.ndarray):
        s = np.shape(v);
        if len(s) == 1:
            return np.copy(np.reshape(v,(s[0],1)));
        else:
            [r,c]=s;
            if( c==1 ):
                return(np.copy(v));
            elif(r==1):
                return(np.copy(v.T));
            else:
                raise TypeError("gda_cvec: %d by %d ndarray not allowed" % (r, c));
    elif isinstance(v, list):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("gda_cvec: list contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = np.array(v); # patch v -> np.array(v)
        return w;
    elif isinstance(v, tuple):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("gda_cvec: tuple contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = np.array(list(v)); # patch v -> np.array(list(v));
        return w;
    else:
        raise TypeError("gda_cvec: %s not supported" % type(v));

        
# gda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def gda_draw(*argv):
    DOCOLOR=True;
    if( DOCOLOR ):
        bwcmap = matplotlib.colormaps['jet'];
    else:
        bw = np.zeros((256,4));
        v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
        bw[:,0] = v;
        bw[:,1] = v;
        bw[:,2] = v;
        bw[:,3] = np.ones(256);
        bwcmap = ListedColormap(bw);
    # size of plot
    W = 16;
    H = 4;
    fig1 = plt.figure(1);
    # figsize width and height in inches
    fig1.set_size_inches(W,H);
    ax1 = plt.subplot(1,1,1);
    plt.axis([0, W, -H/2, H/2]);
    plt.axis('off');
    LM = W/6;    # matrix width and heoght
    LV = W/40;   # vector width
    FS = 0.12;    # character width
    TO = 0.4;    # title vertical offset
    SP = 0.2;    # space between objects
    LS = 0.2;    # leading space
    p = LS; # starting x-position
    istitle=0; # flags presence of a title
    for a in argv:
        if isinstance(a,np.ndarray):
            sh = np.shape(a);
            if len(sh) == 1:  # conversion to nx1 array
                n = sh[0];
                m = 1;
                ap = a;
                a = np.zeros((n,1));
                a[:,0] = ap;
            else:
                n = sh[0];
                m = sh[1];
            if m==1:
                pold=p;
                left=p;
                right=p+LV;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LV;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
            else:
                pold=p;
                left=p;
                right=p+LM;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LM;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
        elif isinstance(a,str):
            ns = len(a);
            istitle=0;
            if( ns>=6 ):
                if 'title ' in a[0:6]:
                    istitle=1;
                    titlestr=a[6:];
            if( istitle != 1):
                plt.text(p,0,a);
                p = p + ns*FS + SP;
    plt.show();
    return 1;

# bandpass filter, used in seismological example, but hand
# in a variety of settings involving time series
def gda_chebyshevfilt(d, Dt, flow, fhigh):
    # (dout,u,v)=gda_chebyshevfilt(d, Dt, flow, fhigh);
    # chebyshev IIR bandpass filter
    # d - input array of data
    # Dt - sampling interval
    # flow - low pass frequency, Hz
    # fhigh - high pass frequency, Hz
    # dout - output array of data
    # u - the numerator filter
    # v - the denominator filter
    # these filters can be used again using dout=filter(u,v,din);

    # make sure input timeseries is a column vector
    s = np.shape(d);
    N = s[0];
    if(N==1):
        dd = np.zeros((N,1));
        dd[:,0] = d;
    else:
        dd=d;
        
    # sampling rate
    rate=1/Dt;

    # ripple parameter, set to ten percent
    ripple=0.1;  

    # normalise frequency
    fl=2.0*flow/rate;
    fh=2.0*fhigh/rate;

    # center frequency 
    cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );

    # bandwidth
    bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );

    # ripple parameter factor
    rpf = sqrt((sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple));
    a = 0.5*(rpf-1.0/rpf);
    b = 0.5*(rpf+1.0/rpf);

    u=np.zeros((5,1));
    v=np.zeros((5,1));
    theta = 3*pi/4;
    sr = a * cos(theta);
    si = b * sin(theta);
    es = sqrt(sr*sr+si*si);
    tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
    v[0,0] = 1.0;
    v[1,0] = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
    v[2,0] = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
    v[3,0] = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
    v[4,0] = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
    tmp = 4.0*es*es*bw*bw/tmp;
    u[0,0] = tmp;
    u[1,0] = 0.0;
    u[2,0] = -2.0*tmp;
    u[3,0] = 0.0;
    u[4,0] = tmp;

    dout = sg.lfilter(u.ravel(),v.ravel(),dd.ravel());
    return (gda_cvec(dout),gda_cvec(u),gda_cvec(v));

# function to perform the multiplication FT (F v)
def gdaFTFmul(v):
    # this function is used by bicongugate gradient to
    # solve the generalized least squares problem Fm=F.
    # Note that "F" must be called "gdaFsparse".
    global gdaFsparse;
    s = np.shape(v);
    if(len(s)==1):
        vv = np.zeros((s[0],1));
        vv[:,0] = v;
    else:
        vv=v;
    # weirdly, "*" multiplies sparse matrices just fine
    temp = gdaFsparse*vv;
    return gdaFsparse.transpose()*temp;

def gdaarrow(ax,rbar,mys,myl):
    # pretty crazy way to draw an arrowhead!
    rbarsq = np.matmul(rbar.T,rbar); rbarsq=rbarsq[0,0];
    tangent = rbar/sqrt(rbarsq);
    per1 = np.cross( tangent, gda_cvec(0.0, 0.0, 1.0), axis=0 );
    pre1sq=np.matmul(per1.T,per1); pre1sq=pre1sq[0,0];
    per1 = per1/sqrt(pre1sq);
    per2 = np.cross( tangent, per1, axis=0 );
    per2sq = np.matmul(per2.T,per2); per2sq=per2sq[0,0];
    per2 = per2/sqrt(per2sq);
    L = 0.05;
    px=gda_cvec(0.0,rbar[0,0]);
    py=gda_cvec(0.0,rbar[1,0]);
    pz=gda_cvec(0.0,rbar[2,0]);
    ax.plot3D( px.ravel(), py.ravel(), pz.ravel(), mys, lw=myl );
    v1 = rbar - L*tangent + 0.25*L*per1;
    v2 = rbar - L*tangent - 0.25*L*per1;
    v3 = rbar - L*tangent + 0.25*L*per2;
    v4 = rbar - L*tangent - 0.25*L*per2;
    x0=gda_cvec(rbar[0,0], v1[0,0]);
    y0=gda_cvec(rbar[1,0], v1[1,0]);
    z0=gda_cvec(rbar[2,0], v1[2,0]);
    ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
    x0=gda_cvec(rbar[0,0], v2[0,0]);
    y0=gda_cvec(rbar[1,0], v2[1,0]);
    z0=gda_cvec(rbar[2,0], v2[2,0]);
    ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
    x0=gda_cvec(rbar[0,0], v3[0,0]);
    y0=gda_cvec(rbar[1,0], v3[1,0]);
    z0=gda_cvec(rbar[2,0], v3[2,0]);
    ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
    x0=gda_cvec(rbar[0,0], v4[0,0]);
    y0=gda_cvec(rbar[1,0], v4[1,0]);
    z0=gda_cvec(rbar[2,0], v4[2,0]);
    ax.plot3D( x0.ravel(), y0.ravel(), z0.ravel(), mys, lw=myl );
    return 1;

def gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax):
    ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmin,zmax).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymin,ymax).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmin,xmax).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymin,ymin).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmax,zmax).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmin,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmax,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmax).ravel(), gda_cvec(ymax,ymin).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
    ax.plot3D( gda_cvec(xmax,xmin).ravel(), gda_cvec(ymax,ymax).ravel(), gda_cvec(zmin,zmin).ravel(), "k-", lw=2 );
    return 1;

# gdapy13_01
 
# mixing of endmembers, visualized as vectors in 3D space
 
print("gdapy13_01");

xmin = 0.0;
xmax = 1.0;
 
ymin = 0.0;
ymax = 1.0;
 
zmin = 0.0;
zmax = 1.0;

# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8));   # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
 
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
    
# factors
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
 
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;

gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);

 
# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;

gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);
plt.show();
print("Caption: Samples (black arrows) are linear combinations of end-members (red arrows).");

gdapy13_01

Caption: Samples (black arrows) are linear combinations of end-members (red arrows).

# gdapy13_02
 
# mixing of endmembers, visualized as vectors in 3D space
 
print("gdapy13_02");

xmin = 0.0;
xmax = 1.0;
 
ymin = 0.0;
ymax = 1.0;
 
zmin = 0.0;
zmax = 1.0;

# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8));   # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
 
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
    
# factors
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
 
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;

gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);

 
# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;

gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);

USESVD=1;
if( USESVD ):
    # use SVD.  The minus signs are just to get the vectors
    # in quadrants that look good in the plot
    S=np.zeros((4,3));
    S[0:1,0:3]=s1.T;
    S[1:2,0:3]=s2.T;
    S[2:3,0:3]=s3.T;
    S[3:4,0:3]=s4.T;
    U,lam,VT = la.svd(S,full_matrices=True);
    V = VT.T;
    v1 = -V[0:3,0:1];
    v2 = -V[0:3,1:2];
    v3 = -V[0:3,2:3];
else:
    # just use the mean vector, and two vectors perpendicular
    # to it, one of which is in the F1-F2 plane
    v1 = f1+f2;
    v1sq = np.matmul(v1.T,v1); v1sq=v1sq[0,0];
    norm = sqrt(v1sq);
    v1=v1/norm;
 
    v3 = gda_cvec(np.cross(f1.ravel(),f2.ravel()));
    v3sq = np.matmul(v3.T,v3); v3sq=v3sq[0,0];
    norm = sqrt(v3sq);
    v3=v3/norm;
 
    v2 = gda_cvec(np.cross(v1.ravel(),v3.ravel()));
    v2sq = np.matmul(v2.T,v2); v2sq=v2sq[0,0];
    norm = sqrt(v2sq);
    v2=v2/norm;

gdaarrow(ax,v1,"b-",2);
gdaarrow(ax,v2,"b-",2);
gdaarrow(ax,v3,"b-",2);
plt.show();
print("Caption: Samples (black arrows) are linear combinations of factors (red arrows)");
print("Two factors (solid blue) lie in the plane of the samples, one (blue dashed) is normal to it.");

gdapy13_02

Caption: Samples (black arrows) are linear combinations of factors (red arrows)
Two factors (solid blue) lie in the plane of the samples, one (blue dashed) is normal to it.

# gdapy13_03
 
# mixing of endmembers, visualized as vectors in 3D space
 
print("gdapy13_03");

xmin = 0.0;
xmax = 1.0;
 
ymin = 0.0;
ymax = 1.0;
 
zmin = 0.0;
zmax = 1.0;

# setup for 3D figure
fig1 = plt.figure(1,figsize=(12,8));   # smallish figure
ax = fig1.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax.axes.set_xlim3d(left=-1.0, right=xmax);
ax.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax.axes.set_zlim3d(bottom=-1.0, top=zmax);
 
# 3D box
gdabox(ax,xmin,xmax,ymin,ymax,zmin,zmax);
    
# factors for case 1
# factor 1
f1 = gda_cvec(1.0, 0.2, 0.8);
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
 
# factor 2
f2 = gda_cvec(0.2, 1.0, 0.8);
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;

gdaarrow(ax,f1,"r-",3);
gdaarrow(ax,f2,"r-",3);

# samples
s1=0.8*f1+(1-0.8)*f2;
s2=0.6*f1+(1-0.6)*f2;
s3=0.4*f1+(1-0.4)*f2;
s4=0.2*f1+(1-0.2)*f2;

gdaarrow(ax,s1,"k-",2);
gdaarrow(ax,s2,"k-",2);
gdaarrow(ax,s3,"k-",2);
gdaarrow(ax,s4,"k-",2);
plt.show();
print("Caption: Samples (black arrows) and factors (red arrows).");

# factors for Case 2
f1a = gda_cvec( 1.0, 0.2, 0.8 );
f1asq = np.matmul(f1a.T,f1a); f1asq=f1asq[0,0];
norm = 1.2*sqrt(f1asq);
f1a=f1a/norm;
 
f2a = gda_cvec(0.2, 1, 0.8);
f2asq = np.matmul(f2a.T,f2a); f2asq=f2asq[0,0];
norm = 0.8*sqrt(f2asq);
f2a=f2a/norm;
 
f1 = f1a+0.2*f2a;
f1sq = np.matmul(f1.T,f1); f1sq=f1sq[0,0];
norm = 1.2*sqrt(f1sq);
f1=f1/norm;
 
f2 = f2a + 0.2*f1a;
f2sq = np.matmul(f2.T,f2); f2sq=f2sq[0,0];
norm = 0.8*sqrt(f2sq);
f2=f2/norm;

# setup for 3D figure
fig2 = plt.figure(2,figsize=(12,8));   # smallish figure
ax2 = fig2.add_subplot(111, projection='3d');
plt.axis('off');
plt.grid(b=None);
ax2.axes.set_xlim3d(left=-1.0, right=xmax);
ax2.axes.set_ylim3d(bottom=-1.0, top=ymax);
ax2.axes.set_zlim3d(bottom=-1.0, top=zmax);
 
# 3D box
gdabox(ax2,xmin,xmax,ymin,ymax,zmin,zmax);
gdaarrow(ax2,s1,"k-",2);
gdaarrow(ax2,s2,"k-",2);
gdaarrow(ax2,s3,"k-",2);
gdaarrow(ax2,s4,"k-",2);

gdaarrow(ax2,f1,"r-",3);
gdaarrow(ax2,f2,"r-",3);
plt.show();
print("Caption: Samples (black arrows) and an alternative set of factors (red arrows).");

gdapy13_03

Caption: Samples (black arrows) and factors (red arrows).

Caption: Samples (black arrows) and an alternative set of factors (red arrows).

# gdapy13_04
# factor analysis on Atlantic Rocks dataset
# using singular value decomposition
 
print("gdapy13_04");

# load data
D = np.genfromtxt("../Data/rocks.txt", delimiter="\t");
N, M = np.shape(D);
sio2 = D[0:N,0:1];   # SiO2
tio2 = D[0:N,1:2];   # TiO2
als03 = D[0:N,2:3];  # Al203
feot = D[0:N,3:4];   # FeO-total
mgo = D[0:N,4:5];    # MgO
cao = D[0:N,5:6];    # CaO
na20 = D[0:N,6:7];   # Na2O
k20 = D[0:N,7:8];    # K2O

# compute factors and factor loadings using singular value decompostion
S=D;
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));

# make shortened factor matrix with P factors
# and corresponding loading matrix
P=5;  
FP = np.copy(F[0:P,0:M]);
CP = np.copy(C[0:N,0:P]);

# plot singular values
fig1 = plt.figure(1,figsize=(12,5));
ax1=plt.subplot(1,1,1);
plt.axis( [ 1, Ns, 0, np.max(lam) ] );
w = gda_cvec(np.linspace(1,Ns,Ns));
plt.plot( w, lam, "k-", lw=2 );
plt.plot( w, lam, "ko", lw=2 );
plt.xlabel("index, i");
plt.ylabel("lambda(i)");
plt.show();
print("Caption: Singular-values.");
print("   ");

# display first five factors
for j in range(5):
    f1=F[j:j+1,0:M].T;
    print("factor %d" % (j) );
    print("SiO2      %f" % (f1[0,0]) );
    print("TiO2      %f" % (f1[1,0]) );
    print("Al203     %f" % (f1[2,0]) );
    print("FeO-total %f" % (f1[3,0]) );
    print("MgO       %f" % (f1[4,0]) );
    print("CaO       %f" % (f1[5,0]) );
    print("Na2O      %f" % (f1[6,0]) );
    print("k2O       %f" % (f1[7,0]) );
    print("  ");
    
# plot loadings of factors (f2,f3,f4) in 3D (c2,c3,c4) space
cmin=(-50.0);
cmax=50.0;

# setup for 3D figure
fig2 = plt.figure(2,figsize=(12,8));   # smallish figure
ax2 = fig2.add_subplot(111, projection='3d');
ax2.axes.set_xlim3d(left=cmin, right=cmax);
ax2.axes.set_ylim3d(bottom=cmin, top=cmax);
ax2.axes.set_zlim3d(bottom=cmin, top=cmax);
myelev=15.0;
myazim=125.0;
ax2.view_init(elev=myelev, azim=myazim);
ax2.set_xlabel("C1");
ax2.set_ylabel("C2");
ax2.set_zlabel("C3");
    
# 3D box
gdabox(ax2,cmin,cmax,cmin,cmax,cmin,cmax);

# plot loadings
ax2.plot3D( C[0:N,1], C[0:N,2], C[0:N,3], 'k.', lw=2 );

plt.show();
print("Caption: Scatter plot of the loadings of factors 2, 3, and 4.");

gdapy13_04

Caption: Singular-values.
   
factor 0
SiO2      -0.908829
TiO2      -0.024638
Al203     -0.275168
FeO-total -0.177851
MgO       -0.141341
CaO       -0.209989
Na2O      -0.044611
k2O       -0.003430
  
factor 1
SiO2      0.007684
TiO2      -0.037474
Al203     -0.301583
FeO-total -0.018421
MgO       0.923193
CaO       -0.226917
Na2O      -0.058457
k2O       -0.007204
  
factor 2
SiO2      -0.161689
TiO2      -0.126343
Al203     0.567828
FeO-total -0.659205
MgO       0.255748
CaO       0.365682
Na2O      -0.041738
k2O       -0.006464
  
factor 3
SiO2      0.209819
TiO2      0.151367
Al203     0.176021
FeO-total -0.427461
MgO       -0.118643
CaO       -0.780043
Na2O      0.302367
k2O       0.073403
  
factor 4
SiO2      0.309495
TiO2      -0.100476
Al203     -0.670083
FeO-total -0.585155
MgO       -0.195193
CaO       0.207980
Na2O      -0.145318
k2O       0.015035

Caption: Scatter plot of the loadings of factors 2, 3, and 4.

# gdapy13_05
# factor analysis on Atlantic Rocks dataset
# using singular value decomposition
# and the varimax procedure

print("gdapy13_05");

# load data
D = np.genfromtxt("../Data/rocks.txt", delimiter="\t");
N, M = np.shape(D);
sio2 = D[0:N,0:1];   # SiO2
tio2 = D[0:N,1:2];   # TiO2
als03 = D[0:N,2:3];  # Al203
feot = D[0:N,3:4];   # FeO-total
mgo = D[0:N,4:5];    # MgO
cao = D[0:N,5:6];    # CaO
na20 = D[0:N,6:7];   # Na2O
k20 = D[0:N,7:8];    # K2O

# compute factors and factor loadings using singular value decompostion
S=D;
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));

# keep only first five singular values
P=5;
F = F[0:P,0:M];
C = C[0:N,0:P];
 
# initial rotated factor matrix, FP, abd rotation matrix, MR
MR=np.identity(P);
FP=np.copy(F);

# spike these factors using the varimax procedure
k = gda_cvec(1, 2, 3, 4);
Nk, i = np.shape(k);

# varimax is an iterative procedure, but converges very rapidly,
# so do only 3 iterations.  Within each iteration, one applies
# the procedure to every pair of factors.
for iter in range(3): # iterations
    for ii in range(Nk):  # pairs of factors
        for jj in range(ii+1,Nk):
    
            # spike factors i and j
            i=k[ii,0];
            j=k[jj,0];
            
            # copy factors from matrix to vectors
            fA = FP[i:i+1,0:M].T;
            fB = FP[j:j+1,0:M].T;

            # standard varimax procedure to determine rotation angle q
            u = np.power(fA,2) - np.power(fB,2);
            v = 2.0* np.multiply(fA,fB);

            A = 2.0*M*np.matmul(u.T,v); A=A[0,0];
            B = np.sum(u)*np.sum(v);
            top = A - B;

            CC = M*(np.matmul(u.T,u)-np.matmul(v.T,v)); CC=CC[0,0];
            DD = (np.sum(u)**2)-(np.sum(v)**2);
            bot =  CC - DD;

            q = 0.25 * atan2(top,bot);

            cq = cos(q);
            sq = sin(q);

            fAp =  cq*fA + sq*fB;
            fBp = -sq*fA + cq*fB;

            # put back into factor matrix, FP
            FP[i:i+1,0:M] = fAp.T;
            FP[j:j+1,0:M] = fBp.T;

            # accumulate rotation
            mA = MR[i:i+1,0:M].T;
            mB = MR[j:j+1,0:M].T;
            mAp =  cq*mA + sq*mB;
            mBp = -sq*mA + cq*mB;
            MR[i:i+1,0:M] = mAp.T;
            MR[j:j+1,0:M] = mBp.T;

# new factor loadings
CP=np.matmul(C,MR.T);

# check that rotations correctly implemented
S0 = np.matmul(C,F);
SP = np.matmul(CP,FP);
e = np.max(np.abs(SP-S0));
print("max difference beween CP*FP and CPR*FPR: %e" % (e) );
print(" ");
            
# display first five factors
print("Rotated Factors" );
for j in range(5):
    f1=FP[j:j+1,0:M].T;
    print("factor %d" % (j) );
    print("SiO2      %f" % (f1[0,0]) );
    print("TiO2      %f" % (f1[1,0]) );
    print("Al203     %f" % (f1[2,0]) );
    print("FeO-total %f" % (f1[3,0]) );
    print("MgO       %f" % (f1[4,0]) );
    print("CaO       %f" % (f1[5,0]) );
    print("Na2O      %f" % (f1[6,0]) );
    print("k2O       %f" % (f1[7,0]) );
    print("  ");
    
q = k[0,0]; r=q+1; a=np.abs(F[q:r,0:M]).T; ar=np.abs(FP[q:r,0:M]).T;
q = k[1,0]; r=q+1; b=np.abs(F[q:r,0:M]).T; br=np.abs(FP[q:r,0:M]).T;
q = k[2,0]; r=q+1; c=np.abs(F[q:r,0:M]).T; cr=np.abs(FP[q:r,0:M]).T;
q = k[3,0]; r=q+1; d=np.abs(F[q:r,0:M]).T; dr=np.abs(FP[q:r,0:M]).T;
gda_draw("title f1",a, "title f2",b, "title f3",c, "title f4",d, "    ", "title f1r",ar, "title f2r",br, "title f3r",cr, "title f4r",dr);

gdapy13_05
max difference beween CP*FP and CPR*FPR: 2.131628e-14
 
Rotated Factors
factor 0
SiO2      -0.908829
TiO2      -0.024638
Al203     -0.275168
FeO-total -0.177851
MgO       -0.141341
CaO       -0.209989
Na2O      -0.044611
k2O       -0.003430
  
factor 1
SiO2      -0.131621
TiO2      -0.070685
Al203     -0.014311
FeO-total -0.011084
MgO       0.984305
CaO       -0.038976
Na2O      -0.080453
k2O       -0.022437
  
factor 2
SiO2      0.178651
TiO2      -0.077399
Al203     0.029953
FeO-total -0.979373
MgO       0.010557
CaO       0.014822
Na2O      0.016830
k2O       0.037555
  
factor 3
SiO2      0.182928
TiO2      0.195179
Al203     0.004503
FeO-total 0.012028
MgO       0.028210
CaO       -0.913888
Na2O      0.297483
k2O       0.061593
  
factor 4
SiO2      0.288635
TiO2      -0.035952
Al203     -0.944592
FeO-total 0.024113
MgO       0.010209
CaO       -0.002672
Na2O      -0.149835
k2O       0.000397

# gdapy13_06
 
# factor analysis with simple shapes
# that allows these shapes to be classified
 
print("gdapy13_06");

# define samples.  These are simple shapes
# that might represent cross-sections through
# a mountain range
N=15;
M=11;
S=np.zeros((N,M));
S[0:1,0:M] = gda_cvec(0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0).T;
S[1:2,0:M] = gda_cvec(0, 2, 3, 4, 5, 6, 5, 4, 3, 1, 0).T;
S[2:3,0:M] = gda_cvec(0, 1, 2, 4, 5, 6, 5, 4, 2, 1, 0).T;
S[3:4,0:M] = gda_cvec(0, 1, 3, 5, 5, 4.5, 4, 3.5, 3, 2.5, 0).T;
S[4:5,0:M] = np.fliplr(S[3:4,0:M]);
S[5:6,0:M] = gda_cvec(0, 6, 6, 6, 5, 5, 5, 3.5, 2, 1, 0).T;
S[6:7,0:M] = np.fliplr(S[5:6,0:M]);
S[7:8,0:M] = gda_cvec(0, 0.25, 0.5, 1, 3, 6, 3, 1, 0.5, 0.25, 0).T;
S[8:9,0:M] = gda_cvec(0, 1, 2.5, 3, 4, 6, 4, 3, 2.5, 1, 0).T;
S[9:10,0:M] = gda_cvec(0, 1, 2, 4, 6, 4, 2, 1, 1, 0.5, 0).T;
S[10:11,0:M] = np.fliplr(S[9:10,0:M]);
S[11:12,0:M] = gda_cvec(0, 0, 0.5, 0, 1, 1, 2, 4, 6, 3, 0).T;
S[12:13,0:M] = np.fliplr(S[11:12,0:M]);
S[13:14,0:M] = gda_cvec(0, 0, 0.5, 1, 3, 6, 5.5, 5.5, 4.5, 3.5, 0).T;
S[14:15,0:M] = np.fliplr(S[13:14,0:M]);

# plot samples
fig1 = plt.figure(1,figsize=(15,5));
for i in range(N):
    plt.subplot(5,3,i+1);
    plt.axis( [0, M-1, 0, 7] );
    plt.plot( np.linspace(0,M-1,M), S[i,0:M], "k-", lw=2 );
    plt.plot( np.linspace(0,M-1,M), S[i,0:M], "k.", lw=2 );
    plt.xlabel("i");
    plt.ylabel("s(%d)" % (i));
plt.show();
print("Caption: Mountain profiles 1 through 15.");
print("  ");

# factor analysis
U, lam, VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
Ns,i = np.shape(lam);
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));

print("First 3 singular values: %f %f %f" % (lam[0,0], lam[1,0], lam[2,0]) );

# plot first 3 factors.  SIgn flip is just for aesthetic plot
fig2 = plt.figure(2,figsize=(15,5));
 
for i in range(3):
    plt.subplot(1,3,i+1);
    plt.axis( [0, 10, -1, 1] );
    plt.plot( np.linspace(0,M-1,M), -F[i,0:M], "k-", 'LineWidth', 2 );
    plt.plot( np.linspace(0,M-1,M), -F[i,0:M], "k.", 'LineWidth', 2 );
    plt.plot( np.linspace(0,M-1,M), np.zeros((M,)), "k:", 'LineWidth', 2 );
    plt.xlabel("i");
    plt.ylabel("F{%d}" % (i) );
plt.show();
print("Caption: First three EOFs (factors).");


# plot of the profiles arranged by loadings 2 and 3
fig3 = plt.figure(3,figsize=(15,5));
ax1=plt.subplot(1,1,1);
plt.axis( [-8, 8, -8, 8] );
plt.plot( gda_cvec(-8, 8), gda_cvec(0, 0), "k:", lw=2);
plt.plot( gda_cvec(0, 0), gda_cvec(-8, 8), 'k:', lw=2);
 
for i in range(N):
    # loadings
    c2 = C[i,1];
    c3 = C[i,2];
    
    # demean the sample and rescale
    scale1 = 0.4;
    x = gda_cvec(np.linspace(0,M-1,M));
    s = S[i:i+1,0:M].T;
    s = s - np.mean(s);
    s = s*scale1;
    smax = np.max(s);
    ismax = np.argmax(s);
    xmax = x[ismax,0];
    xbar = np.mean(x);
    x = x-xbar;
    xmax = xmax-xbar;
    x = x*scale1;
    xmax = xmax*scale1;
 
    # offset by scaled version of loadings
    x = x+c2;
    s = s+c3;
    plt.fill( x, s, facecolor=[0, 0.5, 1.0,0.5] );
    plt.plot( x, s, "k-", lw=2);
    plt.plot( x, s, "k.", lw=2);
    plt.plot( gda_cvec( x[0,0], x[M-1,0]), gda_cvec( s[0,0], s[M-1,0]), "k-", lw=2 );
    
    plt.text( c2+xmax, c3, ("%d" % (i)) );

plt.xlabel("Factor loading, C(i,2)");
plt.ylabel("Factor loading, C(i,3}");
plt.show();

print("Caption: Mountsin profiles (blue) arranged according to loadings of");
print("EOFs (factors) 2 and 3");

gdapy13_06

Caption: Mountain profiles 1 through 15.
  
First 3 singular values: 38.356675 12.688680 7.490978

Caption: First three EOFs (factors).

Caption: Mountsin profiles (blue) arranged according to loadings of
EOFs (factors) 2 and 3

# gdapy13_07
 
# Synthetic EOF example using 2D images
# that are a mis of spatial patterns with
# time varying amplitudes
 
print("gdapy13_07");

# time-axis
Nt=25;
dt=1.0;
t = gda_cvec(dt*np.linspace(0,Nt-1,Nt));
tmax = t[Nt-1,0];
 
# space Nx by Nx grid
Nx=20;
Nx2=Nx*Nx;
dx=1.0;
L = dx*(Nx-1);
x = gda_cvec(dx*np.linspace(0,Nx-1,Nx));
xmin = x[0,0];
xmax=x[Nx-1,0];
 
# cook up data, Nt images, each Nx by Nx
# Each image contains three spatial modes
# (patterns) of variation
 
# define the modes
m0 = np.zeros((Nx,Nx));
m1 = np.zeros((Nx,Nx));
m2 = np.zeros((Nx,Nx));
mn = 0.1 * np.random.normal(loc=0.0,scale=1,size=(Nx,Nx));
for p in range(Nx):
    for q in range(Nx):
        m0[p,q] = sin(pi*x[p,0]/L)*sin(pi*x[q,0]/L);
        m1[p,q] = sin(pi*x[p,0]/L)*sin(2*pi*x[q,0]/L);
        m2[p,q] = sin(2*pi*x[p,0]/L)*sin(3*pi*x[q,0]/L);

# build data with modes with these amplitudes at different times
# These loadings were hand-tuned to look aesthetic
c0 = gda_cvec(1, -1, 1, -1, 1,   -1, 1, -1, 1, -1,     1, -1, 1, -1, 1,     -1, 1, -1, 1, -1,    1, -1, 1, -1, 1 );
c1 = gda_cvec(1, 2, 3, 4, 5,     5, 4, 3, 2, 1,        1, 2, 3, 4, 5,        5, 4, 3, 2, 1,      1, 2, 3, 4, 5 );
c2 = gda_cvec(0, 0, 0, 1, 2,     3, 2, 1, 0, 0,        0, 0, 1, 2, 1,        0, 0, 0, 0, 0,      1, 2, 3, 2, 1 );

# data is sum of modes with prescribed coefficients, plus random noise
fig1 = plt.figure(1,figsize=(12,15));
D = np.zeros( (Nx, Nx, Nt) );
for pp in range(Nt):
    D[0:Nx,0:Nx,pp] = c0[pp,0]*m0 + c1[pp,0]*m1 + c2[pp,0]*m2 + 0.7*np.random.normal(loc=0,scale=1,size=(Nx,Nx));
    plt.subplot(5,5,pp+1);
    mycmap=matplotlib.colormaps['jet'];
    plt.axis( [0, 20, 0, 20] );
    left=xmin;
    right=xmax;
    bottom=xmin;
    top=xmax;
    dmax = 5.0;
    plt.imshow( D[0:Nx,0:Nx,pp], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
    plt.title("%d" % (pp));
plt.show();
print("Caption: Set of 25 sequential images (time shown above eacch image).");

# rearrange Nx by Nx image Nx*Nx as row vector
N = Nt;
M = Nx*Nx;
S=np.zeros((N,M));
for r in range(Nt):
    for pp in range(Nx):
        for qq in range(Nx):
            s = Nx*pp+qq;
            S[r,s] = D[pp,qq,r];

# test of code to rebuild Nx by Nx image froom Nx*Nx row vector
CHECK=0;
if( CHECK ):
    D2 = np.zeros( (Nx, Nx, Nt) );
    for r in range(Nt):
        for s in range(Nx2):
            pp = int(floor(s/Nx));
            qq = s - Nx*pp;
            D2[pp,qq,r] = S[r,s];
    e=np.max(np.abs(D-D2));
    print("Error check, error in folding %e" % (e) );
    
# basic SVD analysis
U,lam,VT = la.svd(S,full_matrices=False);
lam = gda_cvec(lam);
V = VT.T;
F = VT;
C = np.matmul(U,np.diag(lam.ravel()));
S2 = np.matmul(C,F);
if( CHECK ):
    e=np.max(np.abs(S-S2));
    print("Error check, error in factorization %e" % (e) );
    
# select largest eigenvalues, reconstruct data from them
p=3;
print("p: %d" % (p) );
print("  ");
Up=U[0:N,0:p];
lamp=lam[0:p,0:1];
Cp = np.matmul(Up,np.diag(lamp.ravel()));
Fp = VT[0:p,0:M];
Sp = np.matmul(Cp,Fp);
if( CHECK ):
    e=np.max(np.abs(S2-Sp));
    print("Error check, error in rection to p factors %e" % (e) );

# plot singular-values
fig2 = plt.figure(2,figsize=(12,6));
plt.subplot(1,1,1);
w = gda_cvec(np.linspace(1,N,N));
plt.plot(w, lam, "k-", lw=3);
plt.plot(w, lam, "ko", lw=3);
plt.plot(w[0:3,0:1], lam[0:3,0:1], "ro", lw=3);
plt.xlabel("i");
plt.ylabel("L(i)");
plt.show();
print("Caption: Singular-values.");

F3 = np.zeros( (Nx, Nx, p) );
for r in range(p):
    for s in range(Nx2):
        pp = int(floor(s/Nx));
        qq = s - Nx*pp;
        F3[pp,qq,r] = Fp[r,s];

# plot EOF's
fig3 = plt.figure(3,figsize=(12,15));
for pp in range(p):
    plt.subplot(1,p,pp+1);
    mycmap=matplotlib.colormaps['jet'];
    plt.axis( [0, 20, 0, 20] );
    left=xmin;
    right=xmax;
    bottom=xmin;
    top=xmax;
    dmax = np.max(np.abs(F3[0:Nx,0:Nx,pp]));
    plt.imshow( F3[0:Nx,0:Nx,pp], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
    plt.title("%d" %p);
plt.show();
print("Caption: First 3 EOF's.");

# plot Loadings
fig4 = plt.figure(4,figsize=(12,15));
for pp in range(p):
    cpp = Cp[0:N,pp:pp+1];
    dmax=np.max(np.abs(cpp));
    plt.subplot(p,1,pp+1);
    plt.axis( [0, tmax, -dmax, dmax] );
    plt.plot( t, cpp, 'k-', lw=2);
    plt.xlabel("time t");
    plt.ylabel("C(t,i)");
    plt.title("%d" % (p) );
plt.show();
print("Caption: First 3 factor loadings.");

D3 = np.zeros( (Nx, Nx, Nt) );
for r in range(Nt):
    for s in range(Nx2):
        pp = int(floor(s/Nx));
        qq = s - Nx*pp;
        D3[pp,qq,r] = Sp[r,s];

# plot reconstructed data
fig5 = plt.figure(5,figsize=(12,15));
for p in range(Nt):
    plt.subplot(5,5,p+1);
    mycmap=matplotlib.colormaps['jet'];
    plt.axis( [0, 20, 0, 20] );
    left=xmin;
    right=xmax;
    bottom=xmin;
    top=xmax;
    dmax = 5.0;
    plt.imshow( D3[0:Nx,0:Nx,p], cmap=mycmap, vmin=-dmax, vmax=dmax, extent=(left,right,bottom,top) );
    plt.title("%d" %p);
plt.show();
print("Caption: Set of 25 sequential reconstructed images, using only p factors (time shown above each image).");

gdapy13_07

Caption: Set of 25 sequential images (time shown above eacch image).
p: 3

Caption: Singular-values.

Caption: First 3 EOF's.

Caption: First 3 factor loadings.

Caption: Set of 25 sequential reconstructed images, using only p factors (time shown above each image).