# gdapy08_00 clears all variables and import various modules
# you must run this cell first, before running any of others

# note that these examples use inverse theory techniques that
# are only covered in later chapters of the book

# History:
# 2023/04/26 - Created by W. Menke from Matlab version
# 2024/05/23 -  Patches to accomodate new verions of numpy, scipy, matplotlib
#               patched dot product to return scalar value

# A reset deletes any previously-created variables from memory
%reset -f

# import various packages

import numpy as np                    # matrices & etc
from matplotlib import pyplot as plt  # general plotting 
from matplotlib import patches        # plotting shapes
from datetime import date             # date and time arithmetic
from math import exp, pi, sin, cos, tan, sqrt, floor, log10, nan, log   # math functions
import scipy.linalg as la             # linear algebra functions
import os                             # operating system
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
from scipy.special import erf as scipyerf
import scipy.signal as sg
import scipy.stats as st
import scipy.sparse.linalg as las
import scipy.optimize as opt          # optimization functions such as linear programming
from scipy import sparse
import matplotlib

# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def gda_cvec(*argv):
    t = int;
    Nt = 0;
    for a in argv:
        v = gda_cvec1(a);
        N,M = np.shape(v);
        Nt = Nt + N;
        if( N==0 ):
            continue; # skip vector of zero length
        if (t==int) and isinstance(v[0,0],float):
            t=float;
        elif isinstance(v[0,0],complex):
            t=complex;
    w = np.zeros((Nt,1),dtype=t);
    Nt = 0;
    for a in argv:
        v = gda_cvec1(a);
        N,M = np.shape(v);
        w[Nt:Nt+N,0:1] = v;  # patch 20230418 was #w[Nt:Nt+N,0] = v[0:N,0];
        Nt = Nt + N;
    return w;

# function to make a numpy N-by-1 column vector
# c=gda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=gda_cvec(v1,v2,...) concatenates
# many argiments.
def gda_cvec1(v):
    if isinstance(v, int) or isinstance(v, np.int32):
        w = np.zeros((1,1),dtype=int);
        w[0,0] = v;
        return w;
    elif isinstance(v, float):
        w = np.zeros((1,1),dtype=float);
        w[0,0] = v;
        return w;
    elif isinstance(v, complex):
        w = np.zeros((1,1),dtype=complex);
        w[0,0] = v;
        return w;
    elif isinstance(v, np.ndarray):
        s = np.shape(v);
        if len(s) == 1:
            return np.copy(np.reshape(v,(s[0],1)));
        else:
            [r,c]=s;
            if( c==1 ):
                return(np.copy(v));
            elif(r==1):
                return(np.copy(v.T));
            else:
                raise TypeError("gda_cvec: %d by %d ndarray not allowed" % (r, c));
    elif isinstance(v, list):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("gda_cvec: list contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = np.array(v); # patch v -> np.array(v)
        return w;
    elif isinstance(v, tuple):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(vi, np.int32): #patch v->vi 20230418
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("gda_cvec: tuple contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = np.array(list(v)); # patch v -> np.array(list(v));
        return w;
    else:
        raise TypeError("gda_cvec: %s not supported" % type(v));

        
# gda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def gda_draw(*argv):
    DOCOLOR=True;
    if( DOCOLOR ):
        bwcmap = matplotlib.colormaps['jet'];
    else:
        bw = np.zeros((256,4));
        v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
        bw[:,0] = v;
        bw[:,1] = v;
        bw[:,2] = v;
        bw[:,3] = np.ones(256);
        bwcmap = ListedColormap(bw);
    # size of plot
    W = 16;
    H = 4;
    fig1 = plt.figure(1);
    # figsize width and height in inches
    fig1.set_size_inches(W,H);
    ax1 = plt.subplot(1,1,1);
    plt.axis([0, W, -H/2, H/2]);
    plt.axis('off');
    LM = W/6;    # matrix width and heoght
    LV = W/40;   # vector width
    FS = 0.12;    # character width
    TO = 0.4;    # title vertical offset
    SP = 0.2;    # space between objects
    LS = 0.2;    # leading space
    p = LS; # starting x-position
    istitle=0; # flags presence of a title
    for a in argv:
        if isinstance(a,np.ndarray):
            sh = np.shape(a);
            if len(sh) == 1:  # conversion to nx1 array
                n = sh[0];
                m = 1;
                ap = a;
                a = np.zeros((n,1));
                a[:,0] = ap;
            else:
                n = sh[0];
                m = sh[1];
            if m==1:
                pold=p;
                left=p;
                right=p+LV;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LV;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
            else:
                pold=p;
                left=p;
                right=p+LM;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LM;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
        elif isinstance(a,str):
            ns = len(a);
            istitle=0;
            if( ns>=6 ):
                if 'title ' in a[0:6]:
                    istitle=1;
                    titlestr=a[6:];
            if( istitle != 1):
                plt.text(p,0,a);
                p = p + ns*FS + SP;
    plt.show();
    return 1;

# bandpass filter, used in seismological example, but hand
# in a variety of settings involving time series
def gda_chebyshevfilt(d, Dt, flow, fhigh):
    # (dout,u,v)=gda_chebyshevfilt(d, Dt, flow, fhigh);
    # chebyshev IIR bandpass filter
    # d - input array of data
    # Dt - sampling interval
    # flow - low pass frequency, Hz
    # fhigh - high pass frequency, Hz
    # dout - output array of data
    # u - the numerator filter
    # v - the denominator filter
    # these filters can be used again using dout=filter(u,v,din);

    # make sure input timeseries is a column vector
    s = np.shape(d);
    N = s[0];
    if(N==1):
        dd = np.zeros((N,1));
        dd[:,0] = d;
    else:
        dd=d;
        
    # sampling rate
    rate=1/Dt;

    # ripple parameter, set to ten percent
    ripple=0.1;  

    # normalise frequency
    fl=2.0*flow/rate;
    fh=2.0*fhigh/rate;

    # center frequency 
    cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );

    # bandwidth
    bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );

    # ripple parameter factor
    rpf = sqrt((sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple));
    a = 0.5*(rpf-1.0/rpf);
    b = 0.5*(rpf+1.0/rpf);

    u=np.zeros((5,1));
    v=np.zeros((5,1));
    theta = 3*pi/4;
    sr = a * cos(theta);
    si = b * sin(theta);
    es = sqrt(sr*sr+si*si);
    tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
    v[0,0] = 1.0;
    v[1,0] = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
    v[2,0] = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
    v[3,0] = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
    v[4,0] = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
    tmp = 4.0*es*es*bw*bw/tmp;
    u[0,0] = tmp;
    u[1,0] = 0.0;
    u[2,0] = -2.0*tmp;
    u[3,0] = 0.0;
    u[4,0] = tmp;

    dout = sg.lfilter(u.ravel(),v.ravel(),dd.ravel());
    return (gda_cvec(dout),gda_cvec(u),gda_cvec(v));

# function to perform the multiplication FT (F v)
def gdaFTFmul(v):
    # this function is used by bicongugate gradient to
    # solve the generalized least squares problem Fm=F.
    # Note that "F" must be called "gdaFsparse".
    global gdaFsparse;
    s = np.shape(v);
    if(len(s)==1):
        vv = np.zeros((s[0],1));
        vv[:,0] = v;
    else:
        vv=v;
    # weirdly, "*" multiplies sparse matrices just fine
    temp = gdaFsparse*vv;
    return gdaFsparse.transpose()*temp;

# function to perform the multiplication FT (F v)
def gda_FTFmul(v):
    # this function is used by bicongugate gradient to
    # solve the generalized least squares problem Fm=F.
    # Note that "F" must be called "gdaFsparse".
    global gdaFsparse;
    s = np.shape(v);
    if(len(s)==1):
        vv = np.zeros((s[0],1));
        vv[:,0] = v;
    else:
        vv=v;
    # weirdly, "*" multiplies sparse matrices just fine
    temp = gdaFsparse*vv;
    return gdaFsparse.transpose()*temp;


# function to perform the multiplication FT f
def gda_FTFrhs(f):
    # this function is used by bicongugate gradient to
    # solve the generalized least squares problem Fm=F.
    # Note that "F" must be called "gdaFsparse".
    global gdaFsparse;
    return gdaFsparse.transpose()*f;

def gda_matrixmin(E):
    E1 = np.min(E,axis=0);
    k = np.argmin(E,axis=0);
    Emin = np.min(E1);
    ic = np.argmin(E1);
    ir = k[ic];
    return ir, ic, Emin;

# gdama08_01

# upper/lower bounds on localized average using linear programming
# data kernel G composed of arithmetic average of model parameters
 
print("gdapy08_01");

# data equation: mean of d
M=21; # must be odd
N=1;
mtrue = np.random.uniform(low=-1.0,high=1.0,size=(M,1));
mtrue = mtrue - np.mean(mtrue);  # remove mean
G=np.ones((N,M))/M;
dtrue=np.matmul(G,mtrue);
dobs=dtrue;
 
# upper bound: mi <= 1
mub = np.ones((M,1));
 
# lower bound: mi >= -1
mlb = -np.ones((M,1));
 
M2 = int(floor(M/2));
I = np.zeros((M2+1,1));
amin = np.zeros((M2+1,1));
amax = np.zeros((M2+1,1));
for L in range(M2+1):
    # averageing vector centered at N/2 of width 2*L+1
    a = np.zeros((M,1));
    il = M2-L;
    ir = M2+L;
    a[il:ir+1,0:1] = np.ones((2*L+1,1));
    I[L,0]=np.sum(a);
    a=a/I[L,0];
    
    # solve linear programming problem
    res = opt.linprog(c=a,A_eq=G,b_eq=dobs,bounds=np.concatenate((mlb,mub),axis=1));
    x = gda_cvec(res.x);
    amin[L,0] = res.fun;
    status = res.success;
    if( not status ):
        print("linear programming failed" );
        
    res = opt.linprog(c=-a,A_eq=G,b_eq=dobs,bounds=np.concatenate((mlb,mub),axis=1));
    x = gda_cvec(res.x);
    amax[L,0] = -res.fun;
    status = res.success;
    if( not status ):
        print("linear programming failed" );

# plot bounds
fig1 = plt.figure(1,figsize=(12,5));
ax1 = plt.subplot(1,1,1);
plt.axis( [1.0, M, 0.0, 1.5] );
plt.plot(I,amax, "k-", lw=2);
plt.plot(I,amax, "ko", lw=4);
plt.xlabel("width");
plt.ylabel("bound on |<m>|");
print("Caption: Lower bound on the mean of several adacent model parameters,");
print("as a function of the width of the averaging kernel");

gdapy08_01
Caption: Lower bound on the mean of several adacent model parameters,
as a function of the width of the averaging kernel

# gdapy08_02

# upper/lower bounds on localized average using linear programming
# data kernel G composed of decaying exponentials
 
print("gdapy08_02");

# model, m(z), moztly zero but a few spikes
M=101;
zmin=0;
zmax=10;
Dz=(zmax-zmin)/(M-1);
z=gda_cvec(zmin+Dz*np.linspace(0,M-1,M));
mtrue = 0.5+0.03*z;
 
# experiment: exponential smoothing of model
N=40;
G = np.zeros((N,M));
for i in range(N):
    j = int(floor((i+1)*M/N));
    nse = np.random.normal(loc=0.0,scale=0.15,size=(j,1));
    val = np.flipud(gda_cvec(np.linspace(1,j,j)/j));
    G[i:i+1,0:j] = val.T + nse.T;

GGT = np.matmul(G,G.T);
 
print("det %e" % (la.det(GGT)));

# observed data is true data plus noise
sd=0.001;
dtrue = np.matmul(G,mtrue);
dobs = dtrue + np.random.normal(loc=0.0,scale=sd,size=(N,1));
 
# draw the data kernel
gda_draw("title d",dtrue,"=","title G", G,"title mtrue",mtrue);

# minimum length solution
epsilon=1e-12;

GMG = np.matmul(G.T,la.inv(GGT+epsilon*np.identity(N)));
mest = np.matmul(GMG,dobs);
Rres = np.matmul(GMG,G);
 
# plot
fig1 = plt.figure(1,figsize=(12,5));
ax1 = plt.subplot(1,1,1);
 
# plot scale
pmmin=-0.5;
pmmax=1.5;
 
# plot the true model and the minimum length estimate
plt.axis( [zmin, zmax, pmmin, pmmax ] );
plt.plot( z, mtrue, "r-", lw=2);
plt.plot( z, mest,  "b-", lw=2);
plt.xlabel("z");
plt.ylabel("m");
 
# upper bound: mi <= 1
mub = np.ones((M,1));
 
# lower bound: mi >= -1
mlb = -np.ones((M,1));
 
# half width of averaging kernel
hw = 10;
 
# compute upper, lower bounds on average
amin=np.zeros((M,1));
amax=np.zeros((M,1));
for i in range(M):
    # averageing vector centered at M of width w
    a = np.zeros((M,1));
    j=i-hw;
    if( j<0 ):
        j=0;
    k=i+hw;
    if(k>=M):
        k=M-1;
    a[j:k+1,0:1]=np.ones((k-j+1,1))/(k-j+1);
    
    # solve linear programming problem
    res = opt.linprog(c=a,A_eq=G,b_eq=dobs,bounds=np.concatenate((mlb,mub),axis=1));
    x = gda_cvec(res.x);
    amin[i,0] = res.fun;
    status = res.success;
    if( not status ):
        print("linear programming failed" );
        
    # solve linear programming problem
    res = opt.linprog(c=-a,A_eq=G,b_eq=dobs,bounds=np.concatenate((mlb,mub),axis=1));
    x = gda_cvec(res.x);
    amax[i,0] = -res.fun;
    status = res.success;
    if( not status ):
        print("linear programming failed" );

# plot bounds
plt.plot( z, amin, "k-", lw=2);
plt.plot( z, amax, "k-", lw=2);
plt.show();
print("Caption: Exemplary inverse problem with model parameters m(z), showing");
print("true solution (red), damped least squares solution (blue), and");
print("upper and lower bounds (black)");

gdapy08_02
det 7.219721e-03

Caption: Exemplary inverse problem with model parameters m(z), showing
true solution (red), damped least squares solution (blue), and
upper and lower bounds (black)

# gdapy08_03

# Squeezing example
# The data kernel is a series of decaying exponentials in
# an auxillary variable z and is underdetermined.  Three
# models, all that fit the data to similar accuracy, are
# constructed:  Simple damped least squares; solution
# squeezed shallow ("up"), solution squeezed deep ("dn")
 
# Note: Becaue the data are noisy with noise that randomly
# varies between runs, you may have to run the scipt a few
# times until the least squares solution (black curve in
# figure 2 (top) is a good approximation to the true
# model (blue curve).
 
print("gdapy08_03");

# auxilliary variable, z
M=101;
zmin=0.0;
zmax=10.0;
Dz=(zmax-zmin)/(M-1);
z=gda_cvec(zmin+Dz*np.linspace(0,M-1,M));
 
# true solution is a boxcar
mtrue = np.zeros((M,1));
i1 = int(floor(5*M/11));
i2 = int(floor(7*M/11));
ic = int(floor(0.5*(i1+i2)));
zc = z[ic,0];
mtrue[i1:i2+1] = 1.0;
 
# data kernel is declining exponentials, normalized
# so that they compute a weighted average of the model
N=int(floor(M/2));
G = np.zeros((N,M));
c0=0.01;
for i in range(N):
    c = c0*i;
    v = np.exp(-c*z);
    vn = np.sum(v);
    G[i:i+1,0:M]=v.T/vn;

# draw the data kernel
gda_draw('title G',G);
print("Caption: Graphical depiction of the data kernel G.");
print("   ");

# create synthetic data by adding random noise to the
# true data
sd=0.0001;
dtrue = np.matmul(G,mtrue);
dobs = dtrue + np.random.normal(loc=0.0,scale=sd,size=(N,1));
 
# damped least squares
epsilon2 = 1e-9;
W = np.identity(M);
GTG = np.matmul(G.T,G) + epsilon2*W;
GTd = np.matmul(G.T,dobs);
mest0 = la.solve(GTG,GTd);
e = dobs - np.matmul(G,mest0);
E0 = np.matmul(e.T,e); E0 = E0[0,0]; # error
Ed = np.matmul(dobs.T,dobs); Ed = Ed[0,0]; # energy in data, for comparison
 
# squeeze up; the weights are low to the left
# of z=zc and high to the right of it. The error
# function erf(z) ramps up smoothly from -1 for
# z<<0 to +1 for z>>0, so I can use a scaled
# and shifted version of it to achieve weights
# that smoothly ramp up/down from one constant
# value to another at the point z0.  (Any
# "sigmoidal" shaped function would have sufficed;
# Matlab's sigmf() would be fine, too.
 
epsilon2 = 5e-9;
n=2;
A=1.0;
B=10.0;
wup = (B+1)+B*scipyerf((z-zc)/A);
Wup = np.diag(wup.ravel()); # weights grow with z
GTG = np.matmul(G.T,G) + epsilon2*Wup;
GTd = np.matmul(G.T,dobs);
mest_up = la.solve(GTG,GTd);
e = dobs - np.matmul(G,mest_up);
Eup = np.matmul(e.T,e); Eup=Eup[0,0];
 
# squeeze down
epsilon2 = 5e-9;
wdn = (B+1)-B*scipyerf((z-zc)/A)
Wdn = np.diag(wdn.ravel()); # weights decline with z
GTG = np.matmul(G.T,G) + epsilon2*Wdn;
GTd = np.matmul(G.T,dobs);
mest_dn = la.solve(GTG,GTd);
e = dobs - np.matmul(G,mest_dn);
Edn = np.matmul(e.T,e); Edn = Edn[0,0];
 
# I have adusted all the coefficients by trial and error
# so that E0/Ed is about 10-6 (a very good fit)
# Eup/E0 and Edn/E0 are both about 1.05 (almost as good)
print("Normalized error %.2e ratio: up %.3f dn %.3f" % (E0/Ed, Eup/E0, Edn/E0) );


fig1 = plt.figure(1,figsize=(12,5));

# plot true and unsqueezed models
ax1 = plt.subplot(3,1,1);
plt.axis( [zmin, zmax, -1.2, 1.2] );
plt.plot( gda_cvec(zmin, zmax), gda_cvec(0, 0), "k:", lw=2 );
plt.plot( z, mtrue, "b-", lw=3 );
plt.plot( z, mest0, "k-", lw=2 );
plt.xlabel("z");
plt.ylabel("m(z)");

# plot true and unsqueezed models
ax1 = plt.subplot(3,1,2);
plt.axis( [zmin, zmax, -1.2, 1.2] );
plt.plot( gda_cvec(zmin, zmax), gda_cvec(0, 0), "k:", lw=2 );
plt.plot( z, mest_up, "r-", lw=2 );
plt.plot( z, mest_dn, "g-", lw=2 );
plt.xlabel("z");
plt.ylabel("m(z)");

# plot true and unsqueezed models
ax1 = plt.subplot(3,1,3);
plt.axis( [zmin, zmax, 0.0, 1.1*np.max(wup)] );
plt.plot( gda_cvec(zmin, zmax), gda_cvec(0, 0), "k:", lw=2 );
plt.plot( z, wup, "r-", lw=2 );
plt.plot( z, wdn, "g-", lw=2 );
plt.xlabel("z");
plt.ylabel("W");
plt.show();
print("Caption: (Top) True solution m(z) (blue) and damped least squares solution");
print("(blue). (Middle) Upward squeezed solution (red) and downward squeezed");
print("solution (green). (Bottom) Weighting frunction for upward squeeze (green)");
print("and downward squeese (red).");

gdapy08_03

Caption: Graphical depiction of the data kernel G.
   
Normalized error 3.20e-07 ratio: up 1.136 dn 1.070

Caption: (Top) True solution m(z) (blue) and damped least squares solution
(blue). (Middle) Upward squeezed solution (red) and downward squeezed
solution (green). (Bottom) Weighting frunction for upward squeeze (green)
and downward squeese (red).