# edapy_11_00 clear all variables and import vatious modules

# History
# 2022/06/26 -  checked with most recent Python & etc. software
# 2023/07/13 -  fixed some issues, incl colormap, loglof plt, eda_cvec()
# 2024/05/26 -  Patches to accomodate new verions of numpy, scipy, matplotlib
#               patched dot product to return scalar value, and similar issues
#                  associated with depreciation of equivalence of 1x1 matrix and scalar

%reset -f
import os
from datetime import date
from math import exp, pi, sin, cos, tan, sqrt, floor, ceil, log
import numpy as np
from numpy import mod
import scipy.sparse.linalg as las
import scipy.interpolate as ip
import scipy.spatial as sp
from scipy import sparse
import scipy.linalg as la
import scipy.signal as sg
import matplotlib
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.colors import ListedColormap
from matplotlib import patches as pch
import timeit



# eda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def eda_draw(*argv):
    bw = np.zeros((256,4));
    v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
    bw[:,0] = v;
    bw[:,1] = v;
    bw[:,2] = v;
    bw[:,3] = np.ones(256);
    bwcmap = ListedColormap(bw);
    # size of plot
    W = 16;
    H = 4;
    fig1 = plt.figure(1);
    # figsize width and height in inches
    fig1.set_size_inches(W,H);
    ax1 = plt.subplot(1,1,1);
    plt.axis([0, W, -H/2, H/2]);
    plt.axis('off');
    LM = W/6;    # matrix width and heoght
    LV = W/40;   # vector width
    FS = 0.12;    # character width
    TO = 0.4;    # title vertical offset
    SP = 0.2;    # space between objects
    LS = 0.2;    # leading space
    p = LS; # starting x-position
    istitle=0; # flags presence of a title
    for a in argv:
        if isinstance(a,np.ndarray):
            sh = np.shape(a);
            if len(sh) == 1:  # conversion to nx1 array
                n = sh[0];
                m = 1;
                ap = a;
                a = np.zeros((n,1));
                a[:,0] = ap;
            else:
                n = sh[0];
                m = sh[1];
            if m==1:
                pold=p;
                left=p;
                right=p+LV;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LV;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
            else:
                pold=p;
                left=p;
                right=p+LM;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LM;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
        elif isinstance(a,str):
            ns = len(a);
            istitle=0;
            if( ns>=6 ):
                if 'title ' in a[0:6]:
                    istitle=1;
                    titlestr=a[6:];
            if( istitle != 1):
                plt.text(p,0,a);
                p = p + ns*FS + SP;
    plt.show();
    return 1;

# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def eda_cvec(*argv):
    t = int;
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        Nt = Nt + N;
        if( N==0 ):
            continue; # skip vector of zero length
        if (t==int) and isinstance(v[0,0],float):
            t=float;
        elif isinstance(v[0,0],complex):
            t=complex;
    w = np.zeros((Nt,1),dtype=t);
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        w[Nt:Nt+N,0] = v[0:N,0];
        Nt = Nt + N;
    return w;

# function to make a numpy N-by-1 column vector
# c=eda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=eda_cvec(v1,v2,...) concatenates
# many argiments.
def eda_cvec1(v):
    if isinstance(v, int) or isinstance(v, np.int32):
        w = np.zeros((1,1),dtype=int);
        w[0,0] = v;
        return w;
    elif isinstance(v, float):
        w = np.zeros((1,1),dtype=float);
        w[0,0] = v;
        return w;
    elif isinstance(v, complex):
        w = np.zeros((1,1),dtype=complex);
        w[0,0] = v;
        return w;
    elif isinstance(v, np.ndarray):
        s = np.shape(v);
        if len(s) == 1:
            return np.copy(np.reshape(v,(s[0],1)));
        else:
            [r,c]=s;
            if( c==1 ):
                return(np.copy(v));
            elif(r==1):
                return(np.copy(v.T));
            else:
                raise TypeError("eda_cvec: %d by %d ndarray not allowed" % (r, c));
    elif isinstance(v, list):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(v, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: list contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    elif isinstance(v, tuple):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(v, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: tuple contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    else:
        raise TypeError("eda_cvec: %s not supported" % type(v));
        
# sigma function, works with floats and Nx1 arrays
def eda_sigma(w,x):
    if( isinstance(x,np.ndarray) ):
        N,M = np.shape(x);
        y = np.zeros((N,1));
        for i in range(N):
            if( x[i,0] >= 0.0):
                y[i,0] = 1/( 1 + exp(-w*x[i,0]) );
            else:
                t = exp(w*x[i,0]);
                y[i,0] = t / (t + 1);
    elif( isinstance(x,float) ):
        y = 0.0;
        if( x >= 0.0):
            y = 1/( 1 + exp(-w*x) );
        else:
            t = exp(w*x);
            y = t / (t + 1);
    return(y);

def eda_net_init(Lmax,Nmax):
# initialize a network; produces only arrays of zeros
# input:
#        Lmax: number of layers
#        Nmax: maximum number of neurons on any layer
# output:
#        N: neurons on a layer array
#        w: weights
#        b: biases
#        a: activities

    # N[0:Lmax,1]: column-vector with number of neurons in each layer;
    N = np.zeros((Lmax,1),dtype=np.int8);

    # biases: b[0:N[i,0],i) is a column-vector that gives the
    # biases of the N(i) neurons on the i-th layer
    b = np.zeros( (Nmax, Lmax) );

    # weights: w[0:N[i,0],0:N[i-1,0],i) is a matrix that gives the
    # weights between the N[i] neurons in the i-th layer and the
    # N[i-1] neurons on the (i-1)-th layer. The weights assigned
    # to the first layer are never used and should be set to zero
    w = np.zeros( (Nmax, Nmax, Lmax) );

    # activity: a[0:N[i,0],i) is a column-vector that gives the
    # activitites of the N[i] neurons on the i-th layer
    a = np.zeros( (Nmax, Lmax) );
    
    return N, w, b, a;

def eda_net_view(N,w,b,fig,ax):
# makes a picture of a neural net
# you must set the figure before the call, e.g.
# fig = plt.figure(1);
# ax = plt.subplot(1,1,1);
    Nmax = np.max(N);
    Lmax, tmp = np.shape(N);

    # size of plot
    W = 16;
    H = 4;
    fig.set_size_inches(W,H);
    ax.axis('off')
    ax.axis([-0.5, Lmax-0.5, -0.5, Nmax-0.5]);

    scale = np.zeros((Lmax,1));
    for x in range(Lmax):
        scale[x,0] =  Nmax/N[x,0];

    for x in range(Lmax):
        for y in range(N[x,0]):
            lowerleft = ( x-0.2, y*scale[x,0]-0.4 );
            width=0.4;
            height = 0.8;
            c=0.9;
            ax.add_patch( pch.Rectangle( lowerleft, width, height, linewidth=2, fc=(c,c,c), ec=(0.0,0.0,0.0) ) );
            strx = x;
            stry = y*scale[x,0];
            mystr = 'b = %.2f' % (b[y,x]);
            ax.text(strx, stry, mystr, ha='center' );
            if ( x != (Lmax-1) ):
                for y2 in range(N[x+1,0]):
                    if ( w[ y2 , y , x+1] != 0 ):
                        ax.plot([x+0.2, x+0.8],[y*scale[x,0], y2*scale[x+1,0]],color=(0.7,0.7,0.7) );
                        strx = x+0.5;
                        stry = 0.07+(y*scale[x,0]+y2*scale[x+1,0])/2;
                        mystr = 'w = %.2f' % (w[ y2 , y , x+1]);
                        ax.text(strx, stry, mystr, ha='center' );
                                               
def eda_net_eval(N,w,b,a):
# eda_eval_net evaluates a network with properites N, w and b 
# the "input" activities a[0:N[0,1],0] must be set
# the output are the final activities and the final z's
    Nmax = np.max(N);
    Lmax, tmp = np.shape(N);
    z = np.zeros( (Nmax, Lmax) );
    for i in range(1,Lmax):
        M = w[0:N[i,0],0:N[i-1,0],i];
        v = a[0:N[i-1,0],i-1];
        u = b[0:N[i,0],i];
        zz = np.zeros((N[i,0],1));
        zz[:,0] =  np.matmul( M, v ) + u;
        z[0:N[i,0],i] = zz[:,0];
        if i < (Lmax-1):
            a[0:N[i,0],i] = eda_sigma(1.0,zz).ravel();
        else:
            a[0:N[i,0],i] = z[0:N[i,0],i];
    return a, z;

def eda_net_1dtower( Nc, slope, Xc, Dx, h, N, w, b ):
# eda_net_linear
# defines a network that computes a set of 1D towers
# input Nc: number of towers
#       slope: slope of all the towers
#       Xc: centers of the towers
#       Dx: width of the towers
#       h: heights of the towaers
#       N: layer array from eda_net_init()
#       w: weights array from eda_net_init()
#       b: bias array from from eda_net_init()
# output: updated N, w, b arrays

    N[:,0] = [1, Nc*2, 1];

    for ncvalue in range(Nc):
        w12 = 4*slope;
        w22 = 4*slope;
        w[ ((ncvalue)*2):((ncvalue)*2)+2, 0:N[0,0],  1] = np.reshape( [w12,w22], (2,1) );

        w13 =  h[ncvalue,0];
        w23 = -h[ncvalue,0];
        w[ 0:N[2,0], ((ncvalue)*2):((ncvalue)*2)+2, 2] = np.reshape( [w13,w23], (1,2) );

        b12 = -w12 * ( Xc[ncvalue,0] - Dx[ncvalue,0]/2 );
        b22 = -w22 * ( Xc[ncvalue,0] + Dx[ncvalue,0]/2 );
        b[ ((ncvalue)*2):((ncvalue)*2)+2, 1 ] = [b12, b22];

    return N, w, b;


def eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b ):
# defines a network that superimposes several towers
# input:
#    Nc: number of towers
#    slope: slope of all the towers
#    Xc, Yc: vector of the centers of the towers in x and y
#    Dx, Dy: vector of the widths of the towers in x and y
#    h: vector of heights of the towers 
#    N: layer array from eda_net_init()
#    w: weights array from eda_net_init()
#    b: bias array from from eda_net_init()
# output: updated N, w, b arrays

    N[:,0] = [2, Nc*4, Nc, 1];

    for ncvalue in range(Nc):
        w12 = 4*slope;
        w22 = 4*slope;
        w32 = 4*slope;
        w42 = 4*slope;
        w[ ((ncvalue)*4):((ncvalue)*4)+2,   0 ,1] = [w12, w22];
        w[ ((ncvalue)*4)+2:((ncvalue)*4)+4, 1 ,1] = [w32, w42];
    
        w13 = 4*slope;
        w23 = -4*slope;
        w33 = 4*slope;
        w43 = -4*slope;
        w[ncvalue, ((ncvalue)*4):((ncvalue)*4)+4,2] = [w13, w23, w33, w43];
    
        w14 = h[ncvalue,0];
        w[0, ncvalue, 3] = w14;

        b12 = -w12 * ( Xc[ncvalue,0] - Dx[ncvalue,0]/2);
        b22 = -w22 * ( Xc[ncvalue,0] + Dx[ncvalue,0]/2);
        b32 = -w32 * ( Yc[ncvalue,0] - Dy[ncvalue,0]/2);
        b42 = -w42 * ( Yc[ncvalue,0] + Dy[ncvalue,0]/2);
        b[ ((ncvalue)*4):((ncvalue)*4)+4 ,1] = [b12, b22, b32, b42];
    
        b13 = -(3*(4*slope))/2;
        b[ncvalue,2] = b13;

    return N, w, b;

def eda_net_map(N,w,b):
# builds look-up tables to allow sequential ordering
# of non-trivual biases and weights
# used to build list of model parameters used in training
# input:
#        N: neurons on layer array
#        w: weights
#        b: biases
# output
#        Nb: number of biases
#        Nw: Number of non-zero weights
#            (use near-zero weights to fool it)
#        layer_of_bias, vector(i) specifying layer of bias i
#        neuron_of_bias, vector(i) specifying neuron of bias i
#        index_of_bias, 2d array(i,j) specifying number of bias
#            of layer j, neuron i
#         layer_of_weight, vector(i) specifying layer of weight i
#         r_neuron_of_weight, vector(i) specifying right neuron of weight i
#         l_neuron_of_weight, vector(i) specifying left neuron of weight i
#         index_of_weight, array(i,j,k) specifying number of weight(i,j,k)
#         note: all the arrays are set to their maximum size, with unused indices set to (-1)

    Nmax = np.max(N);
    Lmax, tmp = np.shape(N);

    layer_of_bias = -np.ones((Lmax*Nmax,1),dtype=int);
    neuron_of_bias = -np.ones((Lmax*Nmax,1),dtype=int);
    index_of_bias = -np.ones((Nmax,Lmax),dtype=int);
    
    Nb=0;  # number of nontrivial biases
    for k in range(1,Lmax):
        for j in range(N[k,0]):
            layer_of_bias[Nb,0]=k;
            neuron_of_bias[Nb,0]=j;
            index_of_bias[j,k]=Nb;
            Nb = Nb+1;

    Nw=0;  # number of nontrivial weights
    layer_of_weight = -np.ones((Lmax*Nmax*Nmax,1),dtype=int);
    r_neuron_of_weight = -np.ones((Lmax*Nmax*Nmax,1),dtype=int);
    l_neuron_of_weight = -np.ones((Lmax*Nmax*Nmax,1),dtype=int);
    index_of_weight = -np.ones((Nmax,Nmax,Lmax),dtype=int);

    for k in range(1,Lmax):
        for i in range(N[k,0]):
            for j in range(N[k-1,0]):
                if( w[i,j,k] != 0.0 ):
                    layer_of_weight[Nw,0]=k;
                    r_neuron_of_weight[Nw,0]=i;
                    l_neuron_of_weight[Nw,0]=j;
                    index_of_weight[j,i,k]=Nw;
                    Nw = Nw+1;

    return Nb, Nw, layer_of_bias, neuron_of_bias, index_of_bias, layer_of_weight, r_neuron_of_weight, l_neuron_of_weight, index_of_weight; 

def eda_net_deriv( N,w,b,a ):
# calculates the derivatives daLmax/dw and daLmax/dw
# for a network with parameters N,w,b,a
# and returns daLmaxdw, daLmaxdb

    # N[i]:  number of neurons in layer i
    
    # Nmax: maximum number of neurons in any layer
    Nmax = np.max(N);
    
    # Lmax number of layers
    Lmax, tmp = np.shape(N);
    
    # b[i,j]: bias of neuron i in layer j
    # b = np.zeros( (Nmax, Lmax) );

    # w[i,j,k]: weights between neuron i in the k-th layer and
    # neuron j in the (k-1)-th layer. The weights assigned
    # to the first layer are never used and should be set to zero
    # w = np.zeros( (Nmax, Nmax, Lmax) );

    # activity: a[0:N[i,0],i) is a column-vector that gives the
    # activitites of the N[i] neurons on the i-th layer
    # a = np.zeros( (Nmax, Lmax) );
    
    # dada[i,j,k]: the change in the activity of the i-th neuron of the layer k
    # due to a change in the activity of the j-th neuron in the layer k-1
    dada = np.zeros((Nmax,Nmax,Lmax));

    # daLmaxda[i,j,k]: the change in the activity of the i-th neuron in layer Lmax-1
    # due to a change in the activity of the j-th neuron in the layer k-1
    daLmaxda = np.zeros((Nmax,Nmax,Lmax));

    # dadz[i,k]: the change in the activity of the i-th neuron of layer k
    # due to a change in the input z of the same neuron
    dadz = np.zeros((Nmax,Lmax));
    
    # daLmaxdb[i,j,k]: the change in the activity of the i-th neuron in layer Lmax-1
    # due to a change in the bias of the of the j-th neuron in the layer k
    daLmaxdb = np.zeros((Nmax,Nmax,Lmax));

    # daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax-q
    # due to a change in the weight connecting the j-th neuron in the layer l
    # with the k-th neuron of layer l-1
    daLmaxdw = np.zeros((Nmax,Nmax,Nmax,Lmax));

    # da/dz
    for i in range(Lmax):
        if (i==(Lmax-1)):
            # sigma function is sigma(z)=z for top layer
            # d sigma / dz = 1
            # MATLAB: dadz(1:N(i),i) = ones(N(i),1);
            dadz[0:N[i,0],i] = np.ones((N[i,0],1)).ravel();
        else:
            # sigma function is sigma(z)=(1+exp(-z))**(-1) for other layers
            # d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
            #              = exp(-z) (a**2)
            # and note exp(-z) = (1/a)-1
            # so d sigma / dz = a-a**2
            # MATLAB: dadz(1:N(i),i) = a(1:N(i),i) - (a(1:N(i),i).^2);
            dadz[0:N[i,0],i] = a[0:N[i,0],i] - np.power( a[0:N[i,0],i], 2);  
    
    # da/da
    for i in range(Lmax-1,0,-1): # MATLAB: [Lmax:-1:2]
        if ( i == (Lmax-1) ):
            # a=z in the top of the layer, and
            # z = (sum) w a + b
            # so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
            # MATLAB: dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
            dada[0:N[i,0],0:N[i-1,0],i] = w[0:N[i,0],0:N[i-1,0],i];
            # initialize daLmaxda to prepare for backpropagation
            # MATLAB: daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
            daLmaxda[0:N[i,0],0:N[i-1,0],i] = dada[0:N[i,0],0:N[i-1,0],i];
        else:
            # a=sigma(z) in the other layers, so use the chain rule
            # da/da = da(layer)/dz * dz/da(layer i-1)
            #    where as before dz/da(layer i-1) = w
            # MATLAB: dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
            V1 = np.zeros((N[i,0],1));
            V1[:,0] = dadz[0:N[i,0],i];
            M1 = np.diag(V1.ravel());
            M2 = np.zeros((N[i,0],N[i-1,0]));
            M2[:,:] = w[0:N[i,0],0:N[i-1,0],i];
            dada[0:N[i,0],0:N[i-1,0],i] = np.matmul( M1, M2 );
            # backpropagate
            # MATLAB: daLmaxda(1:N(Lmax),1:N(i-1),i) =  ...
            #                        daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
            M1 = np.zeros((N[Lmax-1,0],N[i,0]));
            M1[:,:] = daLmaxda[0:N[Lmax-1,0],0:N[i,0],i+1];
            M2 = np.zeros((N[i,0],N[i-1,0]));
            M2[:,:] = dada[0:N[i,0],0:N[i-1,0],i];
            daLmaxda[0:N[Lmax-1,0],0:N[i-1,0],i] = np.matmul(M1, M2);
        
    # da/db
    for i in range(Lmax):
        if( i==(Lmax-1) ):
            # a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
            # so da(layer i)/db = dz(layer i)/db = 1
            daLmaxdb[0:N[Lmax-1,0],0:N[Lmax-1,0],Lmax-1] = np.identity(N[Lmax-1,0]);
        else:
            # a = sigma(z) in the other layers, so use the chain rule
            # da(layer i)/db = da(layer i)/dz * dz/db
            # where as before dzdb=1
            dzdb = 1;
            dadb = np.zeros((N[i,0],1));
            dadb[:,0] = dadz[0:N[i,0],i] * dzdb;
            # then apply the chain rule for activities
            # da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
            # MATLAB: daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
            M1 = np.zeros((N[Lmax-1,0],N[i,0]));
            M1[:,:]=daLmaxda[0:N[Lmax-1,0],0:N[i,0],i+1];
            M2 = np.diag(dadb.ravel());
            daLmaxdb[0:N[Lmax-1,0],0:N[i,0],i] = np.matmul( M1, M2 );

    # da/dw
    # calculation of weight derivatives
    # since z(layer i) = w*a(layer i-1) + b
    # dz(layer i)/dw = a(layer i-1)
    for l in range(1,Lmax): # MATLAB: [2:Lmax]
        if( l==(Lmax-1) ):
            # in the top layer, a=z, so da/dw = dz/dw = a
            for j in range(N[l,0]):
                daLmaxdw[j,j,0:N[l-1,0],l] = a[0:N[l-1,0],l-1];
        else:
            # in the other layers, use chain rule
            # da/dw = da/dz * dz/dw
            # and then the chain ruke again
            # da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
            for j in range(N[l,0]):
                dzdw = np.zeros((N[l-1,0],1));
                dzdw[:,0] = a[0:N[l-1,0],l-1];
                dadw = dadz[j,l] * dzdw;
                # MATLAB: daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw';
                V1 = np.zeros((N[Lmax-1,0],1));
                V1[:,0] = daLmaxda[0:N[Lmax-1,0],j,l+1];
                M1 = np.matmul( V1, dadw.T );
                daLmaxdw[0:N[Lmax-1,0],j,0:N[l-1,0],l] = M1;
                
    return daLmaxdw, daLmaxdb;

def eda_net_linear( Nc, slope1, slope2, c, h, N, w, b ):
# defines a network that cpmputes the linear function
# d(x1, x2, ...) = c + h1*x1 + h2*x2 + h2*h3 ...
# (good for implementing a filter)
# input Nc: number of linear components
#       slope1: slope of the first stage (say 0.01)
#       slope2: slope of the second stage (say 0.02)
#              should be different from slope1
#       c: intercept
#       h: slopes
#       N: layer array from eda_net_init()
#       w: weights array from eda_net_init()
#       b: bias array from from eda_net_init()
# output: updated N, w, b arrays

    Nmax = np.max(N);
    Lmax, tmp = np.shape(N);
    N[:,0] = [Nc, Nc, Nc, 1];

    b[0,3] = c;

    for ncvalue in range(Nc):

        w12 = 4*slope1;
        w[ncvalue, ncvalue ,1] = w12;
  
        w13 = 4*slope2;
        w[ncvalue, ncvalue ,2] = w13;
    
        w2w3 = w12*w13;
    
        w14 = 16*h[ncvalue,0]/w2w3;
        w[0,ncvalue, 3] = w14;

        b12 = 0;
        b[ncvalue,1] = b12;
    
        b13 = -w13/2;
        b[ncvalue,2] = b13;
    
        b14 = -8*h[ncvalue,0]/w2w3;
        b[0,3] = b[0,3]+ b14;

    return N, w, b;

# edapy_11_01, example of a Taylor Series
# using the function
# 1/(1-x) = 1 + x + x^2 + x^3 + ...

# standard setup of time axis, t
N = 101;       # number of points
tmin = -1.2;   # start time
tmax = 0.8;    # end time
Dt = (tmax-tmin)/(N-1);  # sampling interval
t = eda_cvec( np.linspace(tmin,tmax,N) ); # time vector

#  plot data
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(1,1,1);
plt.axis([tmin, tmax, 0, 2]);
plt.xlabel('t');
plt.ylabel('y(t)');

# true function
y = 1.0/(1.0-t);
plt.plot(t,y,'-',linewidth=5,color=(0.8,0.8,0.8));
t0=0.0;
y0=1./(1.0-t0);
plt.plot(t0,y0,'ko',linewidth=2);

# linear appriximation
y1 = 1.0+t;
plt.plot(t,y1,'k:',linewidth=2);

# quadratic approximation
y2 = 1.0+t+np.power(t,2);
plt.plot(t,y2,'k--',linewidth=1.5);

# cubic approximation
y3 = 1+t+np.power(t,2)+np.power(t,3);
plt. plot(t,y3,'k-',linewidth=3);
           
plt.show();

# edapy_11_02, approximation for small distances on a sphere

# degree--radian conversion factors
RTOD = 180.0/pi;
DTOR = pi/180.0;

# the object is to compute distances between (lat1, lon1) and (lat2, lon2)
# (lat1, lon1) is a single point
# (lat2, lon2) are vectors with N points
N = 101;  # number of points
lat1 = 30.0*DTOR; # lat of single point
lon1 = 30.0*DTOR; # lon of single point
lat2min = 30.0*DTOR; # start lat of vector
lat2max = 40.0*DTOR; # end lat of vector
lat2 = eda_cvec( np.linspace(lat2min,lat2max,N) ); # vector of lats
lon2 = lat2; # vector of lons
latbar = (lat1 + lat2)/2; # vector of mean lat
Dlon = lon2 - lon1;       # vector of difference in lat with respect to first endpoint
Dlat = lat2 - lat1;       # vector of difference in lon with respect to first endpoint

# law of cosines on a sphere
cr = sin(lat1)*np.sin(lat2) + np.multiply( cos(lat1)*np.cos(lat2), np.cos(Dlon));
r = np.arccos(cr);

# flat earth approximation
coslatbar2 = np.power( np.cos(latbar), 2);
Dlat2 = np.power(Dlat,2);
Dlon2 = np.power(Dlon,2);
t1 = Dlat2 + np.multiply( Dlon2, coslatbar2 );
r1 = np.sqrt( t1 );

# higher order correction
t2 = np.multiply( Dlat2, np.multiply( Dlon2, (0.25-(coslatbar2/6)) ) );
r2 = np.sqrt( t1-t2 );

# plot distances
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(2,1,1);
plt.axis([30, 40, 0, 15]);
plt.xlabel('latitude (deg)');
plt.ylabel('r (deg)');
plt.plot( RTOD*lat2, RTOD*r, '-', linewidth=7, color=(0.85,0.85,0.85) );
plt.plot( RTOD*lat2, RTOD*r1, '-', linewidth=3, color=(0.7,0.7,0.7));
plt.plot( RTOD*lat2, RTOD*r2, 'k-', linewidth=1 );

# plot error in distances
fig1 = plt.figure(1,figsize=(8,4));
ax2 = plt.subplot(2,1,2);
plt.axis([30, 40, 0, 0.02]);
plt.xlabel('latitude (deg)');
plt.ylabel('error (deg)');
plt.plot( RTOD*lat2, RTOD*(r1-r), 'k:', 'LineWidth', 2 );
plt.plot( RTOD*lat2, RTOD*(r2-r), 'k-', 'LineWidth', 2 );

plt.show();

# edapy_11_03, variance of nonlinear function
# variance of T=1/f computed by:
#        small number approximation
#        numerical simulation

# two time pi
tpi = 2*pi;

# w is angular frequency "omega"
wbar = 1.3*tpi;       # mean
sigmaw = 0.2;         # standard deviation
sigmaw2 = sigmaw**2;  # variance

# realizations of a Normally-distributed random process
N = 10000;
wr = np.random.normal(wbar,sigmaw,(N,1));

# histogram of wr
Nbins = 201;                # number of bins in histogram
wbmin = 0;                  # start bin
wbmax = 20;                 # end bin
c, e = np.histogram(wr,Nbins,(wbmin,wbmax)); # make histogram
Nc = len(c);                # length of count c
Ne = len(e);                # length of edges e
count = eda_cvec(c);        # count in each bin
edges = eda_cvec(e[0:Ne-1]); # bin edge locations
wb =  eda_cvec( 0.5*(e[0:Nbins]+e[1:Nbins+1]) ); # bin centers
Dwb = wb[1,0]-wb[0,0]; # bin spacing

# true Normal p.d.f.
pw1 = (1/(sqrt(tpi)*sigmaw))*np.exp(-np.power(wb-wbar,2)/(2*sigmaw2));

# approximate p.d.f from histogram
pw2 = count / (Dwb*np.sum(count));

# plot omega p.d.f.'s
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(2,1,1);
plt.axis([0, 10, 0, 3]);
plt.xlabel('w (radians/s)');
plt.ylabel('p(w)');
plt.plot( wb, pw1, '-', linewidth=4, color=(0.8,0.8,0.8) );
plt.plot( wb, pw2, 'k:', linewidth=2 );

# now consider period T = 2 pi / omega

# approxmate mean and variance from linear transformation
Tbar = tpi/wbar;
sigmaT = sigmaw*tpi/(wbar**2);
sigmaT2 = sigmaT**2;

# convert realization to period
Tr = np.divide( tpi, wr );

# histogram or Tr
Nbins = 201;                 # number of bins in histogram
Tbmin = 0;                   # start bin
Tbmax = 2;                   # end bin
c, e = np.histogram(Tr,Nbins,(Tbmin,Tbmax)); # make histogram
Nc = len(c);                 # length of counts c
Ne = len(e);                 # length of edges e
count = eda_cvec(c);         # count vector
edges = eda_cvec(e[0:Ne-1]); # edges vector
Tb = eda_cvec(0.5*(e[0:Nbins]+e[1:Nbins+1])); # bin centers
DTb = Tb[1,0]-Tb[0,0];       # bin spacing

# approximate p.d.f from histogram
pT2 = count / (DTb*np.sum(count));

# Normal p.d.f. based on approximate mean and variance
pT1 = (1/(sqrt(tpi)*sigmaT))*np.exp(-np.power(Tb-Tbar,2)/(2*sigmaT2));

# plot omega p.d.f.'s
ax2 = plt.subplot(2,1,2);
plt.axis([0, 1, 0, 25]);
plt.xlabel('T (s))');
plt.ylabel('p(T)');
plt.plot( Tb, pT1, '-', linewidth=4, color=(0.8,0.8,0.8) );
plt.plot( Tb, pT2, 'k:', linewidth=2 );

plt.show();

# edapy_11_04, fit of sinusoid of unknown frequency
# to Black Rock Forest temperature data, using
# fit of a sinusoid of unknown (but approximately annual)
# iterative least squares method (Newton's method)

# load Black Rock Forest temperature data
D = np.genfromtxt('../Data/brf_temp.txt', delimiter='\t')

# clean the data by deleting outliers and zero (= no data) values
test1 = (D[:,1]>-35);
test2 = (D[:,1]<40);
test3 = (D[:,1]!=0.0);
r = np.where( test1 & test2 & test3);
k = r[0];
tmp = np.shape(k);
N = tmp[0];
t = D[k,0:1];    # cleaned time vector t of length N
dobs = D[k,1:2]; # cleaned observation vector dobs of length N

# some statistics
dmean = np.mean(dobs);                  # mean
amp = 0.5*(np.max(dobs)-np.min(dobs));  # amplitude

# Annual period, frequency, angular frequency
Ta = 365.0;
fa = 1.0/Ta;
wa = 2.0*pi*fa;

# nonlinear theory and its derivative
# note model parameters are scaled to be order unity
# d = m1*dmean + amp*m2*sin(m4 wa t) + amp*m3*cos(m4 wa t)
# dd/dm1 =  dmean
# dd/dm2 =  amp*sin(m4 wa t)
# dd/dm3 =  amp*cos(m4 wa t)
# dd/dm4 =  amp*m2*wa*t*cos(m4 wa t) - amp*m3*wa*t*sin(m4 wa t)

# inital guesses
M=4;
m = np.zeros((M,1));
m[0,0] =  1.0;        # average level is mean
m[1,0] =  0.05;       # phase with max in summer
m[2,0] = -0.95;       # so only -cos() term
m[3,0] =  1.0;        # annual frequency 

s = amp*np.sin(m[3,0]*wa*t);
c = amp*np.cos(m[3,0]*wa*t);
dpre = dmean*m[0,0] + m[1,0]*s + m[2,0]*c;

# plot data and initial prediction
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(2,1,1);
plt.axis([0,5000,-30,50]);
plt.xlabel('time (days)');
plt.ylabel('temperature (deg C)');
plt.plot( t, dobs, 'k-', linewidth=2 );
plt.plot( t, dpre, '-', linewidth=2, color=(0.7,0.7,0.7) );

# Newton's method
s = amp*np.sin(m[3,0]*wa*t);
c = amp*np.cos(m[3,0]*wa*t);
dpre = dmean*m[0,0] + m[1,0]*s + m[2,0]*c;
dd = dobs - dpre;
E = np.matmul(dd.T,dd);
Niter = 10;
ic=0;
for i in range(Niter):
    ic=ic+1;
    Eprev = E;
    G = np.zeros((N,4));
    G[:,0] = (dmean*np.ones((N,1))).ravel();
    G[:,1] = s.ravel();
    G[:,2] = c.ravel();
    G[:,3] = (m[1,0]*wa*np.multiply(t,c)-m[2,0]*wa*np.multiply(t,s)).ravel();
    GTG=np.matmul(G.T, G);
    GTdd=np.matmul(G.T, dd);
    dm = la.solve( GTG, GTdd );
    m = m + dm;
    s = amp*np.sin(m[3,0]*wa*t);
    c = amp*np.cos(m[3,0]*wa*t);
    dpre = m[0,0]*dmean + m[1,0]*s + m[2,0]*c;
    dd = dobs - dpre;
    E = np.matmul(dd.T,dd); E=E[0,0];
    if( (abs(E-Eprev)/E) < 1.0e-6 ):
        break


ax1 = plt.subplot(2,1,2);
plt.axis([0,5000,-30,50]);
plt.xlabel('time (days)');
plt.ylabel('temperature (deg C)');
plt.plot( t, dobs, 'k-', linewidth=2 );
plt.plot( t, dpre, '-', linewidth=2, color=(0.7,0.7,0.7) );

plt.show();

# account for scalings and compute frequency and period
ffinal = (wa*m[3,0])/(2*pi);
Tfinal = (2*pi)/(wa*m[3,0]);
                 
fig2 = plt.figure(2,figsize=(8,4));
ax1 = plt.subplot(1,1,1);
plt.axis([320,420,0,1]);
plt.xlabel('period (days)');
plt.ylabel('normalized amplitude spectral density');
plt.plot( [Tfinal, Tfinal], [0, 0.75], 'k-', linewidth=3 );

# covariance
# f = f0 + df   and   T = T0 + dT    and   T0 = 1 / f0
# T = 1 / (f0 + df ) = (1/f0) (1+df/f0)^-1
#   = (1/f0) (1-df/f0) = (1/f0) - df/(f0^2)
# dT = (-1/(f0^2)) df
# var(dT) = (-1/(f0^2))^2 var(df)

# confidence intervals
sigma2d = E/(N-M); # posterior variance
GTGI = la.inv(GTG); # inverse of G_transpose times G
sigma2m4 = sigma2d*GTGI[3,3]; # variance of mest[3]
sigma2f = (wa**2)*sigma2m4/((2*pi)**2);  # variance of frequency f
sigma2T = sigma2f / (ffinal**4); # variance of period T
sigmaT = sqrt(sigma2T); # standard deviation of period T
DT95 = 2*sigmaT; # 95% confidence
plt.plot( [Tfinal-DT95, Tfinal+DT95], [0.7, 0.7], 'k-', linewidth=3 );

# make evenly sampled time vectr
Dt = t[1,0]-t[0,0];                 # sampling interval
tstart = t[0,0];                    # start time
tend = t[N-1,0];                    # end time
N2 = floor((tend-tstart)/Dt)+1;     # N2 is lenth of evenly sampled time series
N2 = 2*floor(N2/2); # insure even   # ensure N2 is even
t2 = eda_cvec(np.linspace(tstart,tend,N2)); # evenly sampled time vector

# standard frequency definitions
fny = 1/(2*Dt);                      # Nyquist frequency
Nf = floor(N2/2)+1;                  # number of non-negative frequencies
Df = fny / floor(N2/2);              # frequency sampling
f = eda_cvec(np.linspace(0,fny,Nf)); # vector of non-negative frequencies

# interpolate data to even sampling
# but note that data gaps filled in with stright lines
d2f = ip.interp1d(t.ravel(),dobs.ravel(),kind='linear',fill_value='extrapolate');
d2 = eda_cvec(d2f(t2));

# demean, take FFT and compute amplitude spectral density
d2 = d2-np.mean(d2);                # demean timeseries
tmp = Dt*np.fft.fft(d2,axis=0);     # fourier trasnform
d2tilda = eda_cvec(tmp[0:Nf,0]);    # keep only non-negative frequencies
asd = np.abs(d2tilda);              # amplitude spectral density
maxasd = np.max(asd);               # maximum value of a.s.d.
 
plt.plot( np.divide(1,f[1:Nf]), 0.9*asd[1:Nf]/maxasd, 'k-' );
plt.plot( np.divide(1,f[1:Nf]), 0.9*asd[1:Nf]/maxasd, 'ko' );

plt.show();

print('iterations %d' % (ic) );
print('posterior error %.3f (deg C)' % (sqrt(sigma2d)) );
print('Estimated period %.3f +/- %.3f (95%%) days' % (Tfinal, DT95) );

iterations 4
posterior error 5.619 (deg C)
Estimated period 365.612 +/- 0.060 (95%) days

# edapy_11_05, sinusoidal fit using gradient descent method
# fit of a sinusoid of unknown (but approximately annual)
# frequency to Black Rock Forest temperature data using the
# gradient descent method

# load Black Rock Forest temperature data
D = np.genfromtxt('../Data/brf_temp.txt', delimiter='\t');

# clean the data by deleting outliers and zero (= no data) values
test1 = (D[:,1]>-35);
test2 = (D[:,1]<40);
test3 = (D[:,1]!=0.0);
r = np.where( test1 & test2 & test3 );
k = r[0];
tmp = np.shape(k);
N = tmp[0];
t = D[k,0:1];    # vector of cleaned times, length N
dobs = D[k,1:2]; # vector of cleaned observations, length N

# some statistics
dmean = np.mean(dobs);                   # mean
amp = 0.5*(np.max(dobs)-np.min(dobs));   # amplitude

# Annual period, frequency, angular frequency
Ta = 365.0;
fa = 1.0/Ta;
wa = 2.0*pi*fa;

# nonlinear theory and its derivative
# note model parameters are scaled to be order unity
# d = m1*dmean + amp*m2*sin(m4 wa t) + amp*m3*cos(m4 wa t)
# dd/dm1 =  dmean
# dd/dm2 =  amp*sin(m4 wa t)
# dd/dm3 =  amp*cos(m4 wa t)
# dd/dm4 =  amp*m2*wa*t*cos(m4 wa t) - amp*m3*wa*t*sin(m4 wa t)

# inital guesses
M=4;
m = np.zeros((M,1));
m[0,0] =  1.0;        # average level is mean
m[1,0] =  0.0;        # phase with max in summer
m[2,0] = -0.75;       # so only -cos() term
m[3,0] =  1.0;        # annual frequency 

Lmsq = np.matmul(m.T,m); Lmsq=Lmsq[0,0];
Lm = sqrt(Lmsq); # solution size

s = amp*np.sin(m[3,0]*wa*t);
c = amp*np.cos(m[3,0]*wa*t);
dpre = dmean*m[0,0] + m[1,0]*s + m[2,0]*c;

# plot data and initial prediction
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(2,1,1);
plt.axis([0,5000,-30,50]);
plt.xlabel('time (days)');
plt.ylabel('temperature (deg C)');
plt.plot( t, dobs, 'k-', linewidth=2 );
plt.plot( t, dpre, '-', linewidth=2, color=(0.7,0.7,0.7) );

# error and its gradient at the trial solution
mgo = m;
s = amp*np.sin(mgo[3,0]*wa*t);
c = amp*np.cos(mgo[3,0]*wa*t);
dprego = dmean*mgo[0,0] + mgo[1,0]*s + mgo[2,0]*c;
ego = dobs - dprego;
Ego = np.matmul(ego.T,ego); Ego=Ego[0,0];
Estart = Ego;
Go = np.zeros((N,4));
Go[:,0] = (dmean*np.ones((N,1))).ravel();
Go[:,1] = s.ravel();
Go[:,2] = c.ravel();
Go[:,3] = (mgo[1,0]*wa*np.multiply(t,c)-mgo[2,0]*wa*np.multiply(t,s)).ravel();
dEdmo = -2.0*np.matmul( Go.T, ego );

# gradient methid parametres
alpha = 0.1;
c1 = 0.0001;
c2 = 0.9;
tau = 0.5;

# gradient method
Niter = 10000; # maximum number of iterations
ic=0;          # iteration counter
TOL = 1.0e-5;  # stop when fractional change in model parameters is less than
for k in range(Niter):
    
    # downslope unit vector
    dEdmosq = np.matmul(dEdmo.T,dEdmo); dEdmosq=dEdmosq[0,0];
    v = -dEdmo / sqrt(dEdmosq);
    
    # backstep
    NBACK = 100;  # maximum number of back-steps
    for kk in range(NBACK):
        ic=ic+1;                                        # increment iteration counter
        mg = mgo+alpha*v;                               # update model
        s = amp*np.sin(mg[3,0]*wa*t);                   # sines
        c = amp*np.cos(mg[3,0]*wa*t);                   # cosines
        dpreg = dmean*mg[0,0] + mg[1,0]*s + mg[2,0]*c;  # predicted data
        eg = dobs - dpreg;                              # individual errors
        Eg = np.matmul(eg.T,eg); Eg=Eg[0,0];            # total error
        G = np.zeros((N,4));                            # set up data kernel
        G[:,0] = (dmean*np.ones((N,1))).ravel();
        G[:,1] = s.ravel(); 
        G[:,2] = c.ravel();
        G[:,3] = (mg[1,0]*wa*np.multiply(t,c)-mg[2,0]*wa*np.multiply(t,s)).ravel();
        dEdm = -2.0*np.matmul(G.T,eg);                  # grdient of error
        
        RULE = 1;  # Rule=1, the simplest rule: error went down
                   # RULE=2 is Armijo's rule
                   # RULE=3 is RULE2 + Wolfe condition
        if( RULE==1 ):
            if( Eg<=Ego ):
                break;
        elif( RULE==2 ):
            if( (Eg<=(Ego + c1*alpha*np.matmul(v.T,dEdmo))) ):
                break;
        elif( RULE==3 ):
            test1 = (Eg<=(Ego + c1*alpha*np.matmul(v.T,dEdmo)));
            test2 = ((-np.matmul(v.T,dEdm)) <= (-c2*np.matmul(v.T,dEdmo)));
            if( test1 and test2 ):
                break;
        
        # if the test failed, decrease the step size
        alpha = tau*alpha;

    # change in solution
    Dmgsq = np.matmul((mg-mgo).T,(mg-mgo)); Dmgsq=Dmgsq[0,0];
    Dmg = sqrt(Dmgsq);
    
    # update
    mgo=mg;
    dprego = dpreg;
    ego = eg;
    Ego = Eg;
    Go = G;
    dEdmo = dEdm;

    if( (Dmg/Lm) < TOL ):
        break;

m = mg;         # estimated model is from last iteration
dpre = dpreg;   # predicted datais from last iteration
E = Ego;        # error from last iteration
sigma2d = E/(N-M); # posterior variance

ax1 = plt.subplot(2,1,2);
plt.axis([0,5000,-30,50]);
plt.xlabel('time (days)');
plt.ylabel('temperature (deg C)');
plt.plot( t, dobs, 'k-', linewidth=2 );
plt.plot( t, dpre, '-', linewidth=2, color=(0.7,0.7,0.7) );

plt.show();

# account for scalings and compute frequency and period
ffinal = (wa*m[3,0])/(2*pi);
Tfinal = (2*pi)/(wa*m[3,0]);
                 
fig2 = plt.figure(2,figsize=(8,4));
ax1 = plt.subplot(1,1,1);
plt.axis([320,420,0,1]);
plt.xlabel('period (days)');
plt.ylabel('normalized amplitude spectral density');
plt.plot( [Tfinal, Tfinal], [0, 0.75], 'k-', linewidth=3 );

# standard time & frequency definitions
Dt = t[1,0]-t[0,0];
tstart = t[0,0];
tend = t[N-1,0];
N2 = floor((tend-tstart)/Dt)+1;
N2 = 2*floor(N2/2); # insure even
t2 = np.zeros((N2,1));
t2[:,0] = np.linspace(tstart,tend,N2);
fny = 1/(2*Dt);
Nf = floor(N2/2)+1;
Df = fny / floor(N2/2);
f = np.zeros((Nf,1));
f[:,0] = np.linspace(0,fny,Nf);

# interpolate data to even sampling
# but note that data gaps filled in with stright lines
d2f = ip.interp1d(t.ravel(),dobs.ravel(),kind='linear',fill_value='extrapolate');
d2 = eda_cvec( d2f(t2) );

# demean, take FFT and compute amplitude spectral density
d2 = d2-np.mean(d2);               # demean
tmp = Dt*np.fft.fft(d2,axis=0);    # fourier transform
d2tilda = eda_cvec(tmp[0:Nf,0]);   # keep only non-negative frequencies
asd = np.abs(d2tilda);             # amplitude spectral density
maxasd = np.max(asd);              # maximum a.s.d.

plt.plot( np.divide(1,f[1:Nf]), 0.9*asd[1:Nf]/maxasd, 'k-' );
plt.plot( np.divide(1,f[1:Nf]), 0.9*asd[1:Nf]/maxasd, 'ko' );

plt.show();

print('iterations %d' % (ic) );
print('posterior error %.3f (deg C)' % (sqrt(sigma2d)) );
print('Estimated period %.3f days' % (Tfinal) );

iterations 4353
posterior error 5.619 (deg C)
Estimated period 365.628 days

# edapy_11_06
# stochastic gradient method, full error every
# 100 iterations
# non-linear least squares fit of a sinusoid
# of unknown frequency, using the stochastic
# gradient method.  The data are subsampled on
# each iteration.  A fixed step size alpha is
# used to adcance the solution anti-parallel to the
# gradient. The solution is considered good enough
# when its posterior variance is reduced to a
# specififed value.

# load Black Rock Forest temperature data
D = np.genfromtxt('../Data/brf_temp.txt', delimiter='\t')

# clean the data by deleting outliers and zero (= no data) values
test1 = (D[:,1]>-35);
test2 = (D[:,1]<40);
test3 = (D[:,1]!=0.0);
r = np.where( test1 & test2 & test3);
k = r[0];
tmp = np.shape(k);
N = tmp[0];
t = D[k,0:1];    # cleaned time vector, length N
dobs = D[k,1:2]; # cleaned observation vector, length N

# subsampling information
decimation_factor = 1000;
Ns = floor(N/decimation_factor);  # number of points in subsampled data

# some statistics
dmean = np.mean(dobs);                   # mean
amp = 0.5*(np.max(dobs)-np.min(dobs));   # amplitude

# the solution considered good enough when
# its posterior variance is smaller than
low_enough_variance = 10.0;

# Annual period, frequency, angular frequency
Ta = 365.0;
fa = 1.0/Ta;
wa = 2.0*pi*fa;

# the model parameters m in this problem have been
# normalized so that we expect them to be of order
# 1.  The DC offset is parameterized as a fraction
# of the mean of the data, dmean*m0. The amplitudes
# of the mean of the data, dmean*m0. The amplitudes
# of the overall amplitude of the data, m1*amp and
#  m2*amp. The frequency is parameterized as a
# fraction of the annual frequecy, m3*wa.

# The resulting nonlinear theory and its derivative
# d = m0*dmean + m1*amp*sin(m3 wa t) + m2*amp*cos(m3 wa t)
# dd/dm0 =  dmean
# dd/dm1 =  amp*sin(m4 wa t)
# dd/dm2 =  amp*cos(m4 wa t)
# dd/dm3 =  amp*m2*wa*t*cos(m4 wa t) - amp*m3*wa*t*sin(m4 wa t)

# inital guesses
M=4;
m = np.zeros((M,1));
m[0,0] =  1.0;        # average level is mean
m[1,0] =  0.0;        # phase with max in summer
m[2,0] = -0.75;       # so only -cos() term
m[3,0] =  1.0;        # annual frequency 
Lmsq = np.matmul(m.T,m); Lmsq=Lmsq[0,0];
Lm = sqrt(Lmsq);      # solution size

# evaluate initial guess
s = amp*np.sin(m[3,0]*wa*t);
c = amp*np.cos(m[3,0]*wa*t);
dpre = dmean*m[0,0] + m[1,0]*s + m[2,0]*c;

# plot data and initial prediction
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(2,1,1);
plt.axis([0,5000,-30,50]);
plt.xlabel('time (days)');
plt.ylabel('temperature (deg C)');
plt.plot( t, dobs, 'k-', linewidth=2 );
plt.plot( t, dpre, '-', linewidth=2, color=(0.7,0.7,0.7) );

# trial solution
mg = m;
mg0 = m;

# the step size alpha is fixed
alpha = 1e-4;

myrange = 1;
Niter = 10000; # number of iterations
TOL = 1e-5;  # terminate iteration when change in model parameters drops below

for k in range(Niter):
    # subsample data
    ns = np.random.randint(0,N,(Ns,1));  # Ns random integers in range 0 to N-1
    ts = eda_cvec( t[ns,0] );            # subsampled time vector
    dobss = eda_cvec( dobs[ns,0] );      # subsampled observations
    
    # compute predicted data, the error and the
    # gradient of the error of the subsampled data
    s = amp*np.sin(mg[3,0]*wa*ts);
    c = amp*np.cos(mg[3,0]*wa*ts);
    dpreg = dmean*mg[0,0] + mg[1,0]*s + mg[2,0]*c; # predicted daya
    eg = dobss - dpreg;                            # individual errors
    Eg = np.matmul(eg.T,eg); Eg=Eg[0,0];           # total error
    if( k==1 ):                                    # save starting error
        Estart = Eg;
    G = np.zeros((Ns,4));                          # set up data kernel
    G[:,0] = (dmean*np.ones((Ns,1))).ravel();
    G[:,1] = s.ravel();
    G[:,2] = c.ravel();
    G[:,3] = (m[1,0]*wa*np.multiply(ts,c)-m[2,0]*wa*np.multiply(ts,s)).ravel();                
    dEdm = -2*np.matmul(G.T,eg);                   # gradient of error
                           
    # posterior variance of data
    sigma2d =  Eg/(Ns-M);
    
    # compute just the error for the full dataset
    # but only every 100 iterations
    if( mod(k-1, 100) == 0 ):
        sfull = amp*np.sin(mg[3,0]*wa*t); # sines
        cfull = amp*np.cos(mg[3,0]*wa*t); # cosines
        dpregfull = dmean*mg[0,0] + mg[1,0]*sfull + mg[2,0]*cfull; # predicte data
        egfull = dobs - dpregfull; # individual errors
        Egfull = np.matmul(egfull.T,egfull); Egfull=Egfull[0,0]; # total error
        # posterior variance of data
        sigma2dfull =  Egfull/(N-M);
        if( k==1 ):
            Estartfull = Egfull;
        # stop when the veriance is low enough
        if( sigma2dfull <= low_enough_variance ):
            break;
    
    # downslope unit 
    dEdmsq = np.matmul(dEdm.T,dEdm); dEdmsq=dEdmsq[0,0];
    v = -dEdm / sqrt(dEdmsq);
    # update solution
    mg = mg+alpha*v;
    
    # change in solution
    Dmgsq = np.matmul((mg-mg0).T,(mg-mg0)); Dmgsq=Dmgsq[0,0];
    Dmg = sqrt(Dmgsq);
    if( (Dmg/Lm) < TOL ):
        break;

# estimated solution
m = mg;                       # estimated model parametres from the last iteration
s = amp*np.sin(m[3,0]*wa*t);  # sines
c = amp*np.cos(m[3,0]*wa*t);  # cosines
dpre = dmean*m[0,0] + m[1,0]*s + m[2,0]*c; # predicted data

plt.plot(t,dpre,'-',linewidth=2,color=(0.9,0.9,0.9));

ffinal = wa*m[3,0]/(2*pi);    # estimated freqiency
Tfinal = (2*pi)/(wa*m[3,0]);  # estimated perior

print('Iterations: %d' % (k) );
print('mean: %.3f amp: %.3f %.3f' % (m[0,0]*dmean, m[1,0]*amp, m[2,0]*amp) );
print('Estimated period %.3f error ratio %.3f' % (Tfinal, Egfull/Estartfull) );

Iterations: 9999
mean: 9.962 amp: -1.485 -13.939
Estimated period 367.183 error ratio 0.293

# edapy_11_07
# example of a grid search that uses a table
# lookup of the function
# f(z) = sin(pi*sin(pi*z*sin(pi*(1-z^2))))

# true values of the model parameters
M=2;
mtrue = np.zeros((M,1));
mtrue[0,0] = 0.5142;
mtrue[1,0] = 0.7524

# standard setup of time axis, t
N = 10001;   # length of time series
tmin = 0.0;  # start time
tmax = 1.0;  # end time
Dt = (tmax-tmin)/(N-1); # sampling interval
t = eda_cvec( np.linspace(tmin, tmax, N) ); # timve vector

# subsamples indices for plotting purposes
Nsub = 20;
isub = range(0,N,Nsub);

# the true data obey the formula
# dtrue = f(z) with z = m1 + m2*t
z = mtrue[0,0]+mtrue[1,0]*t;
dtrue = np.sin( pi * np.sin( pi*np.multiply(z,np.sin(pi*(1-np.power(z,2)))))); # true data
# synthetic estimated data is true data plus random noise
sigmad = 0.1;
dobs = dtrue + np.random.normal(0.0,sigmad,(N,1)); # observed = signal + noise
etrue = dobs - dtrue; # individual errors
Etrue = np.matmul( etrue.T, etrue ); Etrue=Etrue[0,0]; # total error
print('true:     m %.4f %.4f    E %.3f' % (mtrue[0,0], mtrue[1,0], Etrue) );

# plot the true and observed data
fig1 = plt.figure(1,figsize=(8,4));
ax1 = plt.subplot(1,1,1);
plt.axis([tmin,tmax,-1.2,1.2]);
plt.xlabel('time t');
plt.ylabel('d(t)');
plt.plot(t[isub,0],dobs[isub,0],'k.',linewidth=2);
plt.plot(t[isub,0],dtrue[isub,0],'-',linewidth=7,color=(0.9,0.9,0.9));

# range of m1 for grid search
M1=101;       # m1-dimension of grid
m1min = 0.0;  # minimum m1
m1max = 1.0;  # maximum m1
Dm1 = (m1max-m1min)/(M1-1); # sampling interval
m1 = eda_cvec( np.linspace(m1min,m1max,M1) ); # vector of m1's

# range of m2 for grid search
M2=101;       # m2-dimension of grid
m2min = 0.0;  # minimum m2
m2max = 1.0;  # maximum m2 
Dm2 = (m2max-m2min)/(M1-1); # sampling interval
m2 = eda_cvec( np.linspace(m2min,m2max,M2) ); # vector of m2's

tic = timeit.default_timer();

# standard grid search that computes f(z) exactly
# note use of tic/toc pair to time exectiion
tic;
first = 1;
for m1trial in m1.ravel():
    for m2trial in m2.ravel():
        z = m1trial+m2trial*t; # evaluate z
        dtrial = np.sin( pi * np.sin( pi*np.multiply(z,np.sin(pi*(1-np.power(z,2)))))); # evaluate d(z)
        etrial = dobs - dtrial; # individual errors
        Etrial = np.matmul(etrial.T,etrial); Etrial=Etrial[0,0]; # total error
        # keep solun with smallest error
        if( first==1 ):
            Ebest1 = Etrial;
            m1best1 = m1trial;
            m2best1 = m2trial;
            first=0;
        elif (Etrial < Ebest1 ):
            Ebest1 = Etrial;
            m1best1 = m1trial;
            m2best1 = m2trial;
toc = timeit.default_timer();
duration1=1000*(toc-tic);

# print and plot results
print('exact:    m %.4f %.4f    E %.3f duration %.0f ms' % (m1best1, m2best1, Ebest1, duration1) );
z = m1best1+m2best1*t;
dest = np.sin( pi * np.sin( pi*np.multiply(z,np.sin(pi*(1-np.power(z,2))))));
plt.plot(t[isub,0],dest[isub,0],':',linewidth=4,color=(0.6,0.6,0.6));

# grid search with table lookup of the funtion f(z)
tic=timeit.default_timer();

# compute the table
K = 1001;            # size of loolup table
zmintable = -0.1;    # minimum z value
zmaxtable =  2.1;    # maximum z value
Dztable = (zmaxtable-zmintable)/(K-1); # sampling interval in z
ztable = eda_cvec(np.linspace(zmintable,zmaxtable,K)); # vector of z's
# corresponding vector of d's
dtable = ((np.sin( pi * np.sin( pi*np.multiply(ztable,np.sin(pi*(1-np.power(ztable,2))))))));

# grid search with table lookup
first = 1;
for m1trial in m1.ravel():
    for m2trial in m2.ravel():
        z = m1trial+m2trial*t; # evaluate z
        nn = np.floor((z-zmintable)/Dztable)+1; # lookup-table index of that z
        n = nn.astype(int);
        dtrial = dtable[n,0]; # look up d(z) in table
        etrial = dobs - dtrial; # indivisual errors
        Etrial = np.matmul(etrial.T,etrial); Etrial=Etrial[0,0]; # total error
        # keep solun with smallest error
        if( first==1 ):
            Ebest2 = Etrial;
            m1best2 = m1trial;
            m2best2 = m2trial;
            first=0;
        elif (Etrial < Ebest2 ):
            Ebest2 = Etrial;
            m1best2 = m1trial;
            m2best2 = m2trial;

toc = timeit.default_timer();
duration2=1000*(toc-tic);

# print and plot estimated solution
print('lookup:   m %.4f %.4f    E %.3f duration %.0f ms' % (m1best2, m2best2, Ebest2, duration2) );
print('speed improvement %.2f percent' % (100*(duration1-duration2)/duration1) );
z = m1best2+m2best2*t; # estimated z's
dest = np.sin( pi * np.sin( pi*np.multiply(z,np.sin(pi*(1-np.power(z,2)))))); # estimated data
plt.plot(t[isub,0],dest[isub,0],'k-',linewidth=2);

plt.show();

true:     m 0.5142 0.7524    E 97.643
exact:    m 0.5100 0.7600    E 104.751 duration 7768 ms
lookup:   m 0.5000 0.7700    E 116.251 duration 1163 ms
speed improvement 85.03 percent

# edapy_11_08
# plot sigma(x) = (1+exp(-wx))^-1
# for a variety of w's.  Note that
# dsigma/dx = w exp(-w) (1+exp(-wx))**(-2)
# so the slope at x=0 is s=w/4

Ns = 4;
s = eda_cvec( [100, 3, 1, 0.1] );  # slopes

# standard x axis setup
Nx = 501;  # number of x's
xmin = -2; # starting x
xmax = 2;  # ending x
x = eda_cvec( np.linspace(xmin,xmax,Nx) ); # vector of x's

# plot the true and observed data
fig1 = plt.figure(1,figsize=(8,8));

for i in range(Ns):
    w=4*s[i,0];                    # width
    sigma = eda_sigma(w,x);        # evaluate sigma function
    # make subplot
    ax1 = plt.subplot(Ns, 1, i+1);
    plt.axis([xmin,xmax,-1.5,1.5]);
    plt.xlabel('x');
    plt.ylabel('sigma(x)');
    plt.plot(x,sigma,'k-',lw=3);

plt.show();

# edapy_11_09
#
# Use a neural net to approximate a step function

# Step function definitions 
Xc = 2.0; # center of the step function
h = 1.0; # height of the step function
slope = 250.0; # slope of the step function

# Neural Net definitions
Lmax = 3; # number of layers
Nmax = 2; # maximum number of neurons in any layer

# initialize net to zeros
N, w, b, a = eda_net_init(Lmax,Nmax);

# set net values
N[:,0] = [1, 1, 1]; # one neuron in each layer
# weights
w[0,0,1] = 4*slope; # (neuron 0 in layer 1) fr (neuron 0 in previous layer)
w[0,0,2] = h;       # (neuron 0 in layer 2) fr (neuron 0 in previous layer)
b[0,1] = -4*slope*Xc; # bias of (neuron 0 in layer 1)

# plot network
fig1 = plt.figure(1,figsize=(16,6));
ax1 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig1,ax1);
plt.show();

# standard setup of x-axis
Nx= 100;
xmin = 0.0;
xmax = 4.0;
x = eda_cvec(np.linspace(xmin,xmax,Nx));

# evaluate network for each x to get m(x)
m = np.zeros((Nx,1));
for kx in range(Nx):  # loop over x's
    a[0:N[0,0],0] = x[kx,0]; # activity of (neuron 0 in layer 0) = x
    a, z = eda_net_eval(N,w,b,a); # evaluate network
    m[kx,0] = a[0,2]; # m(x) = activity of (neuron 0 in last layer)
    # Note by Menke, 2024/05/26 patched bug, was m[kx,0] = a[0:N[2,0],2];
# plot m(x)
fig2 = plt.figure(2,figsize=(12,5));
ax1 = plt.subplot(1, 1, 1);
plt.axis([xmin,xmax,-1.5,1.5]);
plt.xlabel('x');
plt.ylabel('m(x)');
plt.plot(x,m,'k-',lw=2);

# now do a different slope slope
slope = 1.0;

Lmax = 3;
Nmax = 2;

N, w, b, a = eda_net_init(Lmax,Nmax);
N[:,0] = [1, 1, 1];
w[0,0,1] = 4*slope;
w[0,0,2] = h;
b[0,1] = -4*slope*Xc;

fig3 = plt.figure(1,figsize=(16,6));
ax3 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig3,ax3);
plt.show();

Nx= 100;
xmin = 0;
xmax = 4;
x = eda_cvec( np.linspace(xmin,xmax,Nx) );

m = np.zeros((Nx,1));
for kx in range(Nx):
    a[0:N[0,0],0] = x[kx,0];
    a, z = eda_net_eval(N,w,b,a);
    m[kx,0] = a[0,2];
    # Note by Menke, 2024/05/26 patched bug, was m[kx,0] = a[0:N[2,0],2]
    
fig4 = plt.figure(2,figsize=(12,5));
ax4 = plt.subplot(1, 1, 1);
plt.axis([xmin,xmax,-1.5,1.5]);
plt.xlabel('x');
plt.ylabel('m(x)');
plt.plot(x,m,'k-',lw=2);

# edapy_11_10, neural net for 1D tower function

# Tower function definitions
Nc=1;                      # one tower
Xc = 2.0*np.ones((Nc,1));  # center of the tower
Dx = 1.0*np.ones((Nc,1));  # width of the tower
h = 1.5*np.ones((Nc,1));   # the height of the tower function
slope = 250.0; # slope of all the towers

# neural net setup
Lmax = 3; # number of layers
Nmax = 2; # maximum number of neurons in any layer
# initialize new to zeros
N,w,b,a = eda_net_init(Lmax,Nmax);
# initialize new to the tower
N,w,b=eda_net_1dtower(Nc,slope,Xc,Dx,h,N,w,b);

# plot the net
fig1 = plt.figure(1,figsize=(16,6));
ax1 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig1,ax1);
plt.show();

# standars setup of an x-axis
Nx= 100;
xmin = 0.0;
xmax = 4.0;
x = eda_cvec(np.linspace(xmin,xmax,Nx));

# evaluate the network for each x to get m(x)
m = np.zeros((Nx,1));
for kx in range(Nx):  # loop over x
    a[0:N[0,0],0] = x[kx,0]; # activity of (neoron 0 in layer 0) = x
    a, z = eda_net_eval(N,w,b,a); # evaluate network
    m[kx,0] = a[0,2]; # m(x) is activity of (neoron 0 in last layer)
    # Note by Menke, 2024/05/26 patched bug, was m[kx,0] = a[0:N[2,0],2]
    
# plot m(x)
fig2 = plt.figure(2,figsize=(12,5));
ax1 = plt.subplot(1, 1, 1);
plt.axis([xmin,xmax,-1.0,2.0]);
plt.xlabel('a[0,0]');
plt.ylabel('a[2,0]');
plt.title('tower function; slope = 999');
plt.plot(x,m,'k-',lw=2);
plt.show();

# repeat for different slope
slope = 1.0;
[N,w,b,a] = eda_net_init(Lmax,Nmax);
[N,w,b]=eda_net_1dtower(Nc,slope,Xc,Dx,h,N,w,b);

# plot the net
fig3 = plt.figure(3,figsize=(16,6));
ax3 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig3,ax3);
plt.show();

m = np.zeros((Nx,1));
for kx in range(Nx):
    a[0:N[0,0],0] = x[kx,0];
    a, z = eda_net_eval(N,w,b,a);
    m[kx,0] = a[0,2];
    # Note by Menke, 2024/05/26 patched bug, was m[kx,0] = a[0:N[2,0],2]
    
fig4 = plt.figure(4,figsize=(12,5));
ax4 = plt.subplot(1, 1, 1);
plt.axis([xmin,xmax,-1.0,2.0]);
plt.xlabel('a[0,0]');
plt.ylabel('a[2,0]');
plt.title('tower function; slope = 999');
plt.plot(x,m,'k-',lw=2);
plt.show();

# edapy_11_11, approximate function with several towers

# standard setup of an axis, xc, of tower cenetrs
Nc= 6; # the number of centers in the approximation
xcmin = 0.0; # maximum value of the centers
xcmax = 1.0; # minimum value of the centers
xc = eda_cvec(np.linspace(xcmax,xcmin,Nc)); # the centers
Dx = np.ones((Nc,1))*(xcmax-xcmin)/(Nc-1); # widths
# yc: the true value of the polynomial function
y = 0.2 + 1.75*np.power(xc,2); # heights
h = y; # height of the step functions
slope = 250.0; # slopes all the same

# ---Neural Net definitions---
Lmax = 3; # number of layers;
Nmax = Nc * 2; # maximum number of neurons in any layer
# initialize net to zeros
[N,w,b,a] = eda_net_init(Lmax,Nmax);
# initialize net to towers
[N,w,b] = eda_net_1dtower( Nc, slope, xc, Dx, h, N, w, b );

# plot network
fig1 = plt.figure(1,figsize=(14,10));
ax1 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig1,ax1);
plt.show();

# standard setuo of x-axis
Nx= 100;
xmin = xcmin - 0.1;
xmax = xcmax + 0.1;
x = eda_cvec(np.linspace(xmin,xmax,Nx));

# evaluate the network for each x to get m(x)
m = np.zeros((Nx,1));
for kx in range(Nx):
    a[0:N[0,0],0] = x[kx,0]; # activity of (neuron 0 in layer 0) = x
    a, z = eda_net_eval(N,w,b,a); # evaluate network
    m[kx,0] = a[0,2]; # m(x) = activity of (neuron 0 in last layer)
    # Note by Menke, 2024/05/26 patched bug, was m[kx,0] = a[0:N[2,0],2]
    
# true solution
mtrue = 0.2 + 1.75*np.power(x,2);

fig2 = plt.figure(2,figsize=(12,5));
ax1 = plt.subplot(1, 1, 1);
plt.axis([xmin,xmax,-1.0,2.0]);
plt.xlabel('a[0,0]');
plt.ylabel('a[2,0]');
plt.title('tower function; slope = 999');
plt.plot(x,m,'k-',lw=4);
plt.plot(x,mtrue,'-',lw=2,color=(0.8,0.8,0.8));
plt.show();

# edapy_11_12, neural net for 2D tower


# set up towers (of which there are actually only 1)
Nc = 1; # number of centers
# center of the towers
Xc = np.zeros((Nc,1));  # x position of center
Xc[:,0] = [1.5];
Yc = np.zeros((Nc,1));  # y position of center
Yc[:,0] = [1.5];
# widths of the towers
Dx = np.zeros((Nc,1));
Dx[:,0] = [1];
Dy = np.zeros((Nc,1));
Dy[:,0] = [1];
# h: the height of the towers
h = np.zeros((Nc,1));
h[:,0] = [1];
# slope: slope of all the towers 
slope = 250;

# ---Neural Net definitions---
Lmax = 4; # number of layers;
Nmax = Nc * 4; # maximum number of neurons in any layer
# initialize net to zeros
[N,w,b,a] = eda_net_init(Lmax,Nmax);
# initialize to towers
N, w, b = eda_net_2dtower(Nc,slope,Xc,Yc,Dx,Dy,h,N,w,b);

# plot network
fig1 = plt.figure(1,figsize=(14,10));
ax1 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig1,ax1);
plt.show();

# evaluate net on this (x,y) grid
Nx = 50;
Ny = 50;
xmin = 0.0;
ymin = 0.0;
xmax = 3.1;
ymax = 3.1;
x = eda_cvec(np.linspace(xmin,xmax,Nx));
y = eda_cvec(np.linspace(ymin,ymax,Ny));
m = np.zeros((Nx,Ny));
zprev = np.zeros((Nx,Ny));

# evauate the network for each x to get m(x)
# and also save the intermediate output z02
for kx in range(Nx):
    for ky in range(Ny):
        a[0:N[0,0],0] = [x[kx,0], x[ky,0] ]; 
        a, z = eda_net_eval(N,w,b,a);
        zprev[kx,ky] = z[0,2];
        m[kx,ky] = a[0,3];
        # Note by Menke, 2024/05/26 patched bug, was m[kx,ky] = a[0:N[2,0],3];

eda_draw('title z(0,2)',zprev,'   ','title a(0,3)', m);

# edapy_11_13
# simple least squares fit of gaussian with a single tower

# grid of points in (x,y)
Nx = 21;
Ny = 21;
xmax = 5;
ymax = 5;
xmin = 0;
ymin = 0;
x = eda_cvec(np.linspace(xmin,xmax,Nx));
y = eda_cvec(np.linspace(ymin,ymax,Ny));

# (x,y) index arrays for easy access
kofij=np.zeros((Nx,Ny),dtype=int);  # linear index k of a particular (i,j)
iofk=np.zeros((Nx*Ny,1),dtype=int); # row index of a particular k
jofk=np.zeros((Nx*Ny,1),dtype=int); # col index of a particular k
Nxy=0;
for i in range(Nx):
    for j in range(Ny):
        kofij[i,j]=Nxy;
        iofk[Nxy,0]=i;
        jofk[Nxy,0]=j;
        Nxy=Nxy+1;

# Gaussian
dobs = np.zeros((Nx,Ny));
dobsvec = np.zeros((Nxy,1));
xbar = 2.4;
ybar = 2.6;
amp = 5.0;
sigmax = 1.2;
sigmay = 0.8;
cx = 1/(2*sigmax*sigmax);
cy = 1/(2*sigmay*sigmay);
for i in range(Nx):
    for j in range(Ny):
        myd = amp*exp(-cx*((x[i,0]-xbar)**2) - cy*((y[j,0]-ybar)**2) );
        dobs[i,j] = myd;
        dobsvec[kofij[i,j],0] = myd;

# build the test network for a single 2D tower
Nc = 1; # One tower
Xc = np.zeros((Nc,1));
Xc[:,0] = [2.5]; # Xc: the center of the tower
Yc = np.zeros((Nc,1));
Yc[:,0] = [2.5];
Dx  = np.zeros((Nc,1));
Dx[:,0] = [2];   # Dx: the width of the tower
Dy = np.zeros((Nc,1));
Dy[:,0] = [2];
h = np.zeros((Nc,1));
h[:,0] = [1];    # h: the height of the tower
slope = 5/4;     # slope of the component step function

# ---Neural Net definitions---
Lmax = 4;      # number of layers
Nmax = Nc * 4; # maximum neurons in any layer

[N,w,b,a] = eda_net_init(Lmax,Nmax);

# daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
# due to a change in the bias of the of the j-th neuron in the layer k

# daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
# due to a change in the weight connecting the j-th neuron in the layer l
# with the k-th neuron of layer l-1

N,w,b = eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b );

print('Untrained network');
fig2 = plt.figure(2,figsize=(14,10));
ax2 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig2,ax2);
plt.show();

Nb, Nw, layer_of_bias, neuron_of_bias, index_of_bias, \
layer_of_weight, r_neuron_of_weight, l_neuron_of_weight, index_of_weight \
= eda_net_map(N,w,b);

# linearized data kernel
M = Nb+Nw;
G = np.zeros((Nxy,M));

# predicted data
dpre = np.zeros((Nx,Ny));
dprevec = np.zeros((Nxy,1));

# initial predicted data
d0 = np.zeros((Nx,Ny));
d0vec = np.zeros((Nxy,1));

Niter=20;
for iter in range(Niter):
    # loop over (x,y)
    for kx in range(Nx):
        for ky in range(Ny):
            # activities
            a[0:N[0,0],0] = [x[kx,0],x[ky,0]];
            a, z = eda_net_eval( N,w,b,a);
            dpre[kx,ky] = a[0,3];
            dprevec[kofij[kx,ky],0]=dpre[kx,ky];
    
            # calculate the derivatives
            [daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a );
            # daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
            # due to a change in the bias of the of the j-th neuron in the layer k
            # daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
            # due to a change in the weight connecting the j-th neuron in the layer l
            # with the k-th neuron of layer l-1
    
            # biases first
            for ib in range(Nb):
                nob=neuron_of_bias[ib,0];
                lob=layer_of_bias[ib,0];
                G[kofij[kx,ky], ib ] = daLmaxdb[0, nob, lob ];

            # weights second
            for iw in range(Nw):
                rnow = r_neuron_of_weight[iw,0];
                lnow = l_neuron_of_weight[iw,0];
                low = layer_of_weight[iw,0];
                G[kofij[kx,ky], Nb+iw ] = daLmaxdw[0, rnow, lnow, low ];

    Dd = dobsvec - dprevec;
    if( iter==0 ):
        d0 = np.copy(dpre);
        d0vec = np.copy(dprevec);
        error = np.matmul( Dd.T, Dd ); error=error[0,0];
        error0 = error;

    # implement some damping to improve stability
    # but relax it after a few iterations
    if( iter<=5 ):
        epsi=1.0;
    elif (iter<=10):
        epsi=0.1;
    else:
        epsi=0.001;

    GTG = np.matmul(G.T,G) + epsi*np.identity(M);
    GTd = np.matmul(G.T,Dd);
    Dm = la.solve(GTG,GTd);

    # add in bias perturbations
    for ib in range(Nb):
        nob=neuron_of_bias[ib,0];
        lob=layer_of_bias[ib,0];
        b[nob, lob] = b[nob, lob] + Dm[ib,0];

    # add in weights perturbations
    for iw in range(Nw):
        rnow = r_neuron_of_weight[iw,0];
        lnow = l_neuron_of_weight[iw,0];
        low = layer_of_weight[iw,0];
        w[rnow, lnow, low] = w[rnow, lnow, low] + Dm[Nb+iw, 0];
        
    # loop over (x,y)
    for kx in range(Nx):
        for ky in range(Ny):
            # activities
            a[0:N[0,0],0] = [x[kx,0], x[ky,0]];
            a, z = eda_net_eval( N,w,b,a);
            dpre[kx,ky] = a[0,3];
            dprevec[kofij[kx,ky],0]=dpre[kx,ky];
            

print('------------------------------------------------------------------' );
print('Trained network');
fig3 = plt.figure(3,figsize=(14,10));
ax3 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig3,ax3);
plt.show();

eda_draw( 'title d-obs', dobs, 'title d-start', d0, 'title d-pre',dpre);

Dd = dobsvec - dprevec;
error = np.matmul(Dd.T,Dd); error=error[0,0];
print('starting error %.2f' %(error0) );
print('ending error   %.2f' %(error) );

Untrained network

------------------------------------------------------------------
Trained network

starting error 847.81
ending error   0.05

# edapy_11_14
# a network that implements the linear function y=cx+d
# for small w112 and w113 so that the approximation
# is reasonably accurate in the range -1<x<+1
# (when c, d, are both or order 1)

c = 0.5; # true slope
d = 2.0; # true intercept

# standard x axis setup
Nx = 101;
xmin = -1;
xmax = 1;
x = eda_cvec(np.linspace(xmin,xmax,Nx));

# the function we are trying o approximate
ytrue = c*x+d;

# ---Neural Net definitions---

# Nmax: number of layers;
Lmax = 4;

# Lmax: maximum number of neurons in any layer
Nmax = 1;

[N,w,b,a] = eda_net_init(Lmax,Nmax);

N[:,0] = [1, 1, 1, 1];

# these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w[0,0,1] = 4*slope1;
w[0,0,2] = 4*slope2;
w2w3 = w[0,0,1]*w[0,0,2];
b[0,1] = 0;

# the details:
# near x=0, sigma(wx) = (w/4)*x + (1/2)
# z2 = w2*x + b2 = w2*x + 0
# a2 = sigma(z2) = w2/4+(1/2)
# z3 = w3*a2 + b3 = w3*(w2/4+(1/2)+b3 = (w2*w3/4)*x + w3/2 + b3
# so with choice b3 = -w3/2
# z3 = (w2*w3/4)*x
# a3 = sigma(z3) = (w2*w3/16)*x + (1/2)
# z4 = w4*a3+b4
# a4 = c*x + d

w[0,0,3] = 16*c/w2w3;

b[0,1] = 0;
b[0,2] = -w[0,0,2]/2;
b[0,3] = d-8*c/w2w3;

fig1 = plt.figure(1,figsize=(14,10));
ax1 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig1,ax1);
plt.show();

# forward propagate the network for each value of x
yest = np.zeros((Nx,1));
for kx in range(Nx):
    a[0,0] = x[kx,0]; 
    a, z = eda_net_eval( N,w,b,a);
    yest[kx,0]=a[0,3];

# plot the results
fig2 = plt.figure(2,figsize=(6,6));
ax2 = plt.subplot(1,1,1);
plt.axis( [xmin, xmax, d+c*xmin, d+c*xmax] );
plt.plot( x, ytrue, '-', linewidth=7, color=(0.8,0.8,0.8) );
plt.plot( x, yest, 'k-', linewidth=2);
plt.xlabel('x');
plt.ylabel('y(x)');

# edapy_11_15
# network to match non-linear response funtion

# least squares settngs
Niter1=100; # Stage 1: just tune existing network
epsi1 = 0.1;
Niter2=100; # Stage 2: add links and retune 
epsi2 = 0.005;
print('Stage 1: %d %.6e; Stage 1: %d %.6e' % (Niter1, epsi1, Niter2, epsi2) );

# load NonLinear Response data
D = np.genfromtxt('../Data/nonlinearresponse.txt', delimiter='\t');
N, tmp = np.shape(D);

#  break out into separate variables, metric conversion
traw = eda_cvec(D[:,0]); # time in days
dobsraw = eda_cvec(D[:,1])/35.3146;  # discharge in cubic meters per second
xraw = eda_cvec(D[:,2])*25.4; # precipitation in millimeters per day

# in this example, we use only a Nx day portion of the
# dataset, mainly so that when we plot it, storm events
# are clearly visible

Nx=501;
istart = 0;
t = eda_cvec(traw[istart:istart+Nx,0]);
x = eda_cvec(xraw[istart:istart+Nx,0]);
dobs = eda_cvec(dobsraw[istart:istart+Nx,0]);
tmin = np.min(t);
tmax = np.max(t);
xnorm = 1/np.max(x);        # x normalization constant
dobsnorm = 1/np.max(dobs);  # data normalization constant
x = x * xnorm;              # normalied x
dobs = dobs * dobsnorm;     # normalized data

print('---------------------- Initial guess ------------------------' );

# plot the discharge and precipitation data
fig1 = plt.figure(1,figsize=(14,6));
ax1 = plt.subplot(3,1,1);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, x, 'k-', linewidth=3);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('precip x (mm/day)');
ax2 = plt.subplot(3,1,2);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs (m3/s)');

# build a guess for the filter
Nc = 10;
c0 = 2;
To = 3; 
f0 = eda_cvec(np.exp(-np.linspace(0,Nc-1,Nc)/To)); # decaayng exponetial function
f = np.zeros((Nc,1));
f[0,0] = f0[0,0]/1000;    # manually reduce size of first element of filter
f[1:Nc,0] = f0[0:Nc-1,0]; # rest of filter is decaying exponetial
h = c0*f/np.sum(f);       # normalized fiter

# ---Neural Net definitions---

Lmax = 4; # number of layers
Nmax = Nc; # maximum number of neurons in any layer
[N,w,b,a] = eda_net_init(Lmax,Nmax);

slope1 = 0.01;
slope2 = 0.02;
[N,w,b] = eda_net_linear( Nc, slope1, slope2, 0.0, h, N, w, b );

# saved untrained network
N_save = np.copy(N);
w_save = np.copy(w);
b_save = np.copy(b);
a_save = np.copy(a);

# predict data of untrained network
d0 = np.zeros((Nx,1));
for kx in range(Nx): # loop over starting point of filter
    # set bottom activities for this kx
    for j in range(N[0,0]): # loop over points in filter
        k = kx-j;
        if( k>=0 ):
            a[j,0] = x[k,0];
        else:
            a[j,0] = 0;  
        # evaluate network
        a, z = eda_net_eval( N,w,b,a);
        d0[kx,0] = a[0,3];

# plot prediction of untrained network
ax3 = plt.subplot(3,1,3);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, '-', linewidth=7, color=(0.8,0.8,0.8));
plt.plot( t, d0, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs and d0 (m3/s)');
plt.show();

print('Untrained network');
fig2 = plt.figure(2,figsize=(14,10));
ax2 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig2,ax2);
plt.show();

# ---------------------   First Pass, no links added ----------------------------------

Nb, Nw, layer_of_bias, neuron_of_bias, index_of_bias, \
layer_of_weight, r_neuron_of_weight, l_neuron_of_weight, index_of_weight \
= eda_net_map(N,w,b);

# linearized data kernel
M = Nb+Nw;
G = np.zeros((Nx,M));

# predicted data
dpre = np.zeros((Nx,1));

for iter in range(Niter1):
    for kx in range(Nx): # loop over starting point of filter; also row of G
        
        # set bottom activities for this kx
        for j in range(N[0,0]): # loop over points in filter
            k = kx-j;
            if( k>=0 ):
                a[j,0] = x[k,0];
            else:
                a[j,0] = 0;
                
        # evaluate network
        a, z = eda_net_eval( N,w,b,a);
        dpre[kx,0] = a[0,3];
    
        # calculate the derivatives
        [daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a );
        # daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
        # due to a change in the bias of the of the j-th neuron in the layer k
        # daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
        # due to a change in the weight connecting the j-th neuron in the layer l
        # with the k-th neuron of layer l-1
    
        # biases first
        for ib in range(Nb):
            nob=neuron_of_bias[ib,0];
            lob=layer_of_bias[ib,0];
            G[kx, ib ] = daLmaxdb[0, nob, lob ];

        # weights second
        for iw in range(Nw):
            rnow = r_neuron_of_weight[iw,0];
            lnow = l_neuron_of_weight[iw,0];
            low = layer_of_weight[iw,0];
            G[kx, Nb+iw ] = daLmaxdw[0, rnow, lnow, low ];
            
        # end loop over rows of G

    Dd = dobs - dpre;
    if( iter==0 ):
        d0 = np.copy(dpre);
        error0 = np.matmul( Dd.T, Dd ); error0=error0[0,0];

    # implement some damping to improve stability
    # but relax it after a few iterations
    if( iter==50 ):
        epsi1=epsi1/10;

    GTG = np.matmul(G.T,G) + epsi1*np.identity(M);
    GTd = np.matmul(G.T,Dd);
    Dm = la.solve(GTG,GTd);

    # add in bias perturbations
    for ib in range(Nb):
        nob=neuron_of_bias[ib,0];
        lob=layer_of_bias[ib,0];
        b[nob, lob] = b[nob, lob] + Dm[ib,0];

    # add in weights perturbations
    for iw in range(Nw):
        rnow = r_neuron_of_weight[iw,0];
        lnow = l_neuron_of_weight[iw,0];
        low = layer_of_weight[iw,0];
        w[rnow, lnow, low] = w[rnow, lnow, low] + Dm[Nb+iw, 0];
        
for kx in range(Nx): # loop over starting point of filter
    # set bottom activities for this kx
    for j in range(N[0,0]): # loop over points in filter
        k = kx-j;
        if( k>=0 ):
            a[j,0] = x[k,0];
        else:
            a[j,0] = 0;  
        # evaluate network
        a, z = eda_net_eval( N,w,b,a);
        dpre[kx,0] = a[0,3];

print('---------------------- Stage 1 ------------------------' );

# plot the discharge and precipitation data
fig3 = plt.figure(3,figsize=(14,6));
ax1 = plt.subplot(3,1,1);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, x, 'k-', linewidth=3);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('precip x (mm/day)');
ax2 = plt.subplot(3,1,2);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs (m3/s)');
ax3 = plt.subplot(3,1,3);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, '-', linewidth=7, color=(0.8,0.8,0.8));
plt.plot( t, dpre, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs and dpre (m3/s)');
plt.show();

print('Stage 1 Trained network');
fig4 = plt.figure(4,figsize=(14,10));
ax4 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig4,ax4);
plt.show();

Dd = dobs - dpre;
error = np.matmul(Dd.T,Dd); error=error[0,0];
error1 = error;
print('starting     error %.2f' %(error0) );
print('intermediate error %.2f' %(error1) );


# ---------------------   Second Pass, links added  ----------------------------------

i=0;
for j in range(N[0,0]):
    if( w[i,j,1] == 0 ):
        w[i,j,1] = np.random.normal(loc=0.0,scale=0.0001);
i=0
for j in range(N[0,0]):
    if( w[i,j,2] == 0 ):
        w[i,j,2] = np.random.normal(loc=0,scale=0.0001);

Nb, Nw, layer_of_bias, neuron_of_bias, index_of_bias, \
layer_of_weight, r_neuron_of_weight, l_neuron_of_weight, index_of_weight \
= eda_net_map(N,w,b);

# linearized data kernel
M = Nb+Nw;
G = np.zeros((Nx,M));

# predicted data
dpre = np.zeros((Nx,1));

for iter in range(Niter2):
    for kx in range(Nx): # loop over starting point of filter; also row of G
        
        # set bottom activities for this kx
        for j in range(N[0,0]): # loop over points in filter
            k = kx-j;
            if( k>=0 ):
                a[j,0] = x[k,0];
            else:
                a[j,0] = 0;
                
        # evaluate network
        a, z = eda_net_eval( N,w,b,a);
        dpre[kx,0] = a[0,3];
    
        # calculate the derivatives
        [daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a );
        # daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
        # due to a change in the bias of the of the j-th neuron in the layer k
        # daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
        # due to a change in the weight connecting the j-th neuron in the layer l
        # with the k-th neuron of layer l-1
    
        # biases first
        for ib in range(Nb):
            nob=neuron_of_bias[ib,0];
            lob=layer_of_bias[ib,0];
            G[kx, ib ] = daLmaxdb[0, nob, lob ];

        # weights second
        for iw in range(Nw):
            rnow = r_neuron_of_weight[iw,0];
            lnow = l_neuron_of_weight[iw,0];
            low = layer_of_weight[iw,0];
            G[kx, Nb+iw ] = daLmaxdw[0, rnow, lnow, low ];
            
        # end loop over rows of G

    Dd = dobs - dpre;

    # implement some damping to improve stability
    # but relax it after a few iterations
    if( iter==50 ):
        epsi1=epsi2/10;

    GTG = np.matmul(G.T,G) + epsi2*np.identity(M);
    GTd = np.matmul(G.T,Dd);
    Dm = la.solve(GTG,GTd);

    # add in bias perturbations
    for ib in range(Nb):
        nob=neuron_of_bias[ib,0];
        lob=layer_of_bias[ib,0];
        b[nob, lob] = b[nob, lob] + Dm[ib,0];

    # add in weights perturbations
    for iw in range(Nw):
        rnow = r_neuron_of_weight[iw,0];
        lnow = l_neuron_of_weight[iw,0];
        low = layer_of_weight[iw,0];
        w[rnow, lnow, low] = w[rnow, lnow, low] + Dm[Nb+iw, 0];
        
for kx in range(Nx): # loop over starting point of filter
    # set bottom activities for this kx
    for j in range(N[0,0]): # loop over points in filter
        k = kx-j;
        if( k>=0 ):
            a[j,0] = x[k,0];
        else:
            a[j,0] = 0;  
        # evaluate network
        a, z = eda_net_eval( N,w,b,a);
        dpre[kx,0] = a[0,3];

print('---------------------- Second Pass ------------------------' );

# plot the discharge and precipitation data
fig5 = plt.figure(3,figsize=(14,6));
ax1 = plt.subplot(3,1,1);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, x, 'k-', linewidth=3);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('precip x (mm/day)');
ax2 = plt.subplot(3,1,2);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs (m3/s)');
ax3 = plt.subplot(3,1,3)
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, '-', linewidth=7, color=(0.8,0.8,0.8));
plt.plot( t, dpre, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs and dpre (m3/s)');
plt.show();

print('Stage 2 Trained network');
fig6 = plt.figure(6,figsize=(14,10));
ax6 = plt.subplot(1,1,1);
eda_net_view(N,w,b,fig6,ax6);
plt.show();

Dd = dobs - dpre;
error = np.matmul(Dd.T,Dd); error=error[0,0];
print('starting     error %.2f' %(error0) );
print('intermediate error %.2f' %(error1) );
print('final        error %.2f' %(error) );

# Now test against a new dataset
Nx=501;
istart = 500;
t = np.zeros((Nx,1));
t[:,0] = traw[istart:istart+Nx,0];
x = np.zeros((Nx,1));
x[:,0] = xraw[istart:istart+Nx,0];
dobs = np.zeros((Nx,1))
dobs[:,0] = dobsraw[istart:istart+Nx,0];
tmin = np.min(t);
tmax = np.max(t);
x = x * xnorm;
dobs = dobs * dobsnorm;

for kx in range(Nx): # loop over starting point of filter
    # set bottom activities for this kx
    for j in range(N[0,0]): # loop over points in filter
        k = kx-j;
        if( k>=0 ):
            a[j,0] = x[k,0];
            a_save[j,0] = x[k,0];
        else:
            a[j,0] = 0;  
            a_save[j,0] = 0;
        # evaluate network
        a_save, z = eda_net_eval( N_save,w_save,b_save,a_save);
        a, z = eda_net_eval( N,w,b,a);
        d0[kx,0] = a_save[0,3];
        dpre[kx,0] = a[0,3];


print('---------------------- Application to New Data ------------------------' );

# plot the discharge and precipitation data
fig10 = plt.figure(10,figsize=(14,6));
ax1 = plt.subplot(3,1,1);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, x, 'k-', linewidth=3);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('precip x (mm/day)');
ax2 = plt.subplot(3,1,2);
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, '-', linewidth=4, color=(0.8,0.8,0.8));
plt.plot( t, d0, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs and d0 (m3/s)');
ax3 = plt.subplot(3,1,3)
plt.axis( [tmin, tmax, -0.5, 1.5 ] );
plt.plot( t, dobs, '-', linewidth=4, color=(0.8,0.8,0.8));
plt.plot( t, dpre, 'k-', linewidth=1);
plt.plot( [tmin,tmax], [0,0], 'k-', linewidth=1);
plt.xlabel('time (days)');
plt.ylabel('dobs and dpre (m3/s)');
plt.show();

Dd = dobs - d0;
error = np.matmul(Dd.T,Dd); error=error[0,0];
print('untrained error %.2f' %(error) );
Dd = dobs - dpre;
error = np.matmul(Dd.T,Dd); error=error[0,0];
print('trained   error %.2f' %(error) );

Stage 1: 100 1.000000e-01; Stage 1: 100 5.000000e-03
---------------------- Initial guess ------------------------

Untrained network

---------------------- Stage 1 ------------------------

Stage 1 Trained network

starting     error 0.75
intermediate error 0.21
---------------------- Second Pass ------------------------

Stage 2 Trained network

starting     error 0.75
intermediate error 0.21
final        error 0.04
---------------------- Application to New Data ------------------------

untrained error 0.83
trained   error 0.25

# edapy_11_16, make data file of t, d(t), x(t) that
# is ued in edapy_11_15 by using Eulers's method to
# numerically integerate the nonlinear differential
# equation developed in the text

# Note: in order to prevent the overwriting of the
# '../Data/nonlinearresponse.txt file of the standard
# distribution, this script writes the file
# '../Data/mynonlinearresponse.txt, instead.

# stardard setup of a time axis, t
N=1001;
Dt = 1.0;
t = eda_cvec(Dt*np.linspace(0,N-1,N));
tmin = t[0,0];
tmax = t[N-2,0];

# main variables
x = np.zeros((N,1));  # precipitation
y = np.zeros((N,1));  # watershed volume
d = np.zeros((N,1));  # discharge

# precipitation events are randomly placed spikes
Np = floor(N/10);
# uniformly distributed times
p = np.random.randint(low=0,high=N,size=(Np,1));
cexp = 0.5;
# exponentially-distributed amplitudes
x[p,0] = np.random.exponential(scale=cexp,size=(Np,1));
# plot precipitation, x(t)
fig1 = plt.figure(1,figsize=(14,6));
ax1 = plt.subplot(3,1,1);
plt.axis( [0, N, 0, 2 ] );
plt.plot( t, x, 'k-', linewidth=2);
plt.plot( [tmin,tmax], [0,0], 'k-', lw=1);
plt.xlabel('time, t');
plt.ylabel('precipitation, x');

# quick and dirty integration of the differential
# equation by Euler's method.  Since y(t) is only
# exemplary, there is no special reason for the
# solution to be especially accurate
cf = 0.2;
for i in range(1,N):  # loop over times
    d[i,0] = cf*(y[i-1,0]**2);
    dydt = x[i,0] - d[i,0];
    y[i,0] = y[i-1,0] + dydt * Dt;
    if( y[i,0] < 0.0 ): # negative volumes not allowed
        y[i,0]=0.0;

# plot discharge
ax1 = plt.subplot(3,1,2);
plt.axis( [0, N, 0, 1.1*max(d) ] );
plt.plot(t,d,'k-',lw=2);
plt.xlabel('time, t');
plt.ylabel('discharge, d(t)');

# plot volume
ax1 = plt.subplot(3,1,3);
plt.axis( [0, N, 0, 1.1*max(y) ] );
plt.plot(t,y,'k-','LineWidth',2);
plt.xlabel('time,t');
plt.ylabel('volume, y(t)');
plt.show();

# concatenate time, discharge, precip into a matrix C
C = np.concatenate( (t,d,x),axis=1); 
# save matrix to file of tab-separated values
np.savetxt("../Data/mynonlinearresponse.txt",C, delimiter="\t");