AllDotMFiles
This is not an executable LiveScript; rather, it was used to make an HTML version of all the .m files.
One .m file per cell, in alphabetical order.
clear all;
function y = autofun(v,transp_flag)
% perform FT F v = GT G v + e^2 HT H v
% where G is a Toeplitz matrix built from filter g
% using only the autocorrelation, a, of g
global a e H;
N = length(v);
GTGv=zeros(N,1);
for i = [1:N]
GTGv(i) = [fliplr(a(1:i)'), a(2:N-i+1)'] * v;
end
Hv = e*H*v;
HTHv =e*H'*Hv;
y = GTGv + HTHv;
return
function [FTf] = autofunRHS(g,dobs,e,H,h)
% companion function of autofun(); builds the RHS
% for the generalized least squares solution of
% G m = dobs with prior information e H m = e h
% and damping e = sigma_d/sigma_h
% sets up FTf = GT dobs + e^2 HT h
% GT dobs is the cross-correlation, c, of dobs with g
cl = xcorr(dobs, g);
Nc = length(cl);
Nm = floor((Nc+1)/2);
c = cl(Nm: Nc);
HTh = (e^2) * H' * h;
FTf = c + HTh;
end
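% usage sketch for autofun()/autofunRHS() (illustrative, not one of the
% original .m files): solves the generalized least squares problem
% G m = dobs with prior e H m = e h via bicg(); the synthetic filter,
% data and prior below are assumptions made for this example
global a e H;
M = 50;                              % number of model parameters
g = exp(-[0:M-1]'/5);                % illustrative filter
mtrue = random('Normal',0,1,M,1);    % synthetic true model
dtrue = conv(g,mtrue);               % synthetic data by convolution
dobs = dtrue(1:M);
al = xcorr(g);                       % autocorrelation of g
a = al(M:end);                       % keep zero and positive lags
e = 0.01;                            % damping sigma_d/sigma_h
H = eye(M,M);                        % prior information: m is small
h = zeros(M,1);
FTf = autofunRHS(g,dobs,e,H,h);      % right-hand side F'f
mest = bicg(@autofun,FTf,1e-8,3*M);  % solve F'F m = F'f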
% brf_convert.m
% read black rock forest temperature dataset in file brf_raw.txt,
% reformat time, and store it in a file 'brf_temp.txt'. The data
% in the new file, and in the matrix D, are (col 1) time in days after
% Jan 1, 1997 and (col 2) temperature in deg C.
filename='brf_raw.txt';
mname = { 'Jan' 'Feb' 'Mar' 'Apr' 'May' 'Jun' 'Jul' 'Aug' 'Sep' 'Oct' 'Nov' 'Dec' };
mnumber = { ' 1' ' 2' ' 3' ' 4' ' 5' ' 6' ' 7' ' 8' ' 9' ' 10' ' 11' ' 12' };
% count lines in file
fid = fopen(filename);
N = 0;
while 1
tline = fgetl(fid);
if ( ~ischar(tline) )
break;
end;
N=N+1;
end
fclose(fid);
% initialize data
D=zeros(N,2);
% read data, presuming lines look like
% '0700-0759 1 Jan 1997 -19.35'
fid = fopen(filename);
for i = [1:N]
tline = fgetl(fid);
j =strfind(tline,'-'); % delete dash between first two numbers
tline(j(1))=' ';
% replace month name with month number
for j = [1:12]
k=strfind(tline,char(mname(j)));
if( length(k) > 0 )
tline(k(1):k(1)+2)=char(mnumber(j));
break;
end
end
f = sscanf(tline,'%f'); % break out hr and min from hhmm-style number
h1 = floor( f(1)/100 );
m1 = f(1)-h1*100;
h2 = floor( f(2)/100 ); % break out hr and min from hhmm-style number
m2 = f(2)-h2*100;
jday = julian( f(5), f(4), f(3) ); % convert to epoch time in seconds
e1 = htoe(f(5), jday, h1, m1, 0.0, 0.0);
e2 = htoe(f(5), jday, h2, m2, 0.0, 0.0);
d1 = e1 / (24*60*60); % convert to time in days
d2 = e2 / (24*60*60);
D(i,1) = 0.5*(d1+d2); % time is mean time of two times given
D(i,2) = f(6); % temperature
end
D(:,1) = D(:,1) - D(1,1); % shift time so that dataset begins at t=0
fclose(fid);
dlmwrite('brf_temp.txt',D,'delimiter','\t','precision','%10.3f');
% brfread.m
% read black rock forest temperature dataset
function D = brfread(filename)
mname = { 'Jan' 'Feb' 'Mar' 'Apr' 'May' 'Jun' 'Jul' 'Aug' 'Sep' 'Oct' 'Nov' 'Dec' };
mnumber = { ' 1' ' 2' ' 3' ' 4' ' 5' ' 6' ' 7' ' 8' ' 9' ' 10' ' 11' ' 12' };
% count lines in file
fid = fopen(filename);
N = 0;
while 1
tline = fgetl(fid);
if ( ~ischar(tline) )
break;
end;
N=N+1;
end
fclose(fid);
% initialize data
D=zeros(N,2);
% read data, presuming lines look like
% '0700-0759 1 Jan 1997 -19.35'
fid = fopen(filename);
for i = [1:N]
tline = fgetl(fid);
j =strfind(tline,'-'); % delete dash between first two numbers
tline(j(1))=' ';
% replace month name with month number
for j = [1:12]
k=strfind(tline,char(mname(j)));
if( length(k) > 0 )
tline(k(1):k(1)+2)=char(mnumber(j));
break;
end
end
f = sscanf(tline,'%f'); % break out hr and min from hhmm-style number
h1 = floor( f(1)/100 );
m1 = f(1)-h1*100;
h2 = floor( f(2)/100 ); % break out hr and min from hhmm-style number
m2 = f(2)-h2*100;
jday = julian( f(5), f(4), f(3) ); % convert to epoch time in seconds
e1 = htoe(f(5), jday, h1, m1, 0.0, 0.0);
e2 = htoe(f(5), jday, h2, m2, 0.0, 0.0);
d1 = e1 / (24*60*60); % convert to time in days
d2 = e2 / (24*60*60);
D(i,1) = 0.5*(d1+d2); % time is mean time of two times given
D(i,2) = f(6); % temperature
end
D(:,1) = D(:,1) - D(1,1); % shift time so that dataset begins at t=0
fclose(fid);
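% usage sketch for brfread() (illustrative, not one of the original
% .m files), assuming the raw file 'brf_raw.txt' is in the current
% directory
D = brfread('brf_raw.txt');
figure(1);
clf;
plot( D(:,1), D(:,2), 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('temperature (deg C)');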
% chebyshevfilt
% chebyshev IIR bandpass filter
% din - input array of data
% Dt - sampling interval
% flow - low cutoff frequency, Hz
% fhigh - high cutoff frequency, Hz
% dout - output array of data
% u - the numerator filter
% v - the denominator filter
% these filters can be used again using dout=filter(u,v,din);
function [dout, u, v] = chebyshevfilt(din, Dt, flow, fhigh)
% sampling rate
rate=1/Dt;
% ripple parameter, set to ten percent
ripple=0.1;
% normalise frequency
fl=2.0*flow/rate;
fh=2.0*fhigh/rate;
% center frequency
cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );
% bandwidth
bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );
% ripple parameter factor
rpf = (sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple) ^ (0.5);
a = 0.5*(rpf-1.0/rpf);
b = 0.5*(rpf+1.0/rpf);
u=zeros(5,1);
v=zeros(5,1);
theta = 3*pi/4;
sr = a * cos(theta);
si = b * sin(theta);
es = sqrt(sr*sr+si*si);
tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
v(1) = 1.0;
v(2) = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
v(3) = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
v(4) = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
v(5) = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
tmp = 4.0*es*es*bw*bw/tmp;
u(1) = tmp;
u(2) = 0.0;
u(3) = -2.0*tmp;
u(4) = 0.0;
u(5) = tmp;
[dout]=filter(u,v,din);
return
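% usage sketch for chebyshevfilt() (illustrative, not one of the
% original .m files); the synthetic signal and the 0.1 to 0.2 Hz pass
% band are assumptions made for this example
Dt = 1.0;                                    % sampling interval
t = Dt*[0:1023]';
din = cos(2*pi*0.15*t) + cos(2*pi*0.02*t);   % in-band plus out-of-band
[dout, u, v] = chebyshevfilt(din, Dt, 0.1, 0.2);
dout2 = filter(u,v,din);                     % reuse the filter, as noted above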
% function to perform the multiplication y = (Ccc + sigmad2 I) v
function y = CpImul(v,transp_flag)
global N
global td
global sigmad2
global gamma2
global w0est
global sest
% Note (C + sigmad2 I) v implies yi = SUMj Cij vj + sigmad2 vi
y = zeros(N,1);
for i=1:N
y(i)=sigmad2*v(i);
for j=1:N
Tabs = abs(td(i)-td(j));
Cij = gamma2*cos(w0est*Tabs)*exp(-sest*Tabs);
y(i)=y(i)+Cij*v(j);
end
end
return
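% usage sketch for CpImul() (illustrative, not one of the original
% .m files): sets the globals used by CpImul() and solves
% (Ccc + sigmad2 I) u = dobs with bicg(); all parameter values are
% assumptions made for this example
global N td sigmad2 gamma2 w0est sest;
N = 50;
td = [0:N-1]';                        % observation times
sigmad2 = 0.01;                       % variance of the data
gamma2 = 1.0;                         % amplitude of the covariance
w0est = 2*pi/20;                      % estimated angular frequency
sest = 0.05;                          % estimated decay rate
dobs = cos(w0est*td).*exp(-sest*td);  % synthetic data
u = bicg(@CpImul,dobs,1e-6,4*N);      % solve (Ccc + sigmad2 I) u = dobs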
function mu = eda_bradley_fayyad_mean(S,K,Nsets,fraction)
% create initial k-mean cluster means, by
% the Bradley & Fayyad (1998) method, which
% is based on clustering clusters. The NxM
% sample matrix S is randomly subsampled Nsets
% times, with each set containing (fraction*N+2*K)
% samples.
% returns KxM matrix mu of means
[N,M] = size(S);
initmu = eda_random_partition_mean(S,K);
KiMAX = K*Nsets; % maximum number of means mu
Ni = floor(fraction*N)+2*K; % size of small sets
% Step 1, create KixM table CM of means of subsampled data
CM = zeros(KiMAX,M);
Ki=1; % counter for rows of CM
for j=[1:Nsets]
k = unidrnd(N,Ni,1);
Si = S(k,1:M);
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans_mod(Si, initmu);
CM(Ki:Ki+K-1,1:M) = mymu(1:K,1:M);
Ki=Ki+K;
end
% Step 2, create KixM table FMS of means of cluster means
FMS = zeros(KiMAX,M);
Ki=1; % counter for rows of CM
for j=[1:Nsets]
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(CM, CM(Ki:Ki+K-1,1:M));
FMS(Ki:Ki+K-1,1:M) = mymu(1:K,1:M);
Ki=Ki+K;
end
% Step 3A: associate each KxM member FMi of FMS
% with all individual 1xM elements of CM
% and compute the overall error
error = zeros(Nsets,1);
Ki=1;
for i=[1:Nsets]
FMi = FMS(Ki:Ki+K-1,1:M); % one group of means
Ki=Ki+K;
for j=[1:KiMAX]
CMj = CM(j,1:M); % one member of CM
for m=[1:K]
delta = (CMj(1:M) - FMi(m,1:M))';
r2 = delta'*delta;
if( m==1 ) % always accept first distance
r2min = r2;
elseif r2<r2min % accept if smaller
r2min = r2;
end
end
error(i) = error(i) + r2min;
end
end
% Step 3B: select the FMi with lowest error
[tmp, i] = min(error);
mu = FMS(K*(i-1)+1:K*(i-1)+K,1:M);
end
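% usage sketch for eda_bradley_fayyad_mean() (illustrative, not one of
% the original .m files), with a synthetic sample matrix S drawn from
% two well-separated clusters; K, Nsets and fraction are assumptions
S = [ random('Normal',0,1,100,2); random('Normal',5,1,100,2) ];
mu0 = eda_bradley_fayyad_mean(S, 2, 10, 0.1);       % initial means
[mu, k, dist2, count, Kdist2] = eda_kmeans(S, mu0); % final clustering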
% eda_draw
% A function that automates the drawing of greyscale vectors and matrices
% for example, to draw Ax=y (where A is LxL) as
% eda_draw(A, 'caption A', x, 'caption x', '=', y, 'caption y');
function eda_draw(varargin)
figure(1);
clf;
N=40;
axis([0, 8*N, 0, 4*N]);
hold on;
axis equal;
axis ij;
axis off;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
axis off;
bw=0.9*(256-linspace(0,255,256)')/256;
colormap([bw,bw,bw]);
pixel = 0;
lastpixel = 0;
center = 0;
for i = 1 :size(varargin,2)
temp = varargin{i};
if(isa(temp, 'char') )
if(~isempty(strfind(temp, 'caption')))
temp2 = temp(8:length(temp));
drawCaption(N,temp2,center);
else
lastpixel = pixel;
pixel = drawText(N,temp,pixel);
pixel=pixel+2*N/32;
lastpixel=lastpixel+2*N/32;
end
else
dims = size(temp);
if (dims(2) == 1)
lastpixel = pixel;
pixel = drawVector(N,temp,pixel);
center=(pixel + lastpixel)/2;
pixel=pixel+8*N/32;
lastpixel=lastpixel+8*N/32;
else
lastpixel = pixel;
pixel = drawMatrix(N,temp,pixel);
center=(pixel + lastpixel)/2;
pixel=pixel+8*N/32;
lastpixel=lastpixel+8*N/32;
end
end
end
return
function [pixel] = drawMatrix( N, A, pixel)
sf = 2;
range=max(max(A))-min(min(A));
if( range==0 )
range=1;
end
w=N*sf;
imagesc( [pixel, pixel + w], [(7*N/8)*sf, (15*N/8-1)*sf], (A-min(min(A)))/range);
pixel = pixel + w;
return
function [ pixel ] = drawVector( N, x, pixel )
sf = 2;
range=max(x)-min(x);
if( range==0 )
range=1;
end
w=sf*1*N/32;
imagesc( [pixel, pixel + w], [(7*N/8)*sf,(15*N/8-1)*sf], (x-min(x))/range );
pixel = pixel + w;
return
function [ pixel ] = drawText( N, printme, pixel )
sf = 2;
pixel = pixel + 3;
text(pixel, sf*11*N/8, printme, 'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle');
pixel = pixel + sf*N*length(printme)/64;
return
function [ ] = drawCaption( N, printme, pixel )
sf = 2;
text(pixel, sf*31*N/16, printme, 'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle');
return
function mu = eda_forgy_mean(S,K)
% create initial k-means cluster means from samples
% randomly drawn from the NxM sample matrix S
% returns KxM matrix mu of means
[N,M]=size(S);
k = unidrnd(N,K,1);
mu = S(k,1:M);
end
function [newmu,k,dist2,count,Kdist2] = eda_kmeans(S, mu)
% k-means cluster analysis of NxM sample matrix S
% starting from initial guess KxM mu of cluster means
% returns:
% newmu: new cluster means
% k: Nx1 vector of cluster assignments
% dist2: Nx1 vector of squared distance to cluster center
% count: Kx1 vector of samples in cluster
% Kdist2: KxK matrix of inter-cluster squared distances
[N, M] = size(S); % N samples with M elements
[K, M2] = size(mu); % K cluster means
k = zeros(N,1); % assoc sample w cluster
dist2 = zeros(N,1); % sq distance from cluster mean
Kdist2 = zeros(K,K); % cluster to nearest cluster sq distance
oldmu = mu; % old cluster means
MAXP = 2*K*M; % max iterations
for p=[1:MAXP] % iterate
% cluster assignment step
changes = 0; % number of changes
for i=[1:N] % loop over samples
for j=[1:K] % loop over cluster means
% deviation of sample from mean
delta = (S(i,1:M) - oldmu(j,1:M))';
% squared distance
r2 = delta'*delta;
if( j==1 ) % always accept first distance
r2min = r2;
newk = j;
elseif r2<r2min % accept if smaller
r2min = r2;
newk = j;
end
end
if k(i) ~= newk % count reassignments
changes=changes+1;
end
k(i) = newk; % reassigned cluster
dist2(i) = r2min; % sq distance to that cluster
end
% end loop over iterations on no further changes
if( (p~=1) && (changes==0) )
break;
end
% means step
newmu = zeros(K,M); % new cluster means
count = zeros(K,1); % samples in cluster
for i=[1:N] % loop over samples
for j=[1:M] % loop over elements
% cluster sum
newmu(k(i),j) = newmu(k(i),j) + S(i,j);
end
% count samples in cluster
count(k(i)) = count(k(i)) + 1;
end
for j=[1:K] % loop over clusters
if( count(j) == 0 ) % cluster with no samples
% no change in cluster mean
newmu(j,1:M) = oldmu(j,1:M);
else
% new cluster mean
newmu(j,1:M) = newmu(j,1:M)/count(j);
end
end
oldmu=newmu; % update cluster mean
end
% inter-cluster distances
for i=[1:K-1] % loop over cluster means
for j=[i+1:K] % loop over cluster means
delta = (newmu(i,1:M) - newmu(j,1:M))';
r2 = delta'*delta;
Kdist2(i,j) = r2; % sq distance between clusters
Kdist2(j,i) = r2; % sq distance between clusters
end
end
end
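% usage sketch for eda_kmeans() (illustrative, not one of the original
% .m files), starting from K samples randomly drawn by eda_forgy_mean();
% the synthetic sample matrix S is an assumption made for this example
S = [ random('Normal',0,1,100,2); random('Normal',5,1,100,2) ];
mu0 = eda_forgy_mean(S, 2);
[mu, k, dist2, count, Kdist2] = eda_kmeans(S, mu0);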
function [mymu2, myk2, mydist2, mycount2, myKdist2] = eda_kmeans_mod(S, mu)
% k-means cluster analysis of NxM sample matrix S
% modified per Bradley & Fayyad (1998) never to
% return cluster with zero samples.
% Starts with initial guess KxM mu of cluster means
% returns:
% mymu2: new cluster means
% myk2: Nx1 vector of cluster assignments
% mydist2: Nx1 vector of squared distance to cluster center
% mycount2: Kx1 vector of samples in cluster
% myKdist2: KxK matrix of inter-cluster squared distances
[N,M] = size(S);
initmu = mu;
[mymu, myk, mydist, mycount, myKdist] = eda_kmeans(S, initmu);
while min(mycount)==0 % while cluster with no samples
[t,ic]=min(mycount); % index of cluster
[t,ir]=max(mydist); % index of most distant sample
ik=myk(ir); % cluster of this sample
mymu(ic,1:M) = S(ir,1:M); % new mean
mycount(ic)=1; % reset cluster count
myk(ir)=ic; % reset sample membership
mydist(ir)=0.0; % reset sample distance
% note: in rare cases, next line could create
% infinite loop, so don't do it.
% mycount(ik)=mycount(ik)-1;
end
% redo k-means
[mymu2, myk2, mydist2, mycount2, myKdist2] = eda_kmeans(S, mymu);
end
function [N,w,b] = eda_net_1dtower(Nc,slope,Xc,Dx,h,N,w,b)
% eda_net_1dtower
% defines a network that computes a set of 1D towers
% input Nc: number of towers
% slope: slope of all the towers
% Xc: centers of the towers
% Dx: width of the towers
% h: heights of the towers
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [1, Nc*2, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w( ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2, 1:N(1),2) = [w12;w22];
w13 = h(ncvalue);
w23 = -h(ncvalue);
w(1:N(3), ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2 ,3) = [w13;w23];
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b( ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2 ,2) = [b12;b22];
end
end
function [N,w,b] = eda_net_2dtower(Nc,slope,Xc,Yc,Dx,Dy,h,N,w,b)
% eda_net_2dtower
% defines a network that superimposes several towers
% input:
% Nc: number of towers
% slope: slope of all the towers
% Xc, Yc: vector of the centers of the towers in x and y
% Dx, Dy: vector of the widths of the towers in x and y
% h: vector of heights of the towers
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [2, Nc*4, Nc, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w32 = 4*slope;
w42 = 4*slope;
w( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+2, 1 ,2) = [w12 ; w22];
w( ((ncvalue-1)*4)+3:((ncvalue-1)*4)+4, 2 ,2) = [w32 ; w42];
w13 = 4*slope;
w23 = -4*slope;
w33 = 4*slope;
w43 = -4*slope;
w(ncvalue, ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4,3) = [w13;w23;w33;w43];
w14 = h(ncvalue);
w(1, ncvalue, 4) = w14;
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b32 = -w32 * ( Yc(ncvalue) - Dy(ncvalue)/2);
b42 = -w42 * ( Yc(ncvalue) + Dy(ncvalue)/2);
b( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4 ,2) = [b12;b22;b32;b42];
b13 = -(3*(4*slope))/2;
b( ncvalue,3) = b13;
end
end
function [daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a )
% eda_net_deriv
% calculates the derivatives daLmax/dw and daLmax/db
% for a network with parameters N,w,b,a
% background information
% Lmax: number of layers
% N(i): column-vector with number of neurons in each layer;
% N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
% b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
% w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
% a = zeros( Nmax, Lmax );
% end background information
% start of code
Nmax = max(N); % maximum number of neurons on a layer
Lmax = length(N); % number of layers
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
% It depends only upon properties of the neuron
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
% was dadz(1:N(i),i) = ones(1:N(i),1);
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
% so d sigma / dz = a-a^2
dadz(1:N(i),i) = a(1:N(i),i) - (a(1:N(i),i).^2);
end
end
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db: depends only on properties of the neuron
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
% was daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw';
end
end
end
end
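% finite-difference check of eda_net_deriv() (illustrative, not one of
% the original .m files): builds a small linear network with
% eda_net_init() and eda_net_linear() (defined below), perturbs one
% bias, and compares the numerical with the analytic derivative
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,0.0,[1;2],N,w,b);
a(1:N(1),1) = [0.3;0.5];            % input activities
a = eda_net_eval(N,w,b,a);          % forward pass
[daLmaxdw,daLmaxdb] = eda_net_deriv(N,w,b,a);
Db = 1e-6;                          % small bias perturbation
b2 = b;
b2(1,2) = b2(1,2) + Db;             % perturb bias of neuron 1, layer 2
a2 = eda_net_eval(N,w,b2,a);
fprintf('analytic %f numerical %f\n', daLmaxdb(1,1,2), (a2(1,4)-a(1,4))/Db );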
function [ a ] = eda_net_eval(N,w,b,a)
% evaluates network with properties N, w, b, and
% initial activities a, returns final activities, a
Nmax = max(N);
Lmax = length(N);
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
end
function [N,w,b,a] = eda_net_init(Lmax,Nmax)
% initialize a network; produces only arrays of zeros
% input:
% Lmax: number of layers
% Nmax: maximum number of neurons on any layer
% output:
% N: neurons on a layer array
% w: weights
% b: biases
% a: activities
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
end
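% usage sketch (illustrative, not one of the original .m files): build
% a single 1D tower with eda_net_1dtower() and evaluate it over a range
% of inputs with eda_net_eval(); the tower parameters are assumptions
[N,w,b,a] = eda_net_init(3,2);
[N,w,b] = eda_net_1dtower(1, 100, 0.5, 0.2, 1.0, N, w, b);
x = [0:0.01:1]';
d = zeros(length(x),1);
for i=[1:length(x)]
a(1,1) = x(i);
a = eda_net_eval(N,w,b,a);
d(i) = a(1,3);                      % output of the top (3rd) layer
end
figure(1);
clf;
plot(x,d,'k-','LineWidth',2);       % approximately 1 inside the tower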
function [N,w,b] = eda_net_linear(Nc,slope1,slope2,c,h,N,w,b)
% creates a network that computes the linear function
% d(x1, x2, ...) = c + h1*x1 + h2*x2 + h3*x3 + ...
% (good for implementing a filter)
% input Nc: number of linear components
% slope1: slope of the first stage (say 0.01)
% slope2: slope of the second stage (say 0.02)
% should be different from slope1
% c: intercept
% h: slopes
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [Nc, Nc, Nc, 1]';
b(1,4) = c;
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
end
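% quick check of eda_net_linear() (illustrative, not one of the
% original .m files): the network output should approximately
% reproduce c + h1*x1 + h2*x2; the intercept, slopes and inputs are
% assumptions made for this example
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,3.0,[1;2],N,w,b);
a(1:2,1) = [0.2;0.4];               % inputs x1 and x2
a = eda_net_eval(N,w,b,a);
fprintf('network %f exact %f\n', a(1,4), 3.0 + 1*0.2 + 2*0.4 );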
function [Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight ] = eda_net_map(N,w,b)
% builds look-up tables to allow sequential ordering
% of non-trivial biases and weights
% used to build list of model parameters used in training
% input:
% N: neurons on layer array
% w: weights
% b: biases
% output
% Nb: number of biases
% Nw: Number of non-zero weights
% (use near-zero weights to fool it)
% layer_of_bias, vector(i) specifying layer of bias i
% neuron_of_bias, vector(i) specifying neuron of bias i
% index_of_bias, 2d array(i,j) specifying number of bias
% of layer j, neuron i
% layer_of_weight, vector(i) specifying layer of weight i
% r_neuron_of_weight, vector(i) specifying right neuron of weight i
% l_neuron_of_weight, vector(i) specifying left neuron of weight i
% index_of_weight, array(i,j,k) specifying number of weight(i,j,k)
Nmax = max(N);
Lmax = length(N);
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
end
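% usage sketch for eda_net_map() (illustrative, not one of the original
% .m files), on a small network built with eda_net_init() and
% eda_net_linear(); the tables order the non-trivial biases and weights
% into one sequential list of Nb+Nw unknowns used in training
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,0.0,[1;2],N,w,b);
[Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight] = eda_net_map(N,w,b);
M = Nb + Nw;                        % total number of unknowns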
% view function makes a picture of a neural net
% you must set the figure and clear it before the
% call, e.g.
% figure(2); clf;
function null = eda_net_view(N,w,b)
axis off;
set(gca,'FontSize',10);
Nmax = max(N);
Lmax = length(N);
axis([ 0 (Lmax+1) 0 (Nmax+1) ]);
hold on;
scale = zeros(Lmax,1);
for x = 1:Lmax
scale(x,1) = Nmax/N(x);
end
for x = 1:Lmax
for y = 1:N(x)
%plot(x,y,'o','MarkerSize',50);
rectangle('Position',[x-.2,y*scale(x,1)-.4,.4,.8],'LineWidth',2);
text(x,y*scale(x,1),strcat('b = ', num2str( b(y,x))), 'HorizontalAlignment' , 'center');
if x ~= Lmax
for y2 = 1:N(x+1)
if w( y2 , y , x+1) ~= 0
plot([x+.2 x+.8],[y*scale(x,1) y2*scale(x+1,1)],'Color',[0.7 0.7 0.7]);
text(x+.5,(y*scale(x,1)+y2*scale(x+1,1))/2,strcat('w = ', num2str( w( y2 , y , x+1))), 'HorizontalAlignment' , 'center');
end
end
end
end
end
end
% eda_netfilter.m
% trains a neural net, initialized as a linear filter, to predict
% river discharge from precipitation
epsi = 1;
Niter=100;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 20;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'g-', 'LineWidth', 4 );
%eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_netfilter2.m
% trains a neural net, initialized as a linear filter, to predict
% river discharge from precipitation; then frees additional
% layer-2 weights and retrains
epsi = 1;
Niter=100;
epsisave=epsi;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 10;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
elseif( iter==50 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
%plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'k-', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('intermediate error %.2f with %d unknowns\n', error, Nb+Nw );
% epsi=epsisave;
for i=[1]
for j=[1:N(1)]
if( w(i,j,2) == 0 )
w(i,j,2) = random('Normal',0,0.0001,1,1);
end
end
end
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
elseif( iter==50 )
epsi=epsi / 100;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, dpre, 'k--', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('final error %.2f with %d unknowns\n', error, Nb+Nw );
figure(3);
clf;
eda_viewnet(N,w,b);
% eda_netfilter3.m
% network to match a nonlinear response function
epsi1 = 1;
epsi2 = 0.05;
Niter=100;
D = load('nonlinearresponse.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only an Nx-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=501;
istart = 1;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
xnorm = 1/max(x);
dobsnorm = 1/max(dobs);
x = x * xnorm;
dobs = dobs * dobsnorm;
% the discharge and precipitation data
figure(1);
clf;
subplot(4,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(4,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 10;
c0 = 2;
To=3;
f = exp(-[0:Nc-1]'/To);
f = [f(1)/1000;f(1:end-1)];
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==50 )
epsi1=epsi1 / 10;
end
Dm = (G'*G+epsi1*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
% plot( t, d0, 'r:', 'LineWidth', 2 );
% plot( t, dpre, 'k-', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('intermediate error %.2f with %d unknowns\n', error, Nb+Nw );
for i=[1]
for j=[1:N(1)]
if( w(i,j,2) == 0 )
w(i,j,2) = random('Normal',0,0.0001,1,1);
end
end
end
for i=[1]
for j=[1:N(1)]
if( w(i,j,3) == 0 )
w(i,j,3) = random('Normal',0,0.0001,1,1);
end
end
end
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==50 )
epsi2=epsi2/10;
end
Dm = (G'*G+epsi2*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, dpre, 'k--', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('final error %.2f with %d unknowns\n', error, Nb+Nw );
figure(3);
clf;
eda_viewnet(N,w,b);
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
Nx=501;
istart = 499;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x * xnorm;
dobs = dobs * dobsnorm;
% the discharge and precipitation data
figure(1);
subplot(4,1,3);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(4,1,4);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
plot( t, dpre, 'k--', 'LineWidth', 2 );
% eda_nonlinearresponse.m
clear all
N=1001;
Dt = 1;
t = Dt*[0:N-1]';
x = zeros(N,1);
y = zeros(N,1);
f = zeros(N,1);
Np = floor(N/10);
p = random('unid',N,Np,1);
cexp = 0.5;
x(p) = random('exp',cexp,Np,1);
figure(1);
clf;
subplot(3,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 2 ] );
plot(t,x,'k-','LineWidth',2);
cf = 0.2;
for i=[2:N]
f(i) = cf*(y(i-1)^2);
dydt = x(i) - f(i);
y(i) = y(i-1) + dydt * Dt;
if( y(i) < 0 )
y(i)=0;
end
end
subplot(3,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 1.1*max(f) ] );
plot(t,f,'k-','LineWidth',2);
subplot(3,1,3);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 1.1*max(y) ] );
plot(t,y,'k-','LineWidth',2);
dlmwrite('nonlinearresponse.txt', [t,f,x], 'delimiter', '\t' );
function mu = eda_random_partition_mean(S,K)
% create initial k-mean cluster means, by K random
% partitions of NxM sample matrix S
% returns KxM matrix mu of means
[N,M]=size(S);
k = unidrnd(K,N,1); % random cluster-assignments
mu = zeros(K,M);
for i=[1:K] % loop over clusters
j = find(k==i); % samples in cluster i
mu(i,1:M) = mean(S(j,1:M),1); % their means
end
end
% eda_testfit.m
% simple least squares fit of a Gaussian with a single tower
% grid of points in (x,y)
Nx = 21;
Ny = 21;
xmax = 5;
ymax = 5;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% (x,y) index arrays for easy access
kofij=zeros(Nx,Ny); % linear index k of a particular (i,j)
iofk=zeros(Nx*Ny,1); % row index of a particular k
jofk=zeros(Nx*Ny,1); % col index of a particular k
Nxy=0;
for i=[1:Nx]
for j=[1:Ny]
Nxy=Nxy+1;
kofij(i,j)=Nxy;
iofk(Nxy)=i;
jofk(Nxy)=j;
end
end
% Gaussian
dobs = zeros(Nx,Ny);
dobsvec = zeros(Nxy,1);
xbar = 2.4;
ybar = 2.6;
amp = 5.0;
sigmax = 1.2;
sigmay = 0.8;
cx = 1/(2*sigmax*sigmax);
cy = 1/(2*sigmay*sigmay);
for i=[1:Nx]
for j=[1:Ny]
dobs(i,j) = amp*exp(-cx*((x(i)-xbar)^2) - cy*((y(j)-ybar)^2) );
dobsvec(kofij(i,j))=dobs(i,j);
end
end
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [2.5]; % Xc: the center of the tower
Yc = [2.5];
Dx = [2]; % Dx: the width of the tower
Dy = [2];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
[N,w,b] = eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b );
figure(2);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nxy,M);
% predicted data
dpre = zeros(Nx,Ny);
dprevec = zeros(Nxy,1);
% initial predicted data
d0 = zeros(Nx,Ny);
d0vec = zeros(Nxy,1);
Niter=20;
for iter=[1:Niter]
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,ky) = a(1,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kofij(kx,ky), ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kofij(kx,ky), Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x,y)
end
Dd = dobsvec - dprevec;
if( iter==1 )
d0 = dpre;
d0vec = dprevec;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter<5 )
epsi=1.0;
elseif (iter<10)
epsi=0.1;
else
epsi=0.001;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,ky) = a(1,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
end
end
eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobsvec - dprevec;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_testfit3.m
% least squares fit of a neural net to the precipitation/discharge dataset
epsi = 1;
Niter=100;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101 day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 20;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'g-', 'LineWidth', 4 );
%eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_testnetderiv.m
% verify the activity derivatives are calculated correctly
% by comparing the analytic result to one based on finite differences
delta = 0.0001; % increment size used in finite difference calculations
onetower=2; % test code on one tower or two
if( onetower==1 )
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [1.5]; % Xc: the center of the tower
Yc = [1.5];
Dx = [1]; % Dx: the width of the tower
Dy = [1];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
else
% build the test network for two superimposed 2D towers
Nc = 2; % Two towers
Xc = [1.5 2.5]; % Xc: the centers of the towers
Yc = [1.5 2.5];
Dx = [1 1]; % Dx: the widths of the towers
Dy = [1 1];
h = [2 3]; % h: the heights of the towers
slope = 5/4; % slope: slope of the step function
end
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [2, Nc*4, Nc, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w32 = 4*slope;
w42 = 4*slope;
w( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+2, 1 ,2) = [w12 ; w22];
w( ((ncvalue-1)*4)+3:((ncvalue-1)*4)+4, 2 ,2) = [w32 ; w42];
w13 = 4*slope;
w23 = -4*slope;
w33 = 4*slope;
w43 = -4*slope;
w(ncvalue, ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4,3) = [w13;w23;w33;w43];
w14 = h(ncvalue);
w(1, ncvalue, 4) = w14;
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b32 = -w32 * ( Yc(ncvalue) - Dy(ncvalue)/2);
b42 = -w42 * ( Yc(ncvalue) + Dy(ncvalue)/2);
b( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4 ,2) = [b12;b22;b32;b42];
b13 = -(3*(4*slope))/2;
b( ncvalue,3) = b13;
end
bsave = b;
wsave = w;
figure(1);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
fprintf('Biases %d Weights %d\n', Nb, Nw );
% grid of points in (x,y) (but only one point is used)
Nx = 50;
Ny = 50;
xmax = 3.1;
ymax = 3.1;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% choose a point in (x,y) and calculate activities
kx = 23;
ky = 22;
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
% save activities
asave=a;
% Derivative calculation: Simple but tedious application of the chain rule
% Note that in the comments below, we have suppressed most indices, so that
% the comments alone don't communicate which quantities are scalars and which
% are vectors. However, this can be determined by examining the subsequent
% line(s) of code. Note that the chain rule for a vector function a(b(c)) is
% da(i)/dc(j) = (sum k) da(i)/db(k) db(k)/dc(j). Note also that for
% a linear vector function b(i) = (sum j) M(i,j) a(j), the derivative
% is db(i)/da(j) = M(i,j)
% dadz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% "backpropagation" of activity derivatives
% the derivative that we really want is daLmaxda
% (the change in the layer Lmax activities with other layer activities)
% but we begin calculating dada = da(layer i) / da(layer i-1)
% and then use the chain rule to accumulate them, starting with
% the top layer, e.g.
% da(layer Lmax) / da(layer Lmax-2) =
% da(layer Lmax) / da(layer Lmax-1) * da(layer Lmax-1) / da(layer Lmax-2)
% so the loop is from the last layer towards the first
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% bias derivatives
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% check of bias derivatives using finite differences
fprintf('\n');
fprintf('bias derivatives\n');
Nbad=0;
for ib=[1:Nb]
% restore old biases
b = bsave;
% execute network with one bias perturbed
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob,lob) = b(nob,lob) + delta;
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
Dnum = (a(1,4) - asave(1,4))/delta;
Dana = daLmaxdb(1,nob,lob);
error=(Dnum-Dana)/Dnum;
if( abs(error)>0.05 )
Nbad = Nbad + 1;
end
fprintf('%d neuron %d layer %d num %.4f ana %.4f error %.4f\n',ib,nob,lob,Dnum,Dana,error);
end
fprintf('%d bad bias derivatives\n',Nbad);
fprintf('\n');
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
b = bsave; % restore old biases
% check of weight derivatives using finite differences
fprintf('weight derivatives\n');
Nbad = 0;
for iw=[1:Nw]
% execute network with one weight perturbed
low=layer_of_weight(iw);
rnow=r_neuron_of_weight(iw);
lnow=l_neuron_of_weight(iw);
% restore old weights
w = wsave;
w(rnow,lnow,low) = w(rnow,lnow,low) + delta;
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
Dnum = (a(1,4) - asave(1,4))/delta;
Dana = daLmaxdw(1,rnow,lnow,low);
error=(Dnum-Dana)/Dnum;
fprintf('%d r-neuron %d l-neuron %d layer %d num %.4f ana %.4f error %.4f\n',iw, rnow,lnow,low,Dnum,Dana,error);
if( abs(error)>0.05 )
Nbad = Nbad + 1;
end
end
fprintf('%d bad weight derivatives\n',Nbad);
function [ tBmA_est, Cmax_est ] = timedelay( uA, uB, Dt )
% time delay between two similar timeseries, by
% cross-correlation followed by parabolic refinement.
% delay is positive when pulse on B is later than pulse on A
% input:
% uA, uB, two signals to be compared
% should have zero mean
% if of unequal length, the shorter is zero padded
% Dt, sampling of both signals
% output
% tBmA_est, estimated time delay
% Cmax_est, cross-correlation at estimated delay
c = xcorr(uA, uB); % cross-correlate
NA = length(uA); % length of uA
NB = length(uB); % length of uB
Nm = max([NA,NB]); % max of two lengths
Nc = length(c); % length of cross-correlation
% lag to nearest sample
[cmax, icmax] = max(c); % determine location of maximum
tBmArough = -Dt * (icmax-Nm); % time lag
% parabolic refinement
% (c-cmax) = m(1) + m(2) dt + m(3) dt^2
% d(c-cmax)/ddt = 0 = m(2) + 2 m(3) dt
% dt = -0.5*m(2)/m(3)
% c = cmax + m(1) + m(2) dt + m(3) dt^2
G = [ [1, 1, 1]', Dt*[-1, 0, 1]', (Dt^2)*[1, 0, 1]' ];
d = [ c(icmax-1)-cmax, c(icmax)-cmax, c(icmax+1)-cmax ]';
m = (G'*G)\(G'*d);
dt = -0.5*m(2)/m(3);
Cmax_est = cmax + m(1) + m(2)*dt + m(3)*(dt^2);
tBmA_est = tBmArough - dt;
end
% eda11_13b.m
% modification of eda11_13 to test the case of two output neurons
% grid of points in (x,y)
Nx = 21;
Ny = 21;
xmax = 5;
ymax = 5;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% (x,y) index arrays for easy access
kofij=zeros(Nx,Ny); % linear index k of a particular (i,j)
iofk=zeros(Nx*Ny,1); % row index of a particular k
jofk=zeros(Nx*Ny,1); % col index of a particular k
Nxy=0;
for i=[1:Nx]
for j=[1:Ny]
Nxy=Nxy+1;
kofij(i,j)=Nxy;
iofk(Nxy)=i;
jofk(Nxy)=j;
end
end
% Gaussian
dobs = zeros(Nx,Ny);
dobsvec = zeros(Nxy,1);
xbar = 2.4;
ybar = 2.6;
amp = 5.0;
sigmax = 1.2;
sigmay = 0.8;
cx = 1/(2*sigmax*sigmax);
cy = 1/(2*sigmay*sigmay);
for i=[1:Nx]
for j=[1:Ny]
dobs(i,j) = amp*exp(-cx*((x(i)-xbar)^2) - cy*((y(j)-ybar)^2) );
dobsvec(kofij(i,j))=dobs(i,j);
end
end
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [2.5]; % Xc: the center of the tower
Yc = [2.5];
Dx = [2]; % Dx: the width of the tower
Dy = [2];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
[N,w,b,a] = eda_net_init(Lmax,Nmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
[N,w,b] = eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b );
% for test purposes, activate a second neuron on the top layer
N(Lmax)=2;
w(2,1,Lmax)=w(1,1,Lmax);
b(2,Lmax)=b(1,Lmax);
% target=1: the data are the output of the first neuron; 2 the second
% they should produce the same results
target=2;
fprintf('target %d\n', target );
% loop over (x,y)
dpre0 = zeros(Nx,Ny);
dpre0vec = zeros(Nxy,1);
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
a = eda_net_eval( N,w,b,a);
dpre0(kx,ky) = a(2,4);
dpre0vec(kofij(kx,ky))=dpre0(kx,ky);
end
end
Dd = dobsvec-dpre0vec;
error0 = Dd'*Dd;
fprintf('starting error %.2f\n', error0 );
figure(2);
clf;
eda_net_view(N,w,b);
[ Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight ] = eda_net_map(N,w,b);
% linearized data kernel
M = Nb+Nw;
G = zeros(Nxy,M);
% predicted data
dpre = zeros(Nx,Ny);
dprevec = zeros(Nxy,1);
Niter=20;
for iter=[1:Niter]
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);x(ky)];
a = eda_net_eval( N,w,b,a);
dpre(kx,ky) = a(target,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
[daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a );
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kofij(kx,ky), ib ) = daLmaxdb(target, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kofij(kx,ky), Nb+iw ) = daLmaxdw( target, rnow, lnow, low );
end
end % next (x,y)
end
Dd = dobsvec - dprevec;
% implement some damping to improve stability
% but relax it after a few iterations
if( iter<5 )
epsi=1.0;
elseif (iter<10)
epsi=0.1;
else
epsi=0.001;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
a = eda_net_eval( N,w,b,a);
dpre(kx,ky) = a(target,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
end
end
eda_draw( dobs, 'caption d-obs', dpre0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobsvec - dprevec;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
function [yr, jday, hr, mn, sc, ms] = etoh( e )
% this script is the inverse of htoe()
% compute the year, julian day, hour, minute, second, millisecond
% given the epoch time (time in seconds after midnight, January 1, 1970)
%
% input:
% e, the number of seconds after January 1, 1970.
%
% output:
% yr, an integer, e.g. 2010
% jday, an integer, the day of the year, where January 1 is 1
% hr, an integer, the hour of the day, starting with 0
% mn, an integer, the minute of the hour, starting with 0
% sc, an integer, the second of the minute, starting with 0
% ms, an integer, the milliseconds of the second, starting with 0
%
%
% Note 1: the monthday() script can calculate month and day from julian day
%
% Note 2: The calculation does not include leap seconds. It will give correct
% dates/times for GPS and TAI clock standards, but will be off for the UTC standard.
% Typically, this function is used in conjunction with htoe() to increment
% a date/time by a time interval, that is:
% epoch1 = htoe( yr1, jday1, hr1, mn1, sc1, ms1 );
% epoch2 = epoch1+interval;
% [yr2, jday2, hr2, mn2, sc2, ms2] = etoh( epoch2 );
% The overall calculation could be off by as much as 2 seconds per length of the
% interval in years.
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
if( e<0 )
sprintf('error: negative epochs not implemented')
return;
end
epoch=0.0;
frac = e - floor(e);
i=1970;
while (i<5000)
epoch=epoch+365.0*86400.0;
if( isleap(i) )
epoch=epoch+86400.0;
end
if( epoch>e )
break;
end
i=i+1;
end
yr=i; mo=1; day=1; hr=0; mn=0; sc=0; ms=0;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [1:12]
epoch=epoch+86400.0*daysinmonth(i);
if( (i==2) && isleap(yr) )
epoch=epoch+86400.0;
end
if( epoch>e)
break;
end
end
mo=i;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
j=daysinmonth(mo);
if( (mo==2) && isleap(yr) )
j=j+1;
end
for i = [1:j]
epoch=epoch+86400.0;
if( epoch>e)
break;
end
end
day=i;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i= [0:23]
epoch=epoch+3600.0;
if( epoch>e )
break;
end
end
hr=i;
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [0:59]
epoch=epoch+60.0;
if( epoch>e)
break;
end
end
mn=i;
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [0:60]
epoch=epoch+1.0;
if( epoch>e)
break;
end
end
sc=i;
ms = floor((1000.0*frac)+0.5);
return;
function y = filterfun(v,transp_flag)
global e g H;
% get dimensions
N = length(g);
M = length(v);
[K, M2] = size(H);
temp1=conv(g,v); % G v is of length N
a=temp1(1:N);
b=e*H*v; % H v is of length K
temp2=conv(flipud(g),a); % GT (G v) is of length M
a2 = temp2(N:N+M-1);
b2 = e*H'*b; % e HT (e H v) is of length M
% y = FT F v = GT G v + e^2 HT H v
y = a2+b2;
return
function [FTf] = filterfunRHS(g,dobs,e,H,h,M)
% companion function for filterfun, does RHS
% find f in g * f = dobs with prior information e H f = e h
% with damping e = sigma_d / sigma_h
% M is the number of unknowns; that is, the length of f
% note that GT v is similar to the convolution G v = g*v
% except that g is time-reversed and the position of the
% result in the convolution is shifted to the bottom
N = length(dobs);
temp=conv(flipud(g),dobs);
FTfa = temp(N:N+M-1);
FTfb = (e^2)*H'*h;
FTf=FTfa+FTfb;
return
% function to perform the multiplication FT (F v)
function y = FTFmul(v,transp_flag)
global edaFsparse;
temp = edaFsparse*v;
y = edaFsparse'*temp;
return
function [epoch] = htoe(year, jday, hour, min, sec, msec)
% converts time in year, julian day, hour, minute, second, millisecond format
% to time in seconds after midnight, January 1, 1970.
%
% input:
% year, an integer, e.g. 2010
% jday, an integer, the day of the year, where January 1 is 1
% hour, an integer, the hour of the day, starting with 0
% min, an integer, the minute of the hour, starting with 0
% sec, an integer, the second of the minute, starting with 0
% msec, an integer, the milliseconds of the second, starting with 0
% output:
% epoch, the number of seconds after midnight, January 1, 1970.
%
% example: January 10, 2010 at 01:02:03.040
% e = htoe( 2010, 10, 1, 2, 3, 40)
%
% Note 1: the julian() script can calculate julian day from year-month-day
%
% Note 2: The calculation does not include leap seconds. It will give correct
% epoch times for GPS and TAI clock standards, but will be off for the UTC standard.
% When two epoch times are subtracted to calculate a time interval, the length
% of the interval could be incorrect by as much as 2 seconds per year.
epoch=0.0;
if( year<1970 )
sprintf('sorry: cant handle pre-1970 data')
return;
end
days=0;
if( year > 1970 )
for i = [1970:year-1]
days=days+365;
if( isleap(i) )
days=days+1;
end
end
end
days=days+(jday-1);
epoch = (86400.0*days) + (hour*3600.0) + (min*60.0) + sec + msec/1000.0;
return
% interpolate_reynolds.m
% read, interpolate, and output the Reynolds Channel Water Quality dataset
% columns are days, precip, air_temp, water_temp, salinity, turbidity,
% chlorophyll
% load data
D = load('reynolds_uninterpolated.txt');
[N, Ncols] = size(D);
Dt=1;
Di=D;
% interpolate missing values with interp1 (linear interpolation)
for i = [2:Ncols]
% ensure data in first and last rows
if( D(1,i) < 0 )
D(1,i)=0;
end
if( D(N,i) < 0 )
D(N,i)=0;
end
good = find( D(:,i)>=0 );
x = D(good,1);
y = D(good,i);
z = interp1(x,y,D(:,1));
Di(:,i)=z;
end
t = Di(:,1); % time, days
p = Di(:,2); % precipitation, inches
at = Di(:,3); % air temperature, deg C
wt = Di(:,4); % water temperature, deg C
s = Di(:,5); % salinity, practical salinity units
tb = Di(:,6); % turbidity,
c = Di(:,7); % chlorophyll, micrograms per liter
figure(1);
clf;
subplot(6,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(p), max(p)] );
plot( t, p, 'k-', 'LineWidth', 2 );
ylabel('precip');
subplot(6,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(at), max(at)] );
plot( t, at, 'k-', 'LineWidth', 2 );
ylabel('T-air');
subplot(6,1,3);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(wt), max(wt)] );
plot( t, wt, 'k-', 'LineWidth', 2 );
ylabel('T-water');
subplot(6,1,4);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(s), max(s)] );
plot( t, s, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('salinity');
subplot(6,1,5);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(tb), max(tb)] );
plot( t, tb, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('turbidity');
subplot(6,1,6);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(c), max(c)] );
plot( t, c, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('chlorophyll');
dlmwrite('reynolds_interpolated.txt', Di, 'delimiter','\t' );
function [leap] = isleap(year)
% returns 1 if year is a leap year and zero otherwise
% input:
% year, an integer, the year (e.g. 2010)
% output:
% leap, a logical value
leap = ((year-4*floor(year/4))==0) && ((year-100*floor(year/100))~=0) || ((year-400*floor(year/400))==0);
return
function [jday] = julian( year, month, day )
% this script is the inverse of monthday()
% return the julian day given the year, month and day
%
% input:
% year, an integer, e.g. 2010
% month, an integer, with January being 1
% day, an integer, with the first day of the month being 1
%
% output:
% jday, an integer, the julian day of the year, with Jan 1 being 1
%
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
leap = isleap(year);
jday=0;
if ( month>1 )
for i = [1:month-1]
jday=jday+daysinmonth(i);
if( leap && (i==2) )
jday=jday+1;
end
end
end
jday = jday + day;
return;
function [month, day] = monthday( year, jday )
% the month and the day given the julian day
%
% input:
% year, an integer, the year, e.g. 2010
% jday, an integer, the julian day
%
% output:
% month, an integer, the month, with January being 1
% day, an integer, the day of the month, starting at 1
%
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
leap = isleap(year);
day=jday;
for i = [1:12]
dim=daysinmonth(i);
if( leap && (i==2) )
dim=dim+1;
end
if( day <= dim )
break;
end
day = day - dim;
end
month=i;
return