AllDotMFiles
This is not an executable LiveScript; rather, it was used to make an HTML version of all the .m files.
One .m file per cell, in alphabetical order.
clear all;
function y = autofun(v,transp_flag)
% perform FT F v = GT G v + e^2 HT H v
% where G is a Toeplitz matrix built from filter g
% using only the autocorrelation, a, of g
global a e H;
N = length(v);
GTGv=zeros(N,1);
for i = [1:N]
GTGv(i) = [fliplr(a(1:i)'), a(2:N-i+1)'] * v;
end
Hv = e*H*v;
HTHv =e*H'*Hv;
y = GTGv + HTHv;
return
function [FTf] = autofunRHS(g,dobs,e,H,h)
% companion function of autofun(); builds the RHS
% for the generalized least squares solution of
% G m = dobs with prior information e H m = e h
% and damping e = sigma_d/sigma_h
% sets up FTf = GT dobs + e^2 HT h
% GT dobs is the cross-correlation, c, of dobs with g
cl = xcorr(dobs, g);
Nc = length(cl);
Nm = floor((Nc+1)/2);
c = cl(Nm: Nc);
HTh = (e^2) * H' * h;
FTf = c + HTh;
end
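% usage sketch for autofun()/autofunRHS() (illustrative, not one of the
% original .m files): solves the generalized least squares problem
% G m = dobs with prior e H m = e h via bicg(); the synthetic filter,
% data and prior below are assumptions made for this example
global a e H;
M = 50;                              % number of model parameters
g = exp(-[0:M-1]'/5);                % illustrative filter
mtrue = random('Normal',0,1,M,1);    % synthetic true model
dtrue = conv(g,mtrue);               % synthetic data by convolution
dobs = dtrue(1:M);
al = xcorr(g);                       % autocorrelation of g
a = al(M:end);                       % keep zero and positive lags
e = 0.01;                            % damping sigma_d/sigma_h
H = eye(M,M);                        % prior information: m is small
h = zeros(M,1);
FTf = autofunRHS(g,dobs,e,H,h);      % right-hand side F'f
mest = bicg(@autofun,FTf,1e-8,3*M);  % solve F'F m = F'f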
% brf_convert.m
% read black rock forest temperature dataset in file brf_raw.txt,
% reformat time, and store it in a file 'brf_temp.txt'. The data
% in the new file, and in the matrix D, are (col 1) time in days after
% Jan 1, 1997 and (col 2) temperature in deg C.
filename='brf_raw.txt';
mname = { 'Jan' 'Feb' 'Mar' 'Apr' 'May' 'Jun' 'Jul' 'Aug' 'Sep' 'Oct' 'Nov' 'Dec' };
mnumber = { ' 1' ' 2' ' 3' ' 4' ' 5' ' 6' ' 7' ' 8' ' 9' ' 10' ' 11' ' 12' };
% count lines in file
fid = fopen(filename);
N = 0;
while 1
tline = fgetl(fid);
if ( ~ischar(tline) )
break;
end;
N=N+1;
end
fclose(fid);
% initialize data
D=zeros(N,2);
% read data, presuming lines look like
% '0700-0759 1 Jan 1997 -19.35'
fid = fopen(filename);
for i = [1:N]
tline = fgetl(fid);
j =strfind(tline,'-'); % delete dash between first two numbers
tline(j(1))=' ';
% replace month name with month number
for j = [1:12]
k=strfind(tline,char(mname(j)));
if( length(k) > 0 )
tline(k(1):k(1)+2)=char(mnumber(j));
break;
end
end
f = sscanf(tline,'%f'); % break out hr and min from hhmm-style number
h1 = floor( f(1)/100 );
m1 = f(1)-h1*100;
h2 = floor( f(2)/100 ); % break out hr and min from hhmm-style number
m2 = f(2)-h2*100;
jday = julian( f(5), f(4), f(3) ); % convert to epoch time in seconds
e1 = htoe(f(5), jday, h1, m1, 0.0, 0.0);
e2 = htoe(f(5), jday, h2, m2, 0.0, 0.0);
d1 = e1 / (24*60*60); % convert to time in days
d2 = e2 / (24*60*60);
D(i,1) = 0.5*(d1+d2); % time is mean time of two times given
D(i,2) = f(6); % temperature
end
D(:,1) = D(:,1) - D(1,1); % shift time so that dataset begins at t=0
fclose(fid);
dlmwrite('brf_temp.txt',D,'delimiter','\t','precision','%10.3f');
% brfread.m
% read black rock forest temperature dataset
function D = brfread(filename)
mname = { 'Jan' 'Feb' 'Mar' 'Apr' 'May' 'Jun' 'Jul' 'Aug' 'Sep' 'Oct' 'Nov' 'Dec' };
mnumber = { ' 1' ' 2' ' 3' ' 4' ' 5' ' 6' ' 7' ' 8' ' 9' ' 10' ' 11' ' 12' };
% count lines in file
fid = fopen(filename);
N = 0;
while 1
tline = fgetl(fid);
if ( ~ischar(tline) )
break;
end;
N=N+1;
end
fclose(fid);
% initialize data
D=zeros(N,2);
% read data, presuming lines look like
% '0700-0759 1 Jan 1997 -19.35'
fid = fopen(filename);
for i = [1:N]
tline = fgetl(fid);
j =strfind(tline,'-'); % delete dash between first two numbers
tline(j(1))=' ';
% replace month name with month number
for j = [1:12]
k=strfind(tline,char(mname(j)));
if( length(k) > 0 )
tline(k(1):k(1)+2)=char(mnumber(j));
break;
end
end
f = sscanf(tline,'%f'); % break out hr and min from hhmm-style number
h1 = floor( f(1)/100 );
m1 = f(1)-h1*100;
h2 = floor( f(2)/100 ); % break out hr and min from hhmm-style number
m2 = f(2)-h2*100;
jday = julian( f(5), f(4), f(3) ); % convert to epoch time in seconds
e1 = htoe(f(5), jday, h1, m1, 0.0, 0.0);
e2 = htoe(f(5), jday, h2, m2, 0.0, 0.0);
d1 = e1 / (24*60*60); % convert to time in days
d2 = e2 / (24*60*60);
D(i,1) = 0.5*(d1+d2); % time is mean time of two times given
D(i,2) = f(6); % temperature
end
D(:,1) = D(:,1) - D(1,1); % shift time so that dataset begins at t=0
fclose(fid);
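% usage sketch for brfread() (illustrative, not one of the original
% .m files), assuming the raw file 'brf_raw.txt' is in the current
% directory
D = brfread('brf_raw.txt');
figure(1);
clf;
plot( D(:,1), D(:,2), 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('temperature (deg C)');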
% chebyshevfilt
% chebyshev IIR bandpass filter
% din - input array of data
% Dt - sampling interval
% flow - low cutoff frequency, Hz
% fhigh - high cutoff frequency, Hz
% dout - output array of data
% u - the numerator filter
% v - the denominator filter
% these filters can be used again using dout=filter(u,v,din);
function [dout, u, v] = chebyshevfilt(din, Dt, flow, fhigh)
% sampling rate
rate=1/Dt;
% ripple parameter, set to ten percent
ripple=0.1;
% normalise frequency
fl=2.0*flow/rate;
fh=2.0*fhigh/rate;
% center frequency
cf = 4 * tan( (fl*pi/2) ) * tan( (fh*pi/2) );
% bandwidth
bw = 2 * ( tan( (fh*pi/2) ) - tan( (fl*pi/2) ) );
% ripple parameter factor
rpf = (sqrt((1.0+1.0/(ripple*ripple))) + 1.0/ripple) ^ (0.5);
a = 0.5*(rpf-1.0/rpf);
b = 0.5*(rpf+1.0/rpf);
u=zeros(5,1);
v=zeros(5,1);
theta = 3*pi/4;
sr = a * cos(theta);
si = b * sin(theta);
es = sqrt(sr*sr+si*si);
tmp= 16.0 - 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw - 4.0*bw*cf*sr + cf*cf;
v(1) = 1.0;
v(2) = 4.0*(-16.0 + 8.0*bw*sr - 2.0*bw*cf*sr + cf*cf)/tmp;
v(3) = (96.0 - 16.0*cf - 8.0*es*es*bw*bw + 6.0*cf*cf)/tmp;
v(4) = (-64.0 - 32.0*bw*sr + 8.0*bw*cf*sr + 4.0*cf*cf)/tmp;
v(5) = (16.0 + 16.0*bw*sr + 8.0*cf + 4.0*es*es*bw*bw + 4.0*bw*cf*sr + cf*cf)/tmp;
tmp = 4.0*es*es*bw*bw/tmp;
u(1) = tmp;
u(2) = 0.0;
u(3) = -2.0*tmp;
u(4) = 0.0;
u(5) = tmp;
[dout]=filter(u,v,din);
return
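% usage sketch for chebyshevfilt() (illustrative, not one of the
% original .m files); the synthetic signal and the 0.1 to 0.2 Hz pass
% band are assumptions made for this example
Dt = 1.0;                                    % sampling interval
t = Dt*[0:1023]';
din = cos(2*pi*0.15*t) + cos(2*pi*0.02*t);   % in-band plus out-of-band
[dout, u, v] = chebyshevfilt(din, Dt, 0.1, 0.2);
dout2 = filter(u,v,din);                     % reuse the filter, as noted above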
% function to perform the multiplication y = (Ccc + sigmad2 I) v
function y = CpImul(v,transp_flag)
global N
global td
global sigmad2
global gamma2
global w0est
global sest
% Note (C + sigmad2 I) v implies yi = SUMj Cij vj + sigmad2 vi
y = zeros(N,1);
for i=1:N
y(i)=sigmad2*v(i);
for j=1:N
Tabs = abs(td(i)-td(j));
Cij = gamma2*cos(w0est*Tabs)*exp(-sest*Tabs);
y(i)=y(i)+Cij*v(j);
end
end
return
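% usage sketch for CpImul() (illustrative, not one of the original
% .m files): sets the globals used by CpImul() and solves
% (Ccc + sigmad2 I) u = dobs with bicg(); all parameter values are
% assumptions made for this example
global N td sigmad2 gamma2 w0est sest;
N = 50;
td = [0:N-1]';                        % observation times
sigmad2 = 0.01;                       % variance of the data
gamma2 = 1.0;                         % amplitude of the covariance
w0est = 2*pi/20;                      % estimated angular frequency
sest = 0.05;                          % estimated decay rate
dobs = cos(w0est*td).*exp(-sest*td);  % synthetic data
u = bicg(@CpImul,dobs,1e-6,4*N);      % solve (Ccc + sigmad2 I) u = dobs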
function mu = eda_bradley_fayyad_mean(S,K,Nsets,fraction)
% create initial k-mean cluster means, by
% the Bradley & Fayyad (1998) method, which
% is based on clustering clusters. The NxM
% sample matrix S is randomly subsampled Nsets
% times, with each set containing (fraction*N+2*K)
% samples.
% returns KxM matrix mu of means
[N,M] = size(S);
initmu = eda_random_partition_mean(S,K);
KiMAX = K*Nsets; % maximum number of means mu
Ni = floor(fraction*N)+2*K; % size of small sets
% Step 1, create KixM table CM of means of subsampled data
CM = zeros(KiMAX,M);
Ki=1; % counter for rows of CM
for j=[1:Nsets]
k = unidrnd(N,Ni,1);
Si = S(k,1:M);
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans_mod(Si, initmu);
CM(Ki:Ki+K-1,1:M) = mymu(1:K,1:M);
Ki=Ki+K;
end
% Step 2, create KixM table FMS of means of cluster means
FMS = zeros(KiMAX,M);
Ki=1; % counter for rows of CM
for j=[1:Nsets]
[mymu, myk, mydist2, mycount, myKdist2] = eda_kmeans(CM, CM(Ki:Ki+K-1,1:M));
FMS(Ki:Ki+K-1,1:M) = mymu(1:K,1:M);
Ki=Ki+K;
end
% Step 3A: associate each KxM member FMi of FMS
% with all individual 1xM elements of CM
% and compute the overall error
error = zeros(Nsets,1);
Ki=1;
for i=[1:Nsets]
FMi = FMS(Ki:Ki+K-1,1:M); % one group of means
Ki=Ki+K;
for j=[1:KiMAX]
CMj = CM(j,1:M); % one member of CM
for m=[1:K]
delta = (CMj(1:M) - FMi(m,1:M))';
r2 = delta'*delta;
if( m==1 ) % always accept first distance
r2min = r2;
elseif r2<r2min % accept if smaller
r2min = r2;
end
end
error(i) = error(i) + r2min;
end
end
% Step 3B: select the FMi with lowest error
[tmp, i] = min(error);
mu = FMS(K*(i-1)+1:K*(i-1)+K,1:M);
end
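% usage sketch for eda_bradley_fayyad_mean() (illustrative, not one of
% the original .m files), with a synthetic sample matrix S drawn from
% two well-separated clusters; K, Nsets and fraction are assumptions
S = [ random('Normal',0,1,100,2); random('Normal',5,1,100,2) ];
mu0 = eda_bradley_fayyad_mean(S, 2, 10, 0.1);       % initial means
[mu, k, dist2, count, Kdist2] = eda_kmeans(S, mu0); % final clustering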
% eda_draw
% A function that automates the drawing of greyscale vectors and matrices
% for example, to draw Ax=y (where A is LxL) as
% eda_draw(A, 'caption A', x, 'caption x', '=', y, 'caption y');
function eda_draw(varargin)
figure(1);
clf;
N=40;
axis([0, 8*N, 0, 4*N]);
hold on;
axis equal;
axis ij;
axis off;
set(gca,'XTick',[]); % turn off horizontal axis
set(gca,'YTick',[]); % turn off vertical axis
axis off;
bw=0.9*(256-linspace(0,255,256)')/256;
colormap([bw,bw,bw]);
pixel = 0;
lastpixel = 0;
center = 0;
for i = 1 :size(varargin,2)
temp = varargin{i};
if(isa(temp, 'char') )
if(~isempty(strfind(temp, 'caption')))
temp2 = temp(8:length(temp));
drawCaption(N,temp2,center);
else
lastpixel = pixel;
pixel = drawText(N,temp,pixel);
pixel=pixel+2*N/32;
lastpixel=lastpixel+2*N/32;
end
else
dims = size(temp);
if (dims(2) == 1)
lastpixel = pixel;
pixel = drawVector(N,temp,pixel);
center=(pixel + lastpixel)/2;
pixel=pixel+8*N/32;
lastpixel=lastpixel+8*N/32;
else
lastpixel = pixel;
pixel = drawMatrix(N,temp,pixel);
center=(pixel + lastpixel)/2;
pixel=pixel+8*N/32;
lastpixel=lastpixel+8*N/32;
end
end
end
return
function [pixel] = drawMatrix( N, A, pixel)
sf = 2;
range=max(max(A))-min(min(A));
if( range==0 )
range=1;
end
w=N*sf;
imagesc( [pixel, pixel + w], [(7*N/8)*sf, (15*N/8-1)*sf], (A-min(min(A)))/range);
pixel = pixel + w;
return
function [ pixel ] = drawVector( N, x, pixel )
sf = 2;
range=max(x)-min(x);
if( range==0 )
range=1;
end
w=sf*1*N/32;
imagesc( [pixel, pixel + w], [(7*N/8)*sf,(15*N/8-1)*sf], (x-min(x))/range );
pixel = pixel + w;
return
function [ pixel ] = drawText( N, printme, pixel )
sf = 2;
pixel = pixel + 3;
text(pixel, sf*11*N/8, printme, 'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle');
pixel = pixel + sf*N*length(printme)/64;
return
function [ ] = drawCaption( N, printme, pixel )
sf = 2;
text(pixel, sf*31*N/16, printme, 'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle');
return
function mu = eda_forgy_mean(S,K)
% create initial k-means cluster means from samples
% randomly drawn from the NxM sample matrix S
% returns KxM matrix mu of means
[N,M]=size(S);
k = unidrnd(N,K,1);
mu = S(k,1:M);
end
function [newmu,k,dist2,count,Kdist2] = eda_kmeans(S, mu)
% k-means cluster analysis of NxM sample matrix S
% starting from initial guess KxM mu of cluster means
% returns:
% newmu: new cluster means
% k: Nx1 vector of cluster assignments
% dist2: Nx1 vector of squared distance to cluster center
% count: Kx1 vector of samples in cluster
% Kdist2: KxK matrix of inter-cluster squared distances
[N, M] = size(S); % N samples with M elements
[K, M2] = size(mu); % K cluster means
k = zeros(N,1); % assoc sample w cluster
dist2 = zeros(N,1); % sq distance from cluster mean
Kdist2 = zeros(K,K); % cluster to nearest cluster sq distance
oldmu = mu; % old cluster means
MAXP = 2*K*M; % max iterations
for p=[1:MAXP] % iterate
% cluster assignment step
changes = 0; % number of changes
for i=[1:N] % loop over samples
for j=[1:K] % loop over cluster means
% deviation of sample from mean
delta = (S(i,1:M) - oldmu(j,1:M))';
% squared distance
r2 = delta'*delta;
if( j==1 ) % always accept first distance
r2min = r2;
newk = j;
elseif r2<r2min % accept if smaller
r2min = r2;
newk = j;
end
end
if k(i) ~= newk % count reassignments
changes=changes+1;
end
k(i) = newk; % reassigned cluster
dist2(i) = r2min; % sq distance to that cluster
end
% end loop over iterations on no further changes
if( (p~=1) && (changes==0) )
break;
end
% means step
newmu = zeros(K,M); % new cluster means
count = zeros(K,1); % samples in cluster
for i=[1:N] % loop over samples
for j=[1:M] % loop over elements
% cluster sum
newmu(k(i),j) = newmu(k(i),j) + S(i,j);
end
% count samples in cluster
count(k(i)) = count(k(i)) + 1;
end
for j=[1:K] % loop over clusters
if( count(j) == 0 ) % cluster with no samples
% no change in cluster mean
newmu(j,1:M) = oldmu(j,1:M);
else
% new cluster mean
newmu(j,1:M) = newmu(j,1:M)/count(j);
end
end
oldmu=newmu; % update cluster mean
end
% inter-cluster distances
for i=[1:K-1] % loop over cluster means
for j=[i+1:K] % loop over cluster means
delta = (newmu(i,1:M) - newmu(j,1:M))';
r2 = delta'*delta;
Kdist2(i,j) = r2; % sq distance between clusters
Kdist2(j,i) = r2; % sq distance between clusters
end
end
end
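% usage sketch for eda_kmeans() (illustrative, not one of the original
% .m files), starting from K samples randomly drawn by eda_forgy_mean();
% the synthetic sample matrix S is an assumption made for this example
S = [ random('Normal',0,1,100,2); random('Normal',5,1,100,2) ];
mu0 = eda_forgy_mean(S, 2);
[mu, k, dist2, count, Kdist2] = eda_kmeans(S, mu0);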
function [mymu2, myk2, mydist2, mycount2, myKdist2] = eda_kmeans_mod(S, mu)
% k-means cluster analysis of NxM sample matrix S
% modified per Bradley & Fayyad (1998) never to
% return cluster with zero samples.
% Starts with initial guess KxM mu of cluster means
% returns:
% mymu2: new cluster means
% myk2: Nx1 vector of cluster assignments
% mydist2: Nx1 vector of squared distance to cluster center
% mycount2: Kx1 vector of samples in cluster
% myKdist2: KxK matrix of inter-cluster squared distances
[N,M] = size(S);
initmu = mu;
[mymu, myk, mydist, mycount, myKdist] = eda_kmeans(S, initmu);
while min(mycount)==0 % while cluster with no samples
[t,ic]=min(mycount); % index of cluster
[t,ir]=max(mydist); % index of most distant sample
ik=myk(ir); % cluster of this sample
mymu(ic,1:M) = S(ir,1:M); % new mean
mycount(ic)=1; % reset cluster count
myk(ir)=ic; % reset sample membership
mydist(ir)=0.0; % reset sample distance
% note: in rare cases, next line could create
% infinite loop, so don't do it.
% mycount(ik)=mycount(ik)-1;
end
% redo k-means
[mymu2, myk2, mydist2, mycount2, myKdist2] = eda_kmeans(S, mymu);
end
function [N,w,b] = eda_net_1dtower(Nc,slope,Xc,Dx,h,N,w,b)
% eda_net_1dtower
% defines a network that computes a set of 1D towers
% input Nc: number of towers
% slope: slope of all the towers
% Xc: centers of the towers
% Dx: width of the towers
% h: heights of the towers
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [1, Nc*2, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w( ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2, 1:N(1),2) = [w12;w22];
w13 = h(ncvalue);
w23 = -h(ncvalue);
w(1:N(3), ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2 ,3) = [w13;w23];
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b( ((ncvalue-1)*2)+1:((ncvalue-1)*2)+2 ,2) = [b12;b22];
end
end
function [N,w,b] = eda_net_2dtower(Nc,slope,Xc,Yc,Dx,Dy,h,N,w,b)
% eda_net_2dtower
% defines a network that superimposes several towers
% input:
% Nc: number of towers
% slope: slope of all the towers
% Xc, Yc: vector of the centers of the towers in x and y
% Dx, Dy: vector of the widths of the towers in x and y
% h: vector of heights of the towers
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [2, Nc*4, Nc, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w32 = 4*slope;
w42 = 4*slope;
w( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+2, 1 ,2) = [w12 ; w22];
w( ((ncvalue-1)*4)+3:((ncvalue-1)*4)+4, 2 ,2) = [w32 ; w42];
w13 = 4*slope;
w23 = -4*slope;
w33 = 4*slope;
w43 = -4*slope;
w(ncvalue, ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4,3) = [w13;w23;w33;w43];
w14 = h(ncvalue);
w(1, ncvalue, 4) = w14;
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b32 = -w32 * ( Yc(ncvalue) - Dy(ncvalue)/2);
b42 = -w42 * ( Yc(ncvalue) + Dy(ncvalue)/2);
b( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4 ,2) = [b12;b22;b32;b42];
b13 = -(3*(4*slope))/2;
b( ncvalue,3) = b13;
end
end
function [daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a )
% eda_net_deriv
% calculates the derivatives daLmax/dw and daLmax/db
% for a network with parameters N,w,b,a
% background information
% Lmax: number of layers
% N(i): column-vector with number of neurons in each layer;
% N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
% b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
% w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
% a = zeros( Nmax, Lmax );
% end background information
% start of code
Nmax = max(N); % maximum number of neurons on a layer
Lmax = length(N); % number of layers
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
% It depends only upon properties of the neuron
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
% was dadz(1:N(i),i) = ones(1:N(i),1);
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
% so d sigma / dz = a-a^2
dadz(1:N(i),i) = a(1:N(i),i) - (a(1:N(i),i).^2);
end
end
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db: depends only on properties of the neuron
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
% was daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw';
end
end
end
end
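% finite-difference check of eda_net_deriv() (illustrative, not one of
% the original .m files): builds a small linear network with
% eda_net_init() and eda_net_linear() (defined below), perturbs one
% bias, and compares the numerical with the analytic derivative
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,0.0,[1;2],N,w,b);
a(1:N(1),1) = [0.3;0.5];            % input activities
a = eda_net_eval(N,w,b,a);          % forward pass
[daLmaxdw,daLmaxdb] = eda_net_deriv(N,w,b,a);
Db = 1e-6;                          % small bias perturbation
b2 = b;
b2(1,2) = b2(1,2) + Db;             % perturb bias of neuron 1, layer 2
a2 = eda_net_eval(N,w,b2,a);
fprintf('analytic %f numerical %f\n', daLmaxdb(1,1,2), (a2(1,4)-a(1,4))/Db );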
function [ a ] = eda_net_eval(N,w,b,a)
% evaluates network with properties N, w, b, and
% initial activities a, returns final activities, a
Nmax = max(N);
Lmax = length(N);
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
end
function [N,w,b,a] = eda_net_init(Lmax,Nmax)
% initialize a network; produces only arrays of zeros
% input:
% Lmax: number of layers
% Nmax: maximum number of neurons on any layer
% output:
% N: neurons on a layer array
% w: weights
% b: biases
% a: activities
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
end
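% usage sketch (illustrative, not one of the original .m files): build
% a single 1D tower with eda_net_1dtower() and evaluate it over a range
% of inputs with eda_net_eval(); the tower parameters are assumptions
[N,w,b,a] = eda_net_init(3,2);
[N,w,b] = eda_net_1dtower(1, 100, 0.5, 0.2, 1.0, N, w, b);
x = [0:0.01:1]';
d = zeros(length(x),1);
for i=[1:length(x)]
a(1,1) = x(i);
a = eda_net_eval(N,w,b,a);
d(i) = a(1,3);                      % output of the top (3rd) layer
end
figure(1);
clf;
plot(x,d,'k-','LineWidth',2);       % approximately 1 inside the tower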
function [N,w,b] = eda_net_linear(Nc,slope1,slope2,c,h,N,w,b)
% creates a network that computes the linear function
% d(x1, x2, ...) = c + h1*x1 + h2*x2 + h3*x3 + ...
% (good for implementing a filter)
% input Nc: number of linear components
% slope1: slope of the first stage (say 0.01)
% slope2: slope of the second stage (say 0.02)
% should be different from slope1
% c: intercept
% h: slopes
% N: layer array from eda_net_init()
% w: weights array from eda_net_init()
% b: bias array from eda_net_init()
% output: updated N, w, b arrays
N = [Nc, Nc, Nc, 1]';
b(1,4) = c;
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
end
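% quick check of eda_net_linear() (illustrative, not one of the
% original .m files): the network output should approximately
% reproduce c + h1*x1 + h2*x2; the intercept, slopes and inputs are
% assumptions made for this example
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,3.0,[1;2],N,w,b);
a(1:2,1) = [0.2;0.4];               % inputs x1 and x2
a = eda_net_eval(N,w,b,a);
fprintf('network %f exact %f\n', a(1,4), 3.0 + 1*0.2 + 2*0.4 );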
function [Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight ] = eda_net_map(N,w,b)
% builds look-up tables to allow sequential ordering
% of non-trivial biases and weights
% used to build list of model parameters used in training
% input:
% N: neurons on layer array
% w: weights
% b: biases
% output
% Nb: number of biases
% Nw: Number of non-zero weights
% (use near-zero weights to fool it)
% layer_of_bias, vector(i) specifying layer of bias i
% neuron_of_bias, vector(i) specifying neuron of bias i
% index_of_bias, 2d array(i,j) specifying number of bias
% of layer j, neuron i
% layer_of_weight, vector(i) specifying layer of weight i
% r_neuron_of_weight, vector(i) specifying right neuron of weight i
% l_neuron_of_weight, vector(i) specifying left neuron of weight i
% index_of_weight, array(i,j,k) specifying number of weight(i,j,k)
Nmax = max(N);
Lmax = length(N);
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
end
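% usage sketch for eda_net_map() (illustrative, not one of the original
% .m files), on a small network built with eda_net_init() and
% eda_net_linear(); the tables order the non-trivial biases and weights
% into one sequential list of Nb+Nw unknowns used in training
[N,w,b,a] = eda_net_init(4,2);
[N,w,b] = eda_net_linear(2,0.01,0.02,0.0,[1;2],N,w,b);
[Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight] = eda_net_map(N,w,b);
M = Nb + Nw;                        % total number of unknowns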
% view function makes a picture of a neural net
% you must set the figure and clear it before the
% call, e.g.
% figure(2); clf;
function null = eda_net_view(N,w,b)
axis off;
set(gca,'FontSize',10);
Nmax = max(N);
Lmax = length(N);
axis([ 0 (Lmax+1) 0 (Nmax+1) ]);
hold on;
scale = zeros(Lmax,1);
for x = 1:Lmax
scale(x,1) = Nmax/N(x);
end
for x = 1:Lmax
for y = 1:N(x)
%plot(x,y,'o','MarkerSize',50);
rectangle('Position',[x-.2,y*scale(x,1)-.4,.4,.8],'LineWidth',2);
text(x,y*scale(x,1),strcat('b = ', num2str( b(y,x))), 'HorizontalAlignment' , 'center');
if x ~= Lmax
for y2 = 1:N(x+1)
if w( y2 , y , x+1) ~= 0
plot([x+.2 x+.8],[y*scale(x,1) y2*scale(x+1,1)],'Color',[0.7 0.7 0.7]);
text(x+.5,(y*scale(x,1)+y2*scale(x+1,1))/2,strcat('w = ', num2str( w( y2 , y , x+1))), 'HorizontalAlignment' , 'center');
end
end
end
end
end
end
% eda_netfilter.m
% trains a neural net, initialized as a linear filter, to predict
% river discharge from precipitation
epsi = 1;
Niter=100;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 20;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'g-', 'LineWidth', 4 );
%eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_netfilter2.m
% trains a neural net, initialized as a linear filter, to predict
% river discharge from precipitation; then frees additional
% layer-2 weights and retrains
epsi = 1;
Niter=100;
epsisave=epsi;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 10;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
elseif( iter==50 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
%plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'k-', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('intermediate error %.2f with %d unknowns\n', error, Nb+Nw );
% epsi=epsisave;
for i=[1]
for j=[1:N(1)]
if( w(i,j,2) == 0 )
w(i,j,2) = random('Normal',0,0.0001,1,1);
end
end
end
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
elseif( iter==50 )
epsi=epsi / 100;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, dpre, 'k--', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('final error %.2f with %d unknowns\n', error, Nb+Nw );
figure(3);
clf;
eda_viewnet(N,w,b);
% eda_netfilter3.m
% network to match a nonlinear response function
epsi1 = 1;
epsi2 = 0.05;
Niter=100;
D = load('nonlinearresponse.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only an Nx-day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=501;
istart = 1;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
xnorm = 1/max(x);
dobsnorm = 1/max(dobs);
x = x * xnorm;
dobs = dobs * dobsnorm;
% the discharge and precipitation data
figure(1);
clf;
subplot(4,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(4,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 10;
c0 = 2;
To=3;
f = exp(-[0:Nc-1]'/To);
f = [f(1)/1000;f(1:end-1)];
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_viewnet(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==50 )
epsi1=epsi1 / 10;
end
Dm = (G'*G+epsi1*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
% plot( t, d0, 'r:', 'LineWidth', 2 );
% plot( t, dpre, 'k-', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('intermediate error %.2f with %d unknowns\n', error, Nb+Nw );
for i=[1]
for j=[1:N(1)]
if( w(i,j,2) == 0 )
w(i,j,2) = random('Normal',0,0.0001,1,1);
end
end
end
for i=[1]
for j=[1:N(1)]
if( w(i,j,3) == 0 )
w(i,j,3) = random('Normal',0,0.0001,1,1);
end
end
end
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-1)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==50 )
epsi2=epsi2/10;
end
Dm = (G'*G+epsi2*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, dpre, 'k--', 'LineWidth', 2 );
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('final error %.2f with %d unknowns\n', error, Nb+Nw );
figure(3);
clf;
eda_viewnet(N,w,b);
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
Nx=501;
istart = 499;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x * xnorm;
dobs = dobs * dobsnorm;
% the discharge and precipitation data
figure(1);
subplot(4,1,3);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(4,1,4);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, '-', 'LineWidth', 5, 'Color', [0.8,0.8,0.8] );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
plot( t, dpre, 'k--', 'LineWidth', 2 );
% eda_nonlinearresponse.m
clear all
N=1001;
Dt = 1;
t = Dt*[0:N-1]';
x = zeros(N,1);
y = zeros(N,1);
f = zeros(N,1);
Np = floor(N/10);
p = random('unid',N,Np,1);
cexp = 0.5;
x(p) = random('exp',cexp,Np,1);
figure(1);
clf;
subplot(3,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 2 ] );
plot(t,x,'k-','LineWidth',2);
cf = 0.2;
for i=[2:N]
f(i) = cf*(y(i-1)^2);
dydt = x(i) - f(i);
y(i) = y(i-1) + dydt * Dt;
if( y(i) < 0 )
y(i)=0;
end
end
subplot(3,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 1.1*max(f) ] );
plot(t,f,'k-','LineWidth',2);
subplot(3,1,3);
set(gca,'LineWidth',2);
hold on;
axis( [0, N, 0, 1.1*max(y) ] );
plot(t,y,'k-','LineWidth',2);
dlmwrite('nonlinearresponse.txt', [t,f,x], 'delimiter', '\t' );
function mu = eda_random_partition_mean(S,K)
% create initial k-mean cluster means, by K random
% partitions of NxM sample matrix S
% returns KxM matrix mu of means
[N,M]=size(S);
k = unidrnd(K,N,1); % random cluster-assignments
mu = zeros(K,M);
for i=[1:K] % loop over clusters
j = find(k==i); % samples in cluster i
mu(i,1:M) = mean(S(j,1:M),1); % their means
end
end
% eda_testfit.m
% simple least squares fit of a Gaussian with a single tower
% grid of points in (x,y)
Nx = 21;
Ny = 21;
xmax = 5;
ymax = 5;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% (x,y) index arrays for easy access
kofij=zeros(Nx,Ny); % linear index k of a particular (i,j)
iofk=zeros(Nx*Ny,1); % row index of a particular k
jofk=zeros(Nx*Ny,1); % col index of a particular k
Nxy=0;
for i=[1:Nx]
for j=[1:Ny]
Nxy=Nxy+1;
kofij(i,j)=Nxy;
iofk(Nxy)=i;
jofk(Nxy)=j;
end
end
% Gaussian
dobs = zeros(Nx,Ny);
dobsvec = zeros(Nxy,1);
xbar = 2.4;
ybar = 2.6;
amp = 5.0;
sigmax = 1.2;
sigmay = 0.8;
cx = 1/(2*sigmax*sigmax);
cy = 1/(2*sigmay*sigmay);
for i=[1:Nx]
for j=[1:Ny]
dobs(i,j) = amp*exp(-cx*((x(i)-xbar)^2) - cy*((y(j)-ybar)^2) );
dobsvec(kofij(i,j))=dobs(i,j);
end
end
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [2.5]; % Xc: the center of the tower
Yc = [2.5];
Dx = [2]; % Dx: the width of the tower
Dy = [2];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
[N,w,b] = eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b );
figure(2);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nxy,M);
% predicted data
dpre = zeros(Nx,Ny);
dprevec = zeros(Nxy,1);
% initial predicted data
d0 = zeros(Nx,Ny);
d0vec = zeros(Nxy,1);
Niter=20;
for iter=[1:Niter]
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,ky) = a(1,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kofij(kx,ky), ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kofij(kx,ky), Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x,y)
end
Dd = dobsvec - dprevec;
if( iter==1 )
d0 = dpre;
d0vec = dprevec;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter<5 )
epsi=1.0;
elseif (iter<10)
epsi=0.1;
else
epsi=0.001;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,ky) = a(1,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
end
end
eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobsvec - dprevec;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_testfit3.m
% least squares fit of a neural net to the precipitation/discharge dataset
epsi = 1;
Niter=100;
D = load('precip_discharge_data.txt');
% break out into separate variables, metric conversion
% time in days
t = D(:,1);
% discharge in cubic meters per second
dobs = D(:,2)/35.3146;
% precipitation in millimeters per day
x = D(:,3)*25.4;
% in this example, we use only a 101 day portion of the
% dataset, mainly so that when we plot it, storm events
% are clearly visible
Nx=101;
istart = 900;
t = t(istart:istart+Nx-1);
x = x(istart:istart+Nx-1);
dobs = dobs(istart:istart+Nx-1);
tmin = min(t);
tmax = max(t);
x = x / max(x);
dobs = dobs / max(dobs);
% the discharge and precipitation data
figure(1);
clf;
subplot(2,1,1);
hold on;
set(gca,'LineWidth',3);
axis( [tmin, tmax, -1, 1 ] );
plot( t, x, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('precipitation (mm/day)');
subplot(2,1,2);
set(gca,'LineWidth',3);
hold on;
axis( [tmin, tmax, -1, 1] );
plot( t, dobs, 'k-', 'LineWidth', 3 );
plot( [tmin,tmax]', [0,0]', 'k-', 'LineWidth', 2 );
xlabel('time (days)');
ylabel('discharge (m3/s)');
% build the network
Nc = 20;
c0 = 2;
To=5;
f = exp(-[0:Nc-1]'/To);
h = c0*f/sum(f);
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc ;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [Nc, Nc, Nc, 1]';
for ncvalue = 1:Nc
% these two slopes are arbitrary, as long as they are small
slope1 = 0.01;
slope2 = 0.02;
w12 = 4*slope1;
w(ncvalue, ncvalue ,2) = w12;
w13 = 4*slope2;
w(ncvalue, ncvalue ,3) = w13;
w2w3 = w12*w13;
w14 = 16*h(ncvalue)/w2w3;
w(1,ncvalue, 4) = w14;
b12 = 0;
b(ncvalue,2) = b12;
b13 = -w13/2;
b(ncvalue,3) = b13;
b14 = -8*h(ncvalue)/w2w3;
b(1,4) = b(1,4)+ b14;
end
figure(2);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
% linearized data kernel
M = Nb+Nw;
G = zeros(Nx,M);
% predicted data
dpre = zeros(Nx,1);
% initial predicted data
d0 = zeros(Nx,1);
for iter=[1:Niter]
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx,1) = a(1,4);
% da/dz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% da/da
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top of the layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% da/db
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% da/dw
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kx, ib ) = daLmaxdb(1, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kx, Nb+iw ) = daLmaxdw( 1, rnow, lnow, low );
end
end % next (x)
Dd = dobs - dpre;
if( iter==1 )
d0 = dpre;
error = Dd'*Dd;
fprintf('starting error %.2f\n', error );
end
% implement some damping to improve stability
% but relax it after a few iterations
if( iter==10 )
epsi=epsi / 10;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over x
for kx=[1:Nx]
% activities
for j=[1:N(1)]
k=kx-j+1;
if( k>0 )
a(j,1) = x(k);
else
a(j,1) = 0;
end
end
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
dpre(kx) = a(1,4);
end
figure(1);
plot( t, d0, 'r:', 'LineWidth', 2 );
plot( t, dpre, 'g-', 'LineWidth', 4 );
%eda_draw( dobs, 'caption d-obs', d0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobs - dpre;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
% eda_testnetderiv.m
% verify the activity derivatives are calculated correctly
% by comparing the analytic result to one based on finite differences
delta = 0.0001; % increment size used in finite difference calculations
onetower=2; % test code on one tower or two
if( onetower==1 )
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [1.5]; % Xc: the center of the tower
Yc = [1.5];
Dx = [1]; % Dx: the width of the tower
Dy = [1];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
else
% build the test network for two superimposed 2D towers
Nc = 2; % Two towers
Xc = [1.5 2.5]; % Xc: the centers of the towers
Yc = [1.5 2.5];
Dx = [1 1]; % Dx: the widths of the towers
Dy = [1 1];
h = [2 3]; % h: the heights of the towers
slope = 5/4; % slope: slope of the step function
end
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
% N(i): column-vector with number of neurons in each layer;
N = zeros(1,Lmax);
% biases: b(1:N(i),i) is a column-vector that gives the
% biases of the N(i) neurons on the i-th layer
b = zeros( Nmax, Lmax );
% weights: w(1:N(i),1:N(i-1),i) is a matrix that gives the
% weights between the N(i) neurons in the i-th layer and the
% N(i-1) neurons on the (i-1)-th layer. The weights assigned
% to the first layer are never used and should be set to zero
w = zeros( Nmax, Nmax, Lmax );
% activity: a(1:N(i),i) is a column-vector that gives the
% activities of the N(i) neurons on the i-th layer
a = zeros( Nmax, Lmax );
% z: z(1:N(i),i) is a column-vector that gives the
% argument of the activities of the N(i) neurons on the i-th layer
z = zeros( Nmax, Lmax );
% dada(i,j,k): the change in the activity of the i-th neuron of the layer k
% due to a change in the activity of the j-th neuron in the layer k-1
dada = zeros(Nmax,Nmax,Lmax);
% daLmaxda(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the activity of the j-th neuron in the layer k-1
daLmaxda = zeros(Nmax,Nmax,Lmax);
% dadz(i,k): the change in the activity of the i-th neuron of layer k
% due to a change in the input z of the same neuron
dadz = zeros(Nmax,Lmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
N = [2, Nc*4, Nc, 1]';
for ncvalue = 1:Nc
w12 = 4*slope;
w22 = 4*slope;
w32 = 4*slope;
w42 = 4*slope;
w( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+2, 1 ,2) = [w12 ; w22];
w( ((ncvalue-1)*4)+3:((ncvalue-1)*4)+4, 2 ,2) = [w32 ; w42];
w13 = 4*slope;
w23 = -4*slope;
w33 = 4*slope;
w43 = -4*slope;
w(ncvalue, ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4,3) = [w13;w23;w33;w43];
w14 = h(ncvalue);
w(1, ncvalue, 4) = w14;
b12 = -w12 * ( Xc(ncvalue) - Dx(ncvalue)/2);
b22 = -w22 * ( Xc(ncvalue) + Dx(ncvalue)/2);
b32 = -w32 * ( Yc(ncvalue) - Dy(ncvalue)/2);
b42 = -w42 * ( Yc(ncvalue) + Dy(ncvalue)/2);
b( ((ncvalue-1)*4)+1:((ncvalue-1)*4)+4 ,2) = [b12;b22;b32;b42];
b13 = -(3*(4*slope))/2;
b( ncvalue,3) = b13;
end
bsave = b;
wsave = w;
figure(1);
clf;
eda_net_view(N,w,b);
% build look-up tables to allow sequential ordering
% of non-trivial biases and weights
layer_of_bias = zeros(Lmax*Nmax,1);
neuron_of_bias = zeros(Lmax*Nmax,1);
index_of_bias = zeros(Nmax,Lmax);
Nb=0; % number of nontrivial biases
for k=[2:Lmax]
for j=[1:N(k)]
Nb = Nb+1;
layer_of_bias(Nb)=k;
neuron_of_bias(Nb)=j;
index_of_bias(j,k)=Nb;
end
end
Nw=0; % number of nontrivial weights
layer_of_weight = zeros(Lmax*Nmax*Nmax,1);
r_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
l_neuron_of_weight = zeros(Lmax*Nmax*Nmax,1);
index_of_weight = zeros(Nmax,Nmax,Lmax);
for k=[2:Lmax]
for i=[1:N(k)]
for j=[1:N(k-1)]
if( w(i,j,k) ~= 0 )
Nw = Nw+1;
layer_of_weight(Nw)=k;
r_neuron_of_weight(Nw)=i;
l_neuron_of_weight(Nw)=j;
index_of_weight(j,i,k)=Nw;
end
end
end
end
fprintf('Biases %d Weights %d\n', Nb, Nw );
% grid of points in (x,y) (but only one point is used)
Nx = 50;
Ny = 50;
xmax = 3.1;
ymax = 3.1;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% choose a point in (x,y) and calculate activities
kx = 23;
ky = 22;
a(1:N(1),1) = [x(kx);y(ky)];
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
% save activities
asave=a;
% Derivative calculation: Simple but tedious application of the chain rule
% Note that in the comments below, we have suppressed most indices, so that
% the comments alone don't communicate which quantities are scalars and which
% are vectors. However, this can be determined by examining the subsequent
% line(s) of code. Note that the chain rule for a vector function a(b(c)) is
% da(i)/dc(j) = (sum k) da(i)/db(k) db(k)/dc(j). Note also that for
% a linear vector function b(i) = (sum j) M(i,j) a(j), the derivative
% is db(i)/da(j) = M(i,j)
% dadz
for i = [1:Lmax]
if (i==Lmax)
% sigma function is sigma(z)=z for top layer
% d sigma / dz = 1
dadz(1:N(i),i) = ones(N(i),1);
else
% sigma function is sigma(z)=(1+exp(-z))^(-1) for other layers
% d sigma / dz = (-1)(-1) exp(-z)(1+exp(-z))^(-2)
% = exp(-z) (a^2)
% and note exp(-z) = (1/a)-1
dadz(1:N(i),i) = (a(1:N(i),i).^2).*((1./a(1:N(i),i))-1);
end
end
% "backpropagation" of activity derivatives
% the derivative that we really want is daLmaxda
% (the change in the layer Lmax activities with other layer activities)
% but we begin calculating dada = da(layer i) / da(layer i-1)
% and then use the chain rule to accumulate them, starting with
% the top layer, e.g.
% da(layer Lmax) / da(layer Lmax-2) =
% da(layer Lmax) / da(layer Lmax-1) * da(layer Lmax-1) / da(layer Lmax-2)
% so the loop is from the last layer towards the first
for i = [Lmax:-1:2]
if i == Lmax
% a=z in the top layer, and
% z = (sum) w a + b
% so da(layer i)/da(layer i-1) = dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = w(1:N(i),1:N(i-1),i);
% initialize daLmaxda to prepare for backpropagation
daLmaxda(1:N(i),1:N(i-1),i) = dada(1:N(i),1:N(i-1),i);
else
% a=sigma(z) in the other layers, so use the chain rule
% da/da = da(layer)/dz * dz/da(layer i-1)
% where as before dz/da(layer i-1) = w
dada(1:N(i),1:N(i-1),i) = diag(dadz(1:N(i),i))*w(1:N(i),1:N(i-1),i);
% backpropagate
daLmaxda(1:N(Lmax),1:N(i-1),i) = ...
daLmaxda(1:N(Lmax),1:N(i),i+1)*dada(1:N(i),1:N(i-1),i);
end
end
% bias derivatives
for i = [1:Lmax]
if( i==Lmax )
% a=z in the top layer, and z(layer i)= w *a(layer i-1) + b
% so da(layer i)/db = dz(layer i)/db = 1
daLmaxdb(1:N(Lmax),1:N(Lmax),Lmax) = eye(N(Lmax),N(Lmax));
else
% a = sigma(z) in the other layers, so use the chain rule
% da(layer i)/db = da(layer i)/dz * dz/db
% where as before dzdb=1
dzdb = 1;
dadb = dadz(1:N(i),i) * dzdb;
% then apply the chain rule for activities
% da(layer Lmax)/db = da(layer Lmax)/da(layer i) * da(layer i)/db
daLmaxdb(1:N(Lmax),1:N(i),i) = daLmaxda(1:N(Lmax),1:N(i),i+1) * diag(dadb);
end
end
% check of bias derivatives using finite differences
fprintf('\n');
fprintf('bias derivatives\n');
Nbad=0;
for ib=[1:Nb]
% restore old biases
b = bsave;
% execute network with one bias perturbed
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob,lob) = b(nob,lob) + delta;
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
Dnum = (a(1,4) - asave(1,4))/delta;
Dana = daLmaxdb(1,nob,lob);
error=(Dnum-Dana)/Dnum;
if( abs(error)>0.05 )
Nbad = Nbad + 1;
end
fprintf('%d neuron %d layer %d num %.4f ana %.4f error %.4f\n',ib,nob,lob,Dnum,Dana,error);
end
fprintf('%d bad bias derivatives\n',Nbad);
fprintf('\n');
% calculation of weight derivatives
% since z(layer i) = w*a(layer i-1) + b
% dz(layer i)/dw = a(layer i-1)
for l = [2:Lmax] % for simplicity, loop over right neuron
if( l==Lmax )
% in the top layer, a=z, so da/dw = dz/dw = a
for j = [1:N(l)]
daLmaxdw(j,j,1:N(l-1),l) = a(1:N(l-1),l-1);
end
else
% in the other layers, use chain rule
% da/dw = da/dz * dz/dw
% and then the chain rule again
% da(layer Lmax)/dw = da(layer Lmax)/da(layer i) * da(layer i)/dw
for j = [1:N(l)]
dzdw = a(1:N(l-1),l-1);
dadw = dadz(j,l) * dzdw;
daLmaxdw(1:N(Lmax),j,1:N(l-1),l) = daLmaxda(1:N(Lmax),j,l+1) * dadw;
end
end
end
b = bsave; % restore old biases
% check of weight derivatives using finite differences
fprintf('weight derivatives\n');
Nbad = 0;
for iw=[1:Nw]
% execute network with one weight perturbed
low=layer_of_weight(iw);
rnow=r_neuron_of_weight(iw);
lnow=l_neuron_of_weight(iw);
% restore old weights
w = wsave;
w(rnow,lnow,low) = w(rnow,lnow,low) + delta;
for i=[2:Lmax]
z(1:N(i),i) = w(1:N(i),1:N(i-1),i) * a(1:N(i-1),i-1) + b(1:N(i),i);
if i < Lmax
a(1:N(i),i) = 1.0./(1.0+exp(-z(1:N(i),i)));
else
a(1:N(i),i) = z(1:N(i),i);
end
end
Dnum = (a(1,4) - asave(1,4))/delta;
Dana = daLmaxdw(1,rnow,lnow,low);
error=(Dnum-Dana)/Dnum;
fprintf('%d r-neuron %d l-neuron %d layer %d num %.4f ana %.4f error %.4f\n',iw, rnow,lnow,low,Dnum,Dana,error);
if( abs(error)>0.05 )
Nbad = Nbad + 1;
end
end
fprintf('%d bad weight derivatives\n',Nbad);
function [ tBmA_est, Cmax_est ] = timedelay( uA, uB, Dt )
% time delay between two similar timeseries, by
% cross-correlation followed by parabolic refinement.
% delay is positive when pulse on B is later than pulse on A
% input:
% uA, uB, two signals to be compared
% should have zero mean
% if of unequal length, the shorter is zero padded
% Dt, sampling of both signals
% output
% tBmA_est, estimated time delay
% Cmax_est, cross-correlation at estimated delay
c = xcorr(uA, uB); % cross-correlate
NA = length(uA); % length of uA
NB = length(uB); % length of uB
Nm = max([NA,NB]); % max of two lengths
Nc = length(c); % length of cross-correlation
% lag to nearest sample
[cmax, icmax] = max(c); % determine location of maximum
tBmArough = -Dt * (icmax-Nm); % time lag
% parabolic refinement
% (c-cmax) = m(1) + m(2) dt + m(3) dt^2
% d(c-cmax)/ddt = 0 = m(2) + 2 m(3) dt
% dt = -0.5*m(2)/m(3)
% c = cmax + m(1) + m(2) dt + m(3) dt^2
G = [ [1, 1, 1]', Dt*[-1, 0, 1]', (Dt^2)*[1, 0, 1]' ];
d = [ c(icmax-1)-cmax, c(icmax)-cmax, c(icmax+1)-cmax ]';
m = (G'*G)\(G'*d);
dt = -0.5*m(2)/m(3);
Cmax_est = cmax + m(1) + m(2)*dt + m(3)*(dt^2);
tBmA_est = tBmArough - dt;
end
% eda11_13b.m
% modification of eda11_13 to test the case of two output neurons
% grid of points in (x,y)
Nx = 21;
Ny = 21;
xmax = 5;
ymax = 5;
xmin = 0;
ymin = 0;
x = xmin + (xmax-xmin)* [0:Nx-1]'/(Nx-1);
y = ymin + (ymax-ymin)* [0:Ny-1]'/(Ny-1);
% (x,y) index arrays for easy access
kofij=zeros(Nx,Ny); % linear index k of a particular (i,j)
iofk=zeros(Nx*Ny,1); % row index of a particular k
jofk=zeros(Nx*Ny,1); % col index of a particular k
Nxy=0;
for i=[1:Nx]
for j=[1:Ny]
Nxy=Nxy+1;
kofij(i,j)=Nxy;
iofk(Nxy)=i;
jofk(Nxy)=j;
end
end
% Gaussian
dobs = zeros(Nx,Ny);
dobsvec = zeros(Nxy,1);
xbar = 2.4;
ybar = 2.6;
amp = 5.0;
sigmax = 1.2;
sigmay = 0.8;
cx = 1/(2*sigmax*sigmax);
cy = 1/(2*sigmay*sigmay);
for i=[1:Nx]
for j=[1:Ny]
dobs(i,j) = amp*exp(-cx*((x(i)-xbar)^2) - cy*((y(j)-ybar)^2) );
dobsvec(kofij(i,j))=dobs(i,j);
end
end
% build the test network for a single 2D tower
Nc = 1; % One tower
Xc = [2.5]; % Xc: the center of the tower
Yc = [2.5];
Dx = [2]; % Dx: the width of the tower
Dy = [2];
h = [1]; % h: the height of the tower
slope = 5/4; % slope: slope of the step function
% ---Neural Net definitions---
% Lmax: number of layers
Lmax = 4;
% Nmax: maximum number of neurons in any layer
Nmax = Nc * 4;
[N,w,b,a] = eda_net_init(Lmax,Nmax);
% daLmaxdb(i,j,k): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the bias of the j-th neuron in the layer k
daLmaxdb = zeros(Nmax,Nmax,Lmax);
% daLmaxdw(i,j,k,l): the change in the activity of the i-th neuron in layer Lmax
% due to a change in the weight connecting the j-th neuron in the layer l
% with the k-th neuron of layer l-1
daLmaxdw = zeros(Nmax,Nmax,Nmax,Lmax);
[N,w,b] = eda_net_2dtower( Nc, slope, Xc, Yc, Dx, Dy, h, N, w, b );
% for test purposes, activate a second neuron on the top layer
N(Lmax)=2;
w(2,1,Lmax)=w(1,1,Lmax);
b(2,Lmax)=b(1,Lmax);
% target=1: the data are the output of the first neuron; 2 the second
% they should produce the same results
target=2;
fprintf('target %d\n', target );
% loop over (x,y)
dpre0 = zeros(Nx,Ny);
dpre0vec = zeros(Nxy,1);
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
a = eda_net_eval( N,w,b,a);
dpre0(kx,ky) = a(2,4);
dpre0vec(kofij(kx,ky))=dpre0(kx,ky);
end
end
Dd = dobsvec-dpre0vec;
error0 = Dd'*Dd;
fprintf('starting error %.2f\n', error0 );
figure(2);
clf;
eda_net_view(N,w,b);
[ Nb,Nw,layer_of_bias,neuron_of_bias,index_of_bias,layer_of_weight, ...
r_neuron_of_weight,l_neuron_of_weight,index_of_weight ] = eda_net_map(N,w,b);
% linearized data kernel
M = Nb+Nw;
G = zeros(Nxy,M);
% predicted data
dpre = zeros(Nx,Ny);
dprevec = zeros(Nxy,1);
Niter=20;
for iter=[1:Niter]
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);x(ky)];
a = eda_net_eval( N,w,b,a);
dpre(kx,ky) = a(target,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
[daLmaxdw,daLmaxdb] = eda_net_deriv( N,w,b,a );
% biases first
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
G(kofij(kx,ky), ib ) = daLmaxdb(target, nob, lob );
end
% weights second
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
G(kofij(kx,ky), Nb+iw ) = daLmaxdw( target, rnow, lnow, low );
end
end % next (x,y)
end
Dd = dobsvec - dprevec;
% implement some damping to improve stability
% but relax it after a few iterations
if( iter<5 )
epsi=1.0;
elseif (iter<10)
epsi=0.1;
else
epsi=0.001;
end
Dm = (G'*G+epsi*eye(M,M))\(G'*Dd);
% add in bias perturbations
for ib=[1:Nb]
nob=neuron_of_bias(ib);
lob=layer_of_bias(ib);
b(nob, lob) = b(nob, lob) + Dm(ib);
end
% add in weights perturbations
for iw=[1:Nw]
rnow = r_neuron_of_weight(iw);
lnow = l_neuron_of_weight(iw);
low = layer_of_weight(iw);
w(rnow, lnow, low) = w(rnow, lnow, low) + Dm( Nb+iw );
end
end % next iteration
% loop over (x,y)
for kx=[1:Nx]
for ky=[1:Ny]
% activities
a(1:N(1),1) = [x(kx);y(ky)];
a = eda_net_eval( N,w,b,a);
dpre(kx,ky) = a(target,4);
dprevec(kofij(kx,ky))=dpre(kx,ky);
end
end
eda_draw( dobs, 'caption d-obs', dpre0, 'caption d-start',dpre, 'caption d-pre');
Dd = dobsvec - dprevec;
error = Dd'*Dd;
fprintf('ending error %.2f\n', error );
function [yr, jday, hr, mn, sc, ms] = etoh( e )
% this script is the inverse of htoe()
% compute the year, julian day, hour, minute, second, millisecond
% given the epoch time (time in seconds after midnight, January 1, 1970)
%
% input:
% e, the number of seconds after January 1, 1970.
%
% output:
% yr, an integer, e.g. 2010
% jday, an integer, the day of the year, where January 1 is 1
% hr, an integer, the hour of the day, starting with 0
% mn, an integer, the minute of the hour, starting with 0
% sc, an integer, the second of the minute, starting with 0
% ms, an integer, the milliseconds of the second, starting with 0
%
%
% Note 1: the monthday() script can calculate month and day from julian day
%
% Note 2: The calculation does not include leap seconds. It will give correct
% dates/times for GPS and TAI clock standards, but will be off for the UTC standard.
% Typically, this function is used in conjunction with htoe() to increment
% a date/time by a time interval, that is:
% epoch1 = htoe( yr1, jday1, hr1, mn1, sc1, ms1 );
% epoch2 = epoch1+interval;
% [yr2, jday2, hr2, mn2, sc2, ms2] = etoh( epoch2 );
% The overall calculation could be off by as much as 2 seconds per length of the
% interval in years.
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
if( e<0 )
sprintf('error: negative epochs not implemented')
return;
end
epoch=0.0;
frac = e - floor(e);
i=1970;
while (i<5000)
epoch=epoch+365.0*86400.0;
if( isleap(i) )
epoch=epoch+86400.0;
end
if( epoch>e )
break;
end
i=i+1;
end
yr=i; mo=1; day=1; hr=0; mn=0; sc=0; ms=0;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [1:12]
epoch=epoch+86400.0*daysinmonth(i);
if( (i==2) && isleap(yr) )
epoch=epoch+86400.0;
end
if( epoch>e)
break;
end
end
mo=i;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
j=daysinmonth(mo);
if( (mo==2) && isleap(yr) )
j=j+1;
end
for i = [1:j]
epoch=epoch+86400.0;
if( epoch>e)
break;
end
end
day=i;
jday = julian( yr, mo, day );
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i= [0:23]
epoch=epoch+3600.0;
if( epoch>e )
break;
end
end
hr=i;
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [0:59]
epoch=epoch+60.0;
if( epoch>e)
break;
end
end
mn=i;
epoch=htoe(yr, jday, hr, mn, sc, ms);
for i = [0:60]
epoch=epoch+1.0;
if( epoch>e)
break;
end
end
sc=i;
ms = floor((1000.0*frac)+0.5);
return;
function y = filterfun(v,transp_flag)
global e g H;
% get dimensions
N = length(g);
M = length(v);
[K, M2] = size(H);
temp1=conv(g,v); % G v is of length N
a=temp1(1:N);
b=e*H*v; % H v is of length K
temp2=conv(flipud(g),a); % GT (G v) is of length M
a2 = temp2(N:N+M-1);
b2 = e*H'*b; % e HT (e H v) is of length M
% y = FT F v = GT G v + e^2 HT H v
y = a2+b2;
return
function [FTf] = filterfunRHS(g,dobs,e,H,h,M)
% companion function for filterfun, does RHS
% find f in g * f = dobs with prior information e H f = e h
% with damping e = sigma_d / sigma_h
% M is the number of unknowns; that is, the length of f
% note that GT v is similar to the convolution G v = g*v
% except that g is time-reversed and the position of the
% result in the convolution is shifted to the bottom
N = length(dobs);
temp=conv(flipud(g),dobs);
FTfa = temp(N:N+M-1);
FTfb = (e^2)*H'*h;
FTf=FTfa+FTfb;
return
% function to perform the multiplication FT (F v)
function y = FTFmul(v,transp_flag)
global edaFsparse;
temp = edaFsparse*v;
y = edaFsparse'*temp;
return
function [epoch] = htoe(year, jday, hour, min, sec, msec)
% converts time in year, julian day, hour, minute, second, millisecond format
% to time in seconds after midnight, January 1, 1970.
%
% input:
% year, an integer, e.g. 2010
% jday, an integer, the day of the year, where January 1 is 1
% hour, an integer, the hour of the day, starting with 0
% min, an integer, the minute of the hour, starting with 0
% sec, an integer, the second of the minute, starting with 0
% msec, an integer, the milliseconds of the second, starting with 0
% output:
% epoch, the number of seconds after midnight, January 1, 1970.
%
% example: January 10, 2010 at 01:02:03.040
% e = htoe( 2010, 10, 1, 2, 3, 40)
%
% Note 1: the julian() script can calculate julian day from year-month-day
%
% Note 2: The calculation does not include leap seconds. It will give correct
% epoch times for GPS and TAI clock standards, but will be off for the UTC standard.
% When two epoch times are subtracted to calculate a time interval, the length
% of the interval could be incorrect by as much as 2 seconds per year.
epoch=0.0;
if( year<1970 )
sprintf('sorry: cant handle pre-1970 data')
return;
end
days=0;
if( year > 1970 )
for i = [1970:year-1]
days=days+365;
if( isleap(i) )
days=days+1;
end
end
end
days=days+(jday-1);
epoch = (86400.0*days) + (hour*3600.0) + (min*60.0) + sec + msec/1000.0;
return
% interpolate_reynolds.m
% read, interpolate, and output the Reynolds Channel Water Quality dataset
% columns are days, precip, air_temp, water_temp, salinity, turbidity,
% chlorophyll
% load data
D = load('reynolds_uninterpolated.txt');
[N, Ncols] = size(D);
Dt=1;
Di=D;
% interpolate missing values with interp1 (linear interpolation)
for i = [2:Ncols]
% ensure data in first and last rows
if( D(1,i) < 0 )
D(1,i)=0;
end
if( D(N,i) < 0 )
D(N,i)=0;
end
good = find( D(:,i)>=0 );
x = D(good,1);
y = D(good,i);
z = interp1(x,y,D(:,1));
Di(:,i)=z;
end
t = Di(:,1); % time, days
p = Di(:,2); % precipitation, inches
at = Di(:,3); % air temperature, deg C
wt = Di(:,4); % water temperature, deg C
s = Di(:,5); % salinity, practical salinity units
tb = Di(:,6); % turbidity,
c = Di(:,7); % chlorophyll, micrograms per liter
figure(1);
clf;
subplot(6,1,1);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(p), max(p)] );
plot( t, p, 'k-', 'LineWidth', 2 );
ylabel('precip');
subplot(6,1,2);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(at), max(at)] );
plot( t, at, 'k-', 'LineWidth', 2 );
ylabel('T-air');
subplot(6,1,3);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(wt), max(wt)] );
plot( t, wt, 'k-', 'LineWidth', 2 );
ylabel('T-water');
subplot(6,1,4);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(s), max(s)] );
plot( t, s, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('salinity');
subplot(6,1,5);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(tb), max(tb)] );
plot( t, tb, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('turbidity');
subplot(6,1,6);
set(gca,'LineWidth',2);
hold on;
axis( [t(1), t(N), min(c), max(c)] );
plot( t, c, 'k-', 'LineWidth', 2 );
xlabel('time, days');
ylabel('chlorophyll');
dlmwrite('reynolds_interpolated.txt', Di, 'delimiter','\t' );
function [leap] = isleap(year)
% returns 1 if year is a leap year and zero otherwise
% input:
% year, an integer, the year (e.g. 2010)
% output:
% leap, a logical value
leap = ((year-4*floor(year/4))==0) && ((year-100*floor(year/100))~=0) || ((year-400*floor(year/400))==0);
return
function [jday] = julian( year, month, day )
% this script is the inverse of monthday()
% return the julian day given the year, month and day
%
% input:
% year, an integer, e.g. 2010
% month, an integer, with January being 1
% day, an integer, with the first day of the month being 1
%
% output:
% jday, an integer, the julian day of the year, with Jan 1 being 1
%
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
leap = isleap(year);
jday=0;
if ( month>1 )
for i = [1:month-1]
jday=jday+daysinmonth(i);
if( leap && (i==2) )
jday=jday+1;
end
end
end
jday = jday + day;
return;
function [month, day] = monthday( year, jday )
% the month and the day given the julian day
%
% input:
% year, an integer, the year, e.g. 2010
% jday, an integer, the julian day
%
% output:
% month, an integer, the month, with January being 1
% day, an integer, the day of the month, starting at 1
%
daysinmonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
leap = isleap(year);
day=jday;
for i = [1:12]
dim=daysinmonth(i);
if( leap && (i==2) )
dim=dim+1;
end
if( day <= dim )
break;
end
day = day - dim;
end
month=i;
return