# edapy_04_00 clear all variables and import vatious modules

# History
# 2022/06/26 -  checked with most recent Python & etc. software
# 2023/07/13 -  fixed some issues, incl colormap, loglof plt, eda_cvec()
# 2024/05/26 -  Patches to accomodate new verions of numpy, scipy, matplotlib
#               patched dot product to return scalar value

%reset -f
import os
from datetime import date
from math import exp, pi, sin, sqrt, floor, ceil
import numpy as np
import scipy.linalg as la
from matplotlib import pyplot as plt
import matplotlib
from matplotlib.colors import ListedColormap

# eda_draw function makes a "pictorial matrix equation"
# arguments are vectors, matrices and strings
# which are plotted in the order that the appear
# except that strings starting with 'title ' are plotted
# under the subseqeunt matrix or vector
# always returns a status of 1
def eda_draw(*argv):
    bw = np.zeros((256,4));
    v = 0.9*(256 - np.linspace( 0, 255, 256 ))/255;
    bw[:,0] = v;
    bw[:,1] = v;
    bw[:,2] = v;
    bw[:,3] = np.ones(256);
    bwcmap = ListedColormap(bw);
    # size of plot
    W = 16;
    H = 4;
    fig1 = plt.figure(1);
    # figsize width and height in inches
    fig1.set_size_inches(W,H);
    ax1 = plt.subplot(1,1,1);
    plt.axis([0, W, -H/2, H/2]);
    plt.axis('off');
    LM = W/6;    # matrix width and heoght
    LV = W/40;   # vector width
    FS = 0.12;    # character width
    TO = 0.4;    # title vertical offset
    SP = 0.2;    # space between objects
    LS = 0.2;    # leading space
    p = LS; # starting x-position
    istitle=0; # flags presence of a title
    for a in argv:
        if isinstance(a,np.ndarray):
            sh = np.shape(a);
            if len(sh) == 1:  # conversion to nx1 array
                n = sh[0];
                m = 1;
                ap = a;
                a = np.zeros((n,1));
                a[:,0] = ap;
            else:
                n = sh[0];
                m = sh[1];
            if m==1:
                pold=p;
                left=p;
                right=p+LV;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LV;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
            else:
                pold=p;
                left=p;
                right=p+LM;
                bottom=-LM/2;
                top=LM/2;
                plt.imshow( a, cmap=bwcmap, vmin=np.min(a), vmax=np.max(a), extent=(left,right,bottom,top) );
                p = p+LM;
                pm = (p+pold)/2;
                if istitle:
                    plt.text(pm,-(LM/2)-TO,titlestr,horizontalalignment='center');
                    istitle=0;
                p = p+SP;
        elif isinstance(a,str):
            ns = len(a);
            istitle=0;
            if( ns>=6 ):
                if 'title ' in a[0:6]:
                    istitle=1;
                    titlestr=a[6:];
            if( istitle != 1):
                plt.text(p,0,a);
                p = p + ns*FS + SP;
    plt.show();
    return 1;

# function to make a numpy N-by-1 column vector
# c=eda_cvec(v1, v2, ...) from a list of several
# array-like entities v1, v2, including a number
# a list of numbers, a tuple of numbers, an N-by-0 np array
# and a N-by-1 np array. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it
# It promotes integers to floats, and integers
# and floats to complex, by context.
# This version concatenates many argments,
# whereas c=eda_cvec1(v1) takes just one argiment.
# I recommend always using eda_cvec(v1, v2, ...)
def eda_cvec(*argv):
    t = int;
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        Nt = Nt + N;
        if( N==0 ):
            continue; # skip vector of zero length
        if (t==int) and isinstance(v[0,0],float):
            t=float;
        elif isinstance(v[0,0],complex):
            t=complex;
    w = np.zeros((Nt,1),dtype=t);
    Nt = 0;
    for a in argv:
        v = eda_cvec1(a);
        N,M = np.shape(v);
        w[Nt:Nt+N,0] = v[0:N,0];
        Nt = Nt + N;
    return w;

# function to make a numpy N-by-1 column vector
# c=eda_cvec1(v) from entity v that is array-like,
# including a number, a list of numbers, a tuple
# of numbers, an N-by-0 np array and a N-by1 np array.
# It promotes integers to floats, and integers
# and floats to complex, by context. The function
# also insures that, if v is an np array, that
# c is a copy, as contrasted to a view, of it.
# This version takes just one input argment.
# whereas c=eda_cvec(v1,v2,...) concatenates
# many argiments.
def eda_cvec1(v):
    if isinstance(v, int) or isinstance(v, np.int32):
        w = np.zeros((1,1),dtype=int);
        w[0,0] = v;
        return w;
    elif isinstance(v, float):
        w = np.zeros((1,1),dtype=float);
        w[0,0] = v;
        return w;
    elif isinstance(v, complex):
        w = np.zeros((1,1),dtype=complex);
        w[0,0] = v;
        return w;
    elif isinstance(v, np.ndarray):
        s = np.shape(v);
        if len(s) == 1:
            return np.copy(np.reshape(v,(s[0],1)));
        else:
            [r,c]=s;
            if( c==1 ):
                return(np.copy(v));
            elif(r==1):
                return(np.copy(v.T));
            else:
                raise TypeError("eda_cvec: %d by %d ndarray not allowed" % (r, c));
    elif isinstance(v, list):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(v, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: list contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    elif isinstance(v, tuple):
        r = len(v);
        t = int;
        for vi in v:
            if isinstance(vi,int) or isinstance(v, np.int32):
                pass;
            elif isinstance(vi,float):
                t=float;
            elif isinstance(vi,complex):
                t=complex;
                break;
            else:
                raise TypeError("eda_cvec: tuple contains unsupported type %s" % type(vi));
        w = np.zeros((r,1),dtype=t);
        w[:,0] = v;
        return w;
    else:
        raise TypeError("eda_cvec: %s not supported" % type(v));

# edapy_04_01: data kernel for straight-line linear model

# make the auxillary variable x
N=11;
x = eda_cvec( np.linspace(0,N-1,N) );

# manualy set the columns of G
G = np.zeros((N,2));
# Note: In order to set a column of a matrix
# to a Nx1 column vector, the view of the matrix
# must have a shape of Nx1, e.g. G[0:N,0:1]
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = x;

print(G);

[[ 1.  0.]
 [ 1.  1.]
 [ 1.  2.]
 [ 1.  3.]
 [ 1.  4.]
 [ 1.  5.]
 [ 1.  6.]
 [ 1.  7.]
 [ 1.  8.]
 [ 1.  9.]
 [ 1. 10.]]

# edapy_04_02: data kernel for quadratic-curve linear model

# make the auxillary variable x
N=11;
x = eda_cvec( np.linspace(0,N-1,N) );

# manualy set the columns of G
G = np.zeros((N,3));
# Note: In order to set a column of a matrix
# to a Nx1 column vector, the view of the matrix
# must have a shape of Nx1, e.g. G[0:N,0:1]
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = x;
G[0:N,2:3] = np.power(x,2);

print(G);

[[  1.   0.   0.]
 [  1.   1.   1.]
 [  1.   2.   4.]
 [  1.   3.   9.]
 [  1.   4.  16.]
 [  1.   5.  25.]
 [  1.   6.  36.]
 [  1.   7.  49.]
 [  1.   8.  64.]
 [  1.   9.  81.]
 [  1.  10. 100.]]

# edapy_04_03: data kernel for polynomial linear model

# make the auxillary variable x on varange (0,1)
N=32;
x = eda_cvec( np.linspace(0,1,N) );
Dx = x[1,0]-x[0,0];

M=N;  # square G

G = np.zeros((N,M)); # create a G containing zeroes
# Note: In order to set a column of a matrix
# to a Nx1 column vector, the view of the matrix
# must have a shape of Nx1, e.g. G[0:N,0:1]
G[0:N,0:1] = np.ones((N,1)); # handle 1st column individually
for i in range(1,M): # loop over remaining columns
    G[0:N,i:i+1] = np.power(x,i); # x raised to (i-1) power

eda_draw('title G',G);

# edapy_04_04: data kernel for Fourier sum linear model

# make the auxillary variable x on varange (0,1)
N=32;
x = eda_cvec( np.linspace(0,1,N) );
Dx = x[1,0]-x[0,0];
xmax = x[N-1,0];

M=N-2;  # G is not square, since lambda=infinity case is omitted

# wavelengths lambda
Mo2 = floor(M/2);
lam= eda_cvec( 2*xmax * np.reciprocal( np.linspace(1,Mo2,Mo2) ) );

G = np.zeros((N,M));   # create matrix of zeros
for i in range(0,Mo2): # loop over (cos,sin) pairs
    icos = 2*i;   # column of the cos in th i-th pair
    isin = 2*i+1; # column of the sin in th i-th pair
    G[0:N,icos:icos+1] = np.cos(2*pi*x/lam[i,0]);
    G[0:N,isin:isin+1] = np.sin(2*pi*x/lam[i,0]);

eda_draw('title G',G);

# edapy_04_05: data kernel viewed as concatenation of columns

# make the auxillary variable x on varange (0,1)
N=32;
x = eda_cvec( np.linspace(0,1,N) );
Dx = x[1,0]-x[0,0];
xmax = x[N-1,0];

M=N-2;  # G no suqare, since lambda=infinity case is omitted
Mo2 = floor(M/2);

# wavelengths lambda
lam = eda_cvec( 2*xmax * np.reciprocal( np.linspace(1,Mo2,Mo2) ) );

G = np.zeros((N,M));
# Note: In order to set a column of a matrix
# to a Nx1 column vector, the view of the matrix
# must have a shape of Nx1, e.g. G[0:N,0:1]
for i in range(1,Mo2): # loop over (cos,sin) pairs
    icos = 2*i;   # column of the cos in th i-th pair
    isin = 2*i+1; # column of the sin in th i-th pair
    G[0:N,icos:icos+1] = np.cos(2*pi*x/lam[i,0]);
    G[0:N,isin:isin+1] = np.sin(2*pi*x/lam[i,0]);
    
g1 = eda_cvec( G[:,0] );
g2 = eda_cvec( G[:,1] );
g3 = eda_cvec( G[:,2] );
g4 = eda_cvec( G[:,3] );
gM = eda_cvec( G[:,M-1] );

eda_draw('title G',G,'=','title g1',g1,'title g2',g2,'title g3',g3,'title g4',g4,'. . .','title gM',gM);

# edapy_04_06: data kernel for the weighted average linear model
# note: this implementation requires an odd number of weights
# but does not require them to be symmetric

N=32;
M=N; # G is square

# case 1: 3-point average
w  = eda_cvec( 1, 2, 1 ); # unnormalized weights
La, i = np.shape(w);      # length of average
Lw = floor(La/2);         # position of central weight
n = np.sum(w);            # sum of weights
w = w/n;                  # normalized weights
wf = np.flipud(w);        # weights in reversed order
c = np.zeros((N,1));      # initialize left column of G
r = np.zeros((M,1));      # initialize top row of G
c[0:Lw+1,0:1]=wf[Lw:La,0:1]; # copy weights to c
r[0:Lw+1,0:1]=w[Lw:La,0:1];  # copy weights to r
G = la.toeplitz(c,r.T);      # data kernel G is toeplitz

G3 = np.copy(G);  # copy for display purposes

# case 2: 5-point average
w  = eda_cvec( 1, 2, 3, 2, 1 );   # unnormalized weights
La, i = np.shape(w);       # length of average
Lw = floor(La/2);          # position of central weight
n = np.sum(w);             # sum of weights
w = w/n;                   # normalized weights
wf = np.flipud(w);         # weights in reversed order
c = np.zeros((N,1));       # initialize left column of G
r = np.zeros((M,1));       # initialize top row of G
c[0:Lw+1,0:1]=wf[Lw:La,0:1]; # copy weights to c
r[0:Lw+1,0:1]=w[Lw:La,0:1];  # copy weights to r
G = la.toeplitz(c,r.T);      # data kernel G is toeplitz

G5 = np.copy(G);  # copy for display purposes

# case 3: 9-point average
w  = eda_cvec( 1, 2, 3, 4, 5, 4, 3, 2, 1 );   # unnormalized weights
La, i = np.shape(w);        # length of average
Lw = floor(La/2);           # length up to central weight
n = np.sum(w);              # sum of weights
w = w/n;                    # normalized weights
wf = np.flipud(w);          # weights in reversed order
c = np.zeros((N,1));        # initialize left column of G
r = np.zeros((M,1));        # initialize left column of G
c[0:Lw+1,0:1]=wf[Lw:La,0:1]; # only the first Lw element of the col are non-zero
r[0:Lw+1,0:1]=w[Lw:La,0:1];  # only the first Lw element of the row are non-zero
G = la.toeplitz(c,r.T);      # data kernel G is toeplitz

G9 = np.copy(G);  # copy for display purposes

eda_draw('title 3-pt',G3,'title 5-pt',G5,'title 9-pt',G9);

# edapy_04_07, prediction error
# reads in noisy, linear data from file
# and computes and plots prediction error for a particular
# choice of intercept and slope, m1 and m2

# model paramerters are intercept and slope
M=2;
mest = eda_cvec( [1.1, 2.3] );  # model estimate

# load data from file
T = np.genfromtxt('../data/linedata01.txt', delimiter='\t')
[N, K]=T.shape;
x = eda_cvec( T[:,0] );    # auxiliary varible
dobs = eda_cvec( T[:,1] ); # observed data

# linear data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = x;

dpre = np.matmul(G,mest);  # predicted data
e = dobs-dpre;             # individual errors
E = np.matmul(e.T,e); E=E[0,0];  # total error

#  plot data
fig1 = plt.figure(1,figsize=(8,8));
ax1 = plt.subplot(2,1,1);
plt.plot(x,dobs,'ko');
plt.plot(x,dpre,'k-')
for i in range(N):
    plt.plot( [x[i,0], x[i,0]], [dobs[i,0], dpre[i,0]],'k:');
plt.xlabel('x');
plt.ylabel('d');
titlestr = "intercept %.2f slope %.2f" % (mest[0,0], mest[1,0]);
plt.title(titlestr);

#  plot errors
ax1 = plt.subplot(2,1,2);
plt.axis([-6.0, 6.0, -5, 5])
plt.plot([-6.0,6.0],[0.0,0.0],'k-');
plt.plot(x,e,'ko');
for i in range(N):
    plt.plot( [x[i,0], x[i,0]], [0.0, e[i,0]],'k:');
plt.xlabel('x');
plt.ylabel('e');
plt.show();

# edapy_04_08: grid search for intercept and slope
# of trend line fitted to noisy data

# model paramerters
M=2;
m = np.zeros((M,1));

# load data from file
T = np.genfromtxt('../data/linedata01.txt', delimiter='\t');
[N, K]=T.shape;
x = eda_cvec( T[:,0] );    # auxially variable
dobs = eda_cvec( T[:,1] ); # observed data

# linear data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = x;

# define the (m1, m2) grid
L1 = 101;
L2 = 101;
m1min=0;
m1max=4;
m2min=0;
m2max=4;
m1 = eda_cvec( np.linspace(m1min,m1max,L1) );
m2 = eda_cvec( np.linspace(m2min,m2max,L2) );

# evaluate the error at each grid point
E = np.zeros((L1,L2));
for i in range(L1):
    for j in range(L2):
        m = eda_cvec([m1[i,0],m2[j,0]]); # put m's into vector form
        dpre = np.matmul(G,m);     # predicted data
        e = dobs - dpre;           # individual errors
        myE = np.matmul(e.T,e); myE=myE[0,0];
        E[i,j] = myE; # total error

# best estimate is the one with smallest error
# find the (i,j) for which E(i,j) is minimum
k = np.argmin( E, axis=0 );
Etemp = np.min( E, axis=0 );
j = np.argmin(Etemp);
i=k[j];
Emin = E[i,j];  # minimum error
# estmated model parameters
mbest = eda_cvec( [ m1[i,0],  m2[j,0] ] );
dpre = np.matmul(G,mbest); # predicted data
e = dobs-dpre; # individual errors

eda_draw('title E(m1,m2)',E);

print("Emin: %.2f" % (Emin) );
print("m1true:  %.2f   m2true:   %.2f" % (1, 2) );
print("m1est:   %.2f   m2est:    %.2f" % (mbest[0,0], mbest[1,0]) );

#  plot data
fig2 = plt.figure(2,figsize=(8,8));
ax1 = plt.subplot(2,1,1);
plt.axis([-6.0, 6.0, -15.0, 15.0]);
plt.plot(x,dobs,'ko');
plt.plot(x,dpre,'k-');
plt.xlabel('x)');
plt.ylabel('d');
titlestr = "intercept %.2f slope %.2f" % (mbest[0,0], mbest[1,0]);
plt.title(titlestr);

#  plot errors
ax1 = plt.subplot(2,1,2);
plt.axis([-6.0, 6.0, -5, 5])
plt.plot([-6.0,6.0],[0.0,0.0],'k-');
plt.plot(x,e,'ko');
for i in range(N):
    plt.plot( [x[i,0], x[i,0]], [0.0, e[i,0]],'k:');
plt.xlabel('x)');
plt.ylabel('d');
plt.show();

print(N);

Emin: 23.50
m1true:  1.00   m2true:   2.00
m1est:   0.96   m2est:    2.00

11

# edapy_04_09: grid search for intercept and slope
# for problem of fittng straight line to noisy data

# model paramerters
M=2
m = np.zeros((M,1));

# load data from file
T = np.genfromtxt('../data/linedata01.txt', delimiter='\t')
[N, K]=T.shape;
x = eda_cvec( T[:,0] );    # auxiallary variable
dobs = eda_cvec( T[:,1] ); # observed data

# Part 1: shift left
xshift=-2;
xs = x+xshift;

# linear data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = xs;
dpre = np.matmul(G,m);

# define grid of intercepts and slopes
L1 = 101;
L2 = 101;
m1min=-10;
m1max=10;
m2min=-10;
m2max=10;
m1 = eda_cvec( np.linspace(m1min,m1max,L1) );
m2 = eda_cvec( np.linspace(m2min,m2max,L2) );
e = np.zeros((N,1));
E = np.zeros((L1,L2));


for i in range(L1):
    for j in range(L2):
        m[0:2,0:1] = eda_cvec( m1[i], m2[j] );
        dpre = np.matmul(G,m);
        e = dobs - dpre;
        E0 = np.matmul(e.T,e); E0=E0[0,0];
        E[i,j] = E0;
    
# best estimate is the one with smallest error
k = np.argmin( E, axis=0 );
Etemp = np.min( E, axis=0 );
j = np.argmin(Etemp);
i=k[j];
Ebest = E[i,j];

mbest = eda_cvec( m1[i,0],  m2[j,0] );
dpre = np.matmul(G,mbest);
Esl = E;
print("left  shift: intercept %.2f  slope %.2f" % (mbest[0,0]-2*mbest[1,0], mbest[1,0]));

# Part 2: no shift 
xshift=0;
xs = x+xshift;

# linear data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = xs;
dpre = np.matmul(G,m);

# define grid of intercepts and slopes
L1 = 101;
L2 = 101;
m1min=-10;
m1max=10;
m2min=-10;
m2max=10;
m1 = eda_cvec( np.linspace(m1min,m1max,L1) );
m2 = eda_cvec( np.linspace(m2min,m2max,L2) );
e = np.zeros((N,1));
E = np.zeros((L1,L2));

for i in range(L1):
    for j in range(L2):
        m = eda_cvec( m1[i,0], m2[j,0] );
        dpre = np.matmul(G,m);
        e = dobs-dpre;
        E0 = np.matmul(e.T,e); E0=E0[0,0];
        E[i,j] = E0;
    
# best estimate is the one with smallest error
k = np.argmin( E, axis=0 );
Etemp = np.min( E, axis=0 );
j = np.argmin(Etemp);
i=k[j];

Ebest = E[i,j];
mbest = eda_cvec( m1[i,0],  m2[j,0] );
dpre = np.matmul(G,mbest);
Ens = E;
print("no    shift: intercept %.2f  slope %.2f" % (mbest[0,0], mbest[1,0]));

# Part 3: shift right
xshift=2;
xs = x+xshift;

# linear data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = xs;
dpre = np.matmul(G,m);

# define grid of intercepts and slopes
L1 = 101;
L2 = 101;
m1min=-10;
m1max=10;
m2min=-10;
m2max=10;
m1 = eda_cvec( np.linspace(m1min,m1max,L1) );
m2 = eda_cvec( np.linspace(m2min,m2max,L2) );
e = np.zeros((N,1));
E = np.zeros((L1,L2));

for i in range(L1):
    for j in range(L2):
        m[0:2,0:1] = eda_cvec( m1[i], m2[j] );
        dpre = np.matmul(G,m);
        e = dobs-dpre;
        E0 = np.matmul(e.T,e); E0=E0[0,0];
        E[i,j] = E0;
    
# best estimate is the one with smallest error
k = np.argmin( E, axis=0 );
Etemp = np.min( E, axis=0 );
j = np.argmin(Etemp);
i=k[j];
Ebest = E[i,j];
mbest = eda_cvec( m1[i,0],  m2[j,0] );
dpre = np.matmul(G,mbest);
Esr = E;
print("right shift: intercept %.2f  slope %.2f" % (mbest[0,0]+2*mbest[1,0], mbest[1,0]));

# lake log of error to reduce range of variation for plotting purposes
eda_draw('title E shift left',np.log(Esl),'title E no shift',np.log(Ens),'title E shift right',np.log(Esr));

left  shift: intercept 1.00  slope 2.00
no    shift: intercept 1.00  slope 2.00
right shift: intercept 1.00  slope 2.00

# edapy_04_10: least-squares solution
# for straight line linear model

# reads in noisy, linear data from file
# and computes and plots predicted data
# using least-squares estimate of intercept
# and slope, m1 and m2

# model paramerters
M=2;
mest = np.zeros((M,1));

# load data from file
T = np.genfromtxt('../data/linedata01.txt', delimiter='\t')
[N, K]=T.shape;
x = eda_cvec( T[:,0] );
dobs = eda_cvec( T[:,1] );

# data kernel
G = np.zeros((N,2));
G[0:N,0:1] = np.ones((N,1));
G[0:N,1:2] = x;

# least squares solution
GTG = np.matmul(G.T,G);
mest = la.solve( GTG, np.matmul(G.T, dobs)  );

# eda04_10: variance calculation
dpre = np.matmul(G, mest); # predicted data
e = dobs - dpre;           # error
E = np.matmul(e.T,e); E=E[0,0];    # total error
sigmad2 = E/(N-M);         # posterior variance of data
Cm = sigmad2*la.inv(GTG);  # variance of model parameters
sigmam1 = sqrt( Cm[0,0] ); # sqrt variance of model parameter 1
sigmam2 = sqrt( Cm[1,1] ); # sqrt variance of model parameter 2

# print model parameters and theoir 95% confidence intervals
print("Total error: %.2f" % E);
str1 = "intercept %.2f +/- %.2f (%%95)" % (mest[0,0],2*sigmam1);
str2 = "slope     %.2f +/- %.2f (%%95)" % (mest[1,0],2*sigmam2);
print(str1);
print(str2);

#  plot data
fig2 = plt.figure(2,figsize=(8,6));
ax1 = plt.subplot(2,1,1);
plt.axis([-6, 6, -15, 15])
plt.plot(x,dobs,'ko');
plt.plot(x,dpre,'k-');
for i in range(N):
    plt.plot([x[i,0],x[i,0]], [dobs[i,0],dpre[i,0]],'k-');
plt.xlabel('x');
plt.ylabel('d');
titlestr = "intercept %.2f slope %.2f" % (mest[0,0], mest[1,0]);
plt.title(titlestr);

#  plot error, with different vertical scale
ax2 = plt.subplot(2,1,2);
plt.axis([-6, 6, -5, 5]);
plt.plot(x,e,'ko');
plt.plot([-6,6],[0,0],'k-');
for i in range(N):
    plt.plot([x[i,0],x[i,0]], [0,e[i,0]],'k:');
plt.xlabel('x');
plt.ylabel('e');
plt.show();

Total error: 23.46
intercept 0.94 +/- 0.97 (%95)
slope     2.02 +/- 0.31 (%95)

# edapy_04_11: trend of Black Rock Forest temperature data

# load data from file
D = np.genfromtxt('../data/brf_temp.txt', delimiter='\t')
[Nraw, K]=np.shape(D);
traw = eda_cvec( D[:,0] );  # time (auxiliary variable)
draw = eda_cvec( D[:,1] );  # temperature (data)

# select only good data
# r=where(); k=r[0]; computes an Nx0 array of indices that pass the test
r = np.where( (draw[:,0]!=0)&(draw[:,0]>-40)&(draw[:,0]<38) );
k = r[0];
# select these indices from the raw data
t = traw[k,0:1];
dobs = draw[k,0:1];

(N,temp)=np.shape(dobs); # length of good data

# set up the data kernel
Ty = 365.25;  # number of days in a year
M=4;          # numnber of model parameters
G = np.zeros((N,M)); # initialize data kernel to zero
G[0:N,0:1] = np.ones((N,1)); # constant in time, m0
G[0:N,1:2] = t;              # linear in time, m1
# cosine in time, period of 1 year, m2
G[0:N,2:3] = np.cos(2*pi*t/Ty);
# sine in time, period of 1 year, m3
G[0:N,3:4] = np.sin(2*pi*t/Ty);

# simple least squares solution
GTG = np.matmul(G.T,G);
mest = la.solve( GTG, np.matmul(G.T, dobs)  );
slope = Ty*mest[1,0]; # slope in deg C per year

# variance calculation
dpre = np.matmul(G, mest); # predicted data
e = dobs - dpre;           # error
E = np.matmul(e.T,e); E=E[0,0];     # total error
sigmad2A = 0.01**2;        # prior error, 0.1 deg C
sigmad2B = E/(N-M);        # posterior error , from fit
GTGI = la.inv(GTG);        # inverse of GTG
CmA = sigmad2A*GTGI;       # covariance of m using prior variance of data
CmB = sigmad2B*GTGI;       # covariance of m using posterior variance of data
errslopeA = Ty*sqrt(CmA[1,1]); # error in slope (prior case), with units of deg C per year
errslopeB = Ty*sqrt(CmB[1,1]); # error in slope (prior case), with units of deg C per year

# convert to per year
slopestr = "using prior     error: slope %e +/- %e deg C per year (95%%)" % (slope, 2*errslopeA);
print(slopestr);
slopestr = "using posterior error: slope %e +/- %e deg C per year (95%%)" % (slope, 2*errslopeB);
print(slopestr);

#  plot data
fig1 = plt.figure(1,figsize=(16,6));
ax1 = plt.subplot(3,1,1);
plt.axis([t[0,0], t[N-1,0], -40, 40])
plt.plot(t,dobs,'k.');
plt.xlabel('time (days)');
plt.ylabel('temperature (C)');
ax2 = plt.subplot(3,1,2);
plt.axis([t[0,0], t[N-1,0], -40, 40])
plt.plot(t,dpre,'k-');
plt.xlabel('time (days)');
plt.ylabel('temperature (C)');
ax3 = plt.subplot(3,1,3);
plt.axis([t[0,0], t[N-1,0], -40, 40])
plt.plot(t,e,'k-');
plt.xlabel('time (days)');
plt.ylabel('error (C)');

using prior     error: slope -3.154468e-02 +/- 1.626964e-05 deg C per year (95%)
using posterior error: slope -3.154468e-02 +/- 9.143652e-03 deg C per year (95%)