% distribution and histogram
p = exp(-0.5*(d-dbar).^2/(sd^2))/(sqrt(2*pi)*sd);
r=random('Normal',dbar,sd,M,1);
axis( [dmin, dmax, 0, 0.5 ] );
plot(d,p,'r-','LineWidth',3);
axis( [dmin, dmax, 0, hmax ] );
tb = [bins(i)-Db/2, bins(i)-Db/2, bins(i)+Db/2, bins(i)+Db/2]';
th = [0, h(i), h(i), 0]';
plot(tb,th,'b-','LineWidth',3);
% convert histogram to an approximate pdf
% plot dpf and histogram superimposed
axis( [dmin, dmax, 0, 0.5 ] );
tb = [bins(i)-Db/2, bins(i)-Db/2, bins(i)+Db/2, bins(i)+Db/2]';
th = [0, h(i), h(i), 0]';
plot(tb,th,'b-','LineWidth',3);
plot(d,p,'r-','LineWidth',3);
fprintf('(Left) Histogram (blue). (Middle) Probability density function (pdf, red). (Right)\n');
(Left) Histogram (blue). (Middle) Probability density function (pdf, red). (Right)
fprintf('Normalize histogram (blue) as a empirical pdf, with true pdf (red), superimposed\n');
Normalize histogram (blue) as a empirical pdf, with true pdf (red), superimposed
p = exp(-0.5*((d-dbar).^2)/sd^2)/(sqrt(2*pi)*sd);
axis( [dmin, dmax, 0, 0.5 ] );
plot(d,p,'b-','LineWidth',3);
plot([dn,dn], [0.0,pn], 'k-', 'LineWidth', 1);
plot([dn2,dn2], [0.0,pn2], 'k-', 'LineWidth', 1);
plot([0,dn], [pn,pn], 'k:', 'LineWidth', 1);
fprintf('Caption: Normal pdf with a mean of 5 and a variance of (1)^2\n');
Caption: Normal pdf with a mean of 5 and a variance of (1)^2
fprintf('Sliver of probsbility has area p*Dd.\n');
Sliver of probsbility has area p*Dd.
% operations on a probability distributions
p = exp(-0.5*(d-dbar).^2/sigma2) / (sqrt(2*pi)*sigma);
plot(d,p,'k-','LineWidth',3);
fprintf('Caption: Normal pdf with mean 5 and variance (1)^2.\n');
Caption: Normal pdf with mean 5 and variance (1)^2.
fprintf('total probabilty %f\n', Ptotal );
total probabilty 1.000000
plot(d,P,'k-','LineWidth',3);
fprintf('Caption: Cumulative probability distribution\n');
Caption: Cumulative probability distribution
sigma2 = Dd * sum(((d-Ed).^2).*p);
fprintf('mean %f and variance %f\n', Ed, sigma2 );
mean 4.999998 and variance 0.999988
% calculaion of mode and mean for a skewed pdf
% complicated mpn-Normal pdf
p = d .* exp(-0.5*((d-dbar).^2)/sd^2);
p = p + 4*exp(-0.5*((d-dbar).^2)/sd^2);
% maximum liklihood point
axis( [dmin, dmax, 0, 0.5 ] );
plot(d,p,'b-','LineWidth',3);
plot( [dml, dml]', [0, 0.05]', 'r-', 'LineWidth', 2 );
plot( [dbar, dbar]', [0, 0.05]', 'k-', 'LineWidth', 2 );
fprintf('Caption: Skewed pdf (blue), showing maximum likelihood point (red bar)\n');
Caption: Skewed pdf (blue), showing maximum likelihood point (red bar)
fprintf('and mean (black bar)\n');
% two Normal pdfs with different variances
p = exp(-0.5*((d-dbar).^2)/sd^2)/(sqrt(2*pi)*sd);
pB = exp(-0.5*((d-dbarB).^2)/sdB^2)/(sqrt(2*pi)*sdB);
% expected values (means calculated from pdfs)
fprintf('p1: true %.4f calculated %.4f\n', dbar, Ed);
p1: true 5.0000 calculated 5.0000
fprintf('p2: true %.4f calculated %.4f\n', dbarB, EdB);
p2: true 5.0000 calculated 4.9962
fprintf('std dev 1: true: %f estimated: %f\n', sd, sdest);
std dev 1: true: 0.500000 estimated: 0.500000
fprintf('std dev 2: true: %f estimated: %f\n', sdB, sdBest);
std dev 2: true: 1.500000 estimated: 1.492462
axis( [dmin, dmax, 0, 10*top ] );
plot(d,q,'k-','LineWidth',3);
axis( [dmin, dmax, 0, top ] );
plot(d,p,'k-','LineWidth',3);
axis( [dmin, dmax, 0, top ] );
plot(d,q.*p,'k-','LineWidth',3);
axis( [dmin, dmax, 0, 10*top ] );
plot(d,qB,'k-','LineWidth',3);
axis( [dmin, dmax, 0, top ] );
plot(d,pB,'k-','LineWidth',3);
axis( [dmin, dmax, 0, top ] );
plot(d,qB.*pB,'k-','LineWidth',3);
fprintf('Caption: Depiction of the calculation of variacne. (Top row) (Left) Quadratic\n');
Caption: Depiction of the calculation of variacne. (Top row) (Left) Quadratic
fprintf('curve q multiplies pdf p (middle) to produce product q*p (right). The area under\n');
curve q multiplies pdf p (middle) to produce product q*p (right). The area under
fprintf('the product q*p is the variacnce. (Bottom row) Same as top row but for a\n');
the product q*p is the variacnce. (Bottom row) Same as top row but for a
% realizations of these pdfs
Nr = 1000; % number of realizations
dr = random('Normal',dbar,sd,Nr,1); % realizations of p
drB = random('Normal',dbarB,sdB,Nr,1); % realizations of pB
fprintf('p1: True mean %.4f, estimated from ensemble: %.4f\n',dbar,dbarest);
p1: True mean 5.0000, estimated from ensemble: 5.0120
fprintf('p2: True mean %.4f, estimated from ensemble: %.4f\n',dbar,dbarBest);
p2: True mean 5.0000, estimated from ensemble: 4.9526
% sample standard deviation
fprintf('True sigma %.4f, estimated from ensemble: %.4f\n',sd,sigmadest);
True sigma 0.5000, estimated from ensemble: 0.5150
fprintf('True sigma %.4f, estimated from ensemble: %.4f\n',sd,sigmadBest);
True sigma 0.5000, estimated from ensemble: 1.4771
% 2D Normal distribution, uncorrelated
% parameters for 2D pdf p(d1,d2)
norm = 2*pi*sqrt(det(C));
% set elements of p(d2,d2z)
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P(i,j) = exp(-0.5*dd'*CI*dd)/norm;
fprintf('Caption: Depiction of an 2D uncorrelated pdf p(d1,d2) (colors)\n');
Caption: Depiction of an 2D uncorrelated pdf p(d1,d2) (colors)
% two-column table of random numbers
Nr = 1000; % number of realizations in the ensemble
% the next statement works only for uncorrelated data
S = [ random('Normal',d1bar,sd1,Nr,1), random('Normal',d2bar,sd2,Nr,1) ];
% but this works fine irrespective of currenaltion
fprintf('Estimates from ensemble\n');
fprintf('true means: %.4f %.4f\n',d1bar,d2bar);
true means: 5.0000 5.0000
fprintf('estimated means: %.4f %.4f\n',dbarest(1),dbarest(2));
estimated means: 4.9620 5.0101
fprintf('true sigmas: %.4f %.4f\n',sd1,sd2);
true sigmas: 1.5000 0.5000
fprintf('estimated sigmas: %.4f %.4f\n',sqrt(Cest(1,1)),sqrt(Cest(2,2)));
estimated sigmas: 1.4523 0.4955
fprintf('true covariance %.4f\n',0.0);
fprintf('estimated covariance %.4f\n',Cest(1,2));
estimated covariance -0.0127
% 2D Normal distribution, uncorrelated, + correlation, - correlation
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P1(i,j) = exp(-0.5*dd'*CI*dd)/norm;
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P2(i,j) = exp(-0.5*dd'*CI*dd)/norm;
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P3(i,j) = exp(-0.5*dd'*CI*dd)/norm;
gda_draw(' ',P1,' ',P2,' ',P3);
fprintf('Caption: Depiction of 2D pdfs (colors). (Left) Uncorrelated. (middle)\n');
Caption: Depiction of 2D pdfs (colors). (Left) Uncorrelated. (middle)
fprintf('Positively correlated. (Right) Negatively correlated\n');
Positively correlated. (Right) Negatively correlated
% 1D uniform p.d.f., p(d)=constant, traansformed to p(m) with m(d)=d^2
% note that m=sqrt(d) and that dm/dd=0.5/sqrt(d)
set(gca,'LineWidth',2,'FontName','Cambria Math','FontAngle','italic');
plot(d,pd,'r-','LineWidth',3);
xlabel('d','FontName','Cambria Math','FontAngle','italic');
ylabel('p(d)','FontName','Cambria Math','FontAngle','italic');
set(gca,'LineWidth',2,'FontName','Cambria Math','FontAngle','italic');
plot(m,pm,'b-','LineWidth',3);
xlabel('d','FontName','Cambria Math','FontAngle','italic');
ylabel('p(m)','FontName','Cambria Math','FontAngle','italic');
fprintf('Caption: (Top) Uniform pdf p(d). (Bottom) Non-uniform pdf p(mp) resulting\n');
Caption: (Top) Uniform pdf p(d). (Bottom) Non-uniform pdf p(mp) resulting
fprintf('from transformation mp=m^2.\n');
from transformation mp=m^2.
% two Normal curves of different variance
p = exp(-0.5*(d-dbar).^2/(sd^2))/(sqrt(2*pi)*sd);
% make realizations of p(d)
rp = random('Normal',dbar,sd,Nr,1);
% sample mean and std dev
fprintf("pa: mean: true %.4f estimated %.4f\n",dbar, dbarest);
pa: mean: true 0.0000 estimated 0.0057
fprintf("pa: stddev: true %.4f estimated %.4f\n",sd, sdest);
pa: stddev: true 1.0000 estimated 0.9947
pb = exp(-0.5*(d-dbarb).^2/(sdb^2))/(sqrt(2*pi)*sdb);
% make realizations of pb(d)
rpb = random('Normal',dbarb,sdb,Nr,1);
% sample mean and std dev
fprintf("pb: mean: true %.4f estimated %.4f\n",dbarb, dbarbest);
pb: mean: true 0.0000 estimated -0.0146
fprintf("pb: stddev: true %.4f estimated %.4f\n",sdb, sdbest);
pb: stddev: true 2.0000 estimated 2.0116
axis( [dmin, dmax, 0, 0.5 ] );
plot(d,p,'r-','LineWidth',3);
plot(d,pb,'b-','LineWidth',3);
plot([dbar,dbar]',[0,0.1]','r-','LineWidth',2);
plot([dbarest,dbarest]',[0,0.1]','r:','LineWidth',2);
plot([dbarb,dbarb]',[0,0.1]','b-','LineWidth',2);
plot([dbarbest,dbarbest]',[0,0.1]','b:','LineWidth',2);
fprintf('Caption: Two normal curves (blue and red curves) of equal mean\n');
Caption: Two normal curves (blue and red curves) of equal mean
fprintf('and unequal variance, along with estimated mean (blue and red bars).\n');
and unequal variance, along with estimated mean (blue and red bars).
% true covariance is zero
% estimated covariance is
fprintf('Covariance of p(d1,d2): true %.4f, estimated %.4f\n', covab, Cest(1,2) );
Covariance of p(d1,d2): true 0.0000, estimated -0.0435
% product of two 2D Normal distributions
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P1(i,j) = exp(-0.5*dd'*CI*dd)/norm;
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d2bar, dmin+Dd*(j-1)-d2bar ]';
P2(i,j) = exp(-0.5*dd'*CI*dd)/norm;
norm = (Dd^2)*sum(sum(P1P2))
DBAR3 = C3 * (C1I*DBAR1 + C2I*DBAR2);
norm = 2*pi*sqrt(det(C));
dd = [ dmin+Dd*(i-1)-d1bar, dmin+Dd*(j-1)-d2bar ]';
P3(i,j) = exp(-0.5*dd'*CI*dd)/norm;
% plot the analytic one, too, to check results
% gda_draw(' ',P1,' ',P2,' ',P1P2,' ',P3);
gda_draw(' ',P1,' ',P2,' ',P1P2);
fprintf('Caption: (left) Normal pdf. (middle) Another normal pdf. (right) Their\n');
Caption: (left) Normal pdf. (middle) Another normal pdf. (right) Their
fprintf('product, also a Normal pdf\n');
product, also a Normal pdf
% example of a Pierson's chi-squared test
set(gcf,'pos',[10, 10, 600, 250] );
% Part 1: Correct Distribution
fprintf('Part 1: Correct Distribution\n');
Part 1: Correct Distribution
% make some Gaussian random daya
drandom = random('Normal',dbar,sigmad,Ndata,1);
% estimate mean and standard deviation of d's
sigmadest = std(drandom);
fprintf('mean: true %f estimated %f\n', dbar, dbarest);
mean: true 5.000000 estimated 4.896683
fprintf('sigma: true %f estimated %f\n', sigmad, sigmadest);
sigma: true 1.000000 estimated 0.925331
% make histogram. The bins are from d +/- Dd/2.
Dd = (dmax-dmin)/(Nbin-1);
dhist = hist( drandom, d);
% theoretical distribution
pdtrue = normcdf(d+Dd/2,dbarest,sigmadest)-normcdf(d-Dd/2,dbarest,sigmadest);
plot(d,pdest,'r-','LineWidth',3);
plot(d,pdtrue,'b-','LineWidth',3);
% compute chi squared statistic
x2est = Ndata*sum( ((pdest-pdtrue).^2) ./ pdtrue );
% compute P( x2 >= x2est ) = 1 - P(x2<x2est);
P = 1-chi2cdf( x2est, K );
fprintf('K %d chi-squared-est %f P(x2>=x2est) %f\n', K, x2est, P );
K 37 chi-squared-est 17.024597 P(x2>=x2est) 0.997983
% Part 2: Incorrect Distribution
fprintf('Part 2: Incorrect Distribution\n');
Part 2: Incorrect Distribution
% estimate mean and standard deviation of d's
% then make them incorrect
dbarest = mean(drandom)-0.5;
sigmadest = std(drandom)*1.5;
fprintf('mean: true %f estimated %f\n', dbar, dbarest);
mean: true 5.000000 estimated 4.396683
fprintf('sigma: true %f estimated %f\n', sigmad, sigmadest);
sigma: true 1.000000 estimated 1.387997
% incorrecr theoretical distribution
pdtrue = normcdf(d+Dd/2,dbarest,sigmadest)-normcdf(d-Dd/2,dbarest,sigmadest);
plot(d,pdest,'r-','LineWidth',3);
plot(d,pdtrue,'b-','LineWidth',3);
fprintf('Caption: (Left) Histogram (red) and pdf (blue) that fits it well.\n');
Caption: (Left) Histogram (red) and pdf (blue) that fits it well.
fprintf('Same histogram (red) and pdf (blue) that fits it poorly.\n');
Same histogram (red) and pdf (blue) that fits it poorly.
% compute chi squared statistic
x2est = Ndata*sum( ((pdest-pdtrue).^2) ./ pdtrue );
% compute P( x2 >= x2est ) = 1 - P(x2<x2est);
P = 1-chi2cdf( x2est, K );
fprintf('K %d chi-squared-est %f P(x2>=x2est) %f\n', K, x2est, P );
K 37 chi-squared-est 76.878804 P(x2>=x2est) 0.000130
% Illustrate a computing conditional distributions
% from a joint distribution.
% set up vectors da and d2
% make a normal distribution
p1=exp(-((d1-d1bar).^2)/(2*s1*s1));
p2=exp(-((d2-d2bar).^2)/(2*s2*s2));
% sum along columns, which integrates P along d2 to get p1=p(d1)
% sum along rows, which integrates P along d1 to get p2=p(d2)
% conditional distribution P1g2 = P(d1|d2) = P(d1,d2)/p2
P1g2 = P ./ (ones(L,1)*p2');
% conditional distribution P2g1 = P(d2|d1) = P(d1,d2)/p1
P2g1 = P ./ (p1*ones(L,1)');
% use simple drawing function that encapsulates all the graphics
gda_draw(P,'caption p(d1,d2)',P1g2,'caption p(d1|d2)',P2g1,'caption p(d2|d1)');
fprintf('Caption: (Left) 2D pdf p(d1,d2). (Middle) Corresponding conditional pdf p(d1|d2).\n');
Caption: (Left) 2D pdf p(d1,d2). (Middle) Corresponding conditional pdf p(d1|d2).
fprintf('(Right) Corresponding conditional pdf p(d2|d1).\n');
(Right) Corresponding conditional pdf p(d2|d1).
% Realization of Normally-distributed data, calculated several ways
% generate Normal random numbers with these mean and variance
m = random('Normal',mbar,sigmam,Nr,1);
mnormal1 = m; % save for future use
% by transformation of a uniform distribution
d = random('unif',0,1,Nr,1);
m = norminv(d,mbar,sigmam);
mnormal2 = m; % save for future use
Dm = (mmax-mmin)/(Nbins-1);
m = mmin+Dm*[0:Nbins-2]'+Dm/2;
p = normpdf(m,mbar,sigmam);
axis( [mmin, mmax, 0, 0.5] );
plot( m, h1, 'r-', 'LineWidth', 3 );
plot( m, h2, 'b-', 'LineWidth', 3 );
plot( m, p, 'k-', 'LineWidth', 3 );
fprintf('Caption: True Normal pdf (black), and two empirical pdfs (red and blue)\n');
Caption: True Normal pdf (black), and two empirical pdfs (red and blue)
fprintf('computed by from ensembles computed two different ways, using Normal random\n');
computed by from ensembles computed two different ways, using Normal random
fprintf('numbers (red) and by transforming uniform random numbers.\n');
numbers (red) and by transforming uniform random numbers.
% create realizations of an exponential p.d.f. in two ways,
% transformation of a uniform distribution, and the Metropolis
% algoritm. In this example, the p.d.f. is
% p(d) = c*exp(-d)/c); for d>0
% evaluate exponential distribution
pexp = (0.5/c)*exp(-abs(d)/c);
% the usual transformation rule is p(d) = p(m(d)) |dm/dd|
% suppose that p(m) is uniform over m=(-1,1) with amplitude 0.5
% handle the absolute value sign by breaking into two parts,
% (1/c)*exp(-d/c)=(+/-)dm/dd. Choose the + sign, in order
% to map m=0 with d=0 and m=1 to d=infinity. Then
% m =(integral)(1/c)*exp(-d/c)dd+constant. Choose constant=1
% so m=1-exp(-d/c). Inverting gives d=-c*ln((1-m))
% similar calculation gives d=-c*ln((1+m))
% so overall d=-sgn(m)*c*ln((1-abs(m)))
% transform realizations of a uniform distribution to p(d)
rm=random('Uniform',-1,1,M,1);
rd=-sign(rm).*c.*log((1-abs(rm)));
% convert histogram to a p.d.f.
set(gcf,'pos',[10, 10, 600, 250]);
% plot histogram of transformed realizations and true pdf
axis( [dmin, dmax, 0, 0.3 ] );
tb = [bins(i)-Db/2, bins(i)-Db/2, bins(i)+Db/2, bins(i)+Db/2]';
th = [0, h(i), h(i), 0]';
plot(tb,th,'b-','LineWidth',3);
plot(d,pexp,'r-','LineWidth',3);
Nburn=1000; % delete this number of members at the start of the ensemble
logprd(1) = logfactor-abs(rd(1))/c;
rdn = random('Normal',rdo,s);
%prdn = (0.5/c)*exp(-abs(rdn)/c);
logprdn = logfactor-abs(rdn)/c;
% test parameter, ratio of probabilities
gamma = logprdn - logprdo;
r = random('Uniform',0,1);
rd = rd(Nburn+1:Niter,1);
logprd = logprd(Nburn+1:Niter,1);
% histogram, converted to a p.d.f.
% plot histogram of transformed realizations and true pdf
axis( [dmin, dmax, 0, 0.3 ] );
tb = [bins(i)-Db/2, bins(i)-Db/2, bins(i)+Db/2, bins(i)+Db/2]';
th = [0, h(i), h(i), 0]';
plot(tb,th,'b-','LineWidth',3);
plot(d,pexp,'r-','LineWidth',3);
fprintf('(Left) Histogram (blue) of ensemble of realizations of a two-sided exponential pdf,\n');
(Left) Histogram (blue) of ensemble of realizations of a two-sided exponential pdf,
fprintf('for ensemble created by transforming uniformly-distributed random numbers.\n');
for ensemble created by transforming uniformly-distributed random numbers.
fprintf('(Right) Histogram (blue) of ensemble of realizations of a two-sided exponential pdf,\n');
(Right) Histogram (blue) of ensemble of realizations of a two-sided exponential pdf,
fprintf('for ensemble created via the Metropolis-Hastings algorithm.\n');
for ensemble created via the Metropolis-Hastings algorithm.