function [ensemble,debug] = meboot(x,reps,lb,ub,scale,clt) % MEBOOT Maximum Entropy Bootstrapping % % [ensemble] = meboot(x,reps,lb,ub,scale,clt); % % Implements the Maximum Entropy Bootstrap (MEBOOT) for time series % based on the R code from Vinod and Lopez-de-Lacalle (2009). % % Input: % x = data vector (time series) % reps = number of replications for the bootstrap [999] % lb = lower bound for maximum entropy distribution; can be & default to [] % ub = upper bound for maximum entropy distribution; can be & defaults to [] % scale = switch [0/[1]] to scale the ensemble to the standard deviation of the data % clt = switch [0/[1]] to apply the Central Limit Theorm to the ensemble % % Output: % ensemble = bootstrapped sample ensemble [size(x,1) by reps] % debug = intermediate variables (optional) % % References: % Vinod, H. D. and J. Lopez de Lacalle, Maximum entropy bootstrap for % time series: The meboot R package, Journal of Statistical Software % 29, 5, 1–19, 2009 % Vinod, H. D. Maximum entropy ensembles for time series inference in economics, % Journal of Asian Economics 17, 6, 955–978, 2006. % % MATLAB function written by Kevin Anchukaitis and Benjamin Cook following the R Project code % for meboot & the algorithm as described in Vinod and Lopez-de-Lacalle (2009). % Version History: % v0.1 bootstrap code test and verified again V+LdL toy example [KJA] % v0.2 added lower/upper bounding code for physical conditions [KJA] %% Setup % parse inputs, recommend scale and clt set to 1 to match V+LdL if nargin==1; reps = 999; lb = []; ub = []; clt = 1; scale = 1; end; if nargin==4; clt = 1; scale = 1; end % trimmed mean setting, hard coded for now trim = 10; % get a column vector and size x = x(:); n = size(x,1); %% Sort and the original data and index in increasing order [xx,ordxx] = sort(x,1,'ascend'); %% Compute intermediate points on the sorted series. z = mean(embed(xx,2),2); debug.z = z; %% Compute lower limit for left tail (xmin) and upper limit for right (xmax) dv = abs(diff(x)); dvtrim = trimmean(dv,trim); if isempty(lb) xmin = xx(1) - dvtrim; else xmin = lb; end if isempty(ub) xmax = xx(n) + dvtrim; else xmax = ub; end %% Compute the mean of the maximum entropy density m(1) = 0.75*xx(1) + 0.25*xx(2); % for the lowest interval for k=2:length(xx)-1 m(k) = 0.25*xx(k-1) + 0.50*xx(k) + 0.25*xx(k+1); end m(length(xx)) = 0.25*xx(length(xx)-1) + 0.75*xx(length(xx)); % for the highest debug.m = m; %% Generate random numbers from the [0,1] uniform interval and %% compute sample quantiles at those points. % Generate random numbers from the [0,1] uniform interval. ensemble = meboot0(n,z,xmin,xmax,m,reps); qseq = sort(ensemble); % why resort again? %% Apply the order according to 'ordxx' as defined previously ensemble(ordxx,:) = qseq; %% [potentially optional] scale and force to follow Central Limit Theorm debug.ensemble1 = ensemble; % prior to variance adjustment and ensemble mean CLT if scale == 1 ensemble = expand(x,ensemble); end debug.ensemble2 = ensemble; % after variance adjustment if clt == 1 ensemble = force(x,ensemble); end %%% SUBFUNCTIONS %%% function ensemble = meboot0(n,z,xmin,xmax,m,reps) for irep = 1:reps % Generate random numbers from the [0,1] uniform interval p = rand(n,1); % Compute sample quantiles by linear interpolation at % ... those 'p'-s (if any) ... q = NaN(n,1); j = 0; i1 = 1.0; for ii1=1:n-2 ref23 = find(p > i1/n & p <= (i1+1)/n); if ~isempty(ref23) for i=1:length(ref23) qq(1) = z(ii1) + ( (z(ii1+1) - z(ii1)) / ((i1+1)/n - i1/n) ) * (p(ref23(i)) - i1/n); q(ref23(i)) = qq(1) + m(ii1) - 0.5 * (z(ii1) + z(ii1+1)); end end i1 = i1+1; end % ... 'p'-s within the [0, (1/n)] interval (Left tail) ref1 = find(p <= (1/n)); if(length(ref1) > 0) qq = interp1([0 1/n], [xmin z(1)], p(ref1)); q(ref1) = qq; if q(ref1) ~= 0 q(ref1) = qq + m(1)- 0.5 *(z(1)+xmin); end end % ... 'p'-s equal to (n-1)/n ref4 = find(p == ((n-1)/n)); if(length(ref4) > 0) q(ref4) = z(n-1); end % ... 'p'-s greater than (n-1)/n (Right tail) ref5 = find(p > ((n-1)/n)); if(length(ref5) > 0) % Right tail proportion qq = interp1([(n-1)/n 1],[z(n-1) xmax], p(ref5)); q(ref5) = qq; % this shifts xmax if q(ref1) ~= 1 q(ref5) = qq + m(n) - 0.5 *(z(n-1)+xmax); end end ensemble(:,irep) = q; end function [z] = embed(x,dim) % EMBED Ruelle-Takens embedding for i=1:size(x,1)+1-dim z(i,1:dim) = x(i+dim-1:-1:i); end function ensemble = expand(x,ensemble) % EXPAND Rescale the standard deviations of the ensemble to match the data D = sqrt(var(x)); [Z,M,sigma] = zscore(ensemble); ensemble = Z .* repmat(D', length(x), size(ensemble,2)); ensemble = ensemble + repmat(M, length(x), 1); function ensemble = force(x,ensemble) % FORCE Force the whole ensemble to follow Central Limit Theorem [n,bigj] = size(ensemble); xbar = mean(ensemble); [sortxbar,oo] = sort(xbar); % now spread out the means as if they were from normal density xcdf = cdfcalc((1:bigj)/(bigj+1))'; xcdf(1) = []; newbar = mean(x) + xcdf * std(x)/sqrt(bigj); % apply grand mean and standard deviation to ensemble newm = zscore(newbar) * std(x)/sqrt(bigj) + mean(x); % this forces the mean to be gm and std = s/sqrt(n) meanfix = newm - sortxbar; for iif=1:bigj ensemble(:,oo(iif)) = ensemble(:,oo(iif)) + meanfix(iif); end