function [REG,ptr,ptd] = do_cca_training(ptr,ptd,Ttrain,redoPtdPCA,redoPtrPCA) % Function to perform CCA. % USAGE: [REG,ptr,ptd] = do_cca_training(ptr,ptd,Ttrain,redoPtdPCA,redoPtrPCA) % Inputs: (see example below for details) % ptr: structure containing information about predictor variables % ptd: structure containing information about predictand variables % Ttrain: times for which data is used for training % redoPtdPCA: vector of flags to indicate whether to recompute PCA of predictor variables % redoPtrPCA: vector of flags to indicate whether to recompute PCA of predictand variables % Outputs: % REG: structure containing information about CCA % REG.A,REG.B: are matrices of canonical coefficients % REG.regstats: structure of regression statistics of the canonical variables % ptr: input ptr strcture with additional information added % ptr.eofs,ptr.stdev,ptr.clim: EOFs, std dev, climatalogy of input data % ptr.meof_eofs: EOFs of multivariate model state of predictor variables % ptr.X: PCs of multivariate model state % ptd: input ptd strcture with additional information added % ptd.eofs,ptd.stdev,ptd.clim: EOFs, std dev, climatalogy of input data % % Notes: % - this function was originally written to work with data from the INC model. % For backward compatibility, this function will attempt to read a parameter % file to determine whether to recompute PCA's. To avoid this, ALWAYS call this % function with at least 4 arguments. If the redoPtdPCA argument is passed, this % file will not be read. % Samar Khatiwala (spk@ldeo.columbia.edu) % e.g., % ptr.vars={'u1','v1','ht','sst','echam_taux','echam_tauy','qnet'}; % ptd.vars={'qflux'}; % % % Data files % % Predictors % ptr.datafiles{1}='../ocean_clim_u1mm.bin'; % ptr.datafiles{2}='../ocean_clim_v1mm.bin'; % ptr.datafiles{3}='../ocean_clim_htmm.bin'; % ptr.datafiles{4}='ICM_Data/InputData/PacificDomain_ECHAM/echam_sst.bin'; % ptr.datafiles{5}='ICM_Data/InputData/PacificDomain_ECHAM/echam_zonal_wind_stress.bin'; % ptr.datafiles{6}='ICM_Data/InputData/PacificDomain_ECHAM/echam_meridional_wind_stress.bin'; % ptr.datafiles{7}='ICM_Data/InputData/PacificDomain_ECHAM/echam_qnet.bin'; % % Predictands % ptd.datafiles{1}='../qflux_mm.bin'; % % ptr_numvars=length(ptr.vars); % ptd_numvars=length(ptd.vars); % redoPtrPCA=zeros(ptr_numvars,1); % redoPtdPCA=zeros(ptd_numvars,1); % for iv=1:ptr_numvars % ptr.pcafiles{iv}=[ptr.vars{iv} '_pca']; % file name for PCA % % [EOF,PC,SD,ZPC,clim]=get_inc_eofs(ptr.vars{iv},ptr.datafiles{iv},ptr.Tdata{iv},Tpca,100); % % save(ptr.pcafiles{iv},'EOF','PC','SD','ZPC','clim','Tpca') % end % for iv=1:ptd_numvars % ptd.pcafiles{iv}=[ptd.vars{iv} '_pca']; % file name for PCA % % [EOF,PC,SD,ZPC,clim]=get_inc_eofs(ptd.vars{iv},ptd.datafiles{iv},ptd.Tdata{iv},Tpca,100); % % save(ptd.pcafiles{iv},'EOF','PC','SD','ZPC','clim','Tpca') % end % % Data time % % Predictors % for iv=1:3 % ptr.Tdata{iv}=Trun; % u1,v1,ht % end % for iv=4:7 % ptr.Tdata{iv}=Trundata; % sst,echam_taux,echam_tauy,qnet % end % % Predictands % ptd.Tdata{1}=Trun; % qflux % % ptr.neofsv=zeros(ptr_numvars,1); % ptr.neofsv(:)=[75 75 80 85 80 70 65]; % ptd.neofsv=zeros(ptd_numvars,1); % ptd.neofsv(:)=[30]; % ptr.meof_neofs=30; if ptr.meof_neofs>sum(ptr.neofsv) error('ERROR: Number of MEOFs is greater than dimension of multivariate model state') end if any(ptd.neofsv>ptr.meof_neofs) error('ERROR: Number of predictand variables exceeds number of predictor variables') end ptr_numvars=length(ptr.vars); ptr_dim=sum(ptr.neofsv); ptd_numvars=length(ptd.vars); if nargin<4 % determine whether to redo PCA % Time interval of data % for backward compatibility load params Trun Trundata if length(Ttrain)~=length(Trun) redoPCA=1; else if any(Ttrain~=Trun) redoPCA=1; else redoPCA=0; end end redoPtrPCA=repmat(redoPCA,[ptr_numvars 1]); end if nargin<5 redoPtdPCA=repmat(redoPtrPCA(1),[ptd_numvars 1]); end % Predictor disp('Assembling multivariate model state') % Construct model state composed of standarized PCs of different % variables MZpc=[]; for iv=1:ptr_numvars neofs=ptr.neofsv(iv); % disp(['Computing PCA for variable ' ptr.vars{iv}]) if redoPtrPCA(iv) % only some data is being used for training. recompute PCA disp(['DO_CCA_TRAINING: recomputing PCA for variable ' ptr.vars{iv}]) varFile=ptr.datafiles{iv}; Tdata=ptr.Tdata{iv}; [ptr.eofs{iv},PC,ptr.stdev{iv},ZPCv,ptr.clim{iv}]=get_inc_eofs(ptr.vars{iv},varFile,Tdata,Ttrain,neofs); else % all data being used for training. load precomputed PCA disp(['DO_CCA_TRAINING: loading precomputed PCA for variable ' ptr.vars{iv}]) if isfield(ptr,'pcafiles') if ~isempty(ptr.pcafiles{iv}) varFile=ptr.pcafiles{iv}; else varFile=[ptr.vars{iv} '_pca']; end else varFile=[ptr.vars{iv} '_pca']; end S=load(varFile,'EOF','SD','ZPC','clim'); ptr.eofs{iv}=S.EOF(:,:,1:neofs); ptr.stdev{iv}=S.SD(1:neofs); ZPCv=S.ZPC(:,1:neofs); ptr.clim{iv}=S.clim; end MZpc=[MZpc ZPCv]; % (t,e) end % project onto MEOFs if ptr_numvars==1 ptr.meof_eofs=[]; ptr.X=MZpc(:,1:ptr.meof_neofs); else % PCA of multivariate model state (MZpc) % MEOF contains the truncated number of EOFs (ptr.meof_neofs) of the % multivariate model state, X contains the corresponding PCs (t,e) % and defines the predictor variable for the subsequent CCA. % neofs=sum(neofsv); % size(Zpc,2) [ptr.meof_eofs,ptr.X]=principal_component_analysis(MZpc,ptr.meof_neofs); end % CCA training disp('Training CCA') for iv=1:ptd_numvars neofs=ptd.neofsv(iv); % disp(['Computing PCA for variable ' ptd.vars{iv}]) if redoPtdPCA(iv) % only some data is being used for training. recompute PCA disp(['DO_CCA_TRAINING: recomputing PCA for variable ' ptd.vars{iv}]) varFile=ptd.datafiles{iv}; Tdata=ptd.Tdata{iv}; [ptd.eofs{iv},PC,ptd.stdev{iv},ZPCv,ptd.clim{iv}]=get_inc_eofs(ptd.vars{iv},varFile,Tdata,Ttrain,neofs); else % all data being used for training. load precomputed PCA disp(['DO_CCA_TRAINING: loading precomputed PCA for variable ' ptd.vars{iv}]) if isfield(ptd,'pcafiles') if ~isempty(ptd.pcafiles{iv}) varFile=ptd.pcafiles{iv}; else varFile=[ptd.vars{iv} '_pca']; end else varFile=[ptd.vars{iv} '_pca']; end S=load(varFile,'EOF','SD','ZPC','clim'); ptd.eofs{iv}=S.EOF(:,:,1:neofs); ptd.stdev{iv}=S.SD(1:neofs); ZPCv=S.ZPC(:,1:neofs); ptd.clim{iv}=S.clim; end ptd.Y{iv}=ZPCv; % CCA % A and B contain the canonical coefficients (CCA modes) % U and V contain the canonical variables (projection coefficients) % size(A,1) = ptr.meof_neofs = dimension of the reduced multivariate model state (predictor) % size(B,1) = ptd.neofs{iv} = dimension of predictand PCA space % size(A,2) = size(B,2) = number of CCA modes = min(rank(X),rank(Y)). % (= ptd.neofs{iv}) % disp('Performing CCA') [REG.A{iv},REG.B{iv},r,U,V] = canoncorr(ptr.X,ptd.Y{iv}); % Regress V(:,ic) against U(:,ic) nc=size(REG.A{iv},2); for ic=1:nc REG.regstats{ic,iv}=regstats(V(:,ic),U(:,ic)); end end