function [EOFs,PCs,Var,Xrecon]=principal_component_analysis(X,neof) % function [EOFs,PCs,Var]=principal_component_analysis(X,neof) % Function to do a principal component analysis of % data matrix X. % Input: % X: (t,x) each row corrsponds to a sample, each column % is a variable. (Each column is a time series of a % variable.) % neof: number of EOF/PC to return % Output: % EOFs: (x,e) matrix with EOFs (loadings) in the columns % PCs: (t,e) matrix with principal components (scores) in the columns % Var: variance of each principal component % Xrecon: (t,x) reconstructed X (WITHOUT adding back the mean) % To reconstruct: Xrecon = PCs*EOFs' % Notes: (1) This routine will subtract off the mean of each % variable (column) before performing PCA. % (2) sum(var(X)) = sum(Var) = sum(diag(S)^2/(m-1)) % Samar Khatiwala (spk@ldeo.columbia.edu) if strcmp(class(X),'single') disp('WARNING: Converting input matrix X to class DOUBLE') end % Center X by subtracting off column means [m,n] = size(X); X = X - repmat(mean(X,1),m,1); r = min(m-1,n); % max possible rank of X % SVD if nargin < 2 [U,S,EOFs]=svds(X,r); else [U,S,EOFs]=svds(X,min(r,neof)); end % EOFs: (x,e) % U: (t,e) % Determine the EOF coefficients PCs=U*S; % PCs=X*EOFs (t,e) % compute variance of each PC Var=diag(S).^2/(m-1); % Note: X = U*S*EOFs' % EOFs are eigenvectors of X'*X = (m-1)*cov(X) % sig^2 (=diag(S)^2) are eigenvalues of X'*X % So tr(X'*X) = sum(sig_i^2) = (m-1)*(total variance of X) if nargout>3 Xrecon = PCs*EOFs'; % (t,x) end