[mlpack-svn] r15384 - mlpack/conf/jenkins-conf/benchmark/methods/matlab
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jul 2 17:37:48 EDT 2013
Author: marcus
Date: Tue Jul 2 17:37:48 2013
New Revision: 15384
Log:
Add Principal Components Analysis matlab method.
Added:
mlpack/conf/jenkins-conf/benchmark/methods/matlab/
mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m
Added: mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m Tue Jul 2 17:37:48 2013
@@ -0,0 +1,88 @@
+function pca(cmd)
+% PCA  Benchmark driver that runs principal components analysis on a CSV file.
+%
+% This program performs principal components analysis on the given dataset.
+% It will transform the data onto its principal components, optionally
+% performing dimensionality reduction by ignoring the principal components
+% with the smallest eigenvalues. Timings are printed as '[INFO ]' lines; the
+% transformed data is computed but neither returned nor written anywhere.
+%
+% cmd is one string holding all options, e.g. '-i data.csv -d 2 -s'.
+%
+% Required options:
+%   (-i) [string]  Input dataset to perform PCA on.
+% Options:
+%   (-d) [int]     Desired dimensionality of output dataset. If this
+%                  option is not set, no dimensionality reduction is
+%                  performed. Default value 0.
+%   (-s)           If set, the data will be scaled before running PCA,
+%                  such that the variance of each feature is 1.
+
+% Extract the input file name that follows -i.
+inputFile = regexp(cmd, '.*?-i ([^\s]+)', 'tokens', 'once');
+if isempty(inputFile)
+  % Without -i, csvread would be called with no arguments and fail with an
+  % unrelated error, so report the real problem and stop.
+  disp('[Fatal] Required option: input dataset (-i) is missing!')
+  return
+end
+
+% Load input dataset.
+loading_data = tic;
+total_time = tic;
+X = csvread(inputFile{:});
+fprintf('[INFO ] loading_data: %fs\n', toc(loading_data))
+
+% Retrieve the dimensions of X.
+[m, n] = size(X);
+
+% Find out what dimension we want. The number must directly follow -d, and
+% -d must start an option (start of string or after whitespace); k stays 0,
+% meaning "no reduction", when the option is absent.
+k = 0;
+dimToken = regexp(cmd, '(?:^|\s)-d\s+(\d+)', 'tokens', 'once');
+if ~isempty(dimToken)
+  k = str2double(dimToken{1});
+end
+
+% Validate the parameter.
+if k > n
+  msg = [...
+    '[Fatal] New dimensionality (%i) cannot be greater than '...
+    'existing dimensionality (%i)!\n'...
+    ];
+  fprintf(msg, k, n)
+  return
+end
+
+% -s is recognised only as a standalone option, so a file name such as
+% 'data-set.csv' does not switch scaling on by accident.
+scaleData = ~isempty(regexp(cmd, '(?:^|\s)-s(?:\s|$)', 'once'));
+if scaleData
+  % The princomp function centers X by subtracting off column means, but
+  % the function doesn't rescale the columns of X. So we have to rescale
+  % before princomp.
+  data = zscore(X);
+else
+  data = X;
+end
+
+% Perform principal components analysis; the 'econ' form is requested only
+% when X has more rows than columns (m > n).
+% NOTE(review): princomp's documentation describes 'econ' as trimming the
+% output when rows <= columns -- confirm this branch selection is intended.
+if m <= n
+  [~, score] = princomp(data);
+else
+  [~, score] = princomp(data, 'econ');
+end
+
+% Reduced data dimension. score is not used afterwards; the slicing is kept
+% so that it is part of the measured time.
+if k > 0
+  score = score(:, 1:k); %#ok<NASGU>
+end
+
+fprintf('[INFO ] total_time: %fs\n', toc(total_time))
+
+end
\ No newline at end of file
More information about the mlpack-svn mailing list