[mlpack-svn] r15384 - mlpack/conf/jenkins-conf/benchmark/methods/matlab

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jul 2 17:37:48 EDT 2013


Author: marcus
Date: Tue Jul  2 17:37:48 2013
New Revision: 15384

Log:
Add Principal Components Analysis matlab method.

Added:
   mlpack/conf/jenkins-conf/benchmark/methods/matlab/
   mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m

Added: mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/matlab/pca.m	Tue Jul  2 17:37:48 2013
@@ -0,0 +1,70 @@
+function pca(cmd)
+% This program performs principal components analysis on the given dataset.
+% It will transform the data onto its principal components, optionally
+% performing dimensionality reduction by ignoring the principal components
+% with the smallest eigenvalues. Nothing is returned; the data loading time
+% and the total run time are printed to the console.
+%
+% Required options:
+%     (-i) [string]    Input dataset (CSV file) to perform PCA on.
+% Options:
+% (-d) [int]           Desired dimensionality of output dataset. If this
+%                      option is not set, no dimensionality reduction is
+%                      performed. Default value 0.
+% (-s)                 If set, the data will be scaled before running PCA,
+%                      such that the variance of each feature is 1.
+
+inputFile = regexp(cmd, '.*?-i ([^\s]+)', 'tokens', 'once');
+
+% Load input dataset.
+loading_data = tic;
+total_time = tic;
+X = csvread(inputFile{:});
+disp(sprintf('[INFO ]   loading_data: %fs', toc(loading_data)))
+
+% Find out what dimension we want.
+k = str2double(regexp(cmd,'.* -d.* (\d+)','tokens','once'));
+% Validate the parameter.
+if k > 0
+    if k > size(X, 2)
+        msg = [...
+            '[Fatal] New dimensionality (%i) cannot be greater than '...
+            'existing dimensionality (%i)!'...
+            ];
+        disp(sprintf(msg, k, size(X, 2)))
+        return
+    end
+end
+
+% Retrieve the dimensions of X.
+[m, n] = size(X);
+
+% Match '-s' only as a standalone flag so that a file name containing
+% '-s' (e.g. 'my-set.csv') does not switch scaling on by accident.
+if ~isempty(regexp(cmd, '(^|\s)-s(\s|$)', 'once'))
+    % The princomp function centers X by subtracting off column means, but
+    % the function doesn't rescale the columns of X. So we have to rescale
+    % before princomp. The 'econ' option is requested when m > n.
+    if (m <= n)
+        [~, score] = princomp(zscore(X));
+    else
+        [~, score] = princomp(zscore(X), 'econ');
+    end
+else
+    % Performs principal components analysis on the dataset X, requesting
+    % the 'econ' option when m > n.
+    if (m <= n)
+        [~, score] = princomp(X);
+    else
+        [~, score] = princomp(X, 'econ');
+    end
+end
+
+% Reduced data dimension.
+if k > 0
+   score = score(:,1:k);
+end
+
+disp(sprintf('[INFO ]   total_time: %fs', toc(total_time)))
+
+end
\ No newline at end of file



More information about the mlpack-svn mailing list