[mlpack-svn] r15486 - mlpack/trunk/src/mlpack/methods/pca

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 17 11:36:37 EDT 2013


Author: rcurtin
Date: Wed Jul 17 11:36:36 2013
New Revision: 15486

Log:
Refactor PCA to use SVD because it is faster.


Modified:
   mlpack/trunk/src/mlpack/methods/pca/pca.cpp

Modified: mlpack/trunk/src/mlpack/methods/pca/pca.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/pca/pca.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/pca/pca.cpp	Wed Jul 17 11:36:36 2013
@@ -8,6 +8,7 @@
 #include "pca.hpp"
 #include <mlpack/core.hpp>
 #include <iostream>
+#include <complex>
 
 using namespace std;
 using namespace mlpack;
@@ -28,43 +29,49 @@
 void PCA::Apply(const arma::mat& data,
                 arma::mat& transformedData,
                 arma::vec& eigVal,
-                arma::mat& coeffs) const
+                arma::mat& coeff) const
 {
-  // Calculate the covariance matrix, given that the data is column-major (this
-  // is why we use ccov() and not cov()).
-  arma::mat covMat = ccov(data);
+  // This matrix will store the right singular values; we do not need them.
+  arma::mat v;
+
+  // Center the data into a temporary matrix.
+  arma::mat centeredData;
+  math::Center(data, centeredData);
 
-  // Centering is built into ccov(), so we don't need to worry about it.  We
-  // only need to scale the data if the user asked for it.
   if (scaleData)
   {
     // Scaling the data is when we reduce the variance of each dimension to 1.
-    // Normally you might do this by dividing each dimension by its standard
-    // deviation, but since we already have the covariance matrix we can
-    // simplify the operation into dividing each element C_ij in the covariance
-    // matrix by the standard deviation of dimension i multiplied by the
-    // standard deviation of dimension j.
-    arma::vec stdDev = sqrt(covMat.diag());
+    // We do this by dividing each dimension by its standard deviation.
+    arma::vec stdDev = arma::stddev(centeredData, 0, 1 /* for each dimension */);
 
     // If there are any zeroes, make them very small.
     for (size_t i = 0; i < stdDev.n_elem; ++i)
       if (stdDev[i] == 0)
         stdDev[i] = 1e-50;
 
-    covMat /= stdDev * trans(stdDev);
+    centeredData /= arma::repmat(stdDev, 1, centeredData.n_cols);
+  }
+
+  // Do singular value decomposition.  Use the economical singular value
+  // decomposition if the columns are much larger than the rows.
+  if (data.n_rows < data.n_cols)
+  {
+    // Do economical singular value decomposition and compute only the left
+    // singular vectors.
+    arma::svd_econ(coeff, eigVal, v, centeredData, 'l');
+  }
+  else
+  {
+    arma::svd(coeff, eigVal, v, centeredData);
   }
 
-  arma::eig_sym(eigVal, coeffs, covMat);
+  // Now we must square the singular values to get the eigenvalues.
+  // In addition we must divide by the number of points, because the covariance
+  // matrix is X * X' / (N - 1).
+  eigVal %= eigVal / (data.n_cols - 1);
 
-  int nEigVal = eigVal.n_elem;
-  for (int i = 0; i < floor(nEigVal / 2.0); i++)
-    eigVal.swap_rows(i, (nEigVal - 1) - i);
-
-  coeffs = arma::fliplr(coeffs);
-  transformedData = trans(coeffs) * data;
-  arma::colvec transformedDataMean = arma::mean(transformedData, 1);
-  transformedData = transformedData - (transformedDataMean *
-      arma::ones<arma::rowvec>(transformedData.n_cols));
+  // Project the samples to the principals.
+  transformedData = arma::trans(coeff) * centeredData;
 }
 
 /**



More information about the mlpack-svn mailing list