[mlpack-svn] r15270 - mlpack/trunk/src/mlpack/methods/pca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jun 20 15:36:35 EDT 2013
Author: rcurtin
Date: 2013-06-20 15:36:35 -0400 (Thu, 20 Jun 2013)
New Revision: 15270
Modified:
mlpack/trunk/src/mlpack/methods/pca/pca.cpp
Log:
Fix the scaling parameter; what was being done before did not make sense.
Modified: mlpack/trunk/src/mlpack/methods/pca/pca.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/pca.cpp 2013-06-20 19:36:21 UTC (rev 15269)
+++ mlpack/trunk/src/mlpack/methods/pca/pca.cpp 2013-06-20 19:36:35 UTC (rev 15270)
@@ -10,8 +10,8 @@
#include <iostream>
using namespace std;
-namespace mlpack {
-namespace pca {
+using namespace mlpack;
+using namespace mlpack::pca;
PCA::PCA(const bool scaleData) :
scaleData(scaleData)
@@ -30,14 +30,28 @@
arma::vec& eigVal,
arma::mat& coeffs) const
{
- // Original transpose op goes here.
+ // Calculate the covariance matrix, given that the data is column-major (this
+ // is why we use ccov() and not cov()).
arma::mat covMat = ccov(data);
- // Centering is built into ccov().
+ // Centering is built into ccov(), so we don't need to worry about it. We
+ // only need to scale the data if the user asked for it.
if (scaleData)
{
- covMat = covMat / (arma::ones<arma::colvec>(covMat.n_rows))
- * stddev(covMat, 0, 0);
+ // Scaling the data is when we reduce the variance of each dimension to 1.
+ // Normally you might do this by dividing each dimension by its standard
+ // deviation, but since we already have the covariance matrix we can
+ // simplify the operation into dividing each element C_ij in the covariance
+ // matrix by the standard deviation of dimension i multiplied by the
+ // standard deviation of dimension j.
+ arma::vec stdDev = sqrt(covMat.diag());
+
+ // If there are any zeroes, make them very small.
+ for (size_t i = 0; i < stdDev.n_elem; ++i)
+ if (stdDev[i] == 0)
+ stdDev[i] = 1e-50;
+
+ covMat /= stdDev * trans(stdDev);
}
arma::eig_sym(eigVal, coeffs, covMat);
@@ -88,6 +102,3 @@
if (newDimension < coeffs.n_rows && newDimension > 0)
data.shed_rows(newDimension, data.n_rows - 1);
}
-
-}; // namespace mlpack
-}; // namespace pca
More information about the mlpack-svn mailing list