[mlpack-git] master: Add randomized SVD method. (cfbd604)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jul 6 15:30:09 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/98babfc774bce91170df994763b670b9abd20917...e7b9b042d1d6e2d9895d5fa141e9c135b2d2ea57

>---------------------------------------------------------------

commit cfbd604a428ddd0813ada3dba1fc33d751c7a13a
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Wed Jul 6 00:59:00 2016 +0200

    Add randomized SVD method.


>---------------------------------------------------------------

cfbd604a428ddd0813ada3dba1fc33d751c7a13a
 src/mlpack/methods/CMakeLists.txt                  |   1 +
 .../{quic_svd => randomized_svd}/CMakeLists.txt    |   4 +-
 .../methods/randomized_svd/randomized_svd.cpp      | 128 ++++++++++++++++++++
 .../methods/randomized_svd/randomized_svd.hpp      | 133 +++++++++++++++++++++
 4 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 5734d5c..39aa62f 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -46,6 +46,7 @@ set(DIRS
   perceptron
   quic_svd
   radical
+  randomized_svd
   range_search
   rann
   rmva
diff --git a/src/mlpack/methods/quic_svd/CMakeLists.txt b/src/mlpack/methods/randomized_svd/CMakeLists.txt
similarity index 90%
copy from src/mlpack/methods/quic_svd/CMakeLists.txt
copy to src/mlpack/methods/randomized_svd/CMakeLists.txt
index ae7a36a..ad0e5b3 100644
--- a/src/mlpack/methods/quic_svd/CMakeLists.txt
+++ b/src/mlpack/methods/randomized_svd/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Define the files we need to compile.
 # Anything not in this list will not be compiled into mlpack.
 set(SOURCES
-  quic_svd.hpp
-  quic_svd_impl.hpp
+  randomized_svd.hpp
+  randomized_svd.cpp
 )
 
 # Add directory name to sources.
diff --git a/src/mlpack/methods/randomized_svd/randomized_svd.cpp b/src/mlpack/methods/randomized_svd/randomized_svd.cpp
new file mode 100644
index 0000000..64d62ee
--- /dev/null
+++ b/src/mlpack/methods/randomized_svd/randomized_svd.cpp
@@ -0,0 +1,128 @@
+/**
+ * @file randomized_svd.cpp
+ * @author Marcus Edel
+ *
+ * Implementation of the randomized SVD method.
+ */
+
+#include "randomized_svd.hpp"
+
+namespace mlpack {
+namespace svd {
+
+RandomizedSVD::RandomizedSVD(const arma::mat& data,
+                             arma::mat& u,
+                             arma::vec& s,
+                             arma::mat& v,
+                             const size_t iteratedPower,
+                             const size_t maxIterations,
+                             const size_t rank) :
+    iteratedPower(iteratedPower),
+    maxIterations(maxIterations)
+{
+  if (rank == 0)
+  {
+    Apply(data, u, s, v, data.n_rows);
+  }
+  else
+  {
+    Apply(data, u, s, v, rank);
+  }
+}
+
+RandomizedSVD::RandomizedSVD(const size_t iteratedPower,
+                             const size_t maxIterations) :
+    iteratedPower(iteratedPower),
+    maxIterations(maxIterations)
+{
+  /* Nothing to do here */
+}
+
+void RandomizedSVD::Apply(const arma::mat& data,
+                          arma::mat& u,
+                          arma::vec& s,
+                          arma::mat& v,
+                          const size_t rank)
+{
+  if (iteratedPower == 0)
+    iteratedPower = rank + 2;
+
+  // Center the data into a temporary matrix.
+  arma::vec rowMean = arma::sum(data, 1) / data.n_cols;
+
+  arma::mat R, Q, Qdata;
+  ann::RandomInitialization randomInit;
+
+  // Apply the centered data matrix to a random matrix, obtaining Q.
+  if (data.n_cols >= data.n_rows)
+  {
+    randomInit.Initialize(R, data.n_rows, iteratedPower);
+    Q = (data.t() * R) - arma::repmat(arma::trans(R.t() * rowMean),
+        data.n_cols, 1);
+  }
+  else
+  {
+    randomInit.Initialize(R, data.n_cols, iteratedPower);
+    Q = (data * R) - (rowMean * (arma::ones(1, data.n_cols) * R));
+  }
+
+  // Form a matrix Q whose columns constitute a
+  // well-conditioned basis for the columns of the earlier Q.
+  if (maxIterations == 0)
+  {
+    arma::qr_econ(Q, v, Q);
+  }
+  else
+  {
+    arma::lu(Q, v, Q);
+  }
+
+  // Perform normalized power iterations.
+  for (size_t i = 0; i < maxIterations; ++i)
+  {
+    if (data.n_cols >= data.n_rows)
+    {
+      Q = (data * Q) - rowMean * (arma::ones(1, data.n_cols) * Q);
+      arma::lu(Q, v, Q);
+      Q = (data.t() * Q) - arma::repmat(rowMean.t() * Q, data.n_cols, 1);
+    }
+    else
+    {
+      Q = (data.t() * Q) - arma::repmat(rowMean.t() * Q, data.n_cols, 1);
+      arma::lu(Q, v, Q);
+      Q = (data * Q) - (rowMean * (arma::ones(1, data.n_cols) * Q));
+    }
+
+    // Computing the LU decomposition is more efficient than computing the QR
+    // decomposition, so we only use in the last iteration, a pivoted QR
+    // decomposition which renormalizes Q, ensuring that the columns of Q are
+    // orthonormal.
+    if (i < (maxIterations - 1))
+    {
+      arma::lu(Q, v, Q);
+    }
+    else
+    {
+      arma::qr_econ(Q, v, Q);
+    }
+  }
+
+  // Do economical singular value decomposition and compute only the
+  // approximations of the left singular vectors by using the centered data
+  // applied to Q.
+  if (data.n_cols >= data.n_rows)
+  {
+    Qdata = (data * Q) - rowMean * (arma::ones(1, data.n_cols) * Q);
+    arma::svd_econ(u, s, v, Qdata);
+    v = Q * v;
+  }
+  else
+  {
+    Qdata = (Q.t() * data) - arma::repmat(Q.t() * rowMean, 1,  data.n_cols);
+    arma::svd_econ(u, s, v, Qdata);
+    u = Q * u;
+  }
+}
+
+} // namespace svd
+} // namespace mlpack
diff --git a/src/mlpack/methods/randomized_svd/randomized_svd.hpp b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
new file mode 100644
index 0000000..c175fd2
--- /dev/null
+++ b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
@@ -0,0 +1,133 @@
+/**
+ * @file randomized_svd.hpp
+ * @author Marcus Edel
+ *
+ * An implementation of the randomized SVD method.
+ */
+
+#ifndef MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
+#define MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+
+namespace mlpack {
+namespace svd {
+
+/**
+ * Randomized SVD is a matrix factorization that is based on randomized matrix
+ * approximation techniques, developed in in "Finding structure with randomness:
+ * Probabilistic algorithms for constructing approximate matrix decompositions".
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @article{Halko2011,
+ *   author  = {Halko, N. and Martinsson, P. G. and Tropp, J. A.},
+ *   title   = {Finding Structure with Randomness: Probabilistic Algorithms for
+                Constructing Approximate Matrix Decompositions},
+ *   journal = {SIAM Rev.},
+ *   volume  = {53},
+ *   year    = {2011},
+ * }
+ * @endcode
+ *
+ * @code
+ * @article{Szlam2014,
+ *   author  = {Arthur Szlam Yuval Kluger and Mark Tygert},
+ *   title   = {An implementation of a randomized algorithm for principal
+                component analysis},
+ *   journal = {CoRR},
+ *   volume  = {abs/1412.3510},
+ *   year    = {2014},
+ * }
+ * @endcode
+ *
+ * An example of how to use the interface is shown below:
+ *
+ * @code
+ * arma::mat data; // Rating data in the form of coordinate list.
+ *
+ * const size_t rank = 20; // Rank used for the decomposition.
+ *
+ * // Make a RandomizedSVD object.
+ * RandomizedSVD rSVD();
+ *
+ * arma::mat u, s, v;
+ *
+ * // Use the Apply() method to get a factorization.
+ * rSVD.Apply(data, u, s, v, rank);
+ * @endcode
+ */
+class RandomizedSVD
+{
+  public:
+  /**
+   * Create object for the randomized SVD method.
+   *
+   * @param data Data matrix.
+   * @param u First unitary matrix.
+   * @param v Second unitary matrix.
+   * @param sigma Diagonal matrix of singular values.
+   * @param iteratedPower Size of the normalized power iterations
+   *        (Default: rank + 2).
+   * @param maxIterations Number of iterations for the power method
+   *        (Default: 2).
+   * @param rank Rank of the approximation (Default: number of rows.)
+   */
+  RandomizedSVD(const arma::mat& data,
+                arma::mat& u,
+                arma::vec& s,
+                arma::mat& v,
+                const size_t iteratedPower = 0,
+                const size_t maxIterations = 2,
+                const size_t rank = 0);
+
+  /**
+   * Create object for the randomized SVD method.
+   *
+   * @param iteratedPower Size of the normalized power iterations
+   *        (Default: rank + 2).
+   * @param maxIterations Number of iterations for the power method
+   *        (Default: 2).
+   */
+  RandomizedSVD(const size_t iteratedPower = 0, const size_t maxIterations = 2);
+
+  /**
+   * Apply Principal Component Analysis to the provided data set using the
+   * randomized SVD.
+   *
+   * @param data Data matrix.
+   * @param u First unitary matrix.
+   * @param v Second unitary matrix.
+   * @param sigma Diagonal matrix of singular values.
+   * @param rank Rank of the approximation.
+   */
+  void Apply(const arma::mat& data,
+             arma::mat& u,
+             arma::vec& s,
+             arma::mat& v,
+             const size_t rank);
+
+  //! Get the size of the normalized power iterations.
+  size_t IteratedPower() const { return iteratedPower; }
+  //! Modify the size of the normalized power iterations.
+  size_t& IteratedPower() { return iteratedPower; }
+
+  //! Get the number of iterations for the power method.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the number of iterations for the power method.
+  size_t& MaxIterations() { return maxIterations; }
+
+  private:
+    //! Locally stored size of the normalized power iterations.
+    size_t iteratedPower;
+
+    //! Locally stored number of iterations for the power method.
+    size_t maxIterations;
+};
+
+} // namespace svd
+} // namespace mlpack
+
+#endif




More information about the mlpack-git mailing list