[mlpack-git] master: Add randomized SVD method. (cfbd604)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jul 6 15:30:09 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/98babfc774bce91170df994763b670b9abd20917...e7b9b042d1d6e2d9895d5fa141e9c135b2d2ea57
>---------------------------------------------------------------
commit cfbd604a428ddd0813ada3dba1fc33d751c7a13a
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Wed Jul 6 00:59:00 2016 +0200
Add randomized SVD method.
>---------------------------------------------------------------
cfbd604a428ddd0813ada3dba1fc33d751c7a13a
src/mlpack/methods/CMakeLists.txt | 1 +
.../{quic_svd => randomized_svd}/CMakeLists.txt | 4 +-
.../methods/randomized_svd/randomized_svd.cpp | 128 ++++++++++++++++++++
.../methods/randomized_svd/randomized_svd.hpp | 133 +++++++++++++++++++++
4 files changed, 264 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 5734d5c..39aa62f 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -46,6 +46,7 @@ set(DIRS
perceptron
quic_svd
radical
+ randomized_svd
range_search
rann
rmva
diff --git a/src/mlpack/methods/quic_svd/CMakeLists.txt b/src/mlpack/methods/randomized_svd/CMakeLists.txt
similarity index 90%
copy from src/mlpack/methods/quic_svd/CMakeLists.txt
copy to src/mlpack/methods/randomized_svd/CMakeLists.txt
index ae7a36a..ad0e5b3 100644
--- a/src/mlpack/methods/quic_svd/CMakeLists.txt
+++ b/src/mlpack/methods/randomized_svd/CMakeLists.txt
@@ -1,8 +1,8 @@
# Define the files we need to compile.
# Anything not in this list will not be compiled into mlpack.
set(SOURCES
- quic_svd.hpp
- quic_svd_impl.hpp
+ randomized_svd.hpp
+ randomized_svd.cpp
)
# Add directory name to sources.
diff --git a/src/mlpack/methods/randomized_svd/randomized_svd.cpp b/src/mlpack/methods/randomized_svd/randomized_svd.cpp
new file mode 100644
index 0000000..64d62ee
--- /dev/null
+++ b/src/mlpack/methods/randomized_svd/randomized_svd.cpp
@@ -0,0 +1,128 @@
+/**
+ * @file randomized_svd.cpp
+ * @author Marcus Edel
+ *
+ * Implementation of the randomized SVD method.
+ */
+
+#include "randomized_svd.hpp"
+
+namespace mlpack {
+namespace svd {
+
+RandomizedSVD::RandomizedSVD(const arma::mat& data,
+ arma::mat& u,
+ arma::vec& s,
+ arma::mat& v,
+ const size_t iteratedPower,
+ const size_t maxIterations,
+ const size_t rank) :
+ iteratedPower(iteratedPower),
+ maxIterations(maxIterations)
+{
+ if (rank == 0)
+ {
+ Apply(data, u, s, v, data.n_rows);
+ }
+ else
+ {
+ Apply(data, u, s, v, rank);
+ }
+}
+
+RandomizedSVD::RandomizedSVD(const size_t iteratedPower,
+ const size_t maxIterations) :
+ iteratedPower(iteratedPower),
+ maxIterations(maxIterations)
+{
+ /* Nothing to do here */
+}
+
+void RandomizedSVD::Apply(const arma::mat& data,
+ arma::mat& u,
+ arma::vec& s,
+ arma::mat& v,
+ const size_t rank)
+{
+ if (iteratedPower == 0)
+ iteratedPower = rank + 2;
+
+ // Center the data into a temporary matrix.
+ arma::vec rowMean = arma::sum(data, 1) / data.n_cols;
+
+ arma::mat R, Q, Qdata;
+ ann::RandomInitialization randomInit;
+
+ // Apply the centered data matrix to a random matrix, obtaining Q.
+ if (data.n_cols >= data.n_rows)
+ {
+ randomInit.Initialize(R, data.n_rows, iteratedPower);
+ Q = (data.t() * R) - arma::repmat(arma::trans(R.t() * rowMean),
+ data.n_cols, 1);
+ }
+ else
+ {
+ randomInit.Initialize(R, data.n_cols, iteratedPower);
+ Q = (data * R) - (rowMean * (arma::ones(1, data.n_cols) * R));
+ }
+
+ // Form a matrix Q whose columns constitute a
+ // well-conditioned basis for the columns of the earlier Q.
+ if (maxIterations == 0)
+ {
+ arma::qr_econ(Q, v, Q);
+ }
+ else
+ {
+ arma::lu(Q, v, Q);
+ }
+
+ // Perform normalized power iterations.
+ for (size_t i = 0; i < maxIterations; ++i)
+ {
+ if (data.n_cols >= data.n_rows)
+ {
+ Q = (data * Q) - rowMean * (arma::ones(1, data.n_cols) * Q);
+ arma::lu(Q, v, Q);
+ Q = (data.t() * Q) - arma::repmat(rowMean.t() * Q, data.n_cols, 1);
+ }
+ else
+ {
+ Q = (data.t() * Q) - arma::repmat(rowMean.t() * Q, data.n_cols, 1);
+ arma::lu(Q, v, Q);
+ Q = (data * Q) - (rowMean * (arma::ones(1, data.n_cols) * Q));
+ }
+
+ // Computing the LU decomposition is more efficient than computing the QR
+ // decomposition, so we only use in the last iteration, a pivoted QR
+ // decomposition which renormalizes Q, ensuring that the columns of Q are
+ // orthonormal.
+ if (i < (maxIterations - 1))
+ {
+ arma::lu(Q, v, Q);
+ }
+ else
+ {
+ arma::qr_econ(Q, v, Q);
+ }
+ }
+
+ // Do economical singular value decomposition and compute only the
+ // approximations of the left singular vectors by using the centered data
+ // applied to Q.
+ if (data.n_cols >= data.n_rows)
+ {
+ Qdata = (data * Q) - rowMean * (arma::ones(1, data.n_cols) * Q);
+ arma::svd_econ(u, s, v, Qdata);
+ v = Q * v;
+ }
+ else
+ {
+ Qdata = (Q.t() * data) - arma::repmat(Q.t() * rowMean, 1, data.n_cols);
+ arma::svd_econ(u, s, v, Qdata);
+ u = Q * u;
+ }
+}
+
+} // namespace svd
+} // namespace mlpack
diff --git a/src/mlpack/methods/randomized_svd/randomized_svd.hpp b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
new file mode 100644
index 0000000..c175fd2
--- /dev/null
+++ b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
@@ -0,0 +1,133 @@
+/**
+ * @file randomized_svd.hpp
+ * @author Marcus Edel
+ *
+ * An implementation of the randomized SVD method.
+ */
+
+#ifndef MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
+#define MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+
+namespace mlpack {
+namespace svd {
+
+/**
+ * Randomized SVD is a matrix factorization that is based on randomized matrix
+ * approximation techniques, developed in in "Finding structure with randomness:
+ * Probabilistic algorithms for constructing approximate matrix decompositions".
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @article{Halko2011,
+ * author = {Halko, N. and Martinsson, P. G. and Tropp, J. A.},
+ * title = {Finding Structure with Randomness: Probabilistic Algorithms for
+ Constructing Approximate Matrix Decompositions},
+ * journal = {SIAM Rev.},
+ * volume = {53},
+ * year = {2011},
+ * }
+ * @endcode
+ *
+ * @code
+ * @article{Szlam2014,
+ * author = {Arthur Szlam Yuval Kluger and Mark Tygert},
+ * title = {An implementation of a randomized algorithm for principal
+ component analysis},
+ * journal = {CoRR},
+ * volume = {abs/1412.3510},
+ * year = {2014},
+ * }
+ * @endcode
+ *
+ * An example of how to use the interface is shown below:
+ *
+ * @code
+ * arma::mat data; // Rating data in the form of coordinate list.
+ *
+ * const size_t rank = 20; // Rank used for the decomposition.
+ *
+ * // Make a RandomizedSVD object.
+ * RandomizedSVD rSVD();
+ *
+ * arma::mat u, s, v;
+ *
+ * // Use the Apply() method to get a factorization.
+ * rSVD.Apply(data, u, s, v, rank);
+ * @endcode
+ */
+class RandomizedSVD
+{
+ public:
+ /**
+ * Create object for the randomized SVD method.
+ *
+ * @param data Data matrix.
+ * @param u First unitary matrix.
+ * @param v Second unitary matrix.
+ * @param sigma Diagonal matrix of singular values.
+ * @param iteratedPower Size of the normalized power iterations
+ * (Default: rank + 2).
+ * @param maxIterations Number of iterations for the power method
+ * (Default: 2).
+ * @param rank Rank of the approximation (Default: number of rows.)
+ */
+ RandomizedSVD(const arma::mat& data,
+ arma::mat& u,
+ arma::vec& s,
+ arma::mat& v,
+ const size_t iteratedPower = 0,
+ const size_t maxIterations = 2,
+ const size_t rank = 0);
+
+ /**
+ * Create object for the randomized SVD method.
+ *
+ * @param iteratedPower Size of the normalized power iterations
+ * (Default: rank + 2).
+ * @param maxIterations Number of iterations for the power method
+ * (Default: 2).
+ */
+ RandomizedSVD(const size_t iteratedPower = 0, const size_t maxIterations = 2);
+
+ /**
+ * Apply Principal Component Analysis to the provided data set using the
+ * randomized SVD.
+ *
+ * @param data Data matrix.
+ * @param u First unitary matrix.
+ * @param v Second unitary matrix.
+ * @param sigma Diagonal matrix of singular values.
+ * @param rank Rank of the approximation.
+ */
+ void Apply(const arma::mat& data,
+ arma::mat& u,
+ arma::vec& s,
+ arma::mat& v,
+ const size_t rank);
+
+ //! Get the size of the normalized power iterations.
+ size_t IteratedPower() const { return iteratedPower; }
+ //! Modify the size of the normalized power iterations.
+ size_t& IteratedPower() { return iteratedPower; }
+
+ //! Get the number of iterations for the power method.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the number of iterations for the power method.
+ size_t& MaxIterations() { return maxIterations; }
+
+ private:
+ //! Locally stored size of the normalized power iterations.
+ size_t iteratedPower;
+
+ //! Locally stored number of iterations for the power method.
+ size_t maxIterations;
+};
+
+} // namespace svd
+} // namespace mlpack
+
+#endif
More information about the mlpack-git
mailing list