[mlpack-git] master,mlpack-1.0.x: Adding QUIC-SVD. (726837d)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:50:52 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 726837d7475539ec057155c5b152e04a3cbb65ed
Author: Siddharth Agrawal <siddharth.950 at gmail.com>
Date:   Thu Jul 3 13:08:12 2014 +0000

    Adding QUIC-SVD.


>---------------------------------------------------------------

726837d7475539ec057155c5b152e04a3cbb65ed
 src/mlpack/core/tree/CMakeLists.txt                |  2 +-
 .../{cosine_tree_impl.hpp => cosine_tree.cpp}      |  8 ---
 src/mlpack/core/tree/cosine_tree/cosine_tree.hpp   |  3 -
 src/mlpack/methods/CMakeLists.txt                  |  1 +
 src/mlpack/methods/CMakeLists.txt~                 |  3 +
 .../CMakeLists.txt                                 |  6 +-
 src/mlpack/methods/quic_svd/quic_svd.hpp           | 70 ++++++++++++++++++
 src/mlpack/methods/quic_svd/quic_svd_impl.hpp      | 82 ++++++++++++++++++++++
 src/mlpack/tests/CMakeLists.txt                    |  1 +
 src/mlpack/tests/CMakeLists.txt~                   |  4 ++
 src/mlpack/tests/quic_svd_test.cpp                 | 42 +++++++++++
 11 files changed, 207 insertions(+), 15 deletions(-)

diff --git a/src/mlpack/core/tree/CMakeLists.txt b/src/mlpack/core/tree/CMakeLists.txt
index 7988285..43b6117 100644
--- a/src/mlpack/core/tree/CMakeLists.txt
+++ b/src/mlpack/core/tree/CMakeLists.txt
@@ -14,7 +14,7 @@ set(SOURCES
   binary_space_tree/traits.hpp
   bounds.hpp
   cosine_tree/cosine_tree.hpp
-  cosine_tree/cosine_tree_impl.hpp
+  cosine_tree/cosine_tree.cpp
   cover_tree/cover_tree.hpp
   cover_tree/cover_tree_impl.hpp
   cover_tree/first_point_is_root.hpp
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
similarity index 98%
rename from src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
rename to src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
index 46167d5..67e1d9a 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
@@ -4,10 +4,6 @@
  *
  * Implementation of cosine tree.
  */
-#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
-#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
-
-// In case it wasn't included already for some reason.
 #include "cosine_tree.hpp"
 
 #include <boost/math/distributions/normal.hpp>
@@ -127,8 +123,6 @@ CosineTree::CosineTree(const arma::mat& dataset,
     
     // Calculate Monte Carlo error estimate for the root node.
     monteCarloError = MonteCarloError(&root, treeQueue);
-    
-    std::cout << monteCarloError / root.FrobNormSquared() << "\n";
   }
   
   // Construct the subspace basis from the current priority queue.
@@ -429,5 +423,3 @@ void CosineTree::CalculateCentroid()
 
 }; // namespace tree
 }; // namespace mlpack
-
-#endif
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
index 4a411d9..f4d5aac 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
@@ -243,7 +243,4 @@ class CompareCosineNode
 }; // namespace tree
 }; // namespace mlpack
 
-// Include implementation.
-#include "cosine_tree_impl.hpp"
-
 #endif
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index d9eea39..5115733 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -23,6 +23,7 @@ set(DIRS
 #  lmf
   pca
   perceptron
+  quic_svd
   radical
   range_search
   rann
diff --git a/src/mlpack/methods/CMakeLists.txt~ b/src/mlpack/methods/CMakeLists.txt~
index fb6a0d4..1477e4a 100644
--- a/src/mlpack/methods/CMakeLists.txt~
+++ b/src/mlpack/methods/CMakeLists.txt~
@@ -2,6 +2,7 @@
 set(DIRS
   amf
   cf
+  decision_stump
   det
   emst
   fastmks
@@ -21,6 +22,8 @@ set(DIRS
   nmf
 #  lmf
   pca
+  perceptron
+#  quic_svd
   radical
   range_search
   rann
diff --git a/src/mlpack/methods/perceptron/InitializationMethods/CMakeLists.txt b/src/mlpack/methods/quic_svd/CMakeLists.txt
similarity index 83%
copy from src/mlpack/methods/perceptron/InitializationMethods/CMakeLists.txt
copy to src/mlpack/methods/quic_svd/CMakeLists.txt
index d5d9c31..c89b866 100644
--- a/src/mlpack/methods/perceptron/InitializationMethods/CMakeLists.txt
+++ b/src/mlpack/methods/quic_svd/CMakeLists.txt
@@ -1,8 +1,8 @@
-# Define the files we need to compile
+# Define the files we need to compile.
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  random_init.hpp
-  zero_init.hpp
+  quic_svd.hpp
+  quic_svd_impl.hpp
 )
 
 # Add directory name to sources.
diff --git a/src/mlpack/methods/quic_svd/quic_svd.hpp b/src/mlpack/methods/quic_svd/quic_svd.hpp
new file mode 100644
index 0000000..f2ef741
--- /dev/null
+++ b/src/mlpack/methods/quic_svd/quic_svd.hpp
@@ -0,0 +1,70 @@
+/**
+ * @file quic_svd.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of QUIC-SVD.
+ */
+#ifndef __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
+#define __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/tree/cosine_tree/cosine_tree.hpp>
+
+namespace mlpack {
+namespace svd {
+
+class QUIC_SVD
+{
+ public:
+ 
+  /**
+   * Constructor which implements the QUIC-SVD algorithm. The function calls the
+   * CosineTree constructor to create a subspace basis, where the original
+   * matrix's projection has minimum reconstruction error. The constructor then
+   * uses the ExtractSVD() function to calculate the SVD of the original dataset
+   * in that subspace.
+   *
+   * @param dataset Matrix for which SVD is calculated.
+   * @param u First unitary matrix.
+   * @param v Second unitary matrix.
+   * @param sigma Diagonal matrix of singular values.
+   * @param epsilon Error tolerance fraction for calculated subspace.
+   * @param delta Cumulative probability for Monte Carlo error lower bound.
+   */
+  QUIC_SVD(const arma::mat& dataset,
+           arma::mat& u,
+           arma::mat& v,
+           arma::mat& sigma,
+           const double epsilon = 0.03,
+           const double delta = 0.1);
+  
+  /**
+   * This function uses the vector subspace created using a cosine tree to
+   * calculate an approximate SVD of the original matrix.
+   *
+   * @param u First unitary matrix.
+   * @param v Second unitary matrix.
+   * @param sigma Diagonal matrix of singular values.
+   */
+  void ExtractSVD(arma::mat& u,
+                  arma::mat& v,
+                  arma::mat& sigma);
+  
+ private:
+  //! Matrix for which cosine tree is constructed.
+  const arma::mat& dataset;
+  //! Error tolerance fraction for calculated subspace.
+  double epsilon;
+  //! Cumulative probability for Monte Carlo error lower bound.
+  double delta;
+  //! Subspace basis of the input dataset.
+  arma::mat basis;
+};
+
+}; // namespace svd
+}; // namespace mlpack
+
+// Include implementation.
+#include "quic_svd_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/quic_svd/quic_svd_impl.hpp b/src/mlpack/methods/quic_svd/quic_svd_impl.hpp
new file mode 100644
index 0000000..f1fada3
--- /dev/null
+++ b/src/mlpack/methods/quic_svd/quic_svd_impl.hpp
@@ -0,0 +1,82 @@
+/**
+ * @file quic_svd_impl.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of QUIC-SVD.
+ */
+#ifndef __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_IMPL_HPP
+#define __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "quic_svd.hpp"
+
+using namespace mlpack::tree;
+
+namespace mlpack {
+namespace svd {
+
+QUIC_SVD::QUIC_SVD(const arma::mat& dataset,
+                   arma::mat& u,
+                   arma::mat& v,
+                   arma::mat& sigma,
+                   const double epsilon,
+                   const double delta) :
+    dataset(dataset),
+    epsilon(epsilon),
+    delta(delta)
+{
+  // Since columns are sample in the implementation, the matrix is transposed if
+  // necessary for maximum speedup.
+  CosineTree* ctree;
+  if(dataset.n_cols > dataset.n_rows)
+    ctree = new CosineTree(dataset, epsilon, delta);
+  else
+    ctree = new CosineTree(dataset.t(), epsilon, delta);
+    
+  // Get subspace basis by creating the cosine tree.
+  ctree->GetFinalBasis(basis);
+  
+  // Use the ExtractSVD algorithm mentioned in the paper to extract the SVD of
+  // the original dataset in the obtained subspace.
+  ExtractSVD(u, v, sigma);
+}
+
+void QUIC_SVD::ExtractSVD(arma::mat& u,
+                          arma::mat& v,
+                          arma::mat& sigma)
+{
+  // Calculate A * V_hat, necessary for further calculations.
+  arma::mat projectedMat;
+  if(dataset.n_cols > dataset.n_rows)
+    projectedMat = dataset.t() * basis;
+  else
+    projectedMat = dataset * basis;
+  
+  // Calculate the squared projected matrix.
+  arma::mat projectedMatSquared = projectedMat.t() * projectedMat;
+
+  // Calculate the SVD of the above matrix.
+  arma::mat uBar, vBar;
+  arma::vec sigmaBar;
+  arma::svd(uBar, sigmaBar, vBar, projectedMatSquared);
+
+  // Calculate the approximate SVD of the original matrix, using the SVD of the
+  // squared projected matrix.
+  v = basis * vBar;
+  sigma = arma::sqrt(diagmat(sigmaBar));
+  u = projectedMat * vBar * sigma.i();
+  
+  // Since columns are sampled, the unitary matrices have to be exchanged, if
+  // the transposed matrix is not passed.
+  if(dataset.n_cols > dataset.n_rows)
+  {
+    arma::mat tempMat = u;
+    u = v;
+    v = tempMat;
+  }
+}
+
+}; // namespace svd
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 3175331..41c7867 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -36,6 +36,7 @@ add_executable(mlpack_test
   nmf_test.cpp
   pca_test.cpp
   perceptron_test.cpp
+  quic_svd_test.cpp
   radical_test.cpp
   range_search_test.cpp
   rectangle_tree_test.cpp
diff --git a/src/mlpack/tests/CMakeLists.txt~ b/src/mlpack/tests/CMakeLists.txt~
index d35779b..0c989c9 100644
--- a/src/mlpack/tests/CMakeLists.txt~
+++ b/src/mlpack/tests/CMakeLists.txt~
@@ -8,6 +8,7 @@ add_executable(mlpack_test
   aug_lagrangian_test.cpp
   cf_test.cpp
   cli_test.cpp
+  cosine_tree_test.cpp
   decision_stump_test.cpp
   det_test.cpp
   distribution_test.cpp
@@ -35,8 +36,11 @@ add_executable(mlpack_test
   nmf_test.cpp
   pca_test.cpp
   perceptron_test.cpp
+#  quic_svd_test.cpp
   radical_test.cpp
   range_search_test.cpp
+  rectangle_tree_test.cpp
+  sa_test.cpp
   save_restore_utility_test.cpp
   sgd_test.cpp
   sort_policy_test.cpp
diff --git a/src/mlpack/tests/quic_svd_test.cpp b/src/mlpack/tests/quic_svd_test.cpp
new file mode 100644
index 0000000..c0d9c34
--- /dev/null
+++ b/src/mlpack/tests/quic_svd_test.cpp
@@ -0,0 +1,42 @@
+/**
+ * @file quic_svd_test.cpp
+ * @author Siddharth Agrawal
+ *
+ * Test file for QUIC-SVD class.
+ */
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/quic_svd/quic_svd.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+BOOST_AUTO_TEST_SUITE(QUICSVDTest);
+
+using namespace mlpack;
+using namespace mlpack::svd;
+
+/**
+ * The reconstruction error of the obtained SVD should be small.
+ */
+BOOST_AUTO_TEST_CASE(QUICSVDReconstructionError)
+{
+  // Load the dataset.
+  arma::mat dataset;
+  data::Load("test_data_3_1000.csv", dataset);
+	
+	// Obtain the SVD using default parameters.
+  arma::mat u, v, sigma;
+  QUIC_SVD quicsvd(dataset, u, v, sigma);
+  
+  // Reconstruct the matrix using the SVD.
+  arma::mat reconstruct;
+  reconstruct = u * sigma * v.t();
+  
+  // The relative reconstruction error should be small.
+  double relativeError = arma::norm(dataset - reconstruct, "frob") /
+                         arma::norm(dataset, "frob");                         
+  BOOST_REQUIRE_SMALL(relativeError, 1e-5);
+}
+
+BOOST_AUTO_TEST_SUITE_END();



More information about the mlpack-git mailing list