[mlpack-git] master: * added AverageInitialization to AMF (c934741)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:58:25 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit c934741be1fecedbc582843e361938585e8317d9
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date:   Sun Aug 17 17:34:41 2014 +0000

    * added AverageInitialization to AMF


>---------------------------------------------------------------

c934741be1fecedbc582843e361938585e8317d9
 src/mlpack/methods/amf/init_rules/CMakeLists.txt   |  1 +
 src/mlpack/methods/amf/init_rules/average_init.hpp | 63 ++++++++++++++++++++++
 src/mlpack/methods/cf/svd_wrapper.hpp              | 16 ++++--
 src/mlpack/methods/cf/svd_wrapper_impl.hpp         |  1 -
 src/mlpack/tests/svd_batch_test.cpp                |  3 +-
 5 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/methods/amf/init_rules/CMakeLists.txt b/src/mlpack/methods/amf/init_rules/CMakeLists.txt
index a31d281..f8ff4b6 100644
--- a/src/mlpack/methods/amf/init_rules/CMakeLists.txt
+++ b/src/mlpack/methods/amf/init_rules/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(SOURCES
   random_init.hpp
   random_acol_init.hpp
+  average_init.hpp
 )
 
 # Add directory name to sources.
diff --git a/src/mlpack/methods/amf/init_rules/average_init.hpp b/src/mlpack/methods/amf/init_rules/average_init.hpp
new file mode 100644
index 0000000..cf01ce4
--- /dev/null
+++ b/src/mlpack/methods/amf/init_rules/average_init.hpp
@@ -0,0 +1,63 @@
+/**
+ * @file average_init.hpp
+ * @author Sumedh Ghaisas
+ *
+ * Initialization rule for Alternating Matrix Factorization.
+ */
+#ifndef __MLPACK_METHODS_AMF_AVERAGE_INIT_HPP
+#define __MLPACK_METHODS_AMF_AVERAGE_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace amf {
+
+/**
+ * Initialization rule for AMF: fills W (n x r) and H (r x m) with uniform
+ * noise from Armadillo's randu() and adds a constant offset to every element.
+ * With n and m the row and column counts of V and r the factorization rank,
+ * the offset is sqrt((avg - min) / r), where avg is the sum of the nonzero
+ * entries of V divided by n * m and min is the smallest nonzero entry of V.
+ */
+class AverageInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template.
+  AverageInitialization() { }
+
+  template<typename MatType>
+  inline static void Initialize(const MatType& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    size_t n = V.n_rows;
+    size_t m = V.n_cols;
+    // Accumulate the sum and the minimum of the entries of V that are not zero.
+    double V_avg = 0;
+    size_t count = 0; // Tallied but never read: the mean divides by n * m.
+    double min = DBL_MAX;
+    for(typename MatType::const_row_col_iterator it = V.begin();it != V.end();it++)
+    {
+      if(*it != 0)
+      {
+        count++;
+        V_avg += *it;
+        if(*it < min) min = *it;
+      }
+    }
+    V_avg = sqrt(((V_avg / (n * m)) - min) / r);
+
+    // Size W as n x r and H as r x m, filled with uniform random values.
+    W.randu(n, r);
+    H.randu(r, m);
+    // Shift every element of W and H by the offset computed above.
+    W = W + V_avg;
+    H = H + V_avg;
+  }
+};
+
+}; // namespace amf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/cf/svd_wrapper.hpp b/src/mlpack/methods/cf/svd_wrapper.hpp
index 27ef452..3835e89 100644
--- a/src/mlpack/methods/cf/svd_wrapper.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper.hpp
@@ -15,12 +15,22 @@ namespace cf
 {
 
 /**
- *
- * @see CF
+ * This class acts as a dummy class for passing as template parameter. Passing 
+ * this class as a template parameter to class SVDWrapper will force SVDWrapper
+ * to use Armadillo's SVD implementation.
  */
-
 class DummyClass {}; 
 
+/**
+ * This class acts as the wrapper for all SVD factorizers which are incompatible
+ * with the CF module. Normally SVD factorizers implement an Apply method which
+ * takes matrix V and factorizes it into P, sigma and Q where
+ * V = P * sigma * trans(Q). But the CF module requires the factorization to be
+ * V = W * H. This class multiplies P and sigma and takes the first 'r'
+ * eigenvectors out, where 'r' is the rank of the factorization. The Q matrix
+ * is transposed and trimmed to match that rank. The Factorizer class should
+ * implement Apply taking matrices P, sigma, Q and V, in that order.
+ */
 template<class Factorizer = DummyClass>
 class SVDWrapper
 {
diff --git a/src/mlpack/methods/cf/svd_wrapper_impl.hpp b/src/mlpack/methods/cf/svd_wrapper_impl.hpp
index f3368c3..2ec6dea 100644
--- a/src/mlpack/methods/cf/svd_wrapper_impl.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper_impl.hpp
@@ -4,7 +4,6 @@
  *
  * Implementation of the SVD wrapper class.
  */
-
 template<class Factorizer>
 double mlpack::cf::SVDWrapper<Factorizer>::Apply(const arma::mat& V,
                          arma::mat& W,
diff --git a/src/mlpack/tests/svd_batch_test.cpp b/src/mlpack/tests/svd_batch_test.cpp
index 820e6f6..a5deb7f 100644
--- a/src/mlpack/tests/svd_batch_test.cpp
+++ b/src/mlpack/tests/svd_batch_test.cpp
@@ -2,6 +2,7 @@
 #include <mlpack/methods/amf/amf.hpp>
 #include <mlpack/methods/amf/update_rules/svd_batch_learning.hpp>
 #include <mlpack/methods/amf/init_rules/random_init.hpp>
+#include <mlpack/methods/amf/init_rules/average_init.hpp>
 #include <mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp>
 #include <mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp>
 
@@ -24,7 +25,7 @@ BOOST_AUTO_TEST_CASE(SVDBatchConvergenceElementTest)
   sp_mat data;
   data.sprandn(1000, 1000, 0.2);
   AMF<SimpleToleranceTermination<sp_mat>, 
-      RandomInitialization, 
+      AverageInitialization, 
       SVDBatchLearning> amf;
   mat m1,m2;
   amf.Apply(data, 2, m1, m2);



More information about the mlpack-git mailing list