[mlpack-git] master: add Binarize + test (2ff2fa1)

gitdub at mlpack.org gitdub at mlpack.org
Sun Jun 19 00:32:55 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/a0b31abe5ff69117645c664dbeac1476dd5e48f7...2da9c5bac14a00145c757b8139c245913b86e034

>---------------------------------------------------------------

commit 2ff2fa1bcb7d9538976e589942864f708dbcab63
Author: Keon Kim <kwk236 at gmail.com>
Date:   Fri Jun 3 00:21:32 2016 +0900

    add Binarize + test


>---------------------------------------------------------------

2ff2fa1bcb7d9538976e589942864f708dbcab63
 src/mlpack/core/data/CMakeLists.txt |  1 +
 src/mlpack/core/data/binarize.hpp   | 76 ++++++++++++++++++++++++++++++++++++
 src/mlpack/tests/CMakeLists.txt     |  1 +
 src/mlpack/tests/binarize_test.cpp  | 78 +++++++++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+)

diff --git a/src/mlpack/core/data/CMakeLists.txt b/src/mlpack/core/data/CMakeLists.txt
index ea87d0f..f11f19c 100644
--- a/src/mlpack/core/data/CMakeLists.txt
+++ b/src/mlpack/core/data/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SOURCES
   save_impl.hpp
   serialization_shim.hpp
   split_data.hpp
+  binarize.hpp
 )
 
 # add directory name to sources
diff --git a/src/mlpack/core/data/binarize.hpp b/src/mlpack/core/data/binarize.hpp
new file mode 100644
index 0000000..c92fb1d
--- /dev/null
+++ b/src/mlpack/core/data/binarize.hpp
@@ -0,0 +1,76 @@
+/**
+ * @file binarize.hpp
+ * @author Keon Kim
+ *
+ * Defines Binarize(), a utility function that sets values to 0 or 1
+ * according to a given threshold.
+ */
+#ifndef MLPACK_CORE_DATA_BINARIZE_HPP
+#define MLPACK_CORE_DATA_BINARIZE_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace data {
+/**
+ * Given an input dataset and threshold, set values greater than threshold to
+ * 1 and values less than or equal to the threshold to 0. This overload takes
+ * a dimension and applies the changes to the given dimension only.
+ *
+ * @code
+ * arma::mat input = loadData();
+ * double threshold = 0;
+ * size_t dimension = 0;
+ *
+ * // Binarize the first dimension. All positive values in the first dimension
+ * // will be set to 1 and the values less than or equal to 0 will become 0.
+ * Binarize(input, threshold, dimension);
+ * @endcode
+ *
+ * @param input Input matrix to Binarize; it is modified in place.
+ * @param threshold Threshold can be any number.
+ * @param dimension Row (feature) of input to apply the Binarize function to.
+ */
+template<typename T>
+void Binarize(arma::Mat<T>& input,
+              const double threshold,
+              const size_t dimension)
+{
+  for (size_t i = 0; i < input.n_cols; ++i)
+  {
+    if (input(dimension, i) > threshold)
+      input(dimension, i) = 1;
+    else
+      input(dimension, i) = 0;
+  }
+}
+
+/**
+ * Given an input dataset and threshold, set values greater than threshold to
+ * 1 and values less than or equal to the threshold to 0. This overload applies
+ * the changes to all dimensions.
+ *
+ * @code
+ * arma::mat input = loadData();
+ * double threshold = 0;
+ *
+ * // Binarize the whole matrix. All positive values will be set to 1 and
+ * // the values less than or equal to 0 will become 0.
+ * Binarize(input, threshold);
+ * @endcode
+ *
+ * @param input Input matrix to Binarize; it is modified in place.
+ * @param threshold Threshold can be any number.
+ */
+template<typename T>
+void Binarize(arma::Mat<T>& input,
+              const double threshold)
+{
+  for (size_t i = 0; i < input.n_rows; ++i)
+    Binarize(input, threshold, i);
+}
+
+} // namespace data
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index e1f255a..939f66b 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(mlpack_test
   allkrann_search_test.cpp
   arma_extend_test.cpp
   aug_lagrangian_test.cpp
+  binarize_test.cpp
   cf_test.cpp
   cli_test.cpp
   convolution_test.cpp
diff --git a/src/mlpack/tests/binarize_test.cpp b/src/mlpack/tests/binarize_test.cpp
new file mode 100644
index 0000000..d456f14
--- /dev/null
+++ b/src/mlpack/tests/binarize_test.cpp
@@ -0,0 +1,78 @@
+/**
+ * @file binarize_test.cpp
+ * @author Keon Kim
+ *
+ * Test the Binarize method.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/core/data/binarize.hpp>
+#include <mlpack/core/math/random.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+using namespace mlpack;
+using namespace arma;
+using namespace mlpack::data;
+
+BOOST_AUTO_TEST_SUITE(BinarizeTest);
+
+/**
+ * Compare the binarized data with the expected answer, element by element.
+ *
+ * @param input The data set after Binarize has been applied.
+ * @param answer The expected 0/1 values to compare the input against.
+ */
+void CheckAnswer(const mat& input,
+                 const umat& answer)
+{
+  for (size_t i = 0; i < input.n_cols; ++i)
+  {
+    const mat& lhsCol = input.col(i);
+    const umat& rhsCol = answer.col(i);
+    for (size_t j = 0; j < lhsCol.n_rows; ++j)
+    {
+      if (std::abs(rhsCol(j)) < 1e-5)
+        BOOST_REQUIRE_SMALL(lhsCol(j), 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(lhsCol(j), rhsCol(j), 1e-5);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_CASE(BinarizeThreshold)
+{
+  mat input(10, 10, fill::randu); // fill input with random numbers
+  mat constMat(10, 10);
+  math::RandomSeed((size_t) std::time(NULL));
+  double threshold = math::Random(); // random number threshold
+  constMat.fill(threshold);
+
+  umat answer = input > constMat;
+
+  // Binarize every value inside the matrix using the random threshold.
+  Binarize(input, threshold);
+
+  CheckAnswer(input, answer);
+}
+
+/**
+ * The same test as above, but on a larger dataset.
+ */
+BOOST_AUTO_TEST_CASE(BinarizeThresholdLargerTest)
+{
+  mat input(10, 500, fill::randu); // fill input with random numbers
+  mat constMat(10, 500);
+  math::RandomSeed((size_t) std::time(NULL));
+  double threshold = math::Random(); // random number threshold
+  constMat.fill(threshold);
+
+  umat answer = input > constMat;
+
+  // Binarize every value inside the matrix using the random threshold.
+  Binarize(input, threshold);
+
+  CheckAnswer(input, answer);
+}
+
+BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-git mailing list