[mlpack-svn] r17459 - in mlpack/tags/mlpack-1.0.11: . CMake src/mlpack/methods/decision_stump src/mlpack/tests

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Dec 7 14:31:22 EST 2014


Author: rcurtin
Date: Sun Dec  7 14:31:21 2014
New Revision: 17459

Log:
Backport fixes from r17310-r17318.  Mostly test fixes.


Modified:
   mlpack/tags/mlpack-1.0.11/   (props changed)
   mlpack/tags/mlpack-1.0.11/CMake/FindArmadillo.cmake
   mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump.hpp
   mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
   mlpack/tags/mlpack-1.0.11/src/mlpack/tests/cli_test.cpp
   mlpack/tags/mlpack-1.0.11/src/mlpack/tests/decision_stump_test.cpp
   mlpack/tags/mlpack-1.0.11/src/mlpack/tests/regularized_svd_test.cpp
   mlpack/tags/mlpack-1.0.11/src/mlpack/tests/sparse_coding_test.cpp

Modified: mlpack/tags/mlpack-1.0.11/CMake/FindArmadillo.cmake
==============================================================================
--- mlpack/tags/mlpack-1.0.11/CMake/FindArmadillo.cmake	(original)
+++ mlpack/tags/mlpack-1.0.11/CMake/FindArmadillo.cmake	Sun Dec  7 14:31:21 2014
@@ -221,7 +221,27 @@
 
     # Search for HDF5 (or replacement).
     if (NOT "${ARMA_USE_HDF5}" STREQUAL "")
-      find_package(HDF5 REQUIRED)
+      find_package(HDF5 QUIET)
+
+      if(NOT HDF5_FOUND)
+        # On Debian systems, the HDF5 package has been split into multiple
+        # packages so that it is co-installable.  But this may mean that the
+        # include files are hidden somewhere very odd that the FindHDF5.cmake
+        # script will not find.  Thus, we'll also quickly check pkgconfig to see
+        # if there is information on what to use there.
+        find_package(PkgConfig)
+        if (PKG_CONFIG_FOUND)
+          pkg_check_modules(HDF5 hdf5)
+          # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+          # be filled with exact library paths, like the other scripts.  So
+          # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+          # what we'd pass to -L.
+          if (HDF5_FOUND)
+            # I'm not sure what I think of doing this here...
+            link_directories("${HDF5_LIBRARY_DIRS}")
+          endif()
+        endif()
+      endif()
 
       set(SUPPORT_INCLUDE_DIRS "${SUPPORT_INCLUDE_DIRS}" "${HDF5_INCLUDE_DIRS}")
       set(SUPPORT_LIBRARIES "${SUPPORT_LIBRARIES}" "${HDF5_LIBRARIES}")
@@ -229,17 +249,38 @@
 
   else("${ARMA_USE_WRAPPER}" STREQUAL "")
     # Some older versions still require linking against HDF5 since they did not
-    # wrap libhdf5.  This was true until 4.300 (check this!).
+    # wrap libhdf5.  This was true for versions older than 4.300.
 
-    if(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+    if(NOT "${ARMA_USE_HDF5}" STREQUAL "" AND
+       "${ARMADILLO_VERSION_STRING}" VERSION_LESS "4.300.0")
       message(STATUS "Armadillo HDF5 support is enabled and manual linking is "
                      "required.")
       # We have HDF5 support and need to link against HDF5.
-      find_package(HDF5 REQUIRED)
+      find_package(HDF5)
 
-      set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
-      set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
-    endif(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+      if(NOT HDF5_FOUND)
+        # On Debian systems, the HDF5 package has been split into multiple
+        # packages so that it is co-installable.  But this may mean that the
+        # include files are hidden somewhere very odd that the FindHDF5.cmake
+        # script will not find.  Thus, we'll also quickly check pkgconfig to see
+        # if there is information on what to use there.
+        find_package(PkgConfig)
+        if (PKG_CONFIG_FOUND)
+          pkg_check_modules(HDF5 hdf5)
+          # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+          # be filled with exact library paths, like the other scripts.  So
+          # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+          # what we'd pass to -L.
+          if (HDF5_FOUND)
+            # I'm not sure what I think of doing this here...
+            link_directories("${HDF5_LIBRARY_DIRS}")
+          endif()
+        endif()
+
+        set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
+        set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
+      endif()
+    endif()
 
   endif("${ARMA_USE_WRAPPER}" STREQUAL "")
 

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump.hpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump.hpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump.hpp	Sun Dec  7 14:31:21 2014
@@ -85,7 +85,7 @@
    *
   ModifyData(MatType& data, const arma::Row<double>& D);
   */
-  
+
   //! Access the splitting attribute.
   int SplitAttribute() const { return splitAttribute; }
   //! Modify the splitting attribute (be careful!).
@@ -164,8 +164,20 @@
    * @param attribute The attribute of which we calculate the entropy.
    * @param labels Corresponding labels of the attribute.
    */
-  template <typename AttType, typename LabelType>
-  double CalculateEntropy(arma::subview_row<LabelType> labels);
+  template <typename LabelType, bool isWeight>
+  double CalculateEntropy(arma::subview_row<LabelType> labels, int begin,
+                          const arma::rowvec& tempD);
+
+  /**
+   * Train the decision stump on the given data and labels.
+   *
+   * @param data Dataset to train on.
+   * @param labels Labels for dataset.
+   * @param isWeight Whether we need to run a weighted Decision Stump.
+   */
+  template <bool isWeight>
+  void Train(const MatType& data, const arma::Row<size_t>& labels,
+             const arma::rowvec& weightD);
 };
 
 }; // namespace decision_stump

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump_impl.hpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/methods/decision_stump/decision_stump_impl.hpp	Sun Dec  7 14:31:21 2014
@@ -351,44 +351,35 @@
 template <typename rType>
 rType DecisionStump<MatType>::CountMostFreq(const arma::Row<rType>& subCols)
 {
-  // Sort subCols for easier processing.
-  arma::Row<rType> sortCounts = arma::sort(subCols);
-  rType element = sortCounts[0];
-  size_t count = 0, localCount = 0;
+  // We'll create a map of elements and the number of times that each element is
+  // seen.
+  std::map<rType, size_t> countMap;
 
-  if (sortCounts.n_elem == 1)
-    return sortCounts[0];
-
-  // An O(n) loop which counts the most frequent element in sortCounts.
-  for (size_t i = 0; i < sortCounts.n_elem; ++i)
+  for (size_t i = 0; i < subCols.n_elem; ++i)
   {
-    if (i == sortCounts.n_elem - 1)
-    {
-      if (sortCounts(i - 1) == sortCounts(i))
-      {
-        // element = sortCounts(i - 1);
-        localCount++;
-      }
-      else if (localCount > count)
-        count = localCount;
-    }
-    else if (sortCounts(i) != sortCounts(i + 1))
-    {
-      localCount = 0;
-      count++;
-    }
+    if (countMap.count(subCols[i]) == 0)
+      countMap[subCols[i]] = 1;
     else
+      ++countMap[subCols[i]];
+  }
+
+  // Now find the maximum value.
+  typename std::map<rType, size_t>::iterator it = countMap.begin();
+  rType mostFreq = it->first;
+  size_t mostFreqCount = it->second;
+  while (it != countMap.end())
+>>>>>>> .merge-right.r17318
+  {
+    if (it->second >= mostFreqCount)
     {
-      localCount++;
-      if (localCount > count)
-      {
-        count = localCount;
-        if (localCount == 1)
-          element = sortCounts(i);
-      }
+      mostFreq = it->first;
+      mostFreqCount = it->second;
     }
+
+    ++it;
   }
-  return element;
+
+  return mostFreq;
 }
 
 /**

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/tests/cli_test.cpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/tests/cli_test.cpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/tests/cli_test.cpp	Sun Dec  7 14:31:21 2014
@@ -136,8 +136,8 @@
   // Now, if we specify this flag, it should be true.
   int argc = 2;
   char* argv[2];
-  argv[0] = strcpy(new char[strlen("programname")], "programname");
-  argv[1] = strcpy(new char[strlen("--flag_test")], "--flag_test");
+  argv[0] = strcpy(new char[strlen("programname") + 1], "programname");
+  argv[1] = strcpy(new char[strlen("--flag_test") + 1], "--flag_test");
 
   CLI::ParseCommandLine(argc, argv);
 

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/tests/decision_stump_test.cpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/tests/decision_stump_test.cpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/tests/decision_stump_test.cpp	Sun Dec  7 14:31:21 2014
@@ -21,7 +21,7 @@
  */
 #include <mlpack/core.hpp>
 #include <mlpack/methods/decision_stump/decision_stump.hpp>
- 
+
 #include <boost/test/unit_test.hpp>
 #include "old_boost_test_definitions.hpp"
 
@@ -221,9 +221,9 @@
 
   BOOST_CHECK_EQUAL(predictedLabels(0, 0), 0);
   BOOST_CHECK_EQUAL(predictedLabels(0, 1), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 2), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 3), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 4), 0);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 2), 1);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 4), 1);
   BOOST_CHECK_EQUAL(predictedLabels(0, 5), 1);
   BOOST_CHECK_EQUAL(predictedLabels(0, 6), 2);
   BOOST_CHECK_EQUAL(predictedLabels(0, 7), 2);

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/tests/regularized_svd_test.cpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/tests/regularized_svd_test.cpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/tests/regularized_svd_test.cpp	Sun Dec  7 14:31:21 2014
@@ -191,8 +191,15 @@
       parameters(i, j) += epsilon;
 
       // Compare numerical and backpropagation gradient values.
-      BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
-      BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
+      if (gradient1(i, j) == 0.0)
+        BOOST_REQUIRE_SMALL(numGradient1, 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
+
+      if (gradient2(i, j) == 0.0)
+        BOOST_REQUIRE_SMALL(numGradient2, 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
     }
   }
 }

Modified: mlpack/tags/mlpack-1.0.11/src/mlpack/tests/sparse_coding_test.cpp
==============================================================================
--- mlpack/tags/mlpack-1.0.11/src/mlpack/tests/sparse_coding_test.cpp	(original)
+++ mlpack/tags/mlpack-1.0.11/src/mlpack/tests/sparse_coding_test.cpp	Sun Dec  7 14:31:21 2014
@@ -118,7 +118,7 @@
 
 BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
 {
-  const double tol = 2e-7;
+  const double tol = 1e-6;
 
   double lambda1 = 0.1;
   uword nAtoms = 25;
@@ -138,7 +138,7 @@
   mat Z = sc.Codes();
 
   uvec adjacencies = find(Z);
-  double normGradient = sc.OptimizeDictionary(adjacencies, 1e-12);
+  double normGradient = sc.OptimizeDictionary(adjacencies, 1e-15);
 
   BOOST_REQUIRE_SMALL(normGradient, tol);
 }



More information about the mlpack-svn mailing list