[mlpack-git] master: Fix bad memory handling and write a test for it. (3f79fab)

gitdub at big.cc.gt.atl.ga.us
Fri Jan 1 13:30:52 EST 2016


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/095844bd92e14d4a17c134b0b088dcf488d092fc...3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80

>---------------------------------------------------------------

commit 3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Jan 1 10:30:28 2016 -0800

    Fix bad memory handling and write a test for it.


>---------------------------------------------------------------

3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80
 .../hoeffding_trees/hoeffding_tree_impl.hpp        | 28 ++++++++++-
 src/mlpack/tests/hoeffding_tree_test.cpp           | 58 ++++++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)
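The change has two parts: the HoeffdingTree destructor now deletes its child nodes, and Serialize() frees any dimension mappings, DatasetInfo, and existing children the tree already owns before loading new state, then marks each freshly loaded child as a non-owner of the shared DatasetInfo and mappings. The standalone sketch below (not mlpack code; the Node/Info names are made up purely for illustration) shows the general owner-flag pattern the commit relies on.

#include <cstddef>
#include <vector>

struct Info { /* stands in for the shared dataset information */ };

class Node
{
 public:
  Node(Info* info, const bool ownsInfo) : info(info), ownsInfo(ownsInfo) { }

  // Free only what this node owns: its children always, the shared Info
  // object only if this node is the owner (the root, in the tree's case).
  ~Node()
  {
    if (ownsInfo)
      delete info;
    for (size_t i = 0; i < children.size(); ++i)
      delete children[i];
  }

  // Before loading new state into an existing node, release everything it
  // already owns so the old allocations are neither leaked nor freed twice.
  void ClearBeforeLoad()
  {
    if (ownsInfo && info)
      delete info;
    info = NULL;
    for (size_t i = 0; i < children.size(); ++i)
      delete children[i];
    children.clear();
  }

  // Children share the parent's Info but never own it, mirroring the
  // ownsInfo = false / ownsMappings = false assignments made after each
  // child is deserialized in the diff below.
  void AddChild() { children.push_back(new Node(info, false)); }

 private:
  Info* info;
  bool ownsInfo;
  std::vector<Node*> children;
};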

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
index 3a6f52b..243ac0d 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
@@ -183,6 +183,8 @@ HoeffdingTree<FitnessFunction, NumericSplitType, CategoricalSplitType>::
     delete dimensionMappings;
   if (ownsInfo)
     delete datasetInfo;
+  for (size_t i = 0; i < children.size(); ++i)
+    delete children[i];
 }
 
 //! Train on a set of points.
@@ -667,8 +669,12 @@ void HoeffdingTree<
   using data::CreateNVP;
 
   ar & CreateNVP(splitDimension, "splitDimension");
+
+  // Clear memory for the mappings if necessary.
+  if (Archive::is_loading::value && ownsMappings && dimensionMappings)
+    delete dimensionMappings;
+
   ar & CreateNVP(dimensionMappings, "dimensionMappings");
-  ar & CreateNVP(ownsMappings, "ownsMappings");
 
   // Special handling for const object.
   data::DatasetInfo* d = NULL;
@@ -677,8 +683,17 @@ void HoeffdingTree<
   ar & CreateNVP(d, "datasetInfo");
   if (Archive::is_loading::value)
   {
+    if (datasetInfo && ownsInfo)
+      delete datasetInfo;
+
     datasetInfo = d;
     ownsInfo = true;
+    ownsMappings = true; // We also own the mappings we loaded.
+
+    // Clear the children.
+    for (size_t i = 0; i < children.size(); ++i)
+      delete children[i];
+    children.clear();
   }
 
   ar & CreateNVP(majorityClass, "majorityClass");
@@ -752,13 +767,22 @@ void HoeffdingTree<
       numChildren = children.size();
     ar & CreateNVP(numChildren, "numChildren");
     if (Archive::is_loading::value) // If needed, allocate space.
-      children.resize(numChildren, new HoeffdingTree(data::DatasetInfo(0), 0));
+    {
+      children.resize(numChildren, NULL);
+      for (size_t i = 0; i < numChildren; ++i)
+        children[i] = new HoeffdingTree(data::DatasetInfo(0), 0);
+    }
 
     for (size_t i = 0; i < numChildren; ++i)
     {
       std::ostringstream name;
       name << "child" << i;
       ar & data::CreateNVP(*children[i], name.str());
+
+      // The child doesn't actually own its own DatasetInfo.  We do.  The same
+      // applies for the dimension mappings.
+      children[i]->ownsInfo = false;
+      children[i]->ownsMappings = false;
     }
 
     if (Archive::is_loading::value)
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index ef7c146..367f712 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -13,6 +13,7 @@
 
 #include <boost/test/unit_test.hpp>
 #include "old_boost_test_definitions.hpp"
+#include "serialization.hpp"
 
 #include <stack>
 
@@ -1094,4 +1095,61 @@ BOOST_AUTO_TEST_CASE(ParameterChangeTest)
   }
 }
 
+BOOST_AUTO_TEST_CASE(MultipleSerializationTest)
+{
+  // Generate data.
+  arma::mat dataset(4, 9000);
+  arma::Row<size_t> labels(9000);
+  data::DatasetInfo info(4); // All features are numeric, except the fourth.
+  info.MapString("0", 3);
+  for (size_t i = 0; i < 9000; i += 3)
+  {
+    dataset(0, i) = mlpack::math::Random();
+    dataset(1, i) = mlpack::math::Random();
+    dataset(2, i) = mlpack::math::Random();
+    dataset(3, i) = 0.0;
+    labels[i] = 0;
+
+    dataset(0, i + 1) = mlpack::math::Random();
+    dataset(1, i + 1) = mlpack::math::Random() - 1.0;
+    dataset(2, i + 1) = mlpack::math::Random() + 0.5;
+    dataset(3, i + 1) = 0.0;
+    labels[i + 1] = 2;
+
+    dataset(0, i + 2) = mlpack::math::Random();
+    dataset(1, i + 2) = mlpack::math::Random() + 1.0;
+    dataset(2, i + 2) = mlpack::math::Random() + 0.8;
+    dataset(3, i + 2) = 0.0;
+    labels[i + 2] = 1;
+  }
+
+  // Batch training will give a tree with many labels.
+  HoeffdingTree<> deepTree(dataset, info, labels, 3, true);
+  // Streaming training will not.
+  HoeffdingTree<> shallowTree(dataset, info, labels, 3, false);
+
+  // Now serialize the shallow tree into the deep tree.
+  std::ostringstream oss;
+  {
+    boost::archive::binary_oarchive boa(oss);
+    boa << data::CreateNVP(shallowTree, "streamingDecisionTree");
+  }
+
+  std::istringstream iss(oss.str());
+  {
+    boost::archive::binary_iarchive bia(iss);
+    bia >> data::CreateNVP(deepTree, "streamingDecisionTree");
+  }
+
+  // Now do some classification and make sure the results are the same.
+  arma::Row<size_t> deepPredictions, shallowPredictions;
+  shallowTree.Classify(dataset, shallowPredictions);
+  deepTree.Classify(dataset, deepPredictions);
+
+  for (size_t i = 0; i < deepPredictions.n_elem; ++i)
+  {
+    BOOST_REQUIRE_EQUAL(shallowPredictions[i], deepPredictions[i]);
+  }
+}
+
 BOOST_AUTO_TEST_SUITE_END();
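For anyone who wants to persist a trained tree, the same calls the new test exercises also work with file streams. The following is only a sketch, not part of the commit: it assumes mlpack 2.x headers, the file name is arbitrary, and the dummy DatasetInfo/numClasses constructor arguments are placeholders that loading overwrites, following the same pattern the serialization code above uses.

#include <fstream>

#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>

#include <mlpack/core.hpp>
#include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>

using namespace mlpack;
using namespace mlpack::tree;

// Save a trained tree to disk, then load it into a second tree object.
void SaveAndReload(HoeffdingTree<>& tree)
{
  {
    std::ofstream ofs("tree.bin", std::ios::binary);
    boost::archive::binary_oarchive oa(ofs);
    oa << data::CreateNVP(tree, "streamingDecisionTree");
  }

  // The constructor arguments are placeholders; loading replaces them.  With
  // this commit, loading into a tree that has already been trained also
  // cleans up the old children and mappings instead of leaking them.
  data::DatasetInfo dummyInfo(0);
  HoeffdingTree<> loadedTree(dummyInfo, 0);
  {
    std::ifstream ifs("tree.bin", std::ios::binary);
    boost::archive::binary_iarchive ia(ifs);
    ia >> data::CreateNVP(loadedTree, "streamingDecisionTree");
  }
}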
