[mlpack-git] master: Fix bad memory handling and write a test for it. (3f79fab)
gitdub at big.cc.gt.atl.ga.us
Fri Jan 1 13:30:52 EST 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/095844bd92e14d4a17c134b0b088dcf488d092fc...3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80
>---------------------------------------------------------------
commit 3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80
Author: Ryan Curtin <ryan at ratml.org>
Date: Fri Jan 1 10:30:28 2016 -0800
Fix bad memory handling and write a test for it.
>---------------------------------------------------------------
3f79fab92a40bb95a8bbd3eec13e8a3dfbd40d80
.../hoeffding_trees/hoeffding_tree_impl.hpp | 28 ++++++++++-
src/mlpack/tests/hoeffding_tree_test.cpp | 58 ++++++++++++++++++++++
2 files changed, 84 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
index 3a6f52b..243ac0d 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
@@ -183,6 +183,8 @@ HoeffdingTree<FitnessFunction, NumericSplitType, CategoricalSplitType>::
delete dimensionMappings;
if (ownsInfo)
delete datasetInfo;
+ for (size_t i = 0; i < children.size(); ++i)
+ delete children[i];
}
//! Train on a set of points.
@@ -667,8 +669,12 @@ void HoeffdingTree<
using data::CreateNVP;
ar & CreateNVP(splitDimension, "splitDimension");
+
+ // Clear memory for the mappings if necessary.
+ if (Archive::is_loading::value && ownsMappings && dimensionMappings)
+ delete dimensionMappings;
+
ar & CreateNVP(dimensionMappings, "dimensionMappings");
- ar & CreateNVP(ownsMappings, "ownsMappings");
// Special handling for const object.
data::DatasetInfo* d = NULL;
@@ -677,8 +683,17 @@ void HoeffdingTree<
ar & CreateNVP(d, "datasetInfo");
if (Archive::is_loading::value)
{
+ if (datasetInfo && ownsInfo)
+ delete datasetInfo;
+
datasetInfo = d;
ownsInfo = true;
+ ownsMappings = true; // We also own the mappings we loaded.
+
+ // Clear the children.
+ for (size_t i = 0; i < children.size(); ++i)
+ delete children[i];
+ children.clear();
}
ar & CreateNVP(majorityClass, "majorityClass");
@@ -752,13 +767,22 @@ void HoeffdingTree<
numChildren = children.size();
ar & CreateNVP(numChildren, "numChildren");
if (Archive::is_loading::value) // If needed, allocate space.
- children.resize(numChildren, new HoeffdingTree(data::DatasetInfo(0), 0));
+ {
+ children.resize(numChildren, NULL);
+ for (size_t i = 0; i < numChildren; ++i)
+ children[i] = new HoeffdingTree(data::DatasetInfo(0), 0);
+ }
for (size_t i = 0; i < numChildren; ++i)
{
std::ostringstream name;
name << "child" << i;
ar & data::CreateNVP(*children[i], name.str());
+
+ // The child doesn't actually own its own DatasetInfo. We do. The same
+ // applies for the dimension mappings.
+ children[i]->ownsInfo = false;
+ children[i]->ownsMappings = false;
}
if (Archive::is_loading::value)
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index ef7c146..367f712 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -13,6 +13,7 @@
#include <boost/test/unit_test.hpp>
#include "old_boost_test_definitions.hpp"
+#include "serialization.hpp"
#include <stack>
@@ -1094,4 +1095,61 @@ BOOST_AUTO_TEST_CASE(ParameterChangeTest)
}
}
+BOOST_AUTO_TEST_CASE(MultipleSerializationTest)
+{
+ // Generate data.
+ arma::mat dataset(4, 9000);
+ arma::Row<size_t> labels(9000);
+ data::DatasetInfo info(4); // All features are numeric, except the fourth.
+ info.MapString("0", 3);
+ for (size_t i = 0; i < 9000; i += 3)
+ {
+ dataset(0, i) = mlpack::math::Random();
+ dataset(1, i) = mlpack::math::Random();
+ dataset(2, i) = mlpack::math::Random();
+ dataset(3, i) = 0.0;
+ labels[i] = 0;
+
+ dataset(0, i + 1) = mlpack::math::Random();
+ dataset(1, i + 1) = mlpack::math::Random() - 1.0;
+ dataset(2, i + 1) = mlpack::math::Random() + 0.5;
+ dataset(3, i + 1) = 0.0;
+ labels[i + 1] = 2;
+
+ dataset(0, i + 2) = mlpack::math::Random();
+ dataset(1, i + 2) = mlpack::math::Random() + 1.0;
+ dataset(2, i + 2) = mlpack::math::Random() + 0.8;
+ dataset(3, i + 2) = 0.0;
+ labels[i + 2] = 1;
+ }
+
+ // Batch training will give a tree with many labels.
+ HoeffdingTree<> deepTree(dataset, info, labels, 3, true);
+ // Streaming training will not.
+ HoeffdingTree<> shallowTree(dataset, info, labels, 3, false);
+
+ // Now serialize the shallow tree into the deep tree.
+ std::ostringstream oss;
+ {
+ boost::archive::binary_oarchive boa(oss);
+ boa << data::CreateNVP(shallowTree, "streamingDecisionTree");
+ }
+
+ std::istringstream iss(oss.str());
+ {
+ boost::archive::binary_iarchive bia(iss);
+ bia >> data::CreateNVP(deepTree, "streamingDecisionTree");
+ }
+
+ // Now do some classification and make sure the results are the same.
+ arma::Row<size_t> deepPredictions, shallowPredictions;
+ shallowTree.Classify(dataset, shallowPredictions);
+ deepTree.Classify(dataset, deepPredictions);
+
+ for (size_t i = 0; i < deepPredictions.n_elem; ++i)
+ {
+ BOOST_REQUIRE_EQUAL(shallowPredictions[i], deepPredictions[i]);
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git mailing list