[mlpack-git] master: Smarter serialization; modest size decrease in output. (5d89077)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:33 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 5d89077064cb1889bfca7c26ad4c830120bee382
Author: Ryan Curtin <ryan at ratml.org>
Date: Sat Oct 17 11:34:19 2015 -0400
Smarter serialization; modest size decrease in output.
>---------------------------------------------------------------
5d89077064cb1889bfca7c26ad4c830120bee382
.../hoeffding_trees/hoeffding_split_impl.hpp | 68 +++++++++-------------
1 file changed, 29 insertions(+), 39 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 7f8306f..c7df1fb 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -354,56 +354,46 @@ void HoeffdingSplit<
ar & CreateNVP(maxSamples, "maxSamples");
ar & CreateNVP(successProbability, "successProbability");
- // This is hackish for now...
+ // Serialize the splits, but not if we haven't seen any samples yet (in
+ // which case we can just reinitialize).
if (Archive::is_loading::value)
{
- size_t numNumeric;
- ar & CreateNVP(numNumeric, "numNumericSplits");
- numericSplits.resize(numNumeric, NumericSplitType(numClasses));
- for (size_t i = 0; i < numNumeric; ++i)
+ // Re-initialize all of the splits.
+ numericSplits.clear();
+ categoricalSplits.clear();
+ for (size_t i = 0; i < datasetInfo->Dimensionality(); ++i)
{
- std::ostringstream name;
- name << "numericSplit" << i;
- ar & CreateNVP(numericSplits[i], name.str());
+ if (datasetInfo->Type(i) == data::Datatype::categorical)
+ categoricalSplits.push_back(CategoricalSplitType(
+ datasetInfo->NumMappings(i), numClasses));
+ else
+ numericSplits.push_back(NumericSplitType(numClasses));
}
- size_t numCategorical;
- ar & CreateNVP(numCategorical, "numCategoricalSplits");
- categoricalSplits.resize(numCategorical, CategoricalSplitType(1, 1));
- for (size_t i = 0; i < numCategorical; ++i)
- {
- std::ostringstream name;
- name << "categoricalSplit" << i;
- ar & CreateNVP(categoricalSplits[i], name.str());
- }
+ // Clear things we don't need.
+ categoricalSplit = typename CategoricalSplitType::SplitInfo(numClasses);
+ numericSplit = typename NumericSplitType::SplitInfo();
}
- else
- {
- size_t splits = numericSplits.size();
- ar & CreateNVP(splits, "numNumericSplits");
- for (size_t i = 0; i < numericSplits.size(); ++i)
- {
- std::ostringstream name;
- name << "numericSplit" << i;
- ar & CreateNVP(numericSplits[i], name.str());
- }
+ // There's no need to serialize if there's no information contained in the
+ // splits.
+ if (numSamples == 0)
+ return;
- splits = categoricalSplits.size();
- ar & CreateNVP(splits, "numCategoricalSplits");
- for (size_t i = 0; i < categoricalSplits.size(); ++i)
- {
- std::ostringstream name;
- name << "categoricalSplit" << i;
- ar & CreateNVP(categoricalSplits[i], name.str());
- }
+ // Serialize numeric splits.
+ for (size_t i = 0; i < numericSplits.size(); ++i)
+ {
+ std::ostringstream name;
+ name << "numericSplit" << i;
+ ar & CreateNVP(numericSplits[i], name.str());
}
- if (Archive::is_loading::value)
+ // Serialize categorical splits.
+ for (size_t i = 0; i < categoricalSplits.size(); ++i)
{
- // Clear things we don't need.
- categoricalSplit = typename CategoricalSplitType::SplitInfo(numClasses);
- numericSplit = typename NumericSplitType::SplitInfo();
+ std::ostringstream name;
+ name << "categoricalSplit" << i;
+ ar & CreateNVP(categoricalSplits[i], name.str());
}
}
else
More information about the mlpack-git mailing list