[mlpack-git] master: Smarter serialization; modest size decrease in output. (5d89077)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:33 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 5d89077064cb1889bfca7c26ad4c830120bee382
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Oct 17 11:34:19 2015 -0400

    Smarter serialization; modest size decrease in output.


>---------------------------------------------------------------

5d89077064cb1889bfca7c26ad4c830120bee382
 .../hoeffding_trees/hoeffding_split_impl.hpp       | 68 +++++++++-------------
 1 file changed, 29 insertions(+), 39 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 7f8306f..c7df1fb 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -354,56 +354,46 @@ void HoeffdingSplit<
     ar & CreateNVP(maxSamples, "maxSamples");
     ar & CreateNVP(successProbability, "successProbability");
 
-    // This is hackish for now...
+    // Serialize the splits, but not if we haven't seen any samples yet (in
+    // which case we can just reinitialize).
     if (Archive::is_loading::value)
     {
-      size_t numNumeric;
-      ar & CreateNVP(numNumeric, "numNumericSplits");
-      numericSplits.resize(numNumeric, NumericSplitType(numClasses));
-      for (size_t i = 0; i < numNumeric; ++i)
+      // Re-initialize all of the splits.
+      numericSplits.clear();
+      categoricalSplits.clear();
+      for (size_t i = 0; i < datasetInfo->Dimensionality(); ++i)
       {
-        std::ostringstream name;
-        name << "numericSplit" << i;
-        ar & CreateNVP(numericSplits[i], name.str());
+        if (datasetInfo->Type(i) == data::Datatype::categorical)
+          categoricalSplits.push_back(CategoricalSplitType(
+              datasetInfo->NumMappings(i), numClasses));
+        else
+          numericSplits.push_back(NumericSplitType(numClasses));
       }
 
-      size_t numCategorical;
-      ar & CreateNVP(numCategorical, "numCategoricalSplits");
-      categoricalSplits.resize(numCategorical, CategoricalSplitType(1, 1));
-      for (size_t i = 0; i < numCategorical; ++i)
-      {
-        std::ostringstream name;
-        name << "categoricalSplit" << i;
-        ar & CreateNVP(categoricalSplits[i], name.str());
-      }
+      // Clear things we don't need.
+      categoricalSplit = typename CategoricalSplitType::SplitInfo(numClasses);
+      numericSplit = typename NumericSplitType::SplitInfo();
     }
-    else
-    {
-      size_t splits = numericSplits.size();
-      ar & CreateNVP(splits, "numNumericSplits");
-      for (size_t i = 0; i < numericSplits.size(); ++i)
-      {
-        std::ostringstream name;
-        name << "numericSplit" << i;
-        ar & CreateNVP(numericSplits[i], name.str());
-      }
 
+    // There's no need to serialize if there's no information contained in the
+    // splits.
+    if (numSamples == 0)
+      return;
 
-      splits = categoricalSplits.size();
-      ar & CreateNVP(splits, "numCategoricalSplits");
-      for (size_t i = 0; i < categoricalSplits.size(); ++i)
-      {
-        std::ostringstream name;
-        name << "categoricalSplit" << i;
-        ar & CreateNVP(categoricalSplits[i], name.str());
-      }
+    // Serialize numeric splits.
+    for (size_t i = 0; i < numericSplits.size(); ++i)
+    {
+      std::ostringstream name;
+      name << "numericSplit" << i;
+      ar & CreateNVP(numericSplits[i], name.str());
     }
 
-    if (Archive::is_loading::value)
+    // Serialize categorical splits.
+    for (size_t i = 0; i < categoricalSplits.size(); ++i)
     {
-      // Clear things we don't need.
-      categoricalSplit = typename CategoricalSplitType::SplitInfo(numClasses);
-      numericSplit = typename NumericSplitType::SplitInfo();
+      std::ostringstream name;
+      name << "categoricalSplit" << i;
+      ar & CreateNVP(categoricalSplits[i], name.str());
     }
   }
   else



More information about the mlpack-git mailing list