[mlpack-git] master: Hold a pointer to the dataset info, and serialize accordingly. This is a bit hackish. (929f86a)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:43:37 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 929f86a55f6f76d6e2b52651b4f28f43d881f7e3
Author: ryan <ryan at ratml.org>
Date:   Thu Oct 1 19:11:45 2015 -0400

    Hold a pointer to the dataset info, and serialize accordingly.
    This is a bit hackish.


>---------------------------------------------------------------

929f86a55f6f76d6e2b52651b4f28f43d881f7e3
 .../methods/hoeffding_trees/hoeffding_split.hpp      |  2 +-
 .../methods/hoeffding_trees/hoeffding_split_impl.hpp | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
index 5af29ae..c499154 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
@@ -75,7 +75,7 @@ class HoeffdingSplit
   size_t numSamples;
   size_t numClasses;
   size_t maxSamples;
-  const data::DatasetInfo& datasetInfo;
+  data::DatasetInfo* datasetInfo;
   double successProbability;
 
   // And we need to keep some information for after we have split.
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index f124f35..cdd8ff9 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -30,7 +30,7 @@ HoeffdingSplit<
     numSamples(0),
     numClasses(numClasses),
     maxSamples(maxSamples),
-    datasetInfo(datasetInfo),
+    datasetInfo(const_cast<data::DatasetInfo*>(&datasetInfo)),
     successProbability(successProbability),
     splitDimension(size_t(-1)),
     categoricalSplit(0),
@@ -100,9 +100,9 @@ void HoeffdingSplit<
     size_t categoricalIndex = 0;
     for (size_t i = 0; i < point.n_rows; ++i)
     {
-      if (datasetInfo.Type(i) == data::Datatype::categorical)
+      if (datasetInfo->Type(i) == data::Datatype::categorical)
         categoricalSplits[categoricalIndex++].Train(point[i], label);
-      else if (datasetInfo.Type(i) == data::Datatype::numeric)
+      else if (datasetInfo->Type(i) == data::Datatype::numeric)
         numericSplits[numericIndex++].Train(point[i], label);
     }
   }
@@ -168,11 +168,11 @@ size_t HoeffdingSplit<
   {
     // Split!
     splitDimension = largestIndex;
-    if (datasetInfo.Type(largestIndex) == data::Datatype::categorical)
+    if (datasetInfo->Type(largestIndex) == data::Datatype::categorical)
     {
       // I don't know if this should be here.
       majorityClass = categoricalSplits[largestIndex].MajorityClass();
-      return datasetInfo.NumMappings(largestIndex);
+      return datasetInfo->NumMappings(largestIndex);
     }
     else
     {
@@ -238,9 +238,9 @@ size_t HoeffdingSplit<
 >::CalculateDirection(const VecType& point) const
 {
   // Don't call this before the node is split...
-  if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
+  if (datasetInfo->Type(splitDimension) == data::Datatype::numeric)
     return numericSplit.CalculateDirection(point[splitDimension]);
-  else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
+  else if (datasetInfo->Type(splitDimension) == data::Datatype::categorical)
     return categoricalSplit.CalculateDirection(point[splitDimension]);
   else
     return 0; // Not sure what to do here...
@@ -320,9 +320,8 @@ void HoeffdingSplit<
 
   ar & CreateNVP(splitDimension, "splitDimension");
   ar & CreateNVP(dimensionMappings, "dimensionMappings");
-  // What to do here about ownership...?
-  if (Archive::is_loading::value)
-    ownsMappings = true;
+  ar & CreateNVP(ownsMappings, "ownsMappings");
+  ar & CreateNVP(datasetInfo, "datasetInfo");
 
   // Depending on whether or not we have split yet, we may need to save
   // different things.
@@ -368,6 +367,7 @@ void HoeffdingSplit<
         ar & CreateNVP(numericSplits[i], name.str());
       }
 
+
       splits = categoricalSplits.size();
       ar & CreateNVP(splits, "numCategoricalSplits");
       for (size_t i = 0; i < categoricalSplits.size(); ++i)



More information about the mlpack-git mailing list