[mlpack-git] master: Hold a pointer to the dataset info, and serialize accordingly. This is a bit hackish. (929f86a)
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:43:37 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 929f86a55f6f76d6e2b52651b4f28f43d881f7e3
Author: ryan <ryan at ratml.org>
Date: Thu Oct 1 19:11:45 2015 -0400
Hold a pointer to the dataset info, and serialize accordingly.
This is a bit hackish.
>---------------------------------------------------------------
929f86a55f6f76d6e2b52651b4f28f43d881f7e3
.../methods/hoeffding_trees/hoeffding_split.hpp | 2 +-
.../methods/hoeffding_trees/hoeffding_split_impl.hpp | 20 ++++++++++----------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
index 5af29ae..c499154 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
@@ -75,7 +75,7 @@ class HoeffdingSplit
size_t numSamples;
size_t numClasses;
size_t maxSamples;
- const data::DatasetInfo& datasetInfo;
+ data::DatasetInfo* datasetInfo;
double successProbability;
// And we need to keep some information for after we have split.
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index f124f35..cdd8ff9 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -30,7 +30,7 @@ HoeffdingSplit<
numSamples(0),
numClasses(numClasses),
maxSamples(maxSamples),
- datasetInfo(datasetInfo),
+ datasetInfo(const_cast<data::DatasetInfo*>(&datasetInfo)),
successProbability(successProbability),
splitDimension(size_t(-1)),
categoricalSplit(0),
@@ -100,9 +100,9 @@ void HoeffdingSplit<
size_t categoricalIndex = 0;
for (size_t i = 0; i < point.n_rows; ++i)
{
- if (datasetInfo.Type(i) == data::Datatype::categorical)
+ if (datasetInfo->Type(i) == data::Datatype::categorical)
categoricalSplits[categoricalIndex++].Train(point[i], label);
- else if (datasetInfo.Type(i) == data::Datatype::numeric)
+ else if (datasetInfo->Type(i) == data::Datatype::numeric)
numericSplits[numericIndex++].Train(point[i], label);
}
}
@@ -168,11 +168,11 @@ size_t HoeffdingSplit<
{
// Split!
splitDimension = largestIndex;
- if (datasetInfo.Type(largestIndex) == data::Datatype::categorical)
+ if (datasetInfo->Type(largestIndex) == data::Datatype::categorical)
{
// I don't know if this should be here.
majorityClass = categoricalSplits[largestIndex].MajorityClass();
- return datasetInfo.NumMappings(largestIndex);
+ return datasetInfo->NumMappings(largestIndex);
}
else
{
@@ -238,9 +238,9 @@ size_t HoeffdingSplit<
>::CalculateDirection(const VecType& point) const
{
// Don't call this before the node is split...
- if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
+ if (datasetInfo->Type(splitDimension) == data::Datatype::numeric)
return numericSplit.CalculateDirection(point[splitDimension]);
- else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
+ else if (datasetInfo->Type(splitDimension) == data::Datatype::categorical)
return categoricalSplit.CalculateDirection(point[splitDimension]);
else
return 0; // Not sure what to do here...
@@ -320,9 +320,8 @@ void HoeffdingSplit<
ar & CreateNVP(splitDimension, "splitDimension");
ar & CreateNVP(dimensionMappings, "dimensionMappings");
- // What to do here about ownership...?
- if (Archive::is_loading::value)
- ownsMappings = true;
+ ar & CreateNVP(ownsMappings, "ownsMappings");
+ ar & CreateNVP(datasetInfo, "datasetInfo");
// Depending on whether or not we have split yet, we may need to save
// different things.
@@ -368,6 +367,7 @@ void HoeffdingSplit<
ar & CreateNVP(numericSplits[i], name.str());
}
+
splits = categoricalSplits.size();
ar & CreateNVP(splits, "numCategoricalSplits");
for (size_t i = 0; i < categoricalSplits.size(); ++i)
More information about the mlpack-git mailing list