[mlpack-svn] r13240 - mlpack/trunk/src/mlpack/methods/det
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 16 13:23:12 EDT 2012
Author: rcurtin
Date: 2012-07-16 13:23:11 -0400 (Mon, 16 Jul 2012)
New Revision: 13240
Modified:
mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
mlpack/trunk/src/mlpack/methods/det/dtree.hpp
mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
Log:
Further switching from cT to double. Later we will switch to all log error.
Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-07-16 17:23:11 UTC (rev 13240)
@@ -128,8 +128,8 @@
arma::Mat<eT>* new_dataset = new arma::Mat<eT>(*dataset);
// Growing the tree
- long double old_alpha = 0.0;
- long double alpha = dtree->Grow(new_dataset, &old_from_new, useVolumeReg,
+ double old_alpha = 0.0;
+ double alpha = dtree->Grow(*new_dataset, old_from_new, useVolumeReg,
maxLeafSize, minLeafSize);
delete new_dataset;
@@ -160,10 +160,10 @@
}
// Sequentially prune and save the alpha values and the values of c_t^2 * r_t.
- std::vector<std::pair<long double, long double> > pruned_sequence;
+ std::vector<std::pair<double, long double> > pruned_sequence;
while (dtree->subtree_leaves() > 1)
{
- std::pair<long double, long double> tree_seq(old_alpha,
+ std::pair<double, long double> tree_seq(old_alpha,
-1.0 * dtree->subtree_leaves_error());
pruned_sequence.push_back(tree_seq);
old_alpha = alpha;
@@ -230,12 +230,12 @@
// Grow the tree.
old_alpha = 0.0;
- alpha = dtree_cv->Grow(train, &old_from_new_cv, useVolumeReg, maxLeafSize,
+ alpha = dtree_cv->Grow(*train, old_from_new_cv, useVolumeReg, maxLeafSize,
minLeafSize);
// Sequentially prune with all the values of available alphas and adding
// values for test values.
- std::vector<std::pair<long double, long double> >::iterator it;
+ std::vector<std::pair<double, long double> >::iterator it;
for (it = pruned_sequence.begin(); it < pruned_sequence.end() -2; ++it)
{
// Compute test values for this state of the tree.
@@ -275,7 +275,7 @@
long double optimal_alpha = -1.0;
long double best_cv_error = numeric_limits<long double>::max();
- std::vector<std::pair<long double, long double> >::iterator it;
+ std::vector<std::pair<double, long double> >::iterator it;
for (it = pruned_sequence.begin(); it < pruned_sequence.end() -1; ++it)
{
@@ -300,7 +300,7 @@
// Grow the tree.
old_alpha = 0.0;
- alpha = dtree_opt->Grow(new_dataset, &old_from_new, useVolumeReg, maxLeafSize,
+ alpha = dtree_opt->Grow(*new_dataset, old_from_new, useVolumeReg, maxLeafSize,
minLeafSize);
// Prune with optimal alpha.
Modified: mlpack/trunk/src/mlpack/methods/det/dtree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-07-16 17:23:11 UTC (rev 13240)
@@ -179,29 +179,29 @@
DTree(arma::mat& data);
// Non-root node initializers
- DTree(const arma::vec& max_vals,
- const arma::vec& min_vals,
+ DTree(const arma::vec& maxVals,
+ const arma::vec& minVals,
const size_t start,
const size_t end,
const double error);
- DTree(const arma::vec& max_vals,
- const arma::vec& min_vals,
- size_t total_points,
- size_t start,
- size_t end);
+ DTree(const arma::vec& maxVals,
+ const arma::vec& minVals,
+ const size_t totalPoints,
+ const size_t start,
+ const size_t end);
~DTree();
// Greedily expand the tree
- cT Grow(MatType* data,
- arma::Col<size_t> *old_from_new,
- bool useVolReg = false,
- size_t maxLeafSize = 10,
- size_t minLeafSize = 5);
+ double Grow(arma::mat& data,
+ arma::Col<size_t>& oldFromNew,
+ const bool useVolReg = false,
+ const size_t maxLeafSize = 10,
+ const size_t minLeafSize = 5);
// perform alpha pruning on the tree
- cT PruneAndUpdate(cT old_alpha, bool useVolReg = false);
+ double PruneAndUpdate(const double old_alpha, const bool useVolReg = false);
// compute the density at a given point
cT ComputeValue(VecType* query);
Modified: mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-07-16 17:23:11 UTC (rev 13240)
@@ -203,10 +203,10 @@
const size_t totalPoints) :
start_(0),
end_(totalPoints),
- root_(true),
maxVals(maxVals),
minVals(minVals),
error_(-std::exp(LogNegativeError(totalPoints))),
+ root_(true),
bucket_tag_(-1),
left_(NULL),
right_(NULL)
@@ -293,45 +293,44 @@
// Greedily expand the tree
template<typename eT, typename cT>
-cT DTree<eT, cT>::Grow(MatType* data,
- arma::Col<size_t> *old_from_new,
- bool useVolReg,
- size_t maxLeafSize,
- size_t minLeafSize)
+double DTree<eT, cT>::Grow(arma::mat& data,
+ arma::Col<size_t>& oldFromNew,
+ const bool useVolReg,
+ const size_t maxLeafSize,
+ const size_t minLeafSize)
{
- assert(data->n_rows == maxVals.n_elem);
- assert(data->n_rows == minVals.n_elem);
+ assert(data.n_rows == maxVals.n_elem);
+ assert(data.n_rows == minVals.n_elem);
- cT left_g, right_g;
+ double leftG, rightG;
// Compute points ratio.
- ratio_ = (cT) (end_ - start_) / (cT) old_from_new->n_elem;
+ ratio_ = (double) (end_ - start_) / (double) oldFromNew.n_elem;
- // Compute the v_t_inv: the inverse of the volume of the node.
- cT log_vol_t = 0;
+ // Compute the v_t_inv: the inverse of the volume of the node. We use log to
+ // prevent overflow.
+ double logVol = 0;
for (size_t i = 0; i < maxVals.n_elem; ++i)
if (maxVals[i] - minVals[i] > 0.0)
- // Use log to prevent overflow.
- log_vol_t += (cT) std::log(maxVals[i] - minVals[i]);
+ logVol += std::log(maxVals[i] - minVals[i]);
// Check for overflow.
- assert(std::exp(log_vol_t) > 0.0);
- v_t_inv_ = 1.0 / std::exp(log_vol_t);
+ assert(std::exp(logVol) > 0.0);
+ v_t_inv_ = 1.0 / std::exp(logVol);
- // Check if node is large enough.
+ // Check if node is large enough to split.
if ((size_t) (end_ - start_) > maxLeafSize) {
// Find the split.
size_t dim;
double splitValue;
- double left_error, right_error;
- if (FindSplit(*data, dim, splitValue, left_error, right_error, maxLeafSize,
+ double leftError, rightError;
+ if (FindSplit(data, dim, splitValue, leftError, rightError, maxLeafSize,
minLeafSize))
{
// Move the data around for the children to have points in a node lie
// contiguously (to increase efficiency during the training).
- const size_t splitIndex = SplitData(*data, dim, splitValue,
- *old_from_new);
+ const size_t splitIndex = SplitData(data, dim, splitValue, oldFromNew);
// Make max and min vals for the children.
arma::vec max_vals_l(maxVals);
@@ -347,12 +346,12 @@
split_dim_ = dim;
// Recursively grow the children.
- left_ = new DTree(max_vals_l, min_vals_l, start_, splitIndex, left_error);
- right_ = new DTree(max_vals_r, min_vals_r, splitIndex, end_, right_error);
+ left_ = new DTree(max_vals_l, min_vals_l, start_, splitIndex, leftError);
+ right_ = new DTree(max_vals_r, min_vals_r, splitIndex, end_, rightError);
- left_g = left_->Grow(data, old_from_new, useVolReg, maxLeafSize,
+ leftG = left_->Grow(data, oldFromNew, useVolReg, maxLeafSize,
minLeafSize);
- right_g = right_->Grow(data, old_from_new, useVolReg, maxLeafSize,
+ rightG = right_->Grow(data, oldFromNew, useVolReg, maxLeafSize,
minLeafSize);
// Store values of R(T~) and |T~|.
@@ -363,23 +362,6 @@
// Store the subtree_leaves_v_t_inv.
subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() +
right_->subtree_leaves_v_t_inv();
-
- // Form T1 by removing leaves for which R(t) = R(t_L) + R(t_R).
- if ((left_->subtree_leaves() == 1) && (right_->subtree_leaves() == 1))
- {
- if (left_->error() + right_->error() == error_)
- {
- delete left_;
- left_ = NULL;
-
- delete right_;
- right_ = NULL;
-
- subtree_leaves_ = 1;
- subtree_leaves_error_ = error_;
- subtree_leaves_v_t_inv_ = v_t_inv_;
- }
- }
}
else
{
@@ -399,14 +381,15 @@
}
// If this is a leaf, do not compute g_k(t); otherwise compute, store, and
- // propagate min(g_k(t_L),g_k(t_R),g_k(t)), unless t_L and/or t_R are leaves.
+ // propagate min(g_k(t_L), g_k(t_R), g_k(t)), unless t_L and/or t_R are
+ // leaves.
if (subtree_leaves_ == 1)
{
- return std::numeric_limits<cT>::max();
+ return std::numeric_limits<double>::max();
}
else
{
- cT g_t;
+ double g_t;
if (useVolReg)
g_t = (error_ - subtree_leaves_error_) /
(subtree_leaves_v_t_inv_ - v_t_inv_);
@@ -414,44 +397,46 @@
g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
assert(g_t > 0.0);
- return min(g_t, min(left_g, right_g));
+ return min(g_t, min(leftG, rightG));
}
- // We need to compute (c_t^2)*r_t for all subtree leaves; this is equal to
+ // We need to compute (c_t^2) * r_t for all subtree leaves; this is equal to
// n_t ^ 2 / r_t * n ^ 2 = -error_. Therefore the value we need is actually
// -1.0 * subtree_leaves_error_.
}
template<typename eT, typename cT>
-cT DTree<eT, cT>::PruneAndUpdate(cT old_alpha, bool useVolReg)
+double DTree<eT, cT>::PruneAndUpdate(const double oldAlpha,
+ const bool useVolReg)
{
// Compute g_t.
if (subtree_leaves_ == 1) // If we are a leaf...
{
- return std::numeric_limits<cT>::max();
+ return std::numeric_limits<double>::max();
}
else
{
// Compute g_t value for node t.
- cT g_t;
+ double g_t;
if (useVolReg)
g_t = (error_ - subtree_leaves_error_) /
(subtree_leaves_v_t_inv_ - v_t_inv_);
else
g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
- if (g_t > old_alpha)
+ if (g_t > oldAlpha)
{
// Go down the tree and update accordingly. Traverse the children.
- cT left_g = left_->PruneAndUpdate(old_alpha, useVolReg);
- cT right_g = right_->PruneAndUpdate(old_alpha, useVolReg);
+ double left_g = left_->PruneAndUpdate(oldAlpha, useVolReg);
+ double right_g = right_->PruneAndUpdate(oldAlpha, useVolReg);
// Update values.
subtree_leaves_ = left_->subtree_leaves() + right_->subtree_leaves();
subtree_leaves_error_ = left_->subtree_leaves_error() +
right_->subtree_leaves_error();
- subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() + right_->subtree_leaves_v_t_inv();
+ subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() +
+ right_->subtree_leaves_v_t_inv();
// Update g_t value.
if (useVolReg)
@@ -460,7 +445,7 @@
else
g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
- assert(g_t < std::numeric_limits<cT>::max());
+ assert(g_t < std::numeric_limits<double>::max());
if (left_->subtree_leaves() == 1 && right_->subtree_leaves() == 1)
return g_t;
@@ -470,7 +455,6 @@
return min(g_t, left_g);
else
return min(g_t, min(left_g, right_g));
-
}
else
{
@@ -486,8 +470,7 @@
right_ = NULL;
// Pass information upward.
- return std::numeric_limits<cT>::max();
-
+ return std::numeric_limits<double>::max();
}
}
}
More information about the mlpack-svn
mailing list