[mlpack-svn] r13240 - mlpack/trunk/src/mlpack/methods/det

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 16 13:23:12 EDT 2012


Author: rcurtin
Date: 2012-07-16 13:23:11 -0400 (Mon, 16 Jul 2012)
New Revision: 13240

Modified:
   mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
   mlpack/trunk/src/mlpack/methods/det/dtree.hpp
   mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
Log:
Further switching from cT to double.  Later we will switch to all log error.


Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp	2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp	2012-07-16 17:23:11 UTC (rev 13240)
@@ -128,8 +128,8 @@
   arma::Mat<eT>* new_dataset = new arma::Mat<eT>(*dataset);
 
   // Growing the tree
-  long double old_alpha = 0.0;
-  long double alpha = dtree->Grow(new_dataset, &old_from_new, useVolumeReg,
+  double old_alpha = 0.0;
+  double alpha = dtree->Grow(*new_dataset, old_from_new, useVolumeReg,
       maxLeafSize, minLeafSize);
 
   delete new_dataset;
@@ -160,10 +160,10 @@
   }
 
   // Sequentially prune and save the alpha values and the values of c_t^2 * r_t.
-  std::vector<std::pair<long double, long double> > pruned_sequence;
+  std::vector<std::pair<double, long double> > pruned_sequence;
   while (dtree->subtree_leaves() > 1)
   {
-    std::pair<long double, long double> tree_seq(old_alpha,
+    std::pair<double, long double> tree_seq(old_alpha,
         -1.0 * dtree->subtree_leaves_error());
     pruned_sequence.push_back(tree_seq);
     old_alpha = alpha;
@@ -230,12 +230,12 @@
 
     // Grow the tree.
     old_alpha = 0.0;
-    alpha = dtree_cv->Grow(train, &old_from_new_cv, useVolumeReg, maxLeafSize,
+    alpha = dtree_cv->Grow(*train, old_from_new_cv, useVolumeReg, maxLeafSize,
         minLeafSize);
 
     // Sequentially prune with all the values of available alphas and adding
     // values for test values.
-    std::vector<std::pair<long double, long double> >::iterator it;
+    std::vector<std::pair<double, long double> >::iterator it;
     for (it = pruned_sequence.begin(); it < pruned_sequence.end() -2; ++it)
     {
       // Compute test values for this state of the tree.
@@ -275,7 +275,7 @@
 
   long double optimal_alpha = -1.0;
   long double best_cv_error = numeric_limits<long double>::max();
-  std::vector<std::pair<long double, long double> >::iterator it;
+  std::vector<std::pair<double, long double> >::iterator it;
 
   for (it = pruned_sequence.begin(); it < pruned_sequence.end() -1; ++it)
   {
@@ -300,7 +300,7 @@
 
   // Grow the tree.
   old_alpha = 0.0;
-  alpha = dtree_opt->Grow(new_dataset, &old_from_new, useVolumeReg, maxLeafSize,
+  alpha = dtree_opt->Grow(*new_dataset, old_from_new, useVolumeReg, maxLeafSize,
       minLeafSize);
 
   // Prune with optimal alpha.

Modified: mlpack/trunk/src/mlpack/methods/det/dtree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.hpp	2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.hpp	2012-07-16 17:23:11 UTC (rev 13240)
@@ -179,29 +179,29 @@
   DTree(arma::mat& data);
 
   // Non-root node initializers
-  DTree(const arma::vec& max_vals,
-        const arma::vec& min_vals,
+  DTree(const arma::vec& maxVals,
+        const arma::vec& minVals,
         const size_t start,
         const size_t end,
         const double error);
 
-  DTree(const arma::vec& max_vals,
-        const arma::vec& min_vals,
-        size_t total_points,
-        size_t start,
-        size_t end);
+  DTree(const arma::vec& maxVals,
+        const arma::vec& minVals,
+        const size_t totalPoints,
+        const size_t start,
+        const size_t end);
 
   ~DTree();
 
   // Greedily expand the tree
-  cT Grow(MatType* data,
-          arma::Col<size_t> *old_from_new,
-          bool useVolReg = false,
-          size_t maxLeafSize = 10,
-          size_t minLeafSize = 5);
+  double Grow(arma::mat& data,
+              arma::Col<size_t>& oldFromNew,
+              const bool useVolReg = false,
+              const size_t maxLeafSize = 10,
+              const size_t minLeafSize = 5);
 
   // perform alpha pruning on the tree
-  cT PruneAndUpdate(cT old_alpha, bool useVolReg = false);
+  double PruneAndUpdate(const double old_alpha, const bool useVolReg = false);
 
   // compute the density at a given point
   cT ComputeValue(VecType* query);

Modified: mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp	2012-07-16 16:07:04 UTC (rev 13239)
+++ mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp	2012-07-16 17:23:11 UTC (rev 13240)
@@ -203,10 +203,10 @@
                      const size_t totalPoints) :
     start_(0),
     end_(totalPoints),
-    root_(true),
     maxVals(maxVals),
     minVals(minVals),
     error_(-std::exp(LogNegativeError(totalPoints))),
+    root_(true),
     bucket_tag_(-1),
     left_(NULL),
     right_(NULL)
@@ -293,45 +293,44 @@
 
 // Greedily expand the tree
 template<typename eT, typename cT>
-cT DTree<eT, cT>::Grow(MatType* data,
-                       arma::Col<size_t> *old_from_new,
-                       bool useVolReg,
-                       size_t maxLeafSize,
-                       size_t minLeafSize)
+double DTree<eT, cT>::Grow(arma::mat& data,
+                           arma::Col<size_t>& oldFromNew,
+                           const bool useVolReg,
+                           const size_t maxLeafSize,
+                           const size_t minLeafSize)
 {
-  assert(data->n_rows == maxVals.n_elem);
-  assert(data->n_rows == minVals.n_elem);
+  assert(data.n_rows == maxVals.n_elem);
+  assert(data.n_rows == minVals.n_elem);
 
-  cT left_g, right_g;
+  double leftG, rightG;
 
   // Compute points ratio.
-  ratio_ = (cT) (end_ - start_) / (cT) old_from_new->n_elem;
+  ratio_ = (double) (end_ - start_) / (double) oldFromNew.n_elem;
 
-  // Compute the v_t_inv: the inverse of the volume of the node.
-  cT log_vol_t = 0;
+  // Compute the v_t_inv: the inverse of the volume of the node.  We use log to
+  // prevent overflow.
+  double logVol = 0;
   for (size_t i = 0; i < maxVals.n_elem; ++i)
     if (maxVals[i] - minVals[i] > 0.0)
-      // Use log to prevent overflow.
-      log_vol_t += (cT) std::log(maxVals[i] - minVals[i]);
+      logVol += std::log(maxVals[i] - minVals[i]);
 
   // Check for overflow.
-  assert(std::exp(log_vol_t) > 0.0);
-  v_t_inv_ = 1.0 / std::exp(log_vol_t);
+  assert(std::exp(logVol) > 0.0);
+  v_t_inv_ = 1.0 / std::exp(logVol);
 
-  // Check if node is large enough.
+  // Check if node is large enough to split.
   if ((size_t) (end_ - start_) > maxLeafSize) {
 
     // Find the split.
     size_t dim;
     double splitValue;
-    double left_error, right_error;
-    if (FindSplit(*data, dim, splitValue, left_error, right_error, maxLeafSize,
+    double leftError, rightError;
+    if (FindSplit(data, dim, splitValue, leftError, rightError, maxLeafSize,
         minLeafSize))
     {
       // Move the data around for the children to have points in a node lie
       // contiguously (to increase efficiency during the training).
-      const size_t splitIndex = SplitData(*data, dim, splitValue,
-          *old_from_new);
+      const size_t splitIndex = SplitData(data, dim, splitValue, oldFromNew);
 
       // Make max and min vals for the children.
       arma::vec max_vals_l(maxVals);
@@ -347,12 +346,12 @@
       split_dim_ = dim;
 
       // Recursively grow the children.
-      left_ = new DTree(max_vals_l, min_vals_l, start_, splitIndex, left_error);
-      right_ = new DTree(max_vals_r, min_vals_r, splitIndex, end_, right_error);
+      left_ = new DTree(max_vals_l, min_vals_l, start_, splitIndex, leftError);
+      right_ = new DTree(max_vals_r, min_vals_r, splitIndex, end_, rightError);
 
-      left_g = left_->Grow(data, old_from_new, useVolReg, maxLeafSize,
+      leftG = left_->Grow(data, oldFromNew, useVolReg, maxLeafSize,
           minLeafSize);
-      right_g = right_->Grow(data, old_from_new, useVolReg, maxLeafSize,
+      rightG = right_->Grow(data, oldFromNew, useVolReg, maxLeafSize,
           minLeafSize);
 
       // Store values of R(T~) and |T~|.
@@ -363,23 +362,6 @@
       // Store the subtree_leaves_v_t_inv.
       subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() +
           right_->subtree_leaves_v_t_inv();
-
-      // Form T1 by removing leaves for which R(t) = R(t_L) + R(t_R).
-      if ((left_->subtree_leaves() == 1) && (right_->subtree_leaves() == 1))
-      {
-        if (left_->error() + right_->error() == error_)
-        {
-          delete left_;
-          left_ = NULL;
-
-          delete right_;
-          right_ = NULL;
-
-          subtree_leaves_ = 1;
-          subtree_leaves_error_ = error_;
-          subtree_leaves_v_t_inv_ = v_t_inv_;
-        }
-      }
     }
     else
     {
@@ -399,14 +381,15 @@
   }
 
   // If this is a leaf, do not compute g_k(t); otherwise compute, store, and
-  // propagate min(g_k(t_L),g_k(t_R),g_k(t)), unless t_L and/or t_R are leaves.
+  // propagate min(g_k(t_L), g_k(t_R), g_k(t)), unless t_L and/or t_R are
+  // leaves.
   if (subtree_leaves_ == 1)
   {
-    return std::numeric_limits<cT>::max();
+    return std::numeric_limits<double>::max();
   }
   else
   {
-    cT g_t;
+    double g_t;
     if (useVolReg)
       g_t = (error_ - subtree_leaves_error_) /
           (subtree_leaves_v_t_inv_ - v_t_inv_);
@@ -414,44 +397,46 @@
       g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
 
     assert(g_t > 0.0);
-    return min(g_t, min(left_g, right_g));
+    return min(g_t, min(leftG, rightG));
   }
 
-  // We need to compute (c_t^2)*r_t for all subtree leaves; this is equal to
+  // We need to compute (c_t^2) * r_t for all subtree leaves; this is equal to
   // n_t ^ 2 / r_t * n ^ 2 = -error_.  Therefore the value we need is actually
   // -1.0 * subtree_leaves_error_.
 }
 
 
 template<typename eT, typename cT>
-cT DTree<eT, cT>::PruneAndUpdate(cT old_alpha, bool useVolReg)
+double DTree<eT, cT>::PruneAndUpdate(const double oldAlpha,
+                                     const bool useVolReg)
 {
   // Compute g_t.
   if (subtree_leaves_ == 1) // If we are a leaf...
   {
-    return std::numeric_limits<cT>::max();
+    return std::numeric_limits<double>::max();
   }
   else
   {
     // Compute g_t value for node t.
-    cT g_t;
+    double g_t;
     if (useVolReg)
       g_t = (error_ - subtree_leaves_error_) /
           (subtree_leaves_v_t_inv_ - v_t_inv_);
     else
       g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
 
-    if (g_t > old_alpha)
+    if (g_t > oldAlpha)
     {
       // Go down the tree and update accordingly.  Traverse the children.
-      cT left_g = left_->PruneAndUpdate(old_alpha, useVolReg);
-      cT right_g = right_->PruneAndUpdate(old_alpha, useVolReg);
+      double left_g = left_->PruneAndUpdate(oldAlpha, useVolReg);
+      double right_g = right_->PruneAndUpdate(oldAlpha, useVolReg);
 
       // Update values.
       subtree_leaves_ = left_->subtree_leaves() + right_->subtree_leaves();
       subtree_leaves_error_ = left_->subtree_leaves_error() +
           right_->subtree_leaves_error();
-      subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() + right_->subtree_leaves_v_t_inv();
+      subtree_leaves_v_t_inv_ = left_->subtree_leaves_v_t_inv() +
+          right_->subtree_leaves_v_t_inv();
 
       // Update g_t value.
       if (useVolReg)
@@ -460,7 +445,7 @@
       else
         g_t = (error_ - subtree_leaves_error_) / (subtree_leaves_ - 1);
 
-      assert(g_t < std::numeric_limits<cT>::max());
+      assert(g_t < std::numeric_limits<double>::max());
 
       if (left_->subtree_leaves() == 1 && right_->subtree_leaves() == 1)
         return g_t;
@@ -470,7 +455,6 @@
         return min(g_t, left_g);
       else
         return min(g_t, min(left_g, right_g));
-
     }
     else
     {
@@ -486,8 +470,7 @@
       right_ = NULL;
 
       // Pass information upward.
-      return std::numeric_limits<cT>::max();
-
+      return std::numeric_limits<double>::max();
     }
   }
 }




More information about the mlpack-svn mailing list