[mlpack-git] master: Optimize small things in density estimation trees (340b34b)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed May 20 11:13:29 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/322deab1ff056e33d4e6aea5f4d0ef9a5b62ab4c...77d750c8fd46140b1d6060424f68768a21c89377

>---------------------------------------------------------------

commit 340b34b091fc00976206a54d467536df27a88788
Author: Janzen Brewer <jahabrewer at gmail.com>
Date:   Fri May 15 07:48:54 2015 -0700

    Optimize small things in density estimation trees
    
    While doing other work, I noticed a few things that could use
    improvement:
      * An std::vector that is used as an array
      * Unnecessary dynamic memory allocation
    I fixed these things and measured a marginal speedup.


>---------------------------------------------------------------

340b34b091fc00976206a54d467536df27a88788
 src/mlpack/methods/det/dt_utils.cpp | 46 ++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/mlpack/methods/det/dt_utils.cpp b/src/mlpack/methods/det/dt_utils.cpp
index e1aab34..f46714d 100644
--- a/src/mlpack/methods/det/dt_utils.cpp
+++ b/src/mlpack/methods/det/dt_utils.cpp
@@ -104,7 +104,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
                             const std::string unprunedTreeOutput)
 {
   // Initialize the tree.
-  DTree* dtree = new DTree(dataset);
+  DTree dtree(dataset);
 
   // Prepare to grow the tree...
   arma::Col<size_t> oldFromNew(dataset.n_cols);
@@ -116,10 +116,10 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
 
   // Growing the tree
   double oldAlpha = 0.0;
-  double alpha = dtree->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
+  double alpha = dtree.Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
       minLeafSize);
 
-  Log::Info << dtree->SubtreeLeaves() << " leaf nodes in the tree using full "
+  Log::Info << dtree.SubtreeLeaves() << " leaf nodes in the tree using full "
       << "dataset; minimum alpha: " << alpha << "." << std::endl;
 
   // Compute densities for the training points in the full tree, if we were
@@ -132,7 +132,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
       for (size_t i = 0; i < dataset.n_cols; ++i)
       {
         arma::vec testPoint = dataset.unsafe_col(i);
-        outfile << dtree->ComputeValue(testPoint) << std::endl;
+        outfile << dtree.ComputeValue(testPoint) << std::endl;
       }
     }
     else
@@ -146,40 +146,37 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
 
   // Sequentially prune and save the alpha values and the values of c_t^2 * r_t.
   std::vector<std::pair<double, double> > prunedSequence;
-  while (dtree->SubtreeLeaves() > 1)
+  while (dtree.SubtreeLeaves() > 1)
   {
     std::pair<double, double> treeSeq(oldAlpha,
-        dtree->SubtreeLeavesLogNegError());
+        dtree.SubtreeLeavesLogNegError());
     prunedSequence.push_back(treeSeq);
     oldAlpha = alpha;
-    alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
+    alpha = dtree.PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
 
     // Some sanity checks.
     Log::Assert((alpha < std::numeric_limits<double>::max()) ||
-        (dtree->SubtreeLeaves() == 1));
+        (dtree.SubtreeLeaves() == 1));
     Log::Assert(alpha > oldAlpha);
-    Log::Assert(dtree->SubtreeLeavesLogNegError() < treeSeq.second);
+    Log::Assert(dtree.SubtreeLeavesLogNegError() < treeSeq.second);
   }
 
   std::pair<double, double> treeSeq(oldAlpha,
-      dtree->SubtreeLeavesLogNegError());
+      dtree.SubtreeLeavesLogNegError());
   prunedSequence.push_back(treeSeq);
 
   Log::Info << prunedSequence.size() << " trees in the sequence; maximum alpha:"
       << " " << oldAlpha << "." << std::endl;
 
-  delete dtree;
-
   arma::mat cvData(dataset);
   size_t testSize = dataset.n_cols / folds;
 
-  std::vector<double> regularizationConstants;
-  regularizationConstants.resize(prunedSequence.size(), 0);
+  double regularizationConstants[prunedSequence.size()] = {0};
 
+  Timer::Start("cross_validation");
   // Go through each fold.
   #pragma omp parallel for default(none) \
-    shared(testSize,cvData,prunedSequence,regularizationConstants,dataset) \
-    private(alpha,oldAlpha)
+    shared(testSize,cvData,prunedSequence,regularizationConstants,dataset)
   for (size_t fold = 0; fold < folds; fold++)
   {
     // Break up data into train and test sets.
@@ -204,7 +201,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
     }
 
     // Initialize the tree.
-    DTree* cvDTree = new DTree(train);
+    DTree cvDTree(train);
 
     // Getting ready to grow the tree...
     arma::Col<size_t> cvOldFromNew(train.n_cols);
@@ -212,8 +209,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
       cvOldFromNew[i] = i;
 
     // Grow the tree.
-    oldAlpha = 0.0;
-    alpha = cvDTree->Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize,
+    cvDTree.Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize,
         minLeafSize);
 
     // Sequentially prune with all the values of available alphas and adding
@@ -227,7 +223,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
       for (size_t j = 0; j < test.n_cols; j++)
       {
         arma::vec testPoint = test.unsafe_col(j);
-        cvVal += cvDTree->ComputeValue(testPoint);
+        cvVal += cvDTree.ComputeValue(testPoint);
       }
 
       // Update the cv regularization constant.
@@ -235,9 +231,9 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
       regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols;
 
       // Determine the new alpha value and prune accordingly.
-      oldAlpha = 0.5 * (prunedSequence[i + 1].first +
+      double cvOldAlpha = 0.5 * (prunedSequence[i + 1].first +
           prunedSequence[i + 2].first);
-      alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg);
+      cvDTree.PruneAndUpdate(cvOldAlpha, train.n_cols, useVolumeReg);
     }
 
     // Compute test values for this state of the tree.
@@ -245,17 +241,15 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
     for (size_t i = 0; i < test.n_cols; ++i)
     {
       arma::vec testPoint = test.unsafe_col(i);
-      cvVal += cvDTree->ComputeValue(testPoint);
+      cvVal += cvDTree.ComputeValue(testPoint);
     }
 
     if (prunedSequence.size() > 2)
       #pragma omp atomic
       regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal /
           (double) dataset.n_cols;
-
-    test.reset();
-    delete cvDTree;
   }
+  Timer::Stop("cross_validation");
 
   double optimalAlpha = -1.0;
   long double cvBestError = -std::numeric_limits<long double>::max();



More information about the mlpack-git mailing list