[mlpack-git] master: Optimize small things in density estimation trees (340b34b)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed May 20 11:13:29 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/322deab1ff056e33d4e6aea5f4d0ef9a5b62ab4c...77d750c8fd46140b1d6060424f68768a21c89377
>---------------------------------------------------------------
commit 340b34b091fc00976206a54d467536df27a88788
Author: Janzen Brewer <jahabrewer at gmail.com>
Date: Fri May 15 07:48:54 2015 -0700
Optimize small things in density estimation trees
While doing other work, I noticed a few things that could use
improvement:
* A std::vector that is only ever used as a fixed-size array
* Unnecessary dynamic memory allocation (e.g. DTree objects created with new/delete)
I fixed these things and measured a marginal speedup.
>---------------------------------------------------------------
340b34b091fc00976206a54d467536df27a88788
src/mlpack/methods/det/dt_utils.cpp | 46 ++++++++++++++++---------------------
1 file changed, 20 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/methods/det/dt_utils.cpp b/src/mlpack/methods/det/dt_utils.cpp
index e1aab34..f46714d 100644
--- a/src/mlpack/methods/det/dt_utils.cpp
+++ b/src/mlpack/methods/det/dt_utils.cpp
@@ -104,7 +104,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
const std::string unprunedTreeOutput)
{
// Initialize the tree.
- DTree* dtree = new DTree(dataset);
+ DTree dtree(dataset);
// Prepare to grow the tree...
arma::Col<size_t> oldFromNew(dataset.n_cols);
@@ -116,10 +116,10 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
// Growing the tree
double oldAlpha = 0.0;
- double alpha = dtree->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
+ double alpha = dtree.Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
minLeafSize);
- Log::Info << dtree->SubtreeLeaves() << " leaf nodes in the tree using full "
+ Log::Info << dtree.SubtreeLeaves() << " leaf nodes in the tree using full "
<< "dataset; minimum alpha: " << alpha << "." << std::endl;
// Compute densities for the training points in the full tree, if we were
@@ -132,7 +132,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
for (size_t i = 0; i < dataset.n_cols; ++i)
{
arma::vec testPoint = dataset.unsafe_col(i);
- outfile << dtree->ComputeValue(testPoint) << std::endl;
+ outfile << dtree.ComputeValue(testPoint) << std::endl;
}
}
else
@@ -146,40 +146,37 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
// Sequentially prune and save the alpha values and the values of c_t^2 * r_t.
std::vector<std::pair<double, double> > prunedSequence;
- while (dtree->SubtreeLeaves() > 1)
+ while (dtree.SubtreeLeaves() > 1)
{
std::pair<double, double> treeSeq(oldAlpha,
- dtree->SubtreeLeavesLogNegError());
+ dtree.SubtreeLeavesLogNegError());
prunedSequence.push_back(treeSeq);
oldAlpha = alpha;
- alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
+ alpha = dtree.PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
// Some sanity checks.
Log::Assert((alpha < std::numeric_limits<double>::max()) ||
- (dtree->SubtreeLeaves() == 1));
+ (dtree.SubtreeLeaves() == 1));
Log::Assert(alpha > oldAlpha);
- Log::Assert(dtree->SubtreeLeavesLogNegError() < treeSeq.second);
+ Log::Assert(dtree.SubtreeLeavesLogNegError() < treeSeq.second);
}
std::pair<double, double> treeSeq(oldAlpha,
- dtree->SubtreeLeavesLogNegError());
+ dtree.SubtreeLeavesLogNegError());
prunedSequence.push_back(treeSeq);
Log::Info << prunedSequence.size() << " trees in the sequence; maximum alpha:"
<< " " << oldAlpha << "." << std::endl;
- delete dtree;
-
arma::mat cvData(dataset);
size_t testSize = dataset.n_cols / folds;
- std::vector<double> regularizationConstants;
- regularizationConstants.resize(prunedSequence.size(), 0);
+ double regularizationConstants[prunedSequence.size()] = {0};
+ Timer::Start("cross_validation");
// Go through each fold.
#pragma omp parallel for default(none) \
- shared(testSize,cvData,prunedSequence,regularizationConstants,dataset) \
- private(alpha,oldAlpha)
+ shared(testSize,cvData,prunedSequence,regularizationConstants,dataset)
for (size_t fold = 0; fold < folds; fold++)
{
// Break up data into train and test sets.
@@ -204,7 +201,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
}
// Initialize the tree.
- DTree* cvDTree = new DTree(train);
+ DTree cvDTree(train);
// Getting ready to grow the tree...
arma::Col<size_t> cvOldFromNew(train.n_cols);
@@ -212,8 +209,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
cvOldFromNew[i] = i;
// Grow the tree.
- oldAlpha = 0.0;
- alpha = cvDTree->Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize,
+ cvDTree.Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize,
minLeafSize);
// Sequentially prune with all the values of available alphas and adding
@@ -227,7 +223,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
for (size_t j = 0; j < test.n_cols; j++)
{
arma::vec testPoint = test.unsafe_col(j);
- cvVal += cvDTree->ComputeValue(testPoint);
+ cvVal += cvDTree.ComputeValue(testPoint);
}
// Update the cv regularization constant.
@@ -235,9 +231,9 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols;
// Determine the new alpha value and prune accordingly.
- oldAlpha = 0.5 * (prunedSequence[i + 1].first +
+ double cvOldAlpha = 0.5 * (prunedSequence[i + 1].first +
prunedSequence[i + 2].first);
- alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg);
+ cvDTree.PruneAndUpdate(cvOldAlpha, train.n_cols, useVolumeReg);
}
// Compute test values for this state of the tree.
@@ -245,17 +241,15 @@ DTree* mlpack::det::Trainer(arma::mat& dataset,
for (size_t i = 0; i < test.n_cols; ++i)
{
arma::vec testPoint = test.unsafe_col(i);
- cvVal += cvDTree->ComputeValue(testPoint);
+ cvVal += cvDTree.ComputeValue(testPoint);
}
if (prunedSequence.size() > 2)
#pragma omp atomic
regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal /
(double) dataset.n_cols;
-
- test.reset();
- delete cvDTree;
}
+ Timer::Stop("cross_validation");
double optimalAlpha = -1.0;
long double cvBestError = -std::numeric_limits<long double>::max();
More information about the mlpack-git mailing list