[mlpack-svn] r13309 - mlpack/trunk/src/mlpack/methods/det
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Aug 1 16:47:08 EDT 2012
Author: rcurtin
Date: 2012-08-01 16:47:08 -0400 (Wed, 01 Aug 2012)
New Revision: 13309
Modified:
mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
mlpack/trunk/src/mlpack/methods/det/dtree.hpp
mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
Log:
Cleanup of dt_utils.hpp. Get rid of 'using namespace std' in dtree.hpp and
update accordingly.
Modified: mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-08-01 20:20:05 UTC (rev 13308)
+++ mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-08-01 20:47:08 UTC (rev 13309)
@@ -89,7 +89,7 @@
// Obtain the optimal tree.
Timer::Start("det_training");
- DTree<double> *dtreeOpt = Trainer<double>(&trainingData, folds,
+ DTree<double> *dtreeOpt = Trainer<double>(trainingData, folds,
regularization, maxLeafSize, minLeafSize, unprunedTreeEstimateFile);
Timer::Stop("det_training");
Modified: mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-08-01 20:20:05 UTC (rev 13308)
+++ mlpack/trunk/src/mlpack/methods/det/dt_utils.hpp 2012-08-01 20:47:08 UTC (rev 13309)
@@ -17,46 +17,46 @@
namespace det {
template<typename eT>
-void PrintLeafMembership(DTree<eT> *dtree,
+void PrintLeafMembership(DTree<eT>* dtree,
const arma::Mat<eT>& data,
const arma::Mat<size_t>& labels,
- size_t num_classes,
- string leaf_class_membership_file = "")
+ const size_t numClasses,
+ const std::string leafClassMembershipFile = "")
{
// Tag the leaves with numbers.
- int num_leaves = dtree->TagTree();
+ int numLeaves = dtree->TagTree();
- arma::Mat<size_t> table(num_leaves, num_classes);
+ arma::Mat<size_t> table(numLeaves, numClasses);
table.zeros();
for (size_t i = 0; i < data.n_cols; i++)
{
- arma::Col<eT> test_p = data.unsafe_col(i);
- int leaf_tag = dtree->FindBucket(test_p);
- size_t label = labels[i];
- table(leaf_tag, label) += 1;
+ const arma::Col<eT> test_p = data.unsafe_col(i);
+ const int leafTag = dtree->FindBucket(test_p);
+ const size_t label = labels[i];
+ table(leafTag, label) += 1;
}
- if (leaf_class_membership_file == "")
+ if (leafClassMembershipFile == "")
{
- Log::Warn << "Leaf Membership: Classes in each leaf" << std::endl
- << table << std::endl;
+ Log::Info << "Leaf membership; row represents leaf id, column represents "
+ << "class id; value represents number of points in leaf in class."
+ << std::endl << table;
}
else
{
// Create a stream for the file.
- ofstream outfile(leaf_class_membership_file.c_str());
+ std::ofstream outfile(leafClassMembershipFile.c_str());
if (outfile.good())
{
outfile << table;
- Log::Warn << "Leaf Membership: Classes in each leaf"
- << " printed in '" << leaf_class_membership_file << "'."
- << std::endl;
+ Log::Info << "Leaf membership printed to '" << leafClassMembershipFile
+ << "'." << std::endl;
}
else
{
- Log::Warn << "Can't open '" << leaf_class_membership_file << "'."
- << std::endl;
+ Log::Warn << "Can't open '" << leafClassMembershipFile << "' to write "
+ << "leaf membership to." << std::endl;
}
outfile.close();
}
@@ -66,8 +66,8 @@
template<typename eT>
-void PrintVariableImportance(DTree<eT> *dtree,
- const string vi_file = "")
+void PrintVariableImportance(const DTree<eT>* dtree,
+ const std::string viFile = "")
{
arma::vec imps;
dtree->ComputeVariableImportance(imps);
@@ -77,253 +77,235 @@
if (imps[i] > max)
max = imps[i];
- Log::Warn << "Max. variable importance: " << max << "." << std::endl;
+ Log::Info << "Maximum variable importance: " << max << "." << std::endl;
- if (vi_file == "")
+ if (viFile == "")
{
- Log::Warn << "Variable importance: " << std::endl << imps.t() << std::endl;
+ Log::Info << "Variable importance: " << std::endl << imps.t() << std::endl;
}
else
{
- ofstream outfile(vi_file.c_str());
+ std::ofstream outfile(viFile.c_str());
if (outfile.good())
{
- Log::Warn << "Variable importance printed in '" << vi_file << "'."
- << endl;
outfile << imps;
- } else {
- Log::Warn << "Can't open '" << vi_file
- << "'" << endl;
+ Log::Info << "Variable importance printed to '" << viFile << "'."
+ << std::endl;
}
+ else
+ {
+ Log::Warn << "Can't open '" << viFile << "' to write variable importance "
+ << "to." << std::endl;
+ }
outfile.close();
}
+}
- return;
-} // PrintVariableImportance
-
// This function trains the optimal decision tree using the given number of
// folds.
template<typename eT>
-DTree<eT> *Trainer(arma::Mat<eT>* dataset,
- size_t folds,
- bool useVolumeReg = false,
- size_t maxLeafSize = 10,
- size_t minLeafSize = 5,
- string unprunedTreeOutput = "")
+DTree<eT>* Trainer(arma::Mat<eT>& dataset,
+ const size_t folds,
+ const bool useVolumeReg = false,
+ const size_t maxLeafSize = 10,
+ const size_t minLeafSize = 5,
+ const std::string unprunedTreeOutput = "")
{
// Initialize the tree.
- DTree<eT>* dtree = new DTree<eT>(*dataset);
+ DTree<eT>* dtree = new DTree<eT>(dataset);
- // Getting ready to grow the tree...
- arma::Col<size_t> old_from_new(dataset->n_cols);
- for (size_t i = 0; i < old_from_new.n_elem; i++)
- old_from_new[i] = i;
+ // Prepare to grow the tree...
+ arma::Col<size_t> oldFromNew(dataset.n_cols);
+ for (size_t i = 0; i < oldFromNew.n_elem; i++)
+ oldFromNew[i] = i;
- // Saving the dataset since it would be modified while growing the tree
- arma::Mat<eT>* new_dataset = new arma::Mat<eT>(*dataset);
+ // Save the dataset since it would be modified while growing the tree.
+ arma::Mat<eT> newDataset(dataset);
// Growing the tree
- double old_alpha = 0.0;
- double alpha = dtree->Grow(*new_dataset, old_from_new, useVolumeReg,
- maxLeafSize, minLeafSize);
+ double oldAlpha = 0.0;
+ double alpha = dtree->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
+ minLeafSize);
- delete new_dataset;
+ Log::Info << dtree->SubtreeLeaves() << " leaf nodes in the tree using full "
+ << "dataset; minimum alpha: " << alpha << "." << std::endl;
- Log::Info << dtree->SubtreeLeaves()
- << " leaf nodes in the tree with full data; min_alpha: " << alpha << "."
- << std::endl;
-
// Compute densities for the training points in the full tree, if we were
// asked for this.
if (unprunedTreeOutput != "")
{
- ofstream outfile(unprunedTreeOutput.c_str());
+ std::ofstream outfile(unprunedTreeOutput.c_str());
if (outfile.good())
{
- for (size_t i = 0; i < dataset->n_cols; ++i)
+ for (size_t i = 0; i < dataset.n_cols; ++i)
{
- arma::Col<eT> test_p = dataset->unsafe_col(i);
- outfile << dtree->ComputeValue(test_p) << endl;
+ arma::Col<eT> test_p = dataset.unsafe_col(i);
+ outfile << dtree->ComputeValue(test_p) << std::endl;
}
}
else
{
- Log::Warn << "Can't open '" << unprunedTreeOutput << "'." << std::endl;
+ Log::Warn << "Can't open '" << unprunedTreeOutput << "' to write computed"
+ << " densities to." << std::endl;
}
outfile.close();
}
// Sequentially prune and save the alpha values and the values of c_t^2 * r_t.
- std::vector<std::pair<double, double> > pruned_sequence;
+ std::vector<std::pair<double, double> > prunedSequence;
while (dtree->SubtreeLeaves() > 1)
{
- std::pair<double, double> tree_seq(old_alpha,
+ std::pair<double, double> treeSeq(oldAlpha,
dtree->SubtreeLeavesLogNegError());
- pruned_sequence.push_back(tree_seq);
- old_alpha = alpha;
- alpha = dtree->PruneAndUpdate(old_alpha, dataset->n_cols, useVolumeReg);
+ prunedSequence.push_back(treeSeq);
+ oldAlpha = alpha;
+ alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg);
// Some sanity checks.
- assert((alpha < std::numeric_limits<double>::max()) ||
+ Log::Assert((alpha < std::numeric_limits<double>::max()) ||
(dtree->SubtreeLeaves() == 1));
- assert(alpha > old_alpha);
- assert(dtree->SubtreeLeavesLogNegError() < tree_seq.second);
+ Log::Assert(alpha > oldAlpha);
+ Log::Assert(dtree->SubtreeLeavesLogNegError() < treeSeq.second);
}
- std::pair<double, double> tree_seq(old_alpha,
+ std::pair<double, double> treeSeq(oldAlpha,
dtree->SubtreeLeavesLogNegError());
- pruned_sequence.push_back(tree_seq);
+ prunedSequence.push_back(treeSeq);
- Log::Info << pruned_sequence.size() << " trees in the sequence; max_alpha: "
- << old_alpha << "." << std::endl;
+ Log::Info << prunedSequence.size() << " trees in the sequence; maximum alpha:"
+ << " " << oldAlpha << "." << std::endl;
delete dtree;
- arma::Mat<eT>* cvdata = new arma::Mat<eT>(*dataset);
- size_t test_size = dataset->n_cols / folds;
+ arma::Mat<eT> cvdata(dataset);
+ size_t testSize = dataset.n_cols / folds;
// Go through each fold.
for (size_t fold = 0; fold < folds; fold++)
{
// Break up data into train and test sets.
- size_t start = fold * test_size;
- size_t end = std::min((fold + 1) * test_size, (size_t) cvdata->n_cols);
+ size_t start = fold * testSize;
+ size_t end = std::min((fold + 1) * testSize, (size_t) cvdata.n_cols);
- arma::Mat<eT> test = cvdata->cols(start, end - 1);
- arma::Mat<eT>* train = new arma::Mat<eT>(cvdata->n_rows,
- cvdata->n_cols - test.n_cols);
+ arma::Mat<eT> test = cvdata.cols(start, end - 1);
+ arma::Mat<eT> train(cvdata.n_rows, cvdata.n_cols - test.n_cols);
- if (start == 0 && end < cvdata->n_cols)
+ if (start == 0 && end < cvdata.n_cols)
{
- assert(train->n_cols == cvdata->n_cols - end);
- train->cols(0, train->n_cols - 1) = cvdata->cols(end, cvdata->n_cols - 1);
+ train.cols(0, train.n_cols - 1) = cvdata.cols(end, cvdata.n_cols - 1);
}
- else if (start > 0 && end == cvdata->n_cols)
+ else if (start > 0 && end == cvdata.n_cols)
{
- assert(train->n_cols == start);
- train->cols(0, train->n_cols - 1) = cvdata->cols(0, start - 1);
+ train.cols(0, train.n_cols - 1) = cvdata.cols(0, start - 1);
}
else
{
- assert(train->n_cols == start + cvdata->n_cols - end);
-
- train->cols(0, start - 1) = cvdata->cols(0, start - 1);
- train->cols(start, train->n_cols - 1) =
- cvdata->cols(end, cvdata->n_cols - 1);
+ train.cols(0, start - 1) = cvdata.cols(0, start - 1);
+ train.cols(start, train.n_cols - 1) = cvdata.cols(end, cvdata.n_cols - 1);
}
- assert(train->n_cols + test.n_cols == cvdata->n_cols);
-
// Initialize the tree.
- DTree<eT>* dtree_cv = new DTree<eT>(*train);
+ DTree<eT>* cvDTree = new DTree<eT>(train);
// Getting ready to grow the tree...
- arma::Col<size_t> old_from_new_cv(train->n_cols);
- for (size_t i = 0; i < old_from_new_cv.n_elem; i++)
- old_from_new_cv[i] = i;
+ arma::Col<size_t> cvOldFromNew(train.n_cols);
+ for (size_t i = 0; i < cvOldFromNew.n_elem; i++)
+ cvOldFromNew[i] = i;
// Grow the tree.
- old_alpha = 0.0;
- alpha = dtree_cv->Grow(*train, old_from_new_cv, useVolumeReg, maxLeafSize,
+ oldAlpha = 0.0;
+ alpha = cvDTree->Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize,
minLeafSize);
// Sequentially prune with all the values of available alphas and adding
// values for test values.
std::vector<std::pair<double, double> >::iterator it;
- for (it = pruned_sequence.begin(); it < pruned_sequence.end() -2; ++it)
+ for (it = prunedSequence.begin(); it < prunedSequence.end() - 2; ++it)
{
// Compute test values for this state of the tree.
- double val_cv = 0.0;
+ double cvVal = 0.0;
for (size_t i = 0; i < test.n_cols; i++)
{
- arma::Col<eT> test_point = test.unsafe_col(i);
- val_cv += dtree_cv->ComputeValue(test_point);
+ arma::Col<eT> testPoint = test.unsafe_col(i);
+ cvVal += cvDTree->ComputeValue(testPoint);
}
// Update the cv error value by mapping out of log-space then back into
// it, using long doubles.
long double notLogVal = -std::exp((long double) it->second) -
- 2.0 * val_cv / (double) dataset->n_cols;
+ 2.0 * cvVal / (double) dataset.n_cols;
it->second = (double) std::log(-notLogVal);
// Determine the new alpha value and prune accordingly.
- old_alpha = sqrt(((it + 1)->first) * ((it + 2)->first));
- alpha = dtree_cv->PruneAndUpdate(old_alpha, train->n_cols, useVolumeReg);
+ oldAlpha = sqrt(((it + 1)->first) * ((it + 2)->first));
+ alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg);
}
// Compute test values for this state of the tree.
- double val_cv = 0.0;
+ double cvVal = 0.0;
for (size_t i = 0; i < test.n_cols; ++i)
{
- arma::Col<eT> test_point = test.unsafe_col(i);
- val_cv += dtree_cv->ComputeValue(test_point);
+ arma::Col<eT> testPoint = test.unsafe_col(i);
+ cvVal += cvDTree->ComputeValue(testPoint);
}
// Update the cv error value.
long double notLogVal = -std::exp((long double) it->second) -
- 2.0 * val_cv / (double) dataset->n_cols;
+ 2.0 * cvVal / (double) dataset.n_cols;
it->second -= (double) std::log(-notLogVal);
test.reset();
- delete train;
-
- delete dtree_cv;
+ delete cvDTree;
}
- delete cvdata;
-
- double optimal_alpha = -1.0;
- double best_cv_error = numeric_limits<double>::max();
+ double optimalAlpha = -1.0;
+ double cvBestError = std::numeric_limits<double>::max();
std::vector<std::pair<double, double> >::iterator it;
- for (it = pruned_sequence.begin(); it < pruned_sequence.end() -1; ++it)
+ for (it = prunedSequence.begin(); it < prunedSequence.end() -1; ++it)
{
- if (it->second < best_cv_error)
+ if (it->second < cvBestError)
{
- best_cv_error = it->second;
- optimal_alpha = it->first;
+ cvBestError = it->second;
+ optimalAlpha = it->first;
}
}
- Log::Info << "Optimal alpha: " << optimal_alpha << "." << std::endl;
+ Log::Info << "Optimal alpha: " << optimalAlpha << "." << std::endl;
// Initialize the tree.
- DTree<eT>* dtree_opt = new DTree<eT>(*dataset);
+ DTree<eT>* dtreeOpt = new DTree<eT>(dataset);
// Getting ready to grow the tree...
- for (size_t i = 0; i < old_from_new.n_elem; i++)
- old_from_new[i] = i;
+ for (size_t i = 0; i < oldFromNew.n_elem; i++)
+ oldFromNew[i] = i;
// Save the dataset since it would be modified while growing the tree.
- new_dataset = new arma::Mat<eT>(*dataset);
+ newDataset = dataset;
// Grow the tree.
- old_alpha = 0.0;
- alpha = dtree_opt->Grow(*new_dataset, old_from_new, useVolumeReg, maxLeafSize,
+ oldAlpha = 0.0;
+ alpha = dtreeOpt->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize,
minLeafSize);
// Prune with optimal alpha.
- while ((old_alpha > optimal_alpha) && (dtree_opt->SubtreeLeaves() > 1))
+ while ((oldAlpha > optimalAlpha) && (dtreeOpt->SubtreeLeaves() > 1))
{
- old_alpha = alpha;
- alpha = dtree_opt->PruneAndUpdate(old_alpha, new_dataset->n_cols,
- useVolumeReg);
+ oldAlpha = alpha;
+ alpha = dtreeOpt->PruneAndUpdate(oldAlpha, newDataset.n_cols, useVolumeReg);
// Some sanity checks.
- assert((alpha < numeric_limits<double>::max()) ||
- (dtree_opt->SubtreeLeaves() == 1));
- assert(alpha < old_alpha);
+ Log::Assert((alpha < std::numeric_limits<double>::max()) ||
+ (dtreeOpt->SubtreeLeaves() == 1));
+ Log::Assert(alpha < oldAlpha);
}
- Log::Info << dtree_opt->SubtreeLeaves()
- << " leaf nodes in the optimally pruned tree; optimal alpha: "
- << old_alpha << "." << std::endl;
+ Log::Info << dtreeOpt->SubtreeLeaves() << " leaf nodes in the optimally "
+ << "pruned tree; optimal alpha: " << oldAlpha << "." << std::endl;
- delete new_dataset;
-
- return dtree_opt;
+ return dtreeOpt;
}
}; // namespace det
Modified: mlpack/trunk/src/mlpack/methods/det/dtree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-08-01 20:20:05 UTC (rev 13308)
+++ mlpack/trunk/src/mlpack/methods/det/dtree.hpp 2012-08-01 20:47:08 UTC (rev 13309)
@@ -12,10 +12,6 @@
#include <mlpack/core.hpp>
-using namespace mlpack;
-using namespace std;
-
-
namespace mlpack {
namespace det /** Density Estimation Trees */ {
Modified: mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-08-01 20:20:05 UTC (rev 13308)
+++ mlpack/trunk/src/mlpack/methods/det/dtree_impl.hpp 2012-08-01 20:47:08 UTC (rev 13309)
@@ -419,7 +419,7 @@
gT = alphaUpper - std::log(subtreeLeaves - 1);
}
- return min(gT, min(leftG, rightG));
+ return std::min(gT, std::min(leftG, rightG));
}
// We need to compute (c_t^2) * r_t for all subtree leaves; this is equal to
@@ -514,7 +514,7 @@
assert(gT < std::numeric_limits<double>::max());
- return min(gT, min(leftG, rightG));
+ return std::min(gT, std::min(leftG, rightG));
}
else
{
@@ -524,8 +524,9 @@
subtreeLeavesLogNegError = logNegError;
delete left;
+ delete right;
+
left = NULL;
- delete right;
right = NULL;
// Pass information upward.
More information about the mlpack-svn mailing list