[mlpack-svn] r13312 - mlpack/trunk/src/mlpack/methods/det

fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Aug 1 22:12:30 EDT 2012


Author: rcurtin
Date: 2012-08-01 22:12:29 -0400 (Wed, 01 Aug 2012)
New Revision: 13312

Added:
   mlpack/trunk/src/mlpack/methods/det/det_main.cpp
Removed:
   mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
Modified:
   mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt
Log:
Clean up the CLI parameter names and rename the executable from 'dt_main' to
'det' so that it better matches the other MLPACK executables.
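
For reference, here is a hypothetical invocation of the renamed executable
with the new parameter names (the data file names below are placeholders, and
this assumes the usual MLPACK convention that each parameter name maps
directly to a --long_option):

  det --train_file dataset.csv --folds 10 \
      --training_set_estimates_file train_estimates.csv --print_tree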


Modified: mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt	2012-08-02 01:15:33 UTC (rev 13311)
+++ mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt	2012-08-02 02:12:29 UTC (rev 13312)
@@ -22,11 +22,11 @@
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
 # executable
-add_executable(dt_main
-  dt_main.cpp
+add_executable(det
+  det_main.cpp
 )
 # link dependencies of executable
-target_link_libraries(dt_main
+target_link_libraries(det
   mlpack
 )
-install(TARGETS dt_main RUNTIME DESTINATION bin)
+install(TARGETS det RUNTIME DESTINATION bin)

Copied: mlpack/trunk/src/mlpack/methods/det/det_main.cpp (from rev 13310, mlpack/trunk/src/mlpack/methods/det/dt_main.cpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/det_main.cpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/det/det_main.cpp	2012-08-02 02:12:29 UTC (rev 13312)
@@ -0,0 +1,192 @@
+/**
+ * @file det_main.cpp
+ * @author Parikshit Ram (pram at cc.gatech.edu)
+ *
+ * This file provides an example use of the DET.
+ */
+
+#include <mlpack/core.hpp>
+#include "dt_utils.hpp"
+
+using namespace mlpack;
+using namespace mlpack::det;
+using namespace std;
+
+PROGRAM_INFO("Density Estimation With Density Estimation Trees",
+    "This program performs a number of functions related to Density Estimation "
+    "Trees.  The optimal Density Estimation Tree (DET) can be trained on a set "
+    "of data (specified by --train_file) using cross-validation (with number of"
+    " folds specified by --folds).  In addition, the density of a set of test "
+    "points (specified by --test_file) can be estimated, and the importance of "
+    "each dimension can be computed.  If class labels are given for the "
+    "training points (with --labels_file), the class memberships of each leaf "
+    "in the DET can be calculated."
+    "\n\n"
+    "The created DET can be saved to a file, along with the density estimates "
+    "for the test set and the variable importances.");
+
+// Input data files.
+PARAM_STRING_REQ("train_file", "The data set on which to build a density "
+    "estimation tree.", "t");
+PARAM_STRING("test_file", "A set of test points to estimate the density of.",
+    "T", "");
+PARAM_STRING("labels_file", "The labels for the given training data to "
+    "generate the class membership of each leaf (as an extra statistic)", "l",
+    "");
+
+// Output data files.
+PARAM_STRING("unpruned_tree_estimates_file", "The file in which to output the "
+    "density estimates on the training set from the large unpruned tree.", "u",
+    "");
+PARAM_STRING("training_set_estimates_file", "The file in which to output the "
+    "density estimates on the training set from the final optimally pruned "
+    "tree.", "e", "");
+PARAM_STRING("test_set_estimates_file", "The file in which to output the "
+    "estimates on the test set from the final optimally pruned tree.", "E", "");
+PARAM_STRING("leaf_class_table_file", "The file in which to output the leaf "
+    "class membership table.", "L", "leaf_class_membership.txt");
+PARAM_STRING("tree_file", "The file in which to print the final optimally "
+    "pruned tree.", "r", "");
+PARAM_STRING("vi_file", "The file to output the variable importance values "
+    "for each feature.", "i", "");
+
+// Parameters for the algorithm.
+PARAM_INT("folds", "The number of folds of cross-validation to perform for the "
+    "estimation (0 is LOOCV)", "f", 10);
+PARAM_INT("min_leaf_size", "The minimum size of a leaf in the unpruned, fully "
+    "grown DET.", "N", 5);
+PARAM_INT("max_leaf_size", "The maximum size of a leaf in the unpruned, fully "
+    "grown DET.", "M", 10);
+PARAM_FLAG("volume_regularization", "This flag gives the used the option to use"
+    "a form of regularization similar to the usual alpha-pruning in decision "
+    "tree. But instead of regularizing on the number of leaves, you regularize "
+    "on the sum of the inverse of the volume of the leaves (meaning you "
+    "penalize low volume leaves.", "R");
+
+// Some flags for output of some information about the tree.
+PARAM_FLAG("print_tree", "Print the tree out on the command line (or in the "
+    "file specified with --tree_file).", "p");
+PARAM_FLAG("print_vi", "Print the variable importance of each feature out on "
+    "the command line (or in the file specified with --vi_file).", "I");
+
+int main(int argc, char *argv[])
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  string trainSetFile = CLI::GetParam<string>("train_file");
+  arma::Mat<double> trainingData;
+
+  data::Load(trainSetFile, trainingData, true);
+
+  // Cross-validation here.
+  size_t folds = CLI::GetParam<int>("folds");
+  if (folds == 0)
+  {
+    folds = trainingData.n_cols;
+    Log::Info << "Performing leave-one-out cross validation." << endl;
+  }
+  else
+  {
+    Log::Info << "Performing " << folds << "-fold cross validation." << endl;
+  }
+
+  const string unprunedTreeEstimateFile =
+      CLI::GetParam<string>("unpruned_tree_estimates_file");
+  const bool regularization = CLI::HasParam("volume_regularization");
+  const int maxLeafSize = CLI::GetParam<int>("max_leaf_size");
+  const int minLeafSize = CLI::GetParam<int>("min_leaf_size");
+
+  // Obtain the optimal tree.
+  Timer::Start("det_training");
+  DTree *dtreeOpt = Trainer(trainingData, folds, regularization, maxLeafSize,
+      minLeafSize, unprunedTreeEstimateFile);
+  Timer::Stop("det_training");
+
+  // Compute densities for the training points in the optimal tree.
+  FILE *fp = NULL;
+
+  if (CLI::GetParam<string>("training_set_estimate_file") != "")
+  {
+    fp = fopen(CLI::GetParam<string>("training_set_estimate_file").c_str(),
+        "w");
+
+    // Compute density estimates for each point in the training set.
+    Timer::Start("det_estimation_time");
+    for (size_t i = 0; i < trainingData.n_cols; i++)
+      fprintf(fp, "%lg\n", dtreeOpt->ComputeValue(trainingData.unsafe_col(i)));
+    Timer::Stop("det_estimation_time");
+
+    fclose(fp);
+  }
+
+  // Compute the density at the provided test points and output the density in
+  // the given file.
+  const string testFile = CLI::GetParam<string>("test_file");
+  if (testFile != "")
+  {
+    arma::mat testData;
+    data::Load(testFile, testData, true);
+
+    fp = NULL;
+
+    if (CLI::GetParam<string>("test_set_estimates_file") != "")
+    {
+      fp = fopen(CLI::GetParam<string>("test_set_estimates_file").c_str(), "w");
+
+      Timer::Start("det_test_set_estimation");
+      for (size_t i = 0; i < testData.n_cols; i++)
+        fprintf(fp, "%lg\n", dtreeOpt->ComputeValue(testData.unsafe_col(i)));
+      Timer::Stop("det_test_set_estimation");
+
+      fclose(fp);
+    }
+  }
+
+  // Print the final tree.
+  if (CLI::HasParam("print_tree"))
+  {
+    fp = NULL;
+    if (CLI::GetParam<string>("tree_file") != "")
+    {
+      fp = fopen(CLI::GetParam<string>("tree_file").c_str(), "w");
+
+      if (fp != NULL)
+      {
+        dtreeOpt->WriteTree(fp);
+        fclose(fp);
+      }
+    }
+    else
+    {
+      dtreeOpt->WriteTree(stdout);
+      printf("\n");
+    }
+  }
+
+  // Print the leaf memberships for the optimal tree.
+  if (CLI::GetParam<string>("labels_file") != "")
+  {
+    std::string labelsFile = CLI::GetParam<string>("labels_file");
+    arma::Mat<size_t> labels;
+
+    data::Load(labelsFile, labels, true);
+
+    size_t numClasses = max(max(labels));
+    Log::Info << numClasses << " classes found in labels file '" << labelsFile
+        << "'." << std::endl;
+
+    Log::Assert(trainingData.n_cols == labels.n_cols);
+    Log::Assert(labels.n_rows == 1);
+
+    PrintLeafMembership(dtreeOpt, trainingData, labels, numClasses,
+       CLI::GetParam<string>("leaf_class_table_file"));
+  }
+
+  // Print variable importance.
+  if (CLI::HasParam("print_vi"))
+  {
+    PrintVariableImportance(dtreeOpt, CLI::GetParam<string>("vi_file"));
+  }
+
+  delete dtreeOpt;
+}

Deleted: mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_main.cpp	2012-08-02 01:15:33 UTC (rev 13311)
+++ mlpack/trunk/src/mlpack/methods/det/dt_main.cpp	2012-08-02 02:12:29 UTC (rev 13312)
@@ -1,201 +0,0 @@
-/**
- * @file dt_main.cpp
- * @ Parikshit Ram (pram at cc.gatech.edu)
- *
- * This file provides an example use of the DET
- */
-
-#include <mlpack/core.hpp>
-#include "dt_utils.hpp"
-
-using namespace mlpack;
-using namespace mlpack::det;
-using namespace std;
-
-PROGRAM_INFO("Density estimation with DET", "This program provides an example "
-    "use of the Density Estimation Tree for density estimation. For more "
-    "details, please look at the paper titled 'Density Estimation Trees'.");
-
-// Input data files.
-PARAM_STRING_REQ("input/training_set", "The data set on which to perform "
-    "density estimation.", "S");
-PARAM_STRING("input/test_set", "An extra set of test points on which to "
-    "estimate the density given the estimator.", "T", "");
-PARAM_STRING("input/labels", "The labels for the given training data to "
-    "generate the class membership of each leaf (as an extra statistic)", "L",
-    "");
-
-// Output data files.
-PARAM_STRING("output/unpruned_tree_estimates", "The file in which to output the"
-    " estimates on the training set from the large unpruned tree.", "u", "");
-PARAM_STRING("output/training_set_estimates", "The file in which to output the "
-    "estimates on the training set from the final optimally pruned tree.", "s",
-    "");
-PARAM_STRING("output/test_set_estimates", "The file in which to output the "
-    "estimates on the test set from the final optimally pruned tree.", "t", "");
-PARAM_STRING("output/leaf_class_table", "The file in which to output the leaf "
-    "class membership table.", "l", "leaf_class_membership.txt");
-PARAM_STRING("output/tree", "The file in which to print the final optimally "
-    "pruned tree.", "p", "");
-PARAM_STRING("output/vi", "The file to output the variable importance values "
-    "for each feature.", "i", "");
-
-// Parameters for the algorithm.
-PARAM_INT("param/number_of_classes", "The number of classes present in the "
-    "'labels' set provided", "C", 0);
-PARAM_INT("param/folds", "The number of folds of cross-validation to perform "
-    "for the estimation (enter 0 for LOOCV)", "F", 10);
-PARAM_INT("DET/min_leaf_size", "The minimum size of a leaf in the unpruned "
-    "fully grown DET.", "N", 5);
-PARAM_INT("DET/max_leaf_size", "The maximum size of a leaf in the unpruned "
-    "fully grown DET.", "M", 10);
-PARAM_FLAG("DET/use_volume_reg", "This flag gives the used the option to use a "
-    "form of regularization similar to the usual alpha-pruning in decision "
-    "tree. But instead of regularizing on the number of leaves, you regularize "
-    "on the sum of the inverse of the volume of the leaves (meaning you "
-    "penalize low volume leaves.", "R");
-
-// Some flags for output of some information about the tree.
-PARAM_FLAG("flag/print_tree", "Print the tree out on the command line.", "P");
-PARAM_FLAG("flag/print_vi", "Print the variable importance of each feature "
-    "out on the command line.", "I");
-
-int main(int argc, char *argv[])
-{
-  CLI::ParseCommandLine(argc, argv);
-
-  string trainSetFile = CLI::GetParam<string>("input/training_set");
-  arma::Mat<double> trainingData;
-
-  data::Load(trainSetFile, trainingData, true);
-
-  // Cross-validation here.
-  size_t folds = CLI::GetParam<int>("param/folds");
-  if (folds == 0)
-  {
-    folds = trainingData.n_cols;
-    Log::Info << "Performing leave-one-out cross validation." << endl;
-  }
-  else
-  {
-    Log::Info << "Performing " << folds << "-fold cross validation." << endl;
-  }
-
-  const string unprunedTreeEstimateFile =
-      CLI::GetParam<string>("output/unpruned_tree_estimates");
-  const bool regularization = CLI::HasParam("DET/use_volume_reg");
-  const int maxLeafSize = CLI::GetParam<int>("DET/max_leaf_size");
-  const int minLeafSize = CLI::GetParam<int>("DET/min_leaf_size");
-
-  // Obtain the optimal tree.
-  Timer::Start("det_training");
-  DTree *dtreeOpt = Trainer(trainingData, folds, regularization, maxLeafSize,
-      minLeafSize, unprunedTreeEstimateFile);
-  Timer::Stop("det_training");
-
-  // Compute densities for the training points in the optimal tree.
-  FILE *fp = NULL;
-
-  if (CLI::GetParam<string>("output/training_set_estimates") != "")
-  {
-    fp = fopen(CLI::GetParam<string>("output/training_set_estimates").c_str(),
-        "w");
-  }
-
-  // Computation timing is more accurate when printing is not performed.
-  Timer::Start("det_estimation_time");
-  for (size_t i = 0; i < trainingData.n_cols; i++)
-  {
-    arma::vec testPoint = trainingData.unsafe_col(i);
-    double f = dtreeOpt->ComputeValue(testPoint);
-
-    if (fp != NULL)
-      fprintf(fp, "%lg\n", f);
-  }
-  Timer::Stop("det_estimation_time");
-
-  if (fp != NULL)
-    fclose(fp);
-
-  // Compute the density at the provided test points and output the density in
-  // the given file.
-  if (CLI::GetParam<string>("input/test_set") != "")
-  {
-    const string testFile = CLI::GetParam<string>("input/test_set");
-    arma::mat testData;
-    data::Load(testFile, testData, true);
-
-    fp = NULL;
-
-    if (CLI::GetParam<string>("output/test_set_estimates") != "")
-    {
-      fp = fopen(CLI::GetParam<string>("output/test_set_estimates").c_str(),
-          "w");
-    }
-
-    Timer::Start("det_test_set_estimation");
-    for (size_t i = 0; i < testData.n_cols; i++)
-    {
-      arma::vec testPoint = testData.unsafe_col(i);
-      double f = dtreeOpt->ComputeValue(testPoint);
-
-      if (fp != NULL)
-        fprintf(fp, "%lg\n", f);
-    }
-    Timer::Stop("det_test_set_estimation");
-
-    if (fp != NULL)
-      fclose(fp);
-  }
-
-  // Print the final tree.
-  if (CLI::HasParam("flag/print_tree"))
-  {
-    fp = NULL;
-    if (CLI::GetParam<string>("output/tree") != "")
-    {
-      fp = fopen(CLI::GetParam<string>("output/tree").c_str(), "w");
-
-      if (fp != NULL)
-      {
-        dtreeOpt->WriteTree(fp);
-        fclose(fp);
-      }
-    }
-    else
-    {
-      dtreeOpt->WriteTree(stdout);
-      printf("\n");
-    }
-  }
-
-  // Print the leaf memberships for the optimal tree.
-  if (CLI::GetParam<string>("input/labels") != "")
-  {
-    std::string labelsFile = CLI::GetParam<string>("input/labels");
-    arma::Mat<size_t> labels;
-
-    data::Load(labelsFile, labels, true);
-
-    size_t num_classes = CLI::GetParam<int>("param/number_of_classes");
-    if (num_classes == 0)
-    {
-      Log::Fatal << "Number of classes (param/number_of_classes) not specified!"
-          << endl;
-    }
-
-    Log::Assert(trainingData.n_cols == labels.n_cols);
-    Log::Assert(labels.n_rows == 1);
-
-    PrintLeafMembership(dtreeOpt, trainingData, labels, num_classes,
-       CLI::GetParam<string>("output/leaf_class_table"));
-  }
-
-  // Print variable importance.
-  if (CLI::HasParam("flag/print_vi"))
-  {
-    PrintVariableImportance(dtreeOpt, CLI::GetParam<string>("output/vi"));
-  }
-
-  delete dtreeOpt;
-}



