[mlpack-svn] r13312 - mlpack/trunk/src/mlpack/methods/det
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Aug 1 22:12:30 EDT 2012
Author: rcurtin
Date: 2012-08-01 22:12:29 -0400 (Wed, 01 Aug 2012)
New Revision: 13312
Added:
mlpack/trunk/src/mlpack/methods/det/det_main.cpp
Removed:
mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
Modified:
mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt
Log:
Clean up CLI parameter names and then rename the executable to 'det' not
'dt_main' so it matches more with other MLPACK executables.
Modified: mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt 2012-08-02 01:15:33 UTC (rev 13311)
+++ mlpack/trunk/src/mlpack/methods/det/CMakeLists.txt 2012-08-02 02:12:29 UTC (rev 13312)
@@ -22,11 +22,11 @@
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
# executable
-add_executable(dt_main
- dt_main.cpp
+add_executable(det
+ det_main.cpp
)
# link dependencies of executable
-target_link_libraries(dt_main
+target_link_libraries(det
mlpack
)
-install(TARGETS dt_main RUNTIME DESTINATION bin)
+install(TARGETS det RUNTIME DESTINATION bin)
Copied: mlpack/trunk/src/mlpack/methods/det/det_main.cpp (from rev 13310, mlpack/trunk/src/mlpack/methods/det/dt_main.cpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/det_main.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/det/det_main.cpp 2012-08-02 02:12:29 UTC (rev 13312)
@@ -0,0 +1,192 @@
+/**
+ * @file det_main.cpp
+ * @author Parikshit Ram (pram at cc.gatech.edu)
+ *
+ * This file provides an example use of the DET
+ */
+
+#include <mlpack/core.hpp>
+#include "dt_utils.hpp"
+
+using namespace mlpack;
+using namespace mlpack::det;
+using namespace std;
+
+PROGRAM_INFO("Density Estimation With Density Estimation Trees",
+ "This program performs a number of functions related to Density Estimation "
+ "Trees. The optimal Density Estimation Tree (DET) can be trained on a set "
+ "of data (specified by --train_file) using cross-validation (with number of"
+ " folds specified by --folds). In addition, the density of a set of test "
+ "points (specified by --test_file) can be estimated, and the importance of "
+ "each dimension can be computed. If class labels are given for the "
+ "training points (with --labels_file), the class memberships of each leaf "
+ "in the DET can be calculated."
+ "\n\n"
+ "The created DET can be saved to a file, along with the density estimates "
+ "for the test set and the variable importances.");
+
+// Input data files.
+PARAM_STRING_REQ("train_file", "The data set on which to build a density "
+ "estimation tree.", "t");
+PARAM_STRING("test_file", "A set of test points to estimate the density of.",
+ "T", "");
+PARAM_STRING("labels_file", "The labels for the given training data to "
+ "generate the class membership of each leaf (as an extra statistic)", "l",
+ "");
+
+// Output data files.
+PARAM_STRING("unpruned_tree_estimates_file", "The file in which to output the "
+ "density estimates on the training set from the large unpruned tree.", "u",
+ "");
+PARAM_STRING("training_set_estimates_file", "The file in which to output the "
+ "density estimates on the training set from the final optimally pruned "
+ "tree.", "e", "");
+PARAM_STRING("test_set_estimates_file", "The file in which to output the "
+ "estimates on the test set from the final optimally pruned tree.", "E", "");
+PARAM_STRING("leaf_class_table_file", "The file in which to output the leaf "
+ "class membership table.", "L", "leaf_class_membership.txt");
+PARAM_STRING("tree_file", "The file in which to print the final optimally "
+ "pruned tree.", "r", "");
+PARAM_STRING("vi_file", "The file to output the variable importance values "
+ "for each feature.", "i", "");
+
+// Parameters for the algorithm.
+PARAM_INT("folds", "The number of folds of cross-validation to perform for the "
+ "estimation (0 is LOOCV)", "f", 10);
+PARAM_INT("min_leaf_size", "The minimum size of a leaf in the unpruned, fully "
+ "grown DET.", "N", 5);
+PARAM_INT("max_leaf_size", "The maximum size of a leaf in the unpruned, fully "
+ "grown DET.", "M", 10);
+// Optional flag switching the pruning penalty from the number of leaves to the
+// sum of inverse leaf volumes (see the help text below).
+PARAM_FLAG("volume_regularization", "This flag gives the user the option to "
+    "use a form of regularization similar to the usual alpha-pruning in "
+    "decision tree. But instead of regularizing on the number of leaves, you "
+    "regularize on the sum of the inverse of the volume of the leaves (meaning "
+    "you penalize low volume leaves).", "R");
+
+// Some flags for output of some information about the tree.
+PARAM_FLAG("print_tree", "Print the tree out on the command line (or in the "
+ "file specified with --tree_file).", "p");
+PARAM_FLAG("print_vi", "Print the variable importance of each feature out on "
+ "the command line (or in the file specified with --vi_file).", "I");
+
+/**
+ * Entry point of the 'det' executable.
+ *
+ * Loads the training set named by --train_file, obtains the optimally pruned
+ * Density Estimation Tree from Trainer() using cross-validation, and then,
+ * depending on which parameters were given, writes density estimates for the
+ * training and test sets, prints the tree, prints the leaf class membership
+ * table, and prints the variable importances.
+ *
+ * @param argc Number of command-line arguments.
+ * @param argv Command-line arguments, parsed by CLI::ParseCommandLine().
+ */
+int main(int argc, char *argv[])
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  const string trainSetFile = CLI::GetParam<string>("train_file");
+  arma::Mat<double> trainingData;
+
+  data::Load(trainSetFile, trainingData, true);
+
+  // Cross-validation here.  Reject negative values before converting to
+  // size_t, where they would silently wrap around to a huge fold count.
+  const int foldsParam = CLI::GetParam<int>("folds");
+  if (foldsParam < 0)
+  {
+    Log::Fatal << "Number of folds (--folds) must be non-negative!" << endl;
+  }
+
+  size_t folds = static_cast<size_t>(foldsParam);
+  if (folds == 0)
+  {
+    // Zero requests leave-one-out: one fold per training point.
+    folds = trainingData.n_cols;
+    Log::Info << "Performing leave-one-out cross validation." << endl;
+  }
+  else
+  {
+    Log::Info << "Performing " << folds << "-fold cross validation." << endl;
+  }
+
+  const string unprunedTreeEstimateFile =
+      CLI::GetParam<string>("unpruned_tree_estimates_file");
+  const bool regularization = CLI::HasParam("volume_regularization");
+  const int maxLeafSize = CLI::GetParam<int>("max_leaf_size");
+  const int minLeafSize = CLI::GetParam<int>("min_leaf_size");
+
+  // Obtain the optimal tree.
+  Timer::Start("det_training");
+  DTree *dtreeOpt = Trainer(trainingData, folds, regularization, maxLeafSize,
+      minLeafSize, unprunedTreeEstimateFile);
+  Timer::Stop("det_training");
+
+  // Compute densities for the training points in the optimal tree.  The
+  // parameter name must match the PARAM_STRING declaration exactly
+  // ("training_set_estimates_file", plural).
+  const string trainEstimatesFile =
+      CLI::GetParam<string>("training_set_estimates_file");
+  if (trainEstimatesFile != "")
+  {
+    FILE *fp = fopen(trainEstimatesFile.c_str(), "w");
+
+    if (fp == NULL)
+    {
+      Log::Warn << "Cannot open '" << trainEstimatesFile << "' for writing; "
+          << "training set estimates not saved." << endl;
+    }
+    else
+    {
+      // Compute density estimates for each point in the training set.
+      Timer::Start("det_estimation_time");
+      for (size_t i = 0; i < trainingData.n_cols; i++)
+        fprintf(fp, "%lg\n",
+            dtreeOpt->ComputeValue(trainingData.unsafe_col(i)));
+      Timer::Stop("det_estimation_time");
+
+      fclose(fp);
+    }
+  }
+
+  // Compute the density at the provided test points and output the density in
+  // the given file.
+  const string testFile = CLI::GetParam<string>("test_file");
+  if (testFile != "")
+  {
+    arma::mat testData;
+    data::Load(testFile, testData, true);
+
+    const string testEstimatesFile =
+        CLI::GetParam<string>("test_set_estimates_file");
+    if (testEstimatesFile != "")
+    {
+      FILE *fp = fopen(testEstimatesFile.c_str(), "w");
+
+      if (fp == NULL)
+      {
+        Log::Warn << "Cannot open '" << testEstimatesFile << "' for writing; "
+            << "test set estimates not saved." << endl;
+      }
+      else
+      {
+        Timer::Start("det_test_set_estimation");
+        for (size_t i = 0; i < testData.n_cols; i++)
+          fprintf(fp, "%lg\n", dtreeOpt->ComputeValue(testData.unsafe_col(i)));
+        Timer::Stop("det_test_set_estimation");
+
+        fclose(fp);
+      }
+    }
+  }
+
+  // Print the final tree, either to --tree_file or to standard output.
+  if (CLI::HasParam("print_tree"))
+  {
+    const string treeFile = CLI::GetParam<string>("tree_file");
+    if (treeFile != "")
+    {
+      FILE *fp = fopen(treeFile.c_str(), "w");
+
+      if (fp != NULL)
+      {
+        dtreeOpt->WriteTree(fp);
+        fclose(fp);
+      }
+      else
+      {
+        Log::Warn << "Cannot open '" << treeFile << "' for writing; tree not "
+            << "saved." << endl;
+      }
+    }
+    else
+    {
+      dtreeOpt->WriteTree(stdout);
+      printf("\n");
+    }
+  }
+
+  // Print the leaf memberships for the optimal tree.
+  const string labelsFile = CLI::GetParam<string>("labels_file");
+  if (labelsFile != "")
+  {
+    arma::Mat<size_t> labels;
+
+    data::Load(labelsFile, labels, true);
+
+    // One label per training point, stored as a single row.
+    Log::Assert(trainingData.n_cols == labels.n_cols);
+    Log::Assert(labels.n_rows == 1);
+
+    // NOTE(review): the class count is taken to be the largest label value;
+    // if labels start at 0 this is one short -- confirm against what
+    // PrintLeafMembership() expects.
+    const size_t numClasses = max(max(labels));
+    Log::Info << numClasses << " classes found in labels file '" << labelsFile
+        << "'." << std::endl;
+
+    PrintLeafMembership(dtreeOpt, trainingData, labels, numClasses,
+        CLI::GetParam<string>("leaf_class_table_file"));
+  }
+
+  // Print variable importance.
+  if (CLI::HasParam("print_vi"))
+  {
+    PrintVariableImportance(dtreeOpt, CLI::GetParam<string>("vi_file"));
+  }
+
+  // The tree returned by Trainer() is released here, as in the original code.
+  delete dtreeOpt;
+}
Deleted: mlpack/trunk/src/mlpack/methods/det/dt_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-08-02 01:15:33 UTC (rev 13311)
+++ mlpack/trunk/src/mlpack/methods/det/dt_main.cpp 2012-08-02 02:12:29 UTC (rev 13312)
@@ -1,201 +0,0 @@
-/**
- * @file dt_main.cpp
- * @ Parikshit Ram (pram at cc.gatech.edu)
- *
- * This file provides an example use of the DET
- */
-
-#include <mlpack/core.hpp>
-#include "dt_utils.hpp"
-
-using namespace mlpack;
-using namespace mlpack::det;
-using namespace std;
-
-PROGRAM_INFO("Density estimation with DET", "This program provides an example "
- "use of the Density Estimation Tree for density estimation. For more "
- "details, please look at the paper titled 'Density Estimation Trees'.");
-
-// Input data files.
-PARAM_STRING_REQ("input/training_set", "The data set on which to perform "
- "density estimation.", "S");
-PARAM_STRING("input/test_set", "An extra set of test points on which to "
- "estimate the density given the estimator.", "T", "");
-PARAM_STRING("input/labels", "The labels for the given training data to "
- "generate the class membership of each leaf (as an extra statistic)", "L",
- "");
-
-// Output data files.
-PARAM_STRING("output/unpruned_tree_estimates", "The file in which to output the"
- " estimates on the training set from the large unpruned tree.", "u", "");
-PARAM_STRING("output/training_set_estimates", "The file in which to output the "
- "estimates on the training set from the final optimally pruned tree.", "s",
- "");
-PARAM_STRING("output/test_set_estimates", "The file in which to output the "
- "estimates on the test set from the final optimally pruned tree.", "t", "");
-PARAM_STRING("output/leaf_class_table", "The file in which to output the leaf "
- "class membership table.", "l", "leaf_class_membership.txt");
-PARAM_STRING("output/tree", "The file in which to print the final optimally "
- "pruned tree.", "p", "");
-PARAM_STRING("output/vi", "The file to output the variable importance values "
- "for each feature.", "i", "");
-
-// Parameters for the algorithm.
-PARAM_INT("param/number_of_classes", "The number of classes present in the "
- "'labels' set provided", "C", 0);
-PARAM_INT("param/folds", "The number of folds of cross-validation to perform "
- "for the estimation (enter 0 for LOOCV)", "F", 10);
-PARAM_INT("DET/min_leaf_size", "The minimum size of a leaf in the unpruned "
- "fully grown DET.", "N", 5);
-PARAM_INT("DET/max_leaf_size", "The maximum size of a leaf in the unpruned "
- "fully grown DET.", "M", 10);
-PARAM_FLAG("DET/use_volume_reg", "This flag gives the used the option to use a "
- "form of regularization similar to the usual alpha-pruning in decision "
- "tree. But instead of regularizing on the number of leaves, you regularize "
- "on the sum of the inverse of the volume of the leaves (meaning you "
- "penalize low volume leaves.", "R");
-
-// Some flags for output of some information about the tree.
-PARAM_FLAG("flag/print_tree", "Print the tree out on the command line.", "P");
-PARAM_FLAG("flag/print_vi", "Print the variable importance of each feature "
- "out on the command line.", "I");
-
-int main(int argc, char *argv[])
-{
- CLI::ParseCommandLine(argc, argv);
-
- string trainSetFile = CLI::GetParam<string>("input/training_set");
- arma::Mat<double> trainingData;
-
- data::Load(trainSetFile, trainingData, true);
-
- // Cross-validation here.
- size_t folds = CLI::GetParam<int>("param/folds");
- if (folds == 0)
- {
- folds = trainingData.n_cols;
- Log::Info << "Performing leave-one-out cross validation." << endl;
- }
- else
- {
- Log::Info << "Performing " << folds << "-fold cross validation." << endl;
- }
-
- const string unprunedTreeEstimateFile =
- CLI::GetParam<string>("output/unpruned_tree_estimates");
- const bool regularization = CLI::HasParam("DET/use_volume_reg");
- const int maxLeafSize = CLI::GetParam<int>("DET/max_leaf_size");
- const int minLeafSize = CLI::GetParam<int>("DET/min_leaf_size");
-
- // Obtain the optimal tree.
- Timer::Start("det_training");
- DTree *dtreeOpt = Trainer(trainingData, folds, regularization, maxLeafSize,
- minLeafSize, unprunedTreeEstimateFile);
- Timer::Stop("det_training");
-
- // Compute densities for the training points in the optimal tree.
- FILE *fp = NULL;
-
- if (CLI::GetParam<string>("output/training_set_estimates") != "")
- {
- fp = fopen(CLI::GetParam<string>("output/training_set_estimates").c_str(),
- "w");
- }
-
- // Computation timing is more accurate when printing is not performed.
- Timer::Start("det_estimation_time");
- for (size_t i = 0; i < trainingData.n_cols; i++)
- {
- arma::vec testPoint = trainingData.unsafe_col(i);
- double f = dtreeOpt->ComputeValue(testPoint);
-
- if (fp != NULL)
- fprintf(fp, "%lg\n", f);
- }
- Timer::Stop("det_estimation_time");
-
- if (fp != NULL)
- fclose(fp);
-
- // Compute the density at the provided test points and output the density in
- // the given file.
- if (CLI::GetParam<string>("input/test_set") != "")
- {
- const string testFile = CLI::GetParam<string>("input/test_set");
- arma::mat testData;
- data::Load(testFile, testData, true);
-
- fp = NULL;
-
- if (CLI::GetParam<string>("output/test_set_estimates") != "")
- {
- fp = fopen(CLI::GetParam<string>("output/test_set_estimates").c_str(),
- "w");
- }
-
- Timer::Start("det_test_set_estimation");
- for (size_t i = 0; i < testData.n_cols; i++)
- {
- arma::vec testPoint = testData.unsafe_col(i);
- double f = dtreeOpt->ComputeValue(testPoint);
-
- if (fp != NULL)
- fprintf(fp, "%lg\n", f);
- }
- Timer::Stop("det_test_set_estimation");
-
- if (fp != NULL)
- fclose(fp);
- }
-
- // Print the final tree.
- if (CLI::HasParam("flag/print_tree"))
- {
- fp = NULL;
- if (CLI::GetParam<string>("output/tree") != "")
- {
- fp = fopen(CLI::GetParam<string>("output/tree").c_str(), "w");
-
- if (fp != NULL)
- {
- dtreeOpt->WriteTree(fp);
- fclose(fp);
- }
- }
- else
- {
- dtreeOpt->WriteTree(stdout);
- printf("\n");
- }
- }
-
- // Print the leaf memberships for the optimal tree.
- if (CLI::GetParam<string>("input/labels") != "")
- {
- std::string labelsFile = CLI::GetParam<string>("input/labels");
- arma::Mat<size_t> labels;
-
- data::Load(labelsFile, labels, true);
-
- size_t num_classes = CLI::GetParam<int>("param/number_of_classes");
- if (num_classes == 0)
- {
- Log::Fatal << "Number of classes (param/number_of_classes) not specified!"
- << endl;
- }
-
- Log::Assert(trainingData.n_cols == labels.n_cols);
- Log::Assert(labels.n_rows == 1);
-
- PrintLeafMembership(dtreeOpt, trainingData, labels, num_classes,
- CLI::GetParam<string>("output/leaf_class_table"));
- }
-
- // Print variable importance.
- if (CLI::HasParam("flag/print_vi"))
- {
- PrintVariableImportance(dtreeOpt, CLI::GetParam<string>("output/vi"));
- }
-
- delete dtreeOpt;
-}
More information about the mlpack-svn
mailing list