[mlpack-svn] r10757 - mlpack/trunk/src/mlpack/methods/emst

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Dec 13 17:12:54 EST 2011


Author: rcurtin
Date: 2011-12-13 17:12:53 -0500 (Tue, 13 Dec 2011)
New Revision: 10757

Modified:
   mlpack/trunk/src/mlpack/methods/emst/emst_main.cpp
Log:
Document EMST better.


Modified: mlpack/trunk/src/mlpack/methods/emst/emst_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/emst/emst_main.cpp	2011-12-13 21:55:33 UTC (rev 10756)
+++ mlpack/trunk/src/mlpack/methods/emst/emst_main.cpp	2011-12-13 22:12:53 UTC (rev 10757)
@@ -1,29 +1,44 @@
 /**
- * @file emst.cc
+ * @file emst_main.cpp
+ * @author Bill March (march at gatech.edu)
  *
- * Calls the DualTreeBoruvka algorithm from dtb.h
- * Can optionally call Naive Boruvka's method
+ * Calls the DualTreeBoruvka algorithm from dtb.hpp.
+ * Can optionally call naive Boruvka's method.
  *
  * For algorithm details, see:
  * March, W.B., Ram, P., and Gray, A.G.
  * Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis, Applications.
  * In KDD, 2010.
- *
- * @author Bill March (march at gatech.edu)
  */
 
 #include "dtb.hpp"
 
 #include <mlpack/core.hpp>
 
-PARAM_STRING_REQ("input_file", "Data input file.", "I");
-PARAM_STRING("output_file", "Data output file.  Stored as an edge list.", "O", "emst_output.csv");
-PARAM_FLAG("do_naive", "Compute the MST using .", "");
-PARAM_STRING("naive_output_file", "Naive data output file.", "N",
-    "naive_output.csv");
-PARAM_INT("leaf_size", "Leaf size in the kd-tree.  Singleton leaves give the empirically best performance at the cost of greater memory requirements.", "L", 1);
-PARAM_DOUBLE("total_squared_length", "Squared length of the computed tree.", "T", 0.0);
+PROGRAM_INFO("Fast Euclidean Minimum Spanning Tree", "This program can compute "
+    "the Euclidean minimum spanning tree of a set of input points using the "
+    "dual-tree Boruvka algorithm.  This method is detailed in the following "
+    "paper:\n\n"
+    "  @inproceedings{\n"
+    "    author = {March, W.B., Ram, P., and Gray, A.G.},\n"
+    "    title = {{Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis,\n"
+    "        Applications.}},\n"
+    "    booktitle = {Proceedings of the 16th ACM SIGKDD International "
+    "Conference\n        on Knowledge Discovery and Data Mining},\n"
+    "    series = {KDD '10},\n"
+    "    year = {2010}\n"
+    "  }\n");
 
+PARAM_STRING_REQ("input_file", "Data input file.", "i");
+PARAM_STRING("output_file", "Data output file.  Stored as an edge list.", "o",
+    "emst_output.csv");
+PARAM_FLAG("naive", "Compute the MST using O(n^2) naive algorithm.", "n");
+PARAM_INT("leaf_size", "Leaf size in the kd-tree.  One-element leaves give the "
+    "empirically best performance, but at the cost of greater memory "
+    "requirements.", "l", 1);
+PARAM_DOUBLE("total_squared_length", "Squared length of the computed tree.",
+    "s", 0.0);
+
 using namespace mlpack;
 using namespace mlpack::emst;
 
@@ -32,38 +47,44 @@
   CLI::ParseCommandLine(argc, argv);
 
   ///////////////// READ IN DATA //////////////////////////////////
-  std::string data_file_name = CLI::GetParam<std::string>("emst/input_file");
+  std::string dataFilename = CLI::GetParam<std::string>("input_file");
 
   Log::Info << "Reading in data.\n";
 
-  arma::mat data_points;
-  data::Load(data_file_name.c_str(), data_points, true);
+  arma::mat dataPoints;
+  data::Load(dataFilename.c_str(), dataPoints, true);
 
   // Do naive
-  if (CLI::GetParam<bool>("do_naive"))
+  if (CLI::GetParam<bool>("naive"))
   {
     Log::Info << "Running naive algorithm.\n";
 
     DualTreeBoruvka naive;
 
-    naive.Init(data_points, true);
+    naive.Init(dataPoints, true);
 
     arma::mat naive_results;
     naive.ComputeMST(naive_results);
 
-    std::string naive_output_filename =
-    CLI::GetParam<std::string>("naive_output_file");
+    std::string outputFilename = CLI::GetParam<std::string>("output_file");
 
-    data::Save(naive_output_filename.c_str(), naive_results, true);
+    data::Save(outputFilename.c_str(), naive_results, true);
   }
   else
   {
     Log::Info << "Data read, building tree.\n";
 
     /////////////// Initialize DTB //////////////////////
+    if (CLI::GetParam<int>("leaf_size") <= 0)
+    {
+      Log::Fatal << "Invalid leaf size (" << CLI::GetParam<int>("leaf_size")
+          << ")!  Must be greater than or equal to 1." << std::endl;
+    }
+
     size_t leafSize = CLI::GetParam<int>("leaf_size");
+
     DualTreeBoruvka dtb;
-    dtb.Init(data_points, false, leafSize);
+    dtb.Init(dataPoints, false, leafSize);
 
     Log::Info << "Tree built, running algorithm.\n\n";
 
@@ -72,13 +93,10 @@
 
     dtb.ComputeMST(results);
 
-
     //////////////// Output the Results ////////////////
+    std::string outputFilename = CLI::GetParam<std::string>("output_file");
 
-    std::string output_filename =
-        CLI::GetParam<std::string>("output_file");
-
-    data::Save(output_filename.c_str(), results, true);
+    data::Save(outputFilename.c_str(), results, true);
   }
 
   return 0;




More information about the mlpack-svn mailing list