[mlpack-git] master: fix problem while prepending in executables (cfa9b79)

gitdub at mlpack.org gitdub at mlpack.org
Fri Jun 3 08:32:32 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/4fa39b6ab0baa1428116d0406264b5452e716d06...97402b9098d9d72889aa795923cf8fd67a4d87bf

>---------------------------------------------------------------

commit cfa9b791d08e84f274668e2b2c5f184dee831062
Author: Keon Kim <kwk236 at gmail.com>
Date:   Fri Jun 3 04:51:22 2016 +0900

    fix problem while prepending in executables


>---------------------------------------------------------------

cfa9b791d08e84f274668e2b2c5f184dee831062
 .../methods/preprocess/preprocess_split_main.cpp   | 63 +++++++---------------
 1 file changed, 18 insertions(+), 45 deletions(-)

diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index 1e063db..fc73ae6 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -14,26 +14,20 @@ PROGRAM_INFO("Split Data", "This utility takes a dataset and optionally labels "
     "(-r) option; the default is 0.2 (20%)."
     "\n\n"
     "The program does not modify the original file, but instead makes separate "
-    "files to save the training and test files; you can specify the file names "
-    "with --training_file (-t) and --test_file (-T). If these options are not "
-    "specified, the program automatically names the training and test file by "
-    "prepending 'train_' and 'test_' to the dataset filename (which was "
-    "specified by --input_file)."
+    "files to save the training and test files; the program requires you to "
+    "specify the file names with --training_file (-t) and --test_file (-T)."
     "\n\n"
     "Optionally, labels can be also be split along with the data by specifying "
     "the --input_labels_file (-I) option. Splitting labels works the same way "
     "as splitting the data. The output training and test labels will be saved "
     "to the files specified by --training_labels_file (-l) and "
-    "--test_labels_file (-L), respectively. If these options are not specified,"
-    " then the program will automatically name the training labels and test "
-    "labels file by prepending 'train_' and 'test_' to the labels filename "
-    "(which was specified by --input_labels_file)."
+    "--test_labels_file (-L), respectively."
     "\n\n"
     "So, a simple example where we want to split dataset.csv into "
-    "train_dataset.csv and test_dataset.csv with 60% of the data in the "
-    "training set and 40% of the dataset in the test set, we could run"
+    "train.csv and test.csv with 60% of the data in the training set and 40% "
+    "of the dataset in the test set, we could run"
     "\n\n"
-    "$ mlpack_preprocess_split -i dataset.csv -r 0.4"
+    "$ mlpack_preprocess_split -i dataset.csv -t train.csv -T test.csv -r 0.4"
     "\n\n"
     "If we had a dataset in dataset.csv and associated labels in labels.csv, "
     "and we wanted to split these into training_set.csv, training_labels.csv, "
@@ -46,12 +40,12 @@ PROGRAM_INFO("Split Data", "This utility takes a dataset and optionally labels "
 
 // Define parameters for data.
 PARAM_STRING_REQ("input_file", "File containing data,", "i");
+PARAM_STRING_REQ("training_file", "File name to save train data", "t");
+PARAM_STRING_REQ("test_file", "File name to save test data", "T");
 // Define optional parameters.
 PARAM_STRING("input_labels_file", "File containing labels", "I", "");
-PARAM_STRING("training_file", "File name to save train data", "t", "");
-PARAM_STRING("test_file", "File name to save test data", "T", "");
 PARAM_STRING("training_labels_file", "File name to save train label", "l", "");
-PARAM_STRING("test_labels_file", "File name to save test label", "L", "");
+PARAM_STRING("test_labels_file", "File name to save test label", "L","");
 
 // Define optional test ratio, default is 0.2 (Test 20% Train 80%)
 PARAM_DOUBLE("test_ratio", "Ratio of test set, if not set,"
@@ -67,49 +61,28 @@ int main(int argc, char** argv)
   CLI::ParseCommandLine(argc, argv);
   const string inputFile = CLI::GetParam<string>("input_file");
   const string inputLabels = CLI::GetParam<string>("input_labels_file");
-  string trainingFile = CLI::GetParam<string>("training_file");
-  string testFile = CLI::GetParam<string>("test_file");
-  string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
-  string testLabelsFile = CLI::GetParam<string>("test_labels_file");
+  const string trainingFile = CLI::GetParam<string>("training_file");
+  const string testFile = CLI::GetParam<string>("test_file");
+  const string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
+  const string testLabelsFile = CLI::GetParam<string>("test_labels_file");
   const double testRatio = CLI::GetParam<double>("test_ratio");
 
-  // Check on data parameters.
-  if (trainingFile.empty())
-  {
-    trainingFile = "train_" + inputFile;
-    Log::Warn << "You did not specify --training_file, so the training set file"
-        << " name will be automatically set to '" << trainingFile << "'." 
-        << endl;
-  }
-  if (testFile.empty())
-  {
-    testFile = "test_" + inputFile;
-    Log::Warn << "You did not specify --test_file, so the test set file name "
-        << "will be automatically set to '" << testFile << "'." << endl;
-  }
-
   // Check on label parameters.
-  if (!inputLabels.empty())
+  if (CLI::HasParam("input_labels_file"))
   {
     if (!CLI::HasParam("training_labels_file"))
     {
-      trainingLabelsFile = "train_" + inputLabels;
-      Log::Warn << "You did not specify --training_labels_file, so the training"
-          << "set labels file name will be automatically set to '"
-          << trainingLabelsFile << "'." << endl;
+      Log::Fatal << "You did not specify --training_labels_file" << endl;
     }
     if (!CLI::HasParam("test_labels_file"))
     {
-      testLabelsFile = "test_" + inputLabels;
-      Log::Warn << "You did not specify --test_labels_file, so the test set "
-        << "labels file name will be automatically set to '"
-        << testLabelsFile << "'." << endl;
+      Log::Fatal << "You did not specify --test_labels_file" << endl;
     }
   }
   else
   {
-    if (CLI::HasParam("training_labels_file")
-        || CLI::HasParam("test_labels_file"))
+    if (CLI::HasParam("training_labels_file") ||
+        CLI::HasParam("test_labels_file"))
     {
       Log::Fatal << "When specifying --training_labels_file or "
           << "--test_labels_file, you must also specify --input_labels. "




More information about the mlpack-git mailing list