[mlpack-git] master: add more program info of split (2417a19)

gitdub at mlpack.org gitdub at mlpack.org
Thu Jun 2 02:14:19 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/1f562a1aba7ae55475afcc95659511c2b7f694e5...5b8fdce471328f722fcd8c0f22a6d995ce22c98b

>---------------------------------------------------------------

commit 2417a1992b9dfdc96a5be45450e73a17e97db050
Author: Keon Kim <kwk236 at gmail.com>
Date:   Thu Jun 2 15:01:22 2016 +0900

    add more program info of split


>---------------------------------------------------------------

2417a1992b9dfdc96a5be45450e73a17e97db050
 src/mlpack/core/data/split_data.hpp                   |  3 ++-
 .../methods/preprocess/preprocess_split_main.cpp      | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index e5f1e2e..d02f6c6 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -183,7 +183,8 @@ Split(const arma::Mat<T>& input,
   arma::Mat<T> testData;
   Split(input, trainData, testData, testRatio);
 
-  return std::make_tuple(std::move(trainData), std::move(testData));
+  return std::make_tuple(std::move(trainData),
+                         std::move(testData));
 }
 
 } // namespace data
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index ca8e830..d24b8ff 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -7,9 +7,22 @@
 #include <mlpack/core.hpp>
 #include <mlpack/core/data/split_data.hpp>
 
-PROGRAM_INFO("Split into Train and Test Data", "This "
-    "utility takes data and labels and split into a training "
-    "set and a test set.");
+PROGRAM_INFO("Split Data", "This utility takes data and splits it into a "
+    "training set and a test set. Before the split happens, it shuffles the "
+    "data in each feature. If (--test_ratio) is not specified, the default "
+    "test-to-training ratio is set to 0.2."
+    "\n\n"
+    "The program does not modify or write to the original file, but instead "
+    "creates separate files to save the training and test sets; you can "
+    "specify the file names with (--training_file) and (--test_file). If the "
+    "names are not specified, the program automatically names the training "
+    "and test files by attaching 'train_' and 'test_' in front of the "
+    "original file name."
+    "\n\n"
+    "Optionally, labels can also be split along with the data at the same "
+    "time by specifying the (--input_labels) option. Splitting labels works "
+    "the same as splitting the data, and you can also specify the names using "
+    "(--training_labels_file) and (--test_labels_file).");
 
 // Define parameters for data
 PARAM_STRING_REQ("input_file", "File containing data,", "i");




More information about the mlpack-git mailing list