[mlpack-git] master: Stub of new Load(). (431fcef)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:41:40 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 431fcefe5d86417f2fb088c12cfec643aa5505e1
Author: ryan <ryan at ratml.org>
Date:   Wed Sep 9 11:21:34 2015 -0400

    Stub of new Load().


>---------------------------------------------------------------

431fcefe5d86417f2fb088c12cfec643aa5505e1
 src/mlpack/core/data/load.hpp      | 42 ++++++++++++++++++++++++++++++---
 src/mlpack/core/data/load_impl.hpp | 48 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index a0bf91d..950fd5a 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -20,8 +20,8 @@ namespace data /** Functions to load and save matrices and models. */ {
 
 /**
  * Loads a matrix from file, guessing the filetype from the extension.  This
- * will transpose the matrix at load time.  If the filetype cannot be
- * determined, an error will be given.
+ * will transpose the matrix at load time (unless the transpose parameter is set
+ * to false).  If the filetype cannot be determined, an error will be given.
  *
  * The supported types of files are the same as found in Armadillo:
  *
@@ -56,7 +56,43 @@ template<typename eT>
 bool Load(const std::string& filename,
           arma::Mat<eT>& matrix,
           const bool fatal = false,
-          bool transpose = true);
+          const bool transpose = true);
+
+/**
+ * Loads a matrix from a file, guessing the filetype from the extension and
+ * mapping categorical features with a DatasetInfo object.  This will transpose
+ * the matrix (unless the transpose parameter is set to false).  This particular
+ * overload of Load() can only load text-based formats, such as those given
+ * below:
+ *
+ * - CSV (csv_ascii), denoted by .csv, or optionally .txt
+ * - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
+ * - ASCII (raw_ascii), denoted by .txt
+ *
+ * If the file extension is not one of those types, an error will be given.
+ * This is preferable to Armadillo's default behavior of loading an unknown
+ * filetype as raw_binary, which can have very confusing effects.
+ *
+ * If the parameter 'fatal' is set to true, a std::runtime_error exception will
+ * be thrown if the matrix does not load successfully.  The parameter
+ * 'transpose' controls whether or not the matrix is transposed after loading.
+ * In most cases, because data is generally stored in a row-major format and
+ * mlpack requires column-major matrices, this should be left at its default
+ * value of 'true'.
+ *
+ * @param filename Name of file to load.
+ * @param matrix Matrix to load contents of file into.
+ * @param info DatasetInfo object to populate with mappings and data types.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, transpose the matrix after loading.
+ * @return Boolean value indicating success or failure of load.
+ */
+template<typename eT>
+bool Load(const std::string& filename,
+          arma::Mat<eT>& matrix,
+          DatasetInfo& info,
+          const bool fatal = false,
+          const bool transpose = true);
 
 /**
  * Load a model from a file, guessing the filetype from the extension, or,
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 369f8bf..307a886 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -51,7 +51,7 @@ template<typename eT>
 bool Load(const std::string& filename,
           arma::Mat<eT>& matrix,
           const bool fatal,
-          bool transpose)
+          const bool transpose)
 {
   Timer::Start("loading_data");
 
@@ -263,6 +263,52 @@ bool Load(const std::string& filename,
   return success;
 }
 
+// Load with mappings.  Unfortunately we have to implement this ourselves.
+template<typename eT>
+bool Load(const std::string& filename,
+          arma::Mat<eT>& matrix,
+          DatasetInfo& info,
+          const bool fatal,
+          const bool transpose)
+{
+  // Start timing the load (the timer is stopped on every exit path below).
+  Timer::Start("loading_data");
+
+  // Get the extension.
+  std::string extension = Extension(filename);
+
+  // Catch nonexistent files by opening the stream ourselves.
+  std::fstream stream;
+  stream.open(filename.c_str(), std::fstream::in);
+
+  if (!stream.is_open())
+  {
+    Timer::Stop("loading_data");
+    if (fatal)
+      Log::Fatal << "Cannot open file '" << filename << "'. " << std::endl;
+    else
+      Log::Warn << "Cannot open file '" << filename << "'; load failed."
+          << std::endl;
+
+    return false;
+  }
+
+  bool unknownType = false;
+  arma::file_type loadType;
+  std::string stringType;
+
+  if (extension == "csv" || extension == "tsv")
+  {
+    
+  }
+  else if (extension == "txt")
+  {
+
+  }
+
+  Timer::Stop("loading_data");
+}
+
 // Load a model from file.
 template<typename T>
 bool Load(const std::string& filename,



More information about the mlpack-git mailing list