[mlpack-git] master: Stub of new Load(). (431fcef)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:41:40 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 431fcefe5d86417f2fb088c12cfec643aa5505e1
Author: ryan <ryan at ratml.org>
Date: Wed Sep 9 11:21:34 2015 -0400
Stub of new Load().
>---------------------------------------------------------------
431fcefe5d86417f2fb088c12cfec643aa5505e1
src/mlpack/core/data/load.hpp | 42 ++++++++++++++++++++++++++++++---
src/mlpack/core/data/load_impl.hpp | 48 +++++++++++++++++++++++++++++++++++++-
2 files changed, 86 insertions(+), 4 deletions(-)
diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index a0bf91d..950fd5a 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -20,8 +20,8 @@ namespace data /** Functions to load and save matrices and models. */ {
/**
* Loads a matrix from file, guessing the filetype from the extension. This
- * will transpose the matrix at load time. If the filetype cannot be
- * determined, an error will be given.
+ * will transpose the matrix at load time (unless the transpose parameter is set
+ * to false). If the filetype cannot be determined, an error will be given.
*
* The supported types of files are the same as found in Armadillo:
*
@@ -56,7 +56,43 @@ template<typename eT>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
const bool fatal = false,
- bool transpose = true);
+ const bool transpose = true);
+
+/**
+ * Loads a matrix from a file, guessing the filetype from the extension and
+ * mapping categorical features with a DatasetInfo object. This will transpose
+ * the matrix (unless the transpose parameter is set to false). This particular
+ * overload of Load() can only load text-based formats, such as those given
+ * below:
+ *
+ * - CSV (csv_ascii), denoted by .csv, or optionally .txt
+ * - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
+ * - ASCII (raw_ascii), denoted by .txt
+ *
+ * If the file extension is not one of those types, an error will be given.
+ * This is preferable to Armadillo's default behavior of loading an unknown
+ * filetype as raw_binary, which can have very confusing effects.
+ *
+ * If the parameter 'fatal' is set to true, a std::runtime_error exception will
+ * be thrown if the matrix does not load successfully. The parameter
+ * 'transpose' controls whether or not the matrix is transposed after loading.
+ * In most cases, because data is generally stored in a row-major format and
+ * mlpack requires column-major matrices, this should be left at its default
+ * value of 'true'.
+ *
+ * @param filename Name of file to load.
+ * @param matrix Matrix to load contents of file into.
+ * @param info DatasetInfo object to populate with mappings and data types.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, transpose the matrix after loading.
+ * @return Boolean value indicating success or failure of load.
+ */
+template<typename eT>
+bool Load(const std::string& filename,
+ arma::Mat<eT>& matrix,
+ DatasetInfo& info,
+ const bool fatal = false,
+ const bool transpose = true);
/**
* Load a model from a file, guessing the filetype from the extension, or,
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 369f8bf..307a886 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -51,7 +51,7 @@ template<typename eT>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
const bool fatal,
- bool transpose)
+ const bool transpose)
{
Timer::Start("loading_data");
@@ -263,6 +263,52 @@ bool Load(const std::string& filename,
return success;
}
+// Load with mappings (stub; see load.hpp for the contract). Parses nothing yet.
+template<typename eT>
+bool Load(const std::string& filename,
+          arma::Mat<eT>& matrix,
+          DatasetInfo& info,
+          const bool fatal,
+          const bool transpose)
+{
+  // Time the whole call; every exit path below must stop this timer.
+  Timer::Start("loading_data");
+
+  // Get the extension.
+  std::string extension = Extension(filename);
+
+  // Catch nonexistent files by opening the stream ourselves.
+  std::fstream stream;
+  stream.open(filename.c_str(), std::fstream::in);
+
+  if (!stream.is_open())
+  {
+    Timer::Stop("loading_data");
+    if (fatal)
+      Log::Fatal << "Cannot open file '" << filename << "'. " << std::endl;
+    else
+      Log::Warn << "Cannot open file '" << filename << "'; load failed."
+          << std::endl;
+
+    return false;
+  }
+
+  bool unknownType = false;
+  arma::file_type loadType;
+  std::string stringType;
+
+  if (extension == "csv" || extension == "tsv")
+  {
+    // Stub: parsing for these extensions is not implemented yet.
+  }
+  else if (extension == "txt")
+  {
+    // Stub: parsing for this extension is not implemented yet.
+  }
+  Timer::Stop("loading_data");
+  return false; // Nothing is loaded yet; never fall off the end of a bool.
+}
+
// Load a model from file.
template<typename T>
bool Load(const std::string& filename,
More information about the mlpack-git
mailing list