[mlpack-svn] r15360 - mlpack/trunk/src/mlpack/core/data

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jun 28 15:52:41 EDT 2013


Author: rcurtin
Date: Fri Jun 28 15:52:41 2013
New Revision: 15360

Log:
Add utility functions to normalize labels.


Added:
   mlpack/trunk/src/mlpack/core/data/normalize_labels.hpp
   mlpack/trunk/src/mlpack/core/data/normalize_labels_impl.hpp

Added: mlpack/trunk/src/mlpack/core/data/normalize_labels.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/data/normalize_labels.hpp	Fri Jun 28 15:52:41 2013
@@ -0,0 +1,51 @@
+/**
+ * @file normalize_labels.hpp
+ * @author Ryan Curtin
+ *
+ * Often labels are not given as {0, 1, 2, ...} but instead {1, 2, ...} or even
+ * {-1, 1} or otherwise.  The functions here normalize labels to {0, 1, 2, ...}
+ * and provide a mapping to convert them back to the original labels.
+ */
+#ifndef __MLPACK_CORE_DATA_NORMALIZE_LABELS_HPP
+#define __MLPACK_CORE_DATA_NORMALIZE_LABELS_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace data {
+
+/**
+ * Given a set of labels of a particular datatype, convert them to unsigned
+ * labels in the range [0, n) where n is the number of different labels.  New
+ * labels are handed out in order of first appearance in labelsIn, and
+ * mapping[k] stores the original value that was given the new label k.
+ *
+ * @param labelsIn Input labels of arbitrary datatype.
+ * @param labels Vector that unsigned labels will be stored in (it is resized).
+ * @param mapping Reverse mapping to convert new labels back to old labels.
+ */
+template<typename eT>
+void NormalizeLabels(const arma::Col<eT>& labelsIn,
+                     arma::uvec& labels,
+                     arma::Col<eT>& mapping);
+
+/**
+ * Given a set of labels that have been mapped to the range [0, n), map them
+ * back to the original labels: labelsOut[i] is set to mapping[labels[i]].
+ *
+ * @param labels Set of normalized labels; each is used as an index in mapping.
+ * @param mapping Mapping to use to convert labels.
+ * @param labelsOut Vector to store new labels in (resized to match labels).
+ */
+template<typename eT>
+void RevertLabels(const arma::uvec& labels,
+                  const arma::Col<eT>& mapping,
+                  arma::Col<eT>& labelsOut);
+
+}; // namespace data
+}; // namespace mlpack
+
+// Include implementation.
+#include "normalize_labels_impl.hpp"
+
+#endif

Added: mlpack/trunk/src/mlpack/core/data/normalize_labels_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/data/normalize_labels_impl.hpp	Fri Jun 28 15:52:41 2013
@@ -0,0 +1,87 @@
+/**
+ * @file normalize_labels_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of label normalization functions; these are useful for mapping
+ * labels to the range [0, n).
+ */
+#ifndef __MLPACK_CORE_DATA_NORMALIZE_LABELS_IMPL_HPP
+#define __MLPACK_CORE_DATA_NORMALIZE_LABELS_IMPL_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace data {
+
+/**
+ * Convert labels of an arbitrary element type into unsigned labels in the
+ * range [0, n), where n is the number of distinct values in labelsIn.  Each
+ * distinct value receives the next unused label the first time it is seen, and
+ * mapping[k] holds the original value that was assigned the new label k.
+ *
+ * @param labelsIn Input labels of arbitrary datatype.
+ * @param labels Vector that the unsigned labels will be stored in.
+ * @param mapping Reverse mapping from each new label to its original value.
+ */
+template<typename eT>
+void NormalizeLabels(const arma::Col<eT>& labelsIn,
+                     arma::uvec& labels,
+                     arma::Col<eT>& mapping)
+{
+  // Every input could be a distinct label, so give the mapping room for all of
+  // them up front; it is trimmed to the number actually found at the end.
+  const size_t numPoints = labelsIn.n_elem;
+  mapping.set_size(numPoints);
+  labels.set_size(numPoints);
+
+  // Number of distinct labels discovered so far; also the next unused label.
+  size_t numClasses = 0;
+  for (size_t point = 0; point < numPoints; ++point)
+  {
+    // Walk the labels seen so far, stopping at the first one equal to the
+    // value of this point (or at numClasses if there is no such label).
+    size_t index = 0;
+    while ((index < numClasses) && (labelsIn[point] != mapping[index]))
+      ++index;
+
+    // Running off the end of the known labels means this value is new, so it
+    // is recorded in the next free slot of the mapping.
+    if (index == numClasses)
+    {
+      mapping[numClasses] = labelsIn[point];
+      ++numClasses;
+    }
+
+    // Either way, index is now the normalized label of this point.
+    labels[point] = index;
+  }
+
+  // Only the first numClasses entries of the mapping were filled in; shrink
+  // it to that size, keeping those entries.
+  mapping.resize(numClasses);
+}
+
+/**
+ * Undo a label normalization: every entry of 'labels' is treated as an index
+ * into 'mapping', and the value found there is written to 'labelsOut'.
+ *
+ * @param labels Set of normalized labels to convert.
+ * @param mapping Mapping to use to convert labels.
+ * @param labelsOut Vector to store new labels in.
+ */
+template<typename eT>
+void RevertLabels(const arma::uvec& labels,
+                  const arma::Col<eT>& mapping,
+                  arma::Col<eT>& labelsOut)
+{
+  // Each normalized label is an index into the mapping; look it up directly.
+  const size_t numLabels = labels.n_elem;
+  labelsOut.set_size(numLabels);
+  for (size_t point = 0; point < numLabels; ++point)
+    labelsOut[point] = mapping[labels[point]];
+}
+
+}; // namespace data
+}; // namespace mlpack
+
+#endif



More information about the mlpack-svn mailing list