[mlpack-git] master: Merge remote-tracking branch 'upstream/master' into r-proj-tree (dc0b456)

gitdub at mlpack.org gitdub at mlpack.org
Sun Aug 7 13:35:16 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/a7794bde8082c691553152393e1e230098f5e920...87776e52cf9ead63fa458118a0cfd2fe46b23466

>---------------------------------------------------------------

commit dc0b456bb7bc41485cb6c13f2256e8c698be269b
Merge: bc8994a b8de1fa
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date:   Sun Aug 7 20:35:16 2016 +0300

    Merge remote-tracking branch 'upstream/master' into r-proj-tree


>---------------------------------------------------------------

dc0b456bb7bc41485cb6c13f2256e8c698be269b
 CMake/mlpack_coverage.in                           |  125 ++
 CMakeLists.txt                                     |   62 +-
 HISTORY.md                                         |    6 +-
 README.md                                          |    2 +-
 doc/guide/build.hpp                                |    4 +-
 doc/tutorials/kmeans/kmeans.txt                    |   18 +-
 src/mlpack/CMakeLists.txt                          |    3 +
 src/mlpack/core.hpp                                |    1 +
 src/mlpack/core/boost_backport/README.md           |   14 +-
 src/mlpack/core/boost_backport/bernoulli.hpp       |  176 +++
 src/mlpack/core/boost_backport/boost_backport.hpp  |   42 +
 .../boost_backport/detail/bernoulli_details.hpp    |  660 ++++++++
 .../core/boost_backport/detail/polygamma.hpp       |  558 +++++++
 .../boost_backport/detail/unchecked_bernoulli.hpp  |  700 +++++++++
 src/mlpack/core/boost_backport/math_fwd.hpp        | 1586 ++++++++++++++++++++
 src/mlpack/core/boost_backport/policy.hpp          | 1042 +++++++++++++
 src/mlpack/core/boost_backport/polygamma.hpp       |   94 ++
 src/mlpack/core/boost_backport/trigamma.hpp        |  469 ++++++
 src/mlpack/core/data/CMakeLists.txt                |    5 +-
 src/mlpack/core/data/dataset_info.hpp              |  114 --
 src/mlpack/core/data/dataset_info_impl.hpp         |  100 --
 src/mlpack/core/data/dataset_mapper.hpp            |  164 ++
 src/mlpack/core/data/dataset_mapper_impl.hpp       |  151 ++
 .../data/imputation_methods}/CMakeLists.txt        |    6 +-
 .../data/imputation_methods/custom_imputation.hpp  |   77 +
 .../data/imputation_methods/listwise_deletion.hpp  |   69 +
 .../data/imputation_methods/mean_imputation.hpp    |   99 ++
 .../data/imputation_methods/median_imputation.hpp  |   88 ++
 src/mlpack/core/data/imputer.hpp                   |   90 ++
 src/mlpack/core/data/load.hpp                      |   18 +-
 src/mlpack/core/data/load_arff.hpp                 |    4 +-
 src/mlpack/core/data/load_arff_impl.hpp            |    6 +-
 src/mlpack/core/data/load_impl.hpp                 |   51 +-
 .../data/map_policies}/CMakeLists.txt              |    4 +-
 src/mlpack/core/data/map_policies/datatype.hpp     |   28 +
 .../core/data/map_policies/increment_policy.hpp    |  131 ++
 .../core/data/map_policies/missing_policy.hpp      |  151 ++
 src/mlpack/core/dists/CMakeLists.txt               |    2 +
 src/mlpack/core/dists/gamma_distribution.cpp       |  119 ++
 src/mlpack/core/dists/gamma_distribution.hpp       |  135 ++
 .../tree/binary_space_tree/midpoint_split_impl.hpp |    7 +-
 src/mlpack/core/tree/binary_space_tree/traits.hpp  |   39 +
 .../core/tree/cover_tree/cover_tree_impl.hpp       |   32 +-
 src/mlpack/core/tree/rectangle_tree.hpp            |    2 +-
 .../tree/rectangle_tree/r_plus_tree_split_impl.hpp |   21 +-
 src/mlpack/core/tree/rectangle_tree/traits.hpp     |   48 +
 src/mlpack/core/util/cli.cpp                       |  126 +-
 src/mlpack/core/util/cli.hpp                       |  351 +----
 src/mlpack/core/util/cli_impl.hpp                  |   21 +-
 src/mlpack/core/util/option.hpp                    |   20 +-
 src/mlpack/core/util/option_impl.hpp               |   11 +-
 src/mlpack/core/util/param.hpp                     |  426 ++++++
 src/mlpack/methods/CMakeLists.txt                  |    1 +
 src/mlpack/methods/adaboost/adaboost_main.cpp      |   37 +-
 src/mlpack/methods/ann/cnn.hpp                     |    3 +
 src/mlpack/methods/ann/cnn_impl.hpp                |   13 +-
 src/mlpack/methods/ann/layer/pooling_layer.hpp     |   37 +-
 src/mlpack/methods/cf/cf.cpp                       |   73 +-
 src/mlpack/methods/cf/cf.hpp                       |   24 +-
 src/mlpack/methods/cf/cf_main.cpp                  |   44 +-
 .../methods/decision_stump/decision_stump_main.cpp |   23 +-
 src/mlpack/methods/det/det_main.cpp                |   37 +-
 src/mlpack/methods/emst/emst_main.cpp              |   20 +-
 src/mlpack/methods/fastmks/fastmks.hpp             |   21 +-
 src/mlpack/methods/fastmks/fastmks_impl.hpp        |  107 +-
 src/mlpack/methods/fastmks/fastmks_main.cpp        |   33 +-
 src/mlpack/methods/fastmks/fastmks_rules.hpp       |   69 +-
 src/mlpack/methods/fastmks/fastmks_rules_impl.hpp  |  101 +-
 src/mlpack/methods/gmm/gmm_generate_main.cpp       |   15 +-
 src/mlpack/methods/gmm/gmm_probability_main.cpp    |   21 +-
 src/mlpack/methods/gmm/gmm_train_main.cpp          |   26 +-
 src/mlpack/methods/hmm/hmm_generate_main.cpp       |   23 +-
 src/mlpack/methods/hmm/hmm_loglik_main.cpp         |    8 +-
 src/mlpack/methods/hmm/hmm_train_main.cpp          |   21 +-
 src/mlpack/methods/hmm/hmm_viterbi_main.cpp        |   10 +-
 .../hoeffding_trees/hoeffding_tree_main.cpp        |   36 +-
 src/mlpack/methods/kernel_pca/kernel_pca_main.cpp  |   31 +-
 src/mlpack/methods/kmeans/kmeans_main.cpp          |   37 +-
 src/mlpack/methods/lars/lars_main.cpp              |   61 +-
 .../linear_regression/linear_regression_main.cpp   |   63 +-
 .../local_coordinate_coding_main.cpp               |   32 +-
 .../logistic_regression_main.cpp                   |   48 +-
 src/mlpack/methods/lsh/lsh_main.cpp                |   45 +-
 src/mlpack/methods/lsh/lsh_search.hpp              |   42 +-
 src/mlpack/methods/lsh/lsh_search_impl.hpp         |  100 +-
 src/mlpack/methods/mean_shift/mean_shift_main.cpp  |   80 +-
 src/mlpack/methods/mvu/mvu_main.cpp                |   10 +-
 src/mlpack/methods/naive_bayes/nbc_main.cpp        |   17 +-
 src/mlpack/methods/nca/nca_main.cpp                |   49 +-
 src/mlpack/methods/neighbor_search/CMakeLists.txt  |    2 -
 src/mlpack/methods/neighbor_search/kfn_main.cpp    |   40 +-
 src/mlpack/methods/neighbor_search/knn_main.cpp    |   27 +-
 .../neighbor_search/neighbor_search_impl.hpp       |   35 +-
 .../neighbor_search/neighbor_search_rules.hpp      |   66 +-
 .../neighbor_search/neighbor_search_rules_impl.hpp |   83 +-
 .../sort_policies/furthest_neighbor_sort.cpp       |   27 -
 .../sort_policies/furthest_neighbor_sort.hpp       |   18 -
 .../sort_policies/nearest_neighbor_sort.cpp        |   27 -
 .../sort_policies/nearest_neighbor_sort.hpp        |   18 -
 src/mlpack/methods/nmf/nmf_main.cpp                |   36 +-
 src/mlpack/methods/pca/pca_main.cpp                |   27 +-
 src/mlpack/methods/perceptron/perceptron_main.cpp  |   34 +-
 src/mlpack/methods/preprocess/CMakeLists.txt       |    2 +-
 .../preprocess/preprocess_binarize_main.cpp        |   18 +-
 .../methods/preprocess/preprocess_imputer_main.cpp |  174 +++
 .../methods/preprocess/preprocess_split_main.cpp   |   64 +-
 src/mlpack/methods/radical/radical_main.cpp        |   65 +-
 .../methods/range_search/range_search_impl.hpp     |    2 +-
 .../methods/range_search/range_search_main.cpp     |   25 +-
 .../methods/range_search/range_search_rules.hpp    |    8 +-
 src/mlpack/methods/rann/krann_main.cpp             |   31 +-
 src/mlpack/methods/rann/ra_search_impl.hpp         |   42 +-
 src/mlpack/methods/rann/ra_search_rules.hpp        |   76 +-
 src/mlpack/methods/rann/ra_search_rules_impl.hpp   |   92 +-
 src/mlpack/methods/rmva/rmva_main.cpp              |   37 +-
 .../softmax_regression/softmax_regression_main.cpp |   35 +-
 .../methods/sparse_coding/sparse_coding_main.cpp   |   56 +-
 src/mlpack/prereqs.hpp                             |    8 +-
 src/mlpack/tests/CMakeLists.txt                    |    1 +
 src/mlpack/tests/cli_test.cpp                      |    2 +-
 src/mlpack/tests/distribution_test.cpp             |  168 ++-
 src/mlpack/tests/imputation_test.cpp               |  266 ++++
 src/mlpack/tests/krann_search_test.cpp             |    2 +-
 src/mlpack/tests/load_save_test.cpp                |    2 +
 src/mlpack/tests/pca_test.cpp                      |   26 +-
 src/mlpack/tests/rectangle_tree_test.cpp           |    8 +
 src/mlpack/tests/sort_policy_test.cpp              |   72 -
 src/mlpack/tests/union_find_test.cpp               |   40 +-
 128 files changed, 9595 insertions(+), 1863 deletions(-)

diff --cc src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
index 6f2c07f,28dc707..7cbf6fd
--- a/src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
@@@ -36,7 -37,10 +36,10 @@@ bool MidpointSplit<BoundType, MatType>:
        if (width > maxWidth)
        {
          maxWidth = width;
 -        splitDimension = d;
 +        splitInfo.splitDimension = d;
+ 
+         // Split in the midpoint of that dimension.
 -        splitVal = bound[d].Mid();
++        splitInfo.splitVal = bound[d].Mid();
        }
      }
    }
@@@ -64,18 -68,102 +67,20 @@@
        if (width > maxWidth)
        {
          maxWidth = width;
 -        splitDimension = d;
 -
 -        // Split in the midpoint of that dimension.
 -        splitVal = ranges[d].Mid();
 -      }
 -    }
 -
 -    delete[] ranges;
 -  }
 -
 -  if (maxWidth <= 0) // All these points are the same.  We can't split.
 -    return false;
 -
 -  // Perform the actual splitting.  This will order the dataset such that points
 -  // with value in dimension splitDimension less than or equal to splitVal are
 -  // on the left of splitCol, and points with value in dimension splitDimension
 -  // greater than splitVal are on the right side of splitCol.
 -  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal);
 -
 -  return true;
 -}
 -
 -template<typename BoundType, typename MatType>
 -bool MidpointSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
 -                                                  MatType& data,
 -                                                  const size_t begin,
 -                                                  const size_t count,
 -                                                  size_t& splitCol,
 -                                                  std::vector<size_t>& oldFromNew)
 -{
 -  size_t splitDimension = data.n_rows; // Indicate invalid.
 -  double maxWidth = -1;
 -  double splitVal = DBL_MAX;
 -
 -  // Find the split dimension.  If the bound is tight, we only need to consult
 -  // the bound's width.
 -  if (bound::BoundTraits<BoundType>::HasTightBounds)
 -  {
 -    for (size_t d = 0; d < data.n_rows; d++)
 -    {
 -      const double width = bound[d].Width();
 -
 -      if (width > maxWidth)
 -      {
 -        maxWidth = width;
 -        splitDimension = d;
 -
 -        // Split in the midpoint of that dimension.
 -        splitVal = bound[d].Mid();
 -      }
 -    }
 -  }
 -  else
 -  {
 -    // We must individually calculate bounding boxes.
 -    math::Range* ranges = new math::Range[data.n_rows];
 -    for (size_t i = begin; i < begin + count; ++i)
 -    {
 -      // Expand each dimension as necessary.
 -      for (size_t d = 0; d < data.n_rows; ++d)
 -      {
 -        const double val = data(d, i);
 -        if (val < ranges[d].Lo())
 -          ranges[d].Lo() = val;
 -        if (val > ranges[d].Hi())
 -          ranges[d].Hi() = val;
 -      }
 -    }
 -
 -    // Now, which is the widest?
 -    for (size_t d = 0; d < data.n_rows; d++)
 -    {
 -      const double width = ranges[d].Width();
 -
 -      if (width > maxWidth)
 -      {
 -        maxWidth = width;
 -        splitDimension = d;
 -
 +        splitInfo.splitDimension = d;
+         // Split in the midpoint of that dimension.
 -        splitVal = ranges[d].Mid();
++        splitInfo.splitVal = ranges[d].Mid();
        }
      }
  
      delete[] ranges;
    }
  
-   if (maxWidth == 0) // All these points are the same.  We can't split.
+   if (maxWidth <= 0) // All these points are the same.  We can't split.
      return false;
  
 -  // Perform the actual splitting.  This will order the dataset such that points
 -  // with value in dimension splitDimension less than or equal to splitVal are
 -  // on the left of splitCol, and points with value in dimension splitDimension
 -  // greater than splitVal are on the right side of splitCol.
 -  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
 -      oldFromNew);
 +  // Split in the midpoint of that dimension.
 +  splitInfo.splitVal = bound[splitInfo.splitDimension].Mid();
  
    return true;
  }
diff --cc src/mlpack/core/tree/binary_space_tree/traits.hpp
index ade4356,9a81673..15cadc1
--- a/src/mlpack/core/tree/binary_space_tree/traits.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/traits.hpp
@@@ -41,74 -42,6 +42,89 @@@ class TreeTraits<BinarySpaceTree<Metric
    static const bool FirstPointIsCentroid = false;
  
    /**
++   * The tree has not got duplicated points.
++   */
++  static const bool HasDuplicatedPoints = false;
++
++  /**
 +   * Points are not contained at multiple levels of the binary space tree.
 +   */
 +  static const bool HasSelfChildren = false;
 +
 +  /**
 +   * Points are rearranged during building of the tree.
 +   */
 +  static const bool RearrangesDataset = true;
 +
 +  /**
 +   * This is always a binary tree.
 +   */
 +  static const bool BinaryTree = true;
 +};
 +
 +template<typename MetricType,
 +         typename StatisticType,
 +         typename MatType,
 +         template<typename BoundMetricType, typename...> class BoundType>
 +class TreeTraits<BinarySpaceTree<MetricType, StatisticType, MatType, BoundType,
 +                                 RPTreeMaxSplit>>
 +{
 + public:
 +  /**
 +   * Children of a random projection tree node may overlap.
 +   */
 +  static const bool HasOverlappingChildren = true;
 +
 +  /**
++   * The tree has not got duplicated points.
++   */
++  static const bool HasDuplicatedPoints = false;
++
++  /**
 +   * There is no guarantee that the first point in a node is its centroid.
 +   */
 +  static const bool FirstPointIsCentroid = false;
 +
 +  /**
 +   * Points are not contained at multiple levels of the binary space tree.
 +   */
 +  static const bool HasSelfChildren = false;
 +
 +  /**
 +   * Points are rearranged during building of the tree.
 +   */
 +  static const bool RearrangesDataset = true;
 +
 +  /**
 +   * This is always a binary tree.
 +   */
 +  static const bool BinaryTree = true;
 +};
 +
 +template<typename MetricType,
 +         typename StatisticType,
 +         typename MatType,
 +         template<typename BoundMetricType, typename...> class BoundType>
 +class TreeTraits<BinarySpaceTree<MetricType, StatisticType, MatType, BoundType,
 +                                 RPTreeMeanSplit>>
 +{
 + public:
 +  /**
 +   * Children of a random projection tree node may overlap.
 +   */
 +  static const bool HasOverlappingChildren = true;
 +
 +  /**
++   * The tree has not got duplicated points.
++   */
++  static const bool HasDuplicatedPoints = false;
++
++  /**
 +   * There is no guarantee that the first point in a node is its centroid.
 +   */
 +  static const bool FirstPointIsCentroid = false;
 +
 +  /**
     * Points are not contained at multiple levels of the binary space tree.
     */
    static const bool HasSelfChildren = false;
diff --cc src/mlpack/methods/neighbor_search/kfn_main.cpp
index 628d26b,fac28b5..16e58a4
--- a/src/mlpack/methods/neighbor_search/kfn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/kfn_main.cpp
@@@ -61,15 -61,14 +61,15 @@@ PARAM_INT_IN("k", "Number of furthest n
  
  // The user may specify the type of tree to use, and a few pararmeters for tree
  // building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 -PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
 -    "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
 -PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
 -    "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
 -    20);
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 +    "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
 +    "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
 +    "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
 +    "R+ trees and R++ trees).", "l", 20);
  PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
      "random orthogonal basis.", "R");
- PARAM_INT("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
+ PARAM_INT_IN("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
  
  // Search settings.
  PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
diff --cc src/mlpack/methods/neighbor_search/knn_main.cpp
index cfecb49,14e07db..2f3a713
--- a/src/mlpack/methods/neighbor_search/knn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/knn_main.cpp
@@@ -62,15 -63,14 +63,15 @@@ PARAM_INT_IN("k", "Number of nearest ne
  
  // The user may specify the type of tree to use, and a few parameters for tree
  // building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 -PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
 -    "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
 -PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
 -    "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
 -    20);
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 +    "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
 +    "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
 +    "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
 +    "R+ trees and R++ trees).", "l", 20);
  PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
      "random orthogonal basis.", "R");
- PARAM_INT("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
+ PARAM_INT_IN("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
  
  // Search settings.
  PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
diff --cc src/mlpack/methods/range_search/range_search_main.cpp
index b837b7c,3b8b088..d014bc4
--- a/src/mlpack/methods/range_search/range_search_main.cpp
+++ b/src/mlpack/methods/range_search/range_search_main.cpp
@@@ -69,12 -70,10 +70,12 @@@ PARAM_DOUBLE_IN("min", "Lower bound in 
  
  // The user may specify the type of tree to use, and a few parameters for tree
  // building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 -PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
 -    "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
 -PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
 -    "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
 +    "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
 +    "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
 +    "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
 +    "R+ trees and R++ trees).", "l",
      20);
  PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
      "random orthogonal basis.", "R");




More information about the mlpack-git mailing list