[mlpack-git] master: Merge remote-tracking branch 'upstream/master' into r-proj-tree (dc0b456)
gitdub at mlpack.org
gitdub at mlpack.org
Sun Aug 7 13:35:16 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/a7794bde8082c691553152393e1e230098f5e920...87776e52cf9ead63fa458118a0cfd2fe46b23466
>---------------------------------------------------------------
commit dc0b456bb7bc41485cb6c13f2256e8c698be269b
Merge: bc8994a b8de1fa
Author: Mikhail Lozhnikov <lozhnikovma at gmail.com>
Date: Sun Aug 7 20:35:16 2016 +0300
Merge remote-tracking branch 'upstream/master' into r-proj-tree
>---------------------------------------------------------------
dc0b456bb7bc41485cb6c13f2256e8c698be269b
CMake/mlpack_coverage.in | 125 ++
CMakeLists.txt | 62 +-
HISTORY.md | 6 +-
README.md | 2 +-
doc/guide/build.hpp | 4 +-
doc/tutorials/kmeans/kmeans.txt | 18 +-
src/mlpack/CMakeLists.txt | 3 +
src/mlpack/core.hpp | 1 +
src/mlpack/core/boost_backport/README.md | 14 +-
src/mlpack/core/boost_backport/bernoulli.hpp | 176 +++
src/mlpack/core/boost_backport/boost_backport.hpp | 42 +
.../boost_backport/detail/bernoulli_details.hpp | 660 ++++++++
.../core/boost_backport/detail/polygamma.hpp | 558 +++++++
.../boost_backport/detail/unchecked_bernoulli.hpp | 700 +++++++++
src/mlpack/core/boost_backport/math_fwd.hpp | 1586 ++++++++++++++++++++
src/mlpack/core/boost_backport/policy.hpp | 1042 +++++++++++++
src/mlpack/core/boost_backport/polygamma.hpp | 94 ++
src/mlpack/core/boost_backport/trigamma.hpp | 469 ++++++
src/mlpack/core/data/CMakeLists.txt | 5 +-
src/mlpack/core/data/dataset_info.hpp | 114 --
src/mlpack/core/data/dataset_info_impl.hpp | 100 --
src/mlpack/core/data/dataset_mapper.hpp | 164 ++
src/mlpack/core/data/dataset_mapper_impl.hpp | 151 ++
.../data/imputation_methods}/CMakeLists.txt | 6 +-
.../data/imputation_methods/custom_imputation.hpp | 77 +
.../data/imputation_methods/listwise_deletion.hpp | 69 +
.../data/imputation_methods/mean_imputation.hpp | 99 ++
.../data/imputation_methods/median_imputation.hpp | 88 ++
src/mlpack/core/data/imputer.hpp | 90 ++
src/mlpack/core/data/load.hpp | 18 +-
src/mlpack/core/data/load_arff.hpp | 4 +-
src/mlpack/core/data/load_arff_impl.hpp | 6 +-
src/mlpack/core/data/load_impl.hpp | 51 +-
.../data/map_policies}/CMakeLists.txt | 4 +-
src/mlpack/core/data/map_policies/datatype.hpp | 28 +
.../core/data/map_policies/increment_policy.hpp | 131 ++
.../core/data/map_policies/missing_policy.hpp | 151 ++
src/mlpack/core/dists/CMakeLists.txt | 2 +
src/mlpack/core/dists/gamma_distribution.cpp | 119 ++
src/mlpack/core/dists/gamma_distribution.hpp | 135 ++
.../tree/binary_space_tree/midpoint_split_impl.hpp | 7 +-
src/mlpack/core/tree/binary_space_tree/traits.hpp | 39 +
.../core/tree/cover_tree/cover_tree_impl.hpp | 32 +-
src/mlpack/core/tree/rectangle_tree.hpp | 2 +-
.../tree/rectangle_tree/r_plus_tree_split_impl.hpp | 21 +-
src/mlpack/core/tree/rectangle_tree/traits.hpp | 48 +
src/mlpack/core/util/cli.cpp | 126 +-
src/mlpack/core/util/cli.hpp | 351 +----
src/mlpack/core/util/cli_impl.hpp | 21 +-
src/mlpack/core/util/option.hpp | 20 +-
src/mlpack/core/util/option_impl.hpp | 11 +-
src/mlpack/core/util/param.hpp | 426 ++++++
src/mlpack/methods/CMakeLists.txt | 1 +
src/mlpack/methods/adaboost/adaboost_main.cpp | 37 +-
src/mlpack/methods/ann/cnn.hpp | 3 +
src/mlpack/methods/ann/cnn_impl.hpp | 13 +-
src/mlpack/methods/ann/layer/pooling_layer.hpp | 37 +-
src/mlpack/methods/cf/cf.cpp | 73 +-
src/mlpack/methods/cf/cf.hpp | 24 +-
src/mlpack/methods/cf/cf_main.cpp | 44 +-
.../methods/decision_stump/decision_stump_main.cpp | 23 +-
src/mlpack/methods/det/det_main.cpp | 37 +-
src/mlpack/methods/emst/emst_main.cpp | 20 +-
src/mlpack/methods/fastmks/fastmks.hpp | 21 +-
src/mlpack/methods/fastmks/fastmks_impl.hpp | 107 +-
src/mlpack/methods/fastmks/fastmks_main.cpp | 33 +-
src/mlpack/methods/fastmks/fastmks_rules.hpp | 69 +-
src/mlpack/methods/fastmks/fastmks_rules_impl.hpp | 101 +-
src/mlpack/methods/gmm/gmm_generate_main.cpp | 15 +-
src/mlpack/methods/gmm/gmm_probability_main.cpp | 21 +-
src/mlpack/methods/gmm/gmm_train_main.cpp | 26 +-
src/mlpack/methods/hmm/hmm_generate_main.cpp | 23 +-
src/mlpack/methods/hmm/hmm_loglik_main.cpp | 8 +-
src/mlpack/methods/hmm/hmm_train_main.cpp | 21 +-
src/mlpack/methods/hmm/hmm_viterbi_main.cpp | 10 +-
.../hoeffding_trees/hoeffding_tree_main.cpp | 36 +-
src/mlpack/methods/kernel_pca/kernel_pca_main.cpp | 31 +-
src/mlpack/methods/kmeans/kmeans_main.cpp | 37 +-
src/mlpack/methods/lars/lars_main.cpp | 61 +-
.../linear_regression/linear_regression_main.cpp | 63 +-
.../local_coordinate_coding_main.cpp | 32 +-
.../logistic_regression_main.cpp | 48 +-
src/mlpack/methods/lsh/lsh_main.cpp | 45 +-
src/mlpack/methods/lsh/lsh_search.hpp | 42 +-
src/mlpack/methods/lsh/lsh_search_impl.hpp | 100 +-
src/mlpack/methods/mean_shift/mean_shift_main.cpp | 80 +-
src/mlpack/methods/mvu/mvu_main.cpp | 10 +-
src/mlpack/methods/naive_bayes/nbc_main.cpp | 17 +-
src/mlpack/methods/nca/nca_main.cpp | 49 +-
src/mlpack/methods/neighbor_search/CMakeLists.txt | 2 -
src/mlpack/methods/neighbor_search/kfn_main.cpp | 40 +-
src/mlpack/methods/neighbor_search/knn_main.cpp | 27 +-
.../neighbor_search/neighbor_search_impl.hpp | 35 +-
.../neighbor_search/neighbor_search_rules.hpp | 66 +-
.../neighbor_search/neighbor_search_rules_impl.hpp | 83 +-
.../sort_policies/furthest_neighbor_sort.cpp | 27 -
.../sort_policies/furthest_neighbor_sort.hpp | 18 -
.../sort_policies/nearest_neighbor_sort.cpp | 27 -
.../sort_policies/nearest_neighbor_sort.hpp | 18 -
src/mlpack/methods/nmf/nmf_main.cpp | 36 +-
src/mlpack/methods/pca/pca_main.cpp | 27 +-
src/mlpack/methods/perceptron/perceptron_main.cpp | 34 +-
src/mlpack/methods/preprocess/CMakeLists.txt | 2 +-
.../preprocess/preprocess_binarize_main.cpp | 18 +-
.../methods/preprocess/preprocess_imputer_main.cpp | 174 +++
.../methods/preprocess/preprocess_split_main.cpp | 64 +-
src/mlpack/methods/radical/radical_main.cpp | 65 +-
.../methods/range_search/range_search_impl.hpp | 2 +-
.../methods/range_search/range_search_main.cpp | 25 +-
.../methods/range_search/range_search_rules.hpp | 8 +-
src/mlpack/methods/rann/krann_main.cpp | 31 +-
src/mlpack/methods/rann/ra_search_impl.hpp | 42 +-
src/mlpack/methods/rann/ra_search_rules.hpp | 76 +-
src/mlpack/methods/rann/ra_search_rules_impl.hpp | 92 +-
src/mlpack/methods/rmva/rmva_main.cpp | 37 +-
.../softmax_regression/softmax_regression_main.cpp | 35 +-
.../methods/sparse_coding/sparse_coding_main.cpp | 56 +-
src/mlpack/prereqs.hpp | 8 +-
src/mlpack/tests/CMakeLists.txt | 1 +
src/mlpack/tests/cli_test.cpp | 2 +-
src/mlpack/tests/distribution_test.cpp | 168 ++-
src/mlpack/tests/imputation_test.cpp | 266 ++++
src/mlpack/tests/krann_search_test.cpp | 2 +-
src/mlpack/tests/load_save_test.cpp | 2 +
src/mlpack/tests/pca_test.cpp | 26 +-
src/mlpack/tests/rectangle_tree_test.cpp | 8 +
src/mlpack/tests/sort_policy_test.cpp | 72 -
src/mlpack/tests/union_find_test.cpp | 40 +-
128 files changed, 9595 insertions(+), 1863 deletions(-)
diff --cc src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
index 6f2c07f,28dc707..7cbf6fd
--- a/src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/midpoint_split_impl.hpp
@@@ -36,7 -37,10 +36,10 @@@ bool MidpointSplit<BoundType, MatType>:
if (width > maxWidth)
{
maxWidth = width;
- splitDimension = d;
+ splitInfo.splitDimension = d;
+
+ // Split in the midpoint of that dimension.
- splitVal = bound[d].Mid();
++ splitInfo.splitVal = bound[d].Mid();
}
}
}
@@@ -64,18 -68,102 +67,20 @@@
if (width > maxWidth)
{
maxWidth = width;
- splitDimension = d;
-
- // Split in the midpoint of that dimension.
- splitVal = ranges[d].Mid();
- }
- }
-
- delete[] ranges;
- }
-
- if (maxWidth <= 0) // All these points are the same. We can't split.
- return false;
-
- // Perform the actual splitting. This will order the dataset such that points
- // with value in dimension splitDimension less than or equal to splitVal are
- // on the left of splitCol, and points with value in dimension splitDimension
- // greater than splitVal are on the right side of splitCol.
- splitCol = PerformSplit(data, begin, count, splitDimension, splitVal);
-
- return true;
-}
-
-template<typename BoundType, typename MatType>
-bool MidpointSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
- MatType& data,
- const size_t begin,
- const size_t count,
- size_t& splitCol,
- std::vector<size_t>& oldFromNew)
-{
- size_t splitDimension = data.n_rows; // Indicate invalid.
- double maxWidth = -1;
- double splitVal = DBL_MAX;
-
- // Find the split dimension. If the bound is tight, we only need to consult
- // the bound's width.
- if (bound::BoundTraits<BoundType>::HasTightBounds)
- {
- for (size_t d = 0; d < data.n_rows; d++)
- {
- const double width = bound[d].Width();
-
- if (width > maxWidth)
- {
- maxWidth = width;
- splitDimension = d;
-
- // Split in the midpoint of that dimension.
- splitVal = bound[d].Mid();
- }
- }
- }
- else
- {
- // We must individually calculate bounding boxes.
- math::Range* ranges = new math::Range[data.n_rows];
- for (size_t i = begin; i < begin + count; ++i)
- {
- // Expand each dimension as necessary.
- for (size_t d = 0; d < data.n_rows; ++d)
- {
- const double val = data(d, i);
- if (val < ranges[d].Lo())
- ranges[d].Lo() = val;
- if (val > ranges[d].Hi())
- ranges[d].Hi() = val;
- }
- }
-
- // Now, which is the widest?
- for (size_t d = 0; d < data.n_rows; d++)
- {
- const double width = ranges[d].Width();
-
- if (width > maxWidth)
- {
- maxWidth = width;
- splitDimension = d;
-
+ splitInfo.splitDimension = d;
+ // Split in the midpoint of that dimension.
- splitVal = ranges[d].Mid();
++ splitInfo.splitVal = ranges[d].Mid();
}
}
delete[] ranges;
}
- if (maxWidth == 0) // All these points are the same. We can't split.
+ if (maxWidth <= 0) // All these points are the same. We can't split.
return false;
- // Perform the actual splitting. This will order the dataset such that points
- // with value in dimension splitDimension less than or equal to splitVal are
- // on the left of splitCol, and points with value in dimension splitDimension
- // greater than splitVal are on the right side of splitCol.
- splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
- oldFromNew);
+ // Split in the midpoint of that dimension.
+ splitInfo.splitVal = bound[splitInfo.splitDimension].Mid();
return true;
}
diff --cc src/mlpack/core/tree/binary_space_tree/traits.hpp
index ade4356,9a81673..15cadc1
--- a/src/mlpack/core/tree/binary_space_tree/traits.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/traits.hpp
@@@ -41,74 -42,6 +42,89 @@@ class TreeTraits<BinarySpaceTree<Metric
static const bool FirstPointIsCentroid = false;
/**
++ * The tree has not got duplicated points.
++ */
++ static const bool HasDuplicatedPoints = false;
++
++ /**
+ * Points are not contained at multiple levels of the binary space tree.
+ */
+ static const bool HasSelfChildren = false;
+
+ /**
+ * Points are rearranged during building of the tree.
+ */
+ static const bool RearrangesDataset = true;
+
+ /**
+ * This is always a binary tree.
+ */
+ static const bool BinaryTree = true;
+};
+
+template<typename MetricType,
+ typename StatisticType,
+ typename MatType,
+ template<typename BoundMetricType, typename...> class BoundType>
+class TreeTraits<BinarySpaceTree<MetricType, StatisticType, MatType, BoundType,
+ RPTreeMaxSplit>>
+{
+ public:
+ /**
+ * Children of a random projection tree node may overlap.
+ */
+ static const bool HasOverlappingChildren = true;
+
+ /**
++ * The tree has not got duplicated points.
++ */
++ static const bool HasDuplicatedPoints = false;
++
++ /**
+ * There is no guarantee that the first point in a node is its centroid.
+ */
+ static const bool FirstPointIsCentroid = false;
+
+ /**
+ * Points are not contained at multiple levels of the binary space tree.
+ */
+ static const bool HasSelfChildren = false;
+
+ /**
+ * Points are rearranged during building of the tree.
+ */
+ static const bool RearrangesDataset = true;
+
+ /**
+ * This is always a binary tree.
+ */
+ static const bool BinaryTree = true;
+};
+
+template<typename MetricType,
+ typename StatisticType,
+ typename MatType,
+ template<typename BoundMetricType, typename...> class BoundType>
+class TreeTraits<BinarySpaceTree<MetricType, StatisticType, MatType, BoundType,
+ RPTreeMeanSplit>>
+{
+ public:
+ /**
+ * Children of a random projection tree node may overlap.
+ */
+ static const bool HasOverlappingChildren = true;
+
+ /**
++ * The tree has not got duplicated points.
++ */
++ static const bool HasDuplicatedPoints = false;
++
++ /**
+ * There is no guarantee that the first point in a node is its centroid.
+ */
+ static const bool FirstPointIsCentroid = false;
+
+ /**
* Points are not contained at multiple levels of the binary space tree.
*/
static const bool HasSelfChildren = false;
diff --cc src/mlpack/methods/neighbor_search/kfn_main.cpp
index 628d26b,fac28b5..16e58a4
--- a/src/mlpack/methods/neighbor_search/kfn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/kfn_main.cpp
@@@ -61,15 -61,14 +61,15 @@@ PARAM_INT_IN("k", "Number of furthest n
// The user may specify the type of tree to use, and a few pararmeters for tree
// building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
-PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
- "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
-PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
- "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
- 20);
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
+ "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
+ "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
+ "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
+ "R+ trees and R++ trees).", "l", 20);
PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
"random orthogonal basis.", "R");
- PARAM_INT("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
+ PARAM_INT_IN("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
// Search settings.
PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
diff --cc src/mlpack/methods/neighbor_search/knn_main.cpp
index cfecb49,14e07db..2f3a713
--- a/src/mlpack/methods/neighbor_search/knn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/knn_main.cpp
@@@ -62,15 -63,14 +63,15 @@@ PARAM_INT_IN("k", "Number of nearest ne
// The user may specify the type of tree to use, and a few parameters for tree
// building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
-PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
- "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
-PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
- "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
- 20);
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
+ "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
+ "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
+ "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
+ "R+ trees and R++ trees).", "l", 20);
PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
"random orthogonal basis.", "R");
- PARAM_INT("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
+ PARAM_INT_IN("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0);
// Search settings.
PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
diff --cc src/mlpack/methods/range_search/range_search_main.cpp
index b837b7c,3b8b088..d014bc4
--- a/src/mlpack/methods/range_search/range_search_main.cpp
+++ b/src/mlpack/methods/range_search/range_search_main.cpp
@@@ -69,12 -70,10 +70,12 @@@ PARAM_DOUBLE_IN("min", "Lower bound in
// The user may specify the type of tree to use, and a few parameters for tree
// building.
- PARAM_STRING("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
-PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'cover', 'r', "
- "'r-star', 'x', 'ball', 'hilbert-r', 'r-plus', 'r-plus-plus'.", "t", "kd");
-PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, R "
- "trees, R* trees, X trees, Hilbert R trees, R+ trees and R++ trees).", "l",
++PARAM_STRING_IN("tree_type", "Type of tree to use: 'kd', 'rp-tree', "
+ "'max-split-rp-tree', 'cover', 'r', 'r-star', 'x', 'ball', 'hilbert-r', "
+ "'r-plus', 'r-plus-plus'.", "t", "kd");
- PARAM_INT("leaf_size", "Leaf size for tree building (used for kd-trees, "
++PARAM_INT_IN("leaf_size", "Leaf size for tree building (used for kd-trees, "
+ "random projection trees, R trees, R* trees, X trees, Hilbert R trees, "
+ "R+ trees and R++ trees).", "l",
20);
PARAM_FLAG("random_basis", "Before tree-building, project the data onto a "
"random orthogonal basis.", "R");
More information about the mlpack-git mailing list