[mlpack-git] master: - Rollback to faster sparse iteration. (50fa931)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Oct 19 17:36:02 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/94d14187222231ca29e4f6419c5999c660db4f8a...981ffa2d67d8fe38df6c699589005835fef710ea
>---------------------------------------------------------------
commit 50fa9312175a6f44a1f9bb211c8dc80ccb8a0492
Author: theJonan <ivan at jonan.info>
Date: Thu Oct 20 00:36:02 2016 +0300
- Rollback to faster sparse iteration.
>---------------------------------------------------------------
50fa9312175a6f44a1f9bb211c8dc80ccb8a0492
src/mlpack/methods/det/dtree_impl.hpp | 50 +++++++++++------------------------
1 file changed, 16 insertions(+), 34 deletions(-)
diff --git a/src/mlpack/methods/det/dtree_impl.hpp b/src/mlpack/methods/det/dtree_impl.hpp
index 4c1b9f3..1d2cb76 100644
--- a/src/mlpack/methods/det/dtree_impl.hpp
+++ b/src/mlpack/methods/det/dtree_impl.hpp
@@ -23,12 +23,12 @@ namespace details
* in a vector, that can easily be iterated afterwards.
*/
template <typename MatType>
- std::vector<std::pair<typename MatType::elem_type, size_t>>
- ExtractSplits(const MatType& data,
- size_t dim,
- size_t start,
- size_t end,
- size_t minLeafSize)
+ void ExtractSplits(std::vector<std::pair<typename MatType::elem_type, size_t>>& splitVec,
+ const MatType& data,
+ size_t dim,
+ size_t start,
+ size_t end,
+ size_t minLeafSize)
{
typedef typename MatType::elem_type ElemType;
typedef std::pair<ElemType, size_t> SplitItem;
@@ -37,9 +37,6 @@ namespace details
// We sort these, in-place (it's a copy of the data, anyways).
std::sort(dimVec.begin(), dimVec.end());
- // We're going to collect results here.
- std::vector<SplitItem> splitVec;
-
// Ensure the minimum leaf size on both sides. We need to figure out why
// there are spikes if this minLeafSize is enforced here...
for (size_t i = minLeafSize - 1; i < dimVec.n_elem - minLeafSize; ++i)
@@ -52,37 +49,27 @@ namespace details
if (split != dimVec[i])
splitVec.push_back(SplitItem(split, i));
}
-
- return splitVec;
-
}
// This is the custom, sparse-optimized implementation of the same routine.
template <typename ElemType>
- std::vector<std::pair<ElemType, size_t>>
- ExtractSplits(const arma::SpMat<ElemType>& data,
- size_t dim,
- size_t start,
- size_t end,
- size_t minLeafSize)
+ void ExtractSplits(std::vector<std::pair<ElemType, size_t>>& splitVec,
+ const arma::SpMat<ElemType>& data,
+ size_t dim,
+ size_t start,
+ size_t end,
+ size_t minLeafSize)
{
- typedef typename arma::SpMat<ElemType>::const_row_iterator RowIterator;
typedef std::pair<ElemType, size_t> SplitItem;
const size_t n_elem = end - start;
// Construct a vector of values.
- std::vector<ElemType> valsVec;
- valsVec.reserve(n_elem);
-
- for (RowIterator j(data, dim, start);j.row() == dim && j.col() < end; ++j)
- valsVec.push_back(*j);
+ const arma::SpRow<ElemType> row = data(dim, arma::span(start, end - 1));
+ std::vector<ElemType> valsVec(row.begin(), row.end());
// ... and sort it!
std::sort(valsVec.begin(), valsVec.end());
- // We're going to collect our splits here.
- std::vector<SplitItem> splitVec;
-
// Now iterate over the values, taking into account the jump over the
// zeroes, and construct the splits vector.
ElemType lastVal = -std::numeric_limits<ElemType>::max();
@@ -116,8 +103,6 @@ namespace details
lastVal = newVal;
}
-
- return splitVec;
}
};
@@ -319,11 +304,8 @@ bool DTree<MatType, TagType>::FindSplit(const MatType& data,
// could be quite inefficient for sparse matrices, due to copy operations (3).
// This one has custom implementation for dense and sparse matrices.
- std::vector<SplitItem> splitVec = details::ExtractSplits(data,
- dim,
- start,
- end,
- minLeafSize);
+ std::vector<SplitItem> splitVec;
+ details::ExtractSplits(splitVec, data, dim, start, end, minLeafSize);
// Iterate on all the splits for this dimension
for (typename std::vector<SplitItem>::iterator i = splitVec.begin();
More information about the mlpack-git
mailing list