[mlpack-git] master: Fixes a lot of style issues (cc6a5c2)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Jun 28 06:47:32 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc
>---------------------------------------------------------------
commit cc6a5c29263e19143e1147f5fa7851cb8a0cab21
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Tue Jun 28 11:47:32 2016 +0100
Fixes a lot of style issues
>---------------------------------------------------------------
cc6a5c29263e19143e1147f5fa7851cb8a0cab21
src/mlpack/methods/lsh/lsh_main.cpp | 6 +-
src/mlpack/methods/lsh/lsh_search.hpp | 17 +++---
src/mlpack/methods/lsh/lsh_search_impl.hpp | 93 +++++++++++++-----------------
src/mlpack/tests/lsh_test.cpp | 60 +++++++++----------
4 files changed, 81 insertions(+), 95 deletions(-)
diff --git a/src/mlpack/methods/lsh/lsh_main.cpp b/src/mlpack/methods/lsh/lsh_main.cpp
index 0c22f93..dee9f55 100644
--- a/src/mlpack/methods/lsh/lsh_main.cpp
+++ b/src/mlpack/methods/lsh/lsh_main.cpp
@@ -64,8 +64,8 @@ PARAM_INT("tables", "The number of hash tables to be used.", "L", 30);
PARAM_DOUBLE("hash_width", "The hash width for the first-level hashing in the "
"LSH preprocessing. By default, the LSH class automatically estimates a "
"hash width for its use.", "H", 0.0);
-PARAM_INT("num_probes", "Number of additional probes for Multiprobe LSH"
- " If 0, traditional LSH is used.", "T", 0);
+PARAM_INT("num_probes", "Number of additional probes for Multiprobe LSH;"
+ " if 0, traditional LSH is used.", "T", 0);
PARAM_INT("second_hash_size", "The size of the second level hash table.", "S",
99901);
PARAM_INT("bucket_size", "The maximum size of a bucket in the second level "
@@ -139,7 +139,7 @@ int main(int argc, char *argv[])
const size_t numProj = CLI::GetParam<int>("projections");
const size_t numTables = CLI::GetParam<int>("tables");
const double hashWidth = CLI::GetParam<double>("hash_width");
- const size_t numProbes = CLI::GetParam<int>("num_probes");
+ const size_t numProbes = (size_t) CLI::GetParam<int>("num_probes");
arma::Mat<size_t> neighbors;
arma::mat distances;
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index c621710..44a7b64 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -35,7 +35,6 @@
* }
*
*/
-
#ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
#define MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
@@ -280,9 +279,9 @@ class LSHSearch
* hashing the query into all the hash tables and eventually into
* multiple buckets of the second hash table.
* @param numTablesToSearch The number of tables to perform the search in. If
- * 0, all tables are searched.
+ * 0, all tables are searched.
* @param T The number of additional probing bins for multiprobe LSH. If 0,
- * single-probe is used.
+ * single-probe is used.
*/
template<typename VecType>
void ReturnIndicesFromTable(const VecType& queryPoint,
@@ -354,10 +353,10 @@ class LSHSearch
*
* @param queryCode vector containing the numProj-dimensional query code.
* @param queryCodeNotFloored vector containing the projection location of the
- * query.
+ * query.
* @param T number of additional probing bins.
* @param additionalProbingBins matrix. Each column will hold one additional
- * bin.
+ * bin.
*/
void GetAdditionalProbingBins(const arma::vec& queryCode,
const arma::vec& queryCodeNotFloored,
@@ -368,6 +367,8 @@ class LSHSearch
* Returns the score of a perturbation vector generated by perturbation set A.
* The score of a pertubation set (vector) is the sum of scores of the
* participating actions.
+ * @param A perturbation set to compute the score of.
+ * @param scores vector containing score of each perturbation.
*/
double PerturbationScore(const std::vector<bool>& A,
const arma::vec& scores) const;
@@ -375,6 +376,7 @@ class LSHSearch
* Inline function used by GetAdditionalProbingBins. The vector shift operation
* replaces the largest element of a vector A with (largest element) + 1.
* Returns true if resulting vector is valid, otherwise false.
+ * @param A perturbation set to shift.
*/
bool PerturbationShift(std::vector<bool>& A) const;
@@ -383,14 +385,15 @@ class LSHSearch
* operation adds the element [1 + (largest_element)] to a vector A, where
* largest_element is the largest element of A. Returns true if resulting vector
* is valid, otherwise false.
+ * @param A perturbation set to expand.
*/
-
bool PerturbationExpand(std::vector<bool>& A) const;
+
/**
* Return true if perturbation set A is valid. A perturbation set is invalid if
* it contains two (or more) actions for the same dimension or dimensions that
* are larger than the queryCode's dimensions.
- * @param A - a perturbation set
+ * @param A perturbation set to validate.
*/
bool PerturbationValid(const std::vector<bool>& A) const;
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index d91aebe..5916d36 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -9,7 +9,6 @@
#include <mlpack/core.hpp>
-
namespace mlpack {
namespace neighbor {
@@ -358,7 +357,7 @@ bool LSHSearch<SortPolicy>::PerturbationShift(std::vector<bool>& A) const
if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector
{
A[maxPos] = 0;
- A[maxPos+1] = 1;
+ A[maxPos + 1] = 1;
return true; // valid
}
return false; // invalid
@@ -374,7 +373,7 @@ bool LSHSearch<SortPolicy>::PerturbationExpand(std::vector<bool>& A) const
if (A[i]) // marked true
maxPos = i;
- if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector
+ if (maxPos + 1 < A.size()) // otherwise, this is an invalid vector
{
A[maxPos + 1] = 1;
return true;
@@ -402,12 +401,11 @@ bool LSHSearch<SortPolicy>::PerturbationValid(
continue;
// If dimesnion is unseen thus far, mark it as seen.
- if ( check[i % numProj] == false )
+ if (check[i % numProj] == false)
check[i % numProj] = true;
else
return false; // If dimension was seen before, set is not valid.
}
-
// If we didn't fail, set is valid.
return true;
}
@@ -415,10 +413,10 @@ bool LSHSearch<SortPolicy>::PerturbationValid(
// Compute additional probing bins for a query
template<typename SortPolicy>
void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
- const arma::vec& queryCode,
- const arma::vec& queryCodeNotFloored,
- const size_t T,
- arma::mat& additionalProbingBins) const
+ const arma::vec& queryCode,
+ const arma::vec& queryCodeNotFloored,
+ const size_t T,
+ arma::mat& additionalProbingBins) const
{
// No additional bins requested. Our work is done.
@@ -448,15 +446,15 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// Actions vector describes what perturbation (-1/+1) corresponds to a score.
arma::Col<short int> actions(2 * numProj); // will be [-1 ... 1 ...]
- actions.rows(0, numProj - 1) = // first numProj rows
+ actions.rows(0, numProj - 1) = // First numProj rows.
-1 * arma::ones< arma::Col<short int> > (numProj); // -1s
- actions.rows(numProj, (2 * numProj) - 1) = // last numProj rows
+ actions.rows(numProj, (2 * numProj) - 1) = // Last numProj rows.
arma::ones< arma::Col<short int> > (numProj); // 1s
// Acting dimension vector shows which coordinate to transform according to
- // actions described by actions vector
- arma::Col<size_t> positions(2 * numProj); // will be [0 1 2 ... 0 1 2 ...]
+ // actions (actions are described by actions vector above).
+ arma::Col<size_t> positions(2 * numProj); // Will be [0 1 2 ... 0 1 2 ...].
positions.rows(0, numProj - 1) =
arma::linspace< arma::Col<size_t> >(0, numProj - 1, numProj);
positions.rows(numProj, 2 * numProj - 1) =
@@ -465,7 +463,6 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// Special case: No need to create heap for 1 or 2 codes.
if (T <= 2)
{
-
// First, find location of minimum score, generate 1 perturbation vector,
// and add its code to additionalProbingBins column 0.
@@ -500,7 +497,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
size_t minloc2 = 0;
for (size_t s = 0; s < (2 * numProj); ++s) // here we can't start from 1
{
- if ( minscore2 > scores[s] && s != minloc) //second smallest
+ if (minscore2 > scores[s] && s != minloc) //second smallest
{
minscore2 = scores[s];
minloc2 = s;
@@ -515,7 +512,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// General case: more than 2 perturbation vectors require use of minheap.
// Sort everything in increasing order.
- arma::Col<long long unsigned int> sortidx = arma::sort_index(scores);
+ arma::uvec sortidx = arma::sort_index(scores);
scores = scores(sortidx);
actions = actions(sortidx);
positions = positions(sortidx);
@@ -568,40 +565,38 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// Shift operation on Ai (replace max with max+1).
std::vector<bool> As = Ai;
- if ( PerturbationShift(As) && PerturbationValid(As) )
+ if (PerturbationShift(As) && PerturbationValid(As))
// Don't add invalid sets.
{
perturbationSets.push_back(As); // add shifted set to sets
minHeap.push(
- std::make_pair(
- PerturbationScore(As, scores),
- perturbationSets.size() - 1)
+ std::make_pair(PerturbationScore(As, scores),
+ perturbationSets.size() - 1)
);
}
// Expand operation on Ai (add max+1 to set).
std::vector<bool> Ae = Ai;
- if ( PerturbationExpand(Ae) && PerturbationValid(Ae) )
+ if (PerturbationExpand(Ae) && PerturbationValid(Ae))
// Don't add invalid sets.
{
perturbationSets.push_back(Ae); // add expanded set to sets
minHeap.push(
- std::make_pair(
- PerturbationScore(Ae, scores),
- perturbationSets.size() - 1)
+ std::make_pair(PerturbationScore(Ae, scores),
+ perturbationSets.size() - 1)
);
}
- }while (! PerturbationValid(Ai) );//Discard invalid perturbations
+ } while (!PerturbationValid(Ai));//Discard invalid perturbations
// Found valid perturbation set Ai. Construct perturbation vector from set.
for (size_t pos = 0; pos < Ai.size(); ++pos)
// If Ai[pos] is marked, add action to probing vector.
additionalProbingBins(positions(pos), pvec)
- += Ai[pos] ? actions(pos) : 0; }
+ += Ai[pos] ? actions(pos) : 0;
+ }
}
-
template<typename SortPolicy>
template<typename VecType>
void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
@@ -630,8 +625,7 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
for (size_t i = 0; i < numTablesToSearch; i++)
queryCodesNotFloored.unsafe_col(i) = projections.slice(i).t() * queryPoint;
queryCodesNotFloored += offsets.cols(0, numTablesToSearch - 1);
- allProjInTables = arma::floor(queryCodesNotFloored/hashWidth);
-
+ allProjInTables = arma::floor(queryCodesNotFloored / hashWidth);
// Use hashMat to store the primary probing codes and any additional codes
// from multiprobe LSH.
@@ -640,14 +634,13 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
// Compute the primary hash value of each key of the query into a bucket of
// the secondHashTable using the secondHashWeights.
- hashMat.row(0) =
- arma::conv_to< arma::Row<size_t> >:: // floor by typecasting to size_t
- from( secondHashWeights.t() * allProjInTables );
- // mod to compute 2nd-level codes
+ hashMat.row(0) = arma::conv_to<arma::Row<size_t>> // Floor by typecasting
+ ::from(secondHashWeights.t() * allProjInTables);
+ // Mod to compute 2nd-level codes.
for (size_t i = 0; i < numTablesToSearch; i++)
hashMat(0, i) = (hashMat(0, i) % secondHashSize);
- // Compute hash codes of additional probing bins
+ // Compute hash codes of additional probing bins.
if (T > 0)
{
for (size_t i = 0; i < numTablesToSearch; ++i)
@@ -704,19 +697,16 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
for (size_t i = 0; i < numTablesToSearch; ++i) // for all tables
{
- for (size_t p = 0; p < T + 1; ++p) // for entire probing sequence
+ for (size_t p = 0; p < T + 1; ++p) // For entire probing sequence.
{
-
// get the sequence code
size_t hashInd = hashMat(p, i);
size_t tableRow = bucketRowInHashTable[hashInd];
if (tableRow < secondHashSize && bucketContentSize[tableRow] > 0)
- {
// Pick the indices in the bucket corresponding to hashInd.
for (size_t j = 0; j < bucketContentSize[tableRow]; ++j)
refPointsConsidered[ secondHashTable[tableRow](j) ]++;
- }
}
}
@@ -795,19 +785,18 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
// If the user requested more than the available number of additional probing
// bins, set Teffective to maximum T. Maximum T is 2^numProj - 1
size_t Teffective = T;
- if (T > ( (size_t) ( (1 << numProj) - 1) ) )
+ if (T > ((size_t) ((1 << numProj) - 1)))
{
- Teffective = ( 1 << numProj ) - 1;
- Log::Warn << "Requested " << T <<
- " additional bins are more than theoretical maximum. Using " <<
- Teffective << " instead." << std::endl;
+ Teffective = (1 << numProj) - 1;
+ Log::Warn << "Requested " << T << " additional bins are more than "
+ << "theoretical maximum. Using " << Teffective << " instead."
+ << std::endl;
}
// If the user set multiprobe, log it
- if (T > 0)
- Log::Info << "Running multiprobe LSH with " << Teffective <<
- " additional probing bins per table per query."<< std::endl;
-
+ if (Teffective > 0)
+ Log::Info << "Running multiprobe LSH with " << Teffective
+ <<" additional probing bins per table per query." << std::endl;
size_t avgIndicesReturned = 0;
@@ -859,12 +848,12 @@ Search(const size_t k,
// If the user requested more than the available number of additional probing
// bins, set Teffective to maximum T. Maximum T is 2^numProj - 1
size_t Teffective = T;
- if (T > ( (size_t) ( (1 << numProj) - 1) ) )
+ if (T > ((size_t) ((1 << numProj) - 1)))
{
- Teffective = ( 1 << numProj ) - 1;
- Log::Warn << "Requested " << T <<
- " additional bins are more than theoretical maximum. Using " <<
- Teffective << " instead." << std::endl;
+ Teffective = (1 << numProj) - 1;
+ Log::Warn << "Requested " << T << " additional bins are more than "
+ << "theoretical maximum. Using " << Teffective << " instead."
+ << std::endl;
}
// If the user set multiprobe, log it
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 73cc3a1..d0c65cd 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -16,8 +16,13 @@ using namespace mlpack;
using namespace mlpack::neighbor;
/**
- * Generates a point set of four clusters around (0.5, 0.5),
- * (3.5, 0.5), (0.5, 3.5), (3.5, 3.5).
+ * Generates a point set of four clusters:
+ * -C1 around (0.5, 3.5),
+ * -C2 around (3.5, 3.5),
+ * -C3 around (0.5, 0.5),
+ * -C4 around (3.5, 3.5).
+ *
+ * It then merges these clusters into one set, rdata.
*/
void GetPointset(const size_t N, arma::mat& rdata)
{
@@ -506,40 +511,30 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
for (size_t rep = 0; rep < repetitions; ++rep)
{
- // train a model
- LSHSearch<> multiprobeTest(
- rdata,
- numProj,
- numTables,
- hashWidth,
- secondHashSize,
- bucketSize);
+ // Train a model.
+ LSHSearch<> multiprobeTest(rdata, numProj, numTables, hashWidth,
+ secondHashSize, bucketSize);
double prevRecall = 0;
- // search with varying number of probes
+ // Search with varying number of probes.
for (size_t p = 0; p < probeTrials; ++p)
{
arma::Mat<size_t> lshNeighbors;
- arma::mat lshDistances; //move outside of loop for speed?
-
- multiprobeTest.Search(
- qdata,
- k,
- lshNeighbors,
- lshDistances,
- 0,
+ arma::mat lshDistances;
+
+ multiprobeTest.Search(qdata, k, lshNeighbors, lshDistances, 0,
numProbes[p]);
- // compute recall
- double recall = LSHSearch<>::ComputeRecall(lshNeighbors, groundTruth); //TODO: change to LSHSearch::ComputeRecall??
+ // Compute recall of this run.
+ double recall = LSHSearch<>::ComputeRecall(lshNeighbors, groundTruth);
if (p > 0)
{
- // more probes should at the very least not lower recall...
+ // More probes should at the very least not lower recall...
BOOST_REQUIRE_GE(recall, prevRecall);
- // ... and should ideally increase it a bit
+ // ... and should ideally increase it a bit.
if (recall > prevRecall + epsilonIncrease)
- foundIncrease = true;
+ foundIncrease = true;
prevRecall = recall;
}
}
@@ -563,7 +558,7 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
{
math::RandomSeed(std::time(NULL));
- // generate known deterministic clusters of points
+ // Generate known deterministic clusters of points.
const size_t N = 40;
arma::mat rdata;
GetPointset(N, rdata);
@@ -573,19 +568,18 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
const int secondHashSize = 99901;
const int bucketSize = 500;
- // 1 table, projections on orthonormal plane
+ // 1 table, projections on orthonormal plane.
arma::cube projections(2, 2, 1);
projections(0, 0, 0) = 1;
projections(1, 0, 0) = 0;
projections(0, 1, 0) = 0;
projections(1, 1, 0) = 1;
- // construct LSH object with given tables
+ // Construct LSH object with given tables.
LSHSearch<> lshTest(rdata, projections,
hashWidth, secondHashSize, bucketSize);
-
- // two points near the clusters but outside their bins. q1's lowest scoring
+ // Two points near the clusters but outside their bins. q1's lowest scoring
// perturbation vectors will map to C1 cluster's bins.
arma::mat q1;
q1 << 1.1 << arma::endr << 3.3; // vector [1.1, 3.3]
@@ -593,15 +587,15 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
arma::Mat<size_t> neighbors;
arma::mat distances;
- // Test that q1 simple search comes up empty
+ // Test that q1 simple search comes up empty.
lshTest.Search(q1, k, neighbors, distances);
cout << neighbors << endl;
- BOOST_REQUIRE( arma::all(neighbors.col(0) == N) );
+ BOOST_REQUIRE(arma::all(neighbors.col(0) == N));
- // Searching with 3 additional probing bins should find neighbors
+ // Searching with 3 additional probing bins should find neighbors.
lshTest.Search(q1, k, neighbors, distances, 0, 3);
cout << neighbors << endl;
- BOOST_REQUIRE( arma::all(neighbors.col(0) == N || neighbors.col(0) < 10) );
+ BOOST_REQUIRE(arma::all(neighbors.col(0) == N || neighbors.col(0) < 10));
}
*/
More information about the mlpack-git
mailing list