[mlpack-svn] r16137 - in mlpack/conf/jenkins-conf/benchmark: . methods/ann methods/ann/src
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Jan 11 07:50:27 EST 2014
Author: marcus
Date: Sat Jan 11 07:50:27 2014
New Revision: 16137
Log:
Add ann allknn method src and benchmark script.
Added:
mlpack/conf/jenkins-conf/benchmark/methods/ann/
mlpack/conf/jenkins-conf/benchmark/methods/ann/allknn.py
mlpack/conf/jenkins-conf/benchmark/methods/ann/src/
mlpack/conf/jenkins-conf/benchmark/methods/ann/src/allknn.cpp
Modified:
mlpack/conf/jenkins-conf/benchmark/Makefile
Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/Makefile (original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile Sat Jan 11 07:50:27 2014
@@ -58,6 +58,7 @@
export MS_PRINT_BIN=/usr/bin/ms_print
export VALGRIND_BIN=/usr/bin/valgrind
export FLANN_PATH=methods/flann/
+export ANN_PATH=methods/ann/
# Color settings.
NO_COLOR=\033[0m
Added: mlpack/conf/jenkins-conf/benchmark/methods/ann/allknn.py
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/ann/allknn.py Sat Jan 11 07:50:27 2014
@@ -0,0 +1,124 @@
+'''
+ @file allknn.py
+ @author Marcus Edel
+
+ Class to benchmark the ann All K-Nearest-Neighbors method with kd-trees.
+'''
+
+import os
+import sys
+import inspect
+
+# Add the util directory to the module search path. The directory is resolved
+# through realpath, so this also works when the path contains symlinks.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+ sys.path.insert(0, cmd_subfolder)
+
+from log import *
+from profiler import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+'''
+This class implements the All K-Nearest-Neighbor Search benchmark.
+'''
+class ALLKNN(object):
+
+ '''
+ Create the All K-Nearest-Neighbors benchmark instance, show some informations
+ and return the instance.
+
+ @param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+ @param timeout - The time until the timeout. Default no timeout.
+ @param path - Path to the ann executable.
+ @param verbose - Display informational messages.
+ '''
+ def __init__(self, dataset, timeout=0, path=os.environ["ANN_PATH"],
+ verbose = True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+ self.timeout = timeout
+
+ '''
+ Perform All K-Nearest-Neighbors. If the method has been successfully completed
+ return the elapsed time in seconds.
+
+ @param options - Extra options for the method.
+ @return - Elapsed time in seconds or a negative value if the method was not
+ successful.
+ '''
+ def RunMethod(self, options):
+ Log.Info("Perform ALLKNN.", self.verbose)
+
+ # If the dataset contains two files then the second file is the query file.
+ # In this case we add this to the command line.
+ if len(self.dataset) == 2:
+ cmd = shlex.split(self.path + "allknn -r " + self.dataset[0] + " -q " +
+ self.dataset[1] + " -v " + options)
+ else:
+ cmd = shlex.split(self.path + "allknn -r " + self.dataset +
+ " -v " + options)
+
+ # Run command with the nessecary arguments and return its output as a byte
+ # string. We have untrusted input so we disable all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+ timeout=self.timeout)
+ except subprocess.TimeoutExpired as e:
+ Log.Warn(str(e))
+ return -2
+ except Exception as e:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % (time)), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data form a given string.
+
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data or -1 in case of an error.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(r"""
+ .*?knn_time: (?P<knn_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data.decode())
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["knn_time"])
+
+ if match.group("knn_time").count(".") == 1:
+ return timer(float(match.group("knn_time")))
+ else:
+ return timer(float(match.group("knn_time").replace(",", ".")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ return timer.total_time
Added: mlpack/conf/jenkins-conf/benchmark/methods/ann/src/allknn.cpp
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/ann/src/allknn.cpp Sat Jan 11 07:50:27 2014
@@ -0,0 +1,114 @@
+/**
+ * @file allknn.cpp
+ * @author Marcus Edel
+ *
+ * Code to benchmark the ann All K-Nearest-Neighbors method with kd-trees.
+ */
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/timers.hpp>
+#include <ANN/ANN.h>
+
+using namespace mlpack;
+using namespace std;
+
+// Program name and description registered through the PROGRAM_INFO macro.
+PROGRAM_INFO("All K-Nearest-Neighbors",
+ "This program will calculate the all k-nearest-neighbors with the ann "
+ "library.");
+
+// Command-line parameters; main() reads them back by name via CLI::GetParam.
+PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
+ "r");
+PARAM_INT_REQ("k", "Number of nearest neighbors to find.", "k");
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
+
+
+/**
+ * Load the reference dataset (and the optional query dataset), build an ANN
+ * kd-tree on the reference points and run a k-nearest-neighbor search for every
+ * query point. Tree construction and all searches are measured together under
+ * the "knn_time" timer.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Command line arguments.
+ * @return 0 on success.
+ */
+int main(int argc, char **argv)
+{
+  // Parse command line options.
+  CLI::ParseCommandLine(argc, argv);
+
+  // Get all the parameters.
+  const string referenceFile = CLI::GetParam<string>("reference_file");
+  const string queryFile = CLI::GetParam<string>("query_file");
+  const int lsInt = CLI::GetParam<int>("leaf_size");
+  const int k = CLI::GetParam<int>("k");
+
+  arma::mat referenceData;
+  arma::mat queryData; // So it doesn't go out of scope.
+  data::Load(referenceFile, referenceData, true);
+
+  Log::Info << "Loaded reference data from '" << referenceFile << "' ("
+      << referenceData.n_rows << " x " << referenceData.n_cols << ")." << endl;
+
+  if (queryFile != "")
+  {
+    data::Load(queryFile, queryData, true);
+    Log::Info << "Loaded query data from '" << queryFile << "' ("
+        << queryData.n_rows << " x " << queryData.n_cols << ")." << endl;
+  }
+  else
+  {
+    // Without a query file every reference point is used as a query point.
+    queryData = referenceData;
+  }
+
+  // NOTE(review): the checks below assume, as the original code did, that
+  // Log::Fatal terminates the program once the message is complete - confirm.
+
+  // Sanity check on k value: must be greater than 0, must be less than or
+  // equal to the number of reference points. k is kept signed so that zero and
+  // negative values are rejected and reported as typed by the user.
+  if (k <= 0 || static_cast<size_t>(k) > referenceData.n_cols)
+  {
+    Log::Fatal << "Invalid k: " << k << "; must be greater than 0 and less ";
+    Log::Fatal << "than or equal to the number of reference points (";
+    Log::Fatal << referenceData.n_cols << ")." << endl;
+  }
+
+  // Sanity check on leaf size.
+  if (lsInt < 0)
+  {
+    Log::Fatal << "Invalid leaf size: " << lsInt << ". Must be greater "
+        "than or equal to 0." << endl;
+  }
+
+  // The search reads as many coordinates from each query point as the tree has
+  // dimensions, so both datasets must have the same number of rows.
+  if (queryData.n_rows != referenceData.n_rows)
+  {
+    Log::Fatal << "Query dimensionality (" << queryData.n_rows << ") does not "
+        "match reference dimensionality (" << referenceData.n_rows << ")."
+        << endl;
+  }
+
+  // One ANN point per column (data point), one coordinate per row. Using
+  // n_elem here would make the tree index rows * cols points, most of them
+  // uninitialized.
+  const int numPts = static_cast<int>(referenceData.n_cols);
+  const int dim = static_cast<int>(referenceData.n_rows);
+
+  ANNidxArray nnIdx = new ANNidx[k];
+  ANNdistArray dists = new ANNdist[k];
+  ANNpointArray dataPts = annAllocPts(numPts, dim);
+
+  // Copy the reference points into ANN's point array.
+  for (int i = 0; i < numPts; ++i)
+  {
+    for (int j = 0; j < dim; ++j)
+    {
+      dataPts[i][j] = referenceData(j, i);
+    }
+  }
+
+  Timer::Start("knn_time");
+
+  ANNkd_tree* kdTree = new ANNkd_tree(dataPts, numPts, dim, lsInt);
+
+  arma::vec queryPoint;
+  for (size_t i = 0; i < queryData.n_cols; ++i)
+  {
+    queryPoint = queryData.col(i);
+    kdTree->annkSearch(queryPoint.memptr(), k, nnIdx, dists, 0);
+
+    // Take the square root of each returned distance (ANN is documented to
+    // return squared distances for its default Euclidean metric).
+    for (int j = 0; j < k; ++j)
+    {
+      dists[j] = sqrt(dists[j]);
+    }
+  }
+
+  Timer::Stop("knn_time");
+
+  // Release the tree before the point array it was built on.
+  delete kdTree;
+  delete [] nnIdx;
+  delete [] dists;
+  annDeallocPts(dataPts);
+  annClose();
+  return 0;
+}
More information about the mlpack-svn
mailing list