[mlpack-svn] r16110 - in mlpack/conf/jenkins-conf/benchmark: . methods/flann methods/flann/src

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Jan 4 08:22:34 EST 2014


Author: marcus
Date: Sat Jan  4 08:22:33 2014
New Revision: 16110

Log:
Add flann allknn method src and benchmark script.

Added:
   mlpack/conf/jenkins-conf/benchmark/methods/flann/
   mlpack/conf/jenkins-conf/benchmark/methods/flann/allknn.py
   mlpack/conf/jenkins-conf/benchmark/methods/flann/src/
   mlpack/conf/jenkins-conf/benchmark/methods/flann/src/allknn.cpp
Modified:
   mlpack/conf/jenkins-conf/benchmark/Makefile

Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/Makefile	(original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile	Sat Jan  4 08:22:33 2014
@@ -57,6 +57,7 @@
 export LD_LIBRARY_PATH=/opt/shogun/shogun-2.1.0/lib/
 export MS_PRINT_BIN=/usr/bin/ms_print
 export VALGRIND_BIN=/usr/bin/valgrind
+export FLANN_PATH=methods/flann/
 
 # Color settings.
 NO_COLOR=\033[0m

Added: mlpack/conf/jenkins-conf/benchmark/methods/flann/allknn.py
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/flann/allknn.py	Sat Jan  4 08:22:33 2014
@@ -0,0 +1,124 @@
+'''
+  @file allknn.py
+  @author Marcus Edel
+
+  Class to benchmark the flann All K-Nearest-Neighbors method with kd-trees.
+'''
+
+import os
+import sys
+import inspect
+
+# Add the util directory to the module search path. This approach works even if
+# the path contains symlinks to modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+from log import *
+from profiler import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+'''
+This class implements the All K-Nearest-Neighbor Search benchmark.
+'''
+class ALLKNN(object):
+
+  ''' 
+  Create the All K-Nearest-Neighbors benchmark instance, show some informations 
+  and return the instance.
+  
+  @param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+  @param timeout - The time until the timeout. Default no timeout.
+  @param path - Path to the flann executable.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, timeout=0, path=os.environ["FLANN_PATH"], 
+        verbose = True):
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+    self.timeout = timeout
+
+  '''
+  Perform All K-Nearest-Neighbors. If the method has been successfully completed
+  return the elapsed time in seconds.
+
+  @param options - Extra options for the method.
+  @return - Elapsed time in seconds or a negative value if the method was not 
+  successful.
+  '''
+  def RunMethod(self, options):
+    Log.Info("Perform ALLKNN.", self.verbose)
+
+    # If the dataset contains two files then the second file is the query file. 
+    # In this case we add this to the command line.
+    if len(self.dataset) == 2:
+      cmd = shlex.split(self.path + "allknn -r " + self.dataset[0] + " -q " + 
+          self.dataset[1] + " -v " + options)
+    else:
+      cmd = shlex.split(self.path + "allknn -r " + self.dataset + 
+          " -v " + options)   
+
+    # Run command with the nessecary arguments and return its output as a byte 
+    # string. We have untrusted input so we disable all shell based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False, 
+          timeout=self.timeout) 
+    except subprocess.TimeoutExpired as e:
+      Log.Warn(str(e))
+      return -2
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Return the elapsed time.
+    timer = self.parseTimer(s)
+    if not timer:
+      Log.Fatal("Can't parse the timer")
+      return -1
+    else:
+      time = self.GetTime(timer)
+      Log.Info(("total time: %fs" % (time)), self.verbose)
+
+      return time
+
+  '''
+  Parse the timer data form a given string.
+
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data or -1 in case of an error.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(r"""
+        .*?knn_time: (?P<knn_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+    
+    match = pattern.match(data.decode())
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["knn_time"])
+      
+      if match.group("knn_time").count(".") == 1:
+        return timer(float(match.group("knn_time")))
+      else:
+        return timer(float(match.group("knn_time").replace(",", ".")))
+
+  '''
+  Return the elapsed time in seconds.
+
+  @param timer - Namedtuple that contains the timer data.
+  @return Elapsed time in seconds.
+  '''
+  def GetTime(self, timer):
+    return timer.total_time

Added: mlpack/conf/jenkins-conf/benchmark/methods/flann/src/allknn.cpp
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/flann/src/allknn.cpp	Sat Jan  4 08:22:33 2014
@@ -0,0 +1,103 @@
+/**
+ * @file allknn.cpp
+ * @author Marcus Edel
+ *
+ * Code to benchmark the flann All K-Nearest-Neighbors method with kd-trees.
+ */
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/timers.hpp>
+#include <flann/flann.hpp>
+
+using namespace mlpack;
+using namespace flann;
+using namespace std;
+
+// Program name and description, registered through the PROGRAM_INFO macro.
+PROGRAM_INFO("All K-Nearest-Neighbors",
+    "This program will calculate the all k-nearest-neighbors with the flann "
+    "library.");
+
+// Command line parameters; main() reads them back by name via CLI::GetParam().
+PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
+    "r");
+PARAM_INT_REQ("k", "Number of nearest neighbors to find.", "k");
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
+
+/**
+ * Load the data, then time flann's kd-tree index construction plus the
+ * k-nearest-neighbor search under the "knn_time" timer.
+ *
+ * @return 0 once the search has finished.
+ */
+int main(int argc, char** argv)
+{
+    // Parse command line options.
+    CLI::ParseCommandLine(argc, argv);
+
+    // Get all the parameters.
+    const string referenceFile = CLI::GetParam<string>("reference_file");
+    const string queryFile = CLI::GetParam<string>("query_file");
+    const int lsInt = CLI::GetParam<int>("leaf_size");
+    const int kInt = CLI::GetParam<int>("k");
+
+    arma::mat referenceData;
+    arma::mat queryData; // Must outlive the flann matrix wrapping its memory.
+    data::Load(referenceFile, referenceData, true);
+    Log::Info << "Loaded reference data from '" << referenceFile << "' ("
+        << referenceData.n_rows << " x " << referenceData.n_cols << ")." << endl;
+
+    if (queryFile != "")
+    {
+        data::Load(queryFile, queryData, true);
+        Log::Info << "Loaded query data from '" << queryFile << "' ("
+            << queryData.n_rows << " x " << queryData.n_cols << ")." << endl;
+    }
+
+    // Sanity check on k: it must lie in [1, number of reference points].  The
+    // signed value is checked so zero and negative inputs are rejected too.
+    if (kInt <= 0 || static_cast<size_t>(kInt) > referenceData.n_cols)
+    {
+        Log::Fatal << "Invalid k: " << kInt << "; must be greater than 0 and "
+            << "less than or equal to the number of reference points ("
+            << referenceData.n_cols << ")." << endl;
+    }
+    const size_t k = static_cast<size_t>(kInt);
+
+    // Sanity check on leaf size.
+    if (lsInt < 0)
+    {
+        Log::Fatal << "Invalid leaf size: " << lsInt << ".  Must be greater "
+            "than or equal to 0." << endl;
+    }
+    const size_t leafSize = static_cast<size_t>(lsInt);
+
+    // Wrap Armadillo memory without copying; each column becomes a flann row.
+    flann::Matrix<double> dataset(referenceData.memptr(), referenceData.n_cols,
+        referenceData.n_rows);
+    flann::Matrix<double> query = dataset;
+    if (queryFile != "")
+    {
+        query = flann::Matrix<double>(queryData.memptr(), queryData.n_cols,
+            queryData.n_rows);
+    }
+
+    // Armadillo owns the result buffers, so they are freed on every exit path.
+    // Column-major (k x rows) storage matches flann's row-major (rows x k).
+    arma::Mat<int> indexBuffer(k, query.rows);
+    arma::mat distBuffer(k, query.rows);
+    Matrix<int> indices(indexBuffer.memptr(), query.rows, k);
+    Matrix<double> dists(distBuffer.memptr(), query.rows, k);
+
+    Timer::Start("knn_time");
+
+    // Perform All K-Nearest-Neighbors.
+    Index<L2<double> > index(dataset, flann::KDTreeSingleIndexParams(leafSize));
+    index.buildIndex();
+    index.knnSearch(query, indices, dists, k, flann::SearchParams(0));
+
+    Timer::Stop("knn_time");
+
+    return 0;
+}



More information about the mlpack-svn mailing list