[mlpack-svn] r15559 - mlpack/conf/jenkins-conf/benchmark/methods/shogun

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 26 09:10:20 EDT 2013


Author: marcus
Date: Fri Jul 26 09:10:20 2013
New Revision: 15559

Log:
Add timeout for the shogun benchmark scripts.

Modified:
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/allknn.py	Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
   Create the All K-Nearest-Neighbors benchmark instance.
   
   @param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement All K-Nearest-Neighbors.
@@ -46,49 +48,58 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def AllKnnShogun(self, options):
-    totalTimer = Timer()
-
-    # Load input dataset.
-    # If the dataset contains two files then the second file is the query file. 
-    # In this case we add this to the command line.
-    Log.Info("Loading dataset", self.verbose)
-    if len(self.dataset) == 2:
-      referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
-      queryData = np.genfromtxt(self.dataset[1], delimiter=',')
-      queryFeat = RealFeatures(queryFeat.T)
-    else:
-      referenceData = np.genfromtxt(self.dataset, delimiter=',')
-
-    # Labels are the last row of the dataset.
-    labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
-    referenceData = referenceData[:,:-1]
-
-    with totalTimer:
-      # Get all the parameters.
-      k = re.search("-k (\d+)", options)
-      if not k:
-        Log.Fatal("Required option: Number of furthest neighbors to find.")
-        return -1
-      else:
-        k = int(k.group(1))
-        if (k < 1 or k > referenceData.shape[0]):
-          Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
-            + "less ")
-          return -1
-
-      referenceFeat = RealFeatures(referenceData.T)
-      distance = EuclideanDistance(referenceFeat, referenceFeat)
-
-      # Perform All K-Nearest-Neighbors.
-      model = SKNN(k, distance, labels)
-      model.train()      
 
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunAllKnnShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      # If the dataset contains two files then the second file is the query file. 
+      # In this case it is loaded as the separate query set.
+      Log.Info("Loading dataset", self.verbose)
       if len(self.dataset) == 2:
-        out = model.apply(queryFeat).get_labels()
+        referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
+        queryData = np.genfromtxt(self.dataset[1], delimiter=',')
+        queryFeat = RealFeatures(queryFeat.T)
       else:
-        out = model.apply(referenceFeat).get_labels()
+        referenceData = np.genfromtxt(self.dataset, delimiter=',')
 
-    return totalTimer.ElapsedTime()
+      # Labels are the last column of the dataset.
+      labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
+      referenceData = referenceData[:,:-1]
+
+      with totalTimer:
+        # Get all the parameters.
+        k = re.search("-k (\d+)", options)
+        if not k:
+          Log.Fatal("Required option: Number of furthest neighbors to find.")
+          return -1
+        else:
+          k = int(k.group(1))
+          if (k < 1 or k > referenceData.shape[0]):
+            Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
+              + "less ")
+            return -1
+
+        referenceFeat = RealFeatures(referenceData.T)
+        distance = EuclideanDistance(referenceFeat, referenceFeat)
+
+        # Perform All K-Nearest-Neighbors.
+        model = SKNN(k, distance, labels)
+        model.train()      
+
+        if len(self.dataset) == 2:
+          out = model.apply(queryFeat).get_labels()
+        else:
+          out = model.apply(referenceFeat).get_labels()
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunAllKnnShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform All K-Nearest-Neighbors. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/gmm.py	Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
   Create the Gaussian Mixture Model benchmark instance.
   
   @param dataset - Input dataset to perform Gaussian Mixture Model on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True): 
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement Gaussian Mixture Model.
@@ -45,27 +47,36 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def GMMShogun(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    dataPoints = np.genfromtxt(self.dataset, delimiter=',')
-    dataFeat = RealFeatures(dataPoints.T)
-
-    # Get all the parameters.
-    g = re.search("-g (\d+)", options)
-    n = re.search("-n (\d+)", options)
-    s = re.search("-n (\d+)", options)
-
-    g = 1 if not g else int(g.group(1))
-    n = 250 if not n else int(n.group(1))
-
-    # Create the Gaussian Mixture Model.
-    model = Clustering.GMM(g)
-    model.set_features(dataFeat)
-    with totalTimer:
-      model.train_em(1e-9, n, 1e-9)
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunGMMShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      dataPoints = np.genfromtxt(self.dataset, delimiter=',')
+      dataFeat = RealFeatures(dataPoints.T)
+
+      # Get all the parameters.
+      g = re.search("-g (\d+)", options)
+      n = re.search("-n (\d+)", options)
+      s = re.search("-n (\d+)", options)
+
+      g = 1 if not g else int(g.group(1))
+      n = 250 if not n else int(n.group(1))
+
+      # Create the Gaussian Mixture Model.
+      model = Clustering.GMM(g)
+      model.set_features(dataFeat)
+      with totalTimer:
+        model.train_em(1e-9, n, 1e-9)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunGMMShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Gaussian Mixture Model. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/kernel_pca.py	Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
   Create the Kernel Principal Components Analysis benchmark instance.
   
   @param dataset - Input dataset to perform KPCA on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement Kernel Principal Components Analysis.
@@ -46,54 +48,63 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def KPCAShogun(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    data = np.genfromtxt(self.dataset, delimiter=',')
-    dataFeat = RealFeatures(data.T)
-
-    with totalTimer:
-      # Get the new dimensionality, if it is necessary.
-      dimension = re.search('-d (\d+)', options)
-      if not dimension:
-        d = data.shape[1]
-      else:
-        d = int(dimension.group(1))      
-        if (d > data.shape[1]):
-          Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
-            + "than existing dimensionality (" + str(data.shape[1]) + ")!")
-          return -1    
-
-      # Get the kernel type and make sure it is valid.
-      kernel = re.search("-k ([^\s]+)", options)
-      if not kernel:
-          Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan'" + 
-                ", 'polynomial' and 'gaussian'.")
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunKPCAShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      data = np.genfromtxt(self.dataset, delimiter=',')
+      dataFeat = RealFeatures(data.T)
+
+      with totalTimer:
+        # Get the new dimensionality, if it is necessary.
+        dimension = re.search('-d (\d+)', options)
+        if not dimension:
+          d = data.shape[1]
+        else:
+          d = int(dimension.group(1))      
+          if (d > data.shape[1]):
+            Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
+              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+            return -1    
+
+        # Get the kernel type and make sure it is valid.
+        kernel = re.search("-k ([^\s]+)", options)
+        if not kernel:
+            Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan'" + 
+                  ", 'polynomial' and 'gaussian'.")
+            return -1
+        elif kernel.group(1) == "polynomial":
+          degree = re.search('-D (\d+)', options)
+          degree = 1 if not degree else int(degree.group(1))
+          
+          kernel = PolyKernel(dataFeat, dataFeat, degree, True)
+        elif kernel.group(1) == "gaussian":
+          kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
+        elif kernel.group(1) == "linear":
+          kernel = LinearKernel(dataFeat, dataFeat)
+        elif kernel.group(1) == "hyptan":
+          kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
+        else:
+          Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid choices"
+                  + " are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
           return -1
-      elif kernel.group(1) == "polynomial":
-        degree = re.search('-D (\d+)', options)
-        degree = 1 if not degree else int(degree.group(1))
-        
-        kernel = PolyKernel(dataFeat, dataFeat, degree, True)
-      elif kernel.group(1) == "gaussian":
-        kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
-      elif kernel.group(1) == "linear":
-        kernel = LinearKernel(dataFeat, dataFeat)
-      elif kernel.group(1) == "hyptan":
-        kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
-      else:
-        Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid choices"
-                + " are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
-        return -1
-
-      # Perform Kernel Principal Components Analysis.
-      model = KernelPCA(kernel)
-      model.set_target_dim(d)
-      model.init(dataFeat)
-      model.apply_to_feature_matrix(dataFeat)
 
-    return totalTimer.ElapsedTime()    
+        # Perform Kernel Principal Components Analysis.
+        model = KernelPCA(kernel)
+        model.set_target_dim(d)
+        model.init(dataFeat)
+        model.apply_to_feature_matrix(dataFeat)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunKPCAShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2    
 
   '''
   Perform Kernel Principal Components Analysis. If the method has been 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/kmeans.py	Fri Jul 26 09:10:20 2013
@@ -33,11 +33,13 @@
   Create the K-Means Clustering benchmark instance.
   
   @param dataset - Input dataset to perform K-Means Clustering on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement K-Means Clustering.
@@ -46,7 +48,6 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def KMeansShogun(self, options):
-    totalTimer = Timer()
 
     # Gather parameters.
     clusters = re.search("-c (\d+)", options)
@@ -76,7 +77,11 @@
           + " " + self.dataset[1] + " " + clusters.group(1) + " " 
           + str(maxIterations))
       try:
-        s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False) 
+        s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False, 
+            timeout=self.timeout)
+      except subprocess.TimeoutExpired as e:
+        Log.Warn(str(e))
+        return -2
       except Exception as e:
         Log.Fatal("Could not execute command: " + str(cmd))
         return -1
@@ -93,30 +98,41 @@
         return time      
 
     else:
-      import numpy as np
-      from shogun.Distance import EuclideanDistance
-      from shogun.Features import RealFeatures
-      from shogun import Clustering
-      from shogun.Mathematics import Math_init_random
-
-      if seed:
-        Math_init_random(seed.group(1))
-
-      data = np.genfromtxt(self.dataset, delimiter=',')
-
-      dataFeat = RealFeatures(data.T)
-      distance = EuclideanDistance(dataFeat, dataFeat)
-
-      # Create the K-Means object and perform K-Means clustering.
-      with totalTimer:
-        model = Clustering.KMeans(int(clusters.group(1)), distance)
-        model.set_max_iter(maxIterations)
-        model.train()
 
-        labels = model.apply().get_labels()
-        centers = model.get_cluster_centers()
+      @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+      def RunKMeansShogun():
+        import numpy as np
+        from shogun.Distance import EuclideanDistance
+        from shogun.Features import RealFeatures
+        from shogun import Clustering
+        from shogun.Mathematics import Math_init_random
+
+        totalTimer = Timer()
+
+        if seed:
+          Math_init_random(seed.group(1))
+
+        data = np.genfromtxt(self.dataset, delimiter=',')
+
+        dataFeat = RealFeatures(data.T)
+        distance = EuclideanDistance(dataFeat, dataFeat)
+
+        # Create the K-Means object and perform K-Means clustering.
+        with totalTimer:
+          model = Clustering.KMeans(int(clusters.group(1)), distance)
+          model.set_max_iter(maxIterations)
+          model.train()
 
-      return totalTimer.ElapsedTime()
+          labels = model.apply().get_labels()
+          centers = model.get_cluster_centers()
+
+        return totalTimer.ElapsedTime()
+
+      try:
+        return RunKMeansShogun()
+      except TimeoutError as e:
+        Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+        return -2
 
   '''
   Perform K-Means Clustering. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/lars.py	Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
   Create the All Least Angle Regression benchmark instance.
   
   @param dataset - Input dataset to perform Least Angle Regression on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement Least Angle Regression.
@@ -45,28 +47,37 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def LARSShogun(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
-    responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
-    inputFeat = RealFeatures(inputData.T)
-    responsesFeat = RegressionLabels(responsesData)
-
-    # Get all the parameters.
-    lambda1 = re.search("-l (\d+)", options)
-    lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
-
-    with totalTimer:
-      # Perform LARS.
-      model = LeastAngleRegression(False)
-      model.set_max_l1_norm(lambda1)
-      model.set_labels(responsesFeat)
-      model.train(inputFeat)
-      model.get_w(model.get_path_size() - 1)
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunLARSShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
+      inputFeat = RealFeatures(inputData.T)
+      responsesFeat = RegressionLabels(responsesData)
+
+      # Get all the parameters.
+      lambda1 = re.search("-l (\d+)", options)
+      lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
+
+      with totalTimer:
+        # Perform LARS.
+        model = LeastAngleRegression(False)
+        model.set_max_l1_norm(lambda1)
+        model.set_labels(responsesFeat)
+        model.train(inputFeat)
+        model.get_w(model.get_path_size() - 1)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunLARSShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Least Angle Regression. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/linear_regression.py	Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
   Create the Linear Regression benchmark instance.
   
   @param dataset - Input dataset to perform Linear Regression on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement Linear Regression.
@@ -45,27 +47,36 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def LinearRegressionShogun(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    # If the dataset contains two files then the second file is the responses
-    # file. In this case we add this to the command line.
-    Log.Info("Loading dataset", self.verbose)
-    if len(self.dataset) == 2:
-      X = np.genfromtxt(self.dataset[0], delimiter=',')
-      y = np.genfromtxt(self.dataset[1], delimiter=',')
-    else:
-      X = np.genfromtxt(self.dataset, delimiter=',')
-      y = X[:, (X.shape[1] - 1)]
-      X = X[:,:-1]
-
-    with totalTimer:
-      # Perform linear regression.
-      model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
-      model.train()
-      b = model.get_w()
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunLinearRegressionShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      # If the dataset contains two files then the second file is the responses
+      # file. In this case the responses are read from that second file.
+      Log.Info("Loading dataset", self.verbose)
+      if len(self.dataset) == 2:
+        X = np.genfromtxt(self.dataset[0], delimiter=',')
+        y = np.genfromtxt(self.dataset[1], delimiter=',')
+      else:
+        X = np.genfromtxt(self.dataset, delimiter=',')
+        y = X[:, (X.shape[1] - 1)]
+        X = X[:,:-1]
+
+      with totalTimer:
+        # Perform linear regression.
+        model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
+        model.train()
+        b = model.get_w()
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunLinearRegressionShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Linear Regression. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/nbc.py	Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
   Create the Naive Bayes Classifier benchmark instance.
   
   @param dataset - Input dataset to perform NBC on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the shogun libary to implement Naive Bayes Classifier.
@@ -45,28 +47,37 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def NBCShogun(self, options):
-    totalTimer = Timer()
-    
-    Log.Info("Loading dataset", self.verbose)
-    # Load train and test dataset.
-    trainData = np.genfromtxt(self.dataset[0], delimiter=',')
-    testData = np.genfromtxt(self.dataset[1], delimiter=',')
-
-    # Labels are the last row of the training set.
-    labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
-
-    with totalTimer:
-      # Transform into features.
-      trainFeat = RealFeatures(trainData[:,:-1].T)
-      testFeat = RealFeatures(testData.T)
-
-      # Create and train the classifier.
-      nbc = GaussianNaiveBayes(trainFeat, labels)
-      nbc.train()
-      # Run Naive Bayes Classifier on the test dataset.
-      nbc.apply(testFeat).get_labels()
 
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunNBCShogun():
+      totalTimer = Timer()
+      
+      Log.Info("Loading dataset", self.verbose)
+      # Load train and test dataset.
+      trainData = np.genfromtxt(self.dataset[0], delimiter=',')
+      testData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+      # Labels are the last column of the training set.
+      labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
+
+      with totalTimer:
+        # Transform into features.
+        trainFeat = RealFeatures(trainData[:,:-1].T)
+        testFeat = RealFeatures(testData.T)
+
+        # Create and train the classifier.
+        nbc = GaussianNaiveBayes(trainFeat, labels)
+        nbc.train()
+        # Run Naive Bayes Classifier on the test dataset.
+        nbc.apply(testFeat).get_labels()
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunNBCShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Naive Bayes Classifier. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py	Fri Jul 26 09:10:20 2013
@@ -32,11 +32,13 @@
   Create the Principal Components Analysis benchmark instance.
   
   @param dataset - Input dataset to perform PCA on.
+  @param timeout - Time limit in seconds for the run. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
     # Load input dataset.
     Log.Info("Loading dataset", verbose)
@@ -49,35 +51,44 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def PCAShogun(self, options):
-    totalTimer = Timer()
-    
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    feat = RealFeatures(self.data.T)
-
-    with totalTimer:
-      # Find out what dimension we want.
-      match = re.search('-d (\d+)', options)
-
-      if not match:
-        k = self.data.shape[1]
-      else:
-        k = int(match.group(1))      
-        if (k > self.data.shape[1]):
-          Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater than"
-              + "existing dimensionality (" + str(self.data.shape[1]) + ")!")
-          return -1
-
-      # Get the options for running PCA.
-      s = True if options.find("-s") > -1 else False
-
-      # Perform PCA.
-      prep = ShogunPCA(s)
-      prep.set_target_dim(k)
-      prep.init(feat)
-      prep.apply_to_feature_matrix(feat)
 
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunPCAShogun():
+      totalTimer = Timer()
+      
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      feat = RealFeatures(self.data.T)
+
+      with totalTimer:
+        # Find out what dimension we want.
+        match = re.search('-d (\d+)', options)
+
+        if not match:
+          k = self.data.shape[1]
+        else:
+          k = int(match.group(1))      
+          if (k > self.data.shape[1]):
+            Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater than"
+                + "existing dimensionality (" + str(self.data.shape[1]) + ")!")
+            return -1
+
+        # Get the options for running PCA.
+        s = True if options.find("-s") > -1 else False
+
+        # Perform PCA.
+        prep = ShogunPCA(s)
+        prep.set_target_dim(k)
+        prep.init(feat)
+        prep.apply_to_feature_matrix(feat)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunPCAShogun()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Principal Components Analysis. If the method has been successfully 



More information about the mlpack-svn mailing list