[mlpack-svn] r15557 - mlpack/conf/jenkins-conf/benchmark/methods/scikit

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 26 09:05:19 EDT 2013


Author: marcus
Date: Fri Jul 26 09:05:18 2013
New Revision: 15557

Log:
Add timeout for the scikit benchmark scripts.

Modified:
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py
   mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the All K-Nearest-Neighbors benchmark instance.
   
   @param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement All K-Nearest-Neighbors.
@@ -44,52 +46,61 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def AllKnnScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    # If the dataset contains two files then the second file is the query file 
-    # In this case we add this to the command line.
-    Log.Info("Loading dataset", self.verbose)
-    if len(self.dataset) == 2:
-      referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
-      queryData = np.genfromtxt(self.dataset[1], delimiter=',')
-    else:
-      referenceData = np.genfromtxt(self.dataset, delimiter=',')
-
-    with totalTimer:
-      # Get all the parameters.
-      k = re.search("-k (\d+)", options)
-      leafSize = re.search("-l (\d+)", options)
-
-      if not k:
-        Log.Fatal("Required option: Number of furthest neighbors to find.")
-        return -1
-      else:
-        k = int(k.group(1))
-        if (k < 1 or k > referenceData.shape[0]):
-          Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
-            + "less ")
-          return -1
-
-      if not leafSize:
-        l = 20
-      elif int(leafSize.group(1)) < 0:
-        Log.Fatal("Invalid leaf size: " + str(leafSize.group(1)) + ". Must be " +
-            "greater than or equal to 0.")
-        return -1
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunAllKnnScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      # If the dataset contains two files then the second file is the query file 
+      # In this case we add this to the command line.
+      Log.Info("Loading dataset", self.verbose)
+      if len(self.dataset) == 2:
+        referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
+        queryData = np.genfromtxt(self.dataset[1], delimiter=',')
       else:
-        l = int(leafSize.group(1))
+        referenceData = np.genfromtxt(self.dataset, delimiter=',')
 
-      # Perform All K-Nearest-Neighbors.
-      model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree', leaf_size=l)
-      model.fit(referenceData)
+      with totalTimer:
+        # Get all the parameters.
+        k = re.search("-k (\d+)", options)
+        leafSize = re.search("-l (\d+)", options)
 
-      if len(self.dataset) == 2:
-        out = model.kneighbors(queryData, k, return_distance=True)
-      else:
-        out = model.kneighbors(referenceData, k, return_distance=True)
+        if not k:
+          Log.Fatal("Required option: Number of furthest neighbors to find.")
+          return -1
+        else:
+          k = int(k.group(1))
+          if (k < 1 or k > referenceData.shape[0]):
+            Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
+              + "less ")
+            return -1
+
+        if not leafSize:
+          l = 20
+        elif int(leafSize.group(1)) < 0:
+          Log.Fatal("Invalid leaf size: " + str(leafSize.group(1)) + ". Must be " +
+              "greater than or equal to 0.")
+          return -1
+        else:
+          l = int(leafSize.group(1))
 
-    return totalTimer.ElapsedTime()
+        # Perform All K-Nearest-Neighbors.
+        model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree', leaf_size=l)
+        model.fit(referenceData)
+
+        if len(self.dataset) == 2:
+          out = model.kneighbors(queryData, k, return_distance=True)
+        else:
+          out = model.kneighbors(referenceData, k, return_distance=True)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunAllKnnScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform All K-Nearest-Neighbors. If the method has been successfully completed 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Gaussian Mixture Model benchmark instance.
   
   @param dataset - Input dataset to perform Gaussian Mixture Model on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Gaussian Mixture Model.
@@ -44,27 +46,36 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def GMMScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    dataPoints = np.genfromtxt(self.dataset, delimiter=',')
-
-    # Get all the parameters.
-    g = re.search("-g (\d+)", options)
-    n = re.search("-n (\d+)", options)
-    s = re.search("-n (\d+)", options)
-
-    g = 1 if not g else int(g.group(1))
-    n = 250 if not n else int(n.group(1))
-    s = 0 if not s else int(s.group(1))
-
-    # Create the Gaussian Mixture Model.
-    model = mixture.GMM(n_components=g, covariance_type='full', random_state=s, 
-        n_iter=n)
-    with totalTimer:
-      model.fit(dataPoints) 
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunGMMScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      dataPoints = np.genfromtxt(self.dataset, delimiter=',')
+
+      # Get all the parameters.
+      g = re.search("-g (\d+)", options)
+      n = re.search("-n (\d+)", options)
+      s = re.search("-n (\d+)", options)
+
+      g = 1 if not g else int(g.group(1))
+      n = 250 if not n else int(n.group(1))
+      s = 0 if not s else int(s.group(1))
+
+      # Create the Gaussian Mixture Model.
+      model = mixture.GMM(n_components=g, covariance_type='full', random_state=s, 
+          n_iter=n)
+      with totalTimer:
+        model.fit(dataPoints) 
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunGMMScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Gaussian Mixture Model. If the method has been successfully completed 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the independent component analysis benchmark instance.
   
   @param dataset - Input dataset to perform independent component analysis on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement independent component analysis.
@@ -44,21 +46,30 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def ICAScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    data = np.genfromtxt(self.dataset, delimiter=',')
-
-    s = re.search('-s (\d+)', options)
-    s = 0 if not s else int(s.group(1))
-
-    # Perform ICA.
-    with totalTimer:
-      model = FastICA(random_state=s)
-      ic = model.fit(data).transform(data)
-      mixing = model.get_mixing_matrix()
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunICAScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      data = np.genfromtxt(self.dataset, delimiter=',')
+
+      s = re.search('-s (\d+)', options)
+      s = 0 if not s else int(s.group(1))
+
+      # Perform ICA.
+      with totalTimer:
+        model = FastICA(random_state=s)
+        ic = model.fit(data).transform(data)
+        mixing = model.get_mixing_matrix()
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunICAScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform independent component analysis. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Kernel Principal Components Analysis benchmark instance.
   
   @param dataset - Input dataset to perform KPCA on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Kernel Principal Components Analysis.
@@ -44,47 +46,56 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def KPCAScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    data = np.genfromtxt(self.dataset, delimiter=',')
-
-    with totalTimer:
-      # Get the new dimensionality, if it is necessary.
-      dimension = re.search('-d (\d+)', options)
-      if not dimension:
-        d = data.shape[1]
-      else:
-        d = int(dimension.group(1))      
-        if (d > data.shape[1]):
-          Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
-            + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunKPCAScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      data = np.genfromtxt(self.dataset, delimiter=',')
+
+      with totalTimer:
+        # Get the new dimensionality, if it is necessary.
+        dimension = re.search('-d (\d+)', options)
+        if not dimension:
+          d = data.shape[1]
+        else:
+          d = int(dimension.group(1))      
+          if (d > data.shape[1]):
+            Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
+              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+            return -1
+
+        # Get the kernel type and make sure it is valid.
+        kernel = re.search("-k ([^\s]+)", options)
+        if not kernel:
+          Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan' " + 
+                "and 'polynomial'.")
           return -1
+        elif kernel.group(1) == "linear":
+          model = KernelPCA(n_components=d, kernel="linear")
+        elif kernel.group(1) == "hyptan":
+          model = KernelPCA(n_components=d, kernel="sigmoid")
+        elif kernel.group(1) == "polynomial":
+          degree = re.search('-D (\d+)', options)
+          degree = 1 if not degree else int(degree.group(1))
+
+          model = KernelPCA(n_components=d, kernel="poly", degree=degree)
+        else:
+          Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
+              "choices are 'linear', 'hyptan' and 'polynomial'.")
+          return -1
+          
+        out = model.fit_transform(data)
 
-      # Get the kernel type and make sure it is valid.
-      kernel = re.search("-k ([^\s]+)", options)
-      if not kernel:
-        Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan' " + 
-              "and 'polynomial'.")
-        return -1
-      elif kernel.group(1) == "linear":
-        model = KernelPCA(n_components=d, kernel="linear")
-      elif kernel.group(1) == "hyptan":
-        model = KernelPCA(n_components=d, kernel="sigmoid")
-      elif kernel.group(1) == "polynomial":
-        degree = re.search('-D (\d+)', options)
-        degree = 1 if not degree else int(degree.group(1))
-
-        model = KernelPCA(n_components=d, kernel="poly", degree=degree)
-      else:
-        Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
-            "choices are 'linear', 'hyptan' and 'polynomial'.")
-        return -1
-        
-      out = model.fit_transform(data)
+      return totalTimer.ElapsedTime()
 
-    return totalTimer.ElapsedTime()
+    try:
+      return RunKPCAScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Kernel Principal Components Analysis. If the method has been 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the K-Means Clustering benchmark instance.
   
   @param dataset - Input dataset to perform K-Means on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement K-Means Clustering.
@@ -44,50 +46,59 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def KMeansScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    # If the dataset contains two files then the second file is the centroids 
-    # file. In this case we add this to the command line.
-    Log.Info("Loading dataset", self.verbose)
-    if len(self.dataset) == 2:
-      data = np.genfromtxt(self.dataset[0], delimiter=',')
-      centroids = np.genfromtxt(self.dataset[1], delimiter=',')
-    else:
-      data = np.genfromtxt(self.dataset, delimiter=',')
-
-    # Gather parameters.
-    clusters = re.search("-c (\d+)", options)
-    maxIterations = re.search("-m (\d+)", options)
-    seed = re.search("-s (\d+)", options)
-
-    # Now do validation of options.
-    if not clusters and len(self.dataset) != 2:
-      Log.Fatal("Required option: Number of clusters or cluster locations.")
-      return -1
-    elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
-      Log.Fatal("Invalid number of clusters requested! Must be greater than or "
-          + "equal to 1.")
-      return -1
-
-    m = 1000 if not maxIterations else int(maxIterations.group(1))
-
-    # Create the KMeans object and perform K-Means clustering.
-    with totalTimer:
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunKMeansScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      # If the dataset contains two files then the second file is the centroids 
+      # file. In this case we add this to the command line.
+      Log.Info("Loading dataset", self.verbose)
       if len(self.dataset) == 2:
-        kmeans = KMeans(k=centroids.shape[1], init=centroids, n_init=1, 
-            max_iter=m)
-      elif seed:
-        kmeans = KMeans(n_clusters=int(clusters.group(1)), init='random', 
-            n_init=1, max_iter=m, random_state=int(seed.group(1)))
+        data = np.genfromtxt(self.dataset[0], delimiter=',')
+        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
       else:
-        kmeans = KMeans(n_clusters=int(clusters.group(1)), n_init=1, max_iter=m)      
-
-      kmeans.fit(data)
-      labels = kmeans.labels_
-      centers = kmeans.cluster_centers_
+        data = np.genfromtxt(self.dataset, delimiter=',')
 
-    return totalTimer.ElapsedTime()
+      # Gather parameters.
+      clusters = re.search("-c (\d+)", options)
+      maxIterations = re.search("-m (\d+)", options)
+      seed = re.search("-s (\d+)", options)
+
+      # Now do validation of options.
+      if not clusters and len(self.dataset) != 2:
+        Log.Fatal("Required option: Number of clusters or cluster locations.")
+        return -1
+      elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
+        Log.Fatal("Invalid number of clusters requested! Must be greater than or "
+            + "equal to 1.")
+        return -1
+
+      m = 1000 if not maxIterations else int(maxIterations.group(1))
+
+      # Create the KMeans object and perform K-Means clustering.
+      with totalTimer:
+        if len(self.dataset) == 2:
+          kmeans = KMeans(k=centroids.shape[1], init=centroids, n_init=1, 
+              max_iter=m)
+        elif seed:
+          kmeans = KMeans(n_clusters=int(clusters.group(1)), init='random', 
+              n_init=1, max_iter=m, random_state=int(seed.group(1)))
+        else:
+          kmeans = KMeans(n_clusters=int(clusters.group(1)), n_init=1, max_iter=m)      
+
+        kmeans.fit(data)
+        labels = kmeans.labels_
+        centers = kmeans.cluster_centers_
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunKMeansScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform K-Means Clustering. If the method has been successfully completed 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Least Angle Regression benchmark instance.
   
   @param dataset - Input dataset to perform Least Angle Regression on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Least Angle Regression.
@@ -44,24 +46,33 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def LARSScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
-    responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
-
-    with totalTimer:
-      # Get all the parameters.
-      lambda1 = re.search("-l (\d+)", options)
-      lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
-
-      # Perform LARS.
-      model = LassoLars(alpha=lambda1)
-      model.fit(inputData, responsesData)
-      out = model.coef_
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunLARSScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+      with totalTimer:
+        # Get all the parameters.
+        lambda1 = re.search("-l (\d+)", options)
+        lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
+
+        # Perform LARS.
+        model = LassoLars(alpha=lambda1)
+        model.fit(inputData, responsesData)
+        out = model.coef_
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunLARSScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Least Angle Regression. If the method has been successfully completed 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Linear Regression benchmark instance.
   
   @param dataset - Input dataset to perform Linear Regression on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Linear Regression.
@@ -44,27 +46,36 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def LinearRegressionScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    # If the dataset contains two files then the second file is the responses 
-    # file. In this case we add this to the command line.
-    Log.Info("Loading dataset", self.verbose)
-    if len(self.dataset) == 2:
-      X = np.genfromtxt(self.dataset[0], delimiter=',')
-      y = np.genfromtxt(self.dataset[1], delimiter=',')
-    else:
-      X = np.genfromtxt(self.dataset, delimiter=',')
-      y = X[:, (X.shape[1] - 1)]
-      X = X[:,:-1]
-
-    with totalTimer:
-      # Perform linear regression.
-      model = SLinearRegression()
-      model.fit(X, y, n_jobs=-1)
-      b = model.coef_
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunLinearRegressionScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      # If the dataset contains two files then the second file is the responses 
+      # file. In this case we add this to the command line.
+      Log.Info("Loading dataset", self.verbose)
+      if len(self.dataset) == 2:
+        X = np.genfromtxt(self.dataset[0], delimiter=',')
+        y = np.genfromtxt(self.dataset[1], delimiter=',')
+      else:
+        X = np.genfromtxt(self.dataset, delimiter=',')
+        y = X[:, (X.shape[1] - 1)]
+        X = X[:,:-1]
+
+      with totalTimer:
+        # Perform linear regression.
+        model = SLinearRegression()
+        model.fit(X, y, n_jobs=-1)
+        b = model.coef_
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunLinearRegressionScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Linear Regression. If the method has been successfully completed 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Naive Bayes Classifier benchmark instance.
   
   @param dataset - Input dataset to perform NBC on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Naive Bayes Classifier.
@@ -44,25 +46,34 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def NBCScikit(self, options):
-    totalTimer = Timer()
-    
-    Log.Info("Loading dataset", self.verbose)
-    # Load train and test dataset.
-    trainData = np.genfromtxt(self.dataset[0], delimiter=',')
-    testData = np.genfromtxt(self.dataset[1], delimiter=',')
-
-    # Labels are the last row of the training set.
-    labels = trainData[:, (trainData.shape[1] - 1)]
-    trainData = trainData[:,:-1]
-
-    with totalTimer:      
-      # Create and train the classifier.
-      nbc = MultinomialNB()
-      nbc.fit(trainData, labels)
-      # Run Naive Bayes Classifier on the test dataset.
-      nbc.predict(testData)
 
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunNBCScikit():
+      totalTimer = Timer()
+      
+      Log.Info("Loading dataset", self.verbose)
+      # Load train and test dataset.
+      trainData = np.genfromtxt(self.dataset[0], delimiter=',')
+      testData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+      # Labels are the last row of the training set.
+      labels = trainData[:, (trainData.shape[1] - 1)]
+      trainData = trainData[:,:-1]
+
+      with totalTimer:      
+        # Create and train the classifier.
+        nbc = MultinomialNB()
+        nbc.fit(trainData, labels)
+        # Run Naive Bayes Classifier on the test dataset.
+        nbc.predict(testData)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunNBCScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Naive Bayes Classifier. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Naive Bayes Classifier benchmark instance.
   
   @param dataset - Input dataset to perform NBC on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Non-negative Matrix Factorization.
@@ -44,39 +46,48 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def NMFScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    data = np.genfromtxt(self.dataset, delimiter=',')
-
-    with totalTimer:      
-      # Gather parameters.
-      seed = re.search("-s (\d+)", options)
-      maxIterations = re.search("-m (\d+)", options)
-      minResidue = re.search("-e ([^\s]+)", options)
-      updateRule = re.search("-u ([^\s]+)", options)
-
-      m = 10000 if not maxIterations else int(maxIterations.group(1))
-      e = 1e-05 if not maxIterations else int(minResidue.group(1))
-
-      if updateRule:
-        u = updateRule.group(1)
-        if u != 'alspgrad':
-          Log.Fatal("Invalid update rules ('" + u + "'); must be 'alspgrad'.")
-          return -1
-
-      # Perform NMF with the specified update rules.
-      if seed:
-        s = int(seed.group(1))
-        model = ScikitNMF(n_components=2, init='random', max_iter = m, tol = e, random_state = s)
-      else:
-        model = ScikitNMF(n_components=2, init='nndsvdar', max_iter = m, tol = e)
-
-      W = model.fit_transform(data)
-      H = model.components_
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunNMFScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      data = np.genfromtxt(self.dataset, delimiter=',')
+
+      with totalTimer:      
+        # Gather parameters.
+        seed = re.search("-s (\d+)", options)
+        maxIterations = re.search("-m (\d+)", options)
+        minResidue = re.search("-e ([^\s]+)", options)
+        updateRule = re.search("-u ([^\s]+)", options)
+
+        m = 10000 if not maxIterations else int(maxIterations.group(1))
+        e = 1e-05 if not maxIterations else int(minResidue.group(1))
+
+        if updateRule:
+          u = updateRule.group(1)
+          if u != 'alspgrad':
+            Log.Fatal("Invalid update rules ('" + u + "'); must be 'alspgrad'.")
+            return -1
+
+        # Perform NMF with the specified update rules.
+        if seed:
+          s = int(seed.group(1))
+          model = ScikitNMF(n_components=2, init='random', max_iter = m, tol = e, random_state = s)
+        else:
+          model = ScikitNMF(n_components=2, init='nndsvdar', max_iter = m, tol = e)
+
+        W = model.fit_transform(data)
+        H = model.components_
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunNMFScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Non-negative Matrix Factorization. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Principal Components Analysis benchmark instance.
   
   @param dataset - Input dataset to perform PCA on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
   Use the scikit libary to implement Principal Components Analysis.
@@ -44,34 +46,43 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def PCAScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    Log.Info("Loading dataset", self.verbose)
-    data = np.genfromtxt(self.dataset, delimiter=',')
-
-    with totalTimer:
-      # Find out what dimension we want.
-      match = re.search('-d (\d+)', options)
-
-      if not match:
-        k = data.shape[1]
-      else:
-        k = int(match.group(1))      
-        if (k > data.shape[1]):
-          Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
-              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
-          return -1
-
-      # Get the options for running PCA.
-      s = True if options.find("-s") > -1 else False
-
-      # Perform PCA.
-      pca = decomposition.PCA(n_components = k, whiten = s)
-      pca.fit(data)
-      score = pca.transform(data)
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunPCAScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      data = np.genfromtxt(self.dataset, delimiter=',')
+
+      with totalTimer:
+        # Find out what dimension we want.
+        match = re.search('-d (\d+)', options)
+
+        if not match:
+          k = data.shape[1]
+        else:
+          k = int(match.group(1))      
+          if (k > data.shape[1]):
+            Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
+                + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+            return -1
+
+        # Get the options for running PCA.
+        s = True if options.find("-s") > -1 else False
+
+        # Perform PCA.
+        pca = decomposition.PCA(n_components = k, whiten = s)
+        pca.fit(data)
+        score = pca.transform(data)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunPCAScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Principal Components Analysis. If the method has been successfully 

Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py	Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
   Create the Sparse Coding benchmark instance.
   
   @param dataset - Input dataset to perform Sparse Coding on.
+  @param timeout - The time until the timeout. Default no timeout.
   @param verbose - Display informational messages.
   '''
-  def __init__(self, dataset, verbose=True): 
+  def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
+    self.timeout = timeout
 
   '''
  Use the scikit library to implement Sparse Coding.
@@ -44,23 +46,32 @@
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
   def SparseCodingScikit(self, options):
-    totalTimer = Timer()
 
-    # Load input dataset.
-    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
-    dictionary = np.genfromtxt(self.dataset[1], delimiter=',')
-
-    # Get all the parameters.
-    l = re.search("-l (\d+)", options)
-    l = 0 if not l else int(l.group(1))
-
-    with totalTimer:
-      # Perform Sparse Coding.
-      model = SparseCoder(dictionary=dictionary, transform_algorithm='lars',
-          transform_alpha=l)
-      code = model.transform(inputData)
-
-    return totalTimer.ElapsedTime()
+    @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+    def RunSparseCodingScikit():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+      dictionary = np.genfromtxt(self.dataset[1], delimiter=',')
+
+      # Get all the parameters.
+      l = re.search("-l (\d+)", options)
+      l = 0 if not l else int(l.group(1))
+
+      with totalTimer:
+        # Perform Sparse Coding.
+        model = SparseCoder(dictionary=dictionary, transform_algorithm='lars',
+            transform_alpha=l)
+        code = model.transform(inputData)
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunSparseCodingScikit()
+    except TimeoutError as e:
+      Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+      return -2
 
   '''
   Perform Sparse Coding. If the method has been successfully completed 



More information about the mlpack-svn mailing list