[mlpack-svn] r15557 - mlpack/conf/jenkins-conf/benchmark/methods/scikit
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 26 09:05:19 EDT 2013
Author: marcus
Date: Fri Jul 26 09:05:18 2013
New Revision: 15557
Log:
Add timeout for the scikit benchmark scripts.
Modified:
mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py
mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/allknn.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the All K-Nearest-Neighbors benchmark instance.
@param dataset - Input dataset to perform All K-Nearest-Neighbors on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement All K-Nearest-Neighbors.
@@ -44,52 +46,61 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def AllKnnScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- # If the dataset contains two files then the second file is the query file
- # In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
- queryData = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- referenceData = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Get all the parameters.
- k = re.search("-k (\d+)", options)
- leafSize = re.search("-l (\d+)", options)
-
- if not k:
- Log.Fatal("Required option: Number of furthest neighbors to find.")
- return -1
- else:
- k = int(k.group(1))
- if (k < 1 or k > referenceData.shape[0]):
- Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
- + "less ")
- return -1
-
- if not leafSize:
- l = 20
- elif int(leafSize.group(1)) < 0:
- Log.Fatal("Invalid leaf size: " + str(leafSize.group(1)) + ". Must be " +
- "greater than or equal to 0.")
- return -1
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunAllKnnScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the query file
+ # In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
+ if len(self.dataset) == 2:
+ referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
+ queryData = np.genfromtxt(self.dataset[1], delimiter=',')
else:
- l = int(leafSize.group(1))
+ referenceData = np.genfromtxt(self.dataset, delimiter=',')
- # Perform All K-Nearest-Neighbors.
- model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree', leaf_size=l)
- model.fit(referenceData)
+ with totalTimer:
+ # Get all the parameters.
+ k = re.search("-k (\d+)", options)
+ leafSize = re.search("-l (\d+)", options)
- if len(self.dataset) == 2:
- out = model.kneighbors(queryData, k, return_distance=True)
- else:
- out = model.kneighbors(referenceData, k, return_distance=True)
+ if not k:
+ Log.Fatal("Required option: Number of furthest neighbors to find.")
+ return -1
+ else:
+ k = int(k.group(1))
+ if (k < 1 or k > referenceData.shape[0]):
+ Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0 and "
+ + "less ")
+ return -1
+
+ if not leafSize:
+ l = 20
+ elif int(leafSize.group(1)) < 0:
+ Log.Fatal("Invalid leaf size: " + str(leafSize.group(1)) + ". Must be " +
+ "greater than or equal to 0.")
+ return -1
+ else:
+ l = int(leafSize.group(1))
- return totalTimer.ElapsedTime()
+ # Perform All K-Nearest-Neighbors.
+ model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree', leaf_size=l)
+ model.fit(referenceData)
+
+ if len(self.dataset) == 2:
+ out = model.kneighbors(queryData, k, return_distance=True)
+ else:
+ out = model.kneighbors(referenceData, k, return_distance=True)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunAllKnnScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform All K-Nearest-Neighbors. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/gmm.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Gaussian Mixture Model benchmark instance.
@param dataset - Input dataset to perform Gaussian Mixture Model on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Gaussian Mixture Model.
@@ -44,27 +46,36 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def GMMScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- dataPoints = np.genfromtxt(self.dataset, delimiter=',')
-
- # Get all the parameters.
- g = re.search("-g (\d+)", options)
- n = re.search("-n (\d+)", options)
- s = re.search("-n (\d+)", options)
-
- g = 1 if not g else int(g.group(1))
- n = 250 if not n else int(n.group(1))
- s = 0 if not s else int(s.group(1))
-
- # Create the Gaussian Mixture Model.
- model = mixture.GMM(n_components=g, covariance_type='full', random_state=s,
- n_iter=n)
- with totalTimer:
- model.fit(dataPoints)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunGMMScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ dataPoints = np.genfromtxt(self.dataset, delimiter=',')
+
+ # Get all the parameters.
+ g = re.search("-g (\d+)", options)
+ n = re.search("-n (\d+)", options)
+ s = re.search("-n (\d+)", options)
+
+ g = 1 if not g else int(g.group(1))
+ n = 250 if not n else int(n.group(1))
+ s = 0 if not s else int(s.group(1))
+
+ # Create the Gaussian Mixture Model.
+ model = mixture.GMM(n_components=g, covariance_type='full', random_state=s,
+ n_iter=n)
+ with totalTimer:
+ model.fit(dataPoints)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunGMMScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Gaussian Mixture Model. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/ica.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the independent component analysis benchmark instance.
@param dataset - Input dataset to perform independent component analysis on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement independent component analysis.
@@ -44,21 +46,30 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def ICAScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- s = re.search('-s (\d+)', options)
- s = 0 if not s else int(s.group(1))
-
- # Perform ICA.
- with totalTimer:
- model = FastICA(random_state=s)
- ic = model.fit(data).transform(data)
- mixing = model.get_mixing_matrix()
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunICAScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ s = re.search('-s (\d+)', options)
+ s = 0 if not s else int(s.group(1))
+
+ # Perform ICA.
+ with totalTimer:
+ model = FastICA(random_state=s)
+ ic = model.fit(data).transform(data)
+ mixing = model.get_mixing_matrix()
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunICAScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform independent component analysis. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/kernel_pca.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Kernel Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform KPCA on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Kernel Principal Components Analysis.
@@ -44,47 +46,56 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KPCAScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Get the new dimensionality, if it is necessary.
- dimension = re.search('-d (\d+)', options)
- if not dimension:
- d = data.shape[1]
- else:
- d = int(dimension.group(1))
- if (d > data.shape[1]):
- Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
- + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKPCAScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ with totalTimer:
+ # Get the new dimensionality, if it is necessary.
+ dimension = re.search('-d (\d+)', options)
+ if not dimension:
+ d = data.shape[1]
+ else:
+ d = int(dimension.group(1))
+ if (d > data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
+ + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ return -1
+
+ # Get the kernel type and make sure it is valid.
+ kernel = re.search("-k ([^\s]+)", options)
+ if not kernel:
+ Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan' " +
+ "and 'polynomial'.")
return -1
+ elif kernel.group(1) == "linear":
+ model = KernelPCA(n_components=d, kernel="linear")
+ elif kernel.group(1) == "hyptan":
+ model = KernelPCA(n_components=d, kernel="sigmoid")
+ elif kernel.group(1) == "polynomial":
+ degree = re.search('-D (\d+)', options)
+ degree = 1 if not degree else int(degree.group(1))
+
+ model = KernelPCA(n_components=d, kernel="poly", degree=degree)
+ else:
+ Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
+ "choices are 'linear', 'hyptan' and 'polynomial'.")
+ return -1
+
+ out = model.fit_transform(data)
- # Get the kernel type and make sure it is valid.
- kernel = re.search("-k ([^\s]+)", options)
- if not kernel:
- Log.Fatal("Choose kernel type, valid choices are 'linear', 'hyptan' " +
- "and 'polynomial'.")
- return -1
- elif kernel.group(1) == "linear":
- model = KernelPCA(n_components=d, kernel="linear")
- elif kernel.group(1) == "hyptan":
- model = KernelPCA(n_components=d, kernel="sigmoid")
- elif kernel.group(1) == "polynomial":
- degree = re.search('-D (\d+)', options)
- degree = 1 if not degree else int(degree.group(1))
-
- model = KernelPCA(n_components=d, kernel="poly", degree=degree)
- else:
- Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
- "choices are 'linear', 'hyptan' and 'polynomial'.")
- return -1
-
- out = model.fit_transform(data)
+ return totalTimer.ElapsedTime()
- return totalTimer.ElapsedTime()
+ try:
+ return RunKPCAScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Kernel Principal Components Analysis. If the method has been
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/kmeans.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the K-Means Clustering benchmark instance.
@param dataset - Input dataset to perform K-Means on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement K-Means Clustering.
@@ -44,50 +46,59 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def KMeansScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- # If the dataset contains two files then the second file is the centroids
- # file. In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- data = np.genfromtxt(self.dataset[0], delimiter=',')
- centroids = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- # Gather parameters.
- clusters = re.search("-c (\d+)", options)
- maxIterations = re.search("-m (\d+)", options)
- seed = re.search("-s (\d+)", options)
-
- # Now do validation of options.
- if not clusters and len(self.dataset) != 2:
- Log.Fatal("Required option: Number of clusters or cluster locations.")
- return -1
- elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
- Log.Fatal("Invalid number of clusters requested! Must be greater than or "
- + "equal to 1.")
- return -1
-
- m = 1000 if not maxIterations else int(maxIterations.group(1))
-
- # Create the KMeans object and perform K-Means clustering.
- with totalTimer:
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunKMeansScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the centroids
+ # file. In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
if len(self.dataset) == 2:
- kmeans = KMeans(k=centroids.shape[1], init=centroids, n_init=1,
- max_iter=m)
- elif seed:
- kmeans = KMeans(n_clusters=int(clusters.group(1)), init='random',
- n_init=1, max_iter=m, random_state=int(seed.group(1)))
+ data = np.genfromtxt(self.dataset[0], delimiter=',')
+ centroids = np.genfromtxt(self.dataset[1], delimiter=',')
else:
- kmeans = KMeans(n_clusters=int(clusters.group(1)), n_init=1, max_iter=m)
-
- kmeans.fit(data)
- labels = kmeans.labels_
- centers = kmeans.cluster_centers_
+ data = np.genfromtxt(self.dataset, delimiter=',')
- return totalTimer.ElapsedTime()
+ # Gather parameters.
+ clusters = re.search("-c (\d+)", options)
+ maxIterations = re.search("-m (\d+)", options)
+ seed = re.search("-s (\d+)", options)
+
+ # Now do validation of options.
+ if not clusters and len(self.dataset) != 2:
+ Log.Fatal("Required option: Number of clusters or cluster locations.")
+ return -1
+ elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
+ Log.Fatal("Invalid number of clusters requested! Must be greater than or "
+ + "equal to 1.")
+ return -1
+
+ m = 1000 if not maxIterations else int(maxIterations.group(1))
+
+ # Create the KMeans object and perform K-Means clustering.
+ with totalTimer:
+ if len(self.dataset) == 2:
+ kmeans = KMeans(k=centroids.shape[1], init=centroids, n_init=1,
+ max_iter=m)
+ elif seed:
+ kmeans = KMeans(n_clusters=int(clusters.group(1)), init='random',
+ n_init=1, max_iter=m, random_state=int(seed.group(1)))
+ else:
+ kmeans = KMeans(n_clusters=int(clusters.group(1)), n_init=1, max_iter=m)
+
+ kmeans.fit(data)
+ labels = kmeans.labels_
+ centers = kmeans.cluster_centers_
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunKMeansScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform K-Means Clustering. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/lars.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Least Angle Regression benchmark instance.
@param dataset - Input dataset to perform Least Angle Regression on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Least Angle Regression.
@@ -44,24 +46,33 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LARSScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- inputData = np.genfromtxt(self.dataset[0], delimiter=',')
- responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
-
- with totalTimer:
- # Get all the parameters.
- lambda1 = re.search("-l (\d+)", options)
- lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
-
- # Perform LARS.
- model = LassoLars(alpha=lambda1)
- model.fit(inputData, responsesData)
- out = model.coef_
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLARSScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+ responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+ with totalTimer:
+ # Get all the parameters.
+ lambda1 = re.search("-l (\d+)", options)
+ lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))
+
+ # Perform LARS.
+ model = LassoLars(alpha=lambda1)
+ model.fit(inputData, responsesData)
+ out = model.coef_
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLARSScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Least Angle Regression. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/linear_regression.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Linear Regression benchmark instance.
@param dataset - Input dataset to perform Linear Regression on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Linear Regression.
@@ -44,27 +46,36 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def LinearRegressionScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- # If the dataset contains two files then the second file is the responses
- # file. In this case we add this to the command line.
- Log.Info("Loading dataset", self.verbose)
- if len(self.dataset) == 2:
- X = np.genfromtxt(self.dataset[0], delimiter=',')
- y = np.genfromtxt(self.dataset[1], delimiter=',')
- else:
- X = np.genfromtxt(self.dataset, delimiter=',')
- y = X[:, (X.shape[1] - 1)]
- X = X[:,:-1]
-
- with totalTimer:
- # Perform linear regression.
- model = SLinearRegression()
- model.fit(X, y, n_jobs=-1)
- b = model.coef_
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunLinearRegressionScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ # If the dataset contains two files then the second file is the responses
+ # file. In this case we add this to the command line.
+ Log.Info("Loading dataset", self.verbose)
+ if len(self.dataset) == 2:
+ X = np.genfromtxt(self.dataset[0], delimiter=',')
+ y = np.genfromtxt(self.dataset[1], delimiter=',')
+ else:
+ X = np.genfromtxt(self.dataset, delimiter=',')
+ y = X[:, (X.shape[1] - 1)]
+ X = X[:,:-1]
+
+ with totalTimer:
+ # Perform linear regression.
+ model = SLinearRegression()
+ model.fit(X, y, n_jobs=-1)
+ b = model.coef_
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunLinearRegressionScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Linear Regression. If the method has been successfully completed
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/nbc.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Naive Bayes Classifier benchmark instance.
@param dataset - Input dataset to perform NBC on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Naive Bayes Classifier.
@@ -44,25 +46,34 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def NBCScikit(self, options):
- totalTimer = Timer()
-
- Log.Info("Loading dataset", self.verbose)
- # Load train and test dataset.
- trainData = np.genfromtxt(self.dataset[0], delimiter=',')
- testData = np.genfromtxt(self.dataset[1], delimiter=',')
-
- # Labels are the last row of the training set.
- labels = trainData[:, (trainData.shape[1] - 1)]
- trainData = trainData[:,:-1]
-
- with totalTimer:
- # Create and train the classifier.
- nbc = MultinomialNB()
- nbc.fit(trainData, labels)
- # Run Naive Bayes Classifier on the test dataset.
- nbc.predict(testData)
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunNBCScikit():
+ totalTimer = Timer()
+
+ Log.Info("Loading dataset", self.verbose)
+ # Load train and test dataset.
+ trainData = np.genfromtxt(self.dataset[0], delimiter=',')
+ testData = np.genfromtxt(self.dataset[1], delimiter=',')
+
+ # Labels are the last row of the training set.
+ labels = trainData[:, (trainData.shape[1] - 1)]
+ trainData = trainData[:,:-1]
+
+ with totalTimer:
+ # Create and train the classifier.
+ nbc = MultinomialNB()
+ nbc.fit(trainData, labels)
+ # Run Naive Bayes Classifier on the test dataset.
+ nbc.predict(testData)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunNBCScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Naive Bayes Classifier. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/nmf.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Naive Bayes Classifier benchmark instance.
@param dataset - Input dataset to perform NBC on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Non-negative Matrix Factorization.
@@ -44,39 +46,48 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def NMFScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Gather parameters.
- seed = re.search("-s (\d+)", options)
- maxIterations = re.search("-m (\d+)", options)
- minResidue = re.search("-e ([^\s]+)", options)
- updateRule = re.search("-u ([^\s]+)", options)
-
- m = 10000 if not maxIterations else int(maxIterations.group(1))
- e = 1e-05 if not maxIterations else int(minResidue.group(1))
-
- if updateRule:
- u = updateRule.group(1)
- if u != 'alspgrad':
- Log.Fatal("Invalid update rules ('" + u + "'); must be 'alspgrad'.")
- return -1
-
- # Perform NMF with the specified update rules.
- if seed:
- s = int(seed.group(1))
- model = ScikitNMF(n_components=2, init='random', max_iter = m, tol = e, random_state = s)
- else:
- model = ScikitNMF(n_components=2, init='nndsvdar', max_iter = m, tol = e)
-
- W = model.fit_transform(data)
- H = model.components_
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunNMFScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ with totalTimer:
+ # Gather parameters.
+ seed = re.search("-s (\d+)", options)
+ maxIterations = re.search("-m (\d+)", options)
+ minResidue = re.search("-e ([^\s]+)", options)
+ updateRule = re.search("-u ([^\s]+)", options)
+
+ m = 10000 if not maxIterations else int(maxIterations.group(1))
+ e = 1e-05 if not maxIterations else int(minResidue.group(1))
+
+ if updateRule:
+ u = updateRule.group(1)
+ if u != 'alspgrad':
+ Log.Fatal("Invalid update rules ('" + u + "'); must be 'alspgrad'.")
+ return -1
+
+ # Perform NMF with the specified update rules.
+ if seed:
+ s = int(seed.group(1))
+ model = ScikitNMF(n_components=2, init='random', max_iter = m, tol = e, random_state = s)
+ else:
+ model = ScikitNMF(n_components=2, init='nndsvdar', max_iter = m, tol = e)
+
+ W = model.fit_transform(data)
+ H = model.components_
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunNMFScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Non-negative Matrix Factorization. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/pca.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Principal Components Analysis benchmark instance.
@param dataset - Input dataset to perform PCA on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Principal Components Analysis.
@@ -44,34 +46,43 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def PCAScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- Log.Info("Loading dataset", self.verbose)
- data = np.genfromtxt(self.dataset, delimiter=',')
-
- with totalTimer:
- # Find out what dimension we want.
- match = re.search('-d (\d+)', options)
-
- if not match:
- k = data.shape[1]
- else:
- k = int(match.group(1))
- if (k > data.shape[1]):
- Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
- + "than existing dimensionality (" + str(data.shape[1]) + ")!")
- return -1
-
- # Get the options for running PCA.
- s = True if options.find("-s") > -1 else False
-
- # Perform PCA.
- pca = decomposition.PCA(n_components = k, whiten = s)
- pca.fit(data)
- score = pca.transform(data)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunPCAScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ Log.Info("Loading dataset", self.verbose)
+ data = np.genfromtxt(self.dataset, delimiter=',')
+
+ with totalTimer:
+ # Find out what dimension we want.
+ match = re.search('-d (\d+)', options)
+
+ if not match:
+ k = data.shape[1]
+ else:
+ k = int(match.group(1))
+ if (k > data.shape[1]):
+ Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
+ + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+ return -1
+
+ # Get the options for running PCA.
+ s = True if options.find("-s") > -1 else False
+
+ # Perform PCA.
+ pca = decomposition.PCA(n_components = k, whiten = s)
+ pca.fit(data)
+ score = pca.transform(data)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunPCAScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Principal Components Analysis. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/scikit/sparse_coding.py Fri Jul 26 09:05:18 2013
@@ -31,11 +31,13 @@
Create the Sparse Coding benchmark instance.
@param dataset - Input dataset to perform Sparse Coding on.
+ @param timeout - The time until the timeout. Default no timeout.
@param verbose - Display informational messages.
'''
- def __init__(self, dataset, verbose=True):
+ def __init__(self, dataset, timeout=0, verbose=True):
self.verbose = verbose
self.dataset = dataset
+ self.timeout = timeout
'''
Use the scikit library to implement Sparse Coding.
@@ -44,23 +46,32 @@
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
def SparseCodingScikit(self, options):
- totalTimer = Timer()
- # Load input dataset.
- inputData = np.genfromtxt(self.dataset[0], delimiter=',')
- dictionary = np.genfromtxt(self.dataset[1], delimiter=',')
-
- # Get all the parameters.
- l = re.search("-l (\d+)", options)
- l = 0 if not l else int(l.group(1))
-
- with totalTimer:
- # Perform Sparse Coding.
- model = SparseCoder(dictionary=dictionary, transform_algorithm='lars',
- transform_alpha=l)
- code = model.transform(inputData)
-
- return totalTimer.ElapsedTime()
+ @timeout(self.timeout, os.strerror(errno.ETIMEDOUT))
+ def RunSparseCodingScikit():
+ totalTimer = Timer()
+
+ # Load input dataset.
+ inputData = np.genfromtxt(self.dataset[0], delimiter=',')
+ dictionary = np.genfromtxt(self.dataset[1], delimiter=',')
+
+ # Get all the parameters.
+ l = re.search("-l (\d+)", options)
+ l = 0 if not l else int(l.group(1))
+
+ with totalTimer:
+ # Perform Sparse Coding.
+ model = SparseCoder(dictionary=dictionary, transform_algorithm='lars',
+ transform_alpha=l)
+ code = model.transform(inputData)
+
+ return totalTimer.ElapsedTime()
+
+ try:
+ return RunSparseCodingScikit()
+ except TimeoutError as e:
+ Log.Warn("Script timed out after " + str(self.timeout) + " seconds")
+ return -2
'''
Perform Sparse Coding. If the method has been successfully completed
More information about the mlpack-svn
mailing list