Improved error handling in run_exps.py.

author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-12 11:30:27 -0400
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-12 11:30:27 -0400
commit: 09bc409657606a37346d82ab1e4c44a165bd3541 (patch)
tree: 72c569f69f37acafdc89fde4724bde7b373ef8f9
parent: 384e322f974534c1c734db144633e3c3e002b1f8 (diff)
6 files changed, 114 insertions, 68 deletions
diff --git a/common.py b/common.py
index 4920ec8..a2c6224 100644
--- a/common.py
+++ b/common.py
@@ -26,15 +26,17 @@ def get_executable_hint(prog, hint, optional=False):
    '''Search for @prog in system PATH. Print @hint if no binary is found.
    Die if not @optional.'''
    try:
-        prog = get_executable(prog)
+        full_path = get_executable(prog)
    except IOError:
        if not optional:
            sys.stderr.write(("Cannot find executable '%s' in PATH. This is " +\
                              "a part of '%s' which should be added to PATH.\n")\
                             % (prog, hint))
            sys.exit(1)
+        else:
+            full_path = None
-    return prog
+    return full_path
 def get_config_option(option):
    '''Search for @option in installed kernel config (if present).
diff --git a/parse/sched.py b/parse/sched.py
index 2da0149..147a2e5 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -161,6 +161,7 @@ def extract_sched_data(result, data_dir, work_dir):
        cmd_arr = [conf.BINS['st_show']]
        cmd_arr.extend(bins)
        with open(output_file, "w") as f:
+            print("calling %s" % cmd_arr)
            subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
    task_dict = defaultdict(lambda :
diff --git a/run/executable/executable.py b/run/executable/executable.py
index 02e35ae..263e305 100644
--- a/run/executable/executable.py
+++ b/run/executable/executable.py
@@ -1,13 +1,13 @@
 import sys
 import subprocess
 import signal
-from common import is_executable
+from common import get_executable
 class Executable(object):
    '''Parent object that represents an executable for use in task-sets.'''
    def __init__(self, exec_file, extra_args=None, stdout_file = None, stderr_file = None):
-        self.exec_file = exec_file
+        self.exec_file = get_executable(exec_file)
        self.cwd = None
        self.stdout_file = stdout_file
        self.stderr_file = stderr_file
@@ -18,7 +18,7 @@ class Executable(object):
        else:
            self.extra_args = [str(a) for a in list(extra_args)] # make a duplicate
-        if not is_executable(self.exec_file):
+        if not self.exec_file:
            raise Exception("Not executable ? : %s" % self.exec_file)
    def __del__(self):
diff --git a/run/experiment.py b/run/experiment.py
index e5811f8..ff0e9f3 100644
--- a/run/experiment.py
+++ b/run/experiment.py
@@ -1,7 +1,9 @@
+import common as com
 import os
 import time
 import run.litmus_util as lu
 import shutil as sh
+import traceback
 from operator import methodcaller
 class ExperimentException(Exception):
@@ -15,17 +17,12 @@ class ExperimentDone(ExperimentException):
    def __str__(self):
        return "Experiment finished already: %d" % self.name
-class ExperimentInterrupted(ExperimentException):
-    '''Raised when an experiment appears to be interrupted (partial results).'''
-    def __str__(self):
-        return "Experiment was interrupted in progress: %d" % self.name
 class ExperimentFailed(ExperimentException):
    def __str__(self):
        return "Experiment failed during execution: %d" % self.name
+class SystemCorrupted(Exception):
+    pass
 class Experiment(object):
    '''Execute one task-set and save the results. Experiments have unique IDs.'''
@@ -44,6 +41,8 @@ class Experiment(object):
        self.exec_out = None
        self.exec_err = None
+        self.task_batch = com.num_cpus()
        self.__make_dirs()
        self.__assign_executable_cwds()
        self.__setup_tracers(tracer_types)
@@ -84,65 +83,67 @@ class Experiment(object):
            executable.cwd = self.working_dir
        map(assign_cwd, self.executables)
-    def __run_tasks(self):
+    def __kill_all(self):
-        already_waiting = lu.waiting_tasks()
+        # Give time for whatever failed to finish failing
+        time.sleep(2)
+        released = lu.release_tasks()
+        self.log("Re-released %d tasks" % released)
-        if already_waiting:
+        time.sleep(5)
-            self.log("Already %d tasks waiting for release!")
-            self.log("Experiment will fail if any of these tasks are released.")
-        self.log("Starting the programs")
+        self.log("Killing all tasks")
        for e in self.executables:
            try:
+                e.kill()
+            except:
+                pass
+        time.sleep(2)
+    def __run_tasks(self):
+        self.log("Starting %d tasks" % len(self.executables))
+        for i,e in enumerate(self.executables):
+            try:
                e.execute()
            except:
                raise Exception("Executable failed: %s" % e)
        self.log("Sleeping until tasks are ready for release...")
-        start = time.clock()
+        start = time.time()
-        while (lu.waiting_tasks() - already_waiting) < len(self.executables):
+        while lu.waiting_tasks() < len(self.executables):
-            if time.clock() - start > 30.0:
+            if time.time() - start > 30.0:
-                raise Exception("Too much time has passed waiting for tasks!")
+                s = "waiting: %d, submitted: %d" %\
+                  (lu.waiting_tasks(), len(self.executables))
+                raise Exception("Too much time spent waiting for tasks! %s" % s)
            time.sleep(1)
        # Exact tracers (like overheads) must be started right after release or
        # measurements will be full of irrelevant records
        self.log("Starting %d released tracers" % len(self.exact_tracers))
        map(methodcaller('start_tracing'), self.exact_tracers)
+        time.sleep(1)
-        self.log("Releasing %d tasks" % len(self.executables))
+        try:
-        released = lu.release_tasks()
+            self.log("Releasing %d tasks" % len(self.executables))
-        ret = True
-        if released != len(self.executables):
-            # Some tasks failed to release, kill all tasks and fail
-            # Need to re-release non-released tasks before we can kill them though
-            self.log("Failed to release {} tasks! Re-releasing and killing".format(
-                len(self.executables) - released, len(self.executables)))
-            time.sleep(5)
            released = lu.release_tasks()
-            self.log("Re-released %d tasks" % released)
+            if released != len(self.executables):
+                # Some tasks failed to release, kill all tasks and fail
-            time.sleep(5)
+                # Need to release non-released tasks before they can be killed
+                raise Exception("Released %s tasks, expected %s tasks" %
-            self.log("Killing all tasks")
+                                (released, len(self.executables)))
-            map(methodcaller('kill'), self.executables)
-            ret = False
+            self.log("Waiting for program to finish...")
+            for e in self.executables:
+                if not e.wait():
+                    raise Exception("Executable %s failed to complete!" % e)
-        self.log("Waiting for program to finish...")
+        finally:
-        for e in self.executables:
+            # And these must be stopped here for the same reason
-            if not e.wait():
+            self.log("Stopping exact tracers")
-                ret = False
+            map(methodcaller('stop_tracing'), self.exact_tracers)
-        # And these must be stopped here for the same reason
-        self.log("Stopping exact tracers")
-        map(methodcaller('stop_tracing'), self.exact_tracers)
-        if not ret:
-            raise ExperimentFailed(self.name)
    def __save_results(self):
        os.rename(self.working_dir, self.finished_dir)
@@ -151,29 +152,48 @@ class Experiment(object):
        print("[Exp %s]: %s" % (self.name, msg))
    def run_exp(self):
+        self.__check_system()
        succ = False
        try:
-            self.setup()
+            self.__setup()
            try:
                self.__run_tasks()
                self.log("Saving results in %s" % self.finished_dir)
                succ = True
+            except:
+                traceback.print_exc()
+                self.__kill_all()
+                raise ExperimentFailed(self.name)
            finally:
-                self.teardown()
+                self.__teardown()
        finally:
            self.log("Switching to Linux scheduler")
-            try:
-                lu.switch_scheduler("Linux")
+            # Give the tasks 10 seconds to finish before bailing
-            except:
+            start = time.time()
-                self.log("Failed to switch back to Linux.")
+            while lu.all_tasks() > 0:
+                if time.time() - start < 10.0:
+                    raise SystemCorrupted("%d tasks still running!" %
+                                          lu.all_tasks())
+            lu.switch_scheduler("Linux")
        if succ:
            self.__save_results()
            self.log("Experiment done!")
+    def __check_system(self):
+        running = lu.all_tasks()
+        if running:
+            raise SystemCorrupted("%d tasks already running!" % running)
+        sched = lu.scheduler()
+        if sched != "Linux":
+            raise SystemCorrupted("Scheduler is %s, should be Linux" % sched)
-    def setup(self):
+    def __setup(self):
        self.log("Writing %d proc entries" % len(self.proc_entries))
        map(methodcaller('write_proc'), self.proc_entries)
@@ -190,7 +210,7 @@ class Experiment(object):
            executable.stderr_file = self.exec_err
        map(set_out, self.executables)
-    def teardown(self):
+    def __teardown(self):
        self.exec_out and self.exec_out.close()
        self.exec_err and self.exec_err.close()
diff --git a/run/litmus_util.py b/run/litmus_util.py
index 4709b66..70da262 100644
--- a/run/litmus_util.py
+++ b/run/litmus_util.py
@@ -3,6 +3,11 @@ import time
 import subprocess
 import config.config as conf
+def scheduler():
+    with open('/proc/litmus/active_plugin', 'r') as active_plugin:
+        cur_plugin = active_plugin.read().strip()
+    return cur_plugin
 def switch_scheduler(switch_to_in):
    '''Switch the scheduler to whatever is passed in.
@@ -18,11 +23,10 @@ def switch_scheduler(switch_to_in):
    # it takes a bit to do the switch, sleep an arbitrary amount of time
    time.sleep(2)
-    with open('/proc/litmus/active_plugin', 'r') as active_plugin:
+    cur_plugin = scheduler()
-        cur_plugin = active_plugin.read().strip()
    if switch_to != cur_plugin:
-        raise Exception("Could not switch to '%s' (check dmesg)"  % switch_to)
+        raise Exception("Could not switch to '%s' (check dmesg), current: %s" %\
+                        (switch_to, cur_plugin))
 def waiting_tasks():
    reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M)
@@ -35,6 +39,17 @@ def waiting_tasks():
    return 0 if not ready else int(ready)
+def all_tasks():
+    reg = re.compile(r'^real-time.*?(?P<TASKS>\d+)$', re.M)
+    with open('/proc/litmus/stats', 'r') as f:
+        data = f.read()
+    # Ignore if no tasks are waiting for release
+    match = re.search(reg, data)
+    ready = match.group("TASKS")
+    return 0 if not ready else int(ready)
 def release_tasks():
    try:
        data = subprocess.check_output([conf.BINS['release']])
diff --git a/run_exps.py b/run_exps.py
index b9cf88e..6531415 100755
--- a/run_exps.py
+++ b/run_exps.py
@@ -8,12 +8,11 @@ import re
 import shutil
 import sys
 import run.tracer as trace
-import traceback
 from collections import namedtuple
 from optparse import OptionParser
 from run.executable.executable import Executable
-from run.experiment import Experiment,ExperimentDone
+from run.experiment import Experiment,ExperimentDone,ExperimentFailed,SystemCorrupted
 from run.proc_entry import ProcEntry
 '''Customizable experiment parameters'''
@@ -330,6 +329,7 @@ def main():
        created = True
        os.mkdir(out_base)
+    ran  = 0
    done = 0
    succ = 0
    failed = 0
@@ -362,15 +362,23 @@ def main():
            invalid += 1
            print("Invalid environment for experiment '%s'" % exp)
            print(e)
-        except:
+        except KeyboardInterrupt:
+            print("Keyboard interrupt, quitting")
+            break
+        except SystemCorrupted as e:
+            print("System is corrupted! Fix state before continuing.")
+            print(e)
+            break
+        except ExperimentFailed:
            print("Failed experiment %s" % exp)
-            traceback.print_exc()
            failed += 1
+        ran += 1
    if not os.listdir(out_base) and created and not succ:
        os.rmdir(out_base)
-    message = "Experiments run:\t%d" % len(args) +\
+    message = "Experiments ran:\t%d of %d" % (ran, len(args)) +\
      "\n  Successful:\t\t%d" % succ +\
      "\n  Failed:\t\t%d" % failed +\
      "\n  Already Done:\t\t%d" % done +\
author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-04-12 11:30:27 -0400
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-04-12 11:30:27 -0400
commit	09bc409657606a37346d82ab1e4c44a165bd3541 (patch)
tree	72c569f69f37acafdc89fde4724bde7b373ef8f9
parent	384e322f974534c1c734db144633e3c3e002b1f8 (diff)

diff --git a/common.py b/common.py index 4920ec8..a2c6224 100644 --- a/common.py +++ b/common.py
@@ -26,15 +26,17 @@ def get_executable_hint(prog, hint, optional=False):
26	'''Search for @prog in system PATH. Print @hint if no binary is found.	26	'''Search for @prog in system PATH. Print @hint if no binary is found.
27	Die if not @optional.'''	27	Die if not @optional.'''
28	try:	28	try:
29	prog = get_executable(prog)	29	full_path = get_executable(prog)
30	except IOError:	30	except IOError:
31	if not optional:	31	if not optional:
32	sys.stderr.write(("Cannot find executable '%s' in PATH. This is " +\	32	sys.stderr.write(("Cannot find executable '%s' in PATH. This is " +\
33	"a part of '%s' which should be added to PATH.\n")\	33	"a part of '%s' which should be added to PATH.\n")\
34	% (prog, hint))	34	% (prog, hint))
35	sys.exit(1)	35	sys.exit(1)
		36	else:
		37	full_path = None
36		38
37	return prog	39	return full_path
38		40
39	def get_config_option(option):	41	def get_config_option(option):
40	'''Search for @option in installed kernel config (if present).	42	'''Search for @option in installed kernel config (if present).


diff --git a/parse/sched.py b/parse/sched.py index 2da0149..147a2e5 100644 --- a/parse/sched.py +++ b/parse/sched.py
@@ -161,6 +161,7 @@ def extract_sched_data(result, data_dir, work_dir):
161	cmd_arr = [conf.BINS['st_show']]	161	cmd_arr = [conf.BINS['st_show']]
162	cmd_arr.extend(bins)	162	cmd_arr.extend(bins)
163	with open(output_file, "w") as f:	163	with open(output_file, "w") as f:
		164	print("calling %s" % cmd_arr)
164	subprocess.call(cmd_arr, cwd=data_dir, stdout=f)	165	subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
165		166
166	task_dict = defaultdict(lambda :	167	task_dict = defaultdict(lambda :


diff --git a/run/executable/executable.py b/run/executable/executable.py index 02e35ae..263e305 100644 --- a/run/executable/executable.py +++ b/run/executable/executable.py
@@ -1,13 +1,13 @@
1	import sys	1	import sys
2	import subprocess	2	import subprocess
3	import signal	3	import signal
4	from common import is_executable	4	from common import get_executable
5		5
6	class Executable(object):	6	class Executable(object):
7	'''Parent object that represents an executable for use in task-sets.'''	7	'''Parent object that represents an executable for use in task-sets.'''
8		8
9	def __init__(self, exec_file, extra_args=None, stdout_file = None, stderr_file = None):	9	def __init__(self, exec_file, extra_args=None, stdout_file = None, stderr_file = None):
10	self.exec_file = exec_file	10	self.exec_file = get_executable(exec_file)
11	self.cwd = None	11	self.cwd = None
12	self.stdout_file = stdout_file	12	self.stdout_file = stdout_file
13	self.stderr_file = stderr_file	13	self.stderr_file = stderr_file
@@ -18,7 +18,7 @@ class Executable(object):
18	else:	18	else:
19	self.extra_args = [str(a) for a in list(extra_args)] # make a duplicate	19	self.extra_args = [str(a) for a in list(extra_args)] # make a duplicate
20		20
21	if not is_executable(self.exec_file):	21	if not self.exec_file:
22	raise Exception("Not executable ? : %s" % self.exec_file)	22	raise Exception("Not executable ? : %s" % self.exec_file)
23		23
24	def __del__(self):	24	def __del__(self):


diff --git a/run/experiment.py b/run/experiment.py index e5811f8..ff0e9f3 100644 --- a/run/experiment.py +++ b/run/experiment.py
@@ -1,7 +1,9 @@
		1	import common as com
1	import os	2	import os
2	import time	3	import time
3	import run.litmus_util as lu	4	import run.litmus_util as lu
4	import shutil as sh	5	import shutil as sh
		6	import traceback
5	from operator import methodcaller	7	from operator import methodcaller
6		8
7	class ExperimentException(Exception):	9	class ExperimentException(Exception):
@@ -15,17 +17,12 @@ class ExperimentDone(ExperimentException):
15	def __str__(self):	17	def __str__(self):
16	return "Experiment finished already: %d" % self.name	18	return "Experiment finished already: %d" % self.name
17		19
18
19	class ExperimentInterrupted(ExperimentException):
20	'''Raised when an experiment appears to be interrupted (partial results).'''
21	def __str__(self):
22	return "Experiment was interrupted in progress: %d" % self.name
23
24
25	class ExperimentFailed(ExperimentException):	20	class ExperimentFailed(ExperimentException):
26	def __str__(self):	21	def __str__(self):
27	return "Experiment failed during execution: %d" % self.name	22	return "Experiment failed during execution: %d" % self.name
28		23
		24	class SystemCorrupted(Exception):
		25	pass
29		26
30	class Experiment(object):	27	class Experiment(object):
31	'''Execute one task-set and save the results. Experiments have unique IDs.'''	28	'''Execute one task-set and save the results. Experiments have unique IDs.'''
@@ -44,6 +41,8 @@ class Experiment(object):
44	self.exec_out = None	41	self.exec_out = None
45	self.exec_err = None	42	self.exec_err = None
46		43
		44	self.task_batch = com.num_cpus()
		45
47	self.__make_dirs()	46	self.__make_dirs()
48	self.__assign_executable_cwds()	47	self.__assign_executable_cwds()
49	self.__setup_tracers(tracer_types)	48	self.__setup_tracers(tracer_types)
@@ -84,65 +83,67 @@ class Experiment(object):
84	executable.cwd = self.working_dir	83	executable.cwd = self.working_dir
85	map(assign_cwd, self.executables)	84	map(assign_cwd, self.executables)
86		85
87	def __run_tasks(self):	86	def __kill_all(self):
88	already_waiting = lu.waiting_tasks()	87	# Give time for whatever failed to finish failing
		88	time.sleep(2)
		89
		90	released = lu.release_tasks()
		91	self.log("Re-released %d tasks" % released)
89		92
90	if already_waiting:	93	time.sleep(5)
91	self.log("Already %d tasks waiting for release!")
92	self.log("Experiment will fail if any of these tasks are released.")
93		94
94	self.log("Starting the programs")	95	self.log("Killing all tasks")
95	for e in self.executables:	96	for e in self.executables:
96	try:	97	try:
		98	e.kill()
		99	except:
		100	pass
		101
		102	time.sleep(2)
		103
		104	def __run_tasks(self):
		105	self.log("Starting %d tasks" % len(self.executables))
		106
		107	for i,e in enumerate(self.executables):
		108	try:
97	e.execute()	109	e.execute()
98	except:	110	except:
99	raise Exception("Executable failed: %s" % e)	111	raise Exception("Executable failed: %s" % e)
100		112
101	self.log("Sleeping until tasks are ready for release...")	113	self.log("Sleeping until tasks are ready for release...")
102	start = time.clock()	114	start = time.time()
103	while (lu.waiting_tasks() - already_waiting) < len(self.executables):	115	while lu.waiting_tasks() < len(self.executables):
104	if time.clock() - start > 30.0:	116	if time.time() - start > 30.0:
105	raise Exception("Too much time has passed waiting for tasks!")	117	s = "waiting: %d, submitted: %d" %\
		118	(lu.waiting_tasks(), len(self.executables))
		119	raise Exception("Too much time spent waiting for tasks! %s" % s)
106	time.sleep(1)	120	time.sleep(1)
107		121
108	# Exact tracers (like overheads) must be started right after release or	122	# Exact tracers (like overheads) must be started right after release or
109	# measurements will be full of irrelevant records	123	# measurements will be full of irrelevant records
110	self.log("Starting %d released tracers" % len(self.exact_tracers))	124	self.log("Starting %d released tracers" % len(self.exact_tracers))
111	map(methodcaller('start_tracing'), self.exact_tracers)	125	map(methodcaller('start_tracing'), self.exact_tracers)
		126	time.sleep(1)
112		127
113	self.log("Releasing %d tasks" % len(self.executables))	128	try:
114	released = lu.release_tasks()	129	self.log("Releasing %d tasks" % len(self.executables))
115
116	ret = True
117	if released != len(self.executables):
118	# Some tasks failed to release, kill all tasks and fail
119	# Need to re-release non-released tasks before we can kill them though
120	self.log("Failed to release {} tasks! Re-releasing and killing".format(
121	len(self.executables) - released, len(self.executables)))
122	time.sleep(5)
123
124	released = lu.release_tasks()	130	released = lu.release_tasks()
125		131
126	self.log("Re-released %d tasks" % released)	132	if released != len(self.executables):
127		133	# Some tasks failed to release, kill all tasks and fail
128	time.sleep(5)	134	# Need to release non-released tasks before they can be killed
129		135	raise Exception("Released %s tasks, expected %s tasks" %
130	self.log("Killing all tasks")	136	(released, len(self.executables)))
131	map(methodcaller('kill'), self.executables)
132		137
133	ret = False	138	self.log("Waiting for program to finish...")
		139	for e in self.executables:
		140	if not e.wait():
		141	raise Exception("Executable %s failed to complete!" % e)
134		142
135	self.log("Waiting for program to finish...")	143	finally:
136	for e in self.executables:	144	# And these must be stopped here for the same reason
137	if not e.wait():	145	self.log("Stopping exact tracers")
138	ret = False	146	map(methodcaller('stop_tracing'), self.exact_tracers)
139
140	# And these must be stopped here for the same reason
141	self.log("Stopping exact tracers")
142	map(methodcaller('stop_tracing'), self.exact_tracers)
143
144	if not ret:
145	raise ExperimentFailed(self.name)
146		147
147	def __save_results(self):	148	def __save_results(self):
148	os.rename(self.working_dir, self.finished_dir)	149	os.rename(self.working_dir, self.finished_dir)
@@ -151,29 +152,48 @@ class Experiment(object):
151	print("[Exp %s]: %s" % (self.name, msg))	152	print("[Exp %s]: %s" % (self.name, msg))
152		153
153	def run_exp(self):	154	def run_exp(self):
		155	self.__check_system()
		156
154	succ = False	157	succ = False
155	try:	158	try:
156	self.setup()	159	self.__setup()
157		160
158	try:	161	try:
159	self.__run_tasks()	162	self.__run_tasks()
160	self.log("Saving results in %s" % self.finished_dir)	163	self.log("Saving results in %s" % self.finished_dir)
161	succ = True	164	succ = True
		165	except:
		166	traceback.print_exc()
		167	self.__kill_all()
		168	raise ExperimentFailed(self.name)
162	finally:	169	finally:
163	self.teardown()	170	self.__teardown()
164	finally:	171	finally:
165	self.log("Switching to Linux scheduler")	172	self.log("Switching to Linux scheduler")
166	try:	173
167	lu.switch_scheduler("Linux")	174	# Give the tasks 10 seconds to finish before bailing
168	except:	175	start = time.time()
169	self.log("Failed to switch back to Linux.")	176	while lu.all_tasks() > 0:
		177	if time.time() - start < 10.0:
		178	raise SystemCorrupted("%d tasks still running!" %
		179	lu.all_tasks())
		180
		181	lu.switch_scheduler("Linux")
170		182
171	if succ:	183	if succ:
172	self.__save_results()	184	self.__save_results()
173	self.log("Experiment done!")	185	self.log("Experiment done!")
174		186
		187	def __check_system(self):
		188	running = lu.all_tasks()
		189	if running:
		190	raise SystemCorrupted("%d tasks already running!" % running)
		191
		192	sched = lu.scheduler()
		193	if sched != "Linux":
		194	raise SystemCorrupted("Scheduler is %s, should be Linux" % sched)
175		195
176	def setup(self):	196	def __setup(self):
177	self.log("Writing %d proc entries" % len(self.proc_entries))	197	self.log("Writing %d proc entries" % len(self.proc_entries))
178	map(methodcaller('write_proc'), self.proc_entries)	198	map(methodcaller('write_proc'), self.proc_entries)
179		199
@@ -190,7 +210,7 @@ class Experiment(object):
190	executable.stderr_file = self.exec_err	210	executable.stderr_file = self.exec_err
191	map(set_out, self.executables)	211	map(set_out, self.executables)
192		212
193	def teardown(self):	213	def __teardown(self):
194	self.exec_out and self.exec_out.close()	214	self.exec_out and self.exec_out.close()
195	self.exec_err and self.exec_err.close()	215	self.exec_err and self.exec_err.close()
196		216


diff --git a/run/litmus_util.py b/run/litmus_util.py index 4709b66..70da262 100644 --- a/run/litmus_util.py +++ b/run/litmus_util.py
@@ -3,6 +3,11 @@ import time
3	import subprocess	3	import subprocess
4	import config.config as conf	4	import config.config as conf
5		5
		6	def scheduler():
		7	with open('/proc/litmus/active_plugin', 'r') as active_plugin:
		8	cur_plugin = active_plugin.read().strip()
		9	return cur_plugin
		10
6	def switch_scheduler(switch_to_in):	11	def switch_scheduler(switch_to_in):
7	'''Switch the scheduler to whatever is passed in.	12	'''Switch the scheduler to whatever is passed in.
8		13
@@ -18,11 +23,10 @@ def switch_scheduler(switch_to_in):
18	# it takes a bit to do the switch, sleep an arbitrary amount of time	23	# it takes a bit to do the switch, sleep an arbitrary amount of time
19	time.sleep(2)	24	time.sleep(2)
20		25
21	with open('/proc/litmus/active_plugin', 'r') as active_plugin:	26	cur_plugin = scheduler()
22	cur_plugin = active_plugin.read().strip()
23
24	if switch_to != cur_plugin:	27	if switch_to != cur_plugin:
25	raise Exception("Could not switch to '%s' (check dmesg)" % switch_to)	28	raise Exception("Could not switch to '%s' (check dmesg), current: %s" %\
		29	(switch_to, cur_plugin))
26		30
27	def waiting_tasks():	31	def waiting_tasks():
28	reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M)	32	reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M)
@@ -35,6 +39,17 @@ def waiting_tasks():
35		39
36	return 0 if not ready else int(ready)	40	return 0 if not ready else int(ready)
37		41
		42	def all_tasks():
		43	reg = re.compile(r'^real-time.*?(?P<TASKS>\d+)$', re.M)
		44	with open('/proc/litmus/stats', 'r') as f:
		45	data = f.read()
		46
		47	# Ignore if no tasks are waiting for release
		48	match = re.search(reg, data)
		49	ready = match.group("TASKS")
		50
		51	return 0 if not ready else int(ready)
		52
38	def release_tasks():	53	def release_tasks():
39	try:	54	try:
40	data = subprocess.check_output([conf.BINS['release']])	55	data = subprocess.check_output([conf.BINS['release']])


diff --git a/run_exps.py b/run_exps.py index b9cf88e..6531415 100755 --- a/run_exps.py +++ b/run_exps.py
@@ -8,12 +8,11 @@ import re
8	import shutil	8	import shutil
9	import sys	9	import sys
10	import run.tracer as trace	10	import run.tracer as trace
11	import traceback
12		11
13	from collections import namedtuple	12	from collections import namedtuple
14	from optparse import OptionParser	13	from optparse import OptionParser
15	from run.executable.executable import Executable	14	from run.executable.executable import Executable
16	from run.experiment import Experiment,ExperimentDone	15	from run.experiment import Experiment,ExperimentDone,ExperimentFailed,SystemCorrupted
17	from run.proc_entry import ProcEntry	16	from run.proc_entry import ProcEntry
18		17
19	'''Customizable experiment parameters'''	18	'''Customizable experiment parameters'''
@@ -330,6 +329,7 @@ def main():
330	created = True	329	created = True
331	os.mkdir(out_base)	330	os.mkdir(out_base)
332		331
		332	ran = 0
333	done = 0	333	done = 0
334	succ = 0	334	succ = 0
335	failed = 0	335	failed = 0
@@ -362,15 +362,23 @@ def main():
362	invalid += 1	362	invalid += 1
363	print("Invalid environment for experiment '%s'" % exp)	363	print("Invalid environment for experiment '%s'" % exp)
364	print(e)	364	print(e)
365	except:	365	except KeyboardInterrupt:
		366	print("Keyboard interrupt, quitting")
		367	break
		368	except SystemCorrupted as e:
		369	print("System is corrupted! Fix state before continuing.")
		370	print(e)
		371	break
		372	except ExperimentFailed:
366	print("Failed experiment %s" % exp)	373	print("Failed experiment %s" % exp)
367	traceback.print_exc()
368	failed += 1	374	failed += 1
369		375
		376	ran += 1
		377
370	if not os.listdir(out_base) and created and not succ:	378	if not os.listdir(out_base) and created and not succ:
371	os.rmdir(out_base)	379	os.rmdir(out_base)
372		380
373	message = "Experiments run:\t%d" % len(args) +\	381	message = "Experiments ran:\t%d of %d" % (ran, len(args)) +\
374	"\n Successful:\t\t%d" % succ +\	382	"\n Successful:\t\t%d" % succ +\
375	"\n Failed:\t\t%d" % failed +\	383	"\n Failed:\t\t%d" % failed +\
376	"\n Already Done:\t\t%d" % done +\	384	"\n Already Done:\t\t%d" % done +\