aboutsummaryrefslogtreecommitdiffstats
path: root/run/experiment.py
diff options
context:
space:
mode:
author Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-29 16:50:23 -0400
committer Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-29 16:50:23 -0400
commit 7e32c3915e7ea27d2533d99a22fa53ef923198f5 (patch)
tree 5bd8d8a3ed6861e039a683f47a953d2f7a22d8b1 /run/experiment.py
parent 7545402506aa76261e18d85af585ff0ac1cf05c1 (diff)
Added run_exps.py option to --retry failed experiments.
If the retry flag is specified, failed experiments will be re-run after all other experiments have run. They can be re-run at most 5 times. This commit required a refactoring of run_exps.py to clean up the main experiment running loop.
Diffstat (limited to 'run/experiment.py')
-rw-r--r-- run/experiment.py 50
1 files changed, 31 insertions, 19 deletions
diff --git a/run/experiment.py b/run/experiment.py
index b0e46b6..9a70414 100644
--- a/run/experiment.py
+++ b/run/experiment.py
@@ -2,6 +2,7 @@ import os
2import time 2import time
3import run.litmus_util as lu 3import run.litmus_util as lu
4import shutil as sh 4import shutil as sh
5
5from operator import methodcaller 6from operator import methodcaller
6 7
7class ExperimentException(Exception): 8class ExperimentException(Exception):
@@ -69,21 +70,24 @@ class Experiment(object):
69 executable.cwd = self.working_dir 70 executable.cwd = self.working_dir
70 map(assign_cwd, self.executables) 71 map(assign_cwd, self.executables)
71 72
72 def __kill_all(self): 73 def __try_kill_all(self):
73 if lu.waiting_tasks(): 74 try:
74 released = lu.release_tasks() 75 if lu.waiting_tasks():
75 self.log("Re-released %d tasks" % released) 76 released = lu.release_tasks()
77 self.log("Re-released %d tasks" % released)
76 78
77 time.sleep(1) 79 time.sleep(1)
78 80
79 self.log("Killing all tasks") 81 self.log("Killing all tasks")
80 for e in self.executables: 82 for e in self.executables:
81 try: 83 try:
82 e.kill() 84 e.kill()
83 except: 85 except:
84 pass 86 pass
85 87
86 time.sleep(1) 88 time.sleep(1)
89 except:
90 self.log("Failed to kill all tasks.")
87 91
88 def __strip_path(self, path): 92 def __strip_path(self, path):
89 '''Shorten path to something more readable.''' 93 '''Shorten path to something more readable.'''
@@ -138,7 +142,7 @@ class Experiment(object):
138 now_ready = lu.waiting_tasks() 142 now_ready = lu.waiting_tasks()
139 if now_ready != num_ready: 143 if now_ready != num_ready:
140 wait_start = time.time() 144 wait_start = time.time()
141 num_ready = lu.now_ready 145 num_ready = now_ready
142 146
143 def __run_tasks(self): 147 def __run_tasks(self):
144 self.log("Starting %d tasks" % len(self.executables)) 148 self.log("Starting %d tasks" % len(self.executables))
@@ -185,6 +189,7 @@ class Experiment(object):
185 189
186 sched = lu.scheduler() 190 sched = lu.scheduler()
187 if sched != "Linux": 191 if sched != "Linux":
192 self.log("Switching back to Linux scheduler")
188 try: 193 try:
189 lu.switch_scheduler("Linux") 194 lu.switch_scheduler("Linux")
190 except: 195 except:
@@ -229,6 +234,8 @@ class Experiment(object):
229 self.log("Stopping regular tracers") 234 self.log("Stopping regular tracers")
230 map(methodcaller('stop_tracing'), self.regular_tracers) 235 map(methodcaller('stop_tracing'), self.regular_tracers)
231 236
237 os.system('sync')
238
232 def log(self, msg): 239 def log(self, msg):
233 print("[Exp %s]: %s" % (self.name, msg)) 240 print("[Exp %s]: %s" % (self.name, msg))
234 241
@@ -236,6 +243,7 @@ class Experiment(object):
236 self.__to_linux() 243 self.__to_linux()
237 244
238 succ = False 245 succ = False
246 exception = None
239 try: 247 try:
240 self.__setup() 248 self.__setup()
241 249
@@ -244,16 +252,20 @@ class Experiment(object):
244 self.log("Saving results in %s" % self.finished_dir) 252 self.log("Saving results in %s" % self.finished_dir)
245 succ = True 253 succ = True
246 except Exception as e: 254 except Exception as e:
255 exception = e
256
247 # Give time for whatever failed to finish failing 257 # Give time for whatever failed to finish failing
248 time.sleep(2) 258 time.sleep(2)
249 self.__kill_all()
250 259
251 raise e 260 self.__try_kill_all()
252 finally:
253 self.__teardown()
254 finally: 261 finally:
255 self.log("Switching back to Linux scheduler") 262 try:
256 self.__to_linux() 263 self.__teardown()
264 self.__to_linux()
265 except Exception as e:
266 exception = exception or e
267 finally:
268 if exception: raise exception
257 269
258 if succ: 270 if succ:
259 self.__save_results() 271 self.__save_results()