diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-04-29 16:50:23 -0400 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-04-29 16:50:23 -0400 |
| commit | 7e32c3915e7ea27d2533d99a22fa53ef923198f5 (patch) | |
| tree | 5bd8d8a3ed6861e039a683f47a953d2f7a22d8b1 /run | |
| parent | 7545402506aa76261e18d85af585ff0ac1cf05c1 (diff) | |
Added a --retry option to run_exps.py for re-running failed experiments.
If the retry flag is specified, failed experiments will be re-run after all
other experiments have run. They can be re-run at most 5 times.
This commit required a refactoring of run_exps.py to clean up the main
experiment running loop.
Diffstat (limited to 'run')
| -rw-r--r-- | run/experiment.py | 50 |
1 file changed, 31 insertions, 19 deletions
diff --git a/run/experiment.py b/run/experiment.py
index b0e46b6..9a70414 100644
--- a/run/experiment.py
+++ b/run/experiment.py
| @@ -2,6 +2,7 @@ import os | |||
| 2 | import time | 2 | import time |
| 3 | import run.litmus_util as lu | 3 | import run.litmus_util as lu |
| 4 | import shutil as sh | 4 | import shutil as sh |
| 5 | |||
| 5 | from operator import methodcaller | 6 | from operator import methodcaller |
| 6 | 7 | ||
| 7 | class ExperimentException(Exception): | 8 | class ExperimentException(Exception): |
| @@ -69,21 +70,24 @@ class Experiment(object): | |||
| 69 | executable.cwd = self.working_dir | 70 | executable.cwd = self.working_dir |
| 70 | map(assign_cwd, self.executables) | 71 | map(assign_cwd, self.executables) |
| 71 | 72 | ||
| 72 | def __kill_all(self): | 73 | def __try_kill_all(self): |
| 73 | if lu.waiting_tasks(): | 74 | try: |
| 74 | released = lu.release_tasks() | 75 | if lu.waiting_tasks(): |
| 75 | self.log("Re-released %d tasks" % released) | 76 | released = lu.release_tasks() |
| 77 | self.log("Re-released %d tasks" % released) | ||
| 76 | 78 | ||
| 77 | time.sleep(1) | 79 | time.sleep(1) |
| 78 | 80 | ||
| 79 | self.log("Killing all tasks") | 81 | self.log("Killing all tasks") |
| 80 | for e in self.executables: | 82 | for e in self.executables: |
| 81 | try: | 83 | try: |
| 82 | e.kill() | 84 | e.kill() |
| 83 | except: | 85 | except: |
| 84 | pass | 86 | pass |
| 85 | 87 | ||
| 86 | time.sleep(1) | 88 | time.sleep(1) |
| 89 | except: | ||
| 90 | self.log("Failed to kill all tasks.") | ||
| 87 | 91 | ||
| 88 | def __strip_path(self, path): | 92 | def __strip_path(self, path): |
| 89 | '''Shorten path to something more readable.''' | 93 | '''Shorten path to something more readable.''' |
| @@ -138,7 +142,7 @@ class Experiment(object): | |||
| 138 | now_ready = lu.waiting_tasks() | 142 | now_ready = lu.waiting_tasks() |
| 139 | if now_ready != num_ready: | 143 | if now_ready != num_ready: |
| 140 | wait_start = time.time() | 144 | wait_start = time.time() |
| 141 | num_ready = lu.now_ready | 145 | num_ready = now_ready |
| 142 | 146 | ||
| 143 | def __run_tasks(self): | 147 | def __run_tasks(self): |
| 144 | self.log("Starting %d tasks" % len(self.executables)) | 148 | self.log("Starting %d tasks" % len(self.executables)) |
| @@ -185,6 +189,7 @@ class Experiment(object): | |||
| 185 | 189 | ||
| 186 | sched = lu.scheduler() | 190 | sched = lu.scheduler() |
| 187 | if sched != "Linux": | 191 | if sched != "Linux": |
| 192 | self.log("Switching back to Linux scheduler") | ||
| 188 | try: | 193 | try: |
| 189 | lu.switch_scheduler("Linux") | 194 | lu.switch_scheduler("Linux") |
| 190 | except: | 195 | except: |
| @@ -229,6 +234,8 @@ class Experiment(object): | |||
| 229 | self.log("Stopping regular tracers") | 234 | self.log("Stopping regular tracers") |
| 230 | map(methodcaller('stop_tracing'), self.regular_tracers) | 235 | map(methodcaller('stop_tracing'), self.regular_tracers) |
| 231 | 236 | ||
| 237 | os.system('sync') | ||
| 238 | |||
| 232 | def log(self, msg): | 239 | def log(self, msg): |
| 233 | print("[Exp %s]: %s" % (self.name, msg)) | 240 | print("[Exp %s]: %s" % (self.name, msg)) |
| 234 | 241 | ||
| @@ -236,6 +243,7 @@ class Experiment(object): | |||
| 236 | self.__to_linux() | 243 | self.__to_linux() |
| 237 | 244 | ||
| 238 | succ = False | 245 | succ = False |
| 246 | exception = None | ||
| 239 | try: | 247 | try: |
| 240 | self.__setup() | 248 | self.__setup() |
| 241 | 249 | ||
| @@ -244,16 +252,20 @@ class Experiment(object): | |||
| 244 | self.log("Saving results in %s" % self.finished_dir) | 252 | self.log("Saving results in %s" % self.finished_dir) |
| 245 | succ = True | 253 | succ = True |
| 246 | except Exception as e: | 254 | except Exception as e: |
| 255 | exception = e | ||
| 256 | |||
| 247 | # Give time for whatever failed to finish failing | 257 | # Give time for whatever failed to finish failing |
| 248 | time.sleep(2) | 258 | time.sleep(2) |
| 249 | self.__kill_all() | ||
| 250 | 259 | ||
| 251 | raise e | 260 | self.__try_kill_all() |
| 252 | finally: | ||
| 253 | self.__teardown() | ||
| 254 | finally: | 261 | finally: |
| 255 | self.log("Switching back to Linux scheduler") | 262 | try: |
| 256 | self.__to_linux() | 263 | self.__teardown() |
| 264 | self.__to_linux() | ||
| 265 | except Exception as e: | ||
| 266 | exception = exception or e | ||
| 267 | finally: | ||
| 268 | if exception: raise exception | ||
| 257 | 269 | ||
| 258 | if succ: | 270 | if succ: |
| 259 | self.__save_results() | 271 | self.__save_results() |
