diff options
-rw-r--r-- | config/config.py | 2 | ||||
-rwxr-xr-x | parse_exps.py | 4 | ||||
-rw-r--r-- | run/executable/executable.py | 3 | ||||
-rw-r--r-- | run/experiment.py | 185 | ||||
-rw-r--r-- | run/litmus_util.py | 13 | ||||
-rwxr-xr-x | run_exps.py | 170 |
6 files changed, 216 insertions, 161 deletions
diff --git a/config/config.py b/config/config.py index 1ac468b..b631aa2 100644 --- a/config/config.py +++ b/config/config.py | |||
@@ -9,7 +9,7 @@ BINS = {'rtspin' : get_executable_hint('rtspin', 'liblitmus'), | |||
9 | 'ftsplit' : get_executable_hint('ft2csv', 'feather-trace-tools'), | 9 | 'ftsplit' : get_executable_hint('ft2csv', 'feather-trace-tools'), |
10 | 'ftsort' : get_executable_hint('ftsort', 'feather-trace-tools'), | 10 | 'ftsort' : get_executable_hint('ftsort', 'feather-trace-tools'), |
11 | 'st_trace' : get_executable_hint('st_trace', 'feather-trace-tools'), | 11 | 'st_trace' : get_executable_hint('st_trace', 'feather-trace-tools'), |
12 | # Option, as not everyone uses kernelshark yet | 12 | # Optional, as not everyone uses kernelshark yet |
13 | 'trace-cmd' : get_executable_hint('trace-cmd', 'rt-kernelshark', True), | 13 | 'trace-cmd' : get_executable_hint('trace-cmd', 'rt-kernelshark', True), |
14 | # Optional, as sched_trace is not a publically supported repository | 14 | # Optional, as sched_trace is not a publically supported repository |
15 | 'st_show' : get_executable_hint('st_show', 'sched_trace', True)} | 15 | 'st_show' : get_executable_hint('st_show', 'sched_trace', True)} |
diff --git a/parse_exps.py b/parse_exps.py index d07378c..c2cbedb 100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
@@ -140,8 +140,8 @@ def main(): | |||
140 | if opts.ignore: | 140 | if opts.ignore: |
141 | for param in opts.ignore.split(","): | 141 | for param in opts.ignore.split(","): |
142 | builder.try_remove(param) | 142 | builder.try_remove(param) |
143 | # Always average multiple trials | 143 | builder.try_remove(PARAMS['trial']) # Always average multiple trials |
144 | builder.try_remove(PARAMS['trial']) | 144 | builder.try_remove(PARAMS['cycles']) # Only need for feather-trace parsing |
145 | 145 | ||
146 | col_map = builder.build() | 146 | col_map = builder.build() |
147 | result_table = TupleTable(col_map) | 147 | result_table = TupleTable(col_map) |
diff --git a/run/executable/executable.py b/run/executable/executable.py index e6f2003..a2426f1 100644 --- a/run/executable/executable.py +++ b/run/executable/executable.py | |||
@@ -59,6 +59,9 @@ class Executable(object): | |||
59 | def interrupt(self): | 59 | def interrupt(self): |
60 | self.sp.send_signal(signal.SIGINT) | 60 | self.sp.send_signal(signal.SIGINT) |
61 | 61 | ||
62 | def poll(self): | ||
63 | return self.sp.poll() | ||
64 | |||
62 | def terminate(self): | 65 | def terminate(self): |
63 | '''Send the terminate signal to the binary.''' | 66 | '''Send the terminate signal to the binary.''' |
64 | self.sp.terminate() | 67 | self.sp.terminate() |
diff --git a/run/experiment.py b/run/experiment.py index ff0e9f3..b0e46b6 100644 --- a/run/experiment.py +++ b/run/experiment.py | |||
@@ -1,9 +1,7 @@ | |||
1 | import common as com | ||
2 | import os | 1 | import os |
3 | import time | 2 | import time |
4 | import run.litmus_util as lu | 3 | import run.litmus_util as lu |
5 | import shutil as sh | 4 | import shutil as sh |
6 | import traceback | ||
7 | from operator import methodcaller | 5 | from operator import methodcaller |
8 | 6 | ||
9 | class ExperimentException(Exception): | 7 | class ExperimentException(Exception): |
@@ -11,16 +9,11 @@ class ExperimentException(Exception): | |||
11 | def __init__(self, name): | 9 | def __init__(self, name): |
12 | self.name = name | 10 | self.name = name |
13 | 11 | ||
14 | |||
15 | class ExperimentDone(ExperimentException): | 12 | class ExperimentDone(ExperimentException): |
16 | '''Raised when an experiment looks like it's been run already.''' | 13 | '''Raised when an experiment looks like it's been run already.''' |
17 | def __str__(self): | 14 | def __str__(self): |
18 | return "Experiment finished already: %d" % self.name | 15 | return "Experiment finished already: %d" % self.name |
19 | 16 | ||
20 | class ExperimentFailed(ExperimentException): | ||
21 | def __str__(self): | ||
22 | return "Experiment failed during execution: %d" % self.name | ||
23 | |||
24 | class SystemCorrupted(Exception): | 17 | class SystemCorrupted(Exception): |
25 | pass | 18 | pass |
26 | 19 | ||
@@ -31,7 +24,6 @@ class Experiment(object): | |||
31 | def __init__(self, name, scheduler, working_dir, finished_dir, | 24 | def __init__(self, name, scheduler, working_dir, finished_dir, |
32 | proc_entries, executables, tracer_types): | 25 | proc_entries, executables, tracer_types): |
33 | '''Run an experiment, optionally wrapped in tracing.''' | 26 | '''Run an experiment, optionally wrapped in tracing.''' |
34 | |||
35 | self.name = name | 27 | self.name = name |
36 | self.scheduler = scheduler | 28 | self.scheduler = scheduler |
37 | self.working_dir = working_dir | 29 | self.working_dir = working_dir |
@@ -40,16 +32,10 @@ class Experiment(object): | |||
40 | self.executables = executables | 32 | self.executables = executables |
41 | self.exec_out = None | 33 | self.exec_out = None |
42 | self.exec_err = None | 34 | self.exec_err = None |
35 | self.tracer_types = tracer_types | ||
43 | 36 | ||
44 | self.task_batch = com.num_cpus() | 37 | def __setup_tracers(self): |
45 | 38 | tracers = [ t(self.working_dir) for t in self.tracer_types ] | |
46 | self.__make_dirs() | ||
47 | self.__assign_executable_cwds() | ||
48 | self.__setup_tracers(tracer_types) | ||
49 | |||
50 | |||
51 | def __setup_tracers(self, tracer_types): | ||
52 | tracers = [ t(self.working_dir) for t in tracer_types ] | ||
53 | 39 | ||
54 | self.regular_tracers = [t for t in tracers if not t.is_exact()] | 40 | self.regular_tracers = [t for t in tracers if not t.is_exact()] |
55 | self.exact_tracers = [t for t in tracers if t.is_exact()] | 41 | self.exact_tracers = [t for t in tracers if t.is_exact()] |
@@ -84,13 +70,11 @@ class Experiment(object): | |||
84 | map(assign_cwd, self.executables) | 70 | map(assign_cwd, self.executables) |
85 | 71 | ||
86 | def __kill_all(self): | 72 | def __kill_all(self): |
87 | # Give time for whatever failed to finish failing | 73 | if lu.waiting_tasks(): |
88 | time.sleep(2) | 74 | released = lu.release_tasks() |
89 | 75 | self.log("Re-released %d tasks" % released) | |
90 | released = lu.release_tasks() | ||
91 | self.log("Re-released %d tasks" % released) | ||
92 | 76 | ||
93 | time.sleep(5) | 77 | time.sleep(1) |
94 | 78 | ||
95 | self.log("Killing all tasks") | 79 | self.log("Killing all tasks") |
96 | for e in self.executables: | 80 | for e in self.executables: |
@@ -99,7 +83,62 @@ class Experiment(object): | |||
99 | except: | 83 | except: |
100 | pass | 84 | pass |
101 | 85 | ||
102 | time.sleep(2) | 86 | time.sleep(1) |
87 | |||
88 | def __strip_path(self, path): | ||
89 | '''Shorten path to something more readable.''' | ||
90 | file_dir = os.path.split(self.working_dir)[0] | ||
91 | if path.index(file_dir) == 0: | ||
92 | path = path[len(file_dir):] | ||
93 | |||
94 | return path.strip("/") | ||
95 | |||
96 | def __check_tasks_status(self): | ||
97 | '''Raises an exception if any tasks have failed.''' | ||
98 | msgs = [] | ||
99 | |||
100 | for e in self.executables: | ||
101 | status = e.poll() | ||
102 | if status != None and status: | ||
103 | err_msg = "Task %s failed with status: %s" % (e.wait(), status) | ||
104 | msgs += [err_msg] | ||
105 | |||
106 | if msgs: | ||
107 | # Show at most 3 messages so that every task failing doesn't blow | ||
108 | # up the terminal | ||
109 | if len(msgs) > 3: | ||
110 | num_errs = len(msgs) - 3 | ||
111 | msgs = msgs[0:4] + ["...%d more task errors..." % num_errs] | ||
112 | |||
113 | out_name = self.__strip_path(self.exec_out.name) | ||
114 | err_name = self.__strip_path(self.exec_err.name) | ||
115 | help = "Check dmesg, %s, and %s" % (out_name, err_name) | ||
116 | |||
117 | raise Exception("\n".join(msgs + [help])) | ||
118 | |||
119 | def __wait_for_ready(self): | ||
120 | self.log("Sleeping until tasks are ready for release...") | ||
121 | |||
122 | wait_start = time.time() | ||
123 | num_ready = lu.waiting_tasks() | ||
124 | |||
125 | while num_ready < len(self.executables): | ||
126 | # Quit if too much time passes without a task becoming ready | ||
127 | if time.time() - wait_start > 180.0: | ||
128 | s = "waiting: %d, submitted: %d" %\ | ||
129 | (lu.waiting_tasks(), len(self.executables)) | ||
130 | raise Exception("Too much time spent waiting for tasks! %s" % s) | ||
131 | |||
132 | time.sleep(1) | ||
133 | |||
134 | # Quit if any tasks fail | ||
135 | self.__check_tasks_status() | ||
136 | |||
137 | # Reset the waiting time whenever more tasks become ready | ||
138 | now_ready = lu.waiting_tasks() | ||
139 | if now_ready != num_ready: | ||
140 | wait_start = time.time() | ||
141 | num_ready = lu.now_ready | ||
103 | 142 | ||
104 | def __run_tasks(self): | 143 | def __run_tasks(self): |
105 | self.log("Starting %d tasks" % len(self.executables)) | 144 | self.log("Starting %d tasks" % len(self.executables)) |
@@ -108,16 +147,9 @@ class Experiment(object): | |||
108 | try: | 147 | try: |
109 | e.execute() | 148 | e.execute() |
110 | except: | 149 | except: |
111 | raise Exception("Executable failed: %s" % e) | 150 | raise Exception("Executable failed to start: %s" % e) |
112 | 151 | ||
113 | self.log("Sleeping until tasks are ready for release...") | 152 | self.__wait_for_ready() |
114 | start = time.time() | ||
115 | while lu.waiting_tasks() < len(self.executables): | ||
116 | if time.time() - start > 30.0: | ||
117 | s = "waiting: %d, submitted: %d" %\ | ||
118 | (lu.waiting_tasks(), len(self.executables)) | ||
119 | raise Exception("Too much time spent waiting for tasks! %s" % s) | ||
120 | time.sleep(1) | ||
121 | 153 | ||
122 | # Exact tracers (like overheads) must be started right after release or | 154 | # Exact tracers (like overheads) must be started right after release or |
123 | # measurements will be full of irrelevant records | 155 | # measurements will be full of irrelevant records |
@@ -148,60 +180,40 @@ class Experiment(object): | |||
148 | def __save_results(self): | 180 | def __save_results(self): |
149 | os.rename(self.working_dir, self.finished_dir) | 181 | os.rename(self.working_dir, self.finished_dir) |
150 | 182 | ||
151 | def log(self, msg): | 183 | def __to_linux(self): |
152 | print("[Exp %s]: %s" % (self.name, msg)) | 184 | msgs = [] |
153 | |||
154 | def run_exp(self): | ||
155 | self.__check_system() | ||
156 | |||
157 | succ = False | ||
158 | try: | ||
159 | self.__setup() | ||
160 | 185 | ||
186 | sched = lu.scheduler() | ||
187 | if sched != "Linux": | ||
161 | try: | 188 | try: |
162 | self.__run_tasks() | 189 | lu.switch_scheduler("Linux") |
163 | self.log("Saving results in %s" % self.finished_dir) | ||
164 | succ = True | ||
165 | except: | 190 | except: |
166 | traceback.print_exc() | 191 | msgs += ["Scheduler is %s, cannot switch to Linux!" % sched] |
167 | self.__kill_all() | ||
168 | raise ExperimentFailed(self.name) | ||
169 | finally: | ||
170 | self.__teardown() | ||
171 | finally: | ||
172 | self.log("Switching to Linux scheduler") | ||
173 | |||
174 | # Give the tasks 10 seconds to finish before bailing | ||
175 | start = time.time() | ||
176 | while lu.all_tasks() > 0: | ||
177 | if time.time() - start < 10.0: | ||
178 | raise SystemCorrupted("%d tasks still running!" % | ||
179 | lu.all_tasks()) | ||
180 | |||
181 | lu.switch_scheduler("Linux") | ||
182 | |||
183 | if succ: | ||
184 | self.__save_results() | ||
185 | self.log("Experiment done!") | ||
186 | 192 | ||
187 | def __check_system(self): | ||
188 | running = lu.all_tasks() | 193 | running = lu.all_tasks() |
189 | if running: | 194 | if running: |
190 | raise SystemCorrupted("%d tasks already running!" % running) | 195 | msgs += ["%d real-time tasks still running!" % running] |
191 | 196 | ||
192 | sched = lu.scheduler() | 197 | if msgs: |
193 | if sched != "Linux": | 198 | raise SystemCorrupted("\n".join(msgs)) |
194 | raise SystemCorrupted("Scheduler is %s, should be Linux" % sched) | ||
195 | 199 | ||
196 | def __setup(self): | 200 | def __setup(self): |
201 | self.__make_dirs() | ||
202 | self.__assign_executable_cwds() | ||
203 | self.__setup_tracers() | ||
204 | |||
197 | self.log("Writing %d proc entries" % len(self.proc_entries)) | 205 | self.log("Writing %d proc entries" % len(self.proc_entries)) |
198 | map(methodcaller('write_proc'), self.proc_entries) | 206 | map(methodcaller('write_proc'), self.proc_entries) |
199 | 207 | ||
208 | self.log("Starting %d regular tracers" % len(self.regular_tracers)) | ||
209 | map(methodcaller('start_tracing'), self.regular_tracers) | ||
210 | |||
211 | time.sleep(1) | ||
212 | |||
200 | self.log("Switching to %s" % self.scheduler) | 213 | self.log("Switching to %s" % self.scheduler) |
201 | lu.switch_scheduler(self.scheduler) | 214 | lu.switch_scheduler(self.scheduler) |
202 | 215 | ||
203 | self.log("Starting %d regular tracers" % len(self.regular_tracers)) | 216 | time.sleep(1) |
204 | map(methodcaller('start_tracing'), self.regular_tracers) | ||
205 | 217 | ||
206 | self.exec_out = open('%s/exec-out.txt' % self.working_dir, 'w') | 218 | self.exec_out = open('%s/exec-out.txt' % self.working_dir, 'w') |
207 | self.exec_err = open('%s/exec-err.txt' % self.working_dir, 'w') | 219 | self.exec_err = open('%s/exec-err.txt' % self.working_dir, 'w') |
@@ -217,3 +229,32 @@ class Experiment(object): | |||
217 | self.log("Stopping regular tracers") | 229 | self.log("Stopping regular tracers") |
218 | map(methodcaller('stop_tracing'), self.regular_tracers) | 230 | map(methodcaller('stop_tracing'), self.regular_tracers) |
219 | 231 | ||
232 | def log(self, msg): | ||
233 | print("[Exp %s]: %s" % (self.name, msg)) | ||
234 | |||
235 | def run_exp(self): | ||
236 | self.__to_linux() | ||
237 | |||
238 | succ = False | ||
239 | try: | ||
240 | self.__setup() | ||
241 | |||
242 | try: | ||
243 | self.__run_tasks() | ||
244 | self.log("Saving results in %s" % self.finished_dir) | ||
245 | succ = True | ||
246 | except Exception as e: | ||
247 | # Give time for whatever failed to finish failing | ||
248 | time.sleep(2) | ||
249 | self.__kill_all() | ||
250 | |||
251 | raise e | ||
252 | finally: | ||
253 | self.__teardown() | ||
254 | finally: | ||
255 | self.log("Switching back to Linux scheduler") | ||
256 | self.__to_linux() | ||
257 | |||
258 | if succ: | ||
259 | self.__save_results() | ||
260 | self.log("Experiment done!") | ||
diff --git a/run/litmus_util.py b/run/litmus_util.py index 70da262..81f690b 100644 --- a/run/litmus_util.py +++ b/run/litmus_util.py | |||
@@ -20,7 +20,7 @@ def switch_scheduler(switch_to_in): | |||
20 | with open('/proc/litmus/active_plugin', 'w') as active_plugin: | 20 | with open('/proc/litmus/active_plugin', 'w') as active_plugin: |
21 | subprocess.Popen(["echo", switch_to], stdout=active_plugin) | 21 | subprocess.Popen(["echo", switch_to], stdout=active_plugin) |
22 | 22 | ||
23 | # it takes a bit to do the switch, sleep an arbitrary amount of time | 23 | # It takes a bit to do the switch, sleep an arbitrary amount of time |
24 | time.sleep(2) | 24 | time.sleep(2) |
25 | 25 | ||
26 | cur_plugin = scheduler() | 26 | cur_plugin = scheduler() |
@@ -29,24 +29,21 @@ def switch_scheduler(switch_to_in): | |||
29 | (switch_to, cur_plugin)) | 29 | (switch_to, cur_plugin)) |
30 | 30 | ||
31 | def waiting_tasks(): | 31 | def waiting_tasks(): |
32 | reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M) | 32 | reg = re.compile(r'^ready.*?(?P<WAITING>\d+)$', re.M) |
33 | with open('/proc/litmus/stats', 'r') as f: | 33 | with open('/proc/litmus/stats', 'r') as f: |
34 | data = f.read() | 34 | data = f.read() |
35 | 35 | ||
36 | # Ignore if no tasks are waiting for release | 36 | # Ignore if no tasks are waiting for release |
37 | match = re.search(reg, data) | 37 | waiting = re.search(reg, data).group("WAITING") |
38 | ready = match.group("READY") | ||
39 | 38 | ||
40 | return 0 if not ready else int(ready) | 39 | return 0 if not waiting else int(waiting) |
41 | 40 | ||
42 | def all_tasks(): | 41 | def all_tasks(): |
43 | reg = re.compile(r'^real-time.*?(?P<TASKS>\d+)$', re.M) | 42 | reg = re.compile(r'^real-time.*?(?P<TASKS>\d+)$', re.M) |
44 | with open('/proc/litmus/stats', 'r') as f: | 43 | with open('/proc/litmus/stats', 'r') as f: |
45 | data = f.read() | 44 | data = f.read() |
46 | 45 | ||
47 | # Ignore if no tasks are waiting for release | 46 | ready = re.search(reg, data).group("TASKS") |
48 | match = re.search(reg, data) | ||
49 | ready = match.group("TASKS") | ||
50 | 47 | ||
51 | return 0 if not ready else int(ready) | 48 | return 0 if not ready else int(ready) |
52 | 49 | ||
diff --git a/run_exps.py b/run_exps.py index d7a06b5..a15018d 100755 --- a/run_exps.py +++ b/run_exps.py | |||
@@ -12,12 +12,13 @@ from config.config import PARAMS,DEFAULTS | |||
12 | from collections import namedtuple | 12 | from collections import namedtuple |
13 | from optparse import OptionParser | 13 | from optparse import OptionParser |
14 | from run.executable.executable import Executable | 14 | from run.executable.executable import Executable |
15 | from run.experiment import Experiment,ExperimentDone,ExperimentFailed,SystemCorrupted | 15 | from run.experiment import Experiment,ExperimentDone,SystemCorrupted |
16 | from run.proc_entry import ProcEntry | 16 | from run.proc_entry import ProcEntry |
17 | 17 | ||
18 | '''Customizable experiment parameters''' | 18 | '''Customizable experiment parameters''' |
19 | ExpParams = namedtuple('ExpParams', ['scheduler', 'duration', 'tracers', | 19 | ExpParams = namedtuple('ExpParams', ['scheduler', 'duration', 'tracers', |
20 | 'kernel', 'config_options']) | 20 | 'kernel', 'config_options', 'file_params', |
21 | 'pre_script', 'post_script']) | ||
21 | '''Comparison of requested versus actual kernel compile parameter value''' | 22 | '''Comparison of requested versus actual kernel compile parameter value''' |
22 | ConfigResult = namedtuple('ConfigResult', ['param', 'wanted', 'actual']) | 23 | ConfigResult = namedtuple('ConfigResult', ['param', 'wanted', 'actual']) |
23 | 24 | ||
@@ -113,8 +114,8 @@ def fix_paths(schedule, exp_dir, sched_file): | |||
113 | args = args.replace(arg, abspath) | 114 | args = args.replace(arg, abspath) |
114 | break | 115 | break |
115 | elif re.match(r'.*\w+\.[a-zA-Z]\w*', arg): | 116 | elif re.match(r'.*\w+\.[a-zA-Z]\w*', arg): |
116 | print("WARNING: non-existent file '%s' may be referenced:\n\t%s" | 117 | sys.stderr.write("WARNING: non-existent file '%s' " % arg + |
117 | % (arg, sched_file)) | 118 | "may be referenced:\n\t%s" % sched_file) |
118 | 119 | ||
119 | schedule['task'][idx] = (task, args) | 120 | schedule['task'][idx] = (task, args) |
120 | 121 | ||
@@ -182,22 +183,21 @@ def verify_environment(exp_params): | |||
182 | raise InvalidConfig(results) | 183 | raise InvalidConfig(results) |
183 | 184 | ||
184 | 185 | ||
185 | def run_parameter(exp_dir, out_dir, params, param_name): | 186 | def run_script(script_params, exp, exp_dir, out_dir): |
186 | '''Run an executable (arguments optional) specified as a configurable | 187 | '''Run an executable (arguments optional)''' |
187 | @param_name in @params.''' | 188 | if not script_params: |
188 | if PARAMS[param_name] not in params: | ||
189 | return | 189 | return |
190 | 190 | ||
191 | script_params = params[PARAMS[param_name]] | ||
192 | |||
193 | # Split into arguments and program name | 191 | # Split into arguments and program name |
194 | if type(script_params) != type([]): | 192 | if type(script_params) != type([]): |
195 | script_params = [script_params] | 193 | script_params = [script_params] |
196 | script_name = script_params.pop(0) | ||
197 | 194 | ||
195 | exp.log("Running %s" % script_params.join(" ")) | ||
196 | |||
197 | script_name = script_params.pop(0) | ||
198 | script = com.get_executable(script_name, cwd=exp_dir) | 198 | script = com.get_executable(script_name, cwd=exp_dir) |
199 | 199 | ||
200 | out = open('%s/%s-out.txt' % (out_dir, param_name), 'w') | 200 | out = open('%s/%s-out.txt' % (out_dir, script_name), 'w') |
201 | prog = Executable(script, script_params, cwd=out_dir, | 201 | prog = Executable(script, script_params, cwd=out_dir, |
202 | stderr_file=out, stdout_file=out) | 202 | stderr_file=out, stdout_file=out) |
203 | 203 | ||
@@ -207,28 +207,41 @@ def run_parameter(exp_dir, out_dir, params, param_name): | |||
207 | out.close() | 207 | out.close() |
208 | 208 | ||
209 | 209 | ||
210 | def get_exp_params(cmd_scheduler, cmd_duration, file_params): | 210 | def make_exp_params(cmd_scheduler, cmd_duration, sched_dir, param_file): |
211 | '''Return ExpParam with configured values of all hardcoded params.''' | 211 | '''Return ExpParam with configured values of all hardcoded params.''' |
212 | kernel = copts = "" | 212 | kernel = copts = "" |
213 | 213 | ||
214 | scheduler = cmd_scheduler or file_params[PARAMS['sched']] | 214 | # Load parameter file |
215 | duration = cmd_duration or file_params[PARAMS['dur']] or\ | 215 | param_file = param_file or "%s/%s" % (sched_dir, DEFAULTS['params_file']) |
216 | if os.path.isfile(param_file): | ||
217 | fparams = com.load_params(param_file) | ||
218 | else: | ||
219 | fparams = {} | ||
220 | |||
221 | scheduler = cmd_scheduler or fparams[PARAMS['sched']] | ||
222 | duration = cmd_duration or fparams[PARAMS['dur']] or\ | ||
216 | DEFAULTS['duration'] | 223 | DEFAULTS['duration'] |
217 | 224 | ||
218 | # Experiments can specify required kernel name | 225 | # Experiments can specify required kernel name |
219 | if PARAMS['kernel'] in file_params: | 226 | if PARAMS['kernel'] in fparams: |
220 | kernel = file_params[PARAMS['kernel']] | 227 | kernel = fparams[PARAMS['kernel']] |
221 | 228 | ||
222 | # Or required config options | 229 | # Or required config options |
223 | if PARAMS['copts'] in file_params: | 230 | if PARAMS['copts'] in fparams: |
224 | copts = file_params[PARAMS['copts']] | 231 | copts = fparams[PARAMS['copts']] |
225 | 232 | ||
226 | # Or required tracers | 233 | # Or required tracers |
227 | requested = [] | 234 | requested = [] |
228 | if PARAMS['trace'] in file_params: | 235 | if PARAMS['trace'] in fparams: |
229 | requested = file_params[PARAMS['trace']] | 236 | requested = fparams[PARAMS['trace']] |
230 | tracers = trace.get_tracer_types(requested) | 237 | tracers = trace.get_tracer_types(requested) |
231 | 238 | ||
239 | # Or scripts to run before and after experiments | ||
240 | def get_script(name): | ||
241 | return fparams[name] if name in fparams else None | ||
242 | pre_script = get_script('pre') | ||
243 | post_script = get_script('post') | ||
244 | |||
232 | # But only these two are mandatory | 245 | # But only these two are mandatory |
233 | if not scheduler: | 246 | if not scheduler: |
234 | raise IOError("No scheduler found in param file!") | 247 | raise IOError("No scheduler found in param file!") |
@@ -236,48 +249,39 @@ def get_exp_params(cmd_scheduler, cmd_duration, file_params): | |||
236 | raise IOError("No duration found in param file!") | 249 | raise IOError("No duration found in param file!") |
237 | 250 | ||
238 | return ExpParams(scheduler=scheduler, kernel=kernel, duration=duration, | 251 | return ExpParams(scheduler=scheduler, kernel=kernel, duration=duration, |
239 | config_options=copts, tracers=tracers) | 252 | config_options=copts, tracers=tracers, file_params=fparams, |
240 | 253 | pre_script=pre_script, post_script=post_script) | |
241 | 254 | ||
242 | def load_experiment(sched_file, cmd_scheduler, cmd_duration, | 255 | def run_experiment(name, sched_file, exp_params, out_dir, |
243 | param_file, out_dir, ignore, jabber): | 256 | start_message, ignore, jabber): |
244 | '''Load and parse data from files and run result.''' | 257 | '''Load and parse data from files and run result.''' |
245 | if not os.path.isfile(sched_file): | 258 | if not os.path.isfile(sched_file): |
246 | raise IOError("Cannot find schedule file: %s" % sched_file) | 259 | raise IOError("Cannot find schedule file: %s" % sched_file) |
247 | 260 | ||
248 | dir_name, fname = os.path.split(sched_file) | 261 | dir_name, fname = os.path.split(sched_file) |
249 | exp_name = os.path.split(dir_name)[1] + "/" + fname | ||
250 | work_dir = "%s/tmp" % dir_name | 262 | work_dir = "%s/tmp" % dir_name |
251 | 263 | ||
252 | # Load parameter file | 264 | procs, execs = load_schedule(name, sched_file, exp_params.duration) |
253 | param_file = param_file or \ | ||
254 | "%s/%s" % (dir_name, DEFAULTS['params_file']) | ||
255 | if os.path.isfile(param_file): | ||
256 | file_params = com.load_params(param_file) | ||
257 | else: | ||
258 | file_params = {} | ||
259 | |||
260 | # Create input needed by Experiment | ||
261 | exp_params = get_exp_params(cmd_scheduler, cmd_duration, file_params) | ||
262 | procs, execs = load_schedule(exp_name, sched_file, exp_params.duration) | ||
263 | 265 | ||
264 | exp = Experiment(exp_name, exp_params.scheduler, work_dir, out_dir, | 266 | exp = Experiment(name, exp_params.scheduler, work_dir, out_dir, |
265 | procs, execs, exp_params.tracers) | 267 | procs, execs, exp_params.tracers) |
266 | 268 | ||
269 | exp.log(start_message) | ||
270 | |||
267 | if not ignore: | 271 | if not ignore: |
268 | verify_environment(exp_params) | 272 | verify_environment(exp_params) |
269 | 273 | ||
270 | run_parameter(dir_name, work_dir, file_params, 'pre') | 274 | run_script(exp_params.pre_script, exp, dir_name, work_dir) |
271 | 275 | ||
272 | exp.run_exp() | 276 | exp.run_exp() |
273 | 277 | ||
274 | run_parameter(dir_name, out_dir, file_params, 'post') | 278 | run_script(exp_params.post_script, exp, dir_name, out_dir) |
275 | 279 | ||
276 | if jabber: | 280 | if jabber: |
277 | jabber.send("Completed '%s'" % exp_name) | 281 | jabber.send("Completed '%s'" % name) |
278 | 282 | ||
279 | # Save parameters used to run experiment in out_dir | 283 | # Save parameters used to run experiment in out_dir |
280 | out_params = dict(file_params.items() + | 284 | out_params = dict(exp_params.file_params.items() + |
281 | [(PARAMS['sched'], exp_params.scheduler), | 285 | [(PARAMS['sched'], exp_params.scheduler), |
282 | (PARAMS['tasks'], len(execs)), | 286 | (PARAMS['tasks'], len(execs)), |
283 | (PARAMS['dur'], exp_params.duration)]) | 287 | (PARAMS['dur'], exp_params.duration)]) |
@@ -292,6 +296,7 @@ def load_experiment(sched_file, cmd_scheduler, cmd_duration, | |||
292 | 296 | ||
293 | 297 | ||
294 | def get_exps(opts, args): | 298 | def get_exps(opts, args): |
299 | '''Return list of experiment files or directories''' | ||
295 | if args: | 300 | if args: |
296 | return args | 301 | return args |
297 | 302 | ||
@@ -333,63 +338,72 @@ def setup_email(target): | |||
333 | return None | 338 | return None |
334 | 339 | ||
335 | 340 | ||
341 | def make_paths(exp, out_base_dir, opts): | ||
342 | '''Translate experiment name to (schedule file, output directory) paths''' | ||
343 | path = "%s/%s" % (os.getcwd(), exp) | ||
344 | out_dir = "%s/%s" % (out_base_dir, os.path.split(exp.strip('/'))[1]) | ||
345 | |||
346 | if not os.path.exists(path): | ||
347 | raise IOError("Invalid experiment: %s" % path) | ||
348 | |||
349 | if opts.force and os.path.exists(out_dir): | ||
350 | shutil.rmtree(out_dir) | ||
351 | |||
352 | if os.path.isdir(path): | ||
353 | sched_file = "%s/%s" % (path, opts.sched_file) | ||
354 | else: | ||
355 | sched_file = path | ||
356 | |||
357 | return sched_file, out_dir | ||
358 | |||
359 | |||
336 | def main(): | 360 | def main(): |
337 | opts, args = parse_args() | 361 | opts, args = parse_args() |
338 | 362 | ||
339 | scheduler = opts.scheduler | ||
340 | duration = opts.duration | ||
341 | param_file = opts.param_file | ||
342 | out_base = os.path.abspath(opts.out_dir) | ||
343 | |||
344 | exps = get_exps(opts, args) | 363 | exps = get_exps(opts, args) |
345 | 364 | ||
346 | created = False | 365 | jabber = setup_jabber(opts.jabber) if opts.jabber else None |
366 | email = setup_email(opts.email) if opts.email else None | ||
367 | |||
368 | out_base = os.path.abspath(opts.out_dir) | ||
369 | created = False | ||
347 | if not os.path.exists(out_base): | 370 | if not os.path.exists(out_base): |
348 | created = True | 371 | created = True |
349 | os.mkdir(out_base) | 372 | os.mkdir(out_base) |
350 | 373 | ||
351 | ran = 0 | 374 | ran = done = succ = failed = invalid = 0 |
352 | done = 0 | ||
353 | succ = 0 | ||
354 | failed = 0 | ||
355 | invalid = 0 | ||
356 | |||
357 | jabber = setup_jabber(opts.jabber) if opts.jabber else None | ||
358 | email = setup_email(opts.email) if opts.email else None | ||
359 | |||
360 | for exp in exps: | ||
361 | path = "%s/%s" % (os.getcwd(), exp) | ||
362 | out_dir = "%s/%s" % (out_base, os.path.split(exp.strip('/'))[1]) | ||
363 | 375 | ||
364 | if not os.path.exists(path): | 376 | for i, exp in enumerate(exps): |
365 | raise IOError("Invalid experiment: %s" % path) | 377 | sched_file, out_dir = make_paths(exp, out_base, opts) |
378 | sched_dir = os.path.split(sched_file)[0] | ||
366 | 379 | ||
367 | if opts.force and os.path.exists(out_dir): | 380 | try: |
368 | shutil.rmtree(out_dir) | 381 | start_message = "Loading experiment %d of %d." % (i+1, len(exps)) |
382 | exp_params = make_exp_params(opts.scheduler, opts.duration, | ||
383 | sched_dir, opts.param_file) | ||
369 | 384 | ||
370 | if os.path.isdir(exp): | 385 | run_experiment(exp, sched_file, exp_params, out_dir, |
371 | path = "%s/%s" % (path, opts.sched_file) | 386 | start_message, opts.ignore, jabber) |
372 | 387 | ||
373 | try: | ||
374 | load_experiment(path, scheduler, duration, param_file, | ||
375 | out_dir, opts.ignore, jabber) | ||
376 | succ += 1 | 388 | succ += 1 |
377 | except ExperimentDone: | 389 | except ExperimentDone: |
390 | sys.stderr.write("Experiment '%s' already completed " % exp + | ||
391 | "at '%s'\n" % out_base) | ||
378 | done += 1 | 392 | done += 1 |
379 | print("Experiment '%s' already completed at '%s'" % (exp, out_base)) | ||
380 | except (InvalidKernel, InvalidConfig) as e: | 393 | except (InvalidKernel, InvalidConfig) as e: |
394 | sys.stderr.write("Invalid environment for experiment '%s'\n" % exp) | ||
395 | sys.stderr.write("%s\n" % e) | ||
381 | invalid += 1 | 396 | invalid += 1 |
382 | print("Invalid environment for experiment '%s'" % exp) | ||
383 | print(e) | ||
384 | except KeyboardInterrupt: | 397 | except KeyboardInterrupt: |
385 | print("Keyboard interrupt, quitting") | 398 | sys.stderr.write("Keyboard interrupt, quitting\n") |
386 | break | 399 | break |
387 | except SystemCorrupted as e: | 400 | except SystemCorrupted as e: |
388 | print("System is corrupted! Fix state before continuing.") | 401 | sys.stderr.write("System is corrupted! Fix state before continuing.\n") |
389 | print(e) | 402 | sys.stderr.write("%s\n" % e) |
390 | break | 403 | break |
391 | except ExperimentFailed: | 404 | except Exception as e: |
392 | print("Failed experiment %s" % exp) | 405 | sys.stderr.write("Failed experiment %s\n" % exp) |
406 | sys.stderr.write("%s\n" % e) | ||
393 | failed += 1 | 407 | failed += 1 |
394 | 408 | ||
395 | ran += 1 | 409 | ran += 1 |
@@ -398,7 +412,7 @@ def main(): | |||
398 | if not os.listdir(out_base) and created and not succ: | 412 | if not os.listdir(out_base) and created and not succ: |
399 | os.rmdir(out_base) | 413 | os.rmdir(out_base) |
400 | 414 | ||
401 | message = "Experiments ran:\t%d of %d" % (ran, len(args)) +\ | 415 | message = "Experiments ran:\t%d of %d" % (ran, len(exps)) +\ |
402 | "\n Successful:\t\t%d" % succ +\ | 416 | "\n Successful:\t\t%d" % succ +\ |
403 | "\n Failed:\t\t%d" % failed +\ | 417 | "\n Failed:\t\t%d" % failed +\ |
404 | "\n Already Done:\t\t%d" % done +\ | 418 | "\n Already Done:\t\t%d" % done +\ |