diff options
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | common.py | 7 | ||||
-rw-r--r-- | config/config.py | 18 | ||||
-rw-r--r-- | gen/edf_generators.py | 1 | ||||
-rw-r--r-- | gen/generator.py | 6 | ||||
-rwxr-xr-x | gen_exps.py | 15 | ||||
-rw-r--r-- | parse/col_map.py | 4 | ||||
-rw-r--r-- | parse/sched.py | 31 | ||||
-rwxr-xr-x | parse_exps.py | 4 | ||||
-rw-r--r-- | run/crontab.py | 151 | ||||
-rw-r--r-- | run/experiment.py | 52 | ||||
-rwxr-xr-x | run_exps.py | 334 |
12 files changed, 474 insertions, 155 deletions
@@ -156,16 +156,14 @@ You can specify your own spin programs to run as well instead of rtspin by putti | |||
156 | $ echo "colorspin -f color1.csv 10 20" > test.sched | 156 | $ echo "colorspin -f color1.csv 10 20" > test.sched |
157 | ``` | 157 | ``` |
158 | 158 | ||
159 | You can specify parameters for an experiment in a file instead of on the command line using params.py (the `-p` option lets you choose the name of this file if `params.py` is not for you): | 159 | You can specify parameters for an experiment in a file instead of on the command line using params.py: |
160 | 160 | ||
161 | ```bash | 161 | ```bash |
162 | $ echo "{'scheduler':'GSN-EDF', 'duration':10}" > params.py | 162 | $ echo "{'scheduler':'GSN-EDF', 'duration':10}" > params.py |
163 | $ run_exps.py test.sched | 163 | $ run_exps.py test.sched |
164 | ``` | 164 | ``` |
165 | 165 | ||
166 | You can also run multiple experiments with a single command, provided a directory with a schedule file exists for each. By default, the program will look for sched.py for the schedule file and params.py for the parameter file, but this behavior can be changed using the `-p` and `-c` options. | 166 | You can also run multiple experiments with a single command, provided a directory with a schedule file exists for each. You can include non-relevant parameters which `run_exps.py` does not understand in `params.py`. These parameters will be saved with the data output by `run_exps.py`. This is useful for tracking variations in system parameters versus experimental results. In the following example, multiple experiments are demonstrated and an extra parameter `test-param` is included: |
167 | |||
168 | You can include non-relevant parameters which `run_exps.py` does not understand in `params.py`. These parameters will be saved with the data output by `run_exps.py`. This is useful for tracking variations in system parameters versus experimental results. In the following example, multiple experiments are demonstrated and an extra parameter `test-param` is included: | ||
169 | 167 | ||
170 | ```bash | 168 | ```bash |
171 | $ mkdir test1 | 169 | $ mkdir test1 |
@@ -182,7 +182,7 @@ def ft_freq(): | |||
182 | 182 | ||
183 | 183 | ||
184 | def kernel(): | 184 | def kernel(): |
185 | return subprocess.check_output(["uname", "-r"]) | 185 | return subprocess.check_output(["uname", "-r"]).strip("\n") |
186 | 186 | ||
187 | def is_executable(fname): | 187 | def is_executable(fname): |
188 | '''Return whether the file passed in is executable''' | 188 | '''Return whether the file passed in is executable''' |
@@ -212,4 +212,7 @@ def log_once(id, msg = None): | |||
212 | __logged += [id] | 212 | __logged += [id] |
213 | if indent: | 213 | if indent: |
214 | msg = ' ' + msg.strip('\t').replace('\n', '\n\t') | 214 | msg = ' ' + msg.strip('\t').replace('\n', '\n\t') |
215 | sys.stderr.write('\n' + msg + '\n') | 215 | sys.stderr.write('\n' + msg.strip('\n') + '\n') |
216 | |||
217 | def get_cmd(): | ||
218 | return os.path.split(sys.argv[0])[1] | ||
diff --git a/config/config.py b/config/config.py index 28e78c9..27cb2dd 100644 --- a/config/config.py +++ b/config/config.py | |||
@@ -14,12 +14,14 @@ BINS = {'rtspin' : get_executable_hint('rtspin', 'liblitmus'), | |||
14 | # Optional, as sched_trace is not a publically supported repository | 14 | # Optional, as sched_trace is not a publically supported repository |
15 | 'st_show' : get_executable_hint('st_show', 'sched_trace', True)} | 15 | 'st_show' : get_executable_hint('st_show', 'sched_trace', True)} |
16 | 16 | ||
17 | '''Names of output files.''' | 17 | '''Names of data files.''' |
18 | FILES = {'ft_data' : 'ft.bin', | 18 | FILES = {'params_file' : 'params.py', |
19 | 'ft_matches' : r'(ft.*\.bin$)|(.*\.ft)', | 19 | 'sched_file' : 'sched.py', |
20 | 'linux_data' : 'trace.dat', | 20 | 'ft_data' : 'ft.bin', |
21 | 'sched_data' : 'st-{}.bin', | 21 | 'ft_matches' : r'(ft.*\.bin$)|(.*\.ft)', |
22 | 'log_data' : 'trace.slog'} | 22 | 'linux_data' : 'trace.dat', |
23 | 'sched_data' : 'st-{}.bin', | ||
24 | 'log_data' : 'trace.slog'} | ||
23 | 25 | ||
24 | '''Default parameter names in params.py.''' | 26 | '''Default parameter names in params.py.''' |
25 | PARAMS = {'sched' : 'scheduler', # Scheduler used by run_exps | 27 | PARAMS = {'sched' : 'scheduler', # Scheduler used by run_exps |
@@ -35,9 +37,7 @@ PARAMS = {'sched' : 'scheduler', # Scheduler used by run_exps | |||
35 | } | 37 | } |
36 | 38 | ||
37 | '''Default values for program options.''' | 39 | '''Default values for program options.''' |
38 | DEFAULTS = {'params_file' : 'params.py', | 40 | DEFAULTS = {'duration' : 10, |
39 | 'sched_file' : 'sched.py', | ||
40 | 'duration' : 10, | ||
41 | 'prog' : 'rtspin', | 41 | 'prog' : 'rtspin', |
42 | 'out-gen' : 'exps', | 42 | 'out-gen' : 'exps', |
43 | 'out-run' : 'run-data', | 43 | 'out-run' : 'run-data', |
diff --git a/gen/edf_generators.py b/gen/edf_generators.py index a722c21..8e4b8df 100644 --- a/gen/edf_generators.py +++ b/gen/edf_generators.py | |||
@@ -28,6 +28,7 @@ class EdfGenerator(gen.Generator): | |||
28 | pdist = self._create_dist('period', | 28 | pdist = self._create_dist('period', |
29 | exp_params['periods'], | 29 | exp_params['periods'], |
30 | gen.NAMED_PERIODS) | 30 | gen.NAMED_PERIODS) |
31 | |||
31 | udist = self._create_dist('utilization', | 32 | udist = self._create_dist('utilization', |
32 | exp_params['utils'], | 33 | exp_params['utils'], |
33 | gen.NAMED_UTILIZATIONS) | 34 | gen.NAMED_UTILIZATIONS) |
diff --git a/gen/generator.py b/gen/generator.py index bc86cfe..40a0243 100644 --- a/gen/generator.py +++ b/gen/generator.py | |||
@@ -6,7 +6,7 @@ import shutil as sh | |||
6 | 6 | ||
7 | from Cheetah.Template import Template | 7 | from Cheetah.Template import Template |
8 | from common import get_config_option,num_cpus,recordtype,log_once | 8 | from common import get_config_option,num_cpus,recordtype,log_once |
9 | from config.config import DEFAULTS,PARAMS | 9 | from config.config import FILES,PARAMS |
10 | from gen.dp import DesignPointGenerator | 10 | from gen.dp import DesignPointGenerator |
11 | from parse.col_map import ColMapBuilder | 11 | from parse.col_map import ColMapBuilder |
12 | 12 | ||
@@ -129,7 +129,7 @@ class Generator(object): | |||
129 | 129 | ||
130 | def _write_schedule(self, params): | 130 | def _write_schedule(self, params): |
131 | '''Write schedule file using current template for @params.''' | 131 | '''Write schedule file using current template for @params.''' |
132 | sched_file = self.out_dir + "/" + DEFAULTS['sched_file'] | 132 | sched_file = self.out_dir + "/" + FILES['sched_file'] |
133 | with open(sched_file, 'wa') as f: | 133 | with open(sched_file, 'wa') as f: |
134 | f.write(str(Template(self.template, searchList=[params]))) | 134 | f.write(str(Template(self.template, searchList=[params]))) |
135 | 135 | ||
@@ -143,7 +143,7 @@ class Generator(object): | |||
143 | else: | 143 | else: |
144 | tasks = 0 | 144 | tasks = 0 |
145 | 145 | ||
146 | exp_params_file = self.out_dir + "/" + DEFAULTS['params_file'] | 146 | exp_params_file = self.out_dir + "/" + FILES['params_file'] |
147 | with open(exp_params_file, 'wa') as f: | 147 | with open(exp_params_file, 'wa') as f: |
148 | params['scheduler'] = self.scheduler | 148 | params['scheduler'] = self.scheduler |
149 | pprint.pprint(params, f) | 149 | pprint.pprint(params, f) |
diff --git a/gen_exps.py b/gen_exps.py index 65f50d8..e888f5f 100755 --- a/gen_exps.py +++ b/gen_exps.py | |||
@@ -43,6 +43,14 @@ def load_file(fname): | |||
43 | except: | 43 | except: |
44 | raise IOError("Invalid generation file: %s" % fname) | 44 | raise IOError("Invalid generation file: %s" % fname) |
45 | 45 | ||
46 | def print_descriptions(described): | ||
47 | for generator in described.split(','): | ||
48 | if generator not in gen.get_generators(): | ||
49 | sys.stderr.write("No generator '%s'\n" % generator) | ||
50 | else: | ||
51 | print("Generator '%s', " % generator) | ||
52 | gen.get_generators()[generator]().print_help() | ||
53 | |||
46 | def main(): | 54 | def main(): |
47 | opts, args = parse_args() | 55 | opts, args = parse_args() |
48 | 56 | ||
@@ -50,12 +58,7 @@ def main(): | |||
50 | if opts.list_gens: | 58 | if opts.list_gens: |
51 | print(", ".join(gen.get_generators())) | 59 | print(", ".join(gen.get_generators())) |
52 | if opts.described != None: | 60 | if opts.described != None: |
53 | for generator in opts.described.split(','): | 61 | print_descriptions(opts.described) |
54 | if generator not in gen.get_generators(): | ||
55 | sys.stderr.write("No generator '%s'\n" % generator) | ||
56 | else: | ||
57 | print("Generator '%s', " % generator) | ||
58 | gen.get_generators()[generator]().print_help() | ||
59 | if opts.list_gens or opts.described: | 62 | if opts.list_gens or opts.described: |
60 | return 0 | 63 | return 0 |
61 | 64 | ||
diff --git a/parse/col_map.py b/parse/col_map.py index ceb8867..59484e8 100644 --- a/parse/col_map.py +++ b/parse/col_map.py | |||
@@ -22,7 +22,7 @@ class ColMapBuilder(object): | |||
22 | 22 | ||
23 | class ColMap(object): | 23 | class ColMap(object): |
24 | def __init__(self, col_list, values = None): | 24 | def __init__(self, col_list, values = None): |
25 | self.col_list = col_list | 25 | self.col_list = sorted(col_list) |
26 | self.rev_map = {} | 26 | self.rev_map = {} |
27 | self.values = values | 27 | self.values = values |
28 | 28 | ||
@@ -50,7 +50,7 @@ class ColMap(object): | |||
50 | if col not in kv: | 50 | if col not in kv: |
51 | key += (None,) | 51 | key += (None,) |
52 | else: | 52 | else: |
53 | key += (kv[col],) | 53 | key += (str(kv[col]),) |
54 | 54 | ||
55 | return key | 55 | return key |
56 | 56 | ||
diff --git a/parse/sched.py b/parse/sched.py index 6e1fbe6..524f1ed 100644 --- a/parse/sched.py +++ b/parse/sched.py | |||
@@ -98,15 +98,38 @@ record_map = {} | |||
98 | RECORD_SIZE = 24 | 98 | RECORD_SIZE = 24 |
99 | NSEC_PER_MSEC = 1000000 | 99 | NSEC_PER_MSEC = 1000000 |
100 | 100 | ||
101 | def bits_to_bytes(bits): | ||
102 | '''Includes padding''' | ||
103 | return bits / 8 + (1 if bits%8 else 0) | ||
104 | |||
105 | def field_bytes(fields): | ||
106 | fbytes = 0 | ||
107 | fbits = 0 | ||
108 | for f in fields: | ||
109 | flist = list(f) | ||
110 | |||
111 | if len(flist) > 2: | ||
112 | # Specified a bitfield | ||
113 | fbits += flist[2] | ||
114 | else: | ||
115 | # Only specified a type, use types size | ||
116 | fbytes += sizeof(list(f)[1]) | ||
117 | |||
118 | # Bitfields followed by a byte will cause any incomplete | ||
119 | # bytes to be turned into full bytes | ||
120 | fbytes += bits_to_bytes(fbits) | ||
121 | fbits = 0 | ||
122 | |||
123 | fbytes += bits_to_bytes(fbits) | ||
124 | return fbytes + fbits | ||
125 | |||
101 | def register_record(id, clazz): | 126 | def register_record(id, clazz): |
102 | fields = clazz.FIELDS | 127 | fields = clazz.FIELDS |
103 | 128 | diff = RECORD_SIZE - field_bytes(SchedRecord.FIELDS) - field_bytes(fields) | |
104 | fsize = lambda fields : sum([sizeof(list(f)[1]) for f in fields]) | ||
105 | diff = RECORD_SIZE - fsize(SchedRecord.FIELDS) - fsize(fields) | ||
106 | 129 | ||
107 | # Create extra padding fields to make record the proper size | 130 | # Create extra padding fields to make record the proper size |
108 | # Creating one big field of c_uint64 and giving it a size of 8*diff | 131 | # Creating one big field of c_uint64 and giving it a size of 8*diff |
109 | # _shoud_ work, but doesn't. This is an uglier way of accomplishing | 132 | # _should_ work, but doesn't. This is an uglier way of accomplishing |
110 | # the same goal | 133 | # the same goal |
111 | for d in range(diff): | 134 | for d in range(diff): |
112 | fields += [("extra%d" % d, c_char)] | 135 | fields += [("extra%d" % d, c_char)] |
diff --git a/parse_exps.py b/parse_exps.py index 98f95df..37667aa 100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
@@ -14,7 +14,7 @@ import sys | |||
14 | import traceback | 14 | import traceback |
15 | 15 | ||
16 | from collections import namedtuple | 16 | from collections import namedtuple |
17 | from config.config import DEFAULTS,PARAMS | 17 | from config.config import FILES,DEFAULTS,PARAMS |
18 | from optparse import OptionParser | 18 | from optparse import OptionParser |
19 | from parse.point import ExpPoint | 19 | from parse.point import ExpPoint |
20 | from parse.tuple_table import TupleTable | 20 | from parse.tuple_table import TupleTable |
@@ -94,7 +94,7 @@ def parse_exp(exp_force_base): | |||
94 | 94 | ||
95 | 95 | ||
96 | def get_exp_params(data_dir, cm_builder): | 96 | def get_exp_params(data_dir, cm_builder): |
97 | param_file = "%s/%s" % (data_dir, DEFAULTS['params_file']) | 97 | param_file = "%s/%s" % (data_dir, FILES['params_file']) |
98 | if os.path.isfile(param_file): | 98 | if os.path.isfile(param_file): |
99 | params = com.load_params(param_file) | 99 | params = com.load_params(param_file) |
100 | 100 | ||
diff --git a/run/crontab.py b/run/crontab.py new file mode 100644 index 0000000..87d71b1 --- /dev/null +++ b/run/crontab.py | |||
@@ -0,0 +1,151 @@ | |||
1 | from __future__ import print_function | ||
2 | |||
3 | import common | ||
4 | import os | ||
5 | import re | ||
6 | import sys | ||
7 | |||
8 | from subprocess import Popen, PIPE, check_output | ||
9 | |||
10 | PANIC_DUR = 10 | ||
11 | DELAY = 30 | ||
12 | DELAY_INTERVAL = 10 | ||
13 | |||
14 | def get_cron_data(): | ||
15 | try: | ||
16 | return check_output(['crontab', '-l']) | ||
17 | except: | ||
18 | return "" | ||
19 | |||
20 | def wall(message): | ||
21 | '''A wall command with no header''' | ||
22 | return "echo '%s' | wall -n" % message | ||
23 | |||
24 | def sanitize(args, ignored): | ||
25 | ret_args = [] | ||
26 | for a in args: | ||
27 | if a in ignored: | ||
28 | continue | ||
29 | if '-' == a[0] and '--' != a[0:2]: | ||
30 | for i in ignored: | ||
31 | a = a.replace(i, '') | ||
32 | ret_args += [a] | ||
33 | return ret_args | ||
34 | |||
35 | def get_outfname(): | ||
36 | return "cron-%s.txt" % common.get_cmd() | ||
37 | |||
38 | def get_boot_cron(ignored_params, extra=""): | ||
39 | '''Turn current python script into a crontab reboot entry''' | ||
40 | job_args = sanitize(sys.argv, ignored_params) | ||
41 | job = " ".join(job_args) | ||
42 | out_fname = get_outfname() | ||
43 | |||
44 | short_job = " ".join([common.get_cmd()] + job_args[1:]) | ||
45 | msg = "Job '%s' will write output to '%s'" % (short_job, out_fname) | ||
46 | |||
47 | sys.stderr.write("%s %d seconds after reboot.\n" % (msg, DELAY)) | ||
48 | |||
49 | # Create sleep and wall commands which will countdown DELAY seconds | ||
50 | # before executing the job | ||
51 | cmds = ["sleep %d" % DELAY_INTERVAL] | ||
52 | delay_rem = DELAY - DELAY_INTERVAL | ||
53 | while delay_rem > 0: | ||
54 | wmsg = "Restarting experiments in %d seconds. %s" % (delay_rem, extra) | ||
55 | cmds += [wall(wmsg)] | ||
56 | cmds += ["sleep %d" % min(DELAY_INTERVAL, delay_rem)] | ||
57 | delay_rem -= DELAY_INTERVAL | ||
58 | delay_cmd = ";".join(cmds) | ||
59 | |||
60 | # Create command which will only execute if the same kernel is running | ||
61 | kern = common.kernel() | ||
62 | fail_wall = wall("Need matching kernel '%s' to run!" % kern) | ||
63 | run_cmd = "echo '%s' | grep -q `uname -r` && %s && %s && %s >> %s 2>>%s || %s" %\ | ||
64 | (kern, wall(msg), wall("Starting..."), job, out_fname, out_fname, fail_wall) | ||
65 | |||
66 | return "@reboot cd %s; %s; %s;" % (os.getcwd(), delay_cmd, run_cmd) | ||
67 | |||
68 | def set_panic_restart(bool_val): | ||
69 | '''Enable / disable restart on panics''' | ||
70 | if bool_val: | ||
71 | sys.stderr.write("Kernel will reboot after panic.\n") | ||
72 | dur = PANIC_DUR | ||
73 | else: | ||
74 | sys.stderr.write("Kernel will no longer reboot after panic.\n") | ||
75 | dur = 0 | ||
76 | |||
77 | check_output(['sysctl', '-w', "kernel.panic=%d" % dur, | ||
78 | "kernel.panic_on_oops=%d" % dur]) | ||
79 | |||
80 | def write_cron_data(data): | ||
81 | '''Write new crontab entry. No blank lines are written''' | ||
82 | |||
83 | # I don't know why "^\s*$" doesn't match, hence this ugly regex | ||
84 | data = re.sub(r"\n\s*\n", "\n", data, re.M) | ||
85 | |||
86 | sp = Popen(["crontab", "-"], stdin=PIPE) | ||
87 | stdout, stderr = sp.communicate(input=data) | ||
88 | |||
89 | def install_path(): | ||
90 | '''Place the current path in the crontab entry''' | ||
91 | data = get_cron_data() | ||
92 | curr_line = re.findall(r"PATH=.*", data) | ||
93 | |||
94 | if curr_line: | ||
95 | curr_paths = re.findall(r"((?:\/\w+)+)", curr_line[0]) | ||
96 | data = re.sub(curr_line[0], "", data) | ||
97 | else: | ||
98 | curr_paths = [] | ||
99 | curr_paths = set(curr_paths) | ||
100 | |||
101 | for path in os.environ["PATH"].split(os.pathsep): | ||
102 | curr_paths.add(path) | ||
103 | |||
104 | data = "PATH=" + os.pathsep.join(curr_paths) + "\n" + data | ||
105 | |||
106 | write_cron_data(data) | ||
107 | |||
108 | def install_boot_job(ignored_params, reboot_message): | ||
109 | '''Re-run the current python script on system reboot using crontab''' | ||
110 | remove_boot_job() | ||
111 | |||
112 | data = get_cron_data() | ||
113 | job = get_boot_cron(ignored_params, reboot_message) | ||
114 | |||
115 | set_panic_restart(True) | ||
116 | |||
117 | write_cron_data(data + job + "\n") | ||
118 | |||
119 | if job not in get_cron_data(): | ||
120 | raise IOError("Failed to write %s into cron!" % job) | ||
121 | else: | ||
122 | install_path() | ||
123 | |||
124 | def clean_output(): | ||
125 | fname = get_outfname() | ||
126 | if os.path.exists(fname): | ||
127 | os.remove(fname) | ||
128 | |||
129 | def kill_boot_job(): | ||
130 | remove_boot_job() | ||
131 | |||
132 | cmd = common.get_cmd() | ||
133 | |||
134 | procs = check_output("ps -eo pid,args".split(" ")) | ||
135 | pairs = re.findall("(\d+) (.*)", procs) | ||
136 | |||
137 | for pid, args in pairs: | ||
138 | if re.search(r"/bin/sh -c.*%s"%cmd, args): | ||
139 | sys.stderr.write("Killing job %s\n" % pid) | ||
140 | check_output(("kill -9 %s" % pid).split(" ")) | ||
141 | |||
142 | def remove_boot_job(): | ||
143 | '''Remove installed reboot job from crontab''' | ||
144 | data = get_cron_data() | ||
145 | regex = re.compile(r".*%s.*" % re.escape(common.get_cmd()), re.M) | ||
146 | |||
147 | if regex.search(data): | ||
148 | new_cron = regex.sub("", data) | ||
149 | write_cron_data(new_cron) | ||
150 | |||
151 | set_panic_restart(False) | ||
diff --git a/run/experiment.py b/run/experiment.py index 4667cb1..5f18bea 100644 --- a/run/experiment.py +++ b/run/experiment.py | |||
@@ -43,6 +43,9 @@ class Experiment(object): | |||
43 | self.exec_err = None | 43 | self.exec_err = None |
44 | self.tracer_types = tracer_types | 44 | self.tracer_types = tracer_types |
45 | 45 | ||
46 | self.regular_tracers = [] | ||
47 | self.exact_tracers = [] | ||
48 | |||
46 | def __setup_tracers(self): | 49 | def __setup_tracers(self): |
47 | tracers = [ t(self.working_dir) for t in self.tracer_types ] | 50 | tracers = [ t(self.working_dir) for t in self.tracer_types ] |
48 | 51 | ||
@@ -63,8 +66,13 @@ class Experiment(object): | |||
63 | Experiment.INTERRUPTED_DIR) | 66 | Experiment.INTERRUPTED_DIR) |
64 | interrupted = "%s/%s" % (os.path.split(self.working_dir)[0], | 67 | interrupted = "%s/%s" % (os.path.split(self.working_dir)[0], |
65 | Experiment.INTERRUPTED_DIR) | 68 | Experiment.INTERRUPTED_DIR) |
69 | old_int = "%s/%s" % (self.working_dir, Experiment.INTERRUPTED_DIR) | ||
70 | |||
66 | if os.path.exists(interrupted): | 71 | if os.path.exists(interrupted): |
67 | sh.rmtree(interrupted) | 72 | sh.rmtree(interrupted) |
73 | if os.path.exists(old_int): | ||
74 | sh.rmtree(old_int) | ||
75 | |||
68 | os.rename(self.working_dir, interrupted) | 76 | os.rename(self.working_dir, interrupted) |
69 | 77 | ||
70 | os.mkdir(self.working_dir) | 78 | os.mkdir(self.working_dir) |
@@ -78,21 +86,24 @@ class Experiment(object): | |||
78 | executable.cwd = self.working_dir | 86 | executable.cwd = self.working_dir |
79 | map(assign_cwd, self.executables) | 87 | map(assign_cwd, self.executables) |
80 | 88 | ||
81 | def __kill_all(self): | 89 | def __try_kill_all(self): |
82 | if lu.waiting_tasks(): | 90 | try: |
83 | released = lu.release_tasks() | 91 | if lu.waiting_tasks(): |
84 | self.log("Re-released %d tasks" % released) | 92 | released = lu.release_tasks() |
93 | self.log("Re-released %d tasks" % released) | ||
85 | 94 | ||
86 | time.sleep(1) | 95 | time.sleep(1) |
87 | 96 | ||
88 | self.log("Killing all tasks") | 97 | self.log("Killing all tasks") |
89 | for e in self.executables: | 98 | for e in self.executables: |
90 | try: | 99 | try: |
91 | e.kill() | 100 | e.kill() |
92 | except: | 101 | except: |
93 | pass | 102 | pass |
94 | 103 | ||
95 | time.sleep(1) | 104 | time.sleep(1) |
105 | except: | ||
106 | self.log("Failed to kill all tasks.") | ||
96 | 107 | ||
97 | def __strip_path(self, path): | 108 | def __strip_path(self, path): |
98 | '''Shorten path to something more readable.''' | 109 | '''Shorten path to something more readable.''' |
@@ -194,6 +205,7 @@ class Experiment(object): | |||
194 | 205 | ||
195 | sched = lu.scheduler() | 206 | sched = lu.scheduler() |
196 | if sched != "Linux": | 207 | if sched != "Linux": |
208 | self.log("Switching back to Linux scheduler") | ||
197 | try: | 209 | try: |
198 | lu.switch_scheduler("Linux") | 210 | lu.switch_scheduler("Linux") |
199 | except: | 211 | except: |
@@ -303,6 +315,7 @@ class Experiment(object): | |||
303 | self.__to_linux() | 315 | self.__to_linux() |
304 | 316 | ||
305 | succ = False | 317 | succ = False |
318 | exception = None | ||
306 | try: | 319 | try: |
307 | self.__setup() | 320 | self.__setup() |
308 | 321 | ||
@@ -311,20 +324,21 @@ class Experiment(object): | |||
311 | self.log("Saving results in %s" % self.finished_dir) | 324 | self.log("Saving results in %s" % self.finished_dir) |
312 | succ = True | 325 | succ = True |
313 | except Exception as e: | 326 | except Exception as e: |
327 | exception = e | ||
328 | |||
314 | # Give time for whatever failed to finish failing | 329 | # Give time for whatever failed to finish failing |
315 | time.sleep(2) | 330 | time.sleep(2) |
316 | self.__kill_all() | ||
317 | 331 | ||
318 | raise e | 332 | self.__try_kill_all() |
319 | finally: | ||
320 | self.__teardown() | ||
321 | finally: | 333 | finally: |
322 | self.log("Switching back to Linux scheduler") | ||
323 | try: | 334 | try: |
335 | self.__teardown() | ||
324 | self.__to_linux() | 336 | self.__to_linux() |
325 | except Exception as e: | 337 | except Exception as e: |
326 | print(e) | 338 | exception = exception or e |
327 | 339 | finally: | |
340 | if exception: raise exception | ||
341 | |||
328 | if succ: | 342 | if succ: |
329 | self.__save_results() | 343 | self.__save_results() |
330 | self.log("Experiment done!") | 344 | self.log("Experiment done!") |
diff --git a/run_exps.py b/run_exps.py index afabca8..21666a9 100755 --- a/run_exps.py +++ b/run_exps.py | |||
@@ -3,14 +3,18 @@ from __future__ import print_function | |||
3 | 3 | ||
4 | import common as com | 4 | import common as com |
5 | import os | 5 | import os |
6 | import pickle | ||
7 | import pprint | ||
6 | import re | 8 | import re |
7 | import shutil | 9 | import shutil |
8 | import sys | 10 | import sys |
11 | import run.crontab as cron | ||
9 | import run.tracer as trace | 12 | import run.tracer as trace |
10 | 13 | ||
11 | from config.config import PARAMS,DEFAULTS | 14 | from config.config import PARAMS,DEFAULTS,FILES |
12 | from collections import namedtuple | 15 | from collections import namedtuple |
13 | from optparse import OptionParser | 16 | from optparse import OptionParser,OptionGroup |
17 | from parse.enum import Enum | ||
14 | from run.executable.executable import Executable | 18 | from run.executable.executable import Executable |
15 | from run.experiment import Experiment,ExperimentDone,SystemCorrupted | 19 | from run.experiment import Experiment,ExperimentDone,SystemCorrupted |
16 | from run.proc_entry import ProcEntry | 20 | from run.proc_entry import ProcEntry |
@@ -19,9 +23,19 @@ from run.proc_entry import ProcEntry | |||
19 | ExpParams = namedtuple('ExpParams', ['scheduler', 'duration', 'tracers', | 23 | ExpParams = namedtuple('ExpParams', ['scheduler', 'duration', 'tracers', |
20 | 'kernel', 'config_options', 'file_params', | 24 | 'kernel', 'config_options', 'file_params', |
21 | 'pre_script', 'post_script']) | 25 | 'pre_script', 'post_script']) |
26 | '''Tracked with each experiment''' | ||
27 | ExpState = Enum(['Failed', 'Succeeded', 'Invalid', 'Done', 'None']) | ||
28 | ExpData = com.recordtype('ExpData', ['name', 'params', 'sched_file', 'out_dir', | ||
29 | 'retries', 'state']) | ||
22 | '''Comparison of requested versus actual kernel compile parameter value''' | 30 | '''Comparison of requested versus actual kernel compile parameter value''' |
23 | ConfigResult = namedtuple('ConfigResult', ['param', 'wanted', 'actual']) | 31 | ConfigResult = namedtuple('ConfigResult', ['param', 'wanted', 'actual']) |
24 | 32 | ||
33 | '''Maximum times an experiment will be retried''' | ||
34 | MAX_RETRY = 5 | ||
35 | '''Location experiment retry count is stored''' | ||
36 | TRIES_FNAME = ".tries.pkl" | ||
37 | |||
38 | |||
25 | class InvalidKernel(Exception): | 39 | class InvalidKernel(Exception): |
26 | def __init__(self, kernel): | 40 | def __init__(self, kernel): |
27 | self.kernel = kernel | 41 | self.kernel = kernel |
@@ -51,27 +65,37 @@ def parse_args(): | |||
51 | 65 | ||
52 | parser.add_option('-s', '--scheduler', dest='scheduler', | 66 | parser.add_option('-s', '--scheduler', dest='scheduler', |
53 | help='scheduler for all experiments') | 67 | help='scheduler for all experiments') |
68 | parser.add_option('-d', '--duration', dest='duration', type='int', | ||
69 | help='duration (seconds) of tasks') | ||
54 | parser.add_option('-i', '--ignore-environment', dest='ignore', | 70 | parser.add_option('-i', '--ignore-environment', dest='ignore', |
55 | action='store_true', default=False, | 71 | action='store_true', default=False, |
56 | help='run experiments even in invalid environments ') | 72 | help='run experiments even in invalid environments ') |
57 | parser.add_option('-d', '--duration', dest='duration', type='int', | 73 | parser.add_option('-f', '--force', action='store_true', default=False, |
58 | help='duration (seconds) of tasks') | 74 | dest='force', help='overwrite existing data') |
59 | parser.add_option('-o', '--out-dir', dest='out_dir', | 75 | parser.add_option('-o', '--out-dir', dest='out_dir', |
60 | help='directory for data output', | 76 | help='directory for data output', |
61 | default=DEFAULTS['out-run']) | 77 | default=DEFAULTS['out-run']) |
62 | parser.add_option('-p', '--params', dest='param_file', | 78 | |
63 | help='file with experiment parameters') | 79 | group = OptionGroup(parser, "Communication Options") |
64 | parser.add_option('-c', '--schedule-file', dest='sched_file', | 80 | group.add_option('-j', '--jabber', metavar='username@domain', |
65 | help='name of schedule files within directories', | 81 | dest='jabber', default=None, |
66 | default=DEFAULTS['sched_file']) | 82 | help='send a jabber message when an experiment completes') |
67 | parser.add_option('-f', '--force', action='store_true', default=False, | 83 | group.add_option('-e', '--email', metavar='username@server', |
68 | dest='force', help='overwrite existing data') | 84 | dest='email', default=None, |
69 | parser.add_option('-j', '--jabber', metavar='username@domain', | 85 | help='send an email when all experiments complete') |
70 | dest='jabber', default=None, | 86 | parser.add_option_group(group) |
71 | help='send a jabber message when an experiment completes') | 87 | |
72 | parser.add_option('-e', '--email', metavar='username@server', | 88 | group = OptionGroup(parser, "Persistence Options") |
73 | dest='email', default=None, | 89 | group.add_option('-r', '--retry', dest='retry', action='store_true', |
74 | help='send an email when all experiments complete') | 90 | default=False, help='retry failed experiments') |
91 | group.add_option('-c', '--crontab', dest='crontab', | ||
92 | action='store_true', default=False, | ||
93 | help='use crontab to resume interrupted script after ' | ||
94 | 'system restarts. implies --retry') | ||
95 | group.add_option('-k', '--kill-crontab', dest='kill', | ||
96 | action='store_true', default=False, | ||
97 | help='kill existing script crontabs and exit') | ||
98 | parser.add_option_group(group) | ||
75 | 99 | ||
76 | return parser.parse_args() | 100 | return parser.parse_args() |
77 | 101 | ||
@@ -207,12 +231,12 @@ def run_script(script_params, exp, exp_dir, out_dir): | |||
207 | out.close() | 231 | out.close() |
208 | 232 | ||
209 | 233 | ||
210 | def make_exp_params(cmd_scheduler, cmd_duration, sched_dir, param_file): | 234 | def make_exp_params(cmd_scheduler, cmd_duration, sched_dir): |
211 | '''Return ExpParam with configured values of all hardcoded params.''' | 235 | '''Return ExpParam with configured values of all hardcoded params.''' |
212 | kernel = copts = "" | 236 | kernel = copts = "" |
213 | 237 | ||
214 | # Load parameter file | 238 | # Load parameter file |
215 | param_file = param_file or "%s/%s" % (sched_dir, DEFAULTS['params_file']) | 239 | param_file = "%s/%s" % (sched_dir, FILES['params_file']) |
216 | if os.path.isfile(param_file): | 240 | if os.path.isfile(param_file): |
217 | fparams = com.load_params(param_file) | 241 | fparams = com.load_params(param_file) |
218 | else: | 242 | else: |
@@ -252,65 +276,118 @@ def make_exp_params(cmd_scheduler, cmd_duration, sched_dir, param_file): | |||
252 | config_options=copts, tracers=tracers, file_params=fparams, | 276 | config_options=copts, tracers=tracers, file_params=fparams, |
253 | pre_script=pre_script, post_script=post_script) | 277 | pre_script=pre_script, post_script=post_script) |
254 | 278 | ||
255 | def run_experiment(name, sched_file, exp_params, out_dir, | 279 | def run_experiment(data, start_message, ignore, jabber): |
256 | start_message, ignore, jabber): | ||
257 | '''Load and parse data from files and run result.''' | 280 | '''Load and parse data from files and run result.''' |
258 | if not os.path.isfile(sched_file): | 281 | if not os.path.isfile(data.sched_file): |
259 | raise IOError("Cannot find schedule file: %s" % sched_file) | 282 | raise IOError("Cannot find schedule file: %s" % data.sched_file) |
260 | 283 | ||
261 | dir_name, fname = os.path.split(sched_file) | 284 | dir_name, fname = os.path.split(data.sched_file) |
262 | work_dir = "%s/tmp" % dir_name | 285 | work_dir = "%s/tmp" % dir_name |
263 | 286 | ||
264 | procs, execs = load_schedule(name, sched_file, exp_params.duration) | 287 | procs, execs = load_schedule(data.name, data.sched_file, data.params.duration) |
265 | 288 | ||
266 | exp = Experiment(name, exp_params.scheduler, work_dir, out_dir, | 289 | exp = Experiment(data.name, data.params.scheduler, work_dir, |
267 | procs, execs, exp_params.tracers) | 290 | data.out_dir, procs, execs, data.params.tracers) |
268 | 291 | ||
269 | exp.log(start_message) | 292 | exp.log(start_message) |
270 | 293 | ||
271 | if not ignore: | 294 | if not ignore: |
272 | verify_environment(exp_params) | 295 | verify_environment(data.params) |
273 | 296 | ||
274 | run_script(exp_params.pre_script, exp, dir_name, work_dir) | 297 | run_script(data.params.pre_script, exp, dir_name, work_dir) |
275 | 298 | ||
276 | exp.run_exp() | 299 | exp.run_exp() |
277 | 300 | ||
278 | run_script(exp_params.post_script, exp, dir_name, out_dir) | 301 | run_script(data.params.post_script, exp, dir_name, data.out_dir) |
279 | 302 | ||
280 | if jabber: | 303 | if jabber: |
281 | jabber.send("Completed '%s'" % name) | 304 | jabber.send("Completed '%s'" % data.name) |
282 | 305 | ||
283 | # Save parameters used to run experiment in out_dir | 306 | # Save parameters used to run dataeriment in out_dir |
284 | out_params = dict(exp_params.file_params.items() + | 307 | out_params = dict([(PARAMS['sched'], data.params.scheduler), |
285 | [(PARAMS['sched'], exp_params.scheduler), | ||
286 | (PARAMS['tasks'], len(execs)), | 308 | (PARAMS['tasks'], len(execs)), |
287 | (PARAMS['dur'], exp_params.duration)]) | 309 | (PARAMS['dur'], data.params.duration)] + |
310 | data.params.file_params.items()) | ||
288 | 311 | ||
289 | # Feather-trace clock frequency saved for accurate overhead parsing | 312 | # Feather-trace clock frequency saved for accurate overhead parsing |
290 | ft_freq = com.ft_freq() | 313 | ft_freq = com.ft_freq() |
291 | if ft_freq: | 314 | if ft_freq: |
292 | out_params[PARAMS['cycles']] = ft_freq | 315 | out_params[PARAMS['cycles']] = ft_freq |
293 | 316 | ||
294 | with open("%s/%s" % (out_dir, DEFAULTS['params_file']), 'w') as f: | 317 | out_param_f = "%s/%s" % (data.out_dir, FILES['params_file']) |
295 | f.write(str(out_params)) | 318 | with open(out_param_f, 'w') as f: |
319 | pprint.pprint(out_params, f) | ||
320 | |||
296 | 321 | ||
322 | def make_paths(exp, opts, out_base_dir): | ||
323 | '''Translate experiment name to (schedule file, output directory) paths''' | ||
324 | path = os.path.abspath(exp) | ||
325 | out_dir = "%s/%s" % (out_base_dir, os.path.split(exp.strip('/'))[1]) | ||
326 | |||
327 | if not os.path.exists(path): | ||
328 | raise IOError("Invalid experiment: %s" % path) | ||
297 | 329 | ||
298 | def get_exps(opts, args): | 330 | if opts.force and os.path.exists(out_dir): |
299 | '''Return list of experiment files or directories''' | 331 | shutil.rmtree(out_dir) |
300 | if args: | ||
301 | return args | ||
302 | 332 | ||
303 | # Default to sched_file > generated dirs | 333 | if os.path.isdir(path): |
304 | if os.path.exists(opts.sched_file): | 334 | sched_file = "%s/%s" % (path, FILES['sched_file']) |
305 | sys.stderr.write("Reading schedule from %s.\n" % opts.sched_file) | ||
306 | return [opts.sched_file] | ||
307 | elif os.path.exists(DEFAULTS['out-gen']): | ||
308 | sys.stderr.write("Reading schedules from %s/*.\n" % DEFAULTS['out-gen']) | ||
309 | sched_dirs = os.listdir(DEFAULTS['out-gen']) | ||
310 | return ['%s/%s' % (DEFAULTS['out-gen'], d) for d in sched_dirs] | ||
311 | else: | 335 | else: |
312 | sys.stderr.write("Run with -h to view options.\n"); | 336 | sched_file = path |
313 | sys.exit(1) | 337 | |
338 | return sched_file, out_dir | ||
339 | |||
340 | |||
341 | def get_common_header(args): | ||
342 | common = "" | ||
343 | done = False | ||
344 | |||
345 | if len(args) == 1: | ||
346 | return common | ||
347 | |||
348 | while not done: | ||
349 | common += args[0][len(common)] | ||
350 | for path in args: | ||
351 | if path.find(common, 0, len(common)): | ||
352 | done = True | ||
353 | break | ||
354 | |||
355 | return common[:len(common)-1] | ||
356 | |||
357 | |||
358 | def get_exps(opts, args, out_base_dir): | ||
359 | '''Return list of ExpDatas''' | ||
360 | |||
361 | if not args: | ||
362 | if os.path.exists(FILES['sched_file']): | ||
363 | # Default to sched_file in current directory | ||
364 | sys.stderr.write("Reading schedule from %s.\n" % FILES['sched_file']) | ||
365 | args = [FILES['sched_file']] | ||
366 | elif os.path.exists(DEFAULTS['out-gen']): | ||
367 | # Then try experiments created by gen_exps | ||
368 | sys.stderr.write("Reading schedules from %s/*.\n" % DEFAULTS['out-gen']) | ||
369 | sched_dirs = os.listdir(DEFAULTS['out-gen']) | ||
370 | args = ['%s/%s' % (DEFAULTS['out-gen'], d) for d in sched_dirs] | ||
371 | else: | ||
372 | sys.stderr.write("Run with -h to view options.\n"); | ||
373 | sys.exit(1) | ||
374 | |||
375 | # Part of arg paths which is identical for each arg | ||
376 | common = get_common_header(args) | ||
377 | |||
378 | exps = [] | ||
379 | for path in args: | ||
380 | sched_file, out_dir = make_paths(path, opts, out_base_dir) | ||
381 | name = path[len(common):] | ||
382 | |||
383 | sched_dir = os.path.split(sched_file)[0] | ||
384 | |||
385 | exp_params = make_exp_params(opts.scheduler, opts.duration, sched_dir) | ||
386 | |||
387 | exps += [ExpData(name, exp_params, sched_file, out_dir, | ||
388 | 0, ExpState.None)] | ||
389 | |||
390 | return exps | ||
314 | 391 | ||
315 | 392 | ||
316 | def setup_jabber(target): | 393 | def setup_jabber(target): |
@@ -338,93 +415,142 @@ def setup_email(target): | |||
338 | return None | 415 | return None |
339 | 416 | ||
340 | 417 | ||
341 | def make_paths(exp, out_base_dir, opts): | 418 | def tries_file(exp): |
342 | '''Translate experiment name to (schedule file, output directory) paths''' | 419 | return "%s/%s" % (os.path.split(exp.sched_file)[0], TRIES_FNAME) |
343 | path = "%s/%s" % (os.getcwd(), exp) | ||
344 | out_dir = "%s/%s" % (out_base_dir, os.path.split(exp.strip('/'))[1]) | ||
345 | 420 | ||
346 | if not os.path.exists(path): | ||
347 | raise IOError("Invalid experiment: %s" % path) | ||
348 | 421 | ||
349 | if opts.force and os.path.exists(out_dir): | 422 | def get_tries(exp): |
350 | shutil.rmtree(out_dir) | 423 | if not os.path.exists(tries_file(exp)): |
424 | return 0 | ||
425 | with open(tries_file(exp), 'r') as f: | ||
426 | return int(pickle.load(f)) | ||
351 | 427 | ||
352 | if os.path.isdir(path): | ||
353 | sched_file = "%s/%s" % (path, opts.sched_file) | ||
354 | else: | ||
355 | sched_file = path | ||
356 | 428 | ||
357 | return sched_file, out_dir | 429 | def set_tries(exp, val): |
430 | if not val: | ||
431 | if os.path.exists(tries_file(exp)): | ||
432 | os.remove(tries_file(exp)) | ||
433 | else: | ||
434 | with open(tries_file(exp), 'w') as f: | ||
435 | pickle.dump(str(val), f) | ||
436 | os.system('sync') | ||
358 | 437 | ||
359 | def main(): | ||
360 | opts, args = parse_args() | ||
361 | exps = get_exps(opts, args) | ||
362 | 438 | ||
439 | def run_exps(exps, opts): | ||
363 | jabber = setup_jabber(opts.jabber) if opts.jabber else None | 440 | jabber = setup_jabber(opts.jabber) if opts.jabber else None |
364 | email = setup_email(opts.email) if opts.email else None | ||
365 | 441 | ||
366 | out_base = os.path.abspath(opts.out_dir) | 442 | # Give each experiment a unique id |
367 | created = False | 443 | exps_remaining = enumerate(exps) |
368 | if not os.path.exists(out_base): | 444 | # But run experiments which have failed the most last |
369 | created = True | 445 | exps_remaining = sorted(exps_remaining, key=lambda x: get_tries(x[1])) |
370 | os.mkdir(out_base) | ||
371 | 446 | ||
372 | ran = done = succ = failed = invalid = 0 | 447 | while exps_remaining: |
448 | i, exp = exps_remaining.pop(0) | ||
373 | 449 | ||
374 | for i, exp in enumerate(exps): | 450 | verb = "Loading" if exp.state == ExpState.None else "Re-running failed" |
375 | sched_file, out_dir = make_paths(exp, out_base, opts) | 451 | start_message = "%s experiment %d of %d." % (verb, i+1, len(exps)) |
376 | sched_dir = os.path.split(sched_file)[0] | ||
377 | 452 | ||
378 | try: | 453 | try: |
379 | start_message = "Loading experiment %d of %d." % (i+1, len(exps)) | 454 | set_tries(exp, get_tries(exp) + 1) |
380 | exp_params = make_exp_params(opts.scheduler, opts.duration, | 455 | if get_tries(exp) > MAX_RETRY: |
381 | sched_dir, opts.param_file) | 456 | raise Exception("Hit maximum retries of %d" % MAX_RETRY) |
382 | 457 | ||
383 | run_experiment(exp, sched_file, exp_params, out_dir, | 458 | run_experiment(exp, start_message, opts.ignore, jabber) |
384 | start_message, opts.ignore, jabber) | ||
385 | 459 | ||
386 | succ += 1 | 460 | set_tries(exp, 0) |
387 | except ExperimentDone: | 461 | exp.state = ExpState.Succeeded |
388 | sys.stderr.write("Experiment '%s' already completed " % exp + | ||
389 | "at '%s'\n" % out_base) | ||
390 | done += 1 | ||
391 | except (InvalidKernel, InvalidConfig) as e: | ||
392 | sys.stderr.write("Invalid environment for experiment '%s'\n" % exp) | ||
393 | sys.stderr.write("%s\n" % e) | ||
394 | invalid += 1 | ||
395 | except KeyboardInterrupt: | 462 | except KeyboardInterrupt: |
396 | sys.stderr.write("Keyboard interrupt, quitting\n") | 463 | sys.stderr.write("Keyboard interrupt, quitting\n") |
464 | set_tries(exp, get_tries(exp) - 1) | ||
397 | break | 465 | break |
466 | except ExperimentDone: | ||
467 | sys.stderr.write("Experiment already completed at '%s'\n" % exp.out_dir) | ||
468 | set_tries(exp, 0) | ||
469 | exp.state = ExpState.Done | ||
470 | except (InvalidKernel, InvalidConfig) as e: | ||
471 | sys.stderr.write("Invalid environment for experiment '%s'\n" % exp.name) | ||
472 | sys.stderr.write("%s\n" % e) | ||
473 | set_tries(exp, get_tries(exp) - 1) | ||
474 | exp.state = ExpState.Invalid | ||
398 | except SystemCorrupted as e: | 475 | except SystemCorrupted as e: |
399 | sys.stderr.write("System is corrupted! Fix state before continuing.\n") | 476 | sys.stderr.write("System is corrupted! Fix state before continuing.\n") |
400 | sys.stderr.write("%s\n" % e) | 477 | sys.stderr.write("%s\n" % e) |
401 | break | 478 | exp.state = ExpState.Failed |
479 | if not opts.retry: | ||
480 | break | ||
481 | else: | ||
482 | sys.stderr.write("Remaining experiments may fail\n") | ||
402 | except Exception as e: | 483 | except Exception as e: |
403 | sys.stderr.write("Failed experiment %s\n" % exp) | 484 | sys.stderr.write("Failed experiment %s\n" % exp.name) |
404 | sys.stderr.write("%s\n" % e) | 485 | sys.stderr.write("%s\n" % e) |
405 | failed += 1 | 486 | exp.state = ExpState.Failed |
406 | 487 | ||
407 | ran += 1 | 488 | if exp.state is ExpState.Failed and opts.retry: |
489 | exps_remaining += [(i, exp)] | ||
408 | 490 | ||
409 | # Clean out directory if it failed immediately | 491 | |
410 | if not os.listdir(out_base) and created and not succ: | 492 | def main(): |
411 | os.rmdir(out_base) | 493 | opts, args = parse_args() |
494 | |||
495 | if opts.kill: | ||
496 | cron.kill_boot_job() | ||
497 | sys.exit(1) | ||
498 | |||
499 | email = setup_email(opts.email) if opts.email else None | ||
500 | |||
501 | # Create base output directory for run data | ||
502 | out_base = os.path.abspath(opts.out_dir) | ||
503 | created = False | ||
504 | if not os.path.exists(out_base): | ||
505 | created = True | ||
506 | os.mkdir(out_base) | ||
507 | |||
508 | exps = get_exps(opts, args, out_base) | ||
509 | |||
510 | if opts.crontab: | ||
511 | # Resume script on startup | ||
512 | opts.retry = True | ||
513 | cron.install_boot_job(['f', '--forced'], | ||
514 | "Stop with %s -k" % com.get_cmd()) | ||
515 | |||
516 | if opts.force or not opts.retry: | ||
517 | cron.clean_output() | ||
518 | for e in exps: | ||
519 | set_tries(e, 0) | ||
520 | |||
521 | try: | ||
522 | run_exps(exps, opts) | ||
523 | finally: | ||
524 | # Remove persistent state | ||
525 | for e in exps: | ||
526 | set_tries(e, 0) | ||
527 | cron.remove_boot_job() | ||
528 | |||
529 | def state_count(state): | ||
530 | return len(filter(lambda x: x.state is state, exps)) | ||
531 | |||
532 | ran = len(filter(lambda x: x.state is not ExpState.None, exps)) | ||
533 | succ = state_count(ExpState.Succeeded) | ||
412 | 534 | ||
413 | message = "Experiments ran:\t%d of %d" % (ran, len(exps)) +\ | 535 | message = "Experiments ran:\t%d of %d" % (ran, len(exps)) +\ |
414 | "\n Successful:\t\t%d" % succ +\ | 536 | "\n Successful:\t\t%d" % succ +\ |
415 | "\n Failed:\t\t%d" % failed +\ | 537 | "\n Failed:\t\t%d" % state_count(ExpState.Failed) +\ |
416 | "\n Already Done:\t\t%d" % done +\ | 538 | "\n Already Done:\t\t%d" % state_count(ExpState.Done) +\ |
417 | "\n Invalid Environment:\t%d" % invalid | 539 | "\n Invalid Environment:\t%d" % state_count(ExpState.Invalid) |
418 | 540 | ||
419 | print(message) | 541 | print(message) |
420 | 542 | ||
543 | if email: | ||
544 | email.send(message) | ||
545 | email.close() | ||
546 | |||
421 | if succ: | 547 | if succ: |
422 | sys.stderr.write("Successful experiment data saved in %s.\n" % | 548 | sys.stderr.write("Successful experiment data saved in %s.\n" % |
423 | opts.out_dir) | 549 | opts.out_dir) |
550 | elif not os.listdir(out_base) and created: | ||
551 | # Remove directory if no data was put into it | ||
552 | os.rmdir(out_base) | ||
424 | 553 | ||
425 | if email: | ||
426 | email.send(message) | ||
427 | email.close() | ||
428 | 554 | ||
429 | if __name__ == '__main__': | 555 | if __name__ == '__main__': |
430 | main() | 556 | main() |