aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--common.py6
-rw-r--r--parse/sched.py1
-rw-r--r--run/executable/executable.py6
-rw-r--r--run/experiment.py128
-rw-r--r--run/litmus_util.py23
-rwxr-xr-xrun_exps.py18
6 files changed, 114 insertions, 68 deletions
diff --git a/common.py b/common.py
index 4920ec8..a2c6224 100644
--- a/common.py
+++ b/common.py
@@ -26,15 +26,17 @@ def get_executable_hint(prog, hint, optional=False):
26 '''Search for @prog in system PATH. Print @hint if no binary is found. 26 '''Search for @prog in system PATH. Print @hint if no binary is found.
27 Die if not @optional.''' 27 Die if not @optional.'''
28 try: 28 try:
29 prog = get_executable(prog) 29 full_path = get_executable(prog)
30 except IOError: 30 except IOError:
31 if not optional: 31 if not optional:
32 sys.stderr.write(("Cannot find executable '%s' in PATH. This is " +\ 32 sys.stderr.write(("Cannot find executable '%s' in PATH. This is " +\
33 "a part of '%s' which should be added to PATH.\n")\ 33 "a part of '%s' which should be added to PATH.\n")\
34 % (prog, hint)) 34 % (prog, hint))
35 sys.exit(1) 35 sys.exit(1)
36 else:
37 full_path = None
36 38
37 return prog 39 return full_path
38 40
39def get_config_option(option): 41def get_config_option(option):
40 '''Search for @option in installed kernel config (if present). 42 '''Search for @option in installed kernel config (if present).
diff --git a/parse/sched.py b/parse/sched.py
index 2da0149..147a2e5 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -161,6 +161,7 @@ def extract_sched_data(result, data_dir, work_dir):
161 cmd_arr = [conf.BINS['st_show']] 161 cmd_arr = [conf.BINS['st_show']]
162 cmd_arr.extend(bins) 162 cmd_arr.extend(bins)
163 with open(output_file, "w") as f: 163 with open(output_file, "w") as f:
164 print("calling %s" % cmd_arr)
164 subprocess.call(cmd_arr, cwd=data_dir, stdout=f) 165 subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
165 166
166 task_dict = defaultdict(lambda : 167 task_dict = defaultdict(lambda :
diff --git a/run/executable/executable.py b/run/executable/executable.py
index 02e35ae..263e305 100644
--- a/run/executable/executable.py
+++ b/run/executable/executable.py
@@ -1,13 +1,13 @@
1import sys 1import sys
2import subprocess 2import subprocess
3import signal 3import signal
4from common import is_executable 4from common import get_executable
5 5
6class Executable(object): 6class Executable(object):
7 '''Parent object that represents an executable for use in task-sets.''' 7 '''Parent object that represents an executable for use in task-sets.'''
8 8
9 def __init__(self, exec_file, extra_args=None, stdout_file = None, stderr_file = None): 9 def __init__(self, exec_file, extra_args=None, stdout_file = None, stderr_file = None):
10 self.exec_file = exec_file 10 self.exec_file = get_executable(exec_file)
11 self.cwd = None 11 self.cwd = None
12 self.stdout_file = stdout_file 12 self.stdout_file = stdout_file
13 self.stderr_file = stderr_file 13 self.stderr_file = stderr_file
@@ -18,7 +18,7 @@ class Executable(object):
18 else: 18 else:
19 self.extra_args = [str(a) for a in list(extra_args)] # make a duplicate 19 self.extra_args = [str(a) for a in list(extra_args)] # make a duplicate
20 20
21 if not is_executable(self.exec_file): 21 if not self.exec_file:
22 raise Exception("Not executable ? : %s" % self.exec_file) 22 raise Exception("Not executable ? : %s" % self.exec_file)
23 23
24 def __del__(self): 24 def __del__(self):
diff --git a/run/experiment.py b/run/experiment.py
index e5811f8..ff0e9f3 100644
--- a/run/experiment.py
+++ b/run/experiment.py
@@ -1,7 +1,9 @@
1import common as com
1import os 2import os
2import time 3import time
3import run.litmus_util as lu 4import run.litmus_util as lu
4import shutil as sh 5import shutil as sh
6import traceback
5from operator import methodcaller 7from operator import methodcaller
6 8
7class ExperimentException(Exception): 9class ExperimentException(Exception):
@@ -15,17 +17,12 @@ class ExperimentDone(ExperimentException):
15 def __str__(self): 17 def __str__(self):
16 return "Experiment finished already: %d" % self.name 18 return "Experiment finished already: %d" % self.name
17 19
18
19class ExperimentInterrupted(ExperimentException):
20 '''Raised when an experiment appears to be interrupted (partial results).'''
21 def __str__(self):
22 return "Experiment was interrupted in progress: %d" % self.name
23
24
25class ExperimentFailed(ExperimentException): 20class ExperimentFailed(ExperimentException):
26 def __str__(self): 21 def __str__(self):
27 return "Experiment failed during execution: %d" % self.name 22 return "Experiment failed during execution: %d" % self.name
28 23
24class SystemCorrupted(Exception):
25 pass
29 26
30class Experiment(object): 27class Experiment(object):
31 '''Execute one task-set and save the results. Experiments have unique IDs.''' 28 '''Execute one task-set and save the results. Experiments have unique IDs.'''
@@ -44,6 +41,8 @@ class Experiment(object):
44 self.exec_out = None 41 self.exec_out = None
45 self.exec_err = None 42 self.exec_err = None
46 43
44 self.task_batch = com.num_cpus()
45
47 self.__make_dirs() 46 self.__make_dirs()
48 self.__assign_executable_cwds() 47 self.__assign_executable_cwds()
49 self.__setup_tracers(tracer_types) 48 self.__setup_tracers(tracer_types)
@@ -84,65 +83,67 @@ class Experiment(object):
84 executable.cwd = self.working_dir 83 executable.cwd = self.working_dir
85 map(assign_cwd, self.executables) 84 map(assign_cwd, self.executables)
86 85
87 def __run_tasks(self): 86 def __kill_all(self):
88 already_waiting = lu.waiting_tasks() 87 # Give time for whatever failed to finish failing
88 time.sleep(2)
89
90 released = lu.release_tasks()
91 self.log("Re-released %d tasks" % released)
89 92
90 if already_waiting: 93 time.sleep(5)
91 self.log("Already %d tasks waiting for release!")
92 self.log("Experiment will fail if any of these tasks are released.")
93 94
94 self.log("Starting the programs") 95 self.log("Killing all tasks")
95 for e in self.executables: 96 for e in self.executables:
96 try: 97 try:
98 e.kill()
99 except:
100 pass
101
102 time.sleep(2)
103
104 def __run_tasks(self):
105 self.log("Starting %d tasks" % len(self.executables))
106
107 for i,e in enumerate(self.executables):
108 try:
97 e.execute() 109 e.execute()
98 except: 110 except:
99 raise Exception("Executable failed: %s" % e) 111 raise Exception("Executable failed: %s" % e)
100 112
101 self.log("Sleeping until tasks are ready for release...") 113 self.log("Sleeping until tasks are ready for release...")
102 start = time.clock() 114 start = time.time()
103 while (lu.waiting_tasks() - already_waiting) < len(self.executables): 115 while lu.waiting_tasks() < len(self.executables):
104 if time.clock() - start > 30.0: 116 if time.time() - start > 30.0:
105 raise Exception("Too much time has passed waiting for tasks!") 117 s = "waiting: %d, submitted: %d" %\
118 (lu.waiting_tasks(), len(self.executables))
119 raise Exception("Too much time spent waiting for tasks! %s" % s)
106 time.sleep(1) 120 time.sleep(1)
107 121
108 # Exact tracers (like overheads) must be started right after release or 122 # Exact tracers (like overheads) must be started right after release or
109 # measurements will be full of irrelevant records 123 # measurements will be full of irrelevant records
110 self.log("Starting %d released tracers" % len(self.exact_tracers)) 124 self.log("Starting %d released tracers" % len(self.exact_tracers))
111 map(methodcaller('start_tracing'), self.exact_tracers) 125 map(methodcaller('start_tracing'), self.exact_tracers)
126 time.sleep(1)
112 127
113 self.log("Releasing %d tasks" % len(self.executables)) 128 try:
114 released = lu.release_tasks() 129 self.log("Releasing %d tasks" % len(self.executables))
115
116 ret = True
117 if released != len(self.executables):
118 # Some tasks failed to release, kill all tasks and fail
119 # Need to re-release non-released tasks before we can kill them though
120 self.log("Failed to release {} tasks! Re-releasing and killing".format(
121 len(self.executables) - released, len(self.executables)))
122 time.sleep(5)
123
124 released = lu.release_tasks() 130 released = lu.release_tasks()
125 131
126 self.log("Re-released %d tasks" % released) 132 if released != len(self.executables):
127 133 # Some tasks failed to release, kill all tasks and fail
128 time.sleep(5) 134 # Need to release non-released tasks before they can be killed
129 135 raise Exception("Released %s tasks, expected %s tasks" %
130 self.log("Killing all tasks") 136 (released, len(self.executables)))
131 map(methodcaller('kill'), self.executables)
132 137
133 ret = False 138 self.log("Waiting for program to finish...")
139 for e in self.executables:
140 if not e.wait():
141 raise Exception("Executable %s failed to complete!" % e)
134 142
135 self.log("Waiting for program to finish...") 143 finally:
136 for e in self.executables: 144 # And these must be stopped here for the same reason
137 if not e.wait(): 145 self.log("Stopping exact tracers")
138 ret = False 146 map(methodcaller('stop_tracing'), self.exact_tracers)
139
140 # And these must be stopped here for the same reason
141 self.log("Stopping exact tracers")
142 map(methodcaller('stop_tracing'), self.exact_tracers)
143
144 if not ret:
145 raise ExperimentFailed(self.name)
146 147
147 def __save_results(self): 148 def __save_results(self):
148 os.rename(self.working_dir, self.finished_dir) 149 os.rename(self.working_dir, self.finished_dir)
@@ -151,29 +152,48 @@ class Experiment(object):
151 print("[Exp %s]: %s" % (self.name, msg)) 152 print("[Exp %s]: %s" % (self.name, msg))
152 153
153 def run_exp(self): 154 def run_exp(self):
155 self.__check_system()
156
154 succ = False 157 succ = False
155 try: 158 try:
156 self.setup() 159 self.__setup()
157 160
158 try: 161 try:
159 self.__run_tasks() 162 self.__run_tasks()
160 self.log("Saving results in %s" % self.finished_dir) 163 self.log("Saving results in %s" % self.finished_dir)
161 succ = True 164 succ = True
165 except:
166 traceback.print_exc()
167 self.__kill_all()
168 raise ExperimentFailed(self.name)
162 finally: 169 finally:
163 self.teardown() 170 self.__teardown()
164 finally: 171 finally:
165 self.log("Switching to Linux scheduler") 172 self.log("Switching to Linux scheduler")
166 try: 173
167 lu.switch_scheduler("Linux") 174 # Give the tasks 10 seconds to finish before bailing
168 except: 175 start = time.time()
169 self.log("Failed to switch back to Linux.") 176 while lu.all_tasks() > 0:
177 if time.time() - start < 10.0:
178 raise SystemCorrupted("%d tasks still running!" %
179 lu.all_tasks())
180
181 lu.switch_scheduler("Linux")
170 182
171 if succ: 183 if succ:
172 self.__save_results() 184 self.__save_results()
173 self.log("Experiment done!") 185 self.log("Experiment done!")
174 186
187 def __check_system(self):
188 running = lu.all_tasks()
189 if running:
190 raise SystemCorrupted("%d tasks already running!" % running)
191
192 sched = lu.scheduler()
193 if sched != "Linux":
194 raise SystemCorrupted("Scheduler is %s, should be Linux" % sched)
175 195
176 def setup(self): 196 def __setup(self):
177 self.log("Writing %d proc entries" % len(self.proc_entries)) 197 self.log("Writing %d proc entries" % len(self.proc_entries))
178 map(methodcaller('write_proc'), self.proc_entries) 198 map(methodcaller('write_proc'), self.proc_entries)
179 199
@@ -190,7 +210,7 @@ class Experiment(object):
190 executable.stderr_file = self.exec_err 210 executable.stderr_file = self.exec_err
191 map(set_out, self.executables) 211 map(set_out, self.executables)
192 212
193 def teardown(self): 213 def __teardown(self):
194 self.exec_out and self.exec_out.close() 214 self.exec_out and self.exec_out.close()
195 self.exec_err and self.exec_err.close() 215 self.exec_err and self.exec_err.close()
196 216
diff --git a/run/litmus_util.py b/run/litmus_util.py
index 4709b66..70da262 100644
--- a/run/litmus_util.py
+++ b/run/litmus_util.py
@@ -3,6 +3,11 @@ import time
3import subprocess 3import subprocess
4import config.config as conf 4import config.config as conf
5 5
6def scheduler():
7 with open('/proc/litmus/active_plugin', 'r') as active_plugin:
8 cur_plugin = active_plugin.read().strip()
9 return cur_plugin
10
6def switch_scheduler(switch_to_in): 11def switch_scheduler(switch_to_in):
7 '''Switch the scheduler to whatever is passed in. 12 '''Switch the scheduler to whatever is passed in.
8 13
@@ -18,11 +23,10 @@ def switch_scheduler(switch_to_in):
18 # it takes a bit to do the switch, sleep an arbitrary amount of time 23 # it takes a bit to do the switch, sleep an arbitrary amount of time
19 time.sleep(2) 24 time.sleep(2)
20 25
21 with open('/proc/litmus/active_plugin', 'r') as active_plugin: 26 cur_plugin = scheduler()
22 cur_plugin = active_plugin.read().strip()
23
24 if switch_to != cur_plugin: 27 if switch_to != cur_plugin:
25 raise Exception("Could not switch to '%s' (check dmesg)" % switch_to) 28 raise Exception("Could not switch to '%s' (check dmesg), current: %s" %\
29 (switch_to, cur_plugin))
26 30
27def waiting_tasks(): 31def waiting_tasks():
28 reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M) 32 reg = re.compile(r'^ready.*?(?P<READY>\d+)$', re.M)
@@ -35,6 +39,17 @@ def waiting_tasks():
35 39
36 return 0 if not ready else int(ready) 40 return 0 if not ready else int(ready)
37 41
42def all_tasks():
43 reg = re.compile(r'^real-time.*?(?P<TASKS>\d+)$', re.M)
44 with open('/proc/litmus/stats', 'r') as f:
45 data = f.read()
46
47 # Ignore if no tasks are waiting for release
48 match = re.search(reg, data)
49 ready = match.group("TASKS")
50
51 return 0 if not ready else int(ready)
52
38def release_tasks(): 53def release_tasks():
39 try: 54 try:
40 data = subprocess.check_output([conf.BINS['release']]) 55 data = subprocess.check_output([conf.BINS['release']])
diff --git a/run_exps.py b/run_exps.py
index b9cf88e..6531415 100755
--- a/run_exps.py
+++ b/run_exps.py
@@ -8,12 +8,11 @@ import re
8import shutil 8import shutil
9import sys 9import sys
10import run.tracer as trace 10import run.tracer as trace
11import traceback
12 11
13from collections import namedtuple 12from collections import namedtuple
14from optparse import OptionParser 13from optparse import OptionParser
15from run.executable.executable import Executable 14from run.executable.executable import Executable
16from run.experiment import Experiment,ExperimentDone 15from run.experiment import Experiment,ExperimentDone,ExperimentFailed,SystemCorrupted
17from run.proc_entry import ProcEntry 16from run.proc_entry import ProcEntry
18 17
19'''Customizable experiment parameters''' 18'''Customizable experiment parameters'''
@@ -330,6 +329,7 @@ def main():
330 created = True 329 created = True
331 os.mkdir(out_base) 330 os.mkdir(out_base)
332 331
332 ran = 0
333 done = 0 333 done = 0
334 succ = 0 334 succ = 0
335 failed = 0 335 failed = 0
@@ -362,15 +362,23 @@ def main():
362 invalid += 1 362 invalid += 1
363 print("Invalid environment for experiment '%s'" % exp) 363 print("Invalid environment for experiment '%s'" % exp)
364 print(e) 364 print(e)
365 except: 365 except KeyboardInterrupt:
366 print("Keyboard interrupt, quitting")
367 break
368 except SystemCorrupted as e:
369 print("System is corrupted! Fix state before continuing.")
370 print(e)
371 break
372 except ExperimentFailed:
366 print("Failed experiment %s" % exp) 373 print("Failed experiment %s" % exp)
367 traceback.print_exc()
368 failed += 1 374 failed += 1
369 375
376 ran += 1
377
370 if not os.listdir(out_base) and created and not succ: 378 if not os.listdir(out_base) and created and not succ:
371 os.rmdir(out_base) 379 os.rmdir(out_base)
372 380
373 message = "Experiments run:\t%d" % len(args) +\ 381 message = "Experiments ran:\t%d of %d" % (ran, len(args)) +\
374 "\n Successful:\t\t%d" % succ +\ 382 "\n Successful:\t\t%d" % succ +\
375 "\n Failed:\t\t%d" % failed +\ 383 "\n Failed:\t\t%d" % failed +\
376 "\n Already Done:\t\t%d" % done +\ 384 "\n Already Done:\t\t%d" % done +\