diff options
Diffstat (limited to 'run/experiment.py')
-rw-r--r-- | run/experiment.py | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/run/experiment.py b/run/experiment.py new file mode 100644 index 0000000..4bd47c6 --- /dev/null +++ b/run/experiment.py | |||
@@ -0,0 +1,209 @@ | |||
1 | import os | ||
2 | import time | ||
3 | import litmus_util | ||
4 | from operator import methodcaller | ||
5 | from tracer import SchedTracer, LogTracer, PerfTracer, LinuxTracer, OverheadTracer | ||
6 | |||
7 | class ExperimentException(Exception): | ||
8 | '''Used to indicate when there are problems with an experiment.''' | ||
9 | def __init__(self, name): | ||
10 | self.name = name | ||
11 | |||
12 | |||
13 | class ExperimentDone(ExperimentException): | ||
14 | '''Raised when an experiment looks like it's been run already.''' | ||
15 | def __str__(self): | ||
16 | return "Experiment finished already: %d" % self.name | ||
17 | |||
18 | |||
19 | class ExperimentInterrupted(ExperimentException): | ||
20 | '''Raised when an experiment appears to be interrupted (partial results).''' | ||
21 | def __str__(self): | ||
22 | return "Experiment was interrupted in progress: %d" % self.name | ||
23 | |||
24 | |||
25 | class ExperimentFailed(ExperimentException): | ||
26 | def __str__(self): | ||
27 | return "Experiment failed during execution: %d" % self.name | ||
28 | |||
29 | |||
30 | class Experiment(object): | ||
31 | '''Execute one task-set and save the results. Experiments have unique IDs.''' | ||
32 | INTERRUPTED_DIR = ".interrupted" | ||
33 | |||
34 | def __init__(self, name, scheduler, working_dir, finished_dir, proc_entries, executables): | ||
35 | '''Run an experiment, optionally wrapped in tracing.''' | ||
36 | |||
37 | self.name = name | ||
38 | self.scheduler = scheduler | ||
39 | self.working_dir = working_dir | ||
40 | self.finished_dir = finished_dir | ||
41 | self.proc_entries = proc_entries | ||
42 | self.executables = executables | ||
43 | self.exec_out = None | ||
44 | self.exec_err = None | ||
45 | |||
46 | self.__make_dirs() | ||
47 | self.__assign_executable_cwds() | ||
48 | |||
49 | self.tracers = [] | ||
50 | if SchedTracer.enabled(): | ||
51 | self.log("Enabling sched_trace") | ||
52 | self.tracers.append( SchedTracer(working_dir) ) | ||
53 | if LinuxTracer.enabled(): | ||
54 | self.log("Enabling trace-cmd / ftrace / kernelshark") | ||
55 | self.tracers.append( LinuxTracer(working_dir) ) | ||
56 | if LogTracer.enabled(): | ||
57 | self.log("Enabling logging") | ||
58 | self.tracers.append( LogTracer(working_dir) ) | ||
59 | if PerfTracer.enabled(): | ||
60 | self.log("Tracking CPU performance counters") | ||
61 | self.tracers.append( PerfTracer(working_dir) ) | ||
62 | |||
63 | # Overhead trace must be handled seperately, see __run_tasks | ||
64 | if OverheadTracer.enabled(): | ||
65 | self.log("Enabling overhead tracing") | ||
66 | self.overhead_trace = OverheadTracer(working_dir) | ||
67 | else: | ||
68 | self.overhead_trace = None | ||
69 | |||
70 | def __make_dirs(self): | ||
71 | interrupted = None | ||
72 | |||
73 | if os.path.exists(self.finished_dir): | ||
74 | raise ExperimentDone(self.name) | ||
75 | |||
76 | if os.path.exists(self.working_dir): | ||
77 | self.log("Found interrupted experiment, saving in %s" % | ||
78 | Experiment.INTERRUPTED_DIR) | ||
79 | interrupted = "%s/%s" % (os.path.split(self.working_dir)[0], | ||
80 | Experiment.INTERRUPTED_DIR) | ||
81 | os.rename(self.working_dir, interrupted) | ||
82 | |||
83 | os.mkdir(self.working_dir) | ||
84 | |||
85 | if interrupted: | ||
86 | os.rename(interrupted, "%s/%s" % (self.working_dir, | ||
87 | os.path.split(interrupted)[1])) | ||
88 | |||
89 | def __assign_executable_cwds(self): | ||
90 | def assign_cwd(executable): | ||
91 | executable.cwd = self.working_dir | ||
92 | map(assign_cwd, self.executables) | ||
93 | |||
94 | def __run_tasks(self): | ||
95 | exec_pause = 0.3 | ||
96 | self.log("Starting the programs over ({0} seconds)".format( | ||
97 | len(self.executables) * exec_pause)) | ||
98 | for e in self.executables: | ||
99 | try: | ||
100 | e.execute() | ||
101 | except: | ||
102 | raise Exception("Executable failed: %s" % e) | ||
103 | time.sleep(exec_pause) | ||
104 | |||
105 | sleep_time = len(self.executables) / litmus_util.num_cpus() | ||
106 | self.log("Sleeping for %d seconds before release" % sleep_time) | ||
107 | time.sleep(sleep_time) | ||
108 | |||
109 | # Overhead tracer must be started right after release or overhead | ||
110 | # measurements will be full of irrelevant records | ||
111 | if self.overhead_trace: | ||
112 | self.log("Starting overhead trace") | ||
113 | self.overhead_trace.start_tracing() | ||
114 | |||
115 | self.log("Releasing %d tasks" % len(self.executables)) | ||
116 | released = litmus_util.release_tasks() | ||
117 | |||
118 | ret = True | ||
119 | if released != len(self.executables): | ||
120 | # Some tasks failed to release, kill all tasks and fail | ||
121 | # Need to re-release non-released tasks before we can kill them though | ||
122 | self.log("Failed to release {} tasks! Re-releasing and killing".format( | ||
123 | len(self.executables) - released, len(self.executables))) | ||
124 | |||
125 | time.sleep(5) | ||
126 | |||
127 | released = litmus_util.release_tasks() | ||
128 | |||
129 | self.log("Re-released %d tasks" % released) | ||
130 | |||
131 | time.sleep(5) | ||
132 | |||
133 | self.log("Killing all tasks") | ||
134 | map(methodcaller('kill'), self.executables) | ||
135 | |||
136 | ret = False | ||
137 | |||
138 | self.log("Waiting for program to finish...") | ||
139 | for e in self.executables: | ||
140 | if not e.wait(): | ||
141 | ret = False | ||
142 | |||
143 | # And it must be stopped here for the same reason | ||
144 | if self.overhead_trace: | ||
145 | self.log("Stopping overhead trace") | ||
146 | self.overhead_trace.stop_tracing() | ||
147 | |||
148 | if not ret: | ||
149 | raise ExperimentFailed(self.name) | ||
150 | |||
151 | def __save_results(self): | ||
152 | os.rename(self.working_dir, self.finished_dir) | ||
153 | |||
154 | def log(self, msg): | ||
155 | print "[Exp %s]: %s" % (self.name, msg) | ||
156 | |||
157 | def run_exp(self): | ||
158 | succ = False | ||
159 | try: | ||
160 | self.setup() | ||
161 | |||
162 | try: | ||
163 | self.__run_tasks() | ||
164 | self.log("Saving results in %s" % self.finished_dir) | ||
165 | succ = True | ||
166 | finally: | ||
167 | self.teardown() | ||
168 | finally: | ||
169 | self.log("Switching to Linux scheduler") | ||
170 | litmus_util.switch_scheduler("Linux") | ||
171 | |||
172 | if succ: | ||
173 | self.__save_results() | ||
174 | self.log("Experiment done!") | ||
175 | |||
176 | |||
177 | def setup(self): | ||
178 | self.log("Writing %d proc entries" % len(self.proc_entries)) | ||
179 | map(methodcaller('write_proc'), self.proc_entries) | ||
180 | |||
181 | if len(self.proc_entries): | ||
182 | time.sleep(2) | ||
183 | |||
184 | self.log("Switching to %s" % self.scheduler) | ||
185 | litmus_util.switch_scheduler(self.scheduler) | ||
186 | |||
187 | self.log("Starting %d tracers" % len(self.tracers)) | ||
188 | map(methodcaller('start_tracing'), self.tracers) | ||
189 | |||
190 | self.exec_out = open('%s/exec-out.txt' % self.working_dir, 'w') | ||
191 | self.exec_err = open('%s/exec-err.txt' % self.working_dir, 'w') | ||
192 | def set_out(executable): | ||
193 | executable.stdout_file = self.exec_out | ||
194 | executable.stderr_file = self.exec_err | ||
195 | map(set_out, self.executables) | ||
196 | |||
197 | time.sleep(4) | ||
198 | |||
199 | def teardown(self): | ||
200 | self.exec_out and self.exec_out.close() | ||
201 | self.exec_err and self.exec_err.close() | ||
202 | |||
203 | sleep_time = 10 | ||
204 | self.log("Sleeping %d seconds to allow buffer flushing" % sleep_time) | ||
205 | time.sleep(sleep_time) | ||
206 | |||
207 | self.log("Stopping tracers") | ||
208 | map(methodcaller('stop_tracing'), self.tracers) | ||
209 | |||