aboutsummaryrefslogtreecommitdiffstats
path: root/pm_data_analysis/pm_data_analyzer.py
diff options
context:
space:
mode:
Diffstat (limited to 'pm_data_analysis/pm_data_analyzer.py')
-rwxr-xr-xpm_data_analysis/pm_data_analyzer.py507
1 files changed, 234 insertions, 273 deletions
diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py
index 43de82a..02c5098 100755
--- a/pm_data_analysis/pm_data_analyzer.py
+++ b/pm_data_analysis/pm_data_analyzer.py
@@ -1,11 +1,19 @@
1#!/usr/bin/env python 1#!/usr/bin/env python
2# 2"""
3# Preemption and migration overheads analysis. 3Usage: %prog [options] filename
4# Take a single-task-set-size file and generate max / avg for the valid 4
5# samples for preemption and different kinds of migration 5FILENAME is where the .raw overhead data are. Filename
6# 6and the path to it also gives the base path and filename for the
7# Save computed valid overheads and print a file suitable for processing 7files that contains already processed overheads and the directory
8# using Bjorn's gnuplot wrapper 8where to save the output data.
9FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw".
10Also, take a look at the "compact_results" script
11"""
12
13import defapp
14
15from optparse import make_option as o
16from os.path import splitext, basename, dirname
9 17
10import sys 18import sys
11import numpy as np 19import numpy as np
@@ -15,8 +23,50 @@ import pm
15import pmserialize as pms 23import pmserialize as pms
16import statanalyzer as pmstat 24import statanalyzer as pmstat
17 25
# Command-line options of the analyzer, built with optparse's make_option
# (imported as `o`).  None of the entries carries a default here: the
# fallback values live in the separate `defaults` mapping.
# Long help texts use implicit string concatenation so the resulting text
# does not depend on how continuation lines are indented.
options = [
    o("-l", "--cores-per-l2", dest="coresL2", action="store", type="int",
      help="Number of cores per L2 cache; if all cores share the same "
           "L2 (i.e., no L3) set this to 0 (default = 2)"),
    o("-p", "--phys-cpu", dest="pcpu", action="store", type="int",
      help="Number of physical sockets on this machine (default 4)"),
    # The --limit-* options have no short form; optparse drops the None
    # placeholder when collecting option strings.
    o(None, "--limit-preempt", dest="npreempt", action="store", type="int",
      help="Limit the number of preemption sample used in statistics "
           "to NPREEMPT"),
    o(None, "--limit-l2", dest="nl2cache", action="store", type="int",
      help="Limit the number of l2cache sample used in statistics "
           "to NL2CACHE"),
    o(None, "--limit-onchip", dest="nonchip", action="store", type="int",
      help="Limit the number of onchip sample used in statistics "
           "to NONCHIP"),
    o(None, "--limit-offchip", dest="noffchip", action="store", type="int",
      help="Limit the number of offchip sample used in statistics "
           "to NOFFCHIP"),
    o("-r", "--read-valid-data", dest="read_valid", action="store_true",
      help="read already processed data from file"),
    # -v / -d previously had help text; restored so --help documents them.
    o("-v", "--verbose", dest="verbose", action="store_true",
      help="Be verbose"),
    o("-d", "--debug", dest="debug", action="store_true",
      help="Debugging information"),
    o("-u", "--microsec", dest="cpufreq", action="store", type="float",
      help="Print overhead results in microseconds; "
           "CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)"),
]
52# this cores per chip parameter implies a different topology model not fully
53# supported atm
54# o("-c", "--cores-per-chip", dest="coresC",
55# action="store", type="int", default="6",
56# help="number of cores per chip (default = 6)")
57
# Fallback values for the command-line options, keyed by each option's
# `dest`.  Every key must match a `dest` in `options`, otherwise the
# default is never seen through `self.options.<dest>`.
defaults = {
    'coresL2': 2,
    'pcpu': 4,
    # 0 means "no limit" for the --limit-* options
    'npreempt': 0,
    'nl2cache': 0,
    'nonchip': 0,
    'noffchip': 0,
    'read_valid': False,
    'verbose': False,
    'debug': False,
    # 0 means "report raw ticks"; was misspelled 'cpufrequ', which left
    # the real `cpufreq` option without a default.
    'cpufreq': 0,
}
20 70
21# from Bjoern's simple-gnuplot-wrapper 71# from Bjoern's simple-gnuplot-wrapper
22def decode(name): 72def decode(name):
@@ -47,279 +97,190 @@ class Overhead:
47 def add(self, ovd_vector, label): 97 def add(self, ovd_vector, label):
48 self.overheads.append([ovd_vector, label]) 98 self.overheads.append([ovd_vector, label])
49 99
class Analyzer(defapp.App):
    """Summarize preemption / migration overhead samples into CSV rows.

    For every data file given on the command line the overhead samples are
    either extracted from the raw file through the ``pm`` module or read
    back from previously saved ``.vbin`` files, and one
    ``tss, max, avg`` row is appended to a per-overhead-type CSV file.
    """

    def __init__(self):
        # NOTE(review): defapp.App is assumed to parse the command line and
        # to provide self.options / self.args / self.err -- confirm against
        # the defapp module.
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # Overhead label -> maximum number of samples used for statistics.
        # Only labels with a non-zero limit are recorded.
        self.lsamples = {}
        for label, limit in (('preemption', self.options.npreempt),
                             ('l2cache', self.options.nl2cache),
                             ('onchip', self.options.nonchip),
                             ('offchip', self.options.noffchip)):
            if limit:
                self.lsamples[label] = limit

    @staticmethod
    def _in_dir(dname, name):
        """Return the path of *name* inside directory *dname*."""
        # Local import: the file only imports splitext/basename/dirname.
        from os.path import join
        # join() copes with an empty dirname (data file in the current
        # directory); dname + '/' + name would point at the filesystem root.
        return join(dname, name)

    @staticmethod
    def _print_sample_counts(ovds):
        """Print the number of samples held for each overhead type."""
        for i in ovds:
            print("samples(%(0)s) = %(1)d" % {"0": i[1], "1": len(i[0])})

    def read_valid_data(self, filename):
        """Load previously saved overheads into ``self.valid_ovds``.

        *filename* is the base path (no extension); one
        ``<filename>_<label>.vbin`` file is read per overhead type.  The
        l2cache file is read only when ``options.coresL2`` is non-zero.
        """
        labels = ['preemption', 'onchip', 'offchip']
        if self.options.coresL2 != 0:
            labels.append('l2cache')

        for label in labels:
            nf = filename + '_' + label + '.vbin'
            if self.options.debug:
                print("Reading '%s'" % nf)
            # NOTE(review): pms.unpickl_it presumably unpickles the file;
            # only point this at trusted .vbin files -- confirm.
            # The label must match the ones used by process_raw_data and
            # self.lsamples (it was misspelled 'preemtion' here).
            self.valid_ovds.add(pms.unpickl_it(nf), label)

    def process_raw_data(self, datafile):
        """Extract overheads from the raw *datafile* and save them.

        The samples obtained from the ``pm`` module are added to
        ``self.valid_ovds`` (first column only) and each resulting vector is
        written next to *datafile* as ``<name>_<label>.vbin``.
        """
        coresL2 = self.options.coresL2
        pcpu = self.options.pcpu
        # initialize pmmodule
        pm.load(datafile, coresL2, pcpu)

        # get overheads
        ovds = Overhead()
        ovds.add(pm.getPreemption(), 'preemption')
        ovds.add(pm.getOnChipMigration(), 'onchip')
        ovds.add(pm.getOffChipMigration(), 'offchip')
        if coresL2 != 0:
            ovds.add(pm.getL2Migration(), 'l2cache')

        if self.options.debug:
            for i in ovds:
                print("%s %s" % (i[0], i[1]))

        # Statistical analyzer meant to remove outliers.  It is currently
        # not applied (see FIXME below); kept so re-enabling is one line.
        sd = pmstat.InterQuartileRange(25, 75, True)

        for i in ovds:
            if len(i[0]) != 0:
                # just add overheads, "forget" preemption length
                # FIXME: is outlier removal really needed?  If so, use
                # sd.remOutliers(i[0][:, 0]) instead of the raw column.
                self.valid_ovds.add(i[0][:, 0], i[1])
            else:
                print("Warning: no valid data collected...")
                self.valid_ovds.add([], i[1])

        if self.options.debug:
            # check outliers removals
            print("Before outliers removal")
            self._print_sample_counts(ovds)
            print("After outliers removal")
            self._print_sample_counts(self.valid_ovds)

        if self.options.verbose:
            self._print_sample_counts(self.valid_ovds)

        # serialize valid overheads next to the input file
        dname = dirname(datafile)
        fname, _ext = splitext(basename(datafile))
        for i in self.valid_ovds:
            curf = self._in_dir(dname, fname + '_' + i[1] + '.vbin')
            pms.pickl_it(i[0], curf)

    # The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd"
    # Filename output format:
    # pm_plugin=GSN-EDF_dist=uni_light_wss=2048_ovd=preemption.csv
    # ovd: preemption, onchip, offchip, l2cache
    def analyze_data(self, dname, conf):
        """Append one ``tss, max, avg`` row per overhead type to its CSV.

        *dname* is the output directory; *conf* must provide the
        ``'plugin'``, ``'wss'`` and ``'tss'`` entries used to build the file
        name and the row.  Values are divided by ``options.cpufreq`` when it
        is set, otherwise they are reported unscaled (labelled "ticks").
        """
        verbose = self.options.verbose
        # An unset option may come back as None; treat it like 0 (ticks).
        cpufreq = self.options.cpufreq or 0
        csvbname = self._in_dir(dname, 'pm_plugin=' + conf['plugin'] +
                                '_dist=uni_light_wss=' + conf['wss'])
        if verbose:
            print("(WSS = %(0)s, TSS = %(1)s)" % {"0": conf['wss'],
                                                  "1": conf['tss']})

        for i in self.valid_ovds:
            samples, label = i[0], i[1]
            csvfname = csvbname + '_ovd=' + label + '.csv'
            if self.options.debug:
                print("Saving csv '%s'" % csvfname)

            # data (valid_ovds already have only overheads, not length)
            #
            # Check if we need to limit the number of samples
            # that we use in the computation of max and avg.
            # Statistically, this is more sound than other choices.
            # `vector` always starts as the full sample set so it can never
            # be unbound or carried over from the previous overhead type.
            vector = samples
            limit = self.lsamples.get(label, 0)
            if limit > 0:
                nsamples = min(limit, len(samples))
                if verbose:
                    print("Computing %(0)s stat only on %(1)d samples" %
                          {"0": label, "1": nsamples})
                vector = samples[0:nsamples]

            # len() works for both plain lists and numpy arrays, unlike a
            # comparison against [].
            if len(vector) != 0:
                # FIXME if after disabling prefetching there are
                # still negative value, they shouldn't be considered
                max_vec = np.max(vector)
                avg_vec = np.average(vector)
            else:
                max_vec = 0
                avg_vec = 0

            if cpufreq == 0:
                unit = "ticks"
            else:
                unit = "us"
                max_vec = max_vec / cpufreq
                avg_vec = avg_vec / cpufreq

            csvlist = [conf['tss'], "%5.5f" % max_vec, "%5.5f" % avg_vec]
            csvf = open(csvfname, 'a')
            try:
                pms.csv_it(csvf, csvlist)
            finally:
                # close the handle even if writing the row fails
                csvf.close()

            if verbose:
                print(label + " overheads (" + unit + ")")
                print("Max = %5.5f" % max_vec)
                print("Avg = %5.5f" % avg_vec)

    def process_datafile(self, datafile):
        """Run the whole analysis for a single *datafile*.

        The configuration is decoded from the file name (without extension);
        output files are written in the directory of *datafile*.
        """
        dname = dirname(datafile)
        bname = basename(datafile)
        fname, ext = splitext(bname)
        if ext != '.raw':
            # NOTE(review): whether self.err() aborts or only reports is
            # defined by defapp.App -- confirm.
            self.err("Warning: '%s' doesn't look like a .raw file"
                     % bname)
        if self.options.verbose:
            print("\nProcessing: " + fname)
        conf = decode(fname)

        self.valid_ovds = Overhead()
        if self.options.read_valid:
            # .vbin output should be in same directory as input filename
            self.read_valid_data(self._in_dir(dname, fname))
        else:
            self.process_raw_data(datafile)

        self.analyze_data(dname, conf)
        # drop the samples before the next file is processed
        del self.valid_ovds

    def default(self, _):
        """Process every data file named on the command line."""
        for datafile in self.args:
            self.process_datafile(datafile)
183
184 csvlist.append(max_vec_str)
185 csvlist.append(avg_vec_str)
186 pms.csv_it(csvf, csvlist)
187 csvf.close()
188
189 if verbose:
190 if cpufreq == 0:
191 print i[1] + " overheads (ticks)"
192 print "Max = %5.5f" % max_vec
193 print "Avg = %5.5f" % avg_vec
194 else:
195 print i[1] + " overheads (us)"
196 print "Max = %5.5f" % (max_vec / cpufreq)
197 print "Avg = %5.5f" % (avg_vec / cpufreq)
198
199
200# filename-extension convention to get "pretty" output filenames
201# .raw for raw bin data
202# .vbin for valid overheads
203# .csv for processed final data
204def main():
205 usage = "Usage: %prog [options] filename"
206 description = """FILENAME is where the .raw overhead data are. Filename
207and the path to it also gives the base path and filename for the
208files that contains already processed overheads and the directory
209where to save the output data.
210FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw".
211Take a look at the "compact_results" script
212"""
213 parser = OptionParser(usage=usage, description=description)
214 parser.add_option("-l", "--cores-per-l2", dest="coresL2",
215 action="store", type="int", default="2",
216 help="number of cores per L2 cache; "
217 "if all cores share the same L2 (i.e., no L3) set this to 0 "
218 "(default = 2)")
219# this cores per chip parameter implies a different topology model not fully
220# supported atm
221# parser.add_option("-c", "--cores-per-chip", dest="coresC",
222# action="store", type="int", default="6",
223# help="number of cores per chip (default = 6)")
224 parser.add_option("-p", "--phys-cpu", dest="pcpu",
225 action="store", type="int", default="4",
226 help="Number of physical sockets on this machine (default 4)")
227
228 parser.add_option("", "--limit-preempt", dest="npreempt",
229 action="store", type="int", default="0",
230 help="Limit the number of preemption sample used in "
231 "statistics to NPREEMPT")
232 parser.add_option("", "--limit-l2", dest="nl2cache",
233 action="store", type="int", default="0",
234 help="Limit the number of l2cache sample used in "
235 "statistics to NL2CACHE")
236 parser.add_option("", "--limit-onchip", dest="nonchip",
237 action="store", type="int", default="0",
238 help="Limit the number of onchip sample used in "
239 "statistics to NONCHIP")
240 parser.add_option("", "--limit-offchip", dest="noffchip",
241 action="store", type="int", default="0",
242 help="Limit the number of offchip sample used in "
243 "statistics to NOFFCHIP")
244
245 parser.add_option("-r", "--read-valid-data", dest="read_valid",
246 action="store_true", default=False,
247 help="read already processed data from file")
248
249 parser.add_option("-v", "--verbose", dest="verbose",
250 action="store_true", default=False,
251 help="Be verbose")
252 parser.add_option("-d", "--debug", dest="debug",
253 action="store_true", default=False,
254 help="Debugging information")
255
256 parser.add_option("-u", "--microsec", dest="cpufreq",
257 action="store", type="float",
258 help="Print overhead results in microseconds; \
259 CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)")
260
261 (options, args) = parser.parse_args()
262 if len(args) != 1:
263 parser.error("Argument missing")
264 sys.exit(-1)
265
266 valid_ovds = Overhead()
267
268 global verbose
269 global debug
270 global cpufreq
271 if options.verbose:
272 verbose = 1
273 else:
274 verbose = 0
275
276 if options.debug:
277 debug = 1
278 else:
279 debug = 0
280
281 if options.cpufreq:
282 cpufreq = options.cpufreq
283 else:
284 cpufreq = 0
285
286 # filename processing
287 dname = dirname(args[0])
288 bname = basename(args[0])
289 fname, ext = splitext(bname)
290 conf = decode(fname)
291
292 if ext != '.raw':
293 print "Warning: '%s' doesn't look like a .raw file" % bname
294 return -1
295
296 if verbose:
297 print "\nProcessing: " + fname
298
299 if options.read_valid:
300 # .vbin output should be in same directory as input filename
301 # TODO can be improved with custom directory and file reading
302 readf = dname + '/' + fname
303 read_valid_data(readf, options.coresL2, valid_ovds)
304 else:
305 ret = process_raw_data(args[0], options.coresL2, options.pcpu,
306 valid_ovds)
307 if ret == -1:
308 print "Cannot process raw data, quitting"
309 return None
310
311 lsamples = {}
312 if options.npreempt:
313 lsamples['preemption'] = options.npreempt
314 if options.nl2cache:
315 lsamples['l2cache'] = options.nl2cache
316 if options.nonchip:
317 lsamples['onchip'] = options.nonchip
318 if options.noffchip:
319 lsamples['offchip'] = options.noffchip
320
321 analize_data(valid_ovds, dname, conf, lsamples)
322
323if __name__ == '__main__':
324 main()
325 284
285if __name__ == "__main__":
286 Analyzer().launch()