diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-15 20:29:05 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-15 20:29:05 -0400 |
commit | de754b0a0a56b10eeb14305358f58a275eaa262c (patch) | |
tree | eb4c02bffc02a3cc364756c25d6d139e6ce1ece5 | |
parent | 311f3384882847edd83d688a1a2dcc12bdc59d23 (diff) |
producer/consumer overhead distillation script
-rwxr-xr-x | distill_pco.py | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/distill_pco.py b/distill_pco.py new file mode 100755 index 0000000..c83ccde --- /dev/null +++ b/distill_pco.py | |||
@@ -0,0 +1,209 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | import os | ||
4 | import re | ||
5 | import fnmatch | ||
6 | import shutil as sh | ||
7 | import sys | ||
8 | import csv | ||
9 | import numpy as np | ||
10 | from scipy.stats import scoreatpercentile | ||
11 | import bisect | ||
12 | from optparse import OptionParser | ||
13 | |||
14 | from utils.machines import machines | ||
15 | |||
16 | import utils.iqr | ||
17 | |||
class Topology:
	"""Tree model of a host's CPU/cache/memory topology.

	The tree is built bottom-up from a machine-description dict (see
	utils.machines): for each level in `levels`, the optional key
	'n<level>' gives how many child nodes share one parent node at that
	level (cluster size, default 1).  A symmetric ncpus x ncpus distance
	matrix is precomputed; the distance between two CPUs counts the
	levels crossed to reach a common ancestor, where levels with a
	single child (e.g. a private L1 above its CPU) are free.
	"""
	# Instance attributes, with class-level defaults kept for
	# compatibility with the original definition.
	ncpus, root, leaves, dist_mat = 0, None, None, None
	levels = ['L1', 'L2', 'L3', 'Mem', 'System']

	class Node:
		"""One node of the topology tree (a CPU leaf or a cache/memory level)."""
		idx, name, parent, children = 0, 'Unk', None, None
		def __init__(self, idx, name, parent = None):
			self.idx = idx
			self.name = name
			self.parent = parent
			self.children = []
		def __repr__(self):
			return self.name + '_' + str(self.idx)

	def __build_level_above(self, machine, l, child_nodes):
		"""Create the parent nodes for level *l* and link *child_nodes* to them."""
		key = 'n' + l
		# cluster size: how many children share one node at this level
		if key in machine:
			cluster_sz = machine[key]
		else:
			cluster_sz = 1
		nchildren = len(child_nodes)
		# NOTE: use '//' — the original relied on Python 2's integer '/',
		# which yields a float (and breaks range()) under Python 3.
		nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
		for i in range(len(child_nodes)):
			child_nodes[i].parent = nodes[i // cluster_sz]
			nodes[i // cluster_sz].children.append(child_nodes[i])
		return nodes

	def __find_dist(self, a, b):
		"""Return the topology distance between nodes *a* and *b*."""
		if a != b:
			# pass-through (ex. as CPU is to private L1)
			if len(a.parent.children) == 1:
				return self.__find_dist(a.parent, b.parent)
			else:
				return 1 + self.__find_dist(a.parent, b.parent)
		return 0

	def __build_dist_matrix(self):
		"""Precompute the symmetric CPU-to-CPU distance matrix."""
		dist_mat = np.empty([self.ncpus, self.ncpus], int)
		for i in range(self.ncpus):
			for j in range(i, self.ncpus):
				dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
		return dist_mat

	def __init__(self, machine):
		"""Build the topology for *machine*.

		Requires keys 'sockets' and 'cores_per_socket'; the per-level
		cluster sizes 'nL1'..'nSystem' are optional (default 1).
		"""
		self.ncpus = machine['sockets'] * machine['cores_per_socket']

		# build the Topology bottom up
		self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
		nodes = self.leaves
		for l in self.levels:
			nodes = self.__build_level_above(machine, l, nodes)
		self.root = nodes

		self.dist_mat = self.__build_dist_matrix()

	def __repr_level(self, node, stem, buf):
		"""Append an indented dump of *node*'s subtree to *buf* and return it."""
		spacing = 3
		buf += stem + node.name + '_' + str(node.idx) + '\n'
		for c in node.children:
			buf = self.__repr_level(c, stem + ' '*spacing, buf)
		return buf

	def __repr__(self):
		buf = self.__repr_level(self.root[0], '', '')
		return buf

	def distance(self, a, b):
		"""Return the precomputed distance between CPUs *a* and *b*."""
		return self.dist_mat[a,b]
87 | |||
88 | |||
# Cache of Topology objects, keyed by host name.
topologies = {}
def get_topo(host):
	"""Return the Topology for *host*, building and caching it on first use."""
	if host not in topologies:
		topologies[host] = Topology(machines[host])
	return topologies[host]
97 | |||
def process_cpmd(csv_file, params):
	"""Distill per-distance consume-overhead statistics from one pco_*.csv file.

	Each CSV row is one sample; the consume overhead is the cold consume
	time minus the hot consume time, clamped at zero.  Samples are grouped
	by the topology distance between producing and consuming CPU, trimmed
	of outliers (1.5 * IQR), and converted from cycles to microseconds.

	Returns a dict mapping distance -> {'max', 'median', 'mean', 'std'}.
	Raises Exception if *params* does not identify a producer/consumer
	overhead ('pco') file.
	"""
	if 'pco' not in params:
		# original message had a stray ')' inside the format string
		raise Exception('not producer/consumer overhead file: %s' % csv_file)

	topo = get_topo(params['host'])

	# print() form works on both Python 2 and 3 (original used the
	# Python-2-only print statement)
	print('processing ' + csv_file)

	# CSV column layout (one row per sample)
	SAMPLE = 0
	WSS = 1
	DELAY = 2
	LAST_CPU = 3
	NEXT_CPU = 4
	DIST = 5
	PRODUCE_COLD = 6
	PRODUCE_HOT = 7
	CONSUME_COLD = 8
	CONSUME_HOT = 9

	costs = {}
	# use a context manager: the original leaked the file handle
	with open(csv_file, "r") as ifile:
		reader = csv.reader(ifile)
		for row in reader:
			hot = int(row[CONSUME_HOT])
			after = int(row[CONSUME_COLD])
			cost = max(after - hot, 0)
			distance = topo.distance(int(row[NEXT_CPU]), int(row[LAST_CPU]))
			# sanity-check our topology model against the recorded distance
			assert distance == int(row[DIST])
			if distance not in costs:
				costs[distance] = []
			costs[distance].append(cost)

	# items() works on both Python 2 and 3 (iteritems() is 2-only)
	for d, c in costs.items():
		arr = np.sort(np.array(c, float))
		# discard outliers beyond 1.5 * the interquartile range
		(arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
		for x in np.nditer(arr, op_flags=['readwrite']):
			x[...] = utils.machines.cycles_to_us(params['host'], x)
		costs[d] = arr

	stats = {}
	for d, arr in costs.items():
		stats[d] = {'max': arr.max(), 'median': np.median(arr),
		            'mean': arr.mean(), 'std': arr.std()}

	return stats
147 | |||
def parse_args():
	"""Parse command-line options; positional arguments are the input CSV files."""
	option_parser = OptionParser("usage: %prog [files...]")
	return option_parser.parse_args()
151 | |||
def safe_split(t, delim):
	"""Split *t* on the first *delim* and always return a (head, tail) 2-tuple.

	If *delim* does not occur in *t*, tail is None.  The original returned
	a tuple in that case but a raw (possibly longer-than-2) list otherwise,
	which could break the (k, v) unpacking done by callers; splitting at
	most once and returning a tuple makes the contract uniform.
	"""
	parts = t.split(delim, 1)
	if len(parts) == 1:
		return (parts[0], None)
	return tuple(parts)
157 | |||
def get_level(machine, ncpus):
	"""Name the topology level shared by CPUs 0 .. ncpus-1 on *machine*.

	Maps the distance between CPU 0 and CPU ncpus-1 to a level name;
	raises Exception if the distance has no corresponding name.
	"""
	dist = get_topo(machine).distance(0, int(ncpus) - 1)
	names = ['L1', 'L2', 'L3', 'mem', 'sys']
	# fix off-by-one: valid indices are 0..len(names)-1; the original's
	# 'dist <= len(names)' allowed an IndexError at dist == len(names)
	if dist < len(names):
		return names[dist]
	raise Exception("Unable to determine level.")
166 | |||
def main():
	"""Distill all pco_*.csv traces named on the command line.

	Files are grouped by their shared experiment parameters (host, cpu
	count, polluters, walk, page settings); for each group, one
	dpco_*.csv summary is written with per-distance mean and max
	overheads per working-set size, plus running maxima.
	"""
	opts, args = parse_args()

	# keep only existing files whose names match pco_*.csv
	files = filter(os.path.exists, args)
	regex = fnmatch.translate("pco_*.csv")
	csvs = re.compile(regex)
	files = filter(csvs.search, files)

	results = {}
	for f in files:
		temp = os.path.basename(f).split(".csv")[0]
		tokens = temp.split("_")

		# tokens look like 'key=value'; parse them into a dict
		params = {k: v for (k, v) in map(lambda x: safe_split(x, "="), tokens)}
		common = tuple([params['host'], params['ncpu'], params['polluters'],
		                params['walk'], params['hpages'], params['upages']])
		if common not in results:
			results[common] = {}
		results[common][int(params['wss'])] = process_cpmd(f, params)

	for common in results:
		trends = results[common]
		# original had a syntax error here: '%' at end of line with no
		# continuation; parenthesizing the whole expression fixes it
		name = ('dpco_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s.csv'
		        % (common[0], get_level(common[0], common[1]),
		           common[2], common[3], common[4], common[5]))
		# context manager closes the output file (original leaked it);
		# also renamed the handle so it no longer shadows loop var 'f'
		with open(name, 'w') as out:
			for w, stats in sorted(trends.items()):
				out.write('%d' % w)
				_mean = 0
				_max = 0
				for i, data in sorted(stats.items()):
					dist_mean = data['mean']
					_mean = max(_mean, dist_mean)
					out.write(', %.6f' % dist_mean)
				out.write(', %.6f' % _mean)
				for i, data in sorted(stats.items()):
					dist_max = data['max']
					_max = max(_max, dist_max)
					out.write(', %.6f' % dist_max)
				out.write(', %.6f\n' % _max)
207 | |||
# Entry point: run the distillation when executed as a script
# (importing this module has no side effects beyond definitions).
if __name__ == '__main__':
	main()