author    Glenn Elliott <gelliott@cs.unc.edu>    2013-10-15 20:29:05 -0400
committer Glenn Elliott <gelliott@cs.unc.edu>    2013-10-15 20:29:05 -0400
commit    de754b0a0a56b10eeb14305358f58a275eaa262c (patch)
tree      eb4c02bffc02a3cc364756c25d6d139e6ce1ece5
parent    311f3384882847edd83d688a1a2dcc12bdc59d23 (diff)
producer/consumer overhead distillation script
-rwxr-xr-x  distill_pco.py  209
1 file changed, 209 insertions(+), 0 deletions(-)
diff --git a/distill_pco.py b/distill_pco.py
new file mode 100755
index 0000000..c83ccde
--- /dev/null
+++ b/distill_pco.py
@@ -0,0 +1,209 @@
#!/usr/bin/env python

import os
import re
import fnmatch
import csv
import numpy as np
from optparse import OptionParser

from utils.machines import machines

import utils.iqr
import utils.machines

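# Topology models a host's cache hierarchy (as described in utils.machines)
# as a tree: CPUs are the leaves, and each level above them (L1, L2, L3,
# memory, system) clusters the nodes of the level below. The distance
# between two CPUs is the number of branching levels that must be crossed
# before they share a node.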
class Topology:
    ncpus, root, leaves, dist_mat = 0, None, None, None
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent=None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

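    # Group child_nodes into clusters of machine['n' + l] children
    # (default: one child per parent) and create one parent node per
    # cluster for hierarchy level l.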
    def __build_level_above(self, machine, l, child_nodes):
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            cluster_sz = 1
        nchildren = len(child_nodes)
        nodes = [self.Node(idx, l) for idx in range(nchildren/cluster_sz)]
        for i in range(len(child_nodes)):
            child_nodes[i].parent = nodes[i/cluster_sz]
            nodes[i/cluster_sz].children.append(child_nodes[i])
        return nodes

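    # Distance between two nodes: the number of branching levels that must
    # be climbed before their paths to the root merge. Levels with a single
    # child (e.g. a CPU's private L1) are not counted.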
    def __find_dist(self, a, b):
        if a != b:
            # pass-through (ex. as CPU is to private L1)
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

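    # Precompute the symmetric ncpus x ncpus matrix of pairwise CPU
    # distances so that distance() is a table lookup.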
    def __build_dist_matrix(self):
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

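    # Render the tree as an indented listing, one node per line.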
    def __repr_level(self, node, stem, buf):
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        return self.dist_mat[a,b]


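# Cache one Topology per host so every input file from the same machine
# reuses the same distance matrix.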
topologies = {}
def get_topo(host):
    if host in topologies:
        return topologies[host]
    else:
        topo = Topology(machines[host])
        topologies[host] = topo
        return topo

# find the max/median/mean/std of producer/consumer overhead, bucketed by
# the topology distance between the producing and consuming CPUs
def process_cpmd(csv_file, params):

    if 'pco' not in params:
        raise Exception('not a producer/consumer overhead file: %s' % csv_file)

    topo = get_topo(params['host'])

    print 'processing ' + csv_file

    ifile = open(csv_file, "r")
    reader = csv.reader(ifile)
    costs = {}

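    # column indices of the fields in each pco_*.csv sample row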
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

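    # A sample's cost is how much longer a cache-cold consume took than a
    # cache-hot consume of the same data, clamped at zero, binned by the
    # topology distance the data had to travel between CPUs.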
    for row in reader:
        hot = int(row[CONSUME_HOT])
        after = int(row[CONSUME_COLD])
        cost = max(after - hot, 0)
        distance = topo.distance(int(row[NEXT_CPU]), int(row[LAST_CPU]))
        assert distance == int(row[DIST])
        if distance not in costs:
            costs[distance] = []
        costs[distance].append(cost)
    ifile.close()

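    # Discard outliers beyond 1.5x the interquartile range, then convert
    # the surviving samples from CPU cycles to microseconds.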
    for d,c in costs.iteritems():
        arr = np.array(c, float)
        arr = np.sort(arr)
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    # summary statistics for each distance bucket
    stats = {}
    for d,arr in costs.iteritems():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats

def parse_args():
    parser = OptionParser("usage: %prog [files...]")
    return parser.parse_args()

# split t on delim; if delim is absent, return (t, None)
def safe_split(t, delim):
    t = t.split(delim)
    if len(t) == 1:
        t = (t[0], None)
    return t

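# Map a CPU count to the highest level of the hierarchy shared by CPU 0
# and CPU ncpus-1, as a short label for the output filename.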
def get_level(machine, ncpus):
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    if dist < len(names):
        return names[dist]
    else:
        raise Exception("Unable to determine level.")

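# Input files are expected to be named (inferred from the token parsing
# below) like:
#   pco_host=<host>_wss=<kb>_ncpu=<n>_polluters=<p>_walk=<w>_hpages=<h>_upages=<u>.csv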
def main():
    opts, args = parse_args()

    files = filter(os.path.exists, args)

    regex = fnmatch.translate("pco_*.csv")
    csvs = re.compile(regex)
    files = filter(csvs.search, files)

    # group per-WSS results by every parameter except the working set size
    results = {}
    for f in files:
        temp = os.path.basename(f).split(".csv")[0]
        tokens = temp.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

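    # Each output row: wss, the mean cost at each distance (ascending),
    # the maximum of those means, the max cost at each distance, and the
    # maximum of those maxes, all in microseconds.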
    for common in results:
        trends = results[common]
        name = ('dpco_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s.csv' %
                (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5]))
        f = open(name, 'w')
        for w,stats in iter(sorted(trends.iteritems())):
            f.write('%d' % w)
            _mean = 0
            _max = 0
            for i,data in iter(sorted(stats.iteritems())):
                dist_mean = data['mean']
                _mean = max(_mean, dist_mean)
                f.write(', %.6f' % dist_mean)
            f.write(', %.6f' % _mean)
            for i,data in iter(sorted(stats.iteritems())):
                dist_max = data['max']
                _max = max(_max, dist_max)
                f.write(', %.6f' % dist_max)
            f.write(', %.6f\n' % _max)
        f.close()

if __name__ == '__main__':
    main()