Diffstat (limited to 'distill_write_cold.py')
-rwxr-xr-x  distill_write_cold.py | 205 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+), 0 deletions(-)
diff --git a/distill_write_cold.py b/distill_write_cold.py
new file mode 100755
index 0000000..28e9eb0
--- /dev/null
+++ b/distill_write_cold.py
@@ -0,0 +1,205 @@
#!/usr/bin/env python

import os
import re
import fnmatch
import shutil as sh
import sys
import csv
import numpy as np
from scipy.stats import scoreatpercentile
import bisect
from optparse import OptionParser

from utils.machines import machines

import utils.iqr

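# Machine descriptions come from utils.machines. A minimal sketch of the shape
# this script assumes (hypothetical host and values; the real table lives in
# that module): each 'n<level>' key gives how many nodes of the level below
# share one node of that level, and a missing key means a cluster size of 1
# (a private level).
#
#   machines = {
#       'examplehost': {
#           'sockets': 2, 'cores_per_socket': 4,
#           'nL3': 4,   # four (private) L2s under each L3
#           'nMem': 2,  # two L3s under one memory node
#       },
#   }
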
class Topology:
    ncpus, root, leaves, dist_mat = 0, None, None, None
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        # 'n<level>' in the machine description gives how many nodes of the
        # level below are clustered under one node of this level
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            cluster_sz = 1
        nchildren = len(child_nodes)
        nodes = [self.Node(idx, l) for idx in range(nchildren//cluster_sz)]
        for i in range(len(child_nodes)):
            child_nodes[i].parent = nodes[i//cluster_sz]
            nodes[i//cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        if a != b:
            # pass-through (ex. as CPU is to private L1)
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

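    # Distance counts only branching levels: a node with a single child
    # (e.g. a core and its private L1) is a pass-through and adds nothing.
    # So with private L1/L2 caches, a per-socket shared L3, and one memory
    # node, cores on the same socket sit at distance 1 and cores on
    # different sockets at distance 2.
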
    def __build_dist_matrix(self):
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        return self.dist_mat[a,b]


# cache constructed topologies per host; building the distance matrix is
# quadratic in the CPU count, so do it at most once per machine
topologies = {}
def get_topo(host):
    if host in topologies:
        return topologies[host]
    else:
        topo = Topology(machines[host])
        topologies[host] = topo
        return topo

def non_polluter_filename(csv_file):
    return re.sub(r"polluters=True", r"polluters=False", csv_file)
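# For example, a (hypothetical) input named
# 'pco_host=examplehost_wss=1024_polluters=True.csv' maps to
# 'pco_host=examplehost_wss=1024_polluters=False.csv': presumably the same
# configuration measured without cache polluters, read below as the
# best-case baseline.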

# find the max/avg/std of preemption and migration
def process_cpmd(csv_file, params):

    if 'pco' not in params:
        raise Exception('not a producer/consumer overhead file: %s' % csv_file)

    # raises KeyError for unknown hosts; the topology itself is not needed
    # here since the distance is already recorded in the CSV
    topo = get_topo(params['host'])

    print 'processing ' + csv_file

    ifile = open(csv_file, "r")
    bestcase = open(non_polluter_filename(csv_file), "r")

    reader = csv.reader(ifile)
    bc_reader = csv.reader(bestcase)
    costs = {}

    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

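    # Only row[DIST] and row[PRODUCE_COLD] feed the statistics below; the
    # best-case (polluters=False) rows are read in lockstep via bc_reader
    # but are not otherwise used by this cold-write distillation.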
    for (row, bc_row) in zip(reader, bc_reader):
        cold = int(row[PRODUCE_COLD])
        distance = int(row[DIST])
        if distance not in costs:
            costs[distance] = []
        costs[distance].append(cold)

    ifile.close()
    bestcase.close()

    for d,c in costs.iteritems():
        arr = np.array(c, float)
        arr = np.sort(arr)
        # discard outliers beyond 1.5 IQRs, then convert the surviving
        # samples from cycles to microseconds
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
#    print costs
    for d,arr in costs.iteritems():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats

def parse_args():
    parser = OptionParser("usage: %prog [files...]")
    return parser.parse_args()

def safe_split(t, delim):
    t = t.split(delim)
    if len(t) == 1:
        t = tuple([t[0], None])
    return t
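# safe_split('host=examplehost', '=') gives ['host', 'examplehost'], while a
# bare token such as 'pco' gives ('pco', None); this is how the leading 'pco'
# file-name prefix becomes a params key that process_cpmd can check for.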

def get_level(machine, ncpus):
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # dist must index names; dist == len(names) would be out of range
    if dist < len(names):
        return names[dist]
    else:
        raise Exception("Unable to determine level.")

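# main() groups the input files by every parameter except the working-set
# size and emits, per group and per statistic (max/median/mean), one CSV
# with a row per WSS value; the fixed 'WSS,L1,L2,L3,MEM' header assumes
# four distance buckets per group.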
def main():
    opts, args = parse_args()

    files = filter(os.path.exists, args)

    regex = fnmatch.translate("pco_*.csv")
    csvs = re.compile(regex)
    files = filter(csvs.search, files)

    results = {}
    for f in files:
        temp = os.path.basename(f).split(".csv")[0]
        tokens = temp.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

#    print results
    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dwo_cold_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            f = open(name, 'w')
            f.write('WSS,L1,L2,L3,MEM\n')
            for w,stats in iter(sorted(trends.iteritems())):
                f.write('%d' % w)
                for i,data in iter(sorted(stats.iteritems())):
                    val = data[t]
                    f.write(',%.6f' % val)
                f.write('\n')
            f.close()

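# Typical invocation (hypothetical paths):
#   ./distill_write_cold.py results/pco_host=examplehost_*.csv
# Arguments that do not exist or do not match 'pco_*.csv' are silently skipped.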
if __name__ == '__main__':
    main()