diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2014-01-31 21:55:03 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2014-01-31 21:55:03 -0500 |
commit | 21a605fb8fe90f3b2659cb9d93039232bb2bddc4 (patch) | |
tree | 3d32f1912a5a639f7152adee608e307369408da0 /distill_write_cold.py | |
parent | c55e81ec12f80f60846b251aa7bbe0f6c044e7e8 (diff) |
Compute costs for reading and writing data.wip-ecrts14-pgm
Diffstat (limited to 'distill_write_cold.py')
-rwxr-xr-x | distill_write_cold.py | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/distill_write_cold.py b/distill_write_cold.py new file mode 100755 index 0000000..28e9eb0 --- /dev/null +++ b/distill_write_cold.py | |||
@@ -0,0 +1,205 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | import os | ||
4 | import re | ||
5 | import fnmatch | ||
6 | import shutil as sh | ||
7 | import sys | ||
8 | import csv | ||
9 | import numpy as np | ||
10 | from scipy.stats import scoreatpercentile | ||
11 | import bisect | ||
12 | from optparse import OptionParser | ||
13 | |||
14 | from utils.machines import machines | ||
15 | |||
16 | import utils.iqr | ||
17 | |||
class Topology:
    """Tree model of a machine's cache/memory hierarchy.

    CPUs are the leaves; each level in 'levels' groups the nodes below it
    into clusters of size machine['n<level>'] (default 1).  Pairwise
    CPU-to-CPU distances (number of non-degenerate levels crossed before
    two CPUs share an ancestor) are precomputed into dist_mat.
    """

    # class-level defaults; all are overwritten per-instance in __init__
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels above the CPU leaves, listed bottom-up
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        # tree node; idx distinguishes siblings at the same level
        idx, name, parent, children = 0, 'Unk', None, None

        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []

        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Create one parent node per cluster of machine['n'+l] children
        (cluster size defaults to 1) and wire up parent/child links."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            cluster_sz = 1
        nchildren = len(child_nodes)
        # '//' (floor division) matches the old Python 2 int '/' exactly
        # and stays correct under Python 3
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count the non-degenerate levels crossed until a and b meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric ncpus x ncpus matrix of pairwise leaf distances
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        """machine: dict with 'sockets' and 'cores_per_socket', plus
        optional 'nL1'..'nSystem' cluster sizes for each level."""
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes  # list of topmost node(s)

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        # depth-first dump, indenting three spaces per tree level
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Precomputed topology distance between CPUs a and b."""
        return self.dist_mat[a,b]
87 | |||
88 | |||
# cache of Topology objects, keyed by hostname
topologies = {}

def get_topo(host):
    """Return the Topology for host, building and caching it on first use."""
    topo = topologies.get(host)
    if topo is None:
        topo = Topology(machines[host])
        topologies[host] = topo
    return topo
97 | |||
def non_polluter_filename(csv_file):
    """Map a polluters=True measurement filename to its polluters=False twin."""
    # plain literal substitution; the pattern has no regex metacharacters
    return csv_file.replace("polluters=True", "polluters=False")
100 | |||
# find the max/median/mean/std of cold-cache produce (write) costs
def process_cpmd(csv_file, params):
    """Distill one producer/consumer overhead CSV into per-distance stats.

    Reads the cold-cache produce cost of each sample, groups costs by CPU
    distance, discards IQR outliers, converts cycles to microseconds, and
    returns {distance: {'max','median','mean','std'}}.

    Raises Exception when the filename params do not mark a pco file.
    """
    if 'pco' not in params:
        # fixed a stray ')' that used to appear inside this message
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # validates the host and warms the topology cache; the per-sample
    # distance itself comes from the CSV's DIST column below
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the overhead CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both files are closed (they previously leaked)
    with open(csv_file, "r") as ifile, \
         open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        # bc_row is unused, but zipping against the best-case file also
        # truncates to the shorter of the two traces, so the pairing stays
        for (row, bc_row) in zip(reader, bc_reader):
            cold = int(row[PRODUCE_COLD])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(cold)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # drop outliers beyond 1.5*IQR, then convert cycles -> microseconds
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    # print costs
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150 | |||
def parse_args():
    """Parse command-line options; positional args are the input files."""
    usage = "usage: %prog [files...]"
    return OptionParser(usage).parse_args()
154 | |||
def safe_split(t, delim):
    """Split t on delim, returning a (key, value) pair.

    A token without the delimiter yields (token, None) so callers can
    unpack every result uniformly.
    """
    parts = t.split(delim)
    if len(parts) == 1:
        return (parts[0], None)
    # previously returned a bare list here; a tuple keeps the return
    # type consistent with the no-delimiter branch
    return tuple(parts)
160 | |||
def get_level(machine, ncpus):
    """Name the topology level shared by CPU 0 and CPU ncpus-1.

    Raises Exception when the distance falls outside the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # '<' (was '<='): dist == len(names) used to raise IndexError on
    # names[dist] instead of reaching the intended Exception
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169 | |||
def main():
    """Distill pco_*.csv overhead files into per-WSS trend CSVs.

    Groups input files by their non-WSS parameters, computes per-distance
    write-cost statistics for each, and emits one output CSV per parameter
    combination and statistic (max/median/mean).
    """
    opts, args = parse_args()

    # keep only existing files whose names match pco_*.csv
    files = filter(os.path.exists, args)
    regex = fnmatch.translate("pco_*.csv")
    csvs = re.compile(regex)
    files = filter(csvs.search, files)

    results = {}
    for f in files:
        # filename encodes key=value parameters separated by '_'
        temp = os.path.basename(f).split(".csv")[0]
        tokens = temp.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    # one output CSV per parameter combination and statistic
    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dwo_cold_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes the output file (it previously leaked)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows sorted by WSS; columns sorted by distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')
203 | |||
# entry point guard: run the distiller only when executed as a script
if __name__ == '__main__':
    main()