about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2014-01-31 21:55:03 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2014-01-31 21:55:03 -0500
commit21a605fb8fe90f3b2659cb9d93039232bb2bddc4 (patch)
tree3d32f1912a5a639f7152adee608e307369408da0
parentc55e81ec12f80f60846b251aa7bbe0f6c044e7e8 (diff)
Compute costs for reading and writing data. (branch: wip-ecrts14-pgm)
-rwxr-xr-xdistill_read_hot.py205
-rwxr-xr-xdistill_write_cold.py205
-rw-r--r--gen/edf_generators.py11
-rw-r--r--gen/generator.py9
4 files changed, 430 insertions, 0 deletions
diff --git a/distill_read_hot.py b/distill_read_hot.py
new file mode 100755
index 0000000..7b994ff
--- /dev/null
+++ b/distill_read_hot.py
@@ -0,0 +1,205 @@
1#!/usr/bin/env python
2
3import os
4import re
5import fnmatch
6import shutil as sh
7import sys
8import csv
9import numpy as np
10from scipy.stats import scoreatpercentile
11import bisect
12from optparse import OptionParser
13
14from utils.machines import machines
15
16import utils.iqr
17
class Topology:
    """Model of a machine's cache/memory hierarchy.

    Builds a tree of Nodes bottom-up (CPUs -> L1 -> L2 -> L3 -> Mem ->
    System) from a machine description dict and precomputes a symmetric
    CPU-to-CPU distance matrix.  The distance between two CPUs is the
    number of non-pass-through levels climbed before their paths to the
    root meet (0 = same CPU, 1 = shared first non-private level, ...).
    """
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels, bottom-up; a 'n<level>' key in the machine dict
    # gives the cluster size (children per node) at that level
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        """One vertex of the topology tree."""
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Group child_nodes into parents for level l; returns the parents."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            # level not described: one parent per child (pass-through)
            cluster_sz = 1
        nchildren = len(child_nodes)
        # NOTE: '//' (floor division) keeps this correct under Python 3;
        # the original '/' relied on Python 2 integer-division semantics
        # and produced floats (TypeError in range()) under Python 3.
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count non-trivial levels climbed until a's and b's paths meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric, so only the upper triangle is computed
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        """Append an indented dump of the subtree at node to buf."""
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Return the precomputed distance between CPUs a and b."""
        return self.dist_mat[a,b]
87
88
# per-host cache of constructed Topology objects
topologies = {}
def get_topo(host):
    """Return the (memoized) Topology for the named host."""
    try:
        return topologies[host]
    except KeyError:
        topologies[host] = Topology(machines[host])
        return topologies[host]
97
def non_polluter_filename(csv_file):
    """Map a polluters=True trace filename to its polluters=False twin."""
    # the pattern is a fixed literal, so plain string replacement suffices
    return csv_file.replace("polluters=True", "polluters=False")
100
# find the max/avg/std of preemption and migration
def process_cpmd(csv_file, params):
    """Distill one consumer-overhead CSV into per-distance statistics.

    Reads the polluter trace alongside its polluters=False twin (rows
    stay paired), buckets the hot consume cost by CPU distance, trims
    outliers with a 1.5-IQR filter, converts cycles to microseconds,
    and returns {distance: {'max', 'median', 'mean', 'std'}}.
    Raises Exception when the file is not a producer/consumer trace.
    """
    if 'pco' not in params:
        # original message had a stray ')' at the end
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # warms the per-host topology cache (the value itself is unused here)
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the measurement CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both handles are closed (the originals leaked);
    # zip keeps the two traces row-paired and stops at the shorter one
    with open(csv_file, "r") as ifile, open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        for (row, bc_row) in zip(reader, bc_reader):
            hot = int(row[CONSUME_HOT])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(hot)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # discard outliers beyond 1.5 IQR, then convert cycles -> us in place
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150
def parse_args():
    """Parse the command line; every positional argument is an input file."""
    return OptionParser("usage: %prog [files...]").parse_args()
154
def safe_split(t, delim):
    """Split 'key<delim>value' into a (key, value) pair.

    Returns (t, None) when the delimiter is absent.  Splits on the
    FIRST delimiter only, so values may themselves contain the
    delimiter; the original returned the full split list, which crashed
    the caller's 2-tuple unpack on such tokens (and inconsistently
    returned a list or a tuple depending on the input).
    """
    key, sep, val = t.partition(delim)
    return (key, val) if sep else (key, None)
160
def get_level(machine, ncpus):
    """Name the topology level shared by CPUs 0 and ncpus-1 on machine.

    Returns one of 'L1', 'L2', 'L3', 'mem', 'sys'; raises Exception when
    the distance exceeds the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # original used 'dist <= len(names)', which let dist == len(names)
    # through and raised IndexError instead of the intended exception;
    # the unreachable trailing "return ''" is dropped
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169
def main():
    """Distill every pco_*.csv named on the command line into summary
    CSVs: one file per configuration and per max/median/mean statistic."""
    opts, args = parse_args()

    # keep only existing files whose name matches pco_*.csv
    files = [f for f in args if os.path.exists(f)]
    csvs = re.compile(fnmatch.translate("pco_*.csv"))
    files = [f for f in files if csvs.search(f)]

    results = {}
    for f in files:
        # the filename stem is a '_'-separated list of key=value tokens
        stem = os.path.basename(f).split(".csv")[0]
        tokens = stem.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        # everything except wss identifies one experiment configuration
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dro_hot_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes/flushes the output (the original leaked the
            # handle and shadowed the loop variable 'f' with it)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows ordered by WSS; columns ordered by CPU distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')

if __name__ == '__main__':
    main()
diff --git a/distill_write_cold.py b/distill_write_cold.py
new file mode 100755
index 0000000..28e9eb0
--- /dev/null
+++ b/distill_write_cold.py
@@ -0,0 +1,205 @@
1#!/usr/bin/env python
2
3import os
4import re
5import fnmatch
6import shutil as sh
7import sys
8import csv
9import numpy as np
10from scipy.stats import scoreatpercentile
11import bisect
12from optparse import OptionParser
13
14from utils.machines import machines
15
16import utils.iqr
17
class Topology:
    """Model of a machine's cache/memory hierarchy.

    Builds a tree of Nodes bottom-up (CPUs -> L1 -> L2 -> L3 -> Mem ->
    System) from a machine description dict and precomputes a symmetric
    CPU-to-CPU distance matrix.  The distance between two CPUs is the
    number of non-pass-through levels climbed before their paths to the
    root meet (0 = same CPU, 1 = shared first non-private level, ...).
    """
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels, bottom-up; a 'n<level>' key in the machine dict
    # gives the cluster size (children per node) at that level
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        """One vertex of the topology tree."""
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Group child_nodes into parents for level l; returns the parents."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            # level not described: one parent per child (pass-through)
            cluster_sz = 1
        nchildren = len(child_nodes)
        # NOTE: '//' (floor division) keeps this correct under Python 3;
        # the original '/' relied on Python 2 integer-division semantics
        # and produced floats (TypeError in range()) under Python 3.
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count non-trivial levels climbed until a's and b's paths meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric, so only the upper triangle is computed
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        """Append an indented dump of the subtree at node to buf."""
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Return the precomputed distance between CPUs a and b."""
        return self.dist_mat[a,b]
87
88
# per-host cache of constructed Topology objects
topologies = {}
def get_topo(host):
    """Return the (memoized) Topology for the named host."""
    try:
        return topologies[host]
    except KeyError:
        topologies[host] = Topology(machines[host])
        return topologies[host]
97
def non_polluter_filename(csv_file):
    """Map a polluters=True trace filename to its polluters=False twin."""
    # the pattern is a fixed literal, so plain string replacement suffices
    return csv_file.replace("polluters=True", "polluters=False")
100
# find the max/avg/std of preemption and migration
def process_cpmd(csv_file, params):
    """Distill one producer-overhead CSV into per-distance statistics.

    Reads the polluter trace alongside its polluters=False twin (rows
    stay paired), buckets the cold produce cost by CPU distance, trims
    outliers with a 1.5-IQR filter, converts cycles to microseconds,
    and returns {distance: {'max', 'median', 'mean', 'std'}}.
    Raises Exception when the file is not a producer/consumer trace.
    """
    if 'pco' not in params:
        # original message had a stray ')' at the end
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # warms the per-host topology cache (the value itself is unused here)
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the measurement CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both handles are closed (the originals leaked);
    # zip keeps the two traces row-paired and stops at the shorter one
    with open(csv_file, "r") as ifile, open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        for (row, bc_row) in zip(reader, bc_reader):
            cold = int(row[PRODUCE_COLD])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(cold)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # discard outliers beyond 1.5 IQR, then convert cycles -> us in place
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150
def parse_args():
    """Parse the command line; every positional argument is an input file."""
    return OptionParser("usage: %prog [files...]").parse_args()
154
def safe_split(t, delim):
    """Split 'key<delim>value' into a (key, value) pair.

    Returns (t, None) when the delimiter is absent.  Splits on the
    FIRST delimiter only, so values may themselves contain the
    delimiter; the original returned the full split list, which crashed
    the caller's 2-tuple unpack on such tokens (and inconsistently
    returned a list or a tuple depending on the input).
    """
    key, sep, val = t.partition(delim)
    return (key, val) if sep else (key, None)
160
def get_level(machine, ncpus):
    """Name the topology level shared by CPUs 0 and ncpus-1 on machine.

    Returns one of 'L1', 'L2', 'L3', 'mem', 'sys'; raises Exception when
    the distance exceeds the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # original used 'dist <= len(names)', which let dist == len(names)
    # through and raised IndexError instead of the intended exception;
    # the unreachable trailing "return ''" is dropped
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169
def main():
    """Distill every pco_*.csv named on the command line into summary
    CSVs: one file per configuration and per max/median/mean statistic."""
    opts, args = parse_args()

    # keep only existing files whose name matches pco_*.csv
    files = [f for f in args if os.path.exists(f)]
    csvs = re.compile(fnmatch.translate("pco_*.csv"))
    files = [f for f in files if csvs.search(f)]

    results = {}
    for f in files:
        # the filename stem is a '_'-separated list of key=value tokens
        stem = os.path.basename(f).split(".csv")[0]
        tokens = stem.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        # everything except wss identifies one experiment configuration
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dwo_cold_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes/flushes the output (the original leaked the
            # handle and shadowed the loop variable 'f' with it)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows ordered by WSS; columns ordered by CPU distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')

if __name__ == '__main__':
    main()
diff --git a/gen/edf_generators.py b/gen/edf_generators.py
index eda23e4..cca4d44 100644
--- a/gen/edf_generators.py
+++ b/gen/edf_generators.py
@@ -269,4 +269,15 @@ class CflSplitPgmGenerator(EdfPgmGenerator):
269 if exp_params['level'] == 'ALL': 269 if exp_params['level'] == 'ALL':
270 # kludge: assume global task sets are always schedulable 270 # kludge: assume global task sets are always schedulable
271 is_sched = True 271 is_sched = True
272
273 if is_sched:
274 # compute the minimum time to produce/consume, so this can be discounted
275 # from the execution time during runtime
276 for ti in ts:
277 consume_amount = ti.wss
278 produce_amount = sum([e.wss for e in ti.node.outEdges])
279 consume_time = overheads.read(consume_amount)
280 produce_time = overheads.write(produce_amount)
281 ti.cost_discount = consume_time + produce_time
282
272 return is_sched, ts 283 return is_sched, ts
diff --git a/gen/generator.py b/gen/generator.py
index 8b3a189..e49606d 100644
--- a/gen/generator.py
+++ b/gen/generator.py
@@ -152,6 +152,7 @@ class Generator(object):
152 rates_arg = [] 152 rates_arg = []
153 etoe_arg = [] 153 etoe_arg = []
154 exec_arg = [] 154 exec_arg = []
155 discount_arg = []
155 cluster_arg = [] 156 cluster_arg = []
156 clustersz_arg = [] 157 clustersz_arg = []
157 wss_arg = [] 158 wss_arg = []
@@ -164,6 +165,7 @@ class Generator(object):
164 cluster_arg_t = [] 165 cluster_arg_t = []
165 graph_desc_arg_t = [] 166 graph_desc_arg_t = []
166 exec_arg_t = [] 167 exec_arg_t = []
168 discount_arg_t = []
167 rates_arg_t = [] 169 rates_arg_t = []
168 wss_arg_t = [] 170 wss_arg_t = []
169 split_arg_t = [] 171 split_arg_t = []
@@ -174,6 +176,9 @@ class Generator(object):
174 cluster_arg_t.append('node_' + str(n.id) + ':' + str(n.task.partition)) 176 cluster_arg_t.append('node_' + str(n.id) + ':' + str(n.task.partition))
175 cost_str = format(n.task.cost/1000.0, '.4f').rstrip('0').rstrip('.') 177 cost_str = format(n.task.cost/1000.0, '.4f').rstrip('0').rstrip('.')
176 exec_arg_t.append('node_' + str(n.id) + ':' + cost_str) 178 exec_arg_t.append('node_' + str(n.id) + ':' + cost_str)
179 if n.task.cost_discount > 10:
180 discount_str = format(n.task.cost_discount/1000.0, '.4f').rstrip('0').rstrip('.')
181 discount_arg_t.append('node_' + str(n.id) + ':' + discount_str)
177 if n.task.split != 1: 182 if n.task.split != 1:
178 split_arg_t.append('node_' + str(n.id) + ':' + str(n.task.split)) 183 split_arg_t.append('node_' + str(n.id) + ':' + str(n.task.split))
179 if n.isSrc == True: 184 if n.isSrc == True:
@@ -193,12 +198,14 @@ class Generator(object):
193 cluster_arg_t = ','.join(cluster_arg_t) 198 cluster_arg_t = ','.join(cluster_arg_t)
194 graph_desc_arg_t = ','.join(graph_desc_arg_t) 199 graph_desc_arg_t = ','.join(graph_desc_arg_t)
195 exec_arg_t = ','.join(exec_arg_t) 200 exec_arg_t = ','.join(exec_arg_t)
201 discount_arg_t = ','.join(discount_arg_t)
196 wss_arg_t = ','.join(wss_arg_t) 202 wss_arg_t = ','.join(wss_arg_t)
197 split_arg_t = ','.join(split_arg_t) 203 split_arg_t = ','.join(split_arg_t)
198 rates_arg_t = ','.join(rates_arg_t) 204 rates_arg_t = ','.join(rates_arg_t)
199 205
200 cluster_arg.append(cluster_arg_t) 206 cluster_arg.append(cluster_arg_t)
201 exec_arg.append(exec_arg_t) 207 exec_arg.append(exec_arg_t)
208 discount_arg.append(discount_arg_t)
202 graph_desc_arg.append(graph_desc_arg_t) 209 graph_desc_arg.append(graph_desc_arg_t)
203 wss_arg.append(wss_arg_t) 210 wss_arg.append(wss_arg_t)
204 split_arg.append(split_arg_t) 211 split_arg.append(split_arg_t)
@@ -223,6 +230,8 @@ class Generator(object):
223 pgm_args_t = ''; 230 pgm_args_t = '';
224 pgm_args_t += '--wait --cluster ' + cluster_arg[i] + ' --clusterSize ' + clustersz_arg[i] 231 pgm_args_t += '--wait --cluster ' + cluster_arg[i] + ' --clusterSize ' + clustersz_arg[i]
225 pgm_args_t += ' --graph ' + graph_desc_arg[i] + ' --rates ' + rates_arg[i] + ' --execution ' + exec_arg[i] 232 pgm_args_t += ' --graph ' + graph_desc_arg[i] + ' --rates ' + rates_arg[i] + ' --execution ' + exec_arg[i]
233 if len(discount_arg[i]) != 0:
234 pgm_args_t += ' --discount ' + discount_arg[i]
226 if len(split_arg[i]) != 0: 235 if len(split_arg[i]) != 0:
227 pgm_args_t += ' --split ' + split_arg[i] 236 pgm_args_t += ' --split ' + split_arg[i]
228 if len(wss_arg[i]) != 0: 237 if len(wss_arg[i]) != 0: