author    Glenn Elliott <gelliott@cs.unc.edu>    2013-10-15 20:29:05 -0400
committer Glenn Elliott <gelliott@cs.unc.edu>    2013-10-15 20:29:05 -0400
commit    de754b0a0a56b10eeb14305358f58a275eaa262c (patch)
tree      eb4c02bffc02a3cc364756c25d6d139e6ce1ece5
parent    311f3384882847edd83d688a1a2dcc12bdc59d23 (diff)
producer/consumer overhead distillation script
-rwxr-xr-x  distill_pco.py  209
1 file changed, 209 insertions(+), 0 deletions(-)
diff --git a/distill_pco.py b/distill_pco.py
new file mode 100755
index 0000000..c83ccde
--- /dev/null
+++ b/distill_pco.py
@@ -0,0 +1,209 @@
#!/usr/bin/env python

import os
import re
import fnmatch
import csv
import numpy as np
from optparse import OptionParser

from utils.machines import machines

import utils.iqr
import utils.machines

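# Topology models a host's cache hierarchy (as described in utils.machines)
# as a tree: CPUs are the leaves, and each level above them (L1, L2, L3,
# memory, system) clusters the nodes of the level below. The distance
# between two CPUs is the number of branching levels that must be crossed
# before they share a node.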
class Topology:
    ncpus, root, leaves, dist_mat = 0, None, None, None
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent=None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

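    # Group child_nodes into clusters of machine['n' + l] children
    # (default: one child per parent) and create one parent node per
    # cluster for hierarchy level l.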
    def __build_level_above(self, machine, l, child_nodes):
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            cluster_sz = 1
        nchildren = len(child_nodes)
        nodes = [self.Node(idx, l) for idx in range(nchildren/cluster_sz)]
        for i in range(len(child_nodes)):
            child_nodes[i].parent = nodes[i/cluster_sz]
            nodes[i/cluster_sz].children.append(child_nodes[i])
        return nodes

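    # Distance between two nodes: the number of branching levels that must
    # be climbed before their paths to the root merge. Levels with a single
    # child (e.g. a CPU's private L1) are not counted.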
    def __find_dist(self, a, b):
        if a != b:
            # pass-through (ex. as CPU is to private L1)
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

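    # Precompute the symmetric ncpus x ncpus matrix of pairwise CPU
    # distances so that distance() is a table lookup.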
    def __build_dist_matrix(self):
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

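    # Render the tree as an indented listing, one node per line.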
    def __repr_level(self, node, stem, buf):
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        return self.dist_mat[a,b]


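# Cache one Topology per host so every input file from the same machine
# reuses the same distance matrix.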
topologies = {}
def get_topo(host):
    if host in topologies:
        return topologies[host]
    else:
        topo = Topology(machines[host])
        topologies[host] = topo
        return topo

# find the max/median/mean/std of producer/consumer overhead, bucketed by
# the topology distance between the producing and consuming CPUs
def process_cpmd(csv_file, params):

    if 'pco' not in params:
        raise Exception('not a producer/consumer overhead file: %s' % csv_file)

    topo = get_topo(params['host'])

    print 'processing ' + csv_file

    ifile = open(csv_file, "r")
    reader = csv.reader(ifile)
    costs = {}

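    # column indices of the fields in each pco_*.csv sample row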
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

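    # A sample's cost is how much longer a cache-cold consume took than a
    # cache-hot consume of the same data, clamped at zero, binned by the
    # topology distance the data had to travel between CPUs.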
    for row in reader:
        hot = int(row[CONSUME_HOT])
        after = int(row[CONSUME_COLD])
        cost = max(after - hot, 0)
        distance = topo.distance(int(row[NEXT_CPU]), int(row[LAST_CPU]))
        assert distance == int(row[DIST])
        if distance not in costs:
            costs[distance] = []
        costs[distance].append(cost)
    ifile.close()

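    # Discard outliers beyond 1.5x the interquartile range, then convert
    # the surviving samples from CPU cycles to microseconds.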
    for d,c in costs.iteritems():
        arr = np.array(c, float)
        arr = np.sort(arr)
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    # summary statistics for each distance bucket
    stats = {}
    for d,arr in costs.iteritems():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats

def parse_args():
    parser = OptionParser("usage: %prog [files...]")
    return parser.parse_args()

# split t on delim; if delim is absent, return (t, None)
def safe_split(t, delim):
    t = t.split(delim)
    if len(t) == 1:
        t = (t[0], None)
    return t

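# Map a CPU count to the highest level of the hierarchy shared by CPU 0
# and CPU ncpus-1, as a short label for the output filename.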
def get_level(machine, ncpus):
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    if dist < len(names):
        return names[dist]
    else:
        raise Exception("Unable to determine level.")

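# Input files are expected to be named (inferred from the token parsing
# below) like:
#   pco_host=<host>_wss=<kb>_ncpu=<n>_polluters=<p>_walk=<w>_hpages=<h>_upages=<u>.csv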
def main():
    opts, args = parse_args()

    files = filter(os.path.exists, args)

    regex = fnmatch.translate("pco_*.csv")
    csvs = re.compile(regex)
    files = filter(csvs.search, files)

    # group per-WSS results by every parameter except the working set size
    results = {}
    for f in files:
        temp = os.path.basename(f).split(".csv")[0]
        tokens = temp.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

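    # Each output row: wss, the mean cost at each distance (ascending),
    # the maximum of those means, the max cost at each distance, and the
    # maximum of those maxes, all in microseconds.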
    for common in results:
        trends = results[common]
        name = ('dpco_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s.csv' %
                (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5]))
        f = open(name, 'w')
        for w,stats in iter(sorted(trends.iteritems())):
            f.write('%d' % w)
            _mean = 0
            _max = 0
            for i,data in iter(sorted(stats.iteritems())):
                dist_mean = data['mean']
                _mean = max(_mean, dist_mean)
                f.write(', %.6f' % dist_mean)
            f.write(', %.6f' % _mean)
            for i,data in iter(sorted(stats.iteritems())):
                dist_max = data['max']
                _max = max(_max, dist_max)
                f.write(', %.6f' % dist_max)
            f.write(', %.6f\n' % _max)
        f.close()

if __name__ == '__main__':
    main()