diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2014-01-31 21:55:03 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2014-01-31 21:55:03 -0500 |
commit | 21a605fb8fe90f3b2659cb9d93039232bb2bddc4 (patch) | |
tree | 3d32f1912a5a639f7152adee608e307369408da0 /distill_write_cold.py | |
parent | c55e81ec12f80f60846b251aa7bbe0f6c044e7e8 (diff) |
Compute costs for reading and writing data.wip-ecrts14-pgm
Diffstat (limited to 'distill_write_cold.py')
-rwxr-xr-x | distill_write_cold.py | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/distill_write_cold.py b/distill_write_cold.py new file mode 100755 index 0000000..28e9eb0 --- /dev/null +++ b/distill_write_cold.py | |||
@@ -0,0 +1,205 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | import os | ||
4 | import re | ||
5 | import fnmatch | ||
6 | import shutil as sh | ||
7 | import sys | ||
8 | import csv | ||
9 | import numpy as np | ||
10 | from scipy.stats import scoreatpercentile | ||
11 | import bisect | ||
12 | from optparse import OptionParser | ||
13 | |||
14 | from utils.machines import machines | ||
15 | |||
16 | import utils.iqr | ||
17 | |||
class Topology:
    """Tree model of a machine's cache/memory hierarchy.

    CPUs are the leaves; each level in 'levels' groups the nodes below it
    into clusters of size machine['n<level>'] (default 1).  Pairwise
    CPU-to-CPU distances (number of non-degenerate levels crossed before
    two CPUs share an ancestor) are precomputed into dist_mat.
    """

    # class-level defaults; all are overwritten per-instance in __init__
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels above the CPU leaves, listed bottom-up
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        # tree node; idx distinguishes siblings at the same level
        idx, name, parent, children = 0, 'Unk', None, None

        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []

        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Create one parent node per cluster of machine['n'+l] children
        (cluster size defaults to 1) and wire up parent/child links."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            cluster_sz = 1
        nchildren = len(child_nodes)
        # '//' (floor division) matches the old Python 2 int '/' exactly
        # and stays correct under Python 3
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count the non-degenerate levels crossed until a and b meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric ncpus x ncpus matrix of pairwise leaf distances
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        """machine: dict with 'sockets' and 'cores_per_socket', plus
        optional 'nL1'..'nSystem' cluster sizes for each level."""
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes  # list of topmost node(s)

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        # depth-first dump, indenting three spaces per tree level
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Precomputed topology distance between CPUs a and b."""
        return self.dist_mat[a,b]
87 | |||
88 | |||
# cache of Topology objects, keyed by hostname
topologies = {}

def get_topo(host):
    """Return the Topology for host, building and caching it on first use."""
    topo = topologies.get(host)
    if topo is None:
        topo = Topology(machines[host])
        topologies[host] = topo
    return topo
97 | |||
def non_polluter_filename(csv_file):
    """Map a polluters=True measurement filename to its polluters=False twin."""
    # plain literal substitution; the pattern has no regex metacharacters
    return csv_file.replace("polluters=True", "polluters=False")
100 | |||
# find the max/median/mean/std of cold-cache produce (write) costs
def process_cpmd(csv_file, params):
    """Distill one producer/consumer overhead CSV into per-distance stats.

    Reads the cold-cache produce cost of each sample, groups costs by CPU
    distance, discards IQR outliers, converts cycles to microseconds, and
    returns {distance: {'max','median','mean','std'}}.

    Raises Exception when the filename params do not mark a pco file.
    """
    if 'pco' not in params:
        # fixed a stray ')' that used to appear inside this message
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # validates the host and warms the topology cache; the per-sample
    # distance itself comes from the CSV's DIST column below
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the overhead CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both files are closed (they previously leaked)
    with open(csv_file, "r") as ifile, \
         open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        # bc_row is unused, but zipping against the best-case file also
        # truncates to the shorter of the two traces, so the pairing stays
        for (row, bc_row) in zip(reader, bc_reader):
            cold = int(row[PRODUCE_COLD])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(cold)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # drop outliers beyond 1.5*IQR, then convert cycles -> microseconds
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    # print costs
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150 | |||
def parse_args():
    """Parse command-line options; positional args are the input files."""
    usage = "usage: %prog [files...]"
    return OptionParser(usage).parse_args()
154 | |||
def safe_split(t, delim):
    """Split t on delim, returning a (key, value) pair.

    A token without the delimiter yields (token, None) so callers can
    unpack every result uniformly.
    """
    parts = t.split(delim)
    if len(parts) == 1:
        return (parts[0], None)
    # previously returned a bare list here; a tuple keeps the return
    # type consistent with the no-delimiter branch
    return tuple(parts)
160 | |||
def get_level(machine, ncpus):
    """Name the topology level shared by CPU 0 and CPU ncpus-1.

    Raises Exception when the distance falls outside the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # '<' (was '<='): dist == len(names) used to raise IndexError on
    # names[dist] instead of reaching the intended Exception
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169 | |||
def main():
    """Distill pco_*.csv overhead files into per-WSS trend CSVs.

    Groups input files by their non-WSS parameters, computes per-distance
    write-cost statistics for each, and emits one output CSV per parameter
    combination and statistic (max/median/mean).
    """
    opts, args = parse_args()

    # keep only existing files whose names match pco_*.csv
    files = filter(os.path.exists, args)
    regex = fnmatch.translate("pco_*.csv")
    csvs = re.compile(regex)
    files = filter(csvs.search, files)

    results = {}
    for f in files:
        # filename encodes key=value parameters separated by '_'
        temp = os.path.basename(f).split(".csv")[0]
        tokens = temp.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    # one output CSV per parameter combination and statistic
    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dwo_cold_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes the output file (it previously leaked)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows sorted by WSS; columns sorted by distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')
203 | |||
# entry point guard: run the distiller only when executed as a script
if __name__ == '__main__':
    main()