about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2014-01-31 21:55:03 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2014-01-31 21:55:03 -0500
commit21a605fb8fe90f3b2659cb9d93039232bb2bddc4 (patch)
tree3d32f1912a5a639f7152adee608e307369408da0
parentc55e81ec12f80f60846b251aa7bbe0f6c044e7e8 (diff)
Compute costs for reading and writing data. (branch: wip-ecrts14-pgm)
-rwxr-xr-xdistill_read_hot.py205
-rwxr-xr-xdistill_write_cold.py205
-rw-r--r--gen/edf_generators.py11
-rw-r--r--gen/generator.py9
4 files changed, 430 insertions, 0 deletions
diff --git a/distill_read_hot.py b/distill_read_hot.py
new file mode 100755
index 0000000..7b994ff
--- /dev/null
+++ b/distill_read_hot.py
@@ -0,0 +1,205 @@
1#!/usr/bin/env python
2
3import os
4import re
5import fnmatch
6import shutil as sh
7import sys
8import csv
9import numpy as np
10from scipy.stats import scoreatpercentile
11import bisect
12from optparse import OptionParser
13
14from utils.machines import machines
15
16import utils.iqr
17
class Topology:
    """Model of a machine's cache/memory hierarchy.

    Builds a tree of Nodes bottom-up (CPUs -> L1 -> L2 -> L3 -> Mem ->
    System) from a machine description dict and precomputes a symmetric
    CPU-to-CPU distance matrix.  The distance between two CPUs is the
    number of non-pass-through levels climbed before their paths to the
    root meet (0 = same CPU, 1 = shared first non-private level, ...).
    """
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels, bottom-up; a 'n<level>' key in the machine dict
    # gives the cluster size (children per node) at that level
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        """One vertex of the topology tree."""
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Group child_nodes into parents for level l; returns the parents."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            # level not described: one parent per child (pass-through)
            cluster_sz = 1
        nchildren = len(child_nodes)
        # NOTE: '//' (floor division) keeps this correct under Python 3;
        # the original '/' relied on Python 2 integer-division semantics
        # and produced floats (TypeError in range()) under Python 3.
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count non-trivial levels climbed until a's and b's paths meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric, so only the upper triangle is computed
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        """Append an indented dump of the subtree at node to buf."""
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Return the precomputed distance between CPUs a and b."""
        return self.dist_mat[a,b]
87
88
# per-host cache of constructed Topology objects
topologies = {}
def get_topo(host):
    """Return the (memoized) Topology for the named host."""
    try:
        return topologies[host]
    except KeyError:
        topologies[host] = Topology(machines[host])
        return topologies[host]
97
def non_polluter_filename(csv_file):
    """Map a polluters=True trace filename to its polluters=False twin."""
    # the pattern is a fixed literal, so plain string replacement suffices
    return csv_file.replace("polluters=True", "polluters=False")
100
# find the max/avg/std of preemption and migration
def process_cpmd(csv_file, params):
    """Distill one consumer-overhead CSV into per-distance statistics.

    Reads the polluter trace alongside its polluters=False twin (rows
    stay paired), buckets the hot consume cost by CPU distance, trims
    outliers with a 1.5-IQR filter, converts cycles to microseconds,
    and returns {distance: {'max', 'median', 'mean', 'std'}}.
    Raises Exception when the file is not a producer/consumer trace.
    """
    if 'pco' not in params:
        # original message had a stray ')' at the end
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # warms the per-host topology cache (the value itself is unused here)
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the measurement CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both handles are closed (the originals leaked);
    # zip keeps the two traces row-paired and stops at the shorter one
    with open(csv_file, "r") as ifile, open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        for (row, bc_row) in zip(reader, bc_reader):
            hot = int(row[CONSUME_HOT])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(hot)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # discard outliers beyond 1.5 IQR, then convert cycles -> us in place
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150
def parse_args():
    """Parse the command line; every positional argument is an input file."""
    return OptionParser("usage: %prog [files...]").parse_args()
154
def safe_split(t, delim):
    """Split 'key<delim>value' into a (key, value) pair.

    Returns (t, None) when the delimiter is absent.  Splits on the
    FIRST delimiter only, so values may themselves contain the
    delimiter; the original returned the full split list, which crashed
    the caller's 2-tuple unpack on such tokens (and inconsistently
    returned a list or a tuple depending on the input).
    """
    key, sep, val = t.partition(delim)
    return (key, val) if sep else (key, None)
160
def get_level(machine, ncpus):
    """Name the topology level shared by CPUs 0 and ncpus-1 on machine.

    Returns one of 'L1', 'L2', 'L3', 'mem', 'sys'; raises Exception when
    the distance exceeds the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # original used 'dist <= len(names)', which let dist == len(names)
    # through and raised IndexError instead of the intended exception;
    # the unreachable trailing "return ''" is dropped
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169
def main():
    """Distill every pco_*.csv named on the command line into summary
    CSVs: one file per configuration and per max/median/mean statistic."""
    opts, args = parse_args()

    # keep only existing files whose name matches pco_*.csv
    files = [f for f in args if os.path.exists(f)]
    csvs = re.compile(fnmatch.translate("pco_*.csv"))
    files = [f for f in files if csvs.search(f)]

    results = {}
    for f in files:
        # the filename stem is a '_'-separated list of key=value tokens
        stem = os.path.basename(f).split(".csv")[0]
        tokens = stem.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        # everything except wss identifies one experiment configuration
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dro_hot_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes/flushes the output (the original leaked the
            # handle and shadowed the loop variable 'f' with it)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows ordered by WSS; columns ordered by CPU distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')

if __name__ == '__main__':
    main()
diff --git a/distill_write_cold.py b/distill_write_cold.py
new file mode 100755
index 0000000..28e9eb0
--- /dev/null
+++ b/distill_write_cold.py
@@ -0,0 +1,205 @@
1#!/usr/bin/env python
2
3import os
4import re
5import fnmatch
6import shutil as sh
7import sys
8import csv
9import numpy as np
10from scipy.stats import scoreatpercentile
11import bisect
12from optparse import OptionParser
13
14from utils.machines import machines
15
16import utils.iqr
17
class Topology:
    """Model of a machine's cache/memory hierarchy.

    Builds a tree of Nodes bottom-up (CPUs -> L1 -> L2 -> L3 -> Mem ->
    System) from a machine description dict and precomputes a symmetric
    CPU-to-CPU distance matrix.  The distance between two CPUs is the
    number of non-pass-through levels climbed before their paths to the
    root meet (0 = same CPU, 1 = shared first non-private level, ...).
    """
    ncpus, root, leaves, dist_mat = 0, None, None, None
    # hierarchy levels, bottom-up; a 'n<level>' key in the machine dict
    # gives the cluster size (children per node) at that level
    levels = ['L1', 'L2', 'L3', 'Mem', 'System']

    class Node:
        """One vertex of the topology tree."""
        idx, name, parent, children = 0, 'Unk', None, None
        def __init__(self, idx, name, parent = None):
            self.idx = idx
            self.name = name
            self.parent = parent
            self.children = []
        def __repr__(self):
            return self.name + '_' + str(self.idx)

    def __build_level_above(self, machine, l, child_nodes):
        """Group child_nodes into parents for level l; returns the parents."""
        key = 'n' + l
        if key in machine:
            cluster_sz = machine[key]
        else:
            # level not described: one parent per child (pass-through)
            cluster_sz = 1
        nchildren = len(child_nodes)
        # NOTE: '//' (floor division) keeps this correct under Python 3;
        # the original '/' relied on Python 2 integer-division semantics
        # and produced floats (TypeError in range()) under Python 3.
        nodes = [self.Node(idx, l) for idx in range(nchildren // cluster_sz)]
        for i in range(nchildren):
            child_nodes[i].parent = nodes[i // cluster_sz]
            nodes[i // cluster_sz].children.append(child_nodes[i])
        return nodes

    def __find_dist(self, a, b):
        """Count non-trivial levels climbed until a's and b's paths meet."""
        if a != b:
            # pass-through (ex. as CPU is to private L1): a single-child
            # level does not add to the distance
            if len(a.parent.children) == 1:
                return self.__find_dist(a.parent, b.parent)
            else:
                return 1 + self.__find_dist(a.parent, b.parent)
        return 0

    def __build_dist_matrix(self):
        # symmetric, so only the upper triangle is computed
        dist_mat = np.empty([self.ncpus, self.ncpus], int)
        for i in range(self.ncpus):
            for j in range(i, self.ncpus):
                dist_mat[i,j] = dist_mat[j,i] = self.__find_dist(self.leaves[i], self.leaves[j])
        return dist_mat

    def __init__(self, machine):
        self.ncpus = machine['sockets']*machine['cores_per_socket']

        # build the Topology bottom up
        self.leaves = [self.Node(idx, 'CPU') for idx in range(self.ncpus)]
        nodes = self.leaves
        for l in self.levels:
            nodes = self.__build_level_above(machine, l, nodes)
        self.root = nodes

        self.dist_mat = self.__build_dist_matrix()

    def __repr_level(self, node, stem, buf):
        """Append an indented dump of the subtree at node to buf."""
        spacing = 3
        buf += stem + node.name + '_' + str(node.idx) + '\n'
        for c in node.children:
            buf = self.__repr_level(c, stem + ' '*spacing, buf)
        return buf

    def __repr__(self):
        buf = self.__repr_level(self.root[0], '', '')
        return buf

    def distance(self, a, b):
        """Return the precomputed distance between CPUs a and b."""
        return self.dist_mat[a,b]
87
88
# per-host cache of constructed Topology objects
topologies = {}
def get_topo(host):
    """Return the (memoized) Topology for the named host."""
    try:
        return topologies[host]
    except KeyError:
        topologies[host] = Topology(machines[host])
        return topologies[host]
97
def non_polluter_filename(csv_file):
    """Map a polluters=True trace filename to its polluters=False twin."""
    # the pattern is a fixed literal, so plain string replacement suffices
    return csv_file.replace("polluters=True", "polluters=False")
100
# find the max/avg/std of preemption and migration
def process_cpmd(csv_file, params):
    """Distill one producer-overhead CSV into per-distance statistics.

    Reads the polluter trace alongside its polluters=False twin (rows
    stay paired), buckets the cold produce cost by CPU distance, trims
    outliers with a 1.5-IQR filter, converts cycles to microseconds,
    and returns {distance: {'max', 'median', 'mean', 'std'}}.
    Raises Exception when the file is not a producer/consumer trace.
    """
    if 'pco' not in params:
        # original message had a stray ')' at the end
        raise Exception('not producer/consumer overhead file: %s' % csv_file)

    # warms the per-host topology cache (the value itself is unused here)
    topo = get_topo(params['host'])

    print('processing ' + csv_file)

    # column layout of the measurement CSVs
    SAMPLE = 0
    WSS = 1
    DELAY = 2
    LAST_CPU = 3
    NEXT_CPU = 4
    DIST = 5
    PRODUCE_COLD = 6
    PRODUCE_HOT = 7
    CONSUME_COLD = 8
    CONSUME_HOT = 9

    costs = {}
    # 'with' guarantees both handles are closed (the originals leaked);
    # zip keeps the two traces row-paired and stops at the shorter one
    with open(csv_file, "r") as ifile, open(non_polluter_filename(csv_file), "r") as bestcase:
        reader = csv.reader(ifile)
        bc_reader = csv.reader(bestcase)
        for (row, bc_row) in zip(reader, bc_reader):
            cold = int(row[PRODUCE_COLD])
            distance = int(row[DIST])
            if distance not in costs:
                costs[distance] = []
            costs[distance].append(cold)

    for d, c in costs.items():
        arr = np.sort(np.array(c, float))
        # discard outliers beyond 1.5 IQR, then convert cycles -> us in place
        (arr, mincut, maxcut) = utils.iqr.apply_iqr(arr, 1.5)
        for x in np.nditer(arr, op_flags=['readwrite']):
            x[...] = utils.machines.cycles_to_us(params['host'], x)
        costs[d] = arr

    stats = {}
    for d, arr in costs.items():
        stats[d] = {'max':arr.max(), 'median':np.median(arr), 'mean':arr.mean(), 'std':arr.std()}

    return stats
150
def parse_args():
    """Parse the command line; every positional argument is an input file."""
    return OptionParser("usage: %prog [files...]").parse_args()
154
def safe_split(t, delim):
    """Split 'key<delim>value' into a (key, value) pair.

    Returns (t, None) when the delimiter is absent.  Splits on the
    FIRST delimiter only, so values may themselves contain the
    delimiter; the original returned the full split list, which crashed
    the caller's 2-tuple unpack on such tokens (and inconsistently
    returned a list or a tuple depending on the input).
    """
    key, sep, val = t.partition(delim)
    return (key, val) if sep else (key, None)
160
def get_level(machine, ncpus):
    """Name the topology level shared by CPUs 0 and ncpus-1 on machine.

    Returns one of 'L1', 'L2', 'L3', 'mem', 'sys'; raises Exception when
    the distance exceeds the known levels.
    """
    dist = get_topo(machine).distance(0, int(ncpus)-1)
    names = ['L1', 'L2', 'L3', 'mem', 'sys']
    # original used 'dist <= len(names)', which let dist == len(names)
    # through and raised IndexError instead of the intended exception;
    # the unreachable trailing "return ''" is dropped
    if dist < len(names):
        return names[dist]
    raise Exception("Unable to determine level.")
169
def main():
    """Distill every pco_*.csv named on the command line into summary
    CSVs: one file per configuration and per max/median/mean statistic."""
    opts, args = parse_args()

    # keep only existing files whose name matches pco_*.csv
    files = [f for f in args if os.path.exists(f)]
    csvs = re.compile(fnmatch.translate("pco_*.csv"))
    files = [f for f in files if csvs.search(f)]

    results = {}
    for f in files:
        # the filename stem is a '_'-separated list of key=value tokens
        stem = os.path.basename(f).split(".csv")[0]
        tokens = stem.split("_")

        params = {k:v for (k,v) in map(lambda x: safe_split(x, "="), tokens)}
        # everything except wss identifies one experiment configuration
        common = tuple([params['host'], params['ncpu'], params['polluters'], params['walk'], params['hpages'], params['upages']])
        if common not in results:
            results[common] = {}
        results[common][int(params['wss'])] = process_cpmd(f, params)

    for common in results:
        trends = results[common]
        for t in ['max', 'median', 'mean']:
            name = 'dwo_cold_host=%s_lvl=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (common[0], get_level(common[0], common[1]), common[2], common[3], common[4], common[5], t)
            # 'with' closes/flushes the output (the original leaked the
            # handle and shadowed the loop variable 'f' with it)
            with open(name, 'w') as out:
                out.write('WSS,L1,L2,L3,MEM\n')
                # rows ordered by WSS; columns ordered by CPU distance
                for w, stats in sorted(trends.items()):
                    out.write('%d' % w)
                    for i, data in sorted(stats.items()):
                        out.write(',%.6f' % data[t])
                    out.write('\n')

if __name__ == '__main__':
    main()
diff --git a/gen/edf_generators.py b/gen/edf_generators.py
index eda23e4..cca4d44 100644
--- a/gen/edf_generators.py
+++ b/gen/edf_generators.py
@@ -269,4 +269,15 @@ class CflSplitPgmGenerator(EdfPgmGenerator):
269 if exp_params['level'] == 'ALL': 269 if exp_params['level'] == 'ALL':
270 # kludge: assume global task sets are always schedulable 270 # kludge: assume global task sets are always schedulable
271 is_sched = True 271 is_sched = True
272
273 if is_sched:
274 # compute the minimum time to produce/consume, so this can be discounted
275 # from the execution time during runtime
276 for ti in ts:
277 consume_amount = ti.wss
278 produce_amount = sum([e.wss for e in ti.node.outEdges])
279 consume_time = overheads.read(consume_amount)
280 produce_time = overheads.write(produce_amount)
281 ti.cost_discount = consume_time + produce_time
282
272 return is_sched, ts 283 return is_sched, ts
diff --git a/gen/generator.py b/gen/generator.py
index 8b3a189..e49606d 100644
--- a/gen/generator.py
+++ b/gen/generator.py
@@ -152,6 +152,7 @@ class Generator(object):
152 rates_arg = [] 152 rates_arg = []
153 etoe_arg = [] 153 etoe_arg = []
154 exec_arg = [] 154 exec_arg = []
155 discount_arg = []
155 cluster_arg = [] 156 cluster_arg = []
156 clustersz_arg = [] 157 clustersz_arg = []
157 wss_arg = [] 158 wss_arg = []
@@ -164,6 +165,7 @@ class Generator(object):
164 cluster_arg_t = [] 165 cluster_arg_t = []
165 graph_desc_arg_t = [] 166 graph_desc_arg_t = []
166 exec_arg_t = [] 167 exec_arg_t = []
168 discount_arg_t = []
167 rates_arg_t = [] 169 rates_arg_t = []
168 wss_arg_t = [] 170 wss_arg_t = []
169 split_arg_t = [] 171 split_arg_t = []
@@ -174,6 +176,9 @@ class Generator(object):
174 cluster_arg_t.append('node_' + str(n.id) + ':' + str(n.task.partition)) 176 cluster_arg_t.append('node_' + str(n.id) + ':' + str(n.task.partition))
175 cost_str = format(n.task.cost/1000.0, '.4f').rstrip('0').rstrip('.') 177 cost_str = format(n.task.cost/1000.0, '.4f').rstrip('0').rstrip('.')
176 exec_arg_t.append('node_' + str(n.id) + ':' + cost_str) 178 exec_arg_t.append('node_' + str(n.id) + ':' + cost_str)
179 if n.task.cost_discount > 10:
180 discount_str = format(n.task.cost_discount/1000.0, '.4f').rstrip('0').rstrip('.')
181 discount_arg_t.append('node_' + str(n.id) + ':' + discount_str)
177 if n.task.split != 1: 182 if n.task.split != 1:
178 split_arg_t.append('node_' + str(n.id) + ':' + str(n.task.split)) 183 split_arg_t.append('node_' + str(n.id) + ':' + str(n.task.split))
179 if n.isSrc == True: 184 if n.isSrc == True:
@@ -193,12 +198,14 @@ class Generator(object):
193 cluster_arg_t = ','.join(cluster_arg_t) 198 cluster_arg_t = ','.join(cluster_arg_t)
194 graph_desc_arg_t = ','.join(graph_desc_arg_t) 199 graph_desc_arg_t = ','.join(graph_desc_arg_t)
195 exec_arg_t = ','.join(exec_arg_t) 200 exec_arg_t = ','.join(exec_arg_t)
201 discount_arg_t = ','.join(discount_arg_t)
196 wss_arg_t = ','.join(wss_arg_t) 202 wss_arg_t = ','.join(wss_arg_t)
197 split_arg_t = ','.join(split_arg_t) 203 split_arg_t = ','.join(split_arg_t)
198 rates_arg_t = ','.join(rates_arg_t) 204 rates_arg_t = ','.join(rates_arg_t)
199 205
200 cluster_arg.append(cluster_arg_t) 206 cluster_arg.append(cluster_arg_t)
201 exec_arg.append(exec_arg_t) 207 exec_arg.append(exec_arg_t)
208 discount_arg.append(discount_arg_t)
202 graph_desc_arg.append(graph_desc_arg_t) 209 graph_desc_arg.append(graph_desc_arg_t)
203 wss_arg.append(wss_arg_t) 210 wss_arg.append(wss_arg_t)
204 split_arg.append(split_arg_t) 211 split_arg.append(split_arg_t)
@@ -223,6 +230,8 @@ class Generator(object):
223 pgm_args_t = ''; 230 pgm_args_t = '';
224 pgm_args_t += '--wait --cluster ' + cluster_arg[i] + ' --clusterSize ' + clustersz_arg[i] 231 pgm_args_t += '--wait --cluster ' + cluster_arg[i] + ' --clusterSize ' + clustersz_arg[i]
225 pgm_args_t += ' --graph ' + graph_desc_arg[i] + ' --rates ' + rates_arg[i] + ' --execution ' + exec_arg[i] 232 pgm_args_t += ' --graph ' + graph_desc_arg[i] + ' --rates ' + rates_arg[i] + ' --execution ' + exec_arg[i]
233 if len(discount_arg[i]) != 0:
234 pgm_args_t += ' --discount ' + discount_arg[i]
226 if len(split_arg[i]) != 0: 235 if len(split_arg[i]) != 0:
227 pgm_args_t += ' --split ' + split_arg[i] 236 pgm_args_t += ' --split ' + split_arg[i]
228 if len(wss_arg[i]) != 0: 237 if len(wss_arg[i]) != 0: