author    Glenn Elliott <gelliott@cs.unc.edu>  2014-01-21 23:06:05 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2014-01-21 23:06:05 -0500
commit    61d61b72c1bd50365aa5019aac5e104d1438b5fd (patch)
tree      d521da0a3f0f6d5825f153464720b90b3be6b947
parent    111fcad23c49330a24f6f0e93ac98668e2cc6c15 (diff)
parent    f73adf2e5e3ae46b1616c08f3b4edd6852afed31 (diff)
Merge branch 'wip-ecrts14-pgm' of ssh://rtsrv.cs.unc.edu/home/litmus/schedcat into wip-ecrts14-pgm
-rwxr-xr-x  ecrts14/ecrts14.py           |  19
-rwxr-xr-x  ecrts14/graph.py             | 116
-rwxr-xr-x  ecrts14/quick.py             | 583
-rw-r--r--  schedcat/overheads/model.py  |  62
4 files changed, 728 insertions(+), 52 deletions(-)
diff --git a/ecrts14/ecrts14.py b/ecrts14/ecrts14.py
index c348091..3cf1389 100755
--- a/ecrts14/ecrts14.py
+++ b/ecrts14/ecrts14.py
@@ -77,6 +77,7 @@ NAMED_HEIGHT_FACTORS = {
     'uni-short'  : [1.0/3.0, 1.0/2.0],
     'uni-medium' : [1.0/2.0, 3.0/4.0],
     'uni-tall'   : [3.0/4.0, 1.0],
+    'pipeline'   : [1.0, 1.0],
 }
 
 NAMED_FAN = {
@@ -321,6 +322,10 @@ def process_dp(_dp):
     while not complete(__avg_sched, n):
         ts, graphs, subts = create_pgm_task_set(dp)
 
+        if overheads.consumer is not None:
+            for t in ts:
+                overheads.consumer.place_production(t)
+
         num_graphs = len(graphs)
         avg_depth = sum([g.depth for g in graphs])/float(num_graphs)
         avg_graph_size = sum([len(g.nodes) for g in graphs])/float(num_graphs)
@@ -335,8 +340,8 @@ def process_dp(_dp):
                 this_method = {}
                 this_method['sched'] = is_sched
                 if is_sched:
-                    this_method['latencies'] = map(graph.bound_graph_response_time, graphs)
-                    this_method['ideal_latencies'] = map(graph.compute_ideal_response_time, graphs)
+                    this_method['latencies'] = map(graph.bound_graph_latency, graphs)
+                    this_method['ideal_latencies'] = map(graph.compute_ideal_graph_latency, graphs)
                 this_task_set[method] = this_method
         else:
             # global. no partitioning. all methods equivalent
@@ -345,8 +350,8 @@ def process_dp(_dp):
             this_method = {}
             this_method['sched'] = is_sched
             if is_sched:
-                this_method['latencies'] = map(graph.bound_graph_response_time, graphs)
-                this_method['ideal_latencies'] = map(graph.compute_ideal_response_time, graphs)
+                this_method['latencies'] = map(graph.bound_graph_latency, graphs)
+                this_method['ideal_latencies'] = map(graph.compute_ideal_graph_latency, graphs)
             for method, _, _ in TESTS:
                 this_task_set[method] = this_method
 
@@ -354,7 +359,7 @@ def process_dp(_dp):
         all_sched = True if num_method_sched == n_methods else False
 
         if all_sched:
-            hrt_ideal_response_times = map(graph.compute_hrt_ideal_response_time, graphs)
+            hrt_ideal_response_times = map(graph.compute_hrt_ideal_graph_latency, graphs)
 
             # they're all schedulable, so compute graph latencies
             # redo with job splitting
@@ -363,12 +368,12 @@ def process_dp(_dp):
                     # redo the test to get the split-based latency
                     dp.job_splitting = True
                     is_sched, processed_ts = test(ts, graphs, subts, dp, overheads)
-                    this_task_set[method]['split_latencies'] = map(graph.bound_graph_response_time, graphs)
+                    this_task_set[method]['split_latencies'] = map(graph.bound_graph_latency, graphs)
             else:
                 # global. no partitioning. all methods equivalent
                 dp.job_splitting = True
                 is_sched, processed_ts = TESTS[0][2](ts, graphs, subts, dp, overheads)
-                split_lat = map(graph.bound_graph_response_time, graphs)
+                split_lat = map(graph.bound_graph_latency, graphs)
                 for method, _, _ in TESTS:
                     this_task_set[method]['split_latencies'] = split_lat
 
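A note on the first hunk above: height factors scale a graph's node count into a permitted depth range at generation time (see create_pgm_task_set in quick.py below), so the new 'pipeline' entry, [1.0, 1.0], pins the depth to the node count and every generated graph degenerates to a chain. A small sketch of that translation; depth_range is a hypothetical helper written here only for illustration:

    NAMED_HEIGHT_FACTORS = {
        'uni-short'  : [1.0/3.0, 1.0/2.0],
        'uni-medium' : [1.0/2.0, 3.0/4.0],
        'uni-tall'   : [3.0/4.0, 1.0],
        'pipeline'   : [1.0, 1.0],
    }

    def depth_range(name, graph_size):
        # Mirrors create_pgm_task_set: scale the size by both factors,
        # truncate to ints, and never allow a depth below 1.
        lo, hi = NAMED_HEIGHT_FACTORS[name]
        return max(1, int(lo * graph_size)), max(1, int(hi * graph_size))

    print(depth_range('uni-short', 12))   # (3, 6): float truncation turns 12/3 into 3
    print(depth_range('pipeline', 12))    # (12, 12): a 12-node chain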
diff --git a/ecrts14/graph.py b/ecrts14/graph.py
index 5006033..95e63cb 100755
--- a/ecrts14/graph.py
+++ b/ecrts14/graph.py
@@ -57,7 +57,7 @@ class node:
         return len(self.inEdges) + len(self.outEdges)
 
     def __repr__(self):
-        graph_id = self.graph.id if self.graph and hasattr(self.graph.id) else -1
+        graph_id = self.graph.id if self.graph and hasattr(self.graph, 'id') else -1
         stem = 'node_%s(gid:%d,l:%d,src:%d,sink:%d,spine:%d)' % (self.id, graph_id, self.privLevel, self.isSrc, self.isSink, self.isSpine)
         pred_str = 'preds{'
         for n in self.pred:
@@ -107,6 +107,8 @@ class graph:
         self.edges = []
         self.nodesAtLevel = {}
         self.depth = 0
+        # assume a sporadic release from sources
+        self.isSporadic = True
 
     def __repr__(self):
         s = ''
@@ -142,38 +144,102 @@ class graph:
         outs.write('}')
         return outs.getvalue()
 
-def bound_graph_response_time(g):
+def bound_graph_latency(g):
+    sporadic_latency = 0
+    graph_latency = 0
+
     if len(g.nodes) == 1:
-        g.response_time = g.nodes[0].task.response_time
+        sporadic_latency = g.nodes[0].task.response_time
     else:
+        # We assume all nodes share a period
+        period = g.nodes[0].task.period
+
         for n in g.nodes:
-            n.latency = 0.0 if not n.isSrc else n.task.response_time
+            n.latency = 0 if not n.isSrc else max(period, n.task.response_time)
             n.isQueued = False
-
-        queue = g.sources[:]
+        for e in g.edges:
+            e.longest = False
 
+        # accumulate latencies down the graph
+        queue = g.sources[:]
         while len(queue) != 0:
             # breadth-first propagation of latencies from srcs to sinks
             v = queue.pop(0)
             v.isQueued = False
             for e in v.outEdges:
-                latency = v.latency + e.s.task.response_time
+                latency = v.latency + max(period, e.s.task.response_time)
                 # if we updated the latency, then we need to revisit the node
                 if latency > e.s.latency:
+                    # clear out the old longest edge
+                    cur_longest = [x for x in e.s.inEdges if x.longest == True]
+                    if len(cur_longest) > 0:
+                        assert len(cur_longest) == 1
+                        cur_longest[0].longest = False
+                    # set the new longest edge
+                    e.longest = True
                     e.s.latency = latency
                     if e.s.isQueued == False:
                         e.s.isQueued = True
                         queue.append(e.s)
-        g.response_time = max(g.sinks, key=lambda n: n.latency).latency
-    assert g.response_time != 0.0
-    return g.response_time
 
-def compute_ideal_response_time(g):
+        max_sink = max(g.sinks, key=lambda n: n.latency)
+        longest_path = []
+        longest_path.append(max_sink)
+        queue.append(max_sink)
+        while len(queue) != 0:
+            v = queue.pop(0)
+            if v.inEdges:
+                longest_edge = [e for e in v.inEdges if e.longest is True]
+                assert len(longest_edge) == 1
+                longest_edge = longest_edge[0]
+                # prepend to the path
+                longest_path.insert(0, longest_edge.p)
+                queue.append(longest_edge.p)
+            else:
+                assert v.isSrc
+        assert len(longest_path) <= g.depth
+
+        depth_latency = len(longest_path) * period
+        sporadic_latency = max_sink.latency - depth_latency
+
+        if g.isSporadic:
+            # factor the accumulated latency into graph-depth and
+            # tardiness-based components
+
+            if max_sink.task.response_time < period:
+                # Optimization:
+                #  * Remove one period from depth_latency
+                #  * Add the response time of the sink to depth_latency
+                # (We don't modify sporadic_latency since we know the sink's
+                # contribution was 0.)
+                graph_latency = depth_latency - period + max_sink.task.response_time
+            else:
+                graph_latency = depth_latency
+        else:
+            # TODO: Optimize based on the sink task's response time
+            # Rate-based bound:
+            graph_latency = 4 * depth_latency
+
+    assert sporadic_latency >= 0
+    assert graph_latency >= 0
+
+#    print 'depth: \t', g.depth
+#    print 'period: \t', period
+#    print 'd*p: \t', g.depth * period
+#    print 'combined: \t', sporadic_latency + graph_latency
+#    print 'sporadic latency:\t', sporadic_latency
+#    print 'graph latency: \t', graph_latency
+#    raw_input('press enter')
+
+    return sporadic_latency + graph_latency
+
+def compute_ideal_graph_latency(g):
+    graph_latency = 0
     if len(g.nodes) == 1:
-        g.response_time = g.nodes[0].task.cost
+        graph_latency = g.nodes[0].task.cost
     else:
         for n in g.nodes:
-            n.latency = 0.0 if not n.isSrc else n.task.cost
+            n.latency = 0 if not n.isSrc else n.task.cost
             n.isQueued = False
 
         queue = g.sources[:]
@@ -190,21 +256,17 @@ def compute_ideal_response_time(g):
                 if e.s.isQueued == False:
                     e.s.isQueued = True
                     queue.append(e.s)
-        g.response_time = max(g.sinks, key=lambda n: n.latency).latency
-    if g.response_time == 0:
-        if len(g.nodes) == 1:
-            print 'single-node graph: node info:',g.nodes[0],' task info: ',g.nodes[0].task
-        else:
-            print 'multi-node graph...'
-    assert g.response_time != 0
-    return g.response_time
+        graph_latency = max(g.sinks, key=lambda n: n.latency).latency
+    assert graph_latency > 0
+    return graph_latency
 
-def compute_hrt_ideal_response_time(g):
+def compute_hrt_ideal_graph_latency(g):
+    graph_latency = 0
     if len(g.nodes) == 1:
-        g.response_time = g.nodes[0].task.deadline
+        graph_latency = g.nodes[0].task.deadline
     else:
         for n in g.nodes:
-            n.latency = 0.0 if not n.isSrc else n.task.deadline
+            n.latency = 0 if not n.isSrc else n.task.deadline
             n.isQueued = False
 
         queue = g.sources[:]
@@ -221,9 +283,9 @@ def compute_hrt_ideal_response_time(g):
                 if e.s.isQueued == False:
                     e.s.isQueued = True
                     queue.append(e.s)
-        g.response_time = max(g.sinks, key=lambda n: n.latency).latency
-    assert g.response_time != 0.0
-    return g.response_time
+        graph_latency = max(g.sinks, key=lambda n: n.latency).latency
+    assert graph_latency > 0
+    return graph_latency
 
 def link(up, down):
     up.succ.append(down)
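The rewritten bound_graph_latency above does three things: it relaxes per-node latencies breadth-first, charging max(period, response time) per hop; it remembers each node's longest incoming edge during relaxation; and it walks those edges backward from the worst-case sink to recover the critical path. A minimal standalone sketch of the relaxation and backtrack, with Node/Edge as hypothetical stand-ins for the real graph-module classes (the isQueued de-duplication guard is dropped for brevity):

    from collections import deque

    class Node(object):
        def __init__(self, name, response_time):
            self.name, self.response_time = name, response_time
            self.inEdges, self.outEdges = [], []
            self.latency = 0

    class Edge(object):
        def __init__(self, p, s):                  # p: producer, s: successor
            self.p, self.s, self.longest = p, s, False
            p.outEdges.append(self)
            s.inEdges.append(self)

    def bound_latency(sources, sinks, period):
        for src in sources:
            src.latency = max(period, src.response_time)
        queue = deque(sources)
        while queue:                               # breadth-first relaxation
            v = queue.popleft()
            for e in v.outEdges:
                cand = v.latency + max(period, e.s.response_time)
                if cand > e.s.latency:
                    for old in e.s.inEdges:        # at most one longest edge survives
                        old.longest = False
                    e.longest = True
                    e.s.latency = cand
                    queue.append(e.s)
        sink = max(sinks, key=lambda n: n.latency)
        path, v = [sink], sink
        while v.inEdges:                           # backtrack along longest edges
            v = next(e for e in v.inEdges if e.longest).p
            path.insert(0, v)
        return sink.latency, path

    # Diamond graph a -> {b, c} -> d, period 10:
    a, b, c, d = [Node(n, rt) for n, rt in [('a', 12), ('b', 5), ('c', 30), ('d', 8)]]
    for up, dn in [(a, b), (a, c), (b, d), (c, d)]:
        Edge(up, dn)
    lat, path = bound_latency([a], [d], 10)
    print('%d via %s' % (lat, '-'.join(n.name for n in path)))   # 52 via a-c-d

With the path in hand, len(path) * period is the depth component and the remainder of the sink's latency is the sporadic (tardiness) component, which is exactly the split the new code computes.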
diff --git a/ecrts14/quick.py b/ecrts14/quick.py
new file mode 100755
index 0000000..560b365
--- /dev/null
+++ b/ecrts14/quick.py
@@ -0,0 +1,583 @@
+#!/usr/bin/env python
+
+from __future__ import division
+
+import argparse
+import random
+import sys
+import os
+import math
+import time
+import inspect
+
+import sqlite3 as lite
+import json
+
+import copy
+from collections import defaultdict
+from csv import DictWriter
+from itertools import product
+from math import ceil
+from multiprocessing import Pool, cpu_count
+from numpy import arange
+from pprint import pprint
+import traceback
+
+from schedcat.model.tasks import SporadicTask, TaskSystem
+from schedcat.overheads.model import Overheads, CacheDelay, ConsumerOverheads, ProducerOverheads
+
+import schedcat.model.resources as resources
+import schedcat.generator.tasks as tasks
+import schedcat.mapping.binpack as bp
+
+from schedcat.generator.tasksets import NAMED_UTILIZATIONS
+
+from schedcat.util.storage import storage
+
+from generator import DesignPointGenerator
+from schedcat.stats.stats import proportion_ci
+
+import graph
+import tests
+import topology
+from machines import machines
+
+#import gc
+#import resource
+import traceback
+
+import database as db
+
+NAMED_PERIODS_US = {
+    # Named period distributions used in several UNC papers, in microseconds
+    'uni-short'    : tasks.uniform_int( 3*1000,  33*1000),
+    'uni-moderate' : tasks.uniform_int(10*1000, 100*1000),
+    'uni-long'     : tasks.uniform_int(50*1000, 250*1000),
+}
+
+# based off of a 24-core system
+# fewer graphs = harder partitioning
+NAMED_NUM_GRAPHS = {
+    'uni-many'    : graph.uniform(24, 24*3),
+    'uni-medium'  : graph.uniform(12, 24),
+    'uni-few'     : graph.uniform(1, 12),
+
+    'bimo-many'   : graph.binomial(24, 24*3),
+    'bimo-medium' : graph.binomial(12, 24),
+    'bimo-few'    : graph.binomial(1, 12),
+}
+
+NAMED_SHAPES = {
+    'uniform'  : graph.uniform(),
+    'binomial' : graph.binomial(),
+    # 'geometric': graph.geometric(),
+}
+
+NAMED_HEIGHT_FACTORS = {
+    'uni-short'  : [1.0/3.0, 1.0/2.0],
+    'uni-medium' : [1.0/2.0, 3.0/4.0],
+    'uni-tall'   : [3.0/4.0, 1.0],
+}
+
+NAMED_FAN = {
+    'none'        : graph.uniform(1,1),
+    'uniform_3'   : graph.uniform(1,3),
+    'uniform_6'   : graph.uniform(1,6),
+    'geometric_3' : graph.geometric(1,3),
+    'geometric_6' : graph.geometric(1,3),
+}
+
+NAMED_EDGE_HOP = {
+    'none'          : graph.uniform(1,1),
+    'uniform_3'     : graph.uniform(1,3),
+    'uniform_deep'  : graph.uniform(1,100),
+    'geometric_3'   : graph.geometric(1,3),
+}
+
+NAMED_EDGE_WSS = {
+    'uni-light'   : tasks.uniform_int(1, 64),
+    'uni-medium'  : tasks.uniform_int(256, 1024),
+    'uni-heavy'   : tasks.uniform_int(2*1024, 8*1024),
+
+    'bimo-light'  : tasks.multimodal([(tasks.uniform_int(64,256), 8), (tasks.uniform_int(2*1024, 8*1024), 1)]),
+    'bimo-medium' : tasks.multimodal([(tasks.uniform_int(64,256), 6), (tasks.uniform_int(2*1024, 8*1024), 3)]),
+    'bimo-heavy'  : tasks.multimodal([(tasks.uniform_int(64,256), 4), (tasks.uniform_int(2*1024, 8*1024), 5)]),
+}
+
+
+TESTS = [
+    (0, "CacheAgnostic",      tests.test_partition_no_cache),
+    (1, "MaximizeParallelism", tests.test_partition_parallel),
+    (2, "CacheAware",         tests.test_partition_cache_aware),
+    (3, "CacheAwareEdges",    tests.test_partition_cache_aware_edges),
+    (4, "CacheAwareBFSEdges", tests.test_partition_cache_aware_bfs),
+    (5, "CacheAwareDFSEdges", tests.test_partition_cache_aware_dfs)
+    # (6, "MaximizeParallelismCacheAware", tests.test_partition_parallel2)
+]
+
+MIN_SAMPLES = 5
+MAX_SAMPLES = 10
+#MIN_SAMPLES = 200
+#MAX_SAMPLES = 500
+#MIN_SAMPLES = 1000
+#MAX_SAMPLES = 10000
+MAX_CI = 0.05
+CONFIDENCE = 0.95
+
+#TOTAL_TESTED = 0
+
+def create_pgm_task_set(dp):
+    tg = tasks.TaskGenerator(period = NAMED_PERIODS_US[dp.period],
+                             util = NAMED_UTILIZATIONS[dp.task_util])
+    ts = tg.make_task_set(max_util = dp.sys_util, squeeze = True)
+
+    # swap the squeezed task into a random position
+    shuf = random.randint(0, len(ts)-1)
+    ts[-1], ts[shuf] = ts[shuf], ts[-1]
+
+    nrTasks = len(ts)
+    nrGraphs = min(dp.num_graphs(), nrTasks)
+
+    shares = []
+    for i in range(nrGraphs):
+        shares.append(1.0 - random.random())
+
+    weight = sum(shares)
+    shares = [int((s/weight)*nrTasks + 0.5) for s in shares]
+
+    # we may have gained/lost a node due to rounding;
+    # add/remove a node from any share with space
+    todrop = sum(shares) - nrTasks
+    if todrop > 0:
+        for i in range(todrop):
+            candidates = [i for i,y in enumerate(shares) if y > 1]
+            shares[random.choice(candidates)] -= 1
+    elif todrop < 0:
+        for i in range(-1*todrop):
+            shares[random.randint(0,len(shares)-1)] += 1
+
+    # make sure that no graph has zero nodes;
+    # steal from graphs at random
+    nullGraphs = [i for i,y in enumerate(shares) if y == 0]
+    while nullGraphs:
+        stealGraphs = [i for i,y in enumerate(shares) if y > 1]
+        assert stealGraphs
+        shares[random.choice(stealGraphs)] -= 1
+        shares[nullGraphs[-1]] += 1
+        nullGraphs.pop()
+
+    assert sum(shares) == nrTasks
+
+    subtasksets = []
+    count = 0
+    for i in range(nrGraphs):
+        subts = ts[count:count+shares[i]]
+        assert len(subts) > 0
+        subtasksets.append(subts)
+        count += shares[i]
+
+    graphs = []
+    for subts in subtasksets:
+        graphsz = len(subts)
+        min_depth = max(1, int(dp.depth_factor[0] * graphsz))
+        max_depth = max(1, int(dp.depth_factor[1] * graphsz))
+        gg = graph.GraphGenerator(tasks.uniform_int(graphsz, graphsz),
+                                  tasks.uniform_int(min_depth, max_depth),
+                                  dp.node_placement,
+                                  dp.fan_out, dp.fan_in_cap,
+                                  dp.edge_distance,
+                                  dp.nr_source, dp.nr_sink,
+                                  False, False)
+        g = gg.graph()
+
+        assert len(g.nodes) == graphsz
+
+        # assign working sets to each edge
+        for e in g.edges:
+            e.wss = dp.wss()
+
+        # bind tasks/nodes, and compute node wss
+        for i in range(graphsz):
+            g.nodes[i].task = subts[i]
+            subts[i].node = g.nodes[i]
+            subts[i].graph = g
+            # task working set is the sum of inputs
+            subts[i].wss = 0 if len(g.nodes[i].inEdges) == 0 else sum([w.wss for w in g.nodes[i].inEdges])
+
+        # single root
+        root = [r for r in g.nodes if r.isSrc][0]
+
+        # adjust every task to have the same execution rate as the source. must
+        # adjust both period and execution time. utilization remains unchanged.
+        for t in subts:
+            if root.task != t:
+                tutil = t.utilization()
+                t.period = root.task.period
+                t.deadline = t.period
+                t.cost = int(t.period * tutil + 0.5)
+
+        graphs.append(g)
+
+    # tag each graph with an id
+    for i,g in enumerate(graphs):
+        g.id = i
+
+    return ts, graphs, subtasksets
+
+def complete(results, n):
+    if n < MIN_SAMPLES:
+        return False
+    elif n > MAX_SAMPLES:
+        return True
+    else:
+        for method, _, _ in TESTS:
+            if proportion_ci(results[method], n, CONFIDENCE) > MAX_CI:
+                return False
+        return True
+
+def update_mean(old_mean, n, new_sample):
+    return (old_mean*n + new_sample)/(n+1)
+
+def get_ovh_dir():
+    parent = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+    ovh_dir = os.path.join(parent, 'overheads')
+    return ovh_dir
+
+def get_consumer_overheads(dp, _system):
+    co_file = '%s/consumer/dco_host=%s_lvl=mem_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (dp.host, dp.host, str(dp.polluters), dp.walk, str(dp.huge_pages), str(dp.uncached), dp.ovh_type)
+    co_file = os.path.join(get_ovh_dir(), co_file)
+    co = ConsumerOverheads.from_file(co_file, non_decreasing=False, system=_system)
+    return co
+
+def get_producer_overheads(dp):
+    po_file = '%s/producer/dpo_host=%s_type=%s.csv' % (dp.host, dp.host, dp.ovh_type)
+    po_file = os.path.join(get_ovh_dir(), po_file)
+    po = ProducerOverheads.from_file(po_file, non_decreasing=False)
+    return po
+
+def get_cpmds(dp):
+    cpmd_file = '%s/cpmd/dpmo_host=%s_lvl=mem_wcycle=%s_polluters=%s_walk=%s_hpages=%s_upages=%s_type=%s.csv' % (dp.host, dp.host, str(dp.wcycle), str(dp.polluters), dp.walk, str(dp.huge_pages), str(dp.uncached), dp.ovh_type)
+    cpmd_file = os.path.join(get_ovh_dir(), cpmd_file)
+    cpmds = CacheDelay.from_file(cpmd_file, non_decreasing=False)
+    return cpmds
+
+def get_overheads(dp, system = None):
+    cluster_size = dp.processors/dp.nr_clusters
+    max_dist = dp.system.distance(0, cluster_size-1)
+    lvl = dp.system.levels[max_dist]
+    max_wss = dp.system.max_wss()
+    ovh_file = 'ovh_host=%s_sched=%s_lvl=%s_type=%s.csv' % (dp.host, dp.sched, lvl, dp.ovh_type)
+    ovh_file = os.path.join(get_ovh_dir(), ovh_file)
+    ovh = Overheads.from_file(ovh_file)
+    ovh.shared_cache = dp.system.schedcat_distance(0, max_dist)
+    ovh.cache_affinity_loss = get_cpmds(dp)
+    ovh.cache_affinity_loss.set_max_wss(max_wss)
+    ovh.consumer = get_consumer_overheads(dp, system)
+    ovh.producer = get_producer_overheads(dp)
+    return ovh
+
+def process_dp(_dp):
+
+    dp = copy.deepcopy(_dp)
+
+    # kludge in parameters that pickle doesn't like...
+    dp.system = topology.Topology(machines[dp.host])
+
+    # convert names to distributions
+    dp.num_graphs = NAMED_NUM_GRAPHS[dp.num_graphs]
+    dp.depth_factor = NAMED_HEIGHT_FACTORS[dp.depth_factor]
+    dp.node_placement = NAMED_SHAPES[dp.node_placement]
+    dp.fan_out = NAMED_FAN[dp.fan_out]
+    dp.edge_distance = NAMED_EDGE_HOP[dp.edge_distance]
+    dp.wss = NAMED_EDGE_WSS[dp.wss]
+
+    # slam in unchanging values
+    dp.nr_source = graph.uniform(1,1)
+    dp.nr_sink = graph.uniform(1,1)
+    dp.uncached = False
+    dp.huge_pages = False
+    dp.sched = 'edf'
+    dp.walk = 'seq'
+
+    __avg_sched = defaultdict(float)
+
+    __avg_ts_size = defaultdict(float)
+    __avg_nr_graphs = defaultdict(float)
+    __avg_graph_size = defaultdict(float)
+    __avg_k = defaultdict(float)
+    __avg_latencies = defaultdict(float)
+    __avg_tard_ratios = defaultdict(float)
+    __avg_hrt_ratios = defaultdict(float)
+    __avg_split_latencies = defaultdict(float)
+    __avg_split_tard_ratios = defaultdict(float)
+    __avg_split_hrt_ratios = defaultdict(float)
+
+    n_methods = len(TESTS)
+    n = 0
+    n_all_sched = 0
+
+    overheads = get_overheads(dp, dp.system)
+
+    while not complete(__avg_sched, n):
+        ts, graphs, subts = create_pgm_task_set(dp)
+
+        if overheads.consumer is not None:
+            for t in ts:
+                overheads.consumer.place_production(t)
+
+        num_graphs = len(graphs)
+        avg_depth = sum([g.depth for g in graphs])/float(num_graphs)
+        avg_graph_size = sum([len(g.nodes) for g in graphs])/float(num_graphs)
+        hrt_ideal_response_times = None
+
+        this_task_set = {}
+
+        if dp.nr_clusters != 1:
+            for method, _, test in TESTS:
+                dp.job_splitting = False
+                is_sched, processed_ts = test(ts, graphs, subts, dp, overheads)
+                this_method = {}
+                this_method['sched'] = is_sched
+                if is_sched:
+                    this_method['latencies'] = map(graph.bound_graph_latency, graphs)
+                    this_method['ideal_latencies'] = map(graph.compute_ideal_graph_latency, graphs)
+                this_task_set[method] = this_method
+        else:
+            # global. no partitioning. all methods equivalent
+            dp.job_splitting = False
+            is_sched, processed_ts = TESTS[0][2](ts, graphs, subts, dp, overheads)
+            this_method = {}
+            this_method['sched'] = is_sched
+            if is_sched:
+                this_method['latencies'] = map(graph.bound_graph_latency, graphs)
+                this_method['ideal_latencies'] = map(graph.compute_ideal_graph_latency, graphs)
+            for method, _, _ in TESTS:
+                this_task_set[method] = this_method
+
+        num_method_sched = sum([1 for sched_data in this_task_set.itervalues() if sched_data['sched'] == True])
+        all_sched = True if num_method_sched == n_methods else False
+
+        if all_sched:
+            hrt_ideal_response_times = map(graph.compute_hrt_ideal_graph_latency, graphs)
+
+            # they're all schedulable, so compute graph latencies
+            # redo with job splitting
+            if dp.nr_clusters != 1:
+                for method, _, test in TESTS:
+                    # redo the test to get the split-based latency
+                    dp.job_splitting = True
+                    is_sched, processed_ts = test(ts, graphs, subts, dp, overheads)
+                    this_task_set[method]['split_latencies'] = map(graph.bound_graph_latency, graphs)
+            else:
+                # global. no partitioning. all methods equivalent
+                dp.job_splitting = True
+                is_sched, processed_ts = TESTS[0][2](ts, graphs, subts, dp, overheads)
+                split_lat = map(graph.bound_graph_latency, graphs)
+                for method, _, _ in TESTS:
+                    this_task_set[method]['split_latencies'] = split_lat
+
+        # process the results
+
+        for method, sched_data in this_task_set.iteritems():
+            is_sched = sched_data['sched']
+            __avg_sched[method] = update_mean(__avg_sched[method], n, is_sched)
+
+            # only include latency data for task sets that were schedulable for all methods
+            if all_sched:
+                avg_tard_ratio = 0.0
+                avg_hrt_tard_ratio = 0.0
+                avg_split_tard_ratio = 0.0
+                avg_split_hrt_tard_ratio = 0.0
+                for latency, split_latency, ideal_latency, hrt_latency in zip(sched_data['latencies'], sched_data['split_latencies'], sched_data['ideal_latencies'], hrt_ideal_response_times):
+                    if ideal_latency == 0.0:
+                        print 'ecrts14.py: bad latency. latency values:', sched_data
+                    avg_tard_ratio += (latency / ideal_latency)
+                    avg_hrt_tard_ratio += (latency / hrt_latency)
+                    avg_split_tard_ratio += (split_latency / ideal_latency)
+                    avg_split_hrt_tard_ratio += (split_latency / hrt_latency)
+
+                avg_latency = sum(sched_data['latencies'])/float(num_graphs)
+                avg_tard_ratio /= float(num_graphs)
+                avg_hrt_tard_ratio /= float(num_graphs)
+                avg_split_latency = sum(sched_data['split_latencies'])/float(num_graphs)
+                avg_split_tard_ratio /= float(num_graphs)
+                avg_split_hrt_tard_ratio /= float(num_graphs)
+
+                __avg_latencies[method] = update_mean(__avg_latencies[method], n_all_sched, avg_latency)
+                __avg_tard_ratios[method] = update_mean(__avg_tard_ratios[method], n_all_sched, avg_tard_ratio)
+                __avg_hrt_ratios[method] = update_mean(__avg_hrt_ratios[method], n_all_sched, avg_hrt_tard_ratio)
+                __avg_split_latencies[method] = update_mean(__avg_split_latencies[method], n_all_sched, avg_split_latency)
+                __avg_split_tard_ratios[method] = update_mean(__avg_split_tard_ratios[method], n_all_sched, avg_split_tard_ratio)
+                __avg_split_hrt_ratios[method] = update_mean(__avg_split_hrt_ratios[method], n_all_sched, avg_split_hrt_tard_ratio)
+
+                # we could share these values across all methods
+                __avg_ts_size[method] = update_mean(__avg_ts_size[method], n_all_sched, len(ts))
+                __avg_nr_graphs[method] = update_mean(__avg_nr_graphs[method], n_all_sched, num_graphs)
+                __avg_graph_size[method] = update_mean(__avg_graph_size[method], n_all_sched, avg_graph_size)
+                __avg_k[method] = update_mean(__avg_k[method], n_all_sched, avg_depth)
+
+        if all_sched:
+            n_all_sched += 1
+        n += 1
+
+    if n_all_sched == 0:
+        for method, _, _ in TESTS:
+            __avg_latencies[method] = -1.0
+            __avg_tard_ratios[method] = -1.0
+            __avg_hrt_ratios[method] = -1.0
+            __avg_split_latencies[method] = -1.0
+            __avg_split_tard_ratios[method] = -1.0
+            __avg_split_hrt_ratios[method] = -1.0
+            __avg_ts_size[method] = 0.0
+            __avg_nr_graphs[method] = 0.0
+            __avg_graph_size[method] = 0.0
+            __avg_k[method] = 0.0
+
+    return __avg_sched, __avg_latencies, __avg_tard_ratios, __avg_hrt_ratios, __avg_split_latencies, __avg_split_tard_ratios, __avg_split_hrt_ratios, __avg_ts_size, __avg_nr_graphs, __avg_graph_size, __avg_k
+
+def process_design_points(args):
+    chunk_size = 1
+    try:
+        (worker_id, db_name) = args
+        nr_processed = 0
+        __processed_dps = []
+        __results = []
+        while True:
+            dps = db.get_design_points(db_name, nr_dp = chunk_size)
+            if not dps or not len(dps):
+                break
+            for dp in dps:
+                print '%d starting dp' % worker_id
+                (avg_sched, avg_lat, avg_tard_ratio, avg_hrt_tard_ratio, avg_split_lat, avg_split_tard_ratio, avg_split_hrt_tard_ratio, avg_ts_size, avg_nr_graphs, avg_size, avg_k) = process_dp(dp)
+                print '%d finished dp' % worker_id
+
+                sched_data = {}
+                for m, _, _ in TESTS:
+                    results = storage()
+                    results.avg_sched = avg_sched[m]
+                    results.avg_latency = avg_lat[m]
+                    results.avg_tard_ratio = avg_tard_ratio[m]
+                    results.avg_hrt_tard_ratio = avg_hrt_tard_ratio[m]
+                    results.avg_split_latency = avg_split_lat[m]
+                    results.avg_split_tard_ratio = avg_split_tard_ratio[m]
+                    results.avg_split_hrt_tard_ratio = avg_split_hrt_tard_ratio[m]
+                    results.avg_ts_size = avg_ts_size[m]
+                    results.avg_nr_graphs = avg_nr_graphs[m]
+                    results.avg_graph_size = avg_size[m]
+                    results.avg_k = avg_k[m]
+                    sched_data[m] = results
+
+                __processed_dps.append(dp)
+                __results.append(sched_data)
+                nr_processed += 1
+        if len(__processed_dps):
+            assert len(__processed_dps) == len(__results)
+            db.store_sched_results(db_name, __processed_dps, __results)
+    except lite.OperationalError:
+        print "CRAP. Database Error!"
+        print traceback.format_exc()
+    return nr_processed
+
+def valid(dp):
+    return True
+
+# TODO:
+#XXX 1. Track average graph size.
+#XXX 2. Increase minimum number of task sets
+#XXX 3. Remove 'mean' overhead type
+#XXX 4. Explore more branchy graphs
+#XXX 5. Pick one heur_aggress value (0.75)
+#XXX 6. Add wss parameters.
+#XXX 7. Remove polluters (for now).
+#    8. Why are graphs so shallow?
+#XXX 9. Job splitting
+
+def main():
+    random.seed(12345)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', "--pretend", action='store_true',
+                        help = "Only print design point, do not execute")
+    parser.add_argument('-m', "--processors", default=1, type = int,
+                        help = "Number of processors to execute on")
+    parser.add_argument('-d', "--database", type = str,
+                        default = "",
+                        help = "Database for holding experiment data")
+    parser.add_argument('--initonly', action='store_true',
+                        help = "Only store design points to database")
+    parser.add_argument('--worker', action='store_true',
+                        help = "Only process design points from database")
+    parser.add_argument('--resume', action='store_true',
+                        help = "Preserve existing database entries")
+    args = parser.parse_args()
+
+    if args.database == "":
+        print "Database name required."
+        exit(-1)
+
+    if not args.worker:
+        cpus = 24.0
+        exp = storage()
+
+        # system parameters
+        exp.processors = [int(cpus)]
+        # exp.nr_clusters = [1, 4, 12, 24]
+        exp.nr_clusters = [24]
+        exp.host = ['ludwig']
+        exp.polluters = [False]
+        exp.ovh_type = ['max']
+
+        # task parameters
+        step_size = 0.1
+        exp.sys_util = [float(v) for v in arange(step_size, cpus+step_size, step_size)]
+        # exp.sys_util = [float(v) for v in arange(10.0, cpus+step_size, step_size)]
+        exp.task_util = ['uni-light']
+        exp.period = ['uni-short']
+        exp.wcycle = [0]
+
+        # graph parameters
+        exp.num_graphs = ['uni-few']
+        # exp.depth_factor = ['uni-medium']
+        exp.depth_factor = ['uni-short']
+        exp.node_placement = ['binomial']
+        exp.fan_out = ['uniform_3']
+        exp.edge_distance = ['geometric_3']
+        exp.wss = ['bimo-medium']
+        exp.fan_in_cap = [3]
+        exp.heur_aggressiveness = [0.75]
+
+        design_points = [dp for dp in DesignPointGenerator(exp, is_valid = valid)]
+
+        nr_dp = len(design_points)
+        if not args.pretend:
+            random.shuffle(design_points)
+            db.create_tables(args.database, dummy_dp = design_points[0], clean = not args.resume)
+            num_stored = db.store_design_points(args.database, design_points, clean = not args.resume)
+            print "Loaded %d of %d design points. (%d already completed)" % (num_stored, nr_dp, nr_dp - num_stored)
+
+    if args.pretend or args.initonly:
+        exit(0)
+
+    if args.worker:
+        print "Running as worker process."
+
+    total_nr_processed = 0
+    if args.processors > 1:
+        pool = Pool(processes = args.processors)
+        args = zip(range(args.processors), [args.database]*args.processors)
+        try:
+            for i,nr_processed in enumerate(pool.map(process_design_points, args)):
+                print 'worker %d: processed %d design points.' % (i, nr_processed)
+                total_nr_processed += nr_processed
+            pool.close()
+        except Exception as e:
+            pool.terminate()
+            print e
+            raise
+    else:
+        total_nr_processed = process_design_points((0, args.database))
+
+    print 'Processed %d design points!' % total_nr_processed
+
+if __name__ == '__main__':
+    main()
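quick.py keeps drawing task sets until every test's schedulability proportion has a tight enough confidence interval, bounded below by MIN_SAMPLES and above by MAX_SAMPLES, while updating its running means one sample at a time via update_mean. A self-contained sketch of that stopping rule, using a plain Wald interval as a stand-in for schedcat's proportion_ci (whose exact definition this diff does not show):

    import math
    import random

    MIN_SAMPLES, MAX_SAMPLES, MAX_CI, CONFIDENCE = 5, 10, 0.05, 0.95

    def update_mean(old_mean, n, new_sample):
        # Incremental mean over n prior samples, as in quick.py.
        return (old_mean * n + new_sample) / float(n + 1)

    def ci_half_width(p, n):
        # Stand-in for proportion_ci: normal-approximation half-width.
        z = 1.96 if CONFIDENCE == 0.95 else 2.576
        return z * math.sqrt(max(p * (1.0 - p), 0.0) / n) if n else float('inf')

    def complete(avg_sched, n):
        if n < MIN_SAMPLES:
            return False
        if n > MAX_SAMPLES:
            return True
        return all(ci_half_width(p, n) <= MAX_CI for p in avg_sched.values())

    # Sampling loop in the shape of process_dp's while-loop:
    avg, n = {'method': 0.0}, 0
    while not complete(avg, n):
        sample = random.random() < 0.7            # stand-in schedulability outcome
        avg['method'] = update_mean(avg['method'], n, sample)
        n += 1
    print('%d samples, mean %.2f' % (n, avg['method']))

With MAX_SAMPLES at 10 (the debugging value left enabled above), the loop always stops by the eleventh sample; the commented-out 200/500 and 1000/10000 pairs are the settings that would let the interval test actually bite.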
diff --git a/schedcat/overheads/model.py b/schedcat/overheads/model.py
index 42001a6..a3941d8 100644
--- a/schedcat/overheads/model.py
+++ b/schedcat/overheads/model.py
@@ -148,7 +148,9 @@ class ConsumerOverheads(object):
                 working_set[ConsumerOverheads.levels[i+1]] += delta
 
     def worst_case_placement(self, working_set):
-        ws = copy.deepcopy(working_set)
+        if not self.limits:
+            self.compute_limits()
+        ws = working_set
         self.coalesce(ws)
         placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
         dirty = True
@@ -183,8 +185,40 @@ class ConsumerOverheads(object):
         return placement
 
     def best_case_placement(self, working_set):
-        placement = copy.deepcopy(working_set)
+        if not self.limits:
+            self.compute_limits()
+        placement = working_set
+        self.coalesce(placement)
+        return placement
+
+    def place_production(self, ti):
+        if not self.limits:
+            self.compute_limits()
+        produced = sum([e.wss for e in ti.node.outEdges])
+        placement = {'L1':produced, 'L2':0, 'L3':0, 'MEM':0}
         self.coalesce(placement)
+        ti.placed_production = placement
+
+    def worst_case_place_consumption(self, tp, wss, dist):
+        placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
+        remaining = wss
+        for l in ConsumerOverheads.rlevels[0:(4-dist)]:
+            consumed = min(remaining, tp.placed_production[l])
+            placement[l] = consumed
+            remaining -= consumed
+            if remaining == 0:
+                break
+        if remaining > 0:
+            # Place anything left over at the 'dist' level.
+            # Coalescing takes place at a later stage.
+            placement[ConsumerOverheads.levels[dist]] += remaining
+        # assert sum(placement.itervalues()) == wss
+        return placement
+
+    def best_case_place_consumption(self, tp, wss, dist):
+        placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
+        placement[ConsumerOverheads.levels[dist]] = wss
+        # Coalescing takes place at a later stage
         return placement
 
     def consume_cost(self, shared_mem_level, working_set_size):
@@ -211,7 +245,7 @@ class ConsumerOverheads(object):
 
     def consume_cost_spilled(self, ti, num_cpus):
         if self.system and self.system.machine:
-            consume_amount = [0, 0, 0, 0]
+            init_placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
             ti_hi_cpu = (ti.partition+1)*num_cpus - 1
             # sum up wss from different sources
             for e in ti.node.inEdges:
@@ -221,13 +255,9 @@ class ConsumerOverheads(object):
                 # ti and producer share a multi-cpu partition.
                 producer_lo_cpu = e.p.task.partition*num_cpus
                 dist = self.system.distance(producer_lo_cpu, ti_hi_cpu)
-                consume_amount[dist] += e.wss
-            if not self.limits:
-                self.compute_limits()
-            init_placement = {'L1':consume_amount[0],
-                              'L2':consume_amount[1],
-                              'L3':consume_amount[2],
-                              'MEM':consume_amount[3]}
+                sources = self.worst_case_place_consumption(e.p.task, e.wss, dist)
+                for l,wss in sources.iteritems():
+                    init_placement[l] += sources[l]
             placement = self.worst_case_placement(init_placement)
             # convert to schedcat's format...
             consumer_cost = self.consume_multilevel_cost({0:placement['MEM'],
@@ -240,7 +270,7 @@ class ConsumerOverheads(object):
 
     def consume_cost_spilled_estimate(self, ti, partition, num_cpus):
         if self.system and self.system.machine:
-            consume_amount = [0, 0, 0, 0]
+            init_placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
             ti_hi_cpu = (partition+1)*num_cpus - 1
             # sum up wss from different sources
             for e in ti.node.inEdges:
@@ -252,13 +282,9 @@ class ConsumerOverheads(object):
                     continue
                 producer_lo_cpu = e.p.task.partition*num_cpus
                 dist = self.system.distance(producer_lo_cpu, ti_hi_cpu)
-                consume_amount[dist] += e.wss
-            if not self.limits:
-                self.compute_limits()
-            init_placement = {'L1':consume_amount[0],
-                              'L2':consume_amount[1],
-                              'L3':consume_amount[2],
-                              'MEM':consume_amount[3]}
+                sources = self.worst_case_place_consumption(e.p.task, e.wss, dist)
+                for l,wss in sources.iteritems():
+                    init_placement[l] += sources[l]
             placement = self.worst_case_placement(init_placement)
             # convert to schedcat's format...
             consumer_cost = self.consume_multilevel_cost({0:placement['MEM'],
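The new worst_case_place_consumption above satisfies a consumer's working set from the producer's recorded placement, farthest memory level first, and never from a level closer than the one the two tasks actually share; anything the producer's placement cannot account for is charged at that shared level. A sketch of the same walk as a free function, assuming levels is ['L1', 'L2', 'L3', 'MEM'] indexed by sharing distance and rlevels is its reverse (those class attributes are not shown in this diff, so the ordering is an assumption):

    LEVELS = ['L1', 'L2', 'L3', 'MEM']      # assumed: indexed by sharing distance
    RLEVELS = list(reversed(LEVELS))        # assumed: farthest level first

    def worst_case_place_consumption(placed_production, wss, dist):
        placement = {l: 0 for l in LEVELS}
        remaining = wss
        # Pessimistically pull from the farthest levels, stopping short of
        # any level closer than the shared one, LEVELS[dist].
        for l in RLEVELS[0:(4 - dist)]:
            consumed = min(remaining, placed_production.get(l, 0))
            placement[l] = consumed
            remaining -= consumed
            if remaining == 0:
                break
        if remaining > 0:
            # Leftovers are charged at the shared level; coalescing against
            # per-level capacity limits happens later, as in the diff.
            placement[LEVELS[dist]] += remaining
        return placement

    # A producer left 64 units in L1 and 192 in L3; the consumer shares L3
    # (dist = 2) and reads 256, so only the L3 portion may be counted and
    # the unaccounted remainder spills to L3 as well:
    print(worst_case_place_consumption({'L1': 64, 'L2': 0, 'L3': 192, 'MEM': 0}, 256, 2))
    # L3 ends up with all 256; L1, L2, and MEM stay at 0.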