rtss14

author: Glenn Elliott <gelliott@cs.unc.edu> 2014-04-29 17:41:31 -0400
committer: Glenn Elliott <gelliott@cs.unc.edu> 2014-04-29 17:41:31 -0400
commit: 30d7c45411c0db77baef949498e9c21ca9c289e4 (patch)
tree: 46777110d8f7200d9f77aa1c4e4467c812c7c3ae
parent: 8d785ca286cbc924c3ab7cb8bde6ea6f8e2c8596 (diff)
4 files changed, 216 insertions, 331 deletions
diff --git a/schedcat/generator/tasks.py b/schedcat/generator/tasks.py
index 422d5f0..804ed25 100644
--- a/schedcat/generator/tasks.py
+++ b/schedcat/generator/tasks.py
@@ -12,6 +12,12 @@ def uniform_int(minval, maxval):
        return random.randint(minval, maxval)
    return _draw
+def const(val):
+    "Create a function that returns a constant value"
+    def _draw():
+        return val
+    return _draw
 def uniform(minval, maxval):
    "Create a function that draws floats uniformly from [minval, maxval]"
    def _draw():
diff --git a/schedcat/model/tasks.py b/schedcat/model/tasks.py
index 91fccab..9c84090 100644
--- a/schedcat/model/tasks.py
+++ b/schedcat/model/tasks.py
@@ -31,6 +31,57 @@ class SporadicTask(object):
    def utilization(self):
        return self.cost / self.period
+    def eff_utilization(self, factor):
+        """Return utilization with GPU kernel time expressed as CPU time.
+
+        For a task that does not use a GPU this is plain utilization().
+        Otherwise ccost is removed from cost, gcost multiplied by
+        'factor' is added, and the result is divided by the period.
+        """
+        if not self.uses_gpu:
+            return self.utilization()
+        # cpu cost without gpu management
+        eff_cost = (self.cost - self.ccost)
+        # kernel time scaled to single-thread of cpu time
+        eff_cost += self.gcost * factor
+        return eff_cost/self.period
+    def ee_utilization(self):
+        """Return gcost / period, or 0.0 if the task does not use a GPU.
+
+        NOTE(review): 'ee' presumably stands for the GPU execution
+        engine -- confirm.
+        """
+        if not self.uses_gpu:
+            return 0.0
+        return self.gcost / self.period
+    def ce_utilization(self, with_send = True, with_recv = True, with_state = False):
+        """Return the selected transfer costs divided by the period.
+
+        scost is included when with_send is set, rcost when with_recv is
+        set, and stcost when with_state is set.  Returns 0.0 if the task
+        does not use a GPU.
+        NOTE(review): 'ce' presumably stands for the GPU copy engine --
+        confirm.
+        """
+        if not self.uses_gpu:
+            return 0.0
+        ce_cost = 0.0
+        if with_send:
+            ce_cost += self.scost
+        if with_recv:
+            ce_cost += self.rcost
+        if with_state:
+            ce_cost += self.stcost
+        return ce_cost / self.period
+    def g_utilization(self, with_state = False, with_cpu = True):
+        """Return the task's GPU-related cost divided by its period.
+
+        gcost, scost and rcost are always summed; stcost is added when
+        with_state is set and ccost when with_cpu is set.  Returns 0.0
+        if the task does not use a GPU.
+        """
+        if not  self.uses_gpu:
+            return 0.0
+        totcost = self.gcost + self.scost + self.rcost
+        if with_state:
+            totcost += self.stcost
+        if with_cpu:
+            totcost += self.ccost
+        return float(totcost)/self.period
+    def bandwidth(self, send_only = False, recv_only = False, state_only = False, all_data = False):
+        """Return the selected data size divided by the period.
+
+        The flags are checked in order and the first one set wins:
+        send_only (sdata), recv_only (rdata), state_only (stdata),
+        all_data (sdata + rdata + stdata).  With no flag set the result
+        covers sdata + rdata.  Returns 0.0 if the task does not use a GPU.
+        """
+        if not self.uses_gpu:
+            return 0.0
+        elif send_only:
+            return float(self.sdata)/self.period
+        elif recv_only:
+            return float(self.rdata)/self.period
+        elif state_only:
+            return float(self.stdata)/self.period
+        elif all_data:
+            return float(self.sdata + self.rdata + self.stdata)/self.period
+        else:
+            # assume just input/output
+            return float(self.sdata + self.rdata)/self.period
    def utilization_q(self):
        return Fraction(self.cost, self.period)
@@ -40,6 +91,11 @@ class SporadicTask(object):
    def density_q(self):
        return Fraction(self.cost, min(self.period, self.deadline))
+    def est_response_time(self):
+        """Return response_time if it has been set, else the period."""
+        # No 'response_time' attribute yet: fall back to the period.
+        if hasattr(self, 'response_time'):
+            return self.response_time
+        return self.period
    def tardiness(self):
        """Return this task's tardiness.
        Note: this function can only be called after some test
@@ -47,6 +103,9 @@ class SporadicTask(object):
        """
        return max(0, self.response_time - self.deadline)
+    def ptardiness(self):
+        """Return tardiness() divided by the task's deadline."""
+        return self.tardiness()/self.deadline
    def maxjobs(self, interval_length):
        """Compute the maximum number of jobs that can execute during
        some interval.
@@ -154,6 +213,27 @@ class TaskSystem(list):
        "Assumes t.wss has been initialized for each task."
        return max([t.wss for t in self])
+    def max_tardiness(self):
+        "Return the largest tardiness() over all tasks in the system."
+        return max([t.tardiness() for t in self])
+    def max_ptardiness(self):
+        "Return the largest ptardiness() over all tasks in the system."
+        return max([t.ptardiness() for t in self])
+    def bandwidth(self, send_only = False, recv_only = False, state_only = False, all_data = False):
+        "Return the sum of each task's bandwidth(); flags are passed through."
+        return sum([t.bandwidth(send_only, recv_only, state_only, all_data) for t in self])
+    def ee_utilization(self):
+        "Return the sum of each task's ee_utilization()."
+        return sum([t.ee_utilization() for t in self])
+    def ce_utilization(self, with_send = True, with_recv = True, with_state = False):
+        "Return the sum of each task's ce_utilization(); flags are passed through."
+        return sum([t.ce_utilization(with_send, with_recv, with_state) for t in self])
+    def g_utilization(self, with_state = False, with_cpu = True):
+        "Return the sum of each task's g_utilization(); flags are passed through."
+        return sum([t.g_utilization(with_state, with_cpu) for t in self])
+    def eff_utilization(self, factor):
+        "Return the sum of each task's eff_utilization(factor)."
+        return sum([t.eff_utilization(factor) for t in self])
    def copy(self):
        ts = TaskSystem((copy.deepcopy(t) for t in self))
        return ts
diff --git a/schedcat/overheads/jlfp.py b/schedcat/overheads/jlfp.py
index 8db39df..7d7a29b 100644
--- a/schedcat/overheads/jlfp.py
+++ b/schedcat/overheads/jlfp.py
@@ -28,12 +28,7 @@ def preemption_centric_irq_costs(oheads, dedicated_irq, taskset):
    urel  = 0.0
    if not dedicated_irq:
        rel   = oheads.release(n)
-        for ti in taskset:
+        urel = sum([(rel/ti.period) for ti in taskset])
-            # PGM consumers (early-releasing tasks) don't use release timers,
-            # so skip them.
-            if hasattr(ti, 'node') and len(ti.node.inEdges):
-                continue
-            urel += (rel / ti.period)
    # cost of preemption
    cpre_numerator = tck + ev_lat * utick
@@ -44,6 +39,14 @@ def preemption_centric_irq_costs(oheads, dedicated_irq, taskset):
    return (uscale, cpre_numerator / uscale)
+def count_gpu_interrupts(t, ts):
+    """Count GPU requests of other tasks within one interval of task t.
+
+    Only tasks u in ts other than t that use a GPU and have more than
+    two requests contribute.  Each contributes (u.nrequests - 1) times
+    the number of its jobs counted in an interval of length
+    max(t.response_time, t.period).  Both t and every contributing u
+    must already have response_time set.
+    """
+    def instances(u, interval):
+        # Jobs of u in 'interval', padded by u's own response time or
+        # period (whichever is larger), rounded up to whole jobs.
+        return int(ceil((interval + max(u.response_time, u.period))/u.period))
+    # -1 to exclude the token lock from the gpu request count
+    count = sum([(u.nrequests-1)*instances(u, max(t.response_time, t.period))
+                for u in ts if t is not u and u.uses_gpu and u.nrequests > 2])
+    return count
 def charge_scheduling_overheads(oheads, num_cpus, dedicated_irq, taskset):
    if not oheads:
        return taskset
@@ -54,66 +57,73 @@ def charge_scheduling_overheads(oheads, num_cpus, dedicated_irq, taskset):
    uscale, cpre = preemption_centric_irq_costs(oheads, dedicated_irq, taskset)
    if uscale <= 0:
+        print 'interrupt overload'
        # interrupt overload
        return False
    sched_ovh = oheads.schedule(n)
    ctx_ovh = oheads.ctx_switch(n)
+    ipi_ovh = oheads.ipi_latency(n)
    cache_ovh = oheads.cache_affinity_loss.cpmd_cost(oheads.shared_cache, taskset.max_wss())
    sysin_ovh = oheads.syscall_in(n)
    sysout_ovh = oheads.syscall_out(n)
+    nvth_ovh = oheads.nvtop(n)
+    nvbhrel_ovh = oheads.nvbot_release(n)
+    nvtop = nvth_ovh + nvbhrel_ovh
    sched = 2 * (sched_ovh + ctx_ovh) + cache_ovh
+    # wait, resume, yield: 3 (sched and ctx)
+    # two CPMD: 1 at resume and 1 at preemption of lower-priority task
+    # one ipi for wake-up latency
+    locking = 3 * (sched_ovh + ctx_ovh) + 2 * cache_ovh
+    locking_unscaled = ipi_ovh
+    # and extra overheads for gpusync engine locks
+    # sched/ctx for klmirqd, aux task, and task itself
+    # 3 ipi for each waking task
+    # Don't charge CPMDs because these are really small and fast
+    # (It would be like charging a CPMD because of a timer tick.)
+    engine_locking = 6 * (sched_ovh + ctx_ovh)
+    engine_locking_unscaled = 3 * ipi_ovh
    irq_latency = oheads.release_latency(n)
    if dedicated_irq:
-        unscaled = 2 * cpre + oheads.ipi_latency(n) + oheads.release(n)
+        unscaled = 2 * cpre + ipi_ovh + oheads.release(n)
    elif num_cpus > 1:
-        unscaled = 2 * cpre + oheads.ipi_latency(n)
+        unscaled = 2 * cpre + ipi_ovh
    else:
        unscaled = 2 * cpre
-    # Charge PGM costs
-    pgm_costs = defaultdict(float)
-    max_producer_delay = 0.0
-    for ti in taskset:
-        # Charge consumers costs
-        if hasattr(ti, 'node') and len(ti.node.inEdges):
-            # Cost of consuming from remote producer after some delay, rather
-            # than consuming from a local producer with no delay.
-            if oheads.consumer:
-                consumer_cache_cost = oheads.consumer.consume_cost_spilled(ti, num_cpus)
-                pgm_costs[ti] += consumer_cache_cost
-            # We call into the scheduler to release the next job
-            early_release_cost = oheads.schedule(n)
-            pgm_costs[ti] += early_release_cost
-        # Charge the producer costs
-        if hasattr(ti, 'node') and len(ti.node.outEdges):
-            # one syscall in/out for each consumer (wakeup + sched)
-            out_degree = len(ti.node.outEdges)
-            producer_cost = out_degree * (sysin_ovh + sysout_ovh + sched_ovh)
-            # Compute how long it takes to check token constraints of consumers.
-            # This is already in ti's execution time, but we need to determine
-            # how long ti will be boosted.
-            scan_cost = 0.0
-            if oheads.producer:
-                for e in ti.node.outEdges:
-                    consumer_in_degree = len(e.s.task.node.inEdges)
-                    scan_cost += oheads.producer(consumer_in_degree)
-            max_producer_delay = max(max_producer_delay, producer_cost + scan_cost)
-            # charge for sched_yield() to exit boosted state
-            boost_cost = sysin_ovh + sysout_ovh + sched_ovh + ctx_ovh
-            #
-            pgm_costs[ti] += (producer_cost + boost_cost)
    for ti in taskset:
-        # PGM: A producer's boosted section may delay our release
+        latency = irq_latency
-        latency = irq_latency + max_producer_delay
-        
        ti.period   -= latency
        ti.deadline -= latency
-        ti.cost      = ((ti.cost + sched + pgm_costs[ti]) / uscale) + unscaled
+        ti_sched = sched
+        ti_unscaled = unscaled
+        # Charge gpu interrupt overheads
+        ti_sched += nvtop * count_gpu_interrupts(ti, taskset)
+        # Charge overheads for GPUSync locking protocol use
+        if ti.nrequests > 0:
+            ti_sched += ti.nrequests * locking
+            ti_unscaled += ti.nrequests * locking_unscaled
+            # There are extra overheads for the engine locks...
+            if ti.nrequests > 2:
+                # -1 to exclude the token lock
+                ti_sched += (ti.nrequests - 1) * engine_locking
+                ti_unscaled += (ti.nrequests - 1) * engine_locking_unscaled
+        # Charge overheads for blocking.
+        # Includes donation costs, GPU exec/copy time, lock blocking, etc.
+        if hasattr(ti, 'gpusync_ovh'):
+            ti_sched += ti.gpusync_ovh
+        ti.cost      = ((ti.cost + ti_sched) / uscale) + ti_unscaled
        if ti.density() > 1:
            return False
    return taskset
@@ -127,6 +137,6 @@ def quantize_params(taskset):
        t.period   = int(floor(t.period))
        t.deadline = int(floor(t.deadline))
        if not min(t.period, t.deadline) or t.density() > 1:
-            return False
+           return False
    return taskset
diff --git a/schedcat/overheads/model.py b/schedcat/overheads/model.py
index ea82862..f28ff1c 100644
--- a/schedcat/overheads/model.py
+++ b/schedcat/overheads/model.py
@@ -1,6 +1,7 @@
 from __future__ import division
 import copy
+import itertools
 from schedcat.util.csv import load_columns as load_column_csv
 from schedcat.util.math import monotonic_pwlin, piece_wise_linear, const
@@ -27,6 +28,11 @@ class Overheads(object):
        ('READ-UNLOCK',     'read_unlock'),
        ('SYSCALL-IN',      'syscall_in'),
        ('SYSCALL-OUT',     'syscall_out'),
+        # GPU-related overheads
+        ('NV-TOP',            'nvtop'),
+        ('NV-BOTTOM',         'nvbot'),
+        ('NV-BOTTOM-RELEASE', 'nvbot_release'),
        ]
    def zero_overheads(self):
@@ -61,44 +67,47 @@ class Overheads(object):
        o.load_approximations(fname, non_decreasing)
        return o
 class CacheDelay(object):
    """Cache-related Preemption and Migration Delay (CPMD)
    Overheads are expressed as a piece-wise linear function of working set size.
    """
-    MEM, L1, L2, L3 = 0, 1, 2, 3
+    MAPPING  = {0:'L1', 1:'L2', 2:'L3', 3:'Mem', 4:'Numa'}
-    SCHEDCAT_MAPPING = list(enumerate(["MEM", "L1", "L2", "L3"]))
+    RMAPPING = {'L1':0, 'L2':1, 'L3':2, 'Mem':3, 'Numa':4}
-    def __init__(self, l1=0, l2=0, l3=0, mem=0):
+    def __init__(self, l1=0, l2=0, l3=0, mem=0, numa=0):
-        self.mem_hierarchy  = [const(mem), const(l1), const(l2), const(l3)]
+        self.mem_hierarchy  = [const(l1), const(l2), const(l3), const(mem), const(numa)]
-        for (i, name) in CacheDelay.SCHEDCAT_MAPPING:
+        for i, name in CacheDelay.MAPPING.iteritems():
            self.__dict__[name] = self.mem_hierarchy[i]
    def cpmd_cost(self, shared_mem_level, working_set_size):
-        wss = min(working_set_size, self.cache_size)
+#        wss = min(working_set_size, self.cache_size)
-        return self.mem_hierarchy[shared_mem_level](wss)
+        if type(shared_mem_level) is str:
+            shared_mem_level = CacheDelay.get_idx_for_name(shared_mem_level)
+        return self.mem_hierarchy[shared_mem_level](working_set_size)
    def set_cpmd_cost(self, shared_mem_level, approximation):
+        """Install 'approximation' as the cost function of one memory level.
+
+        shared_mem_level is either an index into mem_hierarchy or a level
+        name that get_idx_for_name() can resolve.  The function is stored
+        in mem_hierarchy and also exposed as an attribute named after the
+        level.
+        """
+        if type(shared_mem_level) is str:
+            shared_mem_level = CacheDelay.get_idx_for_name(shared_mem_level)
        self.mem_hierarchy[shared_mem_level] = approximation
-        name = CacheDelay.SCHEDCAT_MAPPING[shared_mem_level][1]
+        # MAPPING maps index -> name directly; a trailing [1] would pick
+        # the second character of the name (e.g. 'L2' -> '2').
+        name = CacheDelay.MAPPING[shared_mem_level]
        self.__dict__[name] = self.mem_hierarchy[shared_mem_level]
-    def set_max_wss(self, cache_size):
+#    def set_max_wss(self, cache_size):
-        self.cache_size = cache_size
+#        self.cache_size = cache_size
    def max_cost(self, working_set_size):
-        wss = min(working_set_size, self.cache_size)
+#        wss = min(working_set_size, self.cache_size)
-        return max([f(wss) for f in self.mem_hierarchy])
+        return max([f(working_set_size) for f in self.mem_hierarchy])
    def __call__(self, wss):
        return self.max_cost(wss)
    @staticmethod
    def get_idx_for_name(key):
-        for (i, name) in CacheDelay.SCHEDCAT_MAPPING:
+        return CacheDelay.RMAPPING[key]
-            if name == key:
-                return i
-        assert False # bad key
    @staticmethod
    def from_file(fname, non_decreasing=True):
@@ -108,7 +117,7 @@ class CacheDelay(object):
        o = CacheDelay()
-        for idx, name in CacheDelay.SCHEDCAT_MAPPING:
+        for idx, name in CacheDelay.MAPPING.iteritems():
            if name in data.by_name:
                points = zip(data.by_name['WSS'], data.by_name[name])
                if non_decreasing:
@@ -118,291 +127,71 @@ class CacheDelay(object):
                o.__dict__[name] = o.mem_hierarchy[idx]
        return o
-class RawOverheads(object):
-    MEM, L1, L2, L3 = 0, 1, 2, 3
-    SCHEDCAT_MAPPING = list(enumerate(["MEM", "L1", "L2", "L3"]))
-    def __init__(self, l1=0, l2=0, l3=0, mem=0):
+def btokb(byts):
-        self.mem_hierarchy  = [const(mem), const(l1), const(l2), const(l3)]
+    return byts/1024.0
-        for (i, name) in RawOverheads.SCHEDCAT_MAPPING:
-            self.__dict__[name] = self.mem_hierarchy[i]
-    def cost(self, shared_mem_level, working_set_size):
+class XmitOverheads(object):
-        wss = min(working_set_size, self.cache_size)
-        return self.mem_hierarchy[shared_mem_level](wss)
-    def __call__(self, wss):
+    D2DN = 0
-        # presume local: L1
+    D2DF = 1
-        return self.mem_hierarchy[1](wss)
+    D2H  = 2
+    H2D  = 3
-    @staticmethod
+    MAPPING  = {D2DN:'D2DN', D2DF:'D2DF', D2H:'D2H', H2D:'H2D'}
-    def get_idx_for_name(key):
+    RMAPPING = {'D2DN':D2DN, 'D2DF':D2DF, 'D2H':D2H, 'H2D':H2D}
-        for (i, name) in RawOverheads.SCHEDCAT_MAPPING:
-            if name == key:
-                return i
-        assert False # bad key
-    @staticmethod
+    def __init__(self, d2dn=0, d2df=0, d2h=0, h2d=0):
-    def from_file(fname, non_decreasing=True):
+        self.xmit = [const(d2dn), const(d2df), const(d2h), const(h2d)]
-        data = load_column_csv(fname, convert=float)
+        for i, name in XmitOverheads.MAPPING.iteritems():
-        if not 'WSS' in data.by_name:
+            self.__dict__[name] = self.xmit[i]
-            raise IOError, 'WSS column is missing'
-        o = RawOverheads()
-        for idx, name in RawOverheads.SCHEDCAT_MAPPING:
+    def xmit_cost(self, xmit_type, datasz):
-            if name in data.by_name:
+        if type(xmit_type) is str:
-                points = zip(data.by_name['WSS'], data.by_name[name])
+            xmit_type = XmitOverheads.get_idx_for_name(xmit_type)
-                if non_decreasing:
+        if datasz == 0:
-                    o.mem_hierarchy[idx] = monotonic_pwlin(points)
+            return 0.0
-                else:
+        return self.xmit[xmit_type](btokb(datasz))
-                    o.mem_hierarchy[idx] = piece_wise_linear(points)
-                o.__dict__[name] = o.mem_hierarchy[idx]
-        return o
-class ConsumerOverheads(object):
+    def compute_xmit_cost(self, xmit_type, datasz, chunk_size):
-    """Consumption cost overheads
+        nchunks = int(datasz/chunk_size)
-        Overheads are expressed as a piece-wise linear function of working set size.
+        extra = datasz - nchunks*chunk_size
-        """
+        cost = self.xmit_cost(xmit_type, chunk_size)*nchunks
-    
+        if extra > 0:
-    MEM, L1, L2, L3 = 0, 1, 2, 3
+            cost += self.xmit_cost(xmit_type, extra)
-    SCHEDCAT_MAPPING = list(enumerate(["MEM", "L1", "L2", "L3"]))
-    levels = ['L1', 'L2', 'L3', 'MEM']
-    rlevels = list(reversed(levels))
-    
-    def __init__(self, l1=0, l2=0, l3=0, mem=0, system=None):
-        self.mem_hierarchy  = [const(mem), const(l1), const(l2), const(l3)]
-        for (i, name) in ConsumerOverheads.SCHEDCAT_MAPPING:
-            self.__dict__[name] = self.mem_hierarchy[i]
-        self.system = system
-        if self.system:
-            self.compute_limits()
-        else:
-            self.limits = None
-    
-    def coalesce(self, working_set):
-        for i,l in enumerate(ConsumerOverheads.levels):
-            if working_set[l] > self.limits[l]:
-                delta = working_set[l] - self.limits[l]
-                working_set[l] = self.limits[l]
-                working_set[ConsumerOverheads.levels[i+1]] += delta
-    def worst_case_placement(self, working_set):
-        if not self.limits:
-            self.compute_limits()
-        ws = working_set
-        self.coalesce(ws)
-        placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
-        dirty = True
-        while dirty:
-            dirty = False
-            for i,l in enumerate(ConsumerOverheads.rlevels):
-                footprint = ws[l]
-                if footprint == 0:
-                    # no data accessed at this level
-                    continue
-                # place the data
-                placement[l] += ws[l]
-                # consume this data
-                ws[l] = 0
-                # evict data from mem hierarchy, top down
-                for j,v in enumerate(ConsumerOverheads.levels[0:-1]):
-                    evicted = min(footprint, ws[v])
-                    if evicted:
-                        # spill the evicted amount...
-                        ws[v] -= evicted
-                        # ...down to the next level
-                        ws[ConsumerOverheads.levels[j+1]] += evicted
-                        # coalesce the spill down the mem hierarchy
-                        self.coalesce(ws)
-                        # recored that we must recurse
-                        dirty = True
-                    # keep going if the footprint was large enough to
-                    # spill into the next cache
-                    footprint -= self.limits[v]
-                    if footprint <= 0:
-                        break
-        return placement
-    def best_case_placement(self, working_set):
-        if not self.limits:
-            self.compute_limits()
-        placement = working_set
-        self.coalesce(placement)
-        return placement
-    def place_production(self, ti):
-        if not self.limits:
-            self.compute_limits()
-        produced = sum([e.wss for e in ti.node.outEdges])
-        placement = {'L1':produced, 'L2':0, 'L3':0, 'MEM':0}
-        self.coalesce(placement)
-        ti.placed_production = placement
-    def worst_case_place_consumption(self, tp, wss, dist):
-        placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
-        remaining = wss
-        for l in ConsumerOverheads.rlevels[0:(4-dist)]:
-            consumed = min(remaining, tp.placed_production[l])
-            placement[l] = consumed
-            remaining -= consumed
-            if remaining == 0:
-                break
-        if remaining > 0:
-            # Place anything left over to the 'dist' level.
-            # Coalescing takes place at a later stage.
-            placement[ConsumerOverheads.levels[dist]] += remaining
-#        assert sum(placement.itervalues()) == wss
-        return placement
-    def best_case_place_consumption(self, tp, wss, dist):
-        placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
-        placement[ConsumerOverheads.levels[dist]] = wss
-        # Coalescing takes place at a later stage
-        return placement
-    def consume_cost(self, shared_mem_level, working_set_size):
-        return self.mem_hierarchy[shared_mem_level](working_set_size)
-    
-    def consume_multilevel_cost(self, working_set):
-        # working_set is a dictionary of bytes to be consumed from
-        # different distances
-        cost = 0.0
-        for shared_mem_level, wss in working_set.iteritems():
-            cost += self.consume_cost(shared_mem_level, wss)
        return cost
-    def compute_limits(self):
+    def set_xmit_cost(self, xmit_type, approximation):
+        """Install 'approximation' as the cost function of one transfer type.
+
+        xmit_type is either an index into self.xmit or a type name that
+        get_idx_for_name() can resolve.  The function is stored in
+        self.xmit and also exposed as an attribute named after the type.
+        """
-        self.limits = {'L1':self.system.machine['L1'],
+        if type(xmit_type) is str:
-                       'L2':self.system.machine['L2'],
+            xmit_type = XmitOverheads.get_idx_for_name(xmit_type)
-                       'L3':self.system.machine['L3'],
+        self.xmit[xmit_type] = approximation
-                       'MEM':10000000000}
+        # MAPPING maps index -> name directly; a trailing [1] would pick
+        # the second character of the name (e.g. 'D2H' -> '2').
+        name = XmitOverheads.MAPPING[xmit_type]
-        # shrink the available space if we're on an inclusive cache
+        self.__dict__[name] = self.xmit[xmit_type]
-        if self.system.machine['inclusive'] == 1:
-            for i,l in enumerate(ConsumerOverheads.rlevels[1:-1]):
+    def max_cost(self, datasz):
-                above = ConsumerOverheads.rlevels[i+2]
+        return max([f(btokb(datasz)) for f in self.xmit])
-                self.limits[l] -= self.limits[above]
+    def __call__(self, datasz):
+        """Return max_cost(datasz), the largest cost over all transfer types."""
-    def consume_cost_spilled(self, ti, num_cpus):
+        # max_cost() already applies btokb() to datasz; converting here
+        # as well would scale the size down twice.
+        return self.max_cost(datasz)
-        if self.system and self.system.machine:
-            init_placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
-            ti_hi_cpu = (ti.partition+1)*num_cpus - 1
-            # sum up wss from different sources
-            for e in ti.node.inEdges:
-                # assume memory distance
-                dist = 3
-                # -- IMPORTANT --
-                # Take distance between ti's last cpu and producer's first cpu
-                # in partition. Ensures conservative consumer overhead even if
-                # ti and producer share a multi-cpu partition.
-                if e.p.task.partition != -1:
-                    producer_lo_cpu = e.p.task.partition*num_cpus
-                    dist = self.system.distance(producer_lo_cpu, ti_hi_cpu)
-                sources = self.worst_case_place_consumption(e.p.task, e.wss, dist)
-                for l,wss in sources.iteritems():
-                    init_placement[l] += sources[l]
-            placement = self.worst_case_placement(init_placement)
-            # convert to schedcat's format...
-            consumer_cost = self.consume_multilevel_cost({0:placement['MEM'],
-                                                          1:placement['L1'],
-                                                          2:placement['L2'],
-                                                          3:placement['L3']})
-            return consumer_cost
-        else:
-            return self.max_cost(ti.wss)
-    def consume_cost_spilled_estimate(self, ti, partition, num_cpus):
-        if partition != -1 and self.system and self.system.machine:
-            init_placement = {'L1':0, 'L2':0, 'L3':0, 'MEM':0}
-            ti_hi_cpu = (partition+1)*num_cpus - 1
-            # sum up wss from different sources
-            for e in ti.node.inEdges:
-                # assume memory distance
-                dist = 3
-                # -- IMPORTANT --
-                # Take distance between ti's last cpu and producer's first cpu
-                # in partition. Ensures conservative consumer overhead even if
-                # ti and producer share a multi-cpu partition.
-                if e.p.task.partition != -1:
-                    producer_lo_cpu = e.p.task.partition*num_cpus
-                    dist = self.system.distance(producer_lo_cpu, ti_hi_cpu)
-                sources = self.worst_case_place_consumption(e.p.task, e.wss, dist)
-                for l,wss in sources.iteritems():
-                    init_placement[l] += sources[l]
-            placement = self.worst_case_placement(init_placement)
-            # convert to schedcat's format...
-            consumer_cost = self.consume_multilevel_cost({0:placement['MEM'],
-                                                          1:placement['L1'],
-                                                          2:placement['L2'],
-                                                          3:placement['L3']})
-            return consumer_cost
-        else:
-            return self.max_cost(ti.wss)
-    def set_consume_cost(self, shared_mem_level, approximation):
-        self.mem_hierarchy[shared_mem_level] = approximation
-        name = ConsumeOverhead.SCHEDCAT_MAPPING[shared_mem_level][1]
-        self.__dict__[name] = self.mem_hierarchy[shared_mem_level]
-    
-    def max_cost(self, working_set_size):
-        return max([f(working_set_size) for f in self.mem_hierarchy])
-    
-    def __call__(self, wss):
-        return self.max_cost(wss)
-    
    @staticmethod
    def get_idx_for_name(key):
-        for (i, name) in ConsumerOverheads.SCHEDCAT_MAPPING:
+        return XmitOverheads.RMAPPING[key]
-            if name == key:
-                return i
-        assert False # bad key
-    
    @staticmethod
-    def from_file(fname, non_decreasing=True, system=None):
+    def from_file(fname, non_decreasing=True):
        data = load_column_csv(fname, convert=float)
        if not 'WSS' in data.by_name:
            raise IOError, 'WSS column is missing'
-        
-        o = ConsumerOverheads()
+        o = XmitOverheads()
-        o.system = system
-        
+        for idx, name in XmitOverheads.MAPPING.iteritems():
-        for idx, name in ConsumerOverheads.SCHEDCAT_MAPPING:
            if name in data.by_name:
                points = zip(data.by_name['WSS'], data.by_name[name])
                if non_decreasing:
-                    o.mem_hierarchy[idx] = monotonic_pwlin(points)
+                    o.xmit[idx] = monotonic_pwlin(points)
                else:
-                    o.mem_hierarchy[idx] = piece_wise_linear(points)
+                    o.xmit[idx] = piece_wise_linear(points)
-                o.__dict__[name] = o.mem_hierarchy[idx]
+                o.__dict__[name] = o.xmit[idx]
-        return o
-class ProducerOverheads(object):
-    """Token production and constraint checking overheads.
-        """
-    
-    def __init__(self, cost = 0):
-        self.cost_func  = const(cost)
-    
-    def production_cost(self, degree):
-        return self.cost_func(degree)
-    
-    def set_production_cost(self, approximation):
-        self.cost_func = approximation
-    
-    def __call__(self, degree):
-        return self.production_cost(degree)
-    
-    @staticmethod
-    def from_file(fname, non_decreasing=True):
-        data = load_column_csv(fname, convert=float)
-        if not 'DEG' in data.by_name:
-            raise IOError, 'DEG (degree) column is missing'
-        
-        o = ProducerOverheads()
-        points = zip(data.by_name['DEG'], data.by_name['COST'])
-        if non_decreasing:
-            o.cost_func = monotonic_pwlin(points)
-        else:
-            o.cost_func = piece_wise_linear(points)
        return o
author	Glenn Elliott <gelliott@cs.unc.edu>	2014-04-29 17:41:31 -0400
committer	Glenn Elliott <gelliott@cs.unc.edu>	2014-04-29 17:41:31 -0400
commit	30d7c45411c0db77baef949498e9c21ca9c289e4 (patch)
tree	46777110d8f7200d9f77aa1c4e4467c812c7c3ae
parent	8d785ca286cbc924c3ab7cb8bde6ea6f8e2c8596 (diff)