#!/usr/bin/env python
# distill_schedcat_profiles.py


from plot import decode
from util import load_csv_file, write_csv_file


from os.path  import splitext, basename
from optparse import make_option as o, OptionParser
from collections import defaultdict

# Parsed command-line options. Stays None until the __main__ section stores
# the result of OptionParser.parse_args() here; write_profiles() reads it.
options = None

# Option definitions handed to OptionParser(option_list=...).
opts = [
    # output options
    o('-p', '--prefix', action='store', dest='prefix'),
    o('-t', '--tag', action='store', dest='tag'),
]

# Fallback values applied through parser.set_defaults(**defaults).
defaults = dict(
    # output options
    prefix='',
    tag='',
)

def ludwig_cpu_count(sched):
    """Return the CPU count used for scheduler name ``sched`` on host 'ludwig'.

    The name is matched by substring, in this order: 'L3' -> 6, 'L2' -> 2,
    'P-' -> 1.  Any other name is treated as global and yields 24, reduced
    to 23 when the name also contains '-RM'.
    (Presumably L3/L2 denote cache-level clusters, 'P-' partitioned
    schedulers and '-RM' a dedicated release-master CPU -- TODO confirm.)
    """
    if 'L3' in sched:
        return 6
    if 'L2' in sched:
        return 2
    if 'P-' in sched:
        return 1
    # global: only this case is affected by the '-RM' suffix
    return 23 if '-RM' in sched else 24

# Host name -> function mapping a scheduler name to that host's CPU count.
# write_profiles() falls back to a count of 1 for hosts not listed here.
HOSTS = dict(ludwig=ludwig_cpu_count)

class DataFile(object):
    """A CSV data file whose base name encodes its configuration.

    The base name (directory and extension stripped) is passed to
    ``decode``; the resulting configuration supplies the scheduler,
    overhead, host and (optionally) locks values stored on the instance.
    """

    def __init__(self, fname):
        """Decode the configuration from ``fname`` and load its CSV contents."""
        stem = splitext(basename(fname))[0]
        conf = decode(stem)
        self.conf = conf
        self.sched = conf['scheduler']
        self.overhead = conf['overhead']
        self.host = conf['host']
        # 'locks' is optional in the decoded configuration.
        self.locks = conf['locks'] if 'locks' in conf else None
        self.name = stem
        self.data = load_csv_file(fname)

    def key(self):
        """Return the grouping key: ``sched``, or ``sched/locks`` if locks is set."""
        if not self.locks:
            return self.sched
        return "%s/%s" % (self.sched, self.locks)


def group_by_scheduler(files):
    """Bucket ``files`` by the value of each item's ``key()`` method.

    Returns a ``defaultdict(list)`` mapping each key to the items that
    produced it, in their original order.
    """
    grouped = defaultdict(list)
    for entry in files:
        bucket = grouped[entry.key()]
        bucket.append(entry)
    return grouped

# Column positions read from each loaded CSV row by write_profiles():
# NUM_INDEX feeds the NUM_TASKS column, WC_INDEX the "stat=wc" output file
# and AVG_INDEX the "stat=avg" output file.
NUM_INDEX, WC_INDEX, AVG_INDEX = 2, 5, 6

def write_profiles(files):
    """Write the "avg" and "wc" profile CSV files for one group of data files.

    ``files`` is a non-empty list of DataFile objects.  The host, scheduler,
    locks value, number of rows and the NUM_TASKS column are all taken from
    ``files[0]``; every file contributes one data column, named after its
    ``overhead`` attribute with '-' replaced by '_'.

    Each output row starts with the first file's NUM_INDEX value converted
    to int and multiplied by the CPU count that HOSTS reports for the
    host/scheduler (1 when the host is not listed in HOSTS).

    Output file names are built from the global ``options.prefix`` and
    ``options.tag``, so ``options`` must have been set before calling.

    The print calls use the single-argument parenthesised form and the row
    loops use ``range`` so the function behaves identically on Python 2 and
    also runs on Python 3.
    """
    first = files[0]
    host = first.host
    sched = first.sched
    num = len(first.data)

    # Hosts without a registered CPU-count function leave the count unscaled.
    if host in HOSTS:
        cpus = HOSTS[host](sched)
    else:
        cpus = 1

    def row(i, idx):
        # Scaled NUM_TASKS value followed by column ``idx`` of row ``i``
        # from every file in the group.
        return [int(first.data[i][NUM_INDEX]) * cpus] + [f.data[i][idx] for f in files]

    # Build both tables before writing anything, so a malformed row aborts
    # the call before any output file is created.
    avg = [row(i, AVG_INDEX) for i in range(num)]
    wc = [row(i, WC_INDEX) for i in range(num)]
    header = ['NUM_TASKS'] + [f.overhead.replace('-', '_') for f in files]

    if first.locks:
        lock_str = '_locks=%s' % first.locks
    else:
        lock_str = ''

    for stat, rows in (('avg', avg), ('wc', wc)):
        out_name = '%soh_host=%s_scheduler=%s%s_stat=%s%s.csv' % \
            (options.prefix, host, sched, lock_str, stat, options.tag)
        print('Generating %s.' % out_name)
        write_csv_file(out_name, [header] + rows, width=15)

if __name__ == '__main__':
    # Script entry point: load every data file named on the command line,
    # group the files by scheduler (and locks), and write the profile CSVs
    # for each group.
    parser = OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    # Assigned at module level on purpose: write_profiles() reads the
    # global `options` for the output prefix and tag.
    (options, files) = parser.parse_args()

    try:
        data = []
        for f in files:
            try:
                data.append(DataFile(f))
            except IOError as err:
                # Input files that raise IOError are reported and skipped so
                # the remaining files are still processed.  The parenthesised
                # single-argument print works on both Python 2 and Python 3.
                print("Skipping %s.\n(%s)" % (f, err))
        groups = group_by_scheduler(data)
        for sched in groups:
            write_profiles(groups[sched])
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly instead of printing a traceback.
        pass