parse/point.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

'''
Too much duplicate code in this file
'''

import copy
import numpy as np
from enum import Enum
from collections import defaultdict

# The kinds of statistic tracked for a measurement.  The rest of this file
# iterates over Type, tests membership with `in`, and reads the members as
# attributes (Type.Min, Type.Max, Type.Avg, Type.Var).
# NOTE(review): Enum here is called with a single list of names, which is not
# the standard-library Enum call convention -- presumably a project-local
# `enum` module; confirm.
Type = Enum(['Min','Max','Avg','Var'])
# Default summary configuration, indexed as typemap[summary_type][base_type].
# A truthy flag asks Summary to compute that summary statistic across a group
# of measurements for the given base statistic; by default every combination
# is enabled.  There is no Type.Var row because Summary only reduces with
# min, max and avg.
default_typemap = {Type.Max : {Type.Max : 1, Type.Min : 1, Type.Avg : 1, Type.Var : 1},
                   Type.Min : {Type.Max : 1, Type.Min : 1, Type.Avg : 1, Type.Var : 1},
                   Type.Avg : {Type.Max : 1, Type.Min : 1, Type.Avg : 1, Type.Var : 1}}

def make_typemap():
    '''Return a fresh copy of default_typemap.

    The copy is deep, so the caller may switch individual flags on or off
    without affecting default_typemap or any other copy.
    '''
    fresh_map = copy.deepcopy(default_typemap)
    return fresh_map

def dict_str(adict, sep="\n"):
    '''Format a mapping as "key: value" entries joined by `sep`.

    Entries are emitted in sorted key order.  A value that can be rendered
    with "%6.3f" is shown as a fixed-point number; any other value is shown
    through its ordinary string conversion.  Keys are right-aligned in a
    20-character column when `sep` is a newline and in a 4-character column
    otherwise; values are right-aligned in a 9-character column.

    adict -- mapping to format
    sep   -- string placed between entries

    Returns the formatted string ("" for an empty mapping).
    '''
    def num_str(v):
        try:
            float(v)
            return "%6.3f" % v
        except (TypeError, ValueError, OverflowError):
            # Not a number we can format: fall back to the value itself.
            # Only conversion failures are caught, so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            return v

    size = 20 if sep == "\n" else 4
    entry_fmt = "%" + str(size) + "s: %9s"
    # items() (not iteritems()) so this works on both Python 2 and Python 3.
    return sep.join([entry_fmt % (k, num_str(v))
                     for (k, v) in sorted(adict.items())])

class Measurement(object):
    '''Statistics for one measured quantity, keyed by members of Type.

    Attributes:
    id    -- caller-supplied identifier
    stats -- defaultdict mapping a Type member to its recorded value

    Every keyed access (read, write, `in`) validates the key against Type
    and raises AttributeError for anything else.
    '''

    def __init__(self, id=None, kv=None, default=list):
        '''Create a measurement.

        id      -- identifier stored on the instance
        kv      -- optional mapping of Type member -> value used to seed the
                   statistics; each key is validated as it is stored
        default -- factory for the value produced when a statistic that was
                   never set is read
        '''
        self.id = id
        self.stats = defaultdict(default)
        # kv defaults to None rather than a shared mutable {} literal.
        if kv is not None:
            for k, v in kv.items():
                self[k] = v

    def from_array(self, array):
        '''Set Max, Avg, Var and Min from a sequence of samples.

        Returns self so the call can be chained onto the constructor.
        '''
        array = np.array(array)
        self[Type.Max] = array.max()
        self[Type.Avg] = array.mean()
        self[Type.Var] = array.var()
        self[Type.Min] = array.min()
        return self

    def __check_type(self, stat_type):
        '''Raise AttributeError unless stat_type is a member of Type.'''
        if stat_type not in Type:
            # One-element tuple so a tuple-valued key cannot break formatting.
            raise AttributeError("Not a valid type '%s'" % (stat_type,))

    def __getitem__(self, type):
        '''Return the value stored for `type`.

        Because stats is a defaultdict, reading a statistic that was never
        set stores and returns a fresh default value.
        '''
        self.__check_type(type)
        return self.stats[type]

    def __iter__(self):
        '''Iterate over (type, value) pairs of the stored statistics.'''
        # iter(...items()) yields a real iterator on Python 2 and Python 3.
        return iter(self.stats.items())

    def __contains__(self, type):
        '''Return whether a value has been stored for `type`.'''
        self.__check_type(type)
        return type in self.stats

    def __setitem__(self, t, value):
        '''Store `value` as the statistic `t`.'''
        self.__check_type(t)
        # Numpy returns single memmapped values which can't be pickled
        # Convert them to floats which can be
        if isinstance(value, np.memmap):
            value = float(value)
        self.stats[t] = value

    def __str__(self):
        return "%s" % dict_str(self.stats, " ")

class Summary(Measurement):
    '''Min/Max/Avg roll-up of a group of Measurements.

    Indexing a Summary by a summary type (Type.Min, Type.Max or Type.Avg)
    yields a Measurement whose entries hold that reduction of each base
    statistic across the input measurements.  Which (summary, base)
    combinations are computed is controlled by the typemap.
    '''

    def __init__(self, id="", measures=None, typemap=default_typemap):
        '''Summarize `measures`.

        id       -- identifier for the summary and its per-type Measurements
        measures -- Measurements to summarize; when empty or omitted the
                    summary is left unpopulated
        typemap  -- typemap[summary_type][base_type] flags selecting what to
                    compute

        Raises ValueError if a measurement lacks a base statistic that the
        typemap requires.
        '''
        super(Summary, self).__init__(id, default=Measurement)

        if measures:
            self.__check_types(measures, typemap)
            self.__summarize(measures, typemap)

    def __check_types(self, measures, typemap):
        '''Verify every measurement holds every statistic the typemap needs.'''
        required_types = self.__get_required(typemap)
        for m in measures:
            for required in required_types:
                if required not in m:
                    raise ValueError("measurement '%s' missing type '%s'" %
                                     (self.id, required))

    def __summarize(self, measures, typemap):
        '''Populate self[summary_type][base_type] for each enabled pair.'''
        for sum_type in Type:
            self[sum_type] = Measurement(self.id)

        def avg(vals):
            # float() keeps Python 2 from truncating an all-integer average.
            return sum(vals) / float(len(vals))

        reducers = ((Type.Min, min), (Type.Max, max), (Type.Avg, avg))
        for base_type in Type:
            for sum_type, func in reducers:
                if typemap[sum_type][base_type]:
                    val = func([m[base_type] for m in measures])
                    self[sum_type][base_type] = val

    def __get_required(self, typemap):
        '''Return the base types enabled for at least one summary type.'''
        required = []
        for base_type in Type:
            # values() (not itervalues()) works on Python 2 and Python 3.
            matches = [t[base_type] for t in typemap.values()]
            # any() rather than bool(sum(...)) so flags cannot cancel out.
            if any(matches):
                required.append(base_type)
        return required

class ExpPoint(object):
    '''A named collection of Measurements, keyed by statistic name.

    Attributes:
    id    -- identifier of the point
    stats -- defaultdict mapping a key to its Measurement

    Only Measurement instances may be stored; the keys are not validated.
    '''

    def __init__(self, id="", init=None, default=Measurement):
        '''Create a point.

        id      -- identifier stored on the instance
        init    -- optional mapping of key -> Measurement used to seed the
                   point; each value is validated as it is stored
        default -- factory for the value produced when a key that was never
                   set is read
        '''
        self.id = id
        self.stats = defaultdict(default)
        # init defaults to None rather than a shared mutable {} literal.
        if init is not None:
            for key, value in init.items():
                self[key] = value

    def __check_val(self, obj):
        '''Raise AttributeError unless obj is a Measurement.'''
        if not isinstance(obj, Measurement):
            # One-element tuple so a tuple value cannot break formatting.
            raise AttributeError("Not a valid measurement '%s'" % (obj,))

    def __getitem__(self, type):
        '''Return the entry for `type`.

        Because stats is a defaultdict, reading a key that was never set
        stores and returns a fresh default value.
        '''
        return self.stats[type]

    def __iter__(self):
        '''Iterate over (key, measurement) pairs.'''
        # iter(...items()) yields a real iterator on Python 2 and Python 3.
        return iter(self.stats.items())

    def __contains__(self, type):
        return type in self.stats

    def __setitem__(self, type, value):
        '''Store `value` under `type`; value must be a Measurement.'''
        self.__check_val(value)
        self.stats[type] = value

    def __str__(self):
        return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats))

    def get_stats(self):
        '''Return a list of the keys currently stored.'''
        # list(...) keeps the return type a list on Python 3 as well.
        return list(self.stats)


class SummaryPoint(ExpPoint):
    '''An ExpPoint holding one Summary per statistic found in a set of points.'''

    def __init__(self, id="", points=None, typemap=default_typemap):
        '''Summarize `points`.

        id      -- identifier; the point's id becomes "Summary-<id>"
        points  -- objects exposing a `stats` mapping of key -> measurement
                   (e.g. ExpPoints); omitted or empty gives an empty summary
        typemap -- passed through to each Summary
        '''
        super(SummaryPoint, self).__init__("Summary-%s" % id,
                                           default=Summary)

        # Collect, per key, the measurements contributed by every point.
        grouped = defaultdict(list)

        # points defaults to None rather than a shared mutable [] literal.
        for exp in points or ():
            for name, measure in exp.stats.items():
                grouped[name].append(measure)

        for key, measures in grouped.items():
            self[key] = Summary(key, measures, typemap)