# Source: root/parse/tuple_table.py
# Blob: e6f0cc5709c234a2b1716f529d694d36b1dea5f3
from collections import defaultdict
from point import SummaryPoint
from dir_map import DirMap

class ColMap(object):
    """Ordered registry of column names used to build tuple keys.

    ``col_list`` keeps the columns in the order they were first added and
    ``rev_map`` maps each column name to its index in ``col_list``.
    """

    def __init__(self):
        self.rev_map = {}
        self.col_list = []

    def columns(self):
        """Return the internal list of columns, in insertion order."""
        return self.col_list

    def get_key(self, kv):
        """Build the tuple key for the column -> value mapping ``kv``.

        The key has one slot per known column, in column order. Columns
        that are absent from ``kv`` contribute ``None``.

        Raises:
            Exception: if ``kv`` has more entries than the number of known
                columns found in it.
        """
        slots = []
        matched = 0

        for name in self.col_list:
            if name in kv:
                matched += 1
                slots.append(kv[name])
            else:
                slots.append(None)

        # Every entry of kv must have been consumed by a known column.
        if len(kv) > matched:
            raise Exception("column map '%s' missed field in map '%s'" %
                            (self.col_list, kv))

        return tuple(slots)

    def __contains__(self, col):
        """Return True if ``col`` has been added to this map."""
        return col in self.rev_map

    def get_map(self, tuple):
        """Turn a key tuple back into a column -> value dict.

        Position ``i`` of ``tuple`` is paired with the ``i``-th column.
        """
        named = {}
        for position, value in enumerate(tuple):
            named[self.col_list[position]] = value
        return named

    def try_add(self, column):
        """Register ``column`` at the next free index unless already known."""
        if column in self.rev_map:
            return
        self.rev_map[column] = len(self.col_list)
        self.col_list.append(column)

    def __str__(self):
        return "<ColMap>%s" % (self.rev_map,)

class TupleTable(object):
    """Groups points by the tuple key that a column map derives from a dict.

    ``table`` maps each key tuple to the list of points added under it.
    After the table has been reduced (done once, by ``write_result``) each
    value is instead a ``SummaryPoint`` built from that list.
    """

    def __init__(self, col_map):
        """Create an empty table keyed through ``col_map``.

        ``col_map`` must provide ``get_key``, ``get_map`` and ``columns``.
        """
        self.col_map = col_map
        self.table = defaultdict(list)
        self.reduced = False

    def add_exp(self, kv, point):
        """Record ``point`` under the key derived from ``kv``."""
        key = self.col_map.get_key(kv)
        self.table[key] += [point]

    def get_exps(self, kv):
        """Return the entry stored under the key derived from ``kv``.

        NOTE: ``table`` is a defaultdict, so asking for a key that was never
        added inserts (and returns) a new empty list for it.
        """
        key = self.col_map.get_key(kv)
        return self.table[key]

    def __reduce(self):
        """Replace every list of points with one ``SummaryPoint``.

        Raises:
            Exception: if the table has already been reduced.
        """
        if self.reduced:
            raise Exception("cannot reduce twice!")
        self.reduced = True
        # Snapshot the items because the values are rebound while looping.
        for key, values in list(self.table.items()):
            self.table[key] = SummaryPoint(str(key), values)

    def write_result(self, out_dir):
        """Reduce the table and feed it into a ``DirMap`` for ``out_dir``.

        For every entry, each column whose value ``float()`` accepts is
        passed to ``DirMap.add_point`` as the varied column, together with
        a dict of the entry's remaining columns and the summarised point.
        Columns with other values are skipped. Finally the ``DirMap`` is
        reduced and written.

        Raises:
            Exception: if called a second time (the table is already
                reduced). Errors raised by ``DirMap`` are propagated.
        """
        dir_map = DirMap(out_dir)
        self.__reduce()
        columns = self.col_map.columns()

        for key, point in self.table.items():
            kv = self.col_map.get_map(key)

            for col in columns:
                val = kv[col]

                # Only vary numbers. Otherwise, just have separate lines.
                # The try covers the numeric probe alone so that failures
                # inside add_point are not silently discarded.
                try:
                    float(val)
                except (TypeError, ValueError, OverflowError):
                    continue

                # Hand over a fresh dict without the varied column, so the
                # row's own mapping is never left in a half-modified state.
                others = dict((k, v) for k, v in kv.items() if k != col)
                dir_map.add_point(col, val, others, point)

        dir_map.reduce()
        dir_map.write()