aboutsummaryrefslogtreecommitdiffstats
path: root/parse/col_map.py
blob: 15e1d648bd527307532879cd7007b91332e28b9c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from collections import defaultdict

class ColMapBuilder(object):
    '''Accumulate the distinct values seen for each column, then build a
    ColMap from the columns that actually vary.'''

    def __init__(self):
        # column -> set of every value recorded for it via try_add().
        self.value_map = defaultdict(set)

    def build(self):
        '''Return a ColMap over the columns holding more than one value.

        Columns are ordered by descending number of distinct values, with
        ties broken by ascending column name. The builder's value_map is
        handed to the ColMap as-is (not copied).'''
        # Build a real list rather than using filter(): on Python 3 filter()
        # yields a one-shot iterator, but the column list is walked more than
        # once by its consumer.
        varying = [col for col, seen in self.value_map.items()
                   if len(seen) > 1]
        varying.sort(key=lambda col: (-len(self.value_map[col]), col))
        return ColMap(varying, self.value_map)

    def try_add(self, column, value):
        '''Record that value was observed for column.'''
        self.value_map[column].add(value)

    def try_remove(self, column):
        '''Forget column and its recorded values; unknown columns are
        ignored.'''
        self.value_map.pop(column, None)

class ColMap(object):
    '''An ordered list of column names with helpers to convert between
    key-value dicts, ordered value tuples and encoded strings.'''

    def __init__(self, col_list, values = None):
        '''Create a map over the columns in col_list, in that order.

        col_list may be any iterable of column names; values is stored
        untouched and returned by get_values().'''
        # Materialise once: col_list may be a one-shot iterator (such as the
        # result of filter() on Python 3), and it is walked several times
        # here and again by later method calls.
        self.col_list = list(col_list)
        self.values = values

        # minimums[i] is the shortest leading slice of column i that was not
        # already taken by an earlier column.
        self.minimums = []
        taken = set()
        for col in self.col_list:
            end = 1
            # Stop at the full name: once end reaches len(col) the slice can
            # no longer change, so an unbounded search would never finish
            # when the whole name is already taken.
            while end < len(col) and col[:end] in taken:
                end += 1
            prefix = col[:end]
            taken.add(prefix)
            self.minimums.append(prefix)

        # column name -> position in col_list.
        self.rev_map = dict((col, i) for i, col in enumerate(self.col_list))

    def columns(self):
        '''Return the ordered list of column names.'''
        return self.col_list

    def get_values(self):
        '''Return the values object supplied at construction (may be None).'''
        return self.values

    def get_key(self, kv):
        '''Convert a key-value dict into an ordered tuple of values.

        Columns absent from kv are represented by None.'''
        return tuple(kv[col] if col in kv else None for col in self.col_list)

    def get_kv(self, key):
        '''Convert an ordered tuple of values into a key-value dict.

        key may be shorter than the column list; an IndexError is raised if
        it is longer.'''
        return dict((self.col_list[i], val) for i, val in enumerate(key))

    def encode(self, kv, minimum=False):
        '''Convert a dict into a string of its items, emitted in reverse
        column order.

        Only columns present in kv are included. By default items look like
        "col=val" joined by "_"; with minimum=True they look like
        "prefix:val" joined by ", ", where prefix is the column's shortened
        name. Any "_" or "=" inside a name or value is replaced by "-".'''
        def escape(val):
            return str(val).replace("_", "-").replace("=", "-")

        if minimum:
            template = "%s:%s"
            separator = ", "
        else:
            template = "%s=%s"
            separator = "_"

        parts = []
        for col in reversed(self.col_list):
            if col not in kv:
                continue
            display = self.minimums[self.rev_map[col]] if minimum else col
            parts.append(template % (escape(display), escape(kv[col])))

        return separator.join(parts)

    @staticmethod
    def decode(string):
        '''Convert a string into a key-value dict.

        Expects "col=val" items joined by "_", as produced by encode() with
        minimum=False. All decoded values are strings. An empty string
        decodes to an empty dict; a malformed item raises ValueError.'''
        vals = {}
        # encode() of an empty dict yields "", which must round-trip.
        if not string:
            return vals
        for assignment in string.split("_"):
            k, v = assignment.split("=")
            vals[k] = v
        return vals

    def __contains__(self, col):
        return col in self.rev_map

    def __str__(self):
        return "<ColMap>%s" % (self.rev_map,)