aboutsummaryrefslogtreecommitdiffstats
path: root/parse/dir_map.py
blob: 11c872a356eb5d0f808a6dae3f3935d0f57062d0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import os
import re

from collections import defaultdict

class DirMap(object):
    """Tree of named nodes that mirrors a directory layout.

    Each node has named children and a list of value rows.  ``write`` turns
    nodes that hold values into comma-separated text files and the other
    nodes into directories; ``read`` rebuilds a tree from ``.csv`` files
    found under a directory.
    """

    class Node(object):
        """A tree node: a parent link, named children and a list of values."""

        def __init__(self, parent=None):
            self.parent = parent
            # Looking up a missing key creates (and stores) a new child node
            # whose parent is this node.
            self.children = defaultdict(lambda: DirMap.Node(self))
            self.values = []

        def heir(self, generation=1):
            """Return the descendant ``generation`` levels down the first-child chain.

            ``heir(0)`` is the node itself.  Returns ``None`` when the chain
            of first children ends before ``generation`` levels are reached.
            """
            node = self
            while generation:
                if not node.children:
                    return None
                # First child in the dict's iteration order.
                node = next(iter(node.children.values()))
                generation -= 1
            return node

        def leafs(self, path=(), offset=0):
            """Yield ``(path, node)`` pairs for the frontier of this subtree.

            ``path`` is the list of child names leading to the yielded node,
            prefixed with the ``path`` argument.  With ``offset=0`` the nodes
            without children are yielded.  With a larger ``offset`` the
            descent stops at a node as soon as its ``offset``-th heir (see
            ``heir``) is missing or has no children.
            """
            path = list(path)  # private copy; the caller's sequence is not mutated
            check_node = self.heir(offset)
            if check_node and check_node.children:
                for child_name, child_node in list(self.children.items()):
                    for leaf in child_node.leafs(path + [child_name], offset):
                        yield leaf
            else:
                yield (path, self)

    def __init__(self):
        self.root = DirMap.Node(None)
        self.values = []

    def add_values(self, path, values):
        """Append ``values`` to the node reached by following ``path`` from the root.

        Nodes missing along ``path`` are created on the way.
        """
        node = self.root
        for name in path:
            node = node.children[name]
        node.values += values

    def remove_childless(self):
        """Prune nodes that end up with neither children nor values.

        The tree is processed bottom-up.  A node holding exactly one value
        row has its values cleared, so such a node is pruned as well when it
        has no children.
        NOTE(review): dropping single-row value lists is kept from the
        original code; confirm this is intended.
        """
        def prune(node):
            # Iterate over a snapshot because entries are deleted in the loop.
            for key, child in list(node.children.items()):
                prune(child)
                if not (child.children or child.values):
                    del node.children[key]

            if len(node.values) == 1:
                node.values = []

        prune(self.root)

    def write(self, out_dir):
        """Write the tree below ``out_dir``.

        A node with values is written as a text file: one comma-separated
        line per value row, ordered by the first column.  A node without
        values becomes a directory (created with ``os.mkdir`` when it does
        not exist yet).  Children are written beneath their parent's path.
        """
        def write_node(path, node):
            out_path = "/".join(path)
            if node.values:
                # Sort on the real first column; sorting the formatted
                # strings would only compare their first character.
                rows = sorted((tuple(row) for row in node.values),
                              key=lambda row: row[0])
                lines = [",".join(str(cell) for cell in row) for row in rows]
                with open(out_path, "w") as f:
                    f.write("\n".join(lines) + "\n")
            elif not os.path.isdir(out_path):
                os.mkdir(out_path)

            for key, child in list(node.children.items()):
                write_node(path + [key], child)

        write_node([out_dir], self.root)

    def leafs(self, offset=0):
        """Yield ``(path, node)`` pairs from the root; see ``Node.leafs``."""
        for leaf in self.root.leafs([], offset):
            yield leaf

    @staticmethod
    def read(in_dir):
        """Build a ``DirMap`` from the ``.csv`` files found under ``in_dir``.

        Directories are walked recursively (entries in sorted order) and
        files whose name does not end in ``.csv`` are skipped.  Every row of
        a file becomes a tuple whose whole-number entries are ``int`` and
        whose other entries are ``float``.  The node path is the file path
        relative to ``in_dir``, split on ``/``.

        Raises ``ValueError`` when ``in_dir`` does not exist.
        """
        dir_map = DirMap()
        if not os.path.exists(in_dir):
            raise ValueError("Can't load from nonexistent path : %s" % in_dir)

        def read_path(path):
            if os.path.isdir(path):
                # Explicit loop: a lazy map() would never run the recursion.
                for entry in sorted(os.listdir(path)):
                    read_path(path + "/" + entry)
                return

            if not path.endswith(".csv"):
                return

            # ndmin=2 keeps a single-row file as one row instead of a flat
            # array of scalars.
            data = np.loadtxt(path, delimiter=",", ndmin=2)

            # Convert to tuples of ints if possible, else floats
            values = [tuple(float(a) if a % 1 else int(a) for a in row)
                      for row in data]

            if path.startswith(in_dir):
                stripped = path[len(in_dir):]
            else:
                stripped = path
            path_arr = [part for part in stripped.split("/") if part != '']

            dir_map.add_values(path_arr, values)

        read_path(in_dir)

        return dir_map

    def __str__(self):
        """Return an indented listing of child names, with values at childless nodes."""
        def render(node, level):
            header = "  " * level
            if not node.children:
                return "%s%s\n" % (header, str(node.values) if node.values else "")
            parts = []
            for key, child in list(node.children.items()):
                parts.append("%s/%s\n" % (header, key))
                parts.append(render(child, level + 1))
            return "".join(parts)
        return render(self.root, 1)