diff options
Diffstat (limited to 'merge.py')
-rwxr-xr-x | merge.py | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/merge.py b/merge.py new file mode 100755 index 0000000..153c0c2 --- /dev/null +++ b/merge.py | |||
@@ -0,0 +1,150 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | import defapp | ||
4 | from optparse import make_option as o | ||
5 | from os.path import splitext, basename, split | ||
6 | |||
7 | import os | ||
8 | |||
9 | import sys | ||
10 | |||
11 | from util import load_csv_file | ||
12 | |||
13 | from wsched import split_rows, group, filter_comments, print_row | ||
14 | |||
15 | |||
def data_from(fname, keycol):
    """Load the CSV file `fname` and group its rows by column `keycol`.

    Returns whatever `group()` produces for the loaded data; callers in this
    file iterate the result as (key, rows) pairs.  Any exception raised by
    `load_csv_file` propagates to the caller.
    """
    return group(load_csv_file(fname), keycol)
19 | |||
def merged_data(fnames, keycol, replace=True):
    """Yield the rows of several data files merged by key.

    Each file in `fnames` is loaded and grouped on column `keycol` via
    `data_from`.  Files are processed in order:

    * with `replace` true (the default), the rows of a later file replace
      the rows of an earlier file that share the same key;
    * with `replace` false, rows for an already-seen key are appended.

    A file that raises IOError is reported on stderr and skipped.  After all
    files are processed, rows are yielded key by key in sorted key order.

    This is a generator: no file is read until iteration starts.
    """
    all_data = {}
    for fname in fnames:
        try:
            for (key, rows) in data_from(fname, keycol):
                if key in all_data and not replace:
                    all_data[key].extend(rows)
                else:
                    # Store a private list rather than the object handed out
                    # by group(): its container type is not visible here, and
                    # the append branch above needs .extend() without
                    # mutating the grouping's own data.
                    all_data[key] = list(rows)
        except IOError as ioe:
            # Best effort: a missing/unreadable file must not abort the merge.
            sys.stderr.write("[!!] Skipping %s; %s.\n" % (fname, str(ioe)))
    for key in sorted(all_data):
        for row in all_data[key]:
            yield row
34 | |||
def substituted_data(origname, files, keycol):
    """Yield the rows of `origname` with selected columns taken from other files.

    `files` is an iterable of (column index, file name) pairs.  For every
    pair, the file is grouped on `keycol`; for each key that also exists in
    the original file, the given column of the original rows is overwritten
    in place with the value from the corresponding row of the other file.
    Rows are paired positionally with zip(), so surplus rows on either side
    are left untouched / ignored.

    A file that raises IOError is reported on stderr and skipped.  Rows are
    finally yielded following the key order returned by `split_rows`.
    """
    base_rows = load_csv_file(origname)
    by_key, order = split_rows(base_rows, keycol)

    for col, fname in files:
        try:
            for key, new_rows in data_from(fname, keycol):
                if key not in by_key:
                    continue
                for old_row, new_row in zip(by_key[key], new_rows):
                    # substitute
                    old_row[col] = new_row[col]
        except IOError as ioe:
            sys.stderr.write("[!!] Skipping %s; %s.\n" % (fname, str(ioe)))

    for key in order:
        for row in by_key[key]:
            yield row
50 | |||
51 | |||
# Command-line options understood by the merger.
options = [
    # output options
    o('-o', '--output-prefix',
      action='store', dest='prefix'),
    o('-k', '--key-column',
      action='store', type='int', dest='keycol'),
    o(None, '--no-comments',
      action='store_true', dest='no_comments'),
    o('-w', '--col-width',
      action='store', type='int', dest='colwidth'),
    # repeatable options: every occurrence is appended to a list
    o('-d', '--dir',
      action='append', dest='merge_dirs'),
    o('-s', '--sub',
      action='append', type='int', dest='sub_col'),
]

# Default option values.  'precision' and 'indent' have no matching
# command-line switch in the table above.
defaults = dict(
    prefix=None,
    keycol=0,
    no_comments=False,
    colwidth=10,
    precision=3,
    indent=2,
    merge_dirs=[],
    sub_col=[],
)
72 | |||
73 | |||
class DataFileMerger(defapp.App):
    """Command-line application that merges same-named data files.

    For every data file given as an argument, files with the same base name
    are looked up in the argument's own directory and in every directory
    passed with -d/--dir.  Depending on whether -s/--sub was given, the
    files are either merged row-wise by key (`merge`) or selected columns of
    the first file are replaced by columns of the others (`sub`).  Results
    are written to `<output prefix><base name>`.
    """

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.out = sys.stdout

    def _copy_comments(self, files, out):
        """Pass every source file through filter_comments() unless disabled."""
        if not self.options.no_comments:
            for src in files:
                filter_comments(src, out=out)

    def merge_file(self, bname, dirs, fout):
        """Merge `bname` from each directory in `dirs` into the file `fout`.

        Raises IOError if `fout` cannot be opened or a helper fails with
        one; the output file is closed in either case.
        """
        files = [os.path.join(d, bname) for d in dirs]
        # 'with' guarantees the handle is released even if a helper raises.
        with open(fout, 'w') as out:
            self._copy_comments(files, out)
            for r in merged_data(files, self.options.keycol):
                print_row(r, out, self.fmt)

    def sub_column(self, bname, dirs, fout):
        """Write `bname` from dirs[0] to `fout` with substituted columns.

        The i-th column index given with -s/--sub is paired with the file
        found in the i-th additional directory (dirs[1:]); surplus entries
        on either side are dropped by zip().

        Raises IOError if `fout` cannot be opened or a helper fails with
        one; the output file is closed in either case.
        """
        files = [os.path.join(d, bname) for d in dirs]
        with open(fout, 'w') as out:
            self._copy_comments(files, out)
            # list() keeps the pairs printable and re-iterable regardless of
            # whether zip() returns a list or an iterator.
            srcs = list(zip(self.options.sub_col, files[1:]))
            sys.stdout.write("%s <- %s + %s\n" % (fout, files[0], srcs))
            for r in substituted_data(files[0], srcs,
                                      self.options.keycol):
                print_row(r, out, self.fmt)

    def target_name(self, fname, extra=None):
        """Return (output path, base name, source directories) for `fname`.

        If `extra` is given it is inserted between the base name's stem and
        its extension.  The directory list starts with the directory part of
        `fname` followed by all -d/--dir directories.
        """
        path, bname = os.path.split(fname)
        if extra:
            bname, ext = os.path.splitext(bname)
            bname = "%s%s%s" % (bname, extra, ext)
        return (self.options.prefix + bname, bname,
                [path] + self.options.merge_dirs)

    def _process_all(self, verb, worker):
        """Run `worker(bname, dirs, fout)` for every argument file.

        Prints a progress line (using `verb`) per file.  An IOError raised
        by `worker` is reported on stderr and processing continues with the
        next file.  Without an output prefix nothing is processed.
        """
        if not self.options.prefix:
            sys.stdout.write("Requires output prefix (-o).\n")
            return
        total = len(self.args)
        for i, datafile in enumerate(self.args):
            (fout, bname, dirs) = self.target_name(datafile)
            sys.stdout.write("[%d/%d] %s {%s}/%s -> %s\n" %
                             (i + 1, total, verb, ",".join(dirs), bname, fout))
            try:
                worker(bname, dirs, fout)
            except IOError as ioe:
                sys.stderr.write("[!!] %s\n" % (str(ioe),))

    def merge(self):
        """Row-merge every argument file with its namesakes."""
        self._process_all("Merging", self.merge_file)

    def sub(self):
        """Column-substitute every argument file from its namesakes."""
        self._process_all("Column-merging", self.sub_column)

    def default(self, _):
        """Entry action: pick column substitution or row merge.

        Presumably invoked by defapp.App.launch() -- confirm against defapp.
        """
        # Number format handed to print_row(), e.g. "%10.3f".
        self.fmt = "%%%d.%df" % (self.options.colwidth, self.options.precision)
        # NOTE(review): echoes the parsed -s values; looks like leftover
        # debug output but is kept so the program's stdout is unchanged.
        sys.stdout.write("%s\n" % (self.options.sub_col,))
        if self.options.sub_col:
            self.sub()
        else:
            self.merge()
147 | |||
# Script entry point: build the merger application and hand control to it.
if __name__ == '__main__':
    merger = DataFileMerger()
    merger.launch()
150 | |||