1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
#!/usr/bin/env python
import defapp
from optparse import make_option as o
from os.path import splitext, basename, split
import os
import sys
from util import load_csv_file
from wsched import split_rows, group, filter_comments, print_row
def data_from(fname, keycol):
    """Load the CSV file `fname` and group its contents by column `keycol`.

    The loaded data is handed straight to `group()`; the callers in this
    file iterate over the result as (key, rows) pairs.
    """
    return group(load_csv_file(fname), keycol)
def merged_data(fnames, keycol, replace=True):
    """Yield the rows of all files in `fnames`, merged by key.

    Every file is loaded with data_from() and its (key, rows) groups are
    collected in a dictionary:

    - replace=True (default): a key seen again in a later file replaces the
      rows stored for that key so far.
    - replace=False: rows of an already-known key are appended to the rows
      stored for it (the stored rows must support ``extend``).

    A file that raises IOError is reported on stderr and skipped; groups
    taken from it before the error are kept.  Once all files have been
    processed the rows are yielded key by key in sorted key order.
    """
    merged = {}
    for path in fnames:
        try:
            for (k, chunk) in data_from(path, keycol):
                if replace or k not in merged:
                    # first occurrence of the key, or later file wins
                    merged[k] = chunk
                else:
                    merged[k].extend(chunk)
        except IOError as err:
            msg = "[!!] Skipping %s; %s." % (path, str(err))
            sys.stderr.write(msg + "\n")
    for k in sorted(merged):
        for row in merged[k]:
            yield row
def substituted_data(origname, files, keycol):
    """Yield the rows of `origname` with selected columns replaced.

    `files` is an iterable of (column index, file name) pairs.  For each
    pair, the file is loaded with data_from(); for every key that also
    occurs in the original file, the given column of the original rows is
    overwritten with the value from the corresponding row of that file.
    Rows are paired up with zip(), so surplus rows on either side are left
    alone.

    A substitution file that raises IOError is reported on stderr and
    skipped.  An IOError while loading `origname` itself is not handled
    here.  The rows are yielded in the key order returned by split_rows().
    """
    (rows_by_key, key_order) = split_rows(load_csv_file(origname), keycol)
    for (col, path) in files:
        try:
            for (k, new_rows) in data_from(path, keycol):
                if k not in rows_by_key:
                    # keys unknown to the original file are ignored
                    continue
                for old, new in zip(rows_by_key[k], new_rows):
                    old[col] = new[col]
        except IOError as err:
            msg = "[!!] Skipping %s; %s." % (path, str(err))
            sys.stderr.write(msg + "\n")
    for k in key_order:
        for row in rows_by_key[k]:
            yield row
# Command-line options understood by this script (optparse Option objects).
options = [
    # where the output goes and how it is formatted
    o('-o', '--output-prefix', dest='prefix', action='store'),
    o('-k', '--key-column', dest='keycol', type='int', action='store'),
    # long option only; there is no short form
    o(None, '--no-comments', dest='no_comments', action='store_true'),
    o('-w', '--col-width', dest='colwidth', type='int', action='store'),

    # may be given several times; values accumulate in a list
    o('-d', '--dir', dest='merge_dirs', action='append'),
    o('-s', '--sub', dest='sub_col', type='int', action='append'),
]
# Default values for the option destinations used by this script.
defaults = {
    'prefix': None,        # output prefix; merge()/sub() refuse to run without it
    'keycol': 0,           # column used as the row key
    'no_comments': False,  # comments are passed through unless disabled
    'colwidth': 10,        # field width of the row format string
    'precision': 3,        # digits after the decimal point in the row format
    'indent': 2,           # not read anywhere in this file
    'merge_dirs': [],      # extra directories given with -d
    'sub_col': [],         # column indices given with -s
}
class DataFileMerger(defapp.App):
    """Merge same-named data files found in several directories.

    For every data file given as an argument, the file's own directory and
    each directory given with ``-d`` are searched for a file with the same
    base name.  These files are either merged row-wise by key (the default)
    or, if ``-s`` was given, selected columns of the first file are
    overwritten with the columns of the other files.  The result is written
    to ``<prefix><basename>``, where the prefix comes from ``-o``.
    """

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.out = sys.stdout

    def _copy_comments(self, files, out):
        """Run filter_comments() on every source file unless --no-comments."""
        if self.options.no_comments:
            return
        for src in files:
            # presumably copies the comment lines of `src` to `out`;
            # see wsched.filter_comments
            filter_comments(src, out=out)

    def _process_all(self, verb, worker):
        """Apply `worker(bname, dirs, fout)` to every data file argument.

        `verb` is the word used in the progress line.  An IOError raised by
        the worker is reported on stderr and processing continues with the
        next file.  Without an output prefix nothing is processed.
        """
        if not self.options.prefix:
            print("Requires output prefix (-o).")
            return
        total = len(self.args)
        for i, datafile in enumerate(self.args):
            (fout, bname, dirs) = self.target_name(datafile)
            print("[%d/%d] %s {%s}/%s -> %s" %
                  (i + 1, total, verb, ",".join(dirs), bname, fout))
            try:
                worker(bname, dirs, fout)
            except IOError as ioe:
                sys.stderr.write("[!!] ")
                sys.stderr.write(str(ioe))
                sys.stderr.write("\n")

    def merge_file(self, bname, dirs, fout):
        """Write the key-wise merge of `bname` from all `dirs` to `fout`.

        Rows with the same key in a later directory replace those of an
        earlier one (merged_data() is called with its default replace=True).
        Relies on self.fmt, which default() sets up.
        """
        files = [os.path.join(d, bname) for d in dirs]
        # 'with' guarantees the output file is closed even when an IOError
        # propagates to the caller, which catches it and moves on.
        with open(fout, 'w') as out:
            self._copy_comments(files, out)
            for r in merged_data(files, self.options.keycol):
                print_row(r, out, self.fmt)

    def sub_column(self, bname, dirs, fout):
        """Write the first file with substituted columns to `fout`.

        The first directory provides the base file.  The i-th ``-s`` column
        index is paired with the file from the i-th additional directory;
        zip() drops surplus entries on either side.  Relies on self.fmt,
        which default() sets up.
        """
        files = [os.path.join(d, bname) for d in dirs]
        # See merge_file(): the output file must not stay open on errors.
        with open(fout, 'w') as out:
            self._copy_comments(files, out)
            # Materialize the pairs so they can be printed and iterated.
            srcs = list(zip(self.options.sub_col, files[1:]))
            print("%s <- %s + %s" % (fout, files[0], srcs))
            for r in substituted_data(files[0], srcs, self.options.keycol):
                print_row(r, out, self.fmt)

    def target_name(self, fname, extra=None):
        """Return (output file name, base name, directories) for `fname`.

        If `extra` is given it is inserted between the base name and its
        extension.  The directory list starts with the directory of `fname`
        followed by all directories given with ``-d``.
        """
        path, bname = os.path.split(fname)
        if extra:
            stem, ext = os.path.splitext(bname)
            bname = "%s%s%s" % (stem, extra, ext)
        dirs = [path] + self.options.merge_dirs
        return (self.options.prefix + bname, bname, dirs)

    def merge(self):
        """Merge every data file argument row-wise by key."""
        self._process_all("Merging", self.merge_file)

    def sub(self):
        """Substitute columns in every data file argument."""
        self._process_all("Column-merging", self.sub_column)

    def default(self, _):
        """Entry point: set up the row format and pick the mode of operation."""
        # e.g. colwidth=10, precision=3 yields "%10.3f"
        self.fmt = "%%%d.%df" % (self.options.colwidth, self.options.precision)
        # NOTE(review): looks like leftover debug output; kept so that the
        # script's stdout is unchanged.
        print(self.options.sub_col)
        if self.options.sub_col:
            self.sub()
        else:
            self.merge()
if __name__ == '__main__':
    # Executed as a script: build the application object and hand control
    # to its launch() method.
    app = DataFileMerger()
    app.launch()
|