#!/usr/bin/env python
"""Merge same-named CSV data files found in several directories.

Two modes are offered by ``DataFileMerger``:

* row merge (default): rows from all source files are grouped by a key
  column and written out ordered by key;
* column substitution (``-s``/``--sub``): selected columns of the first
  file are overwritten with the values found in the other files.

NOTE(review): the exact behavior of ``load_csv_file``, ``group``,
``split_rows``, ``filter_comments``, ``print_row`` and ``defapp.App`` is
defined in other modules; comments about them below are assumptions drawn
from how they are used here.
"""

import defapp

from optparse import make_option as o
from os.path import splitext, basename, split

import os
import sys

from util import load_csv_file
from wsched import split_rows, group, filter_comments, print_row


def _warn_skipped(fname, err):
    """Report on stderr that ``fname`` is being skipped because of ``err``."""
    sys.stderr.write("[!!] Skipping %s; %s.\n" % (fname, str(err)))


def data_from(fname, keycol):
    """Load ``fname`` and return its rows grouped on column ``keycol``.

    The result is whatever ``group`` returns; callers in this file iterate
    it as ``(key, rows)`` pairs.
    """
    data = load_csv_file(fname)
    return group(data, keycol)


def merged_data(fnames, keycol, replace=True):
    """Yield the rows of all ``fnames`` merged by key, in sorted key order.

    Args:
        fnames: paths of the files to merge, processed in the given order.
        keycol: index of the column used as the grouping key.
        replace: if true (the default), rows of a later file replace the
            rows an earlier file contributed for the same key; if false,
            they are appended to them instead.

    Files that raise ``IOError`` while being read are reported on stderr
    and skipped.
    """
    all_data = {}
    for fname in fnames:
        try:
            for (key, rows) in data_from(fname, keycol):
                if key in all_data and not replace:
                    all_data[key].extend(rows)
                else:
                    all_data[key] = rows
        except IOError as ioe:
            _warn_skipped(fname, ioe)
    for key in sorted(all_data):
        for row in all_data[key]:
            yield row


def substituted_data(origname, files, keycol):
    """Yield the rows of ``origname`` with selected columns substituted.

    Args:
        origname: path of the file that supplies the rows and their order.
        files: iterable of ``(column index, file name)`` pairs; for every
            key present in both files, that column of each original row is
            overwritten with the value from the row at the same position
            in the other file.
        keycol: index of the column used to match rows between files.

    Substitution files that raise ``IOError`` are reported on stderr and
    skipped.  An ``IOError`` while loading ``origname`` itself propagates.
    """
    data = load_csv_file(origname)
    (by_key, order) = split_rows(data, keycol)
    for (idx, fname) in files:
        try:
            for (key, rows) in data_from(fname, keycol):
                if key in by_key:
                    # zip() stops at the shorter list, so surplus rows on
                    # either side are left untouched / ignored.
                    for orow, nrow in zip(by_key[key], rows):
                        # substitute
                        orow[idx] = nrow[idx]
        except IOError as ioe:
            _warn_skipped(fname, ioe)
    for key in order:
        for row in by_key[key]:
            yield row


options = [
    # output options
    o('-o', '--output-prefix', action='store', dest='prefix'),
    o('-k', '--key-column', action='store', type='int', dest='keycol'),
    o(None, '--no-comments', action='store_true', dest='no_comments'),
    o('-w', '--col-width', action='store', type='int', dest='colwidth'),
    o('-d', '--dir', action='append', dest='merge_dirs'),
    o('-s', '--sub', action='append', type='int', dest='sub_col'),
]

# 'precision' and 'indent' have no command-line option above; they are only
# available through these defaults.
defaults = {
    'prefix'      : None,
    'keycol'      : 0,
    'no_comments' : False,
    'colwidth'    : 10,
    'precision'   : 3,
    'indent'      : 2,
    'merge_dirs'  : [],
    'sub_col'     : [],
}


class DataFileMerger(defapp.App):
    """Command-line application that merges data files across directories."""

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.out = sys.stdout

    def _copy_comments(self, files, out):
        """Run ``filter_comments`` on every source file unless disabled.

        Presumably this copies the comment lines of each file to ``out``;
        verify against ``wsched.filter_comments``.
        """
        if not self.options.no_comments:
            for src in files:
                filter_comments(src, out=out)

    def merge_file(self, bname, dirs, fout):
        """Row-merge ``bname`` from every directory in ``dirs`` into ``fout``.

        Raises:
            IOError: if ``fout`` cannot be opened (or a helper raises it).
        """
        # 'with' guarantees the output is closed even when a helper raises;
        # merge() catches IOError and continues with the next file.
        with open(fout, 'w') as out:
            files = [os.path.join(d, bname) for d in dirs]
            self._copy_comments(files, out)
            for r in merged_data(files, self.options.keycol):
                print_row(r, out, self.fmt)

    def sub_column(self, bname, dirs, fout):
        """Write ``fout`` as the first file with columns substituted.

        The first directory in ``dirs`` supplies the base file; the i-th
        ``--sub`` column index is paired with the file from the (i+1)-th
        directory.

        Raises:
            IOError: if ``fout`` or the base file cannot be opened.
        """
        with open(fout, 'w') as out:
            files = [os.path.join(d, bname) for d in dirs]
            self._copy_comments(files, out)
            # Materialize the pairs so they can be both printed and iterated
            # regardless of whether zip() returns a list or an iterator.
            srcs = list(zip(self.options.sub_col, files[1:]))
            print("%s <- %s + %s" % (fout, files[0], srcs))
            for r in substituted_data(files[0], srcs, self.options.keycol):
                print_row(r, out, self.fmt)

    def target_name(self, fname, extra=None):
        """Derive output path, base name and source directories for ``fname``.

        Args:
            fname: path of a data file given on the command line.
            extra: optional string inserted between the base name and its
                extension.

        Returns:
            ``(output path, base name, directories)`` where the output path
            is the output prefix concatenated with the base name and the
            directories are ``fname``'s own directory followed by all
            ``--dir`` directories.
        """
        path, bname = os.path.split(fname)
        if extra:
            bname, ext = os.path.splitext(bname)
            bname = "%s%s%s" % (bname, extra, ext)
        return (self.options.prefix + bname,
                bname,
                [path] + self.options.merge_dirs)

    def _process_all(self, label, action):
        """Apply ``action(bname, dirs, fout)`` to every command-line file.

        ``label`` is the verb shown in the progress line.  An ``IOError``
        raised for one file is reported on stderr and does not stop the
        remaining files from being processed.
        """
        if not self.options.prefix:
            print("Requires output prefix (-o).")
            return
        total = len(self.args)
        for i, datafile in enumerate(self.args):
            (fout, bname, dirs) = self.target_name(datafile)
            print("[%d/%d] %s {%s}/%s -> %s" %
                  (i + 1, total, label, ",".join(dirs), bname, fout))
            try:
                action(bname, dirs, fout)
            except IOError as ioe:
                sys.stderr.write("[!!] %s\n" % str(ioe))

    def merge(self):
        """Row-merge every file given on the command line."""
        self._process_all("Merging", self.merge_file)

    def sub(self):
        """Column-substitute every file given on the command line."""
        self._process_all("Column-merging", self.sub_column)

    def default(self, _):
        """Build the number format and run the selected mode.

        Presumably invoked by ``defapp.App.launch`` as the default command;
        the argument is ignored.
        """
        self.fmt = "%%%d.%df" % (self.options.colwidth, self.options.precision)
        # NOTE(review): looks like leftover debug output; kept so that the
        # program's stdout stays unchanged.
        print(self.options.sub_col)
        if self.options.sub_col:
            self.sub()
        else:
            self.merge()


if __name__ == '__main__':
    DataFileMerger().launch()