#!/usr/bin/env python
"""
Do stuff with csv files.

Rows that share the same value in a key column are combined into a single
output row (element-wise average, maximum or minimum).
"""

import optparse
import defapp
import csv
import operator
from collections import defaultdict as defdict
from functools import reduce  # builtin on Python 2, functools-only on Python 3

o = optparse.make_option

opts = [
    #    o('-t', '--two', action='store', dest='double_val', nargs=2, type='int',
    #      help='A two-parameter option.'),
    o('-c', '--column', action='store', dest='col', type='int',
      help='The column on which to operate.'),
    #    o(None, '--true', action='store_true', dest='truth',
    #      help='A boolean flag value.'),
    #    o(None, '--degree', action='store', type='float', dest='thruthiness',
    #      help='Not quite absolut truth.'),
]

defaults = {
    'col': 0,
}


def make_vector_op(op):
    """Lift the binary function ``op`` to an element-wise operation on rows.

    The returned ``vector_op(a, b, defvalue=0)`` builds a new list as long as
    the longer of ``a`` and ``b``.  Element ``i`` is ``op(a[i], b[i])``; where
    one sequence has run out, ``defvalue`` stands in for its missing element.
    The operands are always passed in ``(a, b)`` order, so non-commutative
    functions (e.g. ``operator.sub``) give the expected result.
    """
    def vector_op(a, b, defvalue=0):
        shared = min(len(a), len(b))
        result = [op(x, y) for x, y in zip(a, b)]
        # At most one of the two tails below is non-empty.
        result.extend(op(x, defvalue) for x in a[shared:])
        result.extend(op(defvalue, y) for y in b[shared:])
        return result
    return vector_op


def make_scalar_op(op):
    """Lift ``op`` to a function applying it between each element and a scalar.

    The returned ``scalar_op(scalar, a)`` yields ``[op(x, scalar) for x in a]``;
    note that the scalar is the *second* operand of ``op``.
    """
    def scalar_op(scalar, a):
        return [op(x, scalar) for x in a]
    return scalar_op


row_add = make_vector_op(operator.add)
row_min = make_vector_op(min)
row_max = make_vector_op(max)


def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds each key's rows into one row.

    The returned ``_reduce(order, by_key)`` walks ``order`` and, for every key
    still present in ``by_key``, reduces its rows with ``row_op`` and yields
    ``fixup(key, rows, result)``.  Each key is removed from ``by_key`` once it
    has been handled; that is what makes repeated keys in ``order`` produce
    only a single output row (and it means ``by_key`` is consumed).
    """
    def _reduce(order, by_key):
        for key in order:
            if key in by_key:
                rows = by_key.pop(key)
                yield fixup(key, rows, reduce(row_op, rows))
    return _reduce


row_mul = make_scalar_op(operator.mul)
# operator.div only exists on Python 2; keep it there and fall back to
# truediv on interpreters that no longer provide it.
row_div = make_scalar_op(getattr(operator, 'div', operator.truediv))


def select_by_key(rows, col, cast=None):
    """Group ``rows`` by the value found in column ``col``.

    Returns ``(order, by_key)``: ``order`` lists the key of every accepted row
    in input order (duplicates included) and ``by_key`` maps each key to the
    list of its rows.  The key is taken from the row *before* casting; when
    ``cast`` is given every cell of the stored row is passed through it.
    Empty rows (which ``csv.reader`` produces for blank lines) are skipped
    because they have no key column.
    """
    by_key = defdict(list)
    order = []
    for r in rows:
        if not r:
            continue
        key = r[col]
        if cast:
            by_key[key].append([cast(x) for x in r])
        else:
            by_key[key].append(r)
        order.append(key)
    return (order, by_key)


class CsvApp(defapp.App):
    def __init__(self):
        defapp.App.__init__(self, opts, defaults)

    def ordered_transform(self, make_iterator):
        """Transform every input file and write the resulting rows as CSV.

        The first entry of ``self.args`` is dropped and the remaining ones are
        treated as input file names.  Each file is grouped by the configured
        column (all cells cast to float) and ``make_iterator(order, by_key)``
        supplies the rows that are written to ``self.outfile()``.
        An ``IOError`` is reported on standard output and ends processing.
        """
        # Slicing (rather than ``del files[0]``) tolerates an empty list.
        files = list(self.args)[1:]
        try:
            for fn in files:
                # read in content; the file is closed as soon as it is parsed
                with open(fn, 'r') as infile:
                    (order, by_key) = select_by_key(csv.reader(infile),
                                                    self.options.col, float)
                # write out
                csv.writer(self.outfile()).writerows(
                    make_iterator(order, by_key))
        except IOError as ex:
            print("Error: %s" % (ex,))

    # Average all rows with the same key in the selected column.
    def do_avg(self, _):
        def fixup_avg(key, rows, res):
            res = row_div(len(rows), res)
            # The key column was summed and divided like any other cell;
            # put the original key back in its place.
            res[self.options.col] = key
            return res
        self.ordered_transform(row_reduce(row_add, fixup_avg))

    # Element-wise maximum of all rows with the same key.
    def do_max(self, _):
        self.ordered_transform(row_reduce(row_max))

    # Element-wise minimum of all rows with the same key.
    def do_min(self, _):
        self.ordered_transform(row_reduce(row_min))


if __name__ == '__main__':
    CsvApp().launch()