#!/usr/bin/env python
"""
Do stuff with csv files.

The module offers a handful of row-wise helpers (element-wise add/min/max,
scalar multiply/divide, transpose, column reordering, grouping by a key
column) and a ``CsvApp`` class exposing them as ``do_avg``, ``do_max``,
``do_min``, ``do_transpose`` and ``do_reorder``.  How those handlers are
dispatched is decided by ``defapp.App``, which is not part of this file.
"""

import optparse
import defapp
import csv
import operator
from collections import defaultdict as defdict
from functools import reduce  # builtin on Python 2, functools-only on Python 3

o = optparse.make_option

opts = [
    o('-x', '--exchange', action='append', dest='col_xchg', nargs=2, type='int',
      help='Columns that should be switched with reorder.'),
    o('-c', '--column', action='store', dest='col', type='int',
      help='The column on which to operate.'),
    # o(None, '--true', action='store_true', dest='truth',
    #   help='A boolean flag value.'),
    # o(None, '--degree', action='store', type='float', dest='thruthiness',
    #   help='Not quite absolut truth.'),
]

# Keys must match the ``dest`` names used in ``opts``.
defaults = {
    'col': 0,
    'col_xchg': [],
}


def make_vector_op(op):
    """Lift the binary function ``op`` to an element-wise operation on rows.

    The returned ``vector_op(a, b, defvalue=0)`` yields a new list as long as
    the longer operand.  Each element is ``op(longer[i], shorter[i])``; where
    the shorter operand has run out, ``op(longer[i], defvalue)`` is used.
    Note that the longer operand is always passed first (``b`` counts as the
    longer one when both have equal length), so the result is independent of
    argument order only for commutative ``op``.
    """
    def vector_op(a, b, defvalue=0):
        if len(a) > len(b):
            longer, shorter = a, b
        else:
            longer, shorter = b, a
        paired = [op(x, y) for x, y in zip(longer, shorter)]
        padded = [op(x, defvalue) for x in longer[len(shorter):]]
        return paired + padded
    return vector_op


def make_scalar_op(op):
    """Lift ``op`` to ``scalar_op(scalar, a)`` -> ``[op(x, scalar) for x in a]``."""
    def scalar_op(scalar, a):
        return [op(x, scalar) for x in a]
    return scalar_op


row_add = make_vector_op(operator.add)
row_min = make_vector_op(min)
row_max = make_vector_op(max)


def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds every group of rows into one row.

    The returned ``_reduce(order, by_key)`` walks ``order`` and, the first
    time it meets each key, combines all rows stored under that key with
    ``row_op`` and yields ``fixup(key, rows, combined)``.  The key is removed
    from ``by_key`` afterwards, so repeated keys in ``order`` produce a single
    output row and ``by_key`` is consumed by the iteration.
    """
    def _reduce(order, by_key):
        for key in order:
            if key in by_key:
                rows = by_key[key]
                res = reduce(row_op, rows)
                del by_key[key]
                yield fixup(key, rows, res)
    return _reduce


row_mul = make_scalar_op(operator.mul)
# ``operator.div`` only exists on Python 2; fall back to ``truediv`` elsewhere
# so that Python 2 results stay exactly as they were.
row_div = make_scalar_op(getattr(operator, 'div', operator.truediv))


def transpose(rows):
    """Yield the columns of ``rows`` as lists, padding short rows with 0.

    ``rows`` is materialised first; nothing is yielded for empty input.
    """
    rows = list(rows)
    if not rows:
        return
    width = max(len(row) for row in rows)
    for col in range(width):
        yield [row[col] if col < len(row) else 0 for row in rows]


def reorder_columns(rows, xchg_pairs):
    """Yield each row after swapping the column pairs in ``xchg_pairs``.

    Rows are modified in place, and the swaps are applied in the order given.
    An out-of-range index raises ``IndexError`` when the row is produced.
    """
    for r in rows:
        for (x, y) in xchg_pairs:
            r[x], r[y] = r[y], r[x]
        yield r


def select_by_key(rows, col, cast=None):
    """Group ``rows`` by the value found in column ``col``.

    Returns ``(order, by_key)``: ``order`` lists the key of every row in input
    order (duplicates included) and ``by_key`` maps each key to its rows.  The
    key is the raw cell value; when ``cast`` is given it is applied to every
    cell of the stored rows, but not to the key itself.
    """
    by_key = defdict(list)
    order = []
    for r in rows:
        key = r[col]
        if cast:
            by_key[key].append([cast(x) for x in r])
        else:
            by_key[key].append(r)
        order.append(key)
    return (order, by_key)


class CsvApp(defapp.App):
    """Command line front end for the row helpers in this module."""

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)
        # Options are 1-based on the command line, indices are 0-based here.
        # NOTE(review): the default ``col`` of 0 becomes -1, i.e. the last
        # column -- confirm that this is the intended default.
        self.options.col -= 1
        # Tolerate a missing value (None) in case no -x option was given.
        pairs = self.options.col_xchg or []
        self.options.col_xchg = [(x - 1, y - 1) for (x, y) in pairs]

    def transform(self, make_iterator, ordered=True):
        """Run a row transformation over every input file and write the result.

        The first entry of ``self.args`` is skipped; the remaining entries are
        treated as file names.  With ``ordered`` set, the rows are converted to
        floats and grouped by the selected column, and ``make_iterator`` is
        called as ``make_iterator(order, by_key)``; otherwise it receives the
        csv reader directly.  The resulting rows are written as csv to
        ``self.outfile()``.

        ``IOError`` and ``IndexError`` are reported through ``self.err`` and
        processing continues with the next file.  Other exceptions, such as a
        ``ValueError`` from the float conversion, are not handled here.
        """
        files = list(self.args)[1:]
        for fn in files:
            try:
                # The iterators are lazy, so the file has to stay open until
                # everything has been written out.
                with open(fn, 'r') as infile:
                    rows = csv.reader(infile)
                    # set up transformation
                    if ordered:
                        (order, by_key) = select_by_key(
                            rows, self.options.col, float)
                        rows = make_iterator(order, by_key)
                    else:
                        rows = make_iterator(rows)
                    # write out
                    csv.writer(self.outfile()).writerows(rows)
            except IOError as ex:
                self.err("%s:%s" % (fn, str(ex)))
            except IndexError:
                self.err("%s: Sorry, index out of range." % fn)

    def do_avg(self, _):
        """Average all rows that share the same key in the selected column."""
        def fixup_avg(key, rows, res):
            res = row_div(len(rows), res)
            # The key column was summed as well; restore the original key.
            res[self.options.col] = key
            return res
        self.transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Take the element-wise maximum of all rows sharing the same key."""
        self.transform(row_reduce(row_max))

    def do_min(self, _):
        """Take the element-wise minimum of all rows sharing the same key."""
        self.transform(row_reduce(row_min))

    def do_transpose(self, _):
        """Swap rows and columns of each input file."""
        self.transform(transpose, ordered=False)

    def do_reorder(self, _):
        """Swap the column pairs given with -x/--exchange in every row."""
        self.transform(lambda rows: reorder_columns(rows,
                                                    self.options.col_xchg),
                       ordered=False)


if __name__ == '__main__':
    CsvApp().launch()