From 8a32b55ce1c25580da379555b2c4a5f149cfd43b Mon Sep 17 00:00:00 2001
From: Bjoern Brandenburg
Date: Thu, 4 Sep 2008 20:48:46 -0400
Subject: started work on a csv transformation tool

---
 csv_tool  | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 defapp.py | 120 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 289 insertions(+)
 create mode 100755 csv_tool
 create mode 100644 defapp.py

diff --git a/csv_tool b/csv_tool
new file mode 100755
index 0000000..455037c
--- /dev/null
+++ b/csv_tool
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+
+"""
+Do stuff with csv files.
+
+Rows that share the same value in the key column (-c/--column) are
+combined into one output row.  Each sub-command (avg, max, min) takes
+the input files as its arguments, e.g.:
+
+    csv_tool -c 0 avg data.csv
+"""
+
+import optparse
+import defapp
+
+import csv
+import operator
+from collections import defaultdict as defdict
+
+o = optparse.make_option
+
+opts = [
+
+#    o('-t', '--two', action='store', dest='double_val', nargs=2, type='int',
+#      help='A two-parameter option.'),
+
+    o('-c', '--column', action='store', dest='col', type='int',
+      help='The column on which to operate.'),
+
+#    o(None, '--true', action='store_true', dest='truth',
+#      help='A boolean flag value.'),
+
+#    o(None, '--degree', action='store', type='float', dest='thruthiness',
+#      help='Not quite absolut truth.'),
+    ]
+
+defaults = {
+    'col' : 0,
+    }
+
+def make_vector_op(op):
+    """Lift the binary function op to an element-wise operation on rows.
+
+    The returned function computes [op(a[0], b[0]), op(a[1], b[1]), ...].
+    If the rows differ in length, the missing elements of the shorter row
+    are replaced by defvalue, so the result is as long as the longer row.
+    """
+    def vector_op(a, b, defvalue=0):
+        # Always pass the element of a first so that non-commutative
+        # operators behave as expected even if b is the longer row.
+        c = []
+        for i in xrange(max(len(a), len(b))):
+            x = defvalue
+            y = defvalue
+            if i < len(a):
+                x = a[i]
+            if i < len(b):
+                y = b[i]
+            c.append(op(x, y))
+        return c
+    return vector_op
+
+def make_scalar_op(op):
+    """Lift op to a function that combines each row element with a scalar.
+
+    Note the argument order of the returned function: the scalar comes
+    first, the row second; each result element is op(element, scalar).
+    """
+    def scalar_op(scalar, a):
+        return [op(x, scalar) for x in a]
+    return scalar_op
+
+row_add = make_vector_op(operator.add)
+row_min = make_vector_op(min)
+row_max = make_vector_op(max)
+
+def row_reduce(row_op, fixup=lambda key, rows, res: res):
+    """Create a generator function that folds each group of rows into one.
+
+    The generator visits the keys in the sequence given by order, combines
+    all rows stored under a key in by_key with row_op, and yields
+    fixup(key, rows, result).  A key is processed only the first time it
+    occurs in order; by_key itself is not modified.
+    """
+    def _reduce(order, by_key):
+        done = set()
+        for key in order:
+            if key in by_key and key not in done:
+                # order repeats a key once per input row; remember which
+                # keys were handled instead of deleting them from by_key.
+                done.add(key)
+                rows = by_key[key]
+                yield fixup(key, rows, reduce(row_op, rows))
+    return _reduce
+
+
+row_mul = make_scalar_op(operator.mul)
+row_div = make_scalar_op(operator.div)
+
+def select_by_key(rows, col, cast=None):
+    """Group rows by the value found in column col.
+
+    If cast is given, it is applied to every element of each stored row;
+    the keys themselves are never converted.  Empty rows are skipped.
+    Returns (order, by_key): order holds the key of every row in input
+    sequence (duplicates included), by_key maps a key to its rows.
+    """
+    by_key = defdict(list)
+    order = []
+    for r in rows:
+        if not r:
+            # csv.reader yields an empty list for a blank line
+            continue
+        key = r[col]
+        if cast:
+            r = [cast(x) for x in r]
+        by_key[key].append(r)
+        order.append(key)
+    return (order, by_key)
+
+class CsvApp(defapp.App):
+    """Command line front end; every do_* method is a sub-command."""
+
+    def __init__(self):
+        defapp.App.__init__(self, opts, defaults)
+
+    def ordered_transform(self, make_iterator):
+        """Apply a per-key reduction to each input file.
+
+        All arguments after the command name are treated as CSV files.
+        Each file is read completely, its rows are converted to floats and
+        grouped by the key column, and the rows produced by
+        make_iterator(order, by_key) are written to the output file.
+        """
+        try:
+            writer = csv.writer(self.outfile())
+            # self.args[0] is the name of the command itself
+            for fn in self.args[1:]:
+                # read in content
+                f = open(fn, 'r')
+                try:
+                    (order, by_key) = select_by_key(csv.reader(f),
+                                                    self.options.col, float)
+                finally:
+                    f.close()
+                # write out
+                writer.writerows(make_iterator(order, by_key))
+        except IOError, ex:
+            self.err("Error:", ex)
+
+    def do_avg(self, _):
+        """Output the element-wise average of all rows with the same key."""
+        def fixup_avg(key, rows, res):
+            res = row_div(len(rows), res)
+            # the key column was averaged as well; put the key back
+            res[self.options.col] = key
+            return res
+        self.ordered_transform(row_reduce(row_add, fixup_avg))
+
+    def do_max(self, _):
+        """Output the element-wise maximum of all rows with the same key."""
+        self.ordered_transform(row_reduce(row_max))
+
+    def do_min(self, _):
+        """Output the element-wise minimum of all rows with the same key."""
+        self.ordered_transform(row_reduce(row_min))
+
+if __name__ == '__main__':
+    CsvApp().launch()
diff --git a/defapp.py b/defapp.py
new file mode 100644
index 0000000..e459084
--- /dev/null
+++ b/defapp.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+"""
+A basic Python application shell, for copy&paste development.
+"""
+
+import optparse
+import cmd
+import sys
+
+o = optparse.make_option
+
+class App(cmd.Cmd):
+    """Skeleton for command line tools built on optparse and cmd.
+
+    Options are parsed from the command line when the object is created.
+    launch() then interprets the remaining arguments as one command line
+    and dispatches it to the matching do_* method.
+    """
+
+    def __init__(self, opts=None, defaults=None, no_std_opts=False,
+                 stdout=sys.stdout, stderr=sys.stderr, default_cmd=None):
+        # Cmd.__init__ expects (completekey, stdin, stdout).  Pass stdout by
+        # keyword so it is not taken for stdin; Cmd has no use for stderr.
+        cmd.Cmd.__init__(self, completekey=None, stdout=stdout)
+        self.default_cmd = default_cmd
+        # work on copies; the caller's objects must not be modified
+        opts = list(opts or [])
+        defaults = dict(defaults or {})
+        defaults["_App_file"] = None
+        self.f = None
+        if not no_std_opts:
+            opts += [ o('-o', '--output', action='store', dest='_App_file',
+                        help='store output in FILE', metavar='FILE')]
+        (self.options, self.args) = self.__parse(opts, defaults)
+
+    def __parse(self, opts, defaults):
+        """Parse sys.argv; returns the (options, args) pair of optparse."""
+        parser = optparse.OptionParser(option_list=opts)
+        parser.set_defaults(**defaults)
+        return parser.parse_args()
+
+    def launch(self, args=None):
+        """Run the command given by args (default: the parsed arguments).
+
+        The output file requested with -o is opened before the command
+        runs and closed again afterwards.
+        """
+        if args:
+            self.args = args
+        try:
+            try:
+                if self.options._App_file:
+                    self.f = open(self.options._App_file, 'w')
+                self.onecmd(' '.join(self.args))
+            except IOError, msg:
+                self.err("I/O Error:", msg)
+            except KeyboardInterrupt:
+                self.err("Interrupted.")
+        finally:
+            # close the file even if the command fails unexpectedly
+            if self.f:
+                self.f.close()
+                self.f = None
+
+    def outfile(self):
+        """Return the file that output should be written to."""
+        if self.f:
+            return self.f
+        else:
+            return sys.stdout
+
+    def emptyline(self):
+        """Run the default command, if any, on an empty command line."""
+        if self.default_cmd:
+            self.onecmd(self.default_cmd)
+
+    def default(self, line):
+        """Report a command for which no do_* method exists."""
+        self.err("%s: Command not recognized." % line)
+
+    def do_dump_config(self, key):
+        """Display the configuration as parsed on the console."""
+        def is_private(k): return k[0] == '_'
+        def show(k): print "%20s : %10s" % (k, str(self.options.__dict__[k]))
+        if not key:
+            for x in sorted(self.options.__dict__.keys()):
+                if not is_private(x):
+                    show(x)
+        elif not is_private(key) and key in self.options.__dict__:
+            show(key)
+        else:
+            self.err("%s: unknown option." % key)
+
+    @staticmethod
+    def __write(stream, *args, **kargs):
+        """Write args, separated by spaces, to stream and flush it.
+
+        A newline is appended unless a true omit_newline keyword is given.
+        """
+        stream.write(" ".join([str(a) for a in args]))
+        if not kargs.get('omit_newline'):
+            stream.write("\n")
+        stream.flush()
+
+    def err(self, *args, **kargs):
+        """Write a message to stderr."""
+        self.__write(sys.stderr, *args, **kargs)
+
+    def msg(self, *args, **kargs):
+        """Write a message to stdout."""
+        self.__write(sys.stdout, *args, **kargs)
+
+    def out(self, *args, **kargs):
+        """Write to the output file, or to stdout if there is none."""
+        self.__write(self.outfile(), *args, **kargs)
+
+if __name__ == "__main__":
+    a = App()
+    a.launch()
-- 
cgit v1.2.2