#!/usr/bin/env python

"""
Do stuff with csv files.

Each positional argument after the first is treated as a CSV input file;
the first positional argument is the operation name (it is used as the
prefix of the output file name when --write-to-file is given).
"""

# Provenance: recovered from commit 6ccfe134d4bff05f457fc97d03f9f3692ab99adb
# (Bjoern Brandenburg, 2009-01-11, "remove some of the unneeded parts"),
# which deleted this 214-line script (csv_tool, mode 100755).
#
# The code below runs on Python 2.6+ and Python 3.

import optparse
import defapp

import csv
import operator
import os.path
from collections import defaultdict as defdict
from functools import reduce

try:
    from itertools import izip
except ImportError:
    # Python 3: the builtin zip is already lazy.
    izip = zip

o = optparse.make_option

opts = [

    o('-c', '--column', action='append', dest='col', type='int',
      help='The column(s) on which to operate.'),

    o(None, '--write-to-file', action='store_true', dest='write_to_file',
      help='Write the output of operation xyz on file abc.csv to xyz_abc.csv.'),

    o('-u', '--upper-bound', action='store_true', dest='upper_bound',
      help="The linear regression is modified to represent an upper bound."),

    # Disabled option, kept for reference:
    # o(None, '--degree', action='store', type='float', dest='truthiness',
    #   help='Not quite absolute truth.'),
    ]

defaults = {
    'col': [],
    'write_to_file': False,
    'upper_bound': False,
    }


def pair_iter(it):
    """
    Yield the items of `it` as consecutive, non-overlapping pairs:
    (item0, item1), (item2, item3), ...

    If `it` has an odd number of items, the trailing unpaired item is
    silently dropped.
    """
    it = iter(it)
    for first in it:
        try:
            second = next(it)
        except StopIteration:
            # Odd number of items.  End the generator explicitly: a
            # StopIteration escaping a generator body is turned into a
            # RuntimeError by PEP 479 (Python 3.7+).
            return
        yield (first, second)


def make_vector_op(op):
    """
    Lift the binary function `op` to an element-wise operation on two rows.

    The returned function vector_op(a, b, defvalue=0) returns a new list as
    long as the longer of the two rows.  Where both rows have a value the
    result is op(longer[i], shorter[i]); past the end of the shorter row it
    is op(longer[i], defvalue).

    NOTE: the longer row is always the first operand, so the argument order
    depends on the row lengths; this is only safe for commutative `op`
    (the script uses add, min and max).
    """
    def vector_op(a, b, defvalue=0):
        if len(a) > len(b):
            shorter, longer = b, a
        else:
            shorter, longer = a, b
        c = list(longer)
        for i, value in enumerate(shorter):
            c[i] = op(longer[i], value)
        for i in range(len(shorter), len(longer)):
            c[i] = op(longer[i], defvalue)
        return c
    return vector_op


def make_scalar_op(op):
    """
    Lift the binary function `op` to an operation between a scalar and a
    row: scalar_op(scalar, a) returns [op(x, scalar) for each x in a].
    """
    def scalar_op(scalar, a):
        return [op(x, scalar) for x in a]
    return scalar_op


row_add = make_vector_op(operator.add)
row_min = make_vector_op(min)
row_max = make_vector_op(max)


def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """
    Build an iterator factory that folds all rows sharing a key into one row.

    The returned function _reduce(order, by_key) walks the keys in `order`
    and, for each key still present in `by_key`, yields
    fixup(key, rows, reduce(row_op, rows)).

    `by_key` is consumed (entries are removed as they are processed); that
    is what makes repeated keys in `order` produce only one output row.
    """
    def _reduce(order, by_key):
        for key in order:
            if key in by_key:
                rows = by_key.pop(key)
                res = reduce(row_op, rows)
                yield fixup(key, rows, res)
    return _reduce


row_mul = make_scalar_op(operator.mul)
# operator.div does not exist on Python 3.  truediv gives the same result
# for the float rows this script divides (see CsvApp.do_avg); it differs
# from the old operator.div only for integer operands on Python 2.
row_div = make_scalar_op(operator.truediv)


def transpose(rows):
    """
    Yield the columns of `rows` as lists.

    Rows may have different lengths; missing cells are filled with 0.
    Nothing is yielded for empty input.
    """
    rows = list(rows)
    if rows:
        width = max(len(row) for row in rows)
        for i in range(width):
            yield [row[i] if i < len(row) else 0 for row in rows]


def reorder_columns(rows, xchg_pairs):
    """
    Yield each row after swapping the cells at every (x, y) index pair in
    `xchg_pairs`, in order.  Rows are modified in place.
    """
    for r in rows:
        for (x, y) in xchg_pairs:
            r[x], r[y] = r[y], r[x]
        yield r


def select_columns(rows, cols):
    """
    Yield, for each row, the list of its cells at the indices in `cols`
    (in the order given by `cols`).
    """
    for r in rows:
        yield [r[x] for x in cols]


def numpy_lstsq(x, y):
    """
    Fit y = c1 * x + c0 by linear least squares and return the solution
    vector [c1, c0] (slope first, then intercept).

    numpy is imported lazily so that only this operation depends on it.
    """
    from numpy import ones, array
    from numpy.linalg import lstsq
    # Design matrix: column 0 holds x, column 1 stays at 1 (intercept term).
    A = ones((len(y), 2), dtype=float)
    # Callers may pass cells as read from the CSV file (strings), so
    # convert to float explicitly.
    A[:, 0] = array(x, dtype=float)
    b = array(y, dtype=float)
    return lstsq(A, b)[0]


def max_delta(c0, c1, x, y):
    """
    Return the largest value of y_i - (c0 + c1 * x_i) over all sample
    points, i.e. how far the highest point lies above the line.
    """
    return max([float(yi) - (c0 + c1 * float(xi))
                for (xi, yi) in izip(x, y)])


def least_squares(rows, xy_pairs, upper_bound):
    """
    For every (x, y) column-index pair in `xy_pairs`, fit a line to the
    corresponding columns of `rows` and yield [x + 1, y + 1, c0, c1]
    (1-based column numbers, intercept, slope).

    If `upper_bound` is true, the intercept is raised by max_delta() so
    that no sample point lies above the line.
    """
    cols = []
    for (x, y) in xy_pairs:
        cols.extend([x, y])
    rows = select_columns(rows, cols)
    # After selecting and transposing, the columns come out as
    # x0, y0, x1, y1, ... so pairing them up lines up with xy_pairs.
    cols = transpose(rows)
    for ((x, y), (xval, yval)) in izip(xy_pairs, pair_iter(cols)):
        c1, c0 = numpy_lstsq(xval, yval)
        if upper_bound:
            c0 += max_delta(c0, c1, xval, yval)
        yield [x + 1, y + 1, c0, c1]


def select_by_key(rows, col, cast=None):
    """
    Group `rows` by the value in column `col`.

    Returns (order, by_key): `order` lists the key of every row in input
    order (so keys may repeat), and `by_key` maps each key to the list of
    its rows.  If `cast` is given it is applied to every cell of the stored
    rows; the key itself is the uncast cell value.
    """
    by_key = defdict(list)
    order = []
    for r in rows:
        key = r[col]
        if cast:
            by_key[key].append([cast(x) for x in r])
        else:
            by_key[key].append(r)
        order.append(key)
    return (order, by_key)


class CsvApp(defapp.App):
    """
    Command-line application built on defapp.App.

    The do_* methods each implement one operation; presumably defapp.App
    dispatches to them based on the first positional argument (confirm
    against defapp).
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)
        # fixup human-friendly offsets
        if not self.options.col:
            self.options.col = [1]
        self.options.col = [x - 1 for x in self.options.col]
        self.options.col_pairs = list(pair_iter(self.options.col))

    def transform(self, make_iterator, ordered=True):
        """
        Read a file, pass the rows in the file to an iterator factory, and
        write out the output of the iterator. The iterator performs the
        desired transformation.

        If `ordered` is true, the rows are first grouped by the first
        selected column (cells cast to float) and the factory is called as
        make_iterator(order, by_key); otherwise it is called as
        make_iterator(rows).

        I/O errors and out-of-range column indices are reported through
        self.err() and processing continues with the next file.
        """
        # self.args[0] is the operation name; the rest are input files.
        files = list(self.args)[1:]
        for fn in files:
            # File name used in error messages; switches to the output
            # file once that has been determined.
            target = fn
            try:
                # The input must stay open until the output is written:
                # in the unordered case the rows are consumed lazily.
                with open(fn, 'r') as infile:
                    # read in content
                    rows = csv.reader(infile)
                    # set up transformation
                    if ordered:
                        (order, by_key) = select_by_key(
                            rows, self.options.col[0], float)
                        rows = make_iterator(order, by_key)
                    else:
                        rows = make_iterator(rows)
                    # write out
                    outfile = self.outfile()
                    if self.options.write_to_file:
                        (dirname, basename) = os.path.split(fn)
                        target = os.path.join(
                            dirname, self.args[0] + '_' + basename)
                        outfile = open(target, 'w')
                    try:
                        csv.writer(outfile).writerows(rows)
                    finally:
                        # Only close what was opened here; the default
                        # output belongs to defapp.App.
                        if self.options.write_to_file:
                            outfile.close()
            except IOError as ex:
                self.err("%s:%s" % (target, str(ex)))
            except IndexError:
                self.err("%s: Sorry, index out of range." % target)

    def do_avg(self, _):
        """Average all rows that share the same key column value."""
        def fixup_avg(key, rows, res):
            res = row_div(len(rows), res)
            # The key column was summed and divided too; restore it.
            res[self.options.col[0]] = key
            return res
        self.transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Take the column-wise maximum of all rows sharing a key."""
        self.transform(row_reduce(row_max))

    def do_min(self, _):
        """Take the column-wise minimum of all rows sharing a key."""
        self.transform(row_reduce(row_min))

    def do_transpose(self, _):
        """Swap rows and columns."""
        self.transform(transpose, ordered=False)

    def do_reorder(self, _):
        """Swap the column pairs given with -c (taken two at a time)."""
        self.transform(lambda rows: reorder_columns(
                rows, self.options.col_pairs), ordered=False)

    def do_select(self, _):
        """Output only the columns given with -c, in that order."""
        self.transform(lambda rows: select_columns(
                rows, self.options.col), ordered=False)

    def do_lstsqrs(self, _):
        """Fit a line to each (x, y) column pair given with -c."""
        self.transform(
            lambda rows: least_squares(rows, self.options.col_pairs,
                                       self.options.upper_bound),
            ordered=False)


if __name__ == '__main__':
    CsvApp().launch()