aboutsummaryrefslogtreecommitdiffstats
path: root/csv_tool
diff options
context:
space:
mode:
Diffstat (limited to 'csv_tool')
-rwxr-xr-xcsv_tool118
1 files changed, 118 insertions, 0 deletions
diff --git a/csv_tool b/csv_tool
new file mode 100755
index 0000000..455037c
--- /dev/null
+++ b/csv_tool
@@ -0,0 +1,118 @@
1#!/usr/bin/env python
2
3"""
Group CSV rows by a key column and reduce each group (avg, max or min).
5"""
6
7import optparse
8import defapp
9
10import csv
11import operator
12from collections import defaultdict as defdict
13
# Short alias used to declare command-line options below.
o = optparse.make_option

# Command-line options understood by the tool.  The commented-out entries
# are disabled examples of other option shapes.
opts = [

#    o('-t', '--two', action='store', dest='double_val', nargs=2, type='int',
#      help='A two-parameter option.'),

    o('-c', '--column', dest='col', action='store', type='int',
      help='The column on which to operate.'),

#    o(None, '--true', action='store_true', dest='truth',
#      help='A boolean flag value.'),

#    o(None, '--degree', action='store', type='float', dest='truthiness',
#      help='Not quite absolute truth.'),
]

# Values used for options that are not given on the command line.
defaults = dict(col=0)
34
def make_vector_op(op):
    """Lift a binary operator to an element-wise operation on two sequences.

    The returned function ``vector_op(a, b, defvalue=0)`` produces a new list
    as long as the longer of ``a`` and ``b``:

    * positions present in both inputs hold ``op(a[i], b[i])``;
    * positions present in only one input are combined with ``defvalue``
      standing in for the missing element, with the existing element kept on
      its own side (``op(a[i], defvalue)`` or ``op(defvalue, b[i])``).

    Operands are always passed in argument order, so non-commutative
    operators (subtraction, division, ...) give the expected result no matter
    which input is longer.  ``a`` and ``b`` must support ``len()`` and
    slicing; neither input is modified.
    """
    def vector_op(a, b, defvalue=0):
        common = min(len(a), len(b))
        result = [op(x, y) for x, y in zip(a, b)]
        # At most one of the two tails below is non-empty.
        result.extend(op(x, defvalue) for x in a[common:])
        result.extend(op(defvalue, y) for y in b[common:])
        return result
    return vector_op
50
def make_scalar_op(op):
    """Build a function that combines every element of a sequence with a scalar.

    The returned function takes the scalar first and the sequence second,
    and returns a new list holding ``op(element, scalar)`` for each element.
    """
    def scalar_op(scalar, a):
        result = []
        for element in a:
            result.append(op(element, scalar))
        return result
    return scalar_op
55
# Element-wise row operations.  Each takes two rows (plus an optional
# ``defvalue``, 0 by default) and returns a new list as long as the longer row.
# NOTE: the default value also takes part in min/max for positions that exist
# in only one of the two rows.
row_add = make_vector_op(operator.add)
row_min = make_vector_op(min)
row_max = make_vector_op(max)
59
def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds each key's rows into one row.

    Args:
        row_op: binary function combining two rows into one; it is applied
            cumulatively, left to right, over all rows of a key.
        fixup: optional ``fixup(key, rows, res)`` hook called with the key,
            the list of its rows and the folded result; whatever it returns
            is what gets yielded.  The default returns ``res`` unchanged.

    Returns:
        A function ``_reduce(order, by_key)`` that walks the keys in
        ``order`` and yields one result per distinct key, in order of first
        appearance.  Handled keys are deleted from ``by_key`` as the
        generator advances, which is how repeated keys in ``order`` are
        skipped; the mapping is therefore consumed by the iteration.
    """
    # The builtin ``reduce`` only exists on Python 2; functools provides it
    # on both Python 2.6+ and Python 3.
    from functools import reduce

    def _reduce(order, by_key):
        for key in order:
            if key not in by_key:
                # Key already handled earlier in ``order``.
                continue
            rows = by_key[key]
            res = reduce(row_op, rows)
            del by_key[key]
            yield fixup(key, rows, res)
    return _reduce
69
70
# Scalar row operations: called as ``row_mul(scalar, row)`` and
# ``row_div(scalar, row)``, they return a new list with every element of the
# row multiplied / divided by the scalar.
row_mul = make_scalar_op(operator.mul)
# ``operator.div`` was removed in Python 3.  Keep it where it exists (so
# Python 2 behaviour is unchanged) and fall back to true division otherwise.
row_div = make_scalar_op(getattr(operator, 'div', operator.truediv))
73
def select_by_key(rows, col, cast=None):
    """Group rows by the value found in column ``col``.

    Args:
        rows: iterable of rows (indexable sequences), e.g. a ``csv.reader``.
        col: index of the key column.
        cast: optional callable applied to every field of a row (the key
            column included) before the row is stored.  The grouping key
            itself is always the raw, un-cast field.

    Returns:
        A tuple ``(order, by_key)``.  ``order`` lists the key of every kept
        row in input order, so keys repeat; ``by_key`` is a ``defaultdict``
        mapping each key to the list of its (possibly cast) rows.

    Empty rows -- which ``csv.reader`` produces for blank lines -- carry no
    key and are skipped instead of raising ``IndexError``.
    """
    by_key = defdict(list)
    order = []
    for row in rows:
        if not row:
            continue
        key = row[col]
        by_key[key].append([cast(x) for x in row] if cast else row)
        order.append(key)
    return (order, by_key)
85
class CsvApp(defapp.App):
    """Command-line application that reduces CSV rows sharing a key column.

    Built on ``defapp.App`` (not visible here), which is expected to provide
    ``self.args``, ``self.options``, ``self.outfile()`` and ``launch()``.
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)

    def ordered_transform(self, make_iterator):
        """Group each input file by the key column and write the reduced rows.

        For every file, all fields are converted to ``float`` and the rows
        are grouped on column ``self.options.col``.  ``make_iterator(order,
        by_key)`` must return an iterable of output rows, which are written
        as CSV to ``self.outfile()``.

        An ``IOError`` is reported on standard output and ends processing of
        the remaining files.
        """
        # The first argument is skipped -- presumably the sub-command name;
        # confirm against defapp.  Slicing also tolerates an empty list.
        files = list(self.args)[1:]
        try:
            for fn in files:
                # Read in content; the file is fully consumed by
                # select_by_key, so it can be closed right afterwards.
                with open(fn, 'r') as infile:
                    (order, by_key) = select_by_key(csv.reader(infile),
                                                    self.options.col, float)
                # Write out.
                writer = csv.writer(self.outfile())
                writer.writerows(make_iterator(order, by_key))
        except IOError as ex:
            # Same text as before, in a form valid on Python 2 and Python 3.
            print("Error: %s" % ex)

    def do_avg(self, _):
        """Average all rows with the same key in the selected column."""
        def fixup_avg(key, rows, res):
            # ``res`` holds the column-wise sums; divide by the row count.
            res = row_div(len(rows), res)
            # The key column was summed too; restore the original key.
            res[self.options.col] = key
            return res
        self.ordered_transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Take the column-wise maximum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_max))

    def do_min(self, _):
        """Take the column-wise minimum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_min))
116
# Script entry point: build the application and hand control to launch(),
# which CsvApp does not define itself and so comes from its base class.
if __name__ == '__main__':
    CsvApp().launch()