1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
#!/usr/bin/env python
"""
Group the rows of CSV files by a key column and reduce each group to its
element-wise average, minimum or maximum.
"""
import optparse
import defapp
import csv
import operator
from collections import defaultdict as defdict
# Short alias so the option table below stays compact.
o = optparse.make_option

# Option table passed to defapp.App.__init__ by CsvApp.  The commented-out
# entries are kept as templates for other option shapes.
opts = [
    # o('-t', '--two', action='store', dest='double_val', nargs=2, type='int',
    #   help='A two-parameter option.'),
    o(
        '-c', '--column',
        action='store',
        dest='col',
        type='int',
        help='The column on which to operate.',
    ),
    # o(None, '--true', action='store_true', dest='truth',
    #   help='A boolean flag value.'),
    # o(None, '--degree', action='store', type='float', dest='thruthiness',
    #   help='Not quite absolute truth.'),
]

# Passed to defapp.App.__init__ next to ``opts``; presumably the values used
# for options that are not given on the command line (confirm in defapp).
defaults = dict(col=0)
def make_vector_op(op):
    """Lift a binary function to an element-wise operation on two sequences.

    Parameters:
        op -- function of two arguments applied to paired elements.

    Returns:
        ``vector_op(a, b, defvalue=0)``, which returns a new list as long as
        the longer input.  Element ``i`` is ``op(a[i], b[i])``; where one
        input has run out, ``defvalue`` stands in for its missing element.
        Neither input is modified.
    """
    def vector_op(a, b, defvalue=0):
        len_a = len(a)
        len_b = len(b)
        shared = min(len_a, len_b)
        # Always pass a's element first and b's second so that
        # non-commutative operations (subtraction, division, ...) give the
        # same result regardless of which input happens to be longer.
        result = [op(a[i], b[i]) for i in range(shared)]
        # At most one of the two tails below is non-empty.
        result.extend(op(a[i], defvalue) for i in range(shared, len_a))
        result.extend(op(defvalue, b[i]) for i in range(shared, len_b))
        return result
    return vector_op
def make_scalar_op(op):
    """Lift a binary function to "every element against one scalar".

    The returned ``scalar_op(scalar, a)`` yields a new list holding
    ``op(element, scalar)`` for each element of ``a``, in order.
    """
    def scalar_op(scalar, a):
        result = []
        for element in a:
            result.append(op(element, scalar))
        return result
    return scalar_op
# Element-wise row combinators built with make_vector_op: sum, minimum and
# maximum of two rows.  When the rows differ in length, the missing cells of
# the shorter row are treated as vector_op's defvalue (0 unless overridden).
row_add = make_vector_op(operator.add)
row_min = make_vector_op(min)
row_max = make_vector_op(max)
def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds each key's rows into one row.

    Parameters:
        row_op -- binary function combining two rows into one; it is folded
                  left-to-right over all rows stored for a key.
        fixup  -- ``fixup(key, rows, res)`` hook called with the key, its
                  original rows and the folded row; its return value is
                  what gets yielded.  The default returns ``res`` unchanged.

    Returns:
        ``_reduce(order, by_key)``, a generator function.  It walks ``order``
        and yields one result per key, at the key's first occurrence.
        ``by_key`` is drained as a side effect: each processed key is deleted
        from it, which is what makes repeated keys in ``order`` be skipped.
    """
    # ``reduce`` is a builtin only on Python 2; functools provides it on
    # Python 2.6+ and on Python 3.
    from functools import reduce

    def _reduce(order, by_key):
        for key in order:
            if key not in by_key:
                # Already emitted (or never present): nothing to do.
                continue
            rows = by_key[key]
            res = reduce(row_op, rows)
            del by_key[key]
            yield fixup(key, rows, res)
    return _reduce
# Scalar row operations built with make_scalar_op: multiply / divide every
# cell of a row by one number.  Call as row_mul(scalar, row).
row_mul = make_scalar_op(operator.mul)
# operator.div exists only on Python 2.  Keep it there so existing behaviour
# is unchanged, and fall back to operator.truediv where it is missing; for
# the float rows this script produces, both give the same result.
row_div = make_scalar_op(getattr(operator, 'div', operator.truediv))
def select_by_key(rows, col, cast=None):
    """Group rows by the value found in column ``col``.

    Parameters:
        rows -- iterable of sequences (e.g. a ``csv.reader``).
        col  -- index of the key column.
        cast -- optional one-argument function applied to every cell of a
                row before it is stored.  The key itself is always the raw,
                uncast cell value.

    Returns:
        ``(order, by_key)``.  ``order`` lists the key of every row in input
        order, so a key appears once per row that carries it.  ``by_key`` is
        a ``defaultdict(list)`` mapping each key to its rows.

    Raises:
        IndexError if a non-empty row has no column ``col``.  Whatever
        ``cast`` raises for a cell it cannot convert also propagates.
    """
    by_key = defdict(list)
    order = []
    for r in rows:
        if not r:
            # csv.reader yields [] for a blank line; it carries no key and
            # no data, so skip it instead of failing on r[col].
            continue
        key = r[col]
        by_key[key].append([cast(x) for x in r] if cast else r)
        order.append(key)
    return (order, by_key)
class CsvApp(defapp.App):
    """Command-line application that aggregates CSV rows by a key column.

    Rows that share the same value in the key column (option ``--column``,
    0 in ``defaults``) are combined element-wise; ``do_avg``, ``do_max`` and
    ``do_min`` choose the aggregation.

    NOTE(review): option parsing, ``self.args``, ``self.options``,
    ``self.outfile()``, ``launch()`` and the dispatch to the ``do_*`` methods
    come from ``defapp.App``, which is not visible here -- confirm there.
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)

    def ordered_transform(self, make_iterator):
        """Group each input file by key and write the transformed rows.

        ``make_iterator(order, by_key)`` receives the grouping produced by
        ``select_by_key`` (all cells cast to ``float``) and must return an
        iterable of output rows, which are written as CSV to
        ``self.outfile()``.

        An ``IOError`` is reported on standard output and ends processing of
        any remaining files.
        """
        # The first argument is not an input file; presumably it is the
        # sub-command name consumed by defapp -- TODO confirm.
        files = list(self.args)[1:]
        try:
            for fn in files:
                # Read the whole file while it is open; select_by_key
                # consumes the reader fully, so the handle can be closed
                # before any output is written.
                with open(fn, 'r') as infile:
                    (order, by_key) = select_by_key(csv.reader(infile),
                                                    self.options.col, float)
                # Write out the aggregated rows.
                csv.writer(self.outfile()).writerows(
                    make_iterator(order, by_key))
        except IOError as ex:
            # Single-argument form prints the same text on Python 2 and 3.
            print("Error: %s" % (ex,))

    def do_avg(self, _):
        """Write the element-wise mean of the rows sharing each key.

        The positional argument is unused.
        """
        def fixup_avg(key, rows, res):
            # ``res`` holds the column sums; divide by the group size.
            res = row_div(len(rows), res)
            # The key column was cast to float and averaged as well;
            # restore the original key text.
            res[self.options.col] = key
            return res
        self.ordered_transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Write the element-wise maximum of the rows sharing each key.

        The positional argument is unused.
        """
        self.ordered_transform(row_reduce(row_max))

    def do_min(self, _):
        """Write the element-wise minimum of the rows sharing each key.

        The positional argument is unused.
        """
        self.ordered_transform(row_reduce(row_min))
# Script entry point: build the application and hand control to launch().
if __name__ == "__main__":
    app = CsvApp()
    app.launch()
|