diff options
Diffstat (limited to 'csv_tool')
-rwxr-xr-x | csv_tool | 214 |
1 files changed, 0 insertions, 214 deletions
diff --git a/csv_tool b/csv_tool deleted file mode 100755 index 17ad949..0000000 --- a/csv_tool +++ /dev/null | |||
@@ -1,214 +0,0 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | """ | ||
4 | Aggregate (avg/min/max), transpose, reorder, select, and least-squares fit columns of CSV files. | ||
5 | """ | ||
6 | |||
7 | import optparse | ||
8 | import defapp | ||
9 | |||
10 | import csv | ||
11 | import operator | ||
12 | import os.path | ||
13 | from collections import defaultdict as defdict | ||
14 | from itertools import izip | ||
15 | |||
# Short alias so the option table below stays compact.
o = optparse.make_option

# Command-line options understood by the tool.
opts = [
    # May be repeated; every occurrence appends one (1-based) column number.
    o('-c', '--column', dest='col', type='int', action='append',
      help='The column(s) on which to operate.'),

    # Long-form only (no short flag).
    o(None, '--write-to-file', dest='write_to_file', action='store_true',
      help='Write the output of operation xyz on file abc.csv to xyz_abc.csv.'),

    o('-u', '--upper-bound', dest='upper_bound', action='store_true',
      help="The linear regression is modified to represent an upper bound."),

    # Disabled option, kept for reference:
    # o(None, '--degree', action='store', type='float', dest='thruthiness',
    #   help='Not quite absolut truth.'),
]

# Values used for options that are not given on the command line.
defaults = dict(
    col=[],
    write_to_file=False,
    upper_bound=False,
)
38 | |||
def pair_iter(it):
    """Yield consecutive, non-overlapping pairs from an iterable.

    ``[a, b, c, d]`` produces ``(a, b), (c, d)``.  If the input holds an odd
    number of items, the trailing unpaired item is silently dropped.
    """
    it = iter(it)
    while True:
        try:
            first = next(it)
            second = next(it)
        except StopIteration:
            # End the generator explicitly: letting StopIteration escape a
            # generator body is turned into RuntimeError by PEP 479.
            return
        yield (first, second)
43 | |||
def make_vector_op(op):
    """Lift a binary operator to an element-wise operation on two sequences.

    The returned function ``vector_op(a, b, defvalue=0)`` applies ``op``
    position by position and returns a new list as long as the longer
    input.  Positions missing from the shorter input are filled with
    ``defvalue``.

    Operands are always passed as ``op(<element of a>, <element of b>)``, so
    a non-commutative operator gives the same answer no matter which of the
    two inputs happens to be longer.
    """
    def vector_op(a, b, defvalue=0):
        len_a, len_b = len(a), len(b)
        shared = min(len_a, len_b)
        result = [op(a[i], b[i]) for i in range(shared)]
        # Pad on whichever side ran out, keeping the operand order intact.
        if len_a > len_b:
            result.extend(op(a[i], defvalue) for i in range(shared, len_a))
        else:
            result.extend(op(defvalue, b[i]) for i in range(shared, len_b))
        return result
    return vector_op
59 | |||
def make_scalar_op(op):
    """Build a function that combines every element of a sequence with a scalar.

    The returned ``scalar_op(scalar, a)`` gives a new list holding
    ``op(element, scalar)`` for each element of ``a``, in order.
    """
    def scalar_op(scalar, a):
        results = []
        for element in a:
            results.append(op(element, scalar))
        return results
    return scalar_op
64 | |||
# Element-wise row combinators (sum, minimum, maximum), all built from the
# same factory.
(row_add, row_min, row_max) = map(make_vector_op, (operator.add, min, max))
68 | |||
def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds each group of rows into one row.

    row_op -- binary function combining two rows into one.
    fixup  -- called as ``fixup(key, rows, res)`` with the group key, the
              group's original rows and the folded result; its return value
              is what gets yielded.  The default passes the result through.

    The returned ``_reduce(order, by_key)`` walks ``order`` and yields one
    result per distinct key, at the position of that key's first occurrence.
    Each group is removed from ``by_key`` once it has been processed; that
    removal is what makes later repeats of the same key in ``order`` a no-op.
    """
    # reduce() is a builtin only on Python 2; functools.reduce exists on
    # Python 2.6+ and on Python 3.
    from functools import reduce

    def _reduce(order, by_key):
        for key in order:
            if key in by_key:
                rows = by_key[key]
                res = reduce(row_op, rows)
                del by_key[key]
                yield fixup(key, rows, res)
    return _reduce
78 | |||
79 | |||
# Scale every element of a row by a scalar: row_mul(s, row), row_div(s, row).
row_mul = make_scalar_op(operator.mul)
# operator.div exists only on Python 2 (and floors integers there);
# operator.truediv is available on both major versions and always performs
# real division, which is what averaging needs.
row_div = make_scalar_op(operator.truediv)
82 | |||
def transpose(rows):
    """Yield the columns of ``rows`` as lists.

    All input rows are read up front.  The number of columns yielded equals
    the length of the longest row; rows that are too short to reach a given
    column contribute ``0`` at that position.  Empty input yields nothing.
    """
    rows = list(rows)
    if not rows:
        return
    width = max(len(row) for row in rows)
    for col in range(width):
        yield [row[col] if col < len(row) else 0 for row in rows]
95 | |||
def reorder_columns(rows, xchg_pairs):
    """Swap pairs of columns in every row.

    For each row the ``(x, y)`` index pairs in ``xchg_pairs`` are applied in
    order, exchanging the values at positions ``x`` and ``y``.  Rows are
    modified in place and the same row objects are yielded.
    """
    for row in rows:
        for pair in xchg_pairs:
            left, right = pair
            saved = row[left]
            row[left] = row[right]
            row[right] = saved
        yield row
101 | |||
def select_columns(rows, cols):
    """Project every row onto the given column indices.

    Yields, for each row, a new list holding ``row[c]`` for each ``c`` in
    ``cols``, in the order given (indices may repeat).
    """
    for row in rows:
        picked = []
        for index in cols:
            picked.append(row[index])
        yield picked
105 | |||
def numpy_lstsq(x, y):
    """Fit a straight line ``y = c1 * x + c0`` by least squares.

    x, y -- equally long sequences of numbers or numeric strings (CSV cells
            arrive as strings, so both are converted to float explicitly).

    Returns the numpy solution vector ``[c1, c0]`` (slope first, then
    intercept).
    """
    # numpy is imported lazily so that the other operations work without it.
    from numpy import ones, array
    from numpy.linalg import lstsq
    # Design matrix: first column holds x, second column stays 1.0 for the
    # constant term.
    A = ones((len(y), 2), dtype=float)
    A[:, 0] = array(x, dtype=float)
    b = array(y, dtype=float)
    # rcond=-1 is the historical default; passing it explicitly keeps the
    # result identical across numpy versions and avoids the FutureWarning
    # that some releases emit when rcond is omitted.
    return lstsq(A, b, rcond=-1)[0]
113 | |||
def max_delta(c0, c1, x, y):
    """Return the largest signed residual ``y_i - (c0 + c1 * x_i)``.

    ``x`` and ``y`` are walked in lockstep and each value is converted with
    ``float()``, so numeric strings are accepted.  Raises ValueError when
    there are no points.
    """
    return max(float(yi) - (c0 + c1 * float(xi)) for xi, yi in zip(x, y))
116 | |||
def least_squares(rows, xy_pairs, upper_bound):
    """Fit one straight line per (x column, y column) pair.

    rows        -- iterable of CSV rows.
    xy_pairs    -- list of 0-based ``(x, y)`` column index pairs.
    upper_bound -- when true, the intercept of each fit is raised by the
                   largest residual returned by max_delta().

    Yields one list ``[x + 1, y + 1, c0, c1]`` per pair: the two column
    numbers converted back to 1-based form, then intercept and slope.
    """
    wanted = []
    for (x, y) in xy_pairs:
        wanted += [x, y]
    # After selection the columns sit in x, y, x, y, ... order, so the
    # transposed columns can be consumed two at a time.
    columns = transpose(select_columns(rows, wanted))
    for ((x, y), (xval, yval)) in zip(xy_pairs, pair_iter(columns)):
        c1, c0 = numpy_lstsq(xval, yval)
        if upper_bound:
            c0 += max_delta(c0, c1, xval, yval)
        yield [x + 1, y + 1, c0, c1]
128 | |||
def select_by_key(rows, col, cast=None):
    """Group rows by the value found in column ``col``.

    The key is the raw (unconverted) value ``row[col]``.  When ``cast`` is
    given, every cell of a row is passed through it before the row is
    stored; otherwise the row object is stored as is.

    Returns ``(order, by_key)``: ``order`` lists the key of every input row
    in input order (repeats included) and ``by_key`` is a defaultdict
    mapping each key to the list of its rows.
    """
    groups = defdict(list)
    keys_seen = []
    for record in rows:
        group_key = record[col]
        stored = [cast(cell) for cell in record] if cast else record
        groups[group_key].append(stored)
        keys_seen.append(group_key)
    return (keys_seen, groups)
140 | |||
class CsvApp(defapp.App):
    """Command-line application exposing the CSV operations as ``do_*`` methods.

    NOTE(review): defapp.App is not visible here.  This class relies on it
    to provide ``self.options``, ``self.args``, ``self.outfile()``,
    ``self.err()`` and ``launch()``; presumably it also dispatches the first
    argument to the matching ``do_<name>`` method -- confirm against defapp.
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)
        # Columns are given 1-based on the command line; default to the
        # first column and convert everything to 0-based indices.
        if not self.options.col:
            self.options.col = [1]
        self.options.col = [x - 1 for x in self.options.col]
        # Consecutive columns taken two at a time, used by the reorder and
        # lstsqrs operations.
        self.options.col_pairs = list(pair_iter(self.options.col))

    def transform(self, make_iterator, ordered=True):
        """
        Read each file, pass its rows to an iterator factory, and write out
        whatever the resulting iterator produces.  The iterator performs the
        desired transformation.

        make_iterator -- called as ``make_iterator(order, by_key)`` when
                         ``ordered`` is true (rows grouped on the first
                         selected column, all cells converted to float),
                         otherwise as ``make_iterator(rows)``.
        ordered       -- whether to group the rows by key first.

        IOError and IndexError are reported through ``self.err`` and
        processing continues with the next file.
        """
        files = list(self.args)
        # The first argument is not an input file; it is reused below as the
        # prefix of the output file name.
        del files[0]
        for fn in files:
            try:
                # The input stays open until writing is finished because the
                # row iterators are consumed lazily by writerows().
                with open(fn, 'r') as infile:
                    # read in content
                    rows = csv.reader(infile)
                    # set up transformation
                    if ordered:
                        (order, by_key) = select_by_key(
                            rows, self.options.col[0], float)
                        rows = make_iterator(order, by_key)
                    else:
                        rows = make_iterator(rows)
                    # write out
                    outfile = self.outfile()
                    if self.options.write_to_file:
                        (dirname, basename) = os.path.split(fn)
                        # From here on fn names the output file, so a
                        # failure below is reported against that path.
                        fn = os.path.join(dirname,
                                          self.args[0] + '_' + basename)
                        with open(fn, 'w') as outfile:
                            csv.writer(outfile).writerows(rows)
                    else:
                        csv.writer(outfile).writerows(rows)
            except IOError as ex:
                self.err("%s:%s" % (fn, str(ex)))
            except IndexError:
                self.err("%s: Sorry, index out of range." % fn)

    def do_avg(self, _):
        """Average the rows of each key group, element by element."""
        def fixup_avg(key, rows, res):
            # res holds the element-wise sum; divide by the group size, then
            # restore the key column, which must not be averaged.
            res = row_div(len(rows), res)
            res[self.options.col[0]] = key
            return res
        self.transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Reduce each key group to its element-wise maximum."""
        self.transform(row_reduce(row_max))

    def do_min(self, _):
        """Reduce each key group to its element-wise minimum."""
        self.transform(row_reduce(row_min))

    def do_transpose(self, _):
        """Swap rows and columns."""
        self.transform(transpose, ordered=False)

    def do_reorder(self, _):
        """Swap the selected columns pairwise in every row."""
        self.transform(lambda rows: reorder_columns(
            rows, self.options.col_pairs), ordered=False)

    def do_select(self, _):
        """Keep only the selected columns, in the order given."""
        self.transform(lambda rows: select_columns(
            rows, self.options.col), ordered=False)

    def do_lstsqrs(self, _):
        """Fit a line to each selected (x, y) column pair."""
        self.transform(
            lambda rows: least_squares(rows, self.options.col_pairs,
                                       self.options.upper_bound),
            ordered=False)
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    app = CsvApp()
    app.launch()