diff options
author | Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu> | 2008-09-04 20:48:46 -0400 |
---|---|---|
committer | Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu> | 2008-09-04 20:48:46 -0400 |
commit | 8a32b55ce1c25580da379555b2c4a5f149cfd43b (patch) | |
tree | 82af205c47bbba9a6bd3e0bc1404101c09f1b8ce | |
parent | 924e8a861c6fc0ce73efa6caf1525404fedb4494 (diff) |
started work on a csv transformation tool
-rwxr-xr-x | csv_tool | 118 | ||||
-rw-r--r-- | defapp.py | 95 |
2 files changed, 213 insertions, 0 deletions
diff --git a/csv_tool b/csv_tool new file mode 100755 index 0000000..455037c --- /dev/null +++ b/csv_tool | |||
@@ -0,0 +1,118 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | """ | ||
4 | Do stuff with csv files. | ||
5 | """ | ||
6 | |||
7 | import optparse | ||
8 | import defapp | ||
9 | |||
10 | import csv | ||
11 | import operator | ||
12 | from collections import defaultdict as defdict | ||
13 | |||
o = optparse.make_option

# Command-line options specific to this tool.  The commented-out entries are
# kept as templates for the other option shapes optparse supports.
opts = [
    # o('-t', '--two', action='store', dest='double_val', nargs=2, type='int',
    #   help='A two-parameter option.'),

    o('-c', '--column',
      action='store',
      dest='col',
      type='int',
      help='The column on which to operate.'),

    # o(None, '--true', action='store_true', dest='truth',
    #   help='A boolean flag value.'),

    # o(None, '--degree', action='store', type='float', dest='thruthiness',
    #   help='Not quite absolute truth.'),
]

# Values used for options that are not given on the command line.
defaults = dict(col=0)
34 | |||
def make_vector_op(op):
    """Lift a binary operator to an element-wise operation on two rows.

    The returned function ``vector_op(a, b, defvalue=0)`` builds a new list
    whose i-th element is ``op(a[i], b[i])``.  When the rows differ in
    length, the missing elements of the shorter row are replaced by
    ``defvalue``, so the result is as long as the longer row.  Neither
    input is modified.

    Operands are always passed in (a, b) order, which makes the result
    correct for non-commutative operators as well.
    """
    def vector_op(a, b, defvalue=0):
        len_a, len_b = len(a), len(b)
        result = []
        for i in range(max(len_a, len_b)):
            # Pad whichever side has run out of elements.
            x = a[i] if i < len_a else defvalue
            y = b[i] if i < len_b else defvalue
            result.append(op(x, y))
        return result
    return vector_op
50 | |||
def make_scalar_op(op):
    """Build a function that applies ``op(cell, scalar)`` to every cell.

    The returned ``scalar_op(scalar, a)`` takes the scalar first and the
    row second, and returns a new list; ``a`` itself is left untouched.
    """
    def scalar_op(scalar, a):
        result = []
        for cell in a:
            result.append(op(cell, scalar))
        return result
    return scalar_op
55 | |||
# Element-wise row combiners: sum, minimum and maximum of two rows.
row_add, row_min, row_max = (
    make_vector_op(fn) for fn in (operator.add, min, max)
)
59 | |||
def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds each key's rows into one row.

    row_op -- binary function combining two rows into one.
    fixup  -- called as ``fixup(key, rows, res)`` with the key, the list of
              rows for that key and the folded result; its return value is
              what gets yielded.  The default passes the result through.

    The returned ``_reduce(order, by_key)`` walks ``order`` and yields one
    result per distinct key, at the position of the key's first occurrence.
    Keys in ``order`` that are absent from ``by_key`` are skipped.
    ``by_key`` is not modified.
    """
    # Builtin in Python 2, functools-only in Python 3; this works in both.
    from functools import reduce

    def _reduce(order, by_key):
        done = set()
        for key in order:
            # ``order`` may list a key once per input row; emit it only once.
            if key in done or key not in by_key:
                continue
            done.add(key)
            rows = by_key[key]
            yield fixup(key, rows, reduce(row_op, rows))
    return _reduce
69 | |||
70 | |||
# Scalar row operations: multiply / divide every cell of a row by a number.
row_mul = make_scalar_op(operator.mul)
# operator.truediv rather than operator.div: the latter exists only in
# Python 2 and floor-divides integers, which would corrupt averages.
row_div = make_scalar_op(operator.truediv)
73 | |||
def select_by_key(rows, col, cast=None):
    """Group rows by the value found in column ``col``.

    rows -- iterable of rows (sequences of cells), e.g. a csv.reader.
    col  -- index of the key column.
    cast -- optional function applied to every cell of a row before it is
            stored; the key itself is always the uncast cell value.

    Returns ``(order, by_key)``: ``order`` lists the key of each row in
    input order (so a key appears once per row carrying it), and
    ``by_key`` maps each key to the list of its rows.

    Empty rows are skipped; csv.reader produces them for blank lines and
    they cannot carry a key.
    """
    by_key = defdict(list)
    order = []
    for row in rows:
        if not row:
            continue
        key = row[col]
        if cast:
            row = [cast(cell) for cell in row]
        by_key[key].append(row)
        order.append(key)
    return (order, by_key)
85 | |||
class CsvApp(defapp.App):
    """Command-line front end offering per-key row reductions on CSV files.

    Every ``do_<name>`` method is a command of the cmd.Cmd-based
    ``defapp.App``.  Rows are grouped by the value in column
    ``self.options.col`` and each group is reduced to a single output row.
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)

    def ordered_transform(self, make_iterator):
        """Apply a grouped transformation to every input file.

        make_iterator -- callable taking the ``(order, by_key)`` pair
                         produced by select_by_key and returning an
                         iterable of output rows.

        All cells are converted to float while reading.  The resulting
        rows are written as CSV to ``self.outfile()``.  An IOError is
        reported through ``self.err`` and stops processing of the
        remaining files.
        """
        # The first argument is the command name; the rest are file names.
        files = self.args[1:]
        try:
            for fn in files:
                # read in content; close the file as soon as it is consumed
                infile = open(fn, 'r')
                try:
                    (order, by_key) = select_by_key(csv.reader(infile),
                                                    self.options.col, float)
                finally:
                    infile.close()
                # write out
                writer = csv.writer(self.outfile())
                writer.writerows(make_iterator(order, by_key))
        except IOError as ex:
            self.err("Error:", ex)

    def do_avg(self, _):
        """Average all rows with the same key in a given column."""
        def fixup_avg(key, rows, res):
            # res holds the column-wise sums; divide by the group size.
            res = row_div(len(rows), res)
            # Put the original key back in place of its averaged value.
            res[self.options.col] = key
            return res
        self.ordered_transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Take the column-wise maximum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_max))

    def do_min(self, _):
        """Take the column-wise minimum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_min))
116 | |||
if __name__ == '__main__':
    # Script entry point: build the application and run the given command.
    app = CsvApp()
    app.launch()
diff --git a/defapp.py b/defapp.py new file mode 100644 index 0000000..e459084 --- /dev/null +++ b/defapp.py | |||
@@ -0,0 +1,95 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | """ | ||
4 | A basic Python application shell, for copy&paste development. | ||
5 | """ | ||
6 | |||
7 | import optparse | ||
8 | import cmd | ||
9 | import sys | ||
10 | |||
11 | o = optparse.make_option | ||
12 | |||
class App(cmd.Cmd):
    """Minimal command-line application shell built on cmd.Cmd.

    Options are parsed from the command line with optparse; the remaining
    positional arguments are joined into one command line and dispatched
    to the matching ``do_<command>`` method by ``launch``.
    """

    def __init__(self, opts=None, defaults=None, no_std_opts=False,
                 stdout=sys.stdout, stderr=sys.stderr, default_cmd=None):
        """Parse the command line and prepare the application.

        opts        -- list of optparse options understood by the program.
        defaults    -- dict of default option values.
        no_std_opts -- if true, do not add the standard -o/--output option.
        stdout      -- stream handed to cmd.Cmd as its output stream.
        stderr      -- accepted for backward compatibility; error output
                       is written to sys.stderr by ``err``.
        default_cmd -- command to run when no command is given.

        Neither ``opts`` nor ``defaults`` is modified.
        """
        # cmd.Cmd.__init__ takes (completekey, stdin, stdout); pass stdout
        # by keyword so it does not end up in the stdin slot.
        cmd.Cmd.__init__(self, completekey=None, stdout=stdout)
        self.default_cmd = default_cmd
        # Work on copies: callers commonly pass module-level objects, and
        # extending them in place would register -o twice on the next
        # instantiation.
        opts = list(opts) if opts else []
        defaults = dict(defaults) if defaults else {}
        defaults["_App_file"] = None
        self.f = None
        if not no_std_opts:
            opts.append(optparse.make_option(
                '-o', '--output', action='store', dest='_App_file',
                help='store output in FILE', metavar='FILE'))
        (self.options, self.args) = self.__parse(opts, defaults)

    def __parse(self, opts, defaults):
        """Parse sys.argv and return the (options, args) pair."""
        parser = optparse.OptionParser(option_list=opts)
        parser.set_defaults(**defaults)
        return parser.parse_args()

    def launch(self, args=None):
        """Run the command given by ``args`` (default: the parsed args).

        Opens the file named by -o/--output first, if any.  I/O errors and
        keyboard interrupts are reported on stderr.  The output file is
        closed on every exit path.
        """
        if args:
            self.args = args
        try:
            if self.options._App_file:
                self.f = open(self.options._App_file, 'w')
            self.onecmd(' '.join(self.args))
        except IOError as msg:
            self.err("I/O Error:", msg)
        except KeyboardInterrupt:
            self.err("Interrupted.")
        finally:
            if self.f:
                self.f.close()
                self.f = None

    def outfile(self):
        """Return the open output file, or sys.stdout if there is none."""
        if self.f:
            return self.f
        else:
            return sys.stdout

    def emptyline(self):
        """Run the default command, if one was configured."""
        if self.default_cmd:
            self.onecmd(self.default_cmd)

    def default(self, line):
        """Report a command that has no ``do_`` handler."""
        self.err("%s: Command not recognized." % line)

    def do_dump_config(self, key):
        """Display the configuration as parsed on the console."""
        settings = vars(self.options)

        def is_private(k):
            return k[0] == '_'

        def show(k):
            self.msg("%20s : %10s" % (k, str(settings[k])))

        if not key:
            for name in sorted(settings):
                if not is_private(name):
                    show(name)
        elif not is_private(key) and key in settings:
            show(key)
        else:
            self.err("%s: unknown option." % key)

    @staticmethod
    def __write(stream, *args, **kargs):
        """Write the space-joined args to stream and flush it.

        A trailing newline is added unless ``omit_newline`` is true.
        """
        stream.write(" ".join([str(a) for a in args]))
        if not kargs.get('omit_newline'):
            stream.write("\n")
        stream.flush()

    def err(self, *args, **kargs):
        """Write a message to sys.stderr."""
        self.__write(sys.stderr, *args, **kargs)

    def msg(self, *args, **kargs):
        """Write a message to sys.stdout."""
        self.__write(sys.stdout, *args, **kargs)

    def out(self, *args, **kargs):
        """Write to the output file, or to sys.stdout if there is none."""
        self.__write(self.outfile(), *args, **kargs)
92 | |||
if __name__ == "__main__":
    # Running this module directly starts the bare application shell.
    App().launch()