aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>2008-09-04 20:48:46 -0400
committerBjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>2008-09-04 20:48:46 -0400
commit8a32b55ce1c25580da379555b2c4a5f149cfd43b (patch)
tree82af205c47bbba9a6bd3e0bc1404101c09f1b8ce
parent924e8a861c6fc0ce73efa6caf1525404fedb4494 (diff)
started work on a csv transformation tool
-rwxr-xr-xcsv_tool118
-rw-r--r--defapp.py95
2 files changed, 213 insertions, 0 deletions
diff --git a/csv_tool b/csv_tool
new file mode 100755
index 0000000..455037c
--- /dev/null
+++ b/csv_tool
@@ -0,0 +1,118 @@
1#!/usr/bin/env python
2
3"""
4Do stuff with csv files.
5"""
6
7import optparse
8import defapp
9
10import csv
11import operator
12from collections import defaultdict as defdict
13
# Short alias for building optparse option descriptors.
o = optparse.make_option

# Command-line options specific to this tool.  Further options can be
# added to the list following the same o(...) pattern.
opts = [
    o(
        '-c', '--column',
        action='store',
        dest='col',
        type='int',
        help='The column on which to operate.',
    ),
]

# Values used for options that are not given on the command line.
defaults = dict(col=0)
34
def make_vector_op(op):
    """Lift a binary operator to an element-wise operation on two rows.

    Returns a function vector_op(a, b, defvalue=0) that builds a new list
    as long as the longer of the two rows.  Element i of the result is
    op(a[i], b[i]); where one row has no element i, defvalue stands in
    for the missing operand.  Neither input row is modified.

    The operands are always handed to op in (a, b) order, regardless of
    which row is longer, so non-commutative operators give the expected
    result.
    """
    def vector_op(a, b, defvalue=0):
        # zip() stops at the shorter row, which covers the common prefix.
        result = [op(x, y) for x, y in zip(a, b)]
        common = len(result)
        # At most one of the two tails below is non-empty; the missing
        # side is replaced by defvalue while keeping the operand order.
        result.extend(op(x, defvalue) for x in a[common:])
        result.extend(op(defvalue, y) for y in b[common:])
        return result
    return vector_op
50
def make_scalar_op(op):
    """Build a function that combines every element of a row with a scalar.

    The returned scalar_op(scalar, a) produces a new list holding
    op(x, scalar) for each x in a: the row element is the left operand
    and the scalar the right one.
    """
    def scalar_op(scalar, a):
        combined = []
        for element in a:
            combined.append(op(element, scalar))
        return combined
    return scalar_op
55
# Element-wise combinators for two rows: sum, minimum and maximum.
row_add, row_min, row_max = map(make_vector_op, (operator.add, min, max))
59
def row_reduce(row_op, fixup=lambda key, rows, res: res):
    """Build a generator function that folds all rows of each key into one.

    row_op -- binary function combining two rows into one
    fixup  -- called as fixup(key, rows, res) with the key, the list of
              rows stored under it and their folded result; whatever it
              returns is yielded.  The default passes res through.

    The returned function takes (order, by_key): order is an iterable of
    keys (repeats allowed) and by_key maps each key to its list of rows.
    One result is yielded per distinct key, at the position of the key's
    first occurrence in order.  Keys absent from by_key are skipped.
    by_key itself is left untouched.
    """
    # reduce() is a builtin only on Python 2; functools has it on 2 and 3.
    from functools import reduce

    def _reduce(order, by_key):
        # Remember handled keys locally instead of deleting them from the
        # caller's mapping.
        emitted = set()
        for key in order:
            if key in emitted or key not in by_key:
                continue
            emitted.add(key)
            rows = by_key[key]
            yield fixup(key, rows, reduce(row_op, rows))
    return _reduce
69
70
# Combine every element of a row with a scalar: row_mul(k, row) multiplies
# each element by k, row_div(k, row) divides each element by k.
row_mul = make_scalar_op(operator.mul)
# truediv instead of div: identical for float rows, but it does not floor
# integer operands and it is available on both Python 2 and Python 3.
row_div = make_scalar_op(operator.truediv)
73
def select_by_key(rows, col, cast=None):
    """Group rows by the value found in one column.

    rows -- iterable of indexable rows, e.g. a csv.reader
    col  -- index of the column holding the key
    cast -- optional callable applied to every field of a row before the
            row is stored; the key itself is taken from the raw row

    Returns (order, by_key).  order lists the key of every stored row in
    input order, so a key appears once per row that carries it.  by_key
    maps each key to the list of its rows, in input order.

    Empty rows are skipped: csv.reader yields an empty list for a blank
    line, and such a row has no key column to read.
    """
    by_key = defdict(list)
    order = []
    for row in rows:
        if not row:
            continue
        key = row[col]
        by_key[key].append([cast(x) for x in row] if cast else row)
        order.append(key)
    return (order, by_key)
85
class CsvApp(defapp.App):
    """Command-line tool that collapses CSV rows sharing a key.

    The key is the value in the column selected with -c/--column.  Each
    do_* method is a command; its string argument is unused because the
    input files are taken from self.args instead.
    """

    def __init__(self):
        defapp.App.__init__(self, opts, defaults)

    def ordered_transform(self, make_iterator):
        """Run a per-key transformation over every input file.

        make_iterator -- callable taking the (order, by_key) pair returned
                         by select_by_key and yielding the output rows

        The first positional argument is skipped (presumably the command
        name that launch() dispatched on); every remaining argument is
        read as a CSV file whose fields are all converted to float.  The
        resulting rows are written as CSV to self.outfile().  An IOError
        is reported through self.err() and ends processing.
        """
        files = list(self.args)[1:]
        try:
            for fn in files:
                # select_by_key consumes the reader completely, so the
                # input can be closed before any output is produced.
                with open(fn, 'r') as infile:
                    (order, by_key) = select_by_key(csv.reader(infile),
                                                    self.options.col, float)
                writer = csv.writer(self.outfile())
                writer.writerows(make_iterator(order, by_key))
        except IOError as ex:
            # Report on stderr: stdout may be carrying the CSV output.
            self.err("Error:", ex)

    def do_avg(self, _):
        """Average all rows with the same key in a given column."""
        def fixup_avg(key, rows, res):
            # res holds the column-wise sums; divide by the row count.
            res = row_div(len(rows), res)
            # The key column was summed too; put the original key back.
            res[self.options.col] = key
            return res
        self.ordered_transform(row_reduce(row_add, fixup_avg))

    def do_max(self, _):
        """Take the column-wise maximum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_max))

    def do_min(self, _):
        """Take the column-wise minimum of all rows with the same key."""
        self.ordered_transform(row_reduce(row_min))
116
if __name__ == '__main__':
    # Executed as a script: build the tool and dispatch the command line.
    app = CsvApp()
    app.launch()
diff --git a/defapp.py b/defapp.py
new file mode 100644
index 0000000..e459084
--- /dev/null
+++ b/defapp.py
@@ -0,0 +1,95 @@
1#!/usr/bin/env python
2
3"""
4A basic Python application shell, for copy&paste development.
5"""
6
7import optparse
8import cmd
9import sys
10
# Short alias for building optparse option descriptors.
o = optparse.make_option


class App(cmd.Cmd):
    """Minimal command-line application shell built on cmd.Cmd.

    Options are parsed with optparse when the object is created; the
    positional arguments are later joined into one command line and
    dispatched to a do_<command> method by launch().  Unless disabled, a
    standard -o/--output option selects a file that out() and outfile()
    use instead of sys.stdout.
    """

    def __init__(self, opts=None, defaults=None, no_std_opts=False,
                 stdout=sys.stdout, stderr=sys.stderr, default_cmd=None):
        """Parse the command line and set up the shell.

        opts        -- list of optparse options; the list is not modified
        defaults    -- dict of option defaults; the dict is not modified
        no_std_opts -- if true, do not add the -o/--output option
        stdout      -- output stream handed to cmd.Cmd
        stderr      -- kept as self.stderr; note that err() itself always
                       writes to sys.stderr
        default_cmd -- command executed when the command line is empty
        """
        # cmd.Cmd expects (completekey, stdin, stdout).  Pass stdout by
        # keyword so that it is not taken for stdin.
        cmd.Cmd.__init__(self, completekey=None, stdout=stdout)
        self.stderr = stderr
        self.default_cmd = default_cmd
        # Work on copies so that the caller's containers stay unchanged
        # and can be reused for another App instance.
        opts = list(opts) if opts else []
        defaults = dict(defaults) if defaults else {}
        defaults["_App_file"] = None
        self.f = None
        if not no_std_opts:
            opts.append(o('-o', '--output', action='store', dest='_App_file',
                          help='store output in FILE', metavar='FILE'))
        (self.options, self.args) = self.__parse(opts, defaults)

    def __parse(self, opts, defaults):
        """Parse sys.argv with the given options; return (options, args)."""
        parser = optparse.OptionParser(option_list=opts)
        parser.set_defaults(**defaults)
        return parser.parse_args()

    def launch(self, args=None):
        """Execute the command formed by the positional arguments.

        args -- optional replacement for the positional arguments parsed
                from the command line

        The output file, if one was requested, is opened before the
        command runs and is always closed afterwards.  IOError and
        KeyboardInterrupt are reported through err().
        """
        if args:
            self.args = args
        try:
            if self.options._App_file:
                self.f = open(self.options._App_file, 'w')
            self.onecmd(' '.join(self.args))
        except IOError as msg:
            self.err("I/O Error:", msg)
        except KeyboardInterrupt:
            self.err("Interrupted.")
        finally:
            if self.f:
                self.f.close()
                # Do not leave a closed file behind for outfile()/out().
                self.f = None

    def outfile(self):
        """Return the output file if one is open, otherwise sys.stdout."""
        if self.f:
            return self.f
        return sys.stdout

    def emptyline(self):
        """Run the default command, if any, for an empty command line."""
        if self.default_cmd:
            self.onecmd(self.default_cmd)

    def default(self, line):
        """Report a command line that matches no do_<command> method."""
        self.err("%s: Command not recognized." % line)

    def do_dump_config(self, key):
        """Display the configuration as parsed on the console."""
        settings = vars(self.options)

        def is_private(k):
            return k.startswith('_')

        def show(k):
            self.msg("%20s : %10s" % (k, str(settings[k])))

        if not key:
            for x in sorted(settings):
                if not is_private(x):
                    show(x)
        elif not is_private(key) and key in settings:
            show(key)
        else:
            self.err("%s: unknown option." % key)

    @staticmethod
    def __write(stream, *args, **kargs):
        """Write the space-joined args to stream and flush it.

        A newline is appended unless omit_newline=True is given.
        """
        stream.write(" ".join([str(a) for a in args]))
        if not kargs.get('omit_newline'):
            stream.write("\n")
        stream.flush()

    def err(self, *args, **kargs):
        """Write a message to sys.stderr."""
        self.__write(sys.stderr, *args, **kargs)

    def msg(self, *args, **kargs):
        """Write a message to sys.stdout."""
        self.__write(sys.stdout, *args, **kargs)

    def out(self, *args, **kargs):
        """Write to the output file if one is open, else to sys.stdout."""
        self.__write(self.outfile(), *args, **kargs)
92
if __name__ == "__main__":
    # Executed as a script: run the bare application shell.
    shell = App()
    shell.launch()