diff options
author | Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu> | 2008-09-04 22:12:54 -0400 |
---|---|---|
committer | Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu> | 2008-09-04 22:12:54 -0400 |
commit | 76ae48f526cfad971ef921601a50460bbf1553a5 (patch) | |
tree | 10e856ce2ba205e835748bccd9b333dbbae6b24a | |
parent | 6325c94ffb8dc4010c1946bbc674472b105ad899 (diff) |
Add transposing of matrices (= CSV files)
-rwxr-xr-x | csv_tool | 47 |
1 file changed, 35 insertions, 12 deletions
@@ -71,6 +71,19 @@ def row_reduce(row_op, fixup=lambda key, rows, res: res): | |||
71 | row_mul = make_scalar_op(operator.mul) | 71 | row_mul = make_scalar_op(operator.mul) |
72 | row_div = make_scalar_op(operator.div) | 72 | row_div = make_scalar_op(operator.div) |
73 | 73 | ||
74 | def transpose(rows): | ||
75 | rows = list(rows) | ||
76 | if rows: | ||
77 | r = len(rows) | ||
78 | c = max([len(x) for x in rows]) | ||
79 | def at(x, y): | ||
80 | try: | ||
81 | return rows[x][y] | ||
82 | except IndexError: | ||
83 | return 0 | ||
84 | for i in xrange(c): | ||
85 | yield [at(j, i) for j in xrange(r) ] | ||
86 | |||
74 | def select_by_key(rows, col, cast=None): | 87 | def select_by_key(rows, col, cast=None): |
75 | by_key = defdict(list) | 88 | by_key = defdict(list) |
76 | order = [] | 89 | order = [] |
@@ -87,32 +100,42 @@ class CsvApp(defapp.App): | |||
87 | def __init__(self): | 100 | def __init__(self): |
88 | defapp.App.__init__(self, opts, defaults) | 101 | defapp.App.__init__(self, opts, defaults) |
89 | 102 | ||
90 | def ordered_transform(self, make_iterator): | 103 | def transform(self, make_iterator, ordered=True): |
91 | """Average all rows with the same key in a given column.""" | 104 | """Average all rows with the same key in a given column.""" |
92 | files = list(self.args) | 105 | files = list(self.args) |
93 | del files[0] | 106 | del files[0] |
94 | try: | 107 | for fn in files: |
95 | for fn in files: | 108 | try: |
96 | # read in content | 109 | # read in content |
97 | (order, by_key) = select_by_key(csv.reader(open(fn, 'r')), | 110 | rows = csv.reader(open(fn, 'r')) |
98 | self.options.col, float) | 111 | # set up transformation |
112 | if ordered: | ||
113 | (order, by_key) = select_by_key(rows, self.options.col, float) | ||
114 | rows = make_iterator(order, by_key) | ||
115 | else: | ||
116 | rows = make_iterator(rows) | ||
99 | # write out | 117 | # write out |
100 | csv.writer(self.outfile()).writerows(make_iterator(order, by_key)) | 118 | csv.writer(self.outfile()).writerows(rows) |
101 | except IOError, ex: | 119 | except IOError, ex: |
102 | print "Error:", ex | 120 | self.err("%s:%s" % (fn, str(ex))) |
103 | 121 | except IndexError, ex: | |
122 | self.err("%s: Sorry, index out of range." % fn) | ||
123 | |||
104 | def do_avg(self, _): | 124 | def do_avg(self, _): |
105 | def fixup_avg(key, rows, res): | 125 | def fixup_avg(key, rows, res): |
106 | res = row_div(len(rows), res) | 126 | res = row_div(len(rows), res) |
107 | res[self.options.col] = key | 127 | res[self.options.col] = key |
108 | return res | 128 | return res |
109 | self.ordered_transform(row_reduce(row_add, fixup_avg)) | 129 | self.transform(row_reduce(row_add, fixup_avg)) |
110 | 130 | ||
111 | def do_max(self, _): | 131 | def do_max(self, _): |
112 | self.ordered_transform(row_reduce(row_max)) | 132 | self.transform(row_reduce(row_max)) |
113 | 133 | ||
114 | def do_min(self, _): | 134 | def do_min(self, _): |
115 | self.ordered_transform(row_reduce(row_min)) | 135 | self.transform(row_reduce(row_min)) |
136 | |||
137 | def do_transpose(self, _): | ||
138 | self.transform(transpose, ordered=False) | ||
116 | 139 | ||
117 | if __name__ == '__main__': | 140 | if __name__ == '__main__': |
118 | CsvApp().launch() | 141 | CsvApp().launch() |