diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-13 17:04:37 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-13 17:04:37 -0500 |
| commit | 0663c432764117c42e226d8cac623a9fcf3e8daf (patch) | |
| tree | cd36aed08c5f30b28ee29ea0e29da304947ff052 | |
| parent | 0c28870f3f9dd5fe3c029a63ab4149de5f2beb59 (diff) | |
Parallelized plotting and parsing.
| -rw-r--r-- | parse/dir_map.py | 58 | ||||
| -rwxr-xr-x | parse_exps.py | 11 | ||||
| -rwxr-xr-x | plot_exps.py | 32 |
3 files changed, 53 insertions, 48 deletions
diff --git a/parse/dir_map.py b/parse/dir_map.py index 11c872a..1c17f40 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py | |||
| @@ -4,38 +4,38 @@ import re | |||
| 4 | 4 | ||
| 5 | from collections import defaultdict | 5 | from collections import defaultdict |
| 6 | 6 | ||
| 7 | class DirMap(object): | 7 | class DirMapNode(object): |
| 8 | class Node(object): | 8 | def __init__(self): |
| 9 | def __init__(self, parent = None): | 9 | self.children = defaultdict(DirMapNode) |
| 10 | self.parent = parent | 10 | self.values = [] |
| 11 | self.children = defaultdict(lambda : DirMap.Node(self)) | 11 | |
| 12 | self.values = [] | 12 | def heir(self, generation=1): |
| 13 | 13 | def heir2(node, generation): | |
| 14 | def heir(self, generation=1): | 14 | if not generation: |
| 15 | def heir2(node, generation): | 15 | return node |
| 16 | if not generation: | 16 | elif not node.children: |
| 17 | return node | 17 | return None |
| 18 | elif not node.children: | ||
| 19 | return None | ||
| 20 | else: | ||
| 21 | next_heir = node.children.values()[0] | ||
| 22 | return next_heir.heir(generation - 1) | ||
| 23 | return heir2(self, generation) | ||
| 24 | |||
| 25 | def leafs(self, path=[], offset=0): | ||
| 26 | path = list(path) | ||
| 27 | check_node = self.heir(offset) | ||
| 28 | if check_node and check_node.children: | ||
| 29 | for child_name, child_node in self.children.iteritems(): | ||
| 30 | path += [child_name] | ||
| 31 | for leaf in child_node.leafs(path, offset): | ||
| 32 | yield leaf | ||
| 33 | path.pop() | ||
| 34 | else: | 18 | else: |
| 35 | yield (path, self) | 19 | next_heir = node.children.values()[0] |
| 20 | return next_heir.heir(generation - 1) | ||
| 21 | return heir2(self, generation) | ||
| 22 | |||
| 23 | def leafs(self, path=[], offset=0): | ||
| 24 | path = list(path) | ||
| 25 | check_node = self.heir(offset) | ||
| 26 | if check_node and check_node.children: | ||
| 27 | for child_name, child_node in self.children.iteritems(): | ||
| 28 | path += [child_name] | ||
| 29 | for leaf in child_node.leafs(path, offset): | ||
| 30 | yield leaf | ||
| 31 | path.pop() | ||
| 32 | else: | ||
| 33 | yield (path, self) | ||
| 34 | |||
| 35 | class DirMap(object): | ||
| 36 | 36 | ||
| 37 | def __init__(self): | 37 | def __init__(self): |
| 38 | self.root = DirMap.Node(None) | 38 | self.root = DirMapNode() |
| 39 | self.values = [] | 39 | self.values = [] |
| 40 | 40 | ||
| 41 | def add_values(self, path, values): | 41 | def add_values(self, path, values): |
diff --git a/parse_exps.py b/parse_exps.py index c2376de..f27021a 100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
| @@ -16,7 +16,7 @@ from parse.dir_map import DirMap | |||
| 16 | from parse.point import ExpPoint | 16 | from parse.point import ExpPoint |
| 17 | from parse.tuple_table import TupleTable,ReducedTupleTable | 17 | from parse.tuple_table import TupleTable,ReducedTupleTable |
| 18 | from parse.col_map import ColMapBuilder | 18 | from parse.col_map import ColMapBuilder |
| 19 | 19 | from multiprocessing import Pool, cpu_count | |
| 20 | 20 | ||
| 21 | def parse_args(): | 21 | def parse_args(): |
| 22 | # TODO: convert data-dir to proper option, clean 'dest' options | 22 | # TODO: convert data-dir to proper option, clean 'dest' options |
| @@ -106,7 +106,7 @@ def parse_exp(exp, force): | |||
| 106 | 106 | ||
| 107 | pickle.dump(result, f) | 107 | pickle.dump(result, f) |
| 108 | 108 | ||
| 109 | return result | 109 | return (exp, result) |
| 110 | 110 | ||
| 111 | def main(): | 111 | def main(): |
| 112 | opts, args = parse_args() | 112 | opts, args = parse_args() |
| @@ -126,8 +126,11 @@ def main(): | |||
| 126 | result_table = TupleTable(col_map) | 126 | result_table = TupleTable(col_map) |
| 127 | 127 | ||
| 128 | sys.stderr.write("Parsing data...\n") | 128 | sys.stderr.write("Parsing data...\n") |
| 129 | for i,exp in enumerate(exps): | 129 | |
| 130 | result = parse_exp(exp, opts.force) | 130 | procs = min(len(exps), cpu_count()/2) |
| 131 | pool = Pool(processes=procs) | ||
| 132 | enum = pool.imap_unordered(parse_exp, exps, [opts.force]*len(exps)) | ||
| 133 | for i, (exp, result) in enumerate(enum): | ||
| 131 | if opts.verbose: | 134 | if opts.verbose: |
| 132 | print(result) | 135 | print(result) |
| 133 | else: | 136 | else: |
diff --git a/plot_exps.py b/plot_exps.py index b17cd36..8fbef99 100755 --- a/plot_exps.py +++ b/plot_exps.py | |||
| @@ -10,6 +10,7 @@ from optparse import OptionParser | |||
| 10 | from parse.col_map import ColMap,ColMapBuilder | 10 | from parse.col_map import ColMap,ColMapBuilder |
| 11 | from parse.dir_map import DirMap | 11 | from parse.dir_map import DirMap |
| 12 | from plot.style import StyleMap | 12 | from plot.style import StyleMap |
| 13 | from multiprocessing import Pool, cpu_count | ||
| 13 | 14 | ||
| 14 | def parse_args(): | 15 | def parse_args(): |
| 15 | parser = OptionParser("usage: %prog [options] [csv_dir]...") | 16 | parser = OptionParser("usage: %prog [options] [csv_dir]...") |
| @@ -21,10 +22,11 @@ def parse_args(): | |||
| 21 | 22 | ||
| 22 | return parser.parse_args() | 23 | return parser.parse_args() |
| 23 | 24 | ||
| 24 | ExpDetails = namedtuple('ExpDetails', ['variable', 'value', 'title', 'out']) | 25 | ExpDetails = namedtuple('ExpDetails', ['variable', 'value', 'title', |
| 26 | 'out', 'node']) | ||
| 25 | OUT_FORMAT = 'pdf' | 27 | OUT_FORMAT = 'pdf' |
| 26 | 28 | ||
| 27 | def get_details(path, out_dir): | 29 | def get_details(node, path, out_dir): |
| 28 | '''Decode a @path into details about a single experiment.''' | 30 | '''Decode a @path into details about a single experiment.''' |
| 29 | out = "_".join(path) if path else "plot" | 31 | out = "_".join(path) if path else "plot" |
| 30 | out = "%s/%s.%s" % (out_dir, out, OUT_FORMAT) | 32 | out = "%s/%s.%s" % (out_dir, out, OUT_FORMAT) |
| @@ -36,9 +38,9 @@ def get_details(path, out_dir): | |||
| 36 | title += " by %s" % variable if variable else "" | 38 | title += " by %s" % variable if variable else "" |
| 37 | title += " (%s)" % (", ".join(path)) if path else "" | 39 | title += " (%s)" % (", ".join(path)) if path else "" |
| 38 | 40 | ||
| 39 | return ExpDetails(variable, value, title, out) | 41 | return ExpDetails(variable, value, title, out, node) |
| 40 | 42 | ||
| 41 | def plot_by_variable(plot_node, details): | 43 | def plot_by_variable(details): |
| 42 | '''Plot each .csv files under @plot_node as a line on a shared plot.''' | 44 | '''Plot each .csv files under @plot_node as a line on a shared plot.''' |
| 43 | 45 | ||
| 44 | builder = ColMapBuilder() | 46 | builder = ColMapBuilder() |
| @@ -46,7 +48,7 @@ def plot_by_variable(plot_node, details): | |||
| 46 | 48 | ||
| 47 | # Generate mapping of (column)=>(line property to vary) for consistently | 49 | # Generate mapping of (column)=>(line property to vary) for consistently |
| 48 | # formatted plots | 50 | # formatted plots |
| 49 | for line_path, line_node in plot_node.children.iteritems(): | 51 | for line_path, line_node in details.node.children.iteritems(): |
| 50 | encoded = line_path[:line_path.index(".csv")] | 52 | encoded = line_path[:line_path.index(".csv")] |
| 51 | line_config = ColMap.decode(encoded) | 53 | line_config = ColMap.decode(encoded) |
| 52 | 54 | ||
| @@ -85,8 +87,6 @@ def plot_dir(data_dir, out_dir, force): | |||
| 85 | sys.stderr.write("Reading data...\n") | 87 | sys.stderr.write("Reading data...\n") |
| 86 | dir_map = DirMap.read(data_dir) | 88 | dir_map = DirMap.read(data_dir) |
| 87 | 89 | ||
| 88 | sys.stderr.write("Creating column map...\n") | ||
| 89 | |||
| 90 | if not os.path.exists(out_dir): | 90 | if not os.path.exists(out_dir): |
| 91 | os.mkdir(out_dir) | 91 | os.mkdir(out_dir) |
| 92 | 92 | ||
| @@ -94,18 +94,20 @@ def plot_dir(data_dir, out_dir, force): | |||
| 94 | 94 | ||
| 95 | # Count total plots for % counter | 95 | # Count total plots for % counter |
| 96 | num_plots = len([x for x in dir_map.leafs(1)]) | 96 | num_plots = len([x for x in dir_map.leafs(1)]) |
| 97 | plot_num = 0 | ||
| 98 | 97 | ||
| 98 | plot_details = [] | ||
| 99 | for plot_path, plot_node in dir_map.leafs(1): | 99 | for plot_path, plot_node in dir_map.leafs(1): |
| 100 | details = get_details(plot_path, out_dir) | 100 | details = get_details(plot_node, plot_path, out_dir) |
| 101 | 101 | ||
| 102 | if force or not os.path.exists(details.out): | 102 | if force or not os.path.exists(details.out): |
| 103 | plot_by_variable(plot_node, details) | 103 | plot_details += [details] |
| 104 | 104 | ||
| 105 | plot_num += 1 | 105 | procs = min(len(plot_details), cpu_count()/2) |
| 106 | 106 | pool = Pool(processes=procs) | |
| 107 | sys.stderr.write('\r {0:.2%}'.format(float(plot_num)/num_plots)) | 107 | enum = pool.imap_unordered(plot_by_variable, plot_details) |
| 108 | sys.stderr.write('\n') | 108 | for i, _ in enumerate(enum): |
| 109 | sys.stderr.write('\r {0:.2%}'.format(float(i)/num_plots)) | ||
| 110 | sys.stderr.write('\n') | ||
| 109 | 111 | ||
| 110 | def main(): | 112 | def main(): |
| 111 | opts, args = parse_args() | 113 | opts, args = parse_args() |
