From 0663c432764117c42e226d8cac623a9fcf3e8daf Mon Sep 17 00:00:00 2001 From: Jonathan Herman Date: Wed, 13 Feb 2013 17:04:37 -0500 Subject: Parallelized plotting and parsing. --- parse/dir_map.py | 58 ++++++++++++++++++++++++++++---------------------------- parse_exps.py | 11 +++++++---- plot_exps.py | 32 ++++++++++++++++--------------- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/parse/dir_map.py b/parse/dir_map.py index 11c872a..1c17f40 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py @@ -4,38 +4,38 @@ import re from collections import defaultdict -class DirMap(object): - class Node(object): - def __init__(self, parent = None): - self.parent = parent - self.children = defaultdict(lambda : DirMap.Node(self)) - self.values = [] - - def heir(self, generation=1): - def heir2(node, generation): - if not generation: - return node - elif not node.children: - return None - else: - next_heir = node.children.values()[0] - return next_heir.heir(generation - 1) - return heir2(self, generation) - - def leafs(self, path=[], offset=0): - path = list(path) - check_node = self.heir(offset) - if check_node and check_node.children: - for child_name, child_node in self.children.iteritems(): - path += [child_name] - for leaf in child_node.leafs(path, offset): - yield leaf - path.pop() +class DirMapNode(object): + def __init__(self): + self.children = defaultdict(DirMapNode) + self.values = [] + + def heir(self, generation=1): + def heir2(node, generation): + if not generation: + return node + elif not node.children: + return None else: - yield (path, self) + next_heir = node.children.values()[0] + return next_heir.heir(generation - 1) + return heir2(self, generation) + + def leafs(self, path=[], offset=0): + path = list(path) + check_node = self.heir(offset) + if check_node and check_node.children: + for child_name, child_node in self.children.iteritems(): + path += [child_name] + for leaf in child_node.leafs(path, offset): + yield leaf + path.pop() + else: + yield (path, self) + +class DirMap(object): def __init__(self): - self.root = DirMap.Node(None) + self.root = DirMapNode() self.values = [] def add_values(self, path, values): diff --git a/parse_exps.py b/parse_exps.py index c2376de..f27021a 100755 --- a/parse_exps.py +++ b/parse_exps.py @@ -16,7 +16,7 @@ from parse.dir_map import DirMap from parse.point import ExpPoint from parse.tuple_table import TupleTable,ReducedTupleTable from parse.col_map import ColMapBuilder - +from multiprocessing import Pool, cpu_count def parse_args(): # TODO: convert data-dir to proper option, clean 'dest' options @@ -106,7 +106,7 @@ def parse_exp(exp, force): pickle.dump(result, f) - return result + return (exp, result) def main(): opts, args = parse_args() @@ -126,8 +126,11 @@ def main(): result_table = TupleTable(col_map) sys.stderr.write("Parsing data...\n") - for i,exp in enumerate(exps): - result = parse_exp(exp, opts.force) + + procs = min(len(exps), cpu_count()/2) + pool = Pool(processes=procs) + enum = pool.imap_unordered(parse_exp, exps, [opts.force]*len(exps)) + for i, (exp, result) in enumerate(enum): if opts.verbose: print(result) else: diff --git a/plot_exps.py b/plot_exps.py index b17cd36..8fbef99 100755 --- a/plot_exps.py +++ b/plot_exps.py @@ -10,6 +10,7 @@ from optparse import OptionParser from parse.col_map import ColMap,ColMapBuilder from parse.dir_map import DirMap from plot.style import StyleMap +from multiprocessing import Pool, cpu_count def parse_args(): parser = OptionParser("usage: %prog [options] [csv_dir]...") @@ -21,10 +22,11 @@ def parse_args(): return parser.parse_args() -ExpDetails = namedtuple('ExpDetails', ['variable', 'value', 'title', 'out']) +ExpDetails = namedtuple('ExpDetails', ['variable', 'value', 'title', + 'out', 'node']) OUT_FORMAT = 'pdf' -def get_details(path, out_dir): +def get_details(node, path, out_dir): '''Decode a @path into details about a single experiment.''' out = "_".join(path) if path else "plot" out = "%s/%s.%s" % (out_dir, out, OUT_FORMAT) @@ -36,9 +38,9 @@ def get_details(path, out_dir): title += " by %s" % variable if variable else "" title += " (%s)" % (", ".join(path)) if path else "" - return ExpDetails(variable, value, title, out) + return ExpDetails(variable, value, title, out, node) -def plot_by_variable(plot_node, details): +def plot_by_variable(details): '''Plot each .csv files under @plot_node as a line on a shared plot.''' builder = ColMapBuilder() @@ -46,7 +48,7 @@ def plot_by_variable(plot_node, details): # Generate mapping of (column)=>(line property to vary) for consistently # formatted plots - for line_path, line_node in plot_node.children.iteritems(): + for line_path, line_node in details.node.children.iteritems(): encoded = line_path[:line_path.index(".csv")] line_config = ColMap.decode(encoded) @@ -85,8 +87,6 @@ def plot_dir(data_dir, out_dir, force): sys.stderr.write("Reading data...\n") dir_map = DirMap.read(data_dir) - sys.stderr.write("Creating column map...\n") - if not os.path.exists(out_dir): os.mkdir(out_dir) @@ -94,18 +94,20 @@ def plot_dir(data_dir, out_dir, force): # Count total plots for % counter num_plots = len([x for x in dir_map.leafs(1)]) - plot_num = 0 + plot_details = [] for plot_path, plot_node in dir_map.leafs(1): - details = get_details(plot_path, out_dir) + details = get_details(plot_node, plot_path, out_dir) if force or not os.path.exists(details.out): - plot_by_variable(plot_node, details) - - plot_num += 1 - - sys.stderr.write('\r {0:.2%}'.format(float(plot_num)/num_plots)) - sys.stderr.write('\n') + plot_details += [details] + + procs = min(len(plot_details), cpu_count()/2) + pool = Pool(processes=procs) + enum = pool.imap_unordered(plot_by_variable, plot_details) + for i, _ in enumerate(enum): + sys.stderr.write('\r {0:.2%}'.format(float(i)/num_plots)) + sys.stderr.write('\n') def main(): opts, args = parse_args() -- cgit v1.2.2