aboutsummaryrefslogtreecommitdiffstats
path: root/plot_exps.py
blob: d49e69d2fdc46e552dbfd37d1828ad2c05e22853 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python
from __future__ import print_function

# Without this trickery, matplotlib uses the current X Windows session
# to create graphs. Problem 1 with this: it requires that the user has an
# X session, through ssh -X or otherwise. Problem 2: it kills the
# performance of the computer running the X session, even if that computer
# isn't the one running plot_exps.py!
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plot

import common as com
import multiprocessing
import os
import shutil as sh
import sys
import traceback

from collections import namedtuple
from config.config import DEFAULTS

from optparse import OptionParser
from parse.col_map import ColMap,ColMapBuilder
from parse.dir_map import DirMap
from plot.style import make_styler

def parse_args():
    '''Parse the command line; returns optparse's (options, args) pair.'''
    cli = OptionParser("usage: %prog [options] [csv_dir]...")

    # Default to one fewer than the CPU count, but never less than one
    default_procs = max(multiprocessing.cpu_count() - 1, 1)

    cli.add_option('-o', '--out-dir', dest='out_dir',
                   default=DEFAULTS['out-plot'],
                   help='directory for plot output')
    cli.add_option('-f', '--force', dest='force',
                   action='store_true', default=False,
                   help='overwrite existing data')
    cli.add_option('-p', '--processors', dest='processors',
                   type='int', default=default_procs,
                   help='number of threads for processing')

    options, positional = cli.parse_args()
    return options, positional

# File format (and extension) of every saved plot
OUT_FORMAT = 'pdf'

# Everything needed to draw one plot: the x-axis label ('variable'), the
# y-axis label ('value'), the plot title, the output file and the data node
ExpDetails = namedtuple('ExpDetails', 'variable value title out node')

def get_details(node, path, out_dir):
    '''Decode a @path into details about a single experiment.

    @node:    data node to plot; stored unchanged in the result
    @path:    sequence of path components, read as [value, variable, extras...]
    @out_dir: directory used as the prefix of the output file path

    Returns an ExpDetails. @path itself is left unmodified.
    '''
    # Work on a copy: popping from the caller's list would silently consume
    # the path that the caller still holds
    parts = list(path) if path else []

    name = "_".join(parts) if parts else "plot"
    out  = "%s/%s.%s" % (out_dir, name, OUT_FORMAT)

    value    = parts.pop(0) if parts else None
    variable = parts.pop(0) if parts else None

    title = value.capitalize() if value else ""
    if variable:
        title += " by %s" % variable
    if parts:
        # Whatever remains after value and variable qualifies the title
        title += " (%s)" % ", ".join(parts)

    return ExpDetails(variable, value, title, out, node)

def plot_by_variable(details):
    '''Plot each .csv file under @details.node as a line on a shared plot.

    @details: ExpDetails giving the data node, axis labels, title and
              output file of the plot

    Returns True once the plot has been saved to @details.out.
    '''

    builder = ColMapBuilder()
    config_nodes = []

    # Decode file names into configuration dicts
    for line_path, line_node in details.node.children.iteritems():
        encoded = line_path[:line_path.index(".csv")]

        try:
            line_config = ColMap.decode(encoded)
        except Exception:
            # Fall back to using the whole file name as the line's name.
            # Exception (not a bare except) so Ctrl-C is not swallowed here.
            line_config = {'name': encoded}

        for k, v in line_config.iteritems():
            builder.try_add(k, v)
        config_nodes.append((line_config, line_node))

    col_map   = builder.build()
    style_map = make_styler(col_map)

    figure = plot.figure()
    try:
        axes = figure.add_subplot(111)

        # Create a line for each file node and its configuration
        for line_config, line_node in config_nodes:
            style  = style_map.get_style(line_config)
            # Order the points by their x value
            values = sorted(line_node.values, key=lambda tup: tup[0])
            xvalues, yvalues = zip(*values)

            axes.plot(xvalues, yvalues, style.fmt())

        axes.set_title(details.title)

        lines, labels = zip(*style_map.get_key())
        # The anchor places the legend slightly to the right of the plot
        axes.legend(tuple(lines), tuple(labels), prop={'size':10},
                    bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)

        axes.set_ylabel(details.value)
        axes.set_xlabel(details.variable)
        # Start both axes at 0, keeping the automatically chosen upper limits
        axes.set_xlim(0, axes.get_xlim()[1])
        axes.set_ylim(0, axes.get_ylim()[1])

        # Using 'tight' causes savefig to rescale the image for non-plot
        # artists, which in our case is just the legend
        figure.savefig(details.out, format=OUT_FORMAT, bbox_inches='tight')
    finally:
        # pyplot retains every figure it creates until it is closed; release
        # it so a process that draws many plots does not accumulate them
        plot.close(figure)

    return True

def plot_wrapper(details):
    '''Run plot_by_variable, printing a traceback instead of raising.

    Returns the result of plot_by_variable, or None if it raised.
    '''
    outcome = None
    try:
        outcome = plot_by_variable(details)
    except:
        # Report the failure here and carry on rather than propagating it
        traceback.print_exc()
    return outcome

def plot_dir(data_dir, out_dir, max_procs, force):
    '''Plot every experiment found under @data_dir into @out_dir.

    @data_dir:  directory read with DirMap.read
    @out_dir:   directory for the plot files; created if missing
    @max_procs: upper bound on the number of worker processes
    @force:     if true, re-create plots whose output file already exists

    Raises Exception if plotting fails or is interrupted.
    '''
    sys.stderr.write("Reading data...\n")
    dir_map = DirMap.read(data_dir)

    if not os.path.exists(out_dir):
        # makedirs also creates missing parent directories
        os.makedirs(out_dir)

    sys.stderr.write("Plotting...\n")

    plot_details = []
    for plot_path, plot_node in dir_map.leafs(1):
        details = get_details(plot_node, plot_path, out_dir)

        if force or not os.path.exists(details.out):
            plot_details.append(details)

    if not plot_details:
        return

    # Measure progress against the plots actually scheduled; counting the
    # skipped (already existing) ones would keep the counter below 100%
    num_plots = len(plot_details)

    procs  = min(num_plots, max_procs)
    logged = multiprocessing.Manager().list()

    pool   = multiprocessing.Pool(processes=procs,
                initializer=com.set_logged_list, initargs=(logged,))

    enum  = pool.imap_unordered(plot_wrapper, plot_details)

    try:
        # Count from 1 so that the last finished plot reports 100%
        for done, _ in enumerate(enum, 1):
            sys.stderr.write('\r {0:.2%}'.format(float(done)/num_plots))
        pool.close()
    except:
        # Bare except so that KeyboardInterrupt also terminates the workers
        pool.terminate()
        traceback.print_exc()
        raise Exception("Failed plotting!")
    finally:
        pool.join()

    sys.stderr.write('\n')

def get_dirs(args):
    '''Return the list of data directories to plot.

    Uses @args if any were given, else the default parse output directory
    if it exists, else the current working directory.
    '''
    if args:
        return args
    if os.path.exists(DEFAULTS['out-parse']):
        return [DEFAULTS['out-parse']]
    # Must be a list: the result is iterated over, and iterating a bare
    # string would yield one "directory" per character
    return [os.getcwd()]

def main():
    '''Parse the command line and plot each requested data directory.

    With several input directories, each one is plotted into its own
    sub-directory of the output directory, named after the input.
    '''
    opts, args = parse_args()
    dirs = get_dirs(args)

    if opts.force and os.path.exists(opts.out_dir):
        sh.rmtree(opts.out_dir)
    if not os.path.exists(opts.out_dir):
        # makedirs also creates missing parent directories
        os.makedirs(opts.out_dir)

    for data_dir in dirs:
        if len(dirs) > 1:
            # Strip trailing separators first; otherwise "exp/" has an empty
            # base name and every such input would share one output dir
            name = os.path.basename(os.path.normpath(data_dir))
            out_dir = "%s/%s" % (opts.out_dir, name)
        else:
            out_dir = opts.out_dir
        plot_dir(data_dir, out_dir, opts.processors, opts.force)

    sys.stderr.write("Plots saved in %s.\n" % opts.out_dir)

# Run main() only when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()