diff options
author | Mac Mollison <mollison@cs.unc.edu> | 2010-12-15 09:53:23 -0500 |
---|---|---|
committer | Mac Mollison <mollison@cs.unc.edu> | 2010-12-15 09:53:23 -0500 |
commit | c364f1d807eeb246ca67184246fd2c8d7933b8b6 (patch) | |
tree | 4146a91547d660e055a4d60f023fe7879075d11a | |
parent | 51e246d367d043913a882080abde3d8bae5ce4d4 (diff) |
Improve behavior when out-of-order record detected
In order to sort records from different files into a single
stream (in order of timestamp), unit-trace uses per-cpu buffers;
the length of the buffers was previously hardcoded. If that
length proved insufficient (resulting in out-of-order records),
before this commit, unit-trace produced a FATAL ERROR and
terminated.
Now, unit-trace merely keeps a record of all out-of-order
records and prints a warning at the end, listing them.
The motivation for this change was the observation that
at least sometimes, grossly out-of-order records were at
the very beginning of the trace (e.g. task system release),
so they don't really matter. If we know the IDs of the records
that are unordered, we are able to check (with the -o output)
to see if their misordering actually matters or not.
Moreover, the buffer size can now be specified with -b, and
the previously hard-coded value (200) is the default. Making
this number smaller greatly improves runtime, and vice versa.
I suspect that further investigation into the problem of sorting
records will show that the current method is overkill; down the
road, we may be able to replace this method with something much
faster. (The current method has the advantage that it is extremely
scalable, but I don't think that pays off for the size of traces
we typically examine.)
-rwxr-xr-x | unit-trace | 12 | ||||
-rw-r--r-- | unit_trace/trace_reader.py | 32 | ||||
-rw-r--r-- | unit_trace/warning_printer.py | 26 |
3 files changed, 52 insertions, 18 deletions
@@ -35,6 +35,8 @@ parser.add_option("-e", "--earliest", default=0, type=int, dest="earliest", | |||
35 | help="Earliest timestamp of interest") | 35 | help="Earliest timestamp of interest") |
36 | parser.add_option("-l", "--latest", default=0, type=int, dest="latest", | 36 | parser.add_option("-l", "--latest", default=0, type=int, dest="latest", |
37 | help="Latest timestamp of interest") | 37 | help="Latest timestamp of interest") |
38 | parser.add_option("-b", "--bufsize", dest="buffsize", default=200, type=int, | ||
39 | help="Per-CPU buffer size for sorting records") | ||
38 | (options, traces) = parser.parse_args() | 40 | (options, traces) = parser.parse_args() |
39 | traces = list(traces) | 41 | traces = list(traces) |
40 | if len(traces) < 1: | 42 | if len(traces) < 1: |
@@ -50,7 +52,7 @@ import unit_trace | |||
50 | 52 | ||
51 | # Read events from traces | 53 | # Read events from traces |
52 | from unit_trace import trace_reader | 54 | from unit_trace import trace_reader |
53 | stream = trace_reader.trace_reader(traces) | 55 | stream = trace_reader.trace_reader(traces, options.buffsize) |
54 | 56 | ||
55 | # Skip over records | 57 | # Skip over records |
56 | if options.skipnum > 0: | 58 | if options.skipnum > 0: |
@@ -100,7 +102,7 @@ if options.gedf is True: | |||
100 | # This might cause a performance bottleneck that could be eliminated by | 102 | # This might cause a performance bottleneck that could be eliminated by |
101 | # checking how many we actually need :-) | 103 | # checking how many we actually need :-) |
102 | import itertools | 104 | import itertools |
103 | stream1, stream2, stream3 = itertools.tee(stream,3) | 105 | stream1, stream2, stream3, stream4 = itertools.tee(stream,4) |
104 | 106 | ||
105 | # Call standard out printer | 107 | # Call standard out printer |
106 | if options.stdout is True: | 108 | if options.stdout is True: |
@@ -117,7 +119,11 @@ if options.num_inversions > -1: | |||
117 | from unit_trace import gedf_inversion_stat_printer | 119 | from unit_trace import gedf_inversion_stat_printer |
118 | gedf_inversion_stat_printer.gedf_inversion_stat_printer(stream2,options.num_inversions) | 120 | gedf_inversion_stat_printer.gedf_inversion_stat_printer(stream2,options.num_inversions) |
119 | 121 | ||
122 | # Print any warnings | ||
123 | from unit_trace import warning_printer | ||
124 | warning_printer.warning_printer(stream3) | ||
125 | |||
120 | # Call visualizer | 126 | # Call visualizer |
121 | if options.visualize is True: | 127 | if options.visualize is True: |
122 | from unit_trace import viz | 128 | from unit_trace import viz |
123 | viz.visualizer.visualizer(stream3, options.time_per_maj) | 129 | viz.visualizer.visualizer(stream4, options.time_per_maj) |
diff --git a/unit_trace/trace_reader.py b/unit_trace/trace_reader.py index a4c3c05..db36c13 100644 --- a/unit_trace/trace_reader.py +++ b/unit_trace/trace_reader.py | |||
@@ -34,7 +34,7 @@ import sys | |||
34 | ############################################################################### | 34 | ############################################################################### |
35 | 35 | ||
36 | # Generator function returning an iterable over records in a trace file. | 36 | # Generator function returning an iterable over records in a trace file. |
37 | def trace_reader(files): | 37 | def trace_reader(files, buffsize): |
38 | 38 | ||
39 | # Yield a record containing the input files | 39 | # Yield a record containing the input files |
40 | # This is used by progress.py to calculate progress | 40 | # This is used by progress.py to calculate progress |
@@ -66,17 +66,15 @@ def trace_reader(files): | |||
66 | # Forget that file iter | 66 | # Forget that file iter |
67 | file_iters.pop() | 67 | file_iters.pop() |
68 | 68 | ||
69 | # We keep 200 records in each buffer and then keep the buffer sorted | 69 | # We keep some number of records (given as buffsize) in each buffer and |
70 | # This is because records may have been recorded slightly out of order | 70 | # then keep the buffer sorted. This is because records may have been |
71 | # This cannot guarantee records are produced in order, but it makes it | 71 | # recorded slightly out of order. The 'try' and 'except' catches the case |
72 | # overwhelmingly probably. | 72 | # where there are less than buffsize records in a file (throwing a |
73 | # The 'try' and 'except' catches the case where there are less than 200 | 73 | # StopIteration) which otherwise would propogate up and cause the |
74 | # records in a file (throwing a StopIteration) which otherwise would | 74 | # trace_reader generator itself to throw a StopIteration. |
75 | # propogate up and cause the trace_reader generator itself to throw a | ||
76 | # StopIteration. | ||
77 | for x in range(0,len(file_iter_buff)): | 75 | for x in range(0,len(file_iter_buff)): |
78 | try: | 76 | try: |
79 | for y in range(0,200): | 77 | for y in range(0,buffsize): |
80 | file_iter_buff[x].append(file_iters[x].next()) | 78 | file_iter_buff[x].append(file_iters[x].next()) |
81 | except StopIteration: | 79 | except StopIteration: |
82 | pass | 80 | pass |
@@ -119,16 +117,20 @@ def trace_reader(files): | |||
119 | del file_iter_buff[buff_to_refill] | 117 | del file_iter_buff[buff_to_refill] |
120 | del file_iters[buff_to_refill] | 118 | del file_iters[buff_to_refill] |
121 | 119 | ||
120 | # Give the record an id number | ||
121 | id += 1 | ||
122 | earliest.id = id | ||
123 | |||
122 | # Check for monotonically increasing time | 124 | # Check for monotonically increasing time |
123 | if last_time is not None and earliest.when < last_time: | 125 | if last_time is not None and earliest.when < last_time: |
124 | exit("FATAL ERROR: trace_reader.py: out-of-order record produced") | 126 | record = Obj() |
127 | record.record_type = "meta" | ||
128 | record.type_name = "out_of_order_warning" | ||
129 | record.id = earliest.id | ||
130 | yield record | ||
125 | else: | 131 | else: |
126 | last_time = earliest.when | 132 | last_time = earliest.when |
127 | 133 | ||
128 | # Give the record an id number | ||
129 | id += 1 | ||
130 | earliest.id = id | ||
131 | |||
132 | # Yield the record | 134 | # Yield the record |
133 | yield earliest | 135 | yield earliest |
134 | 136 | ||
diff --git a/unit_trace/warning_printer.py b/unit_trace/warning_printer.py new file mode 100644 index 0000000..0f726ff --- /dev/null +++ b/unit_trace/warning_printer.py | |||
@@ -0,0 +1,26 @@ | |||
1 | ############################################################################### | ||
2 | # Description | ||
3 | ############################################################################### | ||
4 | |||
5 | # Display any warnings | ||
6 | |||
7 | ############################################################################### | ||
8 | # Imports | ||
9 | ############################################################################### | ||
10 | |||
11 | import sys | ||
12 | |||
13 | ############################################################################### | ||
14 | # Public functions | ||
15 | ############################################################################### | ||
16 | |||
17 | def warning_printer(stream): | ||
18 | out_of_order_ids = [] | ||
19 | for record in stream: | ||
20 | if (record.record_type == "meta" and | ||
21 | record.type_name == "out_of_order_warning"): | ||
22 | out_of_order_ids.append(record.id) | ||
23 | if len(out_of_order_ids) > 0: | ||
24 | sys.stderr.write( | ||
25 | "WARNING: The following {0} records were out of order:\n{1}\n".format( | ||
26 | len(out_of_order_ids), out_of_order_ids)) | ||