author    Bjoern Brandenburg <bbb@mpi-sws.org>    2013-10-10 10:27:29 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>    2014-01-14 04:37:19 -0500
commit    acea569078b62c6af0e278573a7707d67c5809c5 (patch)
tree      c53eb7eeee924aa234c46fb74fcc9b500552b2e5
parent    bf319d4af32822c04ffd59ab83afaf132d44784e (diff)
Implement -c (implausible latency filter) in ftsort
Given the option
-c <CYCLES PER NANOSECOND>
ftsort now looks for latencies that are implausibly large and
filters them.
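For example, on a 2 GHz machine (two clock cycles per nanosecond),
one would invoke ftsort as follows (trace file name illustrative):

    ftsort -c 2.0 samples.ft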
Whether a latency sample is "implausibly large" is determined based on
the following observation: the timestamps of certain earlier and later
events indicate points in time at which a processor was preemptable
(i.e., at which interrupts were not masked). The delta between two
consecutive such points implies an upper bound on the length of any
"interrupts off" segment during the intervening interval. No interrupt
latency should be larger than this upper bound.
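As a concrete (illustrative) example: if two consecutive preemptable
points are 20,000,000 cycles apart on a machine running at 2 cycles
per nanosecond, then no "interrupts off" segment in that interval can
have exceeded 20,000,000 / 2 = 10,000,000ns; a latency sample of
15,000,000ns recorded in between is therefore implausible and is
filtered.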
Note that this option should *NOT* be required for publication-quality
benchmarks in most circumstances. In particular, this should never be
necessary when benchmarking LITMUS^RT kernels. If it ever becomes
necessary, this should be considered a bug and reported.
For context, this filter was originally developed for benchmarking
SCHED_DEADLINE, where "jobs are released" (i.e., nanosleep() calls
complete) in such a way that implausibly large latencies may arise if
the system becomes overloaded.
-rw-r--r--  src/ftsort.c | 84
1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/src/ftsort.c b/src/ftsort.c
index 8ba0114..33b4584 100644
--- a/src/ftsort.c
+++ b/src/ftsort.c
@@ -35,9 +35,12 @@ static unsigned int holes = 0;
 static unsigned int non_monotonic = 0;
 static unsigned int reordered = 0;
 static unsigned int aborted_moves = 0;
+static unsigned int implausible = 0;
 
 static int want_verbose = 0;
 
+double cycles_per_nanosecond = 0;
+
 #define LOOK_AHEAD 1024
 #define MAX_NR_NOT_IN_RANGE 5
 
@@ -255,6 +258,72 @@ static void pre_check_cpu_monotonicity(struct timestamp *start,
 	}
 }
 
+
+static struct timestamp* find_np_upper_bound(
+	uint8_t cpu,
+	struct timestamp *start,
+	struct timestamp *end)
+{
+	struct timestamp *pos, *prev;
+
+	for (pos = start, prev = pos - 1;
+	     pos < end && prev->seq_no + 1 == pos->seq_no;
+	     pos++, prev = pos - 1) {
+		if (pos->cpu == cpu &&
+		    (pos->event == TS_RELEASE_START ||
+		     pos->event == TS_SCHED_START))
+			return pos;
+	}
+	return NULL;
+}
+
+static void filter_implausible_latencies(struct timestamp *start,
+					 struct timestamp *end)
+{
+	uint64_t last_preemptable[MAX_CPUS];
+	uint64_t delta;
+	int lp_valid[MAX_CPUS];
+	int i;
+
+	struct timestamp *pos, *next;
+
+	for (i = 0; i < MAX_CPUS; i++)
+		lp_valid[i] = 0;
+
+	for (pos = start, next = pos + 1; next < end; pos++, next = pos + 1) {
+		/* In Linux, scheduler invocation can only start when a CPU is
+		 * preemptable. We use this to lower bound the time when a CPU
+		 * was last preemptable. */
+
+		/* reset at holes */
+		if (pos->seq_no + 1 != next->seq_no) {
+			for (i = 0; i < MAX_CPUS; i++)
+				lp_valid[i] = 0;
+		} else if (pos->event == TS_SCHED_START) {
+			lp_valid[pos->cpu] = 1;
+			last_preemptable[pos->cpu] = pos->timestamp;
+		} else if (pos->event == TS_RELEASE_LATENCY) {
+			if (lp_valid[pos->cpu] &&
+			    (next = find_np_upper_bound(pos->cpu, next, end)) &&
+			    next->timestamp > last_preemptable[pos->cpu]) {
+				delta = next->timestamp - last_preemptable[pos->cpu];
+				if (delta / cycles_per_nanosecond < pos->timestamp) {
+					/* This makes no sense: more release latency than the
+					 * upper bound on the non-preemptable section length.
+					 */
+					pos->event = UINT8_MAX;
+					implausible++;
+					if (want_verbose)
+						printf("Latency %12lluns on cpu %u is implausible: "
+						       "upper bound on non-preemptability = %10.0fns\n",
+						       (unsigned long long) pos->timestamp, pos->cpu,
+						       delta / cycles_per_nanosecond);
+				}
+			}
+		}
+	}
+}
+
 static inline uint64_t bget(int x, uint64_t quad)
 
 {
@@ -285,10 +354,11 @@ static void restore_byte_order(struct timestamp* start, struct timestamp* end)
 }
 
 #define USAGE \
-	"Usage: ftsort [-e] <logfile> \n" \
+	"Usage: ftsort [-e] [-s] [-v] [-c CYCLES] <logfile> \n" \
 	" -e: endianess swap -- restores byte order \n" \
 	" -s: simulate -- don't overwrite file\n" \
 	" -v: verbose -- be chatty\n" \
+	" -c: CPU speed -- cycles per nanosecond" \
 	"\n" \
 	"WARNING: Changes are permanent, unless -s is specified.\n"
 
@@ -301,7 +371,7 @@ static void die(char* msg)
 	exit(1);
 }
 
-#define OPTS "esv"
+#define OPTS "esvc:"
 
 int main(int argc, char** argv)
 {
@@ -324,6 +394,11 @@ int main(int argc, char** argv)
 		case 'v':
 			want_verbose = 1;
 			break;
+		case 'c':
+			cycles_per_nanosecond = atof(optarg);
+			if (cycles_per_nanosecond <= 0)
+				die("Bad argument -c: need positive number.");
+			break;
 		default:
 			die("Unknown option.");
 			break;
@@ -353,6 +428,9 @@ int main(int argc, char** argv)
 	pre_check_cpu_monotonicity(ts, end);
 	reorder(ts, end);
 
+	if (cycles_per_nanosecond)
+		filter_implausible_latencies(ts, end);
+
 	/* write back */
 	if (simulate)
 		fprintf(stderr, "Note: not writing back results.\n");
@@ -367,11 +445,13 @@ int main(int argc, char** argv)
 	       "Reordered       : %10u\n"
 	       "Non-monotonic   : %10u\n"
 	       "Seq. constraint : %10u\n"
+	       "Implausible     : %10u\n"
 	       "Size            : %10.2f Mb\n"
 	       "Time            : %10.2f s\n"
 	       "Throughput      : %10.2f Mb/s\n",
 	       (unsigned int) count,
 	       holes, reordered, non_monotonic, aborted_moves,
+	       implausible,
 	       ((double) size) / 1024.0 / 1024.0,
 	       (stop - start),
 	       ((double) size) / 1024.0 / 1024.0 / (stop - start));
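
With -v enabled, each filtered sample is reported using the printf
format added above; with illustrative numbers, the output looks like:

    Latency      8000000ns on cpu 2 is implausible: upper bound on non-preemptability =    5000000ns

The summary table additionally gains an "Implausible" row counting the
filtered samples.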