Diffstat (limited to 'tools/perf/util/session.c')
-rw-r--r-- | tools/perf/util/session.c | 106
1 file changed, 76 insertions, 30 deletions
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5d353e70fe26..9401909fa283 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -98,7 +98,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->unknown_events = 0;
 	self->machines = RB_ROOT;
 	self->repipe = repipe;
-	self->ordered_samples.flush_limit = ULLONG_MAX;
 	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
 
 	if (mode == O_RDONLY) {
@@ -194,6 +193,18 @@ static int process_event_stub(event_t *event __used,
 	return 0;
 }
 
+static int process_finished_round_stub(event_t *event __used,
+				       struct perf_session *session __used,
+				       struct perf_event_ops *ops __used)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_finished_round(event_t *event,
+				  struct perf_session *session,
+				  struct perf_event_ops *ops);
+
 static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 {
 	if (handler->sample == NULL)
@@ -222,6 +233,12 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 		handler->tracing_data = process_event_stub;
 	if (handler->build_id == NULL)
 		handler->build_id = process_event_stub;
+	if (handler->finished_round == NULL) {
+		if (handler->ordered_samples)
+			handler->finished_round = process_finished_round;
+		else
+			handler->finished_round = process_finished_round_stub;
+	}
 }
 
 static const char *event__name[] = {
@@ -359,16 +376,14 @@ struct sample_queue {
 	struct list_head list;
 };
 
-#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)
-
 static void flush_sample_queue(struct perf_session *s,
 			       struct perf_event_ops *ops)
 {
 	struct list_head *head = &s->ordered_samples.samples_head;
-	u64 limit = s->ordered_samples.flush_limit;
+	u64 limit = s->ordered_samples.next_flush;
 	struct sample_queue *tmp, *iter;
 
-	if (!ops->ordered_samples)
+	if (!ops->ordered_samples || !limit)
 		return;
 
 	list_for_each_entry_safe(iter, tmp, head, list) {
@@ -387,6 +402,55 @@ static void flush_sample_queue(struct perf_session *s,
 	}
 }
 
+/*
+ * When perf record finishes a pass on every buffer, it records this pseudo
+ * event.
+ * We record the max timestamp t found in pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will all be
+ * available and then read in pass n + 1.
+ * Hence when we start to read pass n + 2, we can safely flush every
+ * event with a timestamp below t.
+ *
+ *    ============ PASS n =================
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          1          |         2
+ *          2          |         3
+ *          -          |         4  <--- max recorded
+ *
+ *    ============ PASS n + 1 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          3          |         5
+ *          4          |         6
+ *          5          |         7 <---- max recorded
+ *
+ *      Flush every event below timestamp 4
+ *
+ *    ============ PASS n + 2 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          6          |         8
+ *          7          |         9
+ *          -          |         10
+ *
+ *      Flush every event below timestamp 7
+ *      etc...
+ */
+static int process_finished_round(event_t *event __used,
+				  struct perf_session *session,
+				  struct perf_event_ops *ops)
+{
+	flush_sample_queue(session, ops);
+	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
+
+	return 0;
+}
+
 static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
 {
 	struct sample_queue *iter;
@@ -455,16 +519,11 @@ static void __queue_sample_event(struct sample_queue *new,
 }
 
 static int queue_sample_event(event_t *event, struct sample_data *data,
-			      struct perf_session *s,
-			      struct perf_event_ops *ops)
+			      struct perf_session *s)
 {
 	u64 timestamp = data->time;
 	struct sample_queue *new;
-	u64 flush_limit;
-
 
-	if (s->ordered_samples.flush_limit == ULLONG_MAX)
-		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
 
 	if (timestamp < s->ordered_samples.last_flush) {
 		printf("Warning: Timestamp below last timeslice flush\n");
@@ -488,23 +547,8 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
 	__queue_sample_event(new, s);
 	s->ordered_samples.last_inserted = new;
 
-	/*
-	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
-	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
-	 * in the first half of the timeslice have all been buffered and there
-	 * are none remaining (we need that because of the weakly ordered
-	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
-	 * timeslice, we flush the first half to be gentle with the memory
-	 * (the second half can still get new events in the middle, so wait
-	 * another period to flush it)
-	 */
-	flush_limit = s->ordered_samples.flush_limit;
-
-	if (new->timestamp > flush_limit &&
-	    new->timestamp - flush_limit > FLUSH_PERIOD) {
-		s->ordered_samples.flush_limit += FLUSH_PERIOD;
-		flush_sample_queue(s, ops);
-	}
+	if (new->timestamp > s->ordered_samples.max_timestamp)
+		s->ordered_samples.max_timestamp = new->timestamp;
 
 	return 0;
 }
@@ -520,7 +564,7 @@ static int perf_session__process_sample(event_t *event, struct perf_session *s,
 	bzero(&data, sizeof(struct sample_data));
 	event__parse_sample(event, s->sample_type, &data);
 
-	queue_sample_event(event, &data, s, ops);
+	queue_sample_event(event, &data, s);
 
 	return 0;
 }
@@ -572,6 +616,8 @@ static int perf_session__process_event(struct perf_session *self,
 		return ops->tracing_data(event, self);
 	case PERF_RECORD_HEADER_BUILD_ID:
 		return ops->build_id(event, self);
+	case PERF_RECORD_FINISHED_ROUND:
+		return ops->finished_round(event, self, ops);
 	default:
 		self->unknown_events++;
 		return -1;
@@ -786,7 +832,7 @@ more:
 done:
 	err = 0;
 	/* do the final flush for ordered samples */
-	self->ordered_samples.flush_limit = ULLONG_MAX;
+	self->ordered_samples.next_flush = ULLONG_MAX;
 	flush_sample_queue(self, ops);
 out_err:
 	ui_progress__delete(progress);
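
The round-based flushing introduced here can be hard to follow from the diff alone. Below is a small standalone C sketch, not perf code: queue_sample(), finished_round() and the globals are hypothetical names used only for illustration. It mimics what queue_sample_event() and process_finished_round() now do with max_timestamp and next_flush, replayed on the timestamps from the PASS n / PASS n + 1 example in the new comment.

/*
 * Standalone illustration of the round-based flushing added by this patch.
 * NOT perf code: queue_sample(), finished_round() and the globals below are
 * hypothetical; they only mimic what queue_sample_event() and
 * process_finished_round() do with max_timestamp and next_flush.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_QUEUED 64

static unsigned long long queued[MAX_QUEUED];	/* stand-in for the sample list */
static int nr_queued;
static unsigned long long next_flush;		/* flush everything up to this */
static unsigned long long max_timestamp;	/* highest timestamp seen so far */

static int cmp_u64(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *)a;
	unsigned long long y = *(const unsigned long long *)b;

	return (x > y) - (x < y);
}

/* Like the tail of queue_sample_event(): queue and remember the max timestamp. */
static void queue_sample(unsigned long long ts)
{
	queued[nr_queued++] = ts;
	if (ts > max_timestamp)
		max_timestamp = ts;
}

/* Like process_finished_round(): flush up to next_flush, then advance it. */
static void finished_round(void)
{
	int i, kept = 0;

	/* Nothing to flush before the first round ends (the "!limit" guard). */
	if (next_flush) {
		qsort(queued, nr_queued, sizeof(queued[0]), cmp_u64);
		printf("flushing up to %llu:", next_flush);
		for (i = 0; i < nr_queued; i++) {
			if (queued[i] <= next_flush)
				printf(" %llu", queued[i]);
			else
				queued[kept++] = queued[i];
		}
		printf("\n");
		nr_queued = kept;
	}

	next_flush = max_timestamp;
}

int main(void)
{
	/* PASS n: CPU 0 records 1, 2; CPU 1 records 2, 3, 4 (max = 4). */
	queue_sample(1); queue_sample(2);
	queue_sample(2); queue_sample(3); queue_sample(4);
	finished_round();	/* nothing flushed, next_flush becomes 4 */

	/* PASS n + 1: CPU 0 records 3, 4, 5; CPU 1 records 5, 6, 7 (max = 7). */
	queue_sample(3); queue_sample(4); queue_sample(5);
	queue_sample(5); queue_sample(6); queue_sample(7);
	finished_round();	/* flushes everything up to 4, next_flush becomes 7 */

	return 0;
}

Running this, the first finished_round() flushes nothing (next_flush is still 0, matching the "!limit" guard added to flush_sample_queue()), and the second flushes every queued timestamp up to 4 while samples 5..7 stay buffered for the next round.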