author     Frederic Weisbecker <fweisbec@gmail.com>  2010-04-23 18:04:12 -0400
committer  Frederic Weisbecker <fweisbec@gmail.com>  2010-04-23 21:49:58 -0400
commit     c61e52ee705f938596d307625dce00cc4345aaf0 (patch)
tree       6bb8a1d2662790c6b5ee8d09e0b94d91c97d1da0 /tools/perf/util/session.c
parent     5710fcad7c367adefe5634dc998f1f88780a8457 (diff)
perf: Generalize perf lock's sample event reordering to the session layer
The sample events recorded by perf record are not time ordered because we have
one buffer per cpu for each event (even demultiplexed per task/per cpu for
task-bound events). But when we read trace events we want them ordered by time
because many state machines are involved.

There are currently two ways perf tools deal with that:

- use -M to multiplex every buffer (perf sched, perf kmem).
  But this creates a lot of contention on SMP machines at record time.

- use a post-processing time reordering (perf timechart, perf lock).
  The reordering used by timechart is simple but doesn't scale well with a huge
  flow of events, in terms of both performance and memory use (unusable with
  perf lock, for example).
  Perf lock has its own sample reordering that flushes its memory use on a
  regular basis and that sorts based on the previous event queued (a new event
  to be queued is close to the previous one most of the time).

This patch exports perf lock's sample reordering facility to the session layer
that reads the events. If a tool wants ordered sample events, it only needs to
set its struct perf_event_ops::ordered_samples to true.

This prepares tracing-based perf tools to get rid of the need to use buffer
multiplexing (-M) or to implement their own reordering.

Also lower the flush period to 2 seconds, as that is already sufficient.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
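To illustrate the opt-in described above, here is a minimal sketch of what a tool's event ops could look like after this change; process_sample_event is a hypothetical handler name, and only the .sample and .ordered_samples fields of struct perf_event_ops are shown:

	static int process_sample_event(event_t *event, struct perf_session *session)
	{
		/* With .ordered_samples set, samples arrive here sorted by timestamp. */
		return 0;
	}

	static struct perf_event_ops event_ops = {
		.sample		 = process_sample_event,
		.ordered_samples = true,	/* let the session layer reorder samples */
	};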
Diffstat (limited to 'tools/perf/util/session.c')
-rw-r--r--  tools/perf/util/session.c | 179
1 file changed, 178 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7d88ae5c270f..b7aade2184b2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -98,6 +98,8 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool force)
 	self->cwdlen = 0;
 	self->unknown_events = 0;
 	self->kerninfo_root = RB_ROOT;
+	self->ordered_samples.flush_limit = ULLONG_MAX;
+	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
 
 	if (mode == O_RDONLY) {
 		if (perf_session__open(self, force) < 0)
@@ -351,6 +353,178 @@ static event__swap_op event__swap_ops[] = {
 	[PERF_RECORD_HEADER_MAX] = NULL,
 };
 
+struct sample_queue {
+	u64			timestamp;
+	struct sample_event	*event;
+	struct list_head	list;
+};
+
+#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)
+
+static void flush_sample_queue(struct perf_session *s,
+			       struct perf_event_ops *ops)
+{
+	struct list_head *head = &s->ordered_samples.samples_head;
+	u64 limit = s->ordered_samples.flush_limit;
+	struct sample_queue *tmp, *iter;
+
+	if (!ops->ordered_samples)
+		return;
+
+	list_for_each_entry_safe(iter, tmp, head, list) {
+		if (iter->timestamp > limit)
+			return;
+
+		if (iter == s->ordered_samples.last_inserted)
+			s->ordered_samples.last_inserted = NULL;
+
+		ops->sample((event_t *)iter->event, s);
+
+		s->ordered_samples.last_flush = iter->timestamp;
+		list_del(&iter->list);
+		free(iter->event);
+		free(iter);
+	}
+}
+
+static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
+{
+	struct sample_queue *iter;
+
+	list_for_each_entry_reverse(iter, head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, head);
+}
+
+static void __queue_sample_before(struct sample_queue *new,
+				  struct sample_queue *iter,
+				  struct list_head *head)
+{
+	list_for_each_entry_continue_reverse(iter, head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, head);
+}
+
+static void __queue_sample_after(struct sample_queue *new,
+				 struct sample_queue *iter,
+				 struct list_head *head)
+{
+	list_for_each_entry_continue(iter, head, list) {
+		if (iter->timestamp > new->timestamp) {
+			list_add_tail(&new->list, &iter->list);
+			return;
+		}
+	}
+	list_add_tail(&new->list, head);
+}
+
+/* The queue is ordered by time */
+static void __queue_sample_event(struct sample_queue *new,
+				 struct perf_session *s)
+{
+	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
+	struct list_head *head = &s->ordered_samples.samples_head;
+
+
+	if (!last_inserted) {
+		__queue_sample_end(new, head);
+		return;
+	}
+
+	/*
+	 * Most of the time the current event has a timestamp
+	 * very close to the last event inserted, unless we just switched
+	 * to another event buffer. Having a sorting based on a list and
+	 * on the last inserted event that is close to the current one is
+	 * probably more efficient than an rbtree based sorting.
+	 */
+	if (last_inserted->timestamp >= new->timestamp)
+		__queue_sample_before(new, last_inserted, head);
+	else
+		__queue_sample_after(new, last_inserted, head);
+}
+
+static int queue_sample_event(event_t *event, struct sample_data *data,
+			      struct perf_session *s,
+			      struct perf_event_ops *ops)
+{
+	u64 timestamp = data->time;
+	struct sample_queue *new;
+	u64 flush_limit;
+
+
+	if (s->ordered_samples.flush_limit == ULLONG_MAX)
+		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
+
+	if (timestamp < s->ordered_samples.last_flush) {
+		printf("Warning: Timestamp below last timeslice flush\n");
+		return -EINVAL;
+	}
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		return -ENOMEM;
+
+	new->timestamp = timestamp;
+
+	new->event = malloc(event->header.size);
+	if (!new->event) {
+		free(new);
+		return -ENOMEM;
+	}
+
+	memcpy(new->event, event, event->header.size);
+
+	__queue_sample_event(new, s);
+	s->ordered_samples.last_inserted = new;
+
+	/*
+	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
+	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
+	 * in the first half of the timeslice have all been buffered and there
+	 * are none remaining (we need that because of the weakly ordered
+	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
+	 * timeslice, we flush the first half to be gentle with the memory
+	 * (the second half can still get new events in the middle, so wait
+	 * another period to flush it)
+	 */
+	flush_limit = s->ordered_samples.flush_limit;
+
+	if (new->timestamp > flush_limit &&
+	    new->timestamp - flush_limit > FLUSH_PERIOD) {
+		s->ordered_samples.flush_limit += FLUSH_PERIOD;
+		flush_sample_queue(s, ops);
+	}
+
+	return 0;
+}
+
+static int perf_session__process_sample(event_t *event, struct perf_session *s,
+					struct perf_event_ops *ops)
+{
+	struct sample_data data;
+
+	if (!ops->ordered_samples)
+		return ops->sample(event, s);
+
+	bzero(&data, sizeof(struct sample_data));
+	event__parse_sample(event, s->sample_type, &data);
+
+	queue_sample_event(event, &data, s, ops);
+
+	return 0;
+}
+
 static int perf_session__process_event(struct perf_session *self,
 				       event_t *event,
 				       struct perf_event_ops *ops,
@@ -371,7 +545,7 @@ static int perf_session__process_event(struct perf_session *self,
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		return ops->sample(event, self);
+		return perf_session__process_sample(event, self, ops);
 	case PERF_RECORD_MMAP:
 		return ops->mmap(event, self);
 	case PERF_RECORD_COMM:
@@ -611,6 +785,9 @@ more:
 		goto more;
 done:
 	err = 0;
+	/* do the final flush for ordered samples */
+	self->ordered_samples.flush_limit = ULLONG_MAX;
+	flush_sample_queue(self, ops);
 out_err:
 	ui_progress__delete(progress);
 	return err;
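As a side note, the "insert near the previously inserted node" idea used by __queue_sample_event() above can be tried outside of perf with a small standalone sketch. This is not perf code: the list handling is simplified and every name below is invented for illustration; it only demonstrates that starting the walk at the last inserted node keeps the list sorted by timestamp.

	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		unsigned long long	ts;
		struct node		*prev, *next;
	};

	/* Circular doubly linked list with a sentinel: head.next is the oldest entry. */
	static struct node head = { 0, &head, &head };
	static struct node *last_inserted;

	static void insert_after(struct node *new, struct node *pos)
	{
		new->prev = pos;
		new->next = pos->next;
		pos->next->prev = new;
		pos->next = new;
	}

	static void queue(unsigned long long ts)
	{
		struct node *new = malloc(sizeof(*new));
		/* Start near the last insertion point instead of the list head. */
		struct node *iter = last_inserted ? last_inserted : head.prev;

		if (!new)
			return;
		new->ts = ts;

		/* Walk backward while the neighbour is newer than the new sample ... */
		while (iter != &head && iter->ts > ts)
			iter = iter->prev;
		/* ... then forward while the next neighbour is not newer. */
		while (iter->next != &head && iter->next->ts <= ts)
			iter = iter->next;

		insert_after(new, iter);
		last_inserted = new;
	}

	int main(void)
	{
		unsigned long long in[] = { 30, 10, 20, 25, 40, 35 };
		struct node *n;

		for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); i++)
			queue(in[i]);

		for (n = head.next; n != &head; n = n->next)
			printf("%llu\n", n->ts);	/* prints 10 20 25 30 35 40 */

		return 0;
	}

In the patch itself the same idea is split into __queue_sample_before()/__queue_sample_after() on top of the kernel list_head API, and the queue is drained by flush_sample_queue() once queued samples fall behind the flush limit.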