-rw-r--r--  tools/perf/builtin-lock.c   197
-rw-r--r--  tools/perf/util/session.c   179
-rw-r--r--  tools/perf/util/session.h    10
3 files changed, 210 insertions(+), 176 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 716d8c544a56..ce276750b140 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -316,8 +316,6 @@ alloc_failed:
 
 static char const *input_name = "perf.data";
 
-static int profile_cpu = -1;
-
 struct raw_event_sample {
 	u32 size;
 	char data[0];
@@ -697,8 +695,7 @@ process_lock_release_event(void *data,
 }
 
 static void
-process_raw_event(void *data, int cpu __used,
-		  u64 timestamp __used, struct thread *thread __used)
+process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
 {
 	struct event *event;
 	int type;
@@ -716,176 +713,6 @@ process_raw_event(void *data, int cpu __used,
 		process_lock_release_event(data, event, cpu, timestamp, thread);
 }
 
-struct raw_event_queue {
-	u64 timestamp;
-	int cpu;
-	void *data;
-	struct thread *thread;
-	struct list_head list;
-};
-
-static LIST_HEAD(raw_event_head);
-
-#define FLUSH_PERIOD (5 * NSEC_PER_SEC)
-
-static u64 flush_limit = ULLONG_MAX;
-static u64 last_flush = 0;
-struct raw_event_queue *last_inserted;
-
-static void flush_raw_event_queue(u64 limit)
-{
-	struct raw_event_queue *tmp, *iter;
-
-	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
-		if (iter->timestamp > limit)
-			return;
-
-		if (iter == last_inserted)
-			last_inserted = NULL;
-
-		process_raw_event(iter->data, iter->cpu, iter->timestamp,
-				  iter->thread);
-
-		last_flush = iter->timestamp;
-		list_del(&iter->list);
-		free(iter->data);
-		free(iter);
-	}
-}
-
-static void __queue_raw_event_end(struct raw_event_queue *new)
-{
-	struct raw_event_queue *iter;
-
-	list_for_each_entry_reverse(iter, &raw_event_head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, &raw_event_head);
-}
-
-static void __queue_raw_event_before(struct raw_event_queue *new,
-				     struct raw_event_queue *iter)
-{
-	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, &raw_event_head);
-}
-
-static void __queue_raw_event_after(struct raw_event_queue *new,
-				    struct raw_event_queue *iter)
-{
-	list_for_each_entry_continue(iter, &raw_event_head, list) {
-		if (iter->timestamp > new->timestamp) {
-			list_add_tail(&new->list, &iter->list);
-			return;
-		}
-	}
-	list_add_tail(&new->list, &raw_event_head);
-}
-
-/* The queue is ordered by time */
-static void __queue_raw_event(struct raw_event_queue *new)
-{
-	if (!last_inserted) {
-		__queue_raw_event_end(new);
-		return;
-	}
-
-	/*
-	 * Most of the time the current event has a timestamp
-	 * very close to the last event inserted, unless we just switched
-	 * to another event buffer. Having a sorting based on a list and
-	 * on the last inserted event that is close to the current one is
-	 * probably more efficient than an rbtree based sorting.
-	 */
-	if (last_inserted->timestamp >= new->timestamp)
-		__queue_raw_event_before(new, last_inserted);
-	else
-		__queue_raw_event_after(new, last_inserted);
-}
-
-static void queue_raw_event(void *data, int raw_size, int cpu,
-			    u64 timestamp, struct thread *thread)
-{
-	struct raw_event_queue *new;
-
-	if (flush_limit == ULLONG_MAX)
-		flush_limit = timestamp + FLUSH_PERIOD;
-
-	if (timestamp < last_flush) {
-		printf("Warning: Timestamp below last timeslice flush\n");
-		return;
-	}
-
-	new = malloc(sizeof(*new));
-	if (!new)
-		die("Not enough memory\n");
-
-	new->timestamp = timestamp;
-	new->cpu = cpu;
-	new->thread = thread;
-
-	new->data = malloc(raw_size);
-	if (!new->data)
-		die("Not enough memory\n");
-
-	memcpy(new->data, data, raw_size);
-
-	__queue_raw_event(new);
-	last_inserted = new;
-
-	/*
-	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
-	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
-	 * in the first half of the timeslice have all been buffered and there
-	 * are none remaining (we need that because of the weakly ordered
-	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
-	 * timeslice, we flush the first half to be gentle with the memory
-	 * (the second half can still get new events in the middle, so wait
-	 * another period to flush it)
-	 */
-	if (new->timestamp > flush_limit &&
-	    new->timestamp - flush_limit > FLUSH_PERIOD) {
-		flush_limit += FLUSH_PERIOD;
-		flush_raw_event_queue(flush_limit);
-	}
-}
-
-static int process_sample_event(event_t *event, struct perf_session *s)
-{
-	struct thread *thread;
-	struct sample_data data;
-
-	bzero(&data, sizeof(struct sample_data));
-	event__parse_sample(event, s->sample_type, &data);
-	/* CAUTION: using tid as thread.pid */
-	thread = perf_session__findnew(s, data.tid);
-
-	if (thread == NULL) {
-		pr_debug("problem processing %d event, skipping it.\n",
-			 event->header.type);
-		return -1;
-	}
-
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
-		return 0;
-
-	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
-
-	return 0;
-}
-
 /* TODO: various way to print, coloring, nano or milli sec */
 static void print_result(void)
 {
@@ -963,9 +790,30 @@ static void dump_map(void)
 	}
 }
 
+static int process_sample_event(event_t *self, struct perf_session *s)
+{
+	struct sample_data data;
+	struct thread *thread;
+
+	bzero(&data, sizeof(data));
+	event__parse_sample(self, s->sample_type, &data);
+
+	thread = perf_session__findnew(s, data.tid);
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			self->header.type);
+		return -1;
+	}
+
+	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+
+	return 0;
+}
+
 static struct perf_event_ops eops = {
 	.sample = process_sample_event,
 	.comm = event__process_comm,
+	.ordered_samples = true,
 };
 
 static int read_events(void)
@@ -994,7 +842,6 @@ static void __cmd_report(void)
 	setup_pager();
 	select_key();
 	read_events();
-	flush_raw_event_queue(ULLONG_MAX);
 	sort_result();
 	print_result();
 }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7d88ae5c270f..b7aade2184b2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -98,6 +98,8 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->cwdlen = 0;
 	self->unknown_events = 0;
 	self->kerninfo_root = RB_ROOT;
+	self->ordered_samples.flush_limit = ULLONG_MAX;
+	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
 
 	if (mode == O_RDONLY) {
 		if (perf_session__open(self, force) < 0)
@@ -351,6 +353,178 @@ static event__swap_op event__swap_ops[] = {
 	[PERF_RECORD_HEADER_MAX] = NULL,
 };
 
+struct sample_queue {
+	u64 timestamp;
+	struct sample_event *event;
+	struct list_head list;
+};
+
+#define FLUSH_PERIOD (2 * NSEC_PER_SEC)
+
+static void flush_sample_queue(struct perf_session *s,
+			       struct perf_event_ops *ops)
+{
+	struct list_head *head = &s->ordered_samples.samples_head;
+	u64 limit = s->ordered_samples.flush_limit;
+	struct sample_queue *tmp, *iter;
+
+	if (!ops->ordered_samples)
+		return;
+
+	list_for_each_entry_safe(iter, tmp, head, list) {
+		if (iter->timestamp > limit)
+			return;
+
+		if (iter == s->ordered_samples.last_inserted)
+			s->ordered_samples.last_inserted = NULL;
+
+		ops->sample((event_t *)iter->event, s);
+
+		s->ordered_samples.last_flush = iter->timestamp;
+		list_del(&iter->list);
+		free(iter->event);
+		free(iter);
+	}
+}
+
+static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
+{
+	struct sample_queue *iter;
+
+	list_for_each_entry_reverse(iter, head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, head);
+}
+
+static void __queue_sample_before(struct sample_queue *new,
+				  struct sample_queue *iter,
+				  struct list_head *head)
+{
+	list_for_each_entry_continue_reverse(iter, head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, head);
+}
+
+static void __queue_sample_after(struct sample_queue *new,
+				 struct sample_queue *iter,
+				 struct list_head *head)
+{
+	list_for_each_entry_continue(iter, head, list) {
+		if (iter->timestamp > new->timestamp) {
+			list_add_tail(&new->list, &iter->list);
+			return;
+		}
+	}
+	list_add_tail(&new->list, head);
+}
+
+/* The queue is ordered by time */
+static void __queue_sample_event(struct sample_queue *new,
+				 struct perf_session *s)
+{
+	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
+	struct list_head *head = &s->ordered_samples.samples_head;
+
+
+	if (!last_inserted) {
+		__queue_sample_end(new, head);
+		return;
+	}
+
+	/*
+	 * Most of the time the current event has a timestamp
+	 * very close to the last event inserted, unless we just switched
+	 * to another event buffer. Having a sorting based on a list and
+	 * on the last inserted event that is close to the current one is
+	 * probably more efficient than an rbtree based sorting.
+	 */
+	if (last_inserted->timestamp >= new->timestamp)
+		__queue_sample_before(new, last_inserted, head);
+	else
+		__queue_sample_after(new, last_inserted, head);
+}
+
+static int queue_sample_event(event_t *event, struct sample_data *data,
+			      struct perf_session *s,
+			      struct perf_event_ops *ops)
+{
+	u64 timestamp = data->time;
+	struct sample_queue *new;
+	u64 flush_limit;
+
+
+	if (s->ordered_samples.flush_limit == ULLONG_MAX)
+		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
+
+	if (timestamp < s->ordered_samples.last_flush) {
+		printf("Warning: Timestamp below last timeslice flush\n");
+		return -EINVAL;
+	}
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		return -ENOMEM;
+
+	new->timestamp = timestamp;
+
+	new->event = malloc(event->header.size);
+	if (!new->event) {
+		free(new);
+		return -ENOMEM;
+	}
+
+	memcpy(new->event, event, event->header.size);
+
+	__queue_sample_event(new, s);
+	s->ordered_samples.last_inserted = new;
+
+	/*
+	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
+	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
+	 * in the first half of the timeslice have all been buffered and there
+	 * are none remaining (we need that because of the weakly ordered
+	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
+	 * timeslice, we flush the first half to be gentle with the memory
+	 * (the second half can still get new events in the middle, so wait
+	 * another period to flush it)
+	 */
+	flush_limit = s->ordered_samples.flush_limit;
+
+	if (new->timestamp > flush_limit &&
+	    new->timestamp - flush_limit > FLUSH_PERIOD) {
+		s->ordered_samples.flush_limit += FLUSH_PERIOD;
+		flush_sample_queue(s, ops);
+	}
+
+	return 0;
+}
+
+static int perf_session__process_sample(event_t *event, struct perf_session *s,
+					struct perf_event_ops *ops)
+{
+	struct sample_data data;
+
+	if (!ops->ordered_samples)
+		return ops->sample(event, s);
+
+	bzero(&data, sizeof(struct sample_data));
+	event__parse_sample(event, s->sample_type, &data);
+
+	queue_sample_event(event, &data, s, ops);
+
+	return 0;
+}
+
 static int perf_session__process_event(struct perf_session *self,
 				       event_t *event,
 				       struct perf_event_ops *ops,
@@ -371,7 +545,7 @@ static int perf_session__process_event(struct perf_session *self,
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		return ops->sample(event, self);
+		return perf_session__process_sample(event, self, ops);
 	case PERF_RECORD_MMAP:
 		return ops->mmap(event, self);
 	case PERF_RECORD_COMM:
@@ -611,6 +785,9 @@ more:
 		goto more;
 done:
 	err = 0;
+	/* do the final flush for ordered samples */
+	self->ordered_samples.flush_limit = ULLONG_MAX;
+	flush_sample_queue(self, ops);
 out_err:
 	ui_progress__delete(progress);
 	return err;
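
For intuition, here is a small self-contained sketch of the flush window implemented by queue_sample_event() above. It is not perf code (the timestamps are invented and everything lives in main()); it only mimics the arithmetic: the first sample seeds flush_limit at t0 + FLUSH_PERIOD, and samples up to flush_limit are released only once something arrives more than another FLUSH_PERIOD past it, so the queue keeps roughly a 2 * FLUSH_PERIOD slice buffered at all times.

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)

int main(void)
{
	/* invented sample timestamps, in seconds for readability */
	static const uint64_t ts_sec[] = { 0, 1, 3, 4, 5, 9 };
	uint64_t flush_limit = UINT64_MAX;
	unsigned int i;

	for (i = 0; i < sizeof(ts_sec) / sizeof(ts_sec[0]); i++) {
		uint64_t ts = ts_sec[i] * NSEC_PER_SEC;

		/* same seeding and trigger condition as queue_sample_event() */
		if (flush_limit == UINT64_MAX)
			flush_limit = ts + FLUSH_PERIOD;

		if (ts > flush_limit && ts - flush_limit > FLUSH_PERIOD) {
			flush_limit += FLUSH_PERIOD;
			printf("t=%llus: flush queued samples up to t=%llus\n",
			       (unsigned long long)ts_sec[i],
			       (unsigned long long)(flush_limit / NSEC_PER_SEC));
		}
	}
	return 0;
}

With this input the sketch reports a flush at t=5s (releasing everything up to 4s) and another at t=9s (up to 6s): the oldest slice is only dropped once the stream is a full period past it, which is what tolerates the weakly ordered recording across per-cpu buffers.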
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5e47c87b9266..796e2291ebd7 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,9 +8,17 @@
 #include <linux/rbtree.h>
 #include "../../../include/linux/perf_event.h"
 
+struct sample_queue;
 struct ip_callchain;
 struct thread;
 
+struct ordered_samples {
+	u64 last_flush;
+	u64 flush_limit;
+	struct list_head samples_head;
+	struct sample_queue *last_inserted;
+};
+
 struct perf_session {
 	struct perf_header header;
 	unsigned long size;
@@ -28,6 +36,7 @@ struct perf_session {
 	bool fd_pipe;
 	int cwdlen;
 	char *cwd;
+	struct ordered_samples ordered_samples;
 	char filename[0];
 };
 
@@ -47,6 +56,7 @@ struct perf_event_ops {
 			event_type,
 			tracing_data,
 			build_id;
+	bool ordered_samples;
 };
 
 struct perf_session *perf_session__new(const char *filename, int mode, bool force);
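
The part of the patch that is easiest to gloss over is the insertion strategy itself. Below is a standalone sketch of the same idea, written against its own toy list rather than the kernel list API; the names (struct sample, insert_after, queue_sample) are mine, not from the patch. The queue stays sorted by timestamp, and every insertion starts walking from the node inserted last, which in the common case is only a step or two away because samples coming out of one mmap'ed buffer are already nearly in order.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct sample {
	uint64_t timestamp;
	struct sample *prev, *next;
};

/* circular list with a dummy head, like list_head but spelled out */
static struct sample head = { 0, &head, &head };
static struct sample *last_inserted;

static void insert_after(struct sample *new, struct sample *pos)
{
	new->prev = pos;
	new->next = pos->next;
	pos->next->prev = new;
	pos->next = new;
}

static void queue_sample(uint64_t timestamp)
{
	struct sample *new = malloc(sizeof(*new));
	struct sample *pos;

	if (!new)
		exit(1);
	new->timestamp = timestamp;

	/* resume from the last insertion point instead of a list end */
	pos = last_inserted ? last_inserted : head.prev;

	if (timestamp >= pos->timestamp) {
		/* walk forward while the next node is not newer than us */
		while (pos->next != &head && pos->next->timestamp <= timestamp)
			pos = pos->next;
		insert_after(new, pos);
	} else {
		/* walk backward while the previous node is newer than us */
		while (pos->prev != &head && pos->prev->timestamp > timestamp)
			pos = pos->prev;
		insert_after(new, pos->prev);
	}
	last_inserted = new;
}

int main(void)
{
	/* nearly sorted input, as produced by per-cpu ring buffers */
	static const uint64_t input[] = { 10, 12, 11, 30, 25, 26, 5 };
	struct sample *iter;
	unsigned int i;

	for (i = 0; i < sizeof(input) / sizeof(input[0]); i++)
		queue_sample(input[i]);

	for (iter = head.next; iter != &head; iter = iter->next)
		printf("%llu ", (unsigned long long)iter->timestamp);
	printf("\n");	/* prints: 5 10 11 12 25 26 30 */
	return 0;
}

That is the trade-off the in-code comment in __queue_sample_event() describes: against nearly sorted input the walk from last_inserted usually terminates immediately, which the patch expects to beat an rbtree whose insertion cost stays logarithmic even when the new sample belongs right next to the previous one.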