aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/trace/events/lock.h55
-rw-r--r--kernel/lockdep.c4
-rw-r--r--tools/perf/builtin-lock.c145
-rw-r--r--tools/perf/builtin-record.c34
-rw-r--r--tools/perf/util/event.h3
-rw-r--r--tools/perf/util/session.c125
-rw-r--r--tools/perf/util/session.h36
7 files changed, 257 insertions, 145 deletions
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 5c1dcfc16c60..2821b86de63b 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -35,15 +35,15 @@ TRACE_EVENT(lock_acquire,
35 __get_str(name)) 35 __get_str(name))
36); 36);
37 37
38TRACE_EVENT(lock_release, 38DECLARE_EVENT_CLASS(lock,
39 39
40 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), 40 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
41 41
42 TP_ARGS(lock, nested, ip), 42 TP_ARGS(lock, ip),
43 43
44 TP_STRUCT__entry( 44 TP_STRUCT__entry(
45 __string(name, lock->name) 45 __string( name, lock->name )
46 __field(void *, lockdep_addr) 46 __field( void *, lockdep_addr )
47 ), 47 ),
48 48
49 TP_fast_assign( 49 TP_fast_assign(
@@ -51,51 +51,30 @@ TRACE_EVENT(lock_release,
51 __entry->lockdep_addr = lock; 51 __entry->lockdep_addr = lock;
52 ), 52 ),
53 53
54 TP_printk("%p %s", 54 TP_printk("%p %s", __entry->lockdep_addr, __get_str(name))
55 __entry->lockdep_addr, __get_str(name))
56); 55);
57 56
58#ifdef CONFIG_LOCK_STAT 57DEFINE_EVENT(lock, lock_release,
59
60TRACE_EVENT(lock_contended,
61 58
62 TP_PROTO(struct lockdep_map *lock, unsigned long ip), 59 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
63 60
64 TP_ARGS(lock, ip), 61 TP_ARGS(lock, ip)
62);
65 63
66 TP_STRUCT__entry( 64#ifdef CONFIG_LOCK_STAT
67 __string(name, lock->name)
68 __field(void *, lockdep_addr)
69 ),
70 65
71 TP_fast_assign( 66DEFINE_EVENT(lock, lock_contended,
72 __assign_str(name, lock->name);
73 __entry->lockdep_addr = lock;
74 ),
75 67
76 TP_printk("%p %s", 68 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
77 __entry->lockdep_addr, __get_str(name))
78);
79 69
80TRACE_EVENT(lock_acquired, 70 TP_ARGS(lock, ip)
81 TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), 71);
82 72
83 TP_ARGS(lock, ip, waittime), 73DEFINE_EVENT(lock, lock_acquired,
84 74
85 TP_STRUCT__entry( 75 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
86 __string(name, lock->name)
87 __field(s64, wait_nsec)
88 __field(void *, lockdep_addr)
89 ),
90 76
91 TP_fast_assign( 77 TP_ARGS(lock, ip)
92 __assign_str(name, lock->name);
93 __entry->wait_nsec = waittime;
94 __entry->lockdep_addr = lock;
95 ),
96 TP_printk("%p %s (%llu ns)", __entry->lockdep_addr,
97 __get_str(name),
98 __entry->wait_nsec)
99); 78);
100 79
101#endif 80#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41cb..e9c759f06c1d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3227,7 +3227,7 @@ void lock_release(struct lockdep_map *lock, int nested,
3227 raw_local_irq_save(flags); 3227 raw_local_irq_save(flags);
3228 check_flags(flags); 3228 check_flags(flags);
3229 current->lockdep_recursion = 1; 3229 current->lockdep_recursion = 1;
3230 trace_lock_release(lock, nested, ip); 3230 trace_lock_release(lock, ip);
3231 __lock_release(lock, nested, ip); 3231 __lock_release(lock, nested, ip);
3232 current->lockdep_recursion = 0; 3232 current->lockdep_recursion = 0;
3233 raw_local_irq_restore(flags); 3233 raw_local_irq_restore(flags);
@@ -3380,7 +3380,7 @@ found_it:
3380 hlock->holdtime_stamp = now; 3380 hlock->holdtime_stamp = now;
3381 } 3381 }
3382 3382
3383 trace_lock_acquired(lock, ip, waittime); 3383 trace_lock_acquired(lock, ip);
3384 3384
3385 stats = get_lock_stats(hlock_class(hlock)); 3385 stats = get_lock_stats(hlock_class(hlock));
3386 if (waittime) { 3386 if (waittime) {
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6605000ed73d..e18dfdc2948a 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -387,7 +387,20 @@ static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
387 return seq; 387 return seq;
388} 388}
389 389
390static int bad_hist[4]; 390enum broken_state {
391 BROKEN_ACQUIRE,
392 BROKEN_ACQUIRED,
393 BROKEN_CONTENDED,
394 BROKEN_RELEASE,
395 BROKEN_MAX,
396};
397
398static int bad_hist[BROKEN_MAX];
399
400enum acquire_flags {
401 TRY_LOCK = 1,
402 READ_LOCK = 2,
403};
391 404
392static void 405static void
393report_lock_acquire_event(struct trace_acquire_event *acquire_event, 406report_lock_acquire_event(struct trace_acquire_event *acquire_event,
@@ -413,9 +426,9 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
413 if (!acquire_event->flag) { 426 if (!acquire_event->flag) {
414 seq->state = SEQ_STATE_ACQUIRING; 427 seq->state = SEQ_STATE_ACQUIRING;
415 } else { 428 } else {
416 if (acquire_event->flag & 1) 429 if (acquire_event->flag & TRY_LOCK)
417 ls->nr_trylock++; 430 ls->nr_trylock++;
418 if (acquire_event->flag & 2) 431 if (acquire_event->flag & READ_LOCK)
419 ls->nr_readlock++; 432 ls->nr_readlock++;
420 seq->state = SEQ_STATE_READ_ACQUIRED; 433 seq->state = SEQ_STATE_READ_ACQUIRED;
421 seq->read_count = 1; 434 seq->read_count = 1;
@@ -423,7 +436,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
423 } 436 }
424 break; 437 break;
425 case SEQ_STATE_READ_ACQUIRED: 438 case SEQ_STATE_READ_ACQUIRED:
426 if (acquire_event->flag & 2) { 439 if (acquire_event->flag & READ_LOCK) {
427 seq->read_count++; 440 seq->read_count++;
428 ls->nr_acquired++; 441 ls->nr_acquired++;
429 goto end; 442 goto end;
@@ -437,7 +450,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event,
437broken: 450broken:
438 /* broken lock sequence, discard it */ 451 /* broken lock sequence, discard it */
439 ls->discard = 1; 452 ls->discard = 1;
440 bad_hist[0]++; 453 bad_hist[BROKEN_ACQUIRE]++;
441 list_del(&seq->list); 454 list_del(&seq->list);
442 free(seq); 455 free(seq);
443 goto end; 456 goto end;
@@ -481,10 +494,9 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event,
481 case SEQ_STATE_CONTENDED: 494 case SEQ_STATE_CONTENDED:
482 contended_term = timestamp - seq->prev_event_time; 495 contended_term = timestamp - seq->prev_event_time;
483 ls->wait_time_total += contended_term; 496 ls->wait_time_total += contended_term;
484
485 if (contended_term < ls->wait_time_min) 497 if (contended_term < ls->wait_time_min)
486 ls->wait_time_min = contended_term; 498 ls->wait_time_min = contended_term;
487 else if (ls->wait_time_max < contended_term) 499 if (ls->wait_time_max < contended_term)
488 ls->wait_time_max = contended_term; 500 ls->wait_time_max = contended_term;
489 break; 501 break;
490 case SEQ_STATE_RELEASED: 502 case SEQ_STATE_RELEASED:
@@ -492,7 +504,7 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event,
492 case SEQ_STATE_READ_ACQUIRED: 504 case SEQ_STATE_READ_ACQUIRED:
493 /* broken lock sequence, discard it */ 505 /* broken lock sequence, discard it */
494 ls->discard = 1; 506 ls->discard = 1;
495 bad_hist[1]++; 507 bad_hist[BROKEN_ACQUIRED]++;
496 list_del(&seq->list); 508 list_del(&seq->list);
497 free(seq); 509 free(seq);
498 goto end; 510 goto end;
@@ -540,7 +552,7 @@ report_lock_contended_event(struct trace_contended_event *contended_event,
540 case SEQ_STATE_CONTENDED: 552 case SEQ_STATE_CONTENDED:
541 /* broken lock sequence, discard it */ 553 /* broken lock sequence, discard it */
542 ls->discard = 1; 554 ls->discard = 1;
543 bad_hist[2]++; 555 bad_hist[BROKEN_CONTENDED]++;
544 list_del(&seq->list); 556 list_del(&seq->list);
545 free(seq); 557 free(seq);
546 goto end; 558 goto end;
@@ -594,7 +606,7 @@ report_lock_release_event(struct trace_release_event *release_event,
594 case SEQ_STATE_RELEASED: 606 case SEQ_STATE_RELEASED:
595 /* broken lock sequence, discard it */ 607 /* broken lock sequence, discard it */
596 ls->discard = 1; 608 ls->discard = 1;
597 bad_hist[3]++; 609 bad_hist[BROKEN_RELEASE]++;
598 goto free_seq; 610 goto free_seq;
599 break; 611 break;
600 default: 612 default:
@@ -713,6 +725,21 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
713 process_lock_release_event(data, event, cpu, timestamp, thread); 725 process_lock_release_event(data, event, cpu, timestamp, thread);
714} 726}
715 727
728static void print_bad_events(int bad, int total)
729{
730 /* Output for debug, this have to be removed */
731 int i;
732 const char *name[4] =
733 { "acquire", "acquired", "contended", "release" };
734
735 pr_info("\n=== output for debug===\n\n");
736 pr_info("bad: %d, total: %d\n", bad, total);
737 pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
738 pr_info("histogram of events caused bad sequence\n");
739 for (i = 0; i < BROKEN_MAX; i++)
740 pr_info(" %10s: %d\n", name[i], bad_hist[i]);
741}
742
716/* TODO: various way to print, coloring, nano or milli sec */ 743/* TODO: various way to print, coloring, nano or milli sec */
717static void print_result(void) 744static void print_result(void)
718{ 745{
@@ -720,15 +747,15 @@ static void print_result(void)
720 char cut_name[20]; 747 char cut_name[20];
721 int bad, total; 748 int bad, total;
722 749
723 printf("%20s ", "Name"); 750 pr_info("%20s ", "Name");
724 printf("%10s ", "acquired"); 751 pr_info("%10s ", "acquired");
725 printf("%10s ", "contended"); 752 pr_info("%10s ", "contended");
726 753
727 printf("%15s ", "total wait (ns)"); 754 pr_info("%15s ", "total wait (ns)");
728 printf("%15s ", "max wait (ns)"); 755 pr_info("%15s ", "max wait (ns)");
729 printf("%15s ", "min wait (ns)"); 756 pr_info("%15s ", "min wait (ns)");
730 757
731 printf("\n\n"); 758 pr_info("\n\n");
732 759
733 bad = total = 0; 760 bad = total = 0;
734 while ((st = pop_from_result())) { 761 while ((st = pop_from_result())) {
@@ -741,7 +768,7 @@ static void print_result(void)
741 768
742 if (strlen(st->name) < 16) { 769 if (strlen(st->name) < 16) {
743 /* output raw name */ 770 /* output raw name */
744 printf("%20s ", st->name); 771 pr_info("%20s ", st->name);
745 } else { 772 } else {
746 strncpy(cut_name, st->name, 16); 773 strncpy(cut_name, st->name, 16);
747 cut_name[16] = '.'; 774 cut_name[16] = '.';
@@ -749,33 +776,40 @@ static void print_result(void)
749 cut_name[18] = '.'; 776 cut_name[18] = '.';
750 cut_name[19] = '\0'; 777 cut_name[19] = '\0';
751 /* cut off name for saving output style */ 778 /* cut off name for saving output style */
752 printf("%20s ", cut_name); 779 pr_info("%20s ", cut_name);
753 } 780 }
754 781
755 printf("%10u ", st->nr_acquired); 782 pr_info("%10u ", st->nr_acquired);
756 printf("%10u ", st->nr_contended); 783 pr_info("%10u ", st->nr_contended);
757 784
758 printf("%15llu ", st->wait_time_total); 785 pr_info("%15llu ", st->wait_time_total);
759 printf("%15llu ", st->wait_time_max); 786 pr_info("%15llu ", st->wait_time_max);
760 printf("%15llu ", st->wait_time_min == ULLONG_MAX ? 787 pr_info("%15llu ", st->wait_time_min == ULLONG_MAX ?
761 0 : st->wait_time_min); 788 0 : st->wait_time_min);
762 printf("\n"); 789 pr_info("\n");
763 } 790 }
764 791
765 { 792 print_bad_events(bad, total);
766 /* Output for debug, this have to be removed */ 793}
767 int i;
768 const char *name[4] =
769 { "acquire", "acquired", "contended", "release" };
770 794
771 printf("\n=== output for debug===\n\n"); 795static int info_threads;
772 printf("bad:%d, total:%d\n", bad, total); 796static int info_map;
773 printf("bad rate:%f\n", (double)(bad / total));
774 797
775 printf("histogram of events caused bad sequence\n"); 798static void dump_threads(void)
776 for (i = 0; i < 4; i++) 799{
777 printf(" %10s: %d\n", name[i], bad_hist[i]); 800 struct thread_stat *st;
778 } 801 struct rb_node *node;
802 struct thread *t;
803
804 pr_info("%10s: comm\n", "Thread ID");
805
806 node = rb_first(&thread_stats);
807 while (node) {
808 st = container_of(node, struct thread_stat, rb);
809 t = perf_session__findnew(session, st->tid);
810 pr_info("%10d: %s\n", st->tid, t->comm);
811 node = rb_next(node);
812 };
779} 813}
780 814
781static void dump_map(void) 815static void dump_map(void)
@@ -783,13 +817,24 @@ static void dump_map(void)
783 unsigned int i; 817 unsigned int i;
784 struct lock_stat *st; 818 struct lock_stat *st;
785 819
820 pr_info("Address of instance: name of class\n");
786 for (i = 0; i < LOCKHASH_SIZE; i++) { 821 for (i = 0; i < LOCKHASH_SIZE; i++) {
787 list_for_each_entry(st, &lockhash_table[i], hash_entry) { 822 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
788 printf("%p: %s\n", st->addr, st->name); 823 pr_info(" %p: %s\n", st->addr, st->name);
789 } 824 }
790 } 825 }
791} 826}
792 827
828static void dump_info(void)
829{
830 if (info_threads)
831 dump_threads();
832 else if (info_map)
833 dump_map();
834 else
835 die("Unknown type of information\n");
836}
837
793static int process_sample_event(event_t *self, struct perf_session *s) 838static int process_sample_event(event_t *self, struct perf_session *s)
794{ 839{
795 struct sample_data data; 840 struct sample_data data;
@@ -858,6 +903,19 @@ static const struct option report_options[] = {
858 OPT_END() 903 OPT_END()
859}; 904};
860 905
906static const char * const info_usage[] = {
907 "perf lock info [<options>]",
908 NULL
909};
910
911static const struct option info_options[] = {
912 OPT_BOOLEAN('t', "threads", &info_threads,
913 "dump thread list in perf.data"),
914 OPT_BOOLEAN('m', "map", &info_map,
915 "map of lock instances (name:address table)"),
916 OPT_END()
917};
918
861static const char * const lock_usage[] = { 919static const char * const lock_usage[] = {
862 "perf lock [<options>] {record|trace|report}", 920 "perf lock [<options>] {record|trace|report}",
863 NULL 921 NULL
@@ -872,7 +930,6 @@ static const struct option lock_options[] = {
872 930
873static const char *record_args[] = { 931static const char *record_args[] = {
874 "record", 932 "record",
875 "-a",
876 "-R", 933 "-R",
877 "-f", 934 "-f",
878 "-m", "1024", 935 "-m", "1024",
@@ -929,12 +986,18 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
929 } else if (!strcmp(argv[0], "trace")) { 986 } else if (!strcmp(argv[0], "trace")) {
930 /* Aliased to 'perf trace' */ 987 /* Aliased to 'perf trace' */
931 return cmd_trace(argc, argv, prefix); 988 return cmd_trace(argc, argv, prefix);
932 } else if (!strcmp(argv[0], "map")) { 989 } else if (!strcmp(argv[0], "info")) {
990 if (argc) {
991 argc = parse_options(argc, argv,
992 info_options, info_usage, 0);
993 if (argc)
994 usage_with_options(info_usage, info_options);
995 }
933 /* recycling report_lock_ops */ 996 /* recycling report_lock_ops */
934 trace_handler = &report_lock_ops; 997 trace_handler = &report_lock_ops;
935 setup_pager(); 998 setup_pager();
936 read_events(); 999 read_events();
937 dump_map(); 1000 dump_info();
938 } else { 1001 } else {
939 usage_with_options(lock_usage, lock_options); 1002 usage_with_options(lock_usage, lock_options);
940 } 1003 }
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d3981ac50e1d..6b77b285fe10 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -494,6 +494,29 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
494 " relocation symbol.\n", machine->pid); 494 " relocation symbol.\n", machine->pid);
495} 495}
496 496
497static struct perf_event_header finished_round_event = {
498 .size = sizeof(struct perf_event_header),
499 .type = PERF_RECORD_FINISHED_ROUND,
500};
501
502static void mmap_read_all(void)
503{
504 int i, counter, thread;
505
506 for (i = 0; i < nr_cpu; i++) {
507 for (counter = 0; counter < nr_counters; counter++) {
508 for (thread = 0; thread < thread_num; thread++) {
509 if (mmap_array[i][counter][thread].base)
510 mmap_read(&mmap_array[i][counter][thread]);
511 }
512
513 }
514 }
515
516 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
517 write_output(&finished_round_event, sizeof(finished_round_event));
518}
519
497static int __cmd_record(int argc, const char **argv) 520static int __cmd_record(int argc, const char **argv)
498{ 521{
499 int i, counter; 522 int i, counter;
@@ -726,16 +749,7 @@ static int __cmd_record(int argc, const char **argv)
726 int hits = samples; 749 int hits = samples;
727 int thread; 750 int thread;
728 751
729 for (i = 0; i < nr_cpu; i++) { 752 mmap_read_all();
730 for (counter = 0; counter < nr_counters; counter++) {
731 for (thread = 0;
732 thread < thread_num; thread++) {
733 if (mmap_array[i][counter][thread].base)
734 mmap_read(&mmap_array[i][counter][thread]);
735 }
736
737 }
738 }
739 753
740 if (hits == samples) { 754 if (hits == samples) {
741 if (done) 755 if (done)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b364da5b0cbf..6cc1b1dced55 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -84,11 +84,12 @@ struct build_id_event {
84 char filename[]; 84 char filename[];
85}; 85};
86 86
87enum perf_header_event_type { /* above any possible kernel type */ 87enum perf_user_event_type { /* above any possible kernel type */
88 PERF_RECORD_HEADER_ATTR = 64, 88 PERF_RECORD_HEADER_ATTR = 64,
89 PERF_RECORD_HEADER_EVENT_TYPE = 65, 89 PERF_RECORD_HEADER_EVENT_TYPE = 65,
90 PERF_RECORD_HEADER_TRACING_DATA = 66, 90 PERF_RECORD_HEADER_TRACING_DATA = 66,
91 PERF_RECORD_HEADER_BUILD_ID = 67, 91 PERF_RECORD_HEADER_BUILD_ID = 67,
92 PERF_RECORD_FINISHED_ROUND = 68,
92 PERF_RECORD_HEADER_MAX 93 PERF_RECORD_HEADER_MAX
93}; 94};
94 95
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 71bc608e0ec6..c088d8f9b51c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -98,7 +98,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
98 self->unknown_events = 0; 98 self->unknown_events = 0;
99 self->machines = RB_ROOT; 99 self->machines = RB_ROOT;
100 self->repipe = repipe; 100 self->repipe = repipe;
101 self->ordered_samples.flush_limit = ULLONG_MAX;
102 INIT_LIST_HEAD(&self->ordered_samples.samples_head); 101 INIT_LIST_HEAD(&self->ordered_samples.samples_head);
103 machine__init(&self->host_machine, "", HOST_KERNEL_ID); 102 machine__init(&self->host_machine, "", HOST_KERNEL_ID);
104 103
@@ -195,6 +194,18 @@ static int process_event_stub(event_t *event __used,
195 return 0; 194 return 0;
196} 195}
197 196
197static int process_finished_round_stub(event_t *event __used,
198 struct perf_session *session __used,
199 struct perf_event_ops *ops __used)
200{
201 dump_printf(": unhandled!\n");
202 return 0;
203}
204
205static int process_finished_round(event_t *event,
206 struct perf_session *session,
207 struct perf_event_ops *ops);
208
198static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) 209static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
199{ 210{
200 if (handler->sample == NULL) 211 if (handler->sample == NULL)
@@ -223,6 +234,12 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
223 handler->tracing_data = process_event_stub; 234 handler->tracing_data = process_event_stub;
224 if (handler->build_id == NULL) 235 if (handler->build_id == NULL)
225 handler->build_id = process_event_stub; 236 handler->build_id = process_event_stub;
237 if (handler->finished_round == NULL) {
238 if (handler->ordered_samples)
239 handler->finished_round = process_finished_round;
240 else
241 handler->finished_round = process_finished_round_stub;
242 }
226} 243}
227 244
228static const char *event__name[] = { 245static const char *event__name[] = {
@@ -360,16 +377,14 @@ struct sample_queue {
360 struct list_head list; 377 struct list_head list;
361}; 378};
362 379
363#define FLUSH_PERIOD (2 * NSEC_PER_SEC)
364
365static void flush_sample_queue(struct perf_session *s, 380static void flush_sample_queue(struct perf_session *s,
366 struct perf_event_ops *ops) 381 struct perf_event_ops *ops)
367{ 382{
368 struct list_head *head = &s->ordered_samples.samples_head; 383 struct list_head *head = &s->ordered_samples.samples_head;
369 u64 limit = s->ordered_samples.flush_limit; 384 u64 limit = s->ordered_samples.next_flush;
370 struct sample_queue *tmp, *iter; 385 struct sample_queue *tmp, *iter;
371 386
372 if (!ops->ordered_samples) 387 if (!ops->ordered_samples || !limit)
373 return; 388 return;
374 389
375 list_for_each_entry_safe(iter, tmp, head, list) { 390 list_for_each_entry_safe(iter, tmp, head, list) {
@@ -388,6 +403,55 @@ static void flush_sample_queue(struct perf_session *s,
388 } 403 }
389} 404}
390 405
406/*
407 * When perf record finishes a pass on every buffers, it records this pseudo
408 * event.
409 * We record the max timestamp t found in the pass n.
410 * Assuming these timestamps are monotonic across cpus, we know that if
411 * a buffer still has events with timestamps below t, they will be all
412 * available and then read in the pass n + 1.
413 * Hence when we start to read the pass n + 2, we can safely flush every
414 * events with timestamps below t.
415 *
416 * ============ PASS n =================
417 * CPU 0 | CPU 1
418 * |
419 * cnt1 timestamps | cnt2 timestamps
420 * 1 | 2
421 * 2 | 3
422 * - | 4 <--- max recorded
423 *
424 * ============ PASS n + 1 ==============
425 * CPU 0 | CPU 1
426 * |
427 * cnt1 timestamps | cnt2 timestamps
428 * 3 | 5
429 * 4 | 6
430 * 5 | 7 <---- max recorded
431 *
432 * Flush every events below timestamp 4
433 *
434 * ============ PASS n + 2 ==============
435 * CPU 0 | CPU 1
436 * |
437 * cnt1 timestamps | cnt2 timestamps
438 * 6 | 8
439 * 7 | 9
440 * - | 10
441 *
442 * Flush every events below timestamp 7
443 * etc...
444 */
445static int process_finished_round(event_t *event __used,
446 struct perf_session *session,
447 struct perf_event_ops *ops)
448{
449 flush_sample_queue(session, ops);
450 session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
451
452 return 0;
453}
454
391static void __queue_sample_end(struct sample_queue *new, struct list_head *head) 455static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
392{ 456{
393 struct sample_queue *iter; 457 struct sample_queue *iter;
@@ -456,17 +520,12 @@ static void __queue_sample_event(struct sample_queue *new,
456} 520}
457 521
458static int queue_sample_event(event_t *event, struct sample_data *data, 522static int queue_sample_event(event_t *event, struct sample_data *data,
459 struct perf_session *s, 523 struct perf_session *s)
460 struct perf_event_ops *ops)
461{ 524{
462 u64 timestamp = data->time; 525 u64 timestamp = data->time;
463 struct sample_queue *new; 526 struct sample_queue *new;
464 u64 flush_limit;
465 527
466 528
467 if (s->ordered_samples.flush_limit == ULLONG_MAX)
468 s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;
469
470 if (timestamp < s->ordered_samples.last_flush) { 529 if (timestamp < s->ordered_samples.last_flush) {
471 printf("Warning: Timestamp below last timeslice flush\n"); 530 printf("Warning: Timestamp below last timeslice flush\n");
472 return -EINVAL; 531 return -EINVAL;
@@ -489,23 +548,8 @@ static int queue_sample_event(event_t *event, struct sample_data *data,
489 __queue_sample_event(new, s); 548 __queue_sample_event(new, s);
490 s->ordered_samples.last_inserted = new; 549 s->ordered_samples.last_inserted = new;
491 550
492 /* 551 if (new->timestamp > s->ordered_samples.max_timestamp)
493 * We want to have a slice of events covering 2 * FLUSH_PERIOD 552 s->ordered_samples.max_timestamp = new->timestamp;
494 * If FLUSH_PERIOD is big enough, it ensures every events that occured
495 * in the first half of the timeslice have all been buffered and there
496 * are none remaining (we need that because of the weakly ordered
497 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
498 * timeslice, we flush the first half to be gentle with the memory
499 * (the second half can still get new events in the middle, so wait
500 * another period to flush it)
501 */
502 flush_limit = s->ordered_samples.flush_limit;
503
504 if (new->timestamp > flush_limit &&
505 new->timestamp - flush_limit > FLUSH_PERIOD) {
506 s->ordered_samples.flush_limit += FLUSH_PERIOD;
507 flush_sample_queue(s, ops);
508 }
509 553
510 return 0; 554 return 0;
511} 555}
@@ -521,7 +565,7 @@ static int perf_session__process_sample(event_t *event, struct perf_session *s,
521 bzero(&data, sizeof(struct sample_data)); 565 bzero(&data, sizeof(struct sample_data));
522 event__parse_sample(event, s->sample_type, &data); 566 event__parse_sample(event, s->sample_type, &data);
523 567
524 queue_sample_event(event, &data, s, ops); 568 queue_sample_event(event, &data, s);
525 569
526 return 0; 570 return 0;
527} 571}
@@ -573,6 +617,8 @@ static int perf_session__process_event(struct perf_session *self,
573 return ops->tracing_data(event, self); 617 return ops->tracing_data(event, self);
574 case PERF_RECORD_HEADER_BUILD_ID: 618 case PERF_RECORD_HEADER_BUILD_ID:
575 return ops->build_id(event, self); 619 return ops->build_id(event, self);
620 case PERF_RECORD_FINISHED_ROUND:
621 return ops->finished_round(event, self, ops);
576 default: 622 default:
577 self->unknown_events++; 623 self->unknown_events++;
578 return -1; 624 return -1;
@@ -651,15 +697,18 @@ more:
651 p = &event; 697 p = &event;
652 p += sizeof(struct perf_event_header); 698 p += sizeof(struct perf_event_header);
653 699
654 err = do_read(self->fd, p, size - sizeof(struct perf_event_header)); 700 if (size - sizeof(struct perf_event_header)) {
655 if (err <= 0) { 701 err = do_read(self->fd, p,
656 if (err == 0) { 702 size - sizeof(struct perf_event_header));
657 pr_err("unexpected end of event stream\n"); 703 if (err <= 0) {
658 goto done; 704 if (err == 0) {
659 } 705 pr_err("unexpected end of event stream\n");
706 goto done;
707 }
660 708
661 pr_err("failed to read event data\n"); 709 pr_err("failed to read event data\n");
662 goto out_err; 710 goto out_err;
711 }
663 } 712 }
664 713
665 if (size == 0 || 714 if (size == 0 ||
@@ -787,7 +836,7 @@ more:
787done: 836done:
788 err = 0; 837 err = 0;
789 /* do the final flush for ordered samples */ 838 /* do the final flush for ordered samples */
790 self->ordered_samples.flush_limit = ULLONG_MAX; 839 self->ordered_samples.next_flush = ULLONG_MAX;
791 flush_sample_queue(self, ops); 840 flush_sample_queue(self, ops);
792out_err: 841out_err:
793 ui_progress__delete(progress); 842 ui_progress__delete(progress);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index eb9f179376a5..242d528bfae2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -14,7 +14,8 @@ struct thread;
14 14
15struct ordered_samples { 15struct ordered_samples {
16 u64 last_flush; 16 u64 last_flush;
17 u64 flush_limit; 17 u64 next_flush;
18 u64 max_timestamp;
18 struct list_head samples_head; 19 struct list_head samples_head;
19 struct sample_queue *last_inserted; 20 struct sample_queue *last_inserted;
20}; 21};
@@ -42,23 +43,28 @@ struct perf_session {
42 char filename[0]; 43 char filename[0];
43}; 44};
44 45
46struct perf_event_ops;
47
45typedef int (*event_op)(event_t *self, struct perf_session *session); 48typedef int (*event_op)(event_t *self, struct perf_session *session);
49typedef int (*event_op2)(event_t *self, struct perf_session *session,
50 struct perf_event_ops *ops);
46 51
47struct perf_event_ops { 52struct perf_event_ops {
48 event_op sample, 53 event_op sample,
49 mmap, 54 mmap,
50 comm, 55 comm,
51 fork, 56 fork,
52 exit, 57 exit,
53 lost, 58 lost,
54 read, 59 read,
55 throttle, 60 throttle,
56 unthrottle, 61 unthrottle,
57 attr, 62 attr,
58 event_type, 63 event_type,
59 tracing_data, 64 tracing_data,
60 build_id; 65 build_id;
61 bool ordered_samples; 66 event_op2 finished_round;
67 bool ordered_samples;
62}; 68};
63 69
64struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe); 70struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);