diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-lock.txt | 29 | ||||
-rw-r--r-- | tools/perf/builtin-lock.c | 148 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 4 | ||||
-rw-r--r-- | tools/perf/command-list.txt | 1 | ||||
-rw-r--r-- | tools/perf/perf-archive.sh | 3 | ||||
-rw-r--r-- | tools/perf/perf.h | 4 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 2 |
7 files changed, 182 insertions, 9 deletions
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt new file mode 100644 index 000000000000..b317102138c8 --- /dev/null +++ b/tools/perf/Documentation/perf-lock.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | perf-lock(1) | ||
2 | ============ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-lock - Analyze lock events | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf lock' {record|report|trace} | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | You can analyze various lock behaviours | ||
16 | and statistics with this 'perf lock' command. | ||
17 | |||
18 | 'perf lock record <command>' records lock events | ||
19 | between the start and end of <command>. This command | ||
20 | produces the file "perf.data", which contains the tracing | ||
21 | results of lock events. | ||
22 | |||
23 | 'perf lock trace' shows raw lock events. | ||
24 | |||
25 | 'perf lock report' reports statistical data. | ||
26 | |||
27 | SEE ALSO | ||
28 | -------- | ||
29 | linkperf:perf[1] | ||
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index fb9ab2ad3f92..e12c844df1e2 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
@@ -460,6 +460,150 @@ process_raw_event(void *data, int cpu, | |||
460 | process_lock_release_event(data, event, cpu, timestamp, thread); | 460 | process_lock_release_event(data, event, cpu, timestamp, thread); |
461 | } | 461 | } |
462 | 462 | ||
463 | struct raw_event_queue { | ||
464 | u64 timestamp; | ||
465 | int cpu; | ||
466 | void *data; | ||
467 | struct thread *thread; | ||
468 | struct list_head list; | ||
469 | }; | ||
470 | |||
471 | static LIST_HEAD(raw_event_head); | ||
472 | |||
473 | #define FLUSH_PERIOD (5 * NSEC_PER_SEC) | ||
474 | |||
475 | static u64 flush_limit = ULLONG_MAX; | ||
476 | static u64 last_flush = 0; | ||
477 | struct raw_event_queue *last_inserted; | ||
478 | |||
479 | static void flush_raw_event_queue(u64 limit) | ||
480 | { | ||
481 | struct raw_event_queue *tmp, *iter; | ||
482 | |||
483 | list_for_each_entry_safe(iter, tmp, &raw_event_head, list) { | ||
484 | if (iter->timestamp > limit) | ||
485 | return; | ||
486 | |||
487 | if (iter == last_inserted) | ||
488 | last_inserted = NULL; | ||
489 | |||
490 | process_raw_event(iter->data, iter->cpu, iter->timestamp, | ||
491 | iter->thread); | ||
492 | |||
493 | last_flush = iter->timestamp; | ||
494 | list_del(&iter->list); | ||
495 | free(iter->data); | ||
496 | free(iter); | ||
497 | } | ||
498 | } | ||
499 | |||
500 | static void __queue_raw_event_end(struct raw_event_queue *new) | ||
501 | { | ||
502 | struct raw_event_queue *iter; | ||
503 | |||
504 | list_for_each_entry_reverse(iter, &raw_event_head, list) { | ||
505 | if (iter->timestamp < new->timestamp) { | ||
506 | list_add(&new->list, &iter->list); | ||
507 | return; | ||
508 | } | ||
509 | } | ||
510 | |||
511 | list_add(&new->list, &raw_event_head); | ||
512 | } | ||
513 | |||
514 | static void __queue_raw_event_before(struct raw_event_queue *new, | ||
515 | struct raw_event_queue *iter) | ||
516 | { | ||
517 | list_for_each_entry_continue_reverse(iter, &raw_event_head, list) { | ||
518 | if (iter->timestamp < new->timestamp) { | ||
519 | list_add(&new->list, &iter->list); | ||
520 | return; | ||
521 | } | ||
522 | } | ||
523 | |||
524 | list_add(&new->list, &raw_event_head); | ||
525 | } | ||
526 | |||
527 | static void __queue_raw_event_after(struct raw_event_queue *new, | ||
528 | struct raw_event_queue *iter) | ||
529 | { | ||
530 | list_for_each_entry_continue(iter, &raw_event_head, list) { | ||
531 | if (iter->timestamp > new->timestamp) { | ||
532 | list_add_tail(&new->list, &iter->list); | ||
533 | return; | ||
534 | } | ||
535 | } | ||
536 | list_add_tail(&new->list, &raw_event_head); | ||
537 | } | ||
538 | |||
539 | /* The queue is ordered by time */ | ||
540 | static void __queue_raw_event(struct raw_event_queue *new) | ||
541 | { | ||
542 | if (!last_inserted) { | ||
543 | __queue_raw_event_end(new); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Most of the time the current event has a timestamp | ||
549 | * very close to the last event inserted, unless we just switched | ||
550 | * to another event buffer. Having a sorting based on a list and | ||
551 | * on the last inserted event that is close to the current one is | ||
552 | * probably more efficient than an rbtree based sorting. | ||
553 | */ | ||
554 | if (last_inserted->timestamp >= new->timestamp) | ||
555 | __queue_raw_event_before(new, last_inserted); | ||
556 | else | ||
557 | __queue_raw_event_after(new, last_inserted); | ||
558 | } | ||
559 | |||
560 | static void queue_raw_event(void *data, int raw_size, int cpu, | ||
561 | u64 timestamp, struct thread *thread) | ||
562 | { | ||
563 | struct raw_event_queue *new; | ||
564 | |||
565 | if (flush_limit == ULLONG_MAX) | ||
566 | flush_limit = timestamp + FLUSH_PERIOD; | ||
567 | |||
568 | if (timestamp < last_flush) { | ||
569 | printf("Warning: Timestamp below last timeslice flush\n"); | ||
570 | return; | ||
571 | } | ||
572 | |||
573 | new = malloc(sizeof(*new)); | ||
574 | if (!new) | ||
575 | die("Not enough memory\n"); | ||
576 | |||
577 | new->timestamp = timestamp; | ||
578 | new->cpu = cpu; | ||
579 | new->thread = thread; | ||
580 | |||
581 | new->data = malloc(raw_size); | ||
582 | if (!new->data) | ||
583 | die("Not enough memory\n"); | ||
584 | |||
585 | memcpy(new->data, data, raw_size); | ||
586 | |||
587 | __queue_raw_event(new); | ||
588 | last_inserted = new; | ||
589 | |||
590 | /* | ||
591 | * We want to have a slice of events covering 2 * FLUSH_PERIOD | ||
592 | * If FLUSH_PERIOD is big enough, it ensures all events that occurred | |
593 | * in the first half of the timeslice have all been buffered and there | ||
594 | * are none remaining (we need that because of the weakly ordered | ||
595 | * event recording we have). Then once we reach the 2 * FLUSH_PERIOD | ||
596 | * timeslice, we flush the first half to be gentle with the memory | ||
597 | * (the second half can still get new events in the middle, so wait | ||
598 | * another period to flush it) | ||
599 | */ | ||
600 | if (new->timestamp > flush_limit && | ||
601 | new->timestamp - flush_limit > FLUSH_PERIOD) { | ||
602 | flush_limit += FLUSH_PERIOD; | ||
603 | flush_raw_event_queue(flush_limit); | ||
604 | } | ||
605 | } | ||
606 | |||
463 | static int process_sample_event(event_t *event, struct perf_session *session) | 607 | static int process_sample_event(event_t *event, struct perf_session *session) |
464 | { | 608 | { |
465 | struct thread *thread; | 609 | struct thread *thread; |
@@ -480,7 +624,7 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
480 | if (profile_cpu != -1 && profile_cpu != (int) data.cpu) | 624 | if (profile_cpu != -1 && profile_cpu != (int) data.cpu) |
481 | return 0; | 625 | return 0; |
482 | 626 | ||
483 | process_raw_event(data.raw_data, data.cpu, data.time, thread); | 627 | queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread); |
484 | 628 | ||
485 | return 0; | 629 | return 0; |
486 | } | 630 | } |
@@ -576,6 +720,7 @@ static void __cmd_report(void) | |||
576 | setup_pager(); | 720 | setup_pager(); |
577 | select_key(); | 721 | select_key(); |
578 | read_events(); | 722 | read_events(); |
723 | flush_raw_event_queue(ULLONG_MAX); | ||
579 | sort_result(); | 724 | sort_result(); |
580 | print_result(); | 725 | print_result(); |
581 | } | 726 | } |
@@ -608,7 +753,6 @@ static const char *record_args[] = { | |||
608 | "record", | 753 | "record", |
609 | "-a", | 754 | "-a", |
610 | "-R", | 755 | "-R", |
611 | "-M", | ||
612 | "-f", | 756 | "-f", |
613 | "-m", "1024", | 757 | "-m", "1024", |
614 | "-c", "1", | 758 | "-c", "1", |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5db687fc13de..407041d20de0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -573,7 +573,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
573 | 573 | ||
574 | if (symbol__init() < 0) | 574 | if (symbol__init() < 0) |
575 | return -1; | 575 | return -1; |
576 | setup_pager(); | 576 | if (!script_name) |
577 | setup_pager(); | ||
577 | 578 | ||
578 | session = perf_session__new(input_name, O_RDONLY, 0); | 579 | session = perf_session__new(input_name, O_RDONLY, 0); |
579 | if (session == NULL) | 580 | if (session == NULL) |
@@ -608,7 +609,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
608 | return -1; | 609 | return -1; |
609 | } | 610 | } |
610 | 611 | ||
611 | perf_header__read(&session->header, input); | ||
612 | err = scripting_ops->generate_script("perf-trace"); | 612 | err = scripting_ops->generate_script("perf-trace"); |
613 | goto out; | 613 | goto out; |
614 | } | 614 | } |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 9afcff2e3ae5..db6ee94d4a8e 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -18,3 +18,4 @@ perf-top mainporcelain common | |||
18 | perf-trace mainporcelain common | 18 | perf-trace mainporcelain common |
19 | perf-probe mainporcelain common | 19 | perf-probe mainporcelain common |
20 | perf-kmem mainporcelain common | 20 | perf-kmem mainporcelain common |
21 | perf-lock mainporcelain common | ||
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 45fbe2f07b15..910468e6e01c 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh | |||
@@ -9,8 +9,9 @@ fi | |||
9 | 9 | ||
10 | DEBUGDIR=~/.debug/ | 10 | DEBUGDIR=~/.debug/ |
11 | BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) | 11 | BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) |
12 | NOBUILDID=0000000000000000000000000000000000000000 | ||
12 | 13 | ||
13 | perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS | 14 | perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS |
14 | if [ ! -s $BUILDIDS ] ; then | 15 | if [ ! -s $BUILDIDS ] ; then |
15 | echo "perf archive: no build-ids found" | 16 | echo "perf archive: no build-ids found" |
16 | rm -f $BUILDIDS | 17 | rm -f $BUILDIDS |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 75f941bfba9e..6fb379bc1d1f 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -65,9 +65,7 @@ | |||
65 | * Use the __kuser_memory_barrier helper in the CPU helper page. See | 65 | * Use the __kuser_memory_barrier helper in the CPU helper page. See |
66 | * arch/arm/kernel/entry-armv.S in the kernel source for details. | 66 | * arch/arm/kernel/entry-armv.S in the kernel source for details. |
67 | */ | 67 | */ |
68 | #define rmb() asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \ | 68 | #define rmb() ((void(*)(void))0xffff0fa0)() |
69 | "sub pc, r0, #95" ::: "r0", "lr", "cc", \ | ||
70 | "memory") | ||
71 | #define cpu_relax() asm volatile("":::"memory") | 69 | #define cpu_relax() asm volatile("":::"memory") |
72 | #endif | 70 | #endif |
73 | 71 | ||
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c971e81e9cbf..53181dbfe4a8 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -508,8 +508,8 @@ void show_perf_probe_events(void) | |||
508 | struct str_node *ent; | 508 | struct str_node *ent; |
509 | 509 | ||
510 | setup_pager(); | 510 | setup_pager(); |
511 | |||
512 | memset(&pp, 0, sizeof(pp)); | 511 | memset(&pp, 0, sizeof(pp)); |
512 | |||
513 | fd = open_kprobe_events(O_RDONLY, 0); | 513 | fd = open_kprobe_events(O_RDONLY, 0); |
514 | rawlist = get_trace_kprobe_event_rawlist(fd); | 514 | rawlist = get_trace_kprobe_event_rawlist(fd); |
515 | close(fd); | 515 | close(fd); |