diff options
author | Ingo Molnar <mingo@kernel.org> | 2017-03-16 12:29:23 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-03-16 12:29:23 -0400 |
commit | 61f63e383784bd0ab6529cfc95ddc59c713afcc9 (patch) | |
tree | ffb1dd2bf4cac157fcf560ffa722365c434fabbc /tools/perf/builtin-script.c | |
parent | ee368428aac96d94a9804b9109a81355451c3cd9 (diff) | |
parent | 61f35d750683b21e9e3836e309195c79c1daed74 (diff) |
Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
New features:
- Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction
decoder used in the Intel PT code to study hot paths to samples (Andi Kleen)
Kernel changes:
- Default UPROBES_EVENTS to Y (Alexei Starovoitov)
- Fix check for kretprobe offset within function entry (Naveen N. Rao)
Infrastructure changes:
- Introduce util func is_sdt_event() (Ravi Bangoria)
- Make perf_event__synthesize_mmap_events() scale on older kernels where
reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r-- | tools/perf/builtin-script.c | 264 |
1 file changed, 255 insertions, 9 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 66d62c98dff9..c98e16689b57 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/time64.h> | 28 | #include <linux/time64.h> |
29 | #include "asm/bug.h" | 29 | #include "asm/bug.h" |
30 | #include "util/mem-events.h" | 30 | #include "util/mem-events.h" |
31 | #include "util/dump-insn.h" | ||
31 | 32 | ||
32 | static char const *script_name; | 33 | static char const *script_name; |
33 | static char const *generate_script_lang; | 34 | static char const *generate_script_lang; |
@@ -42,6 +43,7 @@ static bool nanosecs; | |||
42 | static const char *cpu_list; | 43 | static const char *cpu_list; |
43 | static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 44 | static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
44 | static struct perf_stat_config stat_config; | 45 | static struct perf_stat_config stat_config; |
46 | static int max_blocks; | ||
45 | 47 | ||
46 | unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; | 48 | unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; |
47 | 49 | ||
@@ -69,6 +71,7 @@ enum perf_output_field { | |||
69 | PERF_OUTPUT_CALLINDENT = 1U << 20, | 71 | PERF_OUTPUT_CALLINDENT = 1U << 20, |
70 | PERF_OUTPUT_INSN = 1U << 21, | 72 | PERF_OUTPUT_INSN = 1U << 21, |
71 | PERF_OUTPUT_INSNLEN = 1U << 22, | 73 | PERF_OUTPUT_INSNLEN = 1U << 22, |
74 | PERF_OUTPUT_BRSTACKINSN = 1U << 23, | ||
72 | }; | 75 | }; |
73 | 76 | ||
74 | struct output_option { | 77 | struct output_option { |
@@ -98,6 +101,7 @@ struct output_option { | |||
98 | {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, | 101 | {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, |
99 | {.str = "insn", .field = PERF_OUTPUT_INSN}, | 102 | {.str = "insn", .field = PERF_OUTPUT_INSN}, |
100 | {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, | 103 | {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, |
104 | {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, | ||
101 | }; | 105 | }; |
102 | 106 | ||
103 | /* default set to maintain compatibility with current format */ | 107 | /* default set to maintain compatibility with current format */ |
@@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
292 | "selected. Hence, no address to lookup the source line number.\n"); | 296 | "selected. Hence, no address to lookup the source line number.\n"); |
293 | return -EINVAL; | 297 | return -EINVAL; |
294 | } | 298 | } |
295 | 299 | if (PRINT_FIELD(BRSTACKINSN) && | |
300 | !(perf_evlist__combined_branch_type(session->evlist) & | ||
301 | PERF_SAMPLE_BRANCH_ANY)) { | ||
302 | pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" | ||
303 | "Hint: run 'perf record -b ...'\n"); | ||
304 | return -EINVAL; | ||
305 | } | ||
296 | if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && | 306 | if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && |
297 | perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", | 307 | perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", |
298 | PERF_OUTPUT_TID|PERF_OUTPUT_PID)) | 308 | PERF_OUTPUT_TID|PERF_OUTPUT_PID)) |
@@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample, | |||
546 | } | 556 | } |
547 | } | 557 | } |
548 | 558 | ||
559 | #define MAXBB 16384UL | ||
560 | |||
561 | static int grab_bb(u8 *buffer, u64 start, u64 end, | ||
562 | struct machine *machine, struct thread *thread, | ||
563 | bool *is64bit, u8 *cpumode, bool last) | ||
564 | { | ||
565 | long offset, len; | ||
566 | struct addr_location al; | ||
567 | bool kernel; | ||
568 | |||
569 | if (!start || !end) | ||
570 | return 0; | ||
571 | |||
572 | kernel = machine__kernel_ip(machine, start); | ||
573 | if (kernel) | ||
574 | *cpumode = PERF_RECORD_MISC_KERNEL; | ||
575 | else | ||
576 | *cpumode = PERF_RECORD_MISC_USER; | ||
577 | |||
578 | /* | ||
579 | * Block overlaps between kernel and user. | ||
580 | * This can happen due to ring filtering. | ||
581 | * On Intel CPUs the entry into the kernel is filtered, | ||
582 | * but the exit is not. Let the caller patch it up. | ||
583 | */ | ||
584 | if (kernel != machine__kernel_ip(machine, end)) { | ||
585 | printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", | ||
586 | start, end); | ||
587 | return -ENXIO; | ||
588 | } | ||
589 | |||
590 | memset(&al, 0, sizeof(al)); | ||
591 | if (end - start > MAXBB - MAXINSN) { | ||
592 | if (last) | ||
593 | printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); | ||
594 | else | ||
595 | printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); | ||
600 | if (!al.map || !al.map->dso) { | ||
601 | printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); | ||
602 | return 0; | ||
603 | } | ||
604 | if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { | ||
605 | printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /* Load maps to ensure dso->is_64_bit has been updated */ | ||
610 | map__load(al.map); | ||
611 | |||
612 | offset = al.map->map_ip(al.map, start); | ||
613 | len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, | ||
614 | end - start + MAXINSN); | ||
615 | |||
616 | *is64bit = al.map->dso->is_64_bit; | ||
617 | if (len <= 0) | ||
618 | printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", | ||
619 | start, end); | ||
620 | return len; | ||
621 | } | ||
622 | |||
623 | static void print_jump(uint64_t ip, struct branch_entry *en, | ||
624 | struct perf_insn *x, u8 *inbuf, int len, | ||
625 | int insn) | ||
626 | { | ||
627 | printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", | ||
628 | ip, | ||
629 | dump_insn(x, ip, inbuf, len, NULL), | ||
630 | en->flags.predicted ? " PRED" : "", | ||
631 | en->flags.mispred ? " MISPRED" : "", | ||
632 | en->flags.in_tx ? " INTX" : "", | ||
633 | en->flags.abort ? " ABORT" : ""); | ||
634 | if (en->flags.cycles) { | ||
635 | printf(" %d cycles", en->flags.cycles); | ||
636 | if (insn) | ||
637 | printf(" %.2f IPC", (float)insn / en->flags.cycles); | ||
638 | } | ||
639 | putchar('\n'); | ||
640 | } | ||
641 | |||
642 | static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, | ||
643 | uint64_t addr, struct symbol **lastsym, | ||
644 | struct perf_event_attr *attr) | ||
645 | { | ||
646 | struct addr_location al; | ||
647 | int off; | ||
648 | |||
649 | memset(&al, 0, sizeof(al)); | ||
650 | |||
651 | thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); | ||
652 | if (!al.map) | ||
653 | thread__find_addr_map(thread, cpumode, MAP__VARIABLE, | ||
654 | addr, &al); | ||
655 | if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) | ||
656 | return; | ||
657 | |||
658 | al.cpu = cpu; | ||
659 | al.sym = NULL; | ||
660 | if (al.map) | ||
661 | al.sym = map__find_symbol(al.map, al.addr); | ||
662 | |||
663 | if (!al.sym) | ||
664 | return; | ||
665 | |||
666 | if (al.addr < al.sym->end) | ||
667 | off = al.addr - al.sym->start; | ||
668 | else | ||
669 | off = al.addr - al.map->start - al.sym->start; | ||
670 | printf("\t%s", al.sym->name); | ||
671 | if (off) | ||
672 | printf("%+d", off); | ||
673 | putchar(':'); | ||
674 | if (PRINT_FIELD(SRCLINE)) | ||
675 | map__fprintf_srcline(al.map, al.addr, "\t", stdout); | ||
676 | putchar('\n'); | ||
677 | *lastsym = al.sym; | ||
678 | } | ||
679 | |||
680 | static void print_sample_brstackinsn(struct perf_sample *sample, | ||
681 | struct thread *thread, | ||
682 | struct perf_event_attr *attr, | ||
683 | struct machine *machine) | ||
684 | { | ||
685 | struct branch_stack *br = sample->branch_stack; | ||
686 | u64 start, end; | ||
687 | int i, insn, len, nr, ilen; | ||
688 | struct perf_insn x; | ||
689 | u8 buffer[MAXBB]; | ||
690 | unsigned off; | ||
691 | struct symbol *lastsym = NULL; | ||
692 | |||
693 | if (!(br && br->nr)) | ||
694 | return; | ||
695 | nr = br->nr; | ||
696 | if (max_blocks && nr > max_blocks + 1) | ||
697 | nr = max_blocks + 1; | ||
698 | |||
699 | x.thread = thread; | ||
700 | x.cpu = sample->cpu; | ||
701 | |||
702 | putchar('\n'); | ||
703 | |||
704 | /* Handle first from jump, of which we don't know the entry. */ | ||
705 | len = grab_bb(buffer, br->entries[nr-1].from, | ||
706 | br->entries[nr-1].from, | ||
707 | machine, thread, &x.is64bit, &x.cpumode, false); | ||
708 | if (len > 0) { | ||
709 | print_ip_sym(thread, x.cpumode, x.cpu, | ||
710 | br->entries[nr - 1].from, &lastsym, attr); | ||
711 | print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], | ||
712 | &x, buffer, len, 0); | ||
713 | } | ||
714 | |||
715 | /* Print all blocks */ | ||
716 | for (i = nr - 2; i >= 0; i--) { | ||
717 | if (br->entries[i].from || br->entries[i].to) | ||
718 | pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, | ||
719 | br->entries[i].from, | ||
720 | br->entries[i].to); | ||
721 | start = br->entries[i + 1].to; | ||
722 | end = br->entries[i].from; | ||
723 | |||
724 | len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); | ||
725 | /* Patch up missing kernel transfers due to ring filters */ | ||
726 | if (len == -ENXIO && i > 0) { | ||
727 | end = br->entries[--i].from; | ||
728 | pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); | ||
729 | len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); | ||
730 | } | ||
731 | if (len <= 0) | ||
732 | continue; | ||
733 | |||
734 | insn = 0; | ||
735 | for (off = 0;; off += ilen) { | ||
736 | uint64_t ip = start + off; | ||
737 | |||
738 | print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); | ||
739 | if (ip == end) { | ||
740 | print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); | ||
741 | break; | ||
742 | } else { | ||
743 | printf("\t%016" PRIx64 "\t%s\n", ip, | ||
744 | dump_insn(&x, ip, buffer + off, len - off, &ilen)); | ||
745 | if (ilen == 0) | ||
746 | break; | ||
747 | insn++; | ||
748 | } | ||
749 | } | ||
750 | } | ||
751 | |||
752 | /* | ||
753 | * Hit the branch? In this case we are already done, and the target | ||
754 | * has not been executed yet. | ||
755 | */ | ||
756 | if (br->entries[0].from == sample->ip) | ||
757 | return; | ||
758 | if (br->entries[0].flags.abort) | ||
759 | return; | ||
760 | |||
761 | /* | ||
762 | * Print final block up to sample | ||
763 | */ | ||
764 | start = br->entries[0].to; | ||
765 | end = sample->ip; | ||
766 | len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); | ||
767 | print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); | ||
768 | if (len <= 0) { | ||
769 | /* Print at least last IP if basic block did not work */ | ||
770 | len = grab_bb(buffer, sample->ip, sample->ip, | ||
771 | machine, thread, &x.is64bit, &x.cpumode, false); | ||
772 | if (len <= 0) | ||
773 | return; | ||
774 | |||
775 | printf("\t%016" PRIx64 "\t%s\n", sample->ip, | ||
776 | dump_insn(&x, sample->ip, buffer, len, NULL)); | ||
777 | return; | ||
778 | } | ||
779 | for (off = 0; off <= end - start; off += ilen) { | ||
780 | printf("\t%016" PRIx64 "\t%s\n", start + off, | ||
781 | dump_insn(&x, start + off, buffer + off, len - off, &ilen)); | ||
782 | if (ilen == 0) | ||
783 | break; | ||
784 | } | ||
785 | } | ||
549 | 786 | ||
550 | static void print_sample_addr(struct perf_sample *sample, | 787 | static void print_sample_addr(struct perf_sample *sample, |
551 | struct thread *thread, | 788 | struct thread *thread, |
@@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample, | |||
632 | } | 869 | } |
633 | 870 | ||
634 | static void print_insn(struct perf_sample *sample, | 871 | static void print_insn(struct perf_sample *sample, |
635 | struct perf_event_attr *attr) | 872 | struct perf_event_attr *attr, |
873 | struct thread *thread, | ||
874 | struct machine *machine) | ||
636 | { | 875 | { |
637 | if (PRINT_FIELD(INSNLEN)) | 876 | if (PRINT_FIELD(INSNLEN)) |
638 | printf(" ilen: %d", sample->insn_len); | 877 | printf(" ilen: %d", sample->insn_len); |
@@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample, | |||
643 | for (i = 0; i < sample->insn_len; i++) | 882 | for (i = 0; i < sample->insn_len; i++) |
644 | printf(" %02x", (unsigned char)sample->insn[i]); | 883 | printf(" %02x", (unsigned char)sample->insn[i]); |
645 | } | 884 | } |
885 | if (PRINT_FIELD(BRSTACKINSN)) | ||
886 | print_sample_brstackinsn(sample, thread, attr, machine); | ||
646 | } | 887 | } |
647 | 888 | ||
648 | static void print_sample_bts(struct perf_sample *sample, | 889 | static void print_sample_bts(struct perf_sample *sample, |
649 | struct perf_evsel *evsel, | 890 | struct perf_evsel *evsel, |
650 | struct thread *thread, | 891 | struct thread *thread, |
651 | struct addr_location *al) | 892 | struct addr_location *al, |
893 | struct machine *machine) | ||
652 | { | 894 | { |
653 | struct perf_event_attr *attr = &evsel->attr; | 895 | struct perf_event_attr *attr = &evsel->attr; |
654 | bool print_srcline_last = false; | 896 | bool print_srcline_last = false; |
@@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample, | |||
689 | if (print_srcline_last) | 931 | if (print_srcline_last) |
690 | map__fprintf_srcline(al->map, al->addr, "\n ", stdout); | 932 | map__fprintf_srcline(al->map, al->addr, "\n ", stdout); |
691 | 933 | ||
692 | print_insn(sample, attr); | 934 | print_insn(sample, attr, thread, machine); |
693 | 935 | ||
694 | printf("\n"); | 936 | printf("\n"); |
695 | } | 937 | } |
@@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src) | |||
872 | 1114 | ||
873 | static void process_event(struct perf_script *script, | 1115 | static void process_event(struct perf_script *script, |
874 | struct perf_sample *sample, struct perf_evsel *evsel, | 1116 | struct perf_sample *sample, struct perf_evsel *evsel, |
875 | struct addr_location *al) | 1117 | struct addr_location *al, |
1118 | struct machine *machine) | ||
876 | { | 1119 | { |
877 | struct thread *thread = al->thread; | 1120 | struct thread *thread = al->thread; |
878 | struct perf_event_attr *attr = &evsel->attr; | 1121 | struct perf_event_attr *attr = &evsel->attr; |
@@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script, | |||
899 | print_sample_flags(sample->flags); | 1142 | print_sample_flags(sample->flags); |
900 | 1143 | ||
901 | if (is_bts_event(attr)) { | 1144 | if (is_bts_event(attr)) { |
902 | print_sample_bts(sample, evsel, thread, al); | 1145 | print_sample_bts(sample, evsel, thread, al, machine); |
903 | return; | 1146 | return; |
904 | } | 1147 | } |
905 | 1148 | ||
@@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script, | |||
937 | 1180 | ||
938 | if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) | 1181 | if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) |
939 | print_sample_bpf_output(sample); | 1182 | print_sample_bpf_output(sample); |
940 | print_insn(sample, attr); | 1183 | print_insn(sample, attr, thread, machine); |
941 | printf("\n"); | 1184 | printf("\n"); |
942 | } | 1185 | } |
943 | 1186 | ||
@@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool, | |||
1047 | if (scripting_ops) | 1290 | if (scripting_ops) |
1048 | scripting_ops->process_event(event, sample, evsel, &al); | 1291 | scripting_ops->process_event(event, sample, evsel, &al); |
1049 | else | 1292 | else |
1050 | process_event(scr, sample, evsel, &al); | 1293 | process_event(scr, sample, evsel, &al, machine); |
1051 | 1294 | ||
1052 | out_put: | 1295 | out_put: |
1053 | addr_location__put(&al); | 1296 | addr_location__put(&al); |
@@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2191 | "Valid types: hw,sw,trace,raw. " | 2434 | "Valid types: hw,sw,trace,raw. " |
2192 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | 2435 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," |
2193 | "addr,symoff,period,iregs,brstack,brstacksym,flags," | 2436 | "addr,symoff,period,iregs,brstack,brstacksym,flags," |
2194 | "bpf-output,callindent,insn,insnlen", parse_output_fields), | 2437 | "bpf-output,callindent,insn,insnlen,brstackinsn", |
2438 | parse_output_fields), | ||
2195 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 2439 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
2196 | "system-wide collection from all CPUs"), | 2440 | "system-wide collection from all CPUs"), |
2197 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", | 2441 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", |
@@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2222 | OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, | 2466 | OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, |
2223 | "Show namespace events (if recorded)"), | 2467 | "Show namespace events (if recorded)"), |
2224 | OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), | 2468 | OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), |
2469 | OPT_INTEGER(0, "max-blocks", &max_blocks, | ||
2470 | "Maximum number of code blocks to dump with brstackinsn"), | ||
2225 | OPT_BOOLEAN(0, "ns", &nanosecs, | 2471 | OPT_BOOLEAN(0, "ns", &nanosecs, |
2226 | "Use 9 decimal places when displaying time"), | 2472 | "Use 9 decimal places when displaying time"), |
2227 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", | 2473 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", |