aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-script.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-03-16 12:29:23 -0400
committerIngo Molnar <mingo@kernel.org>2017-03-16 12:29:23 -0400
commit61f63e383784bd0ab6529cfc95ddc59c713afcc9 (patch)
treeffb1dd2bf4cac157fcf560ffa722365c434fabbc /tools/perf/builtin-script.c
parentee368428aac96d94a9804b9109a81355451c3cd9 (diff)
parent61f35d750683b21e9e3836e309195c79c1daed74 (diff)
Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) Kernel changes: - Default UPROBES_EVENTS to Y (Alexei Starovoitov) - Fix check for kretprobe offset within function entry (Naveen N. Rao) Infrastructure changes: - Introduce util func is_sdt_event() (Ravi Bangoria) - Make perf_event__synthesize_mmap_events() scale on older kernels where reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r--tools/perf/builtin-script.c264
1 files changed, 255 insertions, 9 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 66d62c98dff9..c98e16689b57 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,7 @@
28#include <linux/time64.h> 28#include <linux/time64.h>
29#include "asm/bug.h" 29#include "asm/bug.h"
30#include "util/mem-events.h" 30#include "util/mem-events.h"
31#include "util/dump-insn.h"
31 32
32static char const *script_name; 33static char const *script_name;
33static char const *generate_script_lang; 34static char const *generate_script_lang;
@@ -42,6 +43,7 @@ static bool nanosecs;
42static const char *cpu_list; 43static const char *cpu_list;
43static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 44static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
44static struct perf_stat_config stat_config; 45static struct perf_stat_config stat_config;
46static int max_blocks;
45 47
46unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; 48unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
47 49
@@ -69,6 +71,7 @@ enum perf_output_field {
69 PERF_OUTPUT_CALLINDENT = 1U << 20, 71 PERF_OUTPUT_CALLINDENT = 1U << 20,
70 PERF_OUTPUT_INSN = 1U << 21, 72 PERF_OUTPUT_INSN = 1U << 21,
71 PERF_OUTPUT_INSNLEN = 1U << 22, 73 PERF_OUTPUT_INSNLEN = 1U << 22,
74 PERF_OUTPUT_BRSTACKINSN = 1U << 23,
72}; 75};
73 76
74struct output_option { 77struct output_option {
@@ -98,6 +101,7 @@ struct output_option {
98 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, 101 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
99 {.str = "insn", .field = PERF_OUTPUT_INSN}, 102 {.str = "insn", .field = PERF_OUTPUT_INSN},
100 {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, 103 {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
104 {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
101}; 105};
102 106
103/* default set to maintain compatibility with current format */ 107/* default set to maintain compatibility with current format */
@@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
292 "selected. Hence, no address to lookup the source line number.\n"); 296 "selected. Hence, no address to lookup the source line number.\n");
293 return -EINVAL; 297 return -EINVAL;
294 } 298 }
295 299 if (PRINT_FIELD(BRSTACKINSN) &&
300 !(perf_evlist__combined_branch_type(session->evlist) &
301 PERF_SAMPLE_BRANCH_ANY)) {
302 pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
303 "Hint: run 'perf record -b ...'\n");
304 return -EINVAL;
305 }
296 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && 306 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
297 perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", 307 perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
298 PERF_OUTPUT_TID|PERF_OUTPUT_PID)) 308 PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample,
546 } 556 }
547} 557}
548 558
559#define MAXBB 16384UL
560
561static int grab_bb(u8 *buffer, u64 start, u64 end,
562 struct machine *machine, struct thread *thread,
563 bool *is64bit, u8 *cpumode, bool last)
564{
565 long offset, len;
566 struct addr_location al;
567 bool kernel;
568
569 if (!start || !end)
570 return 0;
571
572 kernel = machine__kernel_ip(machine, start);
573 if (kernel)
574 *cpumode = PERF_RECORD_MISC_KERNEL;
575 else
576 *cpumode = PERF_RECORD_MISC_USER;
577
578 /*
579 * Block overlaps between kernel and user.
580 * This can happen due to ring filtering
581 * On Intel CPUs the entry into the kernel is filtered,
582 * but the exit is not. Let the caller patch it up.
583 */
584 if (kernel != machine__kernel_ip(machine, end)) {
585 printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n",
586 start, end);
587 return -ENXIO;
588 }
589
590 memset(&al, 0, sizeof(al));
591 if (end - start > MAXBB - MAXINSN) {
592 if (last)
593 printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
594 else
595 printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start);
596 return 0;
597 }
598
599 thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
600 if (!al.map || !al.map->dso) {
601 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
602 return 0;
603 }
604 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
605 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
606 return 0;
607 }
608
609 /* Load maps to ensure dso->is_64_bit has been updated */
610 map__load(al.map);
611
612 offset = al.map->map_ip(al.map, start);
613 len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer,
614 end - start + MAXINSN);
615
616 *is64bit = al.map->dso->is_64_bit;
617 if (len <= 0)
618 printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
619 start, end);
620 return len;
621}
622
623static void print_jump(uint64_t ip, struct branch_entry *en,
624 struct perf_insn *x, u8 *inbuf, int len,
625 int insn)
626{
627 printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s",
628 ip,
629 dump_insn(x, ip, inbuf, len, NULL),
630 en->flags.predicted ? " PRED" : "",
631 en->flags.mispred ? " MISPRED" : "",
632 en->flags.in_tx ? " INTX" : "",
633 en->flags.abort ? " ABORT" : "");
634 if (en->flags.cycles) {
635 printf(" %d cycles", en->flags.cycles);
636 if (insn)
637 printf(" %.2f IPC", (float)insn / en->flags.cycles);
638 }
639 putchar('\n');
640}
641
642static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu,
643 uint64_t addr, struct symbol **lastsym,
644 struct perf_event_attr *attr)
645{
646 struct addr_location al;
647 int off;
648
649 memset(&al, 0, sizeof(al));
650
651 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
652 if (!al.map)
653 thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
654 addr, &al);
655 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
656 return;
657
658 al.cpu = cpu;
659 al.sym = NULL;
660 if (al.map)
661 al.sym = map__find_symbol(al.map, al.addr);
662
663 if (!al.sym)
664 return;
665
666 if (al.addr < al.sym->end)
667 off = al.addr - al.sym->start;
668 else
669 off = al.addr - al.map->start - al.sym->start;
670 printf("\t%s", al.sym->name);
671 if (off)
672 printf("%+d", off);
673 putchar(':');
674 if (PRINT_FIELD(SRCLINE))
675 map__fprintf_srcline(al.map, al.addr, "\t", stdout);
676 putchar('\n');
677 *lastsym = al.sym;
678}
679
680static void print_sample_brstackinsn(struct perf_sample *sample,
681 struct thread *thread,
682 struct perf_event_attr *attr,
683 struct machine *machine)
684{
685 struct branch_stack *br = sample->branch_stack;
686 u64 start, end;
687 int i, insn, len, nr, ilen;
688 struct perf_insn x;
689 u8 buffer[MAXBB];
690 unsigned off;
691 struct symbol *lastsym = NULL;
692
693 if (!(br && br->nr))
694 return;
695 nr = br->nr;
696 if (max_blocks && nr > max_blocks + 1)
697 nr = max_blocks + 1;
698
699 x.thread = thread;
700 x.cpu = sample->cpu;
701
702 putchar('\n');
703
704 /* Handle first from jump, of which we don't know the entry. */
705 len = grab_bb(buffer, br->entries[nr-1].from,
706 br->entries[nr-1].from,
707 machine, thread, &x.is64bit, &x.cpumode, false);
708 if (len > 0) {
709 print_ip_sym(thread, x.cpumode, x.cpu,
710 br->entries[nr - 1].from, &lastsym, attr);
711 print_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
712 &x, buffer, len, 0);
713 }
714
715 /* Print all blocks */
716 for (i = nr - 2; i >= 0; i--) {
717 if (br->entries[i].from || br->entries[i].to)
718 pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
719 br->entries[i].from,
720 br->entries[i].to);
721 start = br->entries[i + 1].to;
722 end = br->entries[i].from;
723
724 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
725 /* Patch up missing kernel transfers due to ring filters */
726 if (len == -ENXIO && i > 0) {
727 end = br->entries[--i].from;
728 pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
729 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
730 }
731 if (len <= 0)
732 continue;
733
734 insn = 0;
735 for (off = 0;; off += ilen) {
736 uint64_t ip = start + off;
737
738 print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr);
739 if (ip == end) {
740 print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn);
741 break;
742 } else {
743 printf("\t%016" PRIx64 "\t%s\n", ip,
744 dump_insn(&x, ip, buffer + off, len - off, &ilen));
745 if (ilen == 0)
746 break;
747 insn++;
748 }
749 }
750 }
751
752 /*
753 * Hit the branch? In this case we are already done, and the target
754 * has not been executed yet.
755 */
756 if (br->entries[0].from == sample->ip)
757 return;
758 if (br->entries[0].flags.abort)
759 return;
760
761 /*
762 * Print final block upto sample
763 */
764 start = br->entries[0].to;
765 end = sample->ip;
766 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
767 print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr);
768 if (len <= 0) {
769 /* Print at least last IP if basic block did not work */
770 len = grab_bb(buffer, sample->ip, sample->ip,
771 machine, thread, &x.is64bit, &x.cpumode, false);
772 if (len <= 0)
773 return;
774
775 printf("\t%016" PRIx64 "\t%s\n", sample->ip,
776 dump_insn(&x, sample->ip, buffer, len, NULL));
777 return;
778 }
779 for (off = 0; off <= end - start; off += ilen) {
780 printf("\t%016" PRIx64 "\t%s\n", start + off,
781 dump_insn(&x, start + off, buffer + off, len - off, &ilen));
782 if (ilen == 0)
783 break;
784 }
785}
549 786
550static void print_sample_addr(struct perf_sample *sample, 787static void print_sample_addr(struct perf_sample *sample,
551 struct thread *thread, 788 struct thread *thread,
@@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample,
632} 869}
633 870
634static void print_insn(struct perf_sample *sample, 871static void print_insn(struct perf_sample *sample,
635 struct perf_event_attr *attr) 872 struct perf_event_attr *attr,
873 struct thread *thread,
874 struct machine *machine)
636{ 875{
637 if (PRINT_FIELD(INSNLEN)) 876 if (PRINT_FIELD(INSNLEN))
638 printf(" ilen: %d", sample->insn_len); 877 printf(" ilen: %d", sample->insn_len);
@@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample,
643 for (i = 0; i < sample->insn_len; i++) 882 for (i = 0; i < sample->insn_len; i++)
644 printf(" %02x", (unsigned char)sample->insn[i]); 883 printf(" %02x", (unsigned char)sample->insn[i]);
645 } 884 }
885 if (PRINT_FIELD(BRSTACKINSN))
886 print_sample_brstackinsn(sample, thread, attr, machine);
646} 887}
647 888
648static void print_sample_bts(struct perf_sample *sample, 889static void print_sample_bts(struct perf_sample *sample,
649 struct perf_evsel *evsel, 890 struct perf_evsel *evsel,
650 struct thread *thread, 891 struct thread *thread,
651 struct addr_location *al) 892 struct addr_location *al,
893 struct machine *machine)
652{ 894{
653 struct perf_event_attr *attr = &evsel->attr; 895 struct perf_event_attr *attr = &evsel->attr;
654 bool print_srcline_last = false; 896 bool print_srcline_last = false;
@@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample,
689 if (print_srcline_last) 931 if (print_srcline_last)
690 map__fprintf_srcline(al->map, al->addr, "\n ", stdout); 932 map__fprintf_srcline(al->map, al->addr, "\n ", stdout);
691 933
692 print_insn(sample, attr); 934 print_insn(sample, attr, thread, machine);
693 935
694 printf("\n"); 936 printf("\n");
695} 937}
@@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src)
872 1114
873static void process_event(struct perf_script *script, 1115static void process_event(struct perf_script *script,
874 struct perf_sample *sample, struct perf_evsel *evsel, 1116 struct perf_sample *sample, struct perf_evsel *evsel,
875 struct addr_location *al) 1117 struct addr_location *al,
1118 struct machine *machine)
876{ 1119{
877 struct thread *thread = al->thread; 1120 struct thread *thread = al->thread;
878 struct perf_event_attr *attr = &evsel->attr; 1121 struct perf_event_attr *attr = &evsel->attr;
@@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script,
899 print_sample_flags(sample->flags); 1142 print_sample_flags(sample->flags);
900 1143
901 if (is_bts_event(attr)) { 1144 if (is_bts_event(attr)) {
902 print_sample_bts(sample, evsel, thread, al); 1145 print_sample_bts(sample, evsel, thread, al, machine);
903 return; 1146 return;
904 } 1147 }
905 1148
@@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script,
937 1180
938 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 1181 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
939 print_sample_bpf_output(sample); 1182 print_sample_bpf_output(sample);
940 print_insn(sample, attr); 1183 print_insn(sample, attr, thread, machine);
941 printf("\n"); 1184 printf("\n");
942} 1185}
943 1186
@@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool,
1047 if (scripting_ops) 1290 if (scripting_ops)
1048 scripting_ops->process_event(event, sample, evsel, &al); 1291 scripting_ops->process_event(event, sample, evsel, &al);
1049 else 1292 else
1050 process_event(scr, sample, evsel, &al); 1293 process_event(scr, sample, evsel, &al, machine);
1051 1294
1052out_put: 1295out_put:
1053 addr_location__put(&al); 1296 addr_location__put(&al);
@@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2191 "Valid types: hw,sw,trace,raw. " 2434 "Valid types: hw,sw,trace,raw. "
2192 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2435 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2193 "addr,symoff,period,iregs,brstack,brstacksym,flags," 2436 "addr,symoff,period,iregs,brstack,brstacksym,flags,"
2194 "bpf-output,callindent,insn,insnlen", parse_output_fields), 2437 "bpf-output,callindent,insn,insnlen,brstackinsn",
2438 parse_output_fields),
2195 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2439 OPT_BOOLEAN('a', "all-cpus", &system_wide,
2196 "system-wide collection from all CPUs"), 2440 "system-wide collection from all CPUs"),
2197 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 2441 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2222 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, 2466 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
2223 "Show namespace events (if recorded)"), 2467 "Show namespace events (if recorded)"),
2224 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), 2468 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
2469 OPT_INTEGER(0, "max-blocks", &max_blocks,
2470 "Maximum number of code blocks to dump with brstackinsn"),
2225 OPT_BOOLEAN(0, "ns", &nanosecs, 2471 OPT_BOOLEAN(0, "ns", &nanosecs,
2226 "Use 9 decimal places when displaying time"), 2472 "Use 9 decimal places when displaying time"),
2227 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", 2473 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",