aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-03-16 12:29:23 -0400
committerIngo Molnar <mingo@kernel.org>2017-03-16 12:29:23 -0400
commit61f63e383784bd0ab6529cfc95ddc59c713afcc9 (patch)
treeffb1dd2bf4cac157fcf560ffa722365c434fabbc /tools
parentee368428aac96d94a9804b9109a81355451c3cd9 (diff)
parent61f35d750683b21e9e3836e309195c79c1daed74 (diff)
Merge tag 'perf-core-for-mingo-4.12-20170316' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) Kernel changes: - Default UPROBES_EVENTS to Y (Alexei Starovoitov) - Fix check for kretprobe offset within function entry (Naveen N. Rao) Infrastructure changes: - Introduce util func is_sdt_event() (Ravi Bangoria) - Make perf_event__synthesize_mmap_events() scale on older kernels where reading /proc/pid/maps is way slower than reading /proc/pid/task/pid/maps (Stephane Eranian) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h5
-rw-r--r--tools/perf/Documentation/perf-script.txt13
-rw-r--r--tools/perf/builtin-script.c264
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/dump-insn.c14
-rw-r--r--tools/perf/util/dump-insn.h22
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c24
-rw-r--r--tools/perf/util/parse-events.h20
-rw-r--r--tools/perf/util/probe-event.c9
10 files changed, 352 insertions, 24 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 293149a1c6a1..4e7772387c6e 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -100,7 +100,7 @@
100#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ 100#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
101#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ 101#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
102#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ 102#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
103/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ 103#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
104#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ 104#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
105#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ 105#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
106#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ 106#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
@@ -186,7 +186,7 @@
186 * 186 *
187 * Reuse free bits when adding new feature flags! 187 * Reuse free bits when adding new feature flags!
188 */ 188 */
189 189#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
190#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ 190#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
191#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ 191#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
192#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 192#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
@@ -321,5 +321,4 @@
321#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 321#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
322#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 322#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
323#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 323#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
324
325#endif /* _ASM_X86_CPUFEATURES_H */ 324#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 62c9b0c77a3a..cb0eda3925e6 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, 119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
120 callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, 120 callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
121 to indicate to which event type the field list applies. 121 to indicate to which event type the field list applies.
122 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 122 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -189,15 +189,20 @@ OPTIONS
189 i.e., -F "" is not allowed. 189 i.e., -F "" is not allowed.
190 190
191 The brstack output includes branch related information with raw addresses using the 191 The brstack output includes branch related information with raw addresses using the
192 /v/v/v/v/ syntax in the following order: 192 /v/v/v/v/cycles syntax in the following order:
193 FROM: branch source instruction 193 FROM: branch source instruction
194 TO : branch target instruction 194 TO : branch target instruction
195 M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported 195 M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
196 X/- : X=branch inside a transactional region, -=not in transaction region or not supported 196 X/- : X=branch inside a transactional region, -=not in transaction region or not supported
197 A/- : A=TSX abort entry, -=not aborted region or not supported 197 A/- : A=TSX abort entry, -=not aborted region or not supported
198 cycles
198 199
199 The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. 200 The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
200 201
202 When brstackinsn is specified, the full assembler sequences of the branch sequences for each sample
203 are printed. This is the full execution path leading to the sample. This is only supported when the
204 sample was recorded with perf record -b or -j any.
205
201-k:: 206-k::
202--vmlinux=<file>:: 207--vmlinux=<file>::
203 vmlinux pathname 208 vmlinux pathname
@@ -302,6 +307,10 @@ include::itrace.txt[]
302 stop time is not given (i.e, time string is 'x.y,') then analysis goes 307 stop time is not given (i.e, time string is 'x.y,') then analysis goes
303 to end of file. 308 to end of file.
304 309
310--max-blocks::
311 Set the maximum number of program blocks to print with brstackinsn for
312 each sample.
313
305SEE ALSO 314SEE ALSO
306-------- 315--------
307linkperf:perf-record[1], linkperf:perf-script-perl[1], 316linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 66d62c98dff9..c98e16689b57 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,7 @@
28#include <linux/time64.h> 28#include <linux/time64.h>
29#include "asm/bug.h" 29#include "asm/bug.h"
30#include "util/mem-events.h" 30#include "util/mem-events.h"
31#include "util/dump-insn.h"
31 32
32static char const *script_name; 33static char const *script_name;
33static char const *generate_script_lang; 34static char const *generate_script_lang;
@@ -42,6 +43,7 @@ static bool nanosecs;
42static const char *cpu_list; 43static const char *cpu_list;
43static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 44static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
44static struct perf_stat_config stat_config; 45static struct perf_stat_config stat_config;
46static int max_blocks;
45 47
46unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; 48unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
47 49
@@ -69,6 +71,7 @@ enum perf_output_field {
69 PERF_OUTPUT_CALLINDENT = 1U << 20, 71 PERF_OUTPUT_CALLINDENT = 1U << 20,
70 PERF_OUTPUT_INSN = 1U << 21, 72 PERF_OUTPUT_INSN = 1U << 21,
71 PERF_OUTPUT_INSNLEN = 1U << 22, 73 PERF_OUTPUT_INSNLEN = 1U << 22,
74 PERF_OUTPUT_BRSTACKINSN = 1U << 23,
72}; 75};
73 76
74struct output_option { 77struct output_option {
@@ -98,6 +101,7 @@ struct output_option {
98 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, 101 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
99 {.str = "insn", .field = PERF_OUTPUT_INSN}, 102 {.str = "insn", .field = PERF_OUTPUT_INSN},
100 {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, 103 {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
104 {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
101}; 105};
102 106
103/* default set to maintain compatibility with current format */ 107/* default set to maintain compatibility with current format */
@@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
292 "selected. Hence, no address to lookup the source line number.\n"); 296 "selected. Hence, no address to lookup the source line number.\n");
293 return -EINVAL; 297 return -EINVAL;
294 } 298 }
295 299 if (PRINT_FIELD(BRSTACKINSN) &&
300 !(perf_evlist__combined_branch_type(session->evlist) &
301 PERF_SAMPLE_BRANCH_ANY)) {
302 pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
303 "Hint: run 'perf record -b ...'\n");
304 return -EINVAL;
305 }
296 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && 306 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
297 perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", 307 perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
298 PERF_OUTPUT_TID|PERF_OUTPUT_PID)) 308 PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample,
546 } 556 }
547} 557}
548 558
559#define MAXBB 16384UL
560
561static int grab_bb(u8 *buffer, u64 start, u64 end,
562 struct machine *machine, struct thread *thread,
563 bool *is64bit, u8 *cpumode, bool last)
564{
565 long offset, len;
566 struct addr_location al;
567 bool kernel;
568
569 if (!start || !end)
570 return 0;
571
572 kernel = machine__kernel_ip(machine, start);
573 if (kernel)
574 *cpumode = PERF_RECORD_MISC_KERNEL;
575 else
576 *cpumode = PERF_RECORD_MISC_USER;
577
578 /*
579 * Block overlaps between kernel and user.
580 * This can happen due to ring filtering
581 * On Intel CPUs the entry into the kernel is filtered,
582 * but the exit is not. Let the caller patch it up.
583 */
584 if (kernel != machine__kernel_ip(machine, end)) {
585 printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n",
586 start, end);
587 return -ENXIO;
588 }
589
590 memset(&al, 0, sizeof(al));
591 if (end - start > MAXBB - MAXINSN) {
592 if (last)
593 printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
594 else
595 printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start);
596 return 0;
597 }
598
599 thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
600 if (!al.map || !al.map->dso) {
601 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
602 return 0;
603 }
604 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
605 printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
606 return 0;
607 }
608
609 /* Load maps to ensure dso->is_64_bit has been updated */
610 map__load(al.map);
611
612 offset = al.map->map_ip(al.map, start);
613 len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer,
614 end - start + MAXINSN);
615
616 *is64bit = al.map->dso->is_64_bit;
617 if (len <= 0)
618 printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
619 start, end);
620 return len;
621}
622
623static void print_jump(uint64_t ip, struct branch_entry *en,
624 struct perf_insn *x, u8 *inbuf, int len,
625 int insn)
626{
627 printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s",
628 ip,
629 dump_insn(x, ip, inbuf, len, NULL),
630 en->flags.predicted ? " PRED" : "",
631 en->flags.mispred ? " MISPRED" : "",
632 en->flags.in_tx ? " INTX" : "",
633 en->flags.abort ? " ABORT" : "");
634 if (en->flags.cycles) {
635 printf(" %d cycles", en->flags.cycles);
636 if (insn)
637 printf(" %.2f IPC", (float)insn / en->flags.cycles);
638 }
639 putchar('\n');
640}
641
642static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu,
643 uint64_t addr, struct symbol **lastsym,
644 struct perf_event_attr *attr)
645{
646 struct addr_location al;
647 int off;
648
649 memset(&al, 0, sizeof(al));
650
651 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
652 if (!al.map)
653 thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
654 addr, &al);
655 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
656 return;
657
658 al.cpu = cpu;
659 al.sym = NULL;
660 if (al.map)
661 al.sym = map__find_symbol(al.map, al.addr);
662
663 if (!al.sym)
664 return;
665
666 if (al.addr < al.sym->end)
667 off = al.addr - al.sym->start;
668 else
669 off = al.addr - al.map->start - al.sym->start;
670 printf("\t%s", al.sym->name);
671 if (off)
672 printf("%+d", off);
673 putchar(':');
674 if (PRINT_FIELD(SRCLINE))
675 map__fprintf_srcline(al.map, al.addr, "\t", stdout);
676 putchar('\n');
677 *lastsym = al.sym;
678}
679
680static void print_sample_brstackinsn(struct perf_sample *sample,
681 struct thread *thread,
682 struct perf_event_attr *attr,
683 struct machine *machine)
684{
685 struct branch_stack *br = sample->branch_stack;
686 u64 start, end;
687 int i, insn, len, nr, ilen;
688 struct perf_insn x;
689 u8 buffer[MAXBB];
690 unsigned off;
691 struct symbol *lastsym = NULL;
692
693 if (!(br && br->nr))
694 return;
695 nr = br->nr;
696 if (max_blocks && nr > max_blocks + 1)
697 nr = max_blocks + 1;
698
699 x.thread = thread;
700 x.cpu = sample->cpu;
701
702 putchar('\n');
703
704 /* Handle first from jump, of which we don't know the entry. */
705 len = grab_bb(buffer, br->entries[nr-1].from,
706 br->entries[nr-1].from,
707 machine, thread, &x.is64bit, &x.cpumode, false);
708 if (len > 0) {
709 print_ip_sym(thread, x.cpumode, x.cpu,
710 br->entries[nr - 1].from, &lastsym, attr);
711 print_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
712 &x, buffer, len, 0);
713 }
714
715 /* Print all blocks */
716 for (i = nr - 2; i >= 0; i--) {
717 if (br->entries[i].from || br->entries[i].to)
718 pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
719 br->entries[i].from,
720 br->entries[i].to);
721 start = br->entries[i + 1].to;
722 end = br->entries[i].from;
723
724 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
725 /* Patch up missing kernel transfers due to ring filters */
726 if (len == -ENXIO && i > 0) {
727 end = br->entries[--i].from;
728 pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
729 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
730 }
731 if (len <= 0)
732 continue;
733
734 insn = 0;
735 for (off = 0;; off += ilen) {
736 uint64_t ip = start + off;
737
738 print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr);
739 if (ip == end) {
740 print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn);
741 break;
742 } else {
743 printf("\t%016" PRIx64 "\t%s\n", ip,
744 dump_insn(&x, ip, buffer + off, len - off, &ilen));
745 if (ilen == 0)
746 break;
747 insn++;
748 }
749 }
750 }
751
752 /*
753 * Hit the branch? In this case we are already done, and the target
754 * has not been executed yet.
755 */
756 if (br->entries[0].from == sample->ip)
757 return;
758 if (br->entries[0].flags.abort)
759 return;
760
761 /*
762 * Print final block upto sample
763 */
764 start = br->entries[0].to;
765 end = sample->ip;
766 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
767 print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr);
768 if (len <= 0) {
769 /* Print at least last IP if basic block did not work */
770 len = grab_bb(buffer, sample->ip, sample->ip,
771 machine, thread, &x.is64bit, &x.cpumode, false);
772 if (len <= 0)
773 return;
774
775 printf("\t%016" PRIx64 "\t%s\n", sample->ip,
776 dump_insn(&x, sample->ip, buffer, len, NULL));
777 return;
778 }
779 for (off = 0; off <= end - start; off += ilen) {
780 printf("\t%016" PRIx64 "\t%s\n", start + off,
781 dump_insn(&x, start + off, buffer + off, len - off, &ilen));
782 if (ilen == 0)
783 break;
784 }
785}
549 786
550static void print_sample_addr(struct perf_sample *sample, 787static void print_sample_addr(struct perf_sample *sample,
551 struct thread *thread, 788 struct thread *thread,
@@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample,
632} 869}
633 870
634static void print_insn(struct perf_sample *sample, 871static void print_insn(struct perf_sample *sample,
635 struct perf_event_attr *attr) 872 struct perf_event_attr *attr,
873 struct thread *thread,
874 struct machine *machine)
636{ 875{
637 if (PRINT_FIELD(INSNLEN)) 876 if (PRINT_FIELD(INSNLEN))
638 printf(" ilen: %d", sample->insn_len); 877 printf(" ilen: %d", sample->insn_len);
@@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample,
643 for (i = 0; i < sample->insn_len; i++) 882 for (i = 0; i < sample->insn_len; i++)
644 printf(" %02x", (unsigned char)sample->insn[i]); 883 printf(" %02x", (unsigned char)sample->insn[i]);
645 } 884 }
885 if (PRINT_FIELD(BRSTACKINSN))
886 print_sample_brstackinsn(sample, thread, attr, machine);
646} 887}
647 888
648static void print_sample_bts(struct perf_sample *sample, 889static void print_sample_bts(struct perf_sample *sample,
649 struct perf_evsel *evsel, 890 struct perf_evsel *evsel,
650 struct thread *thread, 891 struct thread *thread,
651 struct addr_location *al) 892 struct addr_location *al,
893 struct machine *machine)
652{ 894{
653 struct perf_event_attr *attr = &evsel->attr; 895 struct perf_event_attr *attr = &evsel->attr;
654 bool print_srcline_last = false; 896 bool print_srcline_last = false;
@@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample,
689 if (print_srcline_last) 931 if (print_srcline_last)
690 map__fprintf_srcline(al->map, al->addr, "\n ", stdout); 932 map__fprintf_srcline(al->map, al->addr, "\n ", stdout);
691 933
692 print_insn(sample, attr); 934 print_insn(sample, attr, thread, machine);
693 935
694 printf("\n"); 936 printf("\n");
695} 937}
@@ -872,7 +1114,8 @@ static size_t data_src__printf(u64 data_src)
872 1114
873static void process_event(struct perf_script *script, 1115static void process_event(struct perf_script *script,
874 struct perf_sample *sample, struct perf_evsel *evsel, 1116 struct perf_sample *sample, struct perf_evsel *evsel,
875 struct addr_location *al) 1117 struct addr_location *al,
1118 struct machine *machine)
876{ 1119{
877 struct thread *thread = al->thread; 1120 struct thread *thread = al->thread;
878 struct perf_event_attr *attr = &evsel->attr; 1121 struct perf_event_attr *attr = &evsel->attr;
@@ -899,7 +1142,7 @@ static void process_event(struct perf_script *script,
899 print_sample_flags(sample->flags); 1142 print_sample_flags(sample->flags);
900 1143
901 if (is_bts_event(attr)) { 1144 if (is_bts_event(attr)) {
902 print_sample_bts(sample, evsel, thread, al); 1145 print_sample_bts(sample, evsel, thread, al, machine);
903 return; 1146 return;
904 } 1147 }
905 1148
@@ -937,7 +1180,7 @@ static void process_event(struct perf_script *script,
937 1180
938 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 1181 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
939 print_sample_bpf_output(sample); 1182 print_sample_bpf_output(sample);
940 print_insn(sample, attr); 1183 print_insn(sample, attr, thread, machine);
941 printf("\n"); 1184 printf("\n");
942} 1185}
943 1186
@@ -1047,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool,
1047 if (scripting_ops) 1290 if (scripting_ops)
1048 scripting_ops->process_event(event, sample, evsel, &al); 1291 scripting_ops->process_event(event, sample, evsel, &al);
1049 else 1292 else
1050 process_event(scr, sample, evsel, &al); 1293 process_event(scr, sample, evsel, &al, machine);
1051 1294
1052out_put: 1295out_put:
1053 addr_location__put(&al); 1296 addr_location__put(&al);
@@ -2191,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2191 "Valid types: hw,sw,trace,raw. " 2434 "Valid types: hw,sw,trace,raw. "
2192 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2435 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2193 "addr,symoff,period,iregs,brstack,brstacksym,flags," 2436 "addr,symoff,period,iregs,brstack,brstacksym,flags,"
2194 "bpf-output,callindent,insn,insnlen", parse_output_fields), 2437 "bpf-output,callindent,insn,insnlen,brstackinsn",
2438 parse_output_fields),
2195 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2439 OPT_BOOLEAN('a', "all-cpus", &system_wide,
2196 "system-wide collection from all CPUs"), 2440 "system-wide collection from all CPUs"),
2197 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 2441 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -2222,6 +2466,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2222 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, 2466 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
2223 "Show namespace events (if recorded)"), 2467 "Show namespace events (if recorded)"),
2224 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), 2468 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
2469 OPT_INTEGER(0, "max-blocks", &max_blocks,
2470 "Maximum number of code blocks to dump with brstackinsn"),
2225 OPT_BOOLEAN(0, "ns", &nanosecs, 2471 OPT_BOOLEAN(0, "ns", &nanosecs,
2226 "Use 9 decimal places when displaying time"), 2472 "Use 9 decimal places when displaying time"),
2227 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", 2473 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2ea5ee179a3b..fb4f42f1bb38 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -82,6 +82,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
82libperf-$(CONFIG_AUXTRACE) += intel-pt.o 82libperf-$(CONFIG_AUXTRACE) += intel-pt.o
83libperf-$(CONFIG_AUXTRACE) += intel-bts.o 83libperf-$(CONFIG_AUXTRACE) += intel-bts.o
84libperf-y += parse-branch-options.o 84libperf-y += parse-branch-options.o
85libperf-y += dump-insn.o
85libperf-y += parse-regs-options.o 86libperf-y += parse-regs-options.o
86libperf-y += term.o 87libperf-y += term.o
87libperf-y += help-unknown-cmd.o 88libperf-y += help-unknown-cmd.o
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
new file mode 100644
index 000000000000..ffbdb19f05d0
--- /dev/null
+++ b/tools/perf/util/dump-insn.c
@@ -0,0 +1,14 @@
1#include <linux/compiler.h>
2#include "dump-insn.h"
3
4/* Fallback code */
5
6__weak
7const char *dump_insn(struct perf_insn *x __maybe_unused,
8 u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
9 int inlen __maybe_unused, int *lenp)
10{
11 if (lenp)
12 *lenp = 0;
13 return "?";
14}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
new file mode 100644
index 000000000000..90fb115981cf
--- /dev/null
+++ b/tools/perf/util/dump-insn.h
@@ -0,0 +1,22 @@
1#ifndef __PERF_DUMP_INSN_H
2#define __PERF_DUMP_INSN_H 1
3
4#define MAXINSN 15
5
6#include <linux/types.h>
7
8struct thread;
9
10struct perf_insn {
11 /* Initialized by callers: */
12 struct thread *thread;
13 u8 cpumode;
14 bool is64bit;
15 int cpu;
16 /* Temporary */
17 char out[256];
18};
19
20const char *dump_insn(struct perf_insn *x, u64 ip,
21 u8 *inbuf, int inlen, int *lenp);
22#endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index d082cb70445d..33fc2e9c0b0c 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -325,8 +325,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
325 if (machine__is_default_guest(machine)) 325 if (machine__is_default_guest(machine))
326 return 0; 326 return 0;
327 327
328 snprintf(filename, sizeof(filename), "%s/proc/%d/maps", 328 snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
329 machine->root_dir, pid); 329 machine->root_dir, pid, pid);
330 330
331 fp = fopen(filename, "r"); 331 fp = fopen(filename, "r");
332 if (fp == NULL) { 332 if (fp == NULL) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 459352a9978f..54818828023b 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -26,6 +26,7 @@
26#include "insn.c" 26#include "insn.c"
27 27
28#include "intel-pt-insn-decoder.h" 28#include "intel-pt-insn-decoder.h"
29#include "dump-insn.h"
29 30
30#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN 31#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
31#error Instruction buffer size too small 32#error Instruction buffer size too small
@@ -179,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
179 return 0; 180 return 0;
180} 181}
181 182
183const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
184 u8 *inbuf, int inlen, int *lenp)
185{
186 struct insn insn;
187 int n, i;
188 int left;
189
190 insn_init(&insn, inbuf, inlen, x->is64bit);
191 insn_get_length(&insn);
192 if (!insn_complete(&insn) || insn.length > inlen)
193 return "<bad>";
194 if (lenp)
195 *lenp = insn.length;
196 left = sizeof(x->out);
197 n = snprintf(x->out, left, "insn: ");
198 left -= n;
199 for (i = 0; i < insn.length; i++) {
200 n += snprintf(x->out + n, left, "%02x ", inbuf[i]);
201 left -= n;
202 }
203 return x->out;
204}
205
182const char *branch_name[] = { 206const char *branch_name[] = {
183 [INTEL_PT_OP_OTHER] = "Other", 207 [INTEL_PT_OP_OTHER] = "Other",
184 [INTEL_PT_OP_CALL] = "Call", 208 [INTEL_PT_OP_CALL] = "Call",
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1af6a267c21b..8c72b0ff7fcb 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -8,6 +8,7 @@
8#include <stdbool.h> 8#include <stdbool.h>
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/perf_event.h> 10#include <linux/perf_event.h>
11#include <string.h>
11 12
12struct list_head; 13struct list_head;
13struct perf_evsel; 14struct perf_evsel;
@@ -196,4 +197,23 @@ int is_valid_tracepoint(const char *event_string);
196int valid_event_mount(const char *eventfs); 197int valid_event_mount(const char *eventfs);
197char *parse_events_formats_error_string(char *additional_terms); 198char *parse_events_formats_error_string(char *additional_terms);
198 199
200#ifdef HAVE_LIBELF_SUPPORT
201/*
202 * If the probe point starts with '%',
203 * or starts with "sdt_" and has a ':' but no '=',
204 * then it should be a SDT/cached probe point.
205 */
206static inline bool is_sdt_event(char *str)
207{
208 return (str[0] == '%' ||
209 (!strncmp(str, "sdt_", 4) &&
210 !!strchr(str, ':') && !strchr(str, '=')));
211}
212#else
213static inline bool is_sdt_event(char *str __maybe_unused)
214{
215 return false;
216}
217#endif /* HAVE_LIBELF_SUPPORT */
218
199#endif /* __PERF_PARSE_EVENTS_H */ 219#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index c9bdc9ded0c3..b19d17801beb 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1341,14 +1341,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1341 if (!arg) 1341 if (!arg)
1342 return -EINVAL; 1342 return -EINVAL;
1343 1343
1344 /* 1344 if (is_sdt_event(arg)) {
1345 * If the probe point starts with '%',
1346 * or starts with "sdt_" and has a ':' but no '=',
1347 * then it should be a SDT/cached probe point.
1348 */
1349 if (arg[0] == '%' ||
1350 (!strncmp(arg, "sdt_", 4) &&
1351 !!strchr(arg, ':') && !strchr(arg, '='))) {
1352 pev->sdt = true; 1345 pev->sdt = true;
1353 if (arg[0] == '%') 1346 if (arg[0] == '%')
1354 arg++; 1347 arg++;