aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/kprobetrace.txt149
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/Makefile3
-rw-r--r--arch/x86/include/asm/inat.h220
-rw-r--r--arch/x86/include/asm/inat_types.h29
-rw-r--r--arch/x86/include/asm/insn.h184
-rw-r--r--arch/x86/include/asm/ptrace.h62
-rw-r--r--arch/x86/kernel/entry_32.S24
-rw-r--r--arch/x86/kernel/entry_64.S8
-rw-r--r--arch/x86/kernel/kprobes.c234
-rw-r--r--arch/x86/kernel/ptrace.c112
-rw-r--r--arch/x86/lib/.gitignore1
-rw-r--r--arch/x86/lib/Makefile13
-rw-r--r--arch/x86/lib/inat.c90
-rw-r--r--arch/x86/lib/insn.c516
-rw-r--r--arch/x86/lib/x86-opcode-map.txt893
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/tools/Makefile22
-rw-r--r--arch/x86/tools/distill.awk47
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk380
-rw-r--r--arch/x86/tools/test_get_len.c168
-rw-r--r--include/linux/ftrace_event.h32
-rw-r--r--include/linux/kprobes.h2
-rw-r--r--include/linux/syscalls.h12
-rw-r--r--include/trace/ftrace.h59
-rw-r--r--include/trace/syscall.h11
-rw-r--r--kernel/kprobes.c68
-rw-r--r--kernel/notifier.c2
-rw-r--r--kernel/trace/Kconfig17
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.h23
-rw-r--r--kernel/trace/trace_event_profile.c45
-rw-r--r--kernel/trace/trace_events.c144
-rw-r--r--kernel/trace/trace_export.c14
-rw-r--r--kernel/trace/trace_kprobe.c1513
-rw-r--r--kernel/trace/trace_syscalls.c60
-rw-r--r--tools/perf/Documentation/perf-probe.txt49
-rw-r--r--tools/perf/Makefile15
-rw-r--r--tools/perf/builtin-probe.c435
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/util/probe-finder.c732
-rw-r--r--tools/perf/util/probe-finder.h57
-rw-r--r--tools/perf/util/util.h9
45 files changed, 6190 insertions, 288 deletions
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
new file mode 100644
index 000000000000..47aabeebbdf6
--- /dev/null
+++ b/Documentation/trace/kprobetrace.txt
@@ -0,0 +1,149 @@
1 Kprobe-based Event Tracing
2 ==========================
3
4 Documentation is written by Masami Hiramatsu
5
6
7Overview
8--------
9These events are similar to tracepoint based events. Instead of Tracepoint,
10this is based on kprobes (kprobe and kretprobe). So it can probe wherever
11kprobes can probe (this means, all functions body except for __kprobes
12functions). Unlike the Tracepoint based event, this can be added and removed
13dynamically, on the fly.
14
15To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y.
16
17Similar to the events tracer, this doesn't need to be activated via
18current_tracer. Instead of that, add probe points via
19/sys/kernel/debug/tracing/kprobe_events, and enable it via
20/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled.
21
22
23Synopsis of kprobe_events
24-------------------------
25 p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
26 r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
27
28 GRP : Group name. If omitted, use "kprobes" for it.
29 EVENT : Event name. If omitted, the event name is generated
30 based on SYMBOL+offs or MEMADDR.
31 SYMBOL[+offs] : Symbol+offset where the probe is inserted.
32 MEMADDR : Address where the probe is inserted.
33
34 FETCHARGS : Arguments. Each probe can have up to 128 args.
35 %REG : Fetch register REG
36 @ADDR : Fetch memory at ADDR (ADDR should be in kernel)
37 @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
38 $stackN : Fetch Nth entry of stack (N >= 0)
39 $stack : Fetch stack address.
40 $argN : Fetch function argument. (N >= 0)(*)
41 $retval : Fetch return value.(**)
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
43 NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
44
45 (*) aN may not correct on asmlinkaged functions and at the middle of
46 function body.
47 (**) only for return probe.
48 (***) this is useful for fetching a field of data structures.
49
50
51Per-Probe Event Filtering
52-------------------------
53 Per-probe event filtering feature allows you to set different filter on each
54probe and gives you what arguments will be shown in trace buffer. If an event
55name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event
56under tracing/events/kprobes/<EVENT>, at the directory you can see 'id',
57'enabled', 'format' and 'filter'.
58
59enabled:
60 You can enable/disable the probe by writing 1 or 0 on it.
61
62format:
63 This shows the format of this probe event.
64
65filter:
66 You can write filtering rules of this event.
67
68id:
69 This shows the id of this probe event.
70
71
72Event Profiling
73---------------
74 You can check the total number of probe hits and probe miss-hits via
75/sys/kernel/debug/tracing/kprobe_profile.
76 The first column is event name, the second is the number of probe hits,
77the third is the number of probe miss-hits.
78
79
80Usage examples
81--------------
82To add a probe as a new event, write a new definition to kprobe_events
83as below.
84
85 echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
86
87 This sets a kprobe on the top of do_sys_open() function with recording
881st to 4th arguments as "myprobe" event. As this example shows, users can
89choose more familiar names for each arguments.
90
91 echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
92
93 This sets a kretprobe on the return point of do_sys_open() function with
94recording return value as "myretprobe" event.
95 You can see the format of these events via
96/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
97
98 cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
99name: myprobe
100ID: 75
101format:
102 field:unsigned short common_type; offset:0; size:2;
103 field:unsigned char common_flags; offset:2; size:1;
104 field:unsigned char common_preempt_count; offset:3; size:1;
105 field:int common_pid; offset:4; size:4;
106 field:int common_tgid; offset:8; size:4;
107
108 field: unsigned long ip; offset:16;tsize:8;
109 field: int nargs; offset:24;tsize:4;
110 field: unsigned long dfd; offset:32;tsize:8;
111 field: unsigned long filename; offset:40;tsize:8;
112 field: unsigned long flags; offset:48;tsize:8;
113 field: unsigned long mode; offset:56;tsize:8;
114
115print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
116
117
118 You can see that the event has 4 arguments as in the expressions you specified.
119
120 echo > /sys/kernel/debug/tracing/kprobe_events
121
122 This clears all probe points.
123
124 Right after definition, each event is disabled by default. For tracing these
125events, you need to enable it.
126
127 echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
128 echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
129
130 And you can see the traced information via /sys/kernel/debug/tracing/trace.
131
132 cat /sys/kernel/debug/tracing/trace
133# tracer: nop
134#
135# TASK-PID CPU# TIMESTAMP FUNCTION
136# | | | | |
137 <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
138 <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe
139 <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
140 <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
141 <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
142 <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
143
144
145 Each line shows when the kernel hits an event, and <- SYMBOL means kernel
146returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
147returns from do_sys_open to sys_open+0x1b).
148
149
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d105f29bb6bb..7d0b681a132b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST
186config HAVE_MMIOTRACE_SUPPORT 186config HAVE_MMIOTRACE_SUPPORT
187 def_bool y 187 def_bool y
188 188
189config X86_DECODER_SELFTEST
190 bool "x86 instruction decoder selftest"
191 depends on DEBUG_KERNEL
192 ---help---
193 Perform x86 instruction decoder selftests at build time.
194 This option is useful for checking the sanity of x86 instruction
195 decoder code.
196 If unsure, say "N".
197
189# 198#
190# IO delay types: 199# IO delay types:
191# 200#
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d2d24c9ee64d..78b32be55e9e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -155,6 +155,9 @@ all: bzImage
155KBUILD_IMAGE := $(boot)/bzImage 155KBUILD_IMAGE := $(boot)/bzImage
156 156
157bzImage: vmlinux 157bzImage: vmlinux
158ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
159 $(Q)$(MAKE) $(build)=arch/x86/tools posttest
160endif
158 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) 161 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
159 $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot 162 $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
160 $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ 163 $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
new file mode 100644
index 000000000000..205b063e3e32
--- /dev/null
+++ b/arch/x86/include/asm/inat.h
@@ -0,0 +1,220 @@
1#ifndef _ASM_X86_INAT_H
2#define _ASM_X86_INAT_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23#include <asm/inat_types.h>
24
25/*
26 * Internal bits. Don't use bitmasks directly, because these bits are
27 * unstable. You should use checking functions.
28 */
29
30#define INAT_OPCODE_TABLE_SIZE 256
31#define INAT_GROUP_TABLE_SIZE 8
32
33/* Legacy last prefixes */
34#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
35#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
36#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
37/* Other Legacy prefixes */
38#define INAT_PFX_LOCK 4 /* 0xF0 */
39#define INAT_PFX_CS 5 /* 0x2E */
40#define INAT_PFX_DS 6 /* 0x3E */
41#define INAT_PFX_ES 7 /* 0x26 */
42#define INAT_PFX_FS 8 /* 0x64 */
43#define INAT_PFX_GS 9 /* 0x65 */
44#define INAT_PFX_SS 10 /* 0x36 */
45#define INAT_PFX_ADDRSZ 11 /* 0x67 */
46/* x86-64 REX prefix */
47#define INAT_PFX_REX 12 /* 0x4X */
48/* AVX VEX prefixes */
49#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
50#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
51
52#define INAT_LSTPFX_MAX 3
53#define INAT_LGCPFX_MAX 11
54
55/* Immediate size */
56#define INAT_IMM_BYTE 1
57#define INAT_IMM_WORD 2
58#define INAT_IMM_DWORD 3
59#define INAT_IMM_QWORD 4
60#define INAT_IMM_PTR 5
61#define INAT_IMM_VWORD32 6
62#define INAT_IMM_VWORD 7
63
64/* Legacy prefix */
65#define INAT_PFX_OFFS 0
66#define INAT_PFX_BITS 4
67#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
68#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
69/* Escape opcodes */
70#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
71#define INAT_ESC_BITS 2
72#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
73#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
74/* Group opcodes (1-16) */
75#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
76#define INAT_GRP_BITS 5
77#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
78#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
79/* Immediates */
80#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
81#define INAT_IMM_BITS 3
82#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
83/* Flags */
84#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
85#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
86#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
87#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
88#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
89#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
90#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
91#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
92/* Attribute making macros for attribute tables */
93#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
94#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
95#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
96#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
97
98/* Attribute search APIs */
99extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
100extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
101 insn_byte_t last_pfx,
102 insn_attr_t esc_attr);
103extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
104 insn_byte_t last_pfx,
105 insn_attr_t esc_attr);
106extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
107 insn_byte_t vex_m,
108 insn_byte_t vex_pp);
109
110/* Attribute checking functions */
111static inline int inat_is_legacy_prefix(insn_attr_t attr)
112{
113 attr &= INAT_PFX_MASK;
114 return attr && attr <= INAT_LGCPFX_MAX;
115}
116
117static inline int inat_is_address_size_prefix(insn_attr_t attr)
118{
119 return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
120}
121
122static inline int inat_is_operand_size_prefix(insn_attr_t attr)
123{
124 return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
125}
126
127static inline int inat_is_rex_prefix(insn_attr_t attr)
128{
129 return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
130}
131
132static inline int inat_last_prefix_id(insn_attr_t attr)
133{
134 if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
135 return 0;
136 else
137 return attr & INAT_PFX_MASK;
138}
139
140static inline int inat_is_vex_prefix(insn_attr_t attr)
141{
142 attr &= INAT_PFX_MASK;
143 return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
144}
145
146static inline int inat_is_vex3_prefix(insn_attr_t attr)
147{
148 return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
149}
150
151static inline int inat_is_escape(insn_attr_t attr)
152{
153 return attr & INAT_ESC_MASK;
154}
155
156static inline int inat_escape_id(insn_attr_t attr)
157{
158 return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
159}
160
161static inline int inat_is_group(insn_attr_t attr)
162{
163 return attr & INAT_GRP_MASK;
164}
165
166static inline int inat_group_id(insn_attr_t attr)
167{
168 return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
169}
170
171static inline int inat_group_common_attribute(insn_attr_t attr)
172{
173 return attr & ~INAT_GRP_MASK;
174}
175
176static inline int inat_has_immediate(insn_attr_t attr)
177{
178 return attr & INAT_IMM_MASK;
179}
180
181static inline int inat_immediate_size(insn_attr_t attr)
182{
183 return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
184}
185
186static inline int inat_has_modrm(insn_attr_t attr)
187{
188 return attr & INAT_MODRM;
189}
190
191static inline int inat_is_force64(insn_attr_t attr)
192{
193 return attr & INAT_FORCE64;
194}
195
196static inline int inat_has_second_immediate(insn_attr_t attr)
197{
198 return attr & INAT_SCNDIMM;
199}
200
201static inline int inat_has_moffset(insn_attr_t attr)
202{
203 return attr & INAT_MOFFSET;
204}
205
206static inline int inat_has_variant(insn_attr_t attr)
207{
208 return attr & INAT_VARIANT;
209}
210
211static inline int inat_accept_vex(insn_attr_t attr)
212{
213 return attr & INAT_VEXOK;
214}
215
216static inline int inat_must_vex(insn_attr_t attr)
217{
218 return attr & INAT_VEXONLY;
219}
220#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
1#ifndef _ASM_X86_INAT_TYPES_H
2#define _ASM_X86_INAT_TYPES_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23
24/* Instruction attributes */
25typedef unsigned int insn_attr_t;
26typedef unsigned char insn_byte_t;
27typedef signed int insn_value_t;
28
29#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
new file mode 100644
index 000000000000..96c2e0ad04ca
--- /dev/null
+++ b/arch/x86/include/asm/insn.h
@@ -0,0 +1,184 @@
1#ifndef _ASM_X86_INSN_H
2#define _ASM_X86_INSN_H
3/*
4 * x86 instruction analysis
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2009
21 */
22
23/* insn_attr_t is defined in inat.h */
24#include <asm/inat.h>
25
26struct insn_field {
27 union {
28 insn_value_t value;
29 insn_byte_t bytes[4];
30 };
31 /* !0 if we've run insn_get_xxx() for this field */
32 unsigned char got;
33 unsigned char nbytes;
34};
35
36struct insn {
37 struct insn_field prefixes; /*
38 * Prefixes
39 * prefixes.bytes[3]: last prefix
40 */
41 struct insn_field rex_prefix; /* REX prefix */
42 struct insn_field vex_prefix; /* VEX prefix */
43 struct insn_field opcode; /*
44 * opcode.bytes[0]: opcode1
45 * opcode.bytes[1]: opcode2
46 * opcode.bytes[2]: opcode3
47 */
48 struct insn_field modrm;
49 struct insn_field sib;
50 struct insn_field displacement;
51 union {
52 struct insn_field immediate;
53 struct insn_field moffset1; /* for 64bit MOV */
54 struct insn_field immediate1; /* for 64bit imm or off16/32 */
55 };
56 union {
57 struct insn_field moffset2; /* for 64bit MOV */
58 struct insn_field immediate2; /* for 64bit imm or seg16 */
59 };
60
61 insn_attr_t attr;
62 unsigned char opnd_bytes;
63 unsigned char addr_bytes;
64 unsigned char length;
65 unsigned char x86_64;
66
67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */
68 const insn_byte_t *next_byte;
69};
70
71#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
72#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
73#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
74
75#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
76#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
77#define X86_SIB_BASE(sib) ((sib) & 0x07)
78
79#define X86_REX_W(rex) ((rex) & 8)
80#define X86_REX_R(rex) ((rex) & 4)
81#define X86_REX_X(rex) ((rex) & 2)
82#define X86_REX_B(rex) ((rex) & 1)
83
84/* VEX bit flags */
85#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
86#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
87#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
88#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
89#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
90/* VEX bit fields */
91#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
92#define X86_VEX2_M 1 /* VEX2.M always 1 */
93#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
94#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
95#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
96
97/* The last prefix is needed for two-byte and three-byte opcodes */
98static inline insn_byte_t insn_last_prefix(struct insn *insn)
99{
100 return insn->prefixes.bytes[3];
101}
102
103extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
104extern void insn_get_prefixes(struct insn *insn);
105extern void insn_get_opcode(struct insn *insn);
106extern void insn_get_modrm(struct insn *insn);
107extern void insn_get_sib(struct insn *insn);
108extern void insn_get_displacement(struct insn *insn);
109extern void insn_get_immediate(struct insn *insn);
110extern void insn_get_length(struct insn *insn);
111
112/* Attribute will be determined after getting ModRM (for opcode groups) */
113static inline void insn_get_attribute(struct insn *insn)
114{
115 insn_get_modrm(insn);
116}
117
118/* Instruction uses RIP-relative addressing */
119extern int insn_rip_relative(struct insn *insn);
120
121/* Init insn for kernel text */
122static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
123{
124#ifdef CONFIG_X86_64
125 insn_init(insn, kaddr, 1);
126#else /* CONFIG_X86_32 */
127 insn_init(insn, kaddr, 0);
128#endif
129}
130
131static inline int insn_is_avx(struct insn *insn)
132{
133 if (!insn->prefixes.got)
134 insn_get_prefixes(insn);
135 return (insn->vex_prefix.value != 0);
136}
137
138static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
139{
140 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
141 return X86_VEX2_M;
142 else
143 return X86_VEX3_M(insn->vex_prefix.bytes[1]);
144}
145
146static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
147{
148 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
149 return X86_VEX_P(insn->vex_prefix.bytes[1]);
150 else
151 return X86_VEX_P(insn->vex_prefix.bytes[2]);
152}
153
154/* Offset of each field from kaddr */
155static inline int insn_offset_rex_prefix(struct insn *insn)
156{
157 return insn->prefixes.nbytes;
158}
159static inline int insn_offset_vex_prefix(struct insn *insn)
160{
161 return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
162}
163static inline int insn_offset_opcode(struct insn *insn)
164{
165 return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
166}
167static inline int insn_offset_modrm(struct insn *insn)
168{
169 return insn_offset_opcode(insn) + insn->opcode.nbytes;
170}
171static inline int insn_offset_sib(struct insn *insn)
172{
173 return insn_offset_modrm(insn) + insn->modrm.nbytes;
174}
175static inline int insn_offset_displacement(struct insn *insn)
176{
177 return insn_offset_sib(insn) + insn->sib.nbytes;
178}
179static inline int insn_offset_immediate(struct insn *insn)
180{
181 return insn_offset_displacement(insn) + insn->displacement.nbytes;
182}
183
184#endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 0f0d908349aa..a3d49dd7d26e 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -7,6 +7,7 @@
7 7
8#ifdef __KERNEL__ 8#ifdef __KERNEL__
9#include <asm/segment.h> 9#include <asm/segment.h>
10#include <asm/page_types.h>
10#endif 11#endif
11 12
12#ifndef __ASSEMBLY__ 13#ifndef __ASSEMBLY__
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
216 return regs->sp; 217 return regs->sp;
217} 218}
218 219
220/* Query offset/name of register from its name/offset */
221extern int regs_query_register_offset(const char *name);
222extern const char *regs_query_register_name(unsigned int offset);
223#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss))
224
225/**
226 * regs_get_register() - get register value from its offset
227 * @regs: pt_regs from which register value is gotten.
228 * @offset: offset number of the register.
229 *
230 * regs_get_register returns the value of a register whose offset from @regs
231 * is @offset. The @offset is the offset of the register in struct pt_regs.
232 * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
233 */
234static inline unsigned long regs_get_register(struct pt_regs *regs,
235 unsigned int offset)
236{
237 if (unlikely(offset > MAX_REG_OFFSET))
238 return 0;
239 return *(unsigned long *)((unsigned long)regs + offset);
240}
241
242/**
243 * regs_within_kernel_stack() - check the address in the stack
244 * @regs: pt_regs which contains kernel stack pointer.
245 * @addr: address which is checked.
246 *
247 * regs_within_kenel_stack() checks @addr is within the kernel stack page(s).
248 * If @addr is within the kernel stack, it returns true. If not, returns false.
249 */
250static inline int regs_within_kernel_stack(struct pt_regs *regs,
251 unsigned long addr)
252{
253 return ((addr & ~(THREAD_SIZE - 1)) ==
254 (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
255}
256
257/**
258 * regs_get_kernel_stack_nth() - get Nth entry of the stack
259 * @regs: pt_regs which contains kernel stack pointer.
260 * @n: stack entry number.
261 *
262 * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
263 * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
264 * this returns 0.
265 */
266static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
267 unsigned int n)
268{
269 unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
270 addr += n;
271 if (regs_within_kernel_stack(regs, (unsigned long)addr))
272 return *addr;
273 else
274 return 0;
275}
276
277/* Get Nth argument at function call */
278extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
279 unsigned int n);
280
219/* 281/*
220 * These are defined as per linux/ptrace.h, which see. 282 * These are defined as per linux/ptrace.h, which see.
221 */ 283 */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7d52e9da5e0c..50b9c220e121 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork)
334END(ret_from_fork) 334END(ret_from_fork)
335 335
336/* 336/*
337 * Interrupt exit functions should be protected against kprobes
338 */
339 .pushsection .kprobes.text, "ax"
340/*
337 * Return to user mode is not as complex as all this looks, 341 * Return to user mode is not as complex as all this looks,
338 * but we want the default path for a system call return to 342 * but we want the default path for a system call return to
339 * go as quickly as possible which is why some of this is 343 * go as quickly as possible which is why some of this is
@@ -383,6 +387,10 @@ need_resched:
383END(resume_kernel) 387END(resume_kernel)
384#endif 388#endif
385 CFI_ENDPROC 389 CFI_ENDPROC
390/*
391 * End of kprobes section
392 */
393 .popsection
386 394
387/* SYSENTER_RETURN points to after the "sysenter" instruction in 395/* SYSENTER_RETURN points to after the "sysenter" instruction in
388 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 396 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -513,6 +521,10 @@ sysexit_audit:
513 PTGS_TO_GS_EX 521 PTGS_TO_GS_EX
514ENDPROC(ia32_sysenter_target) 522ENDPROC(ia32_sysenter_target)
515 523
524/*
525 * syscall stub including irq exit should be protected against kprobes
526 */
527 .pushsection .kprobes.text, "ax"
516 # system call handler stub 528 # system call handler stub
517ENTRY(system_call) 529ENTRY(system_call)
518 RING0_INT_FRAME # can't unwind into user space anyway 530 RING0_INT_FRAME # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
705 jmp resume_userspace 717 jmp resume_userspace
706END(syscall_badsys) 718END(syscall_badsys)
707 CFI_ENDPROC 719 CFI_ENDPROC
720/*
721 * End of kprobes section
722 */
723 .popsection
708 724
709/* 725/*
710 * System calls that need a pt_regs pointer. 726 * System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
814ENDPROC(common_interrupt) 830ENDPROC(common_interrupt)
815 CFI_ENDPROC 831 CFI_ENDPROC
816 832
833/*
834 * Irq entries should be protected against kprobes
835 */
836 .pushsection .kprobes.text, "ax"
817#define BUILD_INTERRUPT3(name, nr, fn) \ 837#define BUILD_INTERRUPT3(name, nr, fn) \
818ENTRY(name) \ 838ENTRY(name) \
819 RING0_INT_FRAME; \ 839 RING0_INT_FRAME; \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
980 jmp error_code 1000 jmp error_code
981 CFI_ENDPROC 1001 CFI_ENDPROC
982END(spurious_interrupt_bug) 1002END(spurious_interrupt_bug)
1003/*
1004 * End of kprobes section
1005 */
1006 .popsection
983 1007
984ENTRY(kernel_thread_helper) 1008ENTRY(kernel_thread_helper)
985 pushl $0 # fake return address for unwinder 1009 pushl $0 # fake return address for unwinder
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bd5bbddddf91..722df1b1152d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -803,6 +803,10 @@ END(interrupt)
803 call \func 803 call \func
804 .endm 804 .endm
805 805
806/*
807 * Interrupt entry/exit should be protected against kprobes
808 */
809 .pushsection .kprobes.text, "ax"
806 /* 810 /*
807 * The interrupt stubs push (~vector+0x80) onto the stack and 811 * The interrupt stubs push (~vector+0x80) onto the stack and
808 * then jump to common_interrupt. 812 * then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)
941 945
942 CFI_ENDPROC 946 CFI_ENDPROC
943END(common_interrupt) 947END(common_interrupt)
948/*
949 * End of kprobes section
950 */
951 .popsection
944 952
945/* 953/*
946 * APIC interrupts. 954 * APIC interrupts.
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..c5f1f117e0c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,12 +48,14 @@
48#include <linux/preempt.h> 48#include <linux/preempt.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h>
51 52
52#include <asm/cacheflush.h> 53#include <asm/cacheflush.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
54#include <asm/pgtable.h> 55#include <asm/pgtable.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/alternative.h> 57#include <asm/alternative.h>
58#include <asm/insn.h>
57 59
58void jprobe_return_end(void); 60void jprobe_return_end(void);
59 61
@@ -106,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
106 /* ----------------------------------------------- */ 108 /* ----------------------------------------------- */
107 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 109 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
108}; 110};
109static const u32 onebyte_has_modrm[256 / 32] = {
110 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
111 /* ----------------------------------------------- */
112 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
113 W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
114 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
115 W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
116 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
117 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
118 W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
119 W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
120 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
121 W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
122 W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
123 W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
124 W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
125 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
126 W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
127 W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
128 /* ----------------------------------------------- */
129 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
130};
131static const u32 twobyte_has_modrm[256 / 32] = {
132 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
133 /* ----------------------------------------------- */
134 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
135 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
136 W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
137 W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
138 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
139 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
140 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
141 W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
142 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
143 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
144 W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
145 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
146 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
147 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
148 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
149 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
150 /* ----------------------------------------------- */
151 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
152};
153#undef W 111#undef W
154 112
155struct kretprobe_blackpoint kretprobe_blacklist[] = { 113struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +202,75 @@ retry:
244 } 202 }
245} 203}
246 204
205/* Recover the probed instruction at addr for further analysis. */
206static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
207{
208 struct kprobe *kp;
209 kp = get_kprobe((void *)addr);
210 if (!kp)
211 return -EINVAL;
212
213 /*
214 * Basically, kp->ainsn.insn has an original instruction.
215 * However, RIP-relative instruction can not do single-stepping
216 * at different place, fix_riprel() tweaks the displacement of
217 * that instruction. In that case, we can't recover the instruction
218 * from the kp->ainsn.insn.
219 *
220 * On the other hand, kp->opcode has a copy of the first byte of
221 * the probed instruction, which is overwritten by int3. And
222 * the instruction at kp->addr is not modified by kprobes except
223 * for the first byte, we can recover the original instruction
224 * from it and kp->opcode.
225 */
226 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
227 buf[0] = kp->opcode;
228 return 0;
229}
230
231/* Dummy buffers for kallsyms_lookup */
232static char __dummy_buf[KSYM_NAME_LEN];
233
234/* Check if paddr is at an instruction boundary */
235static int __kprobes can_probe(unsigned long paddr)
236{
237 int ret;
238 unsigned long addr, offset = 0;
239 struct insn insn;
240 kprobe_opcode_t buf[MAX_INSN_SIZE];
241
242 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
243 return 0;
244
245 /* Decode instructions */
246 addr = paddr - offset;
247 while (addr < paddr) {
248 kernel_insn_init(&insn, (void *)addr);
249 insn_get_opcode(&insn);
250
251 /*
252 * Check if the instruction has been modified by another
253 * kprobe, in which case we replace the breakpoint by the
254 * original instruction in our buffer.
255 */
256 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
257 ret = recover_probed_instruction(buf, addr);
258 if (ret)
259 /*
260 * Another debugging subsystem might insert
261 * this breakpoint. In that case, we can't
262 * recover it.
263 */
264 return 0;
265 kernel_insn_init(&insn, buf);
266 }
267 insn_get_length(&insn);
268 addr += insn.length;
269 }
270
271 return (addr == paddr);
272}
273
247/* 274/*
248 * Returns non-zero if opcode modifies the interrupt flag. 275 * Returns non-zero if opcode modifies the interrupt flag.
249 */ 276 */
@@ -277,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
277static void __kprobes fix_riprel(struct kprobe *p) 304static void __kprobes fix_riprel(struct kprobe *p)
278{ 305{
279#ifdef CONFIG_X86_64 306#ifdef CONFIG_X86_64
280 u8 *insn = p->ainsn.insn; 307 struct insn insn;
281 s64 disp; 308 kernel_insn_init(&insn, p->ainsn.insn);
282 int need_modrm;
283
284 /* Skip legacy instruction prefixes. */
285 while (1) {
286 switch (*insn) {
287 case 0x66:
288 case 0x67:
289 case 0x2e:
290 case 0x3e:
291 case 0x26:
292 case 0x64:
293 case 0x65:
294 case 0x36:
295 case 0xf0:
296 case 0xf3:
297 case 0xf2:
298 ++insn;
299 continue;
300 }
301 break;
302 }
303 309
304 /* Skip REX instruction prefix. */ 310 if (insn_rip_relative(&insn)) {
305 if (is_REX_prefix(insn)) 311 s64 newdisp;
306 ++insn; 312 u8 *disp;
307 313 insn_get_displacement(&insn);
308 if (*insn == 0x0f) { 314 /*
309 /* Two-byte opcode. */ 315 * The copied instruction uses the %rip-relative addressing
310 ++insn; 316 * mode. Adjust the displacement for the difference between
311 need_modrm = test_bit(*insn, 317 * the original location of this instruction and the location
312 (unsigned long *)twobyte_has_modrm); 318 * of the copy that will actually be run. The tricky bit here
313 } else 319 * is making sure that the sign extension happens correctly in
314 /* One-byte opcode. */ 320 * this calculation, since we need a signed 32-bit result to
315 need_modrm = test_bit(*insn, 321 * be sign-extended to 64 bits when it's added to the %rip
316 (unsigned long *)onebyte_has_modrm); 322 * value and yield the same 64-bit result that the sign-
317 323 * extension of the original signed 32-bit displacement would
318 if (need_modrm) { 324 * have given.
319 u8 modrm = *++insn; 325 */
320 if ((modrm & 0xc7) == 0x05) { 326 newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
321 /* %rip+disp32 addressing mode */ 327 (u8 *) p->ainsn.insn;
322 /* Displacement follows ModRM byte. */ 328 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
323 ++insn; 329 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
324 /* 330 *(s32 *) disp = (s32) newdisp;
325 * The copied instruction uses the %rip-relative
326 * addressing mode. Adjust the displacement for the
327 * difference between the original location of this
328 * instruction and the location of the copy that will
329 * actually be run. The tricky bit here is making sure
330 * that the sign extension happens correctly in this
331 * calculation, since we need a signed 32-bit result to
332 * be sign-extended to 64 bits when it's added to the
333 * %rip value and yield the same 64-bit result that the
334 * sign-extension of the original signed 32-bit
335 * displacement would have given.
336 */
337 disp = (u8 *) p->addr + *((s32 *) insn) -
338 (u8 *) p->ainsn.insn;
339 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
340 *(s32 *)insn = (s32) disp;
341 }
342 } 331 }
343#endif 332#endif
344} 333}
@@ -359,6 +348,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
359 348
360int __kprobes arch_prepare_kprobe(struct kprobe *p) 349int __kprobes arch_prepare_kprobe(struct kprobe *p)
361{ 350{
351 if (!can_probe((unsigned long)p->addr))
352 return -EILSEQ;
362 /* insn: must be on special executable page on x86. */ 353 /* insn: must be on special executable page on x86. */
363 p->ainsn.insn = get_insn_slot(); 354 p->ainsn.insn = get_insn_slot();
364 if (!p->ainsn.insn) 355 if (!p->ainsn.insn)
@@ -472,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
472{ 463{
473 switch (kcb->kprobe_status) { 464 switch (kcb->kprobe_status) {
474 case KPROBE_HIT_SSDONE: 465 case KPROBE_HIT_SSDONE:
475#ifdef CONFIG_X86_64
476 /* TODO: Provide re-entrancy from post_kprobes_handler() and
477 * avoid exception stack corruption while single-stepping on
478 * the instruction of the new probe.
479 */
480 arch_disarm_kprobe(p);
481 regs->ip = (unsigned long)p->addr;
482 reset_current_kprobe();
483 preempt_enable_no_resched();
484 break;
485#endif
486 case KPROBE_HIT_ACTIVE: 466 case KPROBE_HIT_ACTIVE:
487 save_previous_kprobe(kcb); 467 save_previous_kprobe(kcb);
488 set_current_kprobe(p, regs, kcb); 468 set_current_kprobe(p, regs, kcb);
@@ -491,18 +471,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
491 kcb->kprobe_status = KPROBE_REENTER; 471 kcb->kprobe_status = KPROBE_REENTER;
492 break; 472 break;
493 case KPROBE_HIT_SS: 473 case KPROBE_HIT_SS:
494 if (p == kprobe_running()) { 474 /* A probe has been hit in the codepath leading up to, or just
495 regs->flags &= ~X86_EFLAGS_TF; 475 * after, single-stepping of a probed instruction. This entire
496 regs->flags |= kcb->kprobe_saved_flags; 476 * codepath should strictly reside in .kprobes.text section.
497 return 0; 477 * Raise a BUG or we'll continue in an endless reentering loop
498 } else { 478 * and eventually a stack overflow.
499 /* A probe has been hit in the codepath leading up 479 */
500 * to, or just after, single-stepping of a probed 480 printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
501 * instruction. This entire codepath should strictly 481 p->addr);
502 * reside in .kprobes.text section. Raise a warning 482 dump_kprobe(p);
503 * to highlight this peculiar case. 483 BUG();
504 */
505 }
506 default: 484 default:
507 /* impossible cases */ 485 /* impossible cases */
508 WARN_ON(1); 486 WARN_ON(1);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2dc66a..c4f76d275ee4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -49,6 +49,118 @@ enum x86_regset {
49 REGSET_IOPERM32, 49 REGSET_IOPERM32,
50}; 50};
51 51
52struct pt_regs_offset {
53 const char *name;
54 int offset;
55};
56
57#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
58#define REG_OFFSET_END {.name = NULL, .offset = 0}
59
60static const struct pt_regs_offset regoffset_table[] = {
61#ifdef CONFIG_X86_64
62 REG_OFFSET_NAME(r15),
63 REG_OFFSET_NAME(r14),
64 REG_OFFSET_NAME(r13),
65 REG_OFFSET_NAME(r12),
66 REG_OFFSET_NAME(r11),
67 REG_OFFSET_NAME(r10),
68 REG_OFFSET_NAME(r9),
69 REG_OFFSET_NAME(r8),
70#endif
71 REG_OFFSET_NAME(bx),
72 REG_OFFSET_NAME(cx),
73 REG_OFFSET_NAME(dx),
74 REG_OFFSET_NAME(si),
75 REG_OFFSET_NAME(di),
76 REG_OFFSET_NAME(bp),
77 REG_OFFSET_NAME(ax),
78#ifdef CONFIG_X86_32
79 REG_OFFSET_NAME(ds),
80 REG_OFFSET_NAME(es),
81 REG_OFFSET_NAME(fs),
82 REG_OFFSET_NAME(gs),
83#endif
84 REG_OFFSET_NAME(orig_ax),
85 REG_OFFSET_NAME(ip),
86 REG_OFFSET_NAME(cs),
87 REG_OFFSET_NAME(flags),
88 REG_OFFSET_NAME(sp),
89 REG_OFFSET_NAME(ss),
90 REG_OFFSET_END,
91};
92
93/**
94 * regs_query_register_offset() - query register offset from its name
95 * @name: the name of a register
96 *
97 * regs_query_register_offset() returns the offset of a register in struct
98 * pt_regs from its name. If the name is invalid, this returns -EINVAL;
99 */
100int regs_query_register_offset(const char *name)
101{
102 const struct pt_regs_offset *roff;
103 for (roff = regoffset_table; roff->name != NULL; roff++)
104 if (!strcmp(roff->name, name))
105 return roff->offset;
106 return -EINVAL;
107}
108
109/**
110 * regs_query_register_name() - query register name from its offset
111 * @offset: the offset of a register in struct pt_regs.
112 *
113 * regs_query_register_name() returns the name of a register from its
114 * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
115 */
116const char *regs_query_register_name(unsigned int offset)
117{
118 const struct pt_regs_offset *roff;
119 for (roff = regoffset_table; roff->name != NULL; roff++)
120 if (roff->offset == offset)
121 return roff->name;
122 return NULL;
123}
124
125static const int arg_offs_table[] = {
126#ifdef CONFIG_X86_32
127 [0] = offsetof(struct pt_regs, ax),
128 [1] = offsetof(struct pt_regs, dx),
129 [2] = offsetof(struct pt_regs, cx)
130#else /* CONFIG_X86_64 */
131 [0] = offsetof(struct pt_regs, di),
132 [1] = offsetof(struct pt_regs, si),
133 [2] = offsetof(struct pt_regs, dx),
134 [3] = offsetof(struct pt_regs, cx),
135 [4] = offsetof(struct pt_regs, r8),
136 [5] = offsetof(struct pt_regs, r9)
137#endif
138};
139
140/**
141 * regs_get_argument_nth() - get Nth argument at function call
142 * @regs: pt_regs which contains registers at function entry.
143 * @n: argument number.
144 *
145 * regs_get_argument_nth() returns @n th argument of a function call.
146 * Since usually the kernel stack will be changed right after function entry,
147 * you must use this at function entry. If the @n th entry is NOT in the
148 * kernel stack or pt_regs, this returns 0.
149 */
150unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
151{
152 if (n < ARRAY_SIZE(arg_offs_table))
153 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
154 else {
155 /*
156 * The typical case: arg n is on the stack.
157 * (Note: stack[0] = return address, so skip it)
158 */
159 n -= ARRAY_SIZE(arg_offs_table);
160 return regs_get_kernel_stack_nth(regs, 1 + n);
161 }
162}
163
52/* 164/*
53 * does not yet catch signals sent when the child dies. 165 * does not yet catch signals sent when the child dies.
54 * in exit.c or in signal.c. 166 * in exit.c or in signal.c.
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore
new file mode 100644
index 000000000000..8df89f0a3fe6
--- /dev/null
+++ b/arch/x86/lib/.gitignore
@@ -0,0 +1 @@
inat-tables.c
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 85f5db95c60f..a2d6472895fb 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,12 +2,25 @@
2# Makefile for x86 specific library files. 2# Makefile for x86 specific library files.
3# 3#
4 4
5inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
6inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
7quiet_cmd_inat_tables = GEN $@
8 cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
9
10$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
11 $(call cmd,inat_tables)
12
13$(obj)/inat.o: $(obj)/inat-tables.c
14
15clean-files := inat-tables.c
16
5obj-$(CONFIG_SMP) := msr.o 17obj-$(CONFIG_SMP) := msr.o
6 18
7lib-y := delay.o 19lib-y := delay.o
8lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
9lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
10lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-y += insn.o inat.o
11 24
12obj-y += msr-reg.o msr-reg-export.o 25obj-y += msr-reg.o msr-reg-export.o
13 26
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
new file mode 100644
index 000000000000..46fc4ee09fc4
--- /dev/null
+++ b/arch/x86/lib/inat.c
@@ -0,0 +1,90 @@
1/*
2 * x86 instruction attribute tables
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21#include <asm/insn.h>
22
23/* Attribute tables are generated from opcode map */
24#include "inat-tables.c"
25
26/* Attribute search APIs */
27insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
28{
29 return inat_primary_table[opcode];
30}
31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
33 insn_attr_t esc_attr)
34{
35 const insn_attr_t *table;
36 insn_attr_t lpfx_attr;
37 int n, m = 0;
38
39 n = inat_escape_id(esc_attr);
40 if (last_pfx) {
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0];
45 if (!table)
46 return 0;
47 if (inat_has_variant(table[opcode]) && m) {
48 table = inat_escape_tables[n][m];
49 if (!table)
50 return 0;
51 }
52 return table[opcode];
53}
54
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
56 insn_attr_t grp_attr)
57{
58 const insn_attr_t *table;
59 insn_attr_t lpfx_attr;
60 int n, m = 0;
61
62 n = inat_group_id(grp_attr);
63 if (last_pfx) {
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0];
68 if (!table)
69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
71 table = inat_group_tables[n][m];
72 if (!table)
73 return inat_group_common_attribute(grp_attr);
74 }
75 return table[X86_MODRM_REG(modrm)] |
76 inat_group_common_attribute(grp_attr);
77}
78
79insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
80 insn_byte_t vex_p)
81{
82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0;
85 table = inat_avx_tables[vex_m][vex_p];
86 if (!table)
87 return 0;
88 return table[opcode];
89}
90
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 000000000000..9f33b984d0ef
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,516 @@
1/*
2 * x86 instruction analysis
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */
20
21#include <linux/string.h>
22#include <asm/inat.h>
23#include <asm/insn.h>
24
25#define get_next(t, insn) \
26 ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
27
28#define peek_next(t, insn) \
29 ({t r; r = *(t*)insn->next_byte; r; })
30
31#define peek_nbyte_next(t, insn, n) \
32 ({t r; r = *(t*)((insn)->next_byte + n); r; })
33
34/**
35 * insn_init() - initialize struct insn
36 * @insn: &struct insn to be initialized
37 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
38 * @x86_64: !0 for 64-bit kernel or 64-bit app
39 */
40void insn_init(struct insn *insn, const void *kaddr, int x86_64)
41{
42 memset(insn, 0, sizeof(*insn));
43 insn->kaddr = kaddr;
44 insn->next_byte = kaddr;
45 insn->x86_64 = x86_64 ? 1 : 0;
46 insn->opnd_bytes = 4;
47 if (x86_64)
48 insn->addr_bytes = 8;
49 else
50 insn->addr_bytes = 4;
51}
52
53/**
54 * insn_get_prefixes - scan x86 instruction prefix bytes
55 * @insn: &struct insn containing instruction
56 *
57 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
58 * to point to the (first) opcode. No effect if @insn->prefixes.got
59 * is already set.
60 */
61void insn_get_prefixes(struct insn *insn)
62{
63 struct insn_field *prefixes = &insn->prefixes;
64 insn_attr_t attr;
65 insn_byte_t b, lb;
66 int i, nb;
67
68 if (prefixes->got)
69 return;
70
71 nb = 0;
72 lb = 0;
73 b = peek_next(insn_byte_t, insn);
74 attr = inat_get_opcode_attribute(b);
75 while (inat_is_legacy_prefix(attr)) {
76 /* Skip if same prefix */
77 for (i = 0; i < nb; i++)
78 if (prefixes->bytes[i] == b)
79 goto found;
80 if (nb == 4)
81 /* Invalid instruction */
82 break;
83 prefixes->bytes[nb++] = b;
84 if (inat_is_address_size_prefix(attr)) {
85 /* address size switches 2/4 or 4/8 */
86 if (insn->x86_64)
87 insn->addr_bytes ^= 12;
88 else
89 insn->addr_bytes ^= 6;
90 } else if (inat_is_operand_size_prefix(attr)) {
91 /* oprand size switches 2/4 */
92 insn->opnd_bytes ^= 6;
93 }
94found:
95 prefixes->nbytes++;
96 insn->next_byte++;
97 lb = b;
98 b = peek_next(insn_byte_t, insn);
99 attr = inat_get_opcode_attribute(b);
100 }
101 /* Set the last prefix */
102 if (lb && lb != insn->prefixes.bytes[3]) {
103 if (unlikely(insn->prefixes.bytes[3])) {
104 /* Swap the last prefix */
105 b = insn->prefixes.bytes[3];
106 for (i = 0; i < nb; i++)
107 if (prefixes->bytes[i] == lb)
108 prefixes->bytes[i] = b;
109 }
110 insn->prefixes.bytes[3] = lb;
111 }
112
113 /* Decode REX prefix */
114 if (insn->x86_64) {
115 b = peek_next(insn_byte_t, insn);
116 attr = inat_get_opcode_attribute(b);
117 if (inat_is_rex_prefix(attr)) {
118 insn->rex_prefix.value = b;
119 insn->rex_prefix.nbytes = 1;
120 insn->next_byte++;
121 if (X86_REX_W(b))
122 /* REX.W overrides opnd_size */
123 insn->opnd_bytes = 8;
124 }
125 }
126 insn->rex_prefix.got = 1;
127
128 /* Decode VEX prefix */
129 b = peek_next(insn_byte_t, insn);
130 attr = inat_get_opcode_attribute(b);
131 if (inat_is_vex_prefix(attr)) {
132 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
133 if (!insn->x86_64) {
134 /*
135 * In 32-bits mode, if the [7:6] bits (mod bits of
136 * ModRM) on the second byte are not 11b, it is
137 * LDS or LES.
138 */
139 if (X86_MODRM_MOD(b2) != 3)
140 goto vex_end;
141 }
142 insn->vex_prefix.bytes[0] = b;
143 insn->vex_prefix.bytes[1] = b2;
144 if (inat_is_vex3_prefix(attr)) {
145 b2 = peek_nbyte_next(insn_byte_t, insn, 2);
146 insn->vex_prefix.bytes[2] = b2;
147 insn->vex_prefix.nbytes = 3;
148 insn->next_byte += 3;
149 if (insn->x86_64 && X86_VEX_W(b2))
150 /* VEX.W overrides opnd_size */
151 insn->opnd_bytes = 8;
152 } else {
153 insn->vex_prefix.nbytes = 2;
154 insn->next_byte += 2;
155 }
156 }
157vex_end:
158 insn->vex_prefix.got = 1;
159
160 prefixes->got = 1;
161 return;
162}
163
164/**
165 * insn_get_opcode - collect opcode(s)
166 * @insn: &struct insn containing instruction
167 *
168 * Populates @insn->opcode, updates @insn->next_byte to point past the
169 * opcode byte(s), and set @insn->attr (except for groups).
170 * If necessary, first collects any preceding (prefix) bytes.
171 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
172 * is already 1.
173 */
174void insn_get_opcode(struct insn *insn)
175{
176 struct insn_field *opcode = &insn->opcode;
177 insn_byte_t op, pfx;
178 if (opcode->got)
179 return;
180 if (!insn->prefixes.got)
181 insn_get_prefixes(insn);
182
183 /* Get first opcode */
184 op = get_next(insn_byte_t, insn);
185 opcode->bytes[0] = op;
186 opcode->nbytes = 1;
187
188 /* Check if there is VEX prefix or not */
189 if (insn_is_avx(insn)) {
190 insn_byte_t m, p;
191 m = insn_vex_m_bits(insn);
192 p = insn_vex_p_bits(insn);
193 insn->attr = inat_get_avx_attribute(op, m, p);
194 if (!inat_accept_vex(insn->attr))
195 insn->attr = 0; /* This instruction is bad */
196 goto end; /* VEX has only 1 byte for opcode */
197 }
198
199 insn->attr = inat_get_opcode_attribute(op);
200 while (inat_is_escape(insn->attr)) {
201 /* Get escaped opcode */
202 op = get_next(insn_byte_t, insn);
203 opcode->bytes[opcode->nbytes++] = op;
204 pfx = insn_last_prefix(insn);
205 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
206 }
207 if (inat_must_vex(insn->attr))
208 insn->attr = 0; /* This instruction is bad */
209end:
210 opcode->got = 1;
211}
212
213/**
214 * insn_get_modrm - collect ModRM byte, if any
215 * @insn: &struct insn containing instruction
216 *
217 * Populates @insn->modrm and updates @insn->next_byte to point past the
218 * ModRM byte, if any. If necessary, first collects the preceding bytes
219 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
220 */
221void insn_get_modrm(struct insn *insn)
222{
223 struct insn_field *modrm = &insn->modrm;
224 insn_byte_t pfx, mod;
225 if (modrm->got)
226 return;
227 if (!insn->opcode.got)
228 insn_get_opcode(insn);
229
230 if (inat_has_modrm(insn->attr)) {
231 mod = get_next(insn_byte_t, insn);
232 modrm->value = mod;
233 modrm->nbytes = 1;
234 if (inat_is_group(insn->attr)) {
235 pfx = insn_last_prefix(insn);
236 insn->attr = inat_get_group_attribute(mod, pfx,
237 insn->attr);
238 }
239 }
240
241 if (insn->x86_64 && inat_is_force64(insn->attr))
242 insn->opnd_bytes = 8;
243 modrm->got = 1;
244}
245
246
247/**
248 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
249 * @insn: &struct insn containing instruction
250 *
251 * If necessary, first collects the instruction up to and including the
252 * ModRM byte. No effect if @insn->x86_64 is 0.
253 */
254int insn_rip_relative(struct insn *insn)
255{
256 struct insn_field *modrm = &insn->modrm;
257
258 if (!insn->x86_64)
259 return 0;
260 if (!modrm->got)
261 insn_get_modrm(insn);
262 /*
263 * For rip-relative instructions, the mod field (top 2 bits)
264 * is zero and the r/m field (bottom 3 bits) is 0x5.
265 */
266 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
267}
268
269/**
270 * insn_get_sib() - Get the SIB byte of instruction
271 * @insn: &struct insn containing instruction
272 *
273 * If necessary, first collects the instruction up to and including the
274 * ModRM byte.
275 */
276void insn_get_sib(struct insn *insn)
277{
278 insn_byte_t modrm;
279
280 if (insn->sib.got)
281 return;
282 if (!insn->modrm.got)
283 insn_get_modrm(insn);
284 if (insn->modrm.nbytes) {
285 modrm = (insn_byte_t)insn->modrm.value;
286 if (insn->addr_bytes != 2 &&
287 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
288 insn->sib.value = get_next(insn_byte_t, insn);
289 insn->sib.nbytes = 1;
290 }
291 }
292 insn->sib.got = 1;
293}
294
295
296/**
297 * insn_get_displacement() - Get the displacement of instruction
298 * @insn: &struct insn containing instruction
299 *
300 * If necessary, first collects the instruction up to and including the
301 * SIB byte.
302 * Displacement value is sign-expanded.
303 */
304void insn_get_displacement(struct insn *insn)
305{
306 insn_byte_t mod, rm, base;
307
308 if (insn->displacement.got)
309 return;
310 if (!insn->sib.got)
311 insn_get_sib(insn);
312 if (insn->modrm.nbytes) {
313 /*
314 * Interpreting the modrm byte:
315 * mod = 00 - no displacement fields (exceptions below)
316 * mod = 01 - 1-byte displacement field
317 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
318 * address size = 2 (0x67 prefix in 32-bit mode)
319 * mod = 11 - no memory operand
320 *
321 * If address size = 2...
322 * mod = 00, r/m = 110 - displacement field is 2 bytes
323 *
324 * If address size != 2...
325 * mod != 11, r/m = 100 - SIB byte exists
326 * mod = 00, SIB base = 101 - displacement field is 4 bytes
327 * mod = 00, r/m = 101 - rip-relative addressing, displacement
328 * field is 4 bytes
329 */
330 mod = X86_MODRM_MOD(insn->modrm.value);
331 rm = X86_MODRM_RM(insn->modrm.value);
332 base = X86_SIB_BASE(insn->sib.value);
333 if (mod == 3)
334 goto out;
335 if (mod == 1) {
336 insn->displacement.value = get_next(char, insn);
337 insn->displacement.nbytes = 1;
338 } else if (insn->addr_bytes == 2) {
339 if ((mod == 0 && rm == 6) || mod == 2) {
340 insn->displacement.value =
341 get_next(short, insn);
342 insn->displacement.nbytes = 2;
343 }
344 } else {
345 if ((mod == 0 && rm == 5) || mod == 2 ||
346 (mod == 0 && base == 5)) {
347 insn->displacement.value = get_next(int, insn);
348 insn->displacement.nbytes = 4;
349 }
350 }
351 }
352out:
353 insn->displacement.got = 1;
354}
355
356/* Decode moffset16/32/64 */
357static void __get_moffset(struct insn *insn)
358{
359 switch (insn->addr_bytes) {
360 case 2:
361 insn->moffset1.value = get_next(short, insn);
362 insn->moffset1.nbytes = 2;
363 break;
364 case 4:
365 insn->moffset1.value = get_next(int, insn);
366 insn->moffset1.nbytes = 4;
367 break;
368 case 8:
369 insn->moffset1.value = get_next(int, insn);
370 insn->moffset1.nbytes = 4;
371 insn->moffset2.value = get_next(int, insn);
372 insn->moffset2.nbytes = 4;
373 break;
374 }
375 insn->moffset1.got = insn->moffset2.got = 1;
376}
377
378/* Decode imm v32(Iz) */
379static void __get_immv32(struct insn *insn)
380{
381 switch (insn->opnd_bytes) {
382 case 2:
383 insn->immediate.value = get_next(short, insn);
384 insn->immediate.nbytes = 2;
385 break;
386 case 4:
387 case 8:
388 insn->immediate.value = get_next(int, insn);
389 insn->immediate.nbytes = 4;
390 break;
391 }
392}
393
394/* Decode imm v64(Iv/Ov) */
395static void __get_immv(struct insn *insn)
396{
397 switch (insn->opnd_bytes) {
398 case 2:
399 insn->immediate1.value = get_next(short, insn);
400 insn->immediate1.nbytes = 2;
401 break;
402 case 4:
403 insn->immediate1.value = get_next(int, insn);
404 insn->immediate1.nbytes = 4;
405 break;
406 case 8:
407 insn->immediate1.value = get_next(int, insn);
408 insn->immediate1.nbytes = 4;
409 insn->immediate2.value = get_next(int, insn);
410 insn->immediate2.nbytes = 4;
411 break;
412 }
413 insn->immediate1.got = insn->immediate2.got = 1;
414}
415
416/* Decode ptr16:16/32(Ap) */
417static void __get_immptr(struct insn *insn)
418{
419 switch (insn->opnd_bytes) {
420 case 2:
421 insn->immediate1.value = get_next(short, insn);
422 insn->immediate1.nbytes = 2;
423 break;
424 case 4:
425 insn->immediate1.value = get_next(int, insn);
426 insn->immediate1.nbytes = 4;
427 break;
428 case 8:
429 /* ptr16:64 is not exist (no segment) */
430 return;
431 }
432 insn->immediate2.value = get_next(unsigned short, insn);
433 insn->immediate2.nbytes = 2;
434 insn->immediate1.got = insn->immediate2.got = 1;
435}
436
437/**
438 * insn_get_immediate() - Get the immediates of instruction
439 * @insn: &struct insn containing instruction
440 *
441 * If necessary, first collects the instruction up to and including the
442 * displacement bytes.
443 * Basically, most of immediates are sign-expanded. Unsigned-value can be
444 * get by bit masking with ((1 << (nbytes * 8)) - 1)
445 */
446void insn_get_immediate(struct insn *insn)
447{
448 if (insn->immediate.got)
449 return;
450 if (!insn->displacement.got)
451 insn_get_displacement(insn);
452
453 if (inat_has_moffset(insn->attr)) {
454 __get_moffset(insn);
455 goto done;
456 }
457
458 if (!inat_has_immediate(insn->attr))
459 /* no immediates */
460 goto done;
461
462 switch (inat_immediate_size(insn->attr)) {
463 case INAT_IMM_BYTE:
464 insn->immediate.value = get_next(char, insn);
465 insn->immediate.nbytes = 1;
466 break;
467 case INAT_IMM_WORD:
468 insn->immediate.value = get_next(short, insn);
469 insn->immediate.nbytes = 2;
470 break;
471 case INAT_IMM_DWORD:
472 insn->immediate.value = get_next(int, insn);
473 insn->immediate.nbytes = 4;
474 break;
475 case INAT_IMM_QWORD:
476 insn->immediate1.value = get_next(int, insn);
477 insn->immediate1.nbytes = 4;
478 insn->immediate2.value = get_next(int, insn);
479 insn->immediate2.nbytes = 4;
480 break;
481 case INAT_IMM_PTR:
482 __get_immptr(insn);
483 break;
484 case INAT_IMM_VWORD32:
485 __get_immv32(insn);
486 break;
487 case INAT_IMM_VWORD:
488 __get_immv(insn);
489 break;
490 default:
491 break;
492 }
493 if (inat_has_second_immediate(insn->attr)) {
494 insn->immediate2.value = get_next(char, insn);
495 insn->immediate2.nbytes = 1;
496 }
497done:
498 insn->immediate.got = 1;
499}
500
501/**
502 * insn_get_length() - Get the length of instruction
503 * @insn: &struct insn containing instruction
504 *
505 * If necessary, first collects the instruction up to and including the
506 * immediates bytes.
507 */
508void insn_get_length(struct insn *insn)
509{
510 if (insn->length)
511 return;
512 if (!insn->immediate.got)
513 insn_get_immediate(insn);
514 insn->length = (unsigned char)((unsigned long)insn->next_byte
515 - (unsigned long)insn->kaddr);
516}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
new file mode 100644
index 000000000000..a793da5e560e
--- /dev/null
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -0,0 +1,893 @@
1# x86 Opcode Maps
2#
3#<Opcode maps>
4# Table: table-name
5# Referrer: escaped-name
6# AVXcode: avx-code
7# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
8# (or)
9# opcode: escape # escaped-name
10# EndTable
11#
12#<group maps>
13# GrpTable: GrpXXX
14# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
15# EndTable
16#
17# AVX Superscripts
18# (VEX): this opcode can accept VEX prefix.
19# (oVEX): this opcode requires VEX prefix.
20# (o128): this opcode only supports 128bit VEX.
21# (o256): this opcode only supports 256bit VEX.
22#
23
24Table: one byte opcode
25Referrer:
26AVXcode:
27# 0x00 - 0x0f
2800: ADD Eb,Gb
2901: ADD Ev,Gv
3002: ADD Gb,Eb
3103: ADD Gv,Ev
3204: ADD AL,Ib
3305: ADD rAX,Iz
3406: PUSH ES (i64)
3507: POP ES (i64)
3608: OR Eb,Gb
3709: OR Ev,Gv
380a: OR Gb,Eb
390b: OR Gv,Ev
400c: OR AL,Ib
410d: OR rAX,Iz
420e: PUSH CS (i64)
430f: escape # 2-byte escape
44# 0x10 - 0x1f
4510: ADC Eb,Gb
4611: ADC Ev,Gv
4712: ADC Gb,Eb
4813: ADC Gv,Ev
4914: ADC AL,Ib
5015: ADC rAX,Iz
5116: PUSH SS (i64)
5217: POP SS (i64)
5318: SBB Eb,Gb
5419: SBB Ev,Gv
551a: SBB Gb,Eb
561b: SBB Gv,Ev
571c: SBB AL,Ib
581d: SBB rAX,Iz
591e: PUSH DS (i64)
601f: POP DS (i64)
61# 0x20 - 0x2f
6220: AND Eb,Gb
6321: AND Ev,Gv
6422: AND Gb,Eb
6523: AND Gv,Ev
6624: AND AL,Ib
6725: AND rAx,Iz
6826: SEG=ES (Prefix)
6927: DAA (i64)
7028: SUB Eb,Gb
7129: SUB Ev,Gv
722a: SUB Gb,Eb
732b: SUB Gv,Ev
742c: SUB AL,Ib
752d: SUB rAX,Iz
762e: SEG=CS (Prefix)
772f: DAS (i64)
78# 0x30 - 0x3f
7930: XOR Eb,Gb
8031: XOR Ev,Gv
8132: XOR Gb,Eb
8233: XOR Gv,Ev
8334: XOR AL,Ib
8435: XOR rAX,Iz
8536: SEG=SS (Prefix)
8637: AAA (i64)
8738: CMP Eb,Gb
8839: CMP Ev,Gv
893a: CMP Gb,Eb
903b: CMP Gv,Ev
913c: CMP AL,Ib
923d: CMP rAX,Iz
933e: SEG=DS (Prefix)
943f: AAS (i64)
95# 0x40 - 0x4f
9640: INC eAX (i64) | REX (o64)
9741: INC eCX (i64) | REX.B (o64)
9842: INC eDX (i64) | REX.X (o64)
9943: INC eBX (i64) | REX.XB (o64)
10044: INC eSP (i64) | REX.R (o64)
10145: INC eBP (i64) | REX.RB (o64)
10246: INC eSI (i64) | REX.RX (o64)
10347: INC eDI (i64) | REX.RXB (o64)
10448: DEC eAX (i64) | REX.W (o64)
10549: DEC eCX (i64) | REX.WB (o64)
1064a: DEC eDX (i64) | REX.WX (o64)
1074b: DEC eBX (i64) | REX.WXB (o64)
1084c: DEC eSP (i64) | REX.WR (o64)
1094d: DEC eBP (i64) | REX.WRB (o64)
1104e: DEC eSI (i64) | REX.WRX (o64)
1114f: DEC eDI (i64) | REX.WRXB (o64)
112# 0x50 - 0x5f
11350: PUSH rAX/r8 (d64)
11451: PUSH rCX/r9 (d64)
11552: PUSH rDX/r10 (d64)
11653: PUSH rBX/r11 (d64)
11754: PUSH rSP/r12 (d64)
11855: PUSH rBP/r13 (d64)
11956: PUSH rSI/r14 (d64)
12057: PUSH rDI/r15 (d64)
12158: POP rAX/r8 (d64)
12259: POP rCX/r9 (d64)
1235a: POP rDX/r10 (d64)
1245b: POP rBX/r11 (d64)
1255c: POP rSP/r12 (d64)
1265d: POP rBP/r13 (d64)
1275e: POP rSI/r14 (d64)
1285f: POP rDI/r15 (d64)
129# 0x60 - 0x6f
13060: PUSHA/PUSHAD (i64)
13161: POPA/POPAD (i64)
13262: BOUND Gv,Ma (i64)
13363: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
13464: SEG=FS (Prefix)
13565: SEG=GS (Prefix)
13666: Operand-Size (Prefix)
13767: Address-Size (Prefix)
13868: PUSH Iz (d64)
13969: IMUL Gv,Ev,Iz
1406a: PUSH Ib (d64)
1416b: IMUL Gv,Ev,Ib
1426c: INS/INSB Yb,DX
1436d: INS/INSW/INSD Yz,DX
1446e: OUTS/OUTSB DX,Xb
1456f: OUTS/OUTSW/OUTSD DX,Xz
146# 0x70 - 0x7f
14770: JO Jb
14871: JNO Jb
14972: JB/JNAE/JC Jb
15073: JNB/JAE/JNC Jb
15174: JZ/JE Jb
15275: JNZ/JNE Jb
15376: JBE/JNA Jb
15477: JNBE/JA Jb
15578: JS Jb
15679: JNS Jb
1577a: JP/JPE Jb
1587b: JNP/JPO Jb
1597c: JL/JNGE Jb
1607d: JNL/JGE Jb
1617e: JLE/JNG Jb
1627f: JNLE/JG Jb
163# 0x80 - 0x8f
16480: Grp1 Eb,Ib (1A)
16581: Grp1 Ev,Iz (1A)
16682: Grp1 Eb,Ib (1A),(i64)
16783: Grp1 Ev,Ib (1A)
16884: TEST Eb,Gb
16985: TEST Ev,Gv
17086: XCHG Eb,Gb
17187: XCHG Ev,Gv
17288: MOV Eb,Gb
17389: MOV Ev,Gv
1748a: MOV Gb,Eb
1758b: MOV Gv,Ev
1768c: MOV Ev,Sw
1778d: LEA Gv,M
1788e: MOV Sw,Ew
1798f: Grp1A (1A) | POP Ev (d64)
180# 0x90 - 0x9f
18190: NOP | PAUSE (F3) | XCHG r8,rAX
18291: XCHG rCX/r9,rAX
18392: XCHG rDX/r10,rAX
18493: XCHG rBX/r11,rAX
18594: XCHG rSP/r12,rAX
18695: XCHG rBP/r13,rAX
18796: XCHG rSI/r14,rAX
18897: XCHG rDI/r15,rAX
18998: CBW/CWDE/CDQE
19099: CWD/CDQ/CQO
1919a: CALLF Ap (i64)
1929b: FWAIT/WAIT
1939c: PUSHF/D/Q Fv (d64)
1949d: POPF/D/Q Fv (d64)
1959e: SAHF
1969f: LAHF
197# 0xa0 - 0xaf
198a0: MOV AL,Ob
199a1: MOV rAX,Ov
200a2: MOV Ob,AL
201a3: MOV Ov,rAX
202a4: MOVS/B Xb,Yb
203a5: MOVS/W/D/Q Xv,Yv
204a6: CMPS/B Xb,Yb
205a7: CMPS/W/D Xv,Yv
206a8: TEST AL,Ib
207a9: TEST rAX,Iz
208aa: STOS/B Yb,AL
209ab: STOS/W/D/Q Yv,rAX
210ac: LODS/B AL,Xb
211ad: LODS/W/D/Q rAX,Xv
212ae: SCAS/B AL,Yb
213af: SCAS/W/D/Q rAX,Xv
214# 0xb0 - 0xbf
215b0: MOV AL/R8L,Ib
216b1: MOV CL/R9L,Ib
217b2: MOV DL/R10L,Ib
218b3: MOV BL/R11L,Ib
219b4: MOV AH/R12L,Ib
220b5: MOV CH/R13L,Ib
221b6: MOV DH/R14L,Ib
222b7: MOV BH/R15L,Ib
223b8: MOV rAX/r8,Iv
224b9: MOV rCX/r9,Iv
225ba: MOV rDX/r10,Iv
226bb: MOV rBX/r11,Iv
227bc: MOV rSP/r12,Iv
228bd: MOV rBP/r13,Iv
229be: MOV rSI/r14,Iv
230bf: MOV rDI/r15,Iv
231# 0xc0 - 0xcf
232c0: Grp2 Eb,Ib (1A)
233c1: Grp2 Ev,Ib (1A)
234c2: RETN Iw (f64)
235c3: RETN
236c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
237c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
238c6: Grp11 Eb,Ib (1A)
239c7: Grp11 Ev,Iz (1A)
240c8: ENTER Iw,Ib
241c9: LEAVE (d64)
242ca: RETF Iw
243cb: RETF
244cc: INT3
245cd: INT Ib
246ce: INTO (i64)
247cf: IRET/D/Q
248# 0xd0 - 0xdf
249d0: Grp2 Eb,1 (1A)
250d1: Grp2 Ev,1 (1A)
251d2: Grp2 Eb,CL (1A)
252d3: Grp2 Ev,CL (1A)
253d4: AAM Ib (i64)
254d5: AAD Ib (i64)
255d6:
256d7: XLAT/XLATB
257d8: ESC
258d9: ESC
259da: ESC
260db: ESC
261dc: ESC
262dd: ESC
263de: ESC
264df: ESC
265# 0xe0 - 0xef
266e0: LOOPNE/LOOPNZ Jb (f64)
267e1: LOOPE/LOOPZ Jb (f64)
268e2: LOOP Jb (f64)
269e3: JrCXZ Jb (f64)
270e4: IN AL,Ib
271e5: IN eAX,Ib
272e6: OUT Ib,AL
273e7: OUT Ib,eAX
274e8: CALL Jz (f64)
275e9: JMP-near Jz (f64)
276ea: JMP-far Ap (i64)
277eb: JMP-short Jb (f64)
278ec: IN AL,DX
279ed: IN eAX,DX
280ee: OUT DX,AL
281ef: OUT DX,eAX
282# 0xf0 - 0xff
283f0: LOCK (Prefix)
284f1:
285f2: REPNE (Prefix)
286f3: REP/REPE (Prefix)
287f4: HLT
288f5: CMC
289f6: Grp3_1 Eb (1A)
290f7: Grp3_2 Ev (1A)
291f8: CLC
292f9: STC
293fa: CLI
294fb: STI
295fc: CLD
296fd: STD
297fe: Grp4 (1A)
298ff: Grp5 (1A)
299EndTable
300
301Table: 2-byte opcode (0x0f)
302Referrer: 2-byte escape
303AVXcode: 1
304# 0x0f 0x00-0x0f
30500: Grp6 (1A)
30601: Grp7 (1A)
30702: LAR Gv,Ew
30803: LSL Gv,Ew
30904:
31005: SYSCALL (o64)
31106: CLTS
31207: SYSRET (o64)
31308: INVD
31409: WBINVD
3150a:
3160b: UD2 (1B)
3170c:
3180d: NOP Ev | GrpP
3190e: FEMMS
320# 3DNow! uses the last imm byte as opcode extension.
3210f: 3DNow! Pq,Qq,Ib
322# 0x0f 0x10-0x1f
32310: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
32411: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
32512: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
32613: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
32714: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
32815: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
32916: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
33017: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
33118: Grp16 (1A)
33219:
3331a:
3341b:
3351c:
3361d:
3371e:
3381f: NOP Ev
339# 0x0f 0x20-0x2f
34020: MOV Rd,Cd
34121: MOV Rd,Dd
34222: MOV Cd,Rd
34323: MOV Dd,Rd
34424:
34525:
34626:
34727:
34828: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
34929: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
3502a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
3512b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
3522c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
3532d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
3542e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128)
3552f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128)
356# 0x0f 0x30-0x3f
35730: WRMSR
35831: RDTSC
35932: RDMSR
36033: RDPMC
36134: SYSENTER
36235: SYSEXIT
36336:
36437: GETSEC
36538: escape # 3-byte escape 1
36639:
3673a: escape # 3-byte escape 2
3683b:
3693c:
3703d:
3713e:
3723f:
373# 0x0f 0x40-0x4f
37440: CMOVO Gv,Ev
37541: CMOVNO Gv,Ev
37642: CMOVB/C/NAE Gv,Ev
37743: CMOVAE/NB/NC Gv,Ev
37844: CMOVE/Z Gv,Ev
37945: CMOVNE/NZ Gv,Ev
38046: CMOVBE/NA Gv,Ev
38147: CMOVA/NBE Gv,Ev
38248: CMOVS Gv,Ev
38349: CMOVNS Gv,Ev
3844a: CMOVP/PE Gv,Ev
3854b: CMOVNP/PO Gv,Ev
3864c: CMOVL/NGE Gv,Ev
3874d: CMOVNL/GE Gv,Ev
3884e: CMOVLE/NG Gv,Ev
3894f: CMOVNLE/G Gv,Ev
390# 0x0f 0x50-0x5f
39150: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
39251: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
39352: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
39453: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
39554: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
39655: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
39756: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
39857: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
39958: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
40059: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
4015a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
4025b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
4035c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
4045d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
4055e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
4065f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
407# 0x0f 0x60-0x6f
40860: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
40961: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
41062: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
41163: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
41264: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
41365: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
41466: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
41567: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
41668: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
41769: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
4186a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
4196b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
4206c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
4216d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
4226e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
4236f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
424# 0x0f 0x70-0x7f
42570: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128)
42671: Grp12 (1A)
42772: Grp13 (1A)
42873: Grp14 (1A)
42974: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
43075: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
43176: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
43277: emms/vzeroupper/vzeroall (VEX)
43378: VMREAD Ed/q,Gd/q
43479: VMWRITE Gd/q,Ed/q
4357a:
4367b:
4377c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
4387d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
4397e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
4407f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
441# 0x0f 0x80-0x8f
44280: JO Jz (f64)
44381: JNO Jz (f64)
44482: JB/JNAE/JC Jz (f64)
44583: JNB/JAE/JNC Jz (f64)
44684: JZ/JE Jz (f64)
44785: JNZ/JNE Jz (f64)
44886: JBE/JNA Jz (f64)
44987: JNBE/JA Jz (f64)
45088: JS Jz (f64)
45189: JNS Jz (f64)
4528a: JP/JPE Jz (f64)
4538b: JNP/JPO Jz (f64)
4548c: JL/JNGE Jz (f64)
4558d: JNL/JGE Jz (f64)
4568e: JLE/JNG Jz (f64)
4578f: JNLE/JG Jz (f64)
458# 0x0f 0x90-0x9f
45990: SETO Eb
46091: SETNO Eb
46192: SETB/C/NAE Eb
46293: SETAE/NB/NC Eb
46394: SETE/Z Eb
46495: SETNE/NZ Eb
46596: SETBE/NA Eb
46697: SETA/NBE Eb
46798: SETS Eb
46899: SETNS Eb
4699a: SETP/PE Eb
4709b: SETNP/PO Eb
4719c: SETL/NGE Eb
4729d: SETNL/GE Eb
4739e: SETLE/NG Eb
4749f: SETNLE/G Eb
475# 0x0f 0xa0-0xaf
476a0: PUSH FS (d64)
477a1: POP FS (d64)
478a2: CPUID
479a3: BT Ev,Gv
480a4: SHLD Ev,Gv,Ib
481a5: SHLD Ev,Gv,CL
482a6: GrpPDLK
483a7: GrpRNG
484a8: PUSH GS (d64)
485a9: POP GS (d64)
486aa: RSM
487ab: BTS Ev,Gv
488ac: SHRD Ev,Gv,Ib
489ad: SHRD Ev,Gv,CL
490ae: Grp15 (1A),(1C)
491af: IMUL Gv,Ev
492# 0x0f 0xb0-0xbf
493b0: CMPXCHG Eb,Gb
494b1: CMPXCHG Ev,Gv
495b2: LSS Gv,Mp
496b3: BTR Ev,Gv
497b4: LFS Gv,Mp
498b5: LGS Gv,Mp
499b6: MOVZX Gv,Eb
500b7: MOVZX Gv,Ew
501b8: JMPE | POPCNT Gv,Ev (F3)
502b9: Grp10 (1A)
503ba: Grp8 Ev,Ib (1A)
504bb: BTC Ev,Gv
505bc: BSF Gv,Ev
506bd: BSR Gv,Ev
507be: MOVSX Gv,Eb
508bf: MOVSX Gv,Ew
509# 0x0f 0xc0-0xcf
510c0: XADD Eb,Gb
511c1: XADD Ev,Gv
512c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
513c3: movnti Md/q,Gd/q
514c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
515c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
516c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
517c7: Grp9 (1A)
518c8: BSWAP RAX/EAX/R8/R8D
519c9: BSWAP RCX/ECX/R9/R9D
520ca: BSWAP RDX/EDX/R10/R10D
521cb: BSWAP RBX/EBX/R11/R11D
522cc: BSWAP RSP/ESP/R12/R12D
523cd: BSWAP RBP/EBP/R13/R13D
524ce: BSWAP RSI/ESI/R14/R14D
525cf: BSWAP RDI/EDI/R15/R15D
526# 0x0f 0xd0-0xdf
527d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
528d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
529d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
530d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
531d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
532d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
533d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
534d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
535d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
536d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
537da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
538db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
539dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
540dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
541de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
542df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
543# 0x0f 0xe0-0xef
544e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
545e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
546e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
547e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
548e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
549e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
550e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
551e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
552e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
553e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
554ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
555eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
556ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
557ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
558ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
559ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
560# 0x0f 0xf0-0xff
561f0: lddqu Vdq,Mdq (F2),(VEX)
562f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
563f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
564f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
565f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
566f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
567f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
568f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
569f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
570f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
571fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
572fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
573fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
574fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
575fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
576ff:
577EndTable
578
579Table: 3-byte opcode 1 (0x0f 0x38)
580Referrer: 3-byte escape 1
581AVXcode: 2
582# 0x0f 0x38 0x00-0x0f
58300: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
58401: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
58502: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
58603: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
58704: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
58805: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
58906: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
59007: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
59108: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
59209: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
5930a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
5940b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
5950c: Vpermilps /r (66),(oVEX)
5960d: Vpermilpd /r (66),(oVEX)
5970e: vtestps /r (66),(oVEX)
5980f: vtestpd /r (66),(oVEX)
599# 0x0f 0x38 0x10-0x1f
60010: pblendvb Vdq,Wdq (66)
60111:
60212:
60313:
60414: blendvps Vdq,Wdq (66)
60515: blendvpd Vdq,Wdq (66)
60616:
60717: ptest Vdq,Wdq (66),(VEX)
60818: vbroadcastss /r (66),(oVEX)
60919: vbroadcastsd /r (66),(oVEX),(o256)
6101a: vbroadcastf128 /r (66),(oVEX),(o256)
6111b:
6121c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
6131d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
6141e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
6151f:
616# 0x0f 0x38 0x20-0x2f
61720: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
61821: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
61922: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
62023: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
62124: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
62225: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
62326:
62427:
62528: pmuldq Vdq,Wdq (66),(VEX),(o128)
62629: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
6272a: movntdqa Vdq,Mdq (66),(VEX),(o128)
6282b: packusdw Vdq,Wdq (66),(VEX),(o128)
6292c: vmaskmovps(ld) /r (66),(oVEX)
6302d: vmaskmovpd(ld) /r (66),(oVEX)
6312e: vmaskmovps(st) /r (66),(oVEX)
6322f: vmaskmovpd(st) /r (66),(oVEX)
633# 0x0f 0x38 0x30-0x3f
63430: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
63531: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
63632: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
63733: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
63834: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
63935: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
64036:
64137: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
64238: pminsb Vdq,Wdq (66),(VEX),(o128)
64339: pminsd Vdq,Wdq (66),(VEX),(o128)
6443a: pminuw Vdq,Wdq (66),(VEX),(o128)
6453b: pminud Vdq,Wdq (66),(VEX),(o128)
6463c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
6473d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
6483e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
6493f: pmaxud Vdq,Wdq (66),(VEX),(o128)
650# 0x0f 0x38 0x40-0x8f
65140: pmulld Vdq,Wdq (66),(VEX),(o128)
65241: phminposuw Vdq,Wdq (66),(VEX),(o128)
65380: INVEPT Gd/q,Mdq (66)
65481: INVPID Gd/q,Mdq (66)
655# 0x0f 0x38 0x90-0xbf (FMA)
65696: vfmaddsub132pd/ps /r (66),(VEX)
65797: vfmsubadd132pd/ps /r (66),(VEX)
65898: vfmadd132pd/ps /r (66),(VEX)
65999: vfmadd132sd/ss /r (66),(VEX),(o128)
6609a: vfmsub132pd/ps /r (66),(VEX)
6619b: vfmsub132sd/ss /r (66),(VEX),(o128)
6629c: vfnmadd132pd/ps /r (66),(VEX)
6639d: vfnmadd132sd/ss /r (66),(VEX),(o128)
6649e: vfnmsub132pd/ps /r (66),(VEX)
6659f: vfnmsub132sd/ss /r (66),(VEX),(o128)
666a6: vfmaddsub213pd/ps /r (66),(VEX)
667a7: vfmsubadd213pd/ps /r (66),(VEX)
668a8: vfmadd213pd/ps /r (66),(VEX)
669a9: vfmadd213sd/ss /r (66),(VEX),(o128)
670aa: vfmsub213pd/ps /r (66),(VEX)
671ab: vfmsub213sd/ss /r (66),(VEX),(o128)
672ac: vfnmadd213pd/ps /r (66),(VEX)
673ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
674ae: vfnmsub213pd/ps /r (66),(VEX)
675af: vfnmsub213sd/ss /r (66),(VEX),(o128)
676b6: vfmaddsub231pd/ps /r (66),(VEX)
677b7: vfmsubadd231pd/ps /r (66),(VEX)
678b8: vfmadd231pd/ps /r (66),(VEX)
679b9: vfmadd231sd/ss /r (66),(VEX),(o128)
680ba: vfmsub231pd/ps /r (66),(VEX)
681bb: vfmsub231sd/ss /r (66),(VEX),(o128)
682bc: vfnmadd231pd/ps /r (66),(VEX)
683bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
684be: vfnmsub231pd/ps /r (66),(VEX)
685bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
686# 0x0f 0x38 0xc0-0xff
687db: aesimc Vdq,Wdq (66),(VEX),(o128)
688dc: aesenc Vdq,Wdq (66),(VEX),(o128)
689dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
690de: aesdec Vdq,Wdq (66),(VEX),(o128)
691df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
692f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
693f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
694EndTable
695
696Table: 3-byte opcode 2 (0x0f 0x3a)
697Referrer: 3-byte escape 2
698AVXcode: 3
699# 0x0f 0x3a 0x00-0xff
70004: vpermilps /r,Ib (66),(oVEX)
70105: vpermilpd /r,Ib (66),(oVEX)
70206: vperm2f128 /r,Ib (66),(oVEX),(o256)
70308: roundps Vdq,Wdq,Ib (66),(VEX)
70409: roundpd Vdq,Wdq,Ib (66),(VEX)
7050a: roundss Vss,Wss,Ib (66),(VEX),(o128)
7060b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
7070c: blendps Vdq,Wdq,Ib (66),(VEX)
7080d: blendpd Vdq,Wdq,Ib (66),(VEX)
7090e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
7100f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
71114: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
71215: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
71316: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
71417: extractps Ed,Vdq,Ib (66),(VEX),(o128)
71518: vinsertf128 /r,Ib (66),(oVEX),(o256)
71619: vextractf128 /r,Ib (66),(oVEX),(o256)
71720: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
71821: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
71922: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
72040: dpps Vdq,Wdq,Ib (66),(VEX)
72141: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
72242: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
72344: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
7244a: vblendvps /r,Ib (66),(oVEX)
7254b: vblendvpd /r,Ib (66),(oVEX)
7264c: vpblendvb /r,Ib (66),(oVEX),(o128)
72760: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
72861: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
72962: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
73063: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
731df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
732EndTable
733
734GrpTable: Grp1
7350: ADD
7361: OR
7372: ADC
7383: SBB
7394: AND
7405: SUB
7416: XOR
7427: CMP
743EndTable
744
745GrpTable: Grp1A
7460: POP
747EndTable
748
749GrpTable: Grp2
7500: ROL
7511: ROR
7522: RCL
7533: RCR
7544: SHL/SAL
7555: SHR
7566:
7577: SAR
758EndTable
759
760GrpTable: Grp3_1
7610: TEST Eb,Ib
7621:
7632: NOT Eb
7643: NEG Eb
7654: MUL AL,Eb
7665: IMUL AL,Eb
7676: DIV AL,Eb
7687: IDIV AL,Eb
769EndTable
770
771GrpTable: Grp3_2
7720: TEST Ev,Iz
7731:
7742: NOT Ev
7753: NEG Ev
7764: MUL rAX,Ev
7775: IMUL rAX,Ev
7786: DIV rAX,Ev
7797: IDIV rAX,Ev
780EndTable
781
782GrpTable: Grp4
7830: INC Eb
7841: DEC Eb
785EndTable
786
787GrpTable: Grp5
7880: INC Ev
7891: DEC Ev
7902: CALLN Ev (f64)
7913: CALLF Ep
7924: JMPN Ev (f64)
7935: JMPF Ep
7946: PUSH Ev (d64)
7957:
796EndTable
797
798GrpTable: Grp6
7990: SLDT Rv/Mw
8001: STR Rv/Mw
8012: LLDT Ew
8023: LTR Ew
8034: VERR Ew
8045: VERW Ew
805EndTable
806
807GrpTable: Grp7
8080: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8091: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
8102: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
8113: LIDT Ms
8124: SMSW Mw/Rv
8135:
8146: LMSW Ew
8157: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
816EndTable
817
818GrpTable: Grp8
8194: BT
8205: BTS
8216: BTR
8227: BTC
823EndTable
824
825GrpTable: Grp9
8261: CMPXCHG8B/16B Mq/Mdq
8276: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
8287: VMPTRST Mq
829EndTable
830
831GrpTable: Grp10
832EndTable
833
834GrpTable: Grp11
8350: MOV
836EndTable
837
838GrpTable: Grp12
8392: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
8404: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
8416: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
842EndTable
843
844GrpTable: Grp13
8452: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
8464: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
8476: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
848EndTable
849
850GrpTable: Grp14
8512: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
8523: psrldq Udq,Ib (66),(11B),(VEX),(o128)
8536: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
8547: pslldq Udq,Ib (66),(11B),(VEX),(o128)
855EndTable
856
857GrpTable: Grp15
8580: fxsave
8591: fxstor
8602: ldmxcsr (VEX)
8613: stmxcsr (VEX)
8624: XSAVE
8635: XRSTOR | lfence (11B)
8646: mfence (11B)
8657: clflush | sfence (11B)
866EndTable
867
868GrpTable: Grp16
8690: prefetch NTA
8701: prefetch T0
8712: prefetch T1
8723: prefetch T2
873EndTable
874
875# AMD's Prefetch Group
876GrpTable: GrpP
8770: PREFETCH
8781: PREFETCHW
879EndTable
880
881GrpTable: GrpPDLK
8820: MONTMUL
8831: XSHA1
8842: XSHA2
885EndTable
886
887GrpTable: GrpRNG
8880: xstore-rng
8891: xcrypt-ecb
8902: xcrypt-cbc
8914: xcrypt-cfb
8925: xcrypt-ofb
893EndTable
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee9028cf0..8f4e2ac93928 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
38 * Returns 0 if mmiotrace is disabled, or if the fault is not 38 * Returns 0 if mmiotrace is disabled, or if the fault is not
39 * handled by mmiotrace: 39 * handled by mmiotrace:
40 */ 40 */
41static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 41static inline int __kprobes
42kmmio_fault(struct pt_regs *regs, unsigned long addr)
42{ 43{
43 if (unlikely(is_kmmio_active())) 44 if (unlikely(is_kmmio_active()))
44 if (kmmio_handler(regs, addr) == 1) 45 if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
46 return 0; 47 return 0;
47} 48}
48 49
49static inline int notify_page_fault(struct pt_regs *regs) 50static inline int __kprobes notify_page_fault(struct pt_regs *regs)
50{ 51{
51 int ret = 0; 52 int ret = 0;
52 53
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void)
240 * 241 *
241 * Handle a fault on the vmalloc or module mapping area 242 * Handle a fault on the vmalloc or module mapping area
242 */ 243 */
243static noinline int vmalloc_fault(unsigned long address) 244static noinline __kprobes int vmalloc_fault(unsigned long address)
244{ 245{
245 unsigned long pgd_paddr; 246 unsigned long pgd_paddr;
246 pmd_t *pmd_k; 247 pmd_t *pmd_k;
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void)
357 * 358 *
358 * This assumes no large pages in there. 359 * This assumes no large pages in there.
359 */ 360 */
360static noinline int vmalloc_fault(unsigned long address) 361static noinline __kprobes int vmalloc_fault(unsigned long address)
361{ 362{
362 pgd_t *pgd, *pgd_ref; 363 pgd_t *pgd, *pgd_ref;
363 pud_t *pud, *pud_ref; 364 pud_t *pud, *pud_ref;
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
860 * There are no security implications to leaving a stale TLB when 861 * There are no security implications to leaving a stale TLB when
861 * increasing the permissions on a page. 862 * increasing the permissions on a page.
862 */ 863 */
863static noinline int 864static noinline __kprobes int
864spurious_fault(unsigned long error_code, unsigned long address) 865spurious_fault(unsigned long error_code, unsigned long address)
865{ 866{
866 pgd_t *pgd; 867 pgd_t *pgd;
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
new file mode 100644
index 000000000000..4688f90ce5a2
--- /dev/null
+++ b/arch/x86/tools/Makefile
@@ -0,0 +1,22 @@
1PHONY += posttest
2
3ifeq ($(KBUILD_VERBOSE),1)
4 postest_verbose = -v
5else
6 postest_verbose =
7endif
8
9quiet_cmd_posttest = TEST $@
10 cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose)
11
12posttest: $(obj)/test_get_len vmlinux
13 $(call cmd,posttest)
14
15hostprogs-y := test_get_len
16
17# -I needed for generated C source and C source which in the kernel tree.
18HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
19
20# Dependencies are also needed.
21$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
22
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk
new file mode 100644
index 000000000000..c13c0ee48ab4
--- /dev/null
+++ b/arch/x86/tools/distill.awk
@@ -0,0 +1,47 @@
1#!/bin/awk -f
2# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
3# Distills the disassembly as follows:
4# - Removes all lines except the disassembled instructions.
5# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
6# into a single line.
7# - Remove bad(or prefix only) instructions
8
9BEGIN {
10 prev_addr = ""
11 prev_hex = ""
12 prev_mnemonic = ""
13 bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
14 fwait_expr = "^9b "
15 fwait_str="9b\tfwait"
16}
17
18/^ *[0-9a-f]+ <[^>]*>:/ {
19 # Symbol entry
20 printf("%s%s\n", $2, $1)
21}
22
23/^ *[0-9a-f]+:/ {
24 if (split($0, field, "\t") < 3) {
25 # This is a continuation of the same insn.
26 prev_hex = prev_hex field[2]
27 } else {
28 # Skip bad instructions
29 if (match(prev_mnemonic, bad_expr))
30 prev_addr = ""
31 # Split fwait from other f* instructions
32 if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") {
33 printf "%s\t%s\n", prev_addr, fwait_str
34 sub(fwait_expr, "", prev_hex)
35 }
36 if (prev_addr != "")
37 printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
38 prev_addr = field[1]
39 prev_hex = field[2]
40 prev_mnemonic = field[3]
41 }
42}
43
44END {
45 if (prev_addr != "")
46 printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
47}
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644
index 000000000000..e34e92a28eb6
--- /dev/null
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -0,0 +1,380 @@
1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
7# Awk implementation sanity check
8function check_awk_implement() {
9 if (!match("abc", "[[:lower:]]+"))
10 return "Your awk doesn't support charactor-class."
11 if (sprintf("%x", 0) != "0")
12 return "Your awk has a printf-format problem."
13 return ""
14}
15
16# Clear working vars
17function clear_vars() {
18 delete table
19 delete lptable2
20 delete lptable1
21 delete lptable3
22 eid = -1 # escape id
23 gid = -1 # group id
24 aid = -1 # AVX id
25 tname = ""
26}
27
28BEGIN {
29 # Implementation error checking
30 awkchecked = check_awk_implement()
31 if (awkchecked != "") {
32 print "Error: " awkchecked > "/dev/stderr"
33 print "Please try to use gawk." > "/dev/stderr"
34 exit 1
35 }
36
37 # Setup generating tables
38 print "/* x86 opcode map generated from x86-opcode-map.txt */"
39 print "/* Do not change this code. */\n"
40 ggid = 1
41 geid = 1
42 gaid = 0
43 delete etable
44 delete gtable
45 delete atable
46
47 opnd_expr = "^[[:alpha:]/]"
48 ext_expr = "^\\("
49 sep_expr = "^\\|$"
50 group_expr = "^Grp[[:alnum:]]+"
51
52 imm_expr = "^[IJAO][[:lower:]]"
53 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
54 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
55 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
56 imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
57 imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
58 imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
59 imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
60 imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
61 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
62 imm_flag["Ob"] = "INAT_MOFFSET"
63 imm_flag["Ov"] = "INAT_MOFFSET"
64
65 modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"
66 force64_expr = "\\([df]64\\)"
67 rex_expr = "^REX(\\.[XRWB]+)*"
68 fpu_expr = "^ESC" # TODO
69
70 lprefix1_expr = "\\(66\\)"
71 lprefix2_expr = "\\(F3\\)"
72 lprefix3_expr = "\\(F2\\)"
73 max_lprefix = 4
74
75 vexok_expr = "\\(VEX\\)"
76 vexonly_expr = "\\(oVEX\\)"
77
78 prefix_expr = "\\(Prefix\\)"
79 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
80 prefix_num["REPNE"] = "INAT_PFX_REPNE"
81 prefix_num["REP/REPE"] = "INAT_PFX_REPE"
82 prefix_num["LOCK"] = "INAT_PFX_LOCK"
83 prefix_num["SEG=CS"] = "INAT_PFX_CS"
84 prefix_num["SEG=DS"] = "INAT_PFX_DS"
85 prefix_num["SEG=ES"] = "INAT_PFX_ES"
86 prefix_num["SEG=FS"] = "INAT_PFX_FS"
87 prefix_num["SEG=GS"] = "INAT_PFX_GS"
88 prefix_num["SEG=SS"] = "INAT_PFX_SS"
89 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
90 prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
91 prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
92
93 clear_vars()
94}
95
96function semantic_error(msg) {
97 print "Semantic error at " NR ": " msg > "/dev/stderr"
98 exit 1
99}
100
101function debug(msg) {
102 print "DEBUG: " msg
103}
104
105function array_size(arr, i,c) {
106 c = 0
107 for (i in arr)
108 c++
109 return c
110}
111
112/^Table:/ {
113 print "/* " $0 " */"
114 if (tname != "")
115 semantic_error("Hit Table: before EndTable:.");
116}
117
118/^Referrer:/ {
119 if (NF != 1) {
120 # escape opcode table
121 ref = ""
122 for (i = 2; i <= NF; i++)
123 ref = ref $i
124 eid = escape[ref]
125 tname = sprintf("inat_escape_table_%d", eid)
126 }
127}
128
129/^AVXcode:/ {
130 if (NF != 1) {
131 # AVX/escape opcode table
132 aid = $2
133 if (gaid <= aid)
134 gaid = aid + 1
135 if (tname == "") # AVX only opcode table
136 tname = sprintf("inat_avx_table_%d", $2)
137 }
138 if (aid == -1 && eid == -1) # primary opcode table
139 tname = "inat_primary_table"
140}
141
142/^GrpTable:/ {
143 print "/* " $0 " */"
144 if (!($2 in group))
145 semantic_error("No group: " $2 )
146 gid = group[$2]
147 tname = "inat_group_table_" gid
148}
149
150function print_table(tbl,name,fmt,n)
151{
152 print "const insn_attr_t " name " = {"
153 for (i = 0; i < n; i++) {
154 id = sprintf(fmt, i)
155 if (tbl[id])
156 print " [" id "] = " tbl[id] ","
157 }
158 print "};"
159}
160
161/^EndTable/ {
162 if (gid != -1) {
163 # print group tables
164 if (array_size(table) != 0) {
165 print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
166 "0x%x", 8)
167 gtable[gid,0] = tname
168 }
169 if (array_size(lptable1) != 0) {
170 print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
171 "0x%x", 8)
172 gtable[gid,1] = tname "_1"
173 }
174 if (array_size(lptable2) != 0) {
175 print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
176 "0x%x", 8)
177 gtable[gid,2] = tname "_2"
178 }
179 if (array_size(lptable3) != 0) {
180 print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
181 "0x%x", 8)
182 gtable[gid,3] = tname "_3"
183 }
184 } else {
185 # print primary/escaped tables
186 if (array_size(table) != 0) {
187 print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
188 "0x%02x", 256)
189 etable[eid,0] = tname
190 if (aid >= 0)
191 atable[aid,0] = tname
192 }
193 if (array_size(lptable1) != 0) {
194 print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
195 "0x%02x", 256)
196 etable[eid,1] = tname "_1"
197 if (aid >= 0)
198 atable[aid,1] = tname "_1"
199 }
200 if (array_size(lptable2) != 0) {
201 print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
202 "0x%02x", 256)
203 etable[eid,2] = tname "_2"
204 if (aid >= 0)
205 atable[aid,2] = tname "_2"
206 }
207 if (array_size(lptable3) != 0) {
208 print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
209 "0x%02x", 256)
210 etable[eid,3] = tname "_3"
211 if (aid >= 0)
212 atable[aid,3] = tname "_3"
213 }
214 }
215 print ""
216 clear_vars()
217}
218
219function add_flags(old,new) {
220 if (old && new)
221 return old " | " new
222 else if (old)
223 return old
224 else
225 return new
226}
227
228# convert operands to flags.
229function convert_operands(opnd, i,imm,mod)
230{
231 imm = null
232 mod = null
233 for (i in opnd) {
234 i = opnd[i]
235 if (match(i, imm_expr) == 1) {
236 if (!imm_flag[i])
237 semantic_error("Unknown imm opnd: " i)
238 if (imm) {
239 if (i != "Ib")
240 semantic_error("Second IMM error")
241 imm = add_flags(imm, "INAT_SCNDIMM")
242 } else
243 imm = imm_flag[i]
244 } else if (match(i, modrm_expr))
245 mod = "INAT_MODRM"
246 }
247 return add_flags(imm, mod)
248}
249
250/^[0-9a-f]+\:/ {
251 if (NR == 1)
252 next
253 # get index
254 idx = "0x" substr($1, 1, index($1,":") - 1)
255 if (idx in table)
256 semantic_error("Redefine " idx " in " tname)
257
258 # check if escaped opcode
259 if ("escape" == $2) {
260 if ($3 != "#")
261 semantic_error("No escaped name")
262 ref = ""
263 for (i = 4; i <= NF; i++)
264 ref = ref $i
265 if (ref in escape)
266 semantic_error("Redefine escape (" ref ")")
267 escape[ref] = geid
268 geid++
269 table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
270 next
271 }
272
273 variant = null
274 # converts
275 i = 2
276 while (i <= NF) {
277 opcode = $(i++)
278 delete opnds
279 ext = null
280 flags = null
281 opnd = null
282 # parse one opcode
283 if (match($i, opnd_expr)) {
284 opnd = $i
285 split($(i++), opnds, ",")
286 flags = convert_operands(opnds)
287 }
288 if (match($i, ext_expr))
289 ext = $(i++)
290 if (match($i, sep_expr))
291 i++
292 else if (i < NF)
293 semantic_error($i " is not a separator")
294
295 # check if group opcode
296 if (match(opcode, group_expr)) {
297 if (!(opcode in group)) {
298 group[opcode] = ggid
299 ggid++
300 }
301 flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
302 }
303 # check force(or default) 64bit
304 if (match(ext, force64_expr))
305 flags = add_flags(flags, "INAT_FORCE64")
306
307 # check REX prefix
308 if (match(opcode, rex_expr))
309 flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
310
311 # check coprocessor escape : TODO
312 if (match(opcode, fpu_expr))
313 flags = add_flags(flags, "INAT_MODRM")
314
315 # check VEX only code
316 if (match(ext, vexonly_expr))
317 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
318
319 # check VEX only code
320 if (match(ext, vexok_expr))
321 flags = add_flags(flags, "INAT_VEXOK")
322
323 # check prefixes
324 if (match(ext, prefix_expr)) {
325 if (!prefix_num[opcode])
326 semantic_error("Unknown prefix: " opcode)
327 flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
328 }
329 if (length(flags) == 0)
330 continue
331 # check if last prefix
332 if (match(ext, lprefix1_expr)) {
333 lptable1[idx] = add_flags(lptable1[idx],flags)
334 variant = "INAT_VARIANT"
335 } else if (match(ext, lprefix2_expr)) {
336 lptable2[idx] = add_flags(lptable2[idx],flags)
337 variant = "INAT_VARIANT"
338 } else if (match(ext, lprefix3_expr)) {
339 lptable3[idx] = add_flags(lptable3[idx],flags)
340 variant = "INAT_VARIANT"
341 } else {
342 table[idx] = add_flags(table[idx],flags)
343 }
344 }
345 if (variant)
346 table[idx] = add_flags(table[idx],variant)
347}
348
349END {
350 if (awkchecked != "")
351 exit 1
352 # print escape opcode map's array
353 print "/* Escape opcode map array */"
354 print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
355 "[INAT_LSTPFX_MAX + 1] = {"
356 for (i = 0; i < geid; i++)
357 for (j = 0; j < max_lprefix; j++)
358 if (etable[i,j])
359 print " ["i"]["j"] = "etable[i,j]","
360 print "};\n"
361 # print group opcode map's array
362 print "/* Group opcode map array */"
363 print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
364 "[INAT_LSTPFX_MAX + 1] = {"
365 for (i = 0; i < ggid; i++)
366 for (j = 0; j < max_lprefix; j++)
367 if (gtable[i,j])
368 print " ["i"]["j"] = "gtable[i,j]","
369 print "};\n"
370 # print AVX opcode map's array
371 print "/* AVX opcode map array */"
372 print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\
373 "[INAT_LSTPFX_MAX + 1] = {"
374 for (i = 0; i < gaid; i++)
375 for (j = 0; j < max_lprefix; j++)
376 if (atable[i,j])
377 print " ["i"]["j"] = "atable[i,j]","
378 print "};"
379}
380
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
new file mode 100644
index 000000000000..af75e07217ba
--- /dev/null
+++ b/arch/x86/tools/test_get_len.c
@@ -0,0 +1,168 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2009
17 */
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <assert.h>
23#include <unistd.h>
24
25#define unlikely(cond) (cond)
26
27#include <asm/insn.h>
28#include <inat.c>
29#include <insn.c>
30
31/*
32 * Test of instruction analysis in general and insn_get_length() in
33 * particular. See if insn_get_length() and the disassembler agree
34 * on the length of each instruction in an elf disassembly.
35 *
36 * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
37 */
38
39const char *prog;
40static int verbose;
41static int x86_64;
42
43static void usage(void)
44{
45 fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
46 " %s [-y|-n] [-v] \n", prog);
47 fprintf(stderr, "\t-y 64bit mode\n");
48 fprintf(stderr, "\t-n 32bit mode\n");
49 fprintf(stderr, "\t-v verbose mode\n");
50 exit(1);
51}
52
53static void malformed_line(const char *line, int line_nr)
54{
55 fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line);
56 exit(3);
57}
58
59static void dump_field(FILE *fp, const char *name, const char *indent,
60 struct insn_field *field)
61{
62 fprintf(fp, "%s.%s = {\n", indent, name);
63 fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
64 indent, field->value, field->bytes[0], field->bytes[1],
65 field->bytes[2], field->bytes[3]);
66 fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
67 field->got, field->nbytes);
68}
69
70static void dump_insn(FILE *fp, struct insn *insn)
71{
72 fprintf(fp, "Instruction = { \n");
73 dump_field(fp, "prefixes", "\t", &insn->prefixes);
74 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
75 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
76 dump_field(fp, "opcode", "\t", &insn->opcode);
77 dump_field(fp, "modrm", "\t", &insn->modrm);
78 dump_field(fp, "sib", "\t", &insn->sib);
79 dump_field(fp, "displacement", "\t", &insn->displacement);
80 dump_field(fp, "immediate1", "\t", &insn->immediate1);
81 dump_field(fp, "immediate2", "\t", &insn->immediate2);
82 fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
83 insn->attr, insn->opnd_bytes, insn->addr_bytes);
84 fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
85 insn->length, insn->x86_64, insn->kaddr);
86}
87
88static void parse_args(int argc, char **argv)
89{
90 int c;
91 prog = argv[0];
92 while ((c = getopt(argc, argv, "ynv")) != -1) {
93 switch (c) {
94 case 'y':
95 x86_64 = 1;
96 break;
97 case 'n':
98 x86_64 = 0;
99 break;
100 case 'v':
101 verbose = 1;
102 break;
103 default:
104 usage();
105 }
106 }
107}
108
109#define BUFSIZE 256
110
111int main(int argc, char **argv)
112{
113 char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
114 unsigned char insn_buf[16];
115 struct insn insn;
116 int insns = 0, c;
117
118 parse_args(argc, argv);
119
120 while (fgets(line, BUFSIZE, stdin)) {
121 char copy[BUFSIZE], *s, *tab1, *tab2;
122 int nb = 0;
123 unsigned int b;
124
125 if (line[0] == '<') {
126 /* Symbol line */
127 strcpy(sym, line);
128 continue;
129 }
130
131 insns++;
132 memset(insn_buf, 0, 16);
133 strcpy(copy, line);
134 tab1 = strchr(copy, '\t');
135 if (!tab1)
136 malformed_line(line, insns);
137 s = tab1 + 1;
138 s += strspn(s, " ");
139 tab2 = strchr(s, '\t');
140 if (!tab2)
141 malformed_line(line, insns);
142 *tab2 = '\0'; /* Characters beyond tab2 aren't examined */
143 while (s < tab2) {
144 if (sscanf(s, "%x", &b) == 1) {
145 insn_buf[nb++] = (unsigned char) b;
146 s += 3;
147 } else
148 break;
149 }
150 /* Decode an instruction */
151 insn_init(&insn, insn_buf, x86_64);
152 insn_get_length(&insn);
153 if (insn.length != nb) {
154 fprintf(stderr, "Error: %s found a difference at %s\n",
155 prog, sym);
156 fprintf(stderr, "Error: %s", line);
157 fprintf(stderr, "Error: objdump says %d bytes, but "
158 "insn_get_length() says %d\n", nb,
159 insn.length);
160 if (verbose)
161 dump_insn(stderr, &insn);
162 exit(2);
163 }
164 }
165 fprintf(stderr, "Succeed: decoded and checked %d instructions\n",
166 insns);
167 return 0;
168}
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d11770472bc8..43360c1d8f70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -117,12 +117,12 @@ struct ftrace_event_call {
117 struct dentry *dir; 117 struct dentry *dir;
118 struct trace_event *event; 118 struct trace_event *event;
119 int enabled; 119 int enabled;
120 int (*regfunc)(void *); 120 int (*regfunc)(struct ftrace_event_call *);
121 void (*unregfunc)(void *); 121 void (*unregfunc)(struct ftrace_event_call *);
122 int id; 122 int id;
123 int (*raw_init)(void); 123 int (*raw_init)(struct ftrace_event_call *);
124 int (*show_format)(struct ftrace_event_call *call, 124 int (*show_format)(struct ftrace_event_call *,
125 struct trace_seq *s); 125 struct trace_seq *);
126 int (*define_fields)(struct ftrace_event_call *); 126 int (*define_fields)(struct ftrace_event_call *);
127 struct list_head fields; 127 struct list_head fields;
128 int filter_active; 128 int filter_active;
@@ -131,14 +131,19 @@ struct ftrace_event_call {
131 void *data; 131 void *data;
132 132
133 atomic_t profile_count; 133 atomic_t profile_count;
134 int (*profile_enable)(void); 134 int (*profile_enable)(struct ftrace_event_call *);
135 void (*profile_disable)(void); 135 void (*profile_disable)(struct ftrace_event_call *);
136}; 136};
137 137
138#define FTRACE_MAX_PROFILE_SIZE 2048 138#define FTRACE_MAX_PROFILE_SIZE 2048
139 139
140extern char *trace_profile_buf; 140struct perf_trace_buf {
141extern char *trace_profile_buf_nmi; 141 char buf[FTRACE_MAX_PROFILE_SIZE];
142 int recursion;
143};
144
145extern struct perf_trace_buf *perf_trace_buf;
146extern struct perf_trace_buf *perf_trace_buf_nmi;
142 147
143#define MAX_FILTER_PRED 32 148#define MAX_FILTER_PRED 32
144#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ 149#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
@@ -157,11 +162,12 @@ enum {
157 FILTER_PTR_STRING, 162 FILTER_PTR_STRING,
158}; 163};
159 164
160extern int trace_define_field(struct ftrace_event_call *call,
161 const char *type, const char *name,
162 int offset, int size, int is_signed,
163 int filter_type);
164extern int trace_define_common_fields(struct ftrace_event_call *call); 165extern int trace_define_common_fields(struct ftrace_event_call *call);
166extern int trace_define_field(struct ftrace_event_call *call, const char *type,
167 const char *name, int offset, int size,
168 int is_signed, int filter_type);
169extern int trace_add_event_call(struct ftrace_event_call *call);
170extern void trace_remove_event_call(struct ftrace_event_call *call);
165 171
166#define is_signed_type(type) (((type)(-1)) < 0) 172#define is_signed_type(type) (((type)(-1)) < 0)
167 173
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 3a46b7b7abb2..1b672f74a32f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
296int disable_kprobe(struct kprobe *kp); 296int disable_kprobe(struct kprobe *kp);
297int enable_kprobe(struct kprobe *kp); 297int enable_kprobe(struct kprobe *kp);
298 298
299void dump_kprobe(struct kprobe *kp);
300
299#else /* !CONFIG_KPROBES: */ 301#else /* !CONFIG_KPROBES: */
300 302
301static inline int kprobes_built_in(void) 303static inline int kprobes_built_in(void)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace1a838..b50974a93af0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -100,23 +100,23 @@ struct perf_event_attr;
100 100
101#ifdef CONFIG_EVENT_PROFILE 101#ifdef CONFIG_EVENT_PROFILE
102#define TRACE_SYS_ENTER_PROFILE(sname) \ 102#define TRACE_SYS_ENTER_PROFILE(sname) \
103static int prof_sysenter_enable_##sname(void) \ 103static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \
104{ \ 104{ \
105 return reg_prof_syscall_enter("sys"#sname); \ 105 return reg_prof_syscall_enter("sys"#sname); \
106} \ 106} \
107 \ 107 \
108static void prof_sysenter_disable_##sname(void) \ 108static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \
109{ \ 109{ \
110 unreg_prof_syscall_enter("sys"#sname); \ 110 unreg_prof_syscall_enter("sys"#sname); \
111} 111}
112 112
113#define TRACE_SYS_EXIT_PROFILE(sname) \ 113#define TRACE_SYS_EXIT_PROFILE(sname) \
114static int prof_sysexit_enable_##sname(void) \ 114static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \
115{ \ 115{ \
116 return reg_prof_syscall_exit("sys"#sname); \ 116 return reg_prof_syscall_exit("sys"#sname); \
117} \ 117} \
118 \ 118 \
119static void prof_sysexit_disable_##sname(void) \ 119static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \
120{ \ 120{ \
121 unreg_prof_syscall_exit("sys"#sname); \ 121 unreg_prof_syscall_exit("sys"#sname); \
122} 122}
@@ -157,7 +157,7 @@ static void prof_sysexit_disable_##sname(void) \
157 struct trace_event enter_syscall_print_##sname = { \ 157 struct trace_event enter_syscall_print_##sname = { \
158 .trace = print_syscall_enter, \ 158 .trace = print_syscall_enter, \
159 }; \ 159 }; \
160 static int init_enter_##sname(void) \ 160 static int init_enter_##sname(struct ftrace_event_call *call) \
161 { \ 161 { \
162 int num, id; \ 162 int num, id; \
163 num = syscall_name_to_nr("sys"#sname); \ 163 num = syscall_name_to_nr("sys"#sname); \
@@ -193,7 +193,7 @@ static void prof_sysexit_disable_##sname(void) \
193 struct trace_event exit_syscall_print_##sname = { \ 193 struct trace_event exit_syscall_print_##sname = { \
194 .trace = print_syscall_exit, \ 194 .trace = print_syscall_exit, \
195 }; \ 195 }; \
196 static int init_exit_##sname(void) \ 196 static int init_exit_##sname(struct ftrace_event_call *call) \
197 { \ 197 { \
198 int num, id; \ 198 int num, id; \
199 num = syscall_name_to_nr("sys"#sname); \ 199 num = syscall_name_to_nr("sys"#sname); \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c9bbcab95fbe..4945d1c99864 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -402,12 +402,12 @@ static inline int ftrace_get_offsets_##call( \
402 \ 402 \
403static void ftrace_profile_##call(proto); \ 403static void ftrace_profile_##call(proto); \
404 \ 404 \
405static int ftrace_profile_enable_##call(void) \ 405static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\
406{ \ 406{ \
407 return register_trace_##call(ftrace_profile_##call); \ 407 return register_trace_##call(ftrace_profile_##call); \
408} \ 408} \
409 \ 409 \
410static void ftrace_profile_disable_##call(void) \ 410static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\
411{ \ 411{ \
412 unregister_trace_##call(ftrace_profile_##call); \ 412 unregister_trace_##call(ftrace_profile_##call); \
413} 413}
@@ -426,7 +426,7 @@ static void ftrace_profile_disable_##call(void) \
426 * event_trace_printk(_RET_IP_, "<call>: " <fmt>); 426 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
427 * } 427 * }
428 * 428 *
429 * static int ftrace_reg_event_<call>(void) 429 * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
430 * { 430 * {
431 * int ret; 431 * int ret;
432 * 432 *
@@ -437,7 +437,7 @@ static void ftrace_profile_disable_##call(void) \
437 * return ret; 437 * return ret;
438 * } 438 * }
439 * 439 *
440 * static void ftrace_unreg_event_<call>(void) 440 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
441 * { 441 * {
442 * unregister_trace_<call>(ftrace_event_<call>); 442 * unregister_trace_<call>(ftrace_event_<call>);
443 * } 443 * }
@@ -472,7 +472,7 @@ static void ftrace_profile_disable_##call(void) \
472 * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); 472 * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
473 * } 473 * }
474 * 474 *
475 * static int ftrace_raw_reg_event_<call>(void) 475 * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
476 * { 476 * {
477 * int ret; 477 * int ret;
478 * 478 *
@@ -483,7 +483,7 @@ static void ftrace_profile_disable_##call(void) \
483 * return ret; 483 * return ret;
484 * } 484 * }
485 * 485 *
486 * static void ftrace_unreg_event_<call>(void) 486 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
487 * { 487 * {
488 * unregister_trace_<call>(ftrace_raw_event_<call>); 488 * unregister_trace_<call>(ftrace_raw_event_<call>);
489 * } 489 * }
@@ -492,7 +492,7 @@ static void ftrace_profile_disable_##call(void) \
492 * .trace = ftrace_raw_output_<call>, <-- stage 2 492 * .trace = ftrace_raw_output_<call>, <-- stage 2
493 * }; 493 * };
494 * 494 *
495 * static int ftrace_raw_init_event_<call>(void) 495 * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
496 * { 496 * {
497 * int id; 497 * int id;
498 * 498 *
@@ -589,7 +589,7 @@ static void ftrace_raw_event_##call(proto) \
589 event, irq_flags, pc); \ 589 event, irq_flags, pc); \
590} \ 590} \
591 \ 591 \
592static int ftrace_raw_reg_event_##call(void *ptr) \ 592static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
593{ \ 593{ \
594 int ret; \ 594 int ret; \
595 \ 595 \
@@ -600,7 +600,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \
600 return ret; \ 600 return ret; \
601} \ 601} \
602 \ 602 \
603static void ftrace_raw_unreg_event_##call(void *ptr) \ 603static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
604{ \ 604{ \
605 unregister_trace_##call(ftrace_raw_event_##call); \ 605 unregister_trace_##call(ftrace_raw_event_##call); \
606} \ 606} \
@@ -609,7 +609,7 @@ static struct trace_event ftrace_event_type_##call = { \
609 .trace = ftrace_raw_output_##call, \ 609 .trace = ftrace_raw_output_##call, \
610}; \ 610}; \
611 \ 611 \
612static int ftrace_raw_init_event_##call(void) \ 612static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
613{ \ 613{ \
614 int id; \ 614 int id; \
615 \ 615 \
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
649 * struct ftrace_event_call *event_call = &event_<call>; 649 * struct ftrace_event_call *event_call = &event_<call>;
650 * extern void perf_tp_event(int, u64, u64, void *, int); 650 * extern void perf_tp_event(int, u64, u64, void *, int);
651 * struct ftrace_raw_##call *entry; 651 * struct ftrace_raw_##call *entry;
652 * struct perf_trace_buf *trace_buf;
652 * u64 __addr = 0, __count = 1; 653 * u64 __addr = 0, __count = 1;
653 * unsigned long irq_flags; 654 * unsigned long irq_flags;
654 * struct trace_entry *ent; 655 * struct trace_entry *ent;
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
673 * __cpu = smp_processor_id(); 674 * __cpu = smp_processor_id();
674 * 675 *
675 * if (in_nmi()) 676 * if (in_nmi())
676 * raw_data = rcu_dereference(trace_profile_buf_nmi); 677 * trace_buf = rcu_dereference(perf_trace_buf_nmi);
677 * else 678 * else
678 * raw_data = rcu_dereference(trace_profile_buf); 679 * trace_buf = rcu_dereference(perf_trace_buf);
679 * 680 *
680 * if (!raw_data) 681 * if (!trace_buf)
681 * goto end; 682 * goto end;
682 * 683 *
683 * raw_data = per_cpu_ptr(raw_data, __cpu); 684 * trace_buf = per_cpu_ptr(trace_buf, __cpu);
685 *
686 * // Avoid recursion from perf that could mess up the buffer
687 * if (trace_buf->recursion++)
688 * goto end_recursion;
689 *
690 * raw_data = trace_buf->buf;
691 *
692 * // Make recursion update visible before entering perf_tp_event
693 * // so that we protect from perf recursions.
694 *
695 * barrier();
684 * 696 *
685 * //zero dead bytes from alignment to avoid stack leak to userspace: 697 * //zero dead bytes from alignment to avoid stack leak to userspace:
686 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; 698 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \
713{ \ 725{ \
714 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 726 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
715 struct ftrace_event_call *event_call = &event_##call; \ 727 struct ftrace_event_call *event_call = &event_##call; \
716 extern void perf_tp_event(int, u64, u64, void *, int); \ 728 extern void perf_tp_event(int, u64, u64, void *, int); \
717 struct ftrace_raw_##call *entry; \ 729 struct ftrace_raw_##call *entry; \
730 struct perf_trace_buf *trace_buf; \
718 u64 __addr = 0, __count = 1; \ 731 u64 __addr = 0, __count = 1; \
719 unsigned long irq_flags; \ 732 unsigned long irq_flags; \
720 struct trace_entry *ent; \ 733 struct trace_entry *ent; \
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \
739 __cpu = smp_processor_id(); \ 752 __cpu = smp_processor_id(); \
740 \ 753 \
741 if (in_nmi()) \ 754 if (in_nmi()) \
742 raw_data = rcu_dereference(trace_profile_buf_nmi); \ 755 trace_buf = rcu_dereference(perf_trace_buf_nmi); \
743 else \ 756 else \
744 raw_data = rcu_dereference(trace_profile_buf); \ 757 trace_buf = rcu_dereference(perf_trace_buf); \
745 \ 758 \
746 if (!raw_data) \ 759 if (!trace_buf) \
747 goto end; \ 760 goto end; \
748 \ 761 \
749 raw_data = per_cpu_ptr(raw_data, __cpu); \ 762 trace_buf = per_cpu_ptr(trace_buf, __cpu); \
763 if (trace_buf->recursion++) \
764 goto end_recursion; \
765 \
766 barrier(); \
767 \
768 raw_data = trace_buf->buf; \
750 \ 769 \
751 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ 770 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
752 entry = (struct ftrace_raw_##call *)raw_data; \ 771 entry = (struct ftrace_raw_##call *)raw_data; \
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \
761 perf_tp_event(event_call->id, __addr, __count, entry, \ 780 perf_tp_event(event_call->id, __addr, __count, entry, \
762 __entry_size); \ 781 __entry_size); \
763 \ 782 \
783end_recursion: \
784 trace_buf->recursion--; \
764end: \ 785end: \
765 local_irq_restore(irq_flags); \ 786 local_irq_restore(irq_flags); \
766 \ 787 \
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index e972f0a40f8d..51ee17d3632a 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id);
39void set_syscall_exit_id(int num, int id); 39void set_syscall_exit_id(int num, int id);
40extern struct trace_event event_syscall_enter; 40extern struct trace_event event_syscall_enter;
41extern struct trace_event event_syscall_exit; 41extern struct trace_event event_syscall_exit;
42extern int reg_event_syscall_enter(void *ptr); 42
43extern void unreg_event_syscall_enter(void *ptr);
44extern int reg_event_syscall_exit(void *ptr);
45extern void unreg_event_syscall_exit(void *ptr);
46extern int syscall_enter_format(struct ftrace_event_call *call, 43extern int syscall_enter_format(struct ftrace_event_call *call,
47 struct trace_seq *s); 44 struct trace_seq *s);
48extern int syscall_exit_format(struct ftrace_event_call *call, 45extern int syscall_exit_format(struct ftrace_event_call *call,
49 struct trace_seq *s); 46 struct trace_seq *s);
50extern int syscall_enter_define_fields(struct ftrace_event_call *call); 47extern int syscall_enter_define_fields(struct ftrace_event_call *call);
51extern int syscall_exit_define_fields(struct ftrace_event_call *call); 48extern int syscall_exit_define_fields(struct ftrace_event_call *call);
49extern int reg_event_syscall_enter(struct ftrace_event_call *call);
50extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
51extern int reg_event_syscall_exit(struct ftrace_event_call *call);
52extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
53extern int
54ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
52enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); 55enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
53enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); 56enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
54#endif 57#endif
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5240d75f4c60..84495958e703 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
90 */ 90 */
91static struct kprobe_blackpoint kprobe_blacklist[] = { 91static struct kprobe_blackpoint kprobe_blacklist[] = {
92 {"preempt_schedule",}, 92 {"preempt_schedule",},
93 {"native_get_debugreg",},
94 {"irq_entries_start",},
95 {"common_interrupt",},
93 {NULL} /* Terminator */ 96 {NULL} /* Terminator */
94}; 97};
95 98
@@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
673 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 676 return (kprobe_opcode_t *)(((char *)addr) + p->offset);
674} 677}
675 678
679/* Check passed kprobe is valid and return kprobe in kprobe_table. */
680static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
681{
682 struct kprobe *old_p, *list_p;
683
684 old_p = get_kprobe(p->addr);
685 if (unlikely(!old_p))
686 return NULL;
687
688 if (p != old_p) {
689 list_for_each_entry_rcu(list_p, &old_p->list, list)
690 if (list_p == p)
691 /* kprobe p is a valid probe */
692 goto valid;
693 return NULL;
694 }
695valid:
696 return old_p;
697}
698
699/* Return error if the kprobe is being re-registered */
700static inline int check_kprobe_rereg(struct kprobe *p)
701{
702 int ret = 0;
703 struct kprobe *old_p;
704
705 mutex_lock(&kprobe_mutex);
706 old_p = __get_valid_kprobe(p);
707 if (old_p)
708 ret = -EINVAL;
709 mutex_unlock(&kprobe_mutex);
710 return ret;
711}
712
676int __kprobes register_kprobe(struct kprobe *p) 713int __kprobes register_kprobe(struct kprobe *p)
677{ 714{
678 int ret = 0; 715 int ret = 0;
@@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p)
685 return -EINVAL; 722 return -EINVAL;
686 p->addr = addr; 723 p->addr = addr;
687 724
725 ret = check_kprobe_rereg(p);
726 if (ret)
727 return ret;
728
688 preempt_disable(); 729 preempt_disable();
689 if (!kernel_text_address((unsigned long) p->addr) || 730 if (!kernel_text_address((unsigned long) p->addr) ||
690 in_kprobes_functions((unsigned long) p->addr)) { 731 in_kprobes_functions((unsigned long) p->addr)) {
@@ -754,26 +795,6 @@ out:
754} 795}
755EXPORT_SYMBOL_GPL(register_kprobe); 796EXPORT_SYMBOL_GPL(register_kprobe);
756 797
757/* Check passed kprobe is valid and return kprobe in kprobe_table. */
758static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
759{
760 struct kprobe *old_p, *list_p;
761
762 old_p = get_kprobe(p->addr);
763 if (unlikely(!old_p))
764 return NULL;
765
766 if (p != old_p) {
767 list_for_each_entry_rcu(list_p, &old_p->list, list)
768 if (list_p == p)
769 /* kprobe p is a valid probe */
770 goto valid;
771 return NULL;
772 }
773valid:
774 return old_p;
775}
776
777/* 798/*
778 * Unregister a kprobe without a scheduler synchronization. 799 * Unregister a kprobe without a scheduler synchronization.
779 */ 800 */
@@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1141 arch_remove_kprobe(p); 1162 arch_remove_kprobe(p);
1142} 1163}
1143 1164
1165void __kprobes dump_kprobe(struct kprobe *kp)
1166{
1167 printk(KERN_WARNING "Dumping kprobe:\n");
1168 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
1169 kp->symbol_name, kp->addr, kp->offset);
1170}
1171
1144/* Module notifier call back, checking kprobes on the module */ 1172/* Module notifier call back, checking kprobes on the module */
1145static int __kprobes kprobes_module_callback(struct notifier_block *nb, 1173static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1146 unsigned long val, void *data) 1174 unsigned long val, void *data)
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 61d5aa5eced3..acd24e7643eb 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
558 558
559static ATOMIC_NOTIFIER_HEAD(die_chain); 559static ATOMIC_NOTIFIER_HEAD(die_chain);
560 560
561int notrace notify_die(enum die_val val, const char *str, 561int notrace __kprobes notify_die(enum die_val val, const char *str,
562 struct pt_regs *regs, long err, int trap, int sig) 562 struct pt_regs *regs, long err, int trap, int sig)
563{ 563{
564 struct die_args args = { 564 struct die_args args = {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..f05671609a89 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -428,6 +428,23 @@ config BLK_DEV_IO_TRACE
428 428
429 If unsure, say N. 429 If unsure, say N.
430 430
431config KPROBE_EVENT
432 depends on KPROBES
433 depends on X86
434 bool "Enable kprobes-based dynamic events"
435 select TRACING
436 default y
437 help
438 This allows the user to add tracing events (similar to tracepoints) on the fly
439 via the ftrace interface. See Documentation/trace/kprobetrace.txt
440 for more details.
441
442 Those events can be inserted wherever kprobes can probe, and record
443 various register and memory values.
444
445 This option is also required by perf-probe subcommand of perf tools. If
446 you want to use perf tools, this option is strongly recommended.
447
431config DYNAMIC_FTRACE 448config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 449 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 450 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..edc3a3cca1a1 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 57obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 58
58libftrace-y := ftrace.o 59libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4959ada9e0bb..b4e4212e66d7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -101,6 +101,29 @@ struct syscall_trace_exit {
101 unsigned long ret; 101 unsigned long ret;
102}; 102};
103 103
104struct kprobe_trace_entry {
105 struct trace_entry ent;
106 unsigned long ip;
107 int nargs;
108 unsigned long args[];
109};
110
111#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
112 (offsetof(struct kprobe_trace_entry, args) + \
113 (sizeof(unsigned long) * (n)))
114
115struct kretprobe_trace_entry {
116 struct trace_entry ent;
117 unsigned long func;
118 unsigned long ret_ip;
119 int nargs;
120 unsigned long args[];
121};
122
123#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
124 (offsetof(struct kretprobe_trace_entry, args) + \
125 (sizeof(unsigned long) * (n)))
126
104/* 127/*
105 * trace_flag_type is an enumeration that holds different 128 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 129 * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,44 +8,39 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12struct perf_trace_buf *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
19 14
20char *trace_profile_buf_nmi; 15struct perf_trace_buf *perf_trace_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); 16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
22 17
23/* Count the events in use (per event id, not per instance) */ 18/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 19static int total_profile_count;
25 20
26static int ftrace_profile_enable_event(struct ftrace_event_call *event) 21static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{ 22{
28 char *buf; 23 struct perf_trace_buf *buf;
29 int ret = -ENOMEM; 24 int ret = -ENOMEM;
30 25
31 if (atomic_inc_return(&event->profile_count)) 26 if (atomic_inc_return(&event->profile_count))
32 return 0; 27 return 0;
33 28
34 if (!total_profile_count) { 29 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 30 buf = alloc_percpu(struct perf_trace_buf);
36 if (!buf) 31 if (!buf)
37 goto fail_buf; 32 goto fail_buf;
38 33
39 rcu_assign_pointer(trace_profile_buf, buf); 34 rcu_assign_pointer(perf_trace_buf, buf);
40 35
41 buf = (char *)alloc_percpu(profile_buf_t); 36 buf = alloc_percpu(struct perf_trace_buf);
42 if (!buf) 37 if (!buf)
43 goto fail_buf_nmi; 38 goto fail_buf_nmi;
44 39
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 40 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 41 }
47 42
48 ret = event->profile_enable(); 43 ret = event->profile_enable(event);
49 if (!ret) { 44 if (!ret) {
50 total_profile_count++; 45 total_profile_count++;
51 return 0; 46 return 0;
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 48
54fail_buf_nmi: 49fail_buf_nmi:
55 if (!total_profile_count) { 50 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 51 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 52 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 53 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 54 perf_trace_buf = NULL;
60 } 55 }
61fail_buf: 56fail_buf:
62 atomic_dec(&event->profile_count); 57 atomic_dec(&event->profile_count);
@@ -84,19 +79,19 @@ int ftrace_profile_enable(int event_id)
84 79
85static void ftrace_profile_disable_event(struct ftrace_event_call *event) 80static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{ 81{
87 char *buf, *nmi_buf; 82 struct perf_trace_buf *buf, *nmi_buf;
88 83
89 if (!atomic_add_negative(-1, &event->profile_count)) 84 if (!atomic_add_negative(-1, &event->profile_count))
90 return; 85 return;
91 86
92 event->profile_disable(); 87 event->profile_disable(event);
93 88
94 if (!--total_profile_count) { 89 if (!--total_profile_count) {
95 buf = trace_profile_buf; 90 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 91 rcu_assign_pointer(perf_trace_buf, NULL);
97 92
98 nmi_buf = trace_profile_buf_nmi; 93 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 94 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 95
101 /* 96 /*
102 * Ensure every events in profiling have finished before 97 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7c18d154ea28..1d18315dc836 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields); 94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 95
96#ifdef CONFIG_MODULES 96void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 97{
100 struct ftrace_event_field *field, *next; 98 struct ftrace_event_field *field, *next;
101 99
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 105 }
108} 106}
109 107
110#endif /* CONFIG_MODULES */
111
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 108static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 109 int enable)
114{ 110{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
117 if (call->enabled) { 113 if (call->enabled) {
118 call->enabled = 0; 114 call->enabled = 0;
119 tracing_stop_cmdline_record(); 115 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 116 call->unregfunc(call);
121 } 117 }
122 break; 118 break;
123 case 1: 119 case 1:
124 if (!call->enabled) { 120 if (!call->enabled) {
125 call->enabled = 1; 121 call->enabled = 1;
126 tracing_start_cmdline_record(); 122 tracing_start_cmdline_record();
127 call->regfunc(call->data); 123 call->regfunc(call);
128 } 124 }
129 break; 125 break;
130 } 126 }
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
937 return 0; 933 return 0;
938} 934}
939 935
940#define for_each_event(event, start, end) \ 936static int __trace_add_event_call(struct ftrace_event_call *call)
941 for (event = start; \ 937{
942 (unsigned long)event < (unsigned long)end; \ 938 struct dentry *d_events;
943 event++) 939 int ret;
944 940
945#ifdef CONFIG_MODULES 941 if (!call->name)
942 return -EINVAL;
946 943
947static LIST_HEAD(ftrace_module_file_list); 944 if (call->raw_init) {
945 ret = call->raw_init(call);
946 if (ret < 0) {
947 if (ret != -ENOSYS)
948 pr_warning("Could not initialize trace "
949 "events/%s\n", call->name);
950 return ret;
951 }
952 }
948 953
949/* 954 d_events = event_trace_events_dir();
950 * Modules must own their file_operations to keep up with 955 if (!d_events)
951 * reference counting. 956 return -ENOENT;
952 */ 957
953struct ftrace_module_file_ops { 958 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
954 struct list_head list; 959 &ftrace_enable_fops, &ftrace_event_filter_fops,
955 struct module *mod; 960 &ftrace_event_format_fops);
956 struct file_operations id; 961 if (!ret)
957 struct file_operations enable; 962 list_add(&call->list, &ftrace_events);
958 struct file_operations format; 963
959 struct file_operations filter; 964 return ret;
960}; 965}
966
967/* Add an additional event_call dynamically */
968int trace_add_event_call(struct ftrace_event_call *call)
969{
970 int ret;
971 mutex_lock(&event_mutex);
972 ret = __trace_add_event_call(call);
973 mutex_unlock(&event_mutex);
974 return ret;
975}
961 976
962static void remove_subsystem_dir(const char *name) 977static void remove_subsystem_dir(const char *name)
963{ 978{
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
985 } 1000 }
986} 1001}
987 1002
1003/*
1004 * Must be called under locking both of event_mutex and trace_event_mutex.
1005 */
1006static void __trace_remove_event_call(struct ftrace_event_call *call)
1007{
1008 ftrace_event_enable_disable(call, 0);
1009 if (call->event)
1010 __unregister_ftrace_event(call->event);
1011 debugfs_remove_recursive(call->dir);
1012 list_del(&call->list);
1013 trace_destroy_fields(call);
1014 destroy_preds(call);
1015 remove_subsystem_dir(call->system);
1016}
1017
1018/* Remove an event_call */
1019void trace_remove_event_call(struct ftrace_event_call *call)
1020{
1021 mutex_lock(&event_mutex);
1022 down_write(&trace_event_mutex);
1023 __trace_remove_event_call(call);
1024 up_write(&trace_event_mutex);
1025 mutex_unlock(&event_mutex);
1026}
1027
1028#define for_each_event(event, start, end) \
1029 for (event = start; \
1030 (unsigned long)event < (unsigned long)end; \
1031 event++)
1032
1033#ifdef CONFIG_MODULES
1034
1035static LIST_HEAD(ftrace_module_file_list);
1036
1037/*
1038 * Modules must own their file_operations to keep up with
1039 * reference counting.
1040 */
1041struct ftrace_module_file_ops {
1042 struct list_head list;
1043 struct module *mod;
1044 struct file_operations id;
1045 struct file_operations enable;
1046 struct file_operations format;
1047 struct file_operations filter;
1048};
1049
988static struct ftrace_module_file_ops * 1050static struct ftrace_module_file_ops *
989trace_create_file_ops(struct module *mod) 1051trace_create_file_ops(struct module *mod)
990{ 1052{
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
1042 if (!call->name) 1104 if (!call->name)
1043 continue; 1105 continue;
1044 if (call->raw_init) { 1106 if (call->raw_init) {
1045 ret = call->raw_init(); 1107 ret = call->raw_init(call);
1046 if (ret < 0) { 1108 if (ret < 0) {
1047 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS)
1048 pr_warning("Could not initialize trace " 1110 pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
1060 return; 1122 return;
1061 } 1123 }
1062 call->mod = mod; 1124 call->mod = mod;
1063 list_add(&call->list, &ftrace_events); 1125 ret = event_create_dir(call, d_events,
1064 event_create_dir(call, d_events, 1126 &file_ops->id, &file_ops->enable,
1065 &file_ops->id, &file_ops->enable, 1127 &file_ops->filter, &file_ops->format);
1066 &file_ops->filter, &file_ops->format); 1128 if (!ret)
1129 list_add(&call->list, &ftrace_events);
1067 } 1130 }
1068} 1131}
1069 1132
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1140 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1078 if (call->mod == mod) { 1141 if (call->mod == mod) {
1079 found = true; 1142 found = true;
1080 ftrace_event_enable_disable(call, 0); 1143 __trace_remove_event_call(call);
1081 if (call->event)
1082 __unregister_ftrace_event(call->event);
1083 debugfs_remove_recursive(call->dir);
1084 list_del(&call->list);
1085 trace_destroy_fields(call);
1086 destroy_preds(call);
1087 remove_subsystem_dir(call->system);
1088 } 1144 }
1089 } 1145 }
1090 1146
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void)
1202 if (!call->name) 1258 if (!call->name)
1203 continue; 1259 continue;
1204 if (call->raw_init) { 1260 if (call->raw_init) {
1205 ret = call->raw_init(); 1261 ret = call->raw_init(call);
1206 if (ret < 0) { 1262 if (ret < 0) {
1207 if (ret != -ENOSYS) 1263 if (ret != -ENOSYS)
1208 pr_warning("Could not initialize trace " 1264 pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void)
1210 continue; 1266 continue;
1211 } 1267 }
1212 } 1268 }
1213 list_add(&call->list, &ftrace_events); 1269 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1214 event_create_dir(call, d_events, &ftrace_event_id_fops, 1270 &ftrace_enable_fops,
1215 &ftrace_enable_fops, &ftrace_event_filter_fops, 1271 &ftrace_event_filter_fops,
1216 &ftrace_event_format_fops); 1272 &ftrace_event_format_fops);
1273 if (!ret)
1274 list_add(&call->list, &ftrace_events);
1217 } 1275 }
1218 1276
1219 while (true) { 1277 while (true) {
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 31da218ee10f..934d81fb4ca4 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -134,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
134 134
135#include "trace_entries.h" 135#include "trace_entries.h"
136 136
137
138#undef __field 137#undef __field
139#define __field(type, item) \ 138#define __field(type, item) \
140 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -196,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
196 195
197#include "trace_entries.h" 196#include "trace_entries.h"
198 197
198static int ftrace_raw_init_event(struct ftrace_event_call *call)
199{
200 INIT_LIST_HEAD(&call->fields);
201 return 0;
202}
199 203
200#undef __field 204#undef __field
201#define __field(type, item) 205#define __field(type, item)
@@ -214,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
214 218
215#undef FTRACE_ENTRY 219#undef FTRACE_ENTRY
216#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
217static int ftrace_raw_init_event_##call(void); \
218 \ 221 \
219struct ftrace_event_call __used \ 222struct ftrace_event_call __used \
220__attribute__((__aligned__(4))) \ 223__attribute__((__aligned__(4))) \
@@ -222,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
222 .name = #call, \ 225 .name = #call, \
223 .id = type, \ 226 .id = type, \
224 .system = __stringify(TRACE_SYSTEM), \ 227 .system = __stringify(TRACE_SYSTEM), \
225 .raw_init = ftrace_raw_init_event_##call, \ 228 .raw_init = ftrace_raw_init_event, \
226 .show_format = ftrace_format_##call, \ 229 .show_format = ftrace_format_##call, \
227 .define_fields = ftrace_define_fields_##call, \ 230 .define_fields = ftrace_define_fields_##call, \
228}; \ 231}; \
229static int ftrace_raw_init_event_##call(void) \
230{ \
231 INIT_LIST_HEAD(&event_##call.fields); \
232 return 0; \
233} \
234 232
235#include "trace_entries.h" 233#include "trace_entries.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..3696476f307d
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1513 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
247 } else if (ff->func == fetch_retvalue)
248 ret = snprintf(buf, n, "$retval");
249 else if (ff->func == fetch_stack_address)
250 ret = snprintf(buf, n, "$stack");
251 else if (ff->func == fetch_indirect) {
252 struct indirect_fetch_data *id = ff->data;
253 size_t l = 0;
254 ret = snprintf(buf, n, "%+ld(", id->offset);
255 if (ret >= n)
256 goto end;
257 l += ret;
258 ret = probe_arg_string(buf + l, n - l, &id->orig);
259 if (ret < 0)
260 goto end;
261 l += ret;
262 ret = snprintf(buf + l, n - l, ")");
263 ret += l;
264 }
265end:
266 if (ret >= n)
267 return -ENOSPC;
268 return ret;
269}
270
271static int register_probe_event(struct trace_probe *tp);
272static void unregister_probe_event(struct trace_probe *tp);
273
274static DEFINE_MUTEX(probe_lock);
275static LIST_HEAD(probe_list);
276
277static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
278static int kretprobe_dispatcher(struct kretprobe_instance *ri,
279 struct pt_regs *regs);
280
281/*
282 * Allocate new trace_probe and initialize it (including kprobes).
283 */
284static struct trace_probe *alloc_trace_probe(const char *group,
285 const char *event,
286 void *addr,
287 const char *symbol,
288 unsigned long offs,
289 int nargs, int is_return)
290{
291 struct trace_probe *tp;
292
293 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
294 if (!tp)
295 return ERR_PTR(-ENOMEM);
296
297 if (symbol) {
298 tp->symbol = kstrdup(symbol, GFP_KERNEL);
299 if (!tp->symbol)
300 goto error;
301 tp->rp.kp.symbol_name = tp->symbol;
302 tp->rp.kp.offset = offs;
303 } else
304 tp->rp.kp.addr = addr;
305
306 if (is_return)
307 tp->rp.handler = kretprobe_dispatcher;
308 else
309 tp->rp.kp.pre_handler = kprobe_dispatcher;
310
311 if (!event)
312 goto error;
313 tp->call.name = kstrdup(event, GFP_KERNEL);
314 if (!tp->call.name)
315 goto error;
316
317 if (!group)
318 goto error;
319 tp->call.system = kstrdup(group, GFP_KERNEL);
320 if (!tp->call.system)
321 goto error;
322
323 INIT_LIST_HEAD(&tp->list);
324 return tp;
325error:
326 kfree(tp->call.name);
327 kfree(tp->symbol);
328 kfree(tp);
329 return ERR_PTR(-ENOMEM);
330}
331
332static void free_probe_arg(struct probe_arg *arg)
333{
334 if (arg->fetch.func == fetch_symbol)
335 free_symbol_cache(arg->fetch.data);
336 else if (arg->fetch.func == fetch_indirect)
337 free_indirect_fetch_data(arg->fetch.data);
338 kfree(arg->name);
339}
340
341static void free_trace_probe(struct trace_probe *tp)
342{
343 int i;
344
345 for (i = 0; i < tp->nr_args; i++)
346 free_probe_arg(&tp->args[i]);
347
348 kfree(tp->call.system);
349 kfree(tp->call.name);
350 kfree(tp->symbol);
351 kfree(tp);
352}
353
354static struct trace_probe *find_probe_event(const char *event,
355 const char *group)
356{
357 struct trace_probe *tp;
358
359 list_for_each_entry(tp, &probe_list, list)
360 if (strcmp(tp->call.name, event) == 0 &&
361 strcmp(tp->call.system, group) == 0)
362 return tp;
363 return NULL;
364}
365
366/* Unregister a trace_probe and probe_event: call with locking probe_lock */
367static void unregister_trace_probe(struct trace_probe *tp)
368{
369 if (probe_is_return(tp))
370 unregister_kretprobe(&tp->rp);
371 else
372 unregister_kprobe(&tp->rp.kp);
373 list_del(&tp->list);
374 unregister_probe_event(tp);
375}
376
377/* Register a trace_probe and probe_event */
378static int register_trace_probe(struct trace_probe *tp)
379{
380 struct trace_probe *old_tp;
381 int ret;
382
383 mutex_lock(&probe_lock);
384
385 /* register as an event */
386 old_tp = find_probe_event(tp->call.name, tp->call.system);
387 if (old_tp) {
388 /* delete old event */
389 unregister_trace_probe(old_tp);
390 free_trace_probe(old_tp);
391 }
392 ret = register_probe_event(tp);
393 if (ret) {
394 pr_warning("Faild to register probe event(%d)\n", ret);
395 goto end;
396 }
397
398 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
399 if (probe_is_return(tp))
400 ret = register_kretprobe(&tp->rp);
401 else
402 ret = register_kprobe(&tp->rp.kp);
403
404 if (ret) {
405 pr_warning("Could not insert probe(%d)\n", ret);
406 if (ret == -EILSEQ) {
407 pr_warning("Probing address(0x%p) is not an "
408 "instruction boundary.\n",
409 tp->rp.kp.addr);
410 ret = -EINVAL;
411 }
412 unregister_probe_event(tp);
413 } else
414 list_add_tail(&tp->list, &probe_list);
415end:
416 mutex_unlock(&probe_lock);
417 return ret;
418}
419
420/* Split symbol and offset. */
421static int split_symbol_offset(char *symbol, unsigned long *offset)
422{
423 char *tmp;
424 int ret;
425
426 if (!offset)
427 return -EINVAL;
428
429 tmp = strchr(symbol, '+');
430 if (tmp) {
431 /* skip sign because strict_strtol doesn't accept '+' */
432 ret = strict_strtoul(tmp + 1, 0, offset);
433 if (ret)
434 return ret;
435 *tmp = '\0';
436 } else
437 *offset = 0;
438 return 0;
439}
440
441#define PARAM_MAX_ARGS 16
442#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
443
444static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
445{
446 int ret = 0;
447 unsigned long param;
448
449 if (strcmp(arg, "retval") == 0) {
450 if (is_return) {
451 ff->func = fetch_retvalue;
452 ff->data = NULL;
453 } else
454 ret = -EINVAL;
455 } else if (strncmp(arg, "stack", 5) == 0) {
456 if (arg[5] == '\0') {
457 ff->func = fetch_stack_address;
458 ff->data = NULL;
459 } else if (isdigit(arg[5])) {
460 ret = strict_strtoul(arg + 5, 10, &param);
461 if (ret || param > PARAM_MAX_STACK)
462 ret = -EINVAL;
463 else {
464 ff->func = fetch_stack;
465 ff->data = (void *)param;
466 }
467 } else
468 ret = -EINVAL;
469 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
470 ret = strict_strtoul(arg + 3, 10, &param);
471 if (ret || param > PARAM_MAX_ARGS)
472 ret = -EINVAL;
473 else {
474 ff->func = fetch_argument;
475 ff->data = (void *)param;
476 }
477 } else
478 ret = -EINVAL;
479 return ret;
480}
481
482static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
483{
484 int ret = 0;
485 unsigned long param;
486 long offset;
487 char *tmp;
488
489 switch (arg[0]) {
490 case '$':
491 ret = parse_probe_vars(arg + 1, ff, is_return);
492 break;
493 case '%': /* named register */
494 ret = regs_query_register_offset(arg + 1);
495 if (ret >= 0) {
496 ff->func = fetch_register;
497 ff->data = (void *)(unsigned long)ret;
498 ret = 0;
499 }
500 break;
501 case '@': /* memory or symbol */
502 if (isdigit(arg[1])) {
503 ret = strict_strtoul(arg + 1, 0, &param);
504 if (ret)
505 break;
506 ff->func = fetch_memory;
507 ff->data = (void *)param;
508 } else {
509 ret = split_symbol_offset(arg + 1, &offset);
510 if (ret)
511 break;
512 ff->data = alloc_symbol_cache(arg + 1, offset);
513 if (ff->data)
514 ff->func = fetch_symbol;
515 else
516 ret = -EINVAL;
517 }
518 break;
519 case '+': /* indirect memory */
520 case '-':
521 tmp = strchr(arg, '(');
522 if (!tmp) {
523 ret = -EINVAL;
524 break;
525 }
526 *tmp = '\0';
527 ret = strict_strtol(arg + 1, 0, &offset);
528 if (ret)
529 break;
530 if (arg[0] == '-')
531 offset = -offset;
532 arg = tmp + 1;
533 tmp = strrchr(arg, ')');
534 if (tmp) {
535 struct indirect_fetch_data *id;
536 *tmp = '\0';
537 id = kzalloc(sizeof(struct indirect_fetch_data),
538 GFP_KERNEL);
539 if (!id)
540 return -ENOMEM;
541 id->offset = offset;
542 ret = parse_probe_arg(arg, &id->orig, is_return);
543 if (ret)
544 kfree(id);
545 else {
546 ff->func = fetch_indirect;
547 ff->data = (void *)id;
548 }
549 } else
550 ret = -EINVAL;
551 break;
552 default:
553 /* TODO: support custom handler */
554 ret = -EINVAL;
555 }
556 return ret;
557}
558
559/* Return 1 if name is reserved or already used by another argument */
560static int conflict_field_name(const char *name,
561 struct probe_arg *args, int narg)
562{
563 int i;
564 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
565 if (strcmp(reserved_field_names[i], name) == 0)
566 return 1;
567 for (i = 0; i < narg; i++)
568 if (strcmp(args[i].name, name) == 0)
569 return 1;
570 return 0;
571}
572
573static int create_trace_probe(int argc, char **argv)
574{
575 /*
576 * Argument syntax:
577 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
578 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
579 * Fetch args:
580 * $argN : fetch Nth of function argument. (N:0-)
581 * $retval : fetch return value
582 * $stack : fetch stack address
583 * $stackN : fetch Nth of stack (N:0-)
584 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
585 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
586 * %REG : fetch register REG
587 * Indirect memory fetch:
588 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
589 * Alias name of args:
590 * NAME=FETCHARG : set NAME as alias of FETCHARG.
591 */
592 struct trace_probe *tp;
593 int i, ret = 0;
594 int is_return = 0;
595 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
596 unsigned long offset = 0;
597 void *addr = NULL;
598 char buf[MAX_EVENT_NAME_LEN];
599
600 if (argc < 2) {
601 pr_info("Probe point is not specified.\n");
602 return -EINVAL;
603 }
604
605 if (argv[0][0] == 'p')
606 is_return = 0;
607 else if (argv[0][0] == 'r')
608 is_return = 1;
609 else {
610 pr_info("Probe definition must be started with 'p' or 'r'.\n");
611 return -EINVAL;
612 }
613
614 if (argv[0][1] == ':') {
615 event = &argv[0][2];
616 if (strchr(event, '/')) {
617 group = event;
618 event = strchr(group, '/') + 1;
619 event[-1] = '\0';
620 if (strlen(group) == 0) {
621 pr_info("Group name is not specifiled\n");
622 return -EINVAL;
623 }
624 }
625 if (strlen(event) == 0) {
626 pr_info("Event name is not specifiled\n");
627 return -EINVAL;
628 }
629 }
630
631 if (isdigit(argv[1][0])) {
632 if (is_return) {
633 pr_info("Return probe point must be a symbol.\n");
634 return -EINVAL;
635 }
636 /* an address specified */
637 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
638 if (ret) {
639 pr_info("Failed to parse address.\n");
640 return ret;
641 }
642 } else {
643 /* a symbol specified */
644 symbol = argv[1];
645 /* TODO: support .init module functions */
646 ret = split_symbol_offset(symbol, &offset);
647 if (ret) {
648 pr_info("Failed to parse symbol.\n");
649 return ret;
650 }
651 if (offset && is_return) {
652 pr_info("Return probe must be used without offset.\n");
653 return -EINVAL;
654 }
655 }
656 argc -= 2; argv += 2;
657
658 /* setup a probe */
659 if (!group)
660 group = KPROBE_EVENT_SYSTEM;
661 if (!event) {
662 /* Make a new event name */
663 if (symbol)
664 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
665 is_return ? 'r' : 'p', symbol, offset);
666 else
667 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
668 is_return ? 'r' : 'p', addr);
669 event = buf;
670 }
671 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
672 is_return);
673 if (IS_ERR(tp)) {
674 pr_info("Failed to allocate trace_probe.(%d)\n",
675 (int)PTR_ERR(tp));
676 return PTR_ERR(tp);
677 }
678
679 /* parse arguments */
680 ret = 0;
681 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
682 /* Parse argument name */
683 arg = strchr(argv[i], '=');
684 if (arg)
685 *arg++ = '\0';
686 else
687 arg = argv[i];
688
689 if (conflict_field_name(argv[i], tp->args, i)) {
690 pr_info("Argument%d name '%s' conflicts with "
691 "another field.\n", i, argv[i]);
692 ret = -EINVAL;
693 goto error;
694 }
695
696 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
697
698 /* Parse fetch argument */
699 if (strlen(arg) > MAX_ARGSTR_LEN) {
700 pr_info("Argument%d(%s) is too long.\n", i, arg);
701 ret = -ENOSPC;
702 goto error;
703 }
704 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
705 if (ret) {
706 pr_info("Parse error at argument%d. (%d)\n", i, ret);
707 goto error;
708 }
709 }
710 tp->nr_args = i;
711
712 ret = register_trace_probe(tp);
713 if (ret)
714 goto error;
715 return 0;
716
717error:
718 free_trace_probe(tp);
719 return ret;
720}
721
722static void cleanup_all_probes(void)
723{
724 struct trace_probe *tp;
725
726 mutex_lock(&probe_lock);
727 /* TODO: Use batch unregistration */
728 while (!list_empty(&probe_list)) {
729 tp = list_entry(probe_list.next, struct trace_probe, list);
730 unregister_trace_probe(tp);
731 free_trace_probe(tp);
732 }
733 mutex_unlock(&probe_lock);
734}
735
736
737/* Probes listing interfaces */
738static void *probes_seq_start(struct seq_file *m, loff_t *pos)
739{
740 mutex_lock(&probe_lock);
741 return seq_list_start(&probe_list, *pos);
742}
743
744static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
745{
746 return seq_list_next(v, &probe_list, pos);
747}
748
749static void probes_seq_stop(struct seq_file *m, void *v)
750{
751 mutex_unlock(&probe_lock);
752}
753
754static int probes_seq_show(struct seq_file *m, void *v)
755{
756 struct trace_probe *tp = v;
757 int i, ret;
758 char buf[MAX_ARGSTR_LEN + 1];
759
760 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
761 seq_printf(m, ":%s", tp->call.name);
762
763 if (tp->symbol)
764 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
765 else
766 seq_printf(m, " 0x%p", tp->rp.kp.addr);
767
768 for (i = 0; i < tp->nr_args; i++) {
769 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
770 if (ret < 0) {
771 pr_warning("Argument%d decoding error(%d).\n", i, ret);
772 return ret;
773 }
774 seq_printf(m, " %s=%s", tp->args[i].name, buf);
775 }
776 seq_printf(m, "\n");
777 return 0;
778}
779
780static const struct seq_operations probes_seq_op = {
781 .start = probes_seq_start,
782 .next = probes_seq_next,
783 .stop = probes_seq_stop,
784 .show = probes_seq_show
785};
786
787static int probes_open(struct inode *inode, struct file *file)
788{
789 if ((file->f_mode & FMODE_WRITE) &&
790 (file->f_flags & O_TRUNC))
791 cleanup_all_probes();
792
793 return seq_open(file, &probes_seq_op);
794}
795
796static int command_trace_probe(const char *buf)
797{
798 char **argv;
799 int argc = 0, ret = 0;
800
801 argv = argv_split(GFP_KERNEL, buf, &argc);
802 if (!argv)
803 return -ENOMEM;
804
805 if (argc)
806 ret = create_trace_probe(argc, argv);
807
808 argv_free(argv);
809 return ret;
810}
811
812#define WRITE_BUFSIZE 128
813
814static ssize_t probes_write(struct file *file, const char __user *buffer,
815 size_t count, loff_t *ppos)
816{
817 char *kbuf, *tmp;
818 int ret;
819 size_t done;
820 size_t size;
821
822 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
823 if (!kbuf)
824 return -ENOMEM;
825
826 ret = done = 0;
827 while (done < count) {
828 size = count - done;
829 if (size >= WRITE_BUFSIZE)
830 size = WRITE_BUFSIZE - 1;
831 if (copy_from_user(kbuf, buffer + done, size)) {
832 ret = -EFAULT;
833 goto out;
834 }
835 kbuf[size] = '\0';
836 tmp = strchr(kbuf, '\n');
837 if (tmp) {
838 *tmp = '\0';
839 size = tmp - kbuf + 1;
840 } else if (done + size < count) {
841 pr_warning("Line length is too long: "
842 "Should be less than %d.", WRITE_BUFSIZE);
843 ret = -EINVAL;
844 goto out;
845 }
846 done += size;
847 /* Remove comments */
848 tmp = strchr(kbuf, '#');
849 if (tmp)
850 *tmp = '\0';
851
852 ret = command_trace_probe(kbuf);
853 if (ret)
854 goto out;
855 }
856 ret = done;
857out:
858 kfree(kbuf);
859 return ret;
860}
861
862static const struct file_operations kprobe_events_ops = {
863 .owner = THIS_MODULE,
864 .open = probes_open,
865 .read = seq_read,
866 .llseek = seq_lseek,
867 .release = seq_release,
868 .write = probes_write,
869};
870
871/* Probes profiling interfaces */
872static int probes_profile_seq_show(struct seq_file *m, void *v)
873{
874 struct trace_probe *tp = v;
875
876 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
877 tp->rp.kp.nmissed);
878
879 return 0;
880}
881
882static const struct seq_operations profile_seq_op = {
883 .start = probes_seq_start,
884 .next = probes_seq_next,
885 .stop = probes_seq_stop,
886 .show = probes_profile_seq_show
887};
888
889static int profile_open(struct inode *inode, struct file *file)
890{
891 return seq_open(file, &profile_seq_op);
892}
893
894static const struct file_operations kprobe_profile_ops = {
895 .owner = THIS_MODULE,
896 .open = profile_open,
897 .read = seq_read,
898 .llseek = seq_lseek,
899 .release = seq_release,
900};
901
902/* Kprobe handler */
903static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
904{
905 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
906 struct kprobe_trace_entry *entry;
907 struct ring_buffer_event *event;
908 struct ring_buffer *buffer;
909 int size, i, pc;
910 unsigned long irq_flags;
911 struct ftrace_event_call *call = &tp->call;
912
913 tp->nhit++;
914
915 local_save_flags(irq_flags);
916 pc = preempt_count();
917
918 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
919
920 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
921 irq_flags, pc);
922 if (!event)
923 return 0;
924
925 entry = ring_buffer_event_data(event);
926 entry->nargs = tp->nr_args;
927 entry->ip = (unsigned long)kp->addr;
928 for (i = 0; i < tp->nr_args; i++)
929 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
930
931 if (!filter_current_check_discard(buffer, call, entry, event))
932 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
933 return 0;
934}
935
936/* Kretprobe handler */
937static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
938 struct pt_regs *regs)
939{
940 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
941 struct kretprobe_trace_entry *entry;
942 struct ring_buffer_event *event;
943 struct ring_buffer *buffer;
944 int size, i, pc;
945 unsigned long irq_flags;
946 struct ftrace_event_call *call = &tp->call;
947
948 local_save_flags(irq_flags);
949 pc = preempt_count();
950
951 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
952
953 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
954 irq_flags, pc);
955 if (!event)
956 return 0;
957
958 entry = ring_buffer_event_data(event);
959 entry->nargs = tp->nr_args;
960 entry->func = (unsigned long)tp->rp.kp.addr;
961 entry->ret_ip = (unsigned long)ri->ret_addr;
962 for (i = 0; i < tp->nr_args; i++)
963 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
964
965 if (!filter_current_check_discard(buffer, call, entry, event))
966 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
967
968 return 0;
969}
970
971/* Event entry printers */
972enum print_line_t
973print_kprobe_event(struct trace_iterator *iter, int flags)
974{
975 struct kprobe_trace_entry *field;
976 struct trace_seq *s = &iter->seq;
977 struct trace_event *event;
978 struct trace_probe *tp;
979 int i;
980
981 field = (struct kprobe_trace_entry *)iter->ent;
982 event = ftrace_find_event(field->ent.type);
983 tp = container_of(event, struct trace_probe, event);
984
985 if (!trace_seq_printf(s, "%s: (", tp->call.name))
986 goto partial;
987
988 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
989 goto partial;
990
991 if (!trace_seq_puts(s, ")"))
992 goto partial;
993
994 for (i = 0; i < field->nargs; i++)
995 if (!trace_seq_printf(s, " %s=%lx",
996 tp->args[i].name, field->args[i]))
997 goto partial;
998
999 if (!trace_seq_puts(s, "\n"))
1000 goto partial;
1001
1002 return TRACE_TYPE_HANDLED;
1003partial:
1004 return TRACE_TYPE_PARTIAL_LINE;
1005}
1006
1007enum print_line_t
1008print_kretprobe_event(struct trace_iterator *iter, int flags)
1009{
1010 struct kretprobe_trace_entry *field;
1011 struct trace_seq *s = &iter->seq;
1012 struct trace_event *event;
1013 struct trace_probe *tp;
1014 int i;
1015
1016 field = (struct kretprobe_trace_entry *)iter->ent;
1017 event = ftrace_find_event(field->ent.type);
1018 tp = container_of(event, struct trace_probe, event);
1019
1020 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1021 goto partial;
1022
1023 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1024 goto partial;
1025
1026 if (!trace_seq_puts(s, " <- "))
1027 goto partial;
1028
1029 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1030 goto partial;
1031
1032 if (!trace_seq_puts(s, ")"))
1033 goto partial;
1034
1035 for (i = 0; i < field->nargs; i++)
1036 if (!trace_seq_printf(s, " %s=%lx",
1037 tp->args[i].name, field->args[i]))
1038 goto partial;
1039
1040 if (!trace_seq_puts(s, "\n"))
1041 goto partial;
1042
1043 return TRACE_TYPE_HANDLED;
1044partial:
1045 return TRACE_TYPE_PARTIAL_LINE;
1046}
1047
1048static int probe_event_enable(struct ftrace_event_call *call)
1049{
1050 struct trace_probe *tp = (struct trace_probe *)call->data;
1051
1052 tp->flags |= TP_FLAG_TRACE;
1053 if (probe_is_return(tp))
1054 return enable_kretprobe(&tp->rp);
1055 else
1056 return enable_kprobe(&tp->rp.kp);
1057}
1058
1059static void probe_event_disable(struct ftrace_event_call *call)
1060{
1061 struct trace_probe *tp = (struct trace_probe *)call->data;
1062
1063 tp->flags &= ~TP_FLAG_TRACE;
1064 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1065 if (probe_is_return(tp))
1066 disable_kretprobe(&tp->rp);
1067 else
1068 disable_kprobe(&tp->rp.kp);
1069 }
1070}
1071
1072static int probe_event_raw_init(struct ftrace_event_call *event_call)
1073{
1074 INIT_LIST_HEAD(&event_call->fields);
1075
1076 return 0;
1077}
1078
1079#undef DEFINE_FIELD
1080#define DEFINE_FIELD(type, item, name, is_signed) \
1081 do { \
1082 ret = trace_define_field(event_call, #type, name, \
1083 offsetof(typeof(field), item), \
1084 sizeof(field.item), is_signed, \
1085 FILTER_OTHER); \
1086 if (ret) \
1087 return ret; \
1088 } while (0)
1089
1090static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1091{
1092 int ret, i;
1093 struct kprobe_trace_entry field;
1094 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1095
1096 ret = trace_define_common_fields(event_call);
1097 if (!ret)
1098 return ret;
1099
1100 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1101 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1102 /* Set argument names as fields */
1103 for (i = 0; i < tp->nr_args; i++)
1104 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1105 return 0;
1106}
1107
1108static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1109{
1110 int ret, i;
1111 struct kretprobe_trace_entry field;
1112 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1113
1114 ret = trace_define_common_fields(event_call);
1115 if (!ret)
1116 return ret;
1117
1118 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1119 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1120 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1121 /* Set argument names as fields */
1122 for (i = 0; i < tp->nr_args; i++)
1123 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1124 return 0;
1125}
1126
1127static int __probe_event_show_format(struct trace_seq *s,
1128 struct trace_probe *tp, const char *fmt,
1129 const char *arg)
1130{
1131 int i;
1132
1133 /* Show format */
1134 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1135 return 0;
1136
1137 for (i = 0; i < tp->nr_args; i++)
1138 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1139 return 0;
1140
1141 if (!trace_seq_printf(s, "\", %s", arg))
1142 return 0;
1143
1144 for (i = 0; i < tp->nr_args; i++)
1145 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1146 return 0;
1147
1148 return trace_seq_puts(s, "\n");
1149}
1150
1151#undef SHOW_FIELD
1152#define SHOW_FIELD(type, item, name) \
1153 do { \
1154 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1155 "offset:%u;\tsize:%u;\n", name, \
1156 (unsigned int)offsetof(typeof(field), item),\
1157 (unsigned int)sizeof(type)); \
1158 if (!ret) \
1159 return 0; \
1160 } while (0)
1161
1162static int kprobe_event_show_format(struct ftrace_event_call *call,
1163 struct trace_seq *s)
1164{
1165 struct kprobe_trace_entry field __attribute__((unused));
1166 int ret, i;
1167 struct trace_probe *tp = (struct trace_probe *)call->data;
1168
1169 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1170 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1171
1172 /* Show fields */
1173 for (i = 0; i < tp->nr_args; i++)
1174 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1175 trace_seq_puts(s, "\n");
1176
1177 return __probe_event_show_format(s, tp, "(%lx)",
1178 "REC->" FIELD_STRING_IP);
1179}
1180
1181static int kretprobe_event_show_format(struct ftrace_event_call *call,
1182 struct trace_seq *s)
1183{
1184 struct kretprobe_trace_entry field __attribute__((unused));
1185 int ret, i;
1186 struct trace_probe *tp = (struct trace_probe *)call->data;
1187
1188 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1189 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1190 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1191
1192 /* Show fields */
1193 for (i = 0; i < tp->nr_args; i++)
1194 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1195 trace_seq_puts(s, "\n");
1196
1197 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1198 "REC->" FIELD_STRING_FUNC
1199 ", REC->" FIELD_STRING_RETIP);
1200}
1201
1202#ifdef CONFIG_EVENT_PROFILE
1203
1204/* Kprobe profile handler */
1205static __kprobes int kprobe_profile_func(struct kprobe *kp,
1206 struct pt_regs *regs)
1207{
1208 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1209 struct ftrace_event_call *call = &tp->call;
1210 struct kprobe_trace_entry *entry;
1211 struct perf_trace_buf *trace_buf;
1212 struct trace_entry *ent;
1213 int size, __size, i, pc, __cpu;
1214 unsigned long irq_flags;
1215 char *raw_data;
1216
1217 pc = preempt_count();
1218 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1219 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1220 size -= sizeof(u32);
1221 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1222 "profile buffer not large enough"))
1223 return 0;
1224
1225 /*
1226 * Protect the non nmi buffer
1227 * This also protects the rcu read side
1228 */
1229 local_irq_save(irq_flags);
1230 __cpu = smp_processor_id();
1231
1232 if (in_nmi())
1233 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1234 else
1235 trace_buf = rcu_dereference(perf_trace_buf);
1236
1237 if (!trace_buf)
1238 goto end;
1239
1240 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1241
1242 if (trace_buf->recursion++)
1243 goto end_recursion;
1244
1245 /*
1246 * Make recursion update visible before entering perf_tp_event
1247 * so that we protect from perf recursions.
1248 */
1249 barrier();
1250
1251 raw_data = trace_buf->buf;
1252
1253 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1254 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1255 entry = (struct kprobe_trace_entry *)raw_data;
1256 ent = &entry->ent;
1257
1258 tracing_generic_entry_update(ent, irq_flags, pc);
1259 ent->type = call->id;
1260 entry->nargs = tp->nr_args;
1261 entry->ip = (unsigned long)kp->addr;
1262 for (i = 0; i < tp->nr_args; i++)
1263 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1264 perf_tp_event(call->id, entry->ip, 1, entry, size);
1265
1266end_recursion:
1267 trace_buf->recursion--;
1268end:
1269 local_irq_restore(irq_flags);
1270
1271 return 0;
1272}
1273
1274/* Kretprobe profile handler */
1275static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1276 struct pt_regs *regs)
1277{
1278 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1279 struct ftrace_event_call *call = &tp->call;
1280 struct kretprobe_trace_entry *entry;
1281 struct perf_trace_buf *trace_buf;
1282 struct trace_entry *ent;
1283 int size, __size, i, pc, __cpu;
1284 unsigned long irq_flags;
1285 char *raw_data;
1286
1287 pc = preempt_count();
1288 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1289 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1290 size -= sizeof(u32);
1291 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1292 "profile buffer not large enough"))
1293 return 0;
1294
1295 /*
1296 * Protect the non nmi buffer
1297 * This also protects the rcu read side
1298 */
1299 local_irq_save(irq_flags);
1300 __cpu = smp_processor_id();
1301
1302 if (in_nmi())
1303 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1304 else
1305 trace_buf = rcu_dereference(perf_trace_buf);
1306
1307 if (!trace_buf)
1308 goto end;
1309
1310 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1311
1312 if (trace_buf->recursion++)
1313 goto end_recursion;
1314
1315 /*
1316 * Make recursion update visible before entering perf_tp_event
1317 * so that we protect from perf recursions.
1318 */
1319 barrier();
1320
1321 raw_data = trace_buf->buf;
1322
1323 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1324 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1325 entry = (struct kretprobe_trace_entry *)raw_data;
1326 ent = &entry->ent;
1327
1328 tracing_generic_entry_update(ent, irq_flags, pc);
1329 ent->type = call->id;
1330 entry->nargs = tp->nr_args;
1331 entry->func = (unsigned long)tp->rp.kp.addr;
1332 entry->ret_ip = (unsigned long)ri->ret_addr;
1333 for (i = 0; i < tp->nr_args; i++)
1334 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1335 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1336
1337end_recursion:
1338 trace_buf->recursion--;
1339end:
1340 local_irq_restore(irq_flags);
1341
1342 return 0;
1343}
1344
1345static int probe_profile_enable(struct ftrace_event_call *call)
1346{
1347 struct trace_probe *tp = (struct trace_probe *)call->data;
1348
1349 tp->flags |= TP_FLAG_PROFILE;
1350
1351 if (probe_is_return(tp))
1352 return enable_kretprobe(&tp->rp);
1353 else
1354 return enable_kprobe(&tp->rp.kp);
1355}
1356
1357static void probe_profile_disable(struct ftrace_event_call *call)
1358{
1359 struct trace_probe *tp = (struct trace_probe *)call->data;
1360
1361 tp->flags &= ~TP_FLAG_PROFILE;
1362
1363 if (!(tp->flags & TP_FLAG_TRACE)) {
1364 if (probe_is_return(tp))
1365 disable_kretprobe(&tp->rp);
1366 else
1367 disable_kprobe(&tp->rp.kp);
1368 }
1369}
1370#endif /* CONFIG_EVENT_PROFILE */
1371
1372
1373static __kprobes
1374int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1375{
1376 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1377
1378 if (tp->flags & TP_FLAG_TRACE)
1379 kprobe_trace_func(kp, regs);
1380#ifdef CONFIG_EVENT_PROFILE
1381 if (tp->flags & TP_FLAG_PROFILE)
1382 kprobe_profile_func(kp, regs);
1383#endif /* CONFIG_EVENT_PROFILE */
1384 return 0; /* We don't tweek kernel, so just return 0 */
1385}
1386
1387static __kprobes
1388int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1389{
1390 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1391
1392 if (tp->flags & TP_FLAG_TRACE)
1393 kretprobe_trace_func(ri, regs);
1394#ifdef CONFIG_EVENT_PROFILE
1395 if (tp->flags & TP_FLAG_PROFILE)
1396 kretprobe_profile_func(ri, regs);
1397#endif /* CONFIG_EVENT_PROFILE */
1398 return 0; /* We don't tweek kernel, so just return 0 */
1399}
1400
1401static int register_probe_event(struct trace_probe *tp)
1402{
1403 struct ftrace_event_call *call = &tp->call;
1404 int ret;
1405
1406 /* Initialize ftrace_event_call */
1407 if (probe_is_return(tp)) {
1408 tp->event.trace = print_kretprobe_event;
1409 call->raw_init = probe_event_raw_init;
1410 call->show_format = kretprobe_event_show_format;
1411 call->define_fields = kretprobe_event_define_fields;
1412 } else {
1413 tp->event.trace = print_kprobe_event;
1414 call->raw_init = probe_event_raw_init;
1415 call->show_format = kprobe_event_show_format;
1416 call->define_fields = kprobe_event_define_fields;
1417 }
1418 call->event = &tp->event;
1419 call->id = register_ftrace_event(&tp->event);
1420 if (!call->id)
1421 return -ENODEV;
1422 call->enabled = 0;
1423 call->regfunc = probe_event_enable;
1424 call->unregfunc = probe_event_disable;
1425
1426#ifdef CONFIG_EVENT_PROFILE
1427 atomic_set(&call->profile_count, -1);
1428 call->profile_enable = probe_profile_enable;
1429 call->profile_disable = probe_profile_disable;
1430#endif
1431 call->data = tp;
1432 ret = trace_add_event_call(call);
1433 if (ret) {
1434 pr_info("Failed to register kprobe event: %s\n", call->name);
1435 unregister_ftrace_event(&tp->event);
1436 }
1437 return ret;
1438}
1439
1440static void unregister_probe_event(struct trace_probe *tp)
1441{
1442 /* tp->event is unregistered in trace_remove_event_call() */
1443 trace_remove_event_call(&tp->call);
1444}
1445
1446/* Make a debugfs interface for controling probe points */
1447static __init int init_kprobe_trace(void)
1448{
1449 struct dentry *d_tracer;
1450 struct dentry *entry;
1451
1452 d_tracer = tracing_init_dentry();
1453 if (!d_tracer)
1454 return 0;
1455
1456 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1457 NULL, &kprobe_events_ops);
1458
1459 /* Event list interface */
1460 if (!entry)
1461 pr_warning("Could not create debugfs "
1462 "'kprobe_events' entry\n");
1463
1464 /* Profile interface */
1465 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1466 NULL, &kprobe_profile_ops);
1467
1468 if (!entry)
1469 pr_warning("Could not create debugfs "
1470 "'kprobe_profile' entry\n");
1471 return 0;
1472}
1473fs_initcall(init_kprobe_trace);
1474
1475
1476#ifdef CONFIG_FTRACE_STARTUP_TEST
1477
1478static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1479 int a4, int a5, int a6)
1480{
1481 return a1 + a2 + a3 + a4 + a5 + a6;
1482}
1483
1484static __init int kprobe_trace_self_tests_init(void)
1485{
1486 int ret;
1487 int (*target)(int, int, int, int, int, int);
1488
1489 target = kprobe_trace_selftest_target;
1490
1491 pr_info("Testing kprobe tracing: ");
1492
1493 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1494 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1495 if (WARN_ON_ONCE(ret))
1496 pr_warning("error enabling function entry\n");
1497
1498 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1499 "$retval");
1500 if (WARN_ON_ONCE(ret))
1501 pr_warning("error enabling function return\n");
1502
1503 ret = target(1, 2, 3, 4, 5, 6);
1504
1505 cleanup_all_probes();
1506
1507 pr_cont("OK\n");
1508 return 0;
1509}
1510
1511late_initcall(kprobe_trace_self_tests_init);
1512
1513#endif
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d00d1a8f1f26..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -354,13 +354,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
354 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 354 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
355} 355}
356 356
357int reg_event_syscall_enter(void *ptr) 357int reg_event_syscall_enter(struct ftrace_event_call *call)
358{ 358{
359 int ret = 0; 359 int ret = 0;
360 int num; 360 int num;
361 char *name; 361 char *name;
362 362
363 name = (char *)ptr; 363 name = (char *)call->data;
364 num = syscall_name_to_nr(name); 364 num = syscall_name_to_nr(name);
365 if (num < 0 || num >= NR_syscalls) 365 if (num < 0 || num >= NR_syscalls)
366 return -ENOSYS; 366 return -ENOSYS;
@@ -378,12 +378,12 @@ int reg_event_syscall_enter(void *ptr)
378 return ret; 378 return ret;
379} 379}
380 380
381void unreg_event_syscall_enter(void *ptr) 381void unreg_event_syscall_enter(struct ftrace_event_call *call)
382{ 382{
383 int num; 383 int num;
384 char *name; 384 char *name;
385 385
386 name = (char *)ptr; 386 name = (char *)call->data;
387 num = syscall_name_to_nr(name); 387 num = syscall_name_to_nr(name);
388 if (num < 0 || num >= NR_syscalls) 388 if (num < 0 || num >= NR_syscalls)
389 return; 389 return;
@@ -395,13 +395,13 @@ void unreg_event_syscall_enter(void *ptr)
395 mutex_unlock(&syscall_trace_lock); 395 mutex_unlock(&syscall_trace_lock);
396} 396}
397 397
398int reg_event_syscall_exit(void *ptr) 398int reg_event_syscall_exit(struct ftrace_event_call *call)
399{ 399{
400 int ret = 0; 400 int ret = 0;
401 int num; 401 int num;
402 char *name; 402 char *name;
403 403
404 name = (char *)ptr; 404 name = call->data;
405 num = syscall_name_to_nr(name); 405 num = syscall_name_to_nr(name);
406 if (num < 0 || num >= NR_syscalls) 406 if (num < 0 || num >= NR_syscalls)
407 return -ENOSYS; 407 return -ENOSYS;
@@ -419,12 +419,12 @@ int reg_event_syscall_exit(void *ptr)
419 return ret; 419 return ret;
420} 420}
421 421
422void unreg_event_syscall_exit(void *ptr) 422void unreg_event_syscall_exit(struct ftrace_event_call *call)
423{ 423{
424 int num; 424 int num;
425 char *name; 425 char *name;
426 426
427 name = (char *)ptr; 427 name = call->data;
428 num = syscall_name_to_nr(name); 428 num = syscall_name_to_nr(name);
429 if (num < 0 || num >= NR_syscalls) 429 if (num < 0 || num >= NR_syscalls)
430 return; 430 return;
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
477static void prof_syscall_enter(struct pt_regs *regs, long id) 477static void prof_syscall_enter(struct pt_regs *regs, long id)
478{ 478{
479 struct syscall_metadata *sys_data; 479 struct syscall_metadata *sys_data;
480 struct perf_trace_buf *trace_buf;
480 struct syscall_trace_enter *rec; 481 struct syscall_trace_enter *rec;
481 unsigned long flags; 482 unsigned long flags;
482 char *raw_data; 483 char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
507 cpu = smp_processor_id(); 508 cpu = smp_processor_id();
508 509
509 if (in_nmi()) 510 if (in_nmi())
510 raw_data = rcu_dereference(trace_profile_buf_nmi); 511 trace_buf = rcu_dereference(perf_trace_buf_nmi);
511 else 512 else
512 raw_data = rcu_dereference(trace_profile_buf); 513 trace_buf = rcu_dereference(perf_trace_buf);
513 514
514 if (!raw_data) 515 if (!trace_buf)
515 goto end; 516 goto end;
516 517
517 raw_data = per_cpu_ptr(raw_data, cpu); 518 trace_buf = per_cpu_ptr(trace_buf, cpu);
519
520 if (trace_buf->recursion++)
521 goto end_recursion;
522
523 /*
524 * Make recursion update visible before entering perf_tp_event
525 * so that we protect from perf recursions.
526 */
527 barrier();
528
529 raw_data = trace_buf->buf;
518 530
519 /* zero the dead bytes from align to not leak stack to user */ 531 /* zero the dead bytes from align to not leak stack to user */
520 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 532 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
527 (unsigned long *)&rec->args); 539 (unsigned long *)&rec->args);
528 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 540 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
529 541
542end_recursion:
543 trace_buf->recursion--;
530end: 544end:
531 local_irq_restore(flags); 545 local_irq_restore(flags);
532} 546}
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
574{ 588{
575 struct syscall_metadata *sys_data; 589 struct syscall_metadata *sys_data;
576 struct syscall_trace_exit *rec; 590 struct syscall_trace_exit *rec;
591 struct perf_trace_buf *trace_buf;
577 unsigned long flags; 592 unsigned long flags;
578 int syscall_nr; 593 int syscall_nr;
579 char *raw_data; 594 char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
605 cpu = smp_processor_id(); 620 cpu = smp_processor_id();
606 621
607 if (in_nmi()) 622 if (in_nmi())
608 raw_data = rcu_dereference(trace_profile_buf_nmi); 623 trace_buf = rcu_dereference(perf_trace_buf_nmi);
609 else 624 else
610 raw_data = rcu_dereference(trace_profile_buf); 625 trace_buf = rcu_dereference(perf_trace_buf);
611 626
612 if (!raw_data) 627 if (!trace_buf)
613 goto end; 628 goto end;
614 629
615 raw_data = per_cpu_ptr(raw_data, cpu); 630 trace_buf = per_cpu_ptr(trace_buf, cpu);
631
632 if (trace_buf->recursion++)
633 goto end_recursion;
634
635 /*
636 * Make recursion update visible before entering perf_tp_event
637 * so that we protect from perf recursions.
638 */
639 barrier();
640
641 raw_data = trace_buf->buf;
616 642
617 /* zero the dead bytes from align to not leak stack to user */ 643 /* zero the dead bytes from align to not leak stack to user */
618 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 644 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
626 652
627 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 653 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
628 654
655end_recursion:
656 trace_buf->recursion--;
629end: 657end:
630 local_irq_restore(flags); 658 local_irq_restore(flags);
631} 659}
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
new file mode 100644
index 000000000000..9270594e6dfd
--- /dev/null
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -0,0 +1,49 @@
1perf-probe(1)
2=============
3
4NAME
5----
6perf-probe - Define new dynamic tracepoints
7
8SYNOPSIS
9--------
10[verse]
11'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...]
12or
13'perf probe' [options] 'PROBE' ['PROBE' ...]
14
15
16DESCRIPTION
17-----------
18This command defines dynamic tracepoint events, by symbol and registers
19without debuginfo, or by C expressions (C line numbers, C function names,
20and C local variables) with debuginfo.
21
22
23OPTIONS
24-------
25-k::
26--vmlinux=PATH::
27 Specify vmlinux path which has debuginfo (Dwarf binary).
28
29-v::
30--verbose::
31 Be more verbose (show parsed arguments, etc).
32
33-a::
34--add::
35 Define a probe point (see PROBE SYNTAX for detail)
36
37PROBE SYNTAX
38------------
39Probe points are defined by following syntax.
40
41 "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]"
42
43'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function.
44It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
45'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
46
47SEE ALSO
48--------
49linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 46a58a81c9ad..3dbb5c5bb8c6 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -337,6 +337,7 @@ LIB_FILE=libperf.a
337LIB_H += ../../include/linux/perf_event.h 337LIB_H += ../../include/linux/perf_event.h
338LIB_H += ../../include/linux/rbtree.h 338LIB_H += ../../include/linux/rbtree.h
339LIB_H += ../../include/linux/list.h 339LIB_H += ../../include/linux/list.h
340LIB_H += ../../include/linux/stringify.h
340LIB_H += util/include/linux/bitmap.h 341LIB_H += util/include/linux/bitmap.h
341LIB_H += util/include/linux/bitops.h 342LIB_H += util/include/linux/bitops.h
342LIB_H += util/include/linux/compiler.h 343LIB_H += util/include/linux/compiler.h
@@ -438,6 +439,7 @@ BUILTIN_OBJS += builtin-stat.o
438BUILTIN_OBJS += builtin-timechart.o 439BUILTIN_OBJS += builtin-timechart.o
439BUILTIN_OBJS += builtin-top.o 440BUILTIN_OBJS += builtin-top.o
440BUILTIN_OBJS += builtin-trace.o 441BUILTIN_OBJS += builtin-trace.o
442BUILTIN_OBJS += builtin-probe.o
441 443
442PERFLIBS = $(LIB_FILE) 444PERFLIBS = $(LIB_FILE)
443 445
@@ -469,6 +471,10 @@ ifeq ($(uname_S),Darwin)
469endif 471endif
470 472
471ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) 473ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
474ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
475 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
476endif
477
472 ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) 478 ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
473 BASIC_CFLAGS += -DLIBELF_NO_MMAP 479 BASIC_CFLAGS += -DLIBELF_NO_MMAP
474 endif 480 endif
@@ -476,6 +482,15 @@ else
476 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); 482 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
477endif 483endif
478 484
485ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
486 msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
487 BASIC_CFLAGS += -DNO_LIBDWARF
488else
489 EXTLIBS += -lelf -ldwarf
490 LIB_H += util/probe-finder.h
491 LIB_OBJS += util/probe-finder.o
492endif
493
479ifdef NO_DEMANGLE 494ifdef NO_DEMANGLE
480 BASIC_CFLAGS += -DNO_DEMANGLE 495 BASIC_CFLAGS += -DNO_DEMANGLE
481else 496else
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
new file mode 100644
index 000000000000..d78a3d945492
--- /dev/null
+++ b/tools/perf/builtin-probe.c
@@ -0,0 +1,435 @@
1/*
2 * builtin-probe.c
3 *
4 * Builtin probe command: Set up probe events by C expression
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23#define _GNU_SOURCE
24#include <sys/utsname.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <fcntl.h>
28#include <errno.h>
29#include <stdio.h>
30#include <unistd.h>
31#include <stdlib.h>
32#include <string.h>
33
34#undef _GNU_SOURCE
35#include "perf.h"
36#include "builtin.h"
37#include "util/util.h"
38#include "util/event.h"
39#include "util/debug.h"
40#include "util/parse-options.h"
41#include "util/parse-events.h" /* For debugfs_path */
42#include "util/probe-finder.h"
43
44/* Default vmlinux search paths */
45#define NR_SEARCH_PATH 3
46const char *default_search_path[NR_SEARCH_PATH] = {
47"/lib/modules/%s/build/vmlinux", /* Custom build kernel */
48"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */
49"/boot/vmlinux-debug-%s", /* Ubuntu */
50};
51
52#define MAX_PATH_LEN 256
53#define MAX_PROBES 128
54#define MAX_PROBE_ARGS 128
55#define PERFPROBE_GROUP "probe"
56
57/* Session management structure */
58static struct {
59 char *vmlinux;
60 char *release;
61 int need_dwarf;
62 int nr_probe;
63 struct probe_point probes[MAX_PROBES];
64} session;
65
66#define semantic_error(msg ...) die("Semantic error :" msg)
67
68/* Parse probe point. Return 1 if return probe */
69static void parse_probe_point(char *arg, struct probe_point *pp)
70{
71 char *ptr, *tmp;
72 char c, nc = 0;
73 /*
74 * <Syntax>
75 * perf probe SRC:LN
76 * perf probe FUNC[+OFFS|%return][@SRC]
77 */
78
79 ptr = strpbrk(arg, ":+@%");
80 if (ptr) {
81 nc = *ptr;
82 *ptr++ = '\0';
83 }
84
85 /* Check arg is function or file and copy it */
86 if (strchr(arg, '.')) /* File */
87 pp->file = strdup(arg);
88 else /* Function */
89 pp->function = strdup(arg);
90 DIE_IF(pp->file == NULL && pp->function == NULL);
91
92 /* Parse other options */
93 while (ptr) {
94 arg = ptr;
95 c = nc;
96 ptr = strpbrk(arg, ":+@%");
97 if (ptr) {
98 nc = *ptr;
99 *ptr++ = '\0';
100 }
101 switch (c) {
102 case ':': /* Line number */
103 pp->line = strtoul(arg, &tmp, 0);
104 if (*tmp != '\0')
105 semantic_error("There is non-digit charactor"
106 " in line number.");
107 break;
108 case '+': /* Byte offset from a symbol */
109 pp->offset = strtoul(arg, &tmp, 0);
110 if (*tmp != '\0')
111 semantic_error("There is non-digit charactor"
112 " in offset.");
113 break;
114 case '@': /* File name */
115 if (pp->file)
116 semantic_error("SRC@SRC is not allowed.");
117 pp->file = strdup(arg);
118 DIE_IF(pp->file == NULL);
119 if (ptr)
120 semantic_error("@SRC must be the last "
121 "option.");
122 break;
123 case '%': /* Probe places */
124 if (strcmp(arg, "return") == 0) {
125 pp->retprobe = 1;
126 } else /* Others not supported yet */
127 semantic_error("%%%s is not supported.", arg);
128 break;
129 default:
130 DIE_IF("Program has a bug.");
131 break;
132 }
133 }
134
135 /* Exclusion check */
136 if (pp->line && pp->offset)
137 semantic_error("Offset can't be used with line number.");
138 if (!pp->line && pp->file && !pp->function)
139 semantic_error("File always requires line number.");
140 if (pp->offset && !pp->function)
141 semantic_error("Offset requires an entry function.");
142 if (pp->retprobe && !pp->function)
143 semantic_error("Return probe requires an entry function.");
144 if ((pp->offset || pp->line) && pp->retprobe)
145 semantic_error("Offset/Line can't be used with return probe.");
146
147 pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
148 pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
149}
150
151/* Parse an event definition. Note that any error must die. */
152static void parse_probe_event(const char *str)
153{
154 char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */
155 int argc, i;
156 struct probe_point *pp = &session.probes[session.nr_probe];
157
158 pr_debug("probe-definition(%d): %s\n", session.nr_probe, str);
159 if (++session.nr_probe == MAX_PROBES)
160 semantic_error("Too many probes");
161
162 /* Separate arguments, similar to argv_split */
163 argc = 0;
164 do {
165 /* Skip separators */
166 while (isspace(*str))
167 str++;
168
169 /* Add an argument */
170 if (*str != '\0') {
171 const char *s = str;
172
173 /* Skip the argument */
174 while (!isspace(*str) && *str != '\0')
175 str++;
176
177 /* Duplicate the argument */
178 argv[argc] = strndup(s, str - s);
179 if (argv[argc] == NULL)
180 die("strndup");
181 if (++argc == MAX_PROBE_ARGS)
182 semantic_error("Too many arguments");
183 pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]);
184 }
185 } while (*str != '\0');
186 if (!argc)
187 semantic_error("An empty argument.");
188
189 /* Parse probe point */
190 parse_probe_point(argv[0], pp);
191 free(argv[0]);
192 if (pp->file || pp->line)
193 session.need_dwarf = 1;
194
195 /* Copy arguments */
196 pp->nr_args = argc - 1;
197 if (pp->nr_args > 0) {
198 pp->args = (char **)malloc(sizeof(char *) * pp->nr_args);
199 if (!pp->args)
200 die("malloc");
201 memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args);
202 }
203
204 /* Ensure return probe has no C argument */
205 for (i = 0; i < pp->nr_args; i++)
206 if (is_c_varname(pp->args[i])) {
207 if (pp->retprobe)
208 semantic_error("You can't specify local"
209 " variable for kretprobe");
210 session.need_dwarf = 1;
211 }
212
213 pr_debug("%d arguments\n", pp->nr_args);
214}
215
216static int opt_add_probe_event(const struct option *opt __used,
217 const char *str, int unset __used)
218{
219 if (str)
220 parse_probe_event(str);
221 return 0;
222}
223
224#ifndef NO_LIBDWARF
225static int open_default_vmlinux(void)
226{
227 struct utsname uts;
228 char fname[MAX_PATH_LEN];
229 int fd, ret, i;
230
231 ret = uname(&uts);
232 if (ret) {
233 pr_debug("uname() failed.\n");
234 return -errno;
235 }
236 session.release = uts.release;
237 for (i = 0; i < NR_SEARCH_PATH; i++) {
238 ret = snprintf(fname, MAX_PATH_LEN,
239 default_search_path[i], session.release);
240 if (ret >= MAX_PATH_LEN || ret < 0) {
241 pr_debug("Filename(%d,%s) is too long.\n", i,
242 uts.release);
243 errno = E2BIG;
244 return -E2BIG;
245 }
246 pr_debug("try to open %s\n", fname);
247 fd = open(fname, O_RDONLY);
248 if (fd >= 0)
249 break;
250 }
251 return fd;
252}
253#endif
254
255static const char * const probe_usage[] = {
256 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
257 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
258 NULL
259};
260
261static const struct option options[] = {
262 OPT_BOOLEAN('v', "verbose", &verbose,
263 "be more verbose (show parsed arguments, etc)"),
264#ifndef NO_LIBDWARF
265 OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
266 "vmlinux/module pathname"),
267#endif
268 OPT_CALLBACK('a', "add", NULL,
269#ifdef NO_LIBDWARF
270 "FUNC[+OFFS|%return] [ARG ...]",
271#else
272 "FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]",
273#endif
274 "probe point definition, where\n"
275 "\t\tGRP:\tGroup name (optional)\n"
276 "\t\tNAME:\tEvent name\n"
277 "\t\tFUNC:\tFunction name\n"
278 "\t\tOFFS:\tOffset from function entry (in byte)\n"
279 "\t\t%return:\tPut the probe at function return\n"
280#ifdef NO_LIBDWARF
281 "\t\tARG:\tProbe argument (only \n"
282#else
283 "\t\tSRC:\tSource code path\n"
284 "\t\tRLN:\tRelative line number from function entry.\n"
285 "\t\tALN:\tAbsolute line number in file.\n"
286 "\t\tARG:\tProbe argument (local variable name or\n"
287#endif
288 "\t\t\tkprobe-tracer argument format is supported.)\n",
289 opt_add_probe_event),
290 OPT_END()
291};
292
293static int write_new_event(int fd, const char *buf)
294{
295 int ret;
296
297 ret = write(fd, buf, strlen(buf));
298 if (ret <= 0)
299 die("Failed to create event.");
300 else
301 printf("Added new event: %s\n", buf);
302
303 return ret;
304}
305
306#define MAX_CMDLEN 256
307
308static int synthesize_probe_event(struct probe_point *pp)
309{
310 char *buf;
311 int i, len, ret;
312 pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char));
313 if (!buf)
314 die("Failed to allocate memory by calloc.");
315 ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset);
316 if (ret <= 0 || ret >= MAX_CMDLEN)
317 goto error;
318 len = ret;
319
320 for (i = 0; i < pp->nr_args; i++) {
321 ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s",
322 pp->args[i]);
323 if (ret <= 0 || ret >= MAX_CMDLEN - len)
324 goto error;
325 len += ret;
326 }
327 pp->found = 1;
328 return pp->found;
329error:
330 free(pp->probes[0]);
331 if (ret > 0)
332 ret = -E2BIG;
333 return ret;
334}
335
336int cmd_probe(int argc, const char **argv, const char *prefix __used)
337{
338 int i, j, fd, ret;
339 struct probe_point *pp;
340 char buf[MAX_CMDLEN];
341
342 argc = parse_options(argc, argv, options, probe_usage,
343 PARSE_OPT_STOP_AT_NON_OPTION);
344 for (i = 0; i < argc; i++)
345 parse_probe_event(argv[i]);
346
347 if (session.nr_probe == 0)
348 usage_with_options(probe_usage, options);
349
350 if (session.need_dwarf)
351#ifdef NO_LIBDWARF
352 semantic_error("Debuginfo-analysis is not supported");
353#else /* !NO_LIBDWARF */
354 pr_info("Some probes require debuginfo.\n");
355
356 if (session.vmlinux)
357 fd = open(session.vmlinux, O_RDONLY);
358 else
359 fd = open_default_vmlinux();
360 if (fd < 0) {
361 if (session.need_dwarf)
362 die("Could not open vmlinux/module file.");
363
364 pr_warning("Could not open vmlinux/module file."
365 " Try to use symbols.\n");
366 goto end_dwarf;
367 }
368
369 /* Searching probe points */
370 for (j = 0; j < session.nr_probe; j++) {
371 pp = &session.probes[j];
372 if (pp->found)
373 continue;
374
375 lseek(fd, SEEK_SET, 0);
376 ret = find_probepoint(fd, pp);
377 if (ret < 0) {
378 if (session.need_dwarf)
379 die("Could not analyze debuginfo.");
380
381 pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n");
382 break;
383 }
384 if (ret == 0) /* No error but failed to find probe point. */
385 die("No probe point found.");
386 }
387 close(fd);
388
389end_dwarf:
390#endif /* !NO_LIBDWARF */
391
392 /* Synthesize probes without dwarf */
393 for (j = 0; j < session.nr_probe; j++) {
394 pp = &session.probes[j];
395 if (pp->found) /* This probe is already found. */
396 continue;
397
398 ret = synthesize_probe_event(pp);
399 if (ret == -E2BIG)
400 semantic_error("probe point is too long.");
401 else if (ret < 0)
402 die("Failed to synthesize a probe point.");
403 }
404
405 /* Settng up probe points */
406 snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path);
407 fd = open(buf, O_WRONLY, O_APPEND);
408 if (fd < 0) {
409 if (errno == ENOENT)
410 die("kprobe_events file does not exist - please rebuild with CONFIG_KPROBE_TRACER.");
411 else
412 die("Could not open kprobe_events file: %s",
413 strerror(errno));
414 }
415 for (j = 0; j < session.nr_probe; j++) {
416 pp = &session.probes[j];
417 if (pp->found == 1) {
418 snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n",
419 pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP,
420 pp->function, pp->offset, pp->probes[0]);
421 write_new_event(fd, buf);
422 } else
423 for (i = 0; i < pp->found; i++) {
424 snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n",
425 pp->retprobe ? 'r' : 'p',
426 PERFPROBE_GROUP,
427 pp->function, pp->offset, i,
428 pp->probes[0]);
429 write_new_event(fd, buf);
430 }
431 }
432 close(fd);
433 return 0;
434}
435
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index e97954a0a3d2..9b02d85091fe 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -27,5 +27,6 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix);
27extern int cmd_top(int argc, const char **argv, const char *prefix); 27extern int cmd_top(int argc, const char **argv, const char *prefix);
28extern int cmd_trace(int argc, const char **argv, const char *prefix); 28extern int cmd_trace(int argc, const char **argv, const char *prefix);
29extern int cmd_version(int argc, const char **argv, const char *prefix); 29extern int cmd_version(int argc, const char **argv, const char *prefix);
30extern int cmd_probe(int argc, const char **argv, const char *prefix);
30 31
31#endif 32#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index d37b16cf18ff..d3a6e18e4a5e 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -13,3 +13,4 @@ perf-stat mainporcelain common
13perf-timechart mainporcelain common 13perf-timechart mainporcelain common
14perf-top mainporcelain common 14perf-top mainporcelain common
15perf-trace mainporcelain common 15perf-trace mainporcelain common
16perf-probe mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 53359ebb369a..89b82acac7d9 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -298,6 +298,7 @@ static void handle_internal_command(int argc, const char **argv)
298 { "version", cmd_version, 0 }, 298 { "version", cmd_version, 0 },
299 { "trace", cmd_trace, 0 }, 299 { "trace", cmd_trace, 0 },
300 { "sched", cmd_sched, 0 }, 300 { "sched", cmd_sched, 0 },
301 { "probe", cmd_probe, 0 },
301 }; 302 };
302 unsigned int i; 303 unsigned int i;
303 static const char ext[] = STRIP_EXTENSION; 304 static const char ext[] = STRIP_EXTENSION;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 000000000000..293cdfc1b8ca
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,732 @@
1/*
2 * probe-finder.c : C expression to kprobe event converter
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#include <sys/utsname.h>
23#include <sys/types.h>
24#include <sys/stat.h>
25#include <fcntl.h>
26#include <errno.h>
27#include <stdio.h>
28#include <unistd.h>
29#include <getopt.h>
30#include <stdlib.h>
31#include <string.h>
32#include <stdarg.h>
33#include <ctype.h>
34
35#include "event.h"
36#include "debug.h"
37#include "util.h"
38#include "probe-finder.h"
39
40
41/* Dwarf_Die Linkage to parent Die */
42struct die_link {
43 struct die_link *parent; /* Parent die */
44 Dwarf_Die die; /* Current die */
45};
46
47static Dwarf_Debug __dw_debug;
48static Dwarf_Error __dw_error;
49
50/*
51 * Generic dwarf analysis helpers
52 */
53
54#define X86_32_MAX_REGS 8
55const char *x86_32_regs_table[X86_32_MAX_REGS] = {
56 "%ax",
57 "%cx",
58 "%dx",
59 "%bx",
60 "$stack", /* Stack address instead of %sp */
61 "%bp",
62 "%si",
63 "%di",
64};
65
66#define X86_64_MAX_REGS 16
67const char *x86_64_regs_table[X86_64_MAX_REGS] = {
68 "%ax",
69 "%dx",
70 "%cx",
71 "%bx",
72 "%si",
73 "%di",
74 "%bp",
75 "%sp",
76 "%r8",
77 "%r9",
78 "%r10",
79 "%r11",
80 "%r12",
81 "%r13",
82 "%r14",
83 "%r15",
84};
85
86/* TODO: switching by dwarf address size */
87#ifdef __x86_64__
88#define ARCH_MAX_REGS X86_64_MAX_REGS
89#define arch_regs_table x86_64_regs_table
90#else
91#define ARCH_MAX_REGS X86_32_MAX_REGS
92#define arch_regs_table x86_32_regs_table
93#endif
94
95/* Return architecture dependent register string (for kprobe-tracer) */
96static const char *get_arch_regstr(unsigned int n)
97{
98 return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
99}
100
101/*
102 * Compare the tail of two strings.
103 * Return 0 if whole of either string is same as another's tail part.
104 */
105static int strtailcmp(const char *s1, const char *s2)
106{
107 int i1 = strlen(s1);
108 int i2 = strlen(s2);
109 while (--i1 > 0 && --i2 > 0) {
110 if (s1[i1] != s2[i2])
111 return s1[i1] - s2[i2];
112 }
113 return 0;
114}
115
116/* Find the fileno of the target file. */
117static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
118{
119 Dwarf_Signed cnt, i;
120 Dwarf_Unsigned found = 0;
121 char **srcs;
122 int ret;
123
124 if (!fname)
125 return 0;
126
127 ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
128 if (ret == DW_DLV_OK) {
129 for (i = 0; i < cnt && !found; i++) {
130 if (strtailcmp(srcs[i], fname) == 0)
131 found = i + 1;
132 dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
133 }
134 for (; i < cnt; i++)
135 dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
136 dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
137 }
138 if (found)
139 pr_debug("found fno: %d\n", (int)found);
140 return found;
141}
142
143/* Compare diename and tname */
144static int die_compare_name(Dwarf_Die dw_die, const char *tname)
145{
146 char *name;
147 int ret;
148 ret = dwarf_diename(dw_die, &name, &__dw_error);
149 DIE_IF(ret == DW_DLV_ERROR);
150 if (ret == DW_DLV_OK) {
151 ret = strcmp(tname, name);
152 dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
153 } else
154 ret = -1;
155 return ret;
156}
157
158/* Check the address is in the subprogram(function). */
159static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr,
160 Dwarf_Signed *offs)
161{
162 Dwarf_Addr lopc, hipc;
163 int ret;
164
165 /* TODO: check ranges */
166 ret = dwarf_lowpc(sp_die, &lopc, &__dw_error);
167 DIE_IF(ret == DW_DLV_ERROR);
168 if (ret == DW_DLV_NO_ENTRY)
169 return 0;
170 ret = dwarf_highpc(sp_die, &hipc, &__dw_error);
171 DIE_IF(ret != DW_DLV_OK);
172 if (lopc <= addr && addr < hipc) {
173 *offs = addr - lopc;
174 return 1;
175 } else
176 return 0;
177}
178
179/* Check the die is inlined function */
180static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die)
181{
182 /* TODO: check strictly */
183 Dwarf_Bool inl;
184 int ret;
185
186 ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error);
187 DIE_IF(ret == DW_DLV_ERROR);
188 return inl;
189}
190
191/* Get the offset of abstruct_origin */
192static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die)
193{
194 Dwarf_Attribute attr;
195 Dwarf_Off cu_offs;
196 int ret;
197
198 ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error);
199 DIE_IF(ret != DW_DLV_OK);
200 ret = dwarf_formref(attr, &cu_offs, &__dw_error);
201 DIE_IF(ret != DW_DLV_OK);
202 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
203 return cu_offs;
204}
205
206/* Get entry pc(or low pc, 1st entry of ranges) of the die */
207static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die)
208{
209 Dwarf_Attribute attr;
210 Dwarf_Addr addr;
211 Dwarf_Off offs;
212 Dwarf_Ranges *ranges;
213 Dwarf_Signed cnt;
214 int ret;
215
216 /* Try to get entry pc */
217 ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error);
218 DIE_IF(ret == DW_DLV_ERROR);
219 if (ret == DW_DLV_OK) {
220 ret = dwarf_formaddr(attr, &addr, &__dw_error);
221 DIE_IF(ret != DW_DLV_OK);
222 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
223 return addr;
224 }
225
226 /* Try to get low pc */
227 ret = dwarf_lowpc(dw_die, &addr, &__dw_error);
228 DIE_IF(ret == DW_DLV_ERROR);
229 if (ret == DW_DLV_OK)
230 return addr;
231
232 /* Try to get ranges */
233 ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error);
234 DIE_IF(ret != DW_DLV_OK);
235 ret = dwarf_formref(attr, &offs, &__dw_error);
236 DIE_IF(ret != DW_DLV_OK);
237 ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL,
238 &__dw_error);
239 DIE_IF(ret != DW_DLV_OK);
240 addr = ranges[0].dwr_addr1;
241 dwarf_ranges_dealloc(__dw_debug, ranges, cnt);
242 return addr;
243}
244
245/*
246 * Search a Die from Die tree.
247 * Note: cur_link->die should be deallocated in this function.
248 */
249static int __search_die_tree(struct die_link *cur_link,
250 int (*die_cb)(struct die_link *, void *),
251 void *data)
252{
253 Dwarf_Die new_die;
254 struct die_link new_link;
255 int ret;
256
257 if (!die_cb)
258 return 0;
259
260 /* Check current die */
261 while (!(ret = die_cb(cur_link, data))) {
262 /* Check child die */
263 ret = dwarf_child(cur_link->die, &new_die, &__dw_error);
264 DIE_IF(ret == DW_DLV_ERROR);
265 if (ret == DW_DLV_OK) {
266 new_link.parent = cur_link;
267 new_link.die = new_die;
268 ret = __search_die_tree(&new_link, die_cb, data);
269 if (ret)
270 break;
271 }
272
273 /* Move to next sibling */
274 ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die,
275 &__dw_error);
276 DIE_IF(ret == DW_DLV_ERROR);
277 dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
278 cur_link->die = new_die;
279 if (ret == DW_DLV_NO_ENTRY)
280 return 0;
281 }
282 dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
283 return ret;
284}
285
286/* Search a die in its children's die tree */
287static int search_die_from_children(Dwarf_Die parent_die,
288 int (*die_cb)(struct die_link *, void *),
289 void *data)
290{
291 struct die_link new_link;
292 int ret;
293
294 new_link.parent = NULL;
295 ret = dwarf_child(parent_die, &new_link.die, &__dw_error);
296 DIE_IF(ret == DW_DLV_ERROR);
297 if (ret == DW_DLV_OK)
298 return __search_die_tree(&new_link, die_cb, data);
299 else
300 return 0;
301}
302
303/* Find a locdesc corresponding to the address */
304static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc,
305 Dwarf_Addr addr)
306{
307 Dwarf_Signed lcnt;
308 Dwarf_Locdesc **llbuf;
309 int ret, i;
310
311 ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error);
312 DIE_IF(ret != DW_DLV_OK);
313 ret = DW_DLV_NO_ENTRY;
314 for (i = 0; i < lcnt; ++i) {
315 if (llbuf[i]->ld_lopc <= addr &&
316 llbuf[i]->ld_hipc > addr) {
317 memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc));
318 desc->ld_s =
319 malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
320 DIE_IF(desc->ld_s == NULL);
321 memcpy(desc->ld_s, llbuf[i]->ld_s,
322 sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
323 ret = DW_DLV_OK;
324 break;
325 }
326 dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
327 dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
328 }
329 /* Releasing loop */
330 for (; i < lcnt; ++i) {
331 dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
332 dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
333 }
334 dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST);
335 return ret;
336}
337
338/* Get decl_file attribute value (file number) */
339static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die)
340{
341 Dwarf_Attribute attr;
342 Dwarf_Unsigned fno;
343 int ret;
344
345 ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error);
346 DIE_IF(ret != DW_DLV_OK);
347 dwarf_formudata(attr, &fno, &__dw_error);
348 DIE_IF(ret != DW_DLV_OK);
349 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
350 return fno;
351}
352
353/* Get decl_line attribute value (line number) */
354static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die)
355{
356 Dwarf_Attribute attr;
357 Dwarf_Unsigned lno;
358 int ret;
359
360 ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error);
361 DIE_IF(ret != DW_DLV_OK);
362 dwarf_formudata(attr, &lno, &__dw_error);
363 DIE_IF(ret != DW_DLV_OK);
364 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
365 return lno;
366}
367
368/*
369 * Probe finder related functions
370 */
371
372/* Show a location */
373static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
374{
375 Dwarf_Small op;
376 Dwarf_Unsigned regn;
377 Dwarf_Signed offs;
378 int deref = 0, ret;
379 const char *regs;
380
381 op = loc->lr_atom;
382
383 /* If this is based on frame buffer, set the offset */
384 if (op == DW_OP_fbreg) {
385 deref = 1;
386 offs = (Dwarf_Signed)loc->lr_number;
387 op = pf->fbloc.ld_s[0].lr_atom;
388 loc = &pf->fbloc.ld_s[0];
389 } else
390 offs = 0;
391
392 if (op >= DW_OP_breg0 && op <= DW_OP_breg31) {
393 regn = op - DW_OP_breg0;
394 offs += (Dwarf_Signed)loc->lr_number;
395 deref = 1;
396 } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) {
397 regn = op - DW_OP_reg0;
398 } else if (op == DW_OP_bregx) {
399 regn = loc->lr_number;
400 offs += (Dwarf_Signed)loc->lr_number2;
401 deref = 1;
402 } else if (op == DW_OP_regx) {
403 regn = loc->lr_number;
404 } else
405 die("Dwarf_OP %d is not supported.\n", op);
406
407 regs = get_arch_regstr(regn);
408 if (!regs)
409 die("%lld exceeds max register number.\n", regn);
410
411 if (deref)
412 ret = snprintf(pf->buf, pf->len,
413 " %s=%+lld(%s)", pf->var, offs, regs);
414 else
415 ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs);
416 DIE_IF(ret < 0);
417 DIE_IF(ret >= pf->len);
418}
419
420/* Show a variables in kprobe event format */
421static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
422{
423 Dwarf_Attribute attr;
424 Dwarf_Locdesc ld;
425 int ret;
426
427 ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error);
428 if (ret != DW_DLV_OK)
429 goto error;
430 ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base));
431 if (ret != DW_DLV_OK)
432 goto error;
433 /* TODO? */
434 DIE_IF(ld.ld_cents != 1);
435 show_location(&ld.ld_s[0], pf);
436 free(ld.ld_s);
437 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
438 return ;
439error:
440 die("Failed to find the location of %s at this address.\n"
441 " Perhaps, it has been optimized out.\n", pf->var);
442}
443
444static int variable_callback(struct die_link *dlink, void *data)
445{
446 struct probe_finder *pf = (struct probe_finder *)data;
447 Dwarf_Half tag;
448 int ret;
449
450 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
451 DIE_IF(ret == DW_DLV_ERROR);
452 if ((tag == DW_TAG_formal_parameter ||
453 tag == DW_TAG_variable) &&
454 (die_compare_name(dlink->die, pf->var) == 0)) {
455 show_variable(dlink->die, pf);
456 return 1;
457 }
458 /* TODO: Support struct members and arrays */
459 return 0;
460}
461
462/* Find a variable in a subprogram die */
463static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
464{
465 int ret;
466
467 if (!is_c_varname(pf->var)) {
468 /* Output raw parameters */
469 ret = snprintf(pf->buf, pf->len, " %s", pf->var);
470 DIE_IF(ret < 0);
471 DIE_IF(ret >= pf->len);
472 return ;
473 }
474
475 pr_debug("Searching '%s' variable in context.\n", pf->var);
476 /* Search child die for local variables and parameters. */
477 ret = search_die_from_children(sp_die, variable_callback, pf);
478 if (!ret)
479 die("Failed to find '%s' in this function.\n", pf->var);
480}
481
482/* Get a frame base on the address */
483static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf)
484{
485 Dwarf_Attribute attr;
486 int ret;
487
488 ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error);
489 DIE_IF(ret != DW_DLV_OK);
490 ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base));
491 DIE_IF(ret != DW_DLV_OK);
492 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
493}
494
495static void free_current_frame_base(struct probe_finder *pf)
496{
497 free(pf->fbloc.ld_s);
498 memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc));
499}
500
501/* Show a probe point to output buffer */
502static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs,
503 struct probe_finder *pf)
504{
505 struct probe_point *pp = pf->pp;
506 char *name;
507 char tmp[MAX_PROBE_BUFFER];
508 int ret, i, len;
509
510 /* Output name of probe point */
511 ret = dwarf_diename(sp_die, &name, &__dw_error);
512 DIE_IF(ret == DW_DLV_ERROR);
513 if (ret == DW_DLV_OK) {
514 ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name,
515 (unsigned int)offs);
516 /* Copy the function name if possible */
517 if (!pp->function) {
518 pp->function = strdup(name);
519 pp->offset = offs;
520 }
521 dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
522 } else {
523 /* This function has no name. */
524 ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr);
525 if (!pp->function) {
526 /* TODO: Use _stext */
527 pp->function = strdup("");
528 pp->offset = (int)pf->addr;
529 }
530 }
531 DIE_IF(ret < 0);
532 DIE_IF(ret >= MAX_PROBE_BUFFER);
533 len = ret;
534 pr_debug("Probe point found: %s\n", tmp);
535
536 /* Find each argument */
537 get_current_frame_base(sp_die, pf);
538 for (i = 0; i < pp->nr_args; i++) {
539 pf->var = pp->args[i];
540 pf->buf = &tmp[len];
541 pf->len = MAX_PROBE_BUFFER - len;
542 find_variable(sp_die, pf);
543 len += strlen(pf->buf);
544 }
545 free_current_frame_base(pf);
546
547 pp->probes[pp->found] = strdup(tmp);
548 pp->found++;
549}
550
551static int probeaddr_callback(struct die_link *dlink, void *data)
552{
553 struct probe_finder *pf = (struct probe_finder *)data;
554 Dwarf_Half tag;
555 Dwarf_Signed offs;
556 int ret;
557
558 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
559 DIE_IF(ret == DW_DLV_ERROR);
560 /* Check the address is in this subprogram */
561 if (tag == DW_TAG_subprogram &&
562 die_within_subprogram(dlink->die, pf->addr, &offs)) {
563 show_probepoint(dlink->die, offs, pf);
564 return 1;
565 }
566 return 0;
567}
568
569/* Find probe point from its line number */
570static void find_by_line(struct probe_finder *pf)
571{
572 Dwarf_Signed cnt, i, clm;
573 Dwarf_Line *lines;
574 Dwarf_Unsigned lineno = 0;
575 Dwarf_Addr addr;
576 Dwarf_Unsigned fno;
577 int ret;
578
579 ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error);
580 DIE_IF(ret != DW_DLV_OK);
581
582 for (i = 0; i < cnt; i++) {
583 ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
584 DIE_IF(ret != DW_DLV_OK);
585 if (fno != pf->fno)
586 continue;
587
588 ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
589 DIE_IF(ret != DW_DLV_OK);
590 if (lineno != pf->lno)
591 continue;
592
593 ret = dwarf_lineoff(lines[i], &clm, &__dw_error);
594 DIE_IF(ret != DW_DLV_OK);
595
596 ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
597 DIE_IF(ret != DW_DLV_OK);
598 pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n",
599 (int)i, (unsigned)lineno, (int)clm, addr);
600 pf->addr = addr;
601 /* Search a real subprogram including this line, */
602 ret = search_die_from_children(pf->cu_die,
603 probeaddr_callback, pf);
604 if (ret == 0)
605 die("Probe point is not found in subprograms.\n");
606 /* Continuing, because target line might be inlined. */
607 }
608 dwarf_srclines_dealloc(__dw_debug, lines, cnt);
609}
610
611/* Search function from function name */
612static int probefunc_callback(struct die_link *dlink, void *data)
613{
614 struct probe_finder *pf = (struct probe_finder *)data;
615 struct probe_point *pp = pf->pp;
616 struct die_link *lk;
617 Dwarf_Signed offs;
618 Dwarf_Half tag;
619 int ret;
620
621 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
622 DIE_IF(ret == DW_DLV_ERROR);
623 if (tag == DW_TAG_subprogram) {
624 if (die_compare_name(dlink->die, pp->function) == 0) {
625 if (pp->line) { /* Function relative line */
626 pf->fno = die_get_decl_file(dlink->die);
627 pf->lno = die_get_decl_line(dlink->die)
628 + pp->line;
629 find_by_line(pf);
630 return 1;
631 }
632 if (die_inlined_subprogram(dlink->die)) {
633 /* Inlined function, save it. */
634 ret = dwarf_die_CU_offset(dlink->die,
635 &pf->inl_offs,
636 &__dw_error);
637 DIE_IF(ret != DW_DLV_OK);
638 pr_debug("inline definition offset %lld\n",
639 pf->inl_offs);
640 return 0; /* Continue to search */
641 }
642 /* Get probe address */
643 pf->addr = die_get_entrypc(dlink->die);
644 pf->addr += pp->offset;
645 /* TODO: Check the address in this function */
646 show_probepoint(dlink->die, pp->offset, pf);
647 return 1; /* Exit; no same symbol in this CU. */
648 }
649 } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) {
650 if (die_get_abstract_origin(dlink->die) == pf->inl_offs) {
651 /* Get probe address */
652 pf->addr = die_get_entrypc(dlink->die);
653 pf->addr += pp->offset;
654 pr_debug("found inline addr: 0x%llx\n", pf->addr);
655 /* Inlined function. Get a real subprogram */
656 for (lk = dlink->parent; lk != NULL; lk = lk->parent) {
657 tag = 0;
658 dwarf_tag(lk->die, &tag, &__dw_error);
659 DIE_IF(ret == DW_DLV_ERROR);
660 if (tag == DW_TAG_subprogram &&
661 !die_inlined_subprogram(lk->die))
662 goto found;
663 }
664 die("Failed to find real subprogram.\n");
665found:
666 /* Get offset from subprogram */
667 ret = die_within_subprogram(lk->die, pf->addr, &offs);
668 DIE_IF(!ret);
669 show_probepoint(lk->die, offs, pf);
670 /* Continue to search */
671 }
672 }
673 return 0;
674}
675
676static void find_by_func(struct probe_finder *pf)
677{
678 search_die_from_children(pf->cu_die, probefunc_callback, pf);
679}
680
681/* Find a probe point */
682int find_probepoint(int fd, struct probe_point *pp)
683{
684 Dwarf_Half addr_size = 0;
685 Dwarf_Unsigned next_cuh = 0;
686 int cu_number = 0, ret;
687 struct probe_finder pf = {.pp = pp};
688
689 ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
690 if (ret != DW_DLV_OK) {
691 pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n");
692 return -ENOENT;
693 }
694
695 pp->found = 0;
696 while (++cu_number) {
697 /* Search CU (Compilation Unit) */
698 ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
699 &addr_size, &next_cuh, &__dw_error);
700 DIE_IF(ret == DW_DLV_ERROR);
701 if (ret == DW_DLV_NO_ENTRY)
702 break;
703
704 /* Get the DIE(Debugging Information Entry) of this CU */
705 ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error);
706 DIE_IF(ret != DW_DLV_OK);
707
708 /* Check if target file is included. */
709 if (pp->file)
710 pf.fno = cu_find_fileno(pf.cu_die, pp->file);
711
712 if (!pp->file || pf.fno) {
713 /* Save CU base address (for frame_base) */
714 ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error);
715 DIE_IF(ret == DW_DLV_ERROR);
716 if (ret == DW_DLV_NO_ENTRY)
717 pf.cu_base = 0;
718 if (pp->function)
719 find_by_func(&pf);
720 else {
721 pf.lno = pp->line;
722 find_by_line(&pf);
723 }
724 }
725 dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
726 }
727 ret = dwarf_finish(__dw_debug, &__dw_error);
728 DIE_IF(ret != DW_DLV_OK);
729
730 return pp->found;
731}
732
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 000000000000..bdebca6697d2
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,57 @@
1#ifndef _PROBE_FINDER_H
2#define _PROBE_FINDER_H
3
4#define MAX_PATH_LEN 256
5#define MAX_PROBE_BUFFER 1024
6#define MAX_PROBES 128
7
8static inline int is_c_varname(const char *name)
9{
10 /* TODO */
11 return isalpha(name[0]) || name[0] == '_';
12}
13
14struct probe_point {
15 /* Inputs */
16 char *file; /* File name */
17 int line; /* Line number */
18
19 char *function; /* Function name */
20 int offset; /* Offset bytes */
21
22 int nr_args; /* Number of arguments */
23 char **args; /* Arguments */
24
25 int retprobe; /* Return probe */
26
27 /* Output */
28 int found; /* Number of found probe points */
29 char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
30};
31
32#ifndef NO_LIBDWARF
33extern int find_probepoint(int fd, struct probe_point *pp);
34
35#include <libdwarf/dwarf.h>
36#include <libdwarf/libdwarf.h>
37
38struct probe_finder {
39 struct probe_point *pp; /* Target probe point */
40
41 /* For function searching */
42 Dwarf_Addr addr; /* Address */
43 Dwarf_Unsigned fno; /* File number */
44 Dwarf_Unsigned lno; /* Line number */
45 Dwarf_Off inl_offs; /* Inline offset */
46 Dwarf_Die cu_die; /* Current CU */
47
48 /* For variable searching */
49 Dwarf_Addr cu_base; /* Current CU base address */
50 Dwarf_Locdesc fbloc; /* Location of Current Frame Base */
51 const char *var; /* Current variable name */
52 char *buf; /* Current output buffer */
53 int len; /* Length of output buffer */
54};
55#endif /* NO_LIBDWARF */
56
57#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7bd5bdaeb235..f2203a0946bc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -134,6 +134,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1,
134extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); 134extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
135extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); 135extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
136 136
137#include "../../../include/linux/stringify.h"
138
139#define DIE_IF(cnd) \
140 do { if (cnd) \
141 die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \
142 __stringify(cnd) "\n"); \
143 } while (0)
144
145
137extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); 146extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
138 147
139extern int prefixcmp(const char *str, const char *prefix); 148extern int prefixcmp(const char *str, const char *prefix);