diff options
45 files changed, 6190 insertions, 288 deletions
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 000000000000..47aabeebbdf6 --- /dev/null +++ b/Documentation/trace/kprobetrace.txt | |||
@@ -0,0 +1,149 @@ | |||
1 | Kprobe-based Event Tracing | ||
2 | ========================== | ||
3 | |||
4 | Documentation is written by Masami Hiramatsu | ||
5 | |||
6 | |||
7 | Overview | ||
8 | -------- | ||
9 | These events are similar to tracepoint based events. Instead of Tracepoint, | ||
10 | this is based on kprobes (kprobe and kretprobe). So it can probe wherever | ||
11 | kprobes can probe (that is, all function bodies except those of __kprobes | ||
12 | functions). Unlike the Tracepoint based event, this can be added and removed | ||
13 | dynamically, on the fly. | ||
14 | |||
15 | To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. | ||
16 | |||
17 | Similar to the events tracer, this doesn't need to be activated via | ||
18 | current_tracer. Instead of that, add probe points via | ||
19 | /sys/kernel/debug/tracing/kprobe_events, and enable it via | ||
20 | /sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable. | ||
21 | |||
22 | |||
23 | Synopsis of kprobe_events | ||
24 | ------------------------- | ||
25 | p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe | ||
26 | r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe | ||
27 | |||
28 | GRP : Group name. If omitted, use "kprobes" for it. | ||
29 | EVENT : Event name. If omitted, the event name is generated | ||
30 | based on SYMBOL+offs or MEMADDR. | ||
31 | SYMBOL[+offs] : Symbol+offset where the probe is inserted. | ||
32 | MEMADDR : Address where the probe is inserted. | ||
33 | |||
34 | FETCHARGS : Arguments. Each probe can have up to 128 args. | ||
35 | %REG : Fetch register REG | ||
36 | @ADDR : Fetch memory at ADDR (ADDR should be in kernel) | ||
37 | @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) | ||
38 | $stackN : Fetch Nth entry of stack (N >= 0) | ||
39 | $stack : Fetch stack address. | ||
40 | $argN : Fetch function argument. (N >= 0)(*) | ||
41 | $retval : Fetch return value.(**) | ||
42 | +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) | ||
43 | NAME=FETCHARG: Set NAME as the argument name of FETCHARG. | ||
44 | |||
45 | (*) $argN may not be correct on asmlinkage functions and in the middle of | ||
46 | a function body. | ||
47 | (**) only for return probe. | ||
48 | (***) this is useful for fetching a field of data structures. | ||
49 | |||
50 | |||
51 | Per-Probe Event Filtering | ||
52 | ------------------------- | ||
53 | The per-probe event filtering feature allows you to set a different filter on | ||
54 | each probe and to choose which arguments are shown in the trace buffer. If an event | ||
55 | name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event | ||
56 | under tracing/events/kprobes/<EVENT>; in that directory you can see 'id', | ||
57 | 'enable', 'format' and 'filter'. | ||
58 | |||
59 | enable: | ||
60 | You can enable/disable the probe by writing 1 or 0 to it. | ||
61 | |||
62 | format: | ||
63 | This shows the format of this probe event. | ||
64 | |||
65 | filter: | ||
66 | You can write filtering rules of this event. | ||
67 | |||
68 | id: | ||
69 | This shows the id of this probe event. | ||
70 | |||
71 | |||
72 | Event Profiling | ||
73 | --------------- | ||
74 | You can check the total number of probe hits and probe miss-hits via | ||
75 | /sys/kernel/debug/tracing/kprobe_profile. | ||
76 | The first column is event name, the second is the number of probe hits, | ||
77 | the third is the number of probe miss-hits. | ||
78 | |||
79 | |||
80 | Usage examples | ||
81 | -------------- | ||
82 | To add a probe as a new event, write a new definition to kprobe_events | ||
83 | as below. | ||
84 | |||
85 | echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events | ||
86 | |||
87 | This sets a kprobe on the top of do_sys_open() function with recording | ||
88 | 1st to 4th arguments as "myprobe" event. As this example shows, users can | ||
89 | choose more familiar names for each argument. | ||
90 | |||
91 | echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events | ||
92 | |||
93 | This sets a kretprobe on the return point of do_sys_open() function with | ||
94 | recording return value as "myretprobe" event. | ||
95 | You can see the format of these events via | ||
96 | /sys/kernel/debug/tracing/events/kprobes/<EVENT>/format. | ||
97 | |||
98 | cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format | ||
99 | name: myprobe | ||
100 | ID: 75 | ||
101 | format: | ||
102 | field:unsigned short common_type; offset:0; size:2; | ||
103 | field:unsigned char common_flags; offset:2; size:1; | ||
104 | field:unsigned char common_preempt_count; offset:3; size:1; | ||
105 | field:int common_pid; offset:4; size:4; | ||
106 | field:int common_tgid; offset:8; size:4; | ||
107 | |||
108 | field: unsigned long ip; offset:16;tsize:8; | ||
109 | field: int nargs; offset:24;tsize:4; | ||
110 | field: unsigned long dfd; offset:32;tsize:8; | ||
111 | field: unsigned long filename; offset:40;tsize:8; | ||
112 | field: unsigned long flags; offset:48;tsize:8; | ||
113 | field: unsigned long mode; offset:56;tsize:8; | ||
114 | |||
115 | print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode | ||
116 | |||
117 | |||
118 | You can see that the event has 4 arguments as in the expressions you specified. | ||
119 | |||
120 | echo > /sys/kernel/debug/tracing/kprobe_events | ||
121 | |||
122 | This clears all probe points. | ||
123 | |||
124 | Right after definition, each event is disabled by default. For tracing these | ||
125 | events, you need to enable them. | ||
126 | |||
127 | echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable | ||
128 | echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable | ||
129 | |||
130 | And you can see the traced information via /sys/kernel/debug/tracing/trace. | ||
131 | |||
132 | cat /sys/kernel/debug/tracing/trace | ||
133 | # tracer: nop | ||
134 | # | ||
135 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
136 | # | | | | | | ||
137 | <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 | ||
138 | <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe | ||
139 | <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 | ||
140 | <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 | ||
141 | <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 | ||
142 | <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 | ||
143 | |||
144 | |||
145 | Each line shows when the kernel hits an event, and <- SYMBOL means the kernel | ||
146 | returns from SYMBOL (e.g. "sys_open+0x1b/0x1d <- do_sys_open" means the kernel | ||
147 | returns from do_sys_open to sys_open+0x1b). | ||
148 | |||
149 | |||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..7d0b681a132b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST | |||
186 | config HAVE_MMIOTRACE_SUPPORT | 186 | config HAVE_MMIOTRACE_SUPPORT |
187 | def_bool y | 187 | def_bool y |
188 | 188 | ||
189 | config X86_DECODER_SELFTEST | ||
190 | bool "x86 instruction decoder selftest" | ||
191 | depends on DEBUG_KERNEL | ||
192 | ---help--- | ||
193 | Perform x86 instruction decoder selftests at build time. | ||
194 | This option is useful for checking the sanity of x86 instruction | ||
195 | decoder code. | ||
196 | If unsure, say "N". | ||
197 | |||
189 | # | 198 | # |
190 | # IO delay types: | 199 | # IO delay types: |
191 | # | 200 | # |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9ee64d..78b32be55e9e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -155,6 +155,9 @@ all: bzImage | |||
155 | KBUILD_IMAGE := $(boot)/bzImage | 155 | KBUILD_IMAGE := $(boot)/bzImage |
156 | 156 | ||
157 | bzImage: vmlinux | 157 | bzImage: vmlinux |
158 | ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) | ||
159 | $(Q)$(MAKE) $(build)=arch/x86/tools posttest | ||
160 | endif | ||
158 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) | 161 | $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) |
159 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot | 162 | $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot |
160 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ | 163 | $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ |
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 000000000000..205b063e3e32 --- /dev/null +++ b/arch/x86/include/asm/inat.h | |||
@@ -0,0 +1,220 @@ | |||
1 | #ifndef _ASM_X86_INAT_H | ||
2 | #define _ASM_X86_INAT_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | #include <asm/inat_types.h> | ||
24 | |||
25 | /* | ||
26 | * Internal bits. Don't use bitmasks directly, because these bits are | ||
27 | * unstable. You should use checking functions. | ||
28 | */ | ||
29 | |||
30 | #define INAT_OPCODE_TABLE_SIZE 256 | ||
31 | #define INAT_GROUP_TABLE_SIZE 8 | ||
32 | |||
33 | /* Legacy last prefixes */ | ||
34 | #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ | ||
35 | #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ | ||
36 | #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ | ||
37 | /* Other Legacy prefixes */ | ||
38 | #define INAT_PFX_LOCK 4 /* 0xF0 */ | ||
39 | #define INAT_PFX_CS 5 /* 0x2E */ | ||
40 | #define INAT_PFX_DS 6 /* 0x3E */ | ||
41 | #define INAT_PFX_ES 7 /* 0x26 */ | ||
42 | #define INAT_PFX_FS 8 /* 0x64 */ | ||
43 | #define INAT_PFX_GS 9 /* 0x65 */ | ||
44 | #define INAT_PFX_SS 10 /* 0x36 */ | ||
45 | #define INAT_PFX_ADDRSZ 11 /* 0x67 */ | ||
46 | /* x86-64 REX prefix */ | ||
47 | #define INAT_PFX_REX 12 /* 0x4X */ | ||
48 | /* AVX VEX prefixes */ | ||
49 | #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ | ||
50 | #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ | ||
51 | |||
52 | #define INAT_LSTPFX_MAX 3 | ||
53 | #define INAT_LGCPFX_MAX 11 | ||
54 | |||
55 | /* Immediate size */ | ||
56 | #define INAT_IMM_BYTE 1 | ||
57 | #define INAT_IMM_WORD 2 | ||
58 | #define INAT_IMM_DWORD 3 | ||
59 | #define INAT_IMM_QWORD 4 | ||
60 | #define INAT_IMM_PTR 5 | ||
61 | #define INAT_IMM_VWORD32 6 | ||
62 | #define INAT_IMM_VWORD 7 | ||
63 | |||
64 | /* Legacy prefix */ | ||
65 | #define INAT_PFX_OFFS 0 | ||
66 | #define INAT_PFX_BITS 4 | ||
67 | #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) | ||
68 | #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) | ||
69 | /* Escape opcodes */ | ||
70 | #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) | ||
71 | #define INAT_ESC_BITS 2 | ||
72 | #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) | ||
73 | #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) | ||
74 | /* Group opcodes (1-16) */ | ||
75 | #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) | ||
76 | #define INAT_GRP_BITS 5 | ||
77 | #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) | ||
78 | #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) | ||
79 | /* Immediates */ | ||
80 | #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) | ||
81 | #define INAT_IMM_BITS 3 | ||
82 | #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) | ||
83 | /* Flags */ | ||
84 | #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) | ||
85 | #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) | ||
86 | #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) | ||
87 | #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) | ||
88 | #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) | ||
89 | #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) | ||
90 | #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) | ||
91 | #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) | ||
92 | /* Attribute making macros for attribute tables */ | ||
93 | #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) | ||
94 | #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) | ||
95 | #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) | ||
96 | #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) | ||
97 | |||
98 | /* Attribute search APIs */ | ||
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | ||
101 | insn_byte_t last_pfx, | ||
102 | insn_attr_t esc_attr); | ||
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | ||
104 | insn_byte_t last_pfx, | ||
105 | insn_attr_t esc_attr); | ||
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | ||
107 | insn_byte_t vex_m, | ||
108 | insn_byte_t vex_pp); | ||
109 | |||
110 | /* Attribute checking functions */ | ||
111 | static inline int inat_is_legacy_prefix(insn_attr_t attr) | ||
112 | { | ||
113 | attr &= INAT_PFX_MASK; | ||
114 | return attr && attr <= INAT_LGCPFX_MAX; | ||
115 | } | ||
116 | |||
117 | static inline int inat_is_address_size_prefix(insn_attr_t attr) | ||
118 | { | ||
119 | return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; | ||
120 | } | ||
121 | |||
122 | static inline int inat_is_operand_size_prefix(insn_attr_t attr) | ||
123 | { | ||
124 | return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; | ||
125 | } | ||
126 | |||
127 | static inline int inat_is_rex_prefix(insn_attr_t attr) | ||
128 | { | ||
129 | return (attr & INAT_PFX_MASK) == INAT_PFX_REX; | ||
130 | } | ||
131 | |||
132 | static inline int inat_last_prefix_id(insn_attr_t attr) | ||
133 | { | ||
134 | if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) | ||
135 | return 0; | ||
136 | else | ||
137 | return attr & INAT_PFX_MASK; | ||
138 | } | ||
139 | |||
140 | static inline int inat_is_vex_prefix(insn_attr_t attr) | ||
141 | { | ||
142 | attr &= INAT_PFX_MASK; | ||
143 | return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; | ||
144 | } | ||
145 | |||
146 | static inline int inat_is_vex3_prefix(insn_attr_t attr) | ||
147 | { | ||
148 | return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; | ||
149 | } | ||
150 | |||
151 | static inline int inat_is_escape(insn_attr_t attr) | ||
152 | { | ||
153 | return attr & INAT_ESC_MASK; | ||
154 | } | ||
155 | |||
156 | static inline int inat_escape_id(insn_attr_t attr) | ||
157 | { | ||
158 | return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; | ||
159 | } | ||
160 | |||
161 | static inline int inat_is_group(insn_attr_t attr) | ||
162 | { | ||
163 | return attr & INAT_GRP_MASK; | ||
164 | } | ||
165 | |||
166 | static inline int inat_group_id(insn_attr_t attr) | ||
167 | { | ||
168 | return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; | ||
169 | } | ||
170 | |||
171 | static inline int inat_group_common_attribute(insn_attr_t attr) | ||
172 | { | ||
173 | return attr & ~INAT_GRP_MASK; | ||
174 | } | ||
175 | |||
176 | static inline int inat_has_immediate(insn_attr_t attr) | ||
177 | { | ||
178 | return attr & INAT_IMM_MASK; | ||
179 | } | ||
180 | |||
181 | static inline int inat_immediate_size(insn_attr_t attr) | ||
182 | { | ||
183 | return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; | ||
184 | } | ||
185 | |||
186 | static inline int inat_has_modrm(insn_attr_t attr) | ||
187 | { | ||
188 | return attr & INAT_MODRM; | ||
189 | } | ||
190 | |||
191 | static inline int inat_is_force64(insn_attr_t attr) | ||
192 | { | ||
193 | return attr & INAT_FORCE64; | ||
194 | } | ||
195 | |||
196 | static inline int inat_has_second_immediate(insn_attr_t attr) | ||
197 | { | ||
198 | return attr & INAT_SCNDIMM; | ||
199 | } | ||
200 | |||
201 | static inline int inat_has_moffset(insn_attr_t attr) | ||
202 | { | ||
203 | return attr & INAT_MOFFSET; | ||
204 | } | ||
205 | |||
206 | static inline int inat_has_variant(insn_attr_t attr) | ||
207 | { | ||
208 | return attr & INAT_VARIANT; | ||
209 | } | ||
210 | |||
211 | static inline int inat_accept_vex(insn_attr_t attr) | ||
212 | { | ||
213 | return attr & INAT_VEXOK; | ||
214 | } | ||
215 | |||
216 | static inline int inat_must_vex(insn_attr_t attr) | ||
217 | { | ||
218 | return attr & INAT_VEXONLY; | ||
219 | } | ||
220 | #endif | ||
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/arch/x86/include/asm/inat_types.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef _ASM_X86_INAT_TYPES_H | ||
2 | #define _ASM_X86_INAT_TYPES_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* Instruction attributes */ | ||
25 | typedef unsigned int insn_attr_t; | ||
26 | typedef unsigned char insn_byte_t; | ||
27 | typedef signed int insn_value_t; | ||
28 | |||
29 | #endif | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 000000000000..96c2e0ad04ca --- /dev/null +++ b/arch/x86/include/asm/insn.h | |||
@@ -0,0 +1,184 @@ | |||
1 | #ifndef _ASM_X86_INSN_H | ||
2 | #define _ASM_X86_INSN_H | ||
3 | /* | ||
4 | * x86 instruction analysis | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2009 | ||
21 | */ | ||
22 | |||
23 | /* insn_attr_t is defined in inat.h */ | ||
24 | #include <asm/inat.h> | ||
25 | |||
26 | struct insn_field { | ||
27 | union { | ||
28 | insn_value_t value; | ||
29 | insn_byte_t bytes[4]; | ||
30 | }; | ||
31 | /* !0 if we've run insn_get_xxx() for this field */ | ||
32 | unsigned char got; | ||
33 | unsigned char nbytes; | ||
34 | }; | ||
35 | |||
36 | struct insn { | ||
37 | struct insn_field prefixes; /* | ||
38 | * Prefixes | ||
39 | * prefixes.bytes[3]: last prefix | ||
40 | */ | ||
41 | struct insn_field rex_prefix; /* REX prefix */ | ||
42 | struct insn_field vex_prefix; /* VEX prefix */ | ||
43 | struct insn_field opcode; /* | ||
44 | * opcode.bytes[0]: opcode1 | ||
45 | * opcode.bytes[1]: opcode2 | ||
46 | * opcode.bytes[2]: opcode3 | ||
47 | */ | ||
48 | struct insn_field modrm; | ||
49 | struct insn_field sib; | ||
50 | struct insn_field displacement; | ||
51 | union { | ||
52 | struct insn_field immediate; | ||
53 | struct insn_field moffset1; /* for 64bit MOV */ | ||
54 | struct insn_field immediate1; /* for 64bit imm or off16/32 */ | ||
55 | }; | ||
56 | union { | ||
57 | struct insn_field moffset2; /* for 64bit MOV */ | ||
58 | struct insn_field immediate2; /* for 64bit imm or seg16 */ | ||
59 | }; | ||
60 | |||
61 | insn_attr_t attr; | ||
62 | unsigned char opnd_bytes; | ||
63 | unsigned char addr_bytes; | ||
64 | unsigned char length; | ||
65 | unsigned char x86_64; | ||
66 | |||
67 | const insn_byte_t *kaddr; /* kernel address of insn to analyze */ | ||
68 | const insn_byte_t *next_byte; | ||
69 | }; | ||
70 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | ||
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | ||
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | ||
74 | |||
75 | #define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) | ||
76 | #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) | ||
77 | #define X86_SIB_BASE(sib) ((sib) & 0x07) | ||
78 | |||
79 | #define X86_REX_W(rex) ((rex) & 8) | ||
80 | #define X86_REX_R(rex) ((rex) & 4) | ||
81 | #define X86_REX_X(rex) ((rex) & 2) | ||
82 | #define X86_REX_B(rex) ((rex) & 1) | ||
83 | |||
84 | /* VEX bit flags */ | ||
85 | #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ | ||
86 | #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ | ||
87 | #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ | ||
88 | #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ | ||
89 | #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ | ||
90 | /* VEX bit fields */ | ||
91 | #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ | ||
92 | #define X86_VEX2_M 1 /* VEX2.M always 1 */ | ||
93 | #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ | ||
94 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | ||
95 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | ||
96 | |||
97 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
98 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
99 | { | ||
100 | return insn->prefixes.bytes[3]; | ||
101 | } | ||
102 | |||
103 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | ||
104 | extern void insn_get_prefixes(struct insn *insn); | ||
105 | extern void insn_get_opcode(struct insn *insn); | ||
106 | extern void insn_get_modrm(struct insn *insn); | ||
107 | extern void insn_get_sib(struct insn *insn); | ||
108 | extern void insn_get_displacement(struct insn *insn); | ||
109 | extern void insn_get_immediate(struct insn *insn); | ||
110 | extern void insn_get_length(struct insn *insn); | ||
111 | |||
112 | /* Attribute will be determined after getting ModRM (for opcode groups) */ | ||
113 | static inline void insn_get_attribute(struct insn *insn) | ||
114 | { | ||
115 | insn_get_modrm(insn); | ||
116 | } | ||
117 | |||
118 | /* Instruction uses RIP-relative addressing */ | ||
119 | extern int insn_rip_relative(struct insn *insn); | ||
120 | |||
121 | /* Init insn for kernel text */ | ||
122 | static inline void kernel_insn_init(struct insn *insn, const void *kaddr) | ||
123 | { | ||
124 | #ifdef CONFIG_X86_64 | ||
125 | insn_init(insn, kaddr, 1); | ||
126 | #else /* CONFIG_X86_32 */ | ||
127 | insn_init(insn, kaddr, 0); | ||
128 | #endif | ||
129 | } | ||
130 | |||
131 | static inline int insn_is_avx(struct insn *insn) | ||
132 | { | ||
133 | if (!insn->prefixes.got) | ||
134 | insn_get_prefixes(insn); | ||
135 | return (insn->vex_prefix.value != 0); | ||
136 | } | ||
137 | |||
138 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | ||
139 | { | ||
140 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
141 | return X86_VEX2_M; | ||
142 | else | ||
143 | return X86_VEX3_M(insn->vex_prefix.bytes[1]); | ||
144 | } | ||
145 | |||
146 | static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | ||
147 | { | ||
148 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | ||
149 | return X86_VEX_P(insn->vex_prefix.bytes[1]); | ||
150 | else | ||
151 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | ||
152 | } | ||
153 | |||
154 | /* Offset of each field from kaddr */ | ||
155 | static inline int insn_offset_rex_prefix(struct insn *insn) | ||
156 | { | ||
157 | return insn->prefixes.nbytes; | ||
158 | } | ||
159 | static inline int insn_offset_vex_prefix(struct insn *insn) | ||
160 | { | ||
161 | return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; | ||
162 | } | ||
163 | static inline int insn_offset_opcode(struct insn *insn) | ||
164 | { | ||
165 | return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; | ||
166 | } | ||
167 | static inline int insn_offset_modrm(struct insn *insn) | ||
168 | { | ||
169 | return insn_offset_opcode(insn) + insn->opcode.nbytes; | ||
170 | } | ||
171 | static inline int insn_offset_sib(struct insn *insn) | ||
172 | { | ||
173 | return insn_offset_modrm(insn) + insn->modrm.nbytes; | ||
174 | } | ||
175 | static inline int insn_offset_displacement(struct insn *insn) | ||
176 | { | ||
177 | return insn_offset_sib(insn) + insn->sib.nbytes; | ||
178 | } | ||
179 | static inline int insn_offset_immediate(struct insn *insn) | ||
180 | { | ||
181 | return insn_offset_displacement(insn) + insn->displacement.nbytes; | ||
182 | } | ||
183 | |||
184 | #endif /* _ASM_X86_INSN_H */ | ||
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349aa..a3d49dd7d26e 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
9 | #include <asm/segment.h> | 9 | #include <asm/segment.h> |
10 | #include <asm/page_types.h> | ||
10 | #endif | 11 | #endif |
11 | 12 | ||
12 | #ifndef __ASSEMBLY__ | 13 | #ifndef __ASSEMBLY__ |
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) | |||
216 | return regs->sp; | 217 | return regs->sp; |
217 | } | 218 | } |
218 | 219 | ||
220 | /* Query offset/name of register from its name/offset */ | ||
221 | extern int regs_query_register_offset(const char *name); | ||
222 | extern const char *regs_query_register_name(unsigned int offset); | ||
223 | #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) | ||
224 | |||
225 | /** | ||
226 | * regs_get_register() - get register value from its offset | ||
227 | * @regs: pt_regs from which register value is gotten. | ||
228 | * @offset: offset number of the register. | ||
229 | * | ||
230 | * regs_get_register returns the value of a register whose offset from @regs | ||
231 | * is @offset. The @offset is the offset of the register in struct pt_regs. | ||
232 | * If @offset is bigger than MAX_REG_OFFSET, this returns 0. | ||
233 | */ | ||
234 | static inline unsigned long regs_get_register(struct pt_regs *regs, | ||
235 | unsigned int offset) | ||
236 | { | ||
237 | if (unlikely(offset > MAX_REG_OFFSET)) | ||
238 | return 0; | ||
239 | return *(unsigned long *)((unsigned long)regs + offset); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * regs_within_kernel_stack() - check the address in the stack | ||
244 | * @regs: pt_regs which contains kernel stack pointer. | ||
245 | * @addr: address which is checked. | ||
246 | * | ||
247 | * regs_within_kernel_stack() checks whether @addr is within the kernel stack page(s). | ||
248 | * If @addr is within the kernel stack, it returns true. If not, returns false. | ||
249 | */ | ||
250 | static inline int regs_within_kernel_stack(struct pt_regs *regs, | ||
251 | unsigned long addr) | ||
252 | { | ||
253 | return ((addr & ~(THREAD_SIZE - 1)) == | ||
254 | (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * regs_get_kernel_stack_nth() - get Nth entry of the stack | ||
259 | * @regs: pt_regs which contains kernel stack pointer. | ||
260 | * @n: stack entry number. | ||
261 | * | ||
262 | * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which | ||
263 | * is specified by @regs. If the @n th entry is NOT in the kernel stack, | ||
264 | * this returns 0. | ||
265 | */ | ||
266 | static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | ||
267 | unsigned int n) | ||
268 | { | ||
269 | unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); | ||
270 | addr += n; | ||
271 | if (regs_within_kernel_stack(regs, (unsigned long)addr)) | ||
272 | return *addr; | ||
273 | else | ||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | /* Get Nth argument at function call */ | ||
278 | extern unsigned long regs_get_argument_nth(struct pt_regs *regs, | ||
279 | unsigned int n); | ||
280 | |||
219 | /* | 281 | /* |
220 | * These are defined as per linux/ptrace.h, which see. | 282 | * These are defined as per linux/ptrace.h, which see. |
221 | */ | 283 | */ |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7d52e9da5e0c..50b9c220e121 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork) | |||
334 | END(ret_from_fork) | 334 | END(ret_from_fork) |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Interrupt exit functions should be protected against kprobes | ||
338 | */ | ||
339 | .pushsection .kprobes.text, "ax" | ||
340 | /* | ||
337 | * Return to user mode is not as complex as all this looks, | 341 | * Return to user mode is not as complex as all this looks, |
338 | * but we want the default path for a system call return to | 342 | * but we want the default path for a system call return to |
339 | * go as quickly as possible which is why some of this is | 343 | * go as quickly as possible which is why some of this is |
@@ -383,6 +387,10 @@ need_resched: | |||
383 | END(resume_kernel) | 387 | END(resume_kernel) |
384 | #endif | 388 | #endif |
385 | CFI_ENDPROC | 389 | CFI_ENDPROC |
390 | /* | ||
391 | * End of kprobes section | ||
392 | */ | ||
393 | .popsection | ||
386 | 394 | ||
387 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 395 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
388 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 396 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
@@ -513,6 +521,10 @@ sysexit_audit: | |||
513 | PTGS_TO_GS_EX | 521 | PTGS_TO_GS_EX |
514 | ENDPROC(ia32_sysenter_target) | 522 | ENDPROC(ia32_sysenter_target) |
515 | 523 | ||
524 | /* | ||
525 | * syscall stub including irq exit should be protected against kprobes | ||
526 | */ | ||
527 | .pushsection .kprobes.text, "ax" | ||
516 | # system call handler stub | 528 | # system call handler stub |
517 | ENTRY(system_call) | 529 | ENTRY(system_call) |
518 | RING0_INT_FRAME # can't unwind into user space anyway | 530 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -705,6 +717,10 @@ syscall_badsys: | |||
705 | jmp resume_userspace | 717 | jmp resume_userspace |
706 | END(syscall_badsys) | 718 | END(syscall_badsys) |
707 | CFI_ENDPROC | 719 | CFI_ENDPROC |
720 | /* | ||
721 | * End of kprobes section | ||
722 | */ | ||
723 | .popsection | ||
708 | 724 | ||
709 | /* | 725 | /* |
710 | * System calls that need a pt_regs pointer. | 726 | * System calls that need a pt_regs pointer. |
@@ -814,6 +830,10 @@ common_interrupt: | |||
814 | ENDPROC(common_interrupt) | 830 | ENDPROC(common_interrupt) |
815 | CFI_ENDPROC | 831 | CFI_ENDPROC |
816 | 832 | ||
833 | /* | ||
834 | * Irq entries should be protected against kprobes | ||
835 | */ | ||
836 | .pushsection .kprobes.text, "ax" | ||
817 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 837 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
818 | ENTRY(name) \ | 838 | ENTRY(name) \ |
819 | RING0_INT_FRAME; \ | 839 | RING0_INT_FRAME; \ |
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) | |||
980 | jmp error_code | 1000 | jmp error_code |
981 | CFI_ENDPROC | 1001 | CFI_ENDPROC |
982 | END(spurious_interrupt_bug) | 1002 | END(spurious_interrupt_bug) |
1003 | /* | ||
1004 | * End of kprobes section | ||
1005 | */ | ||
1006 | .popsection | ||
983 | 1007 | ||
984 | ENTRY(kernel_thread_helper) | 1008 | ENTRY(kernel_thread_helper) |
985 | pushl $0 # fake return address for unwinder | 1009 | pushl $0 # fake return address for unwinder |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bd5bbddddf91..722df1b1152d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -803,6 +803,10 @@ END(interrupt) | |||
803 | call \func | 803 | call \func |
804 | .endm | 804 | .endm |
805 | 805 | ||
806 | /* | ||
807 | * Interrupt entry/exit should be protected against kprobes | ||
808 | */ | ||
809 | .pushsection .kprobes.text, "ax" | ||
806 | /* | 810 | /* |
807 | * The interrupt stubs push (~vector+0x80) onto the stack and | 811 | * The interrupt stubs push (~vector+0x80) onto the stack and |
808 | * then jump to common_interrupt. | 812 | * then jump to common_interrupt. |
@@ -941,6 +945,10 @@ ENTRY(retint_kernel) | |||
941 | 945 | ||
942 | CFI_ENDPROC | 946 | CFI_ENDPROC |
943 | END(common_interrupt) | 947 | END(common_interrupt) |
948 | /* | ||
949 | * End of kprobes section | ||
950 | */ | ||
951 | .popsection | ||
944 | 952 | ||
945 | /* | 953 | /* |
946 | * APIC interrupts. | 954 | * APIC interrupts. |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..c5f1f117e0c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,12 +48,14 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
51 | 52 | ||
52 | #include <asm/cacheflush.h> | 53 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 55 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | #include <asm/insn.h> | ||
57 | 59 | ||
58 | void jprobe_return_end(void); | 60 | void jprobe_return_end(void); |
59 | 61 | ||
@@ -106,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 108 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 109 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 110 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 111 | #undef W |
154 | 112 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 113 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -244,6 +202,75 @@ retry: | |||
244 | } | 202 | } |
245 | } | 203 | } |
246 | 204 | ||
205 | /* Recover the probed instruction at addr for further analysis. */ | ||
206 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
207 | { | ||
208 | struct kprobe *kp; | ||
209 | kp = get_kprobe((void *)addr); | ||
210 | if (!kp) | ||
211 | return -EINVAL; | ||
212 | |||
213 | /* | ||
214 | * Basically, kp->ainsn.insn has an original instruction. | ||
215 | * However, a RIP-relative instruction cannot be single-stepped | |
216 | * at a different place, so fix_riprel() tweaks the displacement of | |
217 | * that instruction. In that case, we can't recover the instruction | |
218 | * from kp->ainsn.insn. | |
219 | * | ||
220 | * On the other hand, kp->opcode has a copy of the first byte of | ||
221 | * the probed instruction, which is overwritten by int3. Since | |
222 | * the instruction at kp->addr is not modified by kprobes except | ||
223 | * for the first byte, we can recover the original instruction | ||
224 | * from it and kp->opcode. | ||
225 | */ | ||
226 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
227 | buf[0] = kp->opcode; | ||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | /* Dummy buffer for kallsyms_lookup */ | |
232 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
233 | |||
234 | /* Check if paddr is at an instruction boundary */ | ||
235 | static int __kprobes can_probe(unsigned long paddr) | ||
236 | { | ||
237 | int ret; | ||
238 | unsigned long addr, offset = 0; | ||
239 | struct insn insn; | ||
240 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
241 | |||
242 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
243 | return 0; | ||
244 | |||
245 | /* Decode instructions */ | ||
246 | addr = paddr - offset; | ||
247 | while (addr < paddr) { | ||
248 | kernel_insn_init(&insn, (void *)addr); | ||
249 | insn_get_opcode(&insn); | ||
250 | |||
251 | /* | ||
252 | * Check if the instruction has been modified by another | ||
253 | * kprobe, in which case we replace the breakpoint by the | ||
254 | * original instruction in our buffer. | ||
255 | */ | ||
256 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
257 | ret = recover_probed_instruction(buf, addr); | ||
258 | if (ret) | ||
259 | /* | ||
260 | * Another debugging subsystem might insert | ||
261 | * this breakpoint. In that case, we can't | ||
262 | * recover it. | ||
263 | */ | ||
264 | return 0; | ||
265 | kernel_insn_init(&insn, buf); | ||
266 | } | ||
267 | insn_get_length(&insn); | ||
268 | addr += insn.length; | ||
269 | } | ||
270 | |||
271 | return (addr == paddr); | ||
272 | } | ||
273 | |||
247 | /* | 274 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 275 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 276 | */ |
@@ -277,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
277 | static void __kprobes fix_riprel(struct kprobe *p) | 304 | static void __kprobes fix_riprel(struct kprobe *p) |
278 | { | 305 | { |
279 | #ifdef CONFIG_X86_64 | 306 | #ifdef CONFIG_X86_64 |
280 | u8 *insn = p->ainsn.insn; | 307 | struct insn insn; |
281 | s64 disp; | 308 | kernel_insn_init(&insn, p->ainsn.insn); |
282 | int need_modrm; | ||
283 | |||
284 | /* Skip legacy instruction prefixes. */ | ||
285 | while (1) { | ||
286 | switch (*insn) { | ||
287 | case 0x66: | ||
288 | case 0x67: | ||
289 | case 0x2e: | ||
290 | case 0x3e: | ||
291 | case 0x26: | ||
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | ||
301 | break; | ||
302 | } | ||
303 | 309 | ||
304 | /* Skip REX instruction prefix. */ | 310 | if (insn_rip_relative(&insn)) { |
305 | if (is_REX_prefix(insn)) | 311 | s64 newdisp; |
306 | ++insn; | 312 | u8 *disp; |
307 | 313 | insn_get_displacement(&insn); | |
308 | if (*insn == 0x0f) { | 314 | /* |
309 | /* Two-byte opcode. */ | 315 | * The copied instruction uses the %rip-relative addressing |
310 | ++insn; | 316 | * mode. Adjust the displacement for the difference between |
311 | need_modrm = test_bit(*insn, | 317 | * the original location of this instruction and the location |
312 | (unsigned long *)twobyte_has_modrm); | 318 | * of the copy that will actually be run. The tricky bit here |
313 | } else | 319 | * is making sure that the sign extension happens correctly in |
314 | /* One-byte opcode. */ | 320 | * this calculation, since we need a signed 32-bit result to |
315 | need_modrm = test_bit(*insn, | 321 | * be sign-extended to 64 bits when it's added to the %rip |
316 | (unsigned long *)onebyte_has_modrm); | 322 | * value and yield the same 64-bit result that the sign- |
317 | 323 | * extension of the original signed 32-bit displacement would | |
318 | if (need_modrm) { | 324 | * have given. |
319 | u8 modrm = *++insn; | 325 | */ |
320 | if ((modrm & 0xc7) == 0x05) { | 326 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - |
321 | /* %rip+disp32 addressing mode */ | 327 | (u8 *) p->ainsn.insn; |
322 | /* Displacement follows ModRM byte. */ | 328 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
323 | ++insn; | 329 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); |
324 | /* | 330 | *(s32 *) disp = (s32) newdisp; |
325 | * The copied instruction uses the %rip-relative | ||
326 | * addressing mode. Adjust the displacement for the | ||
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 331 | } |
343 | #endif | 332 | #endif |
344 | } | 333 | } |
@@ -359,6 +348,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 348 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 349 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 350 | { |
351 | if (!can_probe((unsigned long)p->addr)) | ||
352 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 353 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 354 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 355 | if (!p->ainsn.insn) |
@@ -472,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 463 | { |
473 | switch (kcb->kprobe_status) { | 464 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 465 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 466 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | 467 | save_previous_kprobe(kcb); |
488 | set_current_kprobe(p, regs, kcb); | 468 | set_current_kprobe(p, regs, kcb); |
@@ -491,18 +471,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
491 | kcb->kprobe_status = KPROBE_REENTER; | 471 | kcb->kprobe_status = KPROBE_REENTER; |
492 | break; | 472 | break; |
493 | case KPROBE_HIT_SS: | 473 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 474 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 475 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 476 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 477 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 478 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 479 | */ |
500 | * to, or just after, single-stepping of a probed | 480 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 481 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 482 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 483 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 484 | default: |
507 | /* impossible cases */ | 485 | /* impossible cases */ |
508 | WARN_ON(1); | 486 | WARN_ON(1); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..c4f76d275ee4 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -49,6 +49,118 @@ enum x86_regset { | |||
49 | REGSET_IOPERM32, | 49 | REGSET_IOPERM32, |
50 | }; | 50 | }; |
51 | 51 | ||
52 | struct pt_regs_offset { | ||
53 | const char *name; | ||
54 | int offset; | ||
55 | }; | ||
56 | |||
57 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
58 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
59 | |||
60 | static const struct pt_regs_offset regoffset_table[] = { | ||
61 | #ifdef CONFIG_X86_64 | ||
62 | REG_OFFSET_NAME(r15), | ||
63 | REG_OFFSET_NAME(r14), | ||
64 | REG_OFFSET_NAME(r13), | ||
65 | REG_OFFSET_NAME(r12), | ||
66 | REG_OFFSET_NAME(r11), | ||
67 | REG_OFFSET_NAME(r10), | ||
68 | REG_OFFSET_NAME(r9), | ||
69 | REG_OFFSET_NAME(r8), | ||
70 | #endif | ||
71 | REG_OFFSET_NAME(bx), | ||
72 | REG_OFFSET_NAME(cx), | ||
73 | REG_OFFSET_NAME(dx), | ||
74 | REG_OFFSET_NAME(si), | ||
75 | REG_OFFSET_NAME(di), | ||
76 | REG_OFFSET_NAME(bp), | ||
77 | REG_OFFSET_NAME(ax), | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | REG_OFFSET_NAME(ds), | ||
80 | REG_OFFSET_NAME(es), | ||
81 | REG_OFFSET_NAME(fs), | ||
82 | REG_OFFSET_NAME(gs), | ||
83 | #endif | ||
84 | REG_OFFSET_NAME(orig_ax), | ||
85 | REG_OFFSET_NAME(ip), | ||
86 | REG_OFFSET_NAME(cs), | ||
87 | REG_OFFSET_NAME(flags), | ||
88 | REG_OFFSET_NAME(sp), | ||
89 | REG_OFFSET_NAME(ss), | ||
90 | REG_OFFSET_END, | ||
91 | }; | ||
92 | |||
93 | /** | ||
94 | * regs_query_register_offset() - query register offset from its name | ||
95 | * @name: the name of a register | ||
96 | * | ||
97 | * regs_query_register_offset() returns the offset of a register in struct | ||
98 | * pt_regs from its name. If the name is invalid, this returns -EINVAL. | |
99 | */ | ||
100 | int regs_query_register_offset(const char *name) | ||
101 | { | ||
102 | const struct pt_regs_offset *roff; | ||
103 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
104 | if (!strcmp(roff->name, name)) | ||
105 | return roff->offset; | ||
106 | return -EINVAL; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * regs_query_register_name() - query register name from its offset | ||
111 | * @offset: the offset of a register in struct pt_regs. | ||
112 | * | ||
113 | * regs_query_register_name() returns the name of a register from its | ||
114 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL. | |
115 | */ | ||
116 | const char *regs_query_register_name(unsigned int offset) | ||
117 | { | ||
118 | const struct pt_regs_offset *roff; | ||
119 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
120 | if (roff->offset == offset) | ||
121 | return roff->name; | ||
122 | return NULL; | ||
123 | } | ||
124 | |||
125 | static const int arg_offs_table[] = { | ||
126 | #ifdef CONFIG_X86_32 | ||
127 | [0] = offsetof(struct pt_regs, ax), | ||
128 | [1] = offsetof(struct pt_regs, dx), | ||
129 | [2] = offsetof(struct pt_regs, cx) | ||
130 | #else /* CONFIG_X86_64 */ | ||
131 | [0] = offsetof(struct pt_regs, di), | ||
132 | [1] = offsetof(struct pt_regs, si), | ||
133 | [2] = offsetof(struct pt_regs, dx), | ||
134 | [3] = offsetof(struct pt_regs, cx), | ||
135 | [4] = offsetof(struct pt_regs, r8), | ||
136 | [5] = offsetof(struct pt_regs, r9) | ||
137 | #endif | ||
138 | }; | ||
139 | |||
140 | /** | ||
141 | * regs_get_argument_nth() - get Nth argument at function call | ||
142 | * @regs: pt_regs which contains registers at function entry. | ||
143 | * @n: argument number. | ||
144 | * | ||
145 | * regs_get_argument_nth() returns @n th argument of a function call. | ||
146 | * Since usually the kernel stack will be changed right after function entry, | ||
147 | * you must use this at function entry. If the @n th entry is NOT in the | ||
148 | * kernel stack or pt_regs, this returns 0. | ||
149 | */ | ||
150 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
151 | { | ||
152 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
153 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
154 | else { | ||
155 | /* | ||
156 | * The typical case: arg n is on the stack. | ||
157 | * (Note: stack[0] = return address, so skip it) | ||
158 | */ | ||
159 | n -= ARRAY_SIZE(arg_offs_table); | ||
160 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
161 | } | ||
162 | } | ||
163 | |||
52 | /* | 164 | /* |
53 | * does not yet catch signals sent when the child dies. | 165 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 166 | * in exit.c or in signal.c. |
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 000000000000..8df89f0a3fe6 --- /dev/null +++ b/arch/x86/lib/.gitignore | |||
@@ -0,0 +1 @@ | |||
inat-tables.c | |||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db95c60f..a2d6472895fb 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -2,12 +2,25 @@ | |||
2 | # Makefile for x86 specific library files. | 2 | # Makefile for x86 specific library files. |
3 | # | 3 | # |
4 | 4 | ||
5 | inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk | ||
6 | inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt | ||
7 | quiet_cmd_inat_tables = GEN $@ | ||
8 | cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ | ||
9 | |||
10 | $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) | ||
11 | $(call cmd,inat_tables) | ||
12 | |||
13 | $(obj)/inat.o: $(obj)/inat-tables.c | ||
14 | |||
15 | clean-files := inat-tables.c | ||
16 | |||
5 | obj-$(CONFIG_SMP) := msr.o | 17 | obj-$(CONFIG_SMP) := msr.o |
6 | 18 | ||
7 | lib-y := delay.o | 19 | lib-y := delay.o |
8 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
10 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-y += insn.o inat.o | ||
11 | 24 | ||
12 | obj-y += msr-reg.o msr-reg-export.o | 25 | obj-y += msr-reg.o msr-reg-export.o |
13 | 26 | ||
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 000000000000..46fc4ee09fc4 --- /dev/null +++ b/arch/x86/lib/inat.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * x86 instruction attribute tables | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | */ | ||
21 | #include <asm/insn.h> | ||
22 | |||
23 | /* Attribute tables are generated from opcode map */ | ||
24 | #include "inat-tables.c" | ||
25 | |||
26 | /* Attribute search APIs */ | ||
27 | insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | ||
28 | { | ||
29 | return inat_primary_table[opcode]; | ||
30 | } | ||
31 | |||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | ||
33 | insn_attr_t esc_attr) | ||
34 | { | ||
35 | const insn_attr_t *table; | ||
36 | insn_attr_t lpfx_attr; | ||
37 | int n, m = 0; | ||
38 | |||
39 | n = inat_escape_id(esc_attr); | ||
40 | if (last_pfx) { | ||
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | ||
45 | if (!table) | ||
46 | return 0; | ||
47 | if (inat_has_variant(table[opcode]) && m) { | ||
48 | table = inat_escape_tables[n][m]; | ||
49 | if (!table) | ||
50 | return 0; | ||
51 | } | ||
52 | return table[opcode]; | ||
53 | } | ||
54 | |||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | ||
56 | insn_attr_t grp_attr) | ||
57 | { | ||
58 | const insn_attr_t *table; | ||
59 | insn_attr_t lpfx_attr; | ||
60 | int n, m = 0; | ||
61 | |||
62 | n = inat_group_id(grp_attr); | ||
63 | if (last_pfx) { | ||
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | ||
68 | if (!table) | ||
69 | return inat_group_common_attribute(grp_attr); | ||
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | ||
71 | table = inat_group_tables[n][m]; | ||
72 | if (!table) | ||
73 | return inat_group_common_attribute(grp_attr); | ||
74 | } | ||
75 | return table[X86_MODRM_REG(modrm)] | | ||
76 | inat_group_common_attribute(grp_attr); | ||
77 | } | ||
78 | |||
79 | insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | ||
80 | insn_byte_t vex_p) | ||
81 | { | ||
82 | const insn_attr_t *table; | ||
83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | ||
84 | return 0; | ||
85 | table = inat_avx_tables[vex_m][vex_p]; | ||
86 | if (!table) | ||
87 | return 0; | ||
88 | return table[opcode]; | ||
89 | } | ||
90 | |||
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..9f33b984d0ef --- /dev/null +++ b/arch/x86/lib/insn.c | |||
@@ -0,0 +1,516 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/string.h> | ||
22 | #include <asm/inat.h> | ||
23 | #include <asm/insn.h> | ||
24 | |||
25 | #define get_next(t, insn) \ | ||
26 | ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | ||
27 | |||
28 | #define peek_next(t, insn) \ | ||
29 | ({t r; r = *(t*)insn->next_byte; r; }) | ||
30 | |||
31 | #define peek_nbyte_next(t, insn, n) \ | ||
32 | ({t r; r = *(t*)((insn)->next_byte + n); r; }) | ||
33 | |||
34 | /** | ||
35 | * insn_init() - initialize struct insn | ||
36 | * @insn: &struct insn to be initialized | ||
37 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
38 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
39 | */ | ||
40 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | ||
41 | { | ||
42 | memset(insn, 0, sizeof(*insn)); | ||
43 | insn->kaddr = kaddr; | ||
44 | insn->next_byte = kaddr; | ||
45 | insn->x86_64 = x86_64 ? 1 : 0; | ||
46 | insn->opnd_bytes = 4; | ||
47 | if (x86_64) | ||
48 | insn->addr_bytes = 8; | ||
49 | else | ||
50 | insn->addr_bytes = 4; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
55 | * @insn: &struct insn containing instruction | ||
56 | * | ||
57 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
58 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
59 | * is already set. | ||
60 | */ | ||
61 | void insn_get_prefixes(struct insn *insn) | ||
62 | { | ||
63 | struct insn_field *prefixes = &insn->prefixes; | ||
64 | insn_attr_t attr; | ||
65 | insn_byte_t b, lb; | ||
66 | int i, nb; | ||
67 | |||
68 | if (prefixes->got) | ||
69 | return; | ||
70 | |||
71 | nb = 0; | ||
72 | lb = 0; | ||
73 | b = peek_next(insn_byte_t, insn); | ||
74 | attr = inat_get_opcode_attribute(b); | ||
75 | while (inat_is_legacy_prefix(attr)) { | ||
76 | /* Skip if same prefix */ | ||
77 | for (i = 0; i < nb; i++) | ||
78 | if (prefixes->bytes[i] == b) | ||
79 | goto found; | ||
80 | if (nb == 4) | ||
81 | /* Invalid instruction */ | ||
82 | break; | ||
83 | prefixes->bytes[nb++] = b; | ||
84 | if (inat_is_address_size_prefix(attr)) { | ||
85 | /* address size switches 2/4 or 4/8 */ | ||
86 | if (insn->x86_64) | ||
87 | insn->addr_bytes ^= 12; | ||
88 | else | ||
89 | insn->addr_bytes ^= 6; | ||
90 | } else if (inat_is_operand_size_prefix(attr)) { | ||
91 | /* operand size switches 2/4 */ | |
92 | insn->opnd_bytes ^= 6; | ||
93 | } | ||
94 | found: | ||
95 | prefixes->nbytes++; | ||
96 | insn->next_byte++; | ||
97 | lb = b; | ||
98 | b = peek_next(insn_byte_t, insn); | ||
99 | attr = inat_get_opcode_attribute(b); | ||
100 | } | ||
101 | /* Set the last prefix */ | ||
102 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
103 | if (unlikely(insn->prefixes.bytes[3])) { | ||
104 | /* Swap the last prefix */ | ||
105 | b = insn->prefixes.bytes[3]; | ||
106 | for (i = 0; i < nb; i++) | ||
107 | if (prefixes->bytes[i] == lb) | ||
108 | prefixes->bytes[i] = b; | ||
109 | } | ||
110 | insn->prefixes.bytes[3] = lb; | ||
111 | } | ||
112 | |||
113 | /* Decode REX prefix */ | ||
114 | if (insn->x86_64) { | ||
115 | b = peek_next(insn_byte_t, insn); | ||
116 | attr = inat_get_opcode_attribute(b); | ||
117 | if (inat_is_rex_prefix(attr)) { | ||
118 | insn->rex_prefix.value = b; | ||
119 | insn->rex_prefix.nbytes = 1; | ||
120 | insn->next_byte++; | ||
121 | if (X86_REX_W(b)) | ||
122 | /* REX.W overrides opnd_size */ | ||
123 | insn->opnd_bytes = 8; | ||
124 | } | ||
125 | } | ||
126 | insn->rex_prefix.got = 1; | ||
127 | |||
128 | /* Decode VEX prefix */ | ||
129 | b = peek_next(insn_byte_t, insn); | ||
130 | attr = inat_get_opcode_attribute(b); | ||
131 | if (inat_is_vex_prefix(attr)) { | ||
132 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | ||
133 | if (!insn->x86_64) { | ||
134 | /* | ||
135 | * In 32-bit mode, if the [7:6] bits (mod bits of | |
136 | * ModRM) on the second byte are not 11b, it is | ||
137 | * LDS or LES. | ||
138 | */ | ||
139 | if (X86_MODRM_MOD(b2) != 3) | ||
140 | goto vex_end; | ||
141 | } | ||
142 | insn->vex_prefix.bytes[0] = b; | ||
143 | insn->vex_prefix.bytes[1] = b2; | ||
144 | if (inat_is_vex3_prefix(attr)) { | ||
145 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | ||
146 | insn->vex_prefix.bytes[2] = b2; | ||
147 | insn->vex_prefix.nbytes = 3; | ||
148 | insn->next_byte += 3; | ||
149 | if (insn->x86_64 && X86_VEX_W(b2)) | ||
150 | /* VEX.W overrides opnd_size */ | ||
151 | insn->opnd_bytes = 8; | ||
152 | } else { | ||
153 | insn->vex_prefix.nbytes = 2; | ||
154 | insn->next_byte += 2; | ||
155 | } | ||
156 | } | ||
157 | vex_end: | ||
158 | insn->vex_prefix.got = 1; | ||
159 | |||
160 | prefixes->got = 1; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | /** | ||
165 | * insn_get_opcode - collect opcode(s) | ||
166 | * @insn: &struct insn containing instruction | ||
167 | * | ||
168 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
169 | * opcode byte(s), and sets @insn->attr (except for groups). | |
170 | * If necessary, first collects any preceding (prefix) bytes. | ||
171 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
172 | * is already 1. | ||
173 | */ | ||
174 | void insn_get_opcode(struct insn *insn) | ||
175 | { | ||
176 | struct insn_field *opcode = &insn->opcode; | ||
177 | insn_byte_t op, pfx; | ||
178 | if (opcode->got) | ||
179 | return; | ||
180 | if (!insn->prefixes.got) | ||
181 | insn_get_prefixes(insn); | ||
182 | |||
183 | /* Get first opcode */ | ||
184 | op = get_next(insn_byte_t, insn); | ||
185 | opcode->bytes[0] = op; | ||
186 | opcode->nbytes = 1; | ||
187 | |||
188 | /* Check if there is VEX prefix or not */ | ||
189 | if (insn_is_avx(insn)) { | ||
190 | insn_byte_t m, p; | ||
191 | m = insn_vex_m_bits(insn); | ||
192 | p = insn_vex_p_bits(insn); | ||
193 | insn->attr = inat_get_avx_attribute(op, m, p); | ||
194 | if (!inat_accept_vex(insn->attr)) | ||
195 | insn->attr = 0; /* This instruction is bad */ | ||
196 | goto end; /* VEX has only 1 byte for opcode */ | ||
197 | } | ||
198 | |||
199 | insn->attr = inat_get_opcode_attribute(op); | ||
200 | while (inat_is_escape(insn->attr)) { | ||
201 | /* Get escaped opcode */ | ||
202 | op = get_next(insn_byte_t, insn); | ||
203 | opcode->bytes[opcode->nbytes++] = op; | ||
204 | pfx = insn_last_prefix(insn); | ||
205 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | ||
206 | } | ||
207 | if (inat_must_vex(insn->attr)) | ||
208 | insn->attr = 0; /* This instruction is bad */ | ||
209 | end: | ||
210 | opcode->got = 1; | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * insn_get_modrm - collect ModRM byte, if any | ||
215 | * @insn: &struct insn containing instruction | ||
216 | * | ||
217 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
218 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
219 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
220 | */ | ||
221 | void insn_get_modrm(struct insn *insn) | ||
222 | { | ||
223 | struct insn_field *modrm = &insn->modrm; | ||
224 | insn_byte_t pfx, mod; | ||
225 | if (modrm->got) | ||
226 | return; | ||
227 | if (!insn->opcode.got) | ||
228 | insn_get_opcode(insn); | ||
229 | |||
230 | if (inat_has_modrm(insn->attr)) { | ||
231 | mod = get_next(insn_byte_t, insn); | ||
232 | modrm->value = mod; | ||
233 | modrm->nbytes = 1; | ||
234 | if (inat_is_group(insn->attr)) { | ||
235 | pfx = insn_last_prefix(insn); | ||
236 | insn->attr = inat_get_group_attribute(mod, pfx, | ||
237 | insn->attr); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
242 | insn->opnd_bytes = 8; | ||
243 | modrm->got = 1; | ||
244 | } | ||
245 | |||
246 | |||
247 | /** | ||
248 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
249 | * @insn: &struct insn containing instruction | ||
250 | * | ||
251 | * If necessary, first collects the instruction up to and including the | ||
252 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
253 | */ | ||
254 | int insn_rip_relative(struct insn *insn) | ||
255 | { | ||
256 | struct insn_field *modrm = &insn->modrm; | ||
257 | |||
258 | if (!insn->x86_64) | ||
259 | return 0; | ||
260 | if (!modrm->got) | ||
261 | insn_get_modrm(insn); | ||
262 | /* | ||
263 | * For rip-relative instructions, the mod field (top 2 bits) | ||
264 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
265 | */ | ||
266 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * insn_get_sib() - Get the SIB byte of instruction | ||
271 | * @insn: &struct insn containing instruction | ||
272 | * | ||
273 | * If necessary, first collects the instruction up to and including the | ||
274 | * ModRM byte. | ||
275 | */ | ||
276 | void insn_get_sib(struct insn *insn) | ||
277 | { | ||
278 | insn_byte_t modrm; | ||
279 | |||
280 | if (insn->sib.got) | ||
281 | return; | ||
282 | if (!insn->modrm.got) | ||
283 | insn_get_modrm(insn); | ||
284 | if (insn->modrm.nbytes) { | ||
285 | modrm = (insn_byte_t)insn->modrm.value; | ||
286 | if (insn->addr_bytes != 2 && | ||
287 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
288 | insn->sib.value = get_next(insn_byte_t, insn); | ||
289 | insn->sib.nbytes = 1; | ||
290 | } | ||
291 | } | ||
292 | insn->sib.got = 1; | ||
293 | } | ||
294 | |||
295 | |||
296 | /** | ||
297 | * insn_get_displacement() - Get the displacement of instruction | ||
298 | * @insn: &struct insn containing instruction | ||
299 | * | ||
300 | * If necessary, first collects the instruction up to and including the | ||
301 | * SIB byte. | ||
302 | * Displacement value is sign-expanded. | ||
303 | */ | ||
304 | void insn_get_displacement(struct insn *insn) | ||
305 | { | ||
306 | insn_byte_t mod, rm, base; | ||
307 | |||
308 | if (insn->displacement.got) | ||
309 | return; | ||
310 | if (!insn->sib.got) | ||
311 | insn_get_sib(insn); | ||
312 | if (insn->modrm.nbytes) { | ||
313 | /* | ||
314 | * Interpreting the modrm byte: | ||
315 | * mod = 00 - no displacement fields (exceptions below) | ||
316 | * mod = 01 - 1-byte displacement field | ||
317 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
318 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
319 | * mod = 11 - no memory operand | ||
320 | * | ||
321 | * If address size = 2... | ||
322 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
323 | * | ||
324 | * If address size != 2... | ||
325 | * mod != 11, r/m = 100 - SIB byte exists | ||
326 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
327 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
328 | * field is 4 bytes | ||
329 | */ | ||
330 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
331 | rm = X86_MODRM_RM(insn->modrm.value); | ||
332 | base = X86_SIB_BASE(insn->sib.value); | ||
333 | if (mod == 3) | ||
334 | goto out; | ||
335 | if (mod == 1) { | ||
336 | insn->displacement.value = get_next(char, insn); | ||
337 | insn->displacement.nbytes = 1; | ||
338 | } else if (insn->addr_bytes == 2) { | ||
339 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
340 | insn->displacement.value = | ||
341 | get_next(short, insn); | ||
342 | insn->displacement.nbytes = 2; | ||
343 | } | ||
344 | } else { | ||
345 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
346 | (mod == 0 && base == 5)) { | ||
347 | insn->displacement.value = get_next(int, insn); | ||
348 | insn->displacement.nbytes = 4; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | out: | ||
353 | insn->displacement.got = 1; | ||
354 | } | ||
355 | |||
356 | /* Decode moffset16/32/64 */ | ||
357 | static void __get_moffset(struct insn *insn) | ||
358 | { | ||
359 | switch (insn->addr_bytes) { | ||
360 | case 2: | ||
361 | insn->moffset1.value = get_next(short, insn); | ||
362 | insn->moffset1.nbytes = 2; | ||
363 | break; | ||
364 | case 4: | ||
365 | insn->moffset1.value = get_next(int, insn); | ||
366 | insn->moffset1.nbytes = 4; | ||
367 | break; | ||
368 | case 8: | ||
369 | insn->moffset1.value = get_next(int, insn); | ||
370 | insn->moffset1.nbytes = 4; | ||
371 | insn->moffset2.value = get_next(int, insn); | ||
372 | insn->moffset2.nbytes = 4; | ||
373 | break; | ||
374 | } | ||
375 | insn->moffset1.got = insn->moffset2.got = 1; | ||
376 | } | ||
377 | |||
378 | /* Decode imm v32(Iz) */ | ||
379 | static void __get_immv32(struct insn *insn) | ||
380 | { | ||
381 | switch (insn->opnd_bytes) { | ||
382 | case 2: | ||
383 | insn->immediate.value = get_next(short, insn); | ||
384 | insn->immediate.nbytes = 2; | ||
385 | break; | ||
386 | case 4: | ||
387 | case 8: | ||
388 | insn->immediate.value = get_next(int, insn); | ||
389 | insn->immediate.nbytes = 4; | ||
390 | break; | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* Decode imm v64(Iv/Ov) */ | ||
395 | static void __get_immv(struct insn *insn) | ||
396 | { | ||
397 | switch (insn->opnd_bytes) { | ||
398 | case 2: | ||
399 | insn->immediate1.value = get_next(short, insn); | ||
400 | insn->immediate1.nbytes = 2; | ||
401 | break; | ||
402 | case 4: | ||
403 | insn->immediate1.value = get_next(int, insn); | ||
404 | insn->immediate1.nbytes = 4; | ||
405 | break; | ||
406 | case 8: | ||
407 | insn->immediate1.value = get_next(int, insn); | ||
408 | insn->immediate1.nbytes = 4; | ||
409 | insn->immediate2.value = get_next(int, insn); | ||
410 | insn->immediate2.nbytes = 4; | ||
411 | break; | ||
412 | } | ||
413 | insn->immediate1.got = insn->immediate2.got = 1; | ||
414 | } | ||
415 | |||
416 | /* Decode ptr16:16/32(Ap) */ | ||
417 | static void __get_immptr(struct insn *insn) | ||
418 | { | ||
419 | switch (insn->opnd_bytes) { | ||
420 | case 2: | ||
421 | insn->immediate1.value = get_next(short, insn); | ||
422 | insn->immediate1.nbytes = 2; | ||
423 | break; | ||
424 | case 4: | ||
425 | insn->immediate1.value = get_next(int, insn); | ||
426 | insn->immediate1.nbytes = 4; | ||
427 | break; | ||
428 | case 8: | ||
429 | /* ptr16:64 is not exist (no segment) */ | ||
430 | return; | ||
431 | } | ||
432 | insn->immediate2.value = get_next(unsigned short, insn); | ||
433 | insn->immediate2.nbytes = 2; | ||
434 | insn->immediate1.got = insn->immediate2.got = 1; | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * insn_get_immediate() - Get the immediates of instruction | ||
439 | * @insn: &struct insn containing instruction | ||
440 | * | ||
441 | * If necessary, first collects the instruction up to and including the | ||
442 | * displacement bytes. | ||
443 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | ||
444 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | ||
445 | */ | ||
446 | void insn_get_immediate(struct insn *insn) | ||
447 | { | ||
448 | if (insn->immediate.got) | ||
449 | return; | ||
450 | if (!insn->displacement.got) | ||
451 | insn_get_displacement(insn); | ||
452 | |||
453 | if (inat_has_moffset(insn->attr)) { | ||
454 | __get_moffset(insn); | ||
455 | goto done; | ||
456 | } | ||
457 | |||
458 | if (!inat_has_immediate(insn->attr)) | ||
459 | /* no immediates */ | ||
460 | goto done; | ||
461 | |||
462 | switch (inat_immediate_size(insn->attr)) { | ||
463 | case INAT_IMM_BYTE: | ||
464 | insn->immediate.value = get_next(char, insn); | ||
465 | insn->immediate.nbytes = 1; | ||
466 | break; | ||
467 | case INAT_IMM_WORD: | ||
468 | insn->immediate.value = get_next(short, insn); | ||
469 | insn->immediate.nbytes = 2; | ||
470 | break; | ||
471 | case INAT_IMM_DWORD: | ||
472 | insn->immediate.value = get_next(int, insn); | ||
473 | insn->immediate.nbytes = 4; | ||
474 | break; | ||
475 | case INAT_IMM_QWORD: | ||
476 | insn->immediate1.value = get_next(int, insn); | ||
477 | insn->immediate1.nbytes = 4; | ||
478 | insn->immediate2.value = get_next(int, insn); | ||
479 | insn->immediate2.nbytes = 4; | ||
480 | break; | ||
481 | case INAT_IMM_PTR: | ||
482 | __get_immptr(insn); | ||
483 | break; | ||
484 | case INAT_IMM_VWORD32: | ||
485 | __get_immv32(insn); | ||
486 | break; | ||
487 | case INAT_IMM_VWORD: | ||
488 | __get_immv(insn); | ||
489 | break; | ||
490 | default: | ||
491 | break; | ||
492 | } | ||
493 | if (inat_has_second_immediate(insn->attr)) { | ||
494 | insn->immediate2.value = get_next(char, insn); | ||
495 | insn->immediate2.nbytes = 1; | ||
496 | } | ||
497 | done: | ||
498 | insn->immediate.got = 1; | ||
499 | } | ||
500 | |||
501 | /** | ||
502 | * insn_get_length() - Get the length of instruction | ||
503 | * @insn: &struct insn containing instruction | ||
504 | * | ||
505 | * If necessary, first collects the instruction up to and including the | ||
506 | * immediates bytes. | ||
507 | */ | ||
508 | void insn_get_length(struct insn *insn) | ||
509 | { | ||
510 | if (insn->length) | ||
511 | return; | ||
512 | if (!insn->immediate.got) | ||
513 | insn_get_immediate(insn); | ||
514 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
515 | - (unsigned long)insn->kaddr); | ||
516 | } | ||
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 000000000000..a793da5e560e --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -0,0 +1,893 @@ | |||
1 | # x86 Opcode Maps | ||
2 | # | ||
3 | #<Opcode maps> | ||
4 | # Table: table-name | ||
5 | # Referrer: escaped-name | ||
6 | # AVXcode: avx-code | ||
7 | # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
8 | # (or) | ||
9 | # opcode: escape # escaped-name | ||
10 | # EndTable | ||
11 | # | ||
12 | #<group maps> | ||
13 | # GrpTable: GrpXXX | ||
14 | # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
15 | # EndTable | ||
16 | # | ||
17 | # AVX Superscripts | ||
18 | # (VEX): this opcode can accept VEX prefix. | ||
19 | # (oVEX): this opcode requires VEX prefix. | ||
20 | # (o128): this opcode only supports 128bit VEX. | ||
21 | # (o256): this opcode only supports 256bit VEX. | ||
22 | # | ||
23 | |||
24 | Table: one byte opcode | ||
25 | Referrer: | ||
26 | AVXcode: | ||
27 | # 0x00 - 0x0f | ||
28 | 00: ADD Eb,Gb | ||
29 | 01: ADD Ev,Gv | ||
30 | 02: ADD Gb,Eb | ||
31 | 03: ADD Gv,Ev | ||
32 | 04: ADD AL,Ib | ||
33 | 05: ADD rAX,Iz | ||
34 | 06: PUSH ES (i64) | ||
35 | 07: POP ES (i64) | ||
36 | 08: OR Eb,Gb | ||
37 | 09: OR Ev,Gv | ||
38 | 0a: OR Gb,Eb | ||
39 | 0b: OR Gv,Ev | ||
40 | 0c: OR AL,Ib | ||
41 | 0d: OR rAX,Iz | ||
42 | 0e: PUSH CS (i64) | ||
43 | 0f: escape # 2-byte escape | ||
44 | # 0x10 - 0x1f | ||
45 | 10: ADC Eb,Gb | ||
46 | 11: ADC Ev,Gv | ||
47 | 12: ADC Gb,Eb | ||
48 | 13: ADC Gv,Ev | ||
49 | 14: ADC AL,Ib | ||
50 | 15: ADC rAX,Iz | ||
51 | 16: PUSH SS (i64) | ||
52 | 17: POP SS (i64) | ||
53 | 18: SBB Eb,Gb | ||
54 | 19: SBB Ev,Gv | ||
55 | 1a: SBB Gb,Eb | ||
56 | 1b: SBB Gv,Ev | ||
57 | 1c: SBB AL,Ib | ||
58 | 1d: SBB rAX,Iz | ||
59 | 1e: PUSH DS (i64) | ||
60 | 1f: POP DS (i64) | ||
61 | # 0x20 - 0x2f | ||
62 | 20: AND Eb,Gb | ||
63 | 21: AND Ev,Gv | ||
64 | 22: AND Gb,Eb | ||
65 | 23: AND Gv,Ev | ||
66 | 24: AND AL,Ib | ||
67 | 25: AND rAX,Iz | ||
68 | 26: SEG=ES (Prefix) | ||
69 | 27: DAA (i64) | ||
70 | 28: SUB Eb,Gb | ||
71 | 29: SUB Ev,Gv | ||
72 | 2a: SUB Gb,Eb | ||
73 | 2b: SUB Gv,Ev | ||
74 | 2c: SUB AL,Ib | ||
75 | 2d: SUB rAX,Iz | ||
76 | 2e: SEG=CS (Prefix) | ||
77 | 2f: DAS (i64) | ||
78 | # 0x30 - 0x3f | ||
79 | 30: XOR Eb,Gb | ||
80 | 31: XOR Ev,Gv | ||
81 | 32: XOR Gb,Eb | ||
82 | 33: XOR Gv,Ev | ||
83 | 34: XOR AL,Ib | ||
84 | 35: XOR rAX,Iz | ||
85 | 36: SEG=SS (Prefix) | ||
86 | 37: AAA (i64) | ||
87 | 38: CMP Eb,Gb | ||
88 | 39: CMP Ev,Gv | ||
89 | 3a: CMP Gb,Eb | ||
90 | 3b: CMP Gv,Ev | ||
91 | 3c: CMP AL,Ib | ||
92 | 3d: CMP rAX,Iz | ||
93 | 3e: SEG=DS (Prefix) | ||
94 | 3f: AAS (i64) | ||
95 | # 0x40 - 0x4f | ||
96 | 40: INC eAX (i64) | REX (o64) | ||
97 | 41: INC eCX (i64) | REX.B (o64) | ||
98 | 42: INC eDX (i64) | REX.X (o64) | ||
99 | 43: INC eBX (i64) | REX.XB (o64) | ||
100 | 44: INC eSP (i64) | REX.R (o64) | ||
101 | 45: INC eBP (i64) | REX.RB (o64) | ||
102 | 46: INC eSI (i64) | REX.RX (o64) | ||
103 | 47: INC eDI (i64) | REX.RXB (o64) | ||
104 | 48: DEC eAX (i64) | REX.W (o64) | ||
105 | 49: DEC eCX (i64) | REX.WB (o64) | ||
106 | 4a: DEC eDX (i64) | REX.WX (o64) | ||
107 | 4b: DEC eBX (i64) | REX.WXB (o64) | ||
108 | 4c: DEC eSP (i64) | REX.WR (o64) | ||
109 | 4d: DEC eBP (i64) | REX.WRB (o64) | ||
110 | 4e: DEC eSI (i64) | REX.WRX (o64) | ||
111 | 4f: DEC eDI (i64) | REX.WRXB (o64) | ||
112 | # 0x50 - 0x5f | ||
113 | 50: PUSH rAX/r8 (d64) | ||
114 | 51: PUSH rCX/r9 (d64) | ||
115 | 52: PUSH rDX/r10 (d64) | ||
116 | 53: PUSH rBX/r11 (d64) | ||
117 | 54: PUSH rSP/r12 (d64) | ||
118 | 55: PUSH rBP/r13 (d64) | ||
119 | 56: PUSH rSI/r14 (d64) | ||
120 | 57: PUSH rDI/r15 (d64) | ||
121 | 58: POP rAX/r8 (d64) | ||
122 | 59: POP rCX/r9 (d64) | ||
123 | 5a: POP rDX/r10 (d64) | ||
124 | 5b: POP rBX/r11 (d64) | ||
125 | 5c: POP rSP/r12 (d64) | ||
126 | 5d: POP rBP/r13 (d64) | ||
127 | 5e: POP rSI/r14 (d64) | ||
128 | 5f: POP rDI/r15 (d64) | ||
129 | # 0x60 - 0x6f | ||
130 | 60: PUSHA/PUSHAD (i64) | ||
131 | 61: POPA/POPAD (i64) | ||
132 | 62: BOUND Gv,Ma (i64) | ||
133 | 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) | ||
134 | 64: SEG=FS (Prefix) | ||
135 | 65: SEG=GS (Prefix) | ||
136 | 66: Operand-Size (Prefix) | ||
137 | 67: Address-Size (Prefix) | ||
138 | 68: PUSH Iz (d64) | ||
139 | 69: IMUL Gv,Ev,Iz | ||
140 | 6a: PUSH Ib (d64) | ||
141 | 6b: IMUL Gv,Ev,Ib | ||
142 | 6c: INS/INSB Yb,DX | ||
143 | 6d: INS/INSW/INSD Yz,DX | ||
144 | 6e: OUTS/OUTSB DX,Xb | ||
145 | 6f: OUTS/OUTSW/OUTSD DX,Xz | ||
146 | # 0x70 - 0x7f | ||
147 | 70: JO Jb | ||
148 | 71: JNO Jb | ||
149 | 72: JB/JNAE/JC Jb | ||
150 | 73: JNB/JAE/JNC Jb | ||
151 | 74: JZ/JE Jb | ||
152 | 75: JNZ/JNE Jb | ||
153 | 76: JBE/JNA Jb | ||
154 | 77: JNBE/JA Jb | ||
155 | 78: JS Jb | ||
156 | 79: JNS Jb | ||
157 | 7a: JP/JPE Jb | ||
158 | 7b: JNP/JPO Jb | ||
159 | 7c: JL/JNGE Jb | ||
160 | 7d: JNL/JGE Jb | ||
161 | 7e: JLE/JNG Jb | ||
162 | 7f: JNLE/JG Jb | ||
163 | # 0x80 - 0x8f | ||
164 | 80: Grp1 Eb,Ib (1A) | ||
165 | 81: Grp1 Ev,Iz (1A) | ||
166 | 82: Grp1 Eb,Ib (1A),(i64) | ||
167 | 83: Grp1 Ev,Ib (1A) | ||
168 | 84: TEST Eb,Gb | ||
169 | 85: TEST Ev,Gv | ||
170 | 86: XCHG Eb,Gb | ||
171 | 87: XCHG Ev,Gv | ||
172 | 88: MOV Eb,Gb | ||
173 | 89: MOV Ev,Gv | ||
174 | 8a: MOV Gb,Eb | ||
175 | 8b: MOV Gv,Ev | ||
176 | 8c: MOV Ev,Sw | ||
177 | 8d: LEA Gv,M | ||
178 | 8e: MOV Sw,Ew | ||
179 | 8f: Grp1A (1A) | POP Ev (d64) | ||
180 | # 0x90 - 0x9f | ||
181 | 90: NOP | PAUSE (F3) | XCHG r8,rAX | ||
182 | 91: XCHG rCX/r9,rAX | ||
183 | 92: XCHG rDX/r10,rAX | ||
184 | 93: XCHG rBX/r11,rAX | ||
185 | 94: XCHG rSP/r12,rAX | ||
186 | 95: XCHG rBP/r13,rAX | ||
187 | 96: XCHG rSI/r14,rAX | ||
188 | 97: XCHG rDI/r15,rAX | ||
189 | 98: CBW/CWDE/CDQE | ||
190 | 99: CWD/CDQ/CQO | ||
191 | 9a: CALLF Ap (i64) | ||
192 | 9b: FWAIT/WAIT | ||
193 | 9c: PUSHF/D/Q Fv (d64) | ||
194 | 9d: POPF/D/Q Fv (d64) | ||
195 | 9e: SAHF | ||
196 | 9f: LAHF | ||
197 | # 0xa0 - 0xaf | ||
198 | a0: MOV AL,Ob | ||
199 | a1: MOV rAX,Ov | ||
200 | a2: MOV Ob,AL | ||
201 | a3: MOV Ov,rAX | ||
202 | a4: MOVS/B Yb,Xb | ||
203 | a5: MOVS/W/D/Q Yv,Xv | ||
204 | a6: CMPS/B Xb,Yb | ||
205 | a7: CMPS/W/D Xv,Yv | ||
206 | a8: TEST AL,Ib | ||
207 | a9: TEST rAX,Iz | ||
208 | aa: STOS/B Yb,AL | ||
209 | ab: STOS/W/D/Q Yv,rAX | ||
210 | ac: LODS/B AL,Xb | ||
211 | ad: LODS/W/D/Q rAX,Xv | ||
212 | ae: SCAS/B AL,Yb | ||
213 | af: SCAS/W/D/Q rAX,Yv | ||
214 | # 0xb0 - 0xbf | ||
215 | b0: MOV AL/R8L,Ib | ||
216 | b1: MOV CL/R9L,Ib | ||
217 | b2: MOV DL/R10L,Ib | ||
218 | b3: MOV BL/R11L,Ib | ||
219 | b4: MOV AH/R12L,Ib | ||
220 | b5: MOV CH/R13L,Ib | ||
221 | b6: MOV DH/R14L,Ib | ||
222 | b7: MOV BH/R15L,Ib | ||
223 | b8: MOV rAX/r8,Iv | ||
224 | b9: MOV rCX/r9,Iv | ||
225 | ba: MOV rDX/r10,Iv | ||
226 | bb: MOV rBX/r11,Iv | ||
227 | bc: MOV rSP/r12,Iv | ||
228 | bd: MOV rBP/r13,Iv | ||
229 | be: MOV rSI/r14,Iv | ||
230 | bf: MOV rDI/r15,Iv | ||
231 | # 0xc0 - 0xcf | ||
232 | c0: Grp2 Eb,Ib (1A) | ||
233 | c1: Grp2 Ev,Ib (1A) | ||
234 | c2: RETN Iw (f64) | ||
235 | c3: RETN | ||
236 | c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) | ||
237 | c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) | ||
238 | c6: Grp11 Eb,Ib (1A) | ||
239 | c7: Grp11 Ev,Iz (1A) | ||
240 | c8: ENTER Iw,Ib | ||
241 | c9: LEAVE (d64) | ||
242 | ca: RETF Iw | ||
243 | cb: RETF | ||
244 | cc: INT3 | ||
245 | cd: INT Ib | ||
246 | ce: INTO (i64) | ||
247 | cf: IRET/D/Q | ||
248 | # 0xd0 - 0xdf | ||
249 | d0: Grp2 Eb,1 (1A) | ||
250 | d1: Grp2 Ev,1 (1A) | ||
251 | d2: Grp2 Eb,CL (1A) | ||
252 | d3: Grp2 Ev,CL (1A) | ||
253 | d4: AAM Ib (i64) | ||
254 | d5: AAD Ib (i64) | ||
255 | d6: | ||
256 | d7: XLAT/XLATB | ||
257 | d8: ESC | ||
258 | d9: ESC | ||
259 | da: ESC | ||
260 | db: ESC | ||
261 | dc: ESC | ||
262 | dd: ESC | ||
263 | de: ESC | ||
264 | df: ESC | ||
265 | # 0xe0 - 0xef | ||
266 | e0: LOOPNE/LOOPNZ Jb (f64) | ||
267 | e1: LOOPE/LOOPZ Jb (f64) | ||
268 | e2: LOOP Jb (f64) | ||
269 | e3: JrCXZ Jb (f64) | ||
270 | e4: IN AL,Ib | ||
271 | e5: IN eAX,Ib | ||
272 | e6: OUT Ib,AL | ||
273 | e7: OUT Ib,eAX | ||
274 | e8: CALL Jz (f64) | ||
275 | e9: JMP-near Jz (f64) | ||
276 | ea: JMP-far Ap (i64) | ||
277 | eb: JMP-short Jb (f64) | ||
278 | ec: IN AL,DX | ||
279 | ed: IN eAX,DX | ||
280 | ee: OUT DX,AL | ||
281 | ef: OUT DX,eAX | ||
282 | # 0xf0 - 0xff | ||
283 | f0: LOCK (Prefix) | ||
284 | f1: | ||
285 | f2: REPNE (Prefix) | ||
286 | f3: REP/REPE (Prefix) | ||
287 | f4: HLT | ||
288 | f5: CMC | ||
289 | f6: Grp3_1 Eb (1A) | ||
290 | f7: Grp3_2 Ev (1A) | ||
291 | f8: CLC | ||
292 | f9: STC | ||
293 | fa: CLI | ||
294 | fb: STI | ||
295 | fc: CLD | ||
296 | fd: STD | ||
297 | fe: Grp4 (1A) | ||
298 | ff: Grp5 (1A) | ||
299 | EndTable | ||
300 | |||
301 | Table: 2-byte opcode (0x0f) | ||
302 | Referrer: 2-byte escape | ||
303 | AVXcode: 1 | ||
304 | # 0x0f 0x00-0x0f | ||
305 | 00: Grp6 (1A) | ||
306 | 01: Grp7 (1A) | ||
307 | 02: LAR Gv,Ew | ||
308 | 03: LSL Gv,Ew | ||
309 | 04: | ||
310 | 05: SYSCALL (o64) | ||
311 | 06: CLTS | ||
312 | 07: SYSRET (o64) | ||
313 | 08: INVD | ||
314 | 09: WBINVD | ||
315 | 0a: | ||
316 | 0b: UD2 (1B) | ||
317 | 0c: | ||
318 | 0d: NOP Ev | GrpP | ||
319 | 0e: FEMMS | ||
320 | # 3DNow! uses the last imm byte as opcode extension. | ||
321 | 0f: 3DNow! Pq,Qq,Ib | ||
322 | # 0x0f 0x10-0x1f | ||
323 | 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) | ||
324 | 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) | ||
325 | 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) | ||
326 | 13: movlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) | ||
327 | 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) | ||
328 | 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) | ||
329 | 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlhps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) | ||
330 | 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) | ||
331 | 18: Grp16 (1A) | ||
332 | 19: | ||
333 | 1a: | ||
334 | 1b: | ||
335 | 1c: | ||
336 | 1d: | ||
337 | 1e: | ||
338 | 1f: NOP Ev | ||
339 | # 0x0f 0x20-0x2f | ||
340 | 20: MOV Rd,Cd | ||
341 | 21: MOV Rd,Dd | ||
342 | 22: MOV Cd,Rd | ||
343 | 23: MOV Dd,Rd | ||
344 | 24: | ||
345 | 25: | ||
346 | 26: | ||
347 | 27: | ||
348 | 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) | ||
349 | 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) | ||
350 | 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) | ||
351 | 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) | ||
352 | 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
353 | 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) | ||
354 | 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) | ||
355 | 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) | ||
356 | # 0x0f 0x30-0x3f | ||
357 | 30: WRMSR | ||
358 | 31: RDTSC | ||
359 | 32: RDMSR | ||
360 | 33: RDPMC | ||
361 | 34: SYSENTER | ||
362 | 35: SYSEXIT | ||
363 | 36: | ||
364 | 37: GETSEC | ||
365 | 38: escape # 3-byte escape 1 | ||
366 | 39: | ||
367 | 3a: escape # 3-byte escape 2 | ||
368 | 3b: | ||
369 | 3c: | ||
370 | 3d: | ||
371 | 3e: | ||
372 | 3f: | ||
373 | # 0x0f 0x40-0x4f | ||
374 | 40: CMOVO Gv,Ev | ||
375 | 41: CMOVNO Gv,Ev | ||
376 | 42: CMOVB/C/NAE Gv,Ev | ||
377 | 43: CMOVAE/NB/NC Gv,Ev | ||
378 | 44: CMOVE/Z Gv,Ev | ||
379 | 45: CMOVNE/NZ Gv,Ev | ||
380 | 46: CMOVBE/NA Gv,Ev | ||
381 | 47: CMOVA/NBE Gv,Ev | ||
382 | 48: CMOVS Gv,Ev | ||
383 | 49: CMOVNS Gv,Ev | ||
384 | 4a: CMOVP/PE Gv,Ev | ||
385 | 4b: CMOVNP/PO Gv,Ev | ||
386 | 4c: CMOVL/NGE Gv,Ev | ||
387 | 4d: CMOVNL/GE Gv,Ev | ||
388 | 4e: CMOVLE/NG Gv,Ev | ||
389 | 4f: CMOVNLE/G Gv,Ev | ||
390 | # 0x0f 0x50-0x5f | ||
391 | 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) | ||
392 | 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) | ||
393 | 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) | ||
394 | 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) | ||
395 | 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) | ||
396 | 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) | ||
397 | 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) | ||
398 | 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) | ||
399 | 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) | ||
400 | 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) | ||
401 | 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) | ||
402 | 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) | ||
403 | 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) | ||
404 | 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) | ||
405 | 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) | ||
406 | 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) | ||
407 | # 0x0f 0x60-0x6f | ||
408 | 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) | ||
409 | 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) | ||
410 | 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) | ||
411 | 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) | ||
412 | 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) | ||
413 | 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) | ||
414 | 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) | ||
415 | 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) | ||
416 | 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) | ||
417 | 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) | ||
418 | 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) | ||
419 | 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) | ||
420 | 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) | ||
421 | 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) | ||
422 | 6e: movd/q Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) | ||
423 | 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) | ||
424 | # 0x0f 0x70-0x7f | ||
425 | 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw Vdq,Wdq,Ib (F2),(VEX),(o128) | ||
426 | 71: Grp12 (1A) | ||
427 | 72: Grp13 (1A) | ||
428 | 73: Grp14 (1A) | ||
429 | 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) | ||
430 | 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) | ||
431 | 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) | ||
432 | 77: emms/vzeroupper/vzeroall (VEX) | ||
433 | 78: VMREAD Ed/q,Gd/q | ||
434 | 79: VMWRITE Gd/q,Ed/q | ||
435 | 7a: | ||
436 | 7b: | ||
437 | 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) | ||
438 | 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) | ||
439 | 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) | ||
440 | 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) | ||
441 | # 0x0f 0x80-0x8f | ||
442 | 80: JO Jz (f64) | ||
443 | 81: JNO Jz (f64) | ||
444 | 82: JB/JNAE/JC Jz (f64) | ||
445 | 83: JNB/JAE/JNC Jz (f64) | ||
446 | 84: JZ/JE Jz (f64) | ||
447 | 85: JNZ/JNE Jz (f64) | ||
448 | 86: JBE/JNA Jz (f64) | ||
449 | 87: JNBE/JA Jz (f64) | ||
450 | 88: JS Jz (f64) | ||
451 | 89: JNS Jz (f64) | ||
452 | 8a: JP/JPE Jz (f64) | ||
453 | 8b: JNP/JPO Jz (f64) | ||
454 | 8c: JL/JNGE Jz (f64) | ||
455 | 8d: JNL/JGE Jz (f64) | ||
456 | 8e: JLE/JNG Jz (f64) | ||
457 | 8f: JNLE/JG Jz (f64) | ||
458 | # 0x0f 0x90-0x9f | ||
459 | 90: SETO Eb | ||
460 | 91: SETNO Eb | ||
461 | 92: SETB/C/NAE Eb | ||
462 | 93: SETAE/NB/NC Eb | ||
463 | 94: SETE/Z Eb | ||
464 | 95: SETNE/NZ Eb | ||
465 | 96: SETBE/NA Eb | ||
466 | 97: SETA/NBE Eb | ||
467 | 98: SETS Eb | ||
468 | 99: SETNS Eb | ||
469 | 9a: SETP/PE Eb | ||
470 | 9b: SETNP/PO Eb | ||
471 | 9c: SETL/NGE Eb | ||
472 | 9d: SETNL/GE Eb | ||
473 | 9e: SETLE/NG Eb | ||
474 | 9f: SETNLE/G Eb | ||
475 | # 0x0f 0xa0-0xaf | ||
476 | a0: PUSH FS (d64) | ||
477 | a1: POP FS (d64) | ||
478 | a2: CPUID | ||
479 | a3: BT Ev,Gv | ||
480 | a4: SHLD Ev,Gv,Ib | ||
481 | a5: SHLD Ev,Gv,CL | ||
482 | a6: GrpPDLK | ||
483 | a7: GrpRNG | ||
484 | a8: PUSH GS (d64) | ||
485 | a9: POP GS (d64) | ||
486 | aa: RSM | ||
487 | ab: BTS Ev,Gv | ||
488 | ac: SHRD Ev,Gv,Ib | ||
489 | ad: SHRD Ev,Gv,CL | ||
490 | ae: Grp15 (1A),(1C) | ||
491 | af: IMUL Gv,Ev | ||
492 | # 0x0f 0xb0-0xbf | ||
493 | b0: CMPXCHG Eb,Gb | ||
494 | b1: CMPXCHG Ev,Gv | ||
495 | b2: LSS Gv,Mp | ||
496 | b3: BTR Ev,Gv | ||
497 | b4: LFS Gv,Mp | ||
498 | b5: LGS Gv,Mp | ||
499 | b6: MOVZX Gv,Eb | ||
500 | b7: MOVZX Gv,Ew | ||
501 | b8: JMPE | POPCNT Gv,Ev (F3) | ||
502 | b9: Grp10 (1A) | ||
503 | ba: Grp8 Ev,Ib (1A) | ||
504 | bb: BTC Ev,Gv | ||
505 | bc: BSF Gv,Ev | ||
506 | bd: BSR Gv,Ev | ||
507 | be: MOVSX Gv,Eb | ||
508 | bf: MOVSX Gv,Ew | ||
509 | # 0x0f 0xc0-0xcf | ||
510 | c0: XADD Eb,Gb | ||
511 | c1: XADD Ev,Gv | ||
512 | c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) | ||
513 | c3: movnti Md/q,Gd/q | ||
514 | c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) | ||
515 | c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) | ||
516 | c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) | ||
517 | c7: Grp9 (1A) | ||
518 | c8: BSWAP RAX/EAX/R8/R8D | ||
519 | c9: BSWAP RCX/ECX/R9/R9D | ||
520 | ca: BSWAP RDX/EDX/R10/R10D | ||
521 | cb: BSWAP RBX/EBX/R11/R11D | ||
522 | cc: BSWAP RSP/ESP/R12/R12D | ||
523 | cd: BSWAP RBP/EBP/R13/R13D | ||
524 | ce: BSWAP RSI/ESI/R14/R14D | ||
525 | cf: BSWAP RDI/EDI/R15/R15D | ||
526 | # 0x0f 0xd0-0xdf | ||
527 | d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) | ||
528 | d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) | ||
529 | d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) | ||
530 | d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) | ||
531 | d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) | ||
532 | d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) | ||
533 | d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | ||
534 | d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) | ||
535 | d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) | ||
536 | d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) | ||
537 | da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) | ||
538 | db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) | ||
539 | dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) | ||
540 | dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) | ||
541 | de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) | ||
542 | df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) | ||
543 | # 0x0f 0xe0-0xef | ||
544 | e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) | ||
545 | e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) | ||
546 | e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) | ||
547 | e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) | ||
548 | e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) | ||
549 | e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) | ||
550 | e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) | ||
551 | e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) | ||
552 | e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) | ||
553 | e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) | ||
554 | ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) | ||
555 | eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) | ||
556 | ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) | ||
557 | ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) | ||
558 | ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) | ||
559 | ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) | ||
560 | # 0x0f 0xf0-0xff | ||
561 | f0: lddqu Vdq,Mdq (F2),(VEX) | ||
562 | f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) | ||
563 | f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) | ||
564 | f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) | ||
565 | f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) | ||
566 | f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) | ||
567 | f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) | ||
568 | f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) | ||
569 | f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) | ||
570 | f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) | ||
571 | fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) | ||
572 | fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) | ||
573 | fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) | ||
574 | fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) | ||
575 | fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) | ||
576 | ff: | ||
577 | EndTable | ||
578 | |||
579 | Table: 3-byte opcode 1 (0x0f 0x38) | ||
580 | Referrer: 3-byte escape 1 | ||
581 | AVXcode: 2 | ||
582 | # 0x0f 0x38 0x00-0x0f | ||
583 | 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) | ||
584 | 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) | ||
585 | 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) | ||
586 | 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) | ||
587 | 04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) | ||
588 | 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) | ||
589 | 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) | ||
590 | 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) | ||
591 | 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) | ||
592 | 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) | ||
593 | 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) | ||
594 | 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) | ||
595 | 0c: vpermilps /r (66),(oVEX) | ||
596 | 0d: vpermilpd /r (66),(oVEX) | ||
597 | 0e: vtestps /r (66),(oVEX) | ||
598 | 0f: vtestpd /r (66),(oVEX) | ||
599 | # 0x0f 0x38 0x10-0x1f | ||
600 | 10: pblendvb Vdq,Wdq (66) | ||
601 | 11: | ||
602 | 12: | ||
603 | 13: | ||
604 | 14: blendvps Vdq,Wdq (66) | ||
605 | 15: blendvpd Vdq,Wdq (66) | ||
606 | 16: | ||
607 | 17: ptest Vdq,Wdq (66),(VEX) | ||
608 | 18: vbroadcastss /r (66),(oVEX) | ||
609 | 19: vbroadcastsd /r (66),(oVEX),(o256) | ||
610 | 1a: vbroadcastf128 /r (66),(oVEX),(o256) | ||
611 | 1b: | ||
612 | 1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) | ||
613 | 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) | ||
614 | 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) | ||
615 | 1f: | ||
616 | # 0x0f 0x38 0x20-0x2f | ||
617 | 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
618 | 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
619 | 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
620 | 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
621 | 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
622 | 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
623 | 26: | ||
624 | 27: | ||
625 | 28: pmuldq Vdq,Wdq (66),(VEX),(o128) | ||
626 | 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) | ||
627 | 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) | ||
628 | 2b: packusdw Vdq,Wdq (66),(VEX),(o128) | ||
629 | 2c: vmaskmovps(ld) /r (66),(oVEX) | ||
630 | 2d: vmaskmovpd(ld) /r (66),(oVEX) | ||
631 | 2e: vmaskmovps(st) /r (66),(oVEX) | ||
632 | 2f: vmaskmovpd(st) /r (66),(oVEX) | ||
633 | # 0x0f 0x38 0x30-0x3f | ||
634 | 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) | ||
635 | 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) | ||
636 | 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) | ||
637 | 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) | ||
638 | 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) | ||
639 | 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) | ||
640 | 36: | ||
641 | 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) | ||
642 | 38: pminsb Vdq,Wdq (66),(VEX),(o128) | ||
643 | 39: pminsd Vdq,Wdq (66),(VEX),(o128) | ||
644 | 3a: pminuw Vdq,Wdq (66),(VEX),(o128) | ||
645 | 3b: pminud Vdq,Wdq (66),(VEX),(o128) | ||
646 | 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) | ||
647 | 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) | ||
648 | 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) | ||
649 | 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) | ||
650 | # 0x0f 0x38 0x40-0x8f | ||
651 | 40: pmulld Vdq,Wdq (66),(VEX),(o128) | ||
652 | 41: phminposuw Vdq,Wdq (66),(VEX),(o128) | ||
653 | 80: INVEPT Gd/q,Mdq (66) | ||
654 | 81: INVPID Gd/q,Mdq (66) | ||
655 | # 0x0f 0x38 0x90-0xbf (FMA) | ||
656 | 96: vfmaddsub132pd/ps /r (66),(VEX) | ||
657 | 97: vfmsubadd132pd/ps /r (66),(VEX) | ||
658 | 98: vfmadd132pd/ps /r (66),(VEX) | ||
659 | 99: vfmadd132sd/ss /r (66),(VEX),(o128) | ||
660 | 9a: vfmsub132pd/ps /r (66),(VEX) | ||
661 | 9b: vfmsub132sd/ss /r (66),(VEX),(o128) | ||
662 | 9c: vfnmadd132pd/ps /r (66),(VEX) | ||
663 | 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) | ||
664 | 9e: vfnmsub132pd/ps /r (66),(VEX) | ||
665 | 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) | ||
666 | a6: vfmaddsub213pd/ps /r (66),(VEX) | ||
667 | a7: vfmsubadd213pd/ps /r (66),(VEX) | ||
668 | a8: vfmadd213pd/ps /r (66),(VEX) | ||
669 | a9: vfmadd213sd/ss /r (66),(VEX),(o128) | ||
670 | aa: vfmsub213pd/ps /r (66),(VEX) | ||
671 | ab: vfmsub213sd/ss /r (66),(VEX),(o128) | ||
672 | ac: vfnmadd213pd/ps /r (66),(VEX) | ||
673 | ad: vfnmadd213sd/ss /r (66),(VEX),(o128) | ||
674 | ae: vfnmsub213pd/ps /r (66),(VEX) | ||
675 | af: vfnmsub213sd/ss /r (66),(VEX),(o128) | ||
676 | b6: vfmaddsub231pd/ps /r (66),(VEX) | ||
677 | b7: vfmsubadd231pd/ps /r (66),(VEX) | ||
678 | b8: vfmadd231pd/ps /r (66),(VEX) | ||
679 | b9: vfmadd231sd/ss /r (66),(VEX),(o128) | ||
680 | ba: vfmsub231pd/ps /r (66),(VEX) | ||
681 | bb: vfmsub231sd/ss /r (66),(VEX),(o128) | ||
682 | bc: vfnmadd231pd/ps /r (66),(VEX) | ||
683 | bd: vfnmadd231sd/ss /r (66),(VEX),(o128) | ||
684 | be: vfnmsub231pd/ps /r (66),(VEX) | ||
685 | bf: vfnmsub231sd/ss /r (66),(VEX),(o128) | ||
686 | # 0x0f 0x38 0xc0-0xff | ||
687 | db: aesimc Vdq,Wdq (66),(VEX),(o128) | ||
688 | dc: aesenc Vdq,Wdq (66),(VEX),(o128) | ||
689 | dd: aesenclast Vdq,Wdq (66),(VEX),(o128) | ||
690 | de: aesdec Vdq,Wdq (66),(VEX),(o128) | ||
691 | df: aesdeclast Vdq,Wdq (66),(VEX),(o128) | ||
692 | f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) | ||
693 | f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) | ||
694 | EndTable | ||
695 | |||
696 | Table: 3-byte opcode 2 (0x0f 0x3a) | ||
697 | Referrer: 3-byte escape 2 | ||
698 | AVXcode: 3 | ||
699 | # 0x0f 0x3a 0x00-0xff | ||
700 | 04: vpermilps /r,Ib (66),(oVEX) | ||
701 | 05: vpermilpd /r,Ib (66),(oVEX) | ||
702 | 06: vperm2f128 /r,Ib (66),(oVEX),(o256) | ||
703 | 08: roundps Vdq,Wdq,Ib (66),(VEX) | ||
704 | 09: roundpd Vdq,Wdq,Ib (66),(VEX) | ||
705 | 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) | ||
706 | 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) | ||
707 | 0c: blendps Vdq,Wdq,Ib (66),(VEX) | ||
708 | 0d: blendpd Vdq,Wdq,Ib (66),(VEX) | ||
709 | 0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
710 | 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) | ||
711 | 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) | ||
712 | 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) | ||
713 | 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) | ||
714 | 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) | ||
715 | 18: vinsertf128 /r,Ib (66),(oVEX),(o256) | ||
716 | 19: vextractf128 /r,Ib (66),(oVEX),(o256) | ||
717 | 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) | ||
718 | 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) | ||
719 | 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) | ||
720 | 40: dpps Vdq,Wdq,Ib (66),(VEX) | ||
721 | 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) | ||
722 | 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) | ||
723 | 44: pclmulqdq Vdq,Wdq,Ib (66),(VEX),(o128) | ||
724 | 4a: vblendvps /r,Ib (66),(oVEX) | ||
725 | 4b: vblendvpd /r,Ib (66),(oVEX) | ||
726 | 4c: vpblendvb /r,Ib (66),(oVEX),(o128) | ||
727 | 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
728 | 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
729 | 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) | ||
730 | 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) | ||
731 | df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) | ||
732 | EndTable | ||
733 | |||
734 | GrpTable: Grp1 | ||
735 | 0: ADD | ||
736 | 1: OR | ||
737 | 2: ADC | ||
738 | 3: SBB | ||
739 | 4: AND | ||
740 | 5: SUB | ||
741 | 6: XOR | ||
742 | 7: CMP | ||
743 | EndTable | ||
744 | |||
745 | GrpTable: Grp1A | ||
746 | 0: POP | ||
747 | EndTable | ||
748 | |||
749 | GrpTable: Grp2 | ||
750 | 0: ROL | ||
751 | 1: ROR | ||
752 | 2: RCL | ||
753 | 3: RCR | ||
754 | 4: SHL/SAL | ||
755 | 5: SHR | ||
756 | 6: | ||
757 | 7: SAR | ||
758 | EndTable | ||
759 | |||
760 | GrpTable: Grp3_1 | ||
761 | 0: TEST Eb,Ib | ||
762 | 1: | ||
763 | 2: NOT Eb | ||
764 | 3: NEG Eb | ||
765 | 4: MUL AL,Eb | ||
766 | 5: IMUL AL,Eb | ||
767 | 6: DIV AL,Eb | ||
768 | 7: IDIV AL,Eb | ||
769 | EndTable | ||
770 | |||
771 | GrpTable: Grp3_2 | ||
772 | 0: TEST Ev,Iz | ||
773 | 1: | ||
774 | 2: NOT Ev | ||
775 | 3: NEG Ev | ||
776 | 4: MUL rAX,Ev | ||
777 | 5: IMUL rAX,Ev | ||
778 | 6: DIV rAX,Ev | ||
779 | 7: IDIV rAX,Ev | ||
780 | EndTable | ||
781 | |||
782 | GrpTable: Grp4 | ||
783 | 0: INC Eb | ||
784 | 1: DEC Eb | ||
785 | EndTable | ||
786 | |||
787 | GrpTable: Grp5 | ||
788 | 0: INC Ev | ||
789 | 1: DEC Ev | ||
790 | 2: CALLN Ev (f64) | ||
791 | 3: CALLF Ep | ||
792 | 4: JMPN Ev (f64) | ||
793 | 5: JMPF Ep | ||
794 | 6: PUSH Ev (d64) | ||
795 | 7: | ||
796 | EndTable | ||
797 | |||
798 | GrpTable: Grp6 | ||
799 | 0: SLDT Rv/Mw | ||
800 | 1: STR Rv/Mw | ||
801 | 2: LLDT Ew | ||
802 | 3: LTR Ew | ||
803 | 4: VERR Ew | ||
804 | 5: VERW Ew | ||
805 | EndTable | ||
806 | |||
807 | GrpTable: Grp7 | ||
808 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | ||
809 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | ||
810 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | ||
811 | 3: LIDT Ms | ||
812 | 4: SMSW Mw/Rv | ||
813 | 5: | ||
814 | 6: LMSW Ew | ||
815 | 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) | ||
816 | EndTable | ||
817 | |||
818 | GrpTable: Grp8 | ||
819 | 4: BT | ||
820 | 5: BTS | ||
821 | 6: BTR | ||
822 | 7: BTC | ||
823 | EndTable | ||
824 | |||
825 | GrpTable: Grp9 | ||
826 | 1: CMPXCHG8B/16B Mq/Mdq | ||
827 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | ||
828 | 7: VMPTRST Mq | ||
829 | EndTable | ||
830 | |||
831 | GrpTable: Grp10 | ||
832 | EndTable | ||
833 | |||
834 | GrpTable: Grp11 | ||
835 | 0: MOV | ||
836 | EndTable | ||
837 | |||
838 | GrpTable: Grp12 | ||
839 | 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) | ||
840 | 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) | ||
841 | 6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) | ||
842 | EndTable | ||
843 | |||
844 | GrpTable: Grp13 | ||
845 | 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) | ||
846 | 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) | ||
847 | 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) | ||
848 | EndTable | ||
849 | |||
850 | GrpTable: Grp14 | ||
851 | 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) | ||
852 | 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) | ||
853 | 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) | ||
854 | 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) | ||
855 | EndTable | ||
856 | |||
857 | GrpTable: Grp15 | ||
858 | 0: fxsave | ||
859 | 1: fxrstor | ||
860 | 2: ldmxcsr (VEX) | ||
861 | 3: stmxcsr (VEX) | ||
862 | 4: XSAVE | ||
863 | 5: XRSTOR | lfence (11B) | ||
864 | 6: mfence (11B) | ||
865 | 7: clflush | sfence (11B) | ||
866 | EndTable | ||
867 | |||
868 | GrpTable: Grp16 | ||
869 | 0: prefetch NTA | ||
870 | 1: prefetch T0 | ||
871 | 2: prefetch T1 | ||
872 | 3: prefetch T2 | ||
873 | EndTable | ||
874 | |||
875 | # AMD's Prefetch Group | ||
876 | GrpTable: GrpP | ||
877 | 0: PREFETCH | ||
878 | 1: PREFETCHW | ||
879 | EndTable | ||
880 | |||
881 | GrpTable: GrpPDLK | ||
882 | 0: MONTMUL | ||
883 | 1: XSHA1 | ||
884 | 2: XSHA2 | ||
885 | EndTable | ||
886 | |||
887 | GrpTable: GrpRNG | ||
888 | 0: xstore-rng | ||
889 | 1: xcrypt-ecb | ||
890 | 2: xcrypt-cbc | ||
891 | 4: xcrypt-cfb | ||
892 | 5: xcrypt-ofb | ||
893 | EndTable | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf0..8f4e2ac93928 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -38,7 +38,8 @@ enum x86_pf_error_code { | |||
38 | * Returns 0 if mmiotrace is disabled, or if the fault is not | 38 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
39 | * handled by mmiotrace: | 39 | * handled by mmiotrace: |
40 | */ | 40 | */ |
41 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 41 | static inline int __kprobes |
42 | kmmio_fault(struct pt_regs *regs, unsigned long addr) | ||
42 | { | 43 | { |
43 | if (unlikely(is_kmmio_active())) | 44 | if (unlikely(is_kmmio_active())) |
44 | if (kmmio_handler(regs, addr) == 1) | 45 | if (kmmio_handler(regs, addr) == 1) |
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | |||
46 | return 0; | 47 | return 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static inline int notify_page_fault(struct pt_regs *regs) | 50 | static inline int __kprobes notify_page_fault(struct pt_regs *regs) |
50 | { | 51 | { |
51 | int ret = 0; | 52 | int ret = 0; |
52 | 53 | ||
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void) | |||
240 | * | 241 | * |
241 | * Handle a fault on the vmalloc or module mapping area | 242 | * Handle a fault on the vmalloc or module mapping area |
242 | */ | 243 | */ |
243 | static noinline int vmalloc_fault(unsigned long address) | 244 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
244 | { | 245 | { |
245 | unsigned long pgd_paddr; | 246 | unsigned long pgd_paddr; |
246 | pmd_t *pmd_k; | 247 | pmd_t *pmd_k; |
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void) | |||
357 | * | 358 | * |
358 | * This assumes no large pages in there. | 359 | * This assumes no large pages in there. |
359 | */ | 360 | */ |
360 | static noinline int vmalloc_fault(unsigned long address) | 361 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
361 | { | 362 | { |
362 | pgd_t *pgd, *pgd_ref; | 363 | pgd_t *pgd, *pgd_ref; |
363 | pud_t *pud, *pud_ref; | 364 | pud_t *pud, *pud_ref; |
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) | |||
860 | * There are no security implications to leaving a stale TLB when | 861 | * There are no security implications to leaving a stale TLB when |
861 | * increasing the permissions on a page. | 862 | * increasing the permissions on a page. |
862 | */ | 863 | */ |
863 | static noinline int | 864 | static noinline __kprobes int |
864 | spurious_fault(unsigned long error_code, unsigned long address) | 865 | spurious_fault(unsigned long error_code, unsigned long address) |
865 | { | 866 | { |
866 | pgd_t *pgd; | 867 | pgd_t *pgd; |
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 000000000000..4688f90ce5a2 --- /dev/null +++ b/arch/x86/tools/Makefile | |||
@@ -0,0 +1,22 @@ | |||
1 | PHONY += posttest | ||
2 | |||
3 | ifeq ($(KBUILD_VERBOSE),1) | ||
4 | posttest_verbose = -v | ||
5 | else | ||
6 | posttest_verbose = | ||
7 | endif | ||
8 | |||
9 | quiet_cmd_posttest = TEST $@ | ||
10 | cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose) | ||
11 | |||
12 | posttest: $(obj)/test_get_len vmlinux | ||
13 | $(call cmd,posttest) | ||
14 | |||
15 | hostprogs-y := test_get_len | ||
16 | |||
17 | # -I is needed for the generated C source and for the C sources in the kernel tree. | ||
18 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | ||
19 | |||
20 | # Dependencies are also needed. | ||
21 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | ||
22 | |||
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 000000000000..c13c0ee48ab4 --- /dev/null +++ b/arch/x86/tools/distill.awk | |||
@@ -0,0 +1,47 @@ | |||
1 | #!/bin/awk -f | ||
2 | # Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
3 | # Distills the disassembly as follows: | ||
4 | # - Removes all lines except the disassembled instructions. | ||
5 | # - For instructions that exceed 1 line (7 bytes), crams all the hex bytes | ||
6 | # into a single line. | ||
7 | # - Removes bad (or prefix-only) instructions. | ||
8 | |||
9 | BEGIN { | ||
10 | prev_addr = "" | ||
11 | prev_hex = "" | ||
12 | prev_mnemonic = "" | ||
13 | bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" | ||
14 | fwait_expr = "^9b " | ||
15 | fwait_str="9b\tfwait" | ||
16 | } | ||
17 | |||
18 | /^ *[0-9a-f]+ <[^>]*>:/ { | ||
19 | # Symbol entry | ||
20 | printf("%s%s\n", $2, $1) | ||
21 | } | ||
22 | |||
23 | /^ *[0-9a-f]+:/ { | ||
24 | if (split($0, field, "\t") < 3) { | ||
25 | # This is a continuation of the same insn. | ||
26 | prev_hex = prev_hex field[2] | ||
27 | } else { | ||
28 | # Skip bad instructions | ||
29 | if (match(prev_mnemonic, bad_expr)) | ||
30 | prev_addr = "" | ||
31 | # Split fwait from other f* instructions | ||
32 | if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { | ||
33 | printf "%s\t%s\n", prev_addr, fwait_str | ||
34 | sub(fwait_expr, "", prev_hex) | ||
35 | } | ||
36 | if (prev_addr != "") | ||
37 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
38 | prev_addr = field[1] | ||
39 | prev_hex = field[2] | ||
40 | prev_mnemonic = field[3] | ||
41 | } | ||
42 | } | ||
43 | |||
44 | END { | ||
45 | if (prev_addr != "") | ||
46 | printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic | ||
47 | } | ||
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..e34e92a28eb6 --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -0,0 +1,380 @@ | |||
1 | #!/bin/awk -f | ||
2 | # gen-insn-attr-x86.awk: Instruction attribute table generator | ||
3 | # Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
4 | # | ||
5 | # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c | ||
6 | |||
7 | # Awk implementation sanity check | ||
8 | function check_awk_implement() { | ||
9 | if (!match("abc", "[[:lower:]]+")) | ||
10 | return "Your awk doesn't support character-class." | ||
11 | if (sprintf("%x", 0) != "0") | ||
12 | return "Your awk has a printf-format problem." | ||
13 | return "" | ||
14 | } | ||
15 | |||
16 | # Clear working vars | ||
17 | function clear_vars() { | ||
18 | delete table | ||
19 | delete lptable2 | ||
20 | delete lptable1 | ||
21 | delete lptable3 | ||
22 | eid = -1 # escape id | ||
23 | gid = -1 # group id | ||
24 | aid = -1 # AVX id | ||
25 | tname = "" | ||
26 | } | ||
27 | |||
28 | BEGIN { | ||
29 | # Implementation error checking | ||
30 | awkchecked = check_awk_implement() | ||
31 | if (awkchecked != "") { | ||
32 | print "Error: " awkchecked > "/dev/stderr" | ||
33 | print "Please try to use gawk." > "/dev/stderr" | ||
34 | exit 1 | ||
35 | } | ||
36 | |||
37 | # Setup generating tables | ||
38 | print "/* x86 opcode map generated from x86-opcode-map.txt */" | ||
39 | print "/* Do not change this code. */\n" | ||
40 | ggid = 1 | ||
41 | geid = 1 | ||
42 | gaid = 0 | ||
43 | delete etable | ||
44 | delete gtable | ||
45 | delete atable | ||
46 | |||
47 | opnd_expr = "^[[:alpha:]/]" | ||
48 | ext_expr = "^\\(" | ||
49 | sep_expr = "^\\|$" | ||
50 | group_expr = "^Grp[[:alnum:]]+" | ||
51 | |||
52 | imm_expr = "^[IJAO][[:lower:]]" | ||
53 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
54 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
55 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | ||
56 | imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" | ||
57 | imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" | ||
58 | imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" | ||
59 | imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
60 | imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
61 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | ||
62 | imm_flag["Ob"] = "INAT_MOFFSET" | ||
63 | imm_flag["Ov"] = "INAT_MOFFSET" | ||
64 | |||
65 | modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" | ||
66 | force64_expr = "\\([df]64\\)" | ||
67 | rex_expr = "^REX(\\.[XRWB]+)*" | ||
68 | fpu_expr = "^ESC" # TODO | ||
69 | |||
70 | lprefix1_expr = "\\(66\\)" | ||
71 | lprefix2_expr = "\\(F3\\)" | ||
72 | lprefix3_expr = "\\(F2\\)" | ||
73 | max_lprefix = 4 | ||
74 | |||
75 | vexok_expr = "\\(VEX\\)" | ||
76 | vexonly_expr = "\\(oVEX\\)" | ||
77 | |||
78 | prefix_expr = "\\(Prefix\\)" | ||
79 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | ||
80 | prefix_num["REPNE"] = "INAT_PFX_REPNE" | ||
81 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" | ||
82 | prefix_num["LOCK"] = "INAT_PFX_LOCK" | ||
83 | prefix_num["SEG=CS"] = "INAT_PFX_CS" | ||
84 | prefix_num["SEG=DS"] = "INAT_PFX_DS" | ||
85 | prefix_num["SEG=ES"] = "INAT_PFX_ES" | ||
86 | prefix_num["SEG=FS"] = "INAT_PFX_FS" | ||
87 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | ||
88 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | ||
89 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | ||
90 | prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" | ||
91 | prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" | ||
92 | |||
93 | clear_vars() | ||
94 | } | ||
95 | |||
96 | function semantic_error(msg) { | ||
97 | print "Semantic error at " NR ": " msg > "/dev/stderr" | ||
98 | exit 1 | ||
99 | } | ||
100 | |||
101 | function debug(msg) { | ||
102 | print "DEBUG: " msg | ||
103 | } | ||
104 | |||
105 | function array_size(arr, i,c) { | ||
106 | c = 0 | ||
107 | for (i in arr) | ||
108 | c++ | ||
109 | return c | ||
110 | } | ||
111 | |||
112 | /^Table:/ { | ||
113 | print "/* " $0 " */" | ||
114 | if (tname != "") | ||
115 | semantic_error("Hit Table: before EndTable:."); | ||
116 | } | ||
117 | |||
118 | /^Referrer:/ { | ||
119 | if (NF != 1) { | ||
120 | # escape opcode table | ||
121 | ref = "" | ||
122 | for (i = 2; i <= NF; i++) | ||
123 | ref = ref $i | ||
124 | eid = escape[ref] | ||
125 | tname = sprintf("inat_escape_table_%d", eid) | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /^AVXcode:/ { | ||
130 | if (NF != 1) { | ||
131 | # AVX/escape opcode table | ||
132 | aid = $2 | ||
133 | if (gaid <= aid) | ||
134 | gaid = aid + 1 | ||
135 | if (tname == "") # AVX only opcode table | ||
136 | tname = sprintf("inat_avx_table_%d", $2) | ||
137 | } | ||
138 | if (aid == -1 && eid == -1) # primary opcode table | ||
139 | tname = "inat_primary_table" | ||
140 | } | ||
141 | |||
142 | /^GrpTable:/ { | ||
143 | print "/* " $0 " */" | ||
144 | if (!($2 in group)) | ||
145 | semantic_error("No group: " $2 ) | ||
146 | gid = group[$2] | ||
147 | tname = "inat_group_table_" gid | ||
148 | } | ||
149 | |||
150 | function print_table(tbl,name,fmt,n) | ||
151 | { | ||
152 | print "const insn_attr_t " name " = {" | ||
153 | for (i = 0; i < n; i++) { | ||
154 | id = sprintf(fmt, i) | ||
155 | if (tbl[id]) | ||
156 | print " [" id "] = " tbl[id] "," | ||
157 | } | ||
158 | print "};" | ||
159 | } | ||
160 | |||
161 | /^EndTable/ { | ||
162 | if (gid != -1) { | ||
163 | # print group tables | ||
164 | if (array_size(table) != 0) { | ||
165 | print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", | ||
166 | "0x%x", 8) | ||
167 | gtable[gid,0] = tname | ||
168 | } | ||
169 | if (array_size(lptable1) != 0) { | ||
170 | print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", | ||
171 | "0x%x", 8) | ||
172 | gtable[gid,1] = tname "_1" | ||
173 | } | ||
174 | if (array_size(lptable2) != 0) { | ||
175 | print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", | ||
176 | "0x%x", 8) | ||
177 | gtable[gid,2] = tname "_2" | ||
178 | } | ||
179 | if (array_size(lptable3) != 0) { | ||
180 | print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", | ||
181 | "0x%x", 8) | ||
182 | gtable[gid,3] = tname "_3" | ||
183 | } | ||
184 | } else { | ||
185 | # print primary/escaped tables | ||
186 | if (array_size(table) != 0) { | ||
187 | print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", | ||
188 | "0x%02x", 256) | ||
189 | etable[eid,0] = tname | ||
190 | if (aid >= 0) | ||
191 | atable[aid,0] = tname | ||
192 | } | ||
193 | if (array_size(lptable1) != 0) { | ||
194 | print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", | ||
195 | "0x%02x", 256) | ||
196 | etable[eid,1] = tname "_1" | ||
197 | if (aid >= 0) | ||
198 | atable[aid,1] = tname "_1" | ||
199 | } | ||
200 | if (array_size(lptable2) != 0) { | ||
201 | print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", | ||
202 | "0x%02x", 256) | ||
203 | etable[eid,2] = tname "_2" | ||
204 | if (aid >= 0) | ||
205 | atable[aid,2] = tname "_2" | ||
206 | } | ||
207 | if (array_size(lptable3) != 0) { | ||
208 | print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", | ||
209 | "0x%02x", 256) | ||
210 | etable[eid,3] = tname "_3" | ||
211 | if (aid >= 0) | ||
212 | atable[aid,3] = tname "_3" | ||
213 | } | ||
214 | } | ||
215 | print "" | ||
216 | clear_vars() | ||
217 | } | ||
218 | |||
219 | function add_flags(old,new) { | ||
220 | if (old && new) | ||
221 | return old " | " new | ||
222 | else if (old) | ||
223 | return old | ||
224 | else | ||
225 | return new | ||
226 | } | ||
227 | |||
228 | # convert operands to flags. | ||
229 | function convert_operands(opnd, i,imm,mod) | ||
230 | { | ||
231 | imm = null | ||
232 | mod = null | ||
233 | for (i in opnd) { | ||
234 | i = opnd[i] | ||
235 | if (match(i, imm_expr) == 1) { | ||
236 | if (!imm_flag[i]) | ||
237 | semantic_error("Unknown imm opnd: " i) | ||
238 | if (imm) { | ||
239 | if (i != "Ib") | ||
240 | semantic_error("Second IMM error") | ||
241 | imm = add_flags(imm, "INAT_SCNDIMM") | ||
242 | } else | ||
243 | imm = imm_flag[i] | ||
244 | } else if (match(i, modrm_expr)) | ||
245 | mod = "INAT_MODRM" | ||
246 | } | ||
247 | return add_flags(imm, mod) | ||
248 | } | ||
249 | |||
250 | /^[0-9a-f]+\:/ { | ||
251 | if (NR == 1) | ||
252 | next | ||
253 | # get index | ||
254 | idx = "0x" substr($1, 1, index($1,":") - 1) | ||
255 | if (idx in table) | ||
256 | semantic_error("Redefine " idx " in " tname) | ||
257 | |||
258 | # check if escaped opcode | ||
259 | if ("escape" == $2) { | ||
260 | if ($3 != "#") | ||
261 | semantic_error("No escaped name") | ||
262 | ref = "" | ||
263 | for (i = 4; i <= NF; i++) | ||
264 | ref = ref $i | ||
265 | if (ref in escape) | ||
266 | semantic_error("Redefine escape (" ref ")") | ||
267 | escape[ref] = geid | ||
268 | geid++ | ||
269 | table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" | ||
270 | next | ||
271 | } | ||
272 | |||
273 | variant = null | ||
274 | # converts | ||
275 | i = 2 | ||
276 | while (i <= NF) { | ||
277 | opcode = $(i++) | ||
278 | delete opnds | ||
279 | ext = null | ||
280 | flags = null | ||
281 | opnd = null | ||
282 | # parse one opcode | ||
283 | if (match($i, opnd_expr)) { | ||
284 | opnd = $i | ||
285 | split($(i++), opnds, ",") | ||
286 | flags = convert_operands(opnds) | ||
287 | } | ||
288 | if (match($i, ext_expr)) | ||
289 | ext = $(i++) | ||
290 | if (match($i, sep_expr)) | ||
291 | i++ | ||
292 | else if (i < NF) | ||
293 | semantic_error($i " is not a separator") | ||
294 | |||
295 | # check if group opcode | ||
296 | if (match(opcode, group_expr)) { | ||
297 | if (!(opcode in group)) { | ||
298 | group[opcode] = ggid | ||
299 | ggid++ | ||
300 | } | ||
301 | flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") | ||
302 | } | ||
303 | # check force(or default) 64bit | ||
304 | if (match(ext, force64_expr)) | ||
305 | flags = add_flags(flags, "INAT_FORCE64") | ||
306 | |||
307 | # check REX prefix | ||
308 | if (match(opcode, rex_expr)) | ||
309 | flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") | ||
310 | |||
311 | # check coprocessor escape : TODO | ||
312 | if (match(opcode, fpu_expr)) | ||
313 | flags = add_flags(flags, "INAT_MODRM") | ||
314 | |||
315 | # check VEX only code | ||
316 | if (match(ext, vexonly_expr)) | ||
317 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | ||
318 | |||
319 | # check VEX supported code (VEX prefix allowed, but not required) | ||
320 | if (match(ext, vexok_expr)) | ||
321 | flags = add_flags(flags, "INAT_VEXOK") | ||
322 | |||
323 | # check prefixes | ||
324 | if (match(ext, prefix_expr)) { | ||
325 | if (!prefix_num[opcode]) | ||
326 | semantic_error("Unknown prefix: " opcode) | ||
327 | flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") | ||
328 | } | ||
329 | if (length(flags) == 0) | ||
330 | continue | ||
331 | # check if last prefix | ||
332 | if (match(ext, lprefix1_expr)) { | ||
333 | lptable1[idx] = add_flags(lptable1[idx],flags) | ||
334 | variant = "INAT_VARIANT" | ||
335 | } else if (match(ext, lprefix2_expr)) { | ||
336 | lptable2[idx] = add_flags(lptable2[idx],flags) | ||
337 | variant = "INAT_VARIANT" | ||
338 | } else if (match(ext, lprefix3_expr)) { | ||
339 | lptable3[idx] = add_flags(lptable3[idx],flags) | ||
340 | variant = "INAT_VARIANT" | ||
341 | } else { | ||
342 | table[idx] = add_flags(table[idx],flags) | ||
343 | } | ||
344 | } | ||
345 | if (variant) | ||
346 | table[idx] = add_flags(table[idx],variant) | ||
347 | } | ||
348 | |||
349 | END { | ||
350 | if (awkchecked != "") | ||
351 | exit 1 | ||
352 | # print escape opcode map's array | ||
353 | print "/* Escape opcode map array */" | ||
354 | print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ | ||
355 | "[INAT_LSTPFX_MAX + 1] = {" | ||
356 | for (i = 0; i < geid; i++) | ||
357 | for (j = 0; j < max_lprefix; j++) | ||
358 | if (etable[i,j]) | ||
359 | print " ["i"]["j"] = "etable[i,j]"," | ||
360 | print "};\n" | ||
361 | # print group opcode map's array | ||
362 | print "/* Group opcode map array */" | ||
363 | print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ | ||
364 | "[INAT_LSTPFX_MAX + 1] = {" | ||
365 | for (i = 0; i < ggid; i++) | ||
366 | for (j = 0; j < max_lprefix; j++) | ||
367 | if (gtable[i,j]) | ||
368 | print " ["i"]["j"] = "gtable[i,j]"," | ||
369 | print "};\n" | ||
370 | # print AVX opcode map's array | ||
371 | print "/* AVX opcode map array */" | ||
372 | print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ | ||
373 | "[INAT_LSTPFX_MAX + 1] = {" | ||
374 | for (i = 0; i < gaid; i++) | ||
375 | for (j = 0; j < max_lprefix; j++) | ||
376 | if (atable[i,j]) | ||
377 | print " ["i"]["j"] = "atable[i,j]"," | ||
378 | print "};" | ||
379 | } | ||
380 | |||
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 000000000000..af75e07217ba --- /dev/null +++ b/arch/x86/tools/test_get_len.c | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) IBM Corporation, 2009 | ||
17 | */ | ||
18 | |||
19 | #include <stdlib.h> | ||
20 | #include <stdio.h> | ||
21 | #include <string.h> | ||
22 | #include <assert.h> | ||
23 | #include <unistd.h> | ||
24 | |||
25 | #define unlikely(cond) (cond) | ||
26 | |||
27 | #include <asm/insn.h> | ||
28 | #include <inat.c> | ||
29 | #include <insn.c> | ||
30 | |||
31 | /* | ||
32 | * Test of instruction analysis in general and insn_get_length() in | ||
33 | * particular. See if insn_get_length() and the disassembler agree | ||
34 | * on the length of each instruction in an elf disassembly. | ||
35 | * | ||
36 | * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len | ||
37 | */ | ||
38 | |||
39 | const char *prog; | ||
40 | static int verbose; | ||
41 | static int x86_64; | ||
42 | |||
43 | static void usage(void) | ||
44 | { | ||
45 | fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" | ||
46 | " %s [-y|-n] [-v] \n", prog); | ||
47 | fprintf(stderr, "\t-y 64bit mode\n"); | ||
48 | fprintf(stderr, "\t-n 32bit mode\n"); | ||
49 | fprintf(stderr, "\t-v verbose mode\n"); | ||
50 | exit(1); | ||
51 | } | ||
52 | |||
53 | static void malformed_line(const char *line, int line_nr) | ||
54 | { | ||
55 | fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); | ||
56 | exit(3); | ||
57 | } | ||
58 | |||
59 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
60 | struct insn_field *field) | ||
61 | { | ||
62 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
63 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
64 | indent, field->value, field->bytes[0], field->bytes[1], | ||
65 | field->bytes[2], field->bytes[3]); | ||
66 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
67 | field->got, field->nbytes); | ||
68 | } | ||
69 | |||
70 | static void dump_insn(FILE *fp, struct insn *insn) | ||
71 | { | ||
72 | fprintf(fp, "Instruction = { \n"); | ||
73 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
74 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
75 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
76 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
77 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
78 | dump_field(fp, "sib", "\t", &insn->sib); | ||
79 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
80 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
81 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
82 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
83 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
84 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
85 | insn->length, insn->x86_64, insn->kaddr); | ||
86 | } | ||
87 | |||
88 | static void parse_args(int argc, char **argv) | ||
89 | { | ||
90 | int c; | ||
91 | prog = argv[0]; | ||
92 | while ((c = getopt(argc, argv, "ynv")) != -1) { | ||
93 | switch (c) { | ||
94 | case 'y': | ||
95 | x86_64 = 1; | ||
96 | break; | ||
97 | case 'n': | ||
98 | x86_64 = 0; | ||
99 | break; | ||
100 | case 'v': | ||
101 | verbose = 1; | ||
102 | break; | ||
103 | default: | ||
104 | usage(); | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | #define BUFSIZE 256 | ||
110 | |||
111 | int main(int argc, char **argv) | ||
112 | { | ||
113 | char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; | ||
114 | unsigned char insn_buf[16]; | ||
115 | struct insn insn; | ||
116 | int insns = 0, c; | ||
117 | |||
118 | parse_args(argc, argv); | ||
119 | |||
120 | while (fgets(line, BUFSIZE, stdin)) { | ||
121 | char copy[BUFSIZE], *s, *tab1, *tab2; | ||
122 | int nb = 0; | ||
123 | unsigned int b; | ||
124 | |||
125 | if (line[0] == '<') { | ||
126 | /* Symbol line */ | ||
127 | strcpy(sym, line); | ||
128 | continue; | ||
129 | } | ||
130 | |||
131 | insns++; | ||
132 | memset(insn_buf, 0, 16); | ||
133 | strcpy(copy, line); | ||
134 | tab1 = strchr(copy, '\t'); | ||
135 | if (!tab1) | ||
136 | malformed_line(line, insns); | ||
137 | s = tab1 + 1; | ||
138 | s += strspn(s, " "); | ||
139 | tab2 = strchr(s, '\t'); | ||
140 | if (!tab2) | ||
141 | malformed_line(line, insns); | ||
142 | *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ | ||
143 | while (s < tab2) { | ||
144 | if (sscanf(s, "%x", &b) == 1) { | ||
145 | insn_buf[nb++] = (unsigned char) b; | ||
146 | s += 3; | ||
147 | } else | ||
148 | break; | ||
149 | } | ||
150 | /* Decode an instruction */ | ||
151 | insn_init(&insn, insn_buf, x86_64); | ||
152 | insn_get_length(&insn); | ||
153 | if (insn.length != nb) { | ||
154 | fprintf(stderr, "Error: %s found a difference at %s\n", | ||
155 | prog, sym); | ||
156 | fprintf(stderr, "Error: %s", line); | ||
157 | fprintf(stderr, "Error: objdump says %d bytes, but " | ||
158 | "insn_get_length() says %d\n", nb, | ||
159 | insn.length); | ||
160 | if (verbose) | ||
161 | dump_insn(stderr, &insn); | ||
162 | exit(2); | ||
163 | } | ||
164 | } | ||
165 | fprintf(stderr, "Succeed: decoded and checked %d instructions\n", | ||
166 | insns); | ||
167 | return 0; | ||
168 | } | ||
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d11770472bc8..43360c1d8f70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -117,12 +117,12 @@ struct ftrace_event_call { | |||
117 | struct dentry *dir; | 117 | struct dentry *dir; |
118 | struct trace_event *event; | 118 | struct trace_event *event; |
119 | int enabled; | 119 | int enabled; |
120 | int (*regfunc)(void *); | 120 | int (*regfunc)(struct ftrace_event_call *); |
121 | void (*unregfunc)(void *); | 121 | void (*unregfunc)(struct ftrace_event_call *); |
122 | int id; | 122 | int id; |
123 | int (*raw_init)(void); | 123 | int (*raw_init)(struct ftrace_event_call *); |
124 | int (*show_format)(struct ftrace_event_call *call, | 124 | int (*show_format)(struct ftrace_event_call *, |
125 | struct trace_seq *s); | 125 | struct trace_seq *); |
126 | int (*define_fields)(struct ftrace_event_call *); | 126 | int (*define_fields)(struct ftrace_event_call *); |
127 | struct list_head fields; | 127 | struct list_head fields; |
128 | int filter_active; | 128 | int filter_active; |
@@ -131,14 +131,19 @@ struct ftrace_event_call { | |||
131 | void *data; | 131 | void *data; |
132 | 132 | ||
133 | atomic_t profile_count; | 133 | atomic_t profile_count; |
134 | int (*profile_enable)(void); | 134 | int (*profile_enable)(struct ftrace_event_call *); |
135 | void (*profile_disable)(void); | 135 | void (*profile_disable)(struct ftrace_event_call *); |
136 | }; | 136 | }; |
137 | 137 | ||
138 | #define FTRACE_MAX_PROFILE_SIZE 2048 | 138 | #define FTRACE_MAX_PROFILE_SIZE 2048 |
139 | 139 | ||
140 | extern char *trace_profile_buf; | 140 | struct perf_trace_buf { |
141 | extern char *trace_profile_buf_nmi; | 141 | char buf[FTRACE_MAX_PROFILE_SIZE]; |
142 | int recursion; | ||
143 | }; | ||
144 | |||
145 | extern struct perf_trace_buf *perf_trace_buf; | ||
146 | extern struct perf_trace_buf *perf_trace_buf_nmi; | ||
142 | 147 | ||
143 | #define MAX_FILTER_PRED 32 | 148 | #define MAX_FILTER_PRED 32 |
144 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ | 149 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ |
@@ -157,11 +162,12 @@ enum { | |||
157 | FILTER_PTR_STRING, | 162 | FILTER_PTR_STRING, |
158 | }; | 163 | }; |
159 | 164 | ||
160 | extern int trace_define_field(struct ftrace_event_call *call, | ||
161 | const char *type, const char *name, | ||
162 | int offset, int size, int is_signed, | ||
163 | int filter_type); | ||
164 | extern int trace_define_common_fields(struct ftrace_event_call *call); | 165 | extern int trace_define_common_fields(struct ftrace_event_call *call); |
166 | extern int trace_define_field(struct ftrace_event_call *call, const char *type, | ||
167 | const char *name, int offset, int size, | ||
168 | int is_signed, int filter_type); | ||
169 | extern int trace_add_event_call(struct ftrace_event_call *call); | ||
170 | extern void trace_remove_event_call(struct ftrace_event_call *call); | ||
165 | 171 | ||
166 | #define is_signed_type(type) (((type)(-1)) < 0) | 172 | #define is_signed_type(type) (((type)(-1)) < 0) |
167 | 173 | ||
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 3a46b7b7abb2..1b672f74a32f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h | |||
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); | |||
296 | int disable_kprobe(struct kprobe *kp); | 296 | int disable_kprobe(struct kprobe *kp); |
297 | int enable_kprobe(struct kprobe *kp); | 297 | int enable_kprobe(struct kprobe *kp); |
298 | 298 | ||
299 | void dump_kprobe(struct kprobe *kp); | ||
300 | |||
299 | #else /* !CONFIG_KPROBES: */ | 301 | #else /* !CONFIG_KPROBES: */ |
300 | 302 | ||
301 | static inline int kprobes_built_in(void) | 303 | static inline int kprobes_built_in(void) |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a990ace1a838..b50974a93af0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -100,23 +100,23 @@ struct perf_event_attr; | |||
100 | 100 | ||
101 | #ifdef CONFIG_EVENT_PROFILE | 101 | #ifdef CONFIG_EVENT_PROFILE |
102 | #define TRACE_SYS_ENTER_PROFILE(sname) \ | 102 | #define TRACE_SYS_ENTER_PROFILE(sname) \ |
103 | static int prof_sysenter_enable_##sname(void) \ | 103 | static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ |
104 | { \ | 104 | { \ |
105 | return reg_prof_syscall_enter("sys"#sname); \ | 105 | return reg_prof_syscall_enter("sys"#sname); \ |
106 | } \ | 106 | } \ |
107 | \ | 107 | \ |
108 | static void prof_sysenter_disable_##sname(void) \ | 108 | static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ |
109 | { \ | 109 | { \ |
110 | unreg_prof_syscall_enter("sys"#sname); \ | 110 | unreg_prof_syscall_enter("sys"#sname); \ |
111 | } | 111 | } |
112 | 112 | ||
113 | #define TRACE_SYS_EXIT_PROFILE(sname) \ | 113 | #define TRACE_SYS_EXIT_PROFILE(sname) \ |
114 | static int prof_sysexit_enable_##sname(void) \ | 114 | static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ |
115 | { \ | 115 | { \ |
116 | return reg_prof_syscall_exit("sys"#sname); \ | 116 | return reg_prof_syscall_exit("sys"#sname); \ |
117 | } \ | 117 | } \ |
118 | \ | 118 | \ |
119 | static void prof_sysexit_disable_##sname(void) \ | 119 | static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ |
120 | { \ | 120 | { \ |
121 | unreg_prof_syscall_exit("sys"#sname); \ | 121 | unreg_prof_syscall_exit("sys"#sname); \ |
122 | } | 122 | } |
@@ -157,7 +157,7 @@ static void prof_sysexit_disable_##sname(void) \ | |||
157 | struct trace_event enter_syscall_print_##sname = { \ | 157 | struct trace_event enter_syscall_print_##sname = { \ |
158 | .trace = print_syscall_enter, \ | 158 | .trace = print_syscall_enter, \ |
159 | }; \ | 159 | }; \ |
160 | static int init_enter_##sname(void) \ | 160 | static int init_enter_##sname(struct ftrace_event_call *call) \ |
161 | { \ | 161 | { \ |
162 | int num, id; \ | 162 | int num, id; \ |
163 | num = syscall_name_to_nr("sys"#sname); \ | 163 | num = syscall_name_to_nr("sys"#sname); \ |
@@ -193,7 +193,7 @@ static void prof_sysexit_disable_##sname(void) \ | |||
193 | struct trace_event exit_syscall_print_##sname = { \ | 193 | struct trace_event exit_syscall_print_##sname = { \ |
194 | .trace = print_syscall_exit, \ | 194 | .trace = print_syscall_exit, \ |
195 | }; \ | 195 | }; \ |
196 | static int init_exit_##sname(void) \ | 196 | static int init_exit_##sname(struct ftrace_event_call *call) \ |
197 | { \ | 197 | { \ |
198 | int num, id; \ | 198 | int num, id; \ |
199 | num = syscall_name_to_nr("sys"#sname); \ | 199 | num = syscall_name_to_nr("sys"#sname); \ |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c9bbcab95fbe..4945d1c99864 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -402,12 +402,12 @@ static inline int ftrace_get_offsets_##call( \ | |||
402 | \ | 402 | \ |
403 | static void ftrace_profile_##call(proto); \ | 403 | static void ftrace_profile_##call(proto); \ |
404 | \ | 404 | \ |
405 | static int ftrace_profile_enable_##call(void) \ | 405 | static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ |
406 | { \ | 406 | { \ |
407 | return register_trace_##call(ftrace_profile_##call); \ | 407 | return register_trace_##call(ftrace_profile_##call); \ |
408 | } \ | 408 | } \ |
409 | \ | 409 | \ |
410 | static void ftrace_profile_disable_##call(void) \ | 410 | static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ |
411 | { \ | 411 | { \ |
412 | unregister_trace_##call(ftrace_profile_##call); \ | 412 | unregister_trace_##call(ftrace_profile_##call); \ |
413 | } | 413 | } |
@@ -426,7 +426,7 @@ static void ftrace_profile_disable_##call(void) \ | |||
426 | * event_trace_printk(_RET_IP_, "<call>: " <fmt>); | 426 | * event_trace_printk(_RET_IP_, "<call>: " <fmt>); |
427 | * } | 427 | * } |
428 | * | 428 | * |
429 | * static int ftrace_reg_event_<call>(void) | 429 | * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused) |
430 | * { | 430 | * { |
431 | * int ret; | 431 | * int ret; |
432 | * | 432 | * |
@@ -437,7 +437,7 @@ static void ftrace_profile_disable_##call(void) \ | |||
437 | * return ret; | 437 | * return ret; |
438 | * } | 438 | * } |
439 | * | 439 | * |
440 | * static void ftrace_unreg_event_<call>(void) | 440 | * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) |
441 | * { | 441 | * { |
442 | * unregister_trace_<call>(ftrace_event_<call>); | 442 | * unregister_trace_<call>(ftrace_event_<call>); |
443 | * } | 443 | * } |
@@ -472,7 +472,7 @@ static void ftrace_profile_disable_##call(void) \ | |||
472 | * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); | 472 | * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); |
473 | * } | 473 | * } |
474 | * | 474 | * |
475 | * static int ftrace_raw_reg_event_<call>(void) | 475 | * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused) |
476 | * { | 476 | * { |
477 | * int ret; | 477 | * int ret; |
478 | * | 478 | * |
@@ -483,7 +483,7 @@ static void ftrace_profile_disable_##call(void) \ | |||
483 | * return ret; | 483 | * return ret; |
484 | * } | 484 | * } |
485 | * | 485 | * |
486 | * static void ftrace_unreg_event_<call>(void) | 486 | * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) |
487 | * { | 487 | * { |
488 | * unregister_trace_<call>(ftrace_raw_event_<call>); | 488 | * unregister_trace_<call>(ftrace_raw_event_<call>); |
489 | * } | 489 | * } |
@@ -492,7 +492,7 @@ static void ftrace_profile_disable_##call(void) \ | |||
492 | * .trace = ftrace_raw_output_<call>, <-- stage 2 | 492 | * .trace = ftrace_raw_output_<call>, <-- stage 2 |
493 | * }; | 493 | * }; |
494 | * | 494 | * |
495 | * static int ftrace_raw_init_event_<call>(void) | 495 | * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused) |
496 | * { | 496 | * { |
497 | * int id; | 497 | * int id; |
498 | * | 498 | * |
@@ -589,7 +589,7 @@ static void ftrace_raw_event_##call(proto) \ | |||
589 | event, irq_flags, pc); \ | 589 | event, irq_flags, pc); \ |
590 | } \ | 590 | } \ |
591 | \ | 591 | \ |
592 | static int ftrace_raw_reg_event_##call(void *ptr) \ | 592 | static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ |
593 | { \ | 593 | { \ |
594 | int ret; \ | 594 | int ret; \ |
595 | \ | 595 | \ |
@@ -600,7 +600,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ | |||
600 | return ret; \ | 600 | return ret; \ |
601 | } \ | 601 | } \ |
602 | \ | 602 | \ |
603 | static void ftrace_raw_unreg_event_##call(void *ptr) \ | 603 | static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ |
604 | { \ | 604 | { \ |
605 | unregister_trace_##call(ftrace_raw_event_##call); \ | 605 | unregister_trace_##call(ftrace_raw_event_##call); \ |
606 | } \ | 606 | } \ |
@@ -609,7 +609,7 @@ static struct trace_event ftrace_event_type_##call = { \ | |||
609 | .trace = ftrace_raw_output_##call, \ | 609 | .trace = ftrace_raw_output_##call, \ |
610 | }; \ | 610 | }; \ |
611 | \ | 611 | \ |
612 | static int ftrace_raw_init_event_##call(void) \ | 612 | static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ |
613 | { \ | 613 | { \ |
614 | int id; \ | 614 | int id; \ |
615 | \ | 615 | \ |
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
649 | * struct ftrace_event_call *event_call = &event_<call>; | 649 | * struct ftrace_event_call *event_call = &event_<call>; |
650 | * extern void perf_tp_event(int, u64, u64, void *, int); | 650 | * extern void perf_tp_event(int, u64, u64, void *, int); |
651 | * struct ftrace_raw_##call *entry; | 651 | * struct ftrace_raw_##call *entry; |
652 | * struct perf_trace_buf *trace_buf; | ||
652 | * u64 __addr = 0, __count = 1; | 653 | * u64 __addr = 0, __count = 1; |
653 | * unsigned long irq_flags; | 654 | * unsigned long irq_flags; |
654 | * struct trace_entry *ent; | 655 | * struct trace_entry *ent; |
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
673 | * __cpu = smp_processor_id(); | 674 | * __cpu = smp_processor_id(); |
674 | * | 675 | * |
675 | * if (in_nmi()) | 676 | * if (in_nmi()) |
676 | * raw_data = rcu_dereference(trace_profile_buf_nmi); | 677 | * trace_buf = rcu_dereference(perf_trace_buf_nmi); |
677 | * else | 678 | * else |
678 | * raw_data = rcu_dereference(trace_profile_buf); | 679 | * trace_buf = rcu_dereference(perf_trace_buf); |
679 | * | 680 | * |
680 | * if (!raw_data) | 681 | * if (!trace_buf) |
681 | * goto end; | 682 | * goto end; |
682 | * | 683 | * |
683 | * raw_data = per_cpu_ptr(raw_data, __cpu); | 684 | * trace_buf = per_cpu_ptr(trace_buf, __cpu); |
685 | * | ||
686 | * // Avoid recursion from perf that could mess up the buffer | ||
687 | * if (trace_buf->recursion++) | ||
688 | * goto end_recursion; | ||
689 | * | ||
690 | * raw_data = trace_buf->buf; | ||
691 | * | ||
692 | * // Make recursion update visible before entering perf_tp_event | ||
693 | * // so that we protect from perf recursions. | ||
694 | * | ||
695 | * barrier(); | ||
684 | * | 696 | * |
685 | * //zero dead bytes from alignment to avoid stack leak to userspace: | 697 | * //zero dead bytes from alignment to avoid stack leak to userspace: |
686 | * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; | 698 | * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; |
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ | |||
713 | { \ | 725 | { \ |
714 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 726 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
715 | struct ftrace_event_call *event_call = &event_##call; \ | 727 | struct ftrace_event_call *event_call = &event_##call; \ |
716 | extern void perf_tp_event(int, u64, u64, void *, int); \ | 728 | extern void perf_tp_event(int, u64, u64, void *, int); \ |
717 | struct ftrace_raw_##call *entry; \ | 729 | struct ftrace_raw_##call *entry; \ |
730 | struct perf_trace_buf *trace_buf; \ | ||
718 | u64 __addr = 0, __count = 1; \ | 731 | u64 __addr = 0, __count = 1; \ |
719 | unsigned long irq_flags; \ | 732 | unsigned long irq_flags; \ |
720 | struct trace_entry *ent; \ | 733 | struct trace_entry *ent; \ |
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ | |||
739 | __cpu = smp_processor_id(); \ | 752 | __cpu = smp_processor_id(); \ |
740 | \ | 753 | \ |
741 | if (in_nmi()) \ | 754 | if (in_nmi()) \ |
742 | raw_data = rcu_dereference(trace_profile_buf_nmi); \ | 755 | trace_buf = rcu_dereference(perf_trace_buf_nmi); \ |
743 | else \ | 756 | else \ |
744 | raw_data = rcu_dereference(trace_profile_buf); \ | 757 | trace_buf = rcu_dereference(perf_trace_buf); \ |
745 | \ | 758 | \ |
746 | if (!raw_data) \ | 759 | if (!trace_buf) \ |
747 | goto end; \ | 760 | goto end; \ |
748 | \ | 761 | \ |
749 | raw_data = per_cpu_ptr(raw_data, __cpu); \ | 762 | trace_buf = per_cpu_ptr(trace_buf, __cpu); \ |
763 | if (trace_buf->recursion++) \ | ||
764 | goto end_recursion; \ | ||
765 | \ | ||
766 | barrier(); \ | ||
767 | \ | ||
768 | raw_data = trace_buf->buf; \ | ||
750 | \ | 769 | \ |
751 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ | 770 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ |
752 | entry = (struct ftrace_raw_##call *)raw_data; \ | 771 | entry = (struct ftrace_raw_##call *)raw_data; \ |
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \ | |||
761 | perf_tp_event(event_call->id, __addr, __count, entry, \ | 780 | perf_tp_event(event_call->id, __addr, __count, entry, \ |
762 | __entry_size); \ | 781 | __entry_size); \ |
763 | \ | 782 | \ |
783 | end_recursion: \ | ||
784 | trace_buf->recursion--; \ | ||
764 | end: \ | 785 | end: \ |
765 | local_irq_restore(irq_flags); \ | 786 | local_irq_restore(irq_flags); \ |
766 | \ | 787 | \ |
diff --git a/include/trace/syscall.h b/include/trace/syscall.h index e972f0a40f8d..51ee17d3632a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h | |||
@@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); | |||
39 | void set_syscall_exit_id(int num, int id); | 39 | void set_syscall_exit_id(int num, int id); |
40 | extern struct trace_event event_syscall_enter; | 40 | extern struct trace_event event_syscall_enter; |
41 | extern struct trace_event event_syscall_exit; | 41 | extern struct trace_event event_syscall_exit; |
42 | extern int reg_event_syscall_enter(void *ptr); | 42 | |
43 | extern void unreg_event_syscall_enter(void *ptr); | ||
44 | extern int reg_event_syscall_exit(void *ptr); | ||
45 | extern void unreg_event_syscall_exit(void *ptr); | ||
46 | extern int syscall_enter_format(struct ftrace_event_call *call, | 43 | extern int syscall_enter_format(struct ftrace_event_call *call, |
47 | struct trace_seq *s); | 44 | struct trace_seq *s); |
48 | extern int syscall_exit_format(struct ftrace_event_call *call, | 45 | extern int syscall_exit_format(struct ftrace_event_call *call, |
49 | struct trace_seq *s); | 46 | struct trace_seq *s); |
50 | extern int syscall_enter_define_fields(struct ftrace_event_call *call); | 47 | extern int syscall_enter_define_fields(struct ftrace_event_call *call); |
51 | extern int syscall_exit_define_fields(struct ftrace_event_call *call); | 48 | extern int syscall_exit_define_fields(struct ftrace_event_call *call); |
49 | extern int reg_event_syscall_enter(struct ftrace_event_call *call); | ||
50 | extern void unreg_event_syscall_enter(struct ftrace_event_call *call); | ||
51 | extern int reg_event_syscall_exit(struct ftrace_event_call *call); | ||
52 | extern void unreg_event_syscall_exit(struct ftrace_event_call *call); | ||
53 | extern int | ||
54 | ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); | ||
52 | enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); | 55 | enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); |
53 | enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); | 56 | enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); |
54 | #endif | 57 | #endif |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5240d75f4c60..84495958e703 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) | |||
90 | */ | 90 | */ |
91 | static struct kprobe_blackpoint kprobe_blacklist[] = { | 91 | static struct kprobe_blackpoint kprobe_blacklist[] = { |
92 | {"preempt_schedule",}, | 92 | {"preempt_schedule",}, |
93 | {"native_get_debugreg",}, | ||
94 | {"irq_entries_start",}, | ||
95 | {"common_interrupt",}, | ||
93 | {NULL} /* Terminator */ | 96 | {NULL} /* Terminator */ |
94 | }; | 97 | }; |
95 | 98 | ||
@@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | |||
673 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); | 676 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); |
674 | } | 677 | } |
675 | 678 | ||
679 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ | ||
680 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) | ||
681 | { | ||
682 | struct kprobe *old_p, *list_p; | ||
683 | |||
684 | old_p = get_kprobe(p->addr); | ||
685 | if (unlikely(!old_p)) | ||
686 | return NULL; | ||
687 | |||
688 | if (p != old_p) { | ||
689 | list_for_each_entry_rcu(list_p, &old_p->list, list) | ||
690 | if (list_p == p) | ||
691 | /* kprobe p is a valid probe */ | ||
692 | goto valid; | ||
693 | return NULL; | ||
694 | } | ||
695 | valid: | ||
696 | return old_p; | ||
697 | } | ||
698 | |||
699 | /* Return error if the kprobe is being re-registered */ | ||
700 | static inline int check_kprobe_rereg(struct kprobe *p) | ||
701 | { | ||
702 | int ret = 0; | ||
703 | struct kprobe *old_p; | ||
704 | |||
705 | mutex_lock(&kprobe_mutex); | ||
706 | old_p = __get_valid_kprobe(p); | ||
707 | if (old_p) | ||
708 | ret = -EINVAL; | ||
709 | mutex_unlock(&kprobe_mutex); | ||
710 | return ret; | ||
711 | } | ||
712 | |||
676 | int __kprobes register_kprobe(struct kprobe *p) | 713 | int __kprobes register_kprobe(struct kprobe *p) |
677 | { | 714 | { |
678 | int ret = 0; | 715 | int ret = 0; |
@@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
685 | return -EINVAL; | 722 | return -EINVAL; |
686 | p->addr = addr; | 723 | p->addr = addr; |
687 | 724 | ||
725 | ret = check_kprobe_rereg(p); | ||
726 | if (ret) | ||
727 | return ret; | ||
728 | |||
688 | preempt_disable(); | 729 | preempt_disable(); |
689 | if (!kernel_text_address((unsigned long) p->addr) || | 730 | if (!kernel_text_address((unsigned long) p->addr) || |
690 | in_kprobes_functions((unsigned long) p->addr)) { | 731 | in_kprobes_functions((unsigned long) p->addr)) { |
@@ -754,26 +795,6 @@ out: | |||
754 | } | 795 | } |
755 | EXPORT_SYMBOL_GPL(register_kprobe); | 796 | EXPORT_SYMBOL_GPL(register_kprobe); |
756 | 797 | ||
757 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ | ||
758 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) | ||
759 | { | ||
760 | struct kprobe *old_p, *list_p; | ||
761 | |||
762 | old_p = get_kprobe(p->addr); | ||
763 | if (unlikely(!old_p)) | ||
764 | return NULL; | ||
765 | |||
766 | if (p != old_p) { | ||
767 | list_for_each_entry_rcu(list_p, &old_p->list, list) | ||
768 | if (list_p == p) | ||
769 | /* kprobe p is a valid probe */ | ||
770 | goto valid; | ||
771 | return NULL; | ||
772 | } | ||
773 | valid: | ||
774 | return old_p; | ||
775 | } | ||
776 | |||
777 | /* | 798 | /* |
778 | * Unregister a kprobe without a scheduler synchronization. | 799 | * Unregister a kprobe without a scheduler synchronization. |
779 | */ | 800 | */ |
@@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) | |||
1141 | arch_remove_kprobe(p); | 1162 | arch_remove_kprobe(p); |
1142 | } | 1163 | } |
1143 | 1164 | ||
1165 | void __kprobes dump_kprobe(struct kprobe *kp) | ||
1166 | { | ||
1167 | printk(KERN_WARNING "Dumping kprobe:\n"); | ||
1168 | printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", | ||
1169 | kp->symbol_name, kp->addr, kp->offset); | ||
1170 | } | ||
1171 | |||
1144 | /* Module notifier call back, checking kprobes on the module */ | 1172 | /* Module notifier call back, checking kprobes on the module */ |
1145 | static int __kprobes kprobes_module_callback(struct notifier_block *nb, | 1173 | static int __kprobes kprobes_module_callback(struct notifier_block *nb, |
1146 | unsigned long val, void *data) | 1174 | unsigned long val, void *data) |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5eced3..acd24e7643eb 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); | |||
558 | 558 | ||
559 | static ATOMIC_NOTIFIER_HEAD(die_chain); | 559 | static ATOMIC_NOTIFIER_HEAD(die_chain); |
560 | 560 | ||
561 | int notrace notify_die(enum die_val val, const char *str, | 561 | int notrace __kprobes notify_die(enum die_val val, const char *str, |
562 | struct pt_regs *regs, long err, int trap, int sig) | 562 | struct pt_regs *regs, long err, int trap, int sig) |
563 | { | 563 | { |
564 | struct die_args args = { | 564 | struct die_args args = { |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b416512ad17f..f05671609a89 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -428,6 +428,23 @@ config BLK_DEV_IO_TRACE | |||
428 | 428 | ||
429 | If unsure, say N. | 429 | If unsure, say N. |
430 | 430 | ||
431 | config KPROBE_EVENT | ||
432 | depends on KPROBES | ||
433 | depends on X86 | ||
434 | bool "Enable kprobes-based dynamic events" | ||
435 | select TRACING | ||
436 | default y | ||
437 | help | ||
438 | This allows the user to add tracing events (similar to tracepoints) on the fly | ||
439 | via the ftrace interface. See Documentation/trace/kprobetrace.txt | ||
440 | for more details. | ||
441 | |||
442 | Those events can be inserted wherever kprobes can probe, and record | ||
443 | various register and memory values. | ||
444 | |||
445 | This option is also required by the perf-probe subcommand of perf tools. If | ||
446 | you want to use perf tools, this option is strongly recommended. | ||
447 | |||
431 | config DYNAMIC_FTRACE | 448 | config DYNAMIC_FTRACE |
432 | bool "enable/disable ftrace tracepoints dynamically" | 449 | bool "enable/disable ftrace tracepoints dynamically" |
433 | depends on FUNCTION_TRACER | 450 | depends on FUNCTION_TRACER |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 26f03ac07c2b..edc3a3cca1a1 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o | |||
53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | ||
56 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 57 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o |
57 | 58 | ||
58 | libftrace-y := ftrace.o | 59 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4959ada9e0bb..b4e4212e66d7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -101,6 +101,29 @@ struct syscall_trace_exit { | |||
101 | unsigned long ret; | 101 | unsigned long ret; |
102 | }; | 102 | }; |
103 | 103 | ||
104 | struct kprobe_trace_entry { | ||
105 | struct trace_entry ent; | ||
106 | unsigned long ip; | ||
107 | int nargs; | ||
108 | unsigned long args[]; | ||
109 | }; | ||
110 | |||
111 | #define SIZEOF_KPROBE_TRACE_ENTRY(n) \ | ||
112 | (offsetof(struct kprobe_trace_entry, args) + \ | ||
113 | (sizeof(unsigned long) * (n))) | ||
114 | |||
115 | struct kretprobe_trace_entry { | ||
116 | struct trace_entry ent; | ||
117 | unsigned long func; | ||
118 | unsigned long ret_ip; | ||
119 | int nargs; | ||
120 | unsigned long args[]; | ||
121 | }; | ||
122 | |||
123 | #define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \ | ||
124 | (offsetof(struct kretprobe_trace_entry, args) + \ | ||
125 | (sizeof(unsigned long) * (n))) | ||
126 | |||
104 | /* | 127 | /* |
105 | * trace_flag_type is an enumeration that holds different | 128 | * trace_flag_type is an enumeration that holds different |
106 | * states when a trace occurs. These are: | 129 | * states when a trace occurs. These are: |
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 8d5c171cc998..e0d351b01f5a 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -8,44 +8,39 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include "trace.h" | 9 | #include "trace.h" |
10 | 10 | ||
11 | /* | ||
12 | * We can't use a size but a type in alloc_percpu() | ||
13 | * So let's create a dummy type that matches the desired size | ||
14 | */ | ||
15 | typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; | ||
16 | 11 | ||
17 | char *trace_profile_buf; | 12 | struct perf_trace_buf *perf_trace_buf; |
18 | EXPORT_SYMBOL_GPL(trace_profile_buf); | 13 | EXPORT_SYMBOL_GPL(perf_trace_buf); |
19 | 14 | ||
20 | char *trace_profile_buf_nmi; | 15 | struct perf_trace_buf *perf_trace_buf_nmi; |
21 | EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); | 16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); |
22 | 17 | ||
23 | /* Count the events in use (per event id, not per instance) */ | 18 | /* Count the events in use (per event id, not per instance) */ |
24 | static int total_profile_count; | 19 | static int total_profile_count; |
25 | 20 | ||
26 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) | 21 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) |
27 | { | 22 | { |
28 | char *buf; | 23 | struct perf_trace_buf *buf; |
29 | int ret = -ENOMEM; | 24 | int ret = -ENOMEM; |
30 | 25 | ||
31 | if (atomic_inc_return(&event->profile_count)) | 26 | if (atomic_inc_return(&event->profile_count)) |
32 | return 0; | 27 | return 0; |
33 | 28 | ||
34 | if (!total_profile_count) { | 29 | if (!total_profile_count) { |
35 | buf = (char *)alloc_percpu(profile_buf_t); | 30 | buf = alloc_percpu(struct perf_trace_buf); |
36 | if (!buf) | 31 | if (!buf) |
37 | goto fail_buf; | 32 | goto fail_buf; |
38 | 33 | ||
39 | rcu_assign_pointer(trace_profile_buf, buf); | 34 | rcu_assign_pointer(perf_trace_buf, buf); |
40 | 35 | ||
41 | buf = (char *)alloc_percpu(profile_buf_t); | 36 | buf = alloc_percpu(struct perf_trace_buf); |
42 | if (!buf) | 37 | if (!buf) |
43 | goto fail_buf_nmi; | 38 | goto fail_buf_nmi; |
44 | 39 | ||
45 | rcu_assign_pointer(trace_profile_buf_nmi, buf); | 40 | rcu_assign_pointer(perf_trace_buf_nmi, buf); |
46 | } | 41 | } |
47 | 42 | ||
48 | ret = event->profile_enable(); | 43 | ret = event->profile_enable(event); |
49 | if (!ret) { | 44 | if (!ret) { |
50 | total_profile_count++; | 45 | total_profile_count++; |
51 | return 0; | 46 | return 0; |
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) | |||
53 | 48 | ||
54 | fail_buf_nmi: | 49 | fail_buf_nmi: |
55 | if (!total_profile_count) { | 50 | if (!total_profile_count) { |
56 | free_percpu(trace_profile_buf_nmi); | 51 | free_percpu(perf_trace_buf_nmi); |
57 | free_percpu(trace_profile_buf); | 52 | free_percpu(perf_trace_buf); |
58 | trace_profile_buf_nmi = NULL; | 53 | perf_trace_buf_nmi = NULL; |
59 | trace_profile_buf = NULL; | 54 | perf_trace_buf = NULL; |
60 | } | 55 | } |
61 | fail_buf: | 56 | fail_buf: |
62 | atomic_dec(&event->profile_count); | 57 | atomic_dec(&event->profile_count); |
@@ -84,19 +79,19 @@ int ftrace_profile_enable(int event_id) | |||
84 | 79 | ||
85 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) | 80 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) |
86 | { | 81 | { |
87 | char *buf, *nmi_buf; | 82 | struct perf_trace_buf *buf, *nmi_buf; |
88 | 83 | ||
89 | if (!atomic_add_negative(-1, &event->profile_count)) | 84 | if (!atomic_add_negative(-1, &event->profile_count)) |
90 | return; | 85 | return; |
91 | 86 | ||
92 | event->profile_disable(); | 87 | event->profile_disable(event); |
93 | 88 | ||
94 | if (!--total_profile_count) { | 89 | if (!--total_profile_count) { |
95 | buf = trace_profile_buf; | 90 | buf = perf_trace_buf; |
96 | rcu_assign_pointer(trace_profile_buf, NULL); | 91 | rcu_assign_pointer(perf_trace_buf, NULL); |
97 | 92 | ||
98 | nmi_buf = trace_profile_buf_nmi; | 93 | nmi_buf = perf_trace_buf_nmi; |
99 | rcu_assign_pointer(trace_profile_buf_nmi, NULL); | 94 | rcu_assign_pointer(perf_trace_buf_nmi, NULL); |
100 | 95 | ||
101 | /* | 96 | /* |
102 | * Ensure every events in profiling have finished before | 97 | * Ensure every events in profiling have finished before |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7c18d154ea28..1d18315dc836 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) | |||
93 | } | 93 | } |
94 | EXPORT_SYMBOL_GPL(trace_define_common_fields); | 94 | EXPORT_SYMBOL_GPL(trace_define_common_fields); |
95 | 95 | ||
96 | #ifdef CONFIG_MODULES | 96 | void trace_destroy_fields(struct ftrace_event_call *call) |
97 | |||
98 | static void trace_destroy_fields(struct ftrace_event_call *call) | ||
99 | { | 97 | { |
100 | struct ftrace_event_field *field, *next; | 98 | struct ftrace_event_field *field, *next; |
101 | 99 | ||
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call) | |||
107 | } | 105 | } |
108 | } | 106 | } |
109 | 107 | ||
110 | #endif /* CONFIG_MODULES */ | ||
111 | |||
112 | static void ftrace_event_enable_disable(struct ftrace_event_call *call, | 108 | static void ftrace_event_enable_disable(struct ftrace_event_call *call, |
113 | int enable) | 109 | int enable) |
114 | { | 110 | { |
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
117 | if (call->enabled) { | 113 | if (call->enabled) { |
118 | call->enabled = 0; | 114 | call->enabled = 0; |
119 | tracing_stop_cmdline_record(); | 115 | tracing_stop_cmdline_record(); |
120 | call->unregfunc(call->data); | 116 | call->unregfunc(call); |
121 | } | 117 | } |
122 | break; | 118 | break; |
123 | case 1: | 119 | case 1: |
124 | if (!call->enabled) { | 120 | if (!call->enabled) { |
125 | call->enabled = 1; | 121 | call->enabled = 1; |
126 | tracing_start_cmdline_record(); | 122 | tracing_start_cmdline_record(); |
127 | call->regfunc(call->data); | 123 | call->regfunc(call); |
128 | } | 124 | } |
129 | break; | 125 | break; |
130 | } | 126 | } |
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, | |||
937 | return 0; | 933 | return 0; |
938 | } | 934 | } |
939 | 935 | ||
940 | #define for_each_event(event, start, end) \ | 936 | static int __trace_add_event_call(struct ftrace_event_call *call) |
941 | for (event = start; \ | 937 | { |
942 | (unsigned long)event < (unsigned long)end; \ | 938 | struct dentry *d_events; |
943 | event++) | 939 | int ret; |
944 | 940 | ||
945 | #ifdef CONFIG_MODULES | 941 | if (!call->name) |
942 | return -EINVAL; | ||
946 | 943 | ||
947 | static LIST_HEAD(ftrace_module_file_list); | 944 | if (call->raw_init) { |
945 | ret = call->raw_init(call); | ||
946 | if (ret < 0) { | ||
947 | if (ret != -ENOSYS) | ||
948 | pr_warning("Could not initialize trace " | ||
949 | "events/%s\n", call->name); | ||
950 | return ret; | ||
951 | } | ||
952 | } | ||
948 | 953 | ||
949 | /* | 954 | d_events = event_trace_events_dir(); |
950 | * Modules must own their file_operations to keep up with | 955 | if (!d_events) |
951 | * reference counting. | 956 | return -ENOENT; |
952 | */ | 957 | |
953 | struct ftrace_module_file_ops { | 958 | ret = event_create_dir(call, d_events, &ftrace_event_id_fops, |
954 | struct list_head list; | 959 | &ftrace_enable_fops, &ftrace_event_filter_fops, |
955 | struct module *mod; | 960 | &ftrace_event_format_fops); |
956 | struct file_operations id; | 961 | if (!ret) |
957 | struct file_operations enable; | 962 | list_add(&call->list, &ftrace_events); |
958 | struct file_operations format; | 963 | |
959 | struct file_operations filter; | 964 | return ret; |
960 | }; | 965 | } |
966 | |||
967 | /* Add an additional event_call dynamically */ | ||
968 | int trace_add_event_call(struct ftrace_event_call *call) | ||
969 | { | ||
970 | int ret; | ||
971 | mutex_lock(&event_mutex); | ||
972 | ret = __trace_add_event_call(call); | ||
973 | mutex_unlock(&event_mutex); | ||
974 | return ret; | ||
975 | } | ||
961 | 976 | ||
962 | static void remove_subsystem_dir(const char *name) | 977 | static void remove_subsystem_dir(const char *name) |
963 | { | 978 | { |
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name) | |||
985 | } | 1000 | } |
986 | } | 1001 | } |
987 | 1002 | ||
1003 | /* | ||
1004 | * Must be called under locking both of event_mutex and trace_event_mutex. | ||
1005 | */ | ||
1006 | static void __trace_remove_event_call(struct ftrace_event_call *call) | ||
1007 | { | ||
1008 | ftrace_event_enable_disable(call, 0); | ||
1009 | if (call->event) | ||
1010 | __unregister_ftrace_event(call->event); | ||
1011 | debugfs_remove_recursive(call->dir); | ||
1012 | list_del(&call->list); | ||
1013 | trace_destroy_fields(call); | ||
1014 | destroy_preds(call); | ||
1015 | remove_subsystem_dir(call->system); | ||
1016 | } | ||
1017 | |||
1018 | /* Remove an event_call */ | ||
1019 | void trace_remove_event_call(struct ftrace_event_call *call) | ||
1020 | { | ||
1021 | mutex_lock(&event_mutex); | ||
1022 | down_write(&trace_event_mutex); | ||
1023 | __trace_remove_event_call(call); | ||
1024 | up_write(&trace_event_mutex); | ||
1025 | mutex_unlock(&event_mutex); | ||
1026 | } | ||
1027 | |||
1028 | #define for_each_event(event, start, end) \ | ||
1029 | for (event = start; \ | ||
1030 | (unsigned long)event < (unsigned long)end; \ | ||
1031 | event++) | ||
1032 | |||
1033 | #ifdef CONFIG_MODULES | ||
1034 | |||
1035 | static LIST_HEAD(ftrace_module_file_list); | ||
1036 | |||
1037 | /* | ||
1038 | * Modules must own their file_operations to keep up with | ||
1039 | * reference counting. | ||
1040 | */ | ||
1041 | struct ftrace_module_file_ops { | ||
1042 | struct list_head list; | ||
1043 | struct module *mod; | ||
1044 | struct file_operations id; | ||
1045 | struct file_operations enable; | ||
1046 | struct file_operations format; | ||
1047 | struct file_operations filter; | ||
1048 | }; | ||
1049 | |||
988 | static struct ftrace_module_file_ops * | 1050 | static struct ftrace_module_file_ops * |
989 | trace_create_file_ops(struct module *mod) | 1051 | trace_create_file_ops(struct module *mod) |
990 | { | 1052 | { |
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod) | |||
1042 | if (!call->name) | 1104 | if (!call->name) |
1043 | continue; | 1105 | continue; |
1044 | if (call->raw_init) { | 1106 | if (call->raw_init) { |
1045 | ret = call->raw_init(); | 1107 | ret = call->raw_init(call); |
1046 | if (ret < 0) { | 1108 | if (ret < 0) { |
1047 | if (ret != -ENOSYS) | 1109 | if (ret != -ENOSYS) |
1048 | pr_warning("Could not initialize trace " | 1110 | pr_warning("Could not initialize trace " |
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod) | |||
1060 | return; | 1122 | return; |
1061 | } | 1123 | } |
1062 | call->mod = mod; | 1124 | call->mod = mod; |
1063 | list_add(&call->list, &ftrace_events); | 1125 | ret = event_create_dir(call, d_events, |
1064 | event_create_dir(call, d_events, | 1126 | &file_ops->id, &file_ops->enable, |
1065 | &file_ops->id, &file_ops->enable, | 1127 | &file_ops->filter, &file_ops->format); |
1066 | &file_ops->filter, &file_ops->format); | 1128 | if (!ret) |
1129 | list_add(&call->list, &ftrace_events); | ||
1067 | } | 1130 | } |
1068 | } | 1131 | } |
1069 | 1132 | ||
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod) | |||
1077 | list_for_each_entry_safe(call, p, &ftrace_events, list) { | 1140 | list_for_each_entry_safe(call, p, &ftrace_events, list) { |
1078 | if (call->mod == mod) { | 1141 | if (call->mod == mod) { |
1079 | found = true; | 1142 | found = true; |
1080 | ftrace_event_enable_disable(call, 0); | 1143 | __trace_remove_event_call(call); |
1081 | if (call->event) | ||
1082 | __unregister_ftrace_event(call->event); | ||
1083 | debugfs_remove_recursive(call->dir); | ||
1084 | list_del(&call->list); | ||
1085 | trace_destroy_fields(call); | ||
1086 | destroy_preds(call); | ||
1087 | remove_subsystem_dir(call->system); | ||
1088 | } | 1144 | } |
1089 | } | 1145 | } |
1090 | 1146 | ||
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void) | |||
1202 | if (!call->name) | 1258 | if (!call->name) |
1203 | continue; | 1259 | continue; |
1204 | if (call->raw_init) { | 1260 | if (call->raw_init) { |
1205 | ret = call->raw_init(); | 1261 | ret = call->raw_init(call); |
1206 | if (ret < 0) { | 1262 | if (ret < 0) { |
1207 | if (ret != -ENOSYS) | 1263 | if (ret != -ENOSYS) |
1208 | pr_warning("Could not initialize trace " | 1264 | pr_warning("Could not initialize trace " |
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void) | |||
1210 | continue; | 1266 | continue; |
1211 | } | 1267 | } |
1212 | } | 1268 | } |
1213 | list_add(&call->list, &ftrace_events); | 1269 | ret = event_create_dir(call, d_events, &ftrace_event_id_fops, |
1214 | event_create_dir(call, d_events, &ftrace_event_id_fops, | 1270 | &ftrace_enable_fops, |
1215 | &ftrace_enable_fops, &ftrace_event_filter_fops, | 1271 | &ftrace_event_filter_fops, |
1216 | &ftrace_event_format_fops); | 1272 | &ftrace_event_format_fops); |
1273 | if (!ret) | ||
1274 | list_add(&call->list, &ftrace_events); | ||
1217 | } | 1275 | } |
1218 | 1276 | ||
1219 | while (true) { | 1277 | while (true) { |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 31da218ee10f..934d81fb4ca4 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -134,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ | |||
134 | 134 | ||
135 | #include "trace_entries.h" | 135 | #include "trace_entries.h" |
136 | 136 | ||
137 | |||
138 | #undef __field | 137 | #undef __field |
139 | #define __field(type, item) \ | 138 | #define __field(type, item) \ |
140 | ret = trace_define_field(event_call, #type, #item, \ | 139 | ret = trace_define_field(event_call, #type, #item, \ |
@@ -196,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
196 | 195 | ||
197 | #include "trace_entries.h" | 196 | #include "trace_entries.h" |
198 | 197 | ||
198 | static int ftrace_raw_init_event(struct ftrace_event_call *call) | ||
199 | { | ||
200 | INIT_LIST_HEAD(&call->fields); | ||
201 | return 0; | ||
202 | } | ||
199 | 203 | ||
200 | #undef __field | 204 | #undef __field |
201 | #define __field(type, item) | 205 | #define __field(type, item) |
@@ -214,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
214 | 218 | ||
215 | #undef FTRACE_ENTRY | 219 | #undef FTRACE_ENTRY |
216 | #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ | 220 | #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ |
217 | static int ftrace_raw_init_event_##call(void); \ | ||
218 | \ | 221 | \ |
219 | struct ftrace_event_call __used \ | 222 | struct ftrace_event_call __used \ |
220 | __attribute__((__aligned__(4))) \ | 223 | __attribute__((__aligned__(4))) \ |
@@ -222,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
222 | .name = #call, \ | 225 | .name = #call, \ |
223 | .id = type, \ | 226 | .id = type, \ |
224 | .system = __stringify(TRACE_SYSTEM), \ | 227 | .system = __stringify(TRACE_SYSTEM), \ |
225 | .raw_init = ftrace_raw_init_event_##call, \ | 228 | .raw_init = ftrace_raw_init_event, \ |
226 | .show_format = ftrace_format_##call, \ | 229 | .show_format = ftrace_format_##call, \ |
227 | .define_fields = ftrace_define_fields_##call, \ | 230 | .define_fields = ftrace_define_fields_##call, \ |
228 | }; \ | 231 | }; \ |
229 | static int ftrace_raw_init_event_##call(void) \ | ||
230 | { \ | ||
231 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
232 | return 0; \ | ||
233 | } \ | ||
234 | 232 | ||
235 | #include "trace_entries.h" | 233 | #include "trace_entries.h" |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c new file mode 100644 index 000000000000..3696476f307d --- /dev/null +++ b/kernel/trace/trace_kprobe.c | |||
@@ -0,0 +1,1513 @@ | |||
1 | /* | ||
2 | * Kprobes-based tracing events | ||
3 | * | ||
4 | * Created by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | #include <linux/kprobes.h> | ||
23 | #include <linux/seq_file.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/smp.h> | ||
26 | #include <linux/debugfs.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/ctype.h> | ||
30 | #include <linux/ptrace.h> | ||
31 | #include <linux/perf_event.h> | ||
32 | |||
33 | #include "trace.h" | ||
34 | #include "trace_output.h" | ||
35 | |||
36 | #define MAX_TRACE_ARGS 128 | ||
37 | #define MAX_ARGSTR_LEN 63 | ||
38 | #define MAX_EVENT_NAME_LEN 64 | ||
39 | #define KPROBE_EVENT_SYSTEM "kprobes" | ||
40 | |||
41 | /* Reserved field names */ | ||
42 | #define FIELD_STRING_IP "__probe_ip" | ||
43 | #define FIELD_STRING_NARGS "__probe_nargs" | ||
44 | #define FIELD_STRING_RETIP "__probe_ret_ip" | ||
45 | #define FIELD_STRING_FUNC "__probe_func" | ||
46 | |||
47 | const char *reserved_field_names[] = { | ||
48 | "common_type", | ||
49 | "common_flags", | ||
50 | "common_preempt_count", | ||
51 | "common_pid", | ||
52 | "common_tgid", | ||
53 | "common_lock_depth", | ||
54 | FIELD_STRING_IP, | ||
55 | FIELD_STRING_NARGS, | ||
56 | FIELD_STRING_RETIP, | ||
57 | FIELD_STRING_FUNC, | ||
58 | }; | ||
59 | |||
60 | struct fetch_func { | ||
61 | unsigned long (*func)(struct pt_regs *, void *); | ||
62 | void *data; | ||
63 | }; | ||
64 | |||
65 | static __kprobes unsigned long call_fetch(struct fetch_func *f, | ||
66 | struct pt_regs *regs) | ||
67 | { | ||
68 | return f->func(regs, f->data); | ||
69 | } | ||
70 | |||
71 | /* fetch handlers */ | ||
72 | static __kprobes unsigned long fetch_register(struct pt_regs *regs, | ||
73 | void *offset) | ||
74 | { | ||
75 | return regs_get_register(regs, (unsigned int)((unsigned long)offset)); | ||
76 | } | ||
77 | |||
78 | static __kprobes unsigned long fetch_stack(struct pt_regs *regs, | ||
79 | void *num) | ||
80 | { | ||
81 | return regs_get_kernel_stack_nth(regs, | ||
82 | (unsigned int)((unsigned long)num)); | ||
83 | } | ||
84 | |||
85 | static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) | ||
86 | { | ||
87 | unsigned long retval; | ||
88 | |||
89 | if (probe_kernel_address(addr, retval)) | ||
90 | return 0; | ||
91 | return retval; | ||
92 | } | ||
93 | |||
94 | static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) | ||
95 | { | ||
96 | return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); | ||
97 | } | ||
98 | |||
99 | static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, | ||
100 | void *dummy) | ||
101 | { | ||
102 | return regs_return_value(regs); | ||
103 | } | ||
104 | |||
105 | static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, | ||
106 | void *dummy) | ||
107 | { | ||
108 | return kernel_stack_pointer(regs); | ||
109 | } | ||
110 | |||
111 | /* Memory fetching by symbol */ | ||
112 | struct symbol_cache { | ||
113 | char *symbol; | ||
114 | long offset; | ||
115 | unsigned long addr; | ||
116 | }; | ||
117 | |||
118 | static unsigned long update_symbol_cache(struct symbol_cache *sc) | ||
119 | { | ||
120 | sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); | ||
121 | if (sc->addr) | ||
122 | sc->addr += sc->offset; | ||
123 | return sc->addr; | ||
124 | } | ||
125 | |||
126 | static void free_symbol_cache(struct symbol_cache *sc) | ||
127 | { | ||
128 | kfree(sc->symbol); | ||
129 | kfree(sc); | ||
130 | } | ||
131 | |||
132 | static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) | ||
133 | { | ||
134 | struct symbol_cache *sc; | ||
135 | |||
136 | if (!sym || strlen(sym) == 0) | ||
137 | return NULL; | ||
138 | sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); | ||
139 | if (!sc) | ||
140 | return NULL; | ||
141 | |||
142 | sc->symbol = kstrdup(sym, GFP_KERNEL); | ||
143 | if (!sc->symbol) { | ||
144 | kfree(sc); | ||
145 | return NULL; | ||
146 | } | ||
147 | sc->offset = offset; | ||
148 | |||
149 | update_symbol_cache(sc); | ||
150 | return sc; | ||
151 | } | ||
152 | |||
153 | static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) | ||
154 | { | ||
155 | struct symbol_cache *sc = data; | ||
156 | |||
157 | if (sc->addr) | ||
158 | return fetch_memory(regs, (void *)sc->addr); | ||
159 | else | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | /* Special indirect memory access interface */ | ||
164 | struct indirect_fetch_data { | ||
165 | struct fetch_func orig; | ||
166 | long offset; | ||
167 | }; | ||
168 | |||
169 | static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) | ||
170 | { | ||
171 | struct indirect_fetch_data *ind = data; | ||
172 | unsigned long addr; | ||
173 | |||
174 | addr = call_fetch(&ind->orig, regs); | ||
175 | if (addr) { | ||
176 | addr += ind->offset; | ||
177 | return fetch_memory(regs, (void *)addr); | ||
178 | } else | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) | ||
183 | { | ||
184 | if (data->orig.func == fetch_indirect) | ||
185 | free_indirect_fetch_data(data->orig.data); | ||
186 | else if (data->orig.func == fetch_symbol) | ||
187 | free_symbol_cache(data->orig.data); | ||
188 | kfree(data); | ||
189 | } | ||
190 | |||
191 | /** | ||
192 | * Kprobe event core functions | ||
193 | */ | ||
194 | |||
195 | struct probe_arg { | ||
196 | struct fetch_func fetch; | ||
197 | const char *name; | ||
198 | }; | ||
199 | |||
200 | /* Flags for trace_probe */ | ||
201 | #define TP_FLAG_TRACE 1 | ||
202 | #define TP_FLAG_PROFILE 2 | ||
203 | |||
204 | struct trace_probe { | ||
205 | struct list_head list; | ||
206 | struct kretprobe rp; /* Use rp.kp for kprobe use */ | ||
207 | unsigned long nhit; | ||
208 | unsigned int flags; /* For TP_FLAG_* */ | ||
209 | const char *symbol; /* symbol name */ | ||
210 | struct ftrace_event_call call; | ||
211 | struct trace_event event; | ||
212 | unsigned int nr_args; | ||
213 | struct probe_arg args[]; | ||
214 | }; | ||
215 | |||
216 | #define SIZEOF_TRACE_PROBE(n) \ | ||
217 | (offsetof(struct trace_probe, args) + \ | ||
218 | (sizeof(struct probe_arg) * (n))) | ||
219 | |||
220 | static __kprobes int probe_is_return(struct trace_probe *tp) | ||
221 | { | ||
222 | return tp->rp.handler != NULL; | ||
223 | } | ||
224 | |||
225 | static __kprobes const char *probe_symbol(struct trace_probe *tp) | ||
226 | { | ||
227 | return tp->symbol ? tp->symbol : "unknown"; | ||
228 | } | ||
229 | |||
230 | static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) | ||
231 | { | ||
232 | int ret = -EINVAL; | ||
233 | |||
234 | if (ff->func == fetch_argument) | ||
235 | ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); | ||
236 | else if (ff->func == fetch_register) { | ||
237 | const char *name; | ||
238 | name = regs_query_register_name((unsigned int)((long)ff->data)); | ||
239 | ret = snprintf(buf, n, "%%%s", name); | ||
240 | } else if (ff->func == fetch_stack) | ||
241 | ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); | ||
242 | else if (ff->func == fetch_memory) | ||
243 | ret = snprintf(buf, n, "@0x%p", ff->data); | ||
244 | else if (ff->func == fetch_symbol) { | ||
245 | struct symbol_cache *sc = ff->data; | ||
246 | ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); | ||
247 | } else if (ff->func == fetch_retvalue) | ||
248 | ret = snprintf(buf, n, "$retval"); | ||
249 | else if (ff->func == fetch_stack_address) | ||
250 | ret = snprintf(buf, n, "$stack"); | ||
251 | else if (ff->func == fetch_indirect) { | ||
252 | struct indirect_fetch_data *id = ff->data; | ||
253 | size_t l = 0; | ||
254 | ret = snprintf(buf, n, "%+ld(", id->offset); | ||
255 | if (ret >= n) | ||
256 | goto end; | ||
257 | l += ret; | ||
258 | ret = probe_arg_string(buf + l, n - l, &id->orig); | ||
259 | if (ret < 0) | ||
260 | goto end; | ||
261 | l += ret; | ||
262 | ret = snprintf(buf + l, n - l, ")"); | ||
263 | ret += l; | ||
264 | } | ||
265 | end: | ||
266 | if (ret >= n) | ||
267 | return -ENOSPC; | ||
268 | return ret; | ||
269 | } | ||
270 | |||
271 | static int register_probe_event(struct trace_probe *tp); | ||
272 | static void unregister_probe_event(struct trace_probe *tp); | ||
273 | |||
274 | static DEFINE_MUTEX(probe_lock); | ||
275 | static LIST_HEAD(probe_list); | ||
276 | |||
277 | static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); | ||
278 | static int kretprobe_dispatcher(struct kretprobe_instance *ri, | ||
279 | struct pt_regs *regs); | ||
280 | |||
281 | /* | ||
282 | * Allocate new trace_probe and initialize it (including kprobes). | ||
283 | */ | ||
284 | static struct trace_probe *alloc_trace_probe(const char *group, | ||
285 | const char *event, | ||
286 | void *addr, | ||
287 | const char *symbol, | ||
288 | unsigned long offs, | ||
289 | int nargs, int is_return) | ||
290 | { | ||
291 | struct trace_probe *tp; | ||
292 | |||
293 | tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); | ||
294 | if (!tp) | ||
295 | return ERR_PTR(-ENOMEM); | ||
296 | |||
297 | if (symbol) { | ||
298 | tp->symbol = kstrdup(symbol, GFP_KERNEL); | ||
299 | if (!tp->symbol) | ||
300 | goto error; | ||
301 | tp->rp.kp.symbol_name = tp->symbol; | ||
302 | tp->rp.kp.offset = offs; | ||
303 | } else | ||
304 | tp->rp.kp.addr = addr; | ||
305 | |||
306 | if (is_return) | ||
307 | tp->rp.handler = kretprobe_dispatcher; | ||
308 | else | ||
309 | tp->rp.kp.pre_handler = kprobe_dispatcher; | ||
310 | |||
311 | if (!event) | ||
312 | goto error; | ||
313 | tp->call.name = kstrdup(event, GFP_KERNEL); | ||
314 | if (!tp->call.name) | ||
315 | goto error; | ||
316 | |||
317 | if (!group) | ||
318 | goto error; | ||
319 | tp->call.system = kstrdup(group, GFP_KERNEL); | ||
320 | if (!tp->call.system) | ||
321 | goto error; | ||
322 | |||
323 | INIT_LIST_HEAD(&tp->list); | ||
324 | return tp; | ||
325 | error: | ||
326 | kfree(tp->call.name); | ||
327 | kfree(tp->symbol); | ||
328 | kfree(tp); | ||
329 | return ERR_PTR(-ENOMEM); | ||
330 | } | ||
331 | |||
332 | static void free_probe_arg(struct probe_arg *arg) | ||
333 | { | ||
334 | if (arg->fetch.func == fetch_symbol) | ||
335 | free_symbol_cache(arg->fetch.data); | ||
336 | else if (arg->fetch.func == fetch_indirect) | ||
337 | free_indirect_fetch_data(arg->fetch.data); | ||
338 | kfree(arg->name); | ||
339 | } | ||
340 | |||
341 | static void free_trace_probe(struct trace_probe *tp) | ||
342 | { | ||
343 | int i; | ||
344 | |||
345 | for (i = 0; i < tp->nr_args; i++) | ||
346 | free_probe_arg(&tp->args[i]); | ||
347 | |||
348 | kfree(tp->call.system); | ||
349 | kfree(tp->call.name); | ||
350 | kfree(tp->symbol); | ||
351 | kfree(tp); | ||
352 | } | ||
353 | |||
354 | static struct trace_probe *find_probe_event(const char *event, | ||
355 | const char *group) | ||
356 | { | ||
357 | struct trace_probe *tp; | ||
358 | |||
359 | list_for_each_entry(tp, &probe_list, list) | ||
360 | if (strcmp(tp->call.name, event) == 0 && | ||
361 | strcmp(tp->call.system, group) == 0) | ||
362 | return tp; | ||
363 | return NULL; | ||
364 | } | ||
365 | |||
366 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ | ||
367 | static void unregister_trace_probe(struct trace_probe *tp) | ||
368 | { | ||
369 | if (probe_is_return(tp)) | ||
370 | unregister_kretprobe(&tp->rp); | ||
371 | else | ||
372 | unregister_kprobe(&tp->rp.kp); | ||
373 | list_del(&tp->list); | ||
374 | unregister_probe_event(tp); | ||
375 | } | ||
376 | |||
377 | /* Register a trace_probe and probe_event */ | ||
378 | static int register_trace_probe(struct trace_probe *tp) | ||
379 | { | ||
380 | struct trace_probe *old_tp; | ||
381 | int ret; | ||
382 | |||
383 | mutex_lock(&probe_lock); | ||
384 | |||
385 | /* register as an event */ | ||
386 | old_tp = find_probe_event(tp->call.name, tp->call.system); | ||
387 | if (old_tp) { | ||
388 | /* delete old event */ | ||
389 | unregister_trace_probe(old_tp); | ||
390 | free_trace_probe(old_tp); | ||
391 | } | ||
392 | ret = register_probe_event(tp); | ||
393 | if (ret) { | ||
394 | pr_warning("Faild to register probe event(%d)\n", ret); | ||
395 | goto end; | ||
396 | } | ||
397 | |||
398 | tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; | ||
399 | if (probe_is_return(tp)) | ||
400 | ret = register_kretprobe(&tp->rp); | ||
401 | else | ||
402 | ret = register_kprobe(&tp->rp.kp); | ||
403 | |||
404 | if (ret) { | ||
405 | pr_warning("Could not insert probe(%d)\n", ret); | ||
406 | if (ret == -EILSEQ) { | ||
407 | pr_warning("Probing address(0x%p) is not an " | ||
408 | "instruction boundary.\n", | ||
409 | tp->rp.kp.addr); | ||
410 | ret = -EINVAL; | ||
411 | } | ||
412 | unregister_probe_event(tp); | ||
413 | } else | ||
414 | list_add_tail(&tp->list, &probe_list); | ||
415 | end: | ||
416 | mutex_unlock(&probe_lock); | ||
417 | return ret; | ||
418 | } | ||
419 | |||
420 | /* Split symbol and offset. */ | ||
421 | static int split_symbol_offset(char *symbol, unsigned long *offset) | ||
422 | { | ||
423 | char *tmp; | ||
424 | int ret; | ||
425 | |||
426 | if (!offset) | ||
427 | return -EINVAL; | ||
428 | |||
429 | tmp = strchr(symbol, '+'); | ||
430 | if (tmp) { | ||
431 | /* skip sign because strict_strtol doesn't accept '+' */ | ||
432 | ret = strict_strtoul(tmp + 1, 0, offset); | ||
433 | if (ret) | ||
434 | return ret; | ||
435 | *tmp = '\0'; | ||
436 | } else | ||
437 | *offset = 0; | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | #define PARAM_MAX_ARGS 16 | ||
442 | #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) | ||
443 | |||
444 | static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) | ||
445 | { | ||
446 | int ret = 0; | ||
447 | unsigned long param; | ||
448 | |||
449 | if (strcmp(arg, "retval") == 0) { | ||
450 | if (is_return) { | ||
451 | ff->func = fetch_retvalue; | ||
452 | ff->data = NULL; | ||
453 | } else | ||
454 | ret = -EINVAL; | ||
455 | } else if (strncmp(arg, "stack", 5) == 0) { | ||
456 | if (arg[5] == '\0') { | ||
457 | ff->func = fetch_stack_address; | ||
458 | ff->data = NULL; | ||
459 | } else if (isdigit(arg[5])) { | ||
460 | ret = strict_strtoul(arg + 5, 10, ¶m); | ||
461 | if (ret || param > PARAM_MAX_STACK) | ||
462 | ret = -EINVAL; | ||
463 | else { | ||
464 | ff->func = fetch_stack; | ||
465 | ff->data = (void *)param; | ||
466 | } | ||
467 | } else | ||
468 | ret = -EINVAL; | ||
469 | } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { | ||
470 | ret = strict_strtoul(arg + 3, 10, ¶m); | ||
471 | if (ret || param > PARAM_MAX_ARGS) | ||
472 | ret = -EINVAL; | ||
473 | else { | ||
474 | ff->func = fetch_argument; | ||
475 | ff->data = (void *)param; | ||
476 | } | ||
477 | } else | ||
478 | ret = -EINVAL; | ||
479 | return ret; | ||
480 | } | ||
481 | |||
482 | static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) | ||
483 | { | ||
484 | int ret = 0; | ||
485 | unsigned long param; | ||
486 | long offset; | ||
487 | char *tmp; | ||
488 | |||
489 | switch (arg[0]) { | ||
490 | case '$': | ||
491 | ret = parse_probe_vars(arg + 1, ff, is_return); | ||
492 | break; | ||
493 | case '%': /* named register */ | ||
494 | ret = regs_query_register_offset(arg + 1); | ||
495 | if (ret >= 0) { | ||
496 | ff->func = fetch_register; | ||
497 | ff->data = (void *)(unsigned long)ret; | ||
498 | ret = 0; | ||
499 | } | ||
500 | break; | ||
501 | case '@': /* memory or symbol */ | ||
502 | if (isdigit(arg[1])) { | ||
503 | ret = strict_strtoul(arg + 1, 0, ¶m); | ||
504 | if (ret) | ||
505 | break; | ||
506 | ff->func = fetch_memory; | ||
507 | ff->data = (void *)param; | ||
508 | } else { | ||
509 | ret = split_symbol_offset(arg + 1, &offset); | ||
510 | if (ret) | ||
511 | break; | ||
512 | ff->data = alloc_symbol_cache(arg + 1, offset); | ||
513 | if (ff->data) | ||
514 | ff->func = fetch_symbol; | ||
515 | else | ||
516 | ret = -EINVAL; | ||
517 | } | ||
518 | break; | ||
519 | case '+': /* indirect memory */ | ||
520 | case '-': | ||
521 | tmp = strchr(arg, '('); | ||
522 | if (!tmp) { | ||
523 | ret = -EINVAL; | ||
524 | break; | ||
525 | } | ||
526 | *tmp = '\0'; | ||
527 | ret = strict_strtol(arg + 1, 0, &offset); | ||
528 | if (ret) | ||
529 | break; | ||
530 | if (arg[0] == '-') | ||
531 | offset = -offset; | ||
532 | arg = tmp + 1; | ||
533 | tmp = strrchr(arg, ')'); | ||
534 | if (tmp) { | ||
535 | struct indirect_fetch_data *id; | ||
536 | *tmp = '\0'; | ||
537 | id = kzalloc(sizeof(struct indirect_fetch_data), | ||
538 | GFP_KERNEL); | ||
539 | if (!id) | ||
540 | return -ENOMEM; | ||
541 | id->offset = offset; | ||
542 | ret = parse_probe_arg(arg, &id->orig, is_return); | ||
543 | if (ret) | ||
544 | kfree(id); | ||
545 | else { | ||
546 | ff->func = fetch_indirect; | ||
547 | ff->data = (void *)id; | ||
548 | } | ||
549 | } else | ||
550 | ret = -EINVAL; | ||
551 | break; | ||
552 | default: | ||
553 | /* TODO: support custom handler */ | ||
554 | ret = -EINVAL; | ||
555 | } | ||
556 | return ret; | ||
557 | } | ||
558 | |||
559 | /* Return 1 if name is reserved or already used by another argument */ | ||
560 | static int conflict_field_name(const char *name, | ||
561 | struct probe_arg *args, int narg) | ||
562 | { | ||
563 | int i; | ||
564 | for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) | ||
565 | if (strcmp(reserved_field_names[i], name) == 0) | ||
566 | return 1; | ||
567 | for (i = 0; i < narg; i++) | ||
568 | if (strcmp(args[i].name, name) == 0) | ||
569 | return 1; | ||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | static int create_trace_probe(int argc, char **argv) | ||
574 | { | ||
575 | /* | ||
576 | * Argument syntax: | ||
577 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] | ||
578 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] | ||
579 | * Fetch args: | ||
580 | * $argN : fetch Nth of function argument. (N:0-) | ||
581 | * $retval : fetch return value | ||
582 | * $stack : fetch stack address | ||
583 | * $stackN : fetch Nth of stack (N:0-) | ||
584 | * @ADDR : fetch memory at ADDR (ADDR should be in kernel) | ||
585 | * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) | ||
586 | * %REG : fetch register REG | ||
587 | * Indirect memory fetch: | ||
588 | * +|-offs(ARG) : fetch memory at ARG +|- offs address. | ||
589 | * Alias name of args: | ||
590 | * NAME=FETCHARG : set NAME as alias of FETCHARG. | ||
591 | */ | ||
592 | struct trace_probe *tp; | ||
593 | int i, ret = 0; | ||
594 | int is_return = 0; | ||
595 | char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; | ||
596 | unsigned long offset = 0; | ||
597 | void *addr = NULL; | ||
598 | char buf[MAX_EVENT_NAME_LEN]; | ||
599 | |||
600 | if (argc < 2) { | ||
601 | pr_info("Probe point is not specified.\n"); | ||
602 | return -EINVAL; | ||
603 | } | ||
604 | |||
605 | if (argv[0][0] == 'p') | ||
606 | is_return = 0; | ||
607 | else if (argv[0][0] == 'r') | ||
608 | is_return = 1; | ||
609 | else { | ||
610 | pr_info("Probe definition must be started with 'p' or 'r'.\n"); | ||
611 | return -EINVAL; | ||
612 | } | ||
613 | |||
614 | if (argv[0][1] == ':') { | ||
615 | event = &argv[0][2]; | ||
616 | if (strchr(event, '/')) { | ||
617 | group = event; | ||
618 | event = strchr(group, '/') + 1; | ||
619 | event[-1] = '\0'; | ||
620 | if (strlen(group) == 0) { | ||
621 | pr_info("Group name is not specifiled\n"); | ||
622 | return -EINVAL; | ||
623 | } | ||
624 | } | ||
625 | if (strlen(event) == 0) { | ||
626 | pr_info("Event name is not specifiled\n"); | ||
627 | return -EINVAL; | ||
628 | } | ||
629 | } | ||
630 | |||
631 | if (isdigit(argv[1][0])) { | ||
632 | if (is_return) { | ||
633 | pr_info("Return probe point must be a symbol.\n"); | ||
634 | return -EINVAL; | ||
635 | } | ||
636 | /* an address specified */ | ||
637 | ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); | ||
638 | if (ret) { | ||
639 | pr_info("Failed to parse address.\n"); | ||
640 | return ret; | ||
641 | } | ||
642 | } else { | ||
643 | /* a symbol specified */ | ||
644 | symbol = argv[1]; | ||
645 | /* TODO: support .init module functions */ | ||
646 | ret = split_symbol_offset(symbol, &offset); | ||
647 | if (ret) { | ||
648 | pr_info("Failed to parse symbol.\n"); | ||
649 | return ret; | ||
650 | } | ||
651 | if (offset && is_return) { | ||
652 | pr_info("Return probe must be used without offset.\n"); | ||
653 | return -EINVAL; | ||
654 | } | ||
655 | } | ||
656 | argc -= 2; argv += 2; | ||
657 | |||
658 | /* setup a probe */ | ||
659 | if (!group) | ||
660 | group = KPROBE_EVENT_SYSTEM; | ||
661 | if (!event) { | ||
662 | /* Make a new event name */ | ||
663 | if (symbol) | ||
664 | snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", | ||
665 | is_return ? 'r' : 'p', symbol, offset); | ||
666 | else | ||
667 | snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", | ||
668 | is_return ? 'r' : 'p', addr); | ||
669 | event = buf; | ||
670 | } | ||
671 | tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, | ||
672 | is_return); | ||
673 | if (IS_ERR(tp)) { | ||
674 | pr_info("Failed to allocate trace_probe.(%d)\n", | ||
675 | (int)PTR_ERR(tp)); | ||
676 | return PTR_ERR(tp); | ||
677 | } | ||
678 | |||
679 | /* parse arguments */ | ||
680 | ret = 0; | ||
681 | for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { | ||
682 | /* Parse argument name */ | ||
683 | arg = strchr(argv[i], '='); | ||
684 | if (arg) | ||
685 | *arg++ = '\0'; | ||
686 | else | ||
687 | arg = argv[i]; | ||
688 | |||
689 | if (conflict_field_name(argv[i], tp->args, i)) { | ||
690 | pr_info("Argument%d name '%s' conflicts with " | ||
691 | "another field.\n", i, argv[i]); | ||
692 | ret = -EINVAL; | ||
693 | goto error; | ||
694 | } | ||
695 | |||
696 | tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); | ||
697 | |||
698 | /* Parse fetch argument */ | ||
699 | if (strlen(arg) > MAX_ARGSTR_LEN) { | ||
700 | pr_info("Argument%d(%s) is too long.\n", i, arg); | ||
701 | ret = -ENOSPC; | ||
702 | goto error; | ||
703 | } | ||
704 | ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); | ||
705 | if (ret) { | ||
706 | pr_info("Parse error at argument%d. (%d)\n", i, ret); | ||
707 | goto error; | ||
708 | } | ||
709 | } | ||
710 | tp->nr_args = i; | ||
711 | |||
712 | ret = register_trace_probe(tp); | ||
713 | if (ret) | ||
714 | goto error; | ||
715 | return 0; | ||
716 | |||
717 | error: | ||
718 | free_trace_probe(tp); | ||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | static void cleanup_all_probes(void) | ||
723 | { | ||
724 | struct trace_probe *tp; | ||
725 | |||
726 | mutex_lock(&probe_lock); | ||
727 | /* TODO: Use batch unregistration */ | ||
728 | while (!list_empty(&probe_list)) { | ||
729 | tp = list_entry(probe_list.next, struct trace_probe, list); | ||
730 | unregister_trace_probe(tp); | ||
731 | free_trace_probe(tp); | ||
732 | } | ||
733 | mutex_unlock(&probe_lock); | ||
734 | } | ||
735 | |||
736 | |||
737 | /* Probes listing interfaces */ | ||
738 | static void *probes_seq_start(struct seq_file *m, loff_t *pos) | ||
739 | { | ||
740 | mutex_lock(&probe_lock); | ||
741 | return seq_list_start(&probe_list, *pos); | ||
742 | } | ||
743 | |||
744 | static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
745 | { | ||
746 | return seq_list_next(v, &probe_list, pos); | ||
747 | } | ||
748 | |||
749 | static void probes_seq_stop(struct seq_file *m, void *v) | ||
750 | { | ||
751 | mutex_unlock(&probe_lock); | ||
752 | } | ||
753 | |||
754 | static int probes_seq_show(struct seq_file *m, void *v) | ||
755 | { | ||
756 | struct trace_probe *tp = v; | ||
757 | int i, ret; | ||
758 | char buf[MAX_ARGSTR_LEN + 1]; | ||
759 | |||
760 | seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); | ||
761 | seq_printf(m, ":%s", tp->call.name); | ||
762 | |||
763 | if (tp->symbol) | ||
764 | seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); | ||
765 | else | ||
766 | seq_printf(m, " 0x%p", tp->rp.kp.addr); | ||
767 | |||
768 | for (i = 0; i < tp->nr_args; i++) { | ||
769 | ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); | ||
770 | if (ret < 0) { | ||
771 | pr_warning("Argument%d decoding error(%d).\n", i, ret); | ||
772 | return ret; | ||
773 | } | ||
774 | seq_printf(m, " %s=%s", tp->args[i].name, buf); | ||
775 | } | ||
776 | seq_printf(m, "\n"); | ||
777 | return 0; | ||
778 | } | ||
779 | |||
780 | static const struct seq_operations probes_seq_op = { | ||
781 | .start = probes_seq_start, | ||
782 | .next = probes_seq_next, | ||
783 | .stop = probes_seq_stop, | ||
784 | .show = probes_seq_show | ||
785 | }; | ||
786 | |||
787 | static int probes_open(struct inode *inode, struct file *file) | ||
788 | { | ||
789 | if ((file->f_mode & FMODE_WRITE) && | ||
790 | (file->f_flags & O_TRUNC)) | ||
791 | cleanup_all_probes(); | ||
792 | |||
793 | return seq_open(file, &probes_seq_op); | ||
794 | } | ||
795 | |||
796 | static int command_trace_probe(const char *buf) | ||
797 | { | ||
798 | char **argv; | ||
799 | int argc = 0, ret = 0; | ||
800 | |||
801 | argv = argv_split(GFP_KERNEL, buf, &argc); | ||
802 | if (!argv) | ||
803 | return -ENOMEM; | ||
804 | |||
805 | if (argc) | ||
806 | ret = create_trace_probe(argc, argv); | ||
807 | |||
808 | argv_free(argv); | ||
809 | return ret; | ||
810 | } | ||
811 | |||
812 | #define WRITE_BUFSIZE 128 | ||
813 | |||
814 | static ssize_t probes_write(struct file *file, const char __user *buffer, | ||
815 | size_t count, loff_t *ppos) | ||
816 | { | ||
817 | char *kbuf, *tmp; | ||
818 | int ret; | ||
819 | size_t done; | ||
820 | size_t size; | ||
821 | |||
822 | kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); | ||
823 | if (!kbuf) | ||
824 | return -ENOMEM; | ||
825 | |||
826 | ret = done = 0; | ||
827 | while (done < count) { | ||
828 | size = count - done; | ||
829 | if (size >= WRITE_BUFSIZE) | ||
830 | size = WRITE_BUFSIZE - 1; | ||
831 | if (copy_from_user(kbuf, buffer + done, size)) { | ||
832 | ret = -EFAULT; | ||
833 | goto out; | ||
834 | } | ||
835 | kbuf[size] = '\0'; | ||
836 | tmp = strchr(kbuf, '\n'); | ||
837 | if (tmp) { | ||
838 | *tmp = '\0'; | ||
839 | size = tmp - kbuf + 1; | ||
840 | } else if (done + size < count) { | ||
841 | pr_warning("Line length is too long: " | ||
842 | "Should be less than %d.", WRITE_BUFSIZE); | ||
843 | ret = -EINVAL; | ||
844 | goto out; | ||
845 | } | ||
846 | done += size; | ||
847 | /* Remove comments */ | ||
848 | tmp = strchr(kbuf, '#'); | ||
849 | if (tmp) | ||
850 | *tmp = '\0'; | ||
851 | |||
852 | ret = command_trace_probe(kbuf); | ||
853 | if (ret) | ||
854 | goto out; | ||
855 | } | ||
856 | ret = done; | ||
857 | out: | ||
858 | kfree(kbuf); | ||
859 | return ret; | ||
860 | } | ||
861 | |||
862 | static const struct file_operations kprobe_events_ops = { | ||
863 | .owner = THIS_MODULE, | ||
864 | .open = probes_open, | ||
865 | .read = seq_read, | ||
866 | .llseek = seq_lseek, | ||
867 | .release = seq_release, | ||
868 | .write = probes_write, | ||
869 | }; | ||
870 | |||
871 | /* Probes profiling interfaces */ | ||
872 | static int probes_profile_seq_show(struct seq_file *m, void *v) | ||
873 | { | ||
874 | struct trace_probe *tp = v; | ||
875 | |||
876 | seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, | ||
877 | tp->rp.kp.nmissed); | ||
878 | |||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | static const struct seq_operations profile_seq_op = { | ||
883 | .start = probes_seq_start, | ||
884 | .next = probes_seq_next, | ||
885 | .stop = probes_seq_stop, | ||
886 | .show = probes_profile_seq_show | ||
887 | }; | ||
888 | |||
889 | static int profile_open(struct inode *inode, struct file *file) | ||
890 | { | ||
891 | return seq_open(file, &profile_seq_op); | ||
892 | } | ||
893 | |||
894 | static const struct file_operations kprobe_profile_ops = { | ||
895 | .owner = THIS_MODULE, | ||
896 | .open = profile_open, | ||
897 | .read = seq_read, | ||
898 | .llseek = seq_lseek, | ||
899 | .release = seq_release, | ||
900 | }; | ||
901 | |||
902 | /* Kprobe handler */ | ||
903 | static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | ||
904 | { | ||
905 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | ||
906 | struct kprobe_trace_entry *entry; | ||
907 | struct ring_buffer_event *event; | ||
908 | struct ring_buffer *buffer; | ||
909 | int size, i, pc; | ||
910 | unsigned long irq_flags; | ||
911 | struct ftrace_event_call *call = &tp->call; | ||
912 | |||
913 | tp->nhit++; | ||
914 | |||
915 | local_save_flags(irq_flags); | ||
916 | pc = preempt_count(); | ||
917 | |||
918 | size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | ||
919 | |||
920 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | ||
921 | irq_flags, pc); | ||
922 | if (!event) | ||
923 | return 0; | ||
924 | |||
925 | entry = ring_buffer_event_data(event); | ||
926 | entry->nargs = tp->nr_args; | ||
927 | entry->ip = (unsigned long)kp->addr; | ||
928 | for (i = 0; i < tp->nr_args; i++) | ||
929 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | ||
930 | |||
931 | if (!filter_current_check_discard(buffer, call, entry, event)) | ||
932 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | ||
933 | return 0; | ||
934 | } | ||
935 | |||
936 | /* Kretprobe handler */ | ||
937 | static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | ||
938 | struct pt_regs *regs) | ||
939 | { | ||
940 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | ||
941 | struct kretprobe_trace_entry *entry; | ||
942 | struct ring_buffer_event *event; | ||
943 | struct ring_buffer *buffer; | ||
944 | int size, i, pc; | ||
945 | unsigned long irq_flags; | ||
946 | struct ftrace_event_call *call = &tp->call; | ||
947 | |||
948 | local_save_flags(irq_flags); | ||
949 | pc = preempt_count(); | ||
950 | |||
951 | size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | ||
952 | |||
953 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | ||
954 | irq_flags, pc); | ||
955 | if (!event) | ||
956 | return 0; | ||
957 | |||
958 | entry = ring_buffer_event_data(event); | ||
959 | entry->nargs = tp->nr_args; | ||
960 | entry->func = (unsigned long)tp->rp.kp.addr; | ||
961 | entry->ret_ip = (unsigned long)ri->ret_addr; | ||
962 | for (i = 0; i < tp->nr_args; i++) | ||
963 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | ||
964 | |||
965 | if (!filter_current_check_discard(buffer, call, entry, event)) | ||
966 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | ||
967 | |||
968 | return 0; | ||
969 | } | ||
970 | |||
971 | /* Event entry printers */ | ||
972 | enum print_line_t | ||
973 | print_kprobe_event(struct trace_iterator *iter, int flags) | ||
974 | { | ||
975 | struct kprobe_trace_entry *field; | ||
976 | struct trace_seq *s = &iter->seq; | ||
977 | struct trace_event *event; | ||
978 | struct trace_probe *tp; | ||
979 | int i; | ||
980 | |||
981 | field = (struct kprobe_trace_entry *)iter->ent; | ||
982 | event = ftrace_find_event(field->ent.type); | ||
983 | tp = container_of(event, struct trace_probe, event); | ||
984 | |||
985 | if (!trace_seq_printf(s, "%s: (", tp->call.name)) | ||
986 | goto partial; | ||
987 | |||
988 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) | ||
989 | goto partial; | ||
990 | |||
991 | if (!trace_seq_puts(s, ")")) | ||
992 | goto partial; | ||
993 | |||
994 | for (i = 0; i < field->nargs; i++) | ||
995 | if (!trace_seq_printf(s, " %s=%lx", | ||
996 | tp->args[i].name, field->args[i])) | ||
997 | goto partial; | ||
998 | |||
999 | if (!trace_seq_puts(s, "\n")) | ||
1000 | goto partial; | ||
1001 | |||
1002 | return TRACE_TYPE_HANDLED; | ||
1003 | partial: | ||
1004 | return TRACE_TYPE_PARTIAL_LINE; | ||
1005 | } | ||
1006 | |||
1007 | enum print_line_t | ||
1008 | print_kretprobe_event(struct trace_iterator *iter, int flags) | ||
1009 | { | ||
1010 | struct kretprobe_trace_entry *field; | ||
1011 | struct trace_seq *s = &iter->seq; | ||
1012 | struct trace_event *event; | ||
1013 | struct trace_probe *tp; | ||
1014 | int i; | ||
1015 | |||
1016 | field = (struct kretprobe_trace_entry *)iter->ent; | ||
1017 | event = ftrace_find_event(field->ent.type); | ||
1018 | tp = container_of(event, struct trace_probe, event); | ||
1019 | |||
1020 | if (!trace_seq_printf(s, "%s: (", tp->call.name)) | ||
1021 | goto partial; | ||
1022 | |||
1023 | if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) | ||
1024 | goto partial; | ||
1025 | |||
1026 | if (!trace_seq_puts(s, " <- ")) | ||
1027 | goto partial; | ||
1028 | |||
1029 | if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) | ||
1030 | goto partial; | ||
1031 | |||
1032 | if (!trace_seq_puts(s, ")")) | ||
1033 | goto partial; | ||
1034 | |||
1035 | for (i = 0; i < field->nargs; i++) | ||
1036 | if (!trace_seq_printf(s, " %s=%lx", | ||
1037 | tp->args[i].name, field->args[i])) | ||
1038 | goto partial; | ||
1039 | |||
1040 | if (!trace_seq_puts(s, "\n")) | ||
1041 | goto partial; | ||
1042 | |||
1043 | return TRACE_TYPE_HANDLED; | ||
1044 | partial: | ||
1045 | return TRACE_TYPE_PARTIAL_LINE; | ||
1046 | } | ||
1047 | |||
1048 | static int probe_event_enable(struct ftrace_event_call *call) | ||
1049 | { | ||
1050 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1051 | |||
1052 | tp->flags |= TP_FLAG_TRACE; | ||
1053 | if (probe_is_return(tp)) | ||
1054 | return enable_kretprobe(&tp->rp); | ||
1055 | else | ||
1056 | return enable_kprobe(&tp->rp.kp); | ||
1057 | } | ||
1058 | |||
1059 | static void probe_event_disable(struct ftrace_event_call *call) | ||
1060 | { | ||
1061 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1062 | |||
1063 | tp->flags &= ~TP_FLAG_TRACE; | ||
1064 | if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { | ||
1065 | if (probe_is_return(tp)) | ||
1066 | disable_kretprobe(&tp->rp); | ||
1067 | else | ||
1068 | disable_kprobe(&tp->rp.kp); | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | static int probe_event_raw_init(struct ftrace_event_call *event_call) | ||
1073 | { | ||
1074 | INIT_LIST_HEAD(&event_call->fields); | ||
1075 | |||
1076 | return 0; | ||
1077 | } | ||
1078 | |||
/*
 * Define one event field named @name that describes member @item of the
 * local variable 'field'.  Relies on 'ret', 'event_call' and 'field'
 * being in scope at the point of use and returns from the enclosing
 * function when trace_define_field() fails.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed,	\
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)
1089 | |||
1090 | static int kprobe_event_define_fields(struct ftrace_event_call *event_call) | ||
1091 | { | ||
1092 | int ret, i; | ||
1093 | struct kprobe_trace_entry field; | ||
1094 | struct trace_probe *tp = (struct trace_probe *)event_call->data; | ||
1095 | |||
1096 | ret = trace_define_common_fields(event_call); | ||
1097 | if (!ret) | ||
1098 | return ret; | ||
1099 | |||
1100 | DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); | ||
1101 | DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); | ||
1102 | /* Set argument names as fields */ | ||
1103 | for (i = 0; i < tp->nr_args; i++) | ||
1104 | DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); | ||
1105 | return 0; | ||
1106 | } | ||
1107 | |||
1108 | static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) | ||
1109 | { | ||
1110 | int ret, i; | ||
1111 | struct kretprobe_trace_entry field; | ||
1112 | struct trace_probe *tp = (struct trace_probe *)event_call->data; | ||
1113 | |||
1114 | ret = trace_define_common_fields(event_call); | ||
1115 | if (!ret) | ||
1116 | return ret; | ||
1117 | |||
1118 | DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); | ||
1119 | DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); | ||
1120 | DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); | ||
1121 | /* Set argument names as fields */ | ||
1122 | for (i = 0; i < tp->nr_args; i++) | ||
1123 | DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); | ||
1124 | return 0; | ||
1125 | } | ||
1126 | |||
1127 | static int __probe_event_show_format(struct trace_seq *s, | ||
1128 | struct trace_probe *tp, const char *fmt, | ||
1129 | const char *arg) | ||
1130 | { | ||
1131 | int i; | ||
1132 | |||
1133 | /* Show format */ | ||
1134 | if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) | ||
1135 | return 0; | ||
1136 | |||
1137 | for (i = 0; i < tp->nr_args; i++) | ||
1138 | if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) | ||
1139 | return 0; | ||
1140 | |||
1141 | if (!trace_seq_printf(s, "\", %s", arg)) | ||
1142 | return 0; | ||
1143 | |||
1144 | for (i = 0; i < tp->nr_args; i++) | ||
1145 | if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) | ||
1146 | return 0; | ||
1147 | |||
1148 | return trace_seq_puts(s, "\n"); | ||
1149 | } | ||
1150 | |||
/*
 * Print one "field:" line giving the type, name, offset within the local
 * variable 'field' and size of @type.  Relies on 'ret', 's' and 'field'
 * being in scope at the point of use and makes the enclosing function
 * return 0 when the write fails.
 */
#undef SHOW_FIELD
#define SHOW_FIELD(type, item, name)					\
	do {								\
		ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"	\
				"offset:%u;\tsize:%u;\n", name,		\
				(unsigned int)offsetof(typeof(field), item),\
				(unsigned int)sizeof(type));		\
		if (!ret)						\
			return 0;					\
	} while (0)
1161 | |||
1162 | static int kprobe_event_show_format(struct ftrace_event_call *call, | ||
1163 | struct trace_seq *s) | ||
1164 | { | ||
1165 | struct kprobe_trace_entry field __attribute__((unused)); | ||
1166 | int ret, i; | ||
1167 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1168 | |||
1169 | SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); | ||
1170 | SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); | ||
1171 | |||
1172 | /* Show fields */ | ||
1173 | for (i = 0; i < tp->nr_args; i++) | ||
1174 | SHOW_FIELD(unsigned long, args[i], tp->args[i].name); | ||
1175 | trace_seq_puts(s, "\n"); | ||
1176 | |||
1177 | return __probe_event_show_format(s, tp, "(%lx)", | ||
1178 | "REC->" FIELD_STRING_IP); | ||
1179 | } | ||
1180 | |||
1181 | static int kretprobe_event_show_format(struct ftrace_event_call *call, | ||
1182 | struct trace_seq *s) | ||
1183 | { | ||
1184 | struct kretprobe_trace_entry field __attribute__((unused)); | ||
1185 | int ret, i; | ||
1186 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1187 | |||
1188 | SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); | ||
1189 | SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); | ||
1190 | SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); | ||
1191 | |||
1192 | /* Show fields */ | ||
1193 | for (i = 0; i < tp->nr_args; i++) | ||
1194 | SHOW_FIELD(unsigned long, args[i], tp->args[i].name); | ||
1195 | trace_seq_puts(s, "\n"); | ||
1196 | |||
1197 | return __probe_event_show_format(s, tp, "(%lx <- %lx)", | ||
1198 | "REC->" FIELD_STRING_FUNC | ||
1199 | ", REC->" FIELD_STRING_RETIP); | ||
1200 | } | ||
1201 | |||
1202 | #ifdef CONFIG_EVENT_PROFILE | ||
1203 | |||
1204 | /* Kprobe profile handler */ | ||
1205 | static __kprobes int kprobe_profile_func(struct kprobe *kp, | ||
1206 | struct pt_regs *regs) | ||
1207 | { | ||
1208 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | ||
1209 | struct ftrace_event_call *call = &tp->call; | ||
1210 | struct kprobe_trace_entry *entry; | ||
1211 | struct perf_trace_buf *trace_buf; | ||
1212 | struct trace_entry *ent; | ||
1213 | int size, __size, i, pc, __cpu; | ||
1214 | unsigned long irq_flags; | ||
1215 | char *raw_data; | ||
1216 | |||
1217 | pc = preempt_count(); | ||
1218 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | ||
1219 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | ||
1220 | size -= sizeof(u32); | ||
1221 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | ||
1222 | "profile buffer not large enough")) | ||
1223 | return 0; | ||
1224 | |||
1225 | /* | ||
1226 | * Protect the non nmi buffer | ||
1227 | * This also protects the rcu read side | ||
1228 | */ | ||
1229 | local_irq_save(irq_flags); | ||
1230 | __cpu = smp_processor_id(); | ||
1231 | |||
1232 | if (in_nmi()) | ||
1233 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1234 | else | ||
1235 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1236 | |||
1237 | if (!trace_buf) | ||
1238 | goto end; | ||
1239 | |||
1240 | trace_buf = per_cpu_ptr(trace_buf, __cpu); | ||
1241 | |||
1242 | if (trace_buf->recursion++) | ||
1243 | goto end_recursion; | ||
1244 | |||
1245 | /* | ||
1246 | * Make recursion update visible before entering perf_tp_event | ||
1247 | * so that we protect from perf recursions. | ||
1248 | */ | ||
1249 | barrier(); | ||
1250 | |||
1251 | raw_data = trace_buf->buf; | ||
1252 | |||
1253 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1254 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1255 | entry = (struct kprobe_trace_entry *)raw_data; | ||
1256 | ent = &entry->ent; | ||
1257 | |||
1258 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1259 | ent->type = call->id; | ||
1260 | entry->nargs = tp->nr_args; | ||
1261 | entry->ip = (unsigned long)kp->addr; | ||
1262 | for (i = 0; i < tp->nr_args; i++) | ||
1263 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | ||
1264 | perf_tp_event(call->id, entry->ip, 1, entry, size); | ||
1265 | |||
1266 | end_recursion: | ||
1267 | trace_buf->recursion--; | ||
1268 | end: | ||
1269 | local_irq_restore(irq_flags); | ||
1270 | |||
1271 | return 0; | ||
1272 | } | ||
1273 | |||
1274 | /* Kretprobe profile handler */ | ||
1275 | static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | ||
1276 | struct pt_regs *regs) | ||
1277 | { | ||
1278 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | ||
1279 | struct ftrace_event_call *call = &tp->call; | ||
1280 | struct kretprobe_trace_entry *entry; | ||
1281 | struct perf_trace_buf *trace_buf; | ||
1282 | struct trace_entry *ent; | ||
1283 | int size, __size, i, pc, __cpu; | ||
1284 | unsigned long irq_flags; | ||
1285 | char *raw_data; | ||
1286 | |||
1287 | pc = preempt_count(); | ||
1288 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | ||
1289 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | ||
1290 | size -= sizeof(u32); | ||
1291 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | ||
1292 | "profile buffer not large enough")) | ||
1293 | return 0; | ||
1294 | |||
1295 | /* | ||
1296 | * Protect the non nmi buffer | ||
1297 | * This also protects the rcu read side | ||
1298 | */ | ||
1299 | local_irq_save(irq_flags); | ||
1300 | __cpu = smp_processor_id(); | ||
1301 | |||
1302 | if (in_nmi()) | ||
1303 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1304 | else | ||
1305 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1306 | |||
1307 | if (!trace_buf) | ||
1308 | goto end; | ||
1309 | |||
1310 | trace_buf = per_cpu_ptr(trace_buf, __cpu); | ||
1311 | |||
1312 | if (trace_buf->recursion++) | ||
1313 | goto end_recursion; | ||
1314 | |||
1315 | /* | ||
1316 | * Make recursion update visible before entering perf_tp_event | ||
1317 | * so that we protect from perf recursions. | ||
1318 | */ | ||
1319 | barrier(); | ||
1320 | |||
1321 | raw_data = trace_buf->buf; | ||
1322 | |||
1323 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1324 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1325 | entry = (struct kretprobe_trace_entry *)raw_data; | ||
1326 | ent = &entry->ent; | ||
1327 | |||
1328 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1329 | ent->type = call->id; | ||
1330 | entry->nargs = tp->nr_args; | ||
1331 | entry->func = (unsigned long)tp->rp.kp.addr; | ||
1332 | entry->ret_ip = (unsigned long)ri->ret_addr; | ||
1333 | for (i = 0; i < tp->nr_args; i++) | ||
1334 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | ||
1335 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); | ||
1336 | |||
1337 | end_recursion: | ||
1338 | trace_buf->recursion--; | ||
1339 | end: | ||
1340 | local_irq_restore(irq_flags); | ||
1341 | |||
1342 | return 0; | ||
1343 | } | ||
1344 | |||
1345 | static int probe_profile_enable(struct ftrace_event_call *call) | ||
1346 | { | ||
1347 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1348 | |||
1349 | tp->flags |= TP_FLAG_PROFILE; | ||
1350 | |||
1351 | if (probe_is_return(tp)) | ||
1352 | return enable_kretprobe(&tp->rp); | ||
1353 | else | ||
1354 | return enable_kprobe(&tp->rp.kp); | ||
1355 | } | ||
1356 | |||
1357 | static void probe_profile_disable(struct ftrace_event_call *call) | ||
1358 | { | ||
1359 | struct trace_probe *tp = (struct trace_probe *)call->data; | ||
1360 | |||
1361 | tp->flags &= ~TP_FLAG_PROFILE; | ||
1362 | |||
1363 | if (!(tp->flags & TP_FLAG_TRACE)) { | ||
1364 | if (probe_is_return(tp)) | ||
1365 | disable_kretprobe(&tp->rp); | ||
1366 | else | ||
1367 | disable_kprobe(&tp->rp.kp); | ||
1368 | } | ||
1369 | } | ||
1370 | #endif /* CONFIG_EVENT_PROFILE */ | ||
1371 | |||
1372 | |||
1373 | static __kprobes | ||
1374 | int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) | ||
1375 | { | ||
1376 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | ||
1377 | |||
1378 | if (tp->flags & TP_FLAG_TRACE) | ||
1379 | kprobe_trace_func(kp, regs); | ||
1380 | #ifdef CONFIG_EVENT_PROFILE | ||
1381 | if (tp->flags & TP_FLAG_PROFILE) | ||
1382 | kprobe_profile_func(kp, regs); | ||
1383 | #endif /* CONFIG_EVENT_PROFILE */ | ||
1384 | return 0; /* We don't tweek kernel, so just return 0 */ | ||
1385 | } | ||
1386 | |||
1387 | static __kprobes | ||
1388 | int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) | ||
1389 | { | ||
1390 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | ||
1391 | |||
1392 | if (tp->flags & TP_FLAG_TRACE) | ||
1393 | kretprobe_trace_func(ri, regs); | ||
1394 | #ifdef CONFIG_EVENT_PROFILE | ||
1395 | if (tp->flags & TP_FLAG_PROFILE) | ||
1396 | kretprobe_profile_func(ri, regs); | ||
1397 | #endif /* CONFIG_EVENT_PROFILE */ | ||
1398 | return 0; /* We don't tweek kernel, so just return 0 */ | ||
1399 | } | ||
1400 | |||
1401 | static int register_probe_event(struct trace_probe *tp) | ||
1402 | { | ||
1403 | struct ftrace_event_call *call = &tp->call; | ||
1404 | int ret; | ||
1405 | |||
1406 | /* Initialize ftrace_event_call */ | ||
1407 | if (probe_is_return(tp)) { | ||
1408 | tp->event.trace = print_kretprobe_event; | ||
1409 | call->raw_init = probe_event_raw_init; | ||
1410 | call->show_format = kretprobe_event_show_format; | ||
1411 | call->define_fields = kretprobe_event_define_fields; | ||
1412 | } else { | ||
1413 | tp->event.trace = print_kprobe_event; | ||
1414 | call->raw_init = probe_event_raw_init; | ||
1415 | call->show_format = kprobe_event_show_format; | ||
1416 | call->define_fields = kprobe_event_define_fields; | ||
1417 | } | ||
1418 | call->event = &tp->event; | ||
1419 | call->id = register_ftrace_event(&tp->event); | ||
1420 | if (!call->id) | ||
1421 | return -ENODEV; | ||
1422 | call->enabled = 0; | ||
1423 | call->regfunc = probe_event_enable; | ||
1424 | call->unregfunc = probe_event_disable; | ||
1425 | |||
1426 | #ifdef CONFIG_EVENT_PROFILE | ||
1427 | atomic_set(&call->profile_count, -1); | ||
1428 | call->profile_enable = probe_profile_enable; | ||
1429 | call->profile_disable = probe_profile_disable; | ||
1430 | #endif | ||
1431 | call->data = tp; | ||
1432 | ret = trace_add_event_call(call); | ||
1433 | if (ret) { | ||
1434 | pr_info("Failed to register kprobe event: %s\n", call->name); | ||
1435 | unregister_ftrace_event(&tp->event); | ||
1436 | } | ||
1437 | return ret; | ||
1438 | } | ||
1439 | |||
1440 | static void unregister_probe_event(struct trace_probe *tp) | ||
1441 | { | ||
1442 | /* tp->event is unregistered in trace_remove_event_call() */ | ||
1443 | trace_remove_event_call(&tp->call); | ||
1444 | } | ||
1445 | |||
1446 | /* Make a debugfs interface for controling probe points */ | ||
1447 | static __init int init_kprobe_trace(void) | ||
1448 | { | ||
1449 | struct dentry *d_tracer; | ||
1450 | struct dentry *entry; | ||
1451 | |||
1452 | d_tracer = tracing_init_dentry(); | ||
1453 | if (!d_tracer) | ||
1454 | return 0; | ||
1455 | |||
1456 | entry = debugfs_create_file("kprobe_events", 0644, d_tracer, | ||
1457 | NULL, &kprobe_events_ops); | ||
1458 | |||
1459 | /* Event list interface */ | ||
1460 | if (!entry) | ||
1461 | pr_warning("Could not create debugfs " | ||
1462 | "'kprobe_events' entry\n"); | ||
1463 | |||
1464 | /* Profile interface */ | ||
1465 | entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, | ||
1466 | NULL, &kprobe_profile_ops); | ||
1467 | |||
1468 | if (!entry) | ||
1469 | pr_warning("Could not create debugfs " | ||
1470 | "'kprobe_profile' entry\n"); | ||
1471 | return 0; | ||
1472 | } | ||
1473 | fs_initcall(init_kprobe_trace); | ||
1474 | |||
1475 | |||
1476 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
1477 | |||
/* Probe target of the startup self test: returns the sum of its arguments. */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
1483 | |||
1484 | static __init int kprobe_trace_self_tests_init(void) | ||
1485 | { | ||
1486 | int ret; | ||
1487 | int (*target)(int, int, int, int, int, int); | ||
1488 | |||
1489 | target = kprobe_trace_selftest_target; | ||
1490 | |||
1491 | pr_info("Testing kprobe tracing: "); | ||
1492 | |||
1493 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " | ||
1494 | "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); | ||
1495 | if (WARN_ON_ONCE(ret)) | ||
1496 | pr_warning("error enabling function entry\n"); | ||
1497 | |||
1498 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | ||
1499 | "$retval"); | ||
1500 | if (WARN_ON_ONCE(ret)) | ||
1501 | pr_warning("error enabling function return\n"); | ||
1502 | |||
1503 | ret = target(1, 2, 3, 4, 5, 6); | ||
1504 | |||
1505 | cleanup_all_probes(); | ||
1506 | |||
1507 | pr_cont("OK\n"); | ||
1508 | return 0; | ||
1509 | } | ||
1510 | |||
1511 | late_initcall(kprobe_trace_self_tests_init); | ||
1512 | |||
1513 | #endif | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d00d1a8f1f26..51213b0aa81b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -354,13 +354,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) | |||
354 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 354 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); |
355 | } | 355 | } |
356 | 356 | ||
357 | int reg_event_syscall_enter(void *ptr) | 357 | int reg_event_syscall_enter(struct ftrace_event_call *call) |
358 | { | 358 | { |
359 | int ret = 0; | 359 | int ret = 0; |
360 | int num; | 360 | int num; |
361 | char *name; | 361 | char *name; |
362 | 362 | ||
363 | name = (char *)ptr; | 363 | name = (char *)call->data; |
364 | num = syscall_name_to_nr(name); | 364 | num = syscall_name_to_nr(name); |
365 | if (num < 0 || num >= NR_syscalls) | 365 | if (num < 0 || num >= NR_syscalls) |
366 | return -ENOSYS; | 366 | return -ENOSYS; |
@@ -378,12 +378,12 @@ int reg_event_syscall_enter(void *ptr) | |||
378 | return ret; | 378 | return ret; |
379 | } | 379 | } |
380 | 380 | ||
381 | void unreg_event_syscall_enter(void *ptr) | 381 | void unreg_event_syscall_enter(struct ftrace_event_call *call) |
382 | { | 382 | { |
383 | int num; | 383 | int num; |
384 | char *name; | 384 | char *name; |
385 | 385 | ||
386 | name = (char *)ptr; | 386 | name = (char *)call->data; |
387 | num = syscall_name_to_nr(name); | 387 | num = syscall_name_to_nr(name); |
388 | if (num < 0 || num >= NR_syscalls) | 388 | if (num < 0 || num >= NR_syscalls) |
389 | return; | 389 | return; |
@@ -395,13 +395,13 @@ void unreg_event_syscall_enter(void *ptr) | |||
395 | mutex_unlock(&syscall_trace_lock); | 395 | mutex_unlock(&syscall_trace_lock); |
396 | } | 396 | } |
397 | 397 | ||
398 | int reg_event_syscall_exit(void *ptr) | 398 | int reg_event_syscall_exit(struct ftrace_event_call *call) |
399 | { | 399 | { |
400 | int ret = 0; | 400 | int ret = 0; |
401 | int num; | 401 | int num; |
402 | char *name; | 402 | char *name; |
403 | 403 | ||
404 | name = (char *)ptr; | 404 | name = call->data; |
405 | num = syscall_name_to_nr(name); | 405 | num = syscall_name_to_nr(name); |
406 | if (num < 0 || num >= NR_syscalls) | 406 | if (num < 0 || num >= NR_syscalls) |
407 | return -ENOSYS; | 407 | return -ENOSYS; |
@@ -419,12 +419,12 @@ int reg_event_syscall_exit(void *ptr) | |||
419 | return ret; | 419 | return ret; |
420 | } | 420 | } |
421 | 421 | ||
422 | void unreg_event_syscall_exit(void *ptr) | 422 | void unreg_event_syscall_exit(struct ftrace_event_call *call) |
423 | { | 423 | { |
424 | int num; | 424 | int num; |
425 | char *name; | 425 | char *name; |
426 | 426 | ||
427 | name = (char *)ptr; | 427 | name = call->data; |
428 | num = syscall_name_to_nr(name); | 428 | num = syscall_name_to_nr(name); |
429 | if (num < 0 || num >= NR_syscalls) | 429 | if (num < 0 || num >= NR_syscalls) |
430 | return; | 430 | return; |
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit; | |||
477 | static void prof_syscall_enter(struct pt_regs *regs, long id) | 477 | static void prof_syscall_enter(struct pt_regs *regs, long id) |
478 | { | 478 | { |
479 | struct syscall_metadata *sys_data; | 479 | struct syscall_metadata *sys_data; |
480 | struct perf_trace_buf *trace_buf; | ||
480 | struct syscall_trace_enter *rec; | 481 | struct syscall_trace_enter *rec; |
481 | unsigned long flags; | 482 | unsigned long flags; |
482 | char *raw_data; | 483 | char *raw_data; |
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
507 | cpu = smp_processor_id(); | 508 | cpu = smp_processor_id(); |
508 | 509 | ||
509 | if (in_nmi()) | 510 | if (in_nmi()) |
510 | raw_data = rcu_dereference(trace_profile_buf_nmi); | 511 | trace_buf = rcu_dereference(perf_trace_buf_nmi); |
511 | else | 512 | else |
512 | raw_data = rcu_dereference(trace_profile_buf); | 513 | trace_buf = rcu_dereference(perf_trace_buf); |
513 | 514 | ||
514 | if (!raw_data) | 515 | if (!trace_buf) |
515 | goto end; | 516 | goto end; |
516 | 517 | ||
517 | raw_data = per_cpu_ptr(raw_data, cpu); | 518 | trace_buf = per_cpu_ptr(trace_buf, cpu); |
519 | |||
520 | if (trace_buf->recursion++) | ||
521 | goto end_recursion; | ||
522 | |||
523 | /* | ||
524 | * Make recursion update visible before entering perf_tp_event | ||
525 | * so that we protect from perf recursions. | ||
526 | */ | ||
527 | barrier(); | ||
528 | |||
529 | raw_data = trace_buf->buf; | ||
518 | 530 | ||
519 | /* zero the dead bytes from align to not leak stack to user */ | 531 | /* zero the dead bytes from align to not leak stack to user */ |
520 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 532 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
527 | (unsigned long *)&rec->args); | 539 | (unsigned long *)&rec->args); |
528 | perf_tp_event(sys_data->enter_id, 0, 1, rec, size); | 540 | perf_tp_event(sys_data->enter_id, 0, 1, rec, size); |
529 | 541 | ||
542 | end_recursion: | ||
543 | trace_buf->recursion--; | ||
530 | end: | 544 | end: |
531 | local_irq_restore(flags); | 545 | local_irq_restore(flags); |
532 | } | 546 | } |
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
574 | { | 588 | { |
575 | struct syscall_metadata *sys_data; | 589 | struct syscall_metadata *sys_data; |
576 | struct syscall_trace_exit *rec; | 590 | struct syscall_trace_exit *rec; |
591 | struct perf_trace_buf *trace_buf; | ||
577 | unsigned long flags; | 592 | unsigned long flags; |
578 | int syscall_nr; | 593 | int syscall_nr; |
579 | char *raw_data; | 594 | char *raw_data; |
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
605 | cpu = smp_processor_id(); | 620 | cpu = smp_processor_id(); |
606 | 621 | ||
607 | if (in_nmi()) | 622 | if (in_nmi()) |
608 | raw_data = rcu_dereference(trace_profile_buf_nmi); | 623 | trace_buf = rcu_dereference(perf_trace_buf_nmi); |
609 | else | 624 | else |
610 | raw_data = rcu_dereference(trace_profile_buf); | 625 | trace_buf = rcu_dereference(perf_trace_buf); |
611 | 626 | ||
612 | if (!raw_data) | 627 | if (!trace_buf) |
613 | goto end; | 628 | goto end; |
614 | 629 | ||
615 | raw_data = per_cpu_ptr(raw_data, cpu); | 630 | trace_buf = per_cpu_ptr(trace_buf, cpu); |
631 | |||
632 | if (trace_buf->recursion++) | ||
633 | goto end_recursion; | ||
634 | |||
635 | /* | ||
636 | * Make recursion update visible before entering perf_tp_event | ||
637 | * so that we protect from perf recursions. | ||
638 | */ | ||
639 | barrier(); | ||
640 | |||
641 | raw_data = trace_buf->buf; | ||
616 | 642 | ||
617 | /* zero the dead bytes from align to not leak stack to user */ | 643 | /* zero the dead bytes from align to not leak stack to user */ |
618 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 644 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
626 | 652 | ||
627 | perf_tp_event(sys_data->exit_id, 0, 1, rec, size); | 653 | perf_tp_event(sys_data->exit_id, 0, 1, rec, size); |
628 | 654 | ||
655 | end_recursion: | ||
656 | trace_buf->recursion--; | ||
629 | end: | 657 | end: |
630 | local_irq_restore(flags); | 658 | local_irq_restore(flags); |
631 | } | 659 | } |
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt new file mode 100644 index 000000000000..9270594e6dfd --- /dev/null +++ b/tools/perf/Documentation/perf-probe.txt | |||
@@ -0,0 +1,49 @@ | |||
1 | perf-probe(1) | ||
2 | ============= | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-probe - Define new dynamic tracepoints | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...] | ||
12 | or | ||
13 | 'perf probe' [options] 'PROBE' ['PROBE' ...] | ||
14 | |||
15 | |||
16 | DESCRIPTION | ||
17 | ----------- | ||
18 | This command defines dynamic tracepoint events, by symbol and registers | ||
19 | without debuginfo, or by C expressions (C line numbers, C function names, | ||
20 | and C local variables) with debuginfo. | ||
21 | |||
22 | |||
23 | OPTIONS | ||
24 | ------- | ||
25 | -k:: | ||
26 | --vmlinux=PATH:: | ||
27 | Specify vmlinux path which has debuginfo (Dwarf binary). | ||
28 | |||
29 | -v:: | ||
30 | --verbose:: | ||
31 | Be more verbose (show parsed arguments, etc). | ||
32 | |||
33 | -a:: | ||
34 | --add:: | ||
35 | Define a probe point (see PROBE SYNTAX for details). | ||
36 | |||
37 | PROBE SYNTAX | ||
38 | ------------ | ||
39 | Probe points are defined by the following syntax. | ||
40 | |||
41 | "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]" | ||
42 | |||
43 | 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function. | ||
44 | It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number. | ||
45 | 'ARG' specifies the arguments of this probe point. You can use the name of a local variable, or the kprobe-tracer argument format (e.g. $retval, %ax, etc). | ||
46 | |||
47 | SEE ALSO | ||
48 | -------- | ||
49 | linkperf:perf-trace[1], linkperf:perf-record[1] | ||
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 46a58a81c9ad..3dbb5c5bb8c6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -337,6 +337,7 @@ LIB_FILE=libperf.a | |||
337 | LIB_H += ../../include/linux/perf_event.h | 337 | LIB_H += ../../include/linux/perf_event.h |
338 | LIB_H += ../../include/linux/rbtree.h | 338 | LIB_H += ../../include/linux/rbtree.h |
339 | LIB_H += ../../include/linux/list.h | 339 | LIB_H += ../../include/linux/list.h |
340 | LIB_H += ../../include/linux/stringify.h | ||
340 | LIB_H += util/include/linux/bitmap.h | 341 | LIB_H += util/include/linux/bitmap.h |
341 | LIB_H += util/include/linux/bitops.h | 342 | LIB_H += util/include/linux/bitops.h |
342 | LIB_H += util/include/linux/compiler.h | 343 | LIB_H += util/include/linux/compiler.h |
@@ -438,6 +439,7 @@ BUILTIN_OBJS += builtin-stat.o | |||
438 | BUILTIN_OBJS += builtin-timechart.o | 439 | BUILTIN_OBJS += builtin-timechart.o |
439 | BUILTIN_OBJS += builtin-top.o | 440 | BUILTIN_OBJS += builtin-top.o |
440 | BUILTIN_OBJS += builtin-trace.o | 441 | BUILTIN_OBJS += builtin-trace.o |
442 | BUILTIN_OBJS += builtin-probe.o | ||
441 | 443 | ||
442 | PERFLIBS = $(LIB_FILE) | 444 | PERFLIBS = $(LIB_FILE) |
443 | 445 | ||
@@ -469,6 +471,10 @@ ifeq ($(uname_S),Darwin) | |||
469 | endif | 471 | endif |
470 | 472 | ||
471 | ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) | 473 | ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) |
474 | ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) | ||
475 | msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); | ||
476 | endif | ||
477 | |||
472 | ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) | 478 | ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) |
473 | BASIC_CFLAGS += -DLIBELF_NO_MMAP | 479 | BASIC_CFLAGS += -DLIBELF_NO_MMAP |
474 | endif | 480 | endif |
@@ -476,6 +482,15 @@ else | |||
476 | msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); | 482 | msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); |
477 | endif | 483 | endif |
478 | 484 | ||
485 | ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) | ||
486 | msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); | ||
487 | BASIC_CFLAGS += -DNO_LIBDWARF | ||
488 | else | ||
489 | EXTLIBS += -lelf -ldwarf | ||
490 | LIB_H += util/probe-finder.h | ||
491 | LIB_OBJS += util/probe-finder.o | ||
492 | endif | ||
493 | |||
479 | ifdef NO_DEMANGLE | 494 | ifdef NO_DEMANGLE |
480 | BASIC_CFLAGS += -DNO_DEMANGLE | 495 | BASIC_CFLAGS += -DNO_DEMANGLE |
481 | else | 496 | else |
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c new file mode 100644 index 000000000000..d78a3d945492 --- /dev/null +++ b/tools/perf/builtin-probe.c | |||
@@ -0,0 +1,435 @@ | |||
1 | /* | ||
2 | * builtin-probe.c | ||
3 | * | ||
4 | * Builtin probe command: Set up probe events by C expression | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | #define _GNU_SOURCE | ||
24 | #include <sys/utsname.h> | ||
25 | #include <sys/types.h> | ||
26 | #include <sys/stat.h> | ||
27 | #include <fcntl.h> | ||
28 | #include <errno.h> | ||
29 | #include <stdio.h> | ||
30 | #include <unistd.h> | ||
31 | #include <stdlib.h> | ||
32 | #include <string.h> | ||
33 | |||
34 | #undef _GNU_SOURCE | ||
35 | #include "perf.h" | ||
36 | #include "builtin.h" | ||
37 | #include "util/util.h" | ||
38 | #include "util/event.h" | ||
39 | #include "util/debug.h" | ||
40 | #include "util/parse-options.h" | ||
41 | #include "util/parse-events.h" /* For debugfs_path */ | ||
42 | #include "util/probe-finder.h" | ||
43 | |||
44 | /* Default vmlinux search paths */ | ||
45 | #define NR_SEARCH_PATH 3 | ||
46 | const char *default_search_path[NR_SEARCH_PATH] = { | ||
47 | "/lib/modules/%s/build/vmlinux", /* Custom build kernel */ | ||
48 | "/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ | ||
49 | "/boot/vmlinux-debug-%s", /* Ubuntu */ | ||
50 | }; | ||
51 | |||
52 | #define MAX_PATH_LEN 256 | ||
53 | #define MAX_PROBES 128 | ||
54 | #define MAX_PROBE_ARGS 128 | ||
55 | #define PERFPROBE_GROUP "probe" | ||
56 | |||
57 | /* Session management structure */ | ||
58 | static struct { | ||
59 | char *vmlinux; | ||
60 | char *release; | ||
61 | int need_dwarf; | ||
62 | int nr_probe; | ||
63 | struct probe_point probes[MAX_PROBES]; | ||
64 | } session; | ||
65 | |||
66 | #define semantic_error(msg ...) die("Semantic error :" msg) | ||
67 | |||
68 | /* Parse probe point. Return 1 if return probe */ | ||
69 | static void parse_probe_point(char *arg, struct probe_point *pp) | ||
70 | { | ||
71 | char *ptr, *tmp; | ||
72 | char c, nc = 0; | ||
73 | /* | ||
74 | * <Syntax> | ||
75 | * perf probe SRC:LN | ||
76 | * perf probe FUNC[+OFFS|%return][@SRC] | ||
77 | */ | ||
78 | |||
79 | ptr = strpbrk(arg, ":+@%"); | ||
80 | if (ptr) { | ||
81 | nc = *ptr; | ||
82 | *ptr++ = '\0'; | ||
83 | } | ||
84 | |||
85 | /* Check arg is function or file and copy it */ | ||
86 | if (strchr(arg, '.')) /* File */ | ||
87 | pp->file = strdup(arg); | ||
88 | else /* Function */ | ||
89 | pp->function = strdup(arg); | ||
90 | DIE_IF(pp->file == NULL && pp->function == NULL); | ||
91 | |||
92 | /* Parse other options */ | ||
93 | while (ptr) { | ||
94 | arg = ptr; | ||
95 | c = nc; | ||
96 | ptr = strpbrk(arg, ":+@%"); | ||
97 | if (ptr) { | ||
98 | nc = *ptr; | ||
99 | *ptr++ = '\0'; | ||
100 | } | ||
101 | switch (c) { | ||
102 | case ':': /* Line number */ | ||
103 | pp->line = strtoul(arg, &tmp, 0); | ||
104 | if (*tmp != '\0') | ||
105 | semantic_error("There is non-digit charactor" | ||
106 | " in line number."); | ||
107 | break; | ||
108 | case '+': /* Byte offset from a symbol */ | ||
109 | pp->offset = strtoul(arg, &tmp, 0); | ||
110 | if (*tmp != '\0') | ||
111 | semantic_error("There is non-digit charactor" | ||
112 | " in offset."); | ||
113 | break; | ||
114 | case '@': /* File name */ | ||
115 | if (pp->file) | ||
116 | semantic_error("SRC@SRC is not allowed."); | ||
117 | pp->file = strdup(arg); | ||
118 | DIE_IF(pp->file == NULL); | ||
119 | if (ptr) | ||
120 | semantic_error("@SRC must be the last " | ||
121 | "option."); | ||
122 | break; | ||
123 | case '%': /* Probe places */ | ||
124 | if (strcmp(arg, "return") == 0) { | ||
125 | pp->retprobe = 1; | ||
126 | } else /* Others not supported yet */ | ||
127 | semantic_error("%%%s is not supported.", arg); | ||
128 | break; | ||
129 | default: | ||
130 | DIE_IF("Program has a bug."); | ||
131 | break; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | /* Exclusion check */ | ||
136 | if (pp->line && pp->offset) | ||
137 | semantic_error("Offset can't be used with line number."); | ||
138 | if (!pp->line && pp->file && !pp->function) | ||
139 | semantic_error("File always requires line number."); | ||
140 | if (pp->offset && !pp->function) | ||
141 | semantic_error("Offset requires an entry function."); | ||
142 | if (pp->retprobe && !pp->function) | ||
143 | semantic_error("Return probe requires an entry function."); | ||
144 | if ((pp->offset || pp->line) && pp->retprobe) | ||
145 | semantic_error("Offset/Line can't be used with return probe."); | ||
146 | |||
147 | pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", | ||
148 | pp->function, pp->file, pp->line, pp->offset, pp->retprobe); | ||
149 | } | ||
150 | |||
151 | /* Parse an event definition. Note that any error must die. */ | ||
152 | static void parse_probe_event(const char *str) | ||
153 | { | ||
154 | char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ | ||
155 | int argc, i; | ||
156 | struct probe_point *pp = &session.probes[session.nr_probe]; | ||
157 | |||
158 | pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); | ||
159 | if (++session.nr_probe == MAX_PROBES) | ||
160 | semantic_error("Too many probes"); | ||
161 | |||
162 | /* Separate arguments, similar to argv_split */ | ||
163 | argc = 0; | ||
164 | do { | ||
165 | /* Skip separators */ | ||
166 | while (isspace(*str)) | ||
167 | str++; | ||
168 | |||
169 | /* Add an argument */ | ||
170 | if (*str != '\0') { | ||
171 | const char *s = str; | ||
172 | |||
173 | /* Skip the argument */ | ||
174 | while (!isspace(*str) && *str != '\0') | ||
175 | str++; | ||
176 | |||
177 | /* Duplicate the argument */ | ||
178 | argv[argc] = strndup(s, str - s); | ||
179 | if (argv[argc] == NULL) | ||
180 | die("strndup"); | ||
181 | if (++argc == MAX_PROBE_ARGS) | ||
182 | semantic_error("Too many arguments"); | ||
183 | pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]); | ||
184 | } | ||
185 | } while (*str != '\0'); | ||
186 | if (!argc) | ||
187 | semantic_error("An empty argument."); | ||
188 | |||
189 | /* Parse probe point */ | ||
190 | parse_probe_point(argv[0], pp); | ||
191 | free(argv[0]); | ||
192 | if (pp->file || pp->line) | ||
193 | session.need_dwarf = 1; | ||
194 | |||
195 | /* Copy arguments */ | ||
196 | pp->nr_args = argc - 1; | ||
197 | if (pp->nr_args > 0) { | ||
198 | pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); | ||
199 | if (!pp->args) | ||
200 | die("malloc"); | ||
201 | memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args); | ||
202 | } | ||
203 | |||
204 | /* Ensure return probe has no C argument */ | ||
205 | for (i = 0; i < pp->nr_args; i++) | ||
206 | if (is_c_varname(pp->args[i])) { | ||
207 | if (pp->retprobe) | ||
208 | semantic_error("You can't specify local" | ||
209 | " variable for kretprobe"); | ||
210 | session.need_dwarf = 1; | ||
211 | } | ||
212 | |||
213 | pr_debug("%d arguments\n", pp->nr_args); | ||
214 | } | ||
215 | |||
216 | static int opt_add_probe_event(const struct option *opt __used, | ||
217 | const char *str, int unset __used) | ||
218 | { | ||
219 | if (str) | ||
220 | parse_probe_event(str); | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | #ifndef NO_LIBDWARF | ||
225 | static int open_default_vmlinux(void) | ||
226 | { | ||
227 | struct utsname uts; | ||
228 | char fname[MAX_PATH_LEN]; | ||
229 | int fd, ret, i; | ||
230 | |||
231 | ret = uname(&uts); | ||
232 | if (ret) { | ||
233 | pr_debug("uname() failed.\n"); | ||
234 | return -errno; | ||
235 | } | ||
236 | session.release = uts.release; | ||
237 | for (i = 0; i < NR_SEARCH_PATH; i++) { | ||
238 | ret = snprintf(fname, MAX_PATH_LEN, | ||
239 | default_search_path[i], session.release); | ||
240 | if (ret >= MAX_PATH_LEN || ret < 0) { | ||
241 | pr_debug("Filename(%d,%s) is too long.\n", i, | ||
242 | uts.release); | ||
243 | errno = E2BIG; | ||
244 | return -E2BIG; | ||
245 | } | ||
246 | pr_debug("try to open %s\n", fname); | ||
247 | fd = open(fname, O_RDONLY); | ||
248 | if (fd >= 0) | ||
249 | break; | ||
250 | } | ||
251 | return fd; | ||
252 | } | ||
253 | #endif | ||
254 | |||
255 | static const char * const probe_usage[] = { | ||
256 | "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", | ||
257 | "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", | ||
258 | NULL | ||
259 | }; | ||
260 | |||
261 | static const struct option options[] = { | ||
262 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
263 | "be more verbose (show parsed arguments, etc)"), | ||
264 | #ifndef NO_LIBDWARF | ||
265 | OPT_STRING('k', "vmlinux", &session.vmlinux, "file", | ||
266 | "vmlinux/module pathname"), | ||
267 | #endif | ||
268 | OPT_CALLBACK('a', "add", NULL, | ||
269 | #ifdef NO_LIBDWARF | ||
270 | "FUNC[+OFFS|%return] [ARG ...]", | ||
271 | #else | ||
272 | "FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]", | ||
273 | #endif | ||
274 | "probe point definition, where\n" | ||
275 | "\t\tGRP:\tGroup name (optional)\n" | ||
276 | "\t\tNAME:\tEvent name\n" | ||
277 | "\t\tFUNC:\tFunction name\n" | ||
278 | "\t\tOFFS:\tOffset from function entry (in byte)\n" | ||
279 | "\t\t%return:\tPut the probe at function return\n" | ||
280 | #ifdef NO_LIBDWARF | ||
281 | "\t\tARG:\tProbe argument (only \n" | ||
282 | #else | ||
283 | "\t\tSRC:\tSource code path\n" | ||
284 | "\t\tRLN:\tRelative line number from function entry.\n" | ||
285 | "\t\tALN:\tAbsolute line number in file.\n" | ||
286 | "\t\tARG:\tProbe argument (local variable name or\n" | ||
287 | #endif | ||
288 | "\t\t\tkprobe-tracer argument format is supported.)\n", | ||
289 | opt_add_probe_event), | ||
290 | OPT_END() | ||
291 | }; | ||
292 | |||
/*
 * Write one event definition to @fd and report it on stdout.
 * Dies if write() returns zero or an error; otherwise returns the number of
 * bytes written.
 */
static int write_new_event(int fd, const char *buf)
{
	int written = write(fd, buf, strlen(buf));

	if (written > 0)
		printf("Added new event: %s\n", buf);
	else
		die("Failed to create event.");

	return written;
}
305 | |||
306 | #define MAX_CMDLEN 256 | ||
307 | |||
308 | static int synthesize_probe_event(struct probe_point *pp) | ||
309 | { | ||
310 | char *buf; | ||
311 | int i, len, ret; | ||
312 | pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); | ||
313 | if (!buf) | ||
314 | die("Failed to allocate memory by calloc."); | ||
315 | ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); | ||
316 | if (ret <= 0 || ret >= MAX_CMDLEN) | ||
317 | goto error; | ||
318 | len = ret; | ||
319 | |||
320 | for (i = 0; i < pp->nr_args; i++) { | ||
321 | ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", | ||
322 | pp->args[i]); | ||
323 | if (ret <= 0 || ret >= MAX_CMDLEN - len) | ||
324 | goto error; | ||
325 | len += ret; | ||
326 | } | ||
327 | pp->found = 1; | ||
328 | return pp->found; | ||
329 | error: | ||
330 | free(pp->probes[0]); | ||
331 | if (ret > 0) | ||
332 | ret = -E2BIG; | ||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | int cmd_probe(int argc, const char **argv, const char *prefix __used) | ||
337 | { | ||
338 | int i, j, fd, ret; | ||
339 | struct probe_point *pp; | ||
340 | char buf[MAX_CMDLEN]; | ||
341 | |||
342 | argc = parse_options(argc, argv, options, probe_usage, | ||
343 | PARSE_OPT_STOP_AT_NON_OPTION); | ||
344 | for (i = 0; i < argc; i++) | ||
345 | parse_probe_event(argv[i]); | ||
346 | |||
347 | if (session.nr_probe == 0) | ||
348 | usage_with_options(probe_usage, options); | ||
349 | |||
350 | if (session.need_dwarf) | ||
351 | #ifdef NO_LIBDWARF | ||
352 | semantic_error("Debuginfo-analysis is not supported"); | ||
353 | #else /* !NO_LIBDWARF */ | ||
354 | pr_info("Some probes require debuginfo.\n"); | ||
355 | |||
356 | if (session.vmlinux) | ||
357 | fd = open(session.vmlinux, O_RDONLY); | ||
358 | else | ||
359 | fd = open_default_vmlinux(); | ||
360 | if (fd < 0) { | ||
361 | if (session.need_dwarf) | ||
362 | die("Could not open vmlinux/module file."); | ||
363 | |||
364 | pr_warning("Could not open vmlinux/module file." | ||
365 | " Try to use symbols.\n"); | ||
366 | goto end_dwarf; | ||
367 | } | ||
368 | |||
369 | /* Searching probe points */ | ||
370 | for (j = 0; j < session.nr_probe; j++) { | ||
371 | pp = &session.probes[j]; | ||
372 | if (pp->found) | ||
373 | continue; | ||
374 | |||
375 | lseek(fd, SEEK_SET, 0); | ||
376 | ret = find_probepoint(fd, pp); | ||
377 | if (ret < 0) { | ||
378 | if (session.need_dwarf) | ||
379 | die("Could not analyze debuginfo."); | ||
380 | |||
381 | pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n"); | ||
382 | break; | ||
383 | } | ||
384 | if (ret == 0) /* No error but failed to find probe point. */ | ||
385 | die("No probe point found."); | ||
386 | } | ||
387 | close(fd); | ||
388 | |||
389 | end_dwarf: | ||
390 | #endif /* !NO_LIBDWARF */ | ||
391 | |||
392 | /* Synthesize probes without dwarf */ | ||
393 | for (j = 0; j < session.nr_probe; j++) { | ||
394 | pp = &session.probes[j]; | ||
395 | if (pp->found) /* This probe is already found. */ | ||
396 | continue; | ||
397 | |||
398 | ret = synthesize_probe_event(pp); | ||
399 | if (ret == -E2BIG) | ||
400 | semantic_error("probe point is too long."); | ||
401 | else if (ret < 0) | ||
402 | die("Failed to synthesize a probe point."); | ||
403 | } | ||
404 | |||
405 | /* Settng up probe points */ | ||
406 | snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); | ||
407 | fd = open(buf, O_WRONLY, O_APPEND); | ||
408 | if (fd < 0) { | ||
409 | if (errno == ENOENT) | ||
410 | die("kprobe_events file does not exist - please rebuild with CONFIG_KPROBE_TRACER."); | ||
411 | else | ||
412 | die("Could not open kprobe_events file: %s", | ||
413 | strerror(errno)); | ||
414 | } | ||
415 | for (j = 0; j < session.nr_probe; j++) { | ||
416 | pp = &session.probes[j]; | ||
417 | if (pp->found == 1) { | ||
418 | snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n", | ||
419 | pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, | ||
420 | pp->function, pp->offset, pp->probes[0]); | ||
421 | write_new_event(fd, buf); | ||
422 | } else | ||
423 | for (i = 0; i < pp->found; i++) { | ||
424 | snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n", | ||
425 | pp->retprobe ? 'r' : 'p', | ||
426 | PERFPROBE_GROUP, | ||
427 | pp->function, pp->offset, i, | ||
428 | pp->probes[0]); | ||
429 | write_new_event(fd, buf); | ||
430 | } | ||
431 | } | ||
432 | close(fd); | ||
433 | return 0; | ||
434 | } | ||
435 | |||
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index e97954a0a3d2..9b02d85091fe 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h | |||
@@ -27,5 +27,6 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix); | |||
27 | extern int cmd_top(int argc, const char **argv, const char *prefix); | 27 | extern int cmd_top(int argc, const char **argv, const char *prefix); |
28 | extern int cmd_trace(int argc, const char **argv, const char *prefix); | 28 | extern int cmd_trace(int argc, const char **argv, const char *prefix); |
29 | extern int cmd_version(int argc, const char **argv, const char *prefix); | 29 | extern int cmd_version(int argc, const char **argv, const char *prefix); |
30 | extern int cmd_probe(int argc, const char **argv, const char *prefix); | ||
30 | 31 | ||
31 | #endif | 32 | #endif |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d37b16cf18ff..d3a6e18e4a5e 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -13,3 +13,4 @@ perf-stat mainporcelain common | |||
13 | perf-timechart mainporcelain common | 13 | perf-timechart mainporcelain common |
14 | perf-top mainporcelain common | 14 | perf-top mainporcelain common |
15 | perf-trace mainporcelain common | 15 | perf-trace mainporcelain common |
16 | perf-probe mainporcelain common | ||
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 53359ebb369a..89b82acac7d9 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -298,6 +298,7 @@ static void handle_internal_command(int argc, const char **argv) | |||
298 | { "version", cmd_version, 0 }, | 298 | { "version", cmd_version, 0 }, |
299 | { "trace", cmd_trace, 0 }, | 299 | { "trace", cmd_trace, 0 }, |
300 | { "sched", cmd_sched, 0 }, | 300 | { "sched", cmd_sched, 0 }, |
301 | { "probe", cmd_probe, 0 }, | ||
301 | }; | 302 | }; |
302 | unsigned int i; | 303 | unsigned int i; |
303 | static const char ext[] = STRIP_EXTENSION; | 304 | static const char ext[] = STRIP_EXTENSION; |
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c new file mode 100644 index 000000000000..293cdfc1b8ca --- /dev/null +++ b/tools/perf/util/probe-finder.c | |||
@@ -0,0 +1,732 @@ | |||
1 | /* | ||
2 | * probe-finder.c : C expression to kprobe event converter | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <sys/utsname.h> | ||
23 | #include <sys/types.h> | ||
24 | #include <sys/stat.h> | ||
25 | #include <fcntl.h> | ||
26 | #include <errno.h> | ||
27 | #include <stdio.h> | ||
28 | #include <unistd.h> | ||
29 | #include <getopt.h> | ||
30 | #include <stdlib.h> | ||
31 | #include <string.h> | ||
32 | #include <stdarg.h> | ||
33 | #include <ctype.h> | ||
34 | |||
35 | #include "event.h" | ||
36 | #include "debug.h" | ||
37 | #include "util.h" | ||
38 | #include "probe-finder.h" | ||
39 | |||
40 | |||
/*
 * Dwarf_Die linkage to the parent DIE.
 * One node exists per nesting level while the DIE tree is walked, so a
 * callback can climb from the DIE it was given back towards the top.
 */
struct die_link {
	struct die_link *parent;	/* Parent die (NULL at the top of a search) */
	Dwarf_Die die;			/* Current die */
};
46 | |||
/*
 * libdwarf handle and error slot shared by every helper in this file.
 * __dw_debug is set up by dwarf_init() in find_probepoint().
 */
static Dwarf_Debug __dw_debug;
static Dwarf_Error __dw_error;
49 | |||
50 | /* | ||
51 | * Generic dwarf analysis helpers | ||
52 | */ | ||
53 | |||
/*
 * Register name tables for kprobe-tracer.
 * The array index is the register number taken from a DWARF location
 * operation (DW_OP_regN / DW_OP_bregN / DW_OP_regx / DW_OP_bregx).
 */
#define X86_32_MAX_REGS 8
const char *x86_32_regs_table[X86_32_MAX_REGS] = {
	"%ax",
	"%cx",
	"%dx",
	"%bx",
	"$stack",	/* Stack address instead of %sp */
	"%bp",
	"%si",
	"%di",
};

#define X86_64_MAX_REGS 16
const char *x86_64_regs_table[X86_64_MAX_REGS] = {
	"%ax",
	"%dx",
	"%cx",
	"%bx",
	"%si",
	"%di",
	"%bp",
	"%sp",
	"%r8",
	"%r9",
	"%r10",
	"%r11",
	"%r12",
	"%r13",
	"%r14",
	"%r15",
};

/* TODO: switching by dwarf address size */
#ifdef __x86_64__
#define ARCH_MAX_REGS X86_64_MAX_REGS
#define arch_regs_table x86_64_regs_table
#else
#define ARCH_MAX_REGS X86_32_MAX_REGS
#define arch_regs_table x86_32_regs_table
#endif

/*
 * Return architecture dependent register string (for kprobe-tracer).
 * @n: register number, used as an index into arch_regs_table.
 * Returns NULL when @n is outside the table. Valid indexes are
 * 0 .. ARCH_MAX_REGS - 1, so the bound check must be strict.
 */
static const char *get_arch_regstr(unsigned int n)
{
	return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
}
100 | |||
/*
 * Compare the tails of two strings, walking backwards from their ends.
 * Return 0 if the whole of either string is the same as the tail part of
 * the other (so an empty string matches anything). Otherwise return the
 * difference of the first mismatching characters, counted from the end.
 */
static int strtailcmp(const char *s1, const char *s2)
{
	int i1 = strlen(s1);
	int i2 = strlen(s2);

	/* ">= 0": the first character of the shorter string must be compared too */
	while (--i1 >= 0 && --i2 >= 0) {
		if (s1[i1] != s2[i2])
			return s1[i1] - s2[i2];
	}
	return 0;
}
115 | |||
116 | /* Find the fileno of the target file. */ | ||
117 | static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname) | ||
118 | { | ||
119 | Dwarf_Signed cnt, i; | ||
120 | Dwarf_Unsigned found = 0; | ||
121 | char **srcs; | ||
122 | int ret; | ||
123 | |||
124 | if (!fname) | ||
125 | return 0; | ||
126 | |||
127 | ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error); | ||
128 | if (ret == DW_DLV_OK) { | ||
129 | for (i = 0; i < cnt && !found; i++) { | ||
130 | if (strtailcmp(srcs[i], fname) == 0) | ||
131 | found = i + 1; | ||
132 | dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); | ||
133 | } | ||
134 | for (; i < cnt; i++) | ||
135 | dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); | ||
136 | dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); | ||
137 | } | ||
138 | if (found) | ||
139 | pr_debug("found fno: %d\n", (int)found); | ||
140 | return found; | ||
141 | } | ||
142 | |||
143 | /* Compare diename and tname */ | ||
144 | static int die_compare_name(Dwarf_Die dw_die, const char *tname) | ||
145 | { | ||
146 | char *name; | ||
147 | int ret; | ||
148 | ret = dwarf_diename(dw_die, &name, &__dw_error); | ||
149 | DIE_IF(ret == DW_DLV_ERROR); | ||
150 | if (ret == DW_DLV_OK) { | ||
151 | ret = strcmp(tname, name); | ||
152 | dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); | ||
153 | } else | ||
154 | ret = -1; | ||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | /* Check the address is in the subprogram(function). */ | ||
159 | static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, | ||
160 | Dwarf_Signed *offs) | ||
161 | { | ||
162 | Dwarf_Addr lopc, hipc; | ||
163 | int ret; | ||
164 | |||
165 | /* TODO: check ranges */ | ||
166 | ret = dwarf_lowpc(sp_die, &lopc, &__dw_error); | ||
167 | DIE_IF(ret == DW_DLV_ERROR); | ||
168 | if (ret == DW_DLV_NO_ENTRY) | ||
169 | return 0; | ||
170 | ret = dwarf_highpc(sp_die, &hipc, &__dw_error); | ||
171 | DIE_IF(ret != DW_DLV_OK); | ||
172 | if (lopc <= addr && addr < hipc) { | ||
173 | *offs = addr - lopc; | ||
174 | return 1; | ||
175 | } else | ||
176 | return 0; | ||
177 | } | ||
178 | |||
/*
 * Check whether the die is an inlined function.
 * Only tests for the presence of a DW_AT_inline attribute; the value of
 * the attribute is not inspected. A libdwarf error is fatal.
 */
static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die)
{
	/* TODO: check strictly */
	Dwarf_Bool inl;
	int ret;

	ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error);
	DIE_IF(ret == DW_DLV_ERROR);
	return inl;
}
190 | |||
/*
 * Get the offset of abstract_origin.
 * Reads DW_AT_abstract_origin of dw_die and returns the reference offset
 * reported by dwarf_formref(). A missing attribute or an unreadable
 * reference is fatal.
 */
static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die)
{
	Dwarf_Attribute attr;
	Dwarf_Off cu_offs;
	int ret;

	ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error);
	DIE_IF(ret != DW_DLV_OK);
	ret = dwarf_formref(attr, &cu_offs, &__dw_error);
	DIE_IF(ret != DW_DLV_OK);
	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
	return cu_offs;
}
205 | |||
206 | /* Get entry pc(or low pc, 1st entry of ranges) of the die */ | ||
207 | static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die) | ||
208 | { | ||
209 | Dwarf_Attribute attr; | ||
210 | Dwarf_Addr addr; | ||
211 | Dwarf_Off offs; | ||
212 | Dwarf_Ranges *ranges; | ||
213 | Dwarf_Signed cnt; | ||
214 | int ret; | ||
215 | |||
216 | /* Try to get entry pc */ | ||
217 | ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error); | ||
218 | DIE_IF(ret == DW_DLV_ERROR); | ||
219 | if (ret == DW_DLV_OK) { | ||
220 | ret = dwarf_formaddr(attr, &addr, &__dw_error); | ||
221 | DIE_IF(ret != DW_DLV_OK); | ||
222 | dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); | ||
223 | return addr; | ||
224 | } | ||
225 | |||
226 | /* Try to get low pc */ | ||
227 | ret = dwarf_lowpc(dw_die, &addr, &__dw_error); | ||
228 | DIE_IF(ret == DW_DLV_ERROR); | ||
229 | if (ret == DW_DLV_OK) | ||
230 | return addr; | ||
231 | |||
232 | /* Try to get ranges */ | ||
233 | ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error); | ||
234 | DIE_IF(ret != DW_DLV_OK); | ||
235 | ret = dwarf_formref(attr, &offs, &__dw_error); | ||
236 | DIE_IF(ret != DW_DLV_OK); | ||
237 | ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL, | ||
238 | &__dw_error); | ||
239 | DIE_IF(ret != DW_DLV_OK); | ||
240 | addr = ranges[0].dwr_addr1; | ||
241 | dwarf_ranges_dealloc(__dw_debug, ranges, cnt); | ||
242 | return addr; | ||
243 | } | ||
244 | |||
/*
 * Search a Die from Die tree.
 *
 * Visits cur_link->die, then its children (recursively), then its
 * following siblings, calling die_cb on each DIE. The walk stops as soon
 * as a callback returns non-zero and that value is returned; 0 is returned
 * when the siblings are exhausted without a hit.
 *
 * Note: cur_link->die is deallocated in this function (and so is every
 * sibling it advances to), except when die_cb is NULL, in which case the
 * function returns 0 without touching the DIE.
 */
static int __search_die_tree(struct die_link *cur_link,
			     int (*die_cb)(struct die_link *, void *),
			     void *data)
{
	Dwarf_Die new_die;
	struct die_link new_link;
	int ret;

	if (!die_cb)
		return 0;

	/* Check current die */
	while (!(ret = die_cb(cur_link, data))) {
		/* Check child die */
		ret = dwarf_child(cur_link->die, &new_die, &__dw_error);
		DIE_IF(ret == DW_DLV_ERROR);
		if (ret == DW_DLV_OK) {
			/* The child link points back at us so callbacks can climb up */
			new_link.parent = cur_link;
			new_link.die = new_die;
			ret = __search_die_tree(&new_link, die_cb, data);
			if (ret)
				break;
		}

		/* Move to next sibling */
		ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die,
				      &__dw_error);
		DIE_IF(ret == DW_DLV_ERROR);
		/* The current die is released before the link is re-pointed */
		dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
		cur_link->die = new_die;
		/* No sibling left: the old die is already freed, nothing to release */
		if (ret == DW_DLV_NO_ENTRY)
			return 0;
	}
	/* Reached on a callback hit or a hit in the subtree: release the current die */
	dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
	return ret;
}
285 | |||
286 | /* Search a die in its children's die tree */ | ||
287 | static int search_die_from_children(Dwarf_Die parent_die, | ||
288 | int (*die_cb)(struct die_link *, void *), | ||
289 | void *data) | ||
290 | { | ||
291 | struct die_link new_link; | ||
292 | int ret; | ||
293 | |||
294 | new_link.parent = NULL; | ||
295 | ret = dwarf_child(parent_die, &new_link.die, &__dw_error); | ||
296 | DIE_IF(ret == DW_DLV_ERROR); | ||
297 | if (ret == DW_DLV_OK) | ||
298 | return __search_die_tree(&new_link, die_cb, data); | ||
299 | else | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | /* Find a locdesc corresponding to the address */ | ||
304 | static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, | ||
305 | Dwarf_Addr addr) | ||
306 | { | ||
307 | Dwarf_Signed lcnt; | ||
308 | Dwarf_Locdesc **llbuf; | ||
309 | int ret, i; | ||
310 | |||
311 | ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error); | ||
312 | DIE_IF(ret != DW_DLV_OK); | ||
313 | ret = DW_DLV_NO_ENTRY; | ||
314 | for (i = 0; i < lcnt; ++i) { | ||
315 | if (llbuf[i]->ld_lopc <= addr && | ||
316 | llbuf[i]->ld_hipc > addr) { | ||
317 | memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc)); | ||
318 | desc->ld_s = | ||
319 | malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); | ||
320 | DIE_IF(desc->ld_s == NULL); | ||
321 | memcpy(desc->ld_s, llbuf[i]->ld_s, | ||
322 | sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); | ||
323 | ret = DW_DLV_OK; | ||
324 | break; | ||
325 | } | ||
326 | dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); | ||
327 | dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); | ||
328 | } | ||
329 | /* Releasing loop */ | ||
330 | for (; i < lcnt; ++i) { | ||
331 | dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); | ||
332 | dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); | ||
333 | } | ||
334 | dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST); | ||
335 | return ret; | ||
336 | } | ||
337 | |||
338 | /* Get decl_file attribute value (file number) */ | ||
339 | static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die) | ||
340 | { | ||
341 | Dwarf_Attribute attr; | ||
342 | Dwarf_Unsigned fno; | ||
343 | int ret; | ||
344 | |||
345 | ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error); | ||
346 | DIE_IF(ret != DW_DLV_OK); | ||
347 | dwarf_formudata(attr, &fno, &__dw_error); | ||
348 | DIE_IF(ret != DW_DLV_OK); | ||
349 | dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); | ||
350 | return fno; | ||
351 | } | ||
352 | |||
353 | /* Get decl_line attribute value (line number) */ | ||
354 | static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die) | ||
355 | { | ||
356 | Dwarf_Attribute attr; | ||
357 | Dwarf_Unsigned lno; | ||
358 | int ret; | ||
359 | |||
360 | ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error); | ||
361 | DIE_IF(ret != DW_DLV_OK); | ||
362 | dwarf_formudata(attr, &lno, &__dw_error); | ||
363 | DIE_IF(ret != DW_DLV_OK); | ||
364 | dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); | ||
365 | return lno; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Probe finder related functions | ||
370 | */ | ||
371 | |||
372 | /* Show a location */ | ||
373 | static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) | ||
374 | { | ||
375 | Dwarf_Small op; | ||
376 | Dwarf_Unsigned regn; | ||
377 | Dwarf_Signed offs; | ||
378 | int deref = 0, ret; | ||
379 | const char *regs; | ||
380 | |||
381 | op = loc->lr_atom; | ||
382 | |||
383 | /* If this is based on frame buffer, set the offset */ | ||
384 | if (op == DW_OP_fbreg) { | ||
385 | deref = 1; | ||
386 | offs = (Dwarf_Signed)loc->lr_number; | ||
387 | op = pf->fbloc.ld_s[0].lr_atom; | ||
388 | loc = &pf->fbloc.ld_s[0]; | ||
389 | } else | ||
390 | offs = 0; | ||
391 | |||
392 | if (op >= DW_OP_breg0 && op <= DW_OP_breg31) { | ||
393 | regn = op - DW_OP_breg0; | ||
394 | offs += (Dwarf_Signed)loc->lr_number; | ||
395 | deref = 1; | ||
396 | } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) { | ||
397 | regn = op - DW_OP_reg0; | ||
398 | } else if (op == DW_OP_bregx) { | ||
399 | regn = loc->lr_number; | ||
400 | offs += (Dwarf_Signed)loc->lr_number2; | ||
401 | deref = 1; | ||
402 | } else if (op == DW_OP_regx) { | ||
403 | regn = loc->lr_number; | ||
404 | } else | ||
405 | die("Dwarf_OP %d is not supported.\n", op); | ||
406 | |||
407 | regs = get_arch_regstr(regn); | ||
408 | if (!regs) | ||
409 | die("%lld exceeds max register number.\n", regn); | ||
410 | |||
411 | if (deref) | ||
412 | ret = snprintf(pf->buf, pf->len, | ||
413 | " %s=%+lld(%s)", pf->var, offs, regs); | ||
414 | else | ||
415 | ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); | ||
416 | DIE_IF(ret < 0); | ||
417 | DIE_IF(ret >= pf->len); | ||
418 | } | ||
419 | |||
/*
 * Show a variable in kprobe event format.
 * Looks up the DW_AT_location of vr_die that is valid at the probe
 * address (pf->addr relative to pf->cu_base) and hands its single
 * operation to show_location(). Dies when no location is available at
 * this address.
 */
static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
{
	Dwarf_Attribute attr;
	Dwarf_Locdesc ld;
	int ret;

	ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error);
	if (ret != DW_DLV_OK)
		goto error;
	ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base));
	if (ret != DW_DLV_OK)
		goto error;
	/* TODO? */
	/* Only location descriptions made of exactly one operation are handled */
	DIE_IF(ld.ld_cents != 1);
	show_location(&ld.ld_s[0], pf);
	/* ld.ld_s is the malloc()ed copy made by attr_get_locdesc() */
	free(ld.ld_s);
	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
	return ;
error:
	die("Failed to find the location of %s at this address.\n"
	    " Perhaps, it has been optimized out.\n", pf->var);
}
443 | |||
444 | static int variable_callback(struct die_link *dlink, void *data) | ||
445 | { | ||
446 | struct probe_finder *pf = (struct probe_finder *)data; | ||
447 | Dwarf_Half tag; | ||
448 | int ret; | ||
449 | |||
450 | ret = dwarf_tag(dlink->die, &tag, &__dw_error); | ||
451 | DIE_IF(ret == DW_DLV_ERROR); | ||
452 | if ((tag == DW_TAG_formal_parameter || | ||
453 | tag == DW_TAG_variable) && | ||
454 | (die_compare_name(dlink->die, pf->var) == 0)) { | ||
455 | show_variable(dlink->die, pf); | ||
456 | return 1; | ||
457 | } | ||
458 | /* TODO: Support struct members and arrays */ | ||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | /* Find a variable in a subprogram die */ | ||
463 | static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) | ||
464 | { | ||
465 | int ret; | ||
466 | |||
467 | if (!is_c_varname(pf->var)) { | ||
468 | /* Output raw parameters */ | ||
469 | ret = snprintf(pf->buf, pf->len, " %s", pf->var); | ||
470 | DIE_IF(ret < 0); | ||
471 | DIE_IF(ret >= pf->len); | ||
472 | return ; | ||
473 | } | ||
474 | |||
475 | pr_debug("Searching '%s' variable in context.\n", pf->var); | ||
476 | /* Search child die for local variables and parameters. */ | ||
477 | ret = search_die_from_children(sp_die, variable_callback, pf); | ||
478 | if (!ret) | ||
479 | die("Failed to find '%s' in this function.\n", pf->var); | ||
480 | } | ||
481 | |||
/*
 * Get a frame base on the address.
 * Stores in pf->fbloc the DW_AT_frame_base location description of sp_die
 * that covers the probe address (pf->addr relative to pf->cu_base).
 * pf->fbloc.ld_s is heap allocated; release it with
 * free_current_frame_base(). A missing frame base is fatal.
 */
static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf)
{
	Dwarf_Attribute attr;
	int ret;

	ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error);
	DIE_IF(ret != DW_DLV_OK);
	ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base));
	DIE_IF(ret != DW_DLV_OK);
	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
}
494 | |||
/* Release the frame base saved by get_current_frame_base() and clear it */
static void free_current_frame_base(struct probe_finder *pf)
{
	free(pf->fbloc.ld_s);
	memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc));
}
500 | |||
501 | /* Show a probe point to output buffer */ | ||
502 | static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, | ||
503 | struct probe_finder *pf) | ||
504 | { | ||
505 | struct probe_point *pp = pf->pp; | ||
506 | char *name; | ||
507 | char tmp[MAX_PROBE_BUFFER]; | ||
508 | int ret, i, len; | ||
509 | |||
510 | /* Output name of probe point */ | ||
511 | ret = dwarf_diename(sp_die, &name, &__dw_error); | ||
512 | DIE_IF(ret == DW_DLV_ERROR); | ||
513 | if (ret == DW_DLV_OK) { | ||
514 | ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, | ||
515 | (unsigned int)offs); | ||
516 | /* Copy the function name if possible */ | ||
517 | if (!pp->function) { | ||
518 | pp->function = strdup(name); | ||
519 | pp->offset = offs; | ||
520 | } | ||
521 | dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); | ||
522 | } else { | ||
523 | /* This function has no name. */ | ||
524 | ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); | ||
525 | if (!pp->function) { | ||
526 | /* TODO: Use _stext */ | ||
527 | pp->function = strdup(""); | ||
528 | pp->offset = (int)pf->addr; | ||
529 | } | ||
530 | } | ||
531 | DIE_IF(ret < 0); | ||
532 | DIE_IF(ret >= MAX_PROBE_BUFFER); | ||
533 | len = ret; | ||
534 | pr_debug("Probe point found: %s\n", tmp); | ||
535 | |||
536 | /* Find each argument */ | ||
537 | get_current_frame_base(sp_die, pf); | ||
538 | for (i = 0; i < pp->nr_args; i++) { | ||
539 | pf->var = pp->args[i]; | ||
540 | pf->buf = &tmp[len]; | ||
541 | pf->len = MAX_PROBE_BUFFER - len; | ||
542 | find_variable(sp_die, pf); | ||
543 | len += strlen(pf->buf); | ||
544 | } | ||
545 | free_current_frame_base(pf); | ||
546 | |||
547 | pp->probes[pp->found] = strdup(tmp); | ||
548 | pp->found++; | ||
549 | } | ||
550 | |||
551 | static int probeaddr_callback(struct die_link *dlink, void *data) | ||
552 | { | ||
553 | struct probe_finder *pf = (struct probe_finder *)data; | ||
554 | Dwarf_Half tag; | ||
555 | Dwarf_Signed offs; | ||
556 | int ret; | ||
557 | |||
558 | ret = dwarf_tag(dlink->die, &tag, &__dw_error); | ||
559 | DIE_IF(ret == DW_DLV_ERROR); | ||
560 | /* Check the address is in this subprogram */ | ||
561 | if (tag == DW_TAG_subprogram && | ||
562 | die_within_subprogram(dlink->die, pf->addr, &offs)) { | ||
563 | show_probepoint(dlink->die, offs, pf); | ||
564 | return 1; | ||
565 | } | ||
566 | return 0; | ||
567 | } | ||
568 | |||
/*
 * Find probe point from its line number.
 * Scans the line table of pf->cu_die for entries whose file number is
 * pf->fno and whose line is pf->lno. For each hit, pf->addr is set to the
 * line address and the probe point is emitted for the subprogram
 * containing it. A hit that lies in no subprogram is fatal.
 */
static void find_by_line(struct probe_finder *pf)
{
	Dwarf_Signed cnt, i, clm;
	Dwarf_Line *lines;
	Dwarf_Unsigned lineno = 0;
	Dwarf_Addr addr;
	Dwarf_Unsigned fno;
	int ret;

	ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error);
	DIE_IF(ret != DW_DLV_OK);

	for (i = 0; i < cnt; i++) {
		ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
		DIE_IF(ret != DW_DLV_OK);
		if (fno != pf->fno)
			continue;

		ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
		DIE_IF(ret != DW_DLV_OK);
		if (lineno != pf->lno)
			continue;

		/* The column (clm) is only used for the debug message below */
		ret = dwarf_lineoff(lines[i], &clm, &__dw_error);
		DIE_IF(ret != DW_DLV_OK);

		ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
		DIE_IF(ret != DW_DLV_OK);
		pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n",
			 (int)i, (unsigned)lineno, (int)clm, addr);
		pf->addr = addr;
		/* Search a real subprogram including this line, */
		ret = search_die_from_children(pf->cu_die,
					       probeaddr_callback, pf);
		if (ret == 0)
			die("Probe point is not found in subprograms.\n");
		/* Continuing, because target line might be inlined. */
	}
	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
}
610 | |||
611 | /* Search function from function name */ | ||
612 | static int probefunc_callback(struct die_link *dlink, void *data) | ||
613 | { | ||
614 | struct probe_finder *pf = (struct probe_finder *)data; | ||
615 | struct probe_point *pp = pf->pp; | ||
616 | struct die_link *lk; | ||
617 | Dwarf_Signed offs; | ||
618 | Dwarf_Half tag; | ||
619 | int ret; | ||
620 | |||
621 | ret = dwarf_tag(dlink->die, &tag, &__dw_error); | ||
622 | DIE_IF(ret == DW_DLV_ERROR); | ||
623 | if (tag == DW_TAG_subprogram) { | ||
624 | if (die_compare_name(dlink->die, pp->function) == 0) { | ||
625 | if (pp->line) { /* Function relative line */ | ||
626 | pf->fno = die_get_decl_file(dlink->die); | ||
627 | pf->lno = die_get_decl_line(dlink->die) | ||
628 | + pp->line; | ||
629 | find_by_line(pf); | ||
630 | return 1; | ||
631 | } | ||
632 | if (die_inlined_subprogram(dlink->die)) { | ||
633 | /* Inlined function, save it. */ | ||
634 | ret = dwarf_die_CU_offset(dlink->die, | ||
635 | &pf->inl_offs, | ||
636 | &__dw_error); | ||
637 | DIE_IF(ret != DW_DLV_OK); | ||
638 | pr_debug("inline definition offset %lld\n", | ||
639 | pf->inl_offs); | ||
640 | return 0; /* Continue to search */ | ||
641 | } | ||
642 | /* Get probe address */ | ||
643 | pf->addr = die_get_entrypc(dlink->die); | ||
644 | pf->addr += pp->offset; | ||
645 | /* TODO: Check the address in this function */ | ||
646 | show_probepoint(dlink->die, pp->offset, pf); | ||
647 | return 1; /* Exit; no same symbol in this CU. */ | ||
648 | } | ||
649 | } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) { | ||
650 | if (die_get_abstract_origin(dlink->die) == pf->inl_offs) { | ||
651 | /* Get probe address */ | ||
652 | pf->addr = die_get_entrypc(dlink->die); | ||
653 | pf->addr += pp->offset; | ||
654 | pr_debug("found inline addr: 0x%llx\n", pf->addr); | ||
655 | /* Inlined function. Get a real subprogram */ | ||
656 | for (lk = dlink->parent; lk != NULL; lk = lk->parent) { | ||
657 | tag = 0; | ||
658 | dwarf_tag(lk->die, &tag, &__dw_error); | ||
659 | DIE_IF(ret == DW_DLV_ERROR); | ||
660 | if (tag == DW_TAG_subprogram && | ||
661 | !die_inlined_subprogram(lk->die)) | ||
662 | goto found; | ||
663 | } | ||
664 | die("Failed to find real subprogram.\n"); | ||
665 | found: | ||
666 | /* Get offset from subprogram */ | ||
667 | ret = die_within_subprogram(lk->die, pf->addr, &offs); | ||
668 | DIE_IF(!ret); | ||
669 | show_probepoint(lk->die, offs, pf); | ||
670 | /* Continue to search */ | ||
671 | } | ||
672 | } | ||
673 | return 0; | ||
674 | } | ||
675 | |||
/* Find probe points by function name: walk the DIEs below the current CU */
static void find_by_func(struct probe_finder *pf)
{
	search_die_from_children(pf->cu_die, probefunc_callback, pf);
}
680 | |||
681 | /* Find a probe point */ | ||
682 | int find_probepoint(int fd, struct probe_point *pp) | ||
683 | { | ||
684 | Dwarf_Half addr_size = 0; | ||
685 | Dwarf_Unsigned next_cuh = 0; | ||
686 | int cu_number = 0, ret; | ||
687 | struct probe_finder pf = {.pp = pp}; | ||
688 | |||
689 | ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); | ||
690 | if (ret != DW_DLV_OK) { | ||
691 | pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n"); | ||
692 | return -ENOENT; | ||
693 | } | ||
694 | |||
695 | pp->found = 0; | ||
696 | while (++cu_number) { | ||
697 | /* Search CU (Compilation Unit) */ | ||
698 | ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, | ||
699 | &addr_size, &next_cuh, &__dw_error); | ||
700 | DIE_IF(ret == DW_DLV_ERROR); | ||
701 | if (ret == DW_DLV_NO_ENTRY) | ||
702 | break; | ||
703 | |||
704 | /* Get the DIE(Debugging Information Entry) of this CU */ | ||
705 | ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error); | ||
706 | DIE_IF(ret != DW_DLV_OK); | ||
707 | |||
708 | /* Check if target file is included. */ | ||
709 | if (pp->file) | ||
710 | pf.fno = cu_find_fileno(pf.cu_die, pp->file); | ||
711 | |||
712 | if (!pp->file || pf.fno) { | ||
713 | /* Save CU base address (for frame_base) */ | ||
714 | ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error); | ||
715 | DIE_IF(ret == DW_DLV_ERROR); | ||
716 | if (ret == DW_DLV_NO_ENTRY) | ||
717 | pf.cu_base = 0; | ||
718 | if (pp->function) | ||
719 | find_by_func(&pf); | ||
720 | else { | ||
721 | pf.lno = pp->line; | ||
722 | find_by_line(&pf); | ||
723 | } | ||
724 | } | ||
725 | dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE); | ||
726 | } | ||
727 | ret = dwarf_finish(__dw_debug, &__dw_error); | ||
728 | DIE_IF(ret != DW_DLV_OK); | ||
729 | |||
730 | return pp->found; | ||
731 | } | ||
732 | |||
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h new file mode 100644 index 000000000000..bdebca6697d2 --- /dev/null +++ b/tools/perf/util/probe-finder.h | |||
@@ -0,0 +1,57 @@ | |||
1 | #ifndef _PROBE_FINDER_H | ||
2 | #define _PROBE_FINDER_H | ||
3 | |||
4 | #define MAX_PATH_LEN 256 | ||
5 | #define MAX_PROBE_BUFFER 1024 | ||
6 | #define MAX_PROBES 128 | ||
7 | |||
/*
 * Rough check whether name can be a C variable name: only the first
 * character is tested (a letter or '_'). Returns 1 if so, 0 otherwise.
 */
static inline int is_c_varname(const char *name)
{
	/* TODO */
	/* <ctype.h> functions need an unsigned char value; plain char may be negative */
	return isalpha((unsigned char)name[0]) || name[0] == '_';
}
13 | |||
/* One probe request: what to look for, and the probe definitions found for it */
struct probe_point {
	/* Inputs */
	char *file;	/* File name */
	int line;	/* Line number */

	char *function;	/* Function name */
	int offset;	/* Offset bytes */

	int nr_args;	/* Number of arguments */
	char **args;	/* Arguments */

	int retprobe;	/* Return probe */

	/* Output */
	int found;	/* Number of found probe points */
	char *probes[MAX_PROBES];	/* Output buffers (will be allocated)*/
};
31 | |||
32 | #ifndef NO_LIBDWARF | ||
33 | extern int find_probepoint(int fd, struct probe_point *pp); | ||
34 | |||
35 | #include <libdwarf/dwarf.h> | ||
36 | #include <libdwarf/libdwarf.h> | ||
37 | |||
/* Working state passed to the DIE callbacks while one probe_point is searched */
struct probe_finder {
	struct probe_point *pp;	/* Target probe point */

	/* For function searching */
	Dwarf_Addr addr;	/* Address */
	Dwarf_Unsigned fno;	/* File number */
	Dwarf_Unsigned lno;	/* Line number */
	Dwarf_Off inl_offs;	/* Inline offset */
	Dwarf_Die cu_die;	/* Current CU */

	/* For variable searching */
	Dwarf_Addr cu_base;	/* Current CU base address */
	Dwarf_Locdesc fbloc;	/* Location of Current Frame Base */
	const char *var;	/* Current variable name */
	char *buf;	/* Current output buffer */
	int len;	/* Length of output buffer */
};
55 | #endif /* NO_LIBDWARF */ | ||
56 | |||
57 | #endif /*_PROBE_FINDER_H */ | ||
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7bd5bdaeb235..f2203a0946bc 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -134,6 +134,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, | |||
134 | extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); | 134 | extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); |
135 | extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); | 135 | extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); |
136 | 136 | ||
137 | #include "../../../include/linux/stringify.h" | ||
138 | |||
/*
 * Call die() when cnd is true. The message is made of the source file,
 * the line number and the stringified text of the condition itself.
 */
#define DIE_IF(cnd)	\
	do { if (cnd)	\
		die(" at (" __FILE__ ":" __stringify(__LINE__) "): "	\
		    __stringify(cnd) "\n");	\
	} while (0)
144 | |||
145 | |||
137 | extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); | 146 | extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); |
138 | 147 | ||
139 | extern int prefixcmp(const char *str, const char *prefix); | 148 | extern int prefixcmp(const char *str, const char *prefix); |