author:    Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>  2012-03-05 08:32:09 -0500
committer: Ingo Molnar <mingo@elte.hu>                         2012-03-06 03:49:48 -0500
commit:    86b4ce3156c0dc140907ad03639564000cde694f
tree:      b244e8f5ff005c4e186d8094f4aca7051231146a /arch/x86/kernel/kprobes.c
parent:    262760894c60f0527b319327918dc41cb4b33df5
x86/kprobes: Fix instruction recovery on optimized path
The current probed-instruction recovery code assumes that only the
breakpoint instruction modifies a probed instruction. However, kprobes
jump optimization can replace the original instructions with a relative
jump, so that assumption is no longer sufficient, and instruction
decoding can fail on a function where an optimized probe already
exists.
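
To illustrate (a hypothetical user-space toy, not kernel code:
insn_len(), is_boundary() and the hard-coded length table are stand-ins
for the kernel's insn decoder and cover only the bytes shown in the
objdump listing of the reproduction steps below), the following sketch
shows how a linear sweep from the function start stops landing on +0x12
once a 5-byte relative jump is written over the probed instruction at
+0x9:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* First 0x12 bytes of __secure_computing, per the objdump listing. */
	static uint8_t text[0x12] = {
		0x55,				/* +0x00: push %rbp */
		0x48, 0x89, 0xe5,		/* +0x01: mov %rsp,%rbp */
		0xe8, 0x67, 0x8f, 0x72, 0x00,	/* +0x04: callq mcount */
		0x65, 0x48, 0x8b, 0x04, 0x25,	/* +0x09: mov %gs:0xb840,%rax */
		0x40, 0xb8, 0x00, 0x00,
	};

	/* Toy length table: just enough opcodes for these bytes. */
	static int insn_len(const uint8_t *p)
	{
		if ((p[0] & 0xf0) == 0x40)	/* REX prefix */
			return 1 + insn_len(p + 1);
		switch (p[0]) {
		case 0x55: return 1;	/* push %rbp */
		case 0x89: return 2;	/* mov %rsp,%rbp (this mod/rm form) */
		case 0xe8: return 5;	/* call rel32 */
		case 0xe9: return 5;	/* jmp rel32 (the optprobe patch) */
		case 0x65: return 9;	/* the gs-prefixed mov (this form) */
		case 0xb8: return 5;	/* mov $imm32,%eax */
		default:   return 1;	/* decoding has already derailed */
		}
	}

	/* Linear sweep from the function start, as can_probe() does. */
	static int is_boundary(size_t target)
	{
		size_t off = 0;

		while (off < target)
			off += insn_len(&text[off]);
		return off == target;
	}

	int main(void)
	{
		printf("+0x12 boundary before patching: %d\n", is_boundary(0x12));
		/* Jump optimization: 5-byte "jmp rel32" over the mov at +0x9. */
		text[0x9] = 0xe9;
		memset(&text[0xa], 0x90, 4);	/* placeholder rel32 bytes */
		printf("+0x12 boundary after patching:  %d\n", is_boundary(0x12));
		return 0;
	}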
This bug can be reproduced easily as follows:
1) find a target function address (any kprobe-able function is OK)
$ grep __secure_computing /proc/kallsyms
ffffffff810c19d0 T __secure_computing
2) decode the function
$ objdump -d vmlinux --start-address=0xffffffff810c19d0 --stop-address=0xffffffff810c19eb
vmlinux: file format elf64-x86-64
Disassembly of section .text:
ffffffff810c19d0 <__secure_computing>:
ffffffff810c19d0: 55 push %rbp
ffffffff810c19d1: 48 89 e5 mov %rsp,%rbp
ffffffff810c19d4: e8 67 8f 72 00 callq ffffffff817ea940 <mcount>
ffffffff810c19d9: 65 48 8b 04 25 40 b8 mov %gs:0xb840,%rax
ffffffff810c19e0: 00 00
ffffffff810c19e2: 83 b8 88 05 00 00 01 cmpl $0x1,0x588(%rax)
ffffffff810c19e9: 74 05 je ffffffff810c19f0 <__secure_computing+0x20>
3) put a kprobe event at an optimizable place, i.e. where no
call/jump target falls within the 5 bytes that will be replaced.
$ su -
# cd /sys/kernel/debug/tracing
# echo p __secure_computing+0x9 > kprobe_events
4) enable it and check it is optimized.
# echo 1 > events/kprobes/p___secure_computing_9/enable
# cat ../kprobes/list
ffffffff810c19d9 k __secure_computing+0x9 [OPTIMIZED]
5) put another kprobe on an instruction after the previous probe in
the same function.
# echo p __secure_computing+0x12 >> kprobe_events
bash: echo: write error: Invalid argument
# dmesg | tail -n 1
[ 1666.500016] Probing address(0xffffffff810c19e2) is not an instruction boundary.
6) however, if kprobes optimization is disabled, it works.
# echo 0 > /proc/sys/debug/kprobes-optimization
# cat ../kprobes/list
ffffffff810c19d9 k __secure_computing+0x9
# echo p __secure_computing+0x12 >> kprobe_events
(no error)
This happens because, when searching for an instruction boundary,
kprobes only recovers instructions that were overwritten with a
breakpoint; it does not recover an instruction that another kprobe
has overwritten with a relative jump. (This is also why disabling
optimization avoids the failure: the probed byte is then the 0xcc
breakpoint, which the existing recovery path does handle.)
This patch fixes kprobes to recover such jump-overwritten
instructions as well.
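
As a rough model of the fix (a user-space sketch under mock types:
mock_probe, find_probe() and recover() are hypothetical stand-ins for
struct optimized_kprobe, get_kprobe() and the new recovery helpers in
the diff below), the idea is to check the 5 bytes preceding an address
for a jump-optimized probe and, if one covers it, rebuild the original
bytes from the probe's saved copy:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define RELATIVEJUMP_SIZE	5	/* e9 + rel32 */
	#define RELATIVE_ADDR_SIZE	4	/* rel32 */
	#define MAX_INSN_SIZE		16

	struct mock_probe {
		unsigned long addr;	/* probed address */
		uint8_t opcode;		/* saved first byte of the insn */
		uint8_t copied[RELATIVE_ADDR_SIZE];	/* saved bytes 1..4 */
		int optimized;		/* jump-optimized? */
	};

	/* One registered probe, standing in for get_kprobe(). */
	static struct mock_probe probe = {
		.addr = 0x1009, .opcode = 0x65,
		.copied = { 0x48, 0x8b, 0x04, 0x25 }, .optimized = 1,
	};

	static struct mock_probe *find_probe(unsigned long addr)
	{
		return addr == probe.addr ? &probe : NULL;
	}

	/*
	 * Mirror of the fix: a jump-optimized probe at addr-i (0 <= i < 5)
	 * may have overwritten the bytes at addr, so rebuild them in buf
	 * from the probe's saved copy; otherwise decode in place.
	 */
	static unsigned long recover(uint8_t *buf, unsigned long addr,
				     const uint8_t *live)
	{
		for (int i = 0; i < RELATIVEJUMP_SIZE; i++) {
			struct mock_probe *p = find_probe(addr - i);

			if (!p || !p->optimized)
				continue;
			memcpy(buf, live, MAX_INSN_SIZE);
			if (addr == p->addr) {
				buf[0] = p->opcode;
				memcpy(buf + 1, p->copied, RELATIVE_ADDR_SIZE);
			} else {
				long offs = addr - p->addr - 1;

				memcpy(buf, p->copied + offs,
				       RELATIVE_ADDR_SIZE - offs);
			}
			return (unsigned long)buf;	/* decode the copy */
		}
		return addr;	/* untouched: decode in place */
	}

	int main(void)
	{
		/* Live text at 0x1009 after optimization: e9 + rel32, tail. */
		uint8_t live[MAX_INSN_SIZE] = { 0xe9, 0x11, 0x22, 0x33, 0x44,
						0x40, 0xb8, 0x00, 0x00 };
		uint8_t buf[MAX_INSN_SIZE];
		unsigned long src = recover(buf, 0x1009, live);

		/* Prints 0x65: the original first byte, from the copy. */
		printf("first byte to decode: 0x%02x\n",
		       *(const uint8_t *)src);
		return 0;
	}

The real recover_probed_instruction() below tries this optprobe path
first and falls back to the existing breakpoint recovery only when no
jump covers the address.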
With this fix:
# echo p __secure_computing+0x9 > kprobe_events
# echo 1 > events/kprobes/p___secure_computing_9/enable
# cat ../kprobes/list
ffffffff810c1aa9 k __secure_computing+0x9 [OPTIMIZED]
# echo p __secure_computing+0x12 >> kprobe_events
# cat ../kprobes/list
ffffffff810c1aa9 k __secure_computing+0x9 [OPTIMIZED]
ffffffff810c1ab2 k __secure_computing+0x12 [DISABLED]
Changes in v4:
- Fix a bug to ensure that an optimized probe is really optimized
with a jump.
- Remove the kprobe_optready() dependency.
- Clean up code in preparation for the optprobe separation.
Changes in v3:
- Fix a build error when CONFIG_OPTPROBES=n. (Thanks, Ingo!)
To fix the error, split the optprobe instruction-recovery
path from the kprobes path.
- Clean up comments/styles.
Changes in v2:
- Fix a bug in recovering the original instruction address in the
RIP-relative instruction fixup.
- Rebase onto tip/master.
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: systemtap@sourceware.org
Cc: anderson@redhat.com
Link: http://lkml.kernel.org/r/20120305133209.5982.36568.stgit@localhost.localdomain
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/kprobes.c')
 arch/x86/kernel/kprobes.c | 140 ++++++++++++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 43 deletions(-)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..6bec22f514b5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -207,13 +207,15 @@ retry:
 	}
 }
 
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+static unsigned long __recover_probed_insn(kprobe_opcode_t *buf,
+					   unsigned long addr)
 {
 	struct kprobe *kp;
-	kp = get_kprobe((void *)addr);
+
+	kp = get_kprobe((void *)addr);
+	/* There is no probe, return original address */
 	if (!kp)
-		return -EINVAL;
+		return addr;
 
 	/*
 	 * Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +232,76 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 	 */
 	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 	buf[0] = kp->opcode;
-	return 0;
+	return (unsigned long)buf;
+}
+
+#ifdef CONFIG_OPTPROBES
+static unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf,
+					      unsigned long addr)
+{
+	struct optimized_kprobe *op;
+	struct kprobe *kp;
+	long offs;
+	int i;
+
+	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+		kp = get_kprobe((void *)addr - i);
+		/* This function only handles jump-optimized kprobe */
+		if (kp && kprobe_optimized(kp)) {
+			op = container_of(kp, struct optimized_kprobe, kp);
+			/* If op->list is not empty, op is under optimizing */
+			if (list_empty(&op->list))
+				goto found;
+		}
+	}
+
+	return addr;
+found:
+	/*
+	 * If the kprobe can be optimized, original bytes which can be
+	 * overwritten by jump destination address. In this case, original
+	 * bytes must be recovered from op->optinsn.copied_insn buffer.
+	 */
+	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (addr == (unsigned long)kp->addr) {
+		buf[0] = kp->opcode;
+		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+	} else {
+		offs = addr - (unsigned long)kp->addr - 1;
+		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+	}
+
+	return (unsigned long)buf;
+}
+#else
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf,
+						     unsigned long addr)
+{
+	return addr;
+}
+#endif
+
+/*
+ * Recover the probed instruction at addr for further analysis.
+ * Caller must lock kprobes by kprobe_mutex, or disable preemption
+ * for preventing to release referencing kprobes.
+ */
+static unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+						unsigned long addr)
+{
+	unsigned long __addr;
+
+	__addr = __recover_optprobed_insn(buf, addr);
+	if (__addr != addr)
+		return __addr;
+
+	return __recover_probed_insn(buf, addr);
 }
 
 /* Check if paddr is at an instruction boundary */
 static int __kprobes can_probe(unsigned long paddr)
 {
-	int ret;
-	unsigned long addr, offset = 0;
+	unsigned long addr, __addr, offset = 0;
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
@@ -247,26 +311,24 @@ static int __kprobes can_probe(unsigned long paddr)
 	/* Decode instructions */
 	addr = paddr - offset;
 	while (addr < paddr) {
-		kernel_insn_init(&insn, (void *)addr);
-		insn_get_opcode(&insn);
-
 		/*
 		 * Check if the instruction has been modified by another
 		 * kprobe, in which case we replace the breakpoint by the
 		 * original instruction in our buffer.
+		 * Also, jump optimization will change the breakpoint to
+		 * relative-jump. Since the relative-jump itself is
+		 * normally used, we just go through if there is no kprobe.
 		 */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf, addr);
-			if (ret)
-				/*
-				 * Another debugging subsystem might insert
-				 * this breakpoint. In that case, we can't
-				 * recover it.
-				 */
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
+		__addr = recover_probed_instruction(buf, addr);
+		kernel_insn_init(&insn, (void *)__addr);
 		insn_get_length(&insn);
+
+		/*
+		 * Another debugging subsystem might insert this breakpoint.
+		 * In that case, we can't recover it.
+		 */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+			return 0;
 		addr += insn.length;
 	}
 
@@ -302,21 +364,17 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 {
 	struct insn insn;
-	int ret;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	u8 *orig_src = src;	/* Back up original src for RIP calculation */
+
+	if (recover)
+		src = (u8 *)recover_probed_instruction(buf, (unsigned long)src);
 
 	kernel_insn_init(&insn, src);
-	if (recover) {
-		insn_get_opcode(&insn);
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf,
-							 (unsigned long)src);
-			if (ret)
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
-	}
 	insn_get_length(&insn);
+	/* Another subsystem puts a breakpoint, failed to recover */
+	if (recover && insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+		return 0;
 	memcpy(dest, insn.kaddr, insn.length);
 
 #ifdef CONFIG_X86_64
@@ -337,8 +395,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 		 * extension of the original signed 32-bit displacement would
 		 * have given.
 		 */
-		newdisp = (u8 *) src + (s64) insn.displacement.value -
-			  (u8 *) dest;
+		newdisp = (u8 *) orig_src + (s64) insn.displacement.value - (u8 *) dest;
 		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
 		*(s32 *) disp = (s32) newdisp;
@@ -1271,8 +1328,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
 /* Decode whole function to ensure any instructions don't jump into target */
 static int __kprobes can_optimize(unsigned long paddr)
 {
-	int ret;
-	unsigned long addr, size = 0, offset = 0;
+	unsigned long addr, __addr, size = 0, offset = 0;
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
@@ -1301,15 +1357,12 @@ static int __kprobes can_optimize(unsigned long paddr)
 			 * we can't optimize kprobe in this function.
 			 */
 			return 0;
-		kernel_insn_init(&insn, (void *)addr);
-		insn_get_opcode(&insn);
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
-			ret = recover_probed_instruction(buf, addr);
-			if (ret)
-				return 0;
-			kernel_insn_init(&insn, buf);
-		}
+		__addr = recover_probed_instruction(buf, addr);
+		kernel_insn_init(&insn, (void *)__addr);
 		insn_get_length(&insn);
+		/* Another subsystem puts a breakpoint */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+			return 0;
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
@@ -1366,6 +1419,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 /*
  * Copy replacing target instructions
  * Target instructions MUST be relocatable (checked inside)
+ * This is called when new aggr(opt)probe is allocated or reused.
  */
 int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 {