aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/kprobes.c
diff options
context:
space:
mode:
authorMasami Hiramatsu <masami.hiramatsu.pt@hitachi.com>2012-03-05 08:32:09 -0500
committerIngo Molnar <mingo@elte.hu>2012-03-06 03:49:48 -0500
commit86b4ce3156c0dc140907ad03639564000cde694f (patch)
treeb244e8f5ff005c4e186d8094f4aca7051231146a /arch/x86/kernel/kprobes.c
parent262760894c60f0527b319327918dc41cb4b33df5 (diff)
x86/kprobes: Fix instruction recovery on optimized path
Current probed-instruction recovery expects that only the breakpoint instruction modifies instructions. However, since kprobes jump optimization can replace original instructions with a jump, that expectation is not sufficient. It may cause instruction decoding failure on a function where an optimized probe already exists. This bug can be reproduced easily as below: 1) find a target function address (any kprobe-able function is OK) $ grep __secure_computing /proc/kallsyms ffffffff810c19d0 T __secure_computing 2) decode the function $ objdump -d vmlinux --start-address=0xffffffff810c19d0 --stop-address=0xffffffff810c19eb vmlinux: file format elf64-x86-64 Disassembly of section .text: ffffffff810c19d0 <__secure_computing>: ffffffff810c19d0: 55 push %rbp ffffffff810c19d1: 48 89 e5 mov %rsp,%rbp ffffffff810c19d4: e8 67 8f 72 00 callq ffffffff817ea940 <mcount> ffffffff810c19d9: 65 48 8b 04 25 40 b8 mov %gs:0xb840,%rax ffffffff810c19e0: 00 00 ffffffff810c19e2: 83 b8 88 05 00 00 01 cmpl $0x1,0x588(%rax) ffffffff810c19e9: 74 05 je ffffffff810c19f0 <__secure_computing+0x20> 3) put a kprobe-event at an optimizable place, where no call/jump instruction falls within the 5 bytes. $ su - # cd /sys/kernel/debug/tracing # echo p __secure_computing+0x9 > kprobe_events 4) enable it and check that it is optimized. # echo 1 > events/kprobes/p___secure_computing_9/enable # cat ../kprobes/list ffffffff810c19d9 k __secure_computing+0x9 [OPTIMIZED] 5) put another kprobe on an instruction after the previous probe in the same function. # echo p __secure_computing+0x12 >> kprobe_events bash: echo: write error: Invalid argument # dmesg | tail -n 1 [ 1666.500016] Probing address(0xffffffff810c19e2) is not an instruction boundary. 6) however, if the kprobes optimization is disabled, it works. 
# echo 0 > /proc/sys/debug/kprobes-optimization # cat ../kprobes/list ffffffff810c19d9 k __secure_computing+0x9 # echo p __secure_computing+0x12 >> kprobe_events (no error) This is because kprobes doesn't recover an instruction which is overwritten with a relative jump by another kprobe when finding an instruction boundary. It only recovers the breakpoint instruction. This patch fixes kprobes to recover such instructions. With this fix: # echo p __secure_computing+0x9 > kprobe_events # echo 1 > events/kprobes/p___secure_computing_9/enable # cat ../kprobes/list ffffffff810c1aa9 k __secure_computing+0x9 [OPTIMIZED] # echo p __secure_computing+0x12 >> kprobe_events # cat ../kprobes/list ffffffff810c1aa9 k __secure_computing+0x9 [OPTIMIZED] ffffffff810c1ab2 k __secure_computing+0x12 [DISABLED] Changes in v4: - Fix a bug to ensure an optimized probe is really optimized by a jump. - Remove the kprobe_optready() dependency. - Clean up code in preparation for optprobe separation. Changes in v3: - Fix a build error when CONFIG_OPTPROBE=n. (Thanks, Ingo!) To fix the error, split the optprobe instruction recovery path from the kprobes path. - Clean up comments/styles. Changes in v2: - Fix a bug to recover the original instruction address in the RIP-relative instruction fixup. - Moved onto tip/master. Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: yrl.pp-manager.tt@hitachi.com Cc: systemtap@sourceware.org Cc: anderson@redhat.com Link: http://lkml.kernel.org/r/20120305133209.5982.36568.stgit@localhost.localdomain Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/kprobes.c')
-rw-r--r--arch/x86/kernel/kprobes.c140
1 file changed, 97 insertions, 43 deletions
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..6bec22f514b5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -207,13 +207,15 @@ retry:
207 } 207 }
208} 208}
209 209
210/* Recover the probed instruction at addr for further analysis. */ 210static unsigned long __recover_probed_insn(kprobe_opcode_t *buf,
211static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) 211 unsigned long addr)
212{ 212{
213 struct kprobe *kp; 213 struct kprobe *kp;
214
214 kp = get_kprobe((void *)addr); 215 kp = get_kprobe((void *)addr);
216 /* There is no probe, return original address */
215 if (!kp) 217 if (!kp)
216 return -EINVAL; 218 return addr;
217 219
218 /* 220 /*
219 * Basically, kp->ainsn.insn has an original instruction. 221 * Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +232,76 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 */ 232 */
231 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 233 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
232 buf[0] = kp->opcode; 234 buf[0] = kp->opcode;
233 return 0; 235 return (unsigned long)buf;
236}
237
238#ifdef CONFIG_OPTPROBES
239static unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf,
240 unsigned long addr)
241{
242 struct optimized_kprobe *op;
243 struct kprobe *kp;
244 long offs;
245 int i;
246
247 for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
248 kp = get_kprobe((void *)addr - i);
249 /* This function only handles jump-optimized kprobe */
250 if (kp && kprobe_optimized(kp)) {
251 op = container_of(kp, struct optimized_kprobe, kp);
252 /* If op->list is not empty, op is under optimizing */
253 if (list_empty(&op->list))
254 goto found;
255 }
256 }
257
258 return addr;
259found:
260 /*
261 * If the kprobe can be optimized, original bytes which can be
262 * overwritten by jump destination address. In this case, original
263 * bytes must be recovered from op->optinsn.copied_insn buffer.
264 */
265 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
266 if (addr == (unsigned long)kp->addr) {
267 buf[0] = kp->opcode;
268 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
269 } else {
270 offs = addr - (unsigned long)kp->addr - 1;
271 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
272 }
273
274 return (unsigned long)buf;
275}
276#else
277static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf,
278 unsigned long addr)
279{
280 return addr;
281}
282#endif
283
284/*
285 * Recover the probed instruction at addr for further analysis.
286 * Caller must lock kprobes by kprobe_mutex, or disable preemption
287 * for preventing to release referencing kprobes.
288 */
289static unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
290 unsigned long addr)
291{
292 unsigned long __addr;
293
294 __addr = __recover_optprobed_insn(buf, addr);
295 if (__addr != addr)
296 return __addr;
297
298 return __recover_probed_insn(buf, addr);
234} 299}
235 300
236/* Check if paddr is at an instruction boundary */ 301/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 302static int __kprobes can_probe(unsigned long paddr)
238{ 303{
239 int ret; 304 unsigned long addr, __addr, offset = 0;
240 unsigned long addr, offset = 0;
241 struct insn insn; 305 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 306 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 307
@@ -247,26 +311,24 @@ static int __kprobes can_probe(unsigned long paddr)
247 /* Decode instructions */ 311 /* Decode instructions */
248 addr = paddr - offset; 312 addr = paddr - offset;
249 while (addr < paddr) { 313 while (addr < paddr) {
250 kernel_insn_init(&insn, (void *)addr);
251 insn_get_opcode(&insn);
252
253 /* 314 /*
254 * Check if the instruction has been modified by another 315 * Check if the instruction has been modified by another
255 * kprobe, in which case we replace the breakpoint by the 316 * kprobe, in which case we replace the breakpoint by the
256 * original instruction in our buffer. 317 * original instruction in our buffer.
318 * Also, jump optimization will change the breakpoint to
319 * relative-jump. Since the relative-jump itself is
320 * normally used, we just go through if there is no kprobe.
257 */ 321 */
258 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { 322 __addr = recover_probed_instruction(buf, addr);
259 ret = recover_probed_instruction(buf, addr); 323 kernel_insn_init(&insn, (void *)__addr);
260 if (ret)
261 /*
262 * Another debugging subsystem might insert
263 * this breakpoint. In that case, we can't
264 * recover it.
265 */
266 return 0;
267 kernel_insn_init(&insn, buf);
268 }
269 insn_get_length(&insn); 324 insn_get_length(&insn);
325
326 /*
327 * Another debugging subsystem might insert this breakpoint.
328 * In that case, we can't recover it.
329 */
330 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
331 return 0;
270 addr += insn.length; 332 addr += insn.length;
271 } 333 }
272 334
@@ -302,21 +364,17 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
302static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) 364static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
303{ 365{
304 struct insn insn; 366 struct insn insn;
305 int ret;
306 kprobe_opcode_t buf[MAX_INSN_SIZE]; 367 kprobe_opcode_t buf[MAX_INSN_SIZE];
368 u8 *orig_src = src; /* Back up original src for RIP calculation */
369
370 if (recover)
371 src = (u8 *)recover_probed_instruction(buf, (unsigned long)src);
307 372
308 kernel_insn_init(&insn, src); 373 kernel_insn_init(&insn, src);
309 if (recover) {
310 insn_get_opcode(&insn);
311 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
312 ret = recover_probed_instruction(buf,
313 (unsigned long)src);
314 if (ret)
315 return 0;
316 kernel_insn_init(&insn, buf);
317 }
318 }
319 insn_get_length(&insn); 374 insn_get_length(&insn);
375 /* Another subsystem puts a breakpoint, failed to recover */
376 if (recover && insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
377 return 0;
320 memcpy(dest, insn.kaddr, insn.length); 378 memcpy(dest, insn.kaddr, insn.length);
321 379
322#ifdef CONFIG_X86_64 380#ifdef CONFIG_X86_64
@@ -337,8 +395,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
337 * extension of the original signed 32-bit displacement would 395 * extension of the original signed 32-bit displacement would
338 * have given. 396 * have given.
339 */ 397 */
340 newdisp = (u8 *) src + (s64) insn.displacement.value - 398 newdisp = (u8 *) orig_src + (s64) insn.displacement.value - (u8 *) dest;
341 (u8 *) dest;
342 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 399 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
343 disp = (u8 *) dest + insn_offset_displacement(&insn); 400 disp = (u8 *) dest + insn_offset_displacement(&insn);
344 *(s32 *) disp = (s32) newdisp; 401 *(s32 *) disp = (s32) newdisp;
@@ -1271,8 +1328,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1271/* Decode whole function to ensure any instructions don't jump into target */ 1328/* Decode whole function to ensure any instructions don't jump into target */
1272static int __kprobes can_optimize(unsigned long paddr) 1329static int __kprobes can_optimize(unsigned long paddr)
1273{ 1330{
1274 int ret; 1331 unsigned long addr, __addr, size = 0, offset = 0;
1275 unsigned long addr, size = 0, offset = 0;
1276 struct insn insn; 1332 struct insn insn;
1277 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1333 kprobe_opcode_t buf[MAX_INSN_SIZE];
1278 1334
@@ -1301,15 +1357,12 @@ static int __kprobes can_optimize(unsigned long paddr)
1301 * we can't optimize kprobe in this function. 1357 * we can't optimize kprobe in this function.
1302 */ 1358 */
1303 return 0; 1359 return 0;
1304 kernel_insn_init(&insn, (void *)addr); 1360 __addr = recover_probed_instruction(buf, addr);
1305 insn_get_opcode(&insn); 1361 kernel_insn_init(&insn, (void *)__addr);
1306 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1307 ret = recover_probed_instruction(buf, addr);
1308 if (ret)
1309 return 0;
1310 kernel_insn_init(&insn, buf);
1311 }
1312 insn_get_length(&insn); 1362 insn_get_length(&insn);
1363 /* Another subsystem puts a breakpoint */
1364 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
1365 return 0;
1313 /* Recover address */ 1366 /* Recover address */
1314 insn.kaddr = (void *)addr; 1367 insn.kaddr = (void *)addr;
1315 insn.next_byte = (void *)(addr + insn.length); 1368 insn.next_byte = (void *)(addr + insn.length);
@@ -1366,6 +1419,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1366/* 1419/*
1367 * Copy replacing target instructions 1420 * Copy replacing target instructions
1368 * Target instructions MUST be relocatable (checked inside) 1421 * Target instructions MUST be relocatable (checked inside)
1422 * This is called when new aggr(opt)probe is allocated or reused.
1369 */ 1423 */
1370int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) 1424int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1371{ 1425{