diff options
Diffstat (limited to 'arch/x86/kernel/kprobes_64.c')
-rw-r--r-- | arch/x86/kernel/kprobes_64.c | 749 |
1 files changed, 0 insertions, 749 deletions
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c deleted file mode 100644 index 5df19a9f9239..000000000000 --- a/arch/x86/kernel/kprobes_64.c +++ /dev/null | |||
@@ -1,749 +0,0 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * | ||
20 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
21 | * Probes initial implementation ( includes contributions from | ||
22 | * Rusty Russell). | ||
23 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
24 | * interface to access function arguments. | ||
25 | * 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi | ||
26 | * <prasanna@in.ibm.com> adapted for x86_64 | ||
27 | * 2005-Mar Roland McGrath <roland@redhat.com> | ||
28 | * Fixed to handle %rip-relative addressing mode correctly. | ||
29 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | ||
30 | * Added function return probes functionality | ||
31 | */ | ||
32 | |||
33 | #include <linux/kprobes.h> | ||
34 | #include <linux/ptrace.h> | ||
35 | #include <linux/string.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/preempt.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/kdebug.h> | ||
40 | |||
41 | #include <asm/pgtable.h> | ||
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/alternative.h> | ||
44 | |||
/*
 * jprobe_return_end is a label emitted by the inline asm in
 * jprobe_return(); it is declared as a function so that its address can
 * be taken in longjmp_break_handler().
 */
void jprobe_return_end(void);
/* Defined further down; needed early by arch_prepare_kprobe(). */
static void __kprobes arch_copy_kprobe(struct kprobe *p);

/* Per-CPU pointer to the kprobe currently being processed; starts as NULL. */
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
/* Per-CPU control block holding kprobe status and saved flags/registers. */
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

/*
 * Functions on which return probes must not be placed.  The table ends
 * with a NULL entry.
 */
struct kretprobe_blackpoint kretprobe_blacklist[] = {
	{"__switch_to", }, /* This function switches only current task, but
			      doesn't switch kernel stack.*/
	{NULL, NULL}	/* Terminator */
};
/* Number of entries in the table above, including the terminator. */
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
57 | |||
58 | /* | ||
59 | * returns non-zero if opcode modifies the interrupt flag. | ||
60 | */ | ||
61 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | ||
62 | { | ||
63 | switch (*insn) { | ||
64 | case 0xfa: /* cli */ | ||
65 | case 0xfb: /* sti */ | ||
66 | case 0xcf: /* iret/iretd */ | ||
67 | case 0x9d: /* popf/popfd */ | ||
68 | return 1; | ||
69 | } | ||
70 | |||
71 | if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf) | ||
72 | return 1; | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | ||
77 | { | ||
78 | /* insn: must be on special executable page on x86_64. */ | ||
79 | p->ainsn.insn = get_insn_slot(); | ||
80 | if (!p->ainsn.insn) { | ||
81 | return -ENOMEM; | ||
82 | } | ||
83 | arch_copy_kprobe(p); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Determine if the instruction uses the %rip-relative addressing mode. | ||
89 | * If it does, return the address of the 32-bit displacement word. | ||
90 | * If not, return null. | ||
91 | */ | ||
92 | static s32 __kprobes *is_riprel(u8 *insn) | ||
93 | { | ||
94 | #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ | ||
95 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
96 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
97 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
98 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
99 | << (row % 64)) | ||
100 | static const u64 onebyte_has_modrm[256 / 64] = { | ||
101 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
102 | /* ------------------------------- */ | ||
103 | W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */ | ||
104 | W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */ | ||
105 | W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */ | ||
106 | W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */ | ||
107 | W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ | ||
108 | W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */ | ||
109 | W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */ | ||
110 | W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ | ||
111 | W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ | ||
112 | W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */ | ||
113 | W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */ | ||
114 | W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */ | ||
115 | W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */ | ||
116 | W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */ | ||
117 | W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ | ||
118 | W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */ | ||
119 | /* ------------------------------- */ | ||
120 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
121 | }; | ||
122 | static const u64 twobyte_has_modrm[256 / 64] = { | ||
123 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
124 | /* ------------------------------- */ | ||
125 | W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */ | ||
126 | W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */ | ||
127 | W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */ | ||
128 | W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */ | ||
129 | W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */ | ||
130 | W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */ | ||
131 | W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */ | ||
132 | W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */ | ||
133 | W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */ | ||
134 | W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */ | ||
135 | W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */ | ||
136 | W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */ | ||
137 | W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */ | ||
138 | W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */ | ||
139 | W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */ | ||
140 | W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */ | ||
141 | /* ------------------------------- */ | ||
142 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
143 | }; | ||
144 | #undef W | ||
145 | int need_modrm; | ||
146 | |||
147 | /* Skip legacy instruction prefixes. */ | ||
148 | while (1) { | ||
149 | switch (*insn) { | ||
150 | case 0x66: | ||
151 | case 0x67: | ||
152 | case 0x2e: | ||
153 | case 0x3e: | ||
154 | case 0x26: | ||
155 | case 0x64: | ||
156 | case 0x65: | ||
157 | case 0x36: | ||
158 | case 0xf0: | ||
159 | case 0xf3: | ||
160 | case 0xf2: | ||
161 | ++insn; | ||
162 | continue; | ||
163 | } | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | /* Skip REX instruction prefix. */ | ||
168 | if ((*insn & 0xf0) == 0x40) | ||
169 | ++insn; | ||
170 | |||
171 | if (*insn == 0x0f) { /* Two-byte opcode. */ | ||
172 | ++insn; | ||
173 | need_modrm = test_bit(*insn, twobyte_has_modrm); | ||
174 | } else { /* One-byte opcode. */ | ||
175 | need_modrm = test_bit(*insn, onebyte_has_modrm); | ||
176 | } | ||
177 | |||
178 | if (need_modrm) { | ||
179 | u8 modrm = *++insn; | ||
180 | if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */ | ||
181 | /* Displacement follows ModRM byte. */ | ||
182 | return (s32 *) ++insn; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | /* No %rip-relative addressing mode here. */ | ||
187 | return NULL; | ||
188 | } | ||
189 | |||
190 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | ||
191 | { | ||
192 | s32 *ripdisp; | ||
193 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); | ||
194 | ripdisp = is_riprel(p->ainsn.insn); | ||
195 | if (ripdisp) { | ||
196 | /* | ||
197 | * The copied instruction uses the %rip-relative | ||
198 | * addressing mode. Adjust the displacement for the | ||
199 | * difference between the original location of this | ||
200 | * instruction and the location of the copy that will | ||
201 | * actually be run. The tricky bit here is making sure | ||
202 | * that the sign extension happens correctly in this | ||
203 | * calculation, since we need a signed 32-bit result to | ||
204 | * be sign-extended to 64 bits when it's added to the | ||
205 | * %rip value and yield the same 64-bit result that the | ||
206 | * sign-extension of the original signed 32-bit | ||
207 | * displacement would have given. | ||
208 | */ | ||
209 | s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn; | ||
210 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
211 | *ripdisp = disp; | ||
212 | } | ||
213 | p->opcode = *p->addr; | ||
214 | } | ||
215 | |||
216 | void __kprobes arch_arm_kprobe(struct kprobe *p) | ||
217 | { | ||
218 | text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); | ||
219 | } | ||
220 | |||
221 | void __kprobes arch_disarm_kprobe(struct kprobe *p) | ||
222 | { | ||
223 | text_poke(p->addr, &p->opcode, 1); | ||
224 | } | ||
225 | |||
226 | void __kprobes arch_remove_kprobe(struct kprobe *p) | ||
227 | { | ||
228 | mutex_lock(&kprobe_mutex); | ||
229 | free_insn_slot(p->ainsn.insn, 0); | ||
230 | mutex_unlock(&kprobe_mutex); | ||
231 | } | ||
232 | |||
233 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
234 | { | ||
235 | kcb->prev_kprobe.kp = kprobe_running(); | ||
236 | kcb->prev_kprobe.status = kcb->kprobe_status; | ||
237 | kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; | ||
238 | kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; | ||
239 | } | ||
240 | |||
241 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
242 | { | ||
243 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | ||
244 | kcb->kprobe_status = kcb->prev_kprobe.status; | ||
245 | kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; | ||
246 | kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; | ||
247 | } | ||
248 | |||
249 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
250 | struct kprobe_ctlblk *kcb) | ||
251 | { | ||
252 | __get_cpu_var(current_kprobe) = p; | ||
253 | kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags | ||
254 | = (regs->eflags & (TF_MASK | IF_MASK)); | ||
255 | if (is_IF_modifier(p->ainsn.insn)) | ||
256 | kcb->kprobe_saved_rflags &= ~IF_MASK; | ||
257 | } | ||
258 | |||
259 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
260 | { | ||
261 | regs->eflags |= TF_MASK; | ||
262 | regs->eflags &= ~IF_MASK; | ||
263 | /*single step inline if the instruction is an int3*/ | ||
264 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
265 | regs->rip = (unsigned long)p->addr; | ||
266 | else | ||
267 | regs->rip = (unsigned long)p->ainsn.insn; | ||
268 | } | ||
269 | |||
270 | /* Called with kretprobe_lock held */ | ||
271 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | ||
272 | struct pt_regs *regs) | ||
273 | { | ||
274 | unsigned long *sara = (unsigned long *)regs->rsp; | ||
275 | |||
276 | ri->ret_addr = (kprobe_opcode_t *) *sara; | ||
277 | /* Replace the return addr with trampoline addr */ | ||
278 | *sara = (unsigned long) &kretprobe_trampoline; | ||
279 | } | ||
280 | |||
/*
 * Breakpoint (int3) handler for kprobes; invoked from
 * kprobe_exceptions_notify() for DIE_INT3.
 *
 * Returns 1 when the trap has been dealt with here, 0 when it should be
 * left to the rest of the kernel.
 *
 * Preemption is disabled on entry.  Paths that leave through the
 * no_kprobe label re-enable it; the direct "return 1" paths leave it
 * disabled.
 */
int __kprobes kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *p;
	int ret = 0;
	/* rip points just past the int3; step back to the breakpoint address. */
	kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
	struct kprobe_ctlblk *kcb;

	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
				/*
				 * The instruction being single-stepped is
				 * itself an int3: stop stepping, put the
				 * saved flag bits back and decline the trap.
				 */
				regs->eflags &= ~TF_MASK;
				regs->eflags |= kcb->kprobe_saved_rflags;
				goto no_kprobe;
			} else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
				/* TODO: Provide re-entrancy from
				 * post_kprobes_handler() and avoid exception
				 * stack corruption while single-stepping on
				 * the instruction of the new probe.
				 */
				arch_disarm_kprobe(p);
				regs->rip = (unsigned long)p->addr;
				reset_current_kprobe();
				ret = 1;
				/* Falls out of the if-chain to "goto no_kprobe" below. */
			} else {
				/* We have reentered the kprobe_handler(), since
				 * another probe was hit while within the
				 * handler. We here save the original kprobe
				 * variables and just single step on instruction
				 * of the new probe without calling any user
				 * handlers.
				 */
				save_previous_kprobe(kcb);
				set_current_kprobe(p, regs, kcb);
				kprobes_inc_nmissed_count(p);
				prepare_singlestep(p, regs);
				kcb->kprobe_status = KPROBE_REENTER;
				return 1;
			}
		} else {
			if (*addr != BREAKPOINT_INSTRUCTION) {
			/* The breakpoint instruction was removed by
			 * another cpu right after we hit, no further
			 * handling of this interrupt is appropriate
			 */
				regs->rip = (unsigned long)addr;
				ret = 1;
				goto no_kprobe;
			}
			/*
			 * No probe is registered at this address but a kprobe
			 * is running: give that kprobe's break_handler a
			 * chance to claim the breakpoint.
			 */
			p = __get_cpu_var(current_kprobe);
			if (p->break_handler && p->break_handler(p, regs)) {
				goto ss_probe;
			}
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
			 */
			regs->rip = (unsigned long)addr;
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;

	if (p->pre_handler && p->pre_handler(p, regs))
		/* handler has already set things up, so skip ss setup */
		return 1;

ss_probe:
	/* Arrange for the probed instruction to be single-stepped. */
	prepare_singlestep(p, regs);
	kcb->kprobe_status = KPROBE_HIT_SS;
	return 1;

no_kprobe:
	/* Balances the preempt_disable() at the top of the function. */
	preempt_enable_no_resched();
	return ret;
}
382 | |||
/*
 * For function-return probes, init_kprobes() establishes a probepoint
 * here.  When a retprobed function returns, this probe is hit and
 * trampoline_probe_handler() runs, calling the kretprobe's handler.
 *
 * This function exists only to hold the inline asm that defines the
 * global symbol kretprobe_trampoline, which labels a single nop.
 */
void kretprobe_trampoline_holder(void)
{
	asm volatile (  ".global kretprobe_trampoline\n"
			"kretprobe_trampoline: \n"
			"nop\n");
}
394 | |||
395 | /* | ||
396 | * Called when we hit the probe point at kretprobe_trampoline | ||
397 | */ | ||
398 | int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | ||
399 | { | ||
400 | struct kretprobe_instance *ri = NULL; | ||
401 | struct hlist_head *head, empty_rp; | ||
402 | struct hlist_node *node, *tmp; | ||
403 | unsigned long flags, orig_ret_address = 0; | ||
404 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; | ||
405 | |||
406 | INIT_HLIST_HEAD(&empty_rp); | ||
407 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
408 | head = kretprobe_inst_table_head(current); | ||
409 | |||
410 | /* | ||
411 | * It is possible to have multiple instances associated with a given | ||
412 | * task either because an multiple functions in the call path | ||
413 | * have a return probe installed on them, and/or more then one return | ||
414 | * return probe was registered for a target function. | ||
415 | * | ||
416 | * We can handle this because: | ||
417 | * - instances are always inserted at the head of the list | ||
418 | * - when multiple return probes are registered for the same | ||
419 | * function, the first instance's ret_addr will point to the | ||
420 | * real return address, and all the rest will point to | ||
421 | * kretprobe_trampoline | ||
422 | */ | ||
423 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
424 | if (ri->task != current) | ||
425 | /* another task is sharing our hash bucket */ | ||
426 | continue; | ||
427 | |||
428 | if (ri->rp && ri->rp->handler) | ||
429 | ri->rp->handler(ri, regs); | ||
430 | |||
431 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
432 | recycle_rp_inst(ri, &empty_rp); | ||
433 | |||
434 | if (orig_ret_address != trampoline_address) | ||
435 | /* | ||
436 | * This is the real return address. Any other | ||
437 | * instances associated with this task are for | ||
438 | * other calls deeper on the call stack | ||
439 | */ | ||
440 | break; | ||
441 | } | ||
442 | |||
443 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
444 | regs->rip = orig_ret_address; | ||
445 | |||
446 | reset_current_kprobe(); | ||
447 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
448 | preempt_enable_no_resched(); | ||
449 | |||
450 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | ||
451 | hlist_del(&ri->hlist); | ||
452 | kfree(ri); | ||
453 | } | ||
454 | /* | ||
455 | * By returning a non-zero value, we are telling | ||
456 | * kprobe_handler() that we don't want the post_handler | ||
457 | * to run (and have re-enabled preemption) | ||
458 | */ | ||
459 | return 1; | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * Called after single-stepping. p->addr is the address of the | ||
464 | * instruction whose first byte has been replaced by the "int 3" | ||
465 | * instruction. To avoid the SMP problems that can occur when we | ||
466 | * temporarily put back the original opcode to single-step, we | ||
467 | * single-stepped a copy of the instruction. The address of this | ||
468 | * copy is p->ainsn.insn. | ||
469 | * | ||
470 | * This function prepares to return from the post-single-step | ||
471 | * interrupt. We have to fix up the stack as follows: | ||
472 | * | ||
473 | * 0) Except in the case of absolute or indirect jump or call instructions, | ||
474 | * the new rip is relative to the copied instruction. We need to make | ||
475 | * it relative to the original instruction. | ||
476 | * | ||
477 | * 1) If the single-stepped instruction was pushfl, then the TF and IF | ||
478 | * flags are set in the just-pushed eflags, and may need to be cleared. | ||
479 | * | ||
480 | * 2) If the single-stepped instruction was a call, the return address | ||
481 | * that is atop the stack is the address following the copied instruction. | ||
482 | * We need to make it the address following the original instruction. | ||
483 | */ | ||
484 | static void __kprobes resume_execution(struct kprobe *p, | ||
485 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | ||
486 | { | ||
487 | unsigned long *tos = (unsigned long *)regs->rsp; | ||
488 | unsigned long copy_rip = (unsigned long)p->ainsn.insn; | ||
489 | unsigned long orig_rip = (unsigned long)p->addr; | ||
490 | kprobe_opcode_t *insn = p->ainsn.insn; | ||
491 | |||
492 | /*skip the REX prefix*/ | ||
493 | if (*insn >= 0x40 && *insn <= 0x4f) | ||
494 | insn++; | ||
495 | |||
496 | regs->eflags &= ~TF_MASK; | ||
497 | switch (*insn) { | ||
498 | case 0x9c: /* pushfl */ | ||
499 | *tos &= ~(TF_MASK | IF_MASK); | ||
500 | *tos |= kcb->kprobe_old_rflags; | ||
501 | break; | ||
502 | case 0xc2: /* iret/ret/lret */ | ||
503 | case 0xc3: | ||
504 | case 0xca: | ||
505 | case 0xcb: | ||
506 | case 0xcf: | ||
507 | case 0xea: /* jmp absolute -- ip is correct */ | ||
508 | /* ip is already adjusted, no more changes required */ | ||
509 | goto no_change; | ||
510 | case 0xe8: /* call relative - Fix return addr */ | ||
511 | *tos = orig_rip + (*tos - copy_rip); | ||
512 | break; | ||
513 | case 0xff: | ||
514 | if ((insn[1] & 0x30) == 0x10) { | ||
515 | /* call absolute, indirect */ | ||
516 | /* Fix return addr; ip is correct. */ | ||
517 | *tos = orig_rip + (*tos - copy_rip); | ||
518 | goto no_change; | ||
519 | } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ | ||
520 | ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ | ||
521 | /* ip is correct. */ | ||
522 | goto no_change; | ||
523 | } | ||
524 | default: | ||
525 | break; | ||
526 | } | ||
527 | |||
528 | regs->rip = orig_rip + (regs->rip - copy_rip); | ||
529 | no_change: | ||
530 | |||
531 | return; | ||
532 | } | ||
533 | |||
534 | int __kprobes post_kprobe_handler(struct pt_regs *regs) | ||
535 | { | ||
536 | struct kprobe *cur = kprobe_running(); | ||
537 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
538 | |||
539 | if (!cur) | ||
540 | return 0; | ||
541 | |||
542 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
543 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
544 | cur->post_handler(cur, regs, 0); | ||
545 | } | ||
546 | |||
547 | resume_execution(cur, regs, kcb); | ||
548 | regs->eflags |= kcb->kprobe_saved_rflags; | ||
549 | trace_hardirqs_fixup_flags(regs->eflags); | ||
550 | |||
551 | /* Restore the original saved kprobes variables and continue. */ | ||
552 | if (kcb->kprobe_status == KPROBE_REENTER) { | ||
553 | restore_previous_kprobe(kcb); | ||
554 | goto out; | ||
555 | } | ||
556 | reset_current_kprobe(); | ||
557 | out: | ||
558 | preempt_enable_no_resched(); | ||
559 | |||
560 | /* | ||
561 | * if somebody else is singlestepping across a probe point, eflags | ||
562 | * will have TF set, in which case, continue the remaining processing | ||
563 | * of do_debug, as if this is not a probe hit. | ||
564 | */ | ||
565 | if (regs->eflags & TF_MASK) | ||
566 | return 0; | ||
567 | |||
568 | return 1; | ||
569 | } | ||
570 | |||
571 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
572 | { | ||
573 | struct kprobe *cur = kprobe_running(); | ||
574 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
575 | const struct exception_table_entry *fixup; | ||
576 | |||
577 | switch(kcb->kprobe_status) { | ||
578 | case KPROBE_HIT_SS: | ||
579 | case KPROBE_REENTER: | ||
580 | /* | ||
581 | * We are here because the instruction being single | ||
582 | * stepped caused a page fault. We reset the current | ||
583 | * kprobe and the rip points back to the probe address | ||
584 | * and allow the page fault handler to continue as a | ||
585 | * normal page fault. | ||
586 | */ | ||
587 | regs->rip = (unsigned long)cur->addr; | ||
588 | regs->eflags |= kcb->kprobe_old_rflags; | ||
589 | if (kcb->kprobe_status == KPROBE_REENTER) | ||
590 | restore_previous_kprobe(kcb); | ||
591 | else | ||
592 | reset_current_kprobe(); | ||
593 | preempt_enable_no_resched(); | ||
594 | break; | ||
595 | case KPROBE_HIT_ACTIVE: | ||
596 | case KPROBE_HIT_SSDONE: | ||
597 | /* | ||
598 | * We increment the nmissed count for accounting, | ||
599 | * we can also use npre/npostfault count for accouting | ||
600 | * these specific fault cases. | ||
601 | */ | ||
602 | kprobes_inc_nmissed_count(cur); | ||
603 | |||
604 | /* | ||
605 | * We come here because instructions in the pre/post | ||
606 | * handler caused the page_fault, this could happen | ||
607 | * if handler tries to access user space by | ||
608 | * copy_from_user(), get_user() etc. Let the | ||
609 | * user-specified handler try to fix it first. | ||
610 | */ | ||
611 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
612 | return 1; | ||
613 | |||
614 | /* | ||
615 | * In case the user-specified fault handler returned | ||
616 | * zero, try to fix up. | ||
617 | */ | ||
618 | fixup = search_exception_tables(regs->rip); | ||
619 | if (fixup) { | ||
620 | regs->rip = fixup->fixup; | ||
621 | return 1; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * fixup() could not handle it, | ||
626 | * Let do_page_fault() fix it. | ||
627 | */ | ||
628 | break; | ||
629 | default: | ||
630 | break; | ||
631 | } | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | /* | ||
636 | * Wrapper routine for handling exceptions. | ||
637 | */ | ||
638 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | ||
639 | unsigned long val, void *data) | ||
640 | { | ||
641 | struct die_args *args = (struct die_args *)data; | ||
642 | int ret = NOTIFY_DONE; | ||
643 | |||
644 | if (args->regs && user_mode(args->regs)) | ||
645 | return ret; | ||
646 | |||
647 | switch (val) { | ||
648 | case DIE_INT3: | ||
649 | if (kprobe_handler(args->regs)) | ||
650 | ret = NOTIFY_STOP; | ||
651 | break; | ||
652 | case DIE_DEBUG: | ||
653 | if (post_kprobe_handler(args->regs)) | ||
654 | ret = NOTIFY_STOP; | ||
655 | break; | ||
656 | case DIE_GPF: | ||
657 | /* kprobe_running() needs smp_processor_id() */ | ||
658 | preempt_disable(); | ||
659 | if (kprobe_running() && | ||
660 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
661 | ret = NOTIFY_STOP; | ||
662 | preempt_enable(); | ||
663 | break; | ||
664 | default: | ||
665 | break; | ||
666 | } | ||
667 | return ret; | ||
668 | } | ||
669 | |||
670 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
671 | { | ||
672 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
673 | unsigned long addr; | ||
674 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
675 | |||
676 | kcb->jprobe_saved_regs = *regs; | ||
677 | kcb->jprobe_saved_rsp = (long *) regs->rsp; | ||
678 | addr = (unsigned long)(kcb->jprobe_saved_rsp); | ||
679 | /* | ||
680 | * As Linus pointed out, gcc assumes that the callee | ||
681 | * owns the argument space and could overwrite it, e.g. | ||
682 | * tailcall optimization. So, to be absolutely safe | ||
683 | * we also save and restore enough stack bytes to cover | ||
684 | * the argument area. | ||
685 | */ | ||
686 | memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, | ||
687 | MIN_STACK_SIZE(addr)); | ||
688 | regs->eflags &= ~IF_MASK; | ||
689 | trace_hardirqs_off(); | ||
690 | regs->rip = (unsigned long)(jp->entry); | ||
691 | return 1; | ||
692 | } | ||
693 | |||
/*
 * Called to leave a jprobe entry function.
 *
 * The saved stack pointer from the control block is loaded into %rbx
 * (the "b" input constraint), exchanged with %rsp, and an int3 is then
 * executed.  The jprobe_return_end label marks the end of the address
 * range that longjmp_break_handler() checks the breakpoint against.
 */
void __kprobes jprobe_return(void)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	asm volatile ("       xchg   %%rbx,%%rsp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"::"b"
		      (kcb->jprobe_saved_rsp):"memory");
}
705 | |||
706 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
707 | { | ||
708 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
709 | u8 *addr = (u8 *) (regs->rip - 1); | ||
710 | unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); | ||
711 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
712 | |||
713 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { | ||
714 | if ((unsigned long *)regs->rsp != kcb->jprobe_saved_rsp) { | ||
715 | struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; | ||
716 | printk("current rsp %p does not match saved rsp %p\n", | ||
717 | (long *)regs->rsp, kcb->jprobe_saved_rsp); | ||
718 | printk("Saved registers for jprobe %p\n", jp); | ||
719 | show_registers(saved_regs); | ||
720 | printk("Current registers\n"); | ||
721 | show_registers(regs); | ||
722 | BUG(); | ||
723 | } | ||
724 | *regs = kcb->jprobe_saved_regs; | ||
725 | memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, | ||
726 | MIN_STACK_SIZE(stack_addr)); | ||
727 | preempt_enable_no_resched(); | ||
728 | return 1; | ||
729 | } | ||
730 | return 0; | ||
731 | } | ||
732 | |||
/*
 * The kprobe placed on kretprobe_trampoline; its pre-handler is
 * trampoline_probe_handler().  Registered by arch_init_kprobes().
 */
static struct kprobe trampoline_p = {
	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
	.pre_handler = trampoline_probe_handler
};
737 | |||
738 | int __init arch_init_kprobes(void) | ||
739 | { | ||
740 | return register_kprobe(&trampoline_p); | ||
741 | } | ||
742 | |||
743 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | ||
744 | { | ||
745 | if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) | ||
746 | return 1; | ||
747 | |||
748 | return 0; | ||
749 | } | ||