Diffstat (limited to 'arch/x86_64/kernel/kprobes.c')
-rw-r--r--	arch/x86_64/kernel/kprobes.c	236
1 file changed, 66 insertions(+), 170 deletions(-)
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 4e680f87a75f..acd2a778ebe6 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -38,7 +38,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);
 
 /* copy of the kernel stack at the probe fire time */
@@ -274,48 +272,23 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->rip = (unsigned long)p->ainsn.insn;
 }
 
-struct task_struct *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-		(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)regs->rsp;
 	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
 }
 
 /*
@@ -428,36 +401,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = (unsigned long *)regs->rsp - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)regs->rsp) - 1;
+	head = kretprobe_inst_table_head(current);
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->rip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->rip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we have handled unlocking
+	 * and re-enabling preemption.
+	 */
+	return 1;
 }
 
 /*
@@ -550,8 +546,7 @@ int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_rflags;
 
 	/* Restore the original saved kprobes variables and continue. */
@@ -682,111 +677,12 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable. Single-stepping an instruction on such a page yields an
- * oops. So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there. (We can allocate additional pages if somebody
- * inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-	struct hlist_node hlist;
-	kprobe_opcode_t *insns;		/* page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
-	int nused;
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
 };
 
-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
-			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
-				if (!kip->slot_used[i]) {
-					kip->slot_used[i] = 1;
-					kip->nused++;
-					return kip->insns + (i*MAX_INSN_SIZE);
-				}
-			}
-			/* Surprise! No unused slots. Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
-		}
-	}
-
-	/* All out of space. Need to allocate a new page. Use slot 0.*/
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-	if (!kip) {
-		return NULL;
-	}
-
-	/*
-	 * For the %rip-relative displacement fixups to be doable, we
-	 * need our instruction copy to be within +/- 2GB of any data it
-	 * might access via %rip. That is, within 2GB of where the
-	 * kernel image and loaded module images reside. So we allocate
-	 * a page in the module loading area.
-	 */
-	kip->insns = module_alloc(PAGE_SIZE);
-	if (!kip->insns) {
-		kfree(kip);
-		return NULL;
-	}
-	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
-	kip->slot_used[0] = 1;
-	kip->nused = 1;
-	return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
+int __init arch_init(void)
 {
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->insns <= slot
-		    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use. Free it unless
-				 * it's the last one. We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
-			}
-			return;
-		}
-	}
+	return register_kprobe(&trampoline_p);
 }
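
The reworked code above drives everything through the generic kretprobe interface: arch_prepare_kretprobe() records the real return address in a kretprobe_instance, and trampoline_probe_handler() later invokes ri->rp->handler(ri, regs) before restoring that address into regs->rip. A minimal sketch of a module using that interface against a kernel with this change applied follows; the probed function traced_func, the maxactive value, and the printk output are illustrative assumptions and are not part of this diff.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kprobes.h>

/* Illustrative target inside this module, so the sketch does not
 * depend on resolving an unexported kernel symbol. */
static noinline long traced_func(long x)
{
	return x * 2;
}

/* Invoked from trampoline_probe_handler() via ri->rp->handler(ri, regs)
 * before the real return address is put back into regs->rip. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "traced_func returned %ld to %p in %s\n",
	       (long) regs->rax, ri->ret_addr, ri->task->comm);
	return 0;
}

static struct kretprobe my_rp = {
	.kp.addr   = (kprobe_opcode_t *) traced_func,
	.handler   = ret_handler,
	.maxactive = 16,	/* instances handed out by get_free_rp_inst() */
};

static int __init rp_example_init(void)
{
	int ret = register_kretprobe(&my_rp);

	if (!ret)
		traced_func(21);	/* fire the return probe once */
	return ret;
}

static void __exit rp_example_exit(void)
{
	unregister_kretprobe(&my_rp);
	printk(KERN_INFO "missed %d kretprobe instances\n", my_rp.nmissed);
}

module_init(rp_example_init);
module_exit(rp_example_exit);
MODULE_LICENSE("GPL");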