Diffstat (limited to 'arch/x86_64')

 -rw-r--r--  arch/x86_64/kernel/kprobes.c | 236
 -rw-r--r--  arch/x86_64/kernel/process.c |  29

 2 files changed, 95 insertions(+), 170 deletions(-)
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 4e680f87a75f..acd2a778ebe6 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -38,7 +38,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);
 
 /* copy of the kernel stack at the probe fire time */
@@ -274,48 +272,23 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->rip = (unsigned long)p->ainsn.insn;
 }
 
-struct task_struct *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-		(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)regs->rsp;
 	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
-}
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
 
 /*
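The rewritten arch_prepare_kretprobe() above is the arch half of the function-return-probe API: it records the real return address in a per-task kretprobe_instance, redirects the return through kretprobe_trampoline, and bumps rp->nmissed when no free instance is available. For orientation, a minimal client of this 2.6-era API might look like the sketch below; the target symbol sys_symlink is only an illustrative choice, and the sketch assumes kallsyms_lookup_name() is usable from modules in this tree.

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>

/* Runs when the probed function returns through kretprobe_trampoline;
 * on x86_64 the return value is still in regs->rax at that point. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned %ld\n", (long)regs->rax);
	return 0;
}

static struct kretprobe my_rp = {
	.handler   = ret_handler,
	.maxactive = 20,	/* size of the free-instance pool */
};

static int __init rp_init(void)
{
	my_rp.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("sys_symlink");
	if (!my_rp.kp.addr)
		return -ENOENT;
	return register_kretprobe(&my_rp);
}

static void __exit rp_exit(void)
{
	unregister_kretprobe(&my_rp);
}

module_init(rp_init);
module_exit(rp_exit);
MODULE_LICENSE("GPL");

The handler itself fires from trampoline_probe_handler(), reworked in the next hunk, with the saved instance supplying the real return address.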
@@ -428,36 +401,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = (unsigned long *)regs->rsp - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)regs->rsp) - 1;
+	head = kretprobe_inst_table_head(current);
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->rip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->rip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we have handled unlocking
+	 * and re-enabling preemption.
+	 */
+	return 1;
 }
 
 /*
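The walk above depends on LIFO ordering: arch_prepare_kretprobe() always inserts at the head of the per-task hash bucket, so the first instance encountered whose ret_addr is not the trampoline belongs to the innermost pending call. A stand-alone user-space toy (all names hypothetical) that models just this bookkeeping:

#include <assert.h>
#include <stdio.h>

#define TRAMPOLINE 0xdeadbeefUL	/* stand-in for &kretprobe_trampoline */

struct inst { unsigned long ret_addr; struct inst *next; };

static struct inst *head;	/* one hash bucket, one task */

/* arch_prepare_kretprobe(): save the return address, insert at head */
static void push_inst(struct inst *ri, unsigned long ret_addr)
{
	ri->ret_addr = ret_addr;
	ri->next = head;
	head = ri;
}

/* trampoline_probe_handler(): consume instances from the head until one
 * carries a real return address rather than the trampoline */
static unsigned long pop_to_real_ret(void)
{
	unsigned long orig = 0;

	while (head) {
		struct inst *ri = head;
		head = ri->next;
		orig = ri->ret_addr;
		if (orig != TRAMPOLINE)
			break;	/* the real return address */
	}
	return orig;
}

int main(void)
{
	struct inst a, b, c;

	push_inst(&a, 0x1000);		/* outer call, returns to 0x1000 */
	push_inst(&b, 0x2000);		/* nested call, first probe on it */
	push_inst(&c, TRAMPOLINE);	/* second probe on the same call sees
					 * the trampoline already on stack */

	assert(pop_to_real_ret() == 0x2000);	/* nested call returns first */
	assert(pop_to_real_ret() == 0x1000);	/* then the outer call */
	printf("LIFO bookkeeping holds\n");
	return 0;
}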
@@ -550,8 +546,7 @@ int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_rflags;
 
 	/* Restore the original saved kprobes variables and continue. */
@@ -682,111 +677,12 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable. Single-stepping an instruction on such a page yields an
- * oops. So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there. (We can allocate additional pages if somebody
- * inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-	struct hlist_node hlist;
-	kprobe_opcode_t *insns;		/* page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
-	int nused;
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
 };
 
-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
-			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
-				if (!kip->slot_used[i]) {
-					kip->slot_used[i] = 1;
-					kip->nused++;
-					return kip->insns + (i*MAX_INSN_SIZE);
-				}
-			}
-			/* Surprise! No unused slots. Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
-		}
-	}
-
-	/* All out of space. Need to allocate a new page. Use slot 0. */
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-	if (!kip) {
-		return NULL;
-	}
-
-	/*
-	 * For the %rip-relative displacement fixups to be doable, we
-	 * need our instruction copy to be within +/- 2GB of any data it
-	 * might access via %rip. That is, within 2GB of where the
-	 * kernel image and loaded module images reside. So we allocate
-	 * a page in the module loading area.
-	 */
-	kip->insns = module_alloc(PAGE_SIZE);
-	if (!kip->insns) {
-		kfree(kip);
-		return NULL;
-	}
-	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
-	kip->slot_used[0] = 1;
-	kip->nused = 1;
-	return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
+int __init arch_init(void)
 {
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->insns <= slot
-		    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use. Free it unless
-				 * it's the last one. We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						       &kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
-			}
-			return;
-		}
-	}
+	return register_kprobe(&trampoline_p);
 }
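The get_insn_slot()/free_insn_slot() allocator deleted above is not lost: as the removal of the static declarations and the <linux/moduleloader.h> include in the earlier hunks suggests, it moves into the arch-independent kprobes core. Its deleted comment still states the two constraints worth remembering: slots must be executable, and on x86_64 they must sit within +/- 2GB of kernel and module text so %rip-relative displacements remain fixable. A user-space sketch of the same carve-a-page-into-slots idea, with mmap(PROT_EXEC) standing in for module_alloc() and the constants assumed:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define MAX_INSN_SIZE	15	/* upper bound on one x86 instruction */
#define PAGE_SZ		4096
#define SLOTS_PER_PAGE	(PAGE_SZ / MAX_INSN_SIZE)

static unsigned char *page;
static char slot_used[SLOTS_PER_PAGE];

/* get_insn_slot() analogue: hand out fixed-size slots from one
 * executable page, mapping it lazily (a real allocator chains pages) */
static unsigned char *get_slot(void)
{
	int i;

	if (!page) {
		page = mmap(NULL, PAGE_SZ, PROT_READ | PROT_WRITE | PROT_EXEC,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (page == MAP_FAILED)
			return NULL;
	}
	for (i = 0; i < SLOTS_PER_PAGE; i++) {
		if (!slot_used[i]) {
			slot_used[i] = 1;
			return page + i * MAX_INSN_SIZE;
		}
	}
	return NULL;
}

int main(void)
{
	/* "mov $42,%eax; ret" copied into a slot and executed in place,
	 * much as kprobes single-steps its copied instruction */
	static const unsigned char code[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };
	unsigned char *slot = get_slot();
	int (*fn)(void);

	if (!slot)
		return 1;
	memcpy(slot, code, sizeof(code));
	fn = (int (*)(void))slot;
	printf("stub returned %d\n", fn());	/* prints 42 */
	return 0;
}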
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 1d91271796e5..7577f9d7a75d 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -482,6 +482,33 @@ out:
 }
 
 /*
+ * This function decides whether the context switch from prev to next
+ * has to tweak the TSC disable bit in cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
+/*
  * This special macro can be used to load a debugging register
  */
 #define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)
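The effect of disable_tsc() is directly visible from user space: while a secure-computing task runs, CR4.TSD stays set, so executing rdtsc at CPL 3 raises #GP and the task dies with SIGSEGV. A small demo, assuming this tree exposes the era's /proc/self/seccomp switch (later kernels use prctl(PR_SET_SECCOMP) instead); the call site that arms this is added to __switch_to() in the next hunk.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static inline unsigned long long rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
	return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
	int fd;

	printf("tsc before seccomp: %llu\n", rdtsc());
	fflush(stdout);

	/* the fd is deliberately left open: close() is not in seccomp
	 * mode 1's read/write/exit/sigreturn whitelist */
	fd = open("/proc/self/seccomp", O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enable seccomp");
		return 1;
	}

	/* CR4.TSD is set by disable_tsc() the next time this task is
	 * switched in; keep reading the TSC until that kills us, which
	 * should happen within about one timeslice */
	for (;;)
		(void)rdtsc();
}

Toggling TSD only when a switch crosses the seccomp boundary keeps cr4 writes, which are serializing and expensive, off the common context-switch path.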
@@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		}
 	}
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 