diff options
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
-rw-r--r-- | arch/x86/kernel/ptrace.c | 415 |
1 files changed, 328 insertions, 87 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..04d182a7cfdb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -34,6 +36,7 @@ | |||
34 | #include <asm/prctl.h> | 36 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 38 | #include <asm/ds.h> |
39 | #include <asm/hw_breakpoint.h> | ||
37 | 40 | ||
38 | #include "tls.h" | 41 | #include "tls.h" |
39 | 42 | ||
@@ -49,6 +52,118 @@ enum x86_regset { | |||
49 | REGSET_IOPERM32, | 52 | REGSET_IOPERM32, |
50 | }; | 53 | }; |
51 | 54 | ||
55 | struct pt_regs_offset { | ||
56 | const char *name; | ||
57 | int offset; | ||
58 | }; | ||
59 | |||
60 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
61 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
62 | |||
63 | static const struct pt_regs_offset regoffset_table[] = { | ||
64 | #ifdef CONFIG_X86_64 | ||
65 | REG_OFFSET_NAME(r15), | ||
66 | REG_OFFSET_NAME(r14), | ||
67 | REG_OFFSET_NAME(r13), | ||
68 | REG_OFFSET_NAME(r12), | ||
69 | REG_OFFSET_NAME(r11), | ||
70 | REG_OFFSET_NAME(r10), | ||
71 | REG_OFFSET_NAME(r9), | ||
72 | REG_OFFSET_NAME(r8), | ||
73 | #endif | ||
74 | REG_OFFSET_NAME(bx), | ||
75 | REG_OFFSET_NAME(cx), | ||
76 | REG_OFFSET_NAME(dx), | ||
77 | REG_OFFSET_NAME(si), | ||
78 | REG_OFFSET_NAME(di), | ||
79 | REG_OFFSET_NAME(bp), | ||
80 | REG_OFFSET_NAME(ax), | ||
81 | #ifdef CONFIG_X86_32 | ||
82 | REG_OFFSET_NAME(ds), | ||
83 | REG_OFFSET_NAME(es), | ||
84 | REG_OFFSET_NAME(fs), | ||
85 | REG_OFFSET_NAME(gs), | ||
86 | #endif | ||
87 | REG_OFFSET_NAME(orig_ax), | ||
88 | REG_OFFSET_NAME(ip), | ||
89 | REG_OFFSET_NAME(cs), | ||
90 | REG_OFFSET_NAME(flags), | ||
91 | REG_OFFSET_NAME(sp), | ||
92 | REG_OFFSET_NAME(ss), | ||
93 | REG_OFFSET_END, | ||
94 | }; | ||
95 | |||
96 | /** | ||
97 | * regs_query_register_offset() - query register offset from its name | ||
98 | * @name: the name of a register | ||
99 | * | ||
100 | * regs_query_register_offset() returns the offset of a register in struct | ||
101 | * pt_regs from its name. If the name is invalid, this returns -EINVAL. | ||
102 | */ | ||
103 | int regs_query_register_offset(const char *name) | ||
104 | { | ||
105 | const struct pt_regs_offset *roff; | ||
106 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
107 | if (!strcmp(roff->name, name)) | ||
108 | return roff->offset; | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * regs_query_register_name() - query register name from its offset | ||
114 | * @offset: the offset of a register in struct pt_regs. | ||
115 | * | ||
116 | * regs_query_register_name() returns the name of a register from its | ||
117 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL. | ||
118 | */ | ||
119 | const char *regs_query_register_name(unsigned int offset) | ||
120 | { | ||
121 | const struct pt_regs_offset *roff; | ||
122 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
123 | if (roff->offset == offset) | ||
124 | return roff->name; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
128 | static const int arg_offs_table[] = { | ||
129 | #ifdef CONFIG_X86_32 | ||
130 | [0] = offsetof(struct pt_regs, ax), | ||
131 | [1] = offsetof(struct pt_regs, dx), | ||
132 | [2] = offsetof(struct pt_regs, cx) | ||
133 | #else /* CONFIG_X86_64 */ | ||
134 | [0] = offsetof(struct pt_regs, di), | ||
135 | [1] = offsetof(struct pt_regs, si), | ||
136 | [2] = offsetof(struct pt_regs, dx), | ||
137 | [3] = offsetof(struct pt_regs, cx), | ||
138 | [4] = offsetof(struct pt_regs, r8), | ||
139 | [5] = offsetof(struct pt_regs, r9) | ||
140 | #endif | ||
141 | }; | ||
142 | |||
143 | /** | ||
144 | * regs_get_argument_nth() - get Nth argument at function call | ||
145 | * @regs: pt_regs which contains registers at function entry. | ||
146 | * @n: argument number. | ||
147 | * | ||
148 | * regs_get_argument_nth() returns the @n th argument of a function call. | ||
149 | * Since the kernel stack usually changes right after function entry, | ||
150 | * you must use this at function entry. If the @n th argument is NOT in the | ||
151 | * kernel stack or pt_regs, this returns 0. | ||
152 | */ | ||
153 | unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) | ||
154 | { | ||
155 | if (n < ARRAY_SIZE(arg_offs_table)) | ||
156 | return *(unsigned long *)((char *)regs + arg_offs_table[n]); | ||
157 | else { | ||
158 | /* | ||
159 | * The typical case: arg n is on the stack. | ||
160 | * (Note: stack[0] = return address, so skip it) | ||
161 | */ | ||
162 | n -= ARRAY_SIZE(arg_offs_table); | ||
163 | return regs_get_kernel_stack_nth(regs, 1 + n); | ||
164 | } | ||
165 | } | ||
166 | |||
52 | /* | 167 | /* |
53 | * does not yet catch signals sent when the child dies. | 168 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 169 | * in exit.c or in signal.c. |
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, | |||
137 | return 0; | 252 | return 0; |
138 | } | 253 | } |
139 | 254 | ||
140 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
141 | { | ||
142 | return TASK_SIZE - 3; | ||
143 | } | ||
144 | |||
145 | #else /* CONFIG_X86_64 */ | 255 | #else /* CONFIG_X86_64 */ |
146 | 256 | ||
147 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 257 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, | |||
266 | return 0; | 376 | return 0; |
267 | } | 377 | } |
268 | 378 | ||
269 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
270 | { | ||
271 | #ifdef CONFIG_IA32_EMULATION | ||
272 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
273 | return IA32_PAGE_OFFSET - 3; | ||
274 | #endif | ||
275 | return TASK_SIZE_MAX - 7; | ||
276 | } | ||
277 | |||
278 | #endif /* CONFIG_X86_32 */ | 379 | #endif /* CONFIG_X86_32 */ |
279 | 380 | ||
280 | static unsigned long get_flags(struct task_struct *task) | 381 | static unsigned long get_flags(struct task_struct *task) |
@@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, | |||
454 | return ret; | 555 | return ret; |
455 | } | 556 | } |
456 | 557 | ||
558 | static void ptrace_triggered(struct perf_event *bp, void *data) | ||
559 | { | ||
560 | int i; | ||
561 | struct thread_struct *thread = &(current->thread); | ||
562 | |||
563 | /* | ||
564 | * Store in the virtual DR6 register the fact that the breakpoint | ||
565 | * was hit so the thread's debugger will see it. | ||
566 | */ | ||
567 | for (i = 0; i < HBP_NUM; i++) { | ||
568 | if (thread->ptrace_bps[i] == bp) | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
573 | } | ||
574 | |||
457 | /* | 575 | /* |
458 | * This function is trivial and will be inlined by the compiler. | 576 | * Walk through all ptrace breakpoints for this thread and |
459 | * Having it separates the implementation details of debug | 577 | * build the dr7 value on top of their attributes. |
460 | * registers from the interface details of ptrace. | 578 | * |
461 | */ | 579 | */ |
462 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 580 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
463 | { | 581 | { |
464 | switch (n) { | 582 | int i; |
465 | case 0: return child->thread.debugreg0; | 583 | int dr7 = 0; |
466 | case 1: return child->thread.debugreg1; | 584 | struct arch_hw_breakpoint *info; |
467 | case 2: return child->thread.debugreg2; | 585 | |
468 | case 3: return child->thread.debugreg3; | 586 | for (i = 0; i < HBP_NUM; i++) { |
469 | case 6: return child->thread.debugreg6; | 587 | if (bp[i] && !bp[i]->attr.disabled) { |
470 | case 7: return child->thread.debugreg7; | 588 | info = counter_arch_bp(bp[i]); |
589 | dr7 |= encode_dr7(i, info->len, info->type); | ||
590 | } | ||
471 | } | 591 | } |
472 | return 0; | 592 | |
593 | return dr7; | ||
473 | } | 594 | } |
474 | 595 | ||
475 | static int ptrace_set_debugreg(struct task_struct *child, | 596 | static struct perf_event * |
476 | int n, unsigned long data) | 597 | ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, |
598 | struct task_struct *tsk, int disabled) | ||
477 | { | 599 | { |
478 | int i; | 600 | int err; |
601 | int gen_len, gen_type; | ||
602 | DEFINE_BREAKPOINT_ATTR(attr); | ||
479 | 603 | ||
480 | if (unlikely(n == 4 || n == 5)) | 604 | /* |
481 | return -EIO; | 605 | * We should have at least an inactive breakpoint at this |
606 | * slot. If not, the user is writing dr7 without having | ||
607 | * written the address register first. | ||
608 | */ | ||
609 | if (!bp) | ||
610 | return ERR_PTR(-EINVAL); | ||
482 | 611 | ||
483 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 612 | err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
484 | return -EIO; | 613 | if (err) |
614 | return ERR_PTR(err); | ||
485 | 615 | ||
486 | switch (n) { | 616 | attr = bp->attr; |
487 | case 0: child->thread.debugreg0 = data; break; | 617 | attr.bp_len = gen_len; |
488 | case 1: child->thread.debugreg1 = data; break; | 618 | attr.bp_type = gen_type; |
489 | case 2: child->thread.debugreg2 = data; break; | 619 | attr.disabled = disabled; |
490 | case 3: child->thread.debugreg3 = data; break; | ||
491 | 620 | ||
492 | case 6: | 621 | return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
493 | if ((data & ~0xffffffffUL) != 0) | 622 | } |
494 | return -EIO; | 623 | |
495 | child->thread.debugreg6 = data; | 624 | /* |
496 | break; | 625 | * Handle ptrace writes to debug register 7. |
626 | */ | ||
627 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
628 | { | ||
629 | struct thread_struct *thread = &(tsk->thread); | ||
630 | unsigned long old_dr7; | ||
631 | int i, orig_ret = 0, rc = 0; | ||
632 | int enabled, second_pass = 0; | ||
633 | unsigned len, type; | ||
634 | struct perf_event *bp; | ||
635 | |||
636 | data &= ~DR_CONTROL_RESERVED; | ||
637 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
638 | restore: | ||
639 | /* | ||
640 | * Loop through all the hardware breakpoints, making the | ||
641 | * appropriate changes to each. | ||
642 | */ | ||
643 | for (i = 0; i < HBP_NUM; i++) { | ||
644 | enabled = decode_dr7(data, i, &len, &type); | ||
645 | bp = thread->ptrace_bps[i]; | ||
646 | |||
647 | if (!enabled) { | ||
648 | if (bp) { | ||
649 | /* | ||
650 | * Don't unregister the breakpoints right away, | ||
651 | * unless all register_user_hw_breakpoint() | ||
652 | * requests have succeeded. This prevents | ||
653 | * any window of opportunity for debug | ||
654 | * register grabbing by other users. | ||
655 | */ | ||
656 | if (!second_pass) | ||
657 | continue; | ||
658 | |||
659 | thread->ptrace_bps[i] = NULL; | ||
660 | bp = ptrace_modify_breakpoint(bp, len, type, | ||
661 | tsk, 1); | ||
662 | if (IS_ERR(bp)) { | ||
663 | rc = PTR_ERR(bp); | ||
664 | thread->ptrace_bps[i] = NULL; | ||
665 | break; | ||
666 | } | ||
667 | thread->ptrace_bps[i] = bp; | ||
668 | } | ||
669 | continue; | ||
670 | } | ||
671 | |||
672 | bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); | ||
673 | |||
674 | /* Incorrect bp, or we have a bug in bp API */ | ||
675 | if (IS_ERR(bp)) { | ||
676 | rc = PTR_ERR(bp); | ||
677 | thread->ptrace_bps[i] = NULL; | ||
678 | break; | ||
679 | } | ||
680 | thread->ptrace_bps[i] = bp; | ||
681 | } | ||
682 | /* | ||
683 | * Make a second pass to free the remaining unused breakpoints | ||
684 | * or to restore the original breakpoints if an error occurred. | ||
685 | */ | ||
686 | if (!second_pass) { | ||
687 | second_pass = 1; | ||
688 | if (rc < 0) { | ||
689 | orig_ret = rc; | ||
690 | data = old_dr7; | ||
691 | } | ||
692 | goto restore; | ||
693 | } | ||
694 | return ((orig_ret < 0) ? orig_ret : rc); | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
699 | */ | ||
700 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
701 | { | ||
702 | struct thread_struct *thread = &(tsk->thread); | ||
703 | unsigned long val = 0; | ||
497 | 704 | ||
498 | case 7: | 705 | if (n < HBP_NUM) { |
706 | struct perf_event *bp; | ||
707 | bp = thread->ptrace_bps[n]; | ||
708 | if (!bp) | ||
709 | return 0; | ||
710 | val = bp->hw.info.address; | ||
711 | } else if (n == 6) { | ||
712 | val = thread->debugreg6; | ||
713 | } else if (n == 7) { | ||
714 | val = ptrace_get_dr7(thread->ptrace_bps); | ||
715 | } | ||
716 | return val; | ||
717 | } | ||
718 | |||
719 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
720 | unsigned long addr) | ||
721 | { | ||
722 | struct perf_event *bp; | ||
723 | struct thread_struct *t = &tsk->thread; | ||
724 | DEFINE_BREAKPOINT_ATTR(attr); | ||
725 | |||
726 | if (!t->ptrace_bps[nr]) { | ||
499 | /* | 727 | /* |
500 | * Sanity-check data. Take one half-byte at once with | 728 | * Put stub len and type to register (reserve) an inactive but |
501 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 729 | * correct bp |
502 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
503 | * 2 and 3 are LENi. Given a list of invalid values, | ||
504 | * we do mask |= 1 << invalid_value, so that | ||
505 | * (mask >> check) & 1 is a correct test for invalid | ||
506 | * values. | ||
507 | * | ||
508 | * R/Wi contains the type of the breakpoint / | ||
509 | * watchpoint, LENi contains the length of the watched | ||
510 | * data in the watchpoint case. | ||
511 | * | ||
512 | * The invalid values are: | ||
513 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
514 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
515 | * mask |= 0x4444. | ||
516 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
517 | * 0x1110. | ||
518 | * | ||
519 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
520 | * | ||
521 | * See the Intel Manual "System Programming Guide", | ||
522 | * 15.2.4 | ||
523 | * | ||
524 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
525 | * mode (i.e. even for 32-bit userspace software, but | ||
526 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
527 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
528 | */ | 730 | */ |
529 | #ifdef CONFIG_X86_32 | 731 | attr.bp_addr = addr; |
530 | #define DR7_MASK 0x5f54 | 732 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
531 | #else | 733 | attr.bp_type = HW_BREAKPOINT_W; |
532 | #define DR7_MASK 0x5554 | 734 | attr.disabled = 1; |
533 | #endif | 735 | |
534 | data &= ~DR_CONTROL_RESERVED; | 736 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); |
535 | for (i = 0; i < 4; i++) | 737 | } else { |
536 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 738 | bp = t->ptrace_bps[nr]; |
537 | return -EIO; | 739 | t->ptrace_bps[nr] = NULL; |
538 | child->thread.debugreg7 = data; | 740 | |
539 | if (data) | 741 | attr = bp->attr; |
540 | set_tsk_thread_flag(child, TIF_DEBUG); | 742 | attr.bp_addr = addr; |
541 | else | 743 | bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); |
542 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
543 | break; | ||
544 | } | 744 | } |
745 | /* | ||
746 | * CHECKME: the previous code returned -EIO if the addr wasn't a | ||
747 | * valid task virtual addr. The new one will return -EINVAL in this | ||
748 | * case. | ||
749 | * -EINVAL may be what we want for in-kernel breakpoint users, but | ||
750 | * -EIO looks better for ptrace, since we are refusing a register write | ||
751 | * from the user. And anyway this was the previous behaviour. | ||
752 | */ | ||
753 | if (IS_ERR(bp)) | ||
754 | return PTR_ERR(bp); | ||
755 | |||
756 | t->ptrace_bps[nr] = bp; | ||
545 | 757 | ||
546 | return 0; | 758 | return 0; |
547 | } | 759 | } |
548 | 760 | ||
549 | /* | 761 | /* |
762 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
763 | */ | ||
764 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
765 | { | ||
766 | struct thread_struct *thread = &(tsk->thread); | ||
767 | int rc = 0; | ||
768 | |||
769 | /* There are no DR4 or DR5 registers */ | ||
770 | if (n == 4 || n == 5) | ||
771 | return -EIO; | ||
772 | |||
773 | if (n == 6) { | ||
774 | thread->debugreg6 = val; | ||
775 | goto ret_path; | ||
776 | } | ||
777 | if (n < HBP_NUM) { | ||
778 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
779 | if (rc) | ||
780 | return rc; | ||
781 | } | ||
782 | /* All that's left is DR7 */ | ||
783 | if (n == 7) | ||
784 | rc = ptrace_write_dr7(tsk, val); | ||
785 | |||
786 | ret_path: | ||
787 | return rc; | ||
788 | } | ||
789 | |||
790 | /* | ||
550 | * These access the current or another (stopped) task's io permission | 791 | * These access the current or another (stopped) task's io permission |
551 | * bitmap for debugging or core dump. | 792 | * bitmap for debugging or core dump. |
552 | */ | 793 | */ |