aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/ptrace.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
-rw-r--r--arch/x86/kernel/ptrace.c415
1 files changed, 328 insertions, 87 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2dc66a..04d182a7cfdb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
34#include <asm/prctl.h> 36#include <asm/prctl.h>
35#include <asm/proto.h> 37#include <asm/proto.h>
36#include <asm/ds.h> 38#include <asm/ds.h>
39#include <asm/hw_breakpoint.h>
37 40
38#include "tls.h" 41#include "tls.h"
39 42
@@ -49,6 +52,118 @@ enum x86_regset {
49 REGSET_IOPERM32, 52 REGSET_IOPERM32,
50}; 53};
51 54
55struct pt_regs_offset {
56 const char *name;
57 int offset;
58};
59
60#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
61#define REG_OFFSET_END {.name = NULL, .offset = 0}
62
63static const struct pt_regs_offset regoffset_table[] = {
64#ifdef CONFIG_X86_64
65 REG_OFFSET_NAME(r15),
66 REG_OFFSET_NAME(r14),
67 REG_OFFSET_NAME(r13),
68 REG_OFFSET_NAME(r12),
69 REG_OFFSET_NAME(r11),
70 REG_OFFSET_NAME(r10),
71 REG_OFFSET_NAME(r9),
72 REG_OFFSET_NAME(r8),
73#endif
74 REG_OFFSET_NAME(bx),
75 REG_OFFSET_NAME(cx),
76 REG_OFFSET_NAME(dx),
77 REG_OFFSET_NAME(si),
78 REG_OFFSET_NAME(di),
79 REG_OFFSET_NAME(bp),
80 REG_OFFSET_NAME(ax),
81#ifdef CONFIG_X86_32
82 REG_OFFSET_NAME(ds),
83 REG_OFFSET_NAME(es),
84 REG_OFFSET_NAME(fs),
85 REG_OFFSET_NAME(gs),
86#endif
87 REG_OFFSET_NAME(orig_ax),
88 REG_OFFSET_NAME(ip),
89 REG_OFFSET_NAME(cs),
90 REG_OFFSET_NAME(flags),
91 REG_OFFSET_NAME(sp),
92 REG_OFFSET_NAME(ss),
93 REG_OFFSET_END,
94};
95
96/**
97 * regs_query_register_offset() - query register offset from its name
98 * @name: the name of a register
99 *
100 * regs_query_register_offset() returns the offset of a register in struct
101 * pt_regs from its name. If the name is invalid, this returns -EINVAL;
102 */
103int regs_query_register_offset(const char *name)
104{
105 const struct pt_regs_offset *roff;
106 for (roff = regoffset_table; roff->name != NULL; roff++)
107 if (!strcmp(roff->name, name))
108 return roff->offset;
109 return -EINVAL;
110}
111
112/**
113 * regs_query_register_name() - query register name from its offset
114 * @offset: the offset of a register in struct pt_regs.
115 *
116 * regs_query_register_name() returns the name of a register from its
117 * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
118 */
119const char *regs_query_register_name(unsigned int offset)
120{
121 const struct pt_regs_offset *roff;
122 for (roff = regoffset_table; roff->name != NULL; roff++)
123 if (roff->offset == offset)
124 return roff->name;
125 return NULL;
126}
127
128static const int arg_offs_table[] = {
129#ifdef CONFIG_X86_32
130 [0] = offsetof(struct pt_regs, ax),
131 [1] = offsetof(struct pt_regs, dx),
132 [2] = offsetof(struct pt_regs, cx)
133#else /* CONFIG_X86_64 */
134 [0] = offsetof(struct pt_regs, di),
135 [1] = offsetof(struct pt_regs, si),
136 [2] = offsetof(struct pt_regs, dx),
137 [3] = offsetof(struct pt_regs, cx),
138 [4] = offsetof(struct pt_regs, r8),
139 [5] = offsetof(struct pt_regs, r9)
140#endif
141};
142
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
52/* 167/*
53 * does not yet catch signals sent when the child dies. 168 * does not yet catch signals sent when the child dies.
54 * in exit.c or in signal.c. 169 * in exit.c or in signal.c.
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
137 return 0; 252 return 0;
138} 253}
139 254
140static unsigned long debugreg_addr_limit(struct task_struct *task)
141{
142 return TASK_SIZE - 3;
143}
144
145#else /* CONFIG_X86_64 */ 255#else /* CONFIG_X86_64 */
146 256
147#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 257#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
266 return 0; 376 return 0;
267} 377}
268 378
269static unsigned long debugreg_addr_limit(struct task_struct *task)
270{
271#ifdef CONFIG_IA32_EMULATION
272 if (test_tsk_thread_flag(task, TIF_IA32))
273 return IA32_PAGE_OFFSET - 3;
274#endif
275 return TASK_SIZE_MAX - 7;
276}
277
278#endif /* CONFIG_X86_32 */ 379#endif /* CONFIG_X86_32 */
279 380
280static unsigned long get_flags(struct task_struct *task) 381static unsigned long get_flags(struct task_struct *task)
@@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target,
454 return ret; 555 return ret;
455} 556}
456 557
558static void ptrace_triggered(struct perf_event *bp, void *data)
559{
560 int i;
561 struct thread_struct *thread = &(current->thread);
562
563 /*
564 * Store in the virtual DR6 register the fact that the breakpoint
565 * was hit so the thread's debugger will see it.
566 */
567 for (i = 0; i < HBP_NUM; i++) {
568 if (thread->ptrace_bps[i] == bp)
569 break;
570 }
571
572 thread->debugreg6 |= (DR_TRAP0 << i);
573}
574
457/* 575/*
458 * This function is trivial and will be inlined by the compiler. 576 * Walk through every ptrace breakpoints for this thread and
459 * Having it separates the implementation details of debug 577 * build the dr7 value on top of their attributes.
460 * registers from the interface details of ptrace. 578 *
461 */ 579 */
462static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 580static unsigned long ptrace_get_dr7(struct perf_event *bp[])
463{ 581{
464 switch (n) { 582 int i;
465 case 0: return child->thread.debugreg0; 583 int dr7 = 0;
466 case 1: return child->thread.debugreg1; 584 struct arch_hw_breakpoint *info;
467 case 2: return child->thread.debugreg2; 585
468 case 3: return child->thread.debugreg3; 586 for (i = 0; i < HBP_NUM; i++) {
469 case 6: return child->thread.debugreg6; 587 if (bp[i] && !bp[i]->attr.disabled) {
470 case 7: return child->thread.debugreg7; 588 info = counter_arch_bp(bp[i]);
589 dr7 |= encode_dr7(i, info->len, info->type);
590 }
471 } 591 }
472 return 0; 592
593 return dr7;
473} 594}
474 595
475static int ptrace_set_debugreg(struct task_struct *child, 596static struct perf_event *
476 int n, unsigned long data) 597ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
598 struct task_struct *tsk, int disabled)
477{ 599{
478 int i; 600 int err;
601 int gen_len, gen_type;
602 DEFINE_BREAKPOINT_ATTR(attr);
479 603
480 if (unlikely(n == 4 || n == 5)) 604 /*
481 return -EIO; 605 * We shoud have at least an inactive breakpoint at this
606 * slot. It means the user is writing dr7 without having
607 * written the address register first
608 */
609 if (!bp)
610 return ERR_PTR(-EINVAL);
482 611
483 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 612 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
484 return -EIO; 613 if (err)
614 return ERR_PTR(err);
485 615
486 switch (n) { 616 attr = bp->attr;
487 case 0: child->thread.debugreg0 = data; break; 617 attr.bp_len = gen_len;
488 case 1: child->thread.debugreg1 = data; break; 618 attr.bp_type = gen_type;
489 case 2: child->thread.debugreg2 = data; break; 619 attr.disabled = disabled;
490 case 3: child->thread.debugreg3 = data; break;
491 620
492 case 6: 621 return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
493 if ((data & ~0xffffffffUL) != 0) 622}
494 return -EIO; 623
495 child->thread.debugreg6 = data; 624/*
496 break; 625 * Handle ptrace writes to debug register 7.
626 */
627static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
628{
629 struct thread_struct *thread = &(tsk->thread);
630 unsigned long old_dr7;
631 int i, orig_ret = 0, rc = 0;
632 int enabled, second_pass = 0;
633 unsigned len, type;
634 struct perf_event *bp;
635
636 data &= ~DR_CONTROL_RESERVED;
637 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
638restore:
639 /*
640 * Loop through all the hardware breakpoints, making the
641 * appropriate changes to each.
642 */
643 for (i = 0; i < HBP_NUM; i++) {
644 enabled = decode_dr7(data, i, &len, &type);
645 bp = thread->ptrace_bps[i];
646
647 if (!enabled) {
648 if (bp) {
649 /*
650 * Don't unregister the breakpoints right-away,
651 * unless all register_user_hw_breakpoint()
652 * requests have succeeded. This prevents
653 * any window of opportunity for debug
654 * register grabbing by other users.
655 */
656 if (!second_pass)
657 continue;
658
659 thread->ptrace_bps[i] = NULL;
660 bp = ptrace_modify_breakpoint(bp, len, type,
661 tsk, 1);
662 if (IS_ERR(bp)) {
663 rc = PTR_ERR(bp);
664 thread->ptrace_bps[i] = NULL;
665 break;
666 }
667 thread->ptrace_bps[i] = bp;
668 }
669 continue;
670 }
671
672 bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
673
674 /* Incorrect bp, or we have a bug in bp API */
675 if (IS_ERR(bp)) {
676 rc = PTR_ERR(bp);
677 thread->ptrace_bps[i] = NULL;
678 break;
679 }
680 thread->ptrace_bps[i] = bp;
681 }
682 /*
683 * Make a second pass to free the remaining unused breakpoints
684 * or to restore the original breakpoints if an error occurred.
685 */
686 if (!second_pass) {
687 second_pass = 1;
688 if (rc < 0) {
689 orig_ret = rc;
690 data = old_dr7;
691 }
692 goto restore;
693 }
694 return ((orig_ret < 0) ? orig_ret : rc);
695}
696
697/*
698 * Handle PTRACE_PEEKUSR calls for the debug register area.
699 */
700static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
701{
702 struct thread_struct *thread = &(tsk->thread);
703 unsigned long val = 0;
497 704
498 case 7: 705 if (n < HBP_NUM) {
706 struct perf_event *bp;
707 bp = thread->ptrace_bps[n];
708 if (!bp)
709 return 0;
710 val = bp->hw.info.address;
711 } else if (n == 6) {
712 val = thread->debugreg6;
713 } else if (n == 7) {
714 val = ptrace_get_dr7(thread->ptrace_bps);
715 }
716 return val;
717}
718
719static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
720 unsigned long addr)
721{
722 struct perf_event *bp;
723 struct thread_struct *t = &tsk->thread;
724 DEFINE_BREAKPOINT_ATTR(attr);
725
726 if (!t->ptrace_bps[nr]) {
499 /* 727 /*
500 * Sanity-check data. Take one half-byte at once with 728 * Put stub len and type to register (reserve) an inactive but
501 * check = (val >> (16 + 4*i)) & 0xf. It contains the 729 * correct bp
502 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
503 * 2 and 3 are LENi. Given a list of invalid values,
504 * we do mask |= 1 << invalid_value, so that
505 * (mask >> check) & 1 is a correct test for invalid
506 * values.
507 *
508 * R/Wi contains the type of the breakpoint /
509 * watchpoint, LENi contains the length of the watched
510 * data in the watchpoint case.
511 *
512 * The invalid values are:
513 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
514 * - R/Wi == 0x10 (break on I/O reads or writes), so
515 * mask |= 0x4444.
516 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
517 * 0x1110.
518 *
519 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
520 *
521 * See the Intel Manual "System Programming Guide",
522 * 15.2.4
523 *
524 * Note that LENi == 0x10 is defined on x86_64 in long
525 * mode (i.e. even for 32-bit userspace software, but
526 * 64-bit kernel), so the x86_64 mask value is 0x5454.
527 * See the AMD manual no. 24593 (AMD64 System Programming)
528 */ 730 */
529#ifdef CONFIG_X86_32 731 attr.bp_addr = addr;
530#define DR7_MASK 0x5f54 732 attr.bp_len = HW_BREAKPOINT_LEN_1;
531#else 733 attr.bp_type = HW_BREAKPOINT_W;
532#define DR7_MASK 0x5554 734 attr.disabled = 1;
533#endif 735
534 data &= ~DR_CONTROL_RESERVED; 736 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
535 for (i = 0; i < 4; i++) 737 } else {
536 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) 738 bp = t->ptrace_bps[nr];
537 return -EIO; 739 t->ptrace_bps[nr] = NULL;
538 child->thread.debugreg7 = data; 740
539 if (data) 741 attr = bp->attr;
540 set_tsk_thread_flag(child, TIF_DEBUG); 742 attr.bp_addr = addr;
541 else 743 bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
542 clear_tsk_thread_flag(child, TIF_DEBUG);
543 break;
544 } 744 }
745 /*
746 * CHECKME: the previous code returned -EIO if the addr wasn't a
747 * valid task virtual addr. The new one will return -EINVAL in this
748 * case.
749 * -EINVAL may be what we want for in-kernel breakpoints users, but
750 * -EIO looks better for ptrace, since we refuse a register writing
751 * for the user. And anyway this is the previous behaviour.
752 */
753 if (IS_ERR(bp))
754 return PTR_ERR(bp);
755
756 t->ptrace_bps[nr] = bp;
545 757
546 return 0; 758 return 0;
547} 759}
548 760
549/* 761/*
762 * Handle PTRACE_POKEUSR calls for the debug register area.
763 */
764int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
765{
766 struct thread_struct *thread = &(tsk->thread);
767 int rc = 0;
768
769 /* There are no DR4 or DR5 registers */
770 if (n == 4 || n == 5)
771 return -EIO;
772
773 if (n == 6) {
774 thread->debugreg6 = val;
775 goto ret_path;
776 }
777 if (n < HBP_NUM) {
778 rc = ptrace_set_breakpoint_addr(tsk, n, val);
779 if (rc)
780 return rc;
781 }
782 /* All that's left is DR7 */
783 if (n == 7)
784 rc = ptrace_write_dr7(tsk, val);
785
786ret_path:
787 return rc;
788}
789
790/*
550 * These access the current or another (stopped) task's io permission 791 * These access the current or another (stopped) task's io permission
551 * bitmap for debugging or core dump. 792 * bitmap for debugging or core dump.
552 */ 793 */