aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/ptrace.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
-rw-r--r--arch/x86/kernel/ptrace.c449
1 files changed, 343 insertions, 106 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 09ecbde91c13..04d182a7cfdb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -34,11 +36,13 @@
34#include <asm/prctl.h> 36#include <asm/prctl.h>
35#include <asm/proto.h> 37#include <asm/proto.h>
36#include <asm/ds.h> 38#include <asm/ds.h>
37 39#include <asm/hw_breakpoint.h>
38#include <trace/syscall.h>
39 40
40#include "tls.h" 41#include "tls.h"
41 42
43#define CREATE_TRACE_POINTS
44#include <trace/events/syscalls.h>
45
42enum x86_regset { 46enum x86_regset {
43 REGSET_GENERAL, 47 REGSET_GENERAL,
44 REGSET_FP, 48 REGSET_FP,
@@ -48,6 +52,118 @@ enum x86_regset {
48 REGSET_IOPERM32, 52 REGSET_IOPERM32,
49}; 53};
50 54
55struct pt_regs_offset {
56 const char *name;
57 int offset;
58};
59
60#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
61#define REG_OFFSET_END {.name = NULL, .offset = 0}
62
63static const struct pt_regs_offset regoffset_table[] = {
64#ifdef CONFIG_X86_64
65 REG_OFFSET_NAME(r15),
66 REG_OFFSET_NAME(r14),
67 REG_OFFSET_NAME(r13),
68 REG_OFFSET_NAME(r12),
69 REG_OFFSET_NAME(r11),
70 REG_OFFSET_NAME(r10),
71 REG_OFFSET_NAME(r9),
72 REG_OFFSET_NAME(r8),
73#endif
74 REG_OFFSET_NAME(bx),
75 REG_OFFSET_NAME(cx),
76 REG_OFFSET_NAME(dx),
77 REG_OFFSET_NAME(si),
78 REG_OFFSET_NAME(di),
79 REG_OFFSET_NAME(bp),
80 REG_OFFSET_NAME(ax),
81#ifdef CONFIG_X86_32
82 REG_OFFSET_NAME(ds),
83 REG_OFFSET_NAME(es),
84 REG_OFFSET_NAME(fs),
85 REG_OFFSET_NAME(gs),
86#endif
87 REG_OFFSET_NAME(orig_ax),
88 REG_OFFSET_NAME(ip),
89 REG_OFFSET_NAME(cs),
90 REG_OFFSET_NAME(flags),
91 REG_OFFSET_NAME(sp),
92 REG_OFFSET_NAME(ss),
93 REG_OFFSET_END,
94};
95
96/**
97 * regs_query_register_offset() - query register offset from its name
98 * @name: the name of a register
99 *
100 * regs_query_register_offset() returns the offset of a register in struct
101 * pt_regs from its name. If the name is invalid, this returns -EINVAL;
102 */
103int regs_query_register_offset(const char *name)
104{
105 const struct pt_regs_offset *roff;
106 for (roff = regoffset_table; roff->name != NULL; roff++)
107 if (!strcmp(roff->name, name))
108 return roff->offset;
109 return -EINVAL;
110}
111
112/**
113 * regs_query_register_name() - query register name from its offset
114 * @offset: the offset of a register in struct pt_regs.
115 *
116 * regs_query_register_name() returns the name of a register from its
117 * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
118 */
119const char *regs_query_register_name(unsigned int offset)
120{
121 const struct pt_regs_offset *roff;
122 for (roff = regoffset_table; roff->name != NULL; roff++)
123 if (roff->offset == offset)
124 return roff->name;
125 return NULL;
126}
127
128static const int arg_offs_table[] = {
129#ifdef CONFIG_X86_32
130 [0] = offsetof(struct pt_regs, ax),
131 [1] = offsetof(struct pt_regs, dx),
132 [2] = offsetof(struct pt_regs, cx)
133#else /* CONFIG_X86_64 */
134 [0] = offsetof(struct pt_regs, di),
135 [1] = offsetof(struct pt_regs, si),
136 [2] = offsetof(struct pt_regs, dx),
137 [3] = offsetof(struct pt_regs, cx),
138 [4] = offsetof(struct pt_regs, r8),
139 [5] = offsetof(struct pt_regs, r9)
140#endif
141};
142
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
51/* 167/*
52 * does not yet catch signals sent when the child dies. 168 * does not yet catch signals sent when the child dies.
53 * in exit.c or in signal.c. 169 * in exit.c or in signal.c.
@@ -136,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
136 return 0; 252 return 0;
137} 253}
138 254
139static unsigned long debugreg_addr_limit(struct task_struct *task)
140{
141 return TASK_SIZE - 3;
142}
143
144#else /* CONFIG_X86_64 */ 255#else /* CONFIG_X86_64 */
145 256
146#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 257#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -265,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
265 return 0; 376 return 0;
266} 377}
267 378
268static unsigned long debugreg_addr_limit(struct task_struct *task)
269{
270#ifdef CONFIG_IA32_EMULATION
271 if (test_tsk_thread_flag(task, TIF_IA32))
272 return IA32_PAGE_OFFSET - 3;
273#endif
274 return TASK_SIZE_MAX - 7;
275}
276
277#endif /* CONFIG_X86_32 */ 379#endif /* CONFIG_X86_32 */
278 380
279static unsigned long get_flags(struct task_struct *task) 381static unsigned long get_flags(struct task_struct *task)
@@ -324,16 +426,6 @@ static int putreg(struct task_struct *child,
324 return set_flags(child, value); 426 return set_flags(child, value);
325 427
326#ifdef CONFIG_X86_64 428#ifdef CONFIG_X86_64
327 /*
328 * Orig_ax is really just a flag with small positive and
329 * negative values, so make sure to always sign-extend it
330 * from 32 bits so that it works correctly regardless of
331 * whether we come from a 32-bit environment or not.
332 */
333 case offsetof(struct user_regs_struct, orig_ax):
334 value = (long) (s32) value;
335 break;
336
337 case offsetof(struct user_regs_struct,fs_base): 429 case offsetof(struct user_regs_struct,fs_base):
338 if (value >= TASK_SIZE_OF(child)) 430 if (value >= TASK_SIZE_OF(child))
339 return -EIO; 431 return -EIO;
@@ -463,99 +555,239 @@ static int genregs_set(struct task_struct *target,
463 return ret; 555 return ret;
464} 556}
465 557
558static void ptrace_triggered(struct perf_event *bp, void *data)
559{
560 int i;
561 struct thread_struct *thread = &(current->thread);
562
563 /*
564 * Store in the virtual DR6 register the fact that the breakpoint
565 * was hit so the thread's debugger will see it.
566 */
567 for (i = 0; i < HBP_NUM; i++) {
568 if (thread->ptrace_bps[i] == bp)
569 break;
570 }
571
572 thread->debugreg6 |= (DR_TRAP0 << i);
573}
574
466/* 575/*
467 * This function is trivial and will be inlined by the compiler. 576 * Walk through every ptrace breakpoints for this thread and
468 * Having it separates the implementation details of debug 577 * build the dr7 value on top of their attributes.
469 * registers from the interface details of ptrace. 578 *
470 */ 579 */
471static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 580static unsigned long ptrace_get_dr7(struct perf_event *bp[])
472{ 581{
473 switch (n) { 582 int i;
474 case 0: return child->thread.debugreg0; 583 int dr7 = 0;
475 case 1: return child->thread.debugreg1; 584 struct arch_hw_breakpoint *info;
476 case 2: return child->thread.debugreg2; 585
477 case 3: return child->thread.debugreg3; 586 for (i = 0; i < HBP_NUM; i++) {
478 case 6: return child->thread.debugreg6; 587 if (bp[i] && !bp[i]->attr.disabled) {
479 case 7: return child->thread.debugreg7; 588 info = counter_arch_bp(bp[i]);
589 dr7 |= encode_dr7(i, info->len, info->type);
590 }
480 } 591 }
481 return 0; 592
593 return dr7;
482} 594}
483 595
484static int ptrace_set_debugreg(struct task_struct *child, 596static struct perf_event *
485 int n, unsigned long data) 597ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
598 struct task_struct *tsk, int disabled)
486{ 599{
487 int i; 600 int err;
601 int gen_len, gen_type;
602 DEFINE_BREAKPOINT_ATTR(attr);
488 603
489 if (unlikely(n == 4 || n == 5)) 604 /*
490 return -EIO; 605 * We should have at least an inactive breakpoint at this
606 * slot. It means the user is writing dr7 without having
607 * written the address register first
608 */
609 if (!bp)
610 return ERR_PTR(-EINVAL);
491 611
492 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 612 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
493 return -EIO; 613 if (err)
614 return ERR_PTR(err);
494 615
495 switch (n) { 616 attr = bp->attr;
496 case 0: child->thread.debugreg0 = data; break; 617 attr.bp_len = gen_len;
497 case 1: child->thread.debugreg1 = data; break; 618 attr.bp_type = gen_type;
498 case 2: child->thread.debugreg2 = data; break; 619 attr.disabled = disabled;
499 case 3: child->thread.debugreg3 = data; break;
500 620
501 case 6: 621 return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
502 if ((data & ~0xffffffffUL) != 0) 622}
503 return -EIO; 623
504 child->thread.debugreg6 = data; 624/*
505 break; 625 * Handle ptrace writes to debug register 7.
626 */
627static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
628{
629 struct thread_struct *thread = &(tsk->thread);
630 unsigned long old_dr7;
631 int i, orig_ret = 0, rc = 0;
632 int enabled, second_pass = 0;
633 unsigned len, type;
634 struct perf_event *bp;
635
636 data &= ~DR_CONTROL_RESERVED;
637 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
638restore:
639 /*
640 * Loop through all the hardware breakpoints, making the
641 * appropriate changes to each.
642 */
643 for (i = 0; i < HBP_NUM; i++) {
644 enabled = decode_dr7(data, i, &len, &type);
645 bp = thread->ptrace_bps[i];
646
647 if (!enabled) {
648 if (bp) {
649 /*
650 * Don't unregister the breakpoints right-away,
651 * unless all register_user_hw_breakpoint()
652 * requests have succeeded. This prevents
653 * any window of opportunity for debug
654 * register grabbing by other users.
655 */
656 if (!second_pass)
657 continue;
658
659 thread->ptrace_bps[i] = NULL;
660 bp = ptrace_modify_breakpoint(bp, len, type,
661 tsk, 1);
662 if (IS_ERR(bp)) {
663 rc = PTR_ERR(bp);
664 thread->ptrace_bps[i] = NULL;
665 break;
666 }
667 thread->ptrace_bps[i] = bp;
668 }
669 continue;
670 }
671
672 bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
673
674 /* Incorrect bp, or we have a bug in bp API */
675 if (IS_ERR(bp)) {
676 rc = PTR_ERR(bp);
677 thread->ptrace_bps[i] = NULL;
678 break;
679 }
680 thread->ptrace_bps[i] = bp;
681 }
682 /*
683 * Make a second pass to free the remaining unused breakpoints
684 * or to restore the original breakpoints if an error occurred.
685 */
686 if (!second_pass) {
687 second_pass = 1;
688 if (rc < 0) {
689 orig_ret = rc;
690 data = old_dr7;
691 }
692 goto restore;
693 }
694 return ((orig_ret < 0) ? orig_ret : rc);
695}
696
697/*
698 * Handle PTRACE_PEEKUSR calls for the debug register area.
699 */
700static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
701{
702 struct thread_struct *thread = &(tsk->thread);
703 unsigned long val = 0;
704
705 if (n < HBP_NUM) {
706 struct perf_event *bp;
707 bp = thread->ptrace_bps[n];
708 if (!bp)
709 return 0;
710 val = bp->hw.info.address;
711 } else if (n == 6) {
712 val = thread->debugreg6;
713 } else if (n == 7) {
714 val = ptrace_get_dr7(thread->ptrace_bps);
715 }
716 return val;
717}
718
719static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
720 unsigned long addr)
721{
722 struct perf_event *bp;
723 struct thread_struct *t = &tsk->thread;
724 DEFINE_BREAKPOINT_ATTR(attr);
506 725
507 case 7: 726 if (!t->ptrace_bps[nr]) {
508 /* 727 /*
509 * Sanity-check data. Take one half-byte at once with 728 * Put stub len and type to register (reserve) an inactive but
510 * check = (val >> (16 + 4*i)) & 0xf. It contains the 729 * correct bp
511 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
512 * 2 and 3 are LENi. Given a list of invalid values,
513 * we do mask |= 1 << invalid_value, so that
514 * (mask >> check) & 1 is a correct test for invalid
515 * values.
516 *
517 * R/Wi contains the type of the breakpoint /
518 * watchpoint, LENi contains the length of the watched
519 * data in the watchpoint case.
520 *
521 * The invalid values are:
522 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
523 * - R/Wi == 0x10 (break on I/O reads or writes), so
524 * mask |= 0x4444.
525 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
526 * 0x1110.
527 *
528 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
529 *
530 * See the Intel Manual "System Programming Guide",
531 * 15.2.4
532 *
533 * Note that LENi == 0x10 is defined on x86_64 in long
534 * mode (i.e. even for 32-bit userspace software, but
535 * 64-bit kernel), so the x86_64 mask value is 0x5454.
536 * See the AMD manual no. 24593 (AMD64 System Programming)
537 */ 730 */
538#ifdef CONFIG_X86_32 731 attr.bp_addr = addr;
539#define DR7_MASK 0x5f54 732 attr.bp_len = HW_BREAKPOINT_LEN_1;
540#else 733 attr.bp_type = HW_BREAKPOINT_W;
541#define DR7_MASK 0x5554 734 attr.disabled = 1;
542#endif 735
543 data &= ~DR_CONTROL_RESERVED; 736 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
544 for (i = 0; i < 4; i++) 737 } else {
545 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) 738 bp = t->ptrace_bps[nr];
546 return -EIO; 739 t->ptrace_bps[nr] = NULL;
547 child->thread.debugreg7 = data; 740
548 if (data) 741 attr = bp->attr;
549 set_tsk_thread_flag(child, TIF_DEBUG); 742 attr.bp_addr = addr;
550 else 743 bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
551 clear_tsk_thread_flag(child, TIF_DEBUG);
552 break;
553 } 744 }
745 /*
746 * CHECKME: the previous code returned -EIO if the addr wasn't a
747 * valid task virtual addr. The new one will return -EINVAL in this
748 * case.
749 * -EINVAL may be what we want for in-kernel breakpoints users, but
750 * -EIO looks better for ptrace, since we refuse a register writing
751 * for the user. And anyway this is the previous behaviour.
752 */
753 if (IS_ERR(bp))
754 return PTR_ERR(bp);
755
756 t->ptrace_bps[nr] = bp;
554 757
555 return 0; 758 return 0;
556} 759}
557 760
558/* 761/*
762 * Handle PTRACE_POKEUSR calls for the debug register area.
763 */
764int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
765{
766 struct thread_struct *thread = &(tsk->thread);
767 int rc = 0;
768
769 /* There are no DR4 or DR5 registers */
770 if (n == 4 || n == 5)
771 return -EIO;
772
773 if (n == 6) {
774 thread->debugreg6 = val;
775 goto ret_path;
776 }
777 if (n < HBP_NUM) {
778 rc = ptrace_set_breakpoint_addr(tsk, n, val);
779 if (rc)
780 return rc;
781 }
782 /* All that's left is DR7 */
783 if (n == 7)
784 rc = ptrace_write_dr7(tsk, val);
785
786ret_path:
787 return rc;
788}
789
790/*
559 * These access the current or another (stopped) task's io permission 791 * These access the current or another (stopped) task's io permission
560 * bitmap for debugging or core dump. 792 * bitmap for debugging or core dump.
561 */ 793 */
@@ -1125,10 +1357,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
1125 1357
1126 case offsetof(struct user32, regs.orig_eax): 1358 case offsetof(struct user32, regs.orig_eax):
1127 /* 1359 /*
1128 * Sign-extend the value so that orig_eax = -1 1360 * A 32-bit debugger setting orig_eax means to restore
1129 * causes (long)orig_ax < 0 tests to fire correctly. 1361 * the state of the task restarting a 32-bit syscall.
1362 * Make sure we interpret the -ERESTART* codes correctly
1363 * in case the task is not actually still sitting at the
1364 * exit from a 32-bit syscall with TS_COMPAT still set.
1130 */ 1365 */
1131 regs->orig_ax = (long) (s32) value; 1366 regs->orig_ax = value;
1367 if (syscall_get_nr(child, regs) >= 0)
1368 task_thread_info(child)->status |= TS_COMPAT;
1132 break; 1369 break;
1133 1370
1134 case offsetof(struct user32, regs.eflags): 1371 case offsetof(struct user32, regs.eflags):
@@ -1497,8 +1734,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
1497 tracehook_report_syscall_entry(regs)) 1734 tracehook_report_syscall_entry(regs))
1498 ret = -1L; 1735 ret = -1L;
1499 1736
1500 if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) 1737 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1501 ftrace_syscall_enter(regs); 1738 trace_sys_enter(regs, regs->orig_ax);
1502 1739
1503 if (unlikely(current->audit_context)) { 1740 if (unlikely(current->audit_context)) {
1504 if (IS_IA32) 1741 if (IS_IA32)
@@ -1523,8 +1760,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
1523 if (unlikely(current->audit_context)) 1760 if (unlikely(current->audit_context))
1524 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1761 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1525 1762
1526 if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) 1763 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1527 ftrace_syscall_exit(regs); 1764 trace_sys_exit(regs, regs->ax);
1528 1765
1529 if (test_thread_flag(TIF_SYSCALL_TRACE)) 1766 if (test_thread_flag(TIF_SYSCALL_TRACE))
1530 tracehook_report_syscall_exit(regs, 0); 1767 tracehook_report_syscall_exit(regs, 0);