aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-11-21 08:07:23 -0500
committerIngo Molnar <mingo@elte.hu>2009-11-21 08:07:23 -0500
commit96200591a34f8ecb98481c626125df43a2463b55 (patch)
tree314c376b01f254d04f9aaf449b1f9147ad177fa6 /arch
parent7031281e02bf951a2259849217193fb9d75a9762 (diff)
parent68efa37df779b3e04280598e8b5b3a1919b65fee (diff)
Merge branch 'tracing/hw-breakpoints' into perf/core
Conflicts: arch/x86/kernel/kprobes.c kernel/trace/Makefile Merge reason: hw-breakpoints perf integration is looking good in testing and in reviews, plus conflicts are mounting up - so merge & resolve. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/a.out-core.h10
-rw-r--r--arch/x86/include/asm/debugreg.h33
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h73
-rw-r--r--arch/x86/include/asm/processor.h14
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c545
-rw-r--r--arch/x86/kernel/kgdb.c6
-rw-r--r--arch/x86/kernel/kprobes.c9
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/process.c21
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c7
-rw-r--r--arch/x86/kernel/ptrace.c293
-rw-r--r--arch/x86/kernel/signal.c9
-rw-r--r--arch/x86/kernel/traps.c73
-rw-r--r--arch/x86/kvm/x86.c18
-rw-r--r--arch/x86/mm/kmmio.c8
-rw-r--r--arch/x86/power/cpu.c26
22 files changed, 954 insertions, 212 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
126config HAVE_DEFAULT_NO_SPIN_MUTEXES 126config HAVE_DEFAULT_NO_SPIN_MUTEXES
127 bool 127 bool
128 128
129config HAVE_HW_BREAKPOINT
130 bool
131 depends on HAVE_PERF_EVENTS
132 select ANON_INODES
133 select PERF_EVENTS
134
135
129source "kernel/gcov/Kconfig" 136source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
49 select HAVE_KERNEL_GZIP 49 select HAVE_KERNEL_GZIP
50 select HAVE_KERNEL_BZIP2 50 select HAVE_KERNEL_BZIP2
51 select HAVE_KERNEL_LZMA 51 select HAVE_KERNEL_LZMA
52 select HAVE_HW_BREAKPOINT
52 select HAVE_ARCH_KMEMCHECK 53 select HAVE_ARCH_KMEMCHECK
53 54
54config OUTPUT_FORMAT 55config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
10header-y += sigcontext32.h 10header-y += sigcontext32.h
11header-y += ucontext.h 11header-y += ucontext.h
12header-y += processor-flags.h 12header-y += processor-flags.h
13header-y += hw_breakpoint.h
13 14
14unifdef-y += e820.h 15unifdef-y += e820.h
15unifdef-y += ist.h 16unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
17 17
18#include <linux/user.h> 18#include <linux/user.h>
19#include <linux/elfcore.h> 19#include <linux/elfcore.h>
20#include <asm/debugreg.h>
20 21
21/* 22/*
22 * fill in the user structure for an a.out core dump 23 * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
32 >> PAGE_SHIFT; 33 >> PAGE_SHIFT;
33 dump->u_dsize -= dump->u_tsize; 34 dump->u_dsize -= dump->u_tsize;
34 dump->u_ssize = 0; 35 dump->u_ssize = 0;
35 dump->u_debugreg[0] = current->thread.debugreg0; 36 aout_dump_debugregs(dump);
36 dump->u_debugreg[1] = current->thread.debugreg1;
37 dump->u_debugreg[2] = current->thread.debugreg2;
38 dump->u_debugreg[3] = current->thread.debugreg3;
39 dump->u_debugreg[4] = 0;
40 dump->u_debugreg[5] = 0;
41 dump->u_debugreg[6] = current->thread.debugreg6;
42 dump->u_debugreg[7] = current->thread.debugreg7;
43 37
44 if (dump->start_stack < TASK_SIZE) 38 if (dump->start_stack < TASK_SIZE)
45 dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) 39 dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..fdabd8435765 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
18#define DR_TRAP1 (0x2) /* db1 */ 18#define DR_TRAP1 (0x2) /* db1 */
19#define DR_TRAP2 (0x4) /* db2 */ 19#define DR_TRAP2 (0x4) /* db2 */
20#define DR_TRAP3 (0x8) /* db3 */ 20#define DR_TRAP3 (0x8) /* db3 */
21#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
21 22
22#define DR_STEP (0x4000) /* single-step */ 23#define DR_STEP (0x4000) /* single-step */
23#define DR_SWITCH (0x8000) /* task switch */ 24#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
49 50
50#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ 51#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
51#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ 52#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
53#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
54#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
52#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ 55#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
53 56
54#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ 57#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
67#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ 70#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
68#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ 71#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
69 72
73/*
74 * HW breakpoint additions
75 */
76#ifdef __KERNEL__
77
78DECLARE_PER_CPU(unsigned long, dr7);
79
/*
 * Switch off hardware breakpoints on the current cpu: clear the debug
 * control register first, then wipe the four breakpoint address
 * registers (DR0-DR3).
 */
static inline void hw_breakpoint_disable(void)
{
	int reg;

	/* Zero the control register for HW Breakpoint */
	set_debugreg(0UL, 7);

	/* Zero-out the individual HW breakpoint address registers */
	for (reg = 0; reg <= 3; reg++)
		set_debugreg(0UL, reg);
}
91
92static inline int hw_breakpoint_active(void)
93{
94 return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
95}
96
97extern void aout_dump_debugregs(struct user *dump);
98
99extern void hw_breakpoint_restore(void);
100
101#endif /* __KERNEL__ */
102
70#endif /* _ASM_X86_DEBUGREG_H */ 103#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H
3
4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H
6
/*
 * x86 description of a single hardware breakpoint.
 *
 * The symbol name should probably be handled at a higher level,
 * where the user is dealt with (display/resolving), rather than
 * being stored here.
 */
struct arch_hw_breakpoint {
	char *name; /* Contains name of the symbol to set bkpt */
	/* Address loaded into the debug address register */
	unsigned long address;
	/* One of the X86_BREAKPOINT_LEN_* encodings */
	u8 len;
	/* One of the X86_BREAKPOINT_{EXECUTE,WRITE,RW} encodings */
	u8 type;
};
18
19#include <linux/kdebug.h>
20#include <linux/percpu.h>
21#include <linux/list.h>
22
23/* Available HW breakpoint length encodings */
24#define X86_BREAKPOINT_LEN_1 0x40
25#define X86_BREAKPOINT_LEN_2 0x44
26#define X86_BREAKPOINT_LEN_4 0x4c
27#define X86_BREAKPOINT_LEN_EXECUTE 0x40
28
29#ifdef CONFIG_X86_64
30#define X86_BREAKPOINT_LEN_8 0x48
31#endif
32
33/* Available HW breakpoint type encodings */
34
35/* trigger on instruction execute */
36#define X86_BREAKPOINT_EXECUTE 0x80
37/* trigger on memory write */
38#define X86_BREAKPOINT_WRITE 0x81
39/* trigger on memory read or write */
40#define X86_BREAKPOINT_RW 0x83
41
42/* Total number of available HW breakpoint registers */
43#define HBP_NUM 4
44
45struct perf_event;
46struct pmu;
47
48extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
49extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
50 struct task_struct *tsk);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data);
53
54
55int arch_install_hw_breakpoint(struct perf_event *bp);
56void arch_uninstall_hw_breakpoint(struct perf_event *bp);
57void hw_breakpoint_pmu_read(struct perf_event *bp);
58void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
59
60extern void
61arch_fill_perf_breakpoint(struct perf_event *bp);
62
63unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
64int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
65
66extern int arch_bp_generic_fields(int x86_len, int x86_type,
67 int *gen_len, int *gen_type);
68
69extern struct pmu perf_ops_bp;
70
71#endif /* __KERNEL__ */
72#endif /* _I386_HW_BREAKPOINT_H */
73
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
30#include <linux/math64.h> 30#include <linux/math64.h>
31#include <linux/init.h> 31#include <linux/init.h>
32 32
33#define HBP_NUM 4
33/* 34/*
34 * Default implementation of macro that returns current 35 * Default implementation of macro that returns current
35 * instruction pointer ("program counter"). 36 * instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
422extern void free_thread_xstate(struct task_struct *); 423extern void free_thread_xstate(struct task_struct *);
423extern struct kmem_cache *task_xstate_cachep; 424extern struct kmem_cache *task_xstate_cachep;
424 425
426struct perf_event;
427
425struct thread_struct { 428struct thread_struct {
426 /* Cached TLS descriptors: */ 429 /* Cached TLS descriptors: */
427 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; 430 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
443 unsigned long fs; 446 unsigned long fs;
444#endif 447#endif
445 unsigned long gs; 448 unsigned long gs;
446 /* Hardware debugging registers: */ 449 /* Save middle states of ptrace breakpoints */
447 unsigned long debugreg0; 450 struct perf_event *ptrace_bps[HBP_NUM];
448 unsigned long debugreg1; 451 /* Debug status used for traps, single steps, etc... */
449 unsigned long debugreg2; 452 unsigned long debugreg6;
450 unsigned long debugreg3;
451 unsigned long debugreg6;
452 unsigned long debugreg7;
453 /* Fault info: */ 453 /* Fault info: */
454 unsigned long cr2; 454 unsigned long cr2;
455 unsigned long trap_no; 455 unsigned long trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
40obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 40obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
41obj-y += bootflag.o e820.o 41obj-y += bootflag.o e820.o
42obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 42obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
43obj-y += alternative.o i8253.o pci-nommu.o 43obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
44obj-y += tsc.o io_delay.o rtc.o 44obj-y += tsc.o io_delay.o rtc.o
45 45
46obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 46obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..752daebe91c6
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,545 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
19 */
20
21/*
22 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
23 * using the CPU's debug registers.
24 */
25
26#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h>
28#include <linux/irqflags.h>
29#include <linux/notifier.h>
30#include <linux/kallsyms.h>
31#include <linux/kprobes.h>
32#include <linux/percpu.h>
33#include <linux/kdebug.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/sched.h>
37#include <linux/init.h>
38#include <linux/smp.h>
39
40#include <asm/hw_breakpoint.h>
41#include <asm/processor.h>
42#include <asm/debugreg.h>
43
44/* Per cpu debug control register value */
45DEFINE_PER_CPU(unsigned long, dr7);
46EXPORT_PER_CPU_SYMBOL(dr7);
47
48/* Per cpu debug address registers values */
49static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
50
51/*
52 * Stores the breakpoints currently in use on each breakpoint address
53 * register for each cpus
54 */
55static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
56
57
58/*
59 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
60 * as stored in debug register 7.
61 */
62unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
63{
64 unsigned long bp_info;
65
66 bp_info = (len | type) & 0xf;
67 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
68 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
69 DR_GLOBAL_SLOWDOWN;
70 return bp_info;
71}
72
73/*
74 * Decode the length and type bits for a particular breakpoint as
75 * stored in debug register 7. Return the "enabled" status.
76 */
77int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
78{
79 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
80
81 *len = (bp_info & 0xc) | 0x40;
82 *type = (bp_info & 0x3) | 0x80;
83
84 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
85}
86
87/*
88 * Install a perf counter breakpoint.
89 *
90 * We seek a free debug address register and use it for this
91 * breakpoint. Eventually we enable it in the debug control register.
92 *
93 * Atomic: we hold the counter->ctx->lock and we only handle variables
94 * and registers local to this cpu.
95 */
96int arch_install_hw_breakpoint(struct perf_event *bp)
97{
98 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
99 unsigned long *dr7;
100 int i;
101
102 for (i = 0; i < HBP_NUM; i++) {
103 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
104
105 if (!*slot) {
106 *slot = bp;
107 break;
108 }
109 }
110
111 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
112 return -EBUSY;
113
114 set_debugreg(info->address, i);
115 __get_cpu_var(cpu_debugreg[i]) = info->address;
116
117 dr7 = &__get_cpu_var(dr7);
118 *dr7 |= encode_dr7(i, info->len, info->type);
119
120 set_debugreg(*dr7, 7);
121
122 return 0;
123}
124
125/*
126 * Uninstall the breakpoint contained in the given counter.
127 *
128 * First we search the debug address register it uses and then we disable
129 * it.
130 *
131 * Atomic: we hold the counter->ctx->lock and we only handle variables
132 * and registers local to this cpu.
133 */
134void arch_uninstall_hw_breakpoint(struct perf_event *bp)
135{
136 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
137 unsigned long *dr7;
138 int i;
139
140 for (i = 0; i < HBP_NUM; i++) {
141 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
142
143 if (*slot == bp) {
144 *slot = NULL;
145 break;
146 }
147 }
148
149 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
150 return;
151
152 dr7 = &__get_cpu_var(dr7);
153 *dr7 &= ~encode_dr7(i, info->len, info->type);
154
155 set_debugreg(*dr7, 7);
156}
157
158static int get_hbp_len(u8 hbp_len)
159{
160 unsigned int len_in_bytes = 0;
161
162 switch (hbp_len) {
163 case X86_BREAKPOINT_LEN_1:
164 len_in_bytes = 1;
165 break;
166 case X86_BREAKPOINT_LEN_2:
167 len_in_bytes = 2;
168 break;
169 case X86_BREAKPOINT_LEN_4:
170 len_in_bytes = 4;
171 break;
172#ifdef CONFIG_X86_64
173 case X86_BREAKPOINT_LEN_8:
174 len_in_bytes = 8;
175 break;
176#endif
177 }
178 return len_in_bytes;
179}
180
181/*
182 * Check for virtual address in user space.
183 */
184int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
185{
186 unsigned int len;
187
188 len = get_hbp_len(hbp_len);
189
190 return (va <= TASK_SIZE - len);
191}
192
193/*
194 * Check for virtual address in kernel space.
195 */
196static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
197{
198 unsigned int len;
199
200 len = get_hbp_len(hbp_len);
201
202 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
203}
204
205/*
206 * Store a breakpoint's encoded address, length, and type.
207 */
208static int arch_store_info(struct perf_event *bp)
209{
210 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
211 /*
212 * For kernel-addresses, either the address or symbol name can be
213 * specified.
214 */
215 if (info->name)
216 info->address = (unsigned long)
217 kallsyms_lookup_name(info->name);
218 if (info->address)
219 return 0;
220
221 return -EINVAL;
222}
223
224int arch_bp_generic_fields(int x86_len, int x86_type,
225 int *gen_len, int *gen_type)
226{
227 /* Len */
228 switch (x86_len) {
229 case X86_BREAKPOINT_LEN_1:
230 *gen_len = HW_BREAKPOINT_LEN_1;
231 break;
232 case X86_BREAKPOINT_LEN_2:
233 *gen_len = HW_BREAKPOINT_LEN_2;
234 break;
235 case X86_BREAKPOINT_LEN_4:
236 *gen_len = HW_BREAKPOINT_LEN_4;
237 break;
238#ifdef CONFIG_X86_64
239 case X86_BREAKPOINT_LEN_8:
240 *gen_len = HW_BREAKPOINT_LEN_8;
241 break;
242#endif
243 default:
244 return -EINVAL;
245 }
246
247 /* Type */
248 switch (x86_type) {
249 case X86_BREAKPOINT_EXECUTE:
250 *gen_type = HW_BREAKPOINT_X;
251 break;
252 case X86_BREAKPOINT_WRITE:
253 *gen_type = HW_BREAKPOINT_W;
254 break;
255 case X86_BREAKPOINT_RW:
256 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
257 break;
258 default:
259 return -EINVAL;
260 }
261
262 return 0;
263}
264
265
266static int arch_build_bp_info(struct perf_event *bp)
267{
268 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
269
270 info->address = bp->attr.bp_addr;
271
272 /* Len */
273 switch (bp->attr.bp_len) {
274 case HW_BREAKPOINT_LEN_1:
275 info->len = X86_BREAKPOINT_LEN_1;
276 break;
277 case HW_BREAKPOINT_LEN_2:
278 info->len = X86_BREAKPOINT_LEN_2;
279 break;
280 case HW_BREAKPOINT_LEN_4:
281 info->len = X86_BREAKPOINT_LEN_4;
282 break;
283#ifdef CONFIG_X86_64
284 case HW_BREAKPOINT_LEN_8:
285 info->len = X86_BREAKPOINT_LEN_8;
286 break;
287#endif
288 default:
289 return -EINVAL;
290 }
291
292 /* Type */
293 switch (bp->attr.bp_type) {
294 case HW_BREAKPOINT_W:
295 info->type = X86_BREAKPOINT_WRITE;
296 break;
297 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
298 info->type = X86_BREAKPOINT_RW;
299 break;
300 case HW_BREAKPOINT_X:
301 info->type = X86_BREAKPOINT_EXECUTE;
302 break;
303 default:
304 return -EINVAL;
305 }
306
307 return 0;
308}
309/*
310 * Validate the arch-specific HW Breakpoint register settings
311 */
312int arch_validate_hwbkpt_settings(struct perf_event *bp,
313 struct task_struct *tsk)
314{
315 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
316 unsigned int align;
317 int ret;
318
319
320 ret = arch_build_bp_info(bp);
321 if (ret)
322 return ret;
323
324 ret = -EINVAL;
325
326 if (info->type == X86_BREAKPOINT_EXECUTE)
327 /*
328 * Ptrace-refactoring code
329 * For now, we'll allow instruction breakpoint only for user-space
330 * addresses
331 */
332 if ((!arch_check_va_in_userspace(info->address, info->len)) &&
333 info->len != X86_BREAKPOINT_EXECUTE)
334 return ret;
335
336 switch (info->len) {
337 case X86_BREAKPOINT_LEN_1:
338 align = 0;
339 break;
340 case X86_BREAKPOINT_LEN_2:
341 align = 1;
342 break;
343 case X86_BREAKPOINT_LEN_4:
344 align = 3;
345 break;
346#ifdef CONFIG_X86_64
347 case X86_BREAKPOINT_LEN_8:
348 align = 7;
349 break;
350#endif
351 default:
352 return ret;
353 }
354
355 if (bp->callback)
356 ret = arch_store_info(bp);
357
358 if (ret < 0)
359 return ret;
360 /*
361 * Check that the low-order bits of the address are appropriate
362 * for the alignment implied by len.
363 */
364 if (info->address & align)
365 return -EINVAL;
366
367 /* Check that the virtual address is in the proper range */
368 if (tsk) {
369 if (!arch_check_va_in_userspace(info->address, info->len))
370 return -EFAULT;
371 } else {
372 if (!arch_check_va_in_kernelspace(info->address, info->len))
373 return -EFAULT;
374 }
375
376 return 0;
377}
378
379/*
380 * Dump the debug register contents to the user.
381 * We can't dump our per cpu values because it
382 * may contain cpu wide breakpoint, something that
383 * doesn't belong to the current task.
384 *
385 * TODO: include non-ptrace user breakpoints (perf)
386 */
387void aout_dump_debugregs(struct user *dump)
388{
389 int i;
390 int dr7 = 0;
391 struct perf_event *bp;
392 struct arch_hw_breakpoint *info;
393 struct thread_struct *thread = &current->thread;
394
395 for (i = 0; i < HBP_NUM; i++) {
396 bp = thread->ptrace_bps[i];
397
398 if (bp && !bp->attr.disabled) {
399 dump->u_debugreg[i] = bp->attr.bp_addr;
400 info = counter_arch_bp(bp);
401 dr7 |= encode_dr7(i, info->len, info->type);
402 } else {
403 dump->u_debugreg[i] = 0;
404 }
405 }
406
407 dump->u_debugreg[4] = 0;
408 dump->u_debugreg[5] = 0;
409 dump->u_debugreg[6] = current->thread.debugreg6;
410
411 dump->u_debugreg[7] = dr7;
412}
413EXPORT_SYMBOL_GPL(aout_dump_debugregs);
414
415/*
416 * Release the user breakpoints used by ptrace
417 */
418void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
419{
420 int i;
421 struct thread_struct *t = &tsk->thread;
422
423 for (i = 0; i < HBP_NUM; i++) {
424 unregister_hw_breakpoint(t->ptrace_bps[i]);
425 t->ptrace_bps[i] = NULL;
426 }
427}
428
429void hw_breakpoint_restore(void)
430{
431 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
432 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
433 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
434 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
435 set_debugreg(current->thread.debugreg6, 6);
436 set_debugreg(__get_cpu_var(dr7), 7);
437}
438EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
439
440/*
441 * Handle debug exception notifications.
442 *
443 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
444 *
445 * NOTIFY_DONE returned if one of the following conditions is true.
446 * i) When the causative address is from user-space and the exception
447 * is a valid one, i.e. not triggered as a result of lazy debug register
448 * switching
449 * ii) When there are more bits than trap<n> set in DR6 register (such
450 * as BD, BS or BT) indicating that more than one debug condition is
451 * met and requires some more action in do_debug().
452 *
453 * NOTIFY_STOP returned for all other cases
454 *
455 */
456static int __kprobes hw_breakpoint_handler(struct die_args *args)
457{
458 int i, cpu, rc = NOTIFY_STOP;
459 struct perf_event *bp;
460 unsigned long dr7, dr6;
461 unsigned long *dr6_p;
462
463 /* The DR6 value is pointed by args->err */
464 dr6_p = (unsigned long *)ERR_PTR(args->err);
465 dr6 = *dr6_p;
466
467 /* Do an early return if no trap bits are set in DR6 */
468 if ((dr6 & DR_TRAP_BITS) == 0)
469 return NOTIFY_DONE;
470
471 get_debugreg(dr7, 7);
472 /* Disable breakpoints during exception handling */
473 set_debugreg(0UL, 7);
474 /*
475 * Assert that local interrupts are disabled
476 * Reset the DRn bits in the virtualized register value.
477 * The ptrace trigger routine will add in whatever is needed.
478 */
479 current->thread.debugreg6 &= ~DR_TRAP_BITS;
480 cpu = get_cpu();
481
482 /* Handle all the breakpoints that were triggered */
483 for (i = 0; i < HBP_NUM; ++i) {
484 if (likely(!(dr6 & (DR_TRAP0 << i))))
485 continue;
486
487 /*
488 * The counter may be concurrently released but that can only
489 * occur from a call_rcu() path. We can then safely fetch
490 * the breakpoint, use its callback, touch its counter
491 * while we are in an rcu_read_lock() path.
492 */
493 rcu_read_lock();
494
495 bp = per_cpu(bp_per_reg[i], cpu);
496 if (bp)
497 rc = NOTIFY_DONE;
498 /*
499 * Reset the 'i'th TRAP bit in dr6 to denote completion of
500 * exception handling
501 */
502 (*dr6_p) &= ~(DR_TRAP0 << i);
503 /*
504 * bp can be NULL due to lazy debug register switching
505 * or due to concurrent perf counter removing.
506 */
507 if (!bp) {
508 rcu_read_unlock();
509 break;
510 }
511
512 (bp->callback)(bp, args->regs);
513
514 rcu_read_unlock();
515 }
516 if (dr6 & (~DR_TRAP_BITS))
517 rc = NOTIFY_DONE;
518
519 set_debugreg(dr7, 7);
520 put_cpu();
521
522 return rc;
523}
524
525/*
526 * Handle debug exception notifications.
527 */
528int __kprobes hw_breakpoint_exceptions_notify(
529 struct notifier_block *unused, unsigned long val, void *data)
530{
531 if (val != DIE_DEBUG)
532 return NOTIFY_DONE;
533
534 return hw_breakpoint_handler(data);
535}
536
/* Read hook for breakpoint events; currently an empty stub. */
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
	/* TODO */
}
541
/* Unthrottle hook for breakpoint events; currently an empty stub. */
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
{
	/* TODO */
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45 45
46#include <asm/debugreg.h>
46#include <asm/apicdef.h> 47#include <asm/apicdef.h>
47#include <asm/system.h> 48#include <asm/system.h>
48 49
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
434 "resuming...\n"); 435 "resuming...\n");
435 kgdb_arch_handle_exception(args->trapnr, args->signr, 436 kgdb_arch_handle_exception(args->trapnr, args->signr,
436 args->err, "c", "", regs); 437 args->err, "c", "", regs);
438 /*
439 * Reset the BS bit in dr6 (pointed by args->err) to
440 * denote completion of processing
441 */
442 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
437 443
438 return NOTIFY_STOP; 444 return NOTIFY_STOP;
439} 445}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c5f1f117e0c0..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -56,6 +56,7 @@
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57#include <asm/alternative.h> 57#include <asm/alternative.h>
58#include <asm/insn.h> 58#include <asm/insn.h>
59#include <asm/debugreg.h>
59 60
60void jprobe_return_end(void); 61void jprobe_return_end(void);
61 62
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
945 ret = NOTIFY_STOP; 946 ret = NOTIFY_STOP;
946 break; 947 break;
947 case DIE_DEBUG: 948 case DIE_DEBUG:
948 if (post_kprobe_handler(args->regs)) 949 if (post_kprobe_handler(args->regs)) {
950 /*
951 * Reset the BS bit in dr6 (pointed by args->err) to
952 * denote completion of processing
953 */
954 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
949 ret = NOTIFY_STOP; 955 ret = NOTIFY_STOP;
956 }
950 break; 957 break;
951 case DIE_GPF: 958 case DIE_GPF:
952 /* 959 /*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/debugreg.h>
28 29
29static void set_idt(void *newidt, __u16 limit) 30static void set_idt(void *newidt, __u16 limit)
30{ 31{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
202 203
203 /* Interrupts aren't acceptable while we reboot */ 204 /* Interrupts aren't acceptable while we reboot */
204 local_irq_disable(); 205 local_irq_disable();
206 hw_breakpoint_disable();
205 207
206 if (image->preserve_context) { 208 if (image->preserve_context) {
207#ifdef CONFIG_X86_IO_APIC 209#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/debugreg.h>
21 22
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd, 23static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr) 24 unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
282 283
283 /* Interrupts aren't acceptable while we reboot */ 284 /* Interrupts aren't acceptable while we reboot */
284 local_irq_disable(); 285 local_irq_disable();
286 hw_breakpoint_disable();
285 287
286 if (image->preserve_context) { 288 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC 289#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/events/power.h> 12#include <trace/events/power.h>
13#include <linux/hw_breakpoint.h>
13#include <asm/system.h> 14#include <asm/system.h>
14#include <asm/apic.h> 15#include <asm/apic.h>
15#include <asm/syscalls.h> 16#include <asm/syscalls.h>
@@ -17,6 +18,7 @@
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include <asm/i387.h> 19#include <asm/i387.h>
19#include <asm/ds.h> 20#include <asm/ds.h>
21#include <asm/debugreg.h>
20 22
21unsigned long idle_halt; 23unsigned long idle_halt;
22EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
103 } 105 }
104#endif 106#endif
105 107
106 clear_tsk_thread_flag(tsk, TIF_DEBUG); 108 flush_ptrace_hw_breakpoint(tsk);
107
108 tsk->thread.debugreg0 = 0;
109 tsk->thread.debugreg1 = 0;
110 tsk->thread.debugreg2 = 0;
111 tsk->thread.debugreg3 = 0;
112 tsk->thread.debugreg6 = 0;
113 tsk->thread.debugreg7 = 0;
114 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 109 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
115 /* 110 /*
116 * Forget coprocessor state.. 111 * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
192 else if (next->debugctlmsr != prev->debugctlmsr) 187 else if (next->debugctlmsr != prev->debugctlmsr)
193 update_debugctlmsr(next->debugctlmsr); 188 update_debugctlmsr(next->debugctlmsr);
194 189
195 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
196 set_debugreg(next->debugreg0, 0);
197 set_debugreg(next->debugreg1, 1);
198 set_debugreg(next->debugreg2, 2);
199 set_debugreg(next->debugreg3, 3);
200 /* no 4 and 5 */
201 set_debugreg(next->debugreg6, 6);
202 set_debugreg(next->debugreg7, 7);
203 }
204
205 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 190 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
206 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 191 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
207 /* prev and next are different */ 192 /* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
58#include <asm/idle.h> 58#include <asm/idle.h>
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h>
61 62
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 64
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
259 260
260 task_user_gs(p) = get_user_gs(regs); 261 task_user_gs(p) = get_user_gs(regs);
261 262
263 p->thread.io_bitmap_ptr = NULL;
262 tsk = current; 264 tsk = current;
265 err = -ENOMEM;
266
267 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
268
263 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 269 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
264 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 270 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
265 IO_BITMAP_BYTES, GFP_KERNEL); 271 IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..70cf15873f3d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
52#include <asm/idle.h> 52#include <asm/idle.h>
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h>
55 56
56asmlinkage extern void ret_from_fork(void); 57asmlinkage extern void ret_from_fork(void);
57 58
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
297 298
298 p->thread.fs = me->thread.fs; 299 p->thread.fs = me->thread.fs;
299 p->thread.gs = me->thread.gs; 300 p->thread.gs = me->thread.gs;
301 p->thread.io_bitmap_ptr = NULL;
300 302
301 savesegment(gs, p->thread.gsindex); 303 savesegment(gs, p->thread.gsindex);
302 savesegment(fs, p->thread.fsindex); 304 savesegment(fs, p->thread.fsindex);
303 savesegment(es, p->thread.es); 305 savesegment(es, p->thread.es);
304 savesegment(ds, p->thread.ds); 306 savesegment(ds, p->thread.ds);
305 307
308 err = -ENOMEM;
309 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
310
306 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 311 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
307 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 312 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
308 if (!p->thread.io_bitmap_ptr) { 313 if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
341 kfree(p->thread.io_bitmap_ptr); 346 kfree(p->thread.io_bitmap_ptr);
342 p->thread.io_bitmap_max = 0; 347 p->thread.io_bitmap_max = 0;
343 } 348 }
349
344 return err; 350 return err;
345} 351}
346 352
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
495 */ 501 */
496 if (preload_fpu) 502 if (preload_fpu)
497 __math_state_restore(); 503 __math_state_restore();
504
498 return prev_p; 505 return prev_p;
499} 506}
500 507
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4f76d275ee4..b25f8947ed7a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
34#include <asm/prctl.h> 36#include <asm/prctl.h>
35#include <asm/proto.h> 37#include <asm/proto.h>
36#include <asm/ds.h> 38#include <asm/ds.h>
39#include <asm/hw_breakpoint.h>
37 40
38#include "tls.h" 41#include "tls.h"
39 42
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
249 return 0; 252 return 0;
250} 253}
251 254
252static unsigned long debugreg_addr_limit(struct task_struct *task)
253{
254 return TASK_SIZE - 3;
255}
256
257#else /* CONFIG_X86_64 */ 255#else /* CONFIG_X86_64 */
258 256
259#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 257#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
378 return 0; 376 return 0;
379} 377}
380 378
381static unsigned long debugreg_addr_limit(struct task_struct *task)
382{
383#ifdef CONFIG_IA32_EMULATION
384 if (test_tsk_thread_flag(task, TIF_IA32))
385 return IA32_PAGE_OFFSET - 3;
386#endif
387 return TASK_SIZE_MAX - 7;
388}
389
390#endif /* CONFIG_X86_32 */ 379#endif /* CONFIG_X86_32 */
391 380
392static unsigned long get_flags(struct task_struct *task) 381static unsigned long get_flags(struct task_struct *task)
@@ -566,99 +555,229 @@ static int genregs_set(struct task_struct *target,
566 return ret; 555 return ret;
567} 556}
568 557
558static void ptrace_triggered(struct perf_event *bp, void *data)
559{
560 int i;
561 struct thread_struct *thread = &(current->thread);
562
563 /*
564 * Store in the virtual DR6 register the fact that the breakpoint
565 * was hit so the thread's debugger will see it.
566 */
567 for (i = 0; i < HBP_NUM; i++) {
568 if (thread->ptrace_bps[i] == bp)
569 break;
570 }
571
572 thread->debugreg6 |= (DR_TRAP0 << i);
573}
574
569/* 575/*
570 * This function is trivial and will be inlined by the compiler. 576 * Walk through all ptrace breakpoints for this thread and
571 * Having it separates the implementation details of debug 577 * build the dr7 value on top of their attributes.
572 * registers from the interface details of ptrace. 578 *
573 */ 579 */
574static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 580static unsigned long ptrace_get_dr7(struct perf_event *bp[])
575{ 581{
576 switch (n) { 582 int i;
577 case 0: return child->thread.debugreg0; 583 int dr7 = 0;
578 case 1: return child->thread.debugreg1; 584 struct arch_hw_breakpoint *info;
579 case 2: return child->thread.debugreg2; 585
580 case 3: return child->thread.debugreg3; 586 for (i = 0; i < HBP_NUM; i++) {
581 case 6: return child->thread.debugreg6; 587 if (bp[i] && !bp[i]->attr.disabled) {
582 case 7: return child->thread.debugreg7; 588 info = counter_arch_bp(bp[i]);
589 dr7 |= encode_dr7(i, info->len, info->type);
590 }
583 } 591 }
584 return 0; 592
593 return dr7;
585} 594}
586 595
587static int ptrace_set_debugreg(struct task_struct *child, 596/*
588 int n, unsigned long data) 597 * Handle ptrace writes to debug register 7.
598 */
599static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
589{ 600{
590 int i; 601 struct thread_struct *thread = &(tsk->thread);
602 unsigned long old_dr7;
603 int i, orig_ret = 0, rc = 0;
604 int enabled, second_pass = 0;
605 unsigned len, type;
606 int gen_len, gen_type;
607 struct perf_event *bp;
608
609 data &= ~DR_CONTROL_RESERVED;
610 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
611restore:
612 /*
613 * Loop through all the hardware breakpoints, making the
614 * appropriate changes to each.
615 */
616 for (i = 0; i < HBP_NUM; i++) {
617 enabled = decode_dr7(data, i, &len, &type);
618 bp = thread->ptrace_bps[i];
619
620 if (!enabled) {
621 if (bp) {
622 /*
623 * Don't unregister the breakpoints right away,
624 * unless all register_user_hw_breakpoint()
625 * requests have succeeded. This prevents
626 * any window of opportunity for debug
627 * register grabbing by other users.
628 */
629 if (!second_pass)
630 continue;
631 thread->ptrace_bps[i] = NULL;
632 unregister_hw_breakpoint(bp);
633 }
634 continue;
635 }
591 636
592 if (unlikely(n == 4 || n == 5)) 637 /*
593 return -EIO; 638 * We should have at least an inactive breakpoint at this
594 639 * slot. If not, the user is writing dr7 without having
640 * written the address register first
641 */
642 if (!bp) {
643 rc = -EINVAL;
644 break;
645 }
594 646
595 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 647 rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
596 return -EIO; 648 if (rc)
649 break;
597 650
598 switch (n) { 651 /*
599 case 0: child->thread.debugreg0 = data; break; 652 * This is a temporary thing as bp is unregistered/registered
600 case 1: child->thread.debugreg1 = data; break; 653 * to simulate modification
601 case 2: child->thread.debugreg2 = data; break; 654 */
602 case 3: child->thread.debugreg3 = data; break; 655 bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
656 gen_type, bp->callback,
657 tsk, true);
658 thread->ptrace_bps[i] = NULL;
603 659
604 case 6: 660 if (!bp) { /* incorrect bp, or we have a bug in bp API */
605 if ((data & ~0xffffffffUL) != 0) 661 rc = -EINVAL;
606 return -EIO; 662 break;
607 child->thread.debugreg6 = data; 663 }
608 break; 664 if (IS_ERR(bp)) {
665 rc = PTR_ERR(bp);
666 bp = NULL;
667 break;
668 }
669 thread->ptrace_bps[i] = bp;
670 }
671 /*
672 * Make a second pass to free the remaining unused breakpoints
673 * or to restore the original breakpoints if an error occurred.
674 */
675 if (!second_pass) {
676 second_pass = 1;
677 if (rc < 0) {
678 orig_ret = rc;
679 data = old_dr7;
680 }
681 goto restore;
682 }
683 return ((orig_ret < 0) ? orig_ret : rc);
684}
609 685
610 case 7: 686/*
687 * Handle PTRACE_PEEKUSR calls for the debug register area.
688 */
689static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
690{
691 struct thread_struct *thread = &(tsk->thread);
692 unsigned long val = 0;
693
694 if (n < HBP_NUM) {
695 struct perf_event *bp;
696 bp = thread->ptrace_bps[n];
697 if (!bp)
698 return 0;
699 val = bp->hw.info.address;
700 } else if (n == 6) {
701 val = thread->debugreg6;
702 } else if (n == 7) {
703 val = ptrace_get_dr7(thread->ptrace_bps);
704 }
705 return val;
706}
707
708static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
709 unsigned long addr)
710{
711 struct perf_event *bp;
712 struct thread_struct *t = &tsk->thread;
713
714 if (!t->ptrace_bps[nr]) {
611 /* 715 /*
612 * Sanity-check data. Take one half-byte at once with 716 * Put stub len and type to register (reserve) an inactive but
613 * check = (val >> (16 + 4*i)) & 0xf. It contains the 717 * correct bp
614 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
615 * 2 and 3 are LENi. Given a list of invalid values,
616 * we do mask |= 1 << invalid_value, so that
617 * (mask >> check) & 1 is a correct test for invalid
618 * values.
619 *
620 * R/Wi contains the type of the breakpoint /
621 * watchpoint, LENi contains the length of the watched
622 * data in the watchpoint case.
623 *
624 * The invalid values are:
625 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
626 * - R/Wi == 0x10 (break on I/O reads or writes), so
627 * mask |= 0x4444.
628 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
629 * 0x1110.
630 *
631 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
632 *
633 * See the Intel Manual "System Programming Guide",
634 * 15.2.4
635 *
636 * Note that LENi == 0x10 is defined on x86_64 in long
637 * mode (i.e. even for 32-bit userspace software, but
638 * 64-bit kernel), so the x86_64 mask value is 0x5454.
639 * See the AMD manual no. 24593 (AMD64 System Programming)
640 */ 718 */
641#ifdef CONFIG_X86_32 719 bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
642#define DR7_MASK 0x5f54 720 HW_BREAKPOINT_W,
643#else 721 ptrace_triggered, tsk,
644#define DR7_MASK 0x5554 722 false);
645#endif 723 } else {
646 data &= ~DR_CONTROL_RESERVED; 724 bp = t->ptrace_bps[nr];
647 for (i = 0; i < 4; i++) 725 t->ptrace_bps[nr] = NULL;
648 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) 726 bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
649 return -EIO; 727 bp->attr.bp_type,
650 child->thread.debugreg7 = data; 728 bp->callback,
651 if (data) 729 tsk,
652 set_tsk_thread_flag(child, TIF_DEBUG); 730 bp->attr.disabled);
653 else
654 clear_tsk_thread_flag(child, TIF_DEBUG);
655 break;
656 } 731 }
657 732
733 if (!bp)
734 return -EIO;
735 /*
736 * CHECKME: the previous code returned -EIO if the addr wasn't a
737 * valid task virtual addr. The new one will return -EINVAL in this
738 * case.
739 * -EINVAL may be what we want for in-kernel breakpoints users, but
740 * -EIO looks better for ptrace, since we refuse a register write
741 * for the user. And anyway this is the previous behaviour.
742 */
743 if (IS_ERR(bp))
744 return PTR_ERR(bp);
745
746 t->ptrace_bps[nr] = bp;
747
658 return 0; 748 return 0;
659} 749}
660 750
661/* 751/*
752 * Handle PTRACE_POKEUSR calls for the debug register area.
753 */
754int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
755{
756 struct thread_struct *thread = &(tsk->thread);
757 int rc = 0;
758
759 /* There are no DR4 or DR5 registers */
760 if (n == 4 || n == 5)
761 return -EIO;
762
763 if (n == 6) {
764 thread->debugreg6 = val;
765 goto ret_path;
766 }
767 if (n < HBP_NUM) {
768 rc = ptrace_set_breakpoint_addr(tsk, n, val);
769 if (rc)
770 return rc;
771 }
772 /* All that's left is DR7 */
773 if (n == 7)
774 rc = ptrace_write_dr7(tsk, val);
775
776ret_path:
777 return rc;
778}
779
780/*
662 * These access the current or another (stopped) task's io permission 781 * These access the current or another (stopped) task's io permission
663 * bitmap for debugging or core dump. 782 * bitmap for debugging or core dump.
664 */ 783 */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
799 799
800 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 800 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
801 if (signr > 0) { 801 if (signr > 0) {
802 /*
803 * Re-enable any watchpoints before delivering the
804 * signal to user space. The processor register will
805 * have been cleared if the watchpoint triggered
806 * inside the kernel.
807 */
808 if (current->thread.debugreg7)
809 set_debugreg(current->thread.debugreg7, 7);
810
811 /* Whee! Actually deliver the signal. */ 802 /* Whee! Actually deliver the signal. */
812 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { 803 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
813 /* 804 /*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
529dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 529dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
530{ 530{
531 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
532 unsigned long condition; 532 unsigned long dr6;
533 int si_code; 533 int si_code;
534 534
535 get_debugreg(condition, 6); 535 get_debugreg(dr6, 6);
536 536
537 /* Catch kmemcheck conditions first of all! */ 537 /* Catch kmemcheck conditions first of all! */
538 if (condition & DR_STEP && kmemcheck_trap(regs)) 538 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
539 return; 539 return;
540 540
541 /* DR6 may or may not be cleared by the CPU */
542 set_debugreg(0, 6);
541 /* 543 /*
542 * The processor cleared BTF, so don't mark that we need it set. 544 * The processor cleared BTF, so don't mark that we need it set.
543 */ 545 */
544 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 546 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
545 tsk->thread.debugctlmsr = 0; 547 tsk->thread.debugctlmsr = 0;
546 548
547 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 549 /* Store the virtualized DR6 value */
548 SIGTRAP) == NOTIFY_STOP) 550 tsk->thread.debugreg6 = dr6;
551
552 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
553 SIGTRAP) == NOTIFY_STOP)
549 return; 554 return;
550 555
551 /* It's safe to allow irq's after DR6 has been saved */ 556 /* It's safe to allow irq's after DR6 has been saved */
552 preempt_conditional_sti(regs); 557 preempt_conditional_sti(regs);
553 558
554 /* Mask out spurious debug traps due to lazy DR7 setting */ 559 if (regs->flags & X86_VM_MASK) {
555 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 560 handle_vm86_trap((struct kernel_vm86_regs *) regs,
556 if (!tsk->thread.debugreg7) 561 error_code, 1);
557 goto clear_dr7; 562 return;
558 } 563 }
559 564
560#ifdef CONFIG_X86_32
561 if (regs->flags & X86_VM_MASK)
562 goto debug_vm86;
563#endif
564
565 /* Save debug status register where ptrace can see it */
566 tsk->thread.debugreg6 = condition;
567
568 /* 565 /*
569 * Single-stepping through TF: make sure we ignore any events in 566 * Single-stepping through system calls: ignore any exceptions in
570 * kernel space (but re-enable TF when returning to user mode). 567 * kernel space, but re-enable TF when returning to user mode.
568 *
569 * We already checked v86 mode above, so we can check for kernel mode
570 * by just checking the CPL of CS.
571 */ 571 */
572 if (condition & DR_STEP) { 572 if ((dr6 & DR_STEP) && !user_mode(regs)) {
573 if (!user_mode(regs)) 573 tsk->thread.debugreg6 &= ~DR_STEP;
574 goto clear_TF_reenable; 574 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
575 regs->flags &= ~X86_EFLAGS_TF;
575 } 576 }
576 577 si_code = get_si_code(tsk->thread.debugreg6);
577 si_code = get_si_code(condition); 578 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
578 /* Ok, finally something we can handle */ 579 send_sigtrap(tsk, regs, error_code, si_code);
579 send_sigtrap(tsk, regs, error_code, si_code);
580
581 /*
582 * Disable additional traps. They'll be re-enabled when
583 * the signal is delivered.
584 */
585clear_dr7:
586 set_debugreg(0, 7);
587 preempt_conditional_cli(regs); 580 preempt_conditional_cli(regs);
588 return;
589 581
590#ifdef CONFIG_X86_32
591debug_vm86:
592 /* reenable preemption: handle_vm86_trap() might sleep */
593 dec_preempt_count();
594 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
595 conditional_cli(regs);
596 return;
597#endif
598
599clear_TF_reenable:
600 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
601 regs->flags &= ~X86_EFLAGS_TF;
602 preempt_conditional_cli(regs);
603 return; 582 return;
604} 583}
605 584
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d261527c..4fc80174191c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
42#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
43#include "trace.h" 43#include "trace.h"
44 44
45#include <asm/debugreg.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/msr.h> 47#include <asm/msr.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3643 trace_kvm_entry(vcpu->vcpu_id); 3644 trace_kvm_entry(vcpu->vcpu_id);
3644 kvm_x86_ops->run(vcpu, kvm_run); 3645 kvm_x86_ops->run(vcpu, kvm_run);
3645 3646
3646 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { 3647 /*
3647 set_debugreg(current->thread.debugreg0, 0); 3648 * If the guest has used debug registers, at least dr7
3648 set_debugreg(current->thread.debugreg1, 1); 3649 * will be disabled while returning to the host.
3649 set_debugreg(current->thread.debugreg2, 2); 3650 * If we don't have active breakpoints in the host, we don't
3650 set_debugreg(current->thread.debugreg3, 3); 3651 * care about the messed up debug address registers. But if
3651 set_debugreg(current->thread.debugreg6, 6); 3652 * we have some of them active, restore the old state.
3652 set_debugreg(current->thread.debugreg7, 7); 3653 */
3653 } 3654 if (hw_breakpoint_active())
3655 hw_breakpoint_restore();
3654 3656
3655 set_bit(KVM_REQ_KICK, &vcpu->requests); 3657 set_bit(KVM_REQ_KICK, &vcpu->requests);
3656 local_irq_enable(); 3658 local_irq_enable();
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
540 struct die_args *arg = args; 540 struct die_args *arg = args;
541 541
542 if (val == DIE_DEBUG && (arg->err & DR_STEP)) 542 if (val == DIE_DEBUG && (arg->err & DR_STEP))
543 if (post_kmmio_handler(arg->err, arg->regs) == 1) 543 if (post_kmmio_handler(arg->err, arg->regs) == 1) {
544 /*
545 * Reset the BS bit in dr6 (pointed to by arg->err) to
546 * denote completion of processing
547 */
548 (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
544 return NOTIFY_STOP; 549 return NOTIFY_STOP;
550 }
545 551
546 return NOTIFY_DONE; 552 return NOTIFY_DONE;
547} 553}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f17667e..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
18#include <asm/mce.h> 18#include <asm/mce.h>
19#include <asm/xcr.h> 19#include <asm/xcr.h>
20#include <asm/suspend.h> 20#include <asm/suspend.h>
21#include <asm/debugreg.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23static struct saved_context saved_context; 24static struct saved_context saved_context;
@@ -142,31 +143,6 @@ static void fix_processor_context(void)
142#endif 143#endif
143 load_TR_desc(); /* This does ltr */ 144 load_TR_desc(); /* This does ltr */
144 load_LDT(&current->active_mm->context); /* This does lldt */ 145 load_LDT(&current->active_mm->context); /* This does lldt */
145
146 /*
147 * Now maybe reload the debug registers
148 */
149 if (current->thread.debugreg7) {
150#ifdef CONFIG_X86_32
151 set_debugreg(current->thread.debugreg0, 0);
152 set_debugreg(current->thread.debugreg1, 1);
153 set_debugreg(current->thread.debugreg2, 2);
154 set_debugreg(current->thread.debugreg3, 3);
155 /* no 4 and 5 */
156 set_debugreg(current->thread.debugreg6, 6);
157 set_debugreg(current->thread.debugreg7, 7);
158#else
159 /* CONFIG_X86_64 */
160 loaddebug(&current->thread, 0);
161 loaddebug(&current->thread, 1);
162 loaddebug(&current->thread, 2);
163 loaddebug(&current->thread, 3);
164 /* no 4 and 5 */
165 loaddebug(&current->thread, 6);
166 loaddebug(&current->thread, 7);
167#endif
168 }
169
170} 146}
171 147
172/** 148/**