author     Frederic Weisbecker <fweisbec@gmail.com>   2009-09-09 13:22:48 -0400
committer  Frederic Weisbecker <fweisbec@gmail.com>   2009-11-08 09:34:42 -0500
commit     24f1e32c60c45c89a997c73395b69c8af6f0a84e (patch)
tree       4f30f16e18cb4abbcf96b3b331e6a3f01bfa26e6 /arch/x86
parent     2da3e160cb3d226d87b907fab26850d838ed8d7c (diff)
hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events
This patch rebases the implementation of the breakpoints API on top of
perf event instances. Each breakpoint is now a perf event that handles
the register scheduling, thread/cpu attachment, etc.

The new layering is now made as follows:

       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                         /
            Core breakpoint API         /
                                       /
                     |                /
                     |               /

             Breakpoints perf events

                     |
                     |

              Breakpoints PMU ---- Debug Register constraints handling
                                   (Part of core breakpoint API)
                     |
                     |

             Hardware debug registers

Reasons for this rewrite:

- Use the centralized/optimized pmu registers scheduling,
  implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
  events, exclusive/non-exclusive, tunable period, etc...)

Impact:

- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
  thread breakpoints references.

Todo (in order):

- Support breakpoints perf counter events for perf tools (ie: implement
  perf_bpcounter_event())
- Support from perf tools

Changes in v2:

- Follow the perf "event" rename
- The ptrace regression has been fixed (ptrace breakpoint perf events
  weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
  perf_event_attr.
- Separate core and arch specific headers, drop
  asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use the new generic len/type for breakpoints
- Handle the off case: when the breakpoints API is not supported by an
  arch

Changes in v3:

- Fix broken CONFIG_KVM: we need to propagate the breakpoint API
  changes to kvm when we exit the guest and restore the bp registers
  to the host.

Changes in v4:

- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
  module
- Restore the breakpoints unconditionally on kvm guest exit:
  TIF_DEBUG_THREAD no longer covers every case of running breakpoints
  and vcpu->arch.switch_db_regs might not always be set when the guest
  used debug registers. (Waiting for a reliable optimization)

Changes in v5:

- Split up the asm-generic/hw_breakpoint.h move to
  linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoint restore while switching from a kvm guest to
  the host. We only want to restore the state if we have active
  breakpoints in the host, otherwise we don't care about messed-up
  address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c

Changes in v6:

- Fix wrong header inclusion in trace.h (triggered a build error with
  CONFIG_FTRACE_SELFTEST)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
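As an editorial aside, the sketch below shows how a kernel-side user would attach and release a breakpoint through the perf-based API this patch introduces. The call signatures are the ones used by the ptrace code in this version of the patch (register_user_hw_breakpoint(addr, len, type, callback, tsk, active) and unregister_hw_breakpoint()); they changed again in later kernels. The names sample_hbp, sample_hbp_handler, sample_hbp_attach and sample_hbp_detach are purely illustrative and not part of the patch.

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event *sample_hbp;

/* Callback invoked from the debug exception handler via bp->callback */
static void sample_hbp_handler(struct perf_event *bp, void *data)
{
	printk(KERN_INFO "hw-breakpoint: watched address was written\n");
}

static int sample_hbp_attach(struct task_struct *tsk, unsigned long addr)
{
	/* 1-byte, write-triggered, active breakpoint bound to @tsk */
	sample_hbp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
						 HW_BREAKPOINT_W,
						 sample_hbp_handler,
						 tsk, true);
	/* this version may report failure either as NULL or as ERR_PTR */
	if (!sample_hbp)
		return -EIO;
	if (IS_ERR(sample_hbp))
		return PTR_ERR(sample_hbp);

	return 0;
}

static void sample_hbp_detach(void)
{
	unregister_hw_breakpoint(sample_hbp);
}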
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/include/asm/Kbuild             1
-rw-r--r--   arch/x86/include/asm/debugreg.h        11
-rw-r--r--   arch/x86/include/asm/hw_breakpoint.h   58
-rw-r--r--   arch/x86/include/asm/processor.h       12
-rw-r--r--   arch/x86/kernel/hw_breakpoint.c       391
-rw-r--r--   arch/x86/kernel/process.c               7
-rw-r--r--   arch/x86/kernel/process_32.c           26
-rw-r--r--   arch/x86/kernel/process_64.c           26
-rw-r--r--   arch/x86/kernel/ptrace.c              182
-rw-r--r--   arch/x86/kernel/smpboot.c               3
-rw-r--r--   arch/x86/kvm/x86.c                     18
-rw-r--r--   arch/x86/power/cpu.c                    6
12 files changed, 445 insertions(+), 296 deletions(-)
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 23439fbb1d0e..9a3333c91f9a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -75,13 +75,8 @@
  */
 #ifdef __KERNEL__
 
-/* For process management */
-extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
-		struct task_struct *child, unsigned long clone_flags);
+DECLARE_PER_CPU(unsigned long, dr7);
 
-/* For CPU management */
-extern void load_debug_registers(void);
 static inline void hw_breakpoint_disable(void)
 {
 	/* Zero the control register for HW Breakpoint */
@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
 	set_debugreg(0UL, 3);
 }
 
+#ifdef CONFIG_KVM
+extern void hw_breakpoint_restore(void);
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 3cfca8e2b5f6..0675a7c4c20e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -4,6 +4,11 @@
 #ifdef __KERNEL__
 #define __ARCH_HW_BREAKPOINT_H
 
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
 struct arch_hw_breakpoint {
 	char *name; /* Contains name of the symbol to set bkpt */
 	unsigned long address;
@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
 };
 
 #include <linux/kdebug.h>
-#include <linux/hw_breakpoint.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
-#define HW_BREAKPOINT_LEN_1		0x40
-#define HW_BREAKPOINT_LEN_2		0x44
-#define HW_BREAKPOINT_LEN_4		0x4c
-#define HW_BREAKPOINT_LEN_EXECUTE	0x40
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
 
 #ifdef CONFIG_X86_64
-#define HW_BREAKPOINT_LEN_8		0x48
+#define X86_BREAKPOINT_LEN_8		0x48
 #endif
 
 /* Available HW breakpoint type encodings */
 
 /* trigger on instruction execute */
-#define HW_BREAKPOINT_EXECUTE	0x80
+#define X86_BREAKPOINT_EXECUTE	0x80
 /* trigger on memory write */
-#define HW_BREAKPOINT_WRITE	0x81
+#define X86_BREAKPOINT_WRITE	0x81
 /* trigger on memory read or write */
-#define HW_BREAKPOINT_RW	0x83
+#define X86_BREAKPOINT_RW	0x83
 
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
-DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
-extern unsigned int hbp_user_refcount[HBP_NUM];
+struct perf_event;
+struct pmu;
 
-extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_uninstall_thread_hw_breakpoint(void);
 extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
 					 struct task_struct *tsk);
-extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
-extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
-extern void arch_update_kernel_hw_breakpoint(void *);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					    unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
 #endif	/* __KERNEL__ */
 #endif	/* _I386_HW_BREAKPOINT_H */
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 61aafb71c7ef..820f3000f736 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -444,12 +446,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg[HBP_NUM];
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
-	/* Hardware breakpoint info */
-	struct hw_breakpoint	*hbp[HBP_NUM];
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long		debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 9316a9de4de3..e622620790bd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
  *
  * Copyright (C) 2007 Alan Stern
  * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 /*
@@ -22,6 +23,8 @@
  * using the CPU's debug registers.
  */
 
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
@@ -38,26 +41,24 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 
-/* Unmasked kernel DR7 value */
-static unsigned long kdr7;
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
 
 /*
- * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
- * Used to clear and verify the status of bits corresponding to DR0 - DR3
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
  */
-static const unsigned long dr7_masks[HBP_NUM] = {
-	0x000f0003,	/* LEN0, R/W0, G0, L0 */
-	0x00f0000c,	/* LEN1, R/W1, G1, L1 */
-	0x0f000030,	/* LEN2, R/W2, G2, L2 */
-	0xf00000c0	/* LEN3, R/W3, G3, L3 */
-};
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
 
 
 /*
  * Encode the length, type, Exact, and Enable bits for a particular breakpoint
  * as stored in debug register 7.
  */
-static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 {
 	unsigned long bp_info;
 
@@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
 	return bp_info;
 }
 
-void arch_update_kernel_hw_breakpoint(void *unused)
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
 {
-	struct hw_breakpoint *bp;
-	int i, cpu = get_cpu();
-	unsigned long temp_kdr7 = 0;
-
-	/* Don't allow debug exceptions while we update the registers */
-	set_debugreg(0UL, 7);
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
 
-	for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
-		per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
-		if (bp) {
-			temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
-			set_debugreg(bp->info.address, i);
-		}
-	}
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
 
-	/* No need to set DR6. Update the debug registers with kernel-space
-	 * breakpoint values from kdr7 and user-space requests from the
-	 * current process
-	 */
-	kdr7 = temp_kdr7;
-	set_debugreg(kdr7 | current->thread.debugreg7, 7);
-	put_cpu();
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
 }
 
 /*
- * Install the thread breakpoints in their debug registers.
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+int arch_install_hw_breakpoint(struct perf_event *bp)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	switch (hbp_kernel_pos) {
-	case 4:
-		set_debugreg(thread->debugreg[3], 3);
-	case 3:
-		set_debugreg(thread->debugreg[2], 2);
-	case 2:
-		set_debugreg(thread->debugreg[1], 1);
-	case 1:
-		set_debugreg(thread->debugreg[0], 0);
-	default:
-		break;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
 	}
 
-	/* No need to set DR6 */
-	set_debugreg((kdr7 | thread->debugreg7), 7);
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(info->address, i);
+	__get_cpu_var(cpu_debugreg[i]) = info->address;
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 |= encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+
+	return 0;
 }
 
 /*
- * Install the debug register values for just the kernel, no thread.
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
  */
-void arch_uninstall_thread_hw_breakpoint(void)
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
-	/* Clear the user-space portion of debugreg7 by setting only kdr7 */
-	set_debugreg(kdr7, 7);
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
 
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 &= ~encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
 }
 
 static int get_hbp_len(u8 hbp_len)
@@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len)
 	unsigned int len_in_bytes = 0;
 
 	switch (hbp_len) {
-	case HW_BREAKPOINT_LEN_1:
+	case X86_BREAKPOINT_LEN_1:
 		len_in_bytes = 1;
 		break;
-	case HW_BREAKPOINT_LEN_2:
+	case X86_BREAKPOINT_LEN_2:
 		len_in_bytes = 2;
 		break;
-	case HW_BREAKPOINT_LEN_4:
+	case X86_BREAKPOINT_LEN_4:
 		len_in_bytes = 4;
 		break;
 #ifdef CONFIG_X86_64
-	case HW_BREAKPOINT_LEN_8:
+	case X86_BREAKPOINT_LEN_8:
 		len_in_bytes = 8;
 		break;
 #endif
@@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 /*
  * Store a breakpoint's encoded address, length, and type.
  */
-static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
+static int arch_store_info(struct perf_event *bp)
 {
-	/*
-	 * User-space requests will always have the address field populated
-	 * Symbol names from user-space are rejected
-	 */
-	if (tsk && bp->info.name)
-		return -EINVAL;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	/*
 	 * For kernel-addresses, either the address or symbol name can be
 	 * specified.
 	 */
-	if (bp->info.name)
-		bp->info.address = (unsigned long)
-				kallsyms_lookup_name(bp->info.name);
-	if (bp->info.address)
+	if (info->name)
+		info->address = (unsigned long)
+				kallsyms_lookup_name(info->name);
+	if (info->address)
 		return 0;
+
 	return -EINVAL;
 }
 
-/*
- * Validate the arch-specific HW Breakpoint register settings
- */
-int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
-				  struct task_struct *tsk)
+int arch_bp_generic_fields(int x86_len, int x86_type,
+			   int *gen_len, int *gen_type)
 {
-	unsigned int align;
-	int ret = -EINVAL;
+	/* Len */
+	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
 
-	switch (bp->info.type) {
-	/*
-	 * Ptrace-refactoring code
-	 * For now, we'll allow instruction breakpoint only for user-space
-	 * addresses
-	 */
-	case HW_BREAKPOINT_EXECUTE:
-		if ((!arch_check_va_in_userspace(bp->info.address,
-							bp->info.len)) &&
-			bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
-			return ret;
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
 		break;
-	case HW_BREAKPOINT_WRITE:
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
 		break;
-	case HW_BREAKPOINT_RW:
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 		break;
 	default:
-		return ret;
+		return -EINVAL;
 	}
 
-	switch (bp->info.len) {
+	return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	info->address = bp->attr.bp_addr;
+
+	/* Len */
+	switch (bp->attr.bp_len) {
 	case HW_BREAKPOINT_LEN_1:
-		align = 0;
+		info->len = X86_BREAKPOINT_LEN_1;
 		break;
 	case HW_BREAKPOINT_LEN_2:
-		align = 1;
+		info->len = X86_BREAKPOINT_LEN_2;
 		break;
 	case HW_BREAKPOINT_LEN_4:
-		align = 3;
+		info->len = X86_BREAKPOINT_LEN_4;
 		break;
 #ifdef CONFIG_X86_64
 	case HW_BREAKPOINT_LEN_8:
+		info->len = X86_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned int align;
+	int ret;
+
+
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoint only for user-space
+		 * addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
+	switch (info->len) {
+	case X86_BREAKPOINT_LEN_1:
+		align = 0;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		align = 1;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
 		align = 7;
 		break;
 #endif
@@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 		return ret;
 	}
 
-	if (bp->triggered)
-		ret = arch_store_info(bp, tsk);
+	if (bp->callback)
+		ret = arch_store_info(bp);
 
 	if (ret < 0)
 		return ret;
@@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
 	 * Check that the low-order bits of the address are appropriate
 	 * for the alignment implied by len.
 	 */
-	if (bp->info.address & align)
+	if (info->address & align)
 		return -EINVAL;
 
 	/* Check that the virtual address is in the proper range */
 	if (tsk) {
-		if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
+		if (!arch_check_va_in_userspace(info->address, info->len))
 			return -EFAULT;
 	} else {
-		if (!arch_check_va_in_kernelspace(bp->info.address,
-							bp->info.len))
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
 			return -EFAULT;
 	}
+
 	return 0;
 }
 
-void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
-	struct thread_struct *thread = &(tsk->thread);
-	struct hw_breakpoint *bp = thread->hbp[pos];
+	int i;
+	struct thread_struct *t = &tsk->thread;
 
-	thread->debugreg7 &= ~dr7_masks[pos];
-	if (bp) {
-		thread->debugreg[pos] = bp->info.address;
-		thread->debugreg7 |= encode_dr7(pos, bp->info.len,
-				bp->info.type);
-	} else
-		thread->debugreg[pos] = 0;
+	for (i = 0; i < HBP_NUM; i++) {
+		unregister_hw_breakpoint(t->ptrace_bps[i]);
+		t->ptrace_bps[i] = NULL;
+	}
 }
 
-void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
+#ifdef CONFIG_KVM
+void hw_breakpoint_restore(void)
 {
-	int i;
-	struct thread_struct *thread = &(tsk->thread);
-
-	thread->debugreg7 = 0;
-	for (i = 0; i < HBP_NUM; i++)
-		thread->debugreg[i] = 0;
+	set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+	set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+	set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+	set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+	set_debugreg(current->thread.debugreg6, 6);
+	set_debugreg(__get_cpu_var(dr7), 7);
 }
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+#endif
 
 /*
  * Handle debug exception notifications.
@@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
 static int __kprobes hw_breakpoint_handler(struct die_args *args)
 {
 	int i, cpu, rc = NOTIFY_STOP;
-	struct hw_breakpoint *bp;
+	struct perf_event *bp;
 	unsigned long dr7, dr6;
 	unsigned long *dr6_p;
 
@@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;
 
-	/* Lazy debug register switching */
-	if (!test_tsk_thread_flag(current, TIF_DEBUG))
-		arch_uninstall_thread_hw_breakpoint();
-
 	get_debugreg(dr7, 7);
 	/* Disable breakpoints during exception handling */
 	set_debugreg(0UL, 7);
@@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	for (i = 0; i < HBP_NUM; ++i) {
 		if (likely(!(dr6 & (DR_TRAP0 << i))))
 			continue;
+
 		/*
-		 * Find the corresponding hw_breakpoint structure and
-		 * invoke its triggered callback.
+		 * The counter may be concurrently released but that can only
+		 * occur from a call_rcu() path. We can then safely fetch
+		 * the breakpoint, use its callback, touch its counter
+		 * while we are in an rcu_read_lock() path.
 		 */
-		if (i >= hbp_kernel_pos)
-			bp = per_cpu(this_hbp_kernel[i], cpu);
-		else {
-			bp = current->thread.hbp[i];
-			if (bp)
-				rc = NOTIFY_DONE;
-		}
+		rcu_read_lock();
+
+		bp = per_cpu(bp_per_reg[i], cpu);
+		if (bp)
+			rc = NOTIFY_DONE;
 		/*
 		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
 		 * exception handling
@@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		(*dr6_p) &= ~(DR_TRAP0 << i);
 		/*
 		 * bp can be NULL due to lazy debug register switching
-		 * or due to the delay between updates of hbp_kernel_pos
-		 * and this_hbp_kernel.
+		 * or due to concurrent perf counter removing.
 		 */
-		if (!bp)
-			continue;
+		if (!bp) {
+			rcu_read_unlock();
+			break;
+		}
+
+		(bp->callback)(bp, args->regs);
 
-		(bp->triggered)(bp, args->regs);
+		rcu_read_unlock();
 	}
 	if (dr6 & (~DR_TRAP_BITS))
 		rc = NOTIFY_DONE;
 
 	set_debugreg(dr7, 7);
 	put_cpu();
+
 	return rc;
 }
 
@@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify(
 
 	return hw_breakpoint_handler(data);
 }
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
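As a hedged editorial aside on the DR7 helpers exported above: the sketch below shows how encode_dr7() and decode_dr7() round-trip one slot's length/type bits, assuming the layout used in this file (a 4-bit control nibble per slot above DR_CONTROL_SHIFT plus 2 enable bits per slot). The function name dr7_roundtrip_example is illustrative only and not part of the patch.

#include <linux/hw_breakpoint.h>	/* generic breakpoint flags */
#include <asm/hw_breakpoint.h>		/* X86_BREAKPOINT_*, encode_dr7(), decode_dr7() */

static void dr7_roundtrip_example(void)
{
	unsigned long dr7 = 0;
	unsigned int len, type;

	/* Slot 1, 2-byte write breakpoint: sets the LEN1/RW1 nibble and enable bits */
	dr7 |= encode_dr7(1, X86_BREAKPOINT_LEN_2, X86_BREAKPOINT_WRITE);

	if (decode_dr7(dr7, 1, &len, &type)) {
		/* here len == X86_BREAKPOINT_LEN_2 (0x44), type == X86_BREAKPOINT_WRITE (0x81) */
	}
}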
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cf8ee0016307..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -18,7 +19,6 @@
 #include <asm/i387.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
 
 	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
@@ -107,8 +105,7 @@ void flush_thread(void)
 	}
 #endif
 
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		flush_thread_hw_breakpoint(tsk);
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 209e74801763..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
-			goto out;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		err = do_set_thread_area(p, -1,
 			(struct user_desc __user *)childregs->si, 0);
 
-out:
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
 	p->thread.ds_ctx = NULL;
@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated. Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 72edac026a78..5bafdec34441 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,7 +53,6 @@
 #include <asm/syscalls.h>
 #include <asm/ds.h>
 #include <asm/debugreg.h>
-#include <asm/hw_breakpoint.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
 			BUG();
 		}
 	}
-	if (unlikely(dead_task->thread.debugreg7))
-		flush_thread_hw_breakpoint(dead_task);
 }
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	savesegment(ds, p->thread.ds);
 
 	err = -ENOMEM;
-	if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
-		if (copy_thread_hw_breakpoint(me, p, clone_flags))
-			goto out;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -351,8 +346,6 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-	if (err)
-		flush_thread_hw_breakpoint(p);
 
 	return err;
 }
@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
-	/*
-	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
-	 * call before current is updated. Suppose a kernel breakpoint is
-	 * triggered in between the two, the hw-breakpoint handler will see that
-	 * the 'current' task does not have TIF_DEBUG flag set and will think it
-	 * is leftover from an old task (lazy switching) and will erase it. Then
-	 * until the next context switch, no user-breakpoints will be installed.
-	 *
-	 * The real problem is that it's impossible to update both current and
-	 * physical debug registers at the same instant, so there will always be
-	 * a window in which they disagree and a breakpoint might get triggered.
-	 * Since we use lazy switching, we are forced to assume that a
-	 * disagreement means that current is correct and the exception is due
-	 * to lazy debug register switching.
-	 */
-	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
-		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 267cb85b479c..e79610d95971 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-/*
- * Decode the length and type bits for a particular breakpoint as
- * stored in debug register 7.  Return the "enabled" status.
- */
-static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
-		unsigned *type)
-{
-	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
-
-	*len = (bp_info & 0xc) | 0x40;
-	*type = (bp_info & 0x3) | 0x80;
-	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
-}
-
-static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+static void ptrace_triggered(struct perf_event *bp, void *data)
 {
-	struct thread_struct *thread = &(current->thread);
 	int i;
+	struct thread_struct *thread = &(current->thread);
 
 	/*
 	 * Store in the virtual DR6 register the fact that the breakpoint
 	 * was hit so the thread's debugger will see it.
 	 */
-	for (i = 0; i < hbp_kernel_pos; i++)
-		/*
-		 * We will check bp->info.address against the address stored in
-		 * thread's hbp structure and not debugreg[i]. This is to ensure
-		 * that the corresponding bit for 'i' in DR7 register is enabled
-		 */
-		if (bp->info.address == thread->hbp[i]->info.address)
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
 			break;
+	}
 
 	thread->debugreg6 |= (DR_TRAP0 << i);
 }
 
 /*
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
+ */
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
+{
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
+	}
+
+	return dr7;
+}
+
+/*
  * Handle ptrace writes to debug register 7.
  */
 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
 	struct thread_struct *thread = &(tsk->thread);
-	unsigned long old_dr7 = thread->debugreg7;
+	unsigned long old_dr7;
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
-	struct hw_breakpoint *bp;
+	int gen_len, gen_type;
+	struct perf_event *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
 	/*
 	 * Loop through all the hardware breakpoints, making the
@@ -496,11 +503,12 @@ restore:
 	 */
 	for (i = 0; i < HBP_NUM; i++) {
 		enabled = decode_dr7(data, i, &len, &type);
-		bp = thread->hbp[i];
+		bp = thread->ptrace_bps[i];
 
 		if (!enabled) {
 			if (bp) {
-				/* Don't unregister the breakpoints right-away,
+				/*
+				 * Don't unregister the breakpoints right-away,
 				 * unless all register_user_hw_breakpoint()
 				 * requests have succeeded. This prevents
 				 * any window of opportunity for debug
@@ -508,27 +516,45 @@
 				 */
 				if (!second_pass)
 					continue;
-				unregister_user_hw_breakpoint(tsk, bp);
-				kfree(bp);
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
 			}
 			continue;
 		}
+
+		/*
+		 * We shoud have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
 		if (!bp) {
-			rc = -ENOMEM;
-			bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-			if (bp) {
-				bp->info.address = thread->debugreg[i];
-				bp->triggered = ptrace_triggered;
-				bp->info.len = len;
-				bp->info.type = type;
-				rc = register_user_hw_breakpoint(tsk, bp);
-				if (rc)
-					kfree(bp);
-			}
-		} else
-			rc = modify_user_hw_breakpoint(tsk, bp);
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
 		if (rc)
 			break;
+
+		/*
+		 * This is a temporary thing as bp is unregistered/registered
+		 * to simulate modification
+		 */
+		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+					       gen_type, bp->callback,
+					       tsk, true);
+		thread->ptrace_bps[i] = NULL;
+
+		if (!bp) { /* incorrect bp, or we have a bug in bp API */
+			rc = -EINVAL;
+			break;
+		}
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			bp = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
 	}
 	/*
 	 * Make a second pass to free the remaining unused breakpoints
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 	struct thread_struct *thread = &(tsk->thread);
 	unsigned long val = 0;
 
-	if (n < HBP_NUM)
-		val = thread->debugreg[n];
-	else if (n == 6)
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
 		val = thread->debugreg6;
-	else if (n == 7)
-		val = thread->debugreg7;
+	} else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
 	return val;
 }
 
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+
+	if (!t->ptrace_bps[nr]) {
+		/*
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
+		 */
+		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+						 HW_BREAKPOINT_W,
+						 ptrace_triggered, tsk,
+						 false);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+					       bp->attr.bp_type,
+					       bp->callback,
+					       tsk,
+					       bp->attr.disabled);
+	}
+
+	if (!bp)
+		return -EIO;
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
+	return 0;
+}
+
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
  */
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
 		return -EIO;
 
 	if (n == 6) {
-		tsk->thread.debugreg6 = val;
+		thread->debugreg6 = val;
 		goto ret_path;
 	}
 	if (n < HBP_NUM) {
-		if (thread->hbp[n]) {
-			if (arch_check_va_in_userspace(val,
-					thread->hbp[n]->info.len) == 0) {
-				rc = -EIO;
-				goto ret_path;
-			}
-			thread->hbp[n]->info.address = val;
-		}
-		thread->debugreg[n] = val;
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
 	}
 	/* All that's left is DR7 */
 	if (n == 7)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 213a7a3e4562..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,6 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
-#include <asm/debugreg.h>
 #include <linux/mc146818rtc.h>
 
 #include <asm/smpboot_hooks.h>
@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
-	load_debug_registers();
 	cpu_idle();
 }
 
@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
 	fixup_irqs();
-	hw_breakpoint_disable();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2974adf9b6..22dee7aa7813 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg[0], 0);
-		set_debugreg(current->thread.debugreg[1], 1);
-		set_debugreg(current->thread.debugreg[2], 2);
-		set_debugreg(current->thread.debugreg[3], 3);
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
+	/*
+	 * If the guest has used debug registers, at least dr7
+	 * will be disabled while returning to the host.
+	 * If we don't have active breakpoints in the host, we don't
+	 * care about the messed up debug address registers. But if
+	 * we have some of them active, restore the old state.
+	 */
+	if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
+		hw_breakpoint_restore();
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e09a44fc4664..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
 #endif
-	hw_breakpoint_disable();
 }
 
 /* Needed by apm.c */
@@ -144,11 +143,6 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	load_debug_registers();
 }
 
 /**