aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-09-09 13:22:48 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2009-11-08 09:34:42 -0500
commit24f1e32c60c45c89a997c73395b69c8af6f0a84e (patch)
tree4f30f16e18cb4abbcf96b3b331e6a3f01bfa26e6
parent2da3e160cb3d226d87b907fab26850d838ed8d7c (diff)
hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events
This patch rebases the implementation of the breakpoints API on top of perf events instances. Each breakpoint is now a perf event that handles the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons for this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression has been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. 
Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every case of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST) Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Prasad <prasad@linux.vnet.ibm.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jan Kiszka <jan.kiszka@web.de> Cc: Jiri Slaby <jirislaby@gmail.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Avi Kivity <avi@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/debugreg.h11
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h58
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/kernel/hw_breakpoint.c391
-rw-r--r--arch/x86/kernel/process.c7
-rw-r--r--arch/x86/kernel/process_32.c26
-rw-r--r--arch/x86/kernel/process_64.c26
-rw-r--r--arch/x86/kernel/ptrace.c182
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kvm/x86.c18
-rw-r--r--arch/x86/power/cpu.c6
-rw-r--r--include/linux/hw_breakpoint.h243
-rw-r--r--include/linux/perf_event.h26
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/hw_breakpoint.c424
-rw-r--r--kernel/perf_event.c53
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_ksym.c126
-rw-r--r--kernel/trace/trace_selftest.c3
22 files changed, 885 insertions, 750 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index acb664397945..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES
128 128
129config HAVE_HW_BREAKPOINT 129config HAVE_HW_BREAKPOINT
130 bool 130 bool
131 depends on HAVE_PERF_EVENTS
132 select ANON_INODES
133 select PERF_EVENTS
131 134
132 135
133source "kernel/gcov/Kconfig" 136source "kernel/gcov/Kconfig"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
10header-y += sigcontext32.h 10header-y += sigcontext32.h
11header-y += ucontext.h 11header-y += ucontext.h
12header-y += processor-flags.h 12header-y += processor-flags.h
13header-y += hw_breakpoint.h
13 14
14unifdef-y += e820.h 15unifdef-y += e820.h
15unifdef-y += ist.h 16unifdef-y += ist.h
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 23439fbb1d0e..9a3333c91f9a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -75,13 +75,8 @@
75 */ 75 */
76#ifdef __KERNEL__ 76#ifdef __KERNEL__
77 77
78/* For process management */ 78DECLARE_PER_CPU(unsigned long, dr7);
79extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
80extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
81 struct task_struct *child, unsigned long clone_flags);
82 79
83/* For CPU management */
84extern void load_debug_registers(void);
85static inline void hw_breakpoint_disable(void) 80static inline void hw_breakpoint_disable(void)
86{ 81{
87 /* Zero the control register for HW Breakpoint */ 82 /* Zero the control register for HW Breakpoint */
@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
94 set_debugreg(0UL, 3); 89 set_debugreg(0UL, 3);
95} 90}
96 91
92#ifdef CONFIG_KVM
93extern void hw_breakpoint_restore(void);
94#endif
95
97#endif /* __KERNEL__ */ 96#endif /* __KERNEL__ */
98 97
99#endif /* _ASM_X86_DEBUGREG_H */ 98#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 3cfca8e2b5f6..0675a7c4c20e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -4,6 +4,11 @@
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H 5#define __ARCH_HW_BREAKPOINT_H
6 6
7/*
8 * The name should probably be something dealt in
9 * a higher level. While dealing with the user
10 * (display/resolving)
11 */
7struct arch_hw_breakpoint { 12struct arch_hw_breakpoint {
8 char *name; /* Contains name of the symbol to set bkpt */ 13 char *name; /* Contains name of the symbol to set bkpt */
9 unsigned long address; 14 unsigned long address;
@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
12}; 17};
13 18
14#include <linux/kdebug.h> 19#include <linux/kdebug.h>
15#include <linux/hw_breakpoint.h> 20#include <linux/percpu.h>
21#include <linux/list.h>
16 22
17/* Available HW breakpoint length encodings */ 23/* Available HW breakpoint length encodings */
18#define HW_BREAKPOINT_LEN_1 0x40 24#define X86_BREAKPOINT_LEN_1 0x40
19#define HW_BREAKPOINT_LEN_2 0x44 25#define X86_BREAKPOINT_LEN_2 0x44
20#define HW_BREAKPOINT_LEN_4 0x4c 26#define X86_BREAKPOINT_LEN_4 0x4c
21#define HW_BREAKPOINT_LEN_EXECUTE 0x40 27#define X86_BREAKPOINT_LEN_EXECUTE 0x40
22 28
23#ifdef CONFIG_X86_64 29#ifdef CONFIG_X86_64
24#define HW_BREAKPOINT_LEN_8 0x48 30#define X86_BREAKPOINT_LEN_8 0x48
25#endif 31#endif
26 32
27/* Available HW breakpoint type encodings */ 33/* Available HW breakpoint type encodings */
28 34
29/* trigger on instruction execute */ 35/* trigger on instruction execute */
30#define HW_BREAKPOINT_EXECUTE 0x80 36#define X86_BREAKPOINT_EXECUTE 0x80
31/* trigger on memory write */ 37/* trigger on memory write */
32#define HW_BREAKPOINT_WRITE 0x81 38#define X86_BREAKPOINT_WRITE 0x81
33/* trigger on memory read or write */ 39/* trigger on memory read or write */
34#define HW_BREAKPOINT_RW 0x83 40#define X86_BREAKPOINT_RW 0x83
35 41
36/* Total number of available HW breakpoint registers */ 42/* Total number of available HW breakpoint registers */
37#define HBP_NUM 4 43#define HBP_NUM 4
38 44
39extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; 45struct perf_event;
40DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); 46struct pmu;
41extern unsigned int hbp_user_refcount[HBP_NUM];
42 47
43extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
44extern void arch_uninstall_thread_hw_breakpoint(void);
45extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); 48extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
46extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, 49extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
47 struct task_struct *tsk); 50 struct task_struct *tsk);
48extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
49extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
50extern void arch_update_kernel_hw_breakpoint(void *);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, 51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data); 52 unsigned long val, void *data);
53
54
55int arch_install_hw_breakpoint(struct perf_event *bp);
56void arch_uninstall_hw_breakpoint(struct perf_event *bp);
57void hw_breakpoint_pmu_read(struct perf_event *bp);
58void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
59
60extern void
61arch_fill_perf_breakpoint(struct perf_event *bp);
62
63unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
64int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
65
66extern int arch_bp_generic_fields(int x86_len, int x86_type,
67 int *gen_len, int *gen_type);
68
69extern struct pmu perf_ops_bp;
70
53#endif /* __KERNEL__ */ 71#endif /* __KERNEL__ */
54#endif /* _I386_HW_BREAKPOINT_H */ 72#endif /* _I386_HW_BREAKPOINT_H */
55 73
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 61aafb71c7ef..820f3000f736 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
423extern void free_thread_xstate(struct task_struct *); 423extern void free_thread_xstate(struct task_struct *);
424extern struct kmem_cache *task_xstate_cachep; 424extern struct kmem_cache *task_xstate_cachep;
425 425
426struct perf_event;
427
426struct thread_struct { 428struct thread_struct {
427 /* Cached TLS descriptors: */ 429 /* Cached TLS descriptors: */
428 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; 430 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -444,12 +446,10 @@ struct thread_struct {
444 unsigned long fs; 446 unsigned long fs;
445#endif 447#endif
446 unsigned long gs; 448 unsigned long gs;
447 /* Hardware debugging registers: */ 449 /* Save middle states of ptrace breakpoints */
448 unsigned long debugreg[HBP_NUM]; 450 struct perf_event *ptrace_bps[HBP_NUM];
449 unsigned long debugreg6; 451 /* Debug status used for traps, single steps, etc... */
450 unsigned long debugreg7; 452 unsigned long debugreg6;
451 /* Hardware breakpoint info */
452 struct hw_breakpoint *hbp[HBP_NUM];
453 /* Fault info: */ 453 /* Fault info: */
454 unsigned long cr2; 454 unsigned long cr2;
455 unsigned long trap_no; 455 unsigned long trap_no;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 9316a9de4de3..e622620790bd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
15 * 15 *
16 * Copyright (C) 2007 Alan Stern 16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation 17 * Copyright (C) 2009 IBM Corporation
18 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
18 */ 19 */
19 20
20/* 21/*
@@ -22,6 +23,8 @@
22 * using the CPU's debug registers. 23 * using the CPU's debug registers.
23 */ 24 */
24 25
26#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h>
25#include <linux/irqflags.h> 28#include <linux/irqflags.h>
26#include <linux/notifier.h> 29#include <linux/notifier.h>
27#include <linux/kallsyms.h> 30#include <linux/kallsyms.h>
@@ -38,26 +41,24 @@
38#include <asm/processor.h> 41#include <asm/processor.h>
39#include <asm/debugreg.h> 42#include <asm/debugreg.h>
40 43
41/* Unmasked kernel DR7 value */ 44/* Per cpu debug control register value */
42static unsigned long kdr7; 45DEFINE_PER_CPU(unsigned long, dr7);
46
47/* Per cpu debug address registers values */
48static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
43 49
44/* 50/*
45 * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. 51 * Stores the breakpoints currently in use on each breakpoint address
46 * Used to clear and verify the status of bits corresponding to DR0 - DR3 52 * register for each cpus
47 */ 53 */
48static const unsigned long dr7_masks[HBP_NUM] = { 54static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
49 0x000f0003, /* LEN0, R/W0, G0, L0 */
50 0x00f0000c, /* LEN1, R/W1, G1, L1 */
51 0x0f000030, /* LEN2, R/W2, G2, L2 */
52 0xf00000c0 /* LEN3, R/W3, G3, L3 */
53};
54 55
55 56
56/* 57/*
57 * Encode the length, type, Exact, and Enable bits for a particular breakpoint 58 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
58 * as stored in debug register 7. 59 * as stored in debug register 7.
59 */ 60 */
60static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) 61unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
61{ 62{
62 unsigned long bp_info; 63 unsigned long bp_info;
63 64
@@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
68 return bp_info; 69 return bp_info;
69} 70}
70 71
71void arch_update_kernel_hw_breakpoint(void *unused) 72/*
73 * Decode the length and type bits for a particular breakpoint as
74 * stored in debug register 7. Return the "enabled" status.
75 */
76int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
72{ 77{
73 struct hw_breakpoint *bp; 78 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
74 int i, cpu = get_cpu();
75 unsigned long temp_kdr7 = 0;
76
77 /* Don't allow debug exceptions while we update the registers */
78 set_debugreg(0UL, 7);
79 79
80 for (i = hbp_kernel_pos; i < HBP_NUM; i++) { 80 *len = (bp_info & 0xc) | 0x40;
81 per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; 81 *type = (bp_info & 0x3) | 0x80;
82 if (bp) {
83 temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
84 set_debugreg(bp->info.address, i);
85 }
86 }
87 82
88 /* No need to set DR6. Update the debug registers with kernel-space 83 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
89 * breakpoint values from kdr7 and user-space requests from the
90 * current process
91 */
92 kdr7 = temp_kdr7;
93 set_debugreg(kdr7 | current->thread.debugreg7, 7);
94 put_cpu();
95} 84}
96 85
97/* 86/*
98 * Install the thread breakpoints in their debug registers. 87 * Install a perf counter breakpoint.
88 *
89 * We seek a free debug address register and use it for this
90 * breakpoint. Eventually we enable it in the debug control register.
91 *
92 * Atomic: we hold the counter->ctx->lock and we only handle variables
93 * and registers local to this cpu.
99 */ 94 */
100void arch_install_thread_hw_breakpoint(struct task_struct *tsk) 95int arch_install_hw_breakpoint(struct perf_event *bp)
101{ 96{
102 struct thread_struct *thread = &(tsk->thread); 97 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
103 98 unsigned long *dr7;
104 switch (hbp_kernel_pos) { 99 int i;
105 case 4: 100
106 set_debugreg(thread->debugreg[3], 3); 101 for (i = 0; i < HBP_NUM; i++) {
107 case 3: 102 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
108 set_debugreg(thread->debugreg[2], 2); 103
109 case 2: 104 if (!*slot) {
110 set_debugreg(thread->debugreg[1], 1); 105 *slot = bp;
111 case 1: 106 break;
112 set_debugreg(thread->debugreg[0], 0); 107 }
113 default:
114 break;
115 } 108 }
116 109
117 /* No need to set DR6 */ 110 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
118 set_debugreg((kdr7 | thread->debugreg7), 7); 111 return -EBUSY;
112
113 set_debugreg(info->address, i);
114 __get_cpu_var(cpu_debugreg[i]) = info->address;
115
116 dr7 = &__get_cpu_var(dr7);
117 *dr7 |= encode_dr7(i, info->len, info->type);
118
119 set_debugreg(*dr7, 7);
120
121 return 0;
119} 122}
120 123
121/* 124/*
122 * Install the debug register values for just the kernel, no thread. 125 * Uninstall the breakpoint contained in the given counter.
126 *
127 * First we search the debug address register it uses and then we disable
128 * it.
129 *
130 * Atomic: we hold the counter->ctx->lock and we only handle variables
131 * and registers local to this cpu.
123 */ 132 */
124void arch_uninstall_thread_hw_breakpoint(void) 133void arch_uninstall_hw_breakpoint(struct perf_event *bp)
125{ 134{
126 /* Clear the user-space portion of debugreg7 by setting only kdr7 */ 135 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
127 set_debugreg(kdr7, 7); 136 unsigned long *dr7;
137 int i;
138
139 for (i = 0; i < HBP_NUM; i++) {
140 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
141
142 if (*slot == bp) {
143 *slot = NULL;
144 break;
145 }
146 }
147
148 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
149 return;
128 150
151 dr7 = &__get_cpu_var(dr7);
152 *dr7 &= ~encode_dr7(i, info->len, info->type);
153
154 set_debugreg(*dr7, 7);
129} 155}
130 156
131static int get_hbp_len(u8 hbp_len) 157static int get_hbp_len(u8 hbp_len)
@@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len)
133 unsigned int len_in_bytes = 0; 159 unsigned int len_in_bytes = 0;
134 160
135 switch (hbp_len) { 161 switch (hbp_len) {
136 case HW_BREAKPOINT_LEN_1: 162 case X86_BREAKPOINT_LEN_1:
137 len_in_bytes = 1; 163 len_in_bytes = 1;
138 break; 164 break;
139 case HW_BREAKPOINT_LEN_2: 165 case X86_BREAKPOINT_LEN_2:
140 len_in_bytes = 2; 166 len_in_bytes = 2;
141 break; 167 break;
142 case HW_BREAKPOINT_LEN_4: 168 case X86_BREAKPOINT_LEN_4:
143 len_in_bytes = 4; 169 len_in_bytes = 4;
144 break; 170 break;
145#ifdef CONFIG_X86_64 171#ifdef CONFIG_X86_64
146 case HW_BREAKPOINT_LEN_8: 172 case X86_BREAKPOINT_LEN_8:
147 len_in_bytes = 8; 173 len_in_bytes = 8;
148 break; 174 break;
149#endif 175#endif
@@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
178/* 204/*
179 * Store a breakpoint's encoded address, length, and type. 205 * Store a breakpoint's encoded address, length, and type.
180 */ 206 */
181static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) 207static int arch_store_info(struct perf_event *bp)
182{ 208{
183 /* 209 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
184 * User-space requests will always have the address field populated
185 * Symbol names from user-space are rejected
186 */
187 if (tsk && bp->info.name)
188 return -EINVAL;
189 /* 210 /*
190 * For kernel-addresses, either the address or symbol name can be 211 * For kernel-addresses, either the address or symbol name can be
191 * specified. 212 * specified.
192 */ 213 */
193 if (bp->info.name) 214 if (info->name)
194 bp->info.address = (unsigned long) 215 info->address = (unsigned long)
195 kallsyms_lookup_name(bp->info.name); 216 kallsyms_lookup_name(info->name);
196 if (bp->info.address) 217 if (info->address)
197 return 0; 218 return 0;
219
198 return -EINVAL; 220 return -EINVAL;
199} 221}
200 222
201/* 223int arch_bp_generic_fields(int x86_len, int x86_type,
202 * Validate the arch-specific HW Breakpoint register settings 224 int *gen_len, int *gen_type)
203 */
204int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
205 struct task_struct *tsk)
206{ 225{
207 unsigned int align; 226 /* Len */
208 int ret = -EINVAL; 227 switch (x86_len) {
228 case X86_BREAKPOINT_LEN_1:
229 *gen_len = HW_BREAKPOINT_LEN_1;
230 break;
231 case X86_BREAKPOINT_LEN_2:
232 *gen_len = HW_BREAKPOINT_LEN_2;
233 break;
234 case X86_BREAKPOINT_LEN_4:
235 *gen_len = HW_BREAKPOINT_LEN_4;
236 break;
237#ifdef CONFIG_X86_64
238 case X86_BREAKPOINT_LEN_8:
239 *gen_len = HW_BREAKPOINT_LEN_8;
240 break;
241#endif
242 default:
243 return -EINVAL;
244 }
209 245
210 switch (bp->info.type) { 246 /* Type */
211 /* 247 switch (x86_type) {
212 * Ptrace-refactoring code 248 case X86_BREAKPOINT_EXECUTE:
213 * For now, we'll allow instruction breakpoint only for user-space 249 *gen_type = HW_BREAKPOINT_X;
214 * addresses
215 */
216 case HW_BREAKPOINT_EXECUTE:
217 if ((!arch_check_va_in_userspace(bp->info.address,
218 bp->info.len)) &&
219 bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
220 return ret;
221 break; 250 break;
222 case HW_BREAKPOINT_WRITE: 251 case X86_BREAKPOINT_WRITE:
252 *gen_type = HW_BREAKPOINT_W;
223 break; 253 break;
224 case HW_BREAKPOINT_RW: 254 case X86_BREAKPOINT_RW:
255 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
225 break; 256 break;
226 default: 257 default:
227 return ret; 258 return -EINVAL;
228 } 259 }
229 260
230 switch (bp->info.len) { 261 return 0;
262}
263
264
265static int arch_build_bp_info(struct perf_event *bp)
266{
267 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
268
269 info->address = bp->attr.bp_addr;
270
271 /* Len */
272 switch (bp->attr.bp_len) {
231 case HW_BREAKPOINT_LEN_1: 273 case HW_BREAKPOINT_LEN_1:
232 align = 0; 274 info->len = X86_BREAKPOINT_LEN_1;
233 break; 275 break;
234 case HW_BREAKPOINT_LEN_2: 276 case HW_BREAKPOINT_LEN_2:
235 align = 1; 277 info->len = X86_BREAKPOINT_LEN_2;
236 break; 278 break;
237 case HW_BREAKPOINT_LEN_4: 279 case HW_BREAKPOINT_LEN_4:
238 align = 3; 280 info->len = X86_BREAKPOINT_LEN_4;
239 break; 281 break;
240#ifdef CONFIG_X86_64 282#ifdef CONFIG_X86_64
241 case HW_BREAKPOINT_LEN_8: 283 case HW_BREAKPOINT_LEN_8:
284 info->len = X86_BREAKPOINT_LEN_8;
285 break;
286#endif
287 default:
288 return -EINVAL;
289 }
290
291 /* Type */
292 switch (bp->attr.bp_type) {
293 case HW_BREAKPOINT_W:
294 info->type = X86_BREAKPOINT_WRITE;
295 break;
296 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
297 info->type = X86_BREAKPOINT_RW;
298 break;
299 case HW_BREAKPOINT_X:
300 info->type = X86_BREAKPOINT_EXECUTE;
301 break;
302 default:
303 return -EINVAL;
304 }
305
306 return 0;
307}
308/*
309 * Validate the arch-specific HW Breakpoint register settings
310 */
311int arch_validate_hwbkpt_settings(struct perf_event *bp,
312 struct task_struct *tsk)
313{
314 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
315 unsigned int align;
316 int ret;
317
318
319 ret = arch_build_bp_info(bp);
320 if (ret)
321 return ret;
322
323 ret = -EINVAL;
324
325 if (info->type == X86_BREAKPOINT_EXECUTE)
326 /*
327 * Ptrace-refactoring code
328 * For now, we'll allow instruction breakpoint only for user-space
329 * addresses
330 */
331 if ((!arch_check_va_in_userspace(info->address, info->len)) &&
332 info->len != X86_BREAKPOINT_EXECUTE)
333 return ret;
334
335 switch (info->len) {
336 case X86_BREAKPOINT_LEN_1:
337 align = 0;
338 break;
339 case X86_BREAKPOINT_LEN_2:
340 align = 1;
341 break;
342 case X86_BREAKPOINT_LEN_4:
343 align = 3;
344 break;
345#ifdef CONFIG_X86_64
346 case X86_BREAKPOINT_LEN_8:
242 align = 7; 347 align = 7;
243 break; 348 break;
244#endif 349#endif
@@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
246 return ret; 351 return ret;
247 } 352 }
248 353
249 if (bp->triggered) 354 if (bp->callback)
250 ret = arch_store_info(bp, tsk); 355 ret = arch_store_info(bp);
251 356
252 if (ret < 0) 357 if (ret < 0)
253 return ret; 358 return ret;
@@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
255 * Check that the low-order bits of the address are appropriate 360 * Check that the low-order bits of the address are appropriate
256 * for the alignment implied by len. 361 * for the alignment implied by len.
257 */ 362 */
258 if (bp->info.address & align) 363 if (info->address & align)
259 return -EINVAL; 364 return -EINVAL;
260 365
261 /* Check that the virtual address is in the proper range */ 366 /* Check that the virtual address is in the proper range */
262 if (tsk) { 367 if (tsk) {
263 if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) 368 if (!arch_check_va_in_userspace(info->address, info->len))
264 return -EFAULT; 369 return -EFAULT;
265 } else { 370 } else {
266 if (!arch_check_va_in_kernelspace(bp->info.address, 371 if (!arch_check_va_in_kernelspace(info->address, info->len))
267 bp->info.len))
268 return -EFAULT; 372 return -EFAULT;
269 } 373 }
374
270 return 0; 375 return 0;
271} 376}
272 377
273void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) 378/*
379 * Release the user breakpoints used by ptrace
380 */
381void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
274{ 382{
275 struct thread_struct *thread = &(tsk->thread); 383 int i;
276 struct hw_breakpoint *bp = thread->hbp[pos]; 384 struct thread_struct *t = &tsk->thread;
277 385
278 thread->debugreg7 &= ~dr7_masks[pos]; 386 for (i = 0; i < HBP_NUM; i++) {
279 if (bp) { 387 unregister_hw_breakpoint(t->ptrace_bps[i]);
280 thread->debugreg[pos] = bp->info.address; 388 t->ptrace_bps[i] = NULL;
281 thread->debugreg7 |= encode_dr7(pos, bp->info.len, 389 }
282 bp->info.type);
283 } else
284 thread->debugreg[pos] = 0;
285} 390}
286 391
287void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) 392#ifdef CONFIG_KVM
393void hw_breakpoint_restore(void)
288{ 394{
289 int i; 395 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
290 struct thread_struct *thread = &(tsk->thread); 396 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
291 397 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
292 thread->debugreg7 = 0; 398 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
293 for (i = 0; i < HBP_NUM; i++) 399 set_debugreg(current->thread.debugreg6, 6);
294 thread->debugreg[i] = 0; 400 set_debugreg(__get_cpu_var(dr7), 7);
295} 401}
402EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
403#endif
296 404
297/* 405/*
298 * Handle debug exception notifications. 406 * Handle debug exception notifications.
@@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
313static int __kprobes hw_breakpoint_handler(struct die_args *args) 421static int __kprobes hw_breakpoint_handler(struct die_args *args)
314{ 422{
315 int i, cpu, rc = NOTIFY_STOP; 423 int i, cpu, rc = NOTIFY_STOP;
316 struct hw_breakpoint *bp; 424 struct perf_event *bp;
317 unsigned long dr7, dr6; 425 unsigned long dr7, dr6;
318 unsigned long *dr6_p; 426 unsigned long *dr6_p;
319 427
@@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
325 if ((dr6 & DR_TRAP_BITS) == 0) 433 if ((dr6 & DR_TRAP_BITS) == 0)
326 return NOTIFY_DONE; 434 return NOTIFY_DONE;
327 435
328 /* Lazy debug register switching */
329 if (!test_tsk_thread_flag(current, TIF_DEBUG))
330 arch_uninstall_thread_hw_breakpoint();
331
332 get_debugreg(dr7, 7); 436 get_debugreg(dr7, 7);
333 /* Disable breakpoints during exception handling */ 437 /* Disable breakpoints during exception handling */
334 set_debugreg(0UL, 7); 438 set_debugreg(0UL, 7);
@@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
344 for (i = 0; i < HBP_NUM; ++i) { 448 for (i = 0; i < HBP_NUM; ++i) {
345 if (likely(!(dr6 & (DR_TRAP0 << i)))) 449 if (likely(!(dr6 & (DR_TRAP0 << i))))
346 continue; 450 continue;
451
347 /* 452 /*
348 * Find the corresponding hw_breakpoint structure and 453 * The counter may be concurrently released but that can only
349 * invoke its triggered callback. 454 * occur from a call_rcu() path. We can then safely fetch
455 * the breakpoint, use its callback, touch its counter
456 * while we are in an rcu_read_lock() path.
350 */ 457 */
351 if (i >= hbp_kernel_pos) 458 rcu_read_lock();
352 bp = per_cpu(this_hbp_kernel[i], cpu); 459
353 else { 460 bp = per_cpu(bp_per_reg[i], cpu);
354 bp = current->thread.hbp[i]; 461 if (bp)
355 if (bp) 462 rc = NOTIFY_DONE;
356 rc = NOTIFY_DONE;
357 }
358 /* 463 /*
359 * Reset the 'i'th TRAP bit in dr6 to denote completion of 464 * Reset the 'i'th TRAP bit in dr6 to denote completion of
360 * exception handling 465 * exception handling
@@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
362 (*dr6_p) &= ~(DR_TRAP0 << i); 467 (*dr6_p) &= ~(DR_TRAP0 << i);
363 /* 468 /*
364 * bp can be NULL due to lazy debug register switching 469 * bp can be NULL due to lazy debug register switching
365 * or due to the delay between updates of hbp_kernel_pos 470 * or due to concurrent perf counter removing.
366 * and this_hbp_kernel.
367 */ 471 */
368 if (!bp) 472 if (!bp) {
369 continue; 473 rcu_read_unlock();
474 break;
475 }
476
477 (bp->callback)(bp, args->regs);
370 478
371 (bp->triggered)(bp, args->regs); 479 rcu_read_unlock();
372 } 480 }
373 if (dr6 & (~DR_TRAP_BITS)) 481 if (dr6 & (~DR_TRAP_BITS))
374 rc = NOTIFY_DONE; 482 rc = NOTIFY_DONE;
375 483
376 set_debugreg(dr7, 7); 484 set_debugreg(dr7, 7);
377 put_cpu(); 485 put_cpu();
486
378 return rc; 487 return rc;
379} 488}
380 489
@@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify(
389 498
390 return hw_breakpoint_handler(data); 499 return hw_breakpoint_handler(data);
391} 500}
501
502void hw_breakpoint_pmu_read(struct perf_event *bp)
503{
504 /* TODO */
505}
506
507void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
508{
509 /* TODO */
510}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cf8ee0016307..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/events/power.h> 12#include <trace/events/power.h>
13#include <linux/hw_breakpoint.h>
13#include <asm/system.h> 14#include <asm/system.h>
14#include <asm/apic.h> 15#include <asm/apic.h>
15#include <asm/syscalls.h> 16#include <asm/syscalls.h>
@@ -18,7 +19,6 @@
18#include <asm/i387.h> 19#include <asm/i387.h>
19#include <asm/ds.h> 20#include <asm/ds.h>
20#include <asm/debugreg.h> 21#include <asm/debugreg.h>
21#include <asm/hw_breakpoint.h>
22 22
23unsigned long idle_halt; 23unsigned long idle_halt;
24EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
47 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 47 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
48 tsk->thread.xstate = NULL; 48 tsk->thread.xstate = NULL;
49 } 49 }
50 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
51 flush_thread_hw_breakpoint(tsk);
52 50
53 WARN(tsk->thread.ds_ctx, "leaking DS context\n"); 51 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
54} 52}
@@ -107,8 +105,7 @@ void flush_thread(void)
107 } 105 }
108#endif 106#endif
109 107
110 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) 108 flush_ptrace_hw_breakpoint(tsk);
111 flush_thread_hw_breakpoint(tsk);
112 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 109 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
113 /* 110 /*
114 * Forget coprocessor state.. 111 * Forget coprocessor state..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 209e74801763..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,6 @@
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h> 61#include <asm/debugreg.h>
62#include <asm/hw_breakpoint.h>
63 62
64asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
65 64
@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
264 p->thread.io_bitmap_ptr = NULL; 263 p->thread.io_bitmap_ptr = NULL;
265 tsk = current; 264 tsk = current;
266 err = -ENOMEM; 265 err = -ENOMEM;
267 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) 266
268 if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) 267 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
269 goto out;
270 268
271 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 269 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
272 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 270 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
287 err = do_set_thread_area(p, -1, 285 err = do_set_thread_area(p, -1,
288 (struct user_desc __user *)childregs->si, 0); 286 (struct user_desc __user *)childregs->si, 0);
289 287
290out:
291 if (err && p->thread.io_bitmap_ptr) { 288 if (err && p->thread.io_bitmap_ptr) {
292 kfree(p->thread.io_bitmap_ptr); 289 kfree(p->thread.io_bitmap_ptr);
293 p->thread.io_bitmap_max = 0; 290 p->thread.io_bitmap_max = 0;
294 } 291 }
295 if (err)
296 flush_thread_hw_breakpoint(p);
297 292
298 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); 293 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
299 p->thread.ds_ctx = NULL; 294 p->thread.ds_ctx = NULL;
@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
437 lazy_load_gs(next->gs); 432 lazy_load_gs(next->gs);
438 433
439 percpu_write(current_task, next_p); 434 percpu_write(current_task, next_p);
440 /*
441 * There's a problem with moving the arch_install_thread_hw_breakpoint()
442 * call before current is updated. Suppose a kernel breakpoint is
443 * triggered in between the two, the hw-breakpoint handler will see that
444 * the 'current' task does not have TIF_DEBUG flag set and will think it
445 * is leftover from an old task (lazy switching) and will erase it. Then
446 * until the next context switch, no user-breakpoints will be installed.
447 *
448 * The real problem is that it's impossible to update both current and
449 * physical debug registers at the same instant, so there will always be
450 * a window in which they disagree and a breakpoint might get triggered.
451 * Since we use lazy switching, we are forced to assume that a
452 * disagreement means that current is correct and the exception is due
453 * to lazy debug register switching.
454 */
455 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
456 arch_install_thread_hw_breakpoint(next_p);
457 435
458 return prev_p; 436 return prev_p;
459} 437}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 72edac026a78..5bafdec34441 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,7 +53,6 @@
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h> 55#include <asm/debugreg.h>
56#include <asm/hw_breakpoint.h>
57 56
58asmlinkage extern void ret_from_fork(void); 57asmlinkage extern void ret_from_fork(void);
59 58
@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
244 BUG(); 243 BUG();
245 } 244 }
246 } 245 }
247 if (unlikely(dead_task->thread.debugreg7))
248 flush_thread_hw_breakpoint(dead_task);
249} 246}
250 247
251static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 248static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
309 savesegment(ds, p->thread.ds); 306 savesegment(ds, p->thread.ds);
310 307
311 err = -ENOMEM; 308 err = -ENOMEM;
312 if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) 309 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
313 if (copy_thread_hw_breakpoint(me, p, clone_flags))
314 goto out;
315 310
316 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 311 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
317 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 312 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -351,8 +346,6 @@ out:
351 kfree(p->thread.io_bitmap_ptr); 346 kfree(p->thread.io_bitmap_ptr);
352 p->thread.io_bitmap_max = 0; 347 p->thread.io_bitmap_max = 0;
353 } 348 }
354 if (err)
355 flush_thread_hw_breakpoint(p);
356 349
357 return err; 350 return err;
358} 351}
@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
508 */ 501 */
509 if (preload_fpu) 502 if (preload_fpu)
510 __math_state_restore(); 503 __math_state_restore();
511 /*
512 * There's a problem with moving the arch_install_thread_hw_breakpoint()
513 * call before current is updated. Suppose a kernel breakpoint is
514 * triggered in between the two, the hw-breakpoint handler will see that
515 * the 'current' task does not have TIF_DEBUG flag set and will think it
516 * is leftover from an old task (lazy switching) and will erase it. Then
517 * until the next context switch, no user-breakpoints will be installed.
518 *
519 * The real problem is that it's impossible to update both current and
520 * physical debug registers at the same instant, so there will always be
521 * a window in which they disagree and a breakpoint might get triggered.
522 * Since we use lazy switching, we are forced to assume that a
523 * disagreement means that current is correct and the exception is due
524 * to lazy debug register switching.
525 */
526 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
527 arch_install_thread_hw_breakpoint(next_p);
528 504
529 return prev_p; 505 return prev_p;
530} 506}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 267cb85b479c..e79610d95971 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
441 return ret; 443 return ret;
442} 444}
443 445
444/* 446static void ptrace_triggered(struct perf_event *bp, void *data)
445 * Decode the length and type bits for a particular breakpoint as
446 * stored in debug register 7. Return the "enabled" status.
447 */
448static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
449 unsigned *type)
450{
451 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
452
453 *len = (bp_info & 0xc) | 0x40;
454 *type = (bp_info & 0x3) | 0x80;
455 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
456}
457
458static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
459{ 447{
460 struct thread_struct *thread = &(current->thread);
461 int i; 448 int i;
449 struct thread_struct *thread = &(current->thread);
462 450
463 /* 451 /*
464 * Store in the virtual DR6 register the fact that the breakpoint 452 * Store in the virtual DR6 register the fact that the breakpoint
465 * was hit so the thread's debugger will see it. 453 * was hit so the thread's debugger will see it.
466 */ 454 */
467 for (i = 0; i < hbp_kernel_pos; i++) 455 for (i = 0; i < HBP_NUM; i++) {
468 /* 456 if (thread->ptrace_bps[i] == bp)
469 * We will check bp->info.address against the address stored in
470 * thread's hbp structure and not debugreg[i]. This is to ensure
471 * that the corresponding bit for 'i' in DR7 register is enabled
472 */
473 if (bp->info.address == thread->hbp[i]->info.address)
474 break; 457 break;
458 }
475 459
476 thread->debugreg6 |= (DR_TRAP0 << i); 460 thread->debugreg6 |= (DR_TRAP0 << i);
477} 461}
478 462
479/* 463/*
464 * Walk through every ptrace breakpoints for this thread and
465 * build the dr7 value on top of their attributes.
466 *
467 */
468static unsigned long ptrace_get_dr7(struct perf_event *bp[])
469{
470 int i;
471 int dr7 = 0;
472 struct arch_hw_breakpoint *info;
473
474 for (i = 0; i < HBP_NUM; i++) {
475 if (bp[i] && !bp[i]->attr.disabled) {
476 info = counter_arch_bp(bp[i]);
477 dr7 |= encode_dr7(i, info->len, info->type);
478 }
479 }
480
481 return dr7;
482}
483
484/*
480 * Handle ptrace writes to debug register 7. 485 * Handle ptrace writes to debug register 7.
481 */ 486 */
482static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) 487static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
483{ 488{
484 struct thread_struct *thread = &(tsk->thread); 489 struct thread_struct *thread = &(tsk->thread);
485 unsigned long old_dr7 = thread->debugreg7; 490 unsigned long old_dr7;
486 int i, orig_ret = 0, rc = 0; 491 int i, orig_ret = 0, rc = 0;
487 int enabled, second_pass = 0; 492 int enabled, second_pass = 0;
488 unsigned len, type; 493 unsigned len, type;
489 struct hw_breakpoint *bp; 494 int gen_len, gen_type;
495 struct perf_event *bp;
490 496
491 data &= ~DR_CONTROL_RESERVED; 497 data &= ~DR_CONTROL_RESERVED;
498 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
492restore: 499restore:
493 /* 500 /*
494 * Loop through all the hardware breakpoints, making the 501 * Loop through all the hardware breakpoints, making the
@@ -496,11 +503,12 @@ restore:
496 */ 503 */
497 for (i = 0; i < HBP_NUM; i++) { 504 for (i = 0; i < HBP_NUM; i++) {
498 enabled = decode_dr7(data, i, &len, &type); 505 enabled = decode_dr7(data, i, &len, &type);
499 bp = thread->hbp[i]; 506 bp = thread->ptrace_bps[i];
500 507
501 if (!enabled) { 508 if (!enabled) {
502 if (bp) { 509 if (bp) {
503 /* Don't unregister the breakpoints right-away, 510 /*
511 * Don't unregister the breakpoints right-away,
504 * unless all register_user_hw_breakpoint() 512 * unless all register_user_hw_breakpoint()
505 * requests have succeeded. This prevents 513 * requests have succeeded. This prevents
506 * any window of opportunity for debug 514 * any window of opportunity for debug
@@ -508,27 +516,45 @@ restore:
508 */ 516 */
509 if (!second_pass) 517 if (!second_pass)
510 continue; 518 continue;
511 unregister_user_hw_breakpoint(tsk, bp); 519 thread->ptrace_bps[i] = NULL;
512 kfree(bp); 520 unregister_hw_breakpoint(bp);
513 } 521 }
514 continue; 522 continue;
515 } 523 }
524
525 /*
 525 * We should have at least an inactive breakpoint at this
527 * slot. It means the user is writing dr7 without having
528 * written the address register first
529 */
516 if (!bp) { 530 if (!bp) {
517 rc = -ENOMEM; 531 rc = -EINVAL;
518 bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); 532 break;
519 if (bp) { 533 }
520 bp->info.address = thread->debugreg[i]; 534
521 bp->triggered = ptrace_triggered; 535 rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
522 bp->info.len = len;
523 bp->info.type = type;
524 rc = register_user_hw_breakpoint(tsk, bp);
525 if (rc)
526 kfree(bp);
527 }
528 } else
529 rc = modify_user_hw_breakpoint(tsk, bp);
530 if (rc) 536 if (rc)
531 break; 537 break;
538
539 /*
540 * This is a temporary thing as bp is unregistered/registered
541 * to simulate modification
542 */
543 bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
544 gen_type, bp->callback,
545 tsk, true);
546 thread->ptrace_bps[i] = NULL;
547
548 if (!bp) { /* incorrect bp, or we have a bug in bp API */
549 rc = -EINVAL;
550 break;
551 }
552 if (IS_ERR(bp)) {
553 rc = PTR_ERR(bp);
554 bp = NULL;
555 break;
556 }
557 thread->ptrace_bps[i] = bp;
532 } 558 }
533 /* 559 /*
534 * Make a second pass to free the remaining unused breakpoints 560 * Make a second pass to free the remaining unused breakpoints
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
553 struct thread_struct *thread = &(tsk->thread); 579 struct thread_struct *thread = &(tsk->thread);
554 unsigned long val = 0; 580 unsigned long val = 0;
555 581
556 if (n < HBP_NUM) 582 if (n < HBP_NUM) {
557 val = thread->debugreg[n]; 583 struct perf_event *bp;
558 else if (n == 6) 584 bp = thread->ptrace_bps[n];
585 if (!bp)
586 return 0;
587 val = bp->hw.info.address;
588 } else if (n == 6) {
559 val = thread->debugreg6; 589 val = thread->debugreg6;
560 else if (n == 7) 590 } else if (n == 7) {
561 val = thread->debugreg7; 591 val = ptrace_get_dr7(thread->ptrace_bps);
592 }
562 return val; 593 return val;
563} 594}
564 595
596static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
597 unsigned long addr)
598{
599 struct perf_event *bp;
600 struct thread_struct *t = &tsk->thread;
601
602 if (!t->ptrace_bps[nr]) {
603 /*
604 * Put stub len and type to register (reserve) an inactive but
605 * correct bp
606 */
607 bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
608 HW_BREAKPOINT_W,
609 ptrace_triggered, tsk,
610 false);
611 } else {
612 bp = t->ptrace_bps[nr];
613 t->ptrace_bps[nr] = NULL;
614 bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
615 bp->attr.bp_type,
616 bp->callback,
617 tsk,
618 bp->attr.disabled);
619 }
620
621 if (!bp)
622 return -EIO;
623 /*
624 * CHECKME: the previous code returned -EIO if the addr wasn't a
625 * valid task virtual addr. The new one will return -EINVAL in this
626 * case.
627 * -EINVAL may be what we want for in-kernel breakpoints users, but
628 * -EIO looks better for ptrace, since we refuse a register writing
629 * for the user. And anyway this is the previous behaviour.
630 */
631 if (IS_ERR(bp))
632 return PTR_ERR(bp);
633
634 t->ptrace_bps[nr] = bp;
635
636 return 0;
637}
638
565/* 639/*
566 * Handle PTRACE_POKEUSR calls for the debug register area. 640 * Handle PTRACE_POKEUSR calls for the debug register area.
567 */ 641 */
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
575 return -EIO; 649 return -EIO;
576 650
577 if (n == 6) { 651 if (n == 6) {
578 tsk->thread.debugreg6 = val; 652 thread->debugreg6 = val;
579 goto ret_path; 653 goto ret_path;
580 } 654 }
581 if (n < HBP_NUM) { 655 if (n < HBP_NUM) {
582 if (thread->hbp[n]) { 656 rc = ptrace_set_breakpoint_addr(tsk, n, val);
583 if (arch_check_va_in_userspace(val, 657 if (rc)
584 thread->hbp[n]->info.len) == 0) { 658 return rc;
585 rc = -EIO;
586 goto ret_path;
587 }
588 thread->hbp[n]->info.address = val;
589 }
590 thread->debugreg[n] = val;
591 } 659 }
592 /* All that's left is DR7 */ 660 /* All that's left is DR7 */
593 if (n == 7) 661 if (n == 7)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 213a7a3e4562..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,6 @@
64#include <asm/apic.h> 64#include <asm/apic.h>
65#include <asm/setup.h> 65#include <asm/setup.h>
66#include <asm/uv/uv.h> 66#include <asm/uv/uv.h>
67#include <asm/debugreg.h>
68#include <linux/mc146818rtc.h> 67#include <linux/mc146818rtc.h>
69 68
70#include <asm/smpboot_hooks.h> 69#include <asm/smpboot_hooks.h>
@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
328 x86_cpuinit.setup_percpu_clockev(); 327 x86_cpuinit.setup_percpu_clockev();
329 328
330 wmb(); 329 wmb();
331 load_debug_registers();
332 cpu_idle(); 330 cpu_idle();
333} 331}
334 332
@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
1269 remove_cpu_from_maps(cpu); 1267 remove_cpu_from_maps(cpu);
1270 unlock_vector_lock(); 1268 unlock_vector_lock();
1271 fixup_irqs(); 1269 fixup_irqs();
1272 hw_breakpoint_disable();
1273} 1270}
1274 1271
1275int native_cpu_disable(void) 1272int native_cpu_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2974adf9b6..22dee7aa7813 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
42#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
43#include "trace.h" 43#include "trace.h"
44 44
45#include <asm/debugreg.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/msr.h> 47#include <asm/msr.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3643 trace_kvm_entry(vcpu->vcpu_id); 3644 trace_kvm_entry(vcpu->vcpu_id);
3644 kvm_x86_ops->run(vcpu, kvm_run); 3645 kvm_x86_ops->run(vcpu, kvm_run);
3645 3646
3646 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { 3647 /*
3647 set_debugreg(current->thread.debugreg[0], 0); 3648 * If the guest has used debug registers, at least dr7
3648 set_debugreg(current->thread.debugreg[1], 1); 3649 * will be disabled while returning to the host.
3649 set_debugreg(current->thread.debugreg[2], 2); 3650 * If we don't have active breakpoints in the host, we don't
3650 set_debugreg(current->thread.debugreg[3], 3); 3651 * care about the messed up debug address registers. But if
3651 set_debugreg(current->thread.debugreg6, 6); 3652 * we have some of them active, restore the old state.
3652 set_debugreg(current->thread.debugreg7, 7); 3653 */
3653 } 3654 if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
3655 hw_breakpoint_restore();
3654 3656
3655 set_bit(KVM_REQ_KICK, &vcpu->requests); 3657 set_bit(KVM_REQ_KICK, &vcpu->requests);
3656 local_irq_enable(); 3658 local_irq_enable();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e09a44fc4664..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
105 ctxt->cr4 = read_cr4(); 105 ctxt->cr4 = read_cr4();
106 ctxt->cr8 = read_cr8(); 106 ctxt->cr8 = read_cr8();
107#endif 107#endif
108 hw_breakpoint_disable();
109} 108}
110 109
111/* Needed by apm.c */ 110/* Needed by apm.c */
@@ -144,11 +143,6 @@ static void fix_processor_context(void)
144#endif 143#endif
145 load_TR_desc(); /* This does ltr */ 144 load_TR_desc(); /* This does ltr */
146 load_LDT(&current->active_mm->context); /* This does lldt */ 145 load_LDT(&current->active_mm->context); /* This does lldt */
147
148 /*
149 * Now maybe reload the debug registers
150 */
151 load_debug_registers();
152} 146}
153 147
154/** 148/**
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 61ccc8f17eac..7eba9b92e5f3 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,136 +1,131 @@
1#ifndef _LINUX_HW_BREAKPOINT_H 1#ifndef _LINUX_HW_BREAKPOINT_H
2#define _LINUX_HW_BREAKPOINT_H 2#define _LINUX_HW_BREAKPOINT_H
3 3
4#include <linux/perf_event.h>
4 5
5#ifdef __KERNEL__ 6enum {
6#include <linux/list.h> 7 HW_BREAKPOINT_LEN_1 = 1,
7#include <linux/types.h> 8 HW_BREAKPOINT_LEN_2 = 2,
8#include <linux/kallsyms.h> 9 HW_BREAKPOINT_LEN_4 = 4,
9 10 HW_BREAKPOINT_LEN_8 = 8,
10/**
11 * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
12 * @triggered: callback invoked after target address access
13 * @info: arch-specific breakpoint info (address, length, and type)
14 *
15 * %hw_breakpoint structures are the kernel's way of representing
16 * hardware breakpoints. These are data breakpoints
17 * (also known as "watchpoints", triggered on data access), and the breakpoint's
18 * target address can be located in either kernel space or user space.
19 *
20 * The breakpoint's address, length, and type are highly
21 * architecture-specific. The values are encoded in the @info field; you
22 * specify them when registering the breakpoint. To examine the encoded
23 * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
24 * below.
25 *
26 * The address is specified as a regular kernel pointer (for kernel-space
27 * breakponts) or as an %__user pointer (for user-space breakpoints).
28 * With register_user_hw_breakpoint(), the address must refer to a
29 * location in user space. The breakpoint will be active only while the
30 * requested task is running. Conversely with
31 * register_kernel_hw_breakpoint(), the address must refer to a location
32 * in kernel space, and the breakpoint will be active on all CPUs
33 * regardless of the current task.
34 *
35 * The length is the breakpoint's extent in bytes, which is subject to
36 * certain limitations. include/asm/hw_breakpoint.h contains macros
37 * defining the available lengths for a specific architecture. Note that
38 * the address's alignment must match the length. The breakpoint will
39 * catch accesses to any byte in the range from address to address +
40 * (length - 1).
41 *
42 * The breakpoint's type indicates the sort of access that will cause it
43 * to trigger. Possible values may include:
44 *
45 * %HW_BREAKPOINT_RW (triggered on read or write access),
46 * %HW_BREAKPOINT_WRITE (triggered on write access), and
47 * %HW_BREAKPOINT_READ (triggered on read access).
48 *
49 * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
50 * possibilities are available on all architectures. Execute breakpoints
51 * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
52 *
53 * When a breakpoint gets hit, the @triggered callback is
54 * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
55 * processor registers.
56 * Data breakpoints occur after the memory access has taken place.
57 * Breakpoints are disabled during execution @triggered, to avoid
58 * recursive traps and allow unhindered access to breakpointed memory.
59 *
60 * This sample code sets a breakpoint on pid_max and registers a callback
61 * function for writes to that variable. Note that it is not portable
62 * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
63 *
64 * ----------------------------------------------------------------------
65 *
66 * #include <asm/hw_breakpoint.h>
67 *
68 * struct hw_breakpoint my_bp;
69 *
70 * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
71 * {
72 * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
73 * dump_stack();
74 * .......<more debugging output>........
75 * }
76 *
77 * static struct hw_breakpoint my_bp;
78 *
79 * static int init_module(void)
80 * {
81 * ..........<do anything>............
82 * my_bp.info.type = HW_BREAKPOINT_WRITE;
83 * my_bp.info.len = HW_BREAKPOINT_LEN_4;
84 *
85 * my_bp.installed = (void *)my_bp_installed;
86 *
87 * rc = register_kernel_hw_breakpoint(&my_bp);
88 * ..........<do anything>............
89 * }
90 *
91 * static void cleanup_module(void)
92 * {
93 * ..........<do anything>............
94 * unregister_kernel_hw_breakpoint(&my_bp);
95 * ..........<do anything>............
96 * }
97 *
98 * ----------------------------------------------------------------------
99 */
100struct hw_breakpoint {
101 void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
102 struct arch_hw_breakpoint info;
103}; 11};
104 12
105/* 13enum {
106 * len and type values are defined in include/asm/hw_breakpoint.h. 14 HW_BREAKPOINT_R = 1,
107 * Available values vary according to the architecture. On i386 the 15 HW_BREAKPOINT_W = 2,
108 * possibilities are: 16 HW_BREAKPOINT_X = 4,
109 * 17};
110 * HW_BREAKPOINT_LEN_1 18
111 * HW_BREAKPOINT_LEN_2 19static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
112 * HW_BREAKPOINT_LEN_4 20{
113 * HW_BREAKPOINT_RW 21 return &bp->hw.info;
114 * HW_BREAKPOINT_READ 22}
115 * 23
116 * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the 24static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
117 * 1-, 2-, and 4-byte lengths may be unavailable. There also may be 25{
118 * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. 26 return bp->attr.bp_addr;
119 */ 27}
28
29static inline int hw_breakpoint_type(struct perf_event *bp)
30{
31 return bp->attr.bp_type;
32}
33
34static inline int hw_breakpoint_len(struct perf_event *bp)
35{
36 return bp->attr.bp_len;
37}
38
39#ifdef CONFIG_HAVE_HW_BREAKPOINT
40extern struct perf_event *
41register_user_hw_breakpoint(unsigned long addr,
42 int len,
43 int type,
44 perf_callback_t triggered,
45 struct task_struct *tsk,
46 bool active);
47
48/* FIXME: only change from the attr, and don't unregister */
49extern struct perf_event *
50modify_user_hw_breakpoint(struct perf_event *bp,
51 unsigned long addr,
52 int len,
53 int type,
54 perf_callback_t triggered,
55 struct task_struct *tsk,
56 bool active);
120 57
121extern int register_user_hw_breakpoint(struct task_struct *tsk,
122 struct hw_breakpoint *bp);
123extern int modify_user_hw_breakpoint(struct task_struct *tsk,
124 struct hw_breakpoint *bp);
125extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
126 struct hw_breakpoint *bp);
127/* 58/*
128 * Kernel breakpoints are not associated with any particular thread. 59 * Kernel breakpoints are not associated with any particular thread.
129 */ 60 */
130extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); 61extern struct perf_event *
131extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); 62register_wide_hw_breakpoint_cpu(unsigned long addr,
63 int len,
64 int type,
65 perf_callback_t triggered,
66 int cpu,
67 bool active);
68
69extern struct perf_event **
70register_wide_hw_breakpoint(unsigned long addr,
71 int len,
72 int type,
73 perf_callback_t triggered,
74 bool active);
75
76extern int register_perf_hw_breakpoint(struct perf_event *bp);
77extern int __register_perf_hw_breakpoint(struct perf_event *bp);
78extern void unregister_hw_breakpoint(struct perf_event *bp);
79extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
80
81extern int reserve_bp_slot(struct perf_event *bp);
82extern void release_bp_slot(struct perf_event *bp);
83
84extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
85
86#else /* !CONFIG_HAVE_HW_BREAKPOINT */
87
88static inline struct perf_event *
89register_user_hw_breakpoint(unsigned long addr,
90 int len,
91 int type,
92 perf_callback_t triggered,
93 struct task_struct *tsk,
94 bool active) { return NULL; }
95static inline struct perf_event *
96modify_user_hw_breakpoint(struct perf_event *bp,
97 unsigned long addr,
98 int len,
99 int type,
100 perf_callback_t triggered,
101 struct task_struct *tsk,
102 bool active) { return NULL; }
103static inline struct perf_event *
104register_wide_hw_breakpoint_cpu(unsigned long addr,
105 int len,
106 int type,
107 perf_callback_t triggered,
108 int cpu,
109 bool active) { return NULL; }
110static inline struct perf_event **
111register_wide_hw_breakpoint(unsigned long addr,
112 int len,
113 int type,
114 perf_callback_t triggered,
115 bool active) { return NULL; }
116static inline int
117register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
118static inline int
119__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
120static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
121static inline void
122unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
123static inline int
124reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
125static inline void release_bp_slot(struct perf_event *bp) { }
126
127static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
132 128
133extern unsigned int hbp_kernel_pos; 129#endif /* CONFIG_HAVE_HW_BREAKPOINT */
134 130
135#endif /* __KERNEL__ */ 131#endif /* _LINUX_HW_BREAKPOINT_H */
136#endif /* _LINUX_HW_BREAKPOINT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8d54e6d25eeb..cead64ea6c15 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
18#include <linux/ioctl.h> 18#include <linux/ioctl.h>
19#include <asm/byteorder.h> 19#include <asm/byteorder.h>
20 20
21#ifdef CONFIG_HAVE_HW_BREAKPOINT
22#include <asm/hw_breakpoint.h>
23#endif
24
21/* 25/*
22 * User-space ABI bits: 26 * User-space ABI bits:
23 */ 27 */
@@ -31,6 +35,7 @@ enum perf_type_id {
31 PERF_TYPE_TRACEPOINT = 2, 35 PERF_TYPE_TRACEPOINT = 2,
32 PERF_TYPE_HW_CACHE = 3, 36 PERF_TYPE_HW_CACHE = 3,
33 PERF_TYPE_RAW = 4, 37 PERF_TYPE_RAW = 4,
38 PERF_TYPE_BREAKPOINT = 5,
34 39
35 PERF_TYPE_MAX, /* non-ABI */ 40 PERF_TYPE_MAX, /* non-ABI */
36}; 41};
@@ -207,6 +212,15 @@ struct perf_event_attr {
207 __u32 wakeup_events; /* wakeup every n events */ 212 __u32 wakeup_events; /* wakeup every n events */
208 __u32 wakeup_watermark; /* bytes before wakeup */ 213 __u32 wakeup_watermark; /* bytes before wakeup */
209 }; 214 };
215
216 union {
217 struct { /* Hardware breakpoint info */
218 __u64 bp_addr;
219 __u32 bp_type;
220 __u32 bp_len;
221 };
222 };
223
210 __u32 __reserved_2; 224 __u32 __reserved_2;
211 225
212 __u64 __reserved_3; 226 __u64 __reserved_3;
@@ -476,6 +490,11 @@ struct hw_perf_event {
476 atomic64_t count; 490 atomic64_t count;
477 struct hrtimer hrtimer; 491 struct hrtimer hrtimer;
478 }; 492 };
493#ifdef CONFIG_HAVE_HW_BREAKPOINT
494 union { /* breakpoint */
495 struct arch_hw_breakpoint info;
496 };
497#endif
479 }; 498 };
480 atomic64_t prev_count; 499 atomic64_t prev_count;
481 u64 sample_period; 500 u64 sample_period;
@@ -588,7 +607,7 @@ struct perf_event {
588 u64 tstamp_running; 607 u64 tstamp_running;
589 u64 tstamp_stopped; 608 u64 tstamp_stopped;
590 609
591 struct perf_event_attr attr; 610 struct perf_event_attr attr;
592 struct hw_perf_event hw; 611 struct hw_perf_event hw;
593 612
594 struct perf_event_context *ctx; 613 struct perf_event_context *ctx;
@@ -643,6 +662,8 @@ struct perf_event {
643 662
644 perf_callback_t callback; 663 perf_callback_t callback;
645 664
665 perf_callback_t event_callback;
666
646#endif /* CONFIG_PERF_EVENTS */ 667#endif /* CONFIG_PERF_EVENTS */
647}; 668};
648 669
@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate;
831extern void perf_event_init(void); 852extern void perf_event_init(void);
832extern void perf_tp_event(int event_id, u64 addr, u64 count, 853extern void perf_tp_event(int event_id, u64 addr, u64 count,
833 void *record, int entry_size); 854 void *record, int entry_size);
855extern void perf_bp_event(struct perf_event *event, void *data);
834 856
835#ifndef perf_misc_flags 857#ifndef perf_misc_flags
836#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ 858#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
865static inline void 887static inline void
866perf_sw_event(u32 event_id, u64 nr, int nmi, 888perf_sw_event(u32 event_id, u64 nr, int nmi,
867 struct pt_regs *regs, u64 addr) { } 889 struct pt_regs *regs, u64 addr) { }
890static inline void
891perf_bp_event(struct perf_event *event, void *data) { }
868 892
869static inline void perf_event_mmap(struct vm_area_struct *vma) { } 893static inline void perf_event_mmap(struct vm_area_struct *vma) { }
870static inline void perf_event_comm(struct task_struct *tsk) { } 894static inline void perf_event_comm(struct task_struct *tsk) { }
diff --git a/kernel/exit.c b/kernel/exit.c
index e61891f80123..266f8920628a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
49#include <linux/init_task.h> 49#include <linux/init_task.h>
50#include <linux/perf_event.h> 50#include <linux/perf_event.h>
51#include <trace/events/sched.h> 51#include <trace/events/sched.h>
52#include <linux/hw_breakpoint.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -980,6 +981,10 @@ NORET_TYPE void do_exit(long code)
980 proc_exit_connector(tsk); 981 proc_exit_connector(tsk);
981 982
982 /* 983 /*
984 * FIXME: do that only when needed, using sched_exit tracepoint
985 */
986 flush_ptrace_hw_breakpoint(tsk);
987 /*
983 * Flush inherited counters to the parent - before the parent 988 * Flush inherited counters to the parent - before the parent
984 * gets woken up by child-exit notifications. 989 * gets woken up by child-exit notifications.
985 */ 990 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c1f64e65a9f3..08f6d0163201 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
15 * 15 *
16 * Copyright (C) 2007 Alan Stern 16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009 17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
18 */ 19 */
19 20
20/* 21/*
@@ -35,334 +36,242 @@
35#include <linux/init.h> 36#include <linux/init.h>
36#include <linux/smp.h> 37#include <linux/smp.h>
37 38
38#include <asm/hw_breakpoint.h> 39#include <linux/hw_breakpoint.h>
40
39#include <asm/processor.h> 41#include <asm/processor.h>
40 42
41#ifdef CONFIG_X86 43#ifdef CONFIG_X86
42#include <asm/debugreg.h> 44#include <asm/debugreg.h>
43#endif 45#endif
44/*
45 * Spinlock that protects all (un)register operations over kernel/user-space
46 * breakpoint requests
47 */
48static DEFINE_SPINLOCK(hw_breakpoint_lock);
49
50/* Array of kernel-space breakpoint structures */
51struct hw_breakpoint *hbp_kernel[HBP_NUM];
52
53/*
54 * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
55 * modified but we need the older copy to handle any hbp exceptions. It will
56 * sync with hbp_kernel[] value after updation is done through IPIs.
57 */
58DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
59
60/*
61 * Kernel breakpoints grow downwards, starting from HBP_NUM
62 * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
63 * kernel-space request. We will initialise it here and not in an __init
64 * routine because load_debug_registers(), which uses this variable can be
65 * called very early during CPU initialisation.
66 */
67unsigned int hbp_kernel_pos = HBP_NUM;
68 46
69/* 47static atomic_t bp_slot;
70 * An array containing refcount of threads using a given bkpt register
71 * Accesses are synchronised by acquiring hw_breakpoint_lock
72 */
73unsigned int hbp_user_refcount[HBP_NUM];
74 48
75/* 49int reserve_bp_slot(struct perf_event *bp)
76 * Load the debug registers during startup of a CPU.
77 */
78void load_debug_registers(void)
79{ 50{
80 unsigned long flags; 51 if (atomic_inc_return(&bp_slot) == HBP_NUM) {
81 struct task_struct *tsk = current; 52 atomic_dec(&bp_slot);
82
83 spin_lock_bh(&hw_breakpoint_lock);
84
85 /* Prevent IPIs for new kernel breakpoint updates */
86 local_irq_save(flags);
87 arch_update_kernel_hw_breakpoint(NULL);
88 local_irq_restore(flags);
89
90 if (test_tsk_thread_flag(tsk, TIF_DEBUG))
91 arch_install_thread_hw_breakpoint(tsk);
92
93 spin_unlock_bh(&hw_breakpoint_lock);
94}
95 53
96/* 54 return -ENOSPC;
97 * Erase all the hardware breakpoint info associated with a thread.
98 *
99 * If tsk != current then tsk must not be usable (for example, a
100 * child being cleaned up from a failed fork).
101 */
102void flush_thread_hw_breakpoint(struct task_struct *tsk)
103{
104 int i;
105 struct thread_struct *thread = &(tsk->thread);
106
107 spin_lock_bh(&hw_breakpoint_lock);
108
109 /* The thread no longer has any breakpoints associated with it */
110 clear_tsk_thread_flag(tsk, TIF_DEBUG);
111 for (i = 0; i < HBP_NUM; i++) {
112 if (thread->hbp[i]) {
113 hbp_user_refcount[i]--;
114 kfree(thread->hbp[i]);
115 thread->hbp[i] = NULL;
116 }
117 } 55 }
118 56
119 arch_flush_thread_hw_breakpoint(tsk); 57 return 0;
120
121 /* Actually uninstall the breakpoints if necessary */
122 if (tsk == current)
123 arch_uninstall_thread_hw_breakpoint();
124 spin_unlock_bh(&hw_breakpoint_lock);
125} 58}
126 59
127/* 60void release_bp_slot(struct perf_event *bp)
128 * Copy the hardware breakpoint info from a thread to its cloned child.
129 */
130int copy_thread_hw_breakpoint(struct task_struct *tsk,
131 struct task_struct *child, unsigned long clone_flags)
132{ 61{
133 /* 62 atomic_dec(&bp_slot);
134 * We will assume that breakpoint settings are not inherited
135 * and the child starts out with no debug registers set.
136 * But what about CLONE_PTRACE?
137 */
138 clear_tsk_thread_flag(child, TIF_DEBUG);
139
140 /* We will call flush routine since the debugregs are not inherited */
141 arch_flush_thread_hw_breakpoint(child);
142
143 return 0;
144} 63}
145 64
146static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, 65int __register_perf_hw_breakpoint(struct perf_event *bp)
147 struct hw_breakpoint *bp)
148{ 66{
149 struct thread_struct *thread = &(tsk->thread); 67 int ret;
150 int rc;
151 68
152 /* Do not overcommit. Fail if kernel has used the hbp registers */ 69 ret = reserve_bp_slot(bp);
153 if (pos >= hbp_kernel_pos) 70 if (ret)
154 return -ENOSPC; 71 return ret;
155 72
156 rc = arch_validate_hwbkpt_settings(bp, tsk); 73 if (!bp->attr.disabled)
157 if (rc) 74 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
158 return rc;
159 75
160 thread->hbp[pos] = bp; 76 return ret;
161 hbp_user_refcount[pos]++; 77}
162 78
163 arch_update_user_hw_breakpoint(pos, tsk); 79int register_perf_hw_breakpoint(struct perf_event *bp)
164 /* 80{
165 * Does it need to be installed right now? 81 bp->callback = perf_bp_event;
166 * Otherwise it will get installed the next time tsk runs
167 */
168 if (tsk == current)
169 arch_install_thread_hw_breakpoint(tsk);
170 82
171 return rc; 83 return __register_perf_hw_breakpoint(bp);
172} 84}
173 85
174/* 86/*
175 * Modify the address of a hbp register already in use by the task 87 * Register a breakpoint bound to a task and a given cpu.
176 * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() 88 * If cpu is -1, the breakpoint is active for the task in every cpu
89 * If the task is -1, the breakpoint is active for every tasks in the given
90 * cpu.
177 */ 91 */
178static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, 92static struct perf_event *
179 struct hw_breakpoint *bp) 93register_user_hw_breakpoint_cpu(unsigned long addr,
94 int len,
95 int type,
96 perf_callback_t triggered,
97 pid_t pid,
98 int cpu,
99 bool active)
180{ 100{
181 struct thread_struct *thread = &(tsk->thread); 101 struct perf_event_attr *attr;
182 102 struct perf_event *bp;
183 if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) 103
184 return -EINVAL; 104 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
185 105 if (!attr)
186 if (thread->hbp[pos] == NULL) 106 return ERR_PTR(-ENOMEM);
187 return -EINVAL; 107
188 108 attr->type = PERF_TYPE_BREAKPOINT;
189 thread->hbp[pos] = bp; 109 attr->size = sizeof(*attr);
110 attr->bp_addr = addr;
111 attr->bp_len = len;
112 attr->bp_type = type;
190 /* 113 /*
191 * 'pos' must be that of a hbp register already used by 'tsk' 114 * Such breakpoints are used by debuggers to trigger signals when
192 * Otherwise arch_modify_user_hw_breakpoint() will fail 115 * we hit the excepted memory op. We can't miss such events, they
116 * must be pinned.
193 */ 117 */
194 arch_update_user_hw_breakpoint(pos, tsk); 118 attr->pinned = 1;
195 119
196 if (tsk == current) 120 if (!active)
197 arch_install_thread_hw_breakpoint(tsk); 121 attr->disabled = 1;
198 122
199 return 0; 123 bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
200} 124 kfree(attr);
201
202static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
203{
204 hbp_user_refcount[pos]--;
205 tsk->thread.hbp[pos] = NULL;
206 125
207 arch_update_user_hw_breakpoint(pos, tsk); 126 return bp;
208
209 if (tsk == current)
210 arch_install_thread_hw_breakpoint(tsk);
211} 127}
212 128
213/** 129/**
214 * register_user_hw_breakpoint - register a hardware breakpoint for user space 130 * register_user_hw_breakpoint - register a hardware breakpoint for user space
131 * @addr: is the memory address that triggers the breakpoint
132 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
133 * @type: the type of the access to the memory (read/write/exec)
134 * @triggered: callback to trigger when we hit the breakpoint
215 * @tsk: pointer to 'task_struct' of the process to which the address belongs 135 * @tsk: pointer to 'task_struct' of the process to which the address belongs
216 * @bp: the breakpoint structure to register 136 * @active: should we activate it while registering it
217 *
218 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
219 * @bp->triggered must be set properly before invocation
220 * 137 *
221 */ 138 */
222int register_user_hw_breakpoint(struct task_struct *tsk, 139struct perf_event *
223 struct hw_breakpoint *bp) 140register_user_hw_breakpoint(unsigned long addr,
141 int len,
142 int type,
143 perf_callback_t triggered,
144 struct task_struct *tsk,
145 bool active)
224{ 146{
225 struct thread_struct *thread = &(tsk->thread); 147 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
226 int i, rc = -ENOSPC; 148 tsk->pid, -1, active);
227
228 spin_lock_bh(&hw_breakpoint_lock);
229
230 for (i = 0; i < hbp_kernel_pos; i++) {
231 if (!thread->hbp[i]) {
232 rc = __register_user_hw_breakpoint(i, tsk, bp);
233 break;
234 }
235 }
236 if (!rc)
237 set_tsk_thread_flag(tsk, TIF_DEBUG);
238
239 spin_unlock_bh(&hw_breakpoint_lock);
240 return rc;
241} 149}
242EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 150EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
243 151
244/** 152/**
245 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint 153 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
154 * @bp: the breakpoint structure to modify
155 * @addr: is the memory address that triggers the breakpoint
156 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
157 * @type: the type of the access to the memory (read/write/exec)
158 * @triggered: callback to trigger when we hit the breakpoint
246 * @tsk: pointer to 'task_struct' of the process to which the address belongs 159 * @tsk: pointer to 'task_struct' of the process to which the address belongs
247 * @bp: the breakpoint structure to unregister 160 * @active: should we activate it while registering it
248 *
249 */ 161 */
250int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) 162struct perf_event *
163modify_user_hw_breakpoint(struct perf_event *bp,
164 unsigned long addr,
165 int len,
166 int type,
167 perf_callback_t triggered,
168 struct task_struct *tsk,
169 bool active)
251{ 170{
252 struct thread_struct *thread = &(tsk->thread); 171 /*
253 int i, ret = -ENOENT; 172 * FIXME: do it without unregistering
173 * - We don't want to lose our slot
174 * - If the new bp is incorrect, don't lose the older one
175 */
176 unregister_hw_breakpoint(bp);
254 177
255 spin_lock_bh(&hw_breakpoint_lock); 178 return register_user_hw_breakpoint(addr, len, type, triggered,
256 for (i = 0; i < hbp_kernel_pos; i++) { 179 tsk, active);
257 if (bp == thread->hbp[i]) {
258 ret = __modify_user_hw_breakpoint(i, tsk, bp);
259 break;
260 }
261 }
262 spin_unlock_bh(&hw_breakpoint_lock);
263 return ret;
264} 180}
265EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); 181EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
266 182
267/** 183/**
268 * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint 184 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
269 * @tsk: pointer to 'task_struct' of the process to which the address belongs
270 * @bp: the breakpoint structure to unregister 185 * @bp: the breakpoint structure to unregister
271 *
272 */ 186 */
273void unregister_user_hw_breakpoint(struct task_struct *tsk, 187void unregister_hw_breakpoint(struct perf_event *bp)
274 struct hw_breakpoint *bp)
275{ 188{
276 struct thread_struct *thread = &(tsk->thread); 189 if (!bp)
277 int i, pos = -1, hbp_counter = 0; 190 return;
278 191 perf_event_release_kernel(bp);
279 spin_lock_bh(&hw_breakpoint_lock); 192}
280 for (i = 0; i < hbp_kernel_pos; i++) { 193EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
281 if (thread->hbp[i]) 194
282 hbp_counter++; 195static struct perf_event *
283 if (bp == thread->hbp[i]) 196register_kernel_hw_breakpoint_cpu(unsigned long addr,
284 pos = i; 197 int len,
285 } 198 int type,
286 if (pos >= 0) { 199 perf_callback_t triggered,
287 __unregister_user_hw_breakpoint(pos, tsk); 200 int cpu,
288 hbp_counter--; 201 bool active)
289 } 202{
290 if (!hbp_counter) 203 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
291 clear_tsk_thread_flag(tsk, TIF_DEBUG); 204 -1, cpu, active);
292
293 spin_unlock_bh(&hw_breakpoint_lock);
294} 205}
295EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
296 206
297/** 207/**
298 * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space 208 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
299 * @bp: the breakpoint structure to register 209 * @addr: is the memory address that triggers the breakpoint
300 * 210 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
301 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and 211 * @type: the type of the access to the memory (read/write/exec)
302 * @bp->triggered must be set properly before invocation 212 * @triggered: callback to trigger when we hit the breakpoint
213 * @active: should we activate it while registering it
303 * 214 *
215 * @return a set of per_cpu pointers to perf events
304 */ 216 */
305int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) 217struct perf_event **
218register_wide_hw_breakpoint(unsigned long addr,
219 int len,
220 int type,
221 perf_callback_t triggered,
222 bool active)
306{ 223{
307 int rc; 224 struct perf_event **cpu_events, **pevent, *bp;
225 long err;
226 int cpu;
227
228 cpu_events = alloc_percpu(typeof(*cpu_events));
229 if (!cpu_events)
230 return ERR_PTR(-ENOMEM);
308 231
309 rc = arch_validate_hwbkpt_settings(bp, NULL); 232 for_each_possible_cpu(cpu) {
310 if (rc) 233 pevent = per_cpu_ptr(cpu_events, cpu);
311 return rc; 234 bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
235 triggered, cpu, active);
312 236
313 spin_lock_bh(&hw_breakpoint_lock); 237 *pevent = bp;
314 238
315 rc = -ENOSPC; 239 if (IS_ERR(bp) || !bp) {
316 /* Check if we are over-committing */ 240 err = PTR_ERR(bp);
317 if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { 241 goto fail;
318 hbp_kernel_pos--; 242 }
319 hbp_kernel[hbp_kernel_pos] = bp;
320 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
321 rc = 0;
322 } 243 }
323 244
324 spin_unlock_bh(&hw_breakpoint_lock); 245 return cpu_events;
325 return rc; 246
247fail:
248 for_each_possible_cpu(cpu) {
249 pevent = per_cpu_ptr(cpu_events, cpu);
250 if (IS_ERR(*pevent) || !*pevent)
251 break;
252 unregister_hw_breakpoint(*pevent);
253 }
254 free_percpu(cpu_events);
255 /* return the error if any */
256 return ERR_PTR(err);
326} 257}
327EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
328 258
329/** 259/**
330 * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space 260 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
331 * @bp: the breakpoint structure to unregister 261 * @cpu_events: the per cpu set of events to unregister
332 *
333 * Uninstalls and unregisters @bp.
334 */ 262 */
335void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) 263void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
336{ 264{
337 int i, j; 265 int cpu;
338 266 struct perf_event **pevent;
339 spin_lock_bh(&hw_breakpoint_lock);
340
341 /* Find the 'bp' in our list of breakpoints for kernel */
342 for (i = hbp_kernel_pos; i < HBP_NUM; i++)
343 if (bp == hbp_kernel[i])
344 break;
345 267
346 /* Check if we did not find a match for 'bp'. If so return early */ 268 for_each_possible_cpu(cpu) {
347 if (i == HBP_NUM) { 269 pevent = per_cpu_ptr(cpu_events, cpu);
348 spin_unlock_bh(&hw_breakpoint_lock); 270 unregister_hw_breakpoint(*pevent);
349 return;
350 } 271 }
351 272 free_percpu(cpu_events);
352 /*
353 * We'll shift the breakpoints one-level above to compact if
354 * unregistration creates a hole
355 */
356 for (j = i; j > hbp_kernel_pos; j--)
357 hbp_kernel[j] = hbp_kernel[j-1];
358
359 hbp_kernel[hbp_kernel_pos] = NULL;
360 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
361 hbp_kernel_pos++;
362
363 spin_unlock_bh(&hw_breakpoint_lock);
364} 273}
365EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); 274
366 275
367static struct notifier_block hw_breakpoint_exceptions_nb = { 276static struct notifier_block hw_breakpoint_exceptions_nb = {
368 .notifier_call = hw_breakpoint_exceptions_notify, 277 .notifier_call = hw_breakpoint_exceptions_notify,
@@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void)
374{ 283{
375 return register_die_notifier(&hw_breakpoint_exceptions_nb); 284 return register_die_notifier(&hw_breakpoint_exceptions_nb);
376} 285}
377
378core_initcall(init_hw_breakpoint); 286core_initcall(init_hw_breakpoint);
287
288
289struct pmu perf_ops_bp = {
290 .enable = arch_install_hw_breakpoint,
291 .disable = arch_uninstall_hw_breakpoint,
292 .read = hw_breakpoint_pmu_read,
293 .unthrottle = hw_breakpoint_pmu_unthrottle
294};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5087125e2a00..98dc56b2ebe4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
32 33
33#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
34 35
@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event)
4229 4230
4230#endif /* CONFIG_EVENT_PROFILE */ 4231#endif /* CONFIG_EVENT_PROFILE */
4231 4232
4233#ifdef CONFIG_HAVE_HW_BREAKPOINT
4234static void bp_perf_event_destroy(struct perf_event *event)
4235{
4236 release_bp_slot(event);
4237}
4238
4239static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4240{
4241 int err;
4242 /*
4243 * The breakpoint is already filled if we haven't created the counter
4244 * through perf syscall
4245 * FIXME: manage to get trigerred to NULL if it comes from syscalls
4246 */
4247 if (!bp->callback)
4248 err = register_perf_hw_breakpoint(bp);
4249 else
4250 err = __register_perf_hw_breakpoint(bp);
4251 if (err)
4252 return ERR_PTR(err);
4253
4254 bp->destroy = bp_perf_event_destroy;
4255
4256 return &perf_ops_bp;
4257}
4258
4259void perf_bp_event(struct perf_event *bp, void *regs)
4260{
4261 /* TODO */
4262}
4263#else
4264static void bp_perf_event_destroy(struct perf_event *event)
4265{
4266}
4267
4268static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4269{
4270 return NULL;
4271}
4272
4273void perf_bp_event(struct perf_event *bp, void *regs)
4274{
4275}
4276#endif
4277
4232atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4278atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4233 4279
4234static void sw_perf_event_destroy(struct perf_event *event) 4280static void sw_perf_event_destroy(struct perf_event *event)
@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4375 pmu = tp_perf_event_init(event); 4421 pmu = tp_perf_event_init(event);
4376 break; 4422 break;
4377 4423
4424 case PERF_TYPE_BREAKPOINT:
4425 pmu = bp_perf_event_init(event);
4426 break;
4427
4428
4378 default: 4429 default:
4379 break; 4430 break;
4380 } 4431 }
@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4686 4737
4687 ctx = find_get_context(pid, cpu); 4738 ctx = find_get_context(pid, cpu);
4688 if (IS_ERR(ctx)) 4739 if (IS_ERR(ctx))
4689 return NULL ; 4740 return NULL;
4690 4741
4691 event = perf_event_alloc(attr, cpu, ctx, NULL, 4742 event = perf_event_alloc(attr, cpu, ctx, NULL,
4692 NULL, callback, GFP_KERNEL); 4743 NULL, callback, GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 91c3d0e9a5a1..d72f06ff263f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,14 +11,11 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
17 18
18#ifdef CONFIG_KSYM_TRACER
19#include <asm/hw_breakpoint.h>
20#endif
21
22enum trace_type { 19enum trace_type {
23 __TRACE_FIRST_TYPE = 0, 20 __TRACE_FIRST_TYPE = 0,
24 21
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e19747d4f860..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
372 F_STRUCT( 372 F_STRUCT(
373 __field( unsigned long, ip ) 373 __field( unsigned long, ip )
374 __field( unsigned char, type ) 374 __field( unsigned char, type )
375 __array( char , ksym_name, KSYM_NAME_LEN )
376 __array( char , cmd, TASK_COMM_LEN ) 375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ), 377 ),
378 378
379 F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", 379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type, 380 (void *)__entry->ip, (unsigned int)__entry->type,
381 __entry->ksym_name, __entry->cmd) 381 (void *)__entry->addr, __entry->cmd)
382); 382);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 6d5609c67378..fea83eeeef09 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -29,7 +29,11 @@
29#include "trace_stat.h" 29#include "trace_stat.h"
30#include "trace.h" 30#include "trace.h"
31 31
32/* For now, let us restrict the no. of symbols traced simultaneously to number 32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number
33 * of available hardware breakpoint registers. 37 * of available hardware breakpoint registers.
34 */ 38 */
35#define KSYM_TRACER_MAX HBP_NUM 39#define KSYM_TRACER_MAX HBP_NUM
@@ -37,8 +41,10 @@
37#define KSYM_TRACER_OP_LEN 3 /* rw- */ 41#define KSYM_TRACER_OP_LEN 3 /* rw- */
38 42
39struct trace_ksym { 43struct trace_ksym {
40 struct hw_breakpoint *ksym_hbp; 44 struct perf_event **ksym_hbp;
41 unsigned long ksym_addr; 45 unsigned long ksym_addr;
46 int type;
47 int len;
42#ifdef CONFIG_PROFILE_KSYM_TRACER 48#ifdef CONFIG_PROFILE_KSYM_TRACER
43 unsigned long counter; 49 unsigned long counter;
44#endif 50#endif
@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
75} 81}
76#endif /* CONFIG_PROFILE_KSYM_TRACER */ 82#endif /* CONFIG_PROFILE_KSYM_TRACER */
77 83
78void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) 84void ksym_hbp_handler(struct perf_event *hbp, void *data)
79{ 85{
80 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
81 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
88 struct pt_regs *regs = data;
82 struct ring_buffer *buffer; 89 struct ring_buffer *buffer;
83 int pc; 90 int pc;
84 91
@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
96 103
97 entry = ring_buffer_event_data(event); 104 entry = ring_buffer_event_data(event);
98 entry->ip = instruction_pointer(regs); 105 entry->ip = instruction_pointer(regs);
99 entry->type = hbp->info.type; 106 entry->type = hw_breakpoint_type(hbp);
100 strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); 107 entry->addr = hw_breakpoint_addr(hbp);
101 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); 108 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
102 109
103#ifdef CONFIG_PROFILE_KSYM_TRACER 110#ifdef CONFIG_PROFILE_KSYM_TRACER
104 ksym_collect_stats(hbp->info.address); 111 ksym_collect_stats(hw_breakpoint_addr(hbp));
105#endif /* CONFIG_PROFILE_KSYM_TRACER */ 112#endif /* CONFIG_PROFILE_KSYM_TRACER */
106 113
107 trace_buffer_unlock_commit(buffer, event, 0, pc); 114 trace_buffer_unlock_commit(buffer, event, 0, pc);
@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str)
120 int access = 0; 127 int access = 0;
121 128
122 if (str[0] == 'r') 129 if (str[0] == 'r')
123 access += 4; 130 access |= HW_BREAKPOINT_R;
124 else if (str[0] != '-')
125 return -EINVAL;
126 131
127 if (str[1] == 'w') 132 if (str[1] == 'w')
128 access += 2; 133 access |= HW_BREAKPOINT_W;
129 else if (str[1] != '-')
130 return -EINVAL;
131 134
132 if (str[2] != '-') 135 if (str[2] == 'x')
133 return -EINVAL; 136 access |= HW_BREAKPOINT_X;
134 137
135 switch (access) { 138 switch (access) {
136 case 6: 139 case HW_BREAKPOINT_W:
137 access = HW_BREAKPOINT_RW; 140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
138 break; 141 return access;
139 case 4: 142 default:
140 access = -EINVAL; 143 return -EINVAL;
141 break;
142 case 2:
143 access = HW_BREAKPOINT_WRITE;
144 break;
145 } 144 }
146
147 return access;
148} 145}
149 146
150/* 147/*
@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
194 if (!entry) 191 if (!entry)
195 return -ENOMEM; 192 return -ENOMEM;
196 193
197 entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); 194 entry->type = op;
198 if (!entry->ksym_hbp) 195 entry->ksym_addr = addr;
199 goto err; 196 entry->len = HW_BREAKPOINT_LEN_4;
200 197
201 entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); 198 ret = -EAGAIN;
202 if (!entry->ksym_hbp->info.name) 199 entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
203 goto err; 200 entry->len, entry->type,
204 201 ksym_hbp_handler, true);
205 entry->ksym_hbp->info.type = op; 202 if (IS_ERR(entry->ksym_hbp)) {
206 entry->ksym_addr = entry->ksym_hbp->info.address = addr; 203 entry->ksym_hbp = NULL;
207#ifdef CONFIG_X86 204 ret = PTR_ERR(entry->ksym_hbp);
208 entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; 205 }
209#endif
210 entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
211 206
212 ret = register_kernel_hw_breakpoint(entry->ksym_hbp); 207 if (!entry->ksym_hbp) {
213 if (ret < 0) {
214 printk(KERN_INFO "ksym_tracer request failed. Try again" 208 printk(KERN_INFO "ksym_tracer request failed. Try again"
215 " later!!\n"); 209 " later!!\n");
216 ret = -EAGAIN;
217 goto err; 210 goto err;
218 } 211 }
212
219 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); 213 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
220 ksym_filter_entry_count++; 214 ksym_filter_entry_count++;
215
221 return 0; 216 return 0;
217
222err: 218err:
223 if (entry->ksym_hbp)
224 kfree(entry->ksym_hbp->info.name);
225 kfree(entry->ksym_hbp);
226 kfree(entry); 219 kfree(entry);
220
227 return ret; 221 return ret;
228} 222}
229 223
@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
244 mutex_lock(&ksym_tracer_mutex); 238 mutex_lock(&ksym_tracer_mutex);
245 239
246 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 240 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
247 ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); 241 ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
248 if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) 242 if (entry->type == HW_BREAKPOINT_W)
249 ret = trace_seq_puts(s, "-w-\n"); 243 ret = trace_seq_puts(s, "-w-\n");
250 else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) 244 else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
251 ret = trace_seq_puts(s, "rw-\n"); 245 ret = trace_seq_puts(s, "rw-\n");
252 WARN_ON_ONCE(!ret); 246 WARN_ON_ONCE(!ret);
253 } 247 }
@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void)
269 mutex_lock(&ksym_tracer_mutex); 263 mutex_lock(&ksym_tracer_mutex);
270 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, 264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
271 ksym_hlist) { 265 ksym_hlist) {
272 unregister_kernel_hw_breakpoint(entry->ksym_hbp); 266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
273 ksym_filter_entry_count--; 267 ksym_filter_entry_count--;
274 hlist_del_rcu(&(entry->ksym_hlist)); 268 hlist_del_rcu(&(entry->ksym_hlist));
275 synchronize_rcu(); 269 synchronize_rcu();
276 kfree(entry->ksym_hbp->info.name);
277 kfree(entry->ksym_hbp);
278 kfree(entry); 270 kfree(entry);
279 } 271 }
280 mutex_unlock(&ksym_tracer_mutex); 272 mutex_unlock(&ksym_tracer_mutex);
@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
327 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 319 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
328 if (entry->ksym_addr == ksym_addr) { 320 if (entry->ksym_addr == ksym_addr) {
329 /* Check for malformed request: (6) */ 321 /* Check for malformed request: (6) */
330 if (entry->ksym_hbp->info.type != op) 322 if (entry->type != op)
331 changed = 1; 323 changed = 1;
332 else 324 else
333 goto out; 325 goto out;
@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file,
335 } 327 }
336 } 328 }
337 if (changed) { 329 if (changed) {
338 unregister_kernel_hw_breakpoint(entry->ksym_hbp); 330 unregister_wide_hw_breakpoint(entry->ksym_hbp);
339 entry->ksym_hbp->info.type = op; 331 entry->type = op;
340 if (op > 0) { 332 if (op > 0) {
341 ret = register_kernel_hw_breakpoint(entry->ksym_hbp); 333 entry->ksym_hbp =
342 if (ret == 0) 334 register_wide_hw_breakpoint(entry->ksym_addr,
335 entry->len, entry->type,
336 ksym_hbp_handler, true);
337 if (IS_ERR(entry->ksym_hbp))
338 entry->ksym_hbp = NULL;
339 if (!entry->ksym_hbp)
343 goto out; 340 goto out;
344 } 341 }
345 ksym_filter_entry_count--; 342 ksym_filter_entry_count--;
346 hlist_del_rcu(&(entry->ksym_hlist)); 343 hlist_del_rcu(&(entry->ksym_hlist));
347 synchronize_rcu(); 344 synchronize_rcu();
348 kfree(entry->ksym_hbp->info.name);
349 kfree(entry->ksym_hbp);
350 kfree(entry); 345 kfree(entry);
351 ret = 0; 346 ret = 0;
352 goto out; 347 goto out;
@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
413 408
414 trace_assign_type(field, entry); 409 trace_assign_type(field, entry);
415 410
416 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, 411 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
417 entry->pid, iter->cpu, field->ksym_name); 412 entry->pid, iter->cpu, (char *)field->addr);
418 if (!ret) 413 if (!ret)
419 return TRACE_TYPE_PARTIAL_LINE; 414 return TRACE_TYPE_PARTIAL_LINE;
420 415
421 switch (field->type) { 416 switch (field->type) {
422 case HW_BREAKPOINT_WRITE: 417 case HW_BREAKPOINT_W:
423 ret = trace_seq_printf(s, " W "); 418 ret = trace_seq_printf(s, " W ");
424 break; 419 break;
425 case HW_BREAKPOINT_RW: 420 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
426 ret = trace_seq_printf(s, " RW "); 421 ret = trace_seq_printf(s, " RW ");
427 break; 422 break;
428 default: 423 default:
@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
490 485
491 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); 486 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
492 487
493 if (entry->ksym_hbp) 488 access_type = entry->type;
494 access_type = entry->ksym_hbp->info.type;
495 489
496 switch (access_type) { 490 switch (access_type) {
497 case HW_BREAKPOINT_WRITE: 491 case HW_BREAKPOINT_W:
498 seq_puts(m, " W "); 492 seq_puts(m, " W ");
499 break; 493 break;
500 case HW_BREAKPOINT_RW: 494 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
501 seq_puts(m, " RW "); 495 seq_puts(m, " RW ");
502 break; 496 break;
503 default: 497 default:
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 7179c12e4f0f..27c5072c2e6b 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
828 828
829 ksym_selftest_dummy = 0; 829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */ 830 /* Register the read-write tracing request */
831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, 831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY,
832 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
832 (unsigned long)(&ksym_selftest_dummy)); 833 (unsigned long)(&ksym_selftest_dummy));
833 834
834 if (ret < 0) { 835 if (ret < 0) {