aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/debugreg.h11
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h58
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/kernel/hw_breakpoint.c391
-rw-r--r--arch/x86/kernel/process.c7
-rw-r--r--arch/x86/kernel/process_32.c26
-rw-r--r--arch/x86/kernel/process_64.c26
-rw-r--r--arch/x86/kernel/ptrace.c182
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kvm/x86.c18
-rw-r--r--arch/x86/power/cpu.c6
-rw-r--r--include/linux/hw_breakpoint.h243
-rw-r--r--include/linux/perf_event.h26
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/hw_breakpoint.c424
-rw-r--r--kernel/perf_event.c53
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_ksym.c126
-rw-r--r--kernel/trace/trace_selftest.c3
22 files changed, 885 insertions, 750 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index acb664397945..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES
128 128
129config HAVE_HW_BREAKPOINT 129config HAVE_HW_BREAKPOINT
130 bool 130 bool
131 depends on HAVE_PERF_EVENTS
132 select ANON_INODES
133 select PERF_EVENTS
131 134
132 135
133source "kernel/gcov/Kconfig" 136source "kernel/gcov/Kconfig"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
10header-y += sigcontext32.h 10header-y += sigcontext32.h
11header-y += ucontext.h 11header-y += ucontext.h
12header-y += processor-flags.h 12header-y += processor-flags.h
13header-y += hw_breakpoint.h
13 14
14unifdef-y += e820.h 15unifdef-y += e820.h
15unifdef-y += ist.h 16unifdef-y += ist.h
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 23439fbb1d0e..9a3333c91f9a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -75,13 +75,8 @@
75 */ 75 */
76#ifdef __KERNEL__ 76#ifdef __KERNEL__
77 77
78/* For process management */ 78DECLARE_PER_CPU(unsigned long, dr7);
79extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
80extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
81 struct task_struct *child, unsigned long clone_flags);
82 79
83/* For CPU management */
84extern void load_debug_registers(void);
85static inline void hw_breakpoint_disable(void) 80static inline void hw_breakpoint_disable(void)
86{ 81{
87 /* Zero the control register for HW Breakpoint */ 82 /* Zero the control register for HW Breakpoint */
@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
94 set_debugreg(0UL, 3); 89 set_debugreg(0UL, 3);
95} 90}
96 91
92#ifdef CONFIG_KVM
93extern void hw_breakpoint_restore(void);
94#endif
95
97#endif /* __KERNEL__ */ 96#endif /* __KERNEL__ */
98 97
99#endif /* _ASM_X86_DEBUGREG_H */ 98#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 3cfca8e2b5f6..0675a7c4c20e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -4,6 +4,11 @@
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H 5#define __ARCH_HW_BREAKPOINT_H
6 6
7/*
8 * The name should probably be something dealt in
9 * a higher level. While dealing with the user
10 * (display/resolving)
11 */
7struct arch_hw_breakpoint { 12struct arch_hw_breakpoint {
8 char *name; /* Contains name of the symbol to set bkpt */ 13 char *name; /* Contains name of the symbol to set bkpt */
9 unsigned long address; 14 unsigned long address;
@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
12}; 17};
13 18
14#include <linux/kdebug.h> 19#include <linux/kdebug.h>
15#include <linux/hw_breakpoint.h> 20#include <linux/percpu.h>
21#include <linux/list.h>
16 22
17/* Available HW breakpoint length encodings */ 23/* Available HW breakpoint length encodings */
18#define HW_BREAKPOINT_LEN_1 0x40 24#define X86_BREAKPOINT_LEN_1 0x40
19#define HW_BREAKPOINT_LEN_2 0x44 25#define X86_BREAKPOINT_LEN_2 0x44
20#define HW_BREAKPOINT_LEN_4 0x4c 26#define X86_BREAKPOINT_LEN_4 0x4c
21#define HW_BREAKPOINT_LEN_EXECUTE 0x40 27#define X86_BREAKPOINT_LEN_EXECUTE 0x40
22 28
23#ifdef CONFIG_X86_64 29#ifdef CONFIG_X86_64
24#define HW_BREAKPOINT_LEN_8 0x48 30#define X86_BREAKPOINT_LEN_8 0x48
25#endif 31#endif
26 32
27/* Available HW breakpoint type encodings */ 33/* Available HW breakpoint type encodings */
28 34
29/* trigger on instruction execute */ 35/* trigger on instruction execute */
30#define HW_BREAKPOINT_EXECUTE 0x80 36#define X86_BREAKPOINT_EXECUTE 0x80
31/* trigger on memory write */ 37/* trigger on memory write */
32#define HW_BREAKPOINT_WRITE 0x81 38#define X86_BREAKPOINT_WRITE 0x81
33/* trigger on memory read or write */ 39/* trigger on memory read or write */
34#define HW_BREAKPOINT_RW 0x83 40#define X86_BREAKPOINT_RW 0x83
35 41
36/* Total number of available HW breakpoint registers */ 42/* Total number of available HW breakpoint registers */
37#define HBP_NUM 4 43#define HBP_NUM 4
38 44
39extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; 45struct perf_event;
40DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); 46struct pmu;
41extern unsigned int hbp_user_refcount[HBP_NUM];
42 47
43extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
44extern void arch_uninstall_thread_hw_breakpoint(void);
45extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); 48extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
46extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, 49extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
47 struct task_struct *tsk); 50 struct task_struct *tsk);
48extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
49extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
50extern void arch_update_kernel_hw_breakpoint(void *);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, 51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data); 52 unsigned long val, void *data);
53
54
55int arch_install_hw_breakpoint(struct perf_event *bp);
56void arch_uninstall_hw_breakpoint(struct perf_event *bp);
57void hw_breakpoint_pmu_read(struct perf_event *bp);
58void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
59
60extern void
61arch_fill_perf_breakpoint(struct perf_event *bp);
62
63unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
64int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
65
66extern int arch_bp_generic_fields(int x86_len, int x86_type,
67 int *gen_len, int *gen_type);
68
69extern struct pmu perf_ops_bp;
70
53#endif /* __KERNEL__ */ 71#endif /* __KERNEL__ */
54#endif /* _I386_HW_BREAKPOINT_H */ 72#endif /* _I386_HW_BREAKPOINT_H */
55 73
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 61aafb71c7ef..820f3000f736 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
423extern void free_thread_xstate(struct task_struct *); 423extern void free_thread_xstate(struct task_struct *);
424extern struct kmem_cache *task_xstate_cachep; 424extern struct kmem_cache *task_xstate_cachep;
425 425
426struct perf_event;
427
426struct thread_struct { 428struct thread_struct {
427 /* Cached TLS descriptors: */ 429 /* Cached TLS descriptors: */
428 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; 430 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -444,12 +446,10 @@ struct thread_struct {
444 unsigned long fs; 446 unsigned long fs;
445#endif 447#endif
446 unsigned long gs; 448 unsigned long gs;
447 /* Hardware debugging registers: */ 449 /* Save middle states of ptrace breakpoints */
448 unsigned long debugreg[HBP_NUM]; 450 struct perf_event *ptrace_bps[HBP_NUM];
449 unsigned long debugreg6; 451 /* Debug status used for traps, single steps, etc... */
450 unsigned long debugreg7; 452 unsigned long debugreg6;
451 /* Hardware breakpoint info */
452 struct hw_breakpoint *hbp[HBP_NUM];
453 /* Fault info: */ 453 /* Fault info: */
454 unsigned long cr2; 454 unsigned long cr2;
455 unsigned long trap_no; 455 unsigned long trap_no;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 9316a9de4de3..e622620790bd 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
15 * 15 *
16 * Copyright (C) 2007 Alan Stern 16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation 17 * Copyright (C) 2009 IBM Corporation
18 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
18 */ 19 */
19 20
20/* 21/*
@@ -22,6 +23,8 @@
22 * using the CPU's debug registers. 23 * using the CPU's debug registers.
23 */ 24 */
24 25
26#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h>
25#include <linux/irqflags.h> 28#include <linux/irqflags.h>
26#include <linux/notifier.h> 29#include <linux/notifier.h>
27#include <linux/kallsyms.h> 30#include <linux/kallsyms.h>
@@ -38,26 +41,24 @@
38#include <asm/processor.h> 41#include <asm/processor.h>
39#include <asm/debugreg.h> 42#include <asm/debugreg.h>
40 43
41/* Unmasked kernel DR7 value */ 44/* Per cpu debug control register value */
42static unsigned long kdr7; 45DEFINE_PER_CPU(unsigned long, dr7);
46
47/* Per cpu debug address registers values */
48static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
43 49
44/* 50/*
45 * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. 51 * Stores the breakpoints currently in use on each breakpoint address
46 * Used to clear and verify the status of bits corresponding to DR0 - DR3 52 * register for each cpus
47 */ 53 */
48static const unsigned long dr7_masks[HBP_NUM] = { 54static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
49 0x000f0003, /* LEN0, R/W0, G0, L0 */
50 0x00f0000c, /* LEN1, R/W1, G1, L1 */
51 0x0f000030, /* LEN2, R/W2, G2, L2 */
52 0xf00000c0 /* LEN3, R/W3, G3, L3 */
53};
54 55
55 56
56/* 57/*
57 * Encode the length, type, Exact, and Enable bits for a particular breakpoint 58 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
58 * as stored in debug register 7. 59 * as stored in debug register 7.
59 */ 60 */
60static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) 61unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
61{ 62{
62 unsigned long bp_info; 63 unsigned long bp_info;
63 64
@@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
68 return bp_info; 69 return bp_info;
69} 70}
70 71
71void arch_update_kernel_hw_breakpoint(void *unused) 72/*
73 * Decode the length and type bits for a particular breakpoint as
74 * stored in debug register 7. Return the "enabled" status.
75 */
76int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
72{ 77{
73 struct hw_breakpoint *bp; 78 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
74 int i, cpu = get_cpu();
75 unsigned long temp_kdr7 = 0;
76
77 /* Don't allow debug exceptions while we update the registers */
78 set_debugreg(0UL, 7);
79 79
80 for (i = hbp_kernel_pos; i < HBP_NUM; i++) { 80 *len = (bp_info & 0xc) | 0x40;
81 per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; 81 *type = (bp_info & 0x3) | 0x80;
82 if (bp) {
83 temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
84 set_debugreg(bp->info.address, i);
85 }
86 }
87 82
88 /* No need to set DR6. Update the debug registers with kernel-space 83 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
89 * breakpoint values from kdr7 and user-space requests from the
90 * current process
91 */
92 kdr7 = temp_kdr7;
93 set_debugreg(kdr7 | current->thread.debugreg7, 7);
94 put_cpu();
95} 84}
96 85
97/* 86/*
98 * Install the thread breakpoints in their debug registers. 87 * Install a perf counter breakpoint.
88 *
89 * We seek a free debug address register and use it for this
90 * breakpoint. Eventually we enable it in the debug control register.
91 *
92 * Atomic: we hold the counter->ctx->lock and we only handle variables
93 * and registers local to this cpu.
99 */ 94 */
100void arch_install_thread_hw_breakpoint(struct task_struct *tsk) 95int arch_install_hw_breakpoint(struct perf_event *bp)
101{ 96{
102 struct thread_struct *thread = &(tsk->thread); 97 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
103 98 unsigned long *dr7;
104 switch (hbp_kernel_pos) { 99 int i;
105 case 4: 100
106 set_debugreg(thread->debugreg[3], 3); 101 for (i = 0; i < HBP_NUM; i++) {
107 case 3: 102 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
108 set_debugreg(thread->debugreg[2], 2); 103
109 case 2: 104 if (!*slot) {
110 set_debugreg(thread->debugreg[1], 1); 105 *slot = bp;
111 case 1: 106 break;
112 set_debugreg(thread->debugreg[0], 0); 107 }
113 default:
114 break;
115 } 108 }
116 109
117 /* No need to set DR6 */ 110 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
118 set_debugreg((kdr7 | thread->debugreg7), 7); 111 return -EBUSY;
112
113 set_debugreg(info->address, i);
114 __get_cpu_var(cpu_debugreg[i]) = info->address;
115
116 dr7 = &__get_cpu_var(dr7);
117 *dr7 |= encode_dr7(i, info->len, info->type);
118
119 set_debugreg(*dr7, 7);
120
121 return 0;
119} 122}
120 123
121/* 124/*
122 * Install the debug register values for just the kernel, no thread. 125 * Uninstall the breakpoint contained in the given counter.
126 *
127 * First we search the debug address register it uses and then we disable
128 * it.
129 *
130 * Atomic: we hold the counter->ctx->lock and we only handle variables
131 * and registers local to this cpu.
123 */ 132 */
124void arch_uninstall_thread_hw_breakpoint(void) 133void arch_uninstall_hw_breakpoint(struct perf_event *bp)
125{ 134{
126 /* Clear the user-space portion of debugreg7 by setting only kdr7 */ 135 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
127 set_debugreg(kdr7, 7); 136 unsigned long *dr7;
137 int i;
138
139 for (i = 0; i < HBP_NUM; i++) {
140 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
141
142 if (*slot == bp) {
143 *slot = NULL;
144 break;
145 }
146 }
147
148 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
149 return;
128 150
151 dr7 = &__get_cpu_var(dr7);
152 *dr7 &= ~encode_dr7(i, info->len, info->type);
153
154 set_debugreg(*dr7, 7);
129} 155}
130 156
131static int get_hbp_len(u8 hbp_len) 157static int get_hbp_len(u8 hbp_len)
@@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len)
133 unsigned int len_in_bytes = 0; 159 unsigned int len_in_bytes = 0;
134 160
135 switch (hbp_len) { 161 switch (hbp_len) {
136 case HW_BREAKPOINT_LEN_1: 162 case X86_BREAKPOINT_LEN_1:
137 len_in_bytes = 1; 163 len_in_bytes = 1;
138 break; 164 break;
139 case HW_BREAKPOINT_LEN_2: 165 case X86_BREAKPOINT_LEN_2:
140 len_in_bytes = 2; 166 len_in_bytes = 2;
141 break; 167 break;
142 case HW_BREAKPOINT_LEN_4: 168 case X86_BREAKPOINT_LEN_4:
143 len_in_bytes = 4; 169 len_in_bytes = 4;
144 break; 170 break;
145#ifdef CONFIG_X86_64 171#ifdef CONFIG_X86_64
146 case HW_BREAKPOINT_LEN_8: 172 case X86_BREAKPOINT_LEN_8:
147 len_in_bytes = 8; 173 len_in_bytes = 8;
148 break; 174 break;
149#endif 175#endif
@@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
178/* 204/*
179 * Store a breakpoint's encoded address, length, and type. 205 * Store a breakpoint's encoded address, length, and type.
180 */ 206 */
181static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) 207static int arch_store_info(struct perf_event *bp)
182{ 208{
183 /* 209 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
184 * User-space requests will always have the address field populated
185 * Symbol names from user-space are rejected
186 */
187 if (tsk && bp->info.name)
188 return -EINVAL;
189 /* 210 /*
190 * For kernel-addresses, either the address or symbol name can be 211 * For kernel-addresses, either the address or symbol name can be
191 * specified. 212 * specified.
192 */ 213 */
193 if (bp->info.name) 214 if (info->name)
194 bp->info.address = (unsigned long) 215 info->address = (unsigned long)
195 kallsyms_lookup_name(bp->info.name); 216 kallsyms_lookup_name(info->name);
196 if (bp->info.address) 217 if (info->address)
197 return 0; 218 return 0;
219
198 return -EINVAL; 220 return -EINVAL;
199} 221}
200 222
201/* 223int arch_bp_generic_fields(int x86_len, int x86_type,
202 * Validate the arch-specific HW Breakpoint register settings 224 int *gen_len, int *gen_type)
203 */
204int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
205 struct task_struct *tsk)
206{ 225{
207 unsigned int align; 226 /* Len */
208 int ret = -EINVAL; 227 switch (x86_len) {
228 case X86_BREAKPOINT_LEN_1:
229 *gen_len = HW_BREAKPOINT_LEN_1;
230 break;
231 case X86_BREAKPOINT_LEN_2:
232 *gen_len = HW_BREAKPOINT_LEN_2;
233 break;
234 case X86_BREAKPOINT_LEN_4:
235 *gen_len = HW_BREAKPOINT_LEN_4;
236 break;
237#ifdef CONFIG_X86_64
238 case X86_BREAKPOINT_LEN_8:
239 *gen_len = HW_BREAKPOINT_LEN_8;
240 break;
241#endif
242 default:
243 return -EINVAL;
244 }
209 245
210 switch (bp->info.type) { 246 /* Type */
211 /* 247 switch (x86_type) {
212 * Ptrace-refactoring code 248 case X86_BREAKPOINT_EXECUTE:
213 * For now, we'll allow instruction breakpoint only for user-space 249 *gen_type = HW_BREAKPOINT_X;
214 * addresses
215 */
216 case HW_BREAKPOINT_EXECUTE:
217 if ((!arch_check_va_in_userspace(bp->info.address,
218 bp->info.len)) &&
219 bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
220 return ret;
221 break; 250 break;
222 case HW_BREAKPOINT_WRITE: 251 case X86_BREAKPOINT_WRITE:
252 *gen_type = HW_BREAKPOINT_W;
223 break; 253 break;
224 case HW_BREAKPOINT_RW: 254 case X86_BREAKPOINT_RW:
255 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
225 break; 256 break;
226 default: 257 default:
227 return ret; 258 return -EINVAL;
228 } 259 }
229 260
230 switch (bp->info.len) { 261 return 0;
262}
263
264
265static int arch_build_bp_info(struct perf_event *bp)
266{
267 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
268
269 info->address = bp->attr.bp_addr;
270
271 /* Len */
272 switch (bp->attr.bp_len) {
231 case HW_BREAKPOINT_LEN_1: 273 case HW_BREAKPOINT_LEN_1:
232 align = 0; 274 info->len = X86_BREAKPOINT_LEN_1;
233 break; 275 break;
234 case HW_BREAKPOINT_LEN_2: 276 case HW_BREAKPOINT_LEN_2:
235 align = 1; 277 info->len = X86_BREAKPOINT_LEN_2;
236 break; 278 break;
237 case HW_BREAKPOINT_LEN_4: 279 case HW_BREAKPOINT_LEN_4:
238 align = 3; 280 info->len = X86_BREAKPOINT_LEN_4;
239 break; 281 break;
240#ifdef CONFIG_X86_64 282#ifdef CONFIG_X86_64
241 case HW_BREAKPOINT_LEN_8: 283 case HW_BREAKPOINT_LEN_8:
284 info->len = X86_BREAKPOINT_LEN_8;
285 break;
286#endif
287 default:
288 return -EINVAL;
289 }
290
291 /* Type */
292 switch (bp->attr.bp_type) {
293 case HW_BREAKPOINT_W:
294 info->type = X86_BREAKPOINT_WRITE;
295 break;
296 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
297 info->type = X86_BREAKPOINT_RW;
298 break;
299 case HW_BREAKPOINT_X:
300 info->type = X86_BREAKPOINT_EXECUTE;
301 break;
302 default:
303 return -EINVAL;
304 }
305
306 return 0;
307}
308/*
309 * Validate the arch-specific HW Breakpoint register settings
310 */
311int arch_validate_hwbkpt_settings(struct perf_event *bp,
312 struct task_struct *tsk)
313{
314 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
315 unsigned int align;
316 int ret;
317
318
319 ret = arch_build_bp_info(bp);
320 if (ret)
321 return ret;
322
323 ret = -EINVAL;
324
325 if (info->type == X86_BREAKPOINT_EXECUTE)
326 /*
327 * Ptrace-refactoring code
328 * For now, we'll allow instruction breakpoint only for user-space
329 * addresses
330 */
331 if ((!arch_check_va_in_userspace(info->address, info->len)) &&
332 info->len != X86_BREAKPOINT_EXECUTE)
333 return ret;
334
335 switch (info->len) {
336 case X86_BREAKPOINT_LEN_1:
337 align = 0;
338 break;
339 case X86_BREAKPOINT_LEN_2:
340 align = 1;
341 break;
342 case X86_BREAKPOINT_LEN_4:
343 align = 3;
344 break;
345#ifdef CONFIG_X86_64
346 case X86_BREAKPOINT_LEN_8:
242 align = 7; 347 align = 7;
243 break; 348 break;
244#endif 349#endif
@@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
246 return ret; 351 return ret;
247 } 352 }
248 353
249 if (bp->triggered) 354 if (bp->callback)
250 ret = arch_store_info(bp, tsk); 355 ret = arch_store_info(bp);
251 356
252 if (ret < 0) 357 if (ret < 0)
253 return ret; 358 return ret;
@@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
255 * Check that the low-order bits of the address are appropriate 360 * Check that the low-order bits of the address are appropriate
256 * for the alignment implied by len. 361 * for the alignment implied by len.
257 */ 362 */
258 if (bp->info.address & align) 363 if (info->address & align)
259 return -EINVAL; 364 return -EINVAL;
260 365
261 /* Check that the virtual address is in the proper range */ 366 /* Check that the virtual address is in the proper range */
262 if (tsk) { 367 if (tsk) {
263 if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) 368 if (!arch_check_va_in_userspace(info->address, info->len))
264 return -EFAULT; 369 return -EFAULT;
265 } else { 370 } else {
266 if (!arch_check_va_in_kernelspace(bp->info.address, 371 if (!arch_check_va_in_kernelspace(info->address, info->len))
267 bp->info.len))
268 return -EFAULT; 372 return -EFAULT;
269 } 373 }
374
270 return 0; 375 return 0;
271} 376}
272 377
273void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) 378/*
379 * Release the user breakpoints used by ptrace
380 */
381void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
274{ 382{
275 struct thread_struct *thread = &(tsk->thread); 383 int i;
276 struct hw_breakpoint *bp = thread->hbp[pos]; 384 struct thread_struct *t = &tsk->thread;
277 385
278 thread->debugreg7 &= ~dr7_masks[pos]; 386 for (i = 0; i < HBP_NUM; i++) {
279 if (bp) { 387 unregister_hw_breakpoint(t->ptrace_bps[i]);
280 thread->debugreg[pos] = bp->info.address; 388 t->ptrace_bps[i] = NULL;
281 thread->debugreg7 |= encode_dr7(pos, bp->info.len, 389 }
282 bp->info.type);
283 } else
284 thread->debugreg[pos] = 0;
285} 390}
286 391
287void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) 392#ifdef CONFIG_KVM
393void hw_breakpoint_restore(void)
288{ 394{
289 int i; 395 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
290 struct thread_struct *thread = &(tsk->thread); 396 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
291 397 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
292 thread->debugreg7 = 0; 398 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
293 for (i = 0; i < HBP_NUM; i++) 399 set_debugreg(current->thread.debugreg6, 6);
294 thread->debugreg[i] = 0; 400 set_debugreg(__get_cpu_var(dr7), 7);
295} 401}
402EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
403#endif
296 404
297/* 405/*
298 * Handle debug exception notifications. 406 * Handle debug exception notifications.
@@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
313static int __kprobes hw_breakpoint_handler(struct die_args *args) 421static int __kprobes hw_breakpoint_handler(struct die_args *args)
314{ 422{
315 int i, cpu, rc = NOTIFY_STOP; 423 int i, cpu, rc = NOTIFY_STOP;
316 struct hw_breakpoint *bp; 424 struct perf_event *bp;
317 unsigned long dr7, dr6; 425 unsigned long dr7, dr6;
318 unsigned long *dr6_p; 426 unsigned long *dr6_p;
319 427
@@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
325 if ((dr6 & DR_TRAP_BITS) == 0) 433 if ((dr6 & DR_TRAP_BITS) == 0)
326 return NOTIFY_DONE; 434 return NOTIFY_DONE;
327 435
328 /* Lazy debug register switching */
329 if (!test_tsk_thread_flag(current, TIF_DEBUG))
330 arch_uninstall_thread_hw_breakpoint();
331
332 get_debugreg(dr7, 7); 436 get_debugreg(dr7, 7);
333 /* Disable breakpoints during exception handling */ 437 /* Disable breakpoints during exception handling */
334 set_debugreg(0UL, 7); 438 set_debugreg(0UL, 7);
@@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
344 for (i = 0; i < HBP_NUM; ++i) { 448 for (i = 0; i < HBP_NUM; ++i) {
345 if (likely(!(dr6 & (DR_TRAP0 << i)))) 449 if (likely(!(dr6 & (DR_TRAP0 << i))))
346 continue; 450 continue;
451
347 /* 452 /*
348 * Find the corresponding hw_breakpoint structure and 453 * The counter may be concurrently released but that can only
349 * invoke its triggered callback. 454 * occur from a call_rcu() path. We can then safely fetch
455 * the breakpoint, use its callback, touch its counter
456 * while we are in an rcu_read_lock() path.
350 */ 457 */
351 if (i >= hbp_kernel_pos) 458 rcu_read_lock();
352 bp = per_cpu(this_hbp_kernel[i], cpu); 459
353 else { 460 bp = per_cpu(bp_per_reg[i], cpu);
354 bp = current->thread.hbp[i]; 461 if (bp)
355 if (bp) 462 rc = NOTIFY_DONE;
356 rc = NOTIFY_DONE;
357 }
358 /* 463 /*
359 * Reset the 'i'th TRAP bit in dr6 to denote completion of 464 * Reset the 'i'th TRAP bit in dr6 to denote completion of
360 * exception handling 465 * exception handling
@@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
362 (*dr6_p) &= ~(DR_TRAP0 << i); 467 (*dr6_p) &= ~(DR_TRAP0 << i);
363 /* 468 /*
364 * bp can be NULL due to lazy debug register switching 469 * bp can be NULL due to lazy debug register switching
365 * or due to the delay between updates of hbp_kernel_pos 470 * or due to concurrent perf counter removing.
366 * and this_hbp_kernel.
367 */ 471 */
368 if (!bp) 472 if (!bp) {
369 continue; 473 rcu_read_unlock();
474 break;
475 }
476
477 (bp->callback)(bp, args->regs);
370 478
371 (bp->triggered)(bp, args->regs); 479 rcu_read_unlock();
372 } 480 }
373 if (dr6 & (~DR_TRAP_BITS)) 481 if (dr6 & (~DR_TRAP_BITS))
374 rc = NOTIFY_DONE; 482 rc = NOTIFY_DONE;
375 483
376 set_debugreg(dr7, 7); 484 set_debugreg(dr7, 7);
377 put_cpu(); 485 put_cpu();
486
378 return rc; 487 return rc;
379} 488}
380 489
@@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify(
389 498
390 return hw_breakpoint_handler(data); 499 return hw_breakpoint_handler(data);
391} 500}
501
502void hw_breakpoint_pmu_read(struct perf_event *bp)
503{
504 /* TODO */
505}
506
507void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
508{
509 /* TODO */
510}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cf8ee0016307..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/events/power.h> 12#include <trace/events/power.h>
13#include <linux/hw_breakpoint.h>
13#include <asm/system.h> 14#include <asm/system.h>
14#include <asm/apic.h> 15#include <asm/apic.h>
15#include <asm/syscalls.h> 16#include <asm/syscalls.h>
@@ -18,7 +19,6 @@
18#include <asm/i387.h> 19#include <asm/i387.h>
19#include <asm/ds.h> 20#include <asm/ds.h>
20#include <asm/debugreg.h> 21#include <asm/debugreg.h>
21#include <asm/hw_breakpoint.h>
22 22
23unsigned long idle_halt; 23unsigned long idle_halt;
24EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
47 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 47 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
48 tsk->thread.xstate = NULL; 48 tsk->thread.xstate = NULL;
49 } 49 }
50 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
51 flush_thread_hw_breakpoint(tsk);
52 50
53 WARN(tsk->thread.ds_ctx, "leaking DS context\n"); 51 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
54} 52}
@@ -107,8 +105,7 @@ void flush_thread(void)
107 } 105 }
108#endif 106#endif
109 107
110 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) 108 flush_ptrace_hw_breakpoint(tsk);
111 flush_thread_hw_breakpoint(tsk);
112 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 109 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
113 /* 110 /*
114 * Forget coprocessor state.. 111 * Forget coprocessor state..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 209e74801763..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,6 @@
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h> 61#include <asm/debugreg.h>
62#include <asm/hw_breakpoint.h>
63 62
64asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
65 64
@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
264 p->thread.io_bitmap_ptr = NULL; 263 p->thread.io_bitmap_ptr = NULL;
265 tsk = current; 264 tsk = current;
266 err = -ENOMEM; 265 err = -ENOMEM;
267 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) 266
268 if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) 267 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
269 goto out;
270 268
271 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 269 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
272 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 270 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
287 err = do_set_thread_area(p, -1, 285 err = do_set_thread_area(p, -1,
288 (struct user_desc __user *)childregs->si, 0); 286 (struct user_desc __user *)childregs->si, 0);
289 287
290out:
291 if (err && p->thread.io_bitmap_ptr) { 288 if (err && p->thread.io_bitmap_ptr) {
292 kfree(p->thread.io_bitmap_ptr); 289 kfree(p->thread.io_bitmap_ptr);
293 p->thread.io_bitmap_max = 0; 290 p->thread.io_bitmap_max = 0;
294 } 291 }
295 if (err)
296 flush_thread_hw_breakpoint(p);
297 292
298 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); 293 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
299 p->thread.ds_ctx = NULL; 294 p->thread.ds_ctx = NULL;
@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
437 lazy_load_gs(next->gs); 432 lazy_load_gs(next->gs);
438 433
439 percpu_write(current_task, next_p); 434 percpu_write(current_task, next_p);
440 /*
441 * There's a problem with moving the arch_install_thread_hw_breakpoint()
442 * call before current is updated. Suppose a kernel breakpoint is
443 * triggered in between the two, the hw-breakpoint handler will see that
444 * the 'current' task does not have TIF_DEBUG flag set and will think it
445 * is leftover from an old task (lazy switching) and will erase it. Then
446 * until the next context switch, no user-breakpoints will be installed.
447 *
448 * The real problem is that it's impossible to update both current and
449 * physical debug registers at the same instant, so there will always be
450 * a window in which they disagree and a breakpoint might get triggered.
451 * Since we use lazy switching, we are forced to assume that a
452 * disagreement means that current is correct and the exception is due
453 * to lazy debug register switching.
454 */
455 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
456 arch_install_thread_hw_breakpoint(next_p);
457 435
458 return prev_p; 436 return prev_p;
459} 437}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 72edac026a78..5bafdec34441 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,7 +53,6 @@
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h> 55#include <asm/debugreg.h>
56#include <asm/hw_breakpoint.h>
57 56
58asmlinkage extern void ret_from_fork(void); 57asmlinkage extern void ret_from_fork(void);
59 58
@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
244 BUG(); 243 BUG();
245 } 244 }
246 } 245 }
247 if (unlikely(dead_task->thread.debugreg7))
248 flush_thread_hw_breakpoint(dead_task);
249} 246}
250 247
251static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 248static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
309 savesegment(ds, p->thread.ds); 306 savesegment(ds, p->thread.ds);
310 307
311 err = -ENOMEM; 308 err = -ENOMEM;
312 if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) 309 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
313 if (copy_thread_hw_breakpoint(me, p, clone_flags))
314 goto out;
315 310
316 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 311 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
317 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 312 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -351,8 +346,6 @@ out:
351 kfree(p->thread.io_bitmap_ptr); 346 kfree(p->thread.io_bitmap_ptr);
352 p->thread.io_bitmap_max = 0; 347 p->thread.io_bitmap_max = 0;
353 } 348 }
354 if (err)
355 flush_thread_hw_breakpoint(p);
356 349
357 return err; 350 return err;
358} 351}
@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
508 */ 501 */
509 if (preload_fpu) 502 if (preload_fpu)
510 __math_state_restore(); 503 __math_state_restore();
511 /*
512 * There's a problem with moving the arch_install_thread_hw_breakpoint()
513 * call before current is updated. Suppose a kernel breakpoint is
514 * triggered in between the two, the hw-breakpoint handler will see that
515 * the 'current' task does not have TIF_DEBUG flag set and will think it
516 * is leftover from an old task (lazy switching) and will erase it. Then
517 * until the next context switch, no user-breakpoints will be installed.
518 *
519 * The real problem is that it's impossible to update both current and
520 * physical debug registers at the same instant, so there will always be
521 * a window in which they disagree and a breakpoint might get triggered.
522 * Since we use lazy switching, we are forced to assume that a
523 * disagreement means that current is correct and the exception is due
524 * to lazy debug register switching.
525 */
526 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
527 arch_install_thread_hw_breakpoint(next_p);
528 504
529 return prev_p; 505 return prev_p;
530} 506}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 267cb85b479c..e79610d95971 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
441 return ret; 443 return ret;
442} 444}
443 445
444/* 446static void ptrace_triggered(struct perf_event *bp, void *data)
445 * Decode the length and type bits for a particular breakpoint as
446 * stored in debug register 7. Return the "enabled" status.
447 */
448static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
449 unsigned *type)
450{
451 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
452
453 *len = (bp_info & 0xc) | 0x40;
454 *type = (bp_info & 0x3) | 0x80;
455 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
456}
457
458static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
459{ 447{
460 struct thread_struct *thread = &(current->thread);
461 int i; 448 int i;
449 struct thread_struct *thread = &(current->thread);
462 450
463 /* 451 /*
464 * Store in the virtual DR6 register the fact that the breakpoint 452 * Store in the virtual DR6 register the fact that the breakpoint
465 * was hit so the thread's debugger will see it. 453 * was hit so the thread's debugger will see it.
466 */ 454 */
467 for (i = 0; i < hbp_kernel_pos; i++) 455 for (i = 0; i < HBP_NUM; i++) {
468 /* 456 if (thread->ptrace_bps[i] == bp)
469 * We will check bp->info.address against the address stored in
470 * thread's hbp structure and not debugreg[i]. This is to ensure
471 * that the corresponding bit for 'i' in DR7 register is enabled
472 */
473 if (bp->info.address == thread->hbp[i]->info.address)
474 break; 457 break;
458 }
475 459
476 thread->debugreg6 |= (DR_TRAP0 << i); 460 thread->debugreg6 |= (DR_TRAP0 << i);
477} 461}
478 462
479/* 463/*
464 * Walk through every ptrace breakpoint for this thread and
465 * build the dr7 value on top of their attributes.
466 *
467 */
468static unsigned long ptrace_get_dr7(struct perf_event *bp[])
469{
470 int i;
471 int dr7 = 0;
472 struct arch_hw_breakpoint *info;
473
474 for (i = 0; i < HBP_NUM; i++) {
475 if (bp[i] && !bp[i]->attr.disabled) {
476 info = counter_arch_bp(bp[i]);
477 dr7 |= encode_dr7(i, info->len, info->type);
478 }
479 }
480
481 return dr7;
482}
483
484/*
480 * Handle ptrace writes to debug register 7. 485 * Handle ptrace writes to debug register 7.
481 */ 486 */
482static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) 487static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
483{ 488{
484 struct thread_struct *thread = &(tsk->thread); 489 struct thread_struct *thread = &(tsk->thread);
485 unsigned long old_dr7 = thread->debugreg7; 490 unsigned long old_dr7;
486 int i, orig_ret = 0, rc = 0; 491 int i, orig_ret = 0, rc = 0;
487 int enabled, second_pass = 0; 492 int enabled, second_pass = 0;
488 unsigned len, type; 493 unsigned len, type;
489 struct hw_breakpoint *bp; 494 int gen_len, gen_type;
495 struct perf_event *bp;
490 496
491 data &= ~DR_CONTROL_RESERVED; 497 data &= ~DR_CONTROL_RESERVED;
498 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
492restore: 499restore:
493 /* 500 /*
494 * Loop through all the hardware breakpoints, making the 501 * Loop through all the hardware breakpoints, making the
@@ -496,11 +503,12 @@ restore:
496 */ 503 */
497 for (i = 0; i < HBP_NUM; i++) { 504 for (i = 0; i < HBP_NUM; i++) {
498 enabled = decode_dr7(data, i, &len, &type); 505 enabled = decode_dr7(data, i, &len, &type);
499 bp = thread->hbp[i]; 506 bp = thread->ptrace_bps[i];
500 507
501 if (!enabled) { 508 if (!enabled) {
502 if (bp) { 509 if (bp) {
503 /* Don't unregister the breakpoints right-away, 510 /*
511 * Don't unregister the breakpoints right-away,
504 * unless all register_user_hw_breakpoint() 512 * unless all register_user_hw_breakpoint()
505 * requests have succeeded. This prevents 513 * requests have succeeded. This prevents
506 * any window of opportunity for debug 514 * any window of opportunity for debug
@@ -508,27 +516,45 @@ restore:
508 */ 516 */
509 if (!second_pass) 517 if (!second_pass)
510 continue; 518 continue;
511 unregister_user_hw_breakpoint(tsk, bp); 519 thread->ptrace_bps[i] = NULL;
512 kfree(bp); 520 unregister_hw_breakpoint(bp);
513 } 521 }
514 continue; 522 continue;
515 } 523 }
524
525 /*
526 * We should have at least an inactive breakpoint at this
527 * slot. If there is none, the user is writing dr7 without
528 * having written the address register first
529 */
516 if (!bp) { 530 if (!bp) {
517 rc = -ENOMEM; 531 rc = -EINVAL;
518 bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); 532 break;
519 if (bp) { 533 }
520 bp->info.address = thread->debugreg[i]; 534
521 bp->triggered = ptrace_triggered; 535 rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
522 bp->info.len = len;
523 bp->info.type = type;
524 rc = register_user_hw_breakpoint(tsk, bp);
525 if (rc)
526 kfree(bp);
527 }
528 } else
529 rc = modify_user_hw_breakpoint(tsk, bp);
530 if (rc) 536 if (rc)
531 break; 537 break;
538
539 /*
540 * This is a temporary thing as bp is unregistered/registered
541 * to simulate modification
542 */
543 bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
544 gen_type, bp->callback,
545 tsk, true);
546 thread->ptrace_bps[i] = NULL;
547
548 if (!bp) { /* incorrect bp, or we have a bug in bp API */
549 rc = -EINVAL;
550 break;
551 }
552 if (IS_ERR(bp)) {
553 rc = PTR_ERR(bp);
554 bp = NULL;
555 break;
556 }
557 thread->ptrace_bps[i] = bp;
532 } 558 }
533 /* 559 /*
534 * Make a second pass to free the remaining unused breakpoints 560 * Make a second pass to free the remaining unused breakpoints
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
553 struct thread_struct *thread = &(tsk->thread); 579 struct thread_struct *thread = &(tsk->thread);
554 unsigned long val = 0; 580 unsigned long val = 0;
555 581
556 if (n < HBP_NUM) 582 if (n < HBP_NUM) {
557 val = thread->debugreg[n]; 583 struct perf_event *bp;
558 else if (n == 6) 584 bp = thread->ptrace_bps[n];
585 if (!bp)
586 return 0;
587 val = bp->hw.info.address;
588 } else if (n == 6) {
559 val = thread->debugreg6; 589 val = thread->debugreg6;
560 else if (n == 7) 590 } else if (n == 7) {
561 val = thread->debugreg7; 591 val = ptrace_get_dr7(thread->ptrace_bps);
592 }
562 return val; 593 return val;
563} 594}
564 595
596static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
597 unsigned long addr)
598{
599 struct perf_event *bp;
600 struct thread_struct *t = &tsk->thread;
601
602 if (!t->ptrace_bps[nr]) {
603 /*
604 * Put stub len and type to register (reserve) an inactive but
605 * correct bp
606 */
607 bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
608 HW_BREAKPOINT_W,
609 ptrace_triggered, tsk,
610 false);
611 } else {
612 bp = t->ptrace_bps[nr];
613 t->ptrace_bps[nr] = NULL;
614 bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
615 bp->attr.bp_type,
616 bp->callback,
617 tsk,
618 bp->attr.disabled);
619 }
620
621 if (!bp)
622 return -EIO;
623 /*
624 * CHECKME: the previous code returned -EIO if the addr wasn't a
625 * valid task virtual addr. The new one will return -EINVAL in this
626 * case.
627 * -EINVAL may be what we want for in-kernel breakpoints users, but
628 * -EIO looks better for ptrace, since we refuse a register writing
629 * for the user. And anyway this is the previous behaviour.
630 */
631 if (IS_ERR(bp))
632 return PTR_ERR(bp);
633
634 t->ptrace_bps[nr] = bp;
635
636 return 0;
637}
638
565/* 639/*
566 * Handle PTRACE_POKEUSR calls for the debug register area. 640 * Handle PTRACE_POKEUSR calls for the debug register area.
567 */ 641 */
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
575 return -EIO; 649 return -EIO;
576 650
577 if (n == 6) { 651 if (n == 6) {
578 tsk->thread.debugreg6 = val; 652 thread->debugreg6 = val;
579 goto ret_path; 653 goto ret_path;
580 } 654 }
581 if (n < HBP_NUM) { 655 if (n < HBP_NUM) {
582 if (thread->hbp[n]) { 656 rc = ptrace_set_breakpoint_addr(tsk, n, val);
583 if (arch_check_va_in_userspace(val, 657 if (rc)
584 thread->hbp[n]->info.len) == 0) { 658 return rc;
585 rc = -EIO;
586 goto ret_path;
587 }
588 thread->hbp[n]->info.address = val;
589 }
590 thread->debugreg[n] = val;
591 } 659 }
592 /* All that's left is DR7 */ 660 /* All that's left is DR7 */
593 if (n == 7) 661 if (n == 7)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 213a7a3e4562..565ebc65920e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,7 +64,6 @@
64#include <asm/apic.h> 64#include <asm/apic.h>
65#include <asm/setup.h> 65#include <asm/setup.h>
66#include <asm/uv/uv.h> 66#include <asm/uv/uv.h>
67#include <asm/debugreg.h>
68#include <linux/mc146818rtc.h> 67#include <linux/mc146818rtc.h>
69 68
70#include <asm/smpboot_hooks.h> 69#include <asm/smpboot_hooks.h>
@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
328 x86_cpuinit.setup_percpu_clockev(); 327 x86_cpuinit.setup_percpu_clockev();
329 328
330 wmb(); 329 wmb();
331 load_debug_registers();
332 cpu_idle(); 330 cpu_idle();
333} 331}
334 332
@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
1269 remove_cpu_from_maps(cpu); 1267 remove_cpu_from_maps(cpu);
1270 unlock_vector_lock(); 1268 unlock_vector_lock();
1271 fixup_irqs(); 1269 fixup_irqs();
1272 hw_breakpoint_disable();
1273} 1270}
1274 1271
1275int native_cpu_disable(void) 1272int native_cpu_disable(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2974adf9b6..22dee7aa7813 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
42#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
43#include "trace.h" 43#include "trace.h"
44 44
45#include <asm/debugreg.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/msr.h> 47#include <asm/msr.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3643 trace_kvm_entry(vcpu->vcpu_id); 3644 trace_kvm_entry(vcpu->vcpu_id);
3644 kvm_x86_ops->run(vcpu, kvm_run); 3645 kvm_x86_ops->run(vcpu, kvm_run);
3645 3646
3646 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { 3647 /*
3647 set_debugreg(current->thread.debugreg[0], 0); 3648 * If the guest has used debug registers, at least dr7
3648 set_debugreg(current->thread.debugreg[1], 1); 3649 * will be disabled while returning to the host.
3649 set_debugreg(current->thread.debugreg[2], 2); 3650 * If we don't have active breakpoints in the host, we don't
3650 set_debugreg(current->thread.debugreg[3], 3); 3651 * care about the messed up debug address registers. But if
3651 set_debugreg(current->thread.debugreg6, 6); 3652 * we have some of them active, restore the old state.
3652 set_debugreg(current->thread.debugreg7, 7); 3653 */
3653 } 3654 if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
3655 hw_breakpoint_restore();
3654 3656
3655 set_bit(KVM_REQ_KICK, &vcpu->requests); 3657 set_bit(KVM_REQ_KICK, &vcpu->requests);
3656 local_irq_enable(); 3658 local_irq_enable();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e09a44fc4664..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
105 ctxt->cr4 = read_cr4(); 105 ctxt->cr4 = read_cr4();
106 ctxt->cr8 = read_cr8(); 106 ctxt->cr8 = read_cr8();
107#endif 107#endif
108 hw_breakpoint_disable();
109} 108}
110 109
111/* Needed by apm.c */ 110/* Needed by apm.c */
@@ -144,11 +143,6 @@ static void fix_processor_context(void)
144#endif 143#endif
145 load_TR_desc(); /* This does ltr */ 144 load_TR_desc(); /* This does ltr */
146 load_LDT(&current->active_mm->context); /* This does lldt */ 145 load_LDT(&current->active_mm->context); /* This does lldt */
147
148 /*
149 * Now maybe reload the debug registers
150 */
151 load_debug_registers();
152} 146}
153 147
154/** 148/**
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 61ccc8f17eac..7eba9b92e5f3 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -1,136 +1,131 @@
1#ifndef _LINUX_HW_BREAKPOINT_H 1#ifndef _LINUX_HW_BREAKPOINT_H
2#define _LINUX_HW_BREAKPOINT_H 2#define _LINUX_HW_BREAKPOINT_H
3 3
4#include <linux/perf_event.h>
4 5
5#ifdef __KERNEL__ 6enum {
6#include <linux/list.h> 7 HW_BREAKPOINT_LEN_1 = 1,
7#include <linux/types.h> 8 HW_BREAKPOINT_LEN_2 = 2,
8#include <linux/kallsyms.h> 9 HW_BREAKPOINT_LEN_4 = 4,
9 10 HW_BREAKPOINT_LEN_8 = 8,
10/**
11 * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
12 * @triggered: callback invoked after target address access
13 * @info: arch-specific breakpoint info (address, length, and type)
14 *
15 * %hw_breakpoint structures are the kernel's way of representing
16 * hardware breakpoints. These are data breakpoints
17 * (also known as "watchpoints", triggered on data access), and the breakpoint's
18 * target address can be located in either kernel space or user space.
19 *
20 * The breakpoint's address, length, and type are highly
21 * architecture-specific. The values are encoded in the @info field; you
22 * specify them when registering the breakpoint. To examine the encoded
23 * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
24 * below.
25 *
26 * The address is specified as a regular kernel pointer (for kernel-space
27 * breakpoints) or as an %__user pointer (for user-space breakpoints).
28 * With register_user_hw_breakpoint(), the address must refer to a
29 * location in user space. The breakpoint will be active only while the
30 * requested task is running. Conversely with
31 * register_kernel_hw_breakpoint(), the address must refer to a location
32 * in kernel space, and the breakpoint will be active on all CPUs
33 * regardless of the current task.
34 *
35 * The length is the breakpoint's extent in bytes, which is subject to
36 * certain limitations. include/asm/hw_breakpoint.h contains macros
37 * defining the available lengths for a specific architecture. Note that
38 * the address's alignment must match the length. The breakpoint will
39 * catch accesses to any byte in the range from address to address +
40 * (length - 1).
41 *
42 * The breakpoint's type indicates the sort of access that will cause it
43 * to trigger. Possible values may include:
44 *
45 * %HW_BREAKPOINT_RW (triggered on read or write access),
46 * %HW_BREAKPOINT_WRITE (triggered on write access), and
47 * %HW_BREAKPOINT_READ (triggered on read access).
48 *
49 * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
50 * possibilities are available on all architectures. Execute breakpoints
51 * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
52 *
53 * When a breakpoint gets hit, the @triggered callback is
54 * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
55 * processor registers.
56 * Data breakpoints occur after the memory access has taken place.
57 * Breakpoints are disabled during execution @triggered, to avoid
58 * recursive traps and allow unhindered access to breakpointed memory.
59 *
60 * This sample code sets a breakpoint on pid_max and registers a callback
61 * function for writes to that variable. Note that it is not portable
62 * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
63 *
64 * ----------------------------------------------------------------------
65 *
66 * #include <asm/hw_breakpoint.h>
67 *
68 * struct hw_breakpoint my_bp;
69 *
70 * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
71 * {
72 * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
73 * dump_stack();
74 * .......<more debugging output>........
75 * }
76 *
77 * static struct hw_breakpoint my_bp;
78 *
79 * static int init_module(void)
80 * {
81 * ..........<do anything>............
82 * my_bp.info.type = HW_BREAKPOINT_WRITE;
83 * my_bp.info.len = HW_BREAKPOINT_LEN_4;
84 *
85 * my_bp.installed = (void *)my_bp_installed;
86 *
87 * rc = register_kernel_hw_breakpoint(&my_bp);
88 * ..........<do anything>............
89 * }
90 *
91 * static void cleanup_module(void)
92 * {
93 * ..........<do anything>............
94 * unregister_kernel_hw_breakpoint(&my_bp);
95 * ..........<do anything>............
96 * }
97 *
98 * ----------------------------------------------------------------------
99 */
100struct hw_breakpoint {
101 void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
102 struct arch_hw_breakpoint info;
103}; 11};
104 12
105/* 13enum {
106 * len and type values are defined in include/asm/hw_breakpoint.h. 14 HW_BREAKPOINT_R = 1,
107 * Available values vary according to the architecture. On i386 the 15 HW_BREAKPOINT_W = 2,
108 * possibilities are: 16 HW_BREAKPOINT_X = 4,
109 * 17};
110 * HW_BREAKPOINT_LEN_1 18
111 * HW_BREAKPOINT_LEN_2 19static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
112 * HW_BREAKPOINT_LEN_4 20{
113 * HW_BREAKPOINT_RW 21 return &bp->hw.info;
114 * HW_BREAKPOINT_READ 22}
115 * 23
116 * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the 24static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
117 * 1-, 2-, and 4-byte lengths may be unavailable. There also may be 25{
118 * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. 26 return bp->attr.bp_addr;
119 */ 27}
28
29static inline int hw_breakpoint_type(struct perf_event *bp)
30{
31 return bp->attr.bp_type;
32}
33
34static inline int hw_breakpoint_len(struct perf_event *bp)
35{
36 return bp->attr.bp_len;
37}
38
39#ifdef CONFIG_HAVE_HW_BREAKPOINT
40extern struct perf_event *
41register_user_hw_breakpoint(unsigned long addr,
42 int len,
43 int type,
44 perf_callback_t triggered,
45 struct task_struct *tsk,
46 bool active);
47
48/* FIXME: only change from the attr, and don't unregister */
49extern struct perf_event *
50modify_user_hw_breakpoint(struct perf_event *bp,
51 unsigned long addr,
52 int len,
53 int type,
54 perf_callback_t triggered,
55 struct task_struct *tsk,
56 bool active);
120 57
121extern int register_user_hw_breakpoint(struct task_struct *tsk,
122 struct hw_breakpoint *bp);
123extern int modify_user_hw_breakpoint(struct task_struct *tsk,
124 struct hw_breakpoint *bp);
125extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
126 struct hw_breakpoint *bp);
127/* 58/*
128 * Kernel breakpoints are not associated with any particular thread. 59 * Kernel breakpoints are not associated with any particular thread.
129 */ 60 */
130extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); 61extern struct perf_event *
131extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); 62register_wide_hw_breakpoint_cpu(unsigned long addr,
63 int len,
64 int type,
65 perf_callback_t triggered,
66 int cpu,
67 bool active);
68
69extern struct perf_event **
70register_wide_hw_breakpoint(unsigned long addr,
71 int len,
72 int type,
73 perf_callback_t triggered,
74 bool active);
75
76extern int register_perf_hw_breakpoint(struct perf_event *bp);
77extern int __register_perf_hw_breakpoint(struct perf_event *bp);
78extern void unregister_hw_breakpoint(struct perf_event *bp);
79extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
80
81extern int reserve_bp_slot(struct perf_event *bp);
82extern void release_bp_slot(struct perf_event *bp);
83
84extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
85
86#else /* !CONFIG_HAVE_HW_BREAKPOINT */
87
88static inline struct perf_event *
89register_user_hw_breakpoint(unsigned long addr,
90 int len,
91 int type,
92 perf_callback_t triggered,
93 struct task_struct *tsk,
94 bool active) { return NULL; }
95static inline struct perf_event *
96modify_user_hw_breakpoint(struct perf_event *bp,
97 unsigned long addr,
98 int len,
99 int type,
100 perf_callback_t triggered,
101 struct task_struct *tsk,
102 bool active) { return NULL; }
103static inline struct perf_event *
104register_wide_hw_breakpoint_cpu(unsigned long addr,
105 int len,
106 int type,
107 perf_callback_t triggered,
108 int cpu,
109 bool active) { return NULL; }
110static inline struct perf_event **
111register_wide_hw_breakpoint(unsigned long addr,
112 int len,
113 int type,
114 perf_callback_t triggered,
115 bool active) { return NULL; }
116static inline int
117register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
118static inline int
119__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
120static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
121static inline void
122unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
123static inline int
124reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
125static inline void release_bp_slot(struct perf_event *bp) { }
126
127static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
132 128
133extern unsigned int hbp_kernel_pos; 129#endif /* CONFIG_HAVE_HW_BREAKPOINT */
134 130
135#endif /* __KERNEL__ */ 131#endif /* _LINUX_HW_BREAKPOINT_H */
136#endif /* _LINUX_HW_BREAKPOINT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8d54e6d25eeb..cead64ea6c15 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
18#include <linux/ioctl.h> 18#include <linux/ioctl.h>
19#include <asm/byteorder.h> 19#include <asm/byteorder.h>
20 20
21#ifdef CONFIG_HAVE_HW_BREAKPOINT
22#include <asm/hw_breakpoint.h>
23#endif
24
21/* 25/*
22 * User-space ABI bits: 26 * User-space ABI bits:
23 */ 27 */
@@ -31,6 +35,7 @@ enum perf_type_id {
31 PERF_TYPE_TRACEPOINT = 2, 35 PERF_TYPE_TRACEPOINT = 2,
32 PERF_TYPE_HW_CACHE = 3, 36 PERF_TYPE_HW_CACHE = 3,
33 PERF_TYPE_RAW = 4, 37 PERF_TYPE_RAW = 4,
38 PERF_TYPE_BREAKPOINT = 5,
34 39
35 PERF_TYPE_MAX, /* non-ABI */ 40 PERF_TYPE_MAX, /* non-ABI */
36}; 41};
@@ -207,6 +212,15 @@ struct perf_event_attr {
207 __u32 wakeup_events; /* wakeup every n events */ 212 __u32 wakeup_events; /* wakeup every n events */
208 __u32 wakeup_watermark; /* bytes before wakeup */ 213 __u32 wakeup_watermark; /* bytes before wakeup */
209 }; 214 };
215
216 union {
217 struct { /* Hardware breakpoint info */
218 __u64 bp_addr;
219 __u32 bp_type;
220 __u32 bp_len;
221 };
222 };
223
210 __u32 __reserved_2; 224 __u32 __reserved_2;
211 225
212 __u64 __reserved_3; 226 __u64 __reserved_3;
@@ -476,6 +490,11 @@ struct hw_perf_event {
476 atomic64_t count; 490 atomic64_t count;
477 struct hrtimer hrtimer; 491 struct hrtimer hrtimer;
478 }; 492 };
493#ifdef CONFIG_HAVE_HW_BREAKPOINT
494 union { /* breakpoint */
495 struct arch_hw_breakpoint info;
496 };
497#endif
479 }; 498 };
480 atomic64_t prev_count; 499 atomic64_t prev_count;
481 u64 sample_period; 500 u64 sample_period;
@@ -588,7 +607,7 @@ struct perf_event {
588 u64 tstamp_running; 607 u64 tstamp_running;
589 u64 tstamp_stopped; 608 u64 tstamp_stopped;
590 609
591 struct perf_event_attr attr; 610 struct perf_event_attr attr;
592 struct hw_perf_event hw; 611 struct hw_perf_event hw;
593 612
594 struct perf_event_context *ctx; 613 struct perf_event_context *ctx;
@@ -643,6 +662,8 @@ struct perf_event {
643 662
644 perf_callback_t callback; 663 perf_callback_t callback;
645 664
665 perf_callback_t event_callback;
666
646#endif /* CONFIG_PERF_EVENTS */ 667#endif /* CONFIG_PERF_EVENTS */
647}; 668};
648 669
@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate;
831extern void perf_event_init(void); 852extern void perf_event_init(void);
832extern void perf_tp_event(int event_id, u64 addr, u64 count, 853extern void perf_tp_event(int event_id, u64 addr, u64 count,
833 void *record, int entry_size); 854 void *record, int entry_size);
855extern void perf_bp_event(struct perf_event *event, void *data);
834 856
835#ifndef perf_misc_flags 857#ifndef perf_misc_flags
836#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ 858#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
865static inline void 887static inline void
866perf_sw_event(u32 event_id, u64 nr, int nmi, 888perf_sw_event(u32 event_id, u64 nr, int nmi,
867 struct pt_regs *regs, u64 addr) { } 889 struct pt_regs *regs, u64 addr) { }
890static inline void
891perf_bp_event(struct perf_event *event, void *data) { }
868 892
869static inline void perf_event_mmap(struct vm_area_struct *vma) { } 893static inline void perf_event_mmap(struct vm_area_struct *vma) { }
870static inline void perf_event_comm(struct task_struct *tsk) { } 894static inline void perf_event_comm(struct task_struct *tsk) { }
diff --git a/kernel/exit.c b/kernel/exit.c
index e61891f80123..266f8920628a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
49#include <linux/init_task.h> 49#include <linux/init_task.h>
50#include <linux/perf_event.h> 50#include <linux/perf_event.h>
51#include <trace/events/sched.h> 51#include <trace/events/sched.h>
52#include <linux/hw_breakpoint.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -980,6 +981,10 @@ NORET_TYPE void do_exit(long code)
980 proc_exit_connector(tsk); 981 proc_exit_connector(tsk);
981 982
982 /* 983 /*
984 * FIXME: do that only when needed, using sched_exit tracepoint
985 */
986 flush_ptrace_hw_breakpoint(tsk);
987 /*
983 * Flush inherited counters to the parent - before the parent 988 * Flush inherited counters to the parent - before the parent
984 * gets woken up by child-exit notifications. 989 * gets woken up by child-exit notifications.
985 */ 990 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c1f64e65a9f3..08f6d0163201 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
15 * 15 *
16 * Copyright (C) 2007 Alan Stern 16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009 17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
18 */ 19 */
19 20
20/* 21/*
@@ -35,334 +36,242 @@
35#include <linux/init.h> 36#include <linux/init.h>
36#include <linux/smp.h> 37#include <linux/smp.h>
37 38
38#include <asm/hw_breakpoint.h> 39#include <linux/hw_breakpoint.h>
40
39#include <asm/processor.h> 41#include <asm/processor.h>
40 42
41#ifdef CONFIG_X86 43#ifdef CONFIG_X86
42#include <asm/debugreg.h> 44#include <asm/debugreg.h>
43#endif 45#endif
44/*
45 * Spinlock that protects all (un)register operations over kernel/user-space
46 * breakpoint requests
47 */
48static DEFINE_SPINLOCK(hw_breakpoint_lock);
49
50/* Array of kernel-space breakpoint structures */
51struct hw_breakpoint *hbp_kernel[HBP_NUM];
52
53/*
54 * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
55 * modified but we need the older copy to handle any hbp exceptions. It will
56 * sync with hbp_kernel[] value after updation is done through IPIs.
57 */
58DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
59
60/*
61 * Kernel breakpoints grow downwards, starting from HBP_NUM
62 * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
63 * kernel-space request. We will initialise it here and not in an __init
64 * routine because load_debug_registers(), which uses this variable can be
65 * called very early during CPU initialisation.
66 */
67unsigned int hbp_kernel_pos = HBP_NUM;
68 46
69/* 47static atomic_t bp_slot;
70 * An array containing refcount of threads using a given bkpt register
71 * Accesses are synchronised by acquiring hw_breakpoint_lock
72 */
73unsigned int hbp_user_refcount[HBP_NUM];
74 48
75/* 49int reserve_bp_slot(struct perf_event *bp)
76 * Load the debug registers during startup of a CPU.
77 */
78void load_debug_registers(void)
79{ 50{
80 unsigned long flags; 51 if (atomic_inc_return(&bp_slot) == HBP_NUM) {
81 struct task_struct *tsk = current; 52 atomic_dec(&bp_slot);
82
83 spin_lock_bh(&hw_breakpoint_lock);
84
85 /* Prevent IPIs for new kernel breakpoint updates */
86 local_irq_save(flags);
87 arch_update_kernel_hw_breakpoint(NULL);
88 local_irq_restore(flags);
89
90 if (test_tsk_thread_flag(tsk, TIF_DEBUG))
91 arch_install_thread_hw_breakpoint(tsk);
92
93 spin_unlock_bh(&hw_breakpoint_lock);
94}
95 53
96/* 54 return -ENOSPC;
97 * Erase all the hardware breakpoint info associated with a thread.
98 *
99 * If tsk != current then tsk must not be usable (for example, a
100 * child being cleaned up from a failed fork).
101 */
102void flush_thread_hw_breakpoint(struct task_struct *tsk)
103{
104 int i;
105 struct thread_struct *thread = &(tsk->thread);
106
107 spin_lock_bh(&hw_breakpoint_lock);
108
109 /* The thread no longer has any breakpoints associated with it */
110 clear_tsk_thread_flag(tsk, TIF_DEBUG);
111 for (i = 0; i < HBP_NUM; i++) {
112 if (thread->hbp[i]) {
113 hbp_user_refcount[i]--;
114 kfree(thread->hbp[i]);
115 thread->hbp[i] = NULL;
116 }
117 } 55 }
118 56
119 arch_flush_thread_hw_breakpoint(tsk); 57 return 0;
120
121 /* Actually uninstall the breakpoints if necessary */
122 if (tsk == current)
123 arch_uninstall_thread_hw_breakpoint();
124 spin_unlock_bh(&hw_breakpoint_lock);
125} 58}
126 59
127/* 60void release_bp_slot(struct perf_event *bp)
128 * Copy the hardware breakpoint info from a thread to its cloned child.
129 */
130int copy_thread_hw_breakpoint(struct task_struct *tsk,
131 struct task_struct *child, unsigned long clone_flags)
132{ 61{
133 /* 62 atomic_dec(&bp_slot);
134 * We will assume that breakpoint settings are not inherited
135 * and the child starts out with no debug registers set.
136 * But what about CLONE_PTRACE?
137 */
138 clear_tsk_thread_flag(child, TIF_DEBUG);
139
140 /* We will call flush routine since the debugregs are not inherited */
141 arch_flush_thread_hw_breakpoint(child);
142
143 return 0;
144} 63}
145 64
146static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, 65int __register_perf_hw_breakpoint(struct perf_event *bp)
147 struct hw_breakpoint *bp)
148{ 66{
149 struct thread_struct *thread = &(tsk->thread); 67 int ret;
150 int rc;
151 68
152 /* Do not overcommit. Fail if kernel has used the hbp registers */ 69 ret = reserve_bp_slot(bp);
153 if (pos >= hbp_kernel_pos) 70 if (ret)
154 return -ENOSPC; 71 return ret;
155 72
156 rc = arch_validate_hwbkpt_settings(bp, tsk); 73 if (!bp->attr.disabled)
157 if (rc) 74 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
158 return rc;
159 75
160 thread->hbp[pos] = bp; 76 return ret;
161 hbp_user_refcount[pos]++; 77}
162 78
163 arch_update_user_hw_breakpoint(pos, tsk); 79int register_perf_hw_breakpoint(struct perf_event *bp)
164 /* 80{
165 * Does it need to be installed right now? 81 bp->callback = perf_bp_event;
166 * Otherwise it will get installed the next time tsk runs
167 */
168 if (tsk == current)
169 arch_install_thread_hw_breakpoint(tsk);
170 82
171 return rc; 83 return __register_perf_hw_breakpoint(bp);
172} 84}
173 85
174/* 86/*
175 * Modify the address of a hbp register already in use by the task 87 * Register a breakpoint bound to a task and a given cpu.
176 * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() 88 * If cpu is -1, the breakpoint is active for the task in every cpu
89 * If the task is -1, the breakpoint is active for every tasks in the given
90 * cpu.
177 */ 91 */
178static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, 92static struct perf_event *
179 struct hw_breakpoint *bp) 93register_user_hw_breakpoint_cpu(unsigned long addr,
94 int len,
95 int type,
96 perf_callback_t triggered,
97 pid_t pid,
98 int cpu,
99 bool active)
180{ 100{
181 struct thread_struct *thread = &(tsk->thread); 101 struct perf_event_attr *attr;
182 102 struct perf_event *bp;
183 if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) 103
184 return -EINVAL; 104 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
185 105 if (!attr)
186 if (thread->hbp[pos] == NULL) 106 return ERR_PTR(-ENOMEM);
187 return -EINVAL; 107
188 108 attr->type = PERF_TYPE_BREAKPOINT;
189 thread->hbp[pos] = bp; 109 attr->size = sizeof(*attr);
110 attr->bp_addr = addr;
111 attr->bp_len = len;
112 attr->bp_type = type;
190 /* 113 /*
191 * 'pos' must be that of a hbp register already used by 'tsk' 114 * Such breakpoints are used by debuggers to trigger signals when
192 * Otherwise arch_modify_user_hw_breakpoint() will fail 115 * we hit the excepted memory op. We can't miss such events, they
116 * must be pinned.
193 */ 117 */
194 arch_update_user_hw_breakpoint(pos, tsk); 118 attr->pinned = 1;
195 119
196 if (tsk == current) 120 if (!active)
197 arch_install_thread_hw_breakpoint(tsk); 121 attr->disabled = 1;
198 122
199 return 0; 123 bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
200} 124 kfree(attr);
201
202static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
203{
204 hbp_user_refcount[pos]--;
205 tsk->thread.hbp[pos] = NULL;
206 125
207 arch_update_user_hw_breakpoint(pos, tsk); 126 return bp;
208
209 if (tsk == current)
210 arch_install_thread_hw_breakpoint(tsk);
211} 127}
212 128
213/** 129/**
214 * register_user_hw_breakpoint - register a hardware breakpoint for user space 130 * register_user_hw_breakpoint - register a hardware breakpoint for user space
131 * @addr: is the memory address that triggers the breakpoint
132 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
133 * @type: the type of the access to the memory (read/write/exec)
134 * @triggered: callback to trigger when we hit the breakpoint
215 * @tsk: pointer to 'task_struct' of the process to which the address belongs 135 * @tsk: pointer to 'task_struct' of the process to which the address belongs
216 * @bp: the breakpoint structure to register 136 * @active: should we activate it while registering it
217 *
218 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
219 * @bp->triggered must be set properly before invocation
220 * 137 *
221 */ 138 */
222int register_user_hw_breakpoint(struct task_struct *tsk, 139struct perf_event *
223 struct hw_breakpoint *bp) 140register_user_hw_breakpoint(unsigned long addr,
141 int len,
142 int type,
143 perf_callback_t triggered,
144 struct task_struct *tsk,
145 bool active)
224{ 146{
225 struct thread_struct *thread = &(tsk->thread); 147 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
226 int i, rc = -ENOSPC; 148 tsk->pid, -1, active);
227
228 spin_lock_bh(&hw_breakpoint_lock);
229
230 for (i = 0; i < hbp_kernel_pos; i++) {
231 if (!thread->hbp[i]) {
232 rc = __register_user_hw_breakpoint(i, tsk, bp);
233 break;
234 }
235 }
236 if (!rc)
237 set_tsk_thread_flag(tsk, TIF_DEBUG);
238
239 spin_unlock_bh(&hw_breakpoint_lock);
240 return rc;
241} 149}
242EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 150EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
243 151
244/** 152/**
245 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint 153 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
154 * @bp: the breakpoint structure to modify
155 * @addr: is the memory address that triggers the breakpoint
156 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
157 * @type: the type of the access to the memory (read/write/exec)
158 * @triggered: callback to trigger when we hit the breakpoint
246 * @tsk: pointer to 'task_struct' of the process to which the address belongs 159 * @tsk: pointer to 'task_struct' of the process to which the address belongs
247 * @bp: the breakpoint structure to unregister 160 * @active: should we activate it while registering it
248 *
249 */ 161 */
250int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) 162struct perf_event *
163modify_user_hw_breakpoint(struct perf_event *bp,
164 unsigned long addr,
165 int len,
166 int type,
167 perf_callback_t triggered,
168 struct task_struct *tsk,
169 bool active)
251{ 170{
252 struct thread_struct *thread = &(tsk->thread); 171 /*
253 int i, ret = -ENOENT; 172 * FIXME: do it without unregistering
173 * - We don't want to lose our slot
174 * - If the new bp is incorrect, don't lose the older one
175 */
176 unregister_hw_breakpoint(bp);
254 177
255 spin_lock_bh(&hw_breakpoint_lock); 178 return register_user_hw_breakpoint(addr, len, type, triggered,
256 for (i = 0; i < hbp_kernel_pos; i++) { 179 tsk, active);
257 if (bp == thread->hbp[i]) {
258 ret = __modify_user_hw_breakpoint(i, tsk, bp);
259 break;
260 }
261 }
262 spin_unlock_bh(&hw_breakpoint_lock);
263 return ret;
264} 180}
265EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); 181EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
266 182
267/** 183/**
268 * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint 184 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
269 * @tsk: pointer to 'task_struct' of the process to which the address belongs
270 * @bp: the breakpoint structure to unregister 185 * @bp: the breakpoint structure to unregister
271 *
272 */ 186 */
273void unregister_user_hw_breakpoint(struct task_struct *tsk, 187void unregister_hw_breakpoint(struct perf_event *bp)
274 struct hw_breakpoint *bp)
275{ 188{
276 struct thread_struct *thread = &(tsk->thread); 189 if (!bp)
277 int i, pos = -1, hbp_counter = 0; 190 return;
278 191 perf_event_release_kernel(bp);
279 spin_lock_bh(&hw_breakpoint_lock); 192}
280 for (i = 0; i < hbp_kernel_pos; i++) { 193EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
281 if (thread->hbp[i]) 194
282 hbp_counter++; 195static struct perf_event *
283 if (bp == thread->hbp[i]) 196register_kernel_hw_breakpoint_cpu(unsigned long addr,
284 pos = i; 197 int len,
285 } 198 int type,
286 if (pos >= 0) { 199 perf_callback_t triggered,
287 __unregister_user_hw_breakpoint(pos, tsk); 200 int cpu,
288 hbp_counter--; 201 bool active)
289 } 202{
290 if (!hbp_counter) 203 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
291 clear_tsk_thread_flag(tsk, TIF_DEBUG); 204 -1, cpu, active);
292
293 spin_unlock_bh(&hw_breakpoint_lock);
294} 205}
295EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
296 206
297/** 207/**
298 * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space 208 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
299 * @bp: the breakpoint structure to register 209 * @addr: is the memory address that triggers the breakpoint
300 * 210 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
301 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and 211 * @type: the type of the access to the memory (read/write/exec)
302 * @bp->triggered must be set properly before invocation 212 * @triggered: callback to trigger when we hit the breakpoint
213 * @active: should we activate it while registering it
303 * 214 *
215 * @return a set of per_cpu pointers to perf events
304 */ 216 */
305int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) 217struct perf_event **
218register_wide_hw_breakpoint(unsigned long addr,
219 int len,
220 int type,
221 perf_callback_t triggered,
222 bool active)
306{ 223{
307 int rc; 224 struct perf_event **cpu_events, **pevent, *bp;
225 long err;
226 int cpu;
227
228 cpu_events = alloc_percpu(typeof(*cpu_events));
229 if (!cpu_events)
230 return ERR_PTR(-ENOMEM);
308 231
309 rc = arch_validate_hwbkpt_settings(bp, NULL); 232 for_each_possible_cpu(cpu) {
310 if (rc) 233 pevent = per_cpu_ptr(cpu_events, cpu);
311 return rc; 234 bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
235 triggered, cpu, active);
312 236
313 spin_lock_bh(&hw_breakpoint_lock); 237 *pevent = bp;
314 238
315 rc = -ENOSPC; 239 if (IS_ERR(bp) || !bp) {
316 /* Check if we are over-committing */ 240 err = PTR_ERR(bp);
317 if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { 241 goto fail;
318 hbp_kernel_pos--; 242 }
319 hbp_kernel[hbp_kernel_pos] = bp;
320 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
321 rc = 0;
322 } 243 }
323 244
324 spin_unlock_bh(&hw_breakpoint_lock); 245 return cpu_events;
325 return rc; 246
247fail:
248 for_each_possible_cpu(cpu) {
249 pevent = per_cpu_ptr(cpu_events, cpu);
250 if (IS_ERR(*pevent) || !*pevent)
251 break;
252 unregister_hw_breakpoint(*pevent);
253 }
254 free_percpu(cpu_events);
255 /* return the error if any */
256 return ERR_PTR(err);
326} 257}
327EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
328 258
329/** 259/**
330 * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space 260 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
331 * @bp: the breakpoint structure to unregister 261 * @cpu_events: the per cpu set of events to unregister
332 *
333 * Uninstalls and unregisters @bp.
334 */ 262 */
335void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) 263void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
336{ 264{
337 int i, j; 265 int cpu;
338 266 struct perf_event **pevent;
339 spin_lock_bh(&hw_breakpoint_lock);
340
341 /* Find the 'bp' in our list of breakpoints for kernel */
342 for (i = hbp_kernel_pos; i < HBP_NUM; i++)
343 if (bp == hbp_kernel[i])
344 break;
345 267
346 /* Check if we did not find a match for 'bp'. If so return early */ 268 for_each_possible_cpu(cpu) {
347 if (i == HBP_NUM) { 269 pevent = per_cpu_ptr(cpu_events, cpu);
348 spin_unlock_bh(&hw_breakpoint_lock); 270 unregister_hw_breakpoint(*pevent);
349 return;
350 } 271 }
351 272 free_percpu(cpu_events);
352 /*
353 * We'll shift the breakpoints one-level above to compact if
354 * unregistration creates a hole
355 */
356 for (j = i; j > hbp_kernel_pos; j--)
357 hbp_kernel[j] = hbp_kernel[j-1];
358
359 hbp_kernel[hbp_kernel_pos] = NULL;
360 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
361 hbp_kernel_pos++;
362
363 spin_unlock_bh(&hw_breakpoint_lock);
364} 273}
365EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); 274
366 275
367static struct notifier_block hw_breakpoint_exceptions_nb = { 276static struct notifier_block hw_breakpoint_exceptions_nb = {
368 .notifier_call = hw_breakpoint_exceptions_notify, 277 .notifier_call = hw_breakpoint_exceptions_notify,
@@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void)
374{ 283{
375 return register_die_notifier(&hw_breakpoint_exceptions_nb); 284 return register_die_notifier(&hw_breakpoint_exceptions_nb);
376} 285}
377
378core_initcall(init_hw_breakpoint); 286core_initcall(init_hw_breakpoint);
287
288
289struct pmu perf_ops_bp = {
290 .enable = arch_install_hw_breakpoint,
291 .disable = arch_uninstall_hw_breakpoint,
292 .read = hw_breakpoint_pmu_read,
293 .unthrottle = hw_breakpoint_pmu_unthrottle
294};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5087125e2a00..98dc56b2ebe4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
32 33
33#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
34 35
@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event)
4229 4230
4230#endif /* CONFIG_EVENT_PROFILE */ 4231#endif /* CONFIG_EVENT_PROFILE */
4231 4232
4233#ifdef CONFIG_HAVE_HW_BREAKPOINT
4234static void bp_perf_event_destroy(struct perf_event *event)
4235{
4236 release_bp_slot(event);
4237}
4238
4239static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4240{
4241 int err;
4242 /*
4243 * The breakpoint is already filled if we haven't created the counter
4244 * through perf syscall
4245 * FIXME: manage to get trigerred to NULL if it comes from syscalls
4246 */
4247 if (!bp->callback)
4248 err = register_perf_hw_breakpoint(bp);
4249 else
4250 err = __register_perf_hw_breakpoint(bp);
4251 if (err)
4252 return ERR_PTR(err);
4253
4254 bp->destroy = bp_perf_event_destroy;
4255
4256 return &perf_ops_bp;
4257}
4258
4259void perf_bp_event(struct perf_event *bp, void *regs)
4260{
4261 /* TODO */
4262}
4263#else
4264static void bp_perf_event_destroy(struct perf_event *event)
4265{
4266}
4267
4268static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4269{
4270 return NULL;
4271}
4272
4273void perf_bp_event(struct perf_event *bp, void *regs)
4274{
4275}
4276#endif
4277
4232atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4278atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4233 4279
4234static void sw_perf_event_destroy(struct perf_event *event) 4280static void sw_perf_event_destroy(struct perf_event *event)
@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4375 pmu = tp_perf_event_init(event); 4421 pmu = tp_perf_event_init(event);
4376 break; 4422 break;
4377 4423
4424 case PERF_TYPE_BREAKPOINT:
4425 pmu = bp_perf_event_init(event);
4426 break;
4427
4428
4378 default: 4429 default:
4379 break; 4430 break;
4380 } 4431 }
@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4686 4737
4687 ctx = find_get_context(pid, cpu); 4738 ctx = find_get_context(pid, cpu);
4688 if (IS_ERR(ctx)) 4739 if (IS_ERR(ctx))
4689 return NULL ; 4740 return NULL;
4690 4741
4691 event = perf_event_alloc(attr, cpu, ctx, NULL, 4742 event = perf_event_alloc(attr, cpu, ctx, NULL,
4692 NULL, callback, GFP_KERNEL); 4743 NULL, callback, GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 91c3d0e9a5a1..d72f06ff263f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,14 +11,11 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
17 18
18#ifdef CONFIG_KSYM_TRACER
19#include <asm/hw_breakpoint.h>
20#endif
21
22enum trace_type { 19enum trace_type {
23 __TRACE_FIRST_TYPE = 0, 20 __TRACE_FIRST_TYPE = 0,
24 21
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e19747d4f860..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
372 F_STRUCT( 372 F_STRUCT(
373 __field( unsigned long, ip ) 373 __field( unsigned long, ip )
374 __field( unsigned char, type ) 374 __field( unsigned char, type )
375 __array( char , ksym_name, KSYM_NAME_LEN )
376 __array( char , cmd, TASK_COMM_LEN ) 375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ), 377 ),
378 378
379 F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", 379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type, 380 (void *)__entry->ip, (unsigned int)__entry->type,
381 __entry->ksym_name, __entry->cmd) 381 (void *)__entry->addr, __entry->cmd)
382); 382);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 6d5609c67378..fea83eeeef09 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -29,7 +29,11 @@
29#include "trace_stat.h" 29#include "trace_stat.h"
30#include "trace.h" 30#include "trace.h"
31 31
32/* For now, let us restrict the no. of symbols traced simultaneously to number 32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number
33 * of available hardware breakpoint registers. 37 * of available hardware breakpoint registers.
34 */ 38 */
35#define KSYM_TRACER_MAX HBP_NUM 39#define KSYM_TRACER_MAX HBP_NUM
@@ -37,8 +41,10 @@
37#define KSYM_TRACER_OP_LEN 3 /* rw- */ 41#define KSYM_TRACER_OP_LEN 3 /* rw- */
38 42
39struct trace_ksym { 43struct trace_ksym {
40 struct hw_breakpoint *ksym_hbp; 44 struct perf_event **ksym_hbp;
41 unsigned long ksym_addr; 45 unsigned long ksym_addr;
46 int type;
47 int len;
42#ifdef CONFIG_PROFILE_KSYM_TRACER 48#ifdef CONFIG_PROFILE_KSYM_TRACER
43 unsigned long counter; 49 unsigned long counter;
44#endif 50#endif
@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
75} 81}
76#endif /* CONFIG_PROFILE_KSYM_TRACER */ 82#endif /* CONFIG_PROFILE_KSYM_TRACER */
77 83
78void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) 84void ksym_hbp_handler(struct perf_event *hbp, void *data)
79{ 85{
80 struct ring_buffer_event *event; 86 struct ring_buffer_event *event;
81 struct ksym_trace_entry *entry; 87 struct ksym_trace_entry *entry;
88 struct pt_regs *regs = data;
82 struct ring_buffer *buffer; 89 struct ring_buffer *buffer;
83 int pc; 90 int pc;
84 91
@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
96 103
97 entry = ring_buffer_event_data(event); 104 entry = ring_buffer_event_data(event);
98 entry->ip = instruction_pointer(regs); 105 entry->ip = instruction_pointer(regs);
99 entry->type = hbp->info.type; 106 entry->type = hw_breakpoint_type(hbp);
100 strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); 107 entry->addr = hw_breakpoint_addr(hbp);
101 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); 108 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
102 109
103#ifdef CONFIG_PROFILE_KSYM_TRACER 110#ifdef CONFIG_PROFILE_KSYM_TRACER
104 ksym_collect_stats(hbp->info.address); 111 ksym_collect_stats(hw_breakpoint_addr(hbp));
105#endif /* CONFIG_PROFILE_KSYM_TRACER */ 112#endif /* CONFIG_PROFILE_KSYM_TRACER */
106 113
107 trace_buffer_unlock_commit(buffer, event, 0, pc); 114 trace_buffer_unlock_commit(buffer, event, 0, pc);
@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str)
120 int access = 0; 127 int access = 0;
121 128
122 if (str[0] == 'r') 129 if (str[0] == 'r')
123 access += 4; 130 access |= HW_BREAKPOINT_R;
124 else if (str[0] != '-')
125 return -EINVAL;
126 131
127 if (str[1] == 'w') 132 if (str[1] == 'w')
128 access += 2; 133 access |= HW_BREAKPOINT_W;
129 else if (str[1] != '-')
130 return -EINVAL;
131 134
132 if (str[2] != '-') 135 if (str[2] == 'x')
133 return -EINVAL; 136 access |= HW_BREAKPOINT_X;
134 137
135 switch (access) { 138 switch (access) {
136 case 6: 139 case HW_BREAKPOINT_W:
137 access = HW_BREAKPOINT_RW; 140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
138 break; 141 return access;
139 case 4: 142 default:
140 access = -EINVAL; 143 return -EINVAL;
141 break;
142 case 2:
143 access = HW_BREAKPOINT_WRITE;
144 break;
145 } 144 }
146
147 return access;
148} 145}
149 146
150/* 147/*
@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
194 if (!entry) 191 if (!entry)
195 return -ENOMEM; 192 return -ENOMEM;
196 193
197 entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); 194 entry->type = op;
198 if (!entry->ksym_hbp) 195 entry->ksym_addr = addr;
199 goto err; 196 entry->len = HW_BREAKPOINT_LEN_4;
200 197
201 entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); 198 ret = -EAGAIN;
202 if (!entry->ksym_hbp->info.name) 199 entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
203 goto err; 200 entry->len, entry->type,
204 201 ksym_hbp_handler, true);
205 entry->ksym_hbp->info.type = op; 202 if (IS_ERR(entry->ksym_hbp)) {
206 entry->ksym_addr = entry->ksym_hbp->info.address = addr; 203 entry->ksym_hbp = NULL;
207#ifdef CONFIG_X86 204 ret = PTR_ERR(entry->ksym_hbp);
208 entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; 205 }
209#endif
210 entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
211 206
212 ret = register_kernel_hw_breakpoint(entry->ksym_hbp); 207 if (!entry->ksym_hbp) {
213 if (ret < 0) {
214 printk(KERN_INFO "ksym_tracer request failed. Try again" 208 printk(KERN_INFO "ksym_tracer request failed. Try again"
215 " later!!\n"); 209 " later!!\n");
216 ret = -EAGAIN;
217 goto err; 210 goto err;
218 } 211 }
212
219 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); 213 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
220 ksym_filter_entry_count++; 214 ksym_filter_entry_count++;
215
221 return 0; 216 return 0;
217
222err: 218err:
223 if (entry->ksym_hbp)
224 kfree(entry->ksym_hbp->info.name);
225 kfree(entry->ksym_hbp);
226 kfree(entry); 219 kfree(entry);
220
227 return ret; 221 return ret;
228} 222}
229 223
@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
244 mutex_lock(&ksym_tracer_mutex); 238 mutex_lock(&ksym_tracer_mutex);
245 239
246 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 240 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
247 ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); 241 ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
248 if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) 242 if (entry->type == HW_BREAKPOINT_W)
249 ret = trace_seq_puts(s, "-w-\n"); 243 ret = trace_seq_puts(s, "-w-\n");
250 else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) 244 else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
251 ret = trace_seq_puts(s, "rw-\n"); 245 ret = trace_seq_puts(s, "rw-\n");
252 WARN_ON_ONCE(!ret); 246 WARN_ON_ONCE(!ret);
253 } 247 }
@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void)
269 mutex_lock(&ksym_tracer_mutex); 263 mutex_lock(&ksym_tracer_mutex);
270 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, 264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
271 ksym_hlist) { 265 ksym_hlist) {
272 unregister_kernel_hw_breakpoint(entry->ksym_hbp); 266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
273 ksym_filter_entry_count--; 267 ksym_filter_entry_count--;
274 hlist_del_rcu(&(entry->ksym_hlist)); 268 hlist_del_rcu(&(entry->ksym_hlist));
275 synchronize_rcu(); 269 synchronize_rcu();
276 kfree(entry->ksym_hbp->info.name);
277 kfree(entry->ksym_hbp);
278 kfree(entry); 270 kfree(entry);
279 } 271 }
280 mutex_unlock(&ksym_tracer_mutex); 272 mutex_unlock(&ksym_tracer_mutex);
@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
327 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { 319 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
328 if (entry->ksym_addr == ksym_addr) { 320 if (entry->ksym_addr == ksym_addr) {
329 /* Check for malformed request: (6) */ 321 /* Check for malformed request: (6) */
330 if (entry->ksym_hbp->info.type != op) 322 if (entry->type != op)
331 changed = 1; 323 changed = 1;
332 else 324 else
333 goto out; 325 goto out;
@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file,
335 } 327 }
336 } 328 }
337 if (changed) { 329 if (changed) {
338 unregister_kernel_hw_breakpoint(entry->ksym_hbp); 330 unregister_wide_hw_breakpoint(entry->ksym_hbp);
339 entry->ksym_hbp->info.type = op; 331 entry->type = op;
340 if (op > 0) { 332 if (op > 0) {
341 ret = register_kernel_hw_breakpoint(entry->ksym_hbp); 333 entry->ksym_hbp =
342 if (ret == 0) 334 register_wide_hw_breakpoint(entry->ksym_addr,
335 entry->len, entry->type,
336 ksym_hbp_handler, true);
337 if (IS_ERR(entry->ksym_hbp))
338 entry->ksym_hbp = NULL;
339 if (!entry->ksym_hbp)
343 goto out; 340 goto out;
344 } 341 }
345 ksym_filter_entry_count--; 342 ksym_filter_entry_count--;
346 hlist_del_rcu(&(entry->ksym_hlist)); 343 hlist_del_rcu(&(entry->ksym_hlist));
347 synchronize_rcu(); 344 synchronize_rcu();
348 kfree(entry->ksym_hbp->info.name);
349 kfree(entry->ksym_hbp);
350 kfree(entry); 345 kfree(entry);
351 ret = 0; 346 ret = 0;
352 goto out; 347 goto out;
@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
413 408
414 trace_assign_type(field, entry); 409 trace_assign_type(field, entry);
415 410
416 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, 411 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
417 entry->pid, iter->cpu, field->ksym_name); 412 entry->pid, iter->cpu, (char *)field->addr);
418 if (!ret) 413 if (!ret)
419 return TRACE_TYPE_PARTIAL_LINE; 414 return TRACE_TYPE_PARTIAL_LINE;
420 415
421 switch (field->type) { 416 switch (field->type) {
422 case HW_BREAKPOINT_WRITE: 417 case HW_BREAKPOINT_W:
423 ret = trace_seq_printf(s, " W "); 418 ret = trace_seq_printf(s, " W ");
424 break; 419 break;
425 case HW_BREAKPOINT_RW: 420 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
426 ret = trace_seq_printf(s, " RW "); 421 ret = trace_seq_printf(s, " RW ");
427 break; 422 break;
428 default: 423 default:
@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
490 485
491 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); 486 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
492 487
493 if (entry->ksym_hbp) 488 access_type = entry->type;
494 access_type = entry->ksym_hbp->info.type;
495 489
496 switch (access_type) { 490 switch (access_type) {
497 case HW_BREAKPOINT_WRITE: 491 case HW_BREAKPOINT_W:
498 seq_puts(m, " W "); 492 seq_puts(m, " W ");
499 break; 493 break;
500 case HW_BREAKPOINT_RW: 494 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
501 seq_puts(m, " RW "); 495 seq_puts(m, " RW ");
502 break; 496 break;
503 default: 497 default:
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 7179c12e4f0f..27c5072c2e6b 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
828 828
829 ksym_selftest_dummy = 0; 829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */ 830 /* Register the read-write tracing request */
831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, 831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY,
832 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
832 (unsigned long)(&ksym_selftest_dummy)); 833 (unsigned long)(&ksym_selftest_dummy));
833 834
834 if (ret < 0) { 835 if (ret < 0) {