author	Ingo Molnar <mingo@elte.hu>	2009-11-21 08:07:23 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-11-21 08:07:23 -0500
commit	96200591a34f8ecb98481c626125df43a2463b55 (patch)
tree	314c376b01f254d04f9aaf449b1f9147ad177fa6
parent	7031281e02bf951a2259849217193fb9d75a9762 (diff)
parent	68efa37df779b3e04280598e8b5b3a1919b65fee (diff)
Merge branch 'tracing/hw-breakpoints' into perf/core
Conflicts:
	arch/x86/kernel/kprobes.c
	kernel/trace/Makefile

Merge reason: hw-breakpoints perf integration is looking good in testing
and in reviews, plus conflicts are mounting up - so merge & resolve.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/Kconfig	7
-rw-r--r--	arch/x86/Kconfig	1
-rw-r--r--	arch/x86/include/asm/Kbuild	1
-rw-r--r--	arch/x86/include/asm/a.out-core.h	10
-rw-r--r--	arch/x86/include/asm/debugreg.h	33
-rw-r--r--	arch/x86/include/asm/hw_breakpoint.h	73
-rw-r--r--	arch/x86/include/asm/processor.h	14
-rw-r--r--	arch/x86/kernel/Makefile	2
-rw-r--r--	arch/x86/kernel/hw_breakpoint.c	545
-rw-r--r--	arch/x86/kernel/kgdb.c	6
-rw-r--r--	arch/x86/kernel/kprobes.c	9
-rw-r--r--	arch/x86/kernel/machine_kexec_32.c	2
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c	2
-rw-r--r--	arch/x86/kernel/process.c	21
-rw-r--r--	arch/x86/kernel/process_32.c	6
-rw-r--r--	arch/x86/kernel/process_64.c	7
-rw-r--r--	arch/x86/kernel/ptrace.c	293
-rw-r--r--	arch/x86/kernel/signal.c	9
-rw-r--r--	arch/x86/kernel/traps.c	73
-rw-r--r--	arch/x86/kvm/x86.c	18
-rw-r--r--	arch/x86/mm/kmmio.c	8
-rw-r--r--	arch/x86/power/cpu.c	26
-rw-r--r--	include/linux/hw_breakpoint.h	137
-rw-r--r--	include/linux/perf_event.h	37
-rw-r--r--	kernel/Makefile	1
-rw-r--r--	kernel/exit.c	5
-rw-r--r--	kernel/hw_breakpoint.c	494
-rw-r--r--	kernel/kallsyms.c	1
-rw-r--r--	kernel/perf_event.c	136
-rw-r--r--	kernel/trace/Kconfig	21
-rw-r--r--	kernel/trace/Makefile	1
-rw-r--r--	kernel/trace/trace.h	7
-rw-r--r--	kernel/trace/trace_entries.h	16
-rw-r--r--	kernel/trace/trace_ksym.c	554
-rw-r--r--	kernel/trace/trace_selftest.c	55
-rw-r--r--	samples/Kconfig	6
-rw-r--r--	samples/Makefile	3
-rw-r--r--	samples/hw_breakpoint/Makefile	1
-rw-r--r--	samples/hw_breakpoint/data_breakpoint.c	88
39 files changed, 2512 insertions(+), 217 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_HW_BREAKPOINT
+	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
+
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
 
 #include <linux/user.h>
 #include <linux/elfcore.h>
+#include <asm/debugreg.h>
 
 /*
  * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
 							>> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
-	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
+	aout_dump_debugregs(dump);
 
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..fdabd8435765 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
 #define DR_TRAP3	(0x8)		/* db3 */
+#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+	/* Zero the control register for HW Breakpoint */
+	set_debugreg(0UL, 7);
+
+	/* Zero-out the individual HW breakpoint address registers */
+	set_debugreg(0UL, 0);
+	set_debugreg(0UL, 1);
+	set_debugreg(0UL, 2);
+	set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+	return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
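
The two new single-register macros make the DR7 bit layout explicit: each of the four debug registers owns DR_ENABLE_SIZE (two) enable bits starting at bit 0, and a four-bit len/type field starting at DR_CONTROL_SHIFT. A minimal sketch of arming register n globally, mirroring encode_dr7() in the new arch/x86/kernel/hw_breakpoint.c further down, and assuming n, len and type are in scope:

	unsigned long dr7 = 0;

	/* len/type nibble for debug register n */
	dr7 |= ((len | type) & 0xf) << (DR_CONTROL_SHIFT + n * DR_CONTROL_SIZE);
	/* global enable bits for debug register n */
	dr7 |= DR_GLOBAL_ENABLE << (n * DR_ENABLE_SIZE);
	set_debugreg(dr7, 7);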
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H
3
4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H
6
7/*
8 * The name should probably be something dealt in
9 * a higher level. While dealing with the user
10 * (display/resolving)
11 */
12struct arch_hw_breakpoint {
13 char *name; /* Contains name of the symbol to set bkpt */
14 unsigned long address;
15 u8 len;
16 u8 type;
17};
18
19#include <linux/kdebug.h>
20#include <linux/percpu.h>
21#include <linux/list.h>
22
23/* Available HW breakpoint length encodings */
24#define X86_BREAKPOINT_LEN_1 0x40
25#define X86_BREAKPOINT_LEN_2 0x44
26#define X86_BREAKPOINT_LEN_4 0x4c
27#define X86_BREAKPOINT_LEN_EXECUTE 0x40
28
29#ifdef CONFIG_X86_64
30#define X86_BREAKPOINT_LEN_8 0x48
31#endif
32
33/* Available HW breakpoint type encodings */
34
35/* trigger on instruction execute */
36#define X86_BREAKPOINT_EXECUTE 0x80
37/* trigger on memory write */
38#define X86_BREAKPOINT_WRITE 0x81
39/* trigger on memory read or write */
40#define X86_BREAKPOINT_RW 0x83
41
42/* Total number of available HW breakpoint registers */
43#define HBP_NUM 4
44
45struct perf_event;
46struct pmu;
47
48extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
49extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
50 struct task_struct *tsk);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data);
53
54
55int arch_install_hw_breakpoint(struct perf_event *bp);
56void arch_uninstall_hw_breakpoint(struct perf_event *bp);
57void hw_breakpoint_pmu_read(struct perf_event *bp);
58void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
59
60extern void
61arch_fill_perf_breakpoint(struct perf_event *bp);
62
63unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
64int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
65
66extern int arch_bp_generic_fields(int x86_len, int x86_type,
67 int *gen_len, int *gen_type);
68
69extern struct pmu perf_ops_bp;
70
71#endif /* __KERNEL__ */
72#endif /* _I386_HW_BREAKPOINT_H */
73
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
 	unsigned long	fs;
 #endif
 	unsigned long	gs;
-	/* Hardware debugging registers: */
-	unsigned long	debugreg0;
-	unsigned long	debugreg1;
-	unsigned long	debugreg2;
-	unsigned long	debugreg3;
-	unsigned long	debugreg6;
-	unsigned long	debugreg7;
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long	debugreg6;
 	/* Fault info: */
 	unsigned long	cr2;
 	unsigned long	trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..752daebe91c6
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,545 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
19 */
20
21/*
22 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
23 * using the CPU's debug registers.
24 */
25
26#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h>
28#include <linux/irqflags.h>
29#include <linux/notifier.h>
30#include <linux/kallsyms.h>
31#include <linux/kprobes.h>
32#include <linux/percpu.h>
33#include <linux/kdebug.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/sched.h>
37#include <linux/init.h>
38#include <linux/smp.h>
39
40#include <asm/hw_breakpoint.h>
41#include <asm/processor.h>
42#include <asm/debugreg.h>
43
44/* Per cpu debug control register value */
45DEFINE_PER_CPU(unsigned long, dr7);
46EXPORT_PER_CPU_SYMBOL(dr7);
47
48/* Per cpu debug address registers values */
49static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
50
51/*
52 * Stores the breakpoints currently in use on each breakpoint address
53 * register for each cpus
54 */
55static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
56
57
58/*
59 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
60 * as stored in debug register 7.
61 */
62unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
63{
64 unsigned long bp_info;
65
66 bp_info = (len | type) & 0xf;
67 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
68 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
69 DR_GLOBAL_SLOWDOWN;
70 return bp_info;
71}
72
73/*
74 * Decode the length and type bits for a particular breakpoint as
75 * stored in debug register 7. Return the "enabled" status.
76 */
77int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
78{
79 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
80
81 *len = (bp_info & 0xc) | 0x40;
82 *type = (bp_info & 0x3) | 0x80;
83
84 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
85}
86
87/*
88 * Install a perf counter breakpoint.
89 *
90 * We seek a free debug address register and use it for this
91 * breakpoint. Eventually we enable it in the debug control register.
92 *
93 * Atomic: we hold the counter->ctx->lock and we only handle variables
94 * and registers local to this cpu.
95 */
96int arch_install_hw_breakpoint(struct perf_event *bp)
97{
98 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
99 unsigned long *dr7;
100 int i;
101
102 for (i = 0; i < HBP_NUM; i++) {
103 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
104
105 if (!*slot) {
106 *slot = bp;
107 break;
108 }
109 }
110
111 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
112 return -EBUSY;
113
114 set_debugreg(info->address, i);
115 __get_cpu_var(cpu_debugreg[i]) = info->address;
116
117 dr7 = &__get_cpu_var(dr7);
118 *dr7 |= encode_dr7(i, info->len, info->type);
119
120 set_debugreg(*dr7, 7);
121
122 return 0;
123}
124
125/*
126 * Uninstall the breakpoint contained in the given counter.
127 *
128 * First we search the debug address register it uses and then we disable
129 * it.
130 *
131 * Atomic: we hold the counter->ctx->lock and we only handle variables
132 * and registers local to this cpu.
133 */
134void arch_uninstall_hw_breakpoint(struct perf_event *bp)
135{
136 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
137 unsigned long *dr7;
138 int i;
139
140 for (i = 0; i < HBP_NUM; i++) {
141 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
142
143 if (*slot == bp) {
144 *slot = NULL;
145 break;
146 }
147 }
148
149 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
150 return;
151
152 dr7 = &__get_cpu_var(dr7);
153 *dr7 &= ~encode_dr7(i, info->len, info->type);
154
155 set_debugreg(*dr7, 7);
156}
157
158static int get_hbp_len(u8 hbp_len)
159{
160 unsigned int len_in_bytes = 0;
161
162 switch (hbp_len) {
163 case X86_BREAKPOINT_LEN_1:
164 len_in_bytes = 1;
165 break;
166 case X86_BREAKPOINT_LEN_2:
167 len_in_bytes = 2;
168 break;
169 case X86_BREAKPOINT_LEN_4:
170 len_in_bytes = 4;
171 break;
172#ifdef CONFIG_X86_64
173 case X86_BREAKPOINT_LEN_8:
174 len_in_bytes = 8;
175 break;
176#endif
177 }
178 return len_in_bytes;
179}
180
181/*
182 * Check for virtual address in user space.
183 */
184int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
185{
186 unsigned int len;
187
188 len = get_hbp_len(hbp_len);
189
190 return (va <= TASK_SIZE - len);
191}
192
193/*
194 * Check for virtual address in kernel space.
195 */
196static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
197{
198 unsigned int len;
199
200 len = get_hbp_len(hbp_len);
201
202 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
203}
204
205/*
206 * Store a breakpoint's encoded address, length, and type.
207 */
208static int arch_store_info(struct perf_event *bp)
209{
210 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
211 /*
212 * For kernel-addresses, either the address or symbol name can be
213 * specified.
214 */
215 if (info->name)
216 info->address = (unsigned long)
217 kallsyms_lookup_name(info->name);
218 if (info->address)
219 return 0;
220
221 return -EINVAL;
222}
223
224int arch_bp_generic_fields(int x86_len, int x86_type,
225 int *gen_len, int *gen_type)
226{
227 /* Len */
228 switch (x86_len) {
229 case X86_BREAKPOINT_LEN_1:
230 *gen_len = HW_BREAKPOINT_LEN_1;
231 break;
232 case X86_BREAKPOINT_LEN_2:
233 *gen_len = HW_BREAKPOINT_LEN_2;
234 break;
235 case X86_BREAKPOINT_LEN_4:
236 *gen_len = HW_BREAKPOINT_LEN_4;
237 break;
238#ifdef CONFIG_X86_64
239 case X86_BREAKPOINT_LEN_8:
240 *gen_len = HW_BREAKPOINT_LEN_8;
241 break;
242#endif
243 default:
244 return -EINVAL;
245 }
246
247 /* Type */
248 switch (x86_type) {
249 case X86_BREAKPOINT_EXECUTE:
250 *gen_type = HW_BREAKPOINT_X;
251 break;
252 case X86_BREAKPOINT_WRITE:
253 *gen_type = HW_BREAKPOINT_W;
254 break;
255 case X86_BREAKPOINT_RW:
256 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
257 break;
258 default:
259 return -EINVAL;
260 }
261
262 return 0;
263}
264
265
266static int arch_build_bp_info(struct perf_event *bp)
267{
268 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
269
270 info->address = bp->attr.bp_addr;
271
272 /* Len */
273 switch (bp->attr.bp_len) {
274 case HW_BREAKPOINT_LEN_1:
275 info->len = X86_BREAKPOINT_LEN_1;
276 break;
277 case HW_BREAKPOINT_LEN_2:
278 info->len = X86_BREAKPOINT_LEN_2;
279 break;
280 case HW_BREAKPOINT_LEN_4:
281 info->len = X86_BREAKPOINT_LEN_4;
282 break;
283#ifdef CONFIG_X86_64
284 case HW_BREAKPOINT_LEN_8:
285 info->len = X86_BREAKPOINT_LEN_8;
286 break;
287#endif
288 default:
289 return -EINVAL;
290 }
291
292 /* Type */
293 switch (bp->attr.bp_type) {
294 case HW_BREAKPOINT_W:
295 info->type = X86_BREAKPOINT_WRITE;
296 break;
297 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
298 info->type = X86_BREAKPOINT_RW;
299 break;
300 case HW_BREAKPOINT_X:
301 info->type = X86_BREAKPOINT_EXECUTE;
302 break;
303 default:
304 return -EINVAL;
305 }
306
307 return 0;
308}
309/*
310 * Validate the arch-specific HW Breakpoint register settings
311 */
312int arch_validate_hwbkpt_settings(struct perf_event *bp,
313 struct task_struct *tsk)
314{
315 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
316 unsigned int align;
317 int ret;
318
319
320 ret = arch_build_bp_info(bp);
321 if (ret)
322 return ret;
323
324 ret = -EINVAL;
325
326 if (info->type == X86_BREAKPOINT_EXECUTE)
327 /*
328 * Ptrace-refactoring code
329 * For now, we'll allow instruction breakpoint only for user-space
330 * addresses
331 */
332 if ((!arch_check_va_in_userspace(info->address, info->len)) &&
333 info->len != X86_BREAKPOINT_EXECUTE)
334 return ret;
335
336 switch (info->len) {
337 case X86_BREAKPOINT_LEN_1:
338 align = 0;
339 break;
340 case X86_BREAKPOINT_LEN_2:
341 align = 1;
342 break;
343 case X86_BREAKPOINT_LEN_4:
344 align = 3;
345 break;
346#ifdef CONFIG_X86_64
347 case X86_BREAKPOINT_LEN_8:
348 align = 7;
349 break;
350#endif
351 default:
352 return ret;
353 }
354
355 if (bp->callback)
356 ret = arch_store_info(bp);
357
358 if (ret < 0)
359 return ret;
360 /*
361 * Check that the low-order bits of the address are appropriate
362 * for the alignment implied by len.
363 */
364 if (info->address & align)
365 return -EINVAL;
366
367 /* Check that the virtual address is in the proper range */
368 if (tsk) {
369 if (!arch_check_va_in_userspace(info->address, info->len))
370 return -EFAULT;
371 } else {
372 if (!arch_check_va_in_kernelspace(info->address, info->len))
373 return -EFAULT;
374 }
375
376 return 0;
377}
378
379/*
380 * Dump the debug register contents to the user.
381 * We can't dump our per cpu values because it
382 * may contain cpu wide breakpoint, something that
383 * doesn't belong to the current task.
384 *
385 * TODO: include non-ptrace user breakpoints (perf)
386 */
387void aout_dump_debugregs(struct user *dump)
388{
389 int i;
390 int dr7 = 0;
391 struct perf_event *bp;
392 struct arch_hw_breakpoint *info;
393 struct thread_struct *thread = &current->thread;
394
395 for (i = 0; i < HBP_NUM; i++) {
396 bp = thread->ptrace_bps[i];
397
398 if (bp && !bp->attr.disabled) {
399 dump->u_debugreg[i] = bp->attr.bp_addr;
400 info = counter_arch_bp(bp);
401 dr7 |= encode_dr7(i, info->len, info->type);
402 } else {
403 dump->u_debugreg[i] = 0;
404 }
405 }
406
407 dump->u_debugreg[4] = 0;
408 dump->u_debugreg[5] = 0;
409 dump->u_debugreg[6] = current->thread.debugreg6;
410
411 dump->u_debugreg[7] = dr7;
412}
413EXPORT_SYMBOL_GPL(aout_dump_debugregs);
414
415/*
416 * Release the user breakpoints used by ptrace
417 */
418void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
419{
420 int i;
421 struct thread_struct *t = &tsk->thread;
422
423 for (i = 0; i < HBP_NUM; i++) {
424 unregister_hw_breakpoint(t->ptrace_bps[i]);
425 t->ptrace_bps[i] = NULL;
426 }
427}
428
429void hw_breakpoint_restore(void)
430{
431 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
432 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
433 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
434 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
435 set_debugreg(current->thread.debugreg6, 6);
436 set_debugreg(__get_cpu_var(dr7), 7);
437}
438EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
439
440/*
441 * Handle debug exception notifications.
442 *
443 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
444 *
445 * NOTIFY_DONE returned if one of the following conditions is true.
446 * i) When the causative address is from user-space and the exception
447 * is a valid one, i.e. not triggered as a result of lazy debug register
448 * switching
449 * ii) When there are more bits than trap<n> set in DR6 register (such
450 * as BD, BS or BT) indicating that more than one debug condition is
451 * met and requires some more action in do_debug().
452 *
453 * NOTIFY_STOP returned for all other cases
454 *
455 */
456static int __kprobes hw_breakpoint_handler(struct die_args *args)
457{
458 int i, cpu, rc = NOTIFY_STOP;
459 struct perf_event *bp;
460 unsigned long dr7, dr6;
461 unsigned long *dr6_p;
462
463 /* The DR6 value is pointed by args->err */
464 dr6_p = (unsigned long *)ERR_PTR(args->err);
465 dr6 = *dr6_p;
466
467 /* Do an early return if no trap bits are set in DR6 */
468 if ((dr6 & DR_TRAP_BITS) == 0)
469 return NOTIFY_DONE;
470
471 get_debugreg(dr7, 7);
472 /* Disable breakpoints during exception handling */
473 set_debugreg(0UL, 7);
474 /*
475 * Assert that local interrupts are disabled
476 * Reset the DRn bits in the virtualized register value.
477 * The ptrace trigger routine will add in whatever is needed.
478 */
479 current->thread.debugreg6 &= ~DR_TRAP_BITS;
480 cpu = get_cpu();
481
482 /* Handle all the breakpoints that were triggered */
483 for (i = 0; i < HBP_NUM; ++i) {
484 if (likely(!(dr6 & (DR_TRAP0 << i))))
485 continue;
486
487 /*
488 * The counter may be concurrently released but that can only
489 * occur from a call_rcu() path. We can then safely fetch
490 * the breakpoint, use its callback, touch its counter
491 * while we are in an rcu_read_lock() path.
492 */
493 rcu_read_lock();
494
495 bp = per_cpu(bp_per_reg[i], cpu);
496 if (bp)
497 rc = NOTIFY_DONE;
498 /*
499 * Reset the 'i'th TRAP bit in dr6 to denote completion of
500 * exception handling
501 */
502 (*dr6_p) &= ~(DR_TRAP0 << i);
503 /*
504 * bp can be NULL due to lazy debug register switching
505 * or due to concurrent perf counter removing.
506 */
507 if (!bp) {
508 rcu_read_unlock();
509 break;
510 }
511
512 (bp->callback)(bp, args->regs);
513
514 rcu_read_unlock();
515 }
516 if (dr6 & (~DR_TRAP_BITS))
517 rc = NOTIFY_DONE;
518
519 set_debugreg(dr7, 7);
520 put_cpu();
521
522 return rc;
523}
524
525/*
526 * Handle debug exception notifications.
527 */
528int __kprobes hw_breakpoint_exceptions_notify(
529 struct notifier_block *unused, unsigned long val, void *data)
530{
531 if (val != DIE_DEBUG)
532 return NOTIFY_DONE;
533
534 return hw_breakpoint_handler(data);
535}
536
537void hw_breakpoint_pmu_read(struct perf_event *bp)
538{
539 /* TODO */
540}
541
542void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
543{
544 /* TODO */
545}
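
A quick worked example of the encode_dr7()/decode_dr7() pair above, for a 4-byte write breakpoint in slot 0 (the constant shown is derived from the code, not from the patch itself):

	unsigned int len, type;
	unsigned long dr7;

	dr7 = encode_dr7(0, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE);
	/*
	 * dr7 == 0xd0202: len/type nibble 0xd at bit 16 (slot 0),
	 * DR_GLOBAL_ENABLE for slot 0, plus DR_GLOBAL_SLOWDOWN.
	 */

	/* decode_dr7() returns the slot's two enable bits (non-zero: enabled) */
	if (decode_dr7(dr7, 0, &len, &type)) {
		/* len == X86_BREAKPOINT_LEN_4, type == X86_BREAKPOINT_WRITE */
	}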
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 
+#include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
434 "resuming...\n"); 435 "resuming...\n");
435 kgdb_arch_handle_exception(args->trapnr, args->signr, 436 kgdb_arch_handle_exception(args->trapnr, args->signr,
436 args->err, "c", "", regs); 437 args->err, "c", "", regs);
438 /*
439 * Reset the BS bit in dr6 (pointed by args->err) to
440 * denote completion of processing
441 */
442 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
437 443
438 return NOTIFY_STOP; 444 return NOTIFY_STOP;
439} 445}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c5f1f117e0c0..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -56,6 +56,7 @@
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
+		if (post_kprobe_handler(args->regs)) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 			ret = NOTIFY_STOP;
+		}
 		break;
 	case DIE_GPF:
 		/*
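
kgdb above, kprobes here and kmmio further down all rely on the same new convention: do_debug() no longer passes the DR6 value itself in args->err but a pointer to the task's virtualized copy, encoded with PTR_ERR() (see the traps.c hunk below). A DIE_DEBUG consumer fetches that pointer and clears the bits it handled, so that later notifiers do not see the event again. A minimal sketch under that assumption:

	static int my_debug_notify(struct notifier_block *nb,
				   unsigned long val, void *data)
	{
		struct die_args *args = data;
		/* args->err carries a pointer to the virtualized dr6 */
		unsigned long *dr6_p = (unsigned long *)ERR_PTR(args->err);

		if (val != DIE_DEBUG || !(*dr6_p & DR_STEP))
			return NOTIFY_DONE;

		*dr6_p &= ~DR_STEP;	/* mark the single-step as handled */
		return NOTIFY_STOP;
	}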
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 #include <asm/cacheflush.h>
+#include <asm/debugreg.h>
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/debugreg.h>
 
 static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 				unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -17,6 +18,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
 	}
 #endif
 
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
+	err = -ENOMEM;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
 						  IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..70cf15873f3d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	p->thread.fs = me->thread.fs;
 	p->thread.gs = me->thread.gs;
+	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(fs, p->thread.fsindex);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 
+	err = -ENOMEM;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+
 	return err;
 }
 
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
+
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4f76d275ee4..b25f8947ed7a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -34,6 +36,7 @@
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
 
 #include "tls.h"
 
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-	return TASK_SIZE - 3;
-}
-
 #else  /* CONFIG_X86_64 */
 
 #define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(task, TIF_IA32))
-		return IA32_PAGE_OFFSET - 3;
-#endif
-	return TASK_SIZE_MAX - 7;
-}
-
 #endif	/* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -566,99 +555,229 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
+static void ptrace_triggered(struct perf_event *bp, void *data)
+{
+	int i;
+	struct thread_struct *thread = &(current->thread);
+
+	/*
+	 * Store in the virtual DR6 register the fact that the breakpoint
+	 * was hit so the thread's debugger will see it.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
+			break;
+	}
+
+	thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
 /*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
  */
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 {
-	switch (n) {
-	case 0:		return child->thread.debugreg0;
-	case 1:		return child->thread.debugreg1;
-	case 2:		return child->thread.debugreg2;
-	case 3:		return child->thread.debugreg3;
-	case 6:		return child->thread.debugreg6;
-	case 7:		return child->thread.debugreg7;
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
 	}
-	return 0;
+
+	return dr7;
 }
 
-static int ptrace_set_debugreg(struct task_struct *child,
-			       int n, unsigned long data)
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
-	int i;
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long old_dr7;
+	int i, orig_ret = 0, rc = 0;
+	int enabled, second_pass = 0;
+	unsigned len, type;
+	int gen_len, gen_type;
+	struct perf_event *bp;
+
+	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+	/*
+	 * Loop through all the hardware breakpoints, making the
+	 * appropriate changes to each.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		enabled = decode_dr7(data, i, &len, &type);
+		bp = thread->ptrace_bps[i];
+
+		if (!enabled) {
+			if (bp) {
+				/*
+				 * Don't unregister the breakpoints right-away,
+				 * unless all register_user_hw_breakpoint()
+				 * requests have succeeded. This prevents
+				 * any window of opportunity for debug
+				 * register grabbing by other users.
+				 */
+				if (!second_pass)
+					continue;
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
+			}
+			continue;
+		}
 
-	if (unlikely(n == 4 || n == 5))
-		return -EIO;
+		/*
+		 * We shoud have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
+		if (!bp) {
+			rc = -EINVAL;
+			break;
+		}
 
-	if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
-		return -EIO;
+		rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+		if (rc)
+			break;
 
-	switch (n) {
-	case 0:		child->thread.debugreg0 = data; break;
-	case 1:		child->thread.debugreg1 = data; break;
-	case 2:		child->thread.debugreg2 = data; break;
-	case 3:		child->thread.debugreg3 = data; break;
+		/*
+		 * This is a temporary thing as bp is unregistered/registered
+		 * to simulate modification
+		 */
+		bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+					       gen_type, bp->callback,
+					       tsk, true);
+		thread->ptrace_bps[i] = NULL;
 
-	case 6:
-		if ((data & ~0xffffffffUL) != 0)
-			return -EIO;
-		child->thread.debugreg6 = data;
-		break;
+		if (!bp) { /* incorrect bp, or we have a bug in bp API */
+			rc = -EINVAL;
+			break;
+		}
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			bp = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
+	}
+	/*
+	 * Make a second pass to free the remaining unused breakpoints
+	 * or to restore the original breakpoints if an error occurred.
+	 */
+	if (!second_pass) {
+		second_pass = 1;
+		if (rc < 0) {
+			orig_ret = rc;
+			data = old_dr7;
+		}
+		goto restore;
+	}
+	return ((orig_ret < 0) ? orig_ret : rc);
+}
 
-	case 7:
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long val = 0;
+
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
+		val = thread->debugreg6;
+	} else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
+	return val;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+
+	if (!t->ptrace_bps[nr]) {
 		/*
-		 * Sanity-check data. Take one half-byte at once with
-		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
-		 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-		 * 2 and 3 are LENi. Given a list of invalid values,
-		 * we do mask |= 1 << invalid_value, so that
-		 * (mask >> check) & 1 is a correct test for invalid
-		 * values.
-		 *
-		 * R/Wi contains the type of the breakpoint /
-		 * watchpoint, LENi contains the length of the watched
-		 * data in the watchpoint case.
-		 *
-		 * The invalid values are:
-		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.	[32-bit]
-		 * - R/Wi == 0x10 (break on I/O reads or writes), so
-		 *   mask |= 0x4444.
-		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-		 *   0x1110.
-		 *
-		 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-		 *
-		 * See the Intel Manual "System Programming Guide",
-		 * 15.2.4
-		 *
-		 * Note that LENi == 0x10 is defined on x86_64 in long
-		 * mode (i.e. even for 32-bit userspace software, but
-		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
-		 * See the AMD manual no. 24593 (AMD64 System Programming)
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
 		 */
-#ifdef CONFIG_X86_32
-#define	DR7_MASK	0x5f54
-#else
-#define	DR7_MASK	0x5554
-#endif
-		data &= ~DR_CONTROL_RESERVED;
-		for (i = 0; i < 4; i++)
-			if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-				return -EIO;
-		child->thread.debugreg7 = data;
-		if (data)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
+		bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+						 HW_BREAKPOINT_W,
+						 ptrace_triggered, tsk,
+						 false);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+		bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+					       bp->attr.bp_type,
+					       bp->callback,
+					       tsk,
+					       bp->attr.disabled);
 	}
 
+	if (!bp)
+		return -EIO;
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
 	return 0;
 }
 
 /*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	int rc = 0;
+
+	/* There are no DR4 or DR5 registers */
+	if (n == 4 || n == 5)
+		return -EIO;
+
+	if (n == 6) {
+		thread->debugreg6 = val;
+		goto ret_path;
+	}
+	if (n < HBP_NUM) {
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
+	}
+	/* All that's left is DR7 */
+	if (n == 7)
+		rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+	return rc;
+}
+
+/*
  * These access the current or another (stopped) task's io permission
  * bitmap for debugging or core dump.
  */
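
Note that the user-visible ptrace ABI is unchanged: a debugger still pokes an address into u_debugreg[0..3] and then arms the slot through u_debugreg[7]; the kernel now turns those writes into perf breakpoint events behind the scenes. A hedged user-space sketch, using a 4-byte write watchpoint in slot 0 (0xd0001 = LEN0 of 4 bytes, R/W0 of data write, L0 local enable):

	#include <stddef.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/user.h>

	static int set_watchpoint(pid_t pid, unsigned long addr)
	{
		/* DR0: address to watch */
		if (ptrace(PTRACE_POKEUSER, pid,
			   (void *)offsetof(struct user, u_debugreg[0]),
			   (void *)addr) < 0)
			return -1;
		/* DR7: LEN0 = 4 bytes, R/W0 = write, L0 enable */
		return ptrace(PTRACE_POKEUSER, pid,
			      (void *)offsetof(struct user, u_debugreg[7]),
			      (void *)0xd0001UL);
	}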
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
 
 		signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 		if (signr > 0) {
-			/*
-			 * Re-enable any watchpoints before delivering the
-			 * signal to user space. The processor register will
-			 * have been cleared if the watchpoint triggered
-			 * inside the kernel.
-			 */
-			if (current->thread.debugreg7)
-				set_debugreg(current->thread.debugreg7, 7);
-
 			/* Whee! Actually deliver the signal. */
 			if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 				/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long condition;
+	unsigned long dr6;
 	int si_code;
 
-	get_debugreg(condition, 6);
+	get_debugreg(dr6, 6);
 
 	/* Catch kmemcheck conditions first of all! */
-	if (condition & DR_STEP && kmemcheck_trap(regs))
+	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
 
+	/* DR6 may or may not be cleared by the CPU */
+	set_debugreg(0, 6);
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
 	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 	tsk->thread.debugctlmsr = 0;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-						SIGTRAP) == NOTIFY_STOP)
+	/* Store the virtualized DR6 value */
+	tsk->thread.debugreg6 = dr6;
+
+	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+							SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
-	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7)
-			goto clear_dr7;
+	if (regs->flags & X86_VM_MASK) {
+		handle_vm86_trap((struct kernel_vm86_regs *) regs,
+				error_code, 1);
+		return;
 	}
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto debug_vm86;
-#endif
-
-	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg6 = condition;
-
 	/*
-	 * Single-stepping through TF: make sure we ignore any events in
-	 * kernel space (but re-enable TF when returning to user mode).
+	 * Single-stepping through system calls: ignore any exceptions in
+	 * kernel space, but re-enable TF when returning to user mode.
+	 *
+	 * We already checked v86 mode above, so we can check for kernel mode
+	 * by just checking the CPL of CS.
 	 */
-	if (condition & DR_STEP) {
-		if (!user_mode(regs))
-			goto clear_TF_reenable;
+	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+		tsk->thread.debugreg6 &= ~DR_STEP;
+		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
 	}
-
-	si_code = get_si_code(condition);
-	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code, si_code);
-
-	/*
-	 * Disable additional traps. They'll be re-enabled when
-	 * the signal is delivered.
-	 */
-clear_dr7:
-	set_debugreg(0, 7);
+	si_code = get_si_code(tsk->thread.debugreg6);
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-	/* reenable preemption: handle_vm86_trap() might sleep */
-	dec_preempt_count();
-	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	conditional_cli(regs);
-	return;
-#endif
-
-clear_TF_reenable:
-	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~X86_EFLAGS_TF;
-	preempt_conditional_cli(regs);
 	return;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d261527c..4fc80174191c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
+	/*
+	 * If the guest has used debug registers, at least dr7
+	 * will be disabled while returning to the host.
+	 * If we don't have active breakpoints in the host, we don't
+	 * care about the messed up debug address registers. But if
+	 * we have some of them active, restore the old state.
+	 */
+	if (hw_breakpoint_active())
+		hw_breakpoint_restore();
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 	struct die_args *arg = args;
 
 	if (val == DIE_DEBUG && (arg->err & DR_STEP))
-		if (post_kmmio_handler(arg->err, arg->regs) == 1)
+		if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
 			return NOTIFY_STOP;
+		}
 
 	return NOTIFY_DONE;
 }
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f17667e..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
+#include <asm/debugreg.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -142,31 +143,6 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
 }
 
 /**
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 000000000000..0b98cbf76da7
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,137 @@
1#ifndef _LINUX_HW_BREAKPOINT_H
2#define _LINUX_HW_BREAKPOINT_H
3
4#include <linux/perf_event.h>
5
6enum {
7 HW_BREAKPOINT_LEN_1 = 1,
8 HW_BREAKPOINT_LEN_2 = 2,
9 HW_BREAKPOINT_LEN_4 = 4,
10 HW_BREAKPOINT_LEN_8 = 8,
11};
12
13enum {
14 HW_BREAKPOINT_R = 1,
15 HW_BREAKPOINT_W = 2,
16 HW_BREAKPOINT_X = 4,
17};
18
19#ifdef CONFIG_HAVE_HW_BREAKPOINT
20
21static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
22{
23 return bp->attr.bp_addr;
24}
25
26static inline int hw_breakpoint_type(struct perf_event *bp)
27{
28 return bp->attr.bp_type;
29}
30
31static inline int hw_breakpoint_len(struct perf_event *bp)
32{
33 return bp->attr.bp_len;
34}
35
36extern struct perf_event *
37register_user_hw_breakpoint(unsigned long addr,
38 int len,
39 int type,
40 perf_callback_t triggered,
41 struct task_struct *tsk,
42 bool active);
43
44/* FIXME: only change from the attr, and don't unregister */
45extern struct perf_event *
46modify_user_hw_breakpoint(struct perf_event *bp,
47 unsigned long addr,
48 int len,
49 int type,
50 perf_callback_t triggered,
51 struct task_struct *tsk,
52 bool active);
53
54/*
55 * Kernel breakpoints are not associated with any particular thread.
56 */
57extern struct perf_event *
58register_wide_hw_breakpoint_cpu(unsigned long addr,
59 int len,
60 int type,
61 perf_callback_t triggered,
62 int cpu,
63 bool active);
64
65extern struct perf_event **
66register_wide_hw_breakpoint(unsigned long addr,
67 int len,
68 int type,
69 perf_callback_t triggered,
70 bool active);
71
72extern int register_perf_hw_breakpoint(struct perf_event *bp);
73extern int __register_perf_hw_breakpoint(struct perf_event *bp);
74extern void unregister_hw_breakpoint(struct perf_event *bp);
75extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
76
77extern int reserve_bp_slot(struct perf_event *bp);
78extern void release_bp_slot(struct perf_event *bp);
79
80extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
81
82static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
83{
84 return &bp->hw.info;
85}
86
87#else /* !CONFIG_HAVE_HW_BREAKPOINT */
88
89static inline struct perf_event *
90register_user_hw_breakpoint(unsigned long addr,
91 int len,
92 int type,
93 perf_callback_t triggered,
94 struct task_struct *tsk,
95 bool active) { return NULL; }
96static inline struct perf_event *
97modify_user_hw_breakpoint(struct perf_event *bp,
98 unsigned long addr,
99 int len,
100 int type,
101 perf_callback_t triggered,
102 struct task_struct *tsk,
103 bool active) { return NULL; }
104static inline struct perf_event *
105register_wide_hw_breakpoint_cpu(unsigned long addr,
106 int len,
107 int type,
108 perf_callback_t triggered,
109 int cpu,
110 bool active) { return NULL; }
111static inline struct perf_event **
112register_wide_hw_breakpoint(unsigned long addr,
113 int len,
114 int type,
115 perf_callback_t triggered,
116 bool active) { return NULL; }
117static inline int
118register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
119static inline int
120__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
121static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
122static inline void
123unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
124static inline int
125reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
126static inline void release_bp_slot(struct perf_event *bp) { }
127
128static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
129
130static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
131{
132 return NULL;
133}
134
135#endif /* CONFIG_HAVE_HW_BREAKPOINT */
136
137#endif /* _LINUX_HW_BREAKPOINT_H */
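
As an illustration of the API declared above (a minimal sketch, not code from this commit; sample_triggered() and watch_word() are hypothetical names), a kernel-side user of this header could watch one word of a task's memory like so:

	/* hypothetical callback: runs whenever the watched address is accessed */
	static void sample_triggered(struct perf_event *bp, void *data)
	{
		pr_info("breakpoint hit at 0x%lx\n", hw_breakpoint_addr(bp));
	}

	/* 4-byte read/write watchpoint on @addr in @tsk, activated immediately */
	static struct perf_event *watch_word(struct task_struct *tsk,
					     unsigned long addr)
	{
		return register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
						   HW_BREAKPOINT_R | HW_BREAKPOINT_W,
						   sample_triggered, tsk, true);
	}

The returned pointer should be checked with IS_ERR() before use, matching how the callers later in this series consume these registration functions.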
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7f87563c8485..b5cdac0de370 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
18#include <linux/ioctl.h> 18#include <linux/ioctl.h>
19#include <asm/byteorder.h> 19#include <asm/byteorder.h>
20 20
21#ifdef CONFIG_HAVE_HW_BREAKPOINT
22#include <asm/hw_breakpoint.h>
23#endif
24
21/* 25/*
22 * User-space ABI bits: 26 * User-space ABI bits:
23 */ 27 */
@@ -31,6 +35,7 @@ enum perf_type_id {
31 PERF_TYPE_TRACEPOINT = 2, 35 PERF_TYPE_TRACEPOINT = 2,
32 PERF_TYPE_HW_CACHE = 3, 36 PERF_TYPE_HW_CACHE = 3,
33 PERF_TYPE_RAW = 4, 37 PERF_TYPE_RAW = 4,
38 PERF_TYPE_BREAKPOINT = 5,
34 39
35 PERF_TYPE_MAX, /* non-ABI */ 40 PERF_TYPE_MAX, /* non-ABI */
36}; 41};
@@ -209,6 +214,15 @@ struct perf_event_attr {
209 __u32 wakeup_events; /* wakeup every n events */ 214 __u32 wakeup_events; /* wakeup every n events */
210 __u32 wakeup_watermark; /* bytes before wakeup */ 215 __u32 wakeup_watermark; /* bytes before wakeup */
211 }; 216 };
217
218 union {
219 struct { /* Hardware breakpoint info */
220 __u64 bp_addr;
221 __u32 bp_type;
222 __u32 bp_len;
223 };
224 };
225
212 __u32 __reserved_2; 226 __u32 __reserved_2;
213 227
214 __u64 __reserved_3; 228 __u64 __reserved_3;
@@ -478,6 +492,11 @@ struct hw_perf_event {
478 s64 remaining; 492 s64 remaining;
479 struct hrtimer hrtimer; 493 struct hrtimer hrtimer;
480 }; 494 };
495#ifdef CONFIG_HAVE_HW_BREAKPOINT
496 union { /* breakpoint */
497 struct arch_hw_breakpoint info;
498 };
499#endif
481 }; 500 };
482 atomic64_t prev_count; 501 atomic64_t prev_count;
483 u64 sample_period; 502 u64 sample_period;
@@ -546,6 +565,8 @@ struct perf_pending_entry {
546 void (*func)(struct perf_pending_entry *); 565 void (*func)(struct perf_pending_entry *);
547}; 566};
548 567
568typedef void (*perf_callback_t)(struct perf_event *, void *);
569
549/** 570/**
550 * struct perf_event - performance event kernel representation: 571 * struct perf_event - performance event kernel representation:
551 */ 572 */
@@ -588,7 +609,7 @@ struct perf_event {
588 u64 tstamp_running; 609 u64 tstamp_running;
589 u64 tstamp_stopped; 610 u64 tstamp_stopped;
590 611
591 struct perf_event_attr attr; 612 struct perf_event_attr attr;
592 struct hw_perf_event hw; 613 struct hw_perf_event hw;
593 614
594 struct perf_event_context *ctx; 615 struct perf_event_context *ctx;
@@ -641,6 +662,10 @@ struct perf_event {
641 struct event_filter *filter; 662 struct event_filter *filter;
642#endif 663#endif
643 664
665 perf_callback_t callback;
666
667 perf_callback_t event_callback;
668
644#endif /* CONFIG_PERF_EVENTS */ 669#endif /* CONFIG_PERF_EVENTS */
645}; 670};
646 671
@@ -745,6 +770,13 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
745 struct perf_cpu_context *cpuctx, 770 struct perf_cpu_context *cpuctx,
746 struct perf_event_context *ctx, int cpu); 771 struct perf_event_context *ctx, int cpu);
747extern void perf_event_update_userpage(struct perf_event *event); 772extern void perf_event_update_userpage(struct perf_event *event);
773extern int perf_event_release_kernel(struct perf_event *event);
774extern struct perf_event *
775perf_event_create_kernel_counter(struct perf_event_attr *attr,
776 int cpu,
777 pid_t pid,
778 perf_callback_t callback);
779extern u64 perf_event_read_value(struct perf_event *event);
748 780
749struct perf_sample_data { 781struct perf_sample_data {
750 u64 type; 782 u64 type;
@@ -821,6 +853,7 @@ extern int sysctl_perf_event_sample_rate;
821extern void perf_event_init(void); 853extern void perf_event_init(void);
822extern void perf_tp_event(int event_id, u64 addr, u64 count, 854extern void perf_tp_event(int event_id, u64 addr, u64 count,
823 void *record, int entry_size); 855 void *record, int entry_size);
856extern void perf_bp_event(struct perf_event *event, void *data);
824 857
825#ifndef perf_misc_flags 858#ifndef perf_misc_flags
826#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ 859#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -855,6 +888,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
855static inline void 888static inline void
856perf_sw_event(u32 event_id, u64 nr, int nmi, 889perf_sw_event(u32 event_id, u64 nr, int nmi,
857 struct pt_regs *regs, u64 addr) { } 890 struct pt_regs *regs, u64 addr) { }
891static inline void
892perf_bp_event(struct perf_event *event, void *data) { }
858 893
859static inline void perf_event_mmap(struct vm_area_struct *vma) { } 894static inline void perf_event_mmap(struct vm_area_struct *vma) { }
860static inline void perf_event_comm(struct task_struct *tsk) { } 895static inline void perf_event_comm(struct task_struct *tsk) { }
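
For illustration, this is roughly how the new bp_* attribute fields and the kernel counter API fit together; it mirrors what register_user_hw_breakpoint_cpu() in kernel/hw_breakpoint.c below does, and my_callback plus the local variables are hypothetical:

	struct perf_event_attr attr = {
		.type    = PERF_TYPE_BREAKPOINT,
		.size    = sizeof(attr),
		.bp_addr = addr,			/* address to watch */
		.bp_len  = HW_BREAKPOINT_LEN_4,		/* from <linux/hw_breakpoint.h> */
		.bp_type = HW_BREAKPOINT_W,
		.pinned  = 1,				/* breakpoint events must not be missed */
	};
	struct perf_event *bp;

	/* pid == -1 binds the counter to the cpu rather than to a task */
	bp = perf_event_create_kernel_counter(&attr, cpu, -1, my_callback);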
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..17b575ec7d07 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
95obj-$(CONFIG_SMP) += sched_cpupri.o 95obj-$(CONFIG_SMP) += sched_cpupri.o
96obj-$(CONFIG_SLOW_WORK) += slow-work.o 96obj-$(CONFIG_SLOW_WORK) += slow-work.o
97obj-$(CONFIG_PERF_EVENTS) += perf_event.o 97obj-$(CONFIG_PERF_EVENTS) += perf_event.o
98obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
98 99
99ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 100ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
100# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 101# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3f45e3cf931d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
49#include <linux/init_task.h> 49#include <linux/init_task.h>
50#include <linux/perf_event.h> 50#include <linux/perf_event.h>
51#include <trace/events/sched.h> 51#include <trace/events/sched.h>
52#include <linux/hw_breakpoint.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
978 proc_exit_connector(tsk); 979 proc_exit_connector(tsk);
979 980
980 /* 981 /*
982 * FIXME: do that only when needed, using sched_exit tracepoint
983 */
984 flush_ptrace_hw_breakpoint(tsk);
985 /*
981 * Flush inherited counters to the parent - before the parent 986 * Flush inherited counters to the parent - before the parent
982 * gets woken up by child-exit notifications. 987 * gets woken up by child-exit notifications.
983 */ 988 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..9ea9414e0e58
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,494 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
19 *
20 * Thanks to Ingo Molnar for his many suggestions.
21 */
22
23/*
24 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
25 * using the CPU's debug registers.
26 * This file contains the arch-independent routines.
27 */
28
29#include <linux/irqflags.h>
30#include <linux/kallsyms.h>
31#include <linux/notifier.h>
32#include <linux/kprobes.h>
33#include <linux/kdebug.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/percpu.h>
37#include <linux/sched.h>
38#include <linux/init.h>
39#include <linux/smp.h>
40
41#include <linux/hw_breakpoint.h>
42
43#include <asm/processor.h>
44
45#ifdef CONFIG_X86
46#include <asm/debugreg.h>
47#endif
48
49/*
50 * Constraints data
51 */
52
53/* Number of pinned cpu breakpoints in a cpu */
54static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
55
56/* Number of pinned task breakpoints in a cpu */
57static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
58
59/* Number of non-pinned cpu/task breakpoints in a cpu */
60static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
61
62/* Gather the number of total pinned and un-pinned bp in a cpuset */
63struct bp_busy_slots {
64 unsigned int pinned;
65 unsigned int flexible;
66};
67
68/* Serialize accesses to the above constraints */
69static DEFINE_MUTEX(nr_bp_mutex);
70
71/*
72 * Report the maximum number of pinned breakpoints a task
73 * has in this cpu
74 */
75static unsigned int max_task_bp_pinned(int cpu)
76{
77 int i;
78 unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
79
80	for (i = HBP_NUM - 1; i >= 0; i--) {
81 if (tsk_pinned[i] > 0)
82 return i + 1;
83 }
84
85 return 0;
86}
87
88/*
89 * Report the number of pinned/un-pinned breakpoints we have in
90 * a given cpu (cpu > -1) or in all of them (cpu = -1).
91 */
92static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
93{
94 if (cpu >= 0) {
95 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
96 slots->pinned += max_task_bp_pinned(cpu);
97 slots->flexible = per_cpu(nr_bp_flexible, cpu);
98
99 return;
100 }
101
102 for_each_online_cpu(cpu) {
103 unsigned int nr;
104
105 nr = per_cpu(nr_cpu_bp_pinned, cpu);
106 nr += max_task_bp_pinned(cpu);
107
108 if (nr > slots->pinned)
109 slots->pinned = nr;
110
111 nr = per_cpu(nr_bp_flexible, cpu);
112
113 if (nr > slots->flexible)
114 slots->flexible = nr;
115 }
116}
117
118/*
119 * Add a pinned breakpoint for the given task in our constraint table
120 */
121static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
122{
123 int count = 0;
124 struct perf_event *bp;
125 struct perf_event_context *ctx = tsk->perf_event_ctxp;
126 unsigned int *task_bp_pinned;
127 struct list_head *list;
128 unsigned long flags;
129
130 if (WARN_ONCE(!ctx, "No perf context for this task"))
131 return;
132
133 list = &ctx->event_list;
134
135 spin_lock_irqsave(&ctx->lock, flags);
136
137 /*
138	 * The current breakpoint counter is not yet included in the
139	 * list at open() callback time
140 */
141 list_for_each_entry(bp, list, event_entry) {
142 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
143 count++;
144 }
145
146 spin_unlock_irqrestore(&ctx->lock, flags);
147
148 if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
149 return;
150
151 task_bp_pinned = per_cpu(task_bp_pinned, cpu);
152 if (enable) {
153 task_bp_pinned[count]++;
154 if (count > 0)
155 task_bp_pinned[count-1]--;
156 } else {
157 task_bp_pinned[count]--;
158 if (count > 0)
159 task_bp_pinned[count-1]++;
160 }
161}
162
163/*
164 * Add/remove the given breakpoint in our constraint table
165 */
166static void toggle_bp_slot(struct perf_event *bp, bool enable)
167{
168 int cpu = bp->cpu;
169 struct task_struct *tsk = bp->ctx->task;
170
171 /* Pinned counter task profiling */
172 if (tsk) {
173 if (cpu >= 0) {
174 toggle_bp_task_slot(tsk, cpu, enable);
175 return;
176 }
177
178 for_each_online_cpu(cpu)
179 toggle_bp_task_slot(tsk, cpu, enable);
180 return;
181 }
182
183 /* Pinned counter cpu profiling */
184 if (enable)
185 per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
186 else
187 per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
188}
189
190/*
191 * Constraints to check before allowing this new breakpoint counter:
192 *
193 * == Non-pinned counter == (Considered as pinned for now)
194 *
195 * - If attached to a single cpu, check:
196 *
197 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
198 * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
199 *
200 * -> If there are already non-pinned counters in this cpu, it means
201 * there is already a free slot for them.
202 * Otherwise, we check that the maximum number of per task
203 * breakpoints (for this cpu) plus the number of per cpu breakpoints
204 * (for this cpu) doesn't cover every register.
205 *
206 * - If attached to every cpu, check:
207 *
208 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
209 * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
210 *
211 * -> This is roughly the same, except we check the number of per cpu
212 *   bp for every cpu and we keep the max one. Same for the per task
213 *   breakpoints.
214 *
215 *
216 * == Pinned counter ==
217 *
218 * - If attached to a single cpu, check:
219 *
220 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
221 * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
222 *
223 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
224 *   at least one register (or they will never be fed).
225 *
226 * - If attached to every cpu, check:
227 *
228 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
229 * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
230 */
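/*
 * Worked example (editorial, assuming the x86 value HBP_NUM == 4):
 * a cpu holding 2 pinned per-cpu breakpoints whose busiest task pins
 * 1 more gives slots.pinned == 3; one flexible counter then makes the
 * check in reserve_bp_slot() 3 + 1 == 4 == HBP_NUM, so the next pinned
 * request is refused with -ENOSPC.
 */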
231int reserve_bp_slot(struct perf_event *bp)
232{
233 struct bp_busy_slots slots = {0};
234 int ret = 0;
235
236 mutex_lock(&nr_bp_mutex);
237
238 fetch_bp_busy_slots(&slots, bp->cpu);
239
240 /* Flexible counters need to keep at least one slot */
241 if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
242 ret = -ENOSPC;
243 goto end;
244 }
245
246 toggle_bp_slot(bp, true);
247
248end:
249 mutex_unlock(&nr_bp_mutex);
250
251 return ret;
252}
253
254void release_bp_slot(struct perf_event *bp)
255{
256 mutex_lock(&nr_bp_mutex);
257
258 toggle_bp_slot(bp, false);
259
260 mutex_unlock(&nr_bp_mutex);
261}
262
263
264int __register_perf_hw_breakpoint(struct perf_event *bp)
265{
266 int ret;
267
268 ret = reserve_bp_slot(bp);
269 if (ret)
270 return ret;
271
272 if (!bp->attr.disabled)
273 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
274
275 return ret;
276}
277
278int register_perf_hw_breakpoint(struct perf_event *bp)
279{
280 bp->callback = perf_bp_event;
281
282 return __register_perf_hw_breakpoint(bp);
283}
284
285/*
286 * Register a breakpoint bound to a task and a given cpu.
287 * If cpu is -1, the breakpoint is active for the task on every cpu.
288 * If pid is -1, the breakpoint is active for every task on the given
289 * cpu.
290 */
291static struct perf_event *
292register_user_hw_breakpoint_cpu(unsigned long addr,
293 int len,
294 int type,
295 perf_callback_t triggered,
296 pid_t pid,
297 int cpu,
298 bool active)
299{
300 struct perf_event_attr *attr;
301 struct perf_event *bp;
302
303 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
304 if (!attr)
305 return ERR_PTR(-ENOMEM);
306
307 attr->type = PERF_TYPE_BREAKPOINT;
308 attr->size = sizeof(*attr);
309 attr->bp_addr = addr;
310 attr->bp_len = len;
311 attr->bp_type = type;
312 /*
313 * Such breakpoints are used by debuggers to trigger signals when
314	 * we hit the expected memory op. We can't miss such events, so
315	 * they must be pinned.
316 */
317 attr->pinned = 1;
318
319 if (!active)
320 attr->disabled = 1;
321
322 bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
323 kfree(attr);
324
325 return bp;
326}
327
328/**
329 * register_user_hw_breakpoint - register a hardware breakpoint for user space
330 * @addr: is the memory address that triggers the breakpoint
331 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
332 * @type: the type of the access to the memory (read/write/exec)
333 * @triggered: callback to trigger when we hit the breakpoint
334 * @tsk: pointer to 'task_struct' of the process to which the address belongs
335 * @active: should we activate it while registering it
336 *
337 */
338struct perf_event *
339register_user_hw_breakpoint(unsigned long addr,
340 int len,
341 int type,
342 perf_callback_t triggered,
343 struct task_struct *tsk,
344 bool active)
345{
346 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
347 tsk->pid, -1, active);
348}
349EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
350
351/**
352 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
353 * @bp: the breakpoint structure to modify
354 * @addr: is the memory address that triggers the breakpoint
355 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
356 * @type: the type of the access to the memory (read/write/exec)
357 * @triggered: callback to trigger when we hit the breakpoint
358 * @tsk: pointer to 'task_struct' of the process to which the address belongs
359 * @active: should we activate it while registering it
360 */
361struct perf_event *
362modify_user_hw_breakpoint(struct perf_event *bp,
363 unsigned long addr,
364 int len,
365 int type,
366 perf_callback_t triggered,
367 struct task_struct *tsk,
368 bool active)
369{
370 /*
371 * FIXME: do it without unregistering
372 * - We don't want to lose our slot
373 * - If the new bp is incorrect, don't lose the older one
374 */
375 unregister_hw_breakpoint(bp);
376
377 return register_user_hw_breakpoint(addr, len, type, triggered,
378 tsk, active);
379}
380EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
381
382/**
383 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
384 * @bp: the breakpoint structure to unregister
385 */
386void unregister_hw_breakpoint(struct perf_event *bp)
387{
388 if (!bp)
389 return;
390 perf_event_release_kernel(bp);
391}
392EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
393
394static struct perf_event *
395register_kernel_hw_breakpoint_cpu(unsigned long addr,
396 int len,
397 int type,
398 perf_callback_t triggered,
399 int cpu,
400 bool active)
401{
402 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
403 -1, cpu, active);
404}
405
406/**
407 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
408 * @addr: is the memory address that triggers the breakpoint
409 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
410 * @type: the type of the access to the memory (read/write/exec)
411 * @triggered: callback to trigger when we hit the breakpoint
412 * @active: should we activate it while registering it
413 *
414 * @return a set of per_cpu pointers to perf events
415 */
416struct perf_event **
417register_wide_hw_breakpoint(unsigned long addr,
418 int len,
419 int type,
420 perf_callback_t triggered,
421 bool active)
422{
423 struct perf_event **cpu_events, **pevent, *bp;
424 long err;
425 int cpu;
426
427 cpu_events = alloc_percpu(typeof(*cpu_events));
428 if (!cpu_events)
429 return ERR_PTR(-ENOMEM);
430
431 for_each_possible_cpu(cpu) {
432 pevent = per_cpu_ptr(cpu_events, cpu);
433 bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
434 triggered, cpu, active);
435
436 *pevent = bp;
437
438 if (IS_ERR(bp) || !bp) {
439 err = PTR_ERR(bp);
440 goto fail;
441 }
442 }
443
444 return cpu_events;
445
446fail:
447 for_each_possible_cpu(cpu) {
448 pevent = per_cpu_ptr(cpu_events, cpu);
449 if (IS_ERR(*pevent) || !*pevent)
450 break;
451 unregister_hw_breakpoint(*pevent);
452 }
453 free_percpu(cpu_events);
454 /* return the error if any */
455 return ERR_PTR(err);
456}
457EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
458
459/**
460 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
461 * @cpu_events: the per cpu set of events to unregister
462 */
463void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
464{
465 int cpu;
466 struct perf_event **pevent;
467
468 for_each_possible_cpu(cpu) {
469 pevent = per_cpu_ptr(cpu_events, cpu);
470 unregister_hw_breakpoint(*pevent);
471 }
472 free_percpu(cpu_events);
473}
474EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
475
476static struct notifier_block hw_breakpoint_exceptions_nb = {
477 .notifier_call = hw_breakpoint_exceptions_notify,
478 /* we need to be notified first */
479 .priority = 0x7fffffff
480};
481
482static int __init init_hw_breakpoint(void)
483{
484 return register_die_notifier(&hw_breakpoint_exceptions_nb);
485}
486core_initcall(init_hw_breakpoint);
487
488
489struct pmu perf_ops_bp = {
490 .enable = arch_install_hw_breakpoint,
491 .disable = arch_uninstall_hw_breakpoint,
492 .read = hw_breakpoint_pmu_read,
493 .unthrottle = hw_breakpoint_pmu_unthrottle
494};
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
181 } 181 }
182 return module_kallsyms_lookup_name(name); 182 return module_kallsyms_lookup_name(name);
183} 183}
184EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
184 185
185int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, 186int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
186 unsigned long), 187 unsigned long),
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3256e36ad251..3852e2656bb0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
32 33
33#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
34 35
@@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file)
1725 return 0; 1726 return 0;
1726} 1727}
1727 1728
1729int perf_event_release_kernel(struct perf_event *event)
1730{
1731 struct perf_event_context *ctx = event->ctx;
1732
1733 WARN_ON_ONCE(ctx->parent_ctx);
1734 mutex_lock(&ctx->mutex);
1735 perf_event_remove_from_context(event);
1736 mutex_unlock(&ctx->mutex);
1737
1738 mutex_lock(&event->owner->perf_event_mutex);
1739 list_del_init(&event->owner_entry);
1740 mutex_unlock(&event->owner->perf_event_mutex);
1741 put_task_struct(event->owner);
1742
1743 free_event(event);
1744
1745 return 0;
1746}
1747EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1748
1728static int perf_event_read_size(struct perf_event *event) 1749static int perf_event_read_size(struct perf_event *event)
1729{ 1750{
1730 int entry = sizeof(u64); /* value */ 1751 int entry = sizeof(u64); /* value */
@@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event)
1750 return size; 1771 return size;
1751} 1772}
1752 1773
1753static u64 perf_event_read_value(struct perf_event *event) 1774u64 perf_event_read_value(struct perf_event *event)
1754{ 1775{
1755 struct perf_event *child; 1776 struct perf_event *child;
1756 u64 total = 0; 1777 u64 total = 0;
@@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event)
1761 1782
1762 return total; 1783 return total;
1763} 1784}
1785EXPORT_SYMBOL_GPL(perf_event_read_value);
1764 1786
1765static int perf_event_read_entry(struct perf_event *event, 1787static int perf_event_read_entry(struct perf_event *event,
1766 u64 read_format, char __user *buf) 1788 u64 read_format, char __user *buf)
@@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event)
4231 4253
4232#endif /* CONFIG_EVENT_PROFILE */ 4254#endif /* CONFIG_EVENT_PROFILE */
4233 4255
4256#ifdef CONFIG_HAVE_HW_BREAKPOINT
4257static void bp_perf_event_destroy(struct perf_event *event)
4258{
4259 release_bp_slot(event);
4260}
4261
4262static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4263{
4264 int err;
4265 /*
4266	 * The breakpoint is already filled in if we haven't created the
4267	 * counter through the perf syscall.
4268	 * FIXME: manage to get 'triggered' set to NULL if it comes from syscalls
4269 */
4270 if (!bp->callback)
4271 err = register_perf_hw_breakpoint(bp);
4272 else
4273 err = __register_perf_hw_breakpoint(bp);
4274 if (err)
4275 return ERR_PTR(err);
4276
4277 bp->destroy = bp_perf_event_destroy;
4278
4279 return &perf_ops_bp;
4280}
4281
4282void perf_bp_event(struct perf_event *bp, void *regs)
4283{
4284 /* TODO */
4285}
4286#else
4287static void bp_perf_event_destroy(struct perf_event *event)
4288{
4289}
4290
4291static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4292{
4293 return NULL;
4294}
4295
4296void perf_bp_event(struct perf_event *bp, void *regs)
4297{
4298}
4299#endif
4300
4234atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4301atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4235 4302
4236static void sw_perf_event_destroy(struct perf_event *event) 4303static void sw_perf_event_destroy(struct perf_event *event)
@@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4297 struct perf_event_context *ctx, 4364 struct perf_event_context *ctx,
4298 struct perf_event *group_leader, 4365 struct perf_event *group_leader,
4299 struct perf_event *parent_event, 4366 struct perf_event *parent_event,
4367 perf_callback_t callback,
4300 gfp_t gfpflags) 4368 gfp_t gfpflags)
4301{ 4369{
4302 const struct pmu *pmu; 4370 const struct pmu *pmu;
@@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4339 4407
4340 event->state = PERF_EVENT_STATE_INACTIVE; 4408 event->state = PERF_EVENT_STATE_INACTIVE;
4341 4409
4410 if (!callback && parent_event)
4411 callback = parent_event->callback;
4412
4413 event->callback = callback;
4414
4342 if (attr->disabled) 4415 if (attr->disabled)
4343 event->state = PERF_EVENT_STATE_OFF; 4416 event->state = PERF_EVENT_STATE_OFF;
4344 4417
@@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4373 pmu = tp_perf_event_init(event); 4446 pmu = tp_perf_event_init(event);
4374 break; 4447 break;
4375 4448
4449 case PERF_TYPE_BREAKPOINT:
4450 pmu = bp_perf_event_init(event);
4451 break;
4452
4453
4376 default: 4454 default:
4377 break; 4455 break;
4378 } 4456 }
@@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open,
4615 } 4693 }
4616 4694
4617 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4695 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4618 NULL, GFP_KERNEL); 4696 NULL, NULL, GFP_KERNEL);
4619 err = PTR_ERR(event); 4697 err = PTR_ERR(event);
4620 if (IS_ERR(event)) 4698 if (IS_ERR(event))
4621 goto err_put_context; 4699 goto err_put_context;
@@ -4663,6 +4741,58 @@ err_put_context:
4663 return err; 4741 return err;
4664} 4742}
4665 4743
4744/**
4745 * perf_event_create_kernel_counter
4746 *
4747 * @attr: attributes of the counter to create
4748 * @cpu: cpu to which the counter is bound
4749 * @pid: task to profile
4750 */
4751struct perf_event *
4752perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4753 pid_t pid, perf_callback_t callback)
4754{
4755 struct perf_event *event;
4756 struct perf_event_context *ctx;
4757 int err;
4758
4759 /*
4760 * Get the target context (task or percpu):
4761 */
4762
4763 ctx = find_get_context(pid, cpu);
4764 if (IS_ERR(ctx))
4765 return NULL;
4766
4767 event = perf_event_alloc(attr, cpu, ctx, NULL,
4768 NULL, callback, GFP_KERNEL);
4769 err = PTR_ERR(event);
4770 if (IS_ERR(event))
4771 goto err_put_context;
4772
4773 event->filp = NULL;
4774 WARN_ON_ONCE(ctx->parent_ctx);
4775 mutex_lock(&ctx->mutex);
4776 perf_install_in_context(ctx, event, cpu);
4777 ++ctx->generation;
4778 mutex_unlock(&ctx->mutex);
4779
4780 event->owner = current;
4781 get_task_struct(current);
4782 mutex_lock(&current->perf_event_mutex);
4783 list_add_tail(&event->owner_entry, &current->perf_event_list);
4784 mutex_unlock(&current->perf_event_mutex);
4785
4786 return event;
4787
4788err_put_context:
4789 if (err < 0)
4790 put_ctx(ctx);
4791
4792 return NULL;
4793}
4794EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
4795
4666/* 4796/*
4667 * inherit a event from parent task to child task: 4797 * inherit a event from parent task to child task:
4668 */ 4798 */
@@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event,
4688 child_event = perf_event_alloc(&parent_event->attr, 4818 child_event = perf_event_alloc(&parent_event->attr,
4689 parent_event->cpu, child_ctx, 4819 parent_event->cpu, child_ctx,
4690 group_leader, parent_event, 4820 group_leader, parent_event,
4691 GFP_KERNEL); 4821 NULL, GFP_KERNEL);
4692 if (IS_ERR(child_event)) 4822 if (IS_ERR(child_event))
4693 return child_event; 4823 return child_event;
4694 get_ctx(child_ctx); 4824 get_ctx(child_ctx);
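
A short usage sketch for the kernel counter API exported above (my_callback and the surrounding code are hypothetical); note that in this version perf_event_create_kernel_counter() reports failure by returning NULL rather than an ERR_PTR:

	struct perf_event *event;
	u64 value;

	event = perf_event_create_kernel_counter(&attr, cpu, -1, my_callback);
	if (!event)
		return -EINVAL;		/* failure is reported as NULL here */

	/* ... let the counter run ... */

	value = perf_event_read_value(event);
	perf_event_release_kernel(event);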
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f05671609a89..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347	  This tracer helps find read and write operations on any given kernel
348	  symbol, i.e. any symbol listed in /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354	  This tracer profiles kernel accesses to variables watched through
355	  the ksym tracer ftrace plugin. Depending upon the hardware, all
356	  read and write operations on watched kernel variables can be
357	  monitored.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
342 363
343config STACK_TRACER 364config STACK_TRACER
344 bool "Trace max stack" 365 bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index edc3a3cca1a1..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
57obj-$(CONFIG_EVENT_TRACING) += power-traces.o 58obj-$(CONFIG_EVENT_TRACING) += power-traces.o
58 59
59libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b4e4212e66d7..4da6ede74401 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
232 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
233 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
234 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
235 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
236 } while (0) 239 } while (0)
237 240
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
387void unregister_tracer(struct tracer *type); 390void unregister_tracer(struct tracer *type);
388int is_tracing_stopped(void); 391int is_tracing_stopped(void);
389 392
393extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
394
390extern unsigned long nsecs_to_usecs(unsigned long nsecs); 395extern unsigned long nsecs_to_usecs(unsigned long nsecs);
391 396
392#ifdef CONFIG_TRACER_MAX_TRACE 397#ifdef CONFIG_TRACER_MAX_TRACE
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
461 struct trace_array *tr); 466 struct trace_array *tr);
462extern int trace_selftest_startup_hw_branches(struct tracer *trace, 467extern int trace_selftest_startup_hw_branches(struct tracer *trace,
463 struct trace_array *tr); 468 struct trace_array *tr);
469extern int trace_selftest_startup_ksym(struct tracer *trace,
470 struct trace_array *tr);
464#endif /* CONFIG_FTRACE_STARTUP_TEST */ 471#endif /* CONFIG_FTRACE_STARTUP_TEST */
465 472
466extern void *head_page(struct trace_array_cpu *data); 473extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..11935b53a6cb
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,554 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, restrict the number of symbols traced simultaneously to the
37 * number of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 unsigned long ksym_addr;
46 int type;
47 int len;
48#ifdef CONFIG_PROFILE_KSYM_TRACER
49 unsigned long counter;
50#endif
51 struct hlist_node ksym_hlist;
52};
53
54static struct trace_array *ksym_trace_array;
55
56static unsigned int ksym_filter_entry_count;
57static unsigned int ksym_tracing_enabled;
58
59static HLIST_HEAD(ksym_filter_head);
60
61static DEFINE_MUTEX(ksym_tracer_mutex);
62
63#ifdef CONFIG_PROFILE_KSYM_TRACER
64
65#define MAX_UL_INT 0xffffffff
66
67void ksym_collect_stats(unsigned long hbp_hit_addr)
68{
69 struct hlist_node *node;
70 struct trace_ksym *entry;
71
72 rcu_read_lock();
73 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
74 if ((entry->ksym_addr == hbp_hit_addr) &&
75 (entry->counter <= MAX_UL_INT)) {
76 entry->counter++;
77 break;
78 }
79 }
80 rcu_read_unlock();
81}
82#endif /* CONFIG_PROFILE_KSYM_TRACER */
83
84void ksym_hbp_handler(struct perf_event *hbp, void *data)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct pt_regs *regs = data;
89 struct ring_buffer *buffer;
90 int pc;
91
92 if (!ksym_tracing_enabled)
93 return;
94
95 buffer = ksym_trace_array->buffer;
96
97 pc = preempt_count();
98
99 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
100 sizeof(*entry), 0, pc);
101 if (!event)
102 return;
103
104 entry = ring_buffer_event_data(event);
105 entry->ip = instruction_pointer(regs);
106 entry->type = hw_breakpoint_type(hbp);
107 entry->addr = hw_breakpoint_addr(hbp);
108 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
109
110#ifdef CONFIG_PROFILE_KSYM_TRACER
111 ksym_collect_stats(hw_breakpoint_addr(hbp));
112#endif /* CONFIG_PROFILE_KSYM_TRACER */
113
114 trace_buffer_unlock_commit(buffer, event, 0, pc);
115}
116
117/* Valid access types are represented as
118 *
119 * rw- : Set Read/Write Access Breakpoint
120 * -w- : Set Write Access Breakpoint
121 * --- : Clear Breakpoints
122 * --x : Set Execution Break points (Not available yet)
123 *
124 */
125static int ksym_trace_get_access_type(char *str)
126{
127 int access = 0;
128
129 if (str[0] == 'r')
130 access |= HW_BREAKPOINT_R;
131
132 if (str[1] == 'w')
133 access |= HW_BREAKPOINT_W;
134
135 if (str[2] == 'x')
136 access |= HW_BREAKPOINT_X;
137
138 switch (access) {
139 case HW_BREAKPOINT_R:
140 case HW_BREAKPOINT_W:
141 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
142 return access;
143 default:
144 return -EINVAL;
145 }
146}
147
148/*
149 * There can be several possible malformed requests and we attempt to capture
150 * all of them. We enumerate some of the rules:
151 * 1. Kernel symbols containing ':' are not allowed, since ':' is the
152 *    delimiter; i.e. multiple ':' symbols are disallowed. Such misuses
153 *    are of the form <module>:<ksym_name>:<op>.
154 * 2. No delimiter symbol ':' in the input string
155 * 3. Spurious operator symbols or symbols not in their respective positions
156 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
157 * 5. Kernel symbol not a part of /proc/kallsyms
158 * 6. Duplicate requests
159 */
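/*
 * Editorial examples of filter strings against the rules above:
 *   "pid_max:rw-"  valid: read/write watchpoint on pid_max
 *   "pid_max:rwx"  rejected by (3): execution breakpoints not available yet
 *   "pid_max"      rejected by (2): no ':' delimiter in the input
 */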
160static int parse_ksym_trace_str(char *input_string, char **ksymname,
161 unsigned long *addr)
162{
163 int ret;
164
165 *ksymname = strsep(&input_string, ":");
166 *addr = kallsyms_lookup_name(*ksymname);
167
168 /* Check for malformed request: (2), (1) and (5) */
169 if ((!input_string) ||
170 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
171 (*addr == 0))
172		return -EINVAL;
173
174 ret = ksym_trace_get_access_type(input_string);
175
176 return ret;
177}
178
179int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
180{
181 struct trace_ksym *entry;
182 int ret = -ENOMEM;
183
184 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
185		printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
186 " new requests for tracing can be accepted now.\n",
187 KSYM_TRACER_MAX);
188 return -ENOSPC;
189 }
190
191 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
192 if (!entry)
193 return -ENOMEM;
194
195 entry->type = op;
196 entry->ksym_addr = addr;
197 entry->len = HW_BREAKPOINT_LEN_4;
198
199 ret = -EAGAIN;
200 entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
201 entry->len, entry->type,
202 ksym_hbp_handler, true);
203 if (IS_ERR(entry->ksym_hbp)) {
204 entry->ksym_hbp = NULL;
205 ret = PTR_ERR(entry->ksym_hbp);
206 }
207
208 if (!entry->ksym_hbp) {
209 printk(KERN_INFO "ksym_tracer request failed. Try again"
210 " later!!\n");
211 goto err;
212 }
213
214 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
215 ksym_filter_entry_count++;
216
217 return 0;
218
219err:
220 kfree(entry);
221
222 return ret;
223}
224
225static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
226 size_t count, loff_t *ppos)
227{
228 struct trace_ksym *entry;
229 struct hlist_node *node;
230 struct trace_seq *s;
231 ssize_t cnt = 0;
232 int ret;
233
234 s = kmalloc(sizeof(*s), GFP_KERNEL);
235 if (!s)
236 return -ENOMEM;
237 trace_seq_init(s);
238
239 mutex_lock(&ksym_tracer_mutex);
240
241 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
242 ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
243 if (entry->type == HW_BREAKPOINT_R)
244 ret = trace_seq_puts(s, "r--\n");
245 else if (entry->type == HW_BREAKPOINT_W)
246 ret = trace_seq_puts(s, "-w-\n");
247 else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
248 ret = trace_seq_puts(s, "rw-\n");
249 WARN_ON_ONCE(!ret);
250 }
251
252 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
253
254 mutex_unlock(&ksym_tracer_mutex);
255
256 kfree(s);
257
258 return cnt;
259}
260
261static void __ksym_trace_reset(void)
262{
263 struct trace_ksym *entry;
264 struct hlist_node *node, *node1;
265
266 mutex_lock(&ksym_tracer_mutex);
267 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
268 ksym_hlist) {
269 unregister_wide_hw_breakpoint(entry->ksym_hbp);
270 ksym_filter_entry_count--;
271 hlist_del_rcu(&(entry->ksym_hlist));
272 synchronize_rcu();
273 kfree(entry);
274 }
275 mutex_unlock(&ksym_tracer_mutex);
276}
277
278static ssize_t ksym_trace_filter_write(struct file *file,
279 const char __user *buffer,
280 size_t count, loff_t *ppos)
281{
282 struct trace_ksym *entry;
283 struct hlist_node *node;
284 char *input_string, *ksymname = NULL;
285 unsigned long ksym_addr = 0;
286 int ret, op, changed = 0;
287
288 input_string = kzalloc(count + 1, GFP_KERNEL);
289 if (!input_string)
290 return -ENOMEM;
291
292 if (copy_from_user(input_string, buffer, count)) {
293 kfree(input_string);
294 return -EFAULT;
295 }
296 input_string[count] = '\0';
297
298 strstrip(input_string);
299
300 /*
301 * Clear all breakpoints if:
302 * 1: echo > ksym_trace_filter
303 * 2: echo 0 > ksym_trace_filter
304 * 3: echo "*:---" > ksym_trace_filter
305 */
306 if (!input_string[0] || !strcmp(input_string, "0") ||
307 !strcmp(input_string, "*:---")) {
308 __ksym_trace_reset();
309 kfree(input_string);
310 return count;
311 }
312
313 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
314 if (ret < 0) {
315 kfree(input_string);
316 return ret;
317 }
318
319 mutex_lock(&ksym_tracer_mutex);
320
321 ret = -EINVAL;
322 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
323 if (entry->ksym_addr == ksym_addr) {
324 /* Check for malformed request: (6) */
325 if (entry->type != op)
326 changed = 1;
327 else
328 goto out;
329 break;
330 }
331 }
332 if (changed) {
333 unregister_wide_hw_breakpoint(entry->ksym_hbp);
334 entry->type = op;
335 if (op > 0) {
336 entry->ksym_hbp =
337 register_wide_hw_breakpoint(entry->ksym_addr,
338 entry->len, entry->type,
339 ksym_hbp_handler, true);
340 if (IS_ERR(entry->ksym_hbp))
341 entry->ksym_hbp = NULL;
342 if (!entry->ksym_hbp)
343 goto out;
344 }
345 ksym_filter_entry_count--;
346 hlist_del_rcu(&(entry->ksym_hlist));
347 synchronize_rcu();
348 kfree(entry);
349 ret = 0;
350 goto out;
351 } else {
352 /* Check for malformed request: (4) */
353 if (op == 0)
354 goto out;
355 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
356 }
357out:
358 mutex_unlock(&ksym_tracer_mutex);
359
360 kfree(input_string);
361
362 if (!ret)
363 ret = count;
364 return ret;
365}
366
367static const struct file_operations ksym_tracing_fops = {
368 .open = tracing_open_generic,
369 .read = ksym_trace_filter_read,
370 .write = ksym_trace_filter_write,
371};
372
373static void ksym_trace_reset(struct trace_array *tr)
374{
375 ksym_tracing_enabled = 0;
376 __ksym_trace_reset();
377}
378
379static int ksym_trace_init(struct trace_array *tr)
380{
381 int cpu, ret = 0;
382
383 for_each_online_cpu(cpu)
384 tracing_reset(tr, cpu);
385 ksym_tracing_enabled = 1;
386 ksym_trace_array = tr;
387
388 return ret;
389}
390
391static void ksym_trace_print_header(struct seq_file *m)
392{
393 seq_puts(m,
394 "# TASK-PID CPU# Symbol "
395 "Type Function\n");
396 seq_puts(m,
397 "# | | | "
398 " | |\n");
399}
400
401static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
402{
403 struct trace_entry *entry = iter->ent;
404 struct trace_seq *s = &iter->seq;
405 struct ksym_trace_entry *field;
406 char str[KSYM_SYMBOL_LEN];
407 int ret;
408
409 if (entry->type != TRACE_KSYM)
410 return TRACE_TYPE_UNHANDLED;
411
412 trace_assign_type(field, entry);
413
414 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
415 entry->pid, iter->cpu, (char *)field->addr);
416 if (!ret)
417 return TRACE_TYPE_PARTIAL_LINE;
418
419 switch (field->type) {
420 case HW_BREAKPOINT_R:
421 ret = trace_seq_printf(s, " R ");
422 break;
423 case HW_BREAKPOINT_W:
424 ret = trace_seq_printf(s, " W ");
425 break;
426 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
427 ret = trace_seq_printf(s, " RW ");
428 break;
429 default:
430 return TRACE_TYPE_PARTIAL_LINE;
431 }
432
433 if (!ret)
434 return TRACE_TYPE_PARTIAL_LINE;
435
436 sprint_symbol(str, field->ip);
437 ret = trace_seq_printf(s, "%s\n", str);
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 return TRACE_TYPE_HANDLED;
442}
443
444struct tracer ksym_tracer __read_mostly =
445{
446 .name = "ksym_tracer",
447 .init = ksym_trace_init,
448 .reset = ksym_trace_reset,
449#ifdef CONFIG_FTRACE_SELFTEST
450 .selftest = trace_selftest_startup_ksym,
451#endif
452 .print_header = ksym_trace_print_header,
453 .print_line = ksym_trace_output
454};
455
456__init static int init_ksym_trace(void)
457{
458 struct dentry *d_tracer;
459 struct dentry *entry;
460
461 d_tracer = tracing_init_dentry();
462 ksym_filter_entry_count = 0;
463
464 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
465 NULL, &ksym_tracing_fops);
466 if (!entry)
467 pr_warning("Could not create debugfs "
468 "'ksym_trace_filter' file\n");
469
470 return register_tracer(&ksym_tracer);
471}
472device_initcall(init_ksym_trace);
473
474
475#ifdef CONFIG_PROFILE_KSYM_TRACER
476static int ksym_tracer_stat_headers(struct seq_file *m)
477{
478 seq_puts(m, " Access Type ");
479 seq_puts(m, " Symbol Counter\n");
480 seq_puts(m, " ----------- ");
481 seq_puts(m, " ------ -------\n");
482 return 0;
483}
484
485static int ksym_tracer_stat_show(struct seq_file *m, void *v)
486{
487 struct hlist_node *stat = v;
488 struct trace_ksym *entry;
489 int access_type = 0;
490 char fn_name[KSYM_NAME_LEN];
491
492 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
493
494 access_type = entry->type;
495
496 switch (access_type) {
497 case HW_BREAKPOINT_R:
498 seq_puts(m, " R ");
499 break;
500 case HW_BREAKPOINT_W:
501 seq_puts(m, " W ");
502 break;
503 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
504 seq_puts(m, " RW ");
505 break;
506 default:
507 seq_puts(m, " NA ");
508 }
509
510 if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
511 seq_printf(m, " %-36s", fn_name);
512 else
513 seq_printf(m, " %-36s", "<NA>");
514 seq_printf(m, " %15lu\n", entry->counter);
515
516 return 0;
517}
518
519static void *ksym_tracer_stat_start(struct tracer_stat *trace)
520{
521 return ksym_filter_head.first;
522}
523
524static void *
525ksym_tracer_stat_next(void *v, int idx)
526{
527 struct hlist_node *stat = v;
528
529 return stat->next;
530}
531
532static struct tracer_stat ksym_tracer_stats = {
533 .name = "ksym_tracer",
534 .stat_start = ksym_tracer_stat_start,
535 .stat_next = ksym_tracer_stat_next,
536 .stat_headers = ksym_tracer_stat_headers,
537 .stat_show = ksym_tracer_stat_show
538};
539
540__init static int ksym_tracer_stat_init(void)
541{
542 int ret;
543
544 ret = register_stat_tracer(&ksym_tracer_stats);
545 if (ret) {
546 printk(KERN_WARNING "Warning: could not register "
547 "ksym tracer stats\n");
548 return 1;
549 }
550
551 return 0;
552}
553fs_initcall(ksym_tracer_stat_init);
554#endif /* CONFIG_PROFILE_KSYM_TRACER */
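
A hedged usage sketch for the ksym_trace_filter file created above, assuming debugfs is mounted at /sys/kernel/debug (the Kconfig help above refers to it as /debugfs):

	echo 'pid_max:rw-' > /sys/kernel/debug/tracing/ksym_trace_filter
	cat /sys/kernel/debug/tracing/ksym_trace_filter
	echo > /sys/kernel/debug/tracing/ksym_trace_filter

The first line requests a read/write watchpoint on pid_max, the second lists the active requests, and the empty write clears them all, as handled in ksym_trace_filter_write().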
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857		printk(KERN_CONT "Ksym tracer startup test failed\n");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/samples/Kconfig b/samples/Kconfig
index b92bde3c6a89..e4be84ac3d38 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
40 default m 40 default m
41 depends on SAMPLE_KPROBES && KRETPROBES 41 depends on SAMPLE_KPROBES && KRETPROBES
42 42
43config SAMPLE_HW_BREAKPOINT
44 tristate "Build kernel hardware breakpoint examples -- loadable module only"
45 depends on HAVE_HW_BREAKPOINT && m
46 help
47 This builds kernel hardware breakpoint example modules.
48
43endif # SAMPLES 49endif # SAMPLES
44 50
diff --git a/samples/Makefile b/samples/Makefile
index 43343a03b1f4..0f15e6d77fd6 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ 3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
4 hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000000..0f5c31c2fc47
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000000..5bc9819a819e
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,88 @@
1/*
2 * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * usage: insmod data_breakpoint.ko ksym=<ksym_name>
19 *
20 * This file is a kernel module that places a breakpoint on the ksym_name
21 * kernel variable using a hardware breakpoint register. The corresponding
22 * handler, which prints a backtrace, is invoked every time a write
23 * operation is performed on that variable.
24 *
25 * Copyright (C) IBM Corporation, 2009
26 */
27#include <linux/module.h> /* Needed by all modules */
28#include <linux/kernel.h> /* Needed for KERN_INFO */
29#include <linux/init.h> /* Needed for the macros */
30#include <linux/kallsyms.h>
31
32#include <linux/perf_event.h>
33#include <linux/hw_breakpoint.h>
34
35struct perf_event **sample_hbp;
36
37static char ksym_name[KSYM_NAME_LEN] = "pid_max";
38module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
39MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
40 " write operations on the kernel symbol");
41
42static void sample_hbp_handler(struct perf_event *temp, void *data)
43{
44	printk(KERN_INFO "%s value has changed\n", ksym_name);
45 dump_stack();
46 printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
47}
48
49static int __init hw_break_module_init(void)
50{
51 int ret;
52 unsigned long addr;
53
54 addr = kallsyms_lookup_name(ksym_name);
55
56 sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
57 HW_BREAKPOINT_W | HW_BREAKPOINT_R,
58 sample_hbp_handler, true);
59 if (IS_ERR(sample_hbp)) {
60 ret = PTR_ERR(sample_hbp);
61 goto fail;
62 } else if (!sample_hbp) {
63 ret = -EINVAL;
64 goto fail;
65 }
66
67 printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
68
69 return 0;
70
71fail:
72 printk(KERN_INFO "Breakpoint registration failed\n");
73
74 return ret;
75}
76
77static void __exit hw_break_module_exit(void)
78{
79 unregister_wide_hw_breakpoint(sample_hbp);
80 printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
81}
82
83module_init(hw_break_module_init);
84module_exit(hw_break_module_exit);
85
86MODULE_LICENSE("GPL");
87MODULE_AUTHOR("K.Prasad");
88MODULE_DESCRIPTION("ksym breakpoint");