-rw-r--r--  arch/Kconfig                                 4
-rw-r--r--  arch/x86/Kconfig                             1
-rw-r--r--  arch/x86/include/asm/a.out-core.h            8
-rw-r--r--  arch/x86/include/asm/debugreg.h             29
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h        55
-rw-r--r--  arch/x86/include/asm/processor.h             8
-rw-r--r--  arch/x86/kernel/Makefile                     2
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c            391
-rw-r--r--  arch/x86/kernel/kgdb.c                       6
-rw-r--r--  arch/x86/kernel/kprobes.c                    9
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c           2
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c           2
-rw-r--r--  arch/x86/kernel/process.c                   22
-rw-r--r--  arch/x86/kernel/process_32.c                28
-rw-r--r--  arch/x86/kernel/process_64.c                31
-rw-r--r--  arch/x86/kernel/ptrace.c                   231
-rw-r--r--  arch/x86/kernel/signal.c                     9
-rw-r--r--  arch/x86/kernel/smpboot.c                    3
-rw-r--r--  arch/x86/kernel/traps.c                     73
-rw-r--r--  arch/x86/kvm/x86.c                           8
-rw-r--r--  arch/x86/mm/kmmio.c                          8
-rw-r--r--  arch/x86/power/cpu.c                        24
-rw-r--r--  include/asm-generic/hw_breakpoint.h        139
-rw-r--r--  kernel/Makefile                              1
-rw-r--r--  kernel/hw_breakpoint.c                     378
-rw-r--r--  kernel/trace/Kconfig                        21
-rw-r--r--  kernel/trace/Makefile                        1
-rw-r--r--  kernel/trace/trace.h                        11
-rw-r--r--  kernel/trace/trace_entries.h                16
-rw-r--r--  kernel/trace/trace_ksym.c                  551
-rw-r--r--  kernel/trace/trace_selftest.c               53
-rw-r--r--  samples/Kconfig                              6
-rw-r--r--  samples/Makefile                             3
-rw-r--r--  samples/hw_breakpoint/Makefile               1
-rw-r--r--  samples/hw_breakpoint/data_breakpoint.c     83
35 files changed, 2019 insertions, 199 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..acb664397945 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,8 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_HW_BREAKPOINT
+	bool
+
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 07e01149e3bf..1b7c74350a04 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..fc4685dd6e4d 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
 						>> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
+	dump->u_debugreg[0] = current->thread.debugreg[0];
+	dump->u_debugreg[1] = current->thread.debugreg[1];
+	dump->u_debugreg[2] = current->thread.debugreg[2];
+	dump->u_debugreg[3] = current->thread.debugreg[3];
 	dump->u_debugreg[4] = 0;
 	dump->u_debugreg[5] = 0;
 	dump->u_debugreg[6] = current->thread.debugreg6;
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..23439fbb1d0e 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
 #define DR_TRAP3	(0x8)		/* db3 */
+#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set local bits for all 4 regs */
@@ -67,4 +70,30 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+/* For process management */
+extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
+extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
+		struct task_struct *child, unsigned long clone_flags);
+
+/* For CPU management */
+extern void load_debug_registers(void);
+static inline void hw_breakpoint_disable(void)
+{
+	/* Zero the control register for HW Breakpoint */
+	set_debugreg(0UL, 7);
+
+	/* Zero-out the individual HW breakpoint address registers */
+	set_debugreg(0UL, 0);
+	set_debugreg(0UL, 1);
+	set_debugreg(0UL, 2);
+	set_debugreg(0UL, 3);
+}
+
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
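
DR_TRAP_BITS and the dr6-passed-by-reference convention introduced above are what every DIE_DEBUG consumer further down in this patch (kgdb, kprobes, kmmio and the new hw_breakpoint handler) relies on. A minimal sketch of such a notifier, assuming the ERR_PTR()/PTR_ERR() encoding of the dr6 pointer that do_debug() adopts below; the function and variable names are illustrative only:

#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <linux/err.h>
#include <asm/debugreg.h>

static int example_debug_notify(struct notifier_block *nb,
				unsigned long val, void *data)
{
	struct die_args *args = data;
	unsigned long *dr6_p;

	if (val != DIE_DEBUG)
		return NOTIFY_DONE;

	/* do_debug() passes PTR_ERR(&dr6) as the die argument */
	dr6_p = (unsigned long *)ERR_PTR(args->err);

	if (!(*dr6_p & (DR_STEP | DR_TRAP_BITS)))
		return NOTIFY_DONE;

	/* ... handle the condition, then clear the bits that were consumed */
	*dr6_p &= ~DR_STEP;

	return NOTIFY_STOP;
}
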
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..1acb4d45de70
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,55 @@
1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H
3
4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H
6
7struct arch_hw_breakpoint {
8 char *name; /* Contains name of the symbol to set bkpt */
9 unsigned long address;
10 u8 len;
11 u8 type;
12};
13
14#include <linux/kdebug.h>
15#include <asm-generic/hw_breakpoint.h>
16
17/* Available HW breakpoint length encodings */
18#define HW_BREAKPOINT_LEN_1 0x40
19#define HW_BREAKPOINT_LEN_2 0x44
20#define HW_BREAKPOINT_LEN_4 0x4c
21#define HW_BREAKPOINT_LEN_EXECUTE 0x40
22
23#ifdef CONFIG_X86_64
24#define HW_BREAKPOINT_LEN_8 0x48
25#endif
26
27/* Available HW breakpoint type encodings */
28
29/* trigger on instruction execute */
30#define HW_BREAKPOINT_EXECUTE 0x80
31/* trigger on memory write */
32#define HW_BREAKPOINT_WRITE 0x81
33/* trigger on memory read or write */
34#define HW_BREAKPOINT_RW 0x83
35
36/* Total number of available HW breakpoint registers */
37#define HBP_NUM 4
38
39extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
40DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
41extern unsigned int hbp_user_refcount[HBP_NUM];
42
43extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
44extern void arch_uninstall_thread_hw_breakpoint(void);
45extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
46extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
47 struct task_struct *tsk);
48extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
49extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
50extern void arch_update_kernel_hw_breakpoint(void *);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data);
53#endif /* __KERNEL__ */
54#endif /* _I386_HW_BREAKPOINT_H */
55
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c3429e8b2424..61aafb71c7ef 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -444,12 +445,11 @@ struct thread_struct {
 #endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
-	unsigned long		debugreg0;
-	unsigned long		debugreg1;
-	unsigned long		debugreg2;
-	unsigned long		debugreg3;
+	unsigned long		debugreg[HBP_NUM];
 	unsigned long		debugreg6;
 	unsigned long		debugreg7;
+	/* Hardware breakpoint info */
+	struct hw_breakpoint	*hbp[HBP_NUM];
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..9316a9de4de3
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,391 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 */
24
25#include <linux/irqflags.h>
26#include <linux/notifier.h>
27#include <linux/kallsyms.h>
28#include <linux/kprobes.h>
29#include <linux/percpu.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/sched.h>
34#include <linux/init.h>
35#include <linux/smp.h>
36
37#include <asm/hw_breakpoint.h>
38#include <asm/processor.h>
39#include <asm/debugreg.h>
40
41/* Unmasked kernel DR7 value */
42static unsigned long kdr7;
43
44/*
45 * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
46 * Used to clear and verify the status of bits corresponding to DR0 - DR3
47 */
48static const unsigned long dr7_masks[HBP_NUM] = {
49 0x000f0003, /* LEN0, R/W0, G0, L0 */
50 0x00f0000c, /* LEN1, R/W1, G1, L1 */
51 0x0f000030, /* LEN2, R/W2, G2, L2 */
52 0xf00000c0 /* LEN3, R/W3, G3, L3 */
53};
54
55
56/*
57 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
58 * as stored in debug register 7.
59 */
60static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
61{
62 unsigned long bp_info;
63
64 bp_info = (len | type) & 0xf;
65 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
66 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
67 DR_GLOBAL_SLOWDOWN;
68 return bp_info;
69}
70
71void arch_update_kernel_hw_breakpoint(void *unused)
72{
73 struct hw_breakpoint *bp;
74 int i, cpu = get_cpu();
75 unsigned long temp_kdr7 = 0;
76
77 /* Don't allow debug exceptions while we update the registers */
78 set_debugreg(0UL, 7);
79
80 for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
81 per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
82 if (bp) {
83 temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
84 set_debugreg(bp->info.address, i);
85 }
86 }
87
88 /* No need to set DR6. Update the debug registers with kernel-space
89 * breakpoint values from kdr7 and user-space requests from the
90 * current process
91 */
92 kdr7 = temp_kdr7;
93 set_debugreg(kdr7 | current->thread.debugreg7, 7);
94 put_cpu();
95}
96
97/*
98 * Install the thread breakpoints in their debug registers.
99 */
100void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
101{
102 struct thread_struct *thread = &(tsk->thread);
103
104 switch (hbp_kernel_pos) {
105 case 4:
106 set_debugreg(thread->debugreg[3], 3);
107 case 3:
108 set_debugreg(thread->debugreg[2], 2);
109 case 2:
110 set_debugreg(thread->debugreg[1], 1);
111 case 1:
112 set_debugreg(thread->debugreg[0], 0);
113 default:
114 break;
115 }
116
117 /* No need to set DR6 */
118 set_debugreg((kdr7 | thread->debugreg7), 7);
119}
120
121/*
122 * Install the debug register values for just the kernel, no thread.
123 */
124void arch_uninstall_thread_hw_breakpoint(void)
125{
126 /* Clear the user-space portion of debugreg7 by setting only kdr7 */
127 set_debugreg(kdr7, 7);
128
129}
130
131static int get_hbp_len(u8 hbp_len)
132{
133 unsigned int len_in_bytes = 0;
134
135 switch (hbp_len) {
136 case HW_BREAKPOINT_LEN_1:
137 len_in_bytes = 1;
138 break;
139 case HW_BREAKPOINT_LEN_2:
140 len_in_bytes = 2;
141 break;
142 case HW_BREAKPOINT_LEN_4:
143 len_in_bytes = 4;
144 break;
145#ifdef CONFIG_X86_64
146 case HW_BREAKPOINT_LEN_8:
147 len_in_bytes = 8;
148 break;
149#endif
150 }
151 return len_in_bytes;
152}
153
154/*
155 * Check for virtual address in user space.
156 */
157int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
158{
159 unsigned int len;
160
161 len = get_hbp_len(hbp_len);
162
163 return (va <= TASK_SIZE - len);
164}
165
166/*
167 * Check for virtual address in kernel space.
168 */
169static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
170{
171 unsigned int len;
172
173 len = get_hbp_len(hbp_len);
174
175 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
176}
177
178/*
179 * Store a breakpoint's encoded address, length, and type.
180 */
181static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
182{
183 /*
184 * User-space requests will always have the address field populated
185 * Symbol names from user-space are rejected
186 */
187 if (tsk && bp->info.name)
188 return -EINVAL;
189 /*
190 * For kernel-addresses, either the address or symbol name can be
191 * specified.
192 */
193 if (bp->info.name)
194 bp->info.address = (unsigned long)
195 kallsyms_lookup_name(bp->info.name);
196 if (bp->info.address)
197 return 0;
198 return -EINVAL;
199}
200
201/*
202 * Validate the arch-specific HW Breakpoint register settings
203 */
204int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
205 struct task_struct *tsk)
206{
207 unsigned int align;
208 int ret = -EINVAL;
209
210 switch (bp->info.type) {
211 /*
212 * Ptrace-refactoring code
213 * For now, we'll allow instruction breakpoint only for user-space
214 * addresses
215 */
216 case HW_BREAKPOINT_EXECUTE:
217 if ((!arch_check_va_in_userspace(bp->info.address,
218 bp->info.len)) &&
219 bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
220 return ret;
221 break;
222 case HW_BREAKPOINT_WRITE:
223 break;
224 case HW_BREAKPOINT_RW:
225 break;
226 default:
227 return ret;
228 }
229
230 switch (bp->info.len) {
231 case HW_BREAKPOINT_LEN_1:
232 align = 0;
233 break;
234 case HW_BREAKPOINT_LEN_2:
235 align = 1;
236 break;
237 case HW_BREAKPOINT_LEN_4:
238 align = 3;
239 break;
240#ifdef CONFIG_X86_64
241 case HW_BREAKPOINT_LEN_8:
242 align = 7;
243 break;
244#endif
245 default:
246 return ret;
247 }
248
249 if (bp->triggered)
250 ret = arch_store_info(bp, tsk);
251
252 if (ret < 0)
253 return ret;
254 /*
255 * Check that the low-order bits of the address are appropriate
256 * for the alignment implied by len.
257 */
258 if (bp->info.address & align)
259 return -EINVAL;
260
261 /* Check that the virtual address is in the proper range */
262 if (tsk) {
263 if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
264 return -EFAULT;
265 } else {
266 if (!arch_check_va_in_kernelspace(bp->info.address,
267 bp->info.len))
268 return -EFAULT;
269 }
270 return 0;
271}
272
273void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
274{
275 struct thread_struct *thread = &(tsk->thread);
276 struct hw_breakpoint *bp = thread->hbp[pos];
277
278 thread->debugreg7 &= ~dr7_masks[pos];
279 if (bp) {
280 thread->debugreg[pos] = bp->info.address;
281 thread->debugreg7 |= encode_dr7(pos, bp->info.len,
282 bp->info.type);
283 } else
284 thread->debugreg[pos] = 0;
285}
286
287void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
288{
289 int i;
290 struct thread_struct *thread = &(tsk->thread);
291
292 thread->debugreg7 = 0;
293 for (i = 0; i < HBP_NUM; i++)
294 thread->debugreg[i] = 0;
295}
296
297/*
298 * Handle debug exception notifications.
299 *
300 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
301 *
302 * NOTIFY_DONE returned if one of the following conditions is true.
303 * i) When the causative address is from user-space and the exception
304 * is a valid one, i.e. not triggered as a result of lazy debug register
305 * switching
306 * ii) When there are more bits than trap<n> set in DR6 register (such
307 * as BD, BS or BT) indicating that more than one debug condition is
308 * met and requires some more action in do_debug().
309 *
310 * NOTIFY_STOP returned for all other cases
311 *
312 */
313static int __kprobes hw_breakpoint_handler(struct die_args *args)
314{
315 int i, cpu, rc = NOTIFY_STOP;
316 struct hw_breakpoint *bp;
317 unsigned long dr7, dr6;
318 unsigned long *dr6_p;
319
320 /* The DR6 value is pointed by args->err */
321 dr6_p = (unsigned long *)ERR_PTR(args->err);
322 dr6 = *dr6_p;
323
324 /* Do an early return if no trap bits are set in DR6 */
325 if ((dr6 & DR_TRAP_BITS) == 0)
326 return NOTIFY_DONE;
327
328 /* Lazy debug register switching */
329 if (!test_tsk_thread_flag(current, TIF_DEBUG))
330 arch_uninstall_thread_hw_breakpoint();
331
332 get_debugreg(dr7, 7);
333 /* Disable breakpoints during exception handling */
334 set_debugreg(0UL, 7);
335 /*
336 * Assert that local interrupts are disabled
337 * Reset the DRn bits in the virtualized register value.
338 * The ptrace trigger routine will add in whatever is needed.
339 */
340 current->thread.debugreg6 &= ~DR_TRAP_BITS;
341 cpu = get_cpu();
342
343 /* Handle all the breakpoints that were triggered */
344 for (i = 0; i < HBP_NUM; ++i) {
345 if (likely(!(dr6 & (DR_TRAP0 << i))))
346 continue;
347 /*
348 * Find the corresponding hw_breakpoint structure and
349 * invoke its triggered callback.
350 */
351 if (i >= hbp_kernel_pos)
352 bp = per_cpu(this_hbp_kernel[i], cpu);
353 else {
354 bp = current->thread.hbp[i];
355 if (bp)
356 rc = NOTIFY_DONE;
357 }
358 /*
359 * Reset the 'i'th TRAP bit in dr6 to denote completion of
360 * exception handling
361 */
362 (*dr6_p) &= ~(DR_TRAP0 << i);
363 /*
364 * bp can be NULL due to lazy debug register switching
365 * or due to the delay between updates of hbp_kernel_pos
366 * and this_hbp_kernel.
367 */
368 if (!bp)
369 continue;
370
371 (bp->triggered)(bp, args->regs);
372 }
373 if (dr6 & (~DR_TRAP_BITS))
374 rc = NOTIFY_DONE;
375
376 set_debugreg(dr7, 7);
377 put_cpu();
378 return rc;
379}
380
381/*
382 * Handle debug exception notifications.
383 */
384int __kprobes hw_breakpoint_exceptions_notify(
385 struct notifier_block *unused, unsigned long val, void *data)
386{
387 if (val != DIE_DEBUG)
388 return NOTIFY_DONE;
389
390 return hw_breakpoint_handler(data);
391}
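
The DR7 layout that encode_dr7() above and decode_dr7() in the ptrace.c hunk below agree on can be checked with a short worked example. This is an illustration only, assuming the conventional DR_CONTROL_SHIFT of 16 and DR_CONTROL_SIZE of 4 from debugreg.h; the case shown is a 4-byte write watchpoint in slot 1:

	encode_dr7(1, HW_BREAKPOINT_LEN_4, HW_BREAKPOINT_WRITE)
	  = ((0x4c | 0x81) & 0xf) << (16 + 1 * 4)	/* 0x00d00000: LEN1 = 11, R/W1 = 01 */
	  | DR_GLOBAL_ENABLE << (1 * DR_ENABLE_SIZE)	/* 0x00000008: G1 */
	  | DR_GLOBAL_SLOWDOWN				/* 0x00000200: GE */
	  = 0x00d00208

	decode_dr7(0x00d00208, 1, &len, &type) recovers len = 0x4c
	(HW_BREAKPOINT_LEN_4) and type = 0x81 (HW_BREAKPOINT_WRITE), and
	returns 2, i.e. the global-enable bit for slot 1.
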
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 
+#include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
 			"resuming...\n");
 	kgdb_arch_handle_exception(args->trapnr, args->signr,
 				   args->err, "c", "", regs);
+	/*
+	 * Reset the BS bit in dr6 (pointed by args->err) to
+	 * denote completion of processing
+	 */
+	(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 
 	return NOTIFY_STOP;
 }
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..b5b1848c5336 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -54,6 +54,7 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
@@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
+		if (post_kprobe_handler(args->regs)) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 			ret = NOTIFY_STOP;
+		}
 		break;
 	case DIE_GPF:
 		/*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 #include <asm/cacheflush.h>
+#include <asm/debugreg.h>
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/debugreg.h>
 
 static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 				unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..2275ce5776de 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -45,6 +47,8 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+		flush_thread_hw_breakpoint(tsk);
 
 	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
@@ -105,12 +109,8 @@ void flush_thread(void)
 
 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
+	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+		flush_thread_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
@@ -192,16 +192,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..209e74801763 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -259,7 +261,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
+	err = -ENOMEM;
+	if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+		if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+			goto out;
+
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
 						IO_BITMAP_BYTES, GFP_KERNEL);
@@ -279,10 +287,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		err = do_set_thread_area(p, -1,
 			(struct user_desc __user *)childregs->si, 0);
 
+out:
 	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+	if (err)
+		flush_thread_hw_breakpoint(p);
 
 	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
 	p->thread.ds_ctx = NULL;
@@ -426,6 +437,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	lazy_load_gs(next->gs);
 
 	percpu_write(current_task, next_p);
+	/*
+	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
+	 * call before current is updated. Suppose a kernel breakpoint is
+	 * triggered in between the two, the hw-breakpoint handler will see that
+	 * the 'current' task does not have TIF_DEBUG flag set and will think it
+	 * is leftover from an old task (lazy switching) and will erase it. Then
+	 * until the next context switch, no user-breakpoints will be installed.
+	 *
+	 * The real problem is that it's impossible to update both current and
+	 * physical debug registers at the same instant, so there will always be
+	 * a window in which they disagree and a breakpoint might get triggered.
+	 * Since we use lazy switching, we are forced to assume that a
+	 * disagreement means that current is correct and the exception is due
+	 * to lazy debug register switching.
+	 */
+	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+		arch_install_thread_hw_breakpoint(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b683170..72edac026a78 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -242,6 +244,8 @@ void release_thread(struct task_struct *dead_task)
 			BUG();
 		}
 	}
+	if (unlikely(dead_task->thread.debugreg7))
+		flush_thread_hw_breakpoint(dead_task);
 }
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -297,12 +301,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	p->thread.fs = me->thread.fs;
 	p->thread.gs = me->thread.gs;
+	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(fs, p->thread.fsindex);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 
+	err = -ENOMEM;
+	if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+		if (copy_thread_hw_breakpoint(me, p, clone_flags))
+			goto out;
+
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +351,9 @@ out:
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+	if (err)
+		flush_thread_hw_breakpoint(p);
+
 	return err;
 }
 
@@ -495,6 +508,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (preload_fpu)
 		__math_state_restore();
+	/*
+	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
+	 * call before current is updated. Suppose a kernel breakpoint is
+	 * triggered in between the two, the hw-breakpoint handler will see that
+	 * the 'current' task does not have TIF_DEBUG flag set and will think it
+	 * is leftover from an old task (lazy switching) and will erase it. Then
+	 * until the next context switch, no user-breakpoints will be installed.
+	 *
+	 * The real problem is that it's impossible to update both current and
+	 * physical debug registers at the same instant, so there will always be
+	 * a window in which they disagree and a breakpoint might get triggered.
+	 * Since we use lazy switching, we are forced to assume that a
+	 * disagreement means that current is correct and the exception is due
+	 * to lazy debug register switching.
+	 */
+	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+		arch_install_thread_hw_breakpoint(next_p);
+
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2dc66a..267cb85b479c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -34,6 +34,7 @@
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
 
 #include "tls.h"
 
@@ -137,11 +138,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-	return TASK_SIZE - 3;
-}
-
 #else  /* CONFIG_X86_64 */
 
 #define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +262,6 @@ static int set_segment_reg(struct task_struct *task,
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(task, TIF_IA32))
-		return IA32_PAGE_OFFSET - 3;
-#endif
-	return TASK_SIZE_MAX - 7;
-}
-
 #endif	/* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -455,95 +442,159 @@ static int genregs_set(struct task_struct *target,
 }
 
 /*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
  */
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
+		unsigned *type)
 {
-	switch (n) {
-	case 0:		return child->thread.debugreg0;
-	case 1:		return child->thread.debugreg1;
-	case 2:		return child->thread.debugreg2;
-	case 3:		return child->thread.debugreg3;
-	case 6:		return child->thread.debugreg6;
-	case 7:		return child->thread.debugreg7;
-	}
-	return 0;
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
 }
 
-static int ptrace_set_debugreg(struct task_struct *child,
-			       int n, unsigned long data)
+static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
 {
+	struct thread_struct *thread = &(current->thread);
 	int i;
 
-	if (unlikely(n == 4 || n == 5))
-		return -EIO;
+	/*
+	 * Store in the virtual DR6 register the fact that the breakpoint
+	 * was hit so the thread's debugger will see it.
+	 */
+	for (i = 0; i < hbp_kernel_pos; i++)
+		/*
+		 * We will check bp->info.address against the address stored in
+		 * thread's hbp structure and not debugreg[i]. This is to ensure
+		 * that the corresponding bit for 'i' in DR7 register is enabled
+		 */
+		if (bp->info.address == thread->hbp[i]->info.address)
+			break;
 
-	if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
-		return -EIO;
+	thread->debugreg6 |= (DR_TRAP0 << i);
+}
 
-	switch (n) {
-	case 0:		child->thread.debugreg0 = data; break;
-	case 1:		child->thread.debugreg1 = data; break;
-	case 2:		child->thread.debugreg2 = data; break;
-	case 3:		child->thread.debugreg3 = data; break;
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long old_dr7 = thread->debugreg7;
+	int i, orig_ret = 0, rc = 0;
+	int enabled, second_pass = 0;
+	unsigned len, type;
+	struct hw_breakpoint *bp;
+
+	data &= ~DR_CONTROL_RESERVED;
+restore:
+	/*
+	 * Loop through all the hardware breakpoints, making the
+	 * appropriate changes to each.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		enabled = decode_dr7(data, i, &len, &type);
+		bp = thread->hbp[i];
+
+		if (!enabled) {
+			if (bp) {
+				/* Don't unregister the breakpoints right-away,
+				 * unless all register_user_hw_breakpoint()
+				 * requests have succeeded. This prevents
+				 * any window of opportunity for debug
+				 * register grabbing by other users.
+				 */
+				if (!second_pass)
+					continue;
+				unregister_user_hw_breakpoint(tsk, bp);
+				kfree(bp);
+			}
+			continue;
+		}
+		if (!bp) {
+			rc = -ENOMEM;
+			bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+			if (bp) {
+				bp->info.address = thread->debugreg[i];
+				bp->triggered = ptrace_triggered;
+				bp->info.len = len;
+				bp->info.type = type;
+				rc = register_user_hw_breakpoint(tsk, bp);
+				if (rc)
+					kfree(bp);
+			}
+		} else
+			rc = modify_user_hw_breakpoint(tsk, bp);
+		if (rc)
+			break;
+	}
+	/*
+	 * Make a second pass to free the remaining unused breakpoints
+	 * or to restore the original breakpoints if an error occurred.
+	 */
+	if (!second_pass) {
+		second_pass = 1;
+		if (rc < 0) {
+			orig_ret = rc;
+			data = old_dr7;
+		}
+		goto restore;
+	}
+	return ((orig_ret < 0) ? orig_ret : rc);
+}
 
-	case 6:
-		if ((data & ~0xffffffffUL) != 0)
-			return -EIO;
-		child->thread.debugreg6 = data;
-		break;
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long val = 0;
+
+	if (n < HBP_NUM)
+		val = thread->debugreg[n];
+	else if (n == 6)
+		val = thread->debugreg6;
+	else if (n == 7)
+		val = thread->debugreg7;
+	return val;
+}
 
-	case 7:
-		/*
-		 * Sanity-check data. Take one half-byte at once with
-		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
-		 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-		 * 2 and 3 are LENi. Given a list of invalid values,
-		 * we do mask |= 1 << invalid_value, so that
-		 * (mask >> check) & 1 is a correct test for invalid
-		 * values.
-		 *
-		 * R/Wi contains the type of the breakpoint /
-		 * watchpoint, LENi contains the length of the watched
-		 * data in the watchpoint case.
-		 *
-		 * The invalid values are:
-		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.	[32-bit]
-		 * - R/Wi == 0x10 (break on I/O reads or writes), so
-		 *   mask |= 0x4444.
-		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-		 *   0x1110.
-		 *
-		 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-		 *
-		 * See the Intel Manual "System Programming Guide",
-		 * 15.2.4
-		 *
-		 * Note that LENi == 0x10 is defined on x86_64 in long
-		 * mode (i.e. even for 32-bit userspace software, but
-		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
-		 * See the AMD manual no. 24593 (AMD64 System Programming)
-		 */
-#ifdef CONFIG_X86_32
-#define DR7_MASK	0x5f54
-#else
-#define DR7_MASK	0x5554
-#endif
-		data &= ~DR_CONTROL_RESERVED;
-		for (i = 0; i < 4; i++)
-			if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-				return -EIO;
-		child->thread.debugreg7 = data;
-		if (data)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	int rc = 0;
+
+	/* There are no DR4 or DR5 registers */
+	if (n == 4 || n == 5)
+		return -EIO;
+
+	if (n == 6) {
+		tsk->thread.debugreg6 = val;
+		goto ret_path;
 	}
+	if (n < HBP_NUM) {
+		if (thread->hbp[n]) {
+			if (arch_check_va_in_userspace(val,
+					thread->hbp[n]->info.len) == 0) {
+				rc = -EIO;
+				goto ret_path;
+			}
+			thread->hbp[n]->info.address = val;
+		}
+		thread->debugreg[n] = val;
+	}
+	/* All that's left is DR7 */
+	if (n == 7)
+		rc = ptrace_write_dr7(tsk, val);
 
-	return 0;
+ret_path:
+	return rc;
 }
 
 /*
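
From user space the rewritten path above is driven the way classic debuggers always have: poke an address into u_debugreg[0..3] and then poke an enable mask into u_debugreg[7], which ptrace_write_dr7() now turns into register_user_hw_breakpoint()/modify_user_hw_breakpoint() calls. A minimal sketch, assuming a tracee already stopped in ptrace-stop and a 4-byte variable at addr; the helper name is illustrative, and the dr7 value 0x000d0001 encodes slot 0, local-enable, write access, length 4 (cf. decode_dr7() above):

#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* Illustrative helper: arm a 4-byte write watchpoint in slot 0. */
static int set_write_watchpoint(pid_t pid, unsigned long addr)
{
	/* DR0 holds the linear address to watch */
	if (ptrace(PTRACE_POKEUSER, pid,
		   (void *)offsetof(struct user, u_debugreg[0]),
		   (void *)addr) < 0)
		return -1;

	/* DR7: L0 set, R/W0 = write, LEN0 = 4 bytes */
	if (ptrace(PTRACE_POKEUSER, pid,
		   (void *)offsetof(struct user, u_debugreg[7]),
		   (void *)0x000d0001UL) < 0)
		return -1;

	return 0;
}
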
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		/*
-		 * Re-enable any watchpoints before delivering the
-		 * signal to user space. The processor register will
-		 * have been cleared if the watchpoint triggered
-		 * inside the kernel.
-		 */
-		if (current->thread.debugreg7)
-			set_debugreg(current->thread.debugreg7, 7);
-
 		/* Whee! Actually deliver the signal. */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 			/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 565ebc65920e..213a7a3e4562 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
+#include <asm/debugreg.h>
 #include <linux/mc146818rtc.h>
 
 #include <asm/smpboot_hooks.h>
@@ -327,6 +328,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
+	load_debug_registers();
 	cpu_idle();
 }
 
@@ -1267,6 +1269,7 @@ void cpu_disable_common(void)
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
 	fixup_irqs();
+	hw_breakpoint_disable();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long condition;
+	unsigned long dr6;
 	int si_code;
 
-	get_debugreg(condition, 6);
+	get_debugreg(dr6, 6);
 
 	/* Catch kmemcheck conditions first of all! */
-	if (condition & DR_STEP && kmemcheck_trap(regs))
+	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
 
+	/* DR6 may or may not be cleared by the CPU */
+	set_debugreg(0, 6);
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
 	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 	tsk->thread.debugctlmsr = 0;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-						SIGTRAP) == NOTIFY_STOP)
+	/* Store the virtualized DR6 value */
+	tsk->thread.debugreg6 = dr6;
+
+	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+							SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
-	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7)
-			goto clear_dr7;
+	if (regs->flags & X86_VM_MASK) {
+		handle_vm86_trap((struct kernel_vm86_regs *) regs,
+				error_code, 1);
+		return;
 	}
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto debug_vm86;
-#endif
-
-	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg6 = condition;
-
 	/*
-	 * Single-stepping through TF: make sure we ignore any events in
-	 * kernel space (but re-enable TF when returning to user mode).
+	 * Single-stepping through system calls: ignore any exceptions in
+	 * kernel space, but re-enable TF when returning to user mode.
+	 *
+	 * We already checked v86 mode above, so we can check for kernel mode
+	 * by just checking the CPL of CS.
 	 */
-	if (condition & DR_STEP) {
-		if (!user_mode(regs))
-			goto clear_TF_reenable;
+	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+		tsk->thread.debugreg6 &= ~DR_STEP;
+		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
 	}
-
-	si_code = get_si_code(condition);
-	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code, si_code);
-
-	/*
-	 * Disable additional traps. They'll be re-enabled when
-	 * the signal is delivered.
-	 */
-clear_dr7:
-	set_debugreg(0, 7);
+	si_code = get_si_code(tsk->thread.debugreg6);
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-	/* reenable preemption: handle_vm86_trap() might sleep */
-	dec_preempt_count();
-	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	conditional_cli(regs);
-	return;
-#endif
-
-clear_TF_reenable:
-	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~X86_EFLAGS_TF;
-	preempt_conditional_cli(regs);
 	return;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b9695322f56..fc2974adf9b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3644,10 +3644,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_x86_ops->run(vcpu, kvm_run);
 
 	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
+		set_debugreg(current->thread.debugreg[0], 0);
+		set_debugreg(current->thread.debugreg[1], 1);
+		set_debugreg(current->thread.debugreg[2], 2);
+		set_debugreg(current->thread.debugreg[3], 3);
 		set_debugreg(current->thread.debugreg6, 6);
 		set_debugreg(current->thread.debugreg7, 7);
 	}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 	struct die_args *arg = args;
 
 	if (val == DIE_DEBUG && (arg->err & DR_STEP))
-		if (post_kmmio_handler(arg->err, arg->regs) == 1)
+		if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
 			return NOTIFY_STOP;
+		}
 
 	return NOTIFY_DONE;
 }
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f17667e..e09a44fc4664 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
+#include <asm/debugreg.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -104,6 +105,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
 #endif
+	hw_breakpoint_disable();
 }
 
 /* Needed by apm.c */
@@ -146,27 +148,7 @@ static void fix_processor_context(void)
 	/*
 	 * Now maybe reload the debug registers
 	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
+	load_debug_registers();
 }
 
 /**
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
new file mode 100644
index 000000000000..9bf2d12eb74a
--- /dev/null
+++ b/include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,139 @@
1#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
2#define _ASM_GENERIC_HW_BREAKPOINT_H
3
4#ifndef __ARCH_HW_BREAKPOINT_H
5#error "Please don't include this file directly"
6#endif
7
8#ifdef __KERNEL__
9#include <linux/list.h>
10#include <linux/types.h>
11#include <linux/kallsyms.h>
12
13/**
14 * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
15 * @triggered: callback invoked after target address access
16 * @info: arch-specific breakpoint info (address, length, and type)
17 *
18 * %hw_breakpoint structures are the kernel's way of representing
19 * hardware breakpoints. These are data breakpoints
20 * (also known as "watchpoints", triggered on data access), and the breakpoint's
21 * target address can be located in either kernel space or user space.
22 *
23 * The breakpoint's address, length, and type are highly
24 * architecture-specific. The values are encoded in the @info field; you
25 * specify them when registering the breakpoint. To examine the encoded
26 * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
27 * below.
28 *
29 * The address is specified as a regular kernel pointer (for kernel-space
 30 * breakpoints) or as an %__user pointer (for user-space breakpoints).
31 * With register_user_hw_breakpoint(), the address must refer to a
32 * location in user space. The breakpoint will be active only while the
33 * requested task is running. Conversely with
34 * register_kernel_hw_breakpoint(), the address must refer to a location
35 * in kernel space, and the breakpoint will be active on all CPUs
36 * regardless of the current task.
37 *
38 * The length is the breakpoint's extent in bytes, which is subject to
39 * certain limitations. include/asm/hw_breakpoint.h contains macros
40 * defining the available lengths for a specific architecture. Note that
41 * the address's alignment must match the length. The breakpoint will
42 * catch accesses to any byte in the range from address to address +
43 * (length - 1).
44 *
45 * The breakpoint's type indicates the sort of access that will cause it
46 * to trigger. Possible values may include:
47 *
48 * %HW_BREAKPOINT_RW (triggered on read or write access),
49 * %HW_BREAKPOINT_WRITE (triggered on write access), and
50 * %HW_BREAKPOINT_READ (triggered on read access).
51 *
52 * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
53 * possibilities are available on all architectures. Execute breakpoints
54 * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
55 *
56 * When a breakpoint gets hit, the @triggered callback is
57 * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
58 * processor registers.
59 * Data breakpoints occur after the memory access has taken place.
 60 * Breakpoints are disabled during execution of @triggered, to avoid
61 * recursive traps and allow unhindered access to breakpointed memory.
62 *
63 * This sample code sets a breakpoint on pid_max and registers a callback
64 * function for writes to that variable. Note that it is not portable
65 * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
66 *
67 * ----------------------------------------------------------------------
68 *
69 * #include <asm/hw_breakpoint.h>
70 *
71 * struct hw_breakpoint my_bp;
72 *
73 * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
74 * {
75 * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
76 * dump_stack();
77 * .......<more debugging output>........
78 * }
79 *
80 * static struct hw_breakpoint my_bp;
81 *
82 * static int init_module(void)
83 * {
84 * ..........<do anything>............
85 * my_bp.info.type = HW_BREAKPOINT_WRITE;
86 * my_bp.info.len = HW_BREAKPOINT_LEN_4;
87 *
88 * my_bp.installed = (void *)my_bp_installed;
89 *
90 * rc = register_kernel_hw_breakpoint(&my_bp);
91 * ..........<do anything>............
92 * }
93 *
94 * static void cleanup_module(void)
95 * {
96 * ..........<do anything>............
97 * unregister_kernel_hw_breakpoint(&my_bp);
98 * ..........<do anything>............
99 * }
100 *
101 * ----------------------------------------------------------------------
102 */
103struct hw_breakpoint {
104 void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
105 struct arch_hw_breakpoint info;
106};
107
108/*
109 * len and type values are defined in include/asm/hw_breakpoint.h.
110 * Available values vary according to the architecture. On i386 the
111 * possibilities are:
112 *
113 * HW_BREAKPOINT_LEN_1
114 * HW_BREAKPOINT_LEN_2
115 * HW_BREAKPOINT_LEN_4
116 * HW_BREAKPOINT_RW
117 * HW_BREAKPOINT_READ
118 *
119 * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
120 * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
121 * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
122 */
123
124extern int register_user_hw_breakpoint(struct task_struct *tsk,
125 struct hw_breakpoint *bp);
126extern int modify_user_hw_breakpoint(struct task_struct *tsk,
127 struct hw_breakpoint *bp);
128extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
129 struct hw_breakpoint *bp);
130/*
131 * Kernel breakpoints are not associated with any particular thread.
132 */
133extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
134extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
135
136extern unsigned int hbp_kernel_pos;
137
138#endif /* __KERNEL__ */
139#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
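
[The per-task interface declared above (register_user_hw_breakpoint() and friends) has no example anywhere in this patch. The sketch below is illustrative only: it assumes the x86 layout of struct arch_hw_breakpoint (info.address, info.len, info.type, as used elsewhere in the patch), and the caller, watched address and handler names are hypothetical.]

#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <asm/hw_breakpoint.h>

/* Hypothetical handler: runs in interrupt context when the traced task
 * writes the watched address. */
static void watch_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
	printk(KERN_DEBUG "watched address 0x%lx written\n", bp->info.address);
}

static struct hw_breakpoint watch_bp;

/* Attach a 4-byte write watchpoint to @child at @uaddr; both are assumed to
 * come from the caller, e.g. a ptrace-style request handler. */
static int attach_watchpoint(struct task_struct *child, unsigned long uaddr)
{
	watch_bp.info.address = uaddr;
	watch_bp.info.type = HW_BREAKPOINT_WRITE;
#ifdef HW_BREAKPOINT_LEN_4	/* not every architecture provides this length */
	watch_bp.info.len = HW_BREAKPOINT_LEN_4;
#endif
	watch_bp.triggered = watch_triggered;

	/* Fails with -ENOSPC once the kernel has claimed the remaining registers */
	return register_user_hw_breakpoint(child, &watch_bp);
}

unregister_user_hw_breakpoint(child, &watch_bp) releases the slot again, and modify_user_hw_breakpoint() updates a breakpoint the task already owns.
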
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..17b575ec7d07 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
95obj-$(CONFIG_SMP) += sched_cpupri.o 95obj-$(CONFIG_SMP) += sched_cpupri.o
96obj-$(CONFIG_SLOW_WORK) += slow-work.o 96obj-$(CONFIG_SLOW_WORK) += slow-work.o
97obj-$(CONFIG_PERF_EVENTS) += perf_event.o 97obj-$(CONFIG_PERF_EVENTS) += perf_event.o
98obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
98 99
99ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 100ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
100# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 101# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..c1f64e65a9f3
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,378 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 * This file contains the arch-independent routines.
24 */
25
26#include <linux/irqflags.h>
27#include <linux/kallsyms.h>
28#include <linux/notifier.h>
29#include <linux/kprobes.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/percpu.h>
34#include <linux/sched.h>
35#include <linux/init.h>
36#include <linux/smp.h>
37
38#include <asm/hw_breakpoint.h>
39#include <asm/processor.h>
40
41#ifdef CONFIG_X86
42#include <asm/debugreg.h>
43#endif
44/*
45 * Spinlock that protects all (un)register operations over kernel/user-space
46 * breakpoint requests
47 */
48static DEFINE_SPINLOCK(hw_breakpoint_lock);
49
50/* Array of kernel-space breakpoint structures */
51struct hw_breakpoint *hbp_kernel[HBP_NUM];
52
53/*
 54 * Per-processor copy of hbp_kernel[]. Used only while hbp_kernel[] is being
 55 * modified and the older copy is still needed to handle hbp exceptions. It is
 56 * synchronised with hbp_kernel[] once the update has been propagated via IPIs.
57 */
58DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
59
60/*
 61 * Kernel breakpoints grow downwards, starting from HBP_NUM.
 62 * 'hbp_kernel_pos' denotes the lowest-numbered breakpoint register occupied by
 63 * a kernel-space request. We initialise it here rather than in an __init
 64 * routine because load_debug_registers(), which uses this variable, can be
 65 * called very early during CPU initialisation.
66 */
67unsigned int hbp_kernel_pos = HBP_NUM;
68
69/*
 70 * An array containing the refcount of threads using a given breakpoint register.
 71 * Accesses are synchronised by acquiring hw_breakpoint_lock.
72 */
73unsigned int hbp_user_refcount[HBP_NUM];
74
75/*
76 * Load the debug registers during startup of a CPU.
77 */
78void load_debug_registers(void)
79{
80 unsigned long flags;
81 struct task_struct *tsk = current;
82
83 spin_lock_bh(&hw_breakpoint_lock);
84
85 /* Prevent IPIs for new kernel breakpoint updates */
86 local_irq_save(flags);
87 arch_update_kernel_hw_breakpoint(NULL);
88 local_irq_restore(flags);
89
90 if (test_tsk_thread_flag(tsk, TIF_DEBUG))
91 arch_install_thread_hw_breakpoint(tsk);
92
93 spin_unlock_bh(&hw_breakpoint_lock);
94}
95
96/*
97 * Erase all the hardware breakpoint info associated with a thread.
98 *
99 * If tsk != current then tsk must not be usable (for example, a
100 * child being cleaned up from a failed fork).
101 */
102void flush_thread_hw_breakpoint(struct task_struct *tsk)
103{
104 int i;
105 struct thread_struct *thread = &(tsk->thread);
106
107 spin_lock_bh(&hw_breakpoint_lock);
108
109 /* The thread no longer has any breakpoints associated with it */
110 clear_tsk_thread_flag(tsk, TIF_DEBUG);
111 for (i = 0; i < HBP_NUM; i++) {
112 if (thread->hbp[i]) {
113 hbp_user_refcount[i]--;
114 kfree(thread->hbp[i]);
115 thread->hbp[i] = NULL;
116 }
117 }
118
119 arch_flush_thread_hw_breakpoint(tsk);
120
121 /* Actually uninstall the breakpoints if necessary */
122 if (tsk == current)
123 arch_uninstall_thread_hw_breakpoint();
124 spin_unlock_bh(&hw_breakpoint_lock);
125}
126
127/*
128 * Copy the hardware breakpoint info from a thread to its cloned child.
129 */
130int copy_thread_hw_breakpoint(struct task_struct *tsk,
131 struct task_struct *child, unsigned long clone_flags)
132{
133 /*
134 * We will assume that breakpoint settings are not inherited
135 * and the child starts out with no debug registers set.
136 * But what about CLONE_PTRACE?
137 */
138 clear_tsk_thread_flag(child, TIF_DEBUG);
139
140 /* We will call flush routine since the debugregs are not inherited */
141 arch_flush_thread_hw_breakpoint(child);
142
143 return 0;
144}
145
146static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
147 struct hw_breakpoint *bp)
148{
149 struct thread_struct *thread = &(tsk->thread);
150 int rc;
151
152 /* Do not overcommit. Fail if kernel has used the hbp registers */
153 if (pos >= hbp_kernel_pos)
154 return -ENOSPC;
155
156 rc = arch_validate_hwbkpt_settings(bp, tsk);
157 if (rc)
158 return rc;
159
160 thread->hbp[pos] = bp;
161 hbp_user_refcount[pos]++;
162
163 arch_update_user_hw_breakpoint(pos, tsk);
164 /*
165 * Does it need to be installed right now?
166 * Otherwise it will get installed the next time tsk runs
167 */
168 if (tsk == current)
169 arch_install_thread_hw_breakpoint(tsk);
170
171 return rc;
172}
173
174/*
 175 * Modify the address of an hbp register already in use by the task.
 176 * Do not invoke this in lieu of a __unregister_user_hw_breakpoint().
177 */
178static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
179 struct hw_breakpoint *bp)
180{
181 struct thread_struct *thread = &(tsk->thread);
182
183 if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
184 return -EINVAL;
185
186 if (thread->hbp[pos] == NULL)
187 return -EINVAL;
188
189 thread->hbp[pos] = bp;
190 /*
191 * 'pos' must be that of a hbp register already used by 'tsk'
192 * Otherwise arch_modify_user_hw_breakpoint() will fail
193 */
194 arch_update_user_hw_breakpoint(pos, tsk);
195
196 if (tsk == current)
197 arch_install_thread_hw_breakpoint(tsk);
198
199 return 0;
200}
201
202static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
203{
204 hbp_user_refcount[pos]--;
205 tsk->thread.hbp[pos] = NULL;
206
207 arch_update_user_hw_breakpoint(pos, tsk);
208
209 if (tsk == current)
210 arch_install_thread_hw_breakpoint(tsk);
211}
212
213/**
214 * register_user_hw_breakpoint - register a hardware breakpoint for user space
215 * @tsk: pointer to 'task_struct' of the process to which the address belongs
216 * @bp: the breakpoint structure to register
217 *
218 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
219 * @bp->triggered must be set properly before invocation
220 *
221 */
222int register_user_hw_breakpoint(struct task_struct *tsk,
223 struct hw_breakpoint *bp)
224{
225 struct thread_struct *thread = &(tsk->thread);
226 int i, rc = -ENOSPC;
227
228 spin_lock_bh(&hw_breakpoint_lock);
229
230 for (i = 0; i < hbp_kernel_pos; i++) {
231 if (!thread->hbp[i]) {
232 rc = __register_user_hw_breakpoint(i, tsk, bp);
233 break;
234 }
235 }
236 if (!rc)
237 set_tsk_thread_flag(tsk, TIF_DEBUG);
238
239 spin_unlock_bh(&hw_breakpoint_lock);
240 return rc;
241}
242EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
243
244/**
245 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
246 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 247 * @bp: the breakpoint structure to modify
248 *
249 */
250int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp)
251{
252 struct thread_struct *thread = &(tsk->thread);
253 int i, ret = -ENOENT;
254
255 spin_lock_bh(&hw_breakpoint_lock);
256 for (i = 0; i < hbp_kernel_pos; i++) {
257 if (bp == thread->hbp[i]) {
258 ret = __modify_user_hw_breakpoint(i, tsk, bp);
259 break;
260 }
261 }
262 spin_unlock_bh(&hw_breakpoint_lock);
263 return ret;
264}
265EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
266
267/**
268 * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
269 * @tsk: pointer to 'task_struct' of the process to which the address belongs
270 * @bp: the breakpoint structure to unregister
271 *
272 */
273void unregister_user_hw_breakpoint(struct task_struct *tsk,
274 struct hw_breakpoint *bp)
275{
276 struct thread_struct *thread = &(tsk->thread);
277 int i, pos = -1, hbp_counter = 0;
278
279 spin_lock_bh(&hw_breakpoint_lock);
280 for (i = 0; i < hbp_kernel_pos; i++) {
281 if (thread->hbp[i])
282 hbp_counter++;
283 if (bp == thread->hbp[i])
284 pos = i;
285 }
286 if (pos >= 0) {
287 __unregister_user_hw_breakpoint(pos, tsk);
288 hbp_counter--;
289 }
290 if (!hbp_counter)
291 clear_tsk_thread_flag(tsk, TIF_DEBUG);
292
293 spin_unlock_bh(&hw_breakpoint_lock);
294}
295EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
296
297/**
298 * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
299 * @bp: the breakpoint structure to register
300 *
301 * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
302 * @bp->triggered must be set properly before invocation
303 *
304 */
305int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
306{
307 int rc;
308
309 rc = arch_validate_hwbkpt_settings(bp, NULL);
310 if (rc)
311 return rc;
312
313 spin_lock_bh(&hw_breakpoint_lock);
314
315 rc = -ENOSPC;
316 /* Check if we are over-committing */
317 if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
318 hbp_kernel_pos--;
319 hbp_kernel[hbp_kernel_pos] = bp;
320 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
321 rc = 0;
322 }
323
324 spin_unlock_bh(&hw_breakpoint_lock);
325 return rc;
326}
327EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
328
329/**
330 * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
331 * @bp: the breakpoint structure to unregister
332 *
333 * Uninstalls and unregisters @bp.
334 */
335void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
336{
337 int i, j;
338
339 spin_lock_bh(&hw_breakpoint_lock);
340
341 /* Find the 'bp' in our list of breakpoints for kernel */
342 for (i = hbp_kernel_pos; i < HBP_NUM; i++)
343 if (bp == hbp_kernel[i])
344 break;
345
346 /* Check if we did not find a match for 'bp'. If so return early */
347 if (i == HBP_NUM) {
348 spin_unlock_bh(&hw_breakpoint_lock);
349 return;
350 }
351
352 /*
 353 * If unregistration creates a hole, close it by shifting the remaining
 354 * kernel breakpoints by one position
355 */
356 for (j = i; j > hbp_kernel_pos; j--)
357 hbp_kernel[j] = hbp_kernel[j-1];
358
359 hbp_kernel[hbp_kernel_pos] = NULL;
360 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
361 hbp_kernel_pos++;
362
363 spin_unlock_bh(&hw_breakpoint_lock);
364}
365EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
366
367static struct notifier_block hw_breakpoint_exceptions_nb = {
368 .notifier_call = hw_breakpoint_exceptions_notify,
369 /* we need to be notified first */
370 .priority = 0x7fffffff
371};
372
373static int __init init_hw_breakpoint(void)
374{
375 return register_die_notifier(&hw_breakpoint_exceptions_nb);
376}
377
378core_initcall(init_hw_breakpoint);
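
[For the kernel-side interface implemented above, a minimal caller sketch follows; the symbol and handler names are hypothetical, and x86 field names and lengths are assumed, as in samples/hw_breakpoint/data_breakpoint.c later in this patch. register_kernel_hw_breakpoint() hands out debug registers from the top of the array downwards and returns -ENOSPC when the remaining registers are already held by user-space requests.]

#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/init.h>
#include <asm/hw_breakpoint.h>

static char watched_sym[KSYM_NAME_LEN] = "pid_max";	/* hypothetical target */
static struct hw_breakpoint watched_bp;

/* Data breakpoints fire after the access; regs describe the interrupted context. */
static void watched_bp_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
	printk(KERN_DEBUG "%s written, ip=%p\n", watched_sym,
	       (void *)instruction_pointer(regs));
}

static int __init watched_bp_init(void)
{
	int rc;

	watched_bp.info.name = watched_sym;
	watched_bp.info.type = HW_BREAKPOINT_WRITE;
#ifdef HW_BREAKPOINT_LEN_4
	watched_bp.info.len = HW_BREAKPOINT_LEN_4;
#endif
	watched_bp.triggered = watched_bp_triggered;

	rc = register_kernel_hw_breakpoint(&watched_bp);
	if (rc == -ENOSPC)
		printk(KERN_INFO "no debug register free for %s\n", watched_sym);
	return rc;
}
core_initcall(watched_bp_init);
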
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..06c3d5be6759 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347	  This tracer helps find read and write operations on any kernel
348	  symbol listed in /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354 This tracer profiles kernel accesses on variables watched through the
355	  ksym tracer ftrace plugin. Depending on the hardware, all read
356	  and write operations on the watched kernel variables can be
357	  monitored and counted.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
342 363
343config STACK_TRACER 364config STACK_TRACER
344 bool "Trace max stack" 365 bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..0f84c52e58fe 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 57obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 58
58libftrace-y := ftrace.o 59libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4959ada9e0bb..91c3d0e9a5a1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,6 +15,10 @@
15#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
17 17
18#ifdef CONFIG_KSYM_TRACER
19#include <asm/hw_breakpoint.h>
20#endif
21
18enum trace_type { 22enum trace_type {
19 __TRACE_FIRST_TYPE = 0, 23 __TRACE_FIRST_TYPE = 0,
20 24
@@ -37,6 +41,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 41 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 42 TRACE_KMEM_FREE,
39 TRACE_BLK, 43 TRACE_BLK,
44 TRACE_KSYM,
40 45
41 __TRACE_LAST_TYPE, 46 __TRACE_LAST_TYPE,
42}; 47};
@@ -209,6 +214,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 214 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 215 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 216 TRACE_KMEM_FREE); \
217 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 218 __ftrace_bad_type(); \
213 } while (0) 219 } while (0)
214 220
@@ -364,6 +370,9 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 370void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 371int is_tracing_stopped(void);
366 372
373#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
374extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
375
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 376extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 377
369#ifdef CONFIG_TRACER_MAX_TRACE 378#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +447,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 447 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 448extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 449 struct trace_array *tr);
450extern int trace_selftest_startup_ksym(struct tracer *trace,
451 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 452#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 453
443extern void *head_page(struct trace_array_cpu *data); 454extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..e19747d4f860 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , ksym_name, KSYM_NAME_LEN )
376 __array( char , cmd, TASK_COMM_LEN )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %s cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 __entry->ksym_name, __entry->cmd)
382);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..6d5609c67378
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,551 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
 32/* For now, restrict the number of symbols traced simultaneously to the
 33 * number of available hardware breakpoint registers.
34 */
35#define KSYM_TRACER_MAX HBP_NUM
36
37#define KSYM_TRACER_OP_LEN 3 /* rw- */
38
39struct trace_ksym {
40 struct hw_breakpoint *ksym_hbp;
41 unsigned long ksym_addr;
42#ifdef CONFIG_PROFILE_KSYM_TRACER
43 unsigned long counter;
44#endif
45 struct hlist_node ksym_hlist;
46};
47
48static struct trace_array *ksym_trace_array;
49
50static unsigned int ksym_filter_entry_count;
51static unsigned int ksym_tracing_enabled;
52
53static HLIST_HEAD(ksym_filter_head);
54
55static DEFINE_MUTEX(ksym_tracer_mutex);
56
57#ifdef CONFIG_PROFILE_KSYM_TRACER
58
59#define MAX_UL_INT 0xffffffff
60
61void ksym_collect_stats(unsigned long hbp_hit_addr)
62{
63 struct hlist_node *node;
64 struct trace_ksym *entry;
65
66 rcu_read_lock();
67 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
68 if ((entry->ksym_addr == hbp_hit_addr) &&
69 (entry->counter <= MAX_UL_INT)) {
70 entry->counter++;
71 break;
72 }
73 }
74 rcu_read_unlock();
75}
76#endif /* CONFIG_PROFILE_KSYM_TRACER */
77
78void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
79{
80 struct ring_buffer_event *event;
81 struct ksym_trace_entry *entry;
82 struct ring_buffer *buffer;
83 int pc;
84
85 if (!ksym_tracing_enabled)
86 return;
87
88 buffer = ksym_trace_array->buffer;
89
90 pc = preempt_count();
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
93 sizeof(*entry), 0, pc);
94 if (!event)
95 return;
96
97 entry = ring_buffer_event_data(event);
98 entry->ip = instruction_pointer(regs);
99 entry->type = hbp->info.type;
100 strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
101 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
102
103#ifdef CONFIG_PROFILE_KSYM_TRACER
104 ksym_collect_stats(hbp->info.address);
105#endif /* CONFIG_PROFILE_KSYM_TRACER */
106
107 trace_buffer_unlock_commit(buffer, event, 0, pc);
108}
109
110/* Valid access types are represented as
111 *
112 * rw- : Set Read/Write Access Breakpoint
113 * -w- : Set Write Access Breakpoint
114 * --- : Clear Breakpoints
 115 * --x : Set Execution Breakpoint (not available yet)
116 *
117 */
118static int ksym_trace_get_access_type(char *str)
119{
120 int access = 0;
121
122 if (str[0] == 'r')
123 access += 4;
124 else if (str[0] != '-')
125 return -EINVAL;
126
127 if (str[1] == 'w')
128 access += 2;
129 else if (str[1] != '-')
130 return -EINVAL;
131
132 if (str[2] != '-')
133 return -EINVAL;
134
135 switch (access) {
136 case 6:
137 access = HW_BREAKPOINT_RW;
138 break;
139 case 4:
140 access = -EINVAL;
141 break;
142 case 2:
143 access = HW_BREAKPOINT_WRITE;
144 break;
145 }
146
147 return access;
148}
149
150/*
151 * There can be several possible malformed requests and we attempt to capture
 152 * all of them. Some of the rules are enumerated below:
 153 * 1. Kernel symbols containing ':' are not allowed, since ':' is used as the
 154 *    delimiter; i.e. multiple ':' symbols are disallowed. Possible uses are of
 155 *    the form <module>:<ksym_name>:<op>.
156 * 2. No delimiter symbol ':' in the input string
157 * 3. Spurious operator symbols or symbols not in their respective positions
158 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
159 * 5. Kernel symbol not a part of /proc/kallsyms
160 * 6. Duplicate requests
161 */
162static int parse_ksym_trace_str(char *input_string, char **ksymname,
163 unsigned long *addr)
164{
165 int ret;
166
167 *ksymname = strsep(&input_string, ":");
168 *addr = kallsyms_lookup_name(*ksymname);
169
170 /* Check for malformed request: (2), (1) and (5) */
171 if ((!input_string) ||
172 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
173 (*addr == 0))
 174		return -EINVAL;
175
176 ret = ksym_trace_get_access_type(input_string);
177
178 return ret;
179}
180
181int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
182{
183 struct trace_ksym *entry;
184 int ret = -ENOMEM;
185
186 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
187 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
188 " new requests for tracing can be accepted now.\n",
189 KSYM_TRACER_MAX);
190 return -ENOSPC;
191 }
192
193 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
194 if (!entry)
195 return -ENOMEM;
196
197 entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
198 if (!entry->ksym_hbp)
199 goto err;
200
201 entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL);
202 if (!entry->ksym_hbp->info.name)
203 goto err;
204
205 entry->ksym_hbp->info.type = op;
206 entry->ksym_addr = entry->ksym_hbp->info.address = addr;
207#ifdef CONFIG_X86
208 entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
209#endif
210 entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
211
212 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
213 if (ret < 0) {
214 printk(KERN_INFO "ksym_tracer request failed. Try again"
215 " later!!\n");
216 ret = -EAGAIN;
217 goto err;
218 }
219 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
220 ksym_filter_entry_count++;
221 return 0;
222err:
223 if (entry->ksym_hbp)
224 kfree(entry->ksym_hbp->info.name);
225 kfree(entry->ksym_hbp);
226 kfree(entry);
227 return ret;
228}
229
230static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
231 size_t count, loff_t *ppos)
232{
233 struct trace_ksym *entry;
234 struct hlist_node *node;
235 struct trace_seq *s;
236 ssize_t cnt = 0;
237 int ret;
238
239 s = kmalloc(sizeof(*s), GFP_KERNEL);
240 if (!s)
241 return -ENOMEM;
242 trace_seq_init(s);
243
244 mutex_lock(&ksym_tracer_mutex);
245
246 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
247 ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name);
248 if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
249 ret = trace_seq_puts(s, "-w-\n");
250 else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
251 ret = trace_seq_puts(s, "rw-\n");
252 WARN_ON_ONCE(!ret);
253 }
254
255 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
256
257 mutex_unlock(&ksym_tracer_mutex);
258
259 kfree(s);
260
261 return cnt;
262}
263
264static void __ksym_trace_reset(void)
265{
266 struct trace_ksym *entry;
267 struct hlist_node *node, *node1;
268
269 mutex_lock(&ksym_tracer_mutex);
270 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
271 ksym_hlist) {
272 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
273 ksym_filter_entry_count--;
274 hlist_del_rcu(&(entry->ksym_hlist));
275 synchronize_rcu();
276 kfree(entry->ksym_hbp->info.name);
277 kfree(entry->ksym_hbp);
278 kfree(entry);
279 }
280 mutex_unlock(&ksym_tracer_mutex);
281}
282
283static ssize_t ksym_trace_filter_write(struct file *file,
284 const char __user *buffer,
285 size_t count, loff_t *ppos)
286{
287 struct trace_ksym *entry;
288 struct hlist_node *node;
289 char *input_string, *ksymname = NULL;
290 unsigned long ksym_addr = 0;
291 int ret, op, changed = 0;
292
293 input_string = kzalloc(count + 1, GFP_KERNEL);
294 if (!input_string)
295 return -ENOMEM;
296
297 if (copy_from_user(input_string, buffer, count)) {
298 kfree(input_string);
299 return -EFAULT;
300 }
301 input_string[count] = '\0';
302
303 strstrip(input_string);
304
305 /*
306 * Clear all breakpoints if:
307 * 1: echo > ksym_trace_filter
308 * 2: echo 0 > ksym_trace_filter
309 * 3: echo "*:---" > ksym_trace_filter
310 */
311 if (!input_string[0] || !strcmp(input_string, "0") ||
312 !strcmp(input_string, "*:---")) {
313 __ksym_trace_reset();
314 kfree(input_string);
315 return count;
316 }
317
318 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
319 if (ret < 0) {
320 kfree(input_string);
321 return ret;
322 }
323
324 mutex_lock(&ksym_tracer_mutex);
325
326 ret = -EINVAL;
327 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
328 if (entry->ksym_addr == ksym_addr) {
329 /* Check for malformed request: (6) */
330 if (entry->ksym_hbp->info.type != op)
331 changed = 1;
332 else
333 goto out;
334 break;
335 }
336 }
337 if (changed) {
338 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
339 entry->ksym_hbp->info.type = op;
340 if (op > 0) {
341 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
342 if (ret == 0)
343 goto out;
344 }
345 ksym_filter_entry_count--;
346 hlist_del_rcu(&(entry->ksym_hlist));
347 synchronize_rcu();
348 kfree(entry->ksym_hbp->info.name);
349 kfree(entry->ksym_hbp);
350 kfree(entry);
351 ret = 0;
352 goto out;
353 } else {
354 /* Check for malformed request: (4) */
355 if (op == 0)
356 goto out;
357 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
358 }
359out:
360 mutex_unlock(&ksym_tracer_mutex);
361
362 kfree(input_string);
363
364 if (!ret)
365 ret = count;
366 return ret;
367}
368
369static const struct file_operations ksym_tracing_fops = {
370 .open = tracing_open_generic,
371 .read = ksym_trace_filter_read,
372 .write = ksym_trace_filter_write,
373};
374
375static void ksym_trace_reset(struct trace_array *tr)
376{
377 ksym_tracing_enabled = 0;
378 __ksym_trace_reset();
379}
380
381static int ksym_trace_init(struct trace_array *tr)
382{
383 int cpu, ret = 0;
384
385 for_each_online_cpu(cpu)
386 tracing_reset(tr, cpu);
387 ksym_tracing_enabled = 1;
388 ksym_trace_array = tr;
389
390 return ret;
391}
392
393static void ksym_trace_print_header(struct seq_file *m)
394{
395 seq_puts(m,
396 "# TASK-PID CPU# Symbol "
397 "Type Function\n");
398 seq_puts(m,
399 "# | | | "
400 " | |\n");
401}
402
403static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
404{
405 struct trace_entry *entry = iter->ent;
406 struct trace_seq *s = &iter->seq;
407 struct ksym_trace_entry *field;
408 char str[KSYM_SYMBOL_LEN];
409 int ret;
410
411 if (entry->type != TRACE_KSYM)
412 return TRACE_TYPE_UNHANDLED;
413
414 trace_assign_type(field, entry);
415
416 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd,
417 entry->pid, iter->cpu, field->ksym_name);
418 if (!ret)
419 return TRACE_TYPE_PARTIAL_LINE;
420
421 switch (field->type) {
422 case HW_BREAKPOINT_WRITE:
423 ret = trace_seq_printf(s, " W ");
424 break;
425 case HW_BREAKPOINT_RW:
426 ret = trace_seq_printf(s, " RW ");
427 break;
428 default:
429 return TRACE_TYPE_PARTIAL_LINE;
430 }
431
432 if (!ret)
433 return TRACE_TYPE_PARTIAL_LINE;
434
435 sprint_symbol(str, field->ip);
436 ret = trace_seq_printf(s, "%s\n", str);
437 if (!ret)
438 return TRACE_TYPE_PARTIAL_LINE;
439
440 return TRACE_TYPE_HANDLED;
441}
442
443struct tracer ksym_tracer __read_mostly =
444{
445 .name = "ksym_tracer",
446 .init = ksym_trace_init,
447 .reset = ksym_trace_reset,
448#ifdef CONFIG_FTRACE_SELFTEST
449 .selftest = trace_selftest_startup_ksym,
450#endif
451 .print_header = ksym_trace_print_header,
452 .print_line = ksym_trace_output
453};
454
455__init static int init_ksym_trace(void)
456{
457 struct dentry *d_tracer;
458 struct dentry *entry;
459
460 d_tracer = tracing_init_dentry();
461 ksym_filter_entry_count = 0;
462
463 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
464 NULL, &ksym_tracing_fops);
465 if (!entry)
466 pr_warning("Could not create debugfs "
467 "'ksym_trace_filter' file\n");
468
469 return register_tracer(&ksym_tracer);
470}
471device_initcall(init_ksym_trace);
472
473
474#ifdef CONFIG_PROFILE_KSYM_TRACER
475static int ksym_tracer_stat_headers(struct seq_file *m)
476{
477 seq_puts(m, " Access Type ");
478 seq_puts(m, " Symbol Counter\n");
479 seq_puts(m, " ----------- ");
480 seq_puts(m, " ------ -------\n");
481 return 0;
482}
483
484static int ksym_tracer_stat_show(struct seq_file *m, void *v)
485{
486 struct hlist_node *stat = v;
487 struct trace_ksym *entry;
488 int access_type = 0;
489 char fn_name[KSYM_NAME_LEN];
490
491 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
492
493 if (entry->ksym_hbp)
494 access_type = entry->ksym_hbp->info.type;
495
496 switch (access_type) {
497 case HW_BREAKPOINT_WRITE:
498 seq_puts(m, " W ");
499 break;
500 case HW_BREAKPOINT_RW:
501 seq_puts(m, " RW ");
502 break;
503 default:
504 seq_puts(m, " NA ");
505 }
506
507 if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
508 seq_printf(m, " %-36s", fn_name);
509 else
510 seq_printf(m, " %-36s", "<NA>");
511 seq_printf(m, " %15lu\n", entry->counter);
512
513 return 0;
514}
515
516static void *ksym_tracer_stat_start(struct tracer_stat *trace)
517{
518 return ksym_filter_head.first;
519}
520
521static void *
522ksym_tracer_stat_next(void *v, int idx)
523{
524 struct hlist_node *stat = v;
525
526 return stat->next;
527}
528
529static struct tracer_stat ksym_tracer_stats = {
530 .name = "ksym_tracer",
531 .stat_start = ksym_tracer_stat_start,
532 .stat_next = ksym_tracer_stat_next,
533 .stat_headers = ksym_tracer_stat_headers,
534 .stat_show = ksym_tracer_stat_show
535};
536
537__init static int ksym_tracer_stat_init(void)
538{
539 int ret;
540
541 ret = register_stat_tracer(&ksym_tracer_stats);
542 if (ret) {
543 printk(KERN_WARNING "Warning: could not register "
544 "ksym tracer stats\n");
545 return 1;
546 }
547
548 return 0;
549}
550fs_initcall(ksym_tracer_stat_init);
551#endif /* CONFIG_PROFILE_KSYM_TRACER */
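
[The filter file created by init_ksym_trace() above accepts requests of the form <ksym_name>:<op>, where <op> is "rw-", "-w-" or "---" as parsed by ksym_trace_get_access_type(); writing an empty string, "0" or "*:---" clears every breakpoint. A user-space sketch follows, assuming debugfs is mounted at /sys/kernel/debug (the mount point is not dictated by this patch); error handling is trimmed to the essentials.]

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0) {
		perror(path);
		return -1;
	}
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Assumed debugfs mount point; adjust to the local setup. */
	const char *tracing = "/sys/kernel/debug/tracing";
	char path[256];

	/* Select the tracer so that ksym_trace_init() enables tracing. */
	snprintf(path, sizeof(path), "%s/current_tracer", tracing);
	if (write_str(path, "ksym_tracer"))
		return 1;

	/* Watch both reads and writes of pid_max. */
	snprintf(path, sizeof(path), "%s/ksym_trace_filter", tracing);
	if (write_str(path, "pid_max:rw-"))
		return 1;

	/* Hits can now be read back from the 'trace' file in the same directory. */
	return 0;
}

With CONFIG_PROFILE_KSYM_TRACER enabled, per-symbol hit counts are additionally exposed through the stat tracer registered at the bottom of this file.
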
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..7179c12e4f0f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
832 (unsigned long)(&ksym_selftest_dummy));
833
834 if (ret < 0) {
835 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
836 goto ret_path;
837 }
838 /* Perform a read and a write operation over the dummy variable to
839 * trigger the tracer
840 */
841 if (ksym_selftest_dummy == 0)
842 ksym_selftest_dummy++;
843
844 /* stop the tracing. */
845 tracing_stop();
846 /* check the trace buffer */
847 ret = trace_test_buffer(tr, &count);
848 trace->reset(tr);
849 tracing_start();
850
851 /* read & write operations - one each is performed on the dummy variable
852 * triggering two entries in the trace buffer
853 */
854 if (!ret && count != 2) {
855 printk(KERN_CONT "Ksym tracer startup test failed");
856 ret = -1;
857 }
858
859ret_path:
860 return ret;
861}
862#endif /* CONFIG_KSYM_TRACER */
863
diff --git a/samples/Kconfig b/samples/Kconfig
index b92bde3c6a89..e4be84ac3d38 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
40 default m 40 default m
41 depends on SAMPLE_KPROBES && KRETPROBES 41 depends on SAMPLE_KPROBES && KRETPROBES
42 42
43config SAMPLE_HW_BREAKPOINT
44 tristate "Build kernel hardware breakpoint examples -- loadable module only"
45 depends on HAVE_HW_BREAKPOINT && m
46 help
47 This builds kernel hardware breakpoint example modules.
48
43endif # SAMPLES 49endif # SAMPLES
44 50
diff --git a/samples/Makefile b/samples/Makefile
index 43343a03b1f4..0f15e6d77fd6 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ 3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
4 hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000000..0f5c31c2fc47
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000000..9cbdbb871b7a
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,83 @@
1/*
2 * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * usage: insmod data_breakpoint.ko ksym=<ksym_name>
19 *
 20 * This file is a kernel module that places a breakpoint on the ksym_name kernel
 21 * variable using a hardware breakpoint register. The corresponding handler, which
 22 * prints a backtrace, is invoked every time a write operation is performed on
 23 * that variable.
24 *
25 * Copyright (C) IBM Corporation, 2009
26 */
27#include <linux/module.h> /* Needed by all modules */
28#include <linux/kernel.h> /* Needed for KERN_INFO */
29#include <linux/init.h> /* Needed for the macros */
30
31#include <asm/hw_breakpoint.h>
32
33struct hw_breakpoint sample_hbp;
34
35static char ksym_name[KSYM_NAME_LEN] = "pid_max";
36module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
37MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
38 " write operations on the kernel symbol");
39
40void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs
41 *temp_regs)
42{
43 printk(KERN_INFO "%s value is changed\n", ksym_name);
44 dump_stack();
45 printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
46}
47
48static int __init hw_break_module_init(void)
49{
50 int ret;
51
52#ifdef CONFIG_X86
53 sample_hbp.info.name = ksym_name;
54 sample_hbp.info.type = HW_BREAKPOINT_WRITE;
55 sample_hbp.info.len = HW_BREAKPOINT_LEN_4;
56#endif /* CONFIG_X86 */
57
58 sample_hbp.triggered = (void *)sample_hbp_handler;
59
60 ret = register_kernel_hw_breakpoint(&sample_hbp);
61
62 if (ret < 0) {
63 printk(KERN_INFO "Breakpoint registration failed\n");
64 return ret;
65 } else
66 printk(KERN_INFO "HW Breakpoint for %s write installed\n",
67 ksym_name);
68
69 return 0;
70}
71
72static void __exit hw_break_module_exit(void)
73{
74 unregister_kernel_hw_breakpoint(&sample_hbp);
75 printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
76}
77
78module_init(hw_break_module_init);
79module_exit(hw_break_module_exit);
80
81MODULE_LICENSE("GPL");
82MODULE_AUTHOR("K.Prasad");
83MODULE_DESCRIPTION("ksym breakpoint");