 arch/Kconfig                            |   4
 arch/x86/Kconfig                        |   1
 arch/x86/include/asm/a.out-core.h       |   8
 arch/x86/include/asm/debugreg.h         |  29
 arch/x86/include/asm/hw_breakpoint.h    |  55
 arch/x86/include/asm/processor.h        |   8
 arch/x86/kernel/Makefile                |   2
 arch/x86/kernel/hw_breakpoint.c         | 391
 arch/x86/kernel/kgdb.c                  |   6
 arch/x86/kernel/kprobes.c               |   9
 arch/x86/kernel/machine_kexec_32.c      |   2
 arch/x86/kernel/machine_kexec_64.c      |   2
 arch/x86/kernel/process.c               |  22
 arch/x86/kernel/process_32.c            |  28
 arch/x86/kernel/process_64.c            |  31
 arch/x86/kernel/ptrace.c                | 231
 arch/x86/kernel/signal.c                |   9
 arch/x86/kernel/smpboot.c               |   3
 arch/x86/kernel/traps.c                 |  73
 arch/x86/mm/kmmio.c                     |   8
 arch/x86/power/cpu.c                    |  24
 include/asm-generic/hw_breakpoint.h     | 139
 kernel/Makefile                         |   1
 kernel/hw_breakpoint.c                  | 378
 kernel/trace/Kconfig                    |  21
 kernel/trace/Makefile                   |   1
 kernel/trace/trace.h                    |  23
 kernel/trace/trace_ksym.c               | 525
 kernel/trace/trace_selftest.c           |  53
 samples/Kconfig                         |   6
 samples/Makefile                        |   3
 samples/hw_breakpoint/Makefile          |   1
 samples/hw_breakpoint/data_breakpoint.c |  83
 33 files changed, 1985 insertions(+), 195 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 78a35e9dc104..1adf2d0e6356 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG
112 112
113config HAVE_DEFAULT_NO_SPIN_MUTEXES 113config HAVE_DEFAULT_NO_SPIN_MUTEXES
114 bool 114 bool
115
116config HAVE_HW_BREAKPOINT
117 bool
118
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cf42fc305419..52421d52f21e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -46,6 +46,7 @@ config X86
46 select HAVE_KERNEL_GZIP 46 select HAVE_KERNEL_GZIP
47 select HAVE_KERNEL_BZIP2 47 select HAVE_KERNEL_BZIP2
48 select HAVE_KERNEL_LZMA 48 select HAVE_KERNEL_LZMA
49 select HAVE_HW_BREAKPOINT
49 select HAVE_ARCH_KMEMCHECK 50 select HAVE_ARCH_KMEMCHECK
50 51
51config OUTPUT_FORMAT 52config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..fc4685dd6e4d 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
32 >> PAGE_SHIFT; 32 >> PAGE_SHIFT;
33 dump->u_dsize -= dump->u_tsize; 33 dump->u_dsize -= dump->u_tsize;
34 dump->u_ssize = 0; 34 dump->u_ssize = 0;
35 dump->u_debugreg[0] = current->thread.debugreg0; 35 dump->u_debugreg[0] = current->thread.debugreg[0];
36 dump->u_debugreg[1] = current->thread.debugreg1; 36 dump->u_debugreg[1] = current->thread.debugreg[1];
37 dump->u_debugreg[2] = current->thread.debugreg2; 37 dump->u_debugreg[2] = current->thread.debugreg[2];
38 dump->u_debugreg[3] = current->thread.debugreg3; 38 dump->u_debugreg[3] = current->thread.debugreg[3];
39 dump->u_debugreg[4] = 0; 39 dump->u_debugreg[4] = 0;
40 dump->u_debugreg[5] = 0; 40 dump->u_debugreg[5] = 0;
41 dump->u_debugreg[6] = current->thread.debugreg6; 41 dump->u_debugreg[6] = current->thread.debugreg6;
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..23439fbb1d0e 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
18#define DR_TRAP1 (0x2) /* db1 */ 18#define DR_TRAP1 (0x2) /* db1 */
19#define DR_TRAP2 (0x4) /* db2 */ 19#define DR_TRAP2 (0x4) /* db2 */
20#define DR_TRAP3 (0x8) /* db3 */ 20#define DR_TRAP3 (0x8) /* db3 */
21#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
21 22
22#define DR_STEP (0x4000) /* single-step */ 23#define DR_STEP (0x4000) /* single-step */
23#define DR_SWITCH (0x8000) /* task switch */ 24#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
49 50
50#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ 51#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
51#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ 52#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
53#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
54#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
52#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ 55#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
53 56
54#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ 57#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,30 @@
67#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ 70#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
68#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ 71#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
69 72
73/*
74 * HW breakpoint additions
75 */
76#ifdef __KERNEL__
77
78/* For process management */
79extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
80extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
81 struct task_struct *child, unsigned long clone_flags);
82
83/* For CPU management */
84extern void load_debug_registers(void);
85static inline void hw_breakpoint_disable(void)
86{
87 /* Zero the control register for HW Breakpoint */
88 set_debugreg(0UL, 7);
89
90 /* Zero-out the individual HW breakpoint address registers */
91 set_debugreg(0UL, 0);
92 set_debugreg(0UL, 1);
93 set_debugreg(0UL, 2);
94 set_debugreg(0UL, 3);
95}
96
97#endif /* __KERNEL__ */
98
70#endif /* _ASM_X86_DEBUGREG_H */ 99#endif /* _ASM_X86_DEBUGREG_H */
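
The DR_TRAP_BITS and per-register enable constants added above are written for breakpoint slot 0 and are meant to be shifted per slot. A minimal sketch of that arithmetic (illustrative only, not part of the patch; the helper names are made up and the #defines simply mirror the header):

/* Values mirrored from asm/debugreg.h for a stand-alone illustration. */
#define DR_TRAP0          (0x1)
#define DR_TRAP_BITS      (0xf)   /* DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3 */
#define DR_GLOBAL_ENABLE  (0x2)   /* Global enable for reg 0 */
#define DR_ENABLE_SIZE    2       /* 2 enable bits per register */

/* DR7 global-enable bit for slot n: G0 is bit 1, G1 bit 3, G2 bit 5, G3 bit 7. */
static inline unsigned long dr7_global_enable(int n)
{
        return (unsigned long)DR_GLOBAL_ENABLE << (n * DR_ENABLE_SIZE);
}

/* Nonzero if slot n's trap status bit is set in a (virtualized) DR6 value. */
static inline int dr6_slot_fired(unsigned long dr6, int n)
{
        return !!(dr6 & DR_TRAP_BITS & (DR_TRAP0 << n));
}
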
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..1acb4d45de70
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,55 @@
1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H
3
4#ifdef __KERNEL__
5#define __ARCH_HW_BREAKPOINT_H
6
7struct arch_hw_breakpoint {
8 char *name; /* Contains name of the symbol to set bkpt */
9 unsigned long address;
10 u8 len;
11 u8 type;
12};
13
14#include <linux/kdebug.h>
15#include <asm-generic/hw_breakpoint.h>
16
17/* Available HW breakpoint length encodings */
18#define HW_BREAKPOINT_LEN_1 0x40
19#define HW_BREAKPOINT_LEN_2 0x44
20#define HW_BREAKPOINT_LEN_4 0x4c
21#define HW_BREAKPOINT_LEN_EXECUTE 0x40
22
23#ifdef CONFIG_X86_64
24#define HW_BREAKPOINT_LEN_8 0x48
25#endif
26
27/* Available HW breakpoint type encodings */
28
29/* trigger on instruction execute */
30#define HW_BREAKPOINT_EXECUTE 0x80
31/* trigger on memory write */
32#define HW_BREAKPOINT_WRITE 0x81
33/* trigger on memory read or write */
34#define HW_BREAKPOINT_RW 0x83
35
36/* Total number of available HW breakpoint registers */
37#define HBP_NUM 4
38
39extern struct hw_breakpoint *hbp_kernel[HBP_NUM];
40DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
41extern unsigned int hbp_user_refcount[HBP_NUM];
42
43extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
44extern void arch_uninstall_thread_hw_breakpoint(void);
45extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
46extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
47 struct task_struct *tsk);
48extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
49extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
50extern void arch_update_kernel_hw_breakpoint(void *);
51extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
52 unsigned long val, void *data);
53#endif /* __KERNEL__ */
54#endif /* _I386_HW_BREAKPOINT_H */
55
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1cf..2b03f700d3f2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -29,6 +29,7 @@ struct mm_struct;
29#include <linux/threads.h> 29#include <linux/threads.h>
30#include <linux/init.h> 30#include <linux/init.h>
31 31
32#define HBP_NUM 4
32/* 33/*
33 * Default implementation of macro that returns current 34 * Default implementation of macro that returns current
34 * instruction pointer ("program counter"). 35 * instruction pointer ("program counter").
@@ -433,12 +434,11 @@ struct thread_struct {
433#endif 434#endif
434 unsigned long gs; 435 unsigned long gs;
435 /* Hardware debugging registers: */ 436 /* Hardware debugging registers: */
436 unsigned long debugreg0; 437 unsigned long debugreg[HBP_NUM];
437 unsigned long debugreg1;
438 unsigned long debugreg2;
439 unsigned long debugreg3;
440 unsigned long debugreg6; 438 unsigned long debugreg6;
441 unsigned long debugreg7; 439 unsigned long debugreg7;
440 /* Hardware breakpoint info */
441 struct hw_breakpoint *hbp[HBP_NUM];
442 /* Fault info: */ 442 /* Fault info: */
443 unsigned long cr2; 443 unsigned long cr2;
444 unsigned long trap_no; 444 unsigned long trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f3477bb84566..b67efd1cf59b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
36obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 36obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
37obj-y += bootflag.o e820.o 37obj-y += bootflag.o e820.o
38obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 38obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
39obj-y += alternative.o i8253.o pci-nommu.o 39obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
40obj-y += tsc.o io_delay.o rtc.o 40obj-y += tsc.o io_delay.o rtc.o
41 41
42obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 42obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..51d959528b1d
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,391 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 */
24
25#include <linux/irqflags.h>
26#include <linux/notifier.h>
27#include <linux/kallsyms.h>
28#include <linux/kprobes.h>
29#include <linux/percpu.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/sched.h>
34#include <linux/init.h>
35#include <linux/smp.h>
36
37#include <asm/hw_breakpoint.h>
38#include <asm/processor.h>
39#include <asm/debugreg.h>
40
41/* Unmasked kernel DR7 value */
42static unsigned long kdr7;
43
44/*
45 * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
46 * Used to clear and verify the status of bits corresponding to DR0 - DR3
47 */
48static const unsigned long dr7_masks[HBP_NUM] = {
49 0x000f0003, /* LEN0, R/W0, G0, L0 */
50 0x00f0000c, /* LEN1, R/W1, G1, L1 */
51 0x0f000030, /* LEN2, R/W2, G2, L2 */
52 0xf00000c0 /* LEN3, R/W3, G3, L3 */
53};
54
55
56/*
57 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
58 * as stored in debug register 7.
59 */
60static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
61{
62 unsigned long bp_info;
63
64 bp_info = (len | type) & 0xf;
65 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
66 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
67 DR_GLOBAL_SLOWDOWN;
68 return bp_info;
69}
70
71void arch_update_kernel_hw_breakpoint(void *unused)
72{
73 struct hw_breakpoint *bp;
74 int i, cpu = get_cpu();
75 unsigned long temp_kdr7 = 0;
76
77 /* Don't allow debug exceptions while we update the registers */
78 set_debugreg(0UL, 7);
79
80 for (i = hbp_kernel_pos; i < HBP_NUM; i++) {
81 per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
82 if (bp) {
83 temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
84 set_debugreg(bp->info.address, i);
85 }
86 }
87
88 /* No need to set DR6. Update the debug registers with kernel-space
89 * breakpoint values from kdr7 and user-space requests from the
90 * current process
91 */
92 kdr7 = temp_kdr7;
93 set_debugreg(kdr7 | current->thread.debugreg7, 7);
94 put_cpu();
95}
96
97/*
98 * Install the thread breakpoints in their debug registers.
99 */
100void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
101{
102 struct thread_struct *thread = &(tsk->thread);
103
104 switch (hbp_kernel_pos) {
105 case 4:
106 set_debugreg(thread->debugreg[3], 3);
107 case 3:
108 set_debugreg(thread->debugreg[2], 2);
109 case 2:
110 set_debugreg(thread->debugreg[1], 1);
111 case 1:
112 set_debugreg(thread->debugreg[0], 0);
113 default:
114 break;
115 }
116
117 /* No need to set DR6 */
118 set_debugreg((kdr7 | thread->debugreg7), 7);
119}
120
121/*
122 * Install the debug register values for just the kernel, no thread.
123 */
124void arch_uninstall_thread_hw_breakpoint(void)
125{
126 /* Clear the user-space portion of debugreg7 by setting only kdr7 */
127 set_debugreg(kdr7, 7);
128
129}
130
131static int get_hbp_len(u8 hbp_len)
132{
133 unsigned int len_in_bytes = 0;
134
135 switch (hbp_len) {
136 case HW_BREAKPOINT_LEN_1:
137 len_in_bytes = 1;
138 break;
139 case HW_BREAKPOINT_LEN_2:
140 len_in_bytes = 2;
141 break;
142 case HW_BREAKPOINT_LEN_4:
143 len_in_bytes = 4;
144 break;
145#ifdef CONFIG_X86_64
146 case HW_BREAKPOINT_LEN_8:
147 len_in_bytes = 8;
148 break;
149#endif
150 }
151 return len_in_bytes;
152}
153
154/*
155 * Check for virtual address in user space.
156 */
157int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
158{
159 unsigned int len;
160
161 len = get_hbp_len(hbp_len);
162
163 return (va <= TASK_SIZE - len);
164}
165
166/*
167 * Check for virtual address in kernel space.
168 */
169int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
170{
171 unsigned int len;
172
173 len = get_hbp_len(hbp_len);
174
175 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
176}
177
178/*
179 * Store a breakpoint's encoded address, length, and type.
180 */
181static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
182{
183 /*
184 * User-space requests will always have the address field populated
185 * Symbol names from user-space are rejected
186 */
187 if (tsk && bp->info.name)
188 return -EINVAL;
189 /*
190 * For kernel-addresses, either the address or symbol name can be
191 * specified.
192 */
193 if (bp->info.name)
194 bp->info.address = (unsigned long)
195 kallsyms_lookup_name(bp->info.name);
196 if (bp->info.address)
197 return 0;
198 return -EINVAL;
199}
200
201/*
202 * Validate the arch-specific HW Breakpoint register settings
203 */
204int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
205 struct task_struct *tsk)
206{
207 unsigned int align;
208 int ret = -EINVAL;
209
210 switch (bp->info.type) {
211 /*
212 * Ptrace-refactoring code
213 * For now, we'll allow instruction breakpoint only for user-space
214 * addresses
215 */
216 case HW_BREAKPOINT_EXECUTE:
217 if ((!arch_check_va_in_userspace(bp->info.address,
218 bp->info.len)) &&
219 bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
220 return ret;
221 break;
222 case HW_BREAKPOINT_WRITE:
223 break;
224 case HW_BREAKPOINT_RW:
225 break;
226 default:
227 return ret;
228 }
229
230 switch (bp->info.len) {
231 case HW_BREAKPOINT_LEN_1:
232 align = 0;
233 break;
234 case HW_BREAKPOINT_LEN_2:
235 align = 1;
236 break;
237 case HW_BREAKPOINT_LEN_4:
238 align = 3;
239 break;
240#ifdef CONFIG_X86_64
241 case HW_BREAKPOINT_LEN_8:
242 align = 7;
243 break;
244#endif
245 default:
246 return ret;
247 }
248
249 if (bp->triggered)
250 ret = arch_store_info(bp, tsk);
251
252 if (ret < 0)
253 return ret;
254 /*
255 * Check that the low-order bits of the address are appropriate
256 * for the alignment implied by len.
257 */
258 if (bp->info.address & align)
259 return -EINVAL;
260
261 /* Check that the virtual address is in the proper range */
262 if (tsk) {
263 if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
264 return -EFAULT;
265 } else {
266 if (!arch_check_va_in_kernelspace(bp->info.address,
267 bp->info.len))
268 return -EFAULT;
269 }
270 return 0;
271}
272
273void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
274{
275 struct thread_struct *thread = &(tsk->thread);
276 struct hw_breakpoint *bp = thread->hbp[pos];
277
278 thread->debugreg7 &= ~dr7_masks[pos];
279 if (bp) {
280 thread->debugreg[pos] = bp->info.address;
281 thread->debugreg7 |= encode_dr7(pos, bp->info.len,
282 bp->info.type);
283 } else
284 thread->debugreg[pos] = 0;
285}
286
287void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
288{
289 int i;
290 struct thread_struct *thread = &(tsk->thread);
291
292 thread->debugreg7 = 0;
293 for (i = 0; i < HBP_NUM; i++)
294 thread->debugreg[i] = 0;
295}
296
297/*
298 * Handle debug exception notifications.
299 *
300 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
301 *
302 * NOTIFY_DONE returned if one of the following conditions is true.
303 * i) When the causative address is from user-space and the exception
304 * is a valid one, i.e. not triggered as a result of lazy debug register
305 * switching
306 * ii) When there are more bits than trap<n> set in DR6 register (such
307 * as BD, BS or BT) indicating that more than one debug condition is
308 * met and requires some more action in do_debug().
309 *
310 * NOTIFY_STOP returned for all other cases
311 *
312 */
313int __kprobes hw_breakpoint_handler(struct die_args *args)
314{
315 int i, cpu, rc = NOTIFY_STOP;
316 struct hw_breakpoint *bp;
317 unsigned long dr7, dr6;
318 unsigned long *dr6_p;
319
320 /* The DR6 value is pointed by args->err */
321 dr6_p = (unsigned long *)ERR_PTR(args->err);
322 dr6 = *dr6_p;
323
324 /* Do an early return if no trap bits are set in DR6 */
325 if ((dr6 & DR_TRAP_BITS) == 0)
326 return NOTIFY_DONE;
327
328 /* Lazy debug register switching */
329 if (!test_tsk_thread_flag(current, TIF_DEBUG))
330 arch_uninstall_thread_hw_breakpoint();
331
332 get_debugreg(dr7, 7);
333 /* Disable breakpoints during exception handling */
334 set_debugreg(0UL, 7);
335 /*
336 * Assert that local interrupts are disabled
337 * Reset the DRn bits in the virtualized register value.
338 * The ptrace trigger routine will add in whatever is needed.
339 */
340 current->thread.debugreg6 &= ~DR_TRAP_BITS;
341 cpu = get_cpu();
342
343 /* Handle all the breakpoints that were triggered */
344 for (i = 0; i < HBP_NUM; ++i) {
345 if (likely(!(dr6 & (DR_TRAP0 << i))))
346 continue;
347 /*
348 * Find the corresponding hw_breakpoint structure and
349 * invoke its triggered callback.
350 */
351 if (i >= hbp_kernel_pos)
352 bp = per_cpu(this_hbp_kernel[i], cpu);
353 else {
354 bp = current->thread.hbp[i];
355 if (bp)
356 rc = NOTIFY_DONE;
357 }
358 /*
359 * Reset the 'i'th TRAP bit in dr6 to denote completion of
360 * exception handling
361 */
362 (*dr6_p) &= ~(DR_TRAP0 << i);
363 /*
364 * bp can be NULL due to lazy debug register switching
365 * or due to the delay between updates of hbp_kernel_pos
366 * and this_hbp_kernel.
367 */
368 if (!bp)
369 continue;
370
371 (bp->triggered)(bp, args->regs);
372 }
373 if (dr6 & (~DR_TRAP_BITS))
374 rc = NOTIFY_DONE;
375
376 set_debugreg(dr7, 7);
377 put_cpu();
378 return rc;
379}
380
381/*
382 * Handle debug exception notifications.
383 */
384int __kprobes hw_breakpoint_exceptions_notify(
385 struct notifier_block *unused, unsigned long val, void *data)
386{
387 if (val != DIE_DEBUG)
388 return NOTIFY_DONE;
389
390 return hw_breakpoint_handler(data);
391}
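
To make the DR7 packing performed by encode_dr7() concrete, the sketch below replays the same arithmetic in a stand-alone user-space program and prints the control value for a 4-byte write watchpoint in slot 0. DR_CONTROL_SHIFT (16) and DR_CONTROL_SIZE (4) do not appear in the hunk above; the values used here are the usual x86 ones and are an assumption of this example.

#include <stdio.h>

/* Constants mirrored from asm/debugreg.h and asm/hw_breakpoint.h. */
#define DR_CONTROL_SHIFT     16      /* assumed: LEN0/R-W0 field starts at bit 16 */
#define DR_CONTROL_SIZE      4       /* assumed: 4 control bits per slot */
#define DR_ENABLE_SIZE       2
#define DR_GLOBAL_ENABLE     0x2
#define DR_GLOBAL_SLOWDOWN   0x200
#define HW_BREAKPOINT_LEN_4  0x4c
#define HW_BREAKPOINT_WRITE  0x81

/* Same arithmetic as encode_dr7() in the hunk above. */
static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        unsigned long bp_info = (len | type) & 0xf;

        bp_info <<= DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE;
        return bp_info |
               ((unsigned long)DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
               DR_GLOBAL_SLOWDOWN;
}

int main(void)
{
        /* Slot 0, 4-byte write watchpoint: LEN0=0b11, R/W0=0b01, plus G0 and GE. */
        printf("%#lx\n", encode_dr7(0, HW_BREAKPOINT_LEN_4, HW_BREAKPOINT_WRITE));
        /* Prints 0xd0202; dr7_masks[0] (0x000f0003) covers the LEN0/R-W0 and L0/G0 bits. */
        return 0;
}
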
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45 45
46#include <asm/debugreg.h>
46#include <asm/apicdef.h> 47#include <asm/apicdef.h>
47#include <asm/system.h> 48#include <asm/system.h>
48 49
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
434 "resuming...\n"); 435 "resuming...\n");
435 kgdb_arch_handle_exception(args->trapnr, args->signr, 436 kgdb_arch_handle_exception(args->trapnr, args->signr,
436 args->err, "c", "", regs); 437 args->err, "c", "", regs);
438 /*
439 * Reset the BS bit in dr6 (pointed by args->err) to
440 * denote completion of processing
441 */
442 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
437 443
438 return NOTIFY_STOP; 444 return NOTIFY_STOP;
439} 445}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..b5b1848c5336 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -54,6 +54,7 @@
54#include <asm/pgtable.h> 54#include <asm/pgtable.h>
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/alternative.h> 56#include <asm/alternative.h>
57#include <asm/debugreg.h>
57 58
58void jprobe_return_end(void); 59void jprobe_return_end(void);
59 60
@@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
967 ret = NOTIFY_STOP; 968 ret = NOTIFY_STOP;
968 break; 969 break;
969 case DIE_DEBUG: 970 case DIE_DEBUG:
970 if (post_kprobe_handler(args->regs)) 971 if (post_kprobe_handler(args->regs)) {
972 /*
973 * Reset the BS bit in dr6 (pointed by args->err) to
974 * denote completion of processing
975 */
976 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
971 ret = NOTIFY_STOP; 977 ret = NOTIFY_STOP;
978 }
972 break; 979 break;
973 case DIE_GPF: 980 case DIE_GPF:
974 /* 981 /*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/debugreg.h>
28 29
29static void set_idt(void *newidt, __u16 limit) 30static void set_idt(void *newidt, __u16 limit)
30{ 31{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
202 203
203 /* Interrupts aren't acceptable while we reboot */ 204 /* Interrupts aren't acceptable while we reboot */
204 local_irq_disable(); 205 local_irq_disable();
206 hw_breakpoint_disable();
205 207
206 if (image->preserve_context) { 208 if (image->preserve_context) {
207#ifdef CONFIG_X86_IO_APIC 209#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/debugreg.h>
21 22
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd, 23static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr) 24 unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
282 283
283 /* Interrupts aren't acceptable while we reboot */ 284 /* Interrupts aren't acceptable while we reboot */
284 local_irq_disable(); 285 local_irq_disable();
286 hw_breakpoint_disable();
285 287
286 if (image->preserve_context) { 288 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC 289#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 994dd6a4a2a0..fc6e4b773fc4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/i387.h> 18#include <asm/i387.h>
19#include <asm/ds.h> 19#include <asm/ds.h>
20#include <asm/debugreg.h>
21#include <asm/hw_breakpoint.h>
20 22
21unsigned long idle_halt; 23unsigned long idle_halt;
22EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_struct *tsk)
48 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 50 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
49 tsk->thread.xstate = NULL; 51 tsk->thread.xstate = NULL;
50 } 52 }
53 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
54 flush_thread_hw_breakpoint(tsk);
51 55
52 WARN(tsk->thread.ds_ctx, "leaking DS context\n"); 56 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
53} 57}
@@ -108,12 +112,8 @@ void flush_thread(void)
108 112
109 clear_tsk_thread_flag(tsk, TIF_DEBUG); 113 clear_tsk_thread_flag(tsk, TIF_DEBUG);
110 114
111 tsk->thread.debugreg0 = 0; 115 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
112 tsk->thread.debugreg1 = 0; 116 flush_thread_hw_breakpoint(tsk);
113 tsk->thread.debugreg2 = 0;
114 tsk->thread.debugreg3 = 0;
115 tsk->thread.debugreg6 = 0;
116 tsk->thread.debugreg7 = 0;
117 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 117 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
118 /* 118 /*
119 * Forget coprocessor state.. 119 * Forget coprocessor state..
@@ -195,16 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
195 else if (next->debugctlmsr != prev->debugctlmsr) 195 else if (next->debugctlmsr != prev->debugctlmsr)
196 update_debugctlmsr(next->debugctlmsr); 196 update_debugctlmsr(next->debugctlmsr);
197 197
198 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
199 set_debugreg(next->debugreg0, 0);
200 set_debugreg(next->debugreg1, 1);
201 set_debugreg(next->debugreg2, 2);
202 set_debugreg(next->debugreg3, 3);
203 /* no 4 and 5 */
204 set_debugreg(next->debugreg6, 6);
205 set_debugreg(next->debugreg7, 7);
206 }
207
208 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 198 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
209 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 199 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
210 /* prev and next are different */ 200 /* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..00a8fe4c58bb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
58#include <asm/idle.h> 58#include <asm/idle.h>
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h>
62#include <asm/hw_breakpoint.h>
61 63
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 64asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 65
@@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
262 264
263 task_user_gs(p) = get_user_gs(regs); 265 task_user_gs(p) = get_user_gs(regs);
264 266
267 p->thread.io_bitmap_ptr = NULL;
265 tsk = current; 268 tsk = current;
269 err = -ENOMEM;
270 if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
271 if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
272 goto out;
273
266 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 274 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
267 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 275 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
268 IO_BITMAP_BYTES, GFP_KERNEL); 276 IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
282 err = do_set_thread_area(p, -1, 290 err = do_set_thread_area(p, -1,
283 (struct user_desc __user *)childregs->si, 0); 291 (struct user_desc __user *)childregs->si, 0);
284 292
293out:
285 if (err && p->thread.io_bitmap_ptr) { 294 if (err && p->thread.io_bitmap_ptr) {
286 kfree(p->thread.io_bitmap_ptr); 295 kfree(p->thread.io_bitmap_ptr);
287 p->thread.io_bitmap_max = 0; 296 p->thread.io_bitmap_max = 0;
288 } 297 }
298 if (err)
299 flush_thread_hw_breakpoint(p);
289 300
290 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); 301 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
291 p->thread.ds_ctx = NULL; 302 p->thread.ds_ctx = NULL;
@@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
424 lazy_load_gs(next->gs); 435 lazy_load_gs(next->gs);
425 436
426 percpu_write(current_task, next_p); 437 percpu_write(current_task, next_p);
438 /*
439 * There's a problem with moving the arch_install_thread_hw_breakpoint()
440 * call before current is updated. Suppose a kernel breakpoint is
441 * triggered in between the two, the hw-breakpoint handler will see that
442 * the 'current' task does not have TIF_DEBUG flag set and will think it
443 * is leftover from an old task (lazy switching) and will erase it. Then
444 * until the next context switch, no user-breakpoints will be installed.
445 *
446 * The real problem is that it's impossible to update both current and
447 * physical debug registers at the same instant, so there will always be
448 * a window in which they disagree and a breakpoint might get triggered.
449 * Since we use lazy switching, we are forced to assume that a
450 * disagreement means that current is correct and the exception is due
451 * to lazy debug register switching.
452 */
453 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
454 arch_install_thread_hw_breakpoint(next_p);
427 455
428 return prev_p; 456 return prev_p;
429} 457}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..89c46f1259d3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
52#include <asm/idle.h> 52#include <asm/idle.h>
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h>
56#include <asm/hw_breakpoint.h>
55 57
56asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
57 59
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *dead_task)
245 BUG(); 247 BUG();
246 } 248 }
247 } 249 }
250 if (unlikely(dead_task->thread.debugreg7))
251 flush_thread_hw_breakpoint(dead_task);
248} 252}
249 253
250static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 254static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
300 304
301 p->thread.fs = me->thread.fs; 305 p->thread.fs = me->thread.fs;
302 p->thread.gs = me->thread.gs; 306 p->thread.gs = me->thread.gs;
307 p->thread.io_bitmap_ptr = NULL;
303 308
304 savesegment(gs, p->thread.gsindex); 309 savesegment(gs, p->thread.gsindex);
305 savesegment(fs, p->thread.fsindex); 310 savesegment(fs, p->thread.fsindex);
306 savesegment(es, p->thread.es); 311 savesegment(es, p->thread.es);
307 savesegment(ds, p->thread.ds); 312 savesegment(ds, p->thread.ds);
308 313
314 err = -ENOMEM;
315 if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
316 if (copy_thread_hw_breakpoint(me, p, clone_flags))
317 goto out;
318
309 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 319 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
310 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 320 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
311 if (!p->thread.io_bitmap_ptr) { 321 if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +354,9 @@ out:
344 kfree(p->thread.io_bitmap_ptr); 354 kfree(p->thread.io_bitmap_ptr);
345 p->thread.io_bitmap_max = 0; 355 p->thread.io_bitmap_max = 0;
346 } 356 }
357 if (err)
358 flush_thread_hw_breakpoint(p);
359
347 return err; 360 return err;
348} 361}
349 362
@@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
489 */ 502 */
490 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 503 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
491 math_state_restore(); 504 math_state_restore();
505 /*
506 * There's a problem with moving the arch_install_thread_hw_breakpoint()
507 * call before current is updated. Suppose a kernel breakpoint is
508 * triggered in between the two, the hw-breakpoint handler will see that
509 * the 'current' task does not have TIF_DEBUG flag set and will think it
510 * is leftover from an old task (lazy switching) and will erase it. Then
511 * until the next context switch, no user-breakpoints will be installed.
512 *
513 * The real problem is that it's impossible to update both current and
514 * physical debug registers at the same instant, so there will always be
515 * a window in which they disagree and a breakpoint might get triggered.
516 * Since we use lazy switching, we are forced to assume that a
517 * disagreement means that current is correct and the exception is due
518 * to lazy debug register switching.
519 */
520 if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
521 arch_install_thread_hw_breakpoint(next_p);
522
492 return prev_p; 523 return prev_p;
493} 524}
494 525
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 09ecbde91c13..b457f78b7dbf 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -34,6 +34,7 @@
34#include <asm/prctl.h> 34#include <asm/prctl.h>
35#include <asm/proto.h> 35#include <asm/proto.h>
36#include <asm/ds.h> 36#include <asm/ds.h>
37#include <asm/hw_breakpoint.h>
37 38
38#include <trace/syscall.h> 39#include <trace/syscall.h>
39 40
@@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task,
136 return 0; 137 return 0;
137} 138}
138 139
139static unsigned long debugreg_addr_limit(struct task_struct *task)
140{
141 return TASK_SIZE - 3;
142}
143
144#else /* CONFIG_X86_64 */ 140#else /* CONFIG_X86_64 */
145 141
146#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 142#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task,
265 return 0; 261 return 0;
266} 262}
267 263
268static unsigned long debugreg_addr_limit(struct task_struct *task)
269{
270#ifdef CONFIG_IA32_EMULATION
271 if (test_tsk_thread_flag(task, TIF_IA32))
272 return IA32_PAGE_OFFSET - 3;
273#endif
274 return TASK_SIZE_MAX - 7;
275}
276
277#endif /* CONFIG_X86_32 */ 264#endif /* CONFIG_X86_32 */
278 265
279static unsigned long get_flags(struct task_struct *task) 266static unsigned long get_flags(struct task_struct *task)
@@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target,
464} 451}
465 452
466/* 453/*
467 * This function is trivial and will be inlined by the compiler. 454 * Decode the length and type bits for a particular breakpoint as
468 * Having it separates the implementation details of debug 455 * stored in debug register 7. Return the "enabled" status.
469 * registers from the interface details of ptrace.
470 */ 456 */
471static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 457static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
458 unsigned *type)
472{ 459{
473 switch (n) { 460 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
474 case 0: return child->thread.debugreg0; 461
475 case 1: return child->thread.debugreg1; 462 *len = (bp_info & 0xc) | 0x40;
476 case 2: return child->thread.debugreg2; 463 *type = (bp_info & 0x3) | 0x80;
477 case 3: return child->thread.debugreg3; 464 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
478 case 6: return child->thread.debugreg6;
479 case 7: return child->thread.debugreg7;
480 }
481 return 0;
482} 465}
483 466
484static int ptrace_set_debugreg(struct task_struct *child, 467static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
485 int n, unsigned long data)
486{ 468{
469 struct thread_struct *thread = &(current->thread);
487 int i; 470 int i;
488 471
489 if (unlikely(n == 4 || n == 5)) 472 /*
490 return -EIO; 473 * Store in the virtual DR6 register the fact that the breakpoint
474 * was hit so the thread's debugger will see it.
475 */
476 for (i = 0; i < hbp_kernel_pos; i++)
477 /*
478 * We will check bp->info.address against the address stored in
479 * thread's hbp structure and not debugreg[i]. This is to ensure
480 * that the corresponding bit for 'i' in DR7 register is enabled
481 */
482 if (bp->info.address == thread->hbp[i]->info.address)
483 break;
491 484
492 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 485 thread->debugreg6 |= (DR_TRAP0 << i);
493 return -EIO; 486}
494 487
495 switch (n) { 488/*
496 case 0: child->thread.debugreg0 = data; break; 489 * Handle ptrace writes to debug register 7.
497 case 1: child->thread.debugreg1 = data; break; 490 */
498 case 2: child->thread.debugreg2 = data; break; 491static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
499 case 3: child->thread.debugreg3 = data; break; 492{
493 struct thread_struct *thread = &(tsk->thread);
494 unsigned long old_dr7 = thread->debugreg7;
495 int i, orig_ret = 0, rc = 0;
496 int enabled, second_pass = 0;
497 unsigned len, type;
498 struct hw_breakpoint *bp;
499
500 data &= ~DR_CONTROL_RESERVED;
501restore:
502 /*
503 * Loop through all the hardware breakpoints, making the
504 * appropriate changes to each.
505 */
506 for (i = 0; i < HBP_NUM; i++) {
507 enabled = decode_dr7(data, i, &len, &type);
508 bp = thread->hbp[i];
509
510 if (!enabled) {
511 if (bp) {
512 /* Don't unregister the breakpoints right-away,
513 * unless all register_user_hw_breakpoint()
514 * requests have succeeded. This prevents
515 * any window of opportunity for debug
516 * register grabbing by other users.
517 */
518 if (!second_pass)
519 continue;
520 unregister_user_hw_breakpoint(tsk, bp);
521 kfree(bp);
522 }
523 continue;
524 }
525 if (!bp) {
526 rc = -ENOMEM;
527 bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
528 if (bp) {
529 bp->info.address = thread->debugreg[i];
530 bp->triggered = ptrace_triggered;
531 bp->info.len = len;
532 bp->info.type = type;
533 rc = register_user_hw_breakpoint(tsk, bp);
534 if (rc)
535 kfree(bp);
536 }
537 } else
538 rc = modify_user_hw_breakpoint(tsk, bp);
539 if (rc)
540 break;
541 }
542 /*
543 * Make a second pass to free the remaining unused breakpoints
544 * or to restore the original breakpoints if an error occurred.
545 */
546 if (!second_pass) {
547 second_pass = 1;
548 if (rc < 0) {
549 orig_ret = rc;
550 data = old_dr7;
551 }
552 goto restore;
553 }
554 return ((orig_ret < 0) ? orig_ret : rc);
555}
500 556
501 case 6: 557/*
502 if ((data & ~0xffffffffUL) != 0) 558 * Handle PTRACE_PEEKUSR calls for the debug register area.
503 return -EIO; 559 */
504 child->thread.debugreg6 = data; 560unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
505 break; 561{
562 struct thread_struct *thread = &(tsk->thread);
563 unsigned long val = 0;
564
565 if (n < HBP_NUM)
566 val = thread->debugreg[n];
567 else if (n == 6)
568 val = thread->debugreg6;
569 else if (n == 7)
570 val = thread->debugreg7;
571 return val;
572}
506 573
507 case 7: 574/*
508 /* 575 * Handle PTRACE_POKEUSR calls for the debug register area.
509 * Sanity-check data. Take one half-byte at once with 576 */
510 * check = (val >> (16 + 4*i)) & 0xf. It contains the 577int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
511 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits 578{
512 * 2 and 3 are LENi. Given a list of invalid values, 579 struct thread_struct *thread = &(tsk->thread);
513 * we do mask |= 1 << invalid_value, so that 580 int rc = 0;
514 * (mask >> check) & 1 is a correct test for invalid 581
515 * values. 582 /* There are no DR4 or DR5 registers */
516 * 583 if (n == 4 || n == 5)
517 * R/Wi contains the type of the breakpoint / 584 return -EIO;
518 * watchpoint, LENi contains the length of the watched 585
519 * data in the watchpoint case. 586 if (n == 6) {
520 * 587 tsk->thread.debugreg6 = val;
521 * The invalid values are: 588 goto ret_path;
522 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
523 * - R/Wi == 0x10 (break on I/O reads or writes), so
524 * mask |= 0x4444.
525 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
526 * 0x1110.
527 *
528 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
529 *
530 * See the Intel Manual "System Programming Guide",
531 * 15.2.4
532 *
533 * Note that LENi == 0x10 is defined on x86_64 in long
534 * mode (i.e. even for 32-bit userspace software, but
535 * 64-bit kernel), so the x86_64 mask value is 0x5454.
536 * See the AMD manual no. 24593 (AMD64 System Programming)
537 */
538#ifdef CONFIG_X86_32
539#define DR7_MASK 0x5f54
540#else
541#define DR7_MASK 0x5554
542#endif
543 data &= ~DR_CONTROL_RESERVED;
544 for (i = 0; i < 4; i++)
545 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
546 return -EIO;
547 child->thread.debugreg7 = data;
548 if (data)
549 set_tsk_thread_flag(child, TIF_DEBUG);
550 else
551 clear_tsk_thread_flag(child, TIF_DEBUG);
552 break;
553 } 589 }
590 if (n < HBP_NUM) {
591 if (thread->hbp[n]) {
592 if (arch_check_va_in_userspace(val,
593 thread->hbp[n]->info.len) == 0) {
594 rc = -EIO;
595 goto ret_path;
596 }
597 thread->hbp[n]->info.address = val;
598 }
599 thread->debugreg[n] = val;
600 }
601 /* All that's left is DR7 */
602 if (n == 7)
603 rc = ptrace_write_dr7(tsk, val);
554 604
555 return 0; 605ret_path:
606 return rc;
556} 607}
557 608
558/* 609/*
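
Seen from a debugger, the rewritten path is still driven by ordinary PTRACE_POKEUSER writes to the u_debugreg area: storing an address in DR0 goes through ptrace_set_debugreg(), and writing DR7 lands in ptrace_write_dr7(), which registers a struct hw_breakpoint on the tracee's behalf. A minimal user-space sketch (illustrative only; `pid` must refer to an already-stopped tracee, and 0xd0001 is the usual DR7 value for a locally-enabled 4-byte write watchpoint in slot 0):

#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* Set a 4-byte write watchpoint on 'addr' in a stopped, traced child. */
static int set_write_watchpoint(pid_t pid, unsigned long addr)
{
        /* DR0 <- address being watched */
        if (ptrace(PTRACE_POKEUSER, pid,
                   offsetof(struct user, u_debugreg[0]), addr) == -1)
                return -1;

        /* DR7 <- L0 | (R/W0 = write) | (LEN0 = 4 bytes) == 0x000d0001 */
        return (int)ptrace(PTRACE_POKEUSER, pid,
                           offsetof(struct user, u_debugreg[7]), 0xd0001UL);
}
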
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c578751e94e..0f89a4f20db2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
799 799
800 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 800 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
801 if (signr > 0) { 801 if (signr > 0) {
802 /*
803 * Re-enable any watchpoints before delivering the
804 * signal to user space. The processor register will
805 * have been cleared if the watchpoint triggered
806 * inside the kernel.
807 */
808 if (current->thread.debugreg7)
809 set_debugreg(current->thread.debugreg7, 7);
810
811 /* Whee! Actually deliver the signal. */ 802 /* Whee! Actually deliver the signal. */
812 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { 803 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
813 /* 804 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2fecda69ee64..dee0f3d814af 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -63,6 +63,7 @@
63#include <asm/apic.h> 63#include <asm/apic.h>
64#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h> 65#include <asm/uv/uv.h>
66#include <asm/debugreg.h>
66#include <linux/mc146818rtc.h> 67#include <linux/mc146818rtc.h>
67 68
68#include <asm/smpboot_hooks.h> 69#include <asm/smpboot_hooks.h>
@@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused)
326 setup_secondary_clock(); 327 setup_secondary_clock();
327 328
328 wmb(); 329 wmb();
330 load_debug_registers();
329 cpu_idle(); 331 cpu_idle();
330} 332}
331 333
@@ -1254,6 +1256,7 @@ void cpu_disable_common(void)
1254 remove_cpu_from_maps(cpu); 1256 remove_cpu_from_maps(cpu);
1255 unlock_vector_lock(); 1257 unlock_vector_lock();
1256 fixup_irqs(); 1258 fixup_irqs();
1259 hw_breakpoint_disable();
1257} 1260}
1258 1261
1259int native_cpu_disable(void) 1262int native_cpu_disable(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5f935f0d5861..286d64eba31b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -530,77 +530,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
530dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 530dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
531{ 531{
532 struct task_struct *tsk = current; 532 struct task_struct *tsk = current;
533 unsigned long condition; 533 unsigned long dr6;
534 int si_code; 534 int si_code;
535 535
536 get_debugreg(condition, 6); 536 get_debugreg(dr6, 6);
537 537
538 /* Catch kmemcheck conditions first of all! */ 538 /* Catch kmemcheck conditions first of all! */
539 if (condition & DR_STEP && kmemcheck_trap(regs)) 539 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
540 return; 540 return;
541 541
542 /* DR6 may or may not be cleared by the CPU */
543 set_debugreg(0, 6);
542 /* 544 /*
543 * The processor cleared BTF, so don't mark that we need it set. 545 * The processor cleared BTF, so don't mark that we need it set.
544 */ 546 */
545 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 547 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
546 tsk->thread.debugctlmsr = 0; 548 tsk->thread.debugctlmsr = 0;
547 549
548 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 550 /* Store the virtualized DR6 value */
549 SIGTRAP) == NOTIFY_STOP) 551 tsk->thread.debugreg6 = dr6;
552
553 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
554 SIGTRAP) == NOTIFY_STOP)
550 return; 555 return;
551 556
552 /* It's safe to allow irq's after DR6 has been saved */ 557 /* It's safe to allow irq's after DR6 has been saved */
553 preempt_conditional_sti(regs); 558 preempt_conditional_sti(regs);
554 559
555 /* Mask out spurious debug traps due to lazy DR7 setting */ 560 if (regs->flags & X86_VM_MASK) {
556 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 561 handle_vm86_trap((struct kernel_vm86_regs *) regs,
557 if (!tsk->thread.debugreg7) 562 error_code, 1);
558 goto clear_dr7; 563 return;
559 } 564 }
560 565
561#ifdef CONFIG_X86_32
562 if (regs->flags & X86_VM_MASK)
563 goto debug_vm86;
564#endif
565
566 /* Save debug status register where ptrace can see it */
567 tsk->thread.debugreg6 = condition;
568
569 /* 566 /*
570 * Single-stepping through TF: make sure we ignore any events in 567 * Single-stepping through system calls: ignore any exceptions in
571 * kernel space (but re-enable TF when returning to user mode). 568 * kernel space, but re-enable TF when returning to user mode.
569 *
570 * We already checked v86 mode above, so we can check for kernel mode
571 * by just checking the CPL of CS.
572 */ 572 */
573 if (condition & DR_STEP) { 573 if ((dr6 & DR_STEP) && !user_mode(regs)) {
574 if (!user_mode(regs)) 574 tsk->thread.debugreg6 &= ~DR_STEP;
575 goto clear_TF_reenable; 575 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
576 regs->flags &= ~X86_EFLAGS_TF;
576 } 577 }
577 578 si_code = get_si_code(tsk->thread.debugreg6);
578 si_code = get_si_code(condition); 579 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
579 /* Ok, finally something we can handle */ 580 send_sigtrap(tsk, regs, error_code, si_code);
580 send_sigtrap(tsk, regs, error_code, si_code);
581
582 /*
583 * Disable additional traps. They'll be re-enabled when
584 * the signal is delivered.
585 */
586clear_dr7:
587 set_debugreg(0, 7);
588 preempt_conditional_cli(regs); 581 preempt_conditional_cli(regs);
589 return;
590 582
591#ifdef CONFIG_X86_32
592debug_vm86:
593 /* reenable preemption: handle_vm86_trap() might sleep */
594 dec_preempt_count();
595 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
596 conditional_cli(regs);
597 return;
598#endif
599
600clear_TF_reenable:
601 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
602 regs->flags &= ~X86_EFLAGS_TF;
603 preempt_conditional_cli(regs);
604 return; 583 return;
605} 584}
606 585
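
The convention established here is that do_debug() stores the virtualized DR6 value in thread.debugreg6 and hands the die notifier a pointer to its local dr6 copy through the err field (PTR_ERR() on the way in, ERR_PTR() on the way out), so DIE_DEBUG consumers such as kgdb, kprobes and kmmio can clear the bits they have handled. A sketch of a notifier following that convention (illustrative only; the function name is made up):

#include <linux/notifier.h>
#include <linux/kdebug.h>
#include <linux/err.h>
#include <asm/debugreg.h>

/* Illustrative DIE_DEBUG notifier; not part of the patch. */
static int my_debug_notify(struct notifier_block *nb, unsigned long val,
                           void *data)
{
        struct die_args *args = data;
        unsigned long *dr6_p;

        if (val != DIE_DEBUG)
                return NOTIFY_DONE;

        /* args->err carries a pointer to do_debug()'s dr6 copy. */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        if (!(*dr6_p & DR_STEP))
                return NOTIFY_DONE;

        /* ... consume the single-step event here ... */

        *dr6_p &= ~DR_STEP;     /* tell the caller this condition was handled */
        return NOTIFY_STOP;
}
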
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
540 struct die_args *arg = args; 540 struct die_args *arg = args;
541 541
542 if (val == DIE_DEBUG && (arg->err & DR_STEP)) 542 if (val == DIE_DEBUG && (arg->err & DR_STEP))
543 if (post_kmmio_handler(arg->err, arg->regs) == 1) 543 if (post_kmmio_handler(arg->err, arg->regs) == 1) {
544 /*
545 * Reset the BS bit in dr6 (pointed by args->err) to
546 * denote completion of processing
547 */
548 (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
544 return NOTIFY_STOP; 549 return NOTIFY_STOP;
550 }
545 551
546 return NOTIFY_DONE; 552 return NOTIFY_DONE;
547} 553}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d277ef1eea51..394cbb88987c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
18#include <asm/mce.h> 18#include <asm/mce.h>
19#include <asm/xcr.h> 19#include <asm/xcr.h>
20#include <asm/suspend.h> 20#include <asm/suspend.h>
21#include <asm/debugreg.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23static struct saved_context saved_context; 24static struct saved_context saved_context;
@@ -104,6 +105,7 @@ static void __save_processor_state(struct saved_context *ctxt)
104 ctxt->cr4 = read_cr4(); 105 ctxt->cr4 = read_cr4();
105 ctxt->cr8 = read_cr8(); 106 ctxt->cr8 = read_cr8();
106#endif 107#endif
108 hw_breakpoint_disable();
107} 109}
108 110
109/* Needed by apm.c */ 111/* Needed by apm.c */
@@ -146,27 +148,7 @@ static void fix_processor_context(void)
146 /* 148 /*
147 * Now maybe reload the debug registers 149 * Now maybe reload the debug registers
148 */ 150 */
149 if (current->thread.debugreg7) { 151 load_debug_registers();
150#ifdef CONFIG_X86_32
151 set_debugreg(current->thread.debugreg0, 0);
152 set_debugreg(current->thread.debugreg1, 1);
153 set_debugreg(current->thread.debugreg2, 2);
154 set_debugreg(current->thread.debugreg3, 3);
155 /* no 4 and 5 */
156 set_debugreg(current->thread.debugreg6, 6);
157 set_debugreg(current->thread.debugreg7, 7);
158#else
159 /* CONFIG_X86_64 */
160 loaddebug(&current->thread, 0);
161 loaddebug(&current->thread, 1);
162 loaddebug(&current->thread, 2);
163 loaddebug(&current->thread, 3);
164 /* no 4 and 5 */
165 loaddebug(&current->thread, 6);
166 loaddebug(&current->thread, 7);
167#endif
168 }
169
170} 152}
171 153
172/** 154/**
diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h
new file mode 100644
index 000000000000..9bf2d12eb74a
--- /dev/null
+++ b/include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,139 @@
1#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
2#define _ASM_GENERIC_HW_BREAKPOINT_H
3
4#ifndef __ARCH_HW_BREAKPOINT_H
5#error "Please don't include this file directly"
6#endif
7
8#ifdef __KERNEL__
9#include <linux/list.h>
10#include <linux/types.h>
11#include <linux/kallsyms.h>
12
13/**
14 * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
15 * @triggered: callback invoked after target address access
16 * @info: arch-specific breakpoint info (address, length, and type)
17 *
18 * %hw_breakpoint structures are the kernel's way of representing
19 * hardware breakpoints. These are data breakpoints
20 * (also known as "watchpoints", triggered on data access), and the breakpoint's
21 * target address can be located in either kernel space or user space.
22 *
23 * The breakpoint's address, length, and type are highly
24 * architecture-specific. The values are encoded in the @info field; you
25 * specify them when registering the breakpoint. To examine the encoded
26 * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
27 * below.
28 *
29 * The address is specified as a regular kernel pointer (for kernel-space
30 * breakpoints) or as an %__user pointer (for user-space breakpoints).
31 * With register_user_hw_breakpoint(), the address must refer to a
32 * location in user space. The breakpoint will be active only while the
33 * requested task is running. Conversely with
34 * register_kernel_hw_breakpoint(), the address must refer to a location
35 * in kernel space, and the breakpoint will be active on all CPUs
36 * regardless of the current task.
37 *
38 * The length is the breakpoint's extent in bytes, which is subject to
39 * certain limitations. include/asm/hw_breakpoint.h contains macros
40 * defining the available lengths for a specific architecture. Note that
41 * the address's alignment must match the length. The breakpoint will
42 * catch accesses to any byte in the range from address to address +
43 * (length - 1).
44 *
45 * The breakpoint's type indicates the sort of access that will cause it
46 * to trigger. Possible values may include:
47 *
48 * %HW_BREAKPOINT_RW (triggered on read or write access),
49 * %HW_BREAKPOINT_WRITE (triggered on write access), and
50 * %HW_BREAKPOINT_READ (triggered on read access).
51 *
52 * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
53 * possibilities are available on all architectures. Execute breakpoints
54 * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
55 *
56 * When a breakpoint is hit, the @triggered callback is invoked in interrupt
57 * context with a pointer to the %hw_breakpoint structure and the processor
58 * registers.
59 * Data breakpoints occur after the memory access has taken place.
60 * Breakpoints are disabled while @triggered runs, to avoid recursive traps
61 * and allow unhindered access to breakpointed memory.
62 *
63 * This sample code sets a breakpoint on pid_max and registers a callback
64 * function for writes to that variable. Note that it is not portable
65 * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
66 *
67 * ----------------------------------------------------------------------
68 *
69 * #include <asm/hw_breakpoint.h>
70 *
71 * struct hw_breakpoint my_bp;
72 *
73 * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
74 * {
75 * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
76 * dump_stack();
77 * .......<more debugging output>........
78 * }
79 *
80 * static struct hw_breakpoint my_bp;
81 *
82 * static int init_module(void)
83 * {
84 * ..........<do anything>............
85 * my_bp.info.type = HW_BREAKPOINT_WRITE;
86 * my_bp.info.len = HW_BREAKPOINT_LEN_4;
87 *
88 * my_bp.triggered = my_triggered;
89 *
90 * rc = register_kernel_hw_breakpoint(&my_bp);
91 * ..........<do anything>............
92 * }
93 *
94 * static void cleanup_module(void)
95 * {
96 * ..........<do anything>............
97 * unregister_kernel_hw_breakpoint(&my_bp);
98 * ..........<do anything>............
99 * }
100 *
101 * ----------------------------------------------------------------------
102 */
103struct hw_breakpoint {
104 void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
105 struct arch_hw_breakpoint info;
106};
107
108/*
109 * len and type values are defined in include/asm/hw_breakpoint.h.
110 * Available values vary according to the architecture. On i386 the
111 * possibilities are:
112 *
113 * HW_BREAKPOINT_LEN_1
114 * HW_BREAKPOINT_LEN_2
115 * HW_BREAKPOINT_LEN_4
116 * HW_BREAKPOINT_RW
117 * HW_BREAKPOINT_READ
118 *
119 * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
120 * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
121 * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
122 */
123
124extern int register_user_hw_breakpoint(struct task_struct *tsk,
125 struct hw_breakpoint *bp);
126extern int modify_user_hw_breakpoint(struct task_struct *tsk,
127 struct hw_breakpoint *bp);
128extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
129 struct hw_breakpoint *bp);
130/*
131 * Kernel breakpoints are not associated with any particular thread.
132 */
133extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
134extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
135
136extern unsigned int hbp_kernel_pos;
137
138#endif /* __KERNEL__ */
139#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
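
A minimal, self-contained module sketch against the API declared above (illustrative only: the watched symbol follows the pid_max example from the kernel-doc comment, the name-based lookup relies on the x86 arch_store_info() path shown earlier, and HW_BREAKPOINT_LEN_4 is not available on every architecture):

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/hw_breakpoint.h>

static struct hw_breakpoint my_bp;

static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
        printk(KERN_DEBUG "pid_max was written to\n");
        dump_stack();
}

static int __init my_hbp_init(void)
{
        my_bp.info.name = "pid_max";            /* resolved via kallsyms_lookup_name() */
        my_bp.info.type = HW_BREAKPOINT_WRITE;
        my_bp.info.len  = HW_BREAKPOINT_LEN_4;
        my_bp.triggered = my_triggered;

        return register_kernel_hw_breakpoint(&my_bp);
}

static void __exit my_hbp_exit(void)
{
        unregister_kernel_hw_breakpoint(&my_bp);
}

module_init(my_hbp_init);
module_exit(my_hbp_exit);
MODULE_LICENSE("GPL");
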
diff --git a/kernel/Makefile b/kernel/Makefile
index 9df4501cb921..f88decb1b445 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_TRACING) += trace/
97obj-$(CONFIG_X86_DS) += trace/ 97obj-$(CONFIG_X86_DS) += trace/
98obj-$(CONFIG_SMP) += sched_cpupri.o 98obj-$(CONFIG_SMP) += sched_cpupri.o
99obj-$(CONFIG_SLOW_WORK) += slow-work.o 99obj-$(CONFIG_SLOW_WORK) += slow-work.o
100obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
100obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o 101obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
101 102
102ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 103ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..c1f64e65a9f3
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,378 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 */
19
20/*
21 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
22 * using the CPU's debug registers.
23 * This file contains the arch-independent routines.
24 */
25
26#include <linux/irqflags.h>
27#include <linux/kallsyms.h>
28#include <linux/notifier.h>
29#include <linux/kprobes.h>
30#include <linux/kdebug.h>
31#include <linux/kernel.h>
32#include <linux/module.h>
33#include <linux/percpu.h>
34#include <linux/sched.h>
35#include <linux/init.h>
36#include <linux/smp.h>
37
38#include <asm/hw_breakpoint.h>
39#include <asm/processor.h>
40
41#ifdef CONFIG_X86
42#include <asm/debugreg.h>
43#endif
44/*
45 * Spinlock that protects all (un)register operations over kernel/user-space
46 * breakpoint requests
47 */
48static DEFINE_SPINLOCK(hw_breakpoint_lock);
49
50/* Array of kernel-space breakpoint structures */
51struct hw_breakpoint *hbp_kernel[HBP_NUM];
52
53/*
54 * Per-processor copy of hbp_kernel[]. Used only while hbp_kernel[] is being
55 * modified, so that any hbp exceptions raised meanwhile are handled with the
56 * older copy. It is re-synchronised with hbp_kernel[] via IPIs once the update completes.
57 */
58DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]);
59
60/*
61 * Kernel breakpoints grow downwards, starting from HBP_NUM.
62 * 'hbp_kernel_pos' denotes the lowest numbered breakpoint register occupied by a
63 * kernel-space request. We initialise it here, rather than in an __init
64 * routine, because load_debug_registers(), which uses this variable, can be
65 * called very early during CPU initialisation.
66 */
67unsigned int hbp_kernel_pos = HBP_NUM;
68
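For illustration, with the x86 value HBP_NUM == 4: at boot hbp_kernel_pos is 4 and all four debug registers are available to per-thread (user-space) requests; after one successful register_kernel_hw_breakpoint() call, hbp_kernel_pos drops to 3 and hbp_kernel[3] holds that breakpoint, leaving only slots 0-2 for user requests, which is why __register_user_hw_breakpoint() below fails with -ENOSPC once its slot index reaches hbp_kernel_pos.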
69/*
70 * An array containing the refcount of threads using a given breakpoint register.
71 * Accesses are synchronised by acquiring hw_breakpoint_lock.
72 */
73unsigned int hbp_user_refcount[HBP_NUM];
74
75/*
76 * Load the debug registers during startup of a CPU.
77 */
78void load_debug_registers(void)
79{
80 unsigned long flags;
81 struct task_struct *tsk = current;
82
83 spin_lock_bh(&hw_breakpoint_lock);
84
85 /* Prevent IPIs for new kernel breakpoint updates */
86 local_irq_save(flags);
87 arch_update_kernel_hw_breakpoint(NULL);
88 local_irq_restore(flags);
89
90 if (test_tsk_thread_flag(tsk, TIF_DEBUG))
91 arch_install_thread_hw_breakpoint(tsk);
92
93 spin_unlock_bh(&hw_breakpoint_lock);
94}
95
96/*
97 * Erase all the hardware breakpoint info associated with a thread.
98 *
99 * If tsk != current then tsk must not be usable (for example, a
100 * child being cleaned up from a failed fork).
101 */
102void flush_thread_hw_breakpoint(struct task_struct *tsk)
103{
104 int i;
105 struct thread_struct *thread = &(tsk->thread);
106
107 spin_lock_bh(&hw_breakpoint_lock);
108
109 /* The thread no longer has any breakpoints associated with it */
110 clear_tsk_thread_flag(tsk, TIF_DEBUG);
111 for (i = 0; i < HBP_NUM; i++) {
112 if (thread->hbp[i]) {
113 hbp_user_refcount[i]--;
114 kfree(thread->hbp[i]);
115 thread->hbp[i] = NULL;
116 }
117 }
118
119 arch_flush_thread_hw_breakpoint(tsk);
120
121 /* Actually uninstall the breakpoints if necessary */
122 if (tsk == current)
123 arch_uninstall_thread_hw_breakpoint();
124 spin_unlock_bh(&hw_breakpoint_lock);
125}
126
127/*
128 * Copy the hardware breakpoint info from a thread to its cloned child.
129 */
130int copy_thread_hw_breakpoint(struct task_struct *tsk,
131 struct task_struct *child, unsigned long clone_flags)
132{
133 /*
134 * We will assume that breakpoint settings are not inherited
135 * and the child starts out with no debug registers set.
136 * But what about CLONE_PTRACE?
137 */
138 clear_tsk_thread_flag(child, TIF_DEBUG);
139
140 /* We will call flush routine since the debugregs are not inherited */
141 arch_flush_thread_hw_breakpoint(child);
142
143 return 0;
144}
145
146static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
147 struct hw_breakpoint *bp)
148{
149 struct thread_struct *thread = &(tsk->thread);
150 int rc;
151
152 /* Do not overcommit. Fail if kernel has used the hbp registers */
153 if (pos >= hbp_kernel_pos)
154 return -ENOSPC;
155
156 rc = arch_validate_hwbkpt_settings(bp, tsk);
157 if (rc)
158 return rc;
159
160 thread->hbp[pos] = bp;
161 hbp_user_refcount[pos]++;
162
163 arch_update_user_hw_breakpoint(pos, tsk);
164 /*
165 * Does it need to be installed right now?
166 * Otherwise it will get installed the next time tsk runs
167 */
168 if (tsk == current)
169 arch_install_thread_hw_breakpoint(tsk);
170
171 return rc;
172}
173
174/*
175 * Modify the address of an hbp register already in use by the task.
176 * Do not invoke this in lieu of __unregister_user_hw_breakpoint().
177 */
178static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
179 struct hw_breakpoint *bp)
180{
181 struct thread_struct *thread = &(tsk->thread);
182
183 if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
184 return -EINVAL;
185
186 if (thread->hbp[pos] == NULL)
187 return -EINVAL;
188
189 thread->hbp[pos] = bp;
190 /*
191 * 'pos' must be that of a hbp register already used by 'tsk'
192 * Otherwise arch_modify_user_hw_breakpoint() will fail
193 */
194 arch_update_user_hw_breakpoint(pos, tsk);
195
196 if (tsk == current)
197 arch_install_thread_hw_breakpoint(tsk);
198
199 return 0;
200}
201
202static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
203{
204 hbp_user_refcount[pos]--;
205 tsk->thread.hbp[pos] = NULL;
206
207 arch_update_user_hw_breakpoint(pos, tsk);
208
209 if (tsk == current)
210 arch_install_thread_hw_breakpoint(tsk);
211}
212
213/**
214 * register_user_hw_breakpoint - register a hardware breakpoint for user space
215 * @tsk: pointer to 'task_struct' of the process to which the address belongs
216 * @bp: the breakpoint structure to register
217 *
218 * @bp->info.name or @bp->info.address, @bp->info.len, @bp->info.type and
219 * @bp->triggered must be set properly before invocation
220 *
221 */
222int register_user_hw_breakpoint(struct task_struct *tsk,
223 struct hw_breakpoint *bp)
224{
225 struct thread_struct *thread = &(tsk->thread);
226 int i, rc = -ENOSPC;
227
228 spin_lock_bh(&hw_breakpoint_lock);
229
230 for (i = 0; i < hbp_kernel_pos; i++) {
231 if (!thread->hbp[i]) {
232 rc = __register_user_hw_breakpoint(i, tsk, bp);
233 break;
234 }
235 }
236 if (!rc)
237 set_tsk_thread_flag(tsk, TIF_DEBUG);
238
239 spin_unlock_bh(&hw_breakpoint_lock);
240 return rc;
241}
242EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
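As an illustrative aside (not part of this patch), a minimal sketch of how a caller such as arch ptrace code might use this interface; the helper attach_write_watchpoint(), the callback my_user_triggered() and the 4-byte write settings are assumptions based on the x86 values used elsewhere in this patch, and the usual <linux/slab.h> / <asm/hw_breakpoint.h> includes are taken for granted:

static void my_user_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
	/* Runs from the debug-exception path when the watched address is written */
}

static int attach_write_watchpoint(struct task_struct *tsk, unsigned long addr)
{
	struct hw_breakpoint *bp;
	int rc;

	bp = kzalloc(sizeof(*bp), GFP_KERNEL);	/* caller owns bp and frees it after unregistering */
	if (!bp)
		return -ENOMEM;
	bp->info.address = addr;
	bp->info.type = HW_BREAKPOINT_WRITE;
	bp->info.len = HW_BREAKPOINT_LEN_4;
	bp->triggered = my_user_triggered;

	rc = register_user_hw_breakpoint(tsk, bp);
	if (rc)
		kfree(bp);
	return rc;
}

On teardown the same pointer would be passed to unregister_user_hw_breakpoint(tsk, bp) and then freed.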
243
244/**
245 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
246 * @tsk: pointer to 'task_struct' of the process to which the address belongs
247 * @bp: the breakpoint structure to modify
248 *
249 */
250int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp)
251{
252 struct thread_struct *thread = &(tsk->thread);
253 int i, ret = -ENOENT;
254
255 spin_lock_bh(&hw_breakpoint_lock);
256 for (i = 0; i < hbp_kernel_pos; i++) {
257 if (bp == thread->hbp[i]) {
258 ret = __modify_user_hw_breakpoint(i, tsk, bp);
259 break;
260 }
261 }
262 spin_unlock_bh(&hw_breakpoint_lock);
263 return ret;
264}
265EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
266
267/**
268 * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
269 * @tsk: pointer to 'task_struct' of the process to which the address belongs
270 * @bp: the breakpoint structure to unregister
271 *
272 */
273void unregister_user_hw_breakpoint(struct task_struct *tsk,
274 struct hw_breakpoint *bp)
275{
276 struct thread_struct *thread = &(tsk->thread);
277 int i, pos = -1, hbp_counter = 0;
278
279 spin_lock_bh(&hw_breakpoint_lock);
280 for (i = 0; i < hbp_kernel_pos; i++) {
281 if (thread->hbp[i])
282 hbp_counter++;
283 if (bp == thread->hbp[i])
284 pos = i;
285 }
286 if (pos >= 0) {
287 __unregister_user_hw_breakpoint(pos, tsk);
288 hbp_counter--;
289 }
290 if (!hbp_counter)
291 clear_tsk_thread_flag(tsk, TIF_DEBUG);
292
293 spin_unlock_bh(&hw_breakpoint_lock);
294}
295EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
296
297/**
298 * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
299 * @bp: the breakpoint structure to register
300 *
301 * @bp->info.name or @bp->info.address, @bp->info.len, @bp->info.type and
302 * @bp->triggered must be set properly before invocation
303 *
304 */
305int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
306{
307 int rc;
308
309 rc = arch_validate_hwbkpt_settings(bp, NULL);
310 if (rc)
311 return rc;
312
313 spin_lock_bh(&hw_breakpoint_lock);
314
315 rc = -ENOSPC;
316 /* Check if we are over-committing */
317 if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
318 hbp_kernel_pos--;
319 hbp_kernel[hbp_kernel_pos] = bp;
320 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
321 rc = 0;
322 }
323
324 spin_unlock_bh(&hw_breakpoint_lock);
325 return rc;
326}
327EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
328
329/**
330 * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
331 * @bp: the breakpoint structure to unregister
332 *
333 * Uninstalls and unregisters @bp.
334 */
335void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
336{
337 int i, j;
338
339 spin_lock_bh(&hw_breakpoint_lock);
340
341 /* Find the 'bp' in our list of breakpoints for kernel */
342 for (i = hbp_kernel_pos; i < HBP_NUM; i++)
343 if (bp == hbp_kernel[i])
344 break;
345
346 /* Check if we did not find a match for 'bp'. If so return early */
347 if (i == HBP_NUM) {
348 spin_unlock_bh(&hw_breakpoint_lock);
349 return;
350 }
351
352 /*
353 * If the unregistration leaves a hole, close it by shifting the
354 * lower-numbered kernel breakpoints up by one slot
355 */
356 for (j = i; j > hbp_kernel_pos; j--)
357 hbp_kernel[j] = hbp_kernel[j-1];
358
359 hbp_kernel[hbp_kernel_pos] = NULL;
360 on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
361 hbp_kernel_pos++;
362
363 spin_unlock_bh(&hw_breakpoint_lock);
364}
365EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
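To make the compaction above concrete: with HBP_NUM == 4 and kernel breakpoints occupying slots 1, 2 and 3 (hbp_kernel_pos == 1), unregistering the breakpoint in slot 2 copies the entry from slot 1 into slot 2, clears slot 1, and raises hbp_kernel_pos to 2, so the kernel-owned slots stay contiguous at the top of the array.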
366
367static struct notifier_block hw_breakpoint_exceptions_nb = {
368 .notifier_call = hw_breakpoint_exceptions_notify,
369 /* we need to be notified first */
370 .priority = 0x7fffffff
371};
372
373static int __init init_hw_breakpoint(void)
374{
375 return register_die_notifier(&hw_breakpoint_exceptions_nb);
376}
377
378core_initcall(init_hw_breakpoint);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61071fecc82e..ae048a2dbbe8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -314,6 +314,27 @@ config POWER_TRACER
314 power management decisions, specifically the C-state and P-state 314 power management decisions, specifically the C-state and P-state
315 behavior. 315 behavior.
316 316
317config KSYM_TRACER
318 bool "Trace read and write access on kernel memory locations"
319 depends on HAVE_HW_BREAKPOINT
320 select TRACING
321 help
322 This tracer helps find read and write operations on any given kernel
323 symbol, i.e. one listed in /proc/kallsyms.
324
325config PROFILE_KSYM_TRACER
326 bool "Profile all kernel memory accesses on 'watched' variables"
327 depends on KSYM_TRACER
328 help
329 This tracer profiles kernel accesses to variables watched through the
330 ksym tracer ftrace plugin. Depending upon the hardware, read
331 and write operations on the watched kernel variables can be
332 monitored and counted.
333
334 The results will be displayed in:
335 /debugfs/tracing/trace_stat/ksym_tracer
336
337 Say N if unsure.
317 338
318config STACK_TRACER 339config STACK_TRACER
319 bool "Trace max stack" 340 bool "Trace max stack"
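As a usage sketch (the paths assume debugfs is mounted at /sys/kernel/debug): select the tracer by writing ksym_tracer to current_tracer in the tracing directory, add a watch by writing an entry such as pid_max:rw- to ksym_trace_filter, and read hits back from the trace file; with PROFILE_KSYM_TRACER enabled, per-symbol hit counts appear under trace_stat/ksym_tracer as well.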
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90a..ce3b1cd02732 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,5 +54,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
57 58
58libftrace-y := ftrace.o 59libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6e735d4771f8..7d5cc37b8fca 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,6 +15,10 @@
15#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
17 17
18#ifdef CONFIG_KSYM_TRACER
19#include <asm/hw_breakpoint.h>
20#endif
21
18enum trace_type { 22enum trace_type {
19 __TRACE_FIRST_TYPE = 0, 23 __TRACE_FIRST_TYPE = 0,
20 24
@@ -40,6 +44,7 @@ enum trace_type {
40 TRACE_KMEM_FREE, 44 TRACE_KMEM_FREE,
41 TRACE_POWER, 45 TRACE_POWER,
42 TRACE_BLK, 46 TRACE_BLK,
47 TRACE_KSYM,
43 48
44 __TRACE_LAST_TYPE, 49 __TRACE_LAST_TYPE,
45}; 50};
@@ -207,6 +212,21 @@ struct syscall_trace_exit {
207 unsigned long ret; 212 unsigned long ret;
208}; 213};
209 214
215#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
216extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
217
218struct trace_ksym {
219 struct trace_entry ent;
220 struct hw_breakpoint *ksym_hbp;
221 unsigned long ksym_addr;
222 unsigned long ip;
223#ifdef CONFIG_PROFILE_KSYM_TRACER
224 unsigned long counter;
225#endif
226 struct hlist_node ksym_hlist;
227 char ksym_name[KSYM_NAME_LEN];
228 char p_name[TASK_COMM_LEN];
229};
210 230
211/* 231/*
212 * trace_flag_type is an enumeration that holds different 232 * trace_flag_type is an enumeration that holds different
@@ -323,6 +343,7 @@ extern void __ftrace_bad_type(void);
323 TRACE_SYSCALL_ENTER); \ 343 TRACE_SYSCALL_ENTER); \
324 IF_ASSIGN(var, ent, struct syscall_trace_exit, \ 344 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
325 TRACE_SYSCALL_EXIT); \ 345 TRACE_SYSCALL_EXIT); \
346 IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
326 __ftrace_bad_type(); \ 347 __ftrace_bad_type(); \
327 } while (0) 348 } while (0)
328 349
@@ -540,6 +561,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
540 struct trace_array *tr); 561 struct trace_array *tr);
541extern int trace_selftest_startup_hw_branches(struct tracer *trace, 562extern int trace_selftest_startup_hw_branches(struct tracer *trace,
542 struct trace_array *tr); 563 struct trace_array *tr);
564extern int trace_selftest_startup_ksym(struct tracer *trace,
565 struct trace_array *tr);
543#endif /* CONFIG_FTRACE_STARTUP_TEST */ 566#endif /* CONFIG_FTRACE_STARTUP_TEST */
544 567
545extern void *head_page(struct trace_array_cpu *data); 568extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..eef97e7c8db7
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,525 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32/* For now, restrict the number of symbols traced simultaneously to the
33 * number of available hardware breakpoint registers.
34 */
35#define KSYM_TRACER_MAX HBP_NUM
36
37#define KSYM_TRACER_OP_LEN 3 /* rw- */
38#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
39
40static struct trace_array *ksym_trace_array;
41
42static unsigned int ksym_filter_entry_count;
43static unsigned int ksym_tracing_enabled;
44
45static HLIST_HEAD(ksym_filter_head);
46
47static DEFINE_MUTEX(ksym_tracer_mutex);
48
49#ifdef CONFIG_PROFILE_KSYM_TRACER
50
51#define MAX_UL_INT 0xffffffff
52
53void ksym_collect_stats(unsigned long hbp_hit_addr)
54{
55 struct hlist_node *node;
56 struct trace_ksym *entry;
57
58 rcu_read_lock();
59 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
60 if ((entry->ksym_addr == hbp_hit_addr) &&
61 (entry->counter < MAX_UL_INT)) {
62 entry->counter++;
63 break;
64 }
65 }
66 rcu_read_unlock();
67}
68#endif /* CONFIG_PROFILE_KSYM_TRACER */
69
70void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
71{
72 struct ring_buffer_event *event;
73 struct trace_array *tr;
74 struct trace_ksym *entry;
75 int pc;
76
77 if (!ksym_tracing_enabled)
78 return;
79
80 tr = ksym_trace_array;
81 pc = preempt_count();
82
83 event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
84 sizeof(*entry), 0, pc);
85 if (!event)
86 return;
87
88 entry = ring_buffer_event_data(event);
89 strlcpy(entry->ksym_name, hbp->info.name, KSYM_NAME_LEN);
90 entry->ksym_hbp = hbp;
91 entry->ip = instruction_pointer(regs);
92 strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
93#ifdef CONFIG_PROFILE_KSYM_TRACER
94 ksym_collect_stats(hbp->info.address);
95#endif /* CONFIG_PROFILE_KSYM_TRACER */
96
97 trace_buffer_unlock_commit(tr, event, 0, pc);
98}
99
100/* Valid access types are represented as
101 *
102 * rw- : Set Read/Write Access Breakpoint
103 * -w- : Set Write Access Breakpoint
104 * --- : Clear Breakpoints
105 * --x : Set Execution Breakpoint (not available yet)
106 *
107 */
108static int ksym_trace_get_access_type(char *access_str)
109{
110 int pos, access = 0;
111
112 for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
113 switch (access_str[pos]) {
114 case 'r':
115 access += (pos == 0) ? 4 : -1;
116 break;
117 case 'w':
118 access += (pos == 1) ? 2 : -1;
119 break;
120 case '-':
121 break;
122 default:
123 return -EINVAL;
124 }
125 }
126
127 switch (access) {
128 case 6:
129 access = HW_BREAKPOINT_RW;
130 break;
131 case 2:
132 access = HW_BREAKPOINT_WRITE;
133 break;
134 case 0:
135 access = 0;
136 }
137
138 return access;
139}
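Worked through: "rw-" scores 4 for the 'r' in position 0 plus 2 for the 'w' in position 1, giving 6, which maps to HW_BREAKPOINT_RW; "-w-" scores 2 and maps to HW_BREAKPOINT_WRITE; "---" scores 0, meaning clear; a character in the wrong position (e.g. "w--") subtracts 1, making the result negative so the caller treats it as an error.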
140
141/*
142 * There can be several possible malformed requests and we attempt to capture
143 * all of them. We enumerate some of the rules
144 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
145 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
146 * <module>:<ksym_name>:<op>.
147 * 2. No delimiter symbol ':' in the input string
148 * 3. Spurious operator symbols or symbols not in their respective positions
149 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
150 * 5. Kernel symbol not a part of /proc/kallsyms
151 * 6. Duplicate requests
152 */
153static int parse_ksym_trace_str(char *input_string, char **ksymname,
154 unsigned long *addr)
155{
156 char *delimiter = ":";
157 int ret;
158
159 ret = -EINVAL;
160 *ksymname = strsep(&input_string, delimiter);
161 *addr = kallsyms_lookup_name(*ksymname);
162
163 /* Check for malformed request: (2), (1) and (5) */
164 if ((!input_string) ||
165 (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
166 (*addr == 0))
167 goto return_code;
168 ret = ksym_trace_get_access_type(input_string);
169
170return_code:
171 return ret;
172}
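Putting the rules together, a well-formed write request looks like "pid_max:-w-", a read-write request like "pid_max:rw-", and "pid_max:---" asks for an existing breakpoint on pid_max to be cleared; the one extra byte allowed beyond KSYM_TRACER_OP_LEN presumably accommodates the trailing newline that echo appends.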
173
174int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
175{
176 struct trace_ksym *entry;
177 int ret;
178
179 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
180 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
181 " new requests for tracing can be accepted now.\n",
182 KSYM_TRACER_MAX);
183 return -ENOSPC;
184 }
185
186 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
187 if (!entry)
188 return -ENOMEM;
189
190 entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
191 if (!entry->ksym_hbp) {
192 kfree(entry);
193 return -ENOMEM;
194 }
195
196 entry->ksym_hbp->info.name = ksymname;
197 entry->ksym_hbp->info.type = op;
198 entry->ksym_addr = entry->ksym_hbp->info.address = addr;
199#ifdef CONFIG_X86
200 entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
201#endif
202 entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
203
204 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
205 if (ret < 0) {
206 printk(KERN_INFO "ksym_tracer request failed. Try again"
207 " later!!\n");
208 kfree(entry->ksym_hbp);
209 kfree(entry);
210 return -EAGAIN;
211 }
212 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
213 ksym_filter_entry_count++;
214
215 return 0;
216}
217
218static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
219 size_t count, loff_t *ppos)
220{
221 struct trace_ksym *entry;
222 struct hlist_node *node;
223 char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
224 ssize_t ret, cnt = 0;
225
226 mutex_lock(&ksym_tracer_mutex);
227
228 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
229 cnt += snprintf(&buf[cnt], sizeof(buf) - cnt, "%s:",
230 entry->ksym_hbp->info.name);
231 if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
232 cnt += snprintf(&buf[cnt], sizeof(buf) - cnt,
233 "-w-\n");
234 else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
235 cnt += snprintf(&buf[cnt], sizeof(buf) - cnt,
236 "rw-\n");
237 }
238 ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
239 mutex_unlock(&ksym_tracer_mutex);
240
241 return ret;
242}
243
244static ssize_t ksym_trace_filter_write(struct file *file,
245 const char __user *buffer,
246 size_t count, loff_t *ppos)
247{
248 struct trace_ksym *entry;
249 struct hlist_node *node;
250 char *input_string, *ksymname = NULL;
251 unsigned long ksym_addr = 0;
252 int ret, op, changed = 0;
253
254 /* Ignore echo "" > ksym_trace_filter */
255 if (count == 0)
256 return 0;
257
258 input_string = kzalloc(count + 1, GFP_KERNEL);
259 if (!input_string)
260 return -ENOMEM;
261
262 if (copy_from_user(input_string, buffer, count)) {
263 kfree(input_string);
264 return -EFAULT;
265 }
266
267 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
268 if (ret < 0) {
269 kfree(input_string);
270 return ret;
271 }
272
273 mutex_lock(&ksym_tracer_mutex);
274
275 ret = -EINVAL;
276 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
277 if (entry->ksym_addr == ksym_addr) {
278 /* Check for malformed request: (6) */
279 if (entry->ksym_hbp->info.type != op)
280 changed = 1;
281 else
282 goto err_ret;
283 break;
284 }
285 }
286 if (changed) {
287 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
288 entry->ksym_hbp->info.type = op;
289 if (op > 0) {
290 ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
291 if (ret == 0) {
292 ret = count;
293 goto unlock_ret_path;
294 }
295 }
296 ksym_filter_entry_count--;
297 hlist_del_rcu(&(entry->ksym_hlist));
298 synchronize_rcu();
299 kfree(entry->ksym_hbp);
300 kfree(entry);
301 ret = count;
302 goto err_ret;
303 } else {
304 /* Check for malformed request: (4) */
305 if (op == 0)
306 goto err_ret;
307 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
308 if (ret)
309 goto err_ret;
310 }
311 ret = count;
312 goto unlock_ret_path;
313
314err_ret:
315 kfree(input_string);
316
317unlock_ret_path:
318 mutex_unlock(&ksym_tracer_mutex);
319 return ret;
320}
321
322static const struct file_operations ksym_tracing_fops = {
323 .open = tracing_open_generic,
324 .read = ksym_trace_filter_read,
325 .write = ksym_trace_filter_write,
326};
327
328static void ksym_trace_reset(struct trace_array *tr)
329{
330 struct trace_ksym *entry;
331 struct hlist_node *node, *node1;
332
333 ksym_tracing_enabled = 0;
334
335 mutex_lock(&ksym_tracer_mutex);
336 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
337 ksym_hlist) {
338 unregister_kernel_hw_breakpoint(entry->ksym_hbp);
339 ksym_filter_entry_count--;
340 hlist_del_rcu(&(entry->ksym_hlist));
341 synchronize_rcu();
342 /* Free the 'input_string' backing info.name, except for the
343 * startup self-test entry, whose name is a static literal
344 */
345#ifdef CONFIG_FTRACE_SELFTEST
346 if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
347 strlen(KSYM_SELFTEST_ENTRY)) != 0)
348#endif /* CONFIG_FTRACE_SELFTEST*/
349 kfree(entry->ksym_hbp->info.name);
350 kfree(entry->ksym_hbp);
351 kfree(entry);
352 }
353 mutex_unlock(&ksym_tracer_mutex);
354}
355
356static int ksym_trace_init(struct trace_array *tr)
357{
358 int cpu, ret = 0;
359
360 for_each_online_cpu(cpu)
361 tracing_reset(tr, cpu);
362 ksym_tracing_enabled = 1;
363 ksym_trace_array = tr;
364
365 return ret;
366}
367
368static void ksym_trace_print_header(struct seq_file *m)
369{
370
371 seq_puts(m,
372 "# TASK-PID CPU# Symbol Type "
373 "Function \n");
374 seq_puts(m,
375 "# | | | | "
376 "| \n");
377}
378
379static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
380{
381 struct trace_entry *entry = iter->ent;
382 struct trace_seq *s = &iter->seq;
383 struct trace_ksym *field;
384 char str[KSYM_SYMBOL_LEN];
385 int ret;
386
387 if (entry->type != TRACE_KSYM)
388 return TRACE_TYPE_UNHANDLED;
389
390 trace_assign_type(field, entry);
391
392 ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
393 entry->pid, iter->cpu, field->ksym_name);
394 if (!ret)
395 return TRACE_TYPE_PARTIAL_LINE;
396
397 switch (field->ksym_hbp->info.type) {
398 case HW_BREAKPOINT_WRITE:
399 ret = trace_seq_printf(s, " W ");
400 break;
401 case HW_BREAKPOINT_RW:
402 ret = trace_seq_printf(s, " RW ");
403 break;
404 default:
405 return TRACE_TYPE_PARTIAL_LINE;
406 }
407
408 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE;
410
411 sprint_symbol(str, field->ip);
412 ret = trace_seq_printf(s, "%-20s\n", str);
413 if (!ret)
414 return TRACE_TYPE_PARTIAL_LINE;
415
416 return TRACE_TYPE_HANDLED;
417}
418
419struct tracer ksym_tracer __read_mostly =
420{
421 .name = "ksym_tracer",
422 .init = ksym_trace_init,
423 .reset = ksym_trace_reset,
424#ifdef CONFIG_FTRACE_SELFTEST
425 .selftest = trace_selftest_startup_ksym,
426#endif
427 .print_header = ksym_trace_print_header,
428 .print_line = ksym_trace_output
429};
430
431__init static int init_ksym_trace(void)
432{
433 struct dentry *d_tracer;
434 struct dentry *entry;
435
436 d_tracer = tracing_init_dentry();
437 ksym_filter_entry_count = 0;
438
439 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
440 NULL, &ksym_tracing_fops);
441 if (!entry)
442 pr_warning("Could not create debugfs "
443 "'ksym_trace_filter' file\n");
444
445 return register_tracer(&ksym_tracer);
446}
447device_initcall(init_ksym_trace);
448
449
450#ifdef CONFIG_PROFILE_KSYM_TRACER
451static int ksym_tracer_stat_headers(struct seq_file *m)
452{
453 seq_printf(m, " Access type ");
454 seq_printf(m, " Symbol Counter \n");
455 return 0;
456}
457
458static int ksym_tracer_stat_show(struct seq_file *m, void *v)
459{
460 struct hlist_node *stat = v;
461 struct trace_ksym *entry;
462 int access_type = 0;
463 char fn_name[KSYM_NAME_LEN];
464
465 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
466
467 if (entry->ksym_hbp)
468 access_type = entry->ksym_hbp->info.type;
469
470 switch (access_type) {
471 case HW_BREAKPOINT_WRITE:
472 seq_printf(m, " W ");
473 break;
474 case HW_BREAKPOINT_RW:
475 seq_printf(m, " RW ");
476 break;
477 default:
478 seq_printf(m, " NA ");
479 }
480
481 if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
482 seq_printf(m, " %s ", fn_name);
483 else
484 seq_printf(m, " <NA> ");
485
486 seq_printf(m, "%15lu\n", entry->counter);
487 return 0;
488}
489
490static void *ksym_tracer_stat_start(struct tracer_stat *trace)
491{
492 return &(ksym_filter_head.first);
493}
494
495static void *
496ksym_tracer_stat_next(void *v, int idx)
497{
498 struct hlist_node *stat = v;
499
500 return stat->next;
501}
502
503static struct tracer_stat ksym_tracer_stats = {
504 .name = "ksym_tracer",
505 .stat_start = ksym_tracer_stat_start,
506 .stat_next = ksym_tracer_stat_next,
507 .stat_headers = ksym_tracer_stat_headers,
508 .stat_show = ksym_tracer_stat_show
509};
510
511__init static int ksym_tracer_stat_init(void)
512{
513 int ret;
514
515 ret = register_stat_tracer(&ksym_tracer_stats);
516 if (ret) {
517 printk(KERN_WARNING "Warning: could not register "
518 "ksym tracer stats\n");
519 return 1;
520 }
521
522 return 0;
523}
524fs_initcall(ksym_tracer_stat_init);
525#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 00dd6485bdd7..71f2edb0fd84 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -807,3 +808,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
807 return ret; 808 return ret;
808} 809}
809#endif /* CONFIG_HW_BRANCH_TRACER */ 810#endif /* CONFIG_HW_BRANCH_TRACER */
811
812#ifdef CONFIG_KSYM_TRACER
813static int ksym_selftest_dummy;
814
815int
816trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
817{
818 unsigned long count;
819 int ret;
820
821 /* start the tracing */
822 ret = tracer_init(trace, tr);
823 if (ret) {
824 warn_failed_init_tracer(trace, ret);
825 return ret;
826 }
827
828 ksym_selftest_dummy = 0;
829 /* Register the read-write tracing request */
830 ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
831 (unsigned long)(&ksym_selftest_dummy));
832
833 if (ret < 0) {
834 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
835 goto ret_path;
836 }
837 /* Perform a read and a write operation over the dummy variable to
838 * trigger the tracer
839 */
840 if (ksym_selftest_dummy == 0)
841 ksym_selftest_dummy++;
842
843 /* stop the tracing. */
844 tracing_stop();
845 /* check the trace buffer */
846 ret = trace_test_buffer(tr, &count);
847 trace->reset(tr);
848 tracing_start();
849
850 /* read & write operations - one each is performed on the dummy variable
851 * triggering two entries in the trace buffer
852 */
853 if (!ret && count != 2) {
854 printk(KERN_CONT "Ksym tracer startup test failed");
855 ret = -1;
856 }
857
858ret_path:
859 return ret;
860}
861#endif /* CONFIG_KSYM_TRACER */
862
diff --git a/samples/Kconfig b/samples/Kconfig
index 428b065ba695..17d64ba7864c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -46,5 +46,11 @@ config SAMPLE_KRETPROBES
46 default m 46 default m
47 depends on SAMPLE_KPROBES && KRETPROBES 47 depends on SAMPLE_KPROBES && KRETPROBES
48 48
49config SAMPLE_HW_BREAKPOINT
50 tristate "Build kernel hardware breakpoint examples -- loadable module only"
51 depends on HAVE_HW_BREAKPOINT && m
52 help
53 This builds kernel hardware breakpoint example modules.
54
49endif # SAMPLES 55endif # SAMPLES
50 56
diff --git a/samples/Makefile b/samples/Makefile
index 13e4b470b539..42e175598777 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ 3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \
4 trace_events/ hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000000..0f5c31c2fc47
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000000..9cbdbb871b7a
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,83 @@
1/*
2 * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * usage: insmod data_breakpoint.ko ksym=<ksym_name>
19 *
20 * This file is a kernel module that places a breakpoint on the kernel
21 * variable named by ksym_name, using a hardware breakpoint register. The
22 * corresponding handler, which prints a backtrace, is invoked every time a
23 * write operation is performed on that variable.
24 *
25 * Copyright (C) IBM Corporation, 2009
26 */
27#include <linux/module.h> /* Needed by all modules */
28#include <linux/kernel.h> /* Needed for KERN_INFO */
29#include <linux/init.h> /* Needed for the macros */
30
31#include <asm/hw_breakpoint.h>
32
33struct hw_breakpoint sample_hbp;
34
35static char ksym_name[KSYM_NAME_LEN] = "pid_max";
36module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
37MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
38 " write operations on the kernel symbol");
39
40void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs
41 *temp_regs)
42{
43 printk(KERN_INFO "%s value is changed\n", ksym_name);
44 dump_stack();
45 printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
46}
47
48static int __init hw_break_module_init(void)
49{
50 int ret;
51
52#ifdef CONFIG_X86
53 sample_hbp.info.name = ksym_name;
54 sample_hbp.info.type = HW_BREAKPOINT_WRITE;
55 sample_hbp.info.len = HW_BREAKPOINT_LEN_4;
56#endif /* CONFIG_X86 */
57
58 sample_hbp.triggered = (void *)sample_hbp_handler;
59
60 ret = register_kernel_hw_breakpoint(&sample_hbp);
61
62 if (ret < 0) {
63 printk(KERN_INFO "Breakpoint registration failed\n");
64 return ret;
65 } else
66 printk(KERN_INFO "HW Breakpoint for %s write installed\n",
67 ksym_name);
68
69 return 0;
70}
71
72static void __exit hw_break_module_exit(void)
73{
74 unregister_kernel_hw_breakpoint(&sample_hbp);
75 printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
76}
77
78module_init(hw_break_module_init);
79module_exit(hw_break_module_exit);
80
81MODULE_LICENSE("GPL");
82MODULE_AUTHOR("K.Prasad");
83MODULE_DESCRIPTION("ksym breakpoint");