diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2009-09-09 13:22:48 -0400 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2009-11-08 09:34:42 -0500 |
commit | 24f1e32c60c45c89a997c73395b69c8af6f0a84e (patch) | |
tree | 4f30f16e18cb4abbcf96b3b331e6a3f01bfa26e6 /kernel/trace | |
parent | 2da3e160cb3d226d87b907fab26850d838ed8d7c (diff) |
hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events
This patch rebases the implementation of the breakpoints API on top of
perf event instances.
Each breakpoint is now a perf event that handles the
register scheduling, thread/cpu attachment, etc.
The new layering is now made as follows:
       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /

              Breakpoints perf events

                     |
                     |

               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |

             Hardware debug registers
Reasons for this rewrite:
- Use the centralized/optimized pmu registers scheduling,
implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
events, exclusive/non-exclusive, tunable period, etc...)
Impact:
- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
thread breakpoints references.
Todo (in order):
- Support breakpoints perf counter events for perf tools (ie: implement
perf_bpcounter_event())
- Support from perf tools
Changes in v2:
- Follow the perf "event" rename
- The ptrace regression has been fixed (ptrace breakpoint perf events
weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
perf_event_attr.
- Separate core and arch specific headers, drop
asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch
Changes in v3:
- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
changes to kvm when we exit the guest and restore the bp registers
to the host.
Changes in v4:
- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
module
- Restore the breakpoints unconditionally on kvm guest exit:
TIF_DEBUG_THREAD no longer covers every case of running
breakpoints and vcpu->arch.switch_db_regs might not always be
set when the guest used debug registers.
(Waiting for a reliable optimization)
Changes in v5:
- Split the move of asm-generic/hw_breakpoint.h to
linux/hw_breakpoint.h out into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
to host. We only want to restore the state if we have active
breakpoints on the host, otherwise we don't care about messed-up
address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c
Changes in v6:
- Fix wrong header inclusion in trace.h (triggered a build
error with CONFIG_FTRACE_SELFTEST)
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace.h | 5 | ||||
-rw-r--r-- | kernel/trace/trace_entries.h | 6 | ||||
-rw-r--r-- | kernel/trace/trace_ksym.c | 126 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 3 |
4 files changed, 66 insertions, 74 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 91c3d0e9a5a1..d72f06ff263f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -11,14 +11,11 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <trace/boot.h> | 12 | #include <trace/boot.h> |
13 | #include <linux/kmemtrace.h> | 13 | #include <linux/kmemtrace.h> |
14 | #include <linux/hw_breakpoint.h> | ||
14 | 15 | ||
15 | #include <linux/trace_seq.h> | 16 | #include <linux/trace_seq.h> |
16 | #include <linux/ftrace_event.h> | 17 | #include <linux/ftrace_event.h> |
17 | 18 | ||
18 | #ifdef CONFIG_KSYM_TRACER | ||
19 | #include <asm/hw_breakpoint.h> | ||
20 | #endif | ||
21 | |||
22 | enum trace_type { | 19 | enum trace_type { |
23 | __TRACE_FIRST_TYPE = 0, | 20 | __TRACE_FIRST_TYPE = 0, |
24 | 21 | ||
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e19747d4f860..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, | |||
372 | F_STRUCT( | 372 | F_STRUCT( |
373 | __field( unsigned long, ip ) | 373 | __field( unsigned long, ip ) |
374 | __field( unsigned char, type ) | 374 | __field( unsigned char, type ) |
375 | __array( char , ksym_name, KSYM_NAME_LEN ) | ||
376 | __array( char , cmd, TASK_COMM_LEN ) | 375 | __array( char , cmd, TASK_COMM_LEN ) |
376 | __field( unsigned long, addr ) | ||
377 | ), | 377 | ), |
378 | 378 | ||
379 | F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", | 379 | F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", |
380 | (void *)__entry->ip, (unsigned int)__entry->type, | 380 | (void *)__entry->ip, (unsigned int)__entry->type, |
381 | __entry->ksym_name, __entry->cmd) | 381 | (void *)__entry->addr, __entry->cmd) |
382 | ); | 382 | ); |
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 6d5609c67378..fea83eeeef09 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c | |||
@@ -29,7 +29,11 @@ | |||
29 | #include "trace_stat.h" | 29 | #include "trace_stat.h" |
30 | #include "trace.h" | 30 | #include "trace.h" |
31 | 31 | ||
32 | /* For now, let us restrict the no. of symbols traced simultaneously to number | 32 | #include <linux/hw_breakpoint.h> |
33 | #include <asm/hw_breakpoint.h> | ||
34 | |||
35 | /* | ||
36 | * For now, let us restrict the no. of symbols traced simultaneously to number | ||
33 | * of available hardware breakpoint registers. | 37 | * of available hardware breakpoint registers. |
34 | */ | 38 | */ |
35 | #define KSYM_TRACER_MAX HBP_NUM | 39 | #define KSYM_TRACER_MAX HBP_NUM |
@@ -37,8 +41,10 @@ | |||
37 | #define KSYM_TRACER_OP_LEN 3 /* rw- */ | 41 | #define KSYM_TRACER_OP_LEN 3 /* rw- */ |
38 | 42 | ||
39 | struct trace_ksym { | 43 | struct trace_ksym { |
40 | struct hw_breakpoint *ksym_hbp; | 44 | struct perf_event **ksym_hbp; |
41 | unsigned long ksym_addr; | 45 | unsigned long ksym_addr; |
46 | int type; | ||
47 | int len; | ||
42 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 48 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
43 | unsigned long counter; | 49 | unsigned long counter; |
44 | #endif | 50 | #endif |
@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) | |||
75 | } | 81 | } |
76 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | 82 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ |
77 | 83 | ||
78 | void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) | 84 | void ksym_hbp_handler(struct perf_event *hbp, void *data) |
79 | { | 85 | { |
80 | struct ring_buffer_event *event; | 86 | struct ring_buffer_event *event; |
81 | struct ksym_trace_entry *entry; | 87 | struct ksym_trace_entry *entry; |
88 | struct pt_regs *regs = data; | ||
82 | struct ring_buffer *buffer; | 89 | struct ring_buffer *buffer; |
83 | int pc; | 90 | int pc; |
84 | 91 | ||
@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) | |||
96 | 103 | ||
97 | entry = ring_buffer_event_data(event); | 104 | entry = ring_buffer_event_data(event); |
98 | entry->ip = instruction_pointer(regs); | 105 | entry->ip = instruction_pointer(regs); |
99 | entry->type = hbp->info.type; | 106 | entry->type = hw_breakpoint_type(hbp); |
100 | strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); | 107 | entry->addr = hw_breakpoint_addr(hbp); |
101 | strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); | 108 | strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); |
102 | 109 | ||
103 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 110 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
104 | ksym_collect_stats(hbp->info.address); | 111 | ksym_collect_stats(hw_breakpoint_addr(hbp)); |
105 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | 112 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ |
106 | 113 | ||
107 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 114 | trace_buffer_unlock_commit(buffer, event, 0, pc); |
@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) | |||
120 | int access = 0; | 127 | int access = 0; |
121 | 128 | ||
122 | if (str[0] == 'r') | 129 | if (str[0] == 'r') |
123 | access += 4; | 130 | access |= HW_BREAKPOINT_R; |
124 | else if (str[0] != '-') | ||
125 | return -EINVAL; | ||
126 | 131 | ||
127 | if (str[1] == 'w') | 132 | if (str[1] == 'w') |
128 | access += 2; | 133 | access |= HW_BREAKPOINT_W; |
129 | else if (str[1] != '-') | ||
130 | return -EINVAL; | ||
131 | 134 | ||
132 | if (str[2] != '-') | 135 | if (str[2] == 'x') |
133 | return -EINVAL; | 136 | access |= HW_BREAKPOINT_X; |
134 | 137 | ||
135 | switch (access) { | 138 | switch (access) { |
136 | case 6: | 139 | case HW_BREAKPOINT_W: |
137 | access = HW_BREAKPOINT_RW; | 140 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: |
138 | break; | 141 | return access; |
139 | case 4: | 142 | default: |
140 | access = -EINVAL; | 143 | return -EINVAL; |
141 | break; | ||
142 | case 2: | ||
143 | access = HW_BREAKPOINT_WRITE; | ||
144 | break; | ||
145 | } | 144 | } |
146 | |||
147 | return access; | ||
148 | } | 145 | } |
149 | 146 | ||
150 | /* | 147 | /* |
@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | |||
194 | if (!entry) | 191 | if (!entry) |
195 | return -ENOMEM; | 192 | return -ENOMEM; |
196 | 193 | ||
197 | entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); | 194 | entry->type = op; |
198 | if (!entry->ksym_hbp) | 195 | entry->ksym_addr = addr; |
199 | goto err; | 196 | entry->len = HW_BREAKPOINT_LEN_4; |
200 | 197 | ||
201 | entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); | 198 | ret = -EAGAIN; |
202 | if (!entry->ksym_hbp->info.name) | 199 | entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, |
203 | goto err; | 200 | entry->len, entry->type, |
204 | 201 | ksym_hbp_handler, true); | |
205 | entry->ksym_hbp->info.type = op; | 202 | if (IS_ERR(entry->ksym_hbp)) { |
206 | entry->ksym_addr = entry->ksym_hbp->info.address = addr; | 203 | entry->ksym_hbp = NULL; |
207 | #ifdef CONFIG_X86 | 204 | ret = PTR_ERR(entry->ksym_hbp); |
208 | entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; | 205 | } |
209 | #endif | ||
210 | entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; | ||
211 | 206 | ||
212 | ret = register_kernel_hw_breakpoint(entry->ksym_hbp); | 207 | if (!entry->ksym_hbp) { |
213 | if (ret < 0) { | ||
214 | printk(KERN_INFO "ksym_tracer request failed. Try again" | 208 | printk(KERN_INFO "ksym_tracer request failed. Try again" |
215 | " later!!\n"); | 209 | " later!!\n"); |
216 | ret = -EAGAIN; | ||
217 | goto err; | 210 | goto err; |
218 | } | 211 | } |
212 | |||
219 | hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); | 213 | hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); |
220 | ksym_filter_entry_count++; | 214 | ksym_filter_entry_count++; |
215 | |||
221 | return 0; | 216 | return 0; |
217 | |||
222 | err: | 218 | err: |
223 | if (entry->ksym_hbp) | ||
224 | kfree(entry->ksym_hbp->info.name); | ||
225 | kfree(entry->ksym_hbp); | ||
226 | kfree(entry); | 219 | kfree(entry); |
220 | |||
227 | return ret; | 221 | return ret; |
228 | } | 222 | } |
229 | 223 | ||
@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, | |||
244 | mutex_lock(&ksym_tracer_mutex); | 238 | mutex_lock(&ksym_tracer_mutex); |
245 | 239 | ||
246 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | 240 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { |
247 | ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); | 241 | ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); |
248 | if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) | 242 | if (entry->type == HW_BREAKPOINT_W) |
249 | ret = trace_seq_puts(s, "-w-\n"); | 243 | ret = trace_seq_puts(s, "-w-\n"); |
250 | else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) | 244 | else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) |
251 | ret = trace_seq_puts(s, "rw-\n"); | 245 | ret = trace_seq_puts(s, "rw-\n"); |
252 | WARN_ON_ONCE(!ret); | 246 | WARN_ON_ONCE(!ret); |
253 | } | 247 | } |
@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) | |||
269 | mutex_lock(&ksym_tracer_mutex); | 263 | mutex_lock(&ksym_tracer_mutex); |
270 | hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, | 264 | hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, |
271 | ksym_hlist) { | 265 | ksym_hlist) { |
272 | unregister_kernel_hw_breakpoint(entry->ksym_hbp); | 266 | unregister_wide_hw_breakpoint(entry->ksym_hbp); |
273 | ksym_filter_entry_count--; | 267 | ksym_filter_entry_count--; |
274 | hlist_del_rcu(&(entry->ksym_hlist)); | 268 | hlist_del_rcu(&(entry->ksym_hlist)); |
275 | synchronize_rcu(); | 269 | synchronize_rcu(); |
276 | kfree(entry->ksym_hbp->info.name); | ||
277 | kfree(entry->ksym_hbp); | ||
278 | kfree(entry); | 270 | kfree(entry); |
279 | } | 271 | } |
280 | mutex_unlock(&ksym_tracer_mutex); | 272 | mutex_unlock(&ksym_tracer_mutex); |
@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, | |||
327 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { | 319 | hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { |
328 | if (entry->ksym_addr == ksym_addr) { | 320 | if (entry->ksym_addr == ksym_addr) { |
329 | /* Check for malformed request: (6) */ | 321 | /* Check for malformed request: (6) */ |
330 | if (entry->ksym_hbp->info.type != op) | 322 | if (entry->type != op) |
331 | changed = 1; | 323 | changed = 1; |
332 | else | 324 | else |
333 | goto out; | 325 | goto out; |
@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, | |||
335 | } | 327 | } |
336 | } | 328 | } |
337 | if (changed) { | 329 | if (changed) { |
338 | unregister_kernel_hw_breakpoint(entry->ksym_hbp); | 330 | unregister_wide_hw_breakpoint(entry->ksym_hbp); |
339 | entry->ksym_hbp->info.type = op; | 331 | entry->type = op; |
340 | if (op > 0) { | 332 | if (op > 0) { |
341 | ret = register_kernel_hw_breakpoint(entry->ksym_hbp); | 333 | entry->ksym_hbp = |
342 | if (ret == 0) | 334 | register_wide_hw_breakpoint(entry->ksym_addr, |
335 | entry->len, entry->type, | ||
336 | ksym_hbp_handler, true); | ||
337 | if (IS_ERR(entry->ksym_hbp)) | ||
338 | entry->ksym_hbp = NULL; | ||
339 | if (!entry->ksym_hbp) | ||
343 | goto out; | 340 | goto out; |
344 | } | 341 | } |
345 | ksym_filter_entry_count--; | 342 | ksym_filter_entry_count--; |
346 | hlist_del_rcu(&(entry->ksym_hlist)); | 343 | hlist_del_rcu(&(entry->ksym_hlist)); |
347 | synchronize_rcu(); | 344 | synchronize_rcu(); |
348 | kfree(entry->ksym_hbp->info.name); | ||
349 | kfree(entry->ksym_hbp); | ||
350 | kfree(entry); | 345 | kfree(entry); |
351 | ret = 0; | 346 | ret = 0; |
352 | goto out; | 347 | goto out; |
@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) | |||
413 | 408 | ||
414 | trace_assign_type(field, entry); | 409 | trace_assign_type(field, entry); |
415 | 410 | ||
416 | ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, | 411 | ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, |
417 | entry->pid, iter->cpu, field->ksym_name); | 412 | entry->pid, iter->cpu, (char *)field->addr); |
418 | if (!ret) | 413 | if (!ret) |
419 | return TRACE_TYPE_PARTIAL_LINE; | 414 | return TRACE_TYPE_PARTIAL_LINE; |
420 | 415 | ||
421 | switch (field->type) { | 416 | switch (field->type) { |
422 | case HW_BREAKPOINT_WRITE: | 417 | case HW_BREAKPOINT_W: |
423 | ret = trace_seq_printf(s, " W "); | 418 | ret = trace_seq_printf(s, " W "); |
424 | break; | 419 | break; |
425 | case HW_BREAKPOINT_RW: | 420 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: |
426 | ret = trace_seq_printf(s, " RW "); | 421 | ret = trace_seq_printf(s, " RW "); |
427 | break; | 422 | break; |
428 | default: | 423 | default: |
@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) | |||
490 | 485 | ||
491 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | 486 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); |
492 | 487 | ||
493 | if (entry->ksym_hbp) | 488 | access_type = entry->type; |
494 | access_type = entry->ksym_hbp->info.type; | ||
495 | 489 | ||
496 | switch (access_type) { | 490 | switch (access_type) { |
497 | case HW_BREAKPOINT_WRITE: | 491 | case HW_BREAKPOINT_W: |
498 | seq_puts(m, " W "); | 492 | seq_puts(m, " W "); |
499 | break; | 493 | break; |
500 | case HW_BREAKPOINT_RW: | 494 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: |
501 | seq_puts(m, " RW "); | 495 | seq_puts(m, " RW "); |
502 | break; | 496 | break; |
503 | default: | 497 | default: |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7179c12e4f0f..27c5072c2e6b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) | |||
828 | 828 | ||
829 | ksym_selftest_dummy = 0; | 829 | ksym_selftest_dummy = 0; |
830 | /* Register the read-write tracing request */ | 830 | /* Register the read-write tracing request */ |
831 | ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, | 831 | ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, |
832 | HW_BREAKPOINT_R | HW_BREAKPOINT_W, | ||
832 | (unsigned long)(&ksym_selftest_dummy)); | 833 | (unsigned long)(&ksym_selftest_dummy)); |
833 | 834 | ||
834 | if (ret < 0) { | 835 | if (ret < 0) { |