diff options
author | Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> | 2010-01-27 20:32:29 -0500 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2010-01-28 20:02:57 -0500 |
commit | 430ad5a600a83956749307b13257c464c3826b55 (patch) | |
tree | 9cd3dd3f54e29397ff303478de9fe6902f675b9b /kernel/trace | |
parent | 339ce1a4dc2ca26444c4f65c31b71a5056f3bb0b (diff) |
perf: Factorize trace events raw sample buffer operations
Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to
gather the common code that operates on the raw events sampling buffer.
This cleans up redundant code between regular trace events, syscall
events and kprobe events.
Changelog v1->v2:
- Rename the functions as per Masami's and Frederic's suggestions
- Add __kprobes to ftrace_perf_buf_prepare() and make
ftrace_perf_buf_submit() inline, as per Masami's suggestion
- Export ftrace_perf_buf_prepare() since modules will use it
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace_event_profile.c | 52 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 86 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 71 |
3 files changed, 67 insertions, 142 deletions
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 9e25573242c..f0d69300507 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -6,14 +6,12 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/kprobes.h> | ||
9 | #include "trace.h" | 10 | #include "trace.h" |
10 | 11 | ||
11 | 12 | ||
12 | char *perf_trace_buf; | 13 | static char *perf_trace_buf; |
13 | EXPORT_SYMBOL_GPL(perf_trace_buf); | 14 | static char *perf_trace_buf_nmi; |
14 | |||
15 | char *perf_trace_buf_nmi; | ||
16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); | ||
17 | 15 | ||
18 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; | 16 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; |
19 | 17 | ||
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) | |||
120 | } | 118 | } |
121 | mutex_unlock(&event_mutex); | 119 | mutex_unlock(&event_mutex); |
122 | } | 120 | } |
121 | |||
122 | __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, | ||
123 | int *rctxp, unsigned long *irq_flags) | ||
124 | { | ||
125 | struct trace_entry *entry; | ||
126 | char *trace_buf, *raw_data; | ||
127 | int pc, cpu; | ||
128 | |||
129 | pc = preempt_count(); | ||
130 | |||
131 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
132 | local_irq_save(*irq_flags); | ||
133 | |||
134 | *rctxp = perf_swevent_get_recursion_context(); | ||
135 | if (*rctxp < 0) | ||
136 | goto err_recursion; | ||
137 | |||
138 | cpu = smp_processor_id(); | ||
139 | |||
140 | if (in_nmi()) | ||
141 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
142 | else | ||
143 | trace_buf = rcu_dereference(perf_trace_buf); | ||
144 | |||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | |||
148 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
149 | |||
150 | /* zero the dead bytes from align to not leak stack to user */ | ||
151 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
152 | |||
153 | entry = (struct trace_entry *)raw_data; | ||
154 | tracing_generic_entry_update(entry, *irq_flags, pc); | ||
155 | entry->type = type; | ||
156 | |||
157 | return raw_data; | ||
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | local_irq_restore(*irq_flags); | ||
162 | return NULL; | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d6266cad695..2e28ee36646 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1243 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1243 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1244 | struct ftrace_event_call *call = &tp->call; | 1244 | struct ftrace_event_call *call = &tp->call; |
1245 | struct kprobe_trace_entry *entry; | 1245 | struct kprobe_trace_entry *entry; |
1246 | struct trace_entry *ent; | 1246 | int size, __size, i; |
1247 | int size, __size, i, pc, __cpu; | ||
1248 | unsigned long irq_flags; | 1247 | unsigned long irq_flags; |
1249 | char *trace_buf; | ||
1250 | char *raw_data; | ||
1251 | int rctx; | 1248 | int rctx; |
1252 | 1249 | ||
1253 | pc = preempt_count(); | ||
1254 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | 1250 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); |
1255 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1251 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1256 | size -= sizeof(u32); | 1252 | size -= sizeof(u32); |
@@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, | |||
1258 | "profile buffer not large enough")) | 1254 | "profile buffer not large enough")) |
1259 | return 0; | 1255 | return 0; |
1260 | 1256 | ||
1261 | /* | 1257 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1262 | * Protect the non nmi buffer | 1258 | if (!entry) |
1263 | * This also protects the rcu read side | 1259 | return 0; |
1264 | */ | ||
1265 | local_irq_save(irq_flags); | ||
1266 | |||
1267 | rctx = perf_swevent_get_recursion_context(); | ||
1268 | if (rctx < 0) | ||
1269 | goto end_recursion; | ||
1270 | |||
1271 | __cpu = smp_processor_id(); | ||
1272 | |||
1273 | if (in_nmi()) | ||
1274 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1275 | else | ||
1276 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1277 | |||
1278 | if (!trace_buf) | ||
1279 | goto end; | ||
1280 | |||
1281 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1282 | |||
1283 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1284 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1285 | entry = (struct kprobe_trace_entry *)raw_data; | ||
1286 | ent = &entry->ent; | ||
1287 | 1260 | ||
1288 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1289 | ent->type = call->id; | ||
1290 | entry->nargs = tp->nr_args; | 1261 | entry->nargs = tp->nr_args; |
1291 | entry->ip = (unsigned long)kp->addr; | 1262 | entry->ip = (unsigned long)kp->addr; |
1292 | for (i = 0; i < tp->nr_args; i++) | 1263 | for (i = 0; i < tp->nr_args; i++) |
1293 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1264 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1294 | perf_tp_event(call->id, entry->ip, 1, entry, size); | ||
1295 | 1265 | ||
1296 | end: | 1266 | ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); |
1297 | perf_swevent_put_recursion_context(rctx); | ||
1298 | end_recursion: | ||
1299 | local_irq_restore(irq_flags); | ||
1300 | 1267 | ||
1301 | return 0; | 1268 | return 0; |
1302 | } | 1269 | } |
@@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1308 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1275 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1309 | struct ftrace_event_call *call = &tp->call; | 1276 | struct ftrace_event_call *call = &tp->call; |
1310 | struct kretprobe_trace_entry *entry; | 1277 | struct kretprobe_trace_entry *entry; |
1311 | struct trace_entry *ent; | 1278 | int size, __size, i; |
1312 | int size, __size, i, pc, __cpu; | ||
1313 | unsigned long irq_flags; | 1279 | unsigned long irq_flags; |
1314 | char *trace_buf; | ||
1315 | char *raw_data; | ||
1316 | int rctx; | 1280 | int rctx; |
1317 | 1281 | ||
1318 | pc = preempt_count(); | ||
1319 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | 1282 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); |
1320 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1283 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1321 | size -= sizeof(u32); | 1284 | size -= sizeof(u32); |
@@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | |||
1323 | "profile buffer not large enough")) | 1286 | "profile buffer not large enough")) |
1324 | return 0; | 1287 | return 0; |
1325 | 1288 | ||
1326 | /* | 1289 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1327 | * Protect the non nmi buffer | 1290 | if (!entry) |
1328 | * This also protects the rcu read side | 1291 | return 0; |
1329 | */ | ||
1330 | local_irq_save(irq_flags); | ||
1331 | |||
1332 | rctx = perf_swevent_get_recursion_context(); | ||
1333 | if (rctx < 0) | ||
1334 | goto end_recursion; | ||
1335 | |||
1336 | __cpu = smp_processor_id(); | ||
1337 | |||
1338 | if (in_nmi()) | ||
1339 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1340 | else | ||
1341 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1342 | |||
1343 | if (!trace_buf) | ||
1344 | goto end; | ||
1345 | |||
1346 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1347 | |||
1348 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1349 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1350 | entry = (struct kretprobe_trace_entry *)raw_data; | ||
1351 | ent = &entry->ent; | ||
1352 | 1292 | ||
1353 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1354 | ent->type = call->id; | ||
1355 | entry->nargs = tp->nr_args; | 1293 | entry->nargs = tp->nr_args; |
1356 | entry->func = (unsigned long)tp->rp.kp.addr; | 1294 | entry->func = (unsigned long)tp->rp.kp.addr; |
1357 | entry->ret_ip = (unsigned long)ri->ret_addr; | 1295 | entry->ret_ip = (unsigned long)ri->ret_addr; |
1358 | for (i = 0; i < tp->nr_args; i++) | 1296 | for (i = 0; i < tp->nr_args; i++) |
1359 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1297 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1360 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); | ||
1361 | 1298 | ||
1362 | end: | 1299 | ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); |
1363 | perf_swevent_put_recursion_context(rctx); | ||
1364 | end_recursion: | ||
1365 | local_irq_restore(irq_flags); | ||
1366 | 1300 | ||
1367 | return 0; | 1301 | return 0; |
1368 | } | 1302 | } |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f694f66d75b..4e332b9e449 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
433 | struct syscall_metadata *sys_data; | 433 | struct syscall_metadata *sys_data; |
434 | struct syscall_trace_enter *rec; | 434 | struct syscall_trace_enter *rec; |
435 | unsigned long flags; | 435 | unsigned long flags; |
436 | char *trace_buf; | ||
437 | char *raw_data; | ||
438 | int syscall_nr; | 436 | int syscall_nr; |
439 | int rctx; | 437 | int rctx; |
440 | int size; | 438 | int size; |
441 | int cpu; | ||
442 | 439 | ||
443 | syscall_nr = syscall_get_nr(current, regs); | 440 | syscall_nr = syscall_get_nr(current, regs); |
444 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 441 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
457 | "profile buffer not large enough")) | 454 | "profile buffer not large enough")) |
458 | return; | 455 | return; |
459 | 456 | ||
460 | /* Protect the per cpu buffer, begin the rcu read side */ | 457 | rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, |
461 | local_irq_save(flags); | 458 | sys_data->enter_event->id, &rctx, &flags); |
462 | 459 | if (!rec) | |
463 | rctx = perf_swevent_get_recursion_context(); | 460 | return; |
464 | if (rctx < 0) | ||
465 | goto end_recursion; | ||
466 | |||
467 | cpu = smp_processor_id(); | ||
468 | |||
469 | trace_buf = rcu_dereference(perf_trace_buf); | ||
470 | |||
471 | if (!trace_buf) | ||
472 | goto end; | ||
473 | |||
474 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
475 | |||
476 | /* zero the dead bytes from align to not leak stack to user */ | ||
477 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
478 | 461 | ||
479 | rec = (struct syscall_trace_enter *) raw_data; | ||
480 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
481 | rec->ent.type = sys_data->enter_event->id; | ||
482 | rec->nr = syscall_nr; | 462 | rec->nr = syscall_nr; |
483 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 463 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
484 | (unsigned long *)&rec->args); | 464 | (unsigned long *)&rec->args); |
485 | perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); | 465 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
486 | |||
487 | end: | ||
488 | perf_swevent_put_recursion_context(rctx); | ||
489 | end_recursion: | ||
490 | local_irq_restore(flags); | ||
491 | } | 466 | } |
492 | 467 | ||
493 | int prof_sysenter_enable(struct ftrace_event_call *call) | 468 | int prof_sysenter_enable(struct ftrace_event_call *call) |
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
531 | struct syscall_trace_exit *rec; | 506 | struct syscall_trace_exit *rec; |
532 | unsigned long flags; | 507 | unsigned long flags; |
533 | int syscall_nr; | 508 | int syscall_nr; |
534 | char *trace_buf; | ||
535 | char *raw_data; | ||
536 | int rctx; | 509 | int rctx; |
537 | int size; | 510 | int size; |
538 | int cpu; | ||
539 | 511 | ||
540 | syscall_nr = syscall_get_nr(current, regs); | 512 | syscall_nr = syscall_get_nr(current, regs); |
541 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 513 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
557 | "exit event has grown above profile buffer size")) | 529 | "exit event has grown above profile buffer size")) |
558 | return; | 530 | return; |
559 | 531 | ||
560 | /* Protect the per cpu buffer, begin the rcu read side */ | 532 | rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, |
561 | local_irq_save(flags); | 533 | sys_data->exit_event->id, &rctx, &flags); |
562 | 534 | if (!rec) | |
563 | rctx = perf_swevent_get_recursion_context(); | 535 | return; |
564 | if (rctx < 0) | ||
565 | goto end_recursion; | ||
566 | |||
567 | cpu = smp_processor_id(); | ||
568 | |||
569 | trace_buf = rcu_dereference(perf_trace_buf); | ||
570 | |||
571 | if (!trace_buf) | ||
572 | goto end; | ||
573 | |||
574 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
575 | |||
576 | /* zero the dead bytes from align to not leak stack to user */ | ||
577 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
578 | |||
579 | rec = (struct syscall_trace_exit *)raw_data; | ||
580 | 536 | ||
581 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
582 | rec->ent.type = sys_data->exit_event->id; | ||
583 | rec->nr = syscall_nr; | 537 | rec->nr = syscall_nr; |
584 | rec->ret = syscall_get_return_value(current, regs); | 538 | rec->ret = syscall_get_return_value(current, regs); |
585 | 539 | ||
586 | perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); | 540 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
587 | |||
588 | end: | ||
589 | perf_swevent_put_recursion_context(rctx); | ||
590 | end_recursion: | ||
591 | local_irq_restore(flags); | ||
592 | } | 541 | } |
593 | 542 | ||
594 | int prof_sysexit_enable(struct ftrace_event_call *call) | 543 | int prof_sysexit_enable(struct ftrace_event_call *call) |