diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/trace/trace_event_profile.c | 79 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 97 |
2 files changed, 155 insertions, 21 deletions
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 55a25c933d15..3aaa77c3309b 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -8,6 +8,54 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include "trace.h" | 9 | #include "trace.h" |
10 | 10 | ||
11 | /* | ||
12 | * We can't use a size but a type in alloc_percpu() | ||
13 | * So let's create a dummy type that matches the desired size | ||
14 | */ | ||
15 | typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; | ||
16 | |||
17 | char *trace_profile_buf; | ||
18 | char *trace_profile_buf_nmi; | ||
19 | |||
20 | /* Count the events in use (per event id, not per instance) */ | ||
21 | static int total_profile_count; | ||
22 | |||
23 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) | ||
24 | { | ||
25 | char *buf; | ||
26 | int ret = -ENOMEM; | ||
27 | |||
28 | if (atomic_inc_return(&event->profile_count)) | ||
29 | return 0; | ||
30 | |||
31 | if (!total_profile_count++) { | ||
32 | buf = (char *)alloc_percpu(profile_buf_t); | ||
33 | if (!buf) | ||
34 | goto fail_buf; | ||
35 | |||
36 | rcu_assign_pointer(trace_profile_buf, buf); | ||
37 | |||
38 | buf = (char *)alloc_percpu(profile_buf_t); | ||
39 | if (!buf) | ||
40 | goto fail_buf_nmi; | ||
41 | |||
42 | rcu_assign_pointer(trace_profile_buf_nmi, buf); | ||
43 | } | ||
44 | |||
45 | ret = event->profile_enable(); | ||
46 | if (!ret) | ||
47 | return 0; | ||
48 | |||
49 | kfree(trace_profile_buf_nmi); | ||
50 | fail_buf_nmi: | ||
51 | kfree(trace_profile_buf); | ||
52 | fail_buf: | ||
53 | total_profile_count--; | ||
54 | atomic_dec(&event->profile_count); | ||
55 | |||
56 | return ret; | ||
57 | } | ||
58 | |||
11 | int ftrace_profile_enable(int event_id) | 59 | int ftrace_profile_enable(int event_id) |
12 | { | 60 | { |
13 | struct ftrace_event_call *event; | 61 | struct ftrace_event_call *event; |
@@ -17,7 +65,7 @@ int ftrace_profile_enable(int event_id) | |||
17 | list_for_each_entry(event, &ftrace_events, list) { | 65 | list_for_each_entry(event, &ftrace_events, list) { |
18 | if (event->id == event_id && event->profile_enable && | 66 | if (event->id == event_id && event->profile_enable && |
19 | try_module_get(event->mod)) { | 67 | try_module_get(event->mod)) { |
20 | ret = event->profile_enable(event); | 68 | ret = ftrace_profile_enable_event(event); |
21 | break; | 69 | break; |
22 | } | 70 | } |
23 | } | 71 | } |
@@ -26,6 +74,33 @@ int ftrace_profile_enable(int event_id) | |||
26 | return ret; | 74 | return ret; |
27 | } | 75 | } |
28 | 76 | ||
77 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) | ||
78 | { | ||
79 | char *buf, *nmi_buf; | ||
80 | |||
81 | if (!atomic_add_negative(-1, &event->profile_count)) | ||
82 | return; | ||
83 | |||
84 | event->profile_disable(); | ||
85 | |||
86 | if (!--total_profile_count) { | ||
87 | buf = trace_profile_buf; | ||
88 | rcu_assign_pointer(trace_profile_buf, NULL); | ||
89 | |||
90 | nmi_buf = trace_profile_buf_nmi; | ||
91 | rcu_assign_pointer(trace_profile_buf_nmi, NULL); | ||
92 | |||
93 | /* | ||
94 | * Ensure every events in profiling have finished before | ||
95 | * releasing the buffers | ||
96 | */ | ||
97 | synchronize_sched(); | ||
98 | |||
99 | free_percpu(buf); | ||
100 | free_percpu(nmi_buf); | ||
101 | } | ||
102 | } | ||
103 | |||
29 | void ftrace_profile_disable(int event_id) | 104 | void ftrace_profile_disable(int event_id) |
30 | { | 105 | { |
31 | struct ftrace_event_call *event; | 106 | struct ftrace_event_call *event; |
@@ -33,7 +108,7 @@ void ftrace_profile_disable(int event_id) | |||
33 | mutex_lock(&event_mutex); | 108 | mutex_lock(&event_mutex); |
34 | list_for_each_entry(event, &ftrace_events, list) { | 109 | list_for_each_entry(event, &ftrace_events, list) { |
35 | if (event->id == event_id) { | 110 | if (event->id == event_id) { |
36 | event->profile_disable(event); | 111 | ftrace_profile_disable_event(event); |
37 | module_put(event->mod); | 112 | module_put(event->mod); |
38 | break; | 113 | break; |
39 | } | 114 | } |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3c6a0e..7a3550cf2597 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit; | |||
384 | 384 | ||
385 | static void prof_syscall_enter(struct pt_regs *regs, long id) | 385 | static void prof_syscall_enter(struct pt_regs *regs, long id) |
386 | { | 386 | { |
387 | struct syscall_trace_enter *rec; | ||
388 | struct syscall_metadata *sys_data; | 387 | struct syscall_metadata *sys_data; |
388 | struct syscall_trace_enter *rec; | ||
389 | unsigned long flags; | ||
390 | char *raw_data; | ||
389 | int syscall_nr; | 391 | int syscall_nr; |
390 | int size; | 392 | int size; |
393 | int cpu; | ||
391 | 394 | ||
392 | syscall_nr = syscall_get_nr(current, regs); | 395 | syscall_nr = syscall_get_nr(current, regs); |
393 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 396 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
402 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | 405 | size = ALIGN(size + sizeof(u32), sizeof(u64)); |
403 | size -= sizeof(u32); | 406 | size -= sizeof(u32); |
404 | 407 | ||
405 | do { | 408 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
406 | char raw_data[size]; | 409 | "profile buffer not large enough")) |
410 | return; | ||
411 | |||
412 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
413 | local_irq_save(flags); | ||
407 | 414 | ||
408 | /* zero the dead bytes from align to not leak stack to user */ | 415 | cpu = smp_processor_id(); |
409 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 416 | |
417 | if (in_nmi()) | ||
418 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
419 | else | ||
420 | raw_data = rcu_dereference(trace_profile_buf); | ||
421 | |||
422 | if (!raw_data) | ||
423 | goto end; | ||
410 | 424 | ||
411 | rec = (struct syscall_trace_enter *) raw_data; | 425 | raw_data = per_cpu_ptr(raw_data, cpu); |
412 | tracing_generic_entry_update(&rec->ent, 0, 0); | 426 | |
413 | rec->ent.type = sys_data->enter_id; | 427 | /* zero the dead bytes from align to not leak stack to user */ |
414 | rec->nr = syscall_nr; | 428 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
415 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 429 | |
416 | (unsigned long *)&rec->args); | 430 | rec = (struct syscall_trace_enter *) raw_data; |
417 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | 431 | tracing_generic_entry_update(&rec->ent, 0, 0); |
418 | } while(0); | 432 | rec->ent.type = sys_data->enter_id; |
433 | rec->nr = syscall_nr; | ||
434 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | ||
435 | (unsigned long *)&rec->args); | ||
436 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | ||
437 | |||
438 | end: | ||
439 | local_irq_restore(flags); | ||
419 | } | 440 | } |
420 | 441 | ||
421 | int reg_prof_syscall_enter(char *name) | 442 | int reg_prof_syscall_enter(char *name) |
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name) | |||
460 | static void prof_syscall_exit(struct pt_regs *regs, long ret) | 481 | static void prof_syscall_exit(struct pt_regs *regs, long ret) |
461 | { | 482 | { |
462 | struct syscall_metadata *sys_data; | 483 | struct syscall_metadata *sys_data; |
463 | struct syscall_trace_exit rec; | 484 | struct syscall_trace_exit *rec; |
485 | unsigned long flags; | ||
464 | int syscall_nr; | 486 | int syscall_nr; |
487 | char *raw_data; | ||
488 | int size; | ||
489 | int cpu; | ||
465 | 490 | ||
466 | syscall_nr = syscall_get_nr(current, regs); | 491 | syscall_nr = syscall_get_nr(current, regs); |
467 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 492 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
471 | if (!sys_data) | 496 | if (!sys_data) |
472 | return; | 497 | return; |
473 | 498 | ||
474 | tracing_generic_entry_update(&rec.ent, 0, 0); | 499 | /* We can probably do that at build time */ |
475 | rec.ent.type = sys_data->exit_id; | 500 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); |
476 | rec.nr = syscall_nr; | 501 | size -= sizeof(u32); |
477 | rec.ret = syscall_get_return_value(current, regs); | ||
478 | 502 | ||
479 | perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); | 503 | /* |
504 | * Impossible, but be paranoid with the future | ||
505 | * How to put this check outside runtime? | ||
506 | */ | ||
507 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | ||
508 | "exit event has grown above profile buffer size")) | ||
509 | return; | ||
510 | |||
511 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
512 | local_irq_save(flags); | ||
513 | cpu = smp_processor_id(); | ||
514 | |||
515 | if (in_nmi()) | ||
516 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
517 | else | ||
518 | raw_data = rcu_dereference(trace_profile_buf); | ||
519 | |||
520 | if (!raw_data) | ||
521 | goto end; | ||
522 | |||
523 | raw_data = per_cpu_ptr(raw_data, cpu); | ||
524 | |||
525 | /* zero the dead bytes from align to not leak stack to user */ | ||
526 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
527 | |||
528 | rec = (struct syscall_trace_exit *)raw_data; | ||
529 | |||
530 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
531 | rec->ent.type = sys_data->exit_id; | ||
532 | rec->nr = syscall_nr; | ||
533 | rec->ret = syscall_get_return_value(current, regs); | ||
534 | |||
535 | perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size); | ||
536 | |||
537 | end: | ||
538 | local_irq_restore(flags); | ||
480 | } | 539 | } |
481 | 540 | ||
482 | int reg_prof_syscall_exit(char *name) | 541 | int reg_prof_syscall_exit(char *name) |