Diffstat (limited to 'kernel/trace/trace_syscalls.c')
-rw-r--r--  kernel/trace/trace_syscalls.c  207
1 files changed, 179 insertions, 28 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0e..d00d1a8f1f26 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,7 +2,7 @@
 #include <trace/events/syscalls.h>
 #include <linux/kernel.h>
 #include <linux/ftrace.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
@@ -14,6 +14,69 @@ static int sys_refcount_exit;
 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
+{
+        struct syscall_metadata *start;
+        struct syscall_metadata *stop;
+        char str[KSYM_SYMBOL_LEN];
+
+
+        start = (struct syscall_metadata *)__start_syscalls_metadata;
+        stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+        kallsyms_lookup(syscall, NULL, NULL, NULL, str);
+
+        for ( ; start < stop; start++) {
+                /*
+                 * Only compare after the "sys" prefix. Archs that use
+                 * syscall wrappers may have syscalls symbols aliases prefixed
+                 * with "SyS" instead of "sys", leading to an unwanted
+                 * mismatch.
+                 */
+                if (start->name && !strcmp(start->name + 3, str + 3))
+                        return start;
+        }
+        return NULL;
+}
+
+static struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
+                return NULL;
+
+        return syscalls_metadata[nr];
+}
+
+int syscall_name_to_nr(char *name)
+{
+        int i;
+
+        if (!syscalls_metadata)
+                return -1;
+
+        for (i = 0; i < NR_syscalls; i++) {
+                if (syscalls_metadata[i]) {
+                        if (!strcmp(syscalls_metadata[i]->name, name))
+                                return i;
+                }
+        }
+        return -1;
+}
+
+void set_syscall_enter_id(int num, int id)
+{
+        syscalls_metadata[num]->enter_id = id;
+}
+
+void set_syscall_exit_id(int num, int id)
+{
+        syscalls_metadata[num]->exit_id = id;
+}
+
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -103,7 +166,8 @@ extern char *__bad_type_size(void);
 #define SYSCALL_FIELD(type, name)                                      \
         sizeof(type) != sizeof(trace.name) ?                           \
                 __bad_type_size() :                                    \
-                #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
+                #type, #name, offsetof(typeof(trace), name),           \
+                sizeof(trace.name), is_signed_type(type)
 
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
@@ -120,7 +184,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
         if (!entry)
                 return 0;
 
-        ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+        ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                               "\tsigned:%u;\n",
                                SYSCALL_FIELD(int, nr));
         if (!ret)
                 return 0;
@@ -130,8 +195,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
                                        entry->args[i]);
                 if (!ret)
                         return 0;
-                ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
-                                       sizeof(unsigned long));
+                ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
+                                       "\tsigned:%u;\n", offset,
+                                       sizeof(unsigned long),
+                                       is_signed_type(unsigned long));
                 if (!ret)
                         return 0;
                 offset += sizeof(unsigned long);
@@ -163,10 +230,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
         struct syscall_trace_exit trace;
 
         ret = trace_seq_printf(s,
-                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                        "\tsigned:%u;\n"
+                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                        "\tsigned:%u;\n",
                         SYSCALL_FIELD(int, nr),
-                        SYSCALL_FIELD(unsigned long, ret));
+                        SYSCALL_FIELD(long, ret));
         if (!ret)
                 return 0;
 
@@ -212,7 +281,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
         if (ret)
                 return ret;
 
-        ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
+        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                  FILTER_OTHER);
 
         return ret;
@@ -375,6 +444,29 @@ struct trace_event event_syscall_exit = {
         .trace = print_syscall_exit,
 };
 
+int __init init_ftrace_syscalls(void)
+{
+        struct syscall_metadata *meta;
+        unsigned long addr;
+        int i;
+
+        syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+                                        NR_syscalls, GFP_KERNEL);
+        if (!syscalls_metadata) {
+                WARN_ON(1);
+                return -ENOMEM;
+        }
+
+        for (i = 0; i < NR_syscalls; i++) {
+                addr = arch_syscall_addr(i);
+                meta = find_syscall_meta(addr);
+                syscalls_metadata[i] = meta;
+        }
+
+        return 0;
+}
+core_initcall(init_ftrace_syscalls);
+
 #ifdef CONFIG_EVENT_PROFILE
 
 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
@@ -384,10 +476,13 @@ static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-        struct syscall_trace_enter *rec;
         struct syscall_metadata *sys_data;
+        struct syscall_trace_enter *rec;
+        unsigned long flags;
+        char *raw_data;
         int syscall_nr;
         int size;
+        int cpu;
 
         syscall_nr = syscall_get_nr(current, regs);
         if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +497,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
         size = ALIGN(size + sizeof(u32), sizeof(u64));
         size -= sizeof(u32);
 
-        do {
-                char raw_data[size];
+        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+                      "profile buffer not large enough"))
+                return;
+
+        /* Protect the per cpu buffer, begin the rcu read side */
+        local_irq_save(flags);
 
-                /* zero the dead bytes from align to not leak stack to user */
-                *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+        cpu = smp_processor_id();
 
-                rec = (struct syscall_trace_enter *) raw_data;
-                tracing_generic_entry_update(&rec->ent, 0, 0);
-                rec->ent.type = sys_data->enter_id;
-                rec->nr = syscall_nr;
-                syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-                                       (unsigned long *)&rec->args);
-                perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-        } while(0);
+        if (in_nmi())
+                raw_data = rcu_dereference(trace_profile_buf_nmi);
+        else
+                raw_data = rcu_dereference(trace_profile_buf);
+
+        if (!raw_data)
+                goto end;
+
+        raw_data = per_cpu_ptr(raw_data, cpu);
+
+        /* zero the dead bytes from align to not leak stack to user */
+        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+        rec = (struct syscall_trace_enter *) raw_data;
+        tracing_generic_entry_update(&rec->ent, 0, 0);
+        rec->ent.type = sys_data->enter_id;
+        rec->nr = syscall_nr;
+        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+                               (unsigned long *)&rec->args);
+        perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+        local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -460,8 +573,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
         struct syscall_metadata *sys_data;
-        struct syscall_trace_exit rec;
+        struct syscall_trace_exit *rec;
+        unsigned long flags;
         int syscall_nr;
+        char *raw_data;
+        int size;
+        int cpu;
 
         syscall_nr = syscall_get_nr(current, regs);
         if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +588,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
         if (!sys_data)
                 return;
 
-        tracing_generic_entry_update(&rec.ent, 0, 0);
-        rec.ent.type = sys_data->exit_id;
-        rec.nr = syscall_nr;
-        rec.ret = syscall_get_return_value(current, regs);
+        /* We can probably do that at build time */
+        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+        size -= sizeof(u32);
+
+        /*
+         * Impossible, but be paranoid with the future
+         * How to put this check outside runtime?
+         */
+        if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+                "exit event has grown above profile buffer size"))
+                return;
+
+        /* Protect the per cpu buffer, begin the rcu read side */
+        local_irq_save(flags);
+        cpu = smp_processor_id();
+
+        if (in_nmi())
+                raw_data = rcu_dereference(trace_profile_buf_nmi);
+        else
+                raw_data = rcu_dereference(trace_profile_buf);
+
+        if (!raw_data)
+                goto end;
+
+        raw_data = per_cpu_ptr(raw_data, cpu);
+
+        /* zero the dead bytes from align to not leak stack to user */
+        *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+        rec = (struct syscall_trace_exit *)raw_data;
 
-        perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+        tracing_generic_entry_update(&rec->ent, 0, 0);
+        rec->ent.type = sys_data->exit_id;
+        rec->nr = syscall_nr;
+        rec->ret = syscall_get_return_value(current, regs);
+
+        perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+        local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)
