about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/trace/trace_syscalls.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace_syscalls.c')
-rw-r--r-- kernel/trace/trace_syscalls.c 207
1 files changed, 179 insertions, 28 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0e..d00d1a8f1f26 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,7 +2,7 @@
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/ftrace.h> 4#include <linux/ftrace.h>
5#include <linux/perf_counter.h> 5#include <linux/perf_event.h>
6#include <asm/syscall.h> 6#include <asm/syscall.h>
7 7
8#include "trace_output.h" 8#include "trace_output.h"
@@ -14,6 +14,69 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 16
/*
 * Start and end markers of the syscall metadata records.
 * find_syscall_meta() casts both to struct syscall_metadata * and walks
 * the range between them.
 */
extern unsigned long __start_syscalls_metadata[], __stop_syscalls_metadata[];

/*
 * Table of metadata pointers indexed by syscall number. Stays NULL until
 * init_ftrace_syscalls() allocates and fills it.
 */
static struct syscall_metadata **syscalls_metadata;
21
22static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
23{
24 struct syscall_metadata *start;
25 struct syscall_metadata *stop;
26 char str[KSYM_SYMBOL_LEN];
27
28
29 start = (struct syscall_metadata *)__start_syscalls_metadata;
30 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
31 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
32
33 for ( ; start < stop; start++) {
34 /*
35 * Only compare after the "sys" prefix. Archs that use
36 * syscall wrappers may have syscalls symbols aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
38 * mismatch.
39 */
40 if (start->name && !strcmp(start->name + 3, str + 3))
41 return start;
42 }
43 return NULL;
44}
45
46static struct syscall_metadata *syscall_nr_to_meta(int nr)
47{
48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
49 return NULL;
50
51 return syscalls_metadata[nr];
52}
53
54int syscall_name_to_nr(char *name)
55{
56 int i;
57
58 if (!syscalls_metadata)
59 return -1;
60
61 for (i = 0; i < NR_syscalls; i++) {
62 if (syscalls_metadata[i]) {
63 if (!strcmp(syscalls_metadata[i]->name, name))
64 return i;
65 }
66 }
67 return -1;
68}
69
70void set_syscall_enter_id(int num, int id)
71{
72 syscalls_metadata[num]->enter_id = id;
73}
74
75void set_syscall_exit_id(int num, int id)
76{
77 syscalls_metadata[num]->exit_id = id;
78}
79
17enum print_line_t 80enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 81print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 82{
@@ -103,7 +166,8 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 166#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 167 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 168 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 169 #type, #name, offsetof(typeof(trace), name), \
170 sizeof(trace.name), is_signed_type(type)
107 171
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 172int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
109{ 173{
@@ -120,7 +184,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
120 if (!entry) 184 if (!entry)
121 return 0; 185 return 0;
122 186
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 187 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
188 "\tsigned:%u;\n",
124 SYSCALL_FIELD(int, nr)); 189 SYSCALL_FIELD(int, nr));
125 if (!ret) 190 if (!ret)
126 return 0; 191 return 0;
@@ -130,8 +195,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
130 entry->args[i]); 195 entry->args[i]);
131 if (!ret) 196 if (!ret)
132 return 0; 197 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 198 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
134 sizeof(unsigned long)); 199 "\tsigned:%u;\n", offset,
200 sizeof(unsigned long),
201 is_signed_type(unsigned long));
135 if (!ret) 202 if (!ret)
136 return 0; 203 return 0;
137 offset += sizeof(unsigned long); 204 offset += sizeof(unsigned long);
@@ -163,10 +230,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
163 struct syscall_trace_exit trace; 230 struct syscall_trace_exit trace;
164 231
165 ret = trace_seq_printf(s, 232 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 233 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 234 "\tsigned:%u;\n"
235 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
236 "\tsigned:%u;\n",
168 SYSCALL_FIELD(int, nr), 237 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret)); 238 SYSCALL_FIELD(long, ret));
170 if (!ret) 239 if (!ret)
171 return 0; 240 return 0;
172 241
@@ -212,7 +281,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
212 if (ret) 281 if (ret)
213 return ret; 282 return ret;
214 283
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, 284 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 285 FILTER_OTHER);
217 286
218 return ret; 287 return ret;
@@ -375,6 +444,29 @@ struct trace_event event_syscall_exit = {
375 .trace = print_syscall_exit, 444 .trace = print_syscall_exit,
376}; 445};
377 446
447int __init init_ftrace_syscalls(void)
448{
449 struct syscall_metadata *meta;
450 unsigned long addr;
451 int i;
452
453 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
454 NR_syscalls, GFP_KERNEL);
455 if (!syscalls_metadata) {
456 WARN_ON(1);
457 return -ENOMEM;
458 }
459
460 for (i = 0; i < NR_syscalls; i++) {
461 addr = arch_syscall_addr(i);
462 meta = find_syscall_meta(addr);
463 syscalls_metadata[i] = meta;
464 }
465
466 return 0;
467}
468core_initcall(init_ftrace_syscalls);
469
378#ifdef CONFIG_EVENT_PROFILE 470#ifdef CONFIG_EVENT_PROFILE
379 471
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 472static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
@@ -384,10 +476,13 @@ static int sys_prof_refcount_exit;
384 476
385static void prof_syscall_enter(struct pt_regs *regs, long id) 477static void prof_syscall_enter(struct pt_regs *regs, long id)
386{ 478{
387 struct syscall_trace_enter *rec;
388 struct syscall_metadata *sys_data; 479 struct syscall_metadata *sys_data;
480 struct syscall_trace_enter *rec;
481 unsigned long flags;
482 char *raw_data;
389 int syscall_nr; 483 int syscall_nr;
390 int size; 484 int size;
485 int cpu;
391 486
392 syscall_nr = syscall_get_nr(current, regs); 487 syscall_nr = syscall_get_nr(current, regs);
393 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 488 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +497,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
402 size = ALIGN(size + sizeof(u32), sizeof(u64)); 497 size = ALIGN(size + sizeof(u32), sizeof(u64));
403 size -= sizeof(u32); 498 size -= sizeof(u32);
404 499
405 do { 500 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
406 char raw_data[size]; 501 "profile buffer not large enough"))
502 return;
503
504 /* Protect the per cpu buffer, begin the rcu read side */
505 local_irq_save(flags);
407 506
408 /* zero the dead bytes from align to not leak stack to user */ 507 cpu = smp_processor_id();
409 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
410 508
411 rec = (struct syscall_trace_enter *) raw_data; 509 if (in_nmi())
412 tracing_generic_entry_update(&rec->ent, 0, 0); 510 raw_data = rcu_dereference(trace_profile_buf_nmi);
413 rec->ent.type = sys_data->enter_id; 511 else
414 rec->nr = syscall_nr; 512 raw_data = rcu_dereference(trace_profile_buf);
415 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 513
416 (unsigned long *)&rec->args); 514 if (!raw_data)
417 perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); 515 goto end;
418 } while(0); 516
517 raw_data = per_cpu_ptr(raw_data, cpu);
518
519 /* zero the dead bytes from align to not leak stack to user */
520 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
521
522 rec = (struct syscall_trace_enter *) raw_data;
523 tracing_generic_entry_update(&rec->ent, 0, 0);
524 rec->ent.type = sys_data->enter_id;
525 rec->nr = syscall_nr;
526 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
527 (unsigned long *)&rec->args);
528 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
529
530end:
531 local_irq_restore(flags);
419} 532}
420 533
421int reg_prof_syscall_enter(char *name) 534int reg_prof_syscall_enter(char *name)
@@ -460,8 +573,12 @@ void unreg_prof_syscall_enter(char *name)
460static void prof_syscall_exit(struct pt_regs *regs, long ret) 573static void prof_syscall_exit(struct pt_regs *regs, long ret)
461{ 574{
462 struct syscall_metadata *sys_data; 575 struct syscall_metadata *sys_data;
463 struct syscall_trace_exit rec; 576 struct syscall_trace_exit *rec;
577 unsigned long flags;
464 int syscall_nr; 578 int syscall_nr;
579 char *raw_data;
580 int size;
581 int cpu;
465 582
466 syscall_nr = syscall_get_nr(current, regs); 583 syscall_nr = syscall_get_nr(current, regs);
467 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 584 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +588,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
471 if (!sys_data) 588 if (!sys_data)
472 return; 589 return;
473 590
474 tracing_generic_entry_update(&rec.ent, 0, 0); 591 /* We can probably do that at build time */
475 rec.ent.type = sys_data->exit_id; 592 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
476 rec.nr = syscall_nr; 593 size -= sizeof(u32);
477 rec.ret = syscall_get_return_value(current, regs); 594
595 /*
596 * Impossible, but be paranoid with the future
597 * How to put this check outside runtime?
598 */
599 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
600 "exit event has grown above profile buffer size"))
601 return;
602
603 /* Protect the per cpu buffer, begin the rcu read side */
604 local_irq_save(flags);
605 cpu = smp_processor_id();
606
607 if (in_nmi())
608 raw_data = rcu_dereference(trace_profile_buf_nmi);
609 else
610 raw_data = rcu_dereference(trace_profile_buf);
611
612 if (!raw_data)
613 goto end;
614
615 raw_data = per_cpu_ptr(raw_data, cpu);
616
617 /* zero the dead bytes from align to not leak stack to user */
618 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
619
620 rec = (struct syscall_trace_exit *)raw_data;
478 621
479 perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); 622 tracing_generic_entry_update(&rec->ent, 0, 0);
623 rec->ent.type = sys_data->exit_id;
624 rec->nr = syscall_nr;
625 rec->ret = syscall_get_return_value(current, regs);
626
627 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
628
629end:
630 local_irq_restore(flags);
480} 631}
481 632
482int reg_prof_syscall_exit(char *name) 633int reg_prof_syscall_exit(char *name)