Diffstat (limited to 'kernel/trace/trace_syscalls.c')
-rw-r--r--	kernel/trace/trace_syscalls.c	530
1 file changed, 431 insertions(+), 99 deletions(-)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e579645ac86..9fbce6c9d2e1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,30 +1,18 @@
 #include <trace/syscall.h>
+#include <trace/events/syscalls.h>
 #include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/perf_event.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
 #include "trace.h"
 
-/* Keep a counter of the syscall tracing users */
-static int refcount;
-
-/* Prevent from races on thread flags toggling */
 static DEFINE_MUTEX(syscall_trace_lock);
-
-/* Option to display the parameters types */
-enum {
-	TRACE_SYSCALLS_OPT_TYPES = 0x1,
-};
-
-static struct tracer_opt syscalls_opts[] = {
-	{ TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
-	{ }
-};
-
-static struct tracer_flags syscalls_flags = {
-	.val = 0, /* By default: no parameters types */
-	.opts = syscalls_opts
-};
+static int sys_refcount_enter;
+static int sys_refcount_exit;
+static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
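Note: the enabling model changes here from all-or-nothing to per-syscall. The old code kept one global refcount and toggled TIF_SYSCALL_FTRACE on every task; the new globals give each direction (enter/exit) its own refcount plus an NR_syscalls-wide bitmap that the tracepoint callback checks. A minimal sketch of the gating this enables (the bitmap and helpers are from the patch; sys_enter_probe is a hypothetical name, the real callbacks are ftrace_syscall_enter/exit below):

	static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);

	static void sys_enter_probe(struct pt_regs *regs, long id)
	{
		int nr = syscall_get_nr(current, regs);

		/* one probe serves every syscall; the bitmap says
		 * which ones are actually being traced */
		if (nr < 0 || !test_bit(nr, enabled_enter_syscalls))
			return;
		/* ... record the event ... */
	}
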
@@ -35,35 +23,46 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
 	struct syscall_metadata *entry;
 	int i, ret, syscall;
 
-	trace_assign_type(trace, ent);
-
+	trace = (typeof(trace))ent;
 	syscall = trace->nr;
-
 	entry = syscall_nr_to_meta(syscall);
+
 	if (!entry)
 		goto end;
 
+	if (entry->enter_id != ent->type) {
+		WARN_ON_ONCE(1);
+		goto end;
+	}
+
 	ret = trace_seq_printf(s, "%s(", entry->name);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	for (i = 0; i < entry->nb_args; i++) {
 		/* parameter types */
-		if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
+		if (trace_flags & TRACE_ITER_VERBOSE) {
 			ret = trace_seq_printf(s, "%s ", entry->types[i]);
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
 		/* parameter values */
-		ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
+		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
 				       trace->args[i],
-				       i == entry->nb_args - 1 ? ")" : ",");
+				       i == entry->nb_args - 1 ? "" : ", ");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
+	ret = trace_seq_putc(s, ')');
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
 end:
-	trace_seq_printf(s, "\n");
+	ret = trace_seq_putc(s, '\n');
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
 	return TRACE_TYPE_HANDLED;
 }
 
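The separator rework above is mostly about error handling: the old loop smuggled the closing ')' into the last argument's separator and never checked the final newline write, while the new code joins arguments with ", " and emits both ')' and '\n' through trace_seq_putc(), whose return value is checked so an overflowing trace_seq yields TRACE_TYPE_PARTIAL_LINE instead of a silently truncated line. With hypothetical argument values, a rendered entry changes only cosmetically:

	before:  sys_read(fd: 3, buf: 7fff8c0ffb40, count: 1000)
	after:   sys_read(fd: 3, buf: 7fff8c0ffb40, count: 1000)

(the old format string's trailing blank also left a stray space after every argument, including after the closing parenthesis).
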
@@ -77,16 +76,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
 	struct syscall_metadata *entry;
 	int ret;
 
-	trace_assign_type(trace, ent);
-
+	trace = (typeof(trace))ent;
 	syscall = trace->nr;
-
 	entry = syscall_nr_to_meta(syscall);
+
 	if (!entry) {
 		trace_seq_printf(s, "\n");
 		return TRACE_TYPE_HANDLED;
 	}
 
+	if (entry->exit_id != ent->type) {
+		WARN_ON_ONCE(1);
+		return TRACE_TYPE_UNHANDLED;
+	}
+
 	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
 			       trace->ret);
 	if (!ret)
@@ -95,62 +98,140 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
 	return TRACE_TYPE_HANDLED;
 }
 
-void start_ftrace_syscalls(void)
+extern char *__bad_type_size(void);
+
+#define SYSCALL_FIELD(type, name)					\
+	sizeof(type) != sizeof(trace.name) ?				\
+		__bad_type_size() :					\
+		#type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
+
+int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
-	unsigned long flags;
-	struct task_struct *g, *t;
+	int i;
+	int nr;
+	int ret;
+	struct syscall_metadata *entry;
+	struct syscall_trace_enter trace;
+	int offset = offsetof(struct syscall_trace_enter, args);
 
-	mutex_lock(&syscall_trace_lock);
+	nr = syscall_name_to_nr(call->data);
+	entry = syscall_nr_to_meta(nr);
 
-	/* Don't enable the flag on the tasks twice */
-	if (++refcount != 1)
-		goto unlock;
+	if (!entry)
+		return 0;
 
-	arch_init_ftrace_syscalls();
-	read_lock_irqsave(&tasklist_lock, flags);
+	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+			       SYSCALL_FIELD(int, nr));
+	if (!ret)
+		return 0;
 
-	do_each_thread(g, t) {
-		set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
-	} while_each_thread(g, t);
+	for (i = 0; i < entry->nb_args; i++) {
+		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
+				       entry->args[i]);
+		if (!ret)
+			return 0;
+		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
+				       sizeof(unsigned long));
+		if (!ret)
+			return 0;
+		offset += sizeof(unsigned long);
+	}
 
-	read_unlock_irqrestore(&tasklist_lock, flags);
+	trace_seq_puts(s, "\nprint fmt: \"");
+	for (i = 0; i < entry->nb_args; i++) {
+		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
+				       sizeof(unsigned long),
+				       i == entry->nb_args - 1 ? "" : ", ");
+		if (!ret)
+			return 0;
+	}
+	trace_seq_putc(s, '"');
 
-unlock:
-	mutex_unlock(&syscall_trace_lock);
+	for (i = 0; i < entry->nb_args; i++) {
+		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
+				       entry->args[i]);
+		if (!ret)
+			return 0;
+	}
+
+	return trace_seq_putc(s, '\n');
 }
 
-void stop_ftrace_syscalls(void)
+int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
-	unsigned long flags;
-	struct task_struct *g, *t;
+	int ret;
+	struct syscall_trace_exit trace;
 
-	mutex_lock(&syscall_trace_lock);
+	ret = trace_seq_printf(s,
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+			       SYSCALL_FIELD(int, nr),
+			       SYSCALL_FIELD(unsigned long, ret));
+	if (!ret)
+		return 0;
 
-	/* There are perhaps still some users */
-	if (--refcount)
-		goto unlock;
+	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
+}
 
-	read_lock_irqsave(&tasklist_lock, flags);
+int syscall_enter_define_fields(struct ftrace_event_call *call)
+{
+	struct syscall_trace_enter trace;
+	struct syscall_metadata *meta;
+	int ret;
+	int nr;
+	int i;
+	int offset = offsetof(typeof(trace), args);
+
+	nr = syscall_name_to_nr(call->data);
+	meta = syscall_nr_to_meta(nr);
+
+	if (!meta)
+		return 0;
+
+	ret = trace_define_common_fields(call);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < meta->nb_args; i++) {
+		ret = trace_define_field(call, meta->types[i],
+					 meta->args[i], offset,
+					 sizeof(unsigned long), 0,
+					 FILTER_OTHER);
+		offset += sizeof(unsigned long);
+	}
 
-	do_each_thread(g, t) {
-		clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
-	} while_each_thread(g, t);
+	return ret;
+}
 
-	read_unlock_irqrestore(&tasklist_lock, flags);
+int syscall_exit_define_fields(struct ftrace_event_call *call)
+{
+	struct syscall_trace_exit trace;
+	int ret;
 
-unlock:
-	mutex_unlock(&syscall_trace_lock);
+	ret = trace_define_common_fields(call);
+	if (ret)
+		return ret;
+
+	ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
+				 FILTER_OTHER);
+
+	return ret;
 }
 
-void ftrace_syscall_enter(struct pt_regs *regs)
+void ftrace_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_trace_enter *entry;
 	struct syscall_metadata *sys_data;
 	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
 	int size;
 	int syscall_nr;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
+	if (!test_bit(syscall_nr, enabled_enter_syscalls))
+		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
 	if (!sys_data)
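syscall_enter_format() and friends export each event's binary layout to the tracing filesystem, so SYSCALL_FIELD has to guarantee that the declared type really matches the struct field. It does that with a link-time assertion: __bad_type_size() is declared extern above but defined nowhere. A sketch of the mechanism (TYPE_NAME_CHECKED is a hypothetical reduction of SYSCALL_FIELD to just its checking part, not code from the patch):

	extern char *__bad_type_size(void);	/* intentionally never defined */

	/* sizeof() is a compile-time constant, so the conditional folds:
	 * if the sizes match, only the string literal survives and the
	 * call is never emitted; if they ever diverge, the object file
	 * keeps an unresolved reference to __bad_type_size() and the
	 * kernel fails to link, turning a stale format description into
	 * a build error instead of corrupt event data. */
	#define TYPE_NAME_CHECKED(type, var) \
		(sizeof(type) != sizeof(var) ? __bad_type_size() : #type)
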
@@ -158,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
 
 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-	event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
-							0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
+						  size, 0, 0);
 	if (!event)
 		return;
 
@@ -167,24 +248,30 @@ void ftrace_syscall_enter(struct pt_regs *regs)
 	entry->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
 
-	trace_current_buffer_unlock_commit(event, 0, 0);
-	trace_wake_up();
+	if (!filter_current_check_discard(buffer, sys_data->enter_event,
+					  entry, event))
+		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-void ftrace_syscall_exit(struct pt_regs *regs)
+void ftrace_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_trace_exit *entry;
 	struct syscall_metadata *sys_data;
 	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
 	int syscall_nr;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
+	if (!test_bit(syscall_nr, enabled_exit_syscalls))
+		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
 	if (!sys_data)
 		return;
 
-	event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
-						  sizeof(*entry), 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
+						  sizeof(*entry), 0, 0);
 	if (!event)
 		return;
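Two coupled API changes appear in both handlers: trace_current_buffer_lock_reserve() now hands back the struct ring_buffer it reserved from through the new &buffer argument, and the unconditional commit plus trace_wake_up() pair becomes a filtered commit. The filter must run after the record is filled in, because filter predicates match on the recorded field values; an event that fails the filter is discarded from the ring buffer rather than committed. Condensed from the code above into the generic pattern:

	event = trace_current_buffer_lock_reserve(&buffer, id, size, 0, 0);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	/* ... fill in entry ... */
	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
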
@@ -193,58 +280,303 @@ void ftrace_syscall_exit(struct pt_regs *regs)
 	entry->nr = syscall_nr;
 	entry->ret = syscall_get_return_value(current, regs);
 
-	trace_current_buffer_unlock_commit(event, 0, 0);
-	trace_wake_up();
+	if (!filter_current_check_discard(buffer, sys_data->exit_event,
+					  entry, event))
+		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-static int init_syscall_tracer(struct trace_array *tr)
+int reg_event_syscall_enter(void *ptr)
 {
-	start_ftrace_syscalls();
+	int ret = 0;
+	int num;
+	char *name;
+
+	name = (char *)ptr;
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return -ENOSYS;
+	mutex_lock(&syscall_trace_lock);
+	if (!sys_refcount_enter)
+		ret = register_trace_sys_enter(ftrace_syscall_enter);
+	if (ret) {
+		pr_info("event trace: Could not activate"
+				"syscall entry trace point");
+	} else {
+		set_bit(num, enabled_enter_syscalls);
+		sys_refcount_enter++;
+	}
+	mutex_unlock(&syscall_trace_lock);
+	return ret;
+}
+
+void unreg_event_syscall_enter(void *ptr)
+{
+	int num;
+	char *name;
 
-	return 0;
+	name = (char *)ptr;
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return;
+	mutex_lock(&syscall_trace_lock);
+	sys_refcount_enter--;
+	clear_bit(num, enabled_enter_syscalls);
+	if (!sys_refcount_enter)
+		unregister_trace_sys_enter(ftrace_syscall_enter);
+	mutex_unlock(&syscall_trace_lock);
 }
 
-static void reset_syscall_tracer(struct trace_array *tr)
+int reg_event_syscall_exit(void *ptr)
 {
-	stop_ftrace_syscalls();
-	tracing_reset_online_cpus(tr);
+	int ret = 0;
+	int num;
+	char *name;
+
+	name = (char *)ptr;
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return -ENOSYS;
+	mutex_lock(&syscall_trace_lock);
+	if (!sys_refcount_exit)
+		ret = register_trace_sys_exit(ftrace_syscall_exit);
+	if (ret) {
+		pr_info("event trace: Could not activate"
+				"syscall exit trace point");
+	} else {
+		set_bit(num, enabled_exit_syscalls);
+		sys_refcount_exit++;
+	}
+	mutex_unlock(&syscall_trace_lock);
+	return ret;
 }
 
-static struct trace_event syscall_enter_event = {
-	.type		= TRACE_SYSCALL_ENTER,
-	.trace		= print_syscall_enter,
-};
+void unreg_event_syscall_exit(void *ptr)
+{
+	int num;
+	char *name;
+
+	name = (char *)ptr;
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return;
+	mutex_lock(&syscall_trace_lock);
+	sys_refcount_exit--;
+	clear_bit(num, enabled_exit_syscalls);
+	if (!sys_refcount_exit)
+		unregister_trace_sys_exit(ftrace_syscall_exit);
+	mutex_unlock(&syscall_trace_lock);
+}
 
-static struct trace_event syscall_exit_event = {
-	.type		= TRACE_SYSCALL_EXIT,
-	.trace		= print_syscall_exit,
+struct trace_event event_syscall_enter = {
+	.trace		= print_syscall_enter,
 };
 
-static struct tracer syscall_tracer __read_mostly = {
-	.name		= "syscall",
-	.init		= init_syscall_tracer,
-	.reset		= reset_syscall_tracer,
-	.flags		= &syscalls_flags,
+struct trace_event event_syscall_exit = {
+	.trace		= print_syscall_exit,
 };
 
-__init int register_ftrace_syscalls(void)
+#ifdef CONFIG_EVENT_PROFILE
+
+static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
+static int sys_prof_refcount_enter;
+static int sys_prof_refcount_exit;
+
+static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-	int ret;
+	struct syscall_metadata *sys_data;
+	struct syscall_trace_enter *rec;
+	unsigned long flags;
+	char *raw_data;
+	int syscall_nr;
+	int size;
+	int cpu;
 
-	ret = register_ftrace_event(&syscall_enter_event);
-	if (!ret) {
-		printk(KERN_WARNING "event %d failed to register\n",
-		       syscall_enter_event.type);
-		WARN_ON_ONCE(1);
+	syscall_nr = syscall_get_nr(current, regs);
+	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
+		return;
+
+	sys_data = syscall_nr_to_meta(syscall_nr);
+	if (!sys_data)
+		return;
+
+	/* get the size after alignment with the u32 buffer size field */
+	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
+	size = ALIGN(size + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "profile buffer not large enough"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_enter *) raw_data;
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->enter_id;
+	rec->nr = syscall_nr;
+	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+			      (unsigned long *)&rec->args);
+	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
+}
+
+int reg_prof_syscall_enter(char *name)
+{
+	int ret = 0;
+	int num;
+
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return -ENOSYS;
+
+	mutex_lock(&syscall_trace_lock);
+	if (!sys_prof_refcount_enter)
+		ret = register_trace_sys_enter(prof_syscall_enter);
+	if (ret) {
+		pr_info("event trace: Could not activate"
+				"syscall entry trace point");
+	} else {
+		set_bit(num, enabled_prof_enter_syscalls);
+		sys_prof_refcount_enter++;
 	}
+	mutex_unlock(&syscall_trace_lock);
+	return ret;
+}
 
-	ret = register_ftrace_event(&syscall_exit_event);
-	if (!ret) {
-		printk(KERN_WARNING "event %d failed to register\n",
-		       syscall_exit_event.type);
-		WARN_ON_ONCE(1);
+void unreg_prof_syscall_enter(char *name)
+{
+	int num;
+
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return;
+
+	mutex_lock(&syscall_trace_lock);
+	sys_prof_refcount_enter--;
+	clear_bit(num, enabled_prof_enter_syscalls);
+	if (!sys_prof_refcount_enter)
+		unregister_trace_sys_enter(prof_syscall_enter);
+	mutex_unlock(&syscall_trace_lock);
+}
+
+static void prof_syscall_exit(struct pt_regs *regs, long ret)
+{
+	struct syscall_metadata *sys_data;
+	struct syscall_trace_exit *rec;
+	unsigned long flags;
+	int syscall_nr;
+	char *raw_data;
+	int size;
+	int cpu;
+
+	syscall_nr = syscall_get_nr(current, regs);
+	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
+		return;
+
+	sys_data = syscall_nr_to_meta(syscall_nr);
+	if (!sys_data)
+		return;
+
+	/* We can probably do that at build time */
+	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+
+	/*
+	 * Impossible, but be paranoid with the future
+	 * How to put this check outside runtime?
+	 */
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "exit event has grown above profile buffer size"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_exit *)raw_data;
+
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->exit_id;
+	rec->nr = syscall_nr;
+	rec->ret = syscall_get_return_value(current, regs);
+
+	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
+}
+
+int reg_prof_syscall_exit(char *name)
+{
+	int ret = 0;
+	int num;
+
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return -ENOSYS;
+
+	mutex_lock(&syscall_trace_lock);
+	if (!sys_prof_refcount_exit)
+		ret = register_trace_sys_exit(prof_syscall_exit);
+	if (ret) {
+		pr_info("event trace: Could not activate"
+				"syscall entry trace point");
+	} else {
+		set_bit(num, enabled_prof_exit_syscalls);
+		sys_prof_refcount_exit++;
 	}
+	mutex_unlock(&syscall_trace_lock);
+	return ret;
+}
+
+void unreg_prof_syscall_exit(char *name)
+{
+	int num;
+
+	num = syscall_name_to_nr(name);
+	if (num < 0 || num >= NR_syscalls)
+		return;
 
-	return register_tracer(&syscall_tracer);
+	mutex_lock(&syscall_trace_lock);
+	sys_prof_refcount_exit--;
+	clear_bit(num, enabled_prof_exit_syscalls);
+	if (!sys_prof_refcount_exit)
+		unregister_trace_sys_exit(prof_syscall_exit);
+	mutex_unlock(&syscall_trace_lock);
 }
-device_initcall(register_ftrace_syscalls);
+
+#endif
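Two details of the CONFIG_EVENT_PROFILE path are easy to miss. First, the size arithmetic: perf copies each raw record behind a u32 size header and requires the whole sample to stay u64-aligned, which is what ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32) achieves. A worked example with a hypothetical raw record size, condensing the patch's two statements into one:

	/* A raw record of 42 bytes: 42 + 4 = 46 is not 8-byte aligned;
	 * ALIGN(46, 8) = 48, so the record is padded to 48 - 4 = 44
	 * bytes and the u32 header plus record stay u64-aligned.  The
	 * 0ULL store over the final 8 bytes then guarantees the pad
	 * bytes never leak kernel stack to userspace. */
	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);

Second, two cosmetic bugs ride along in the registration helpers: the concatenated pr_info() strings lack a separating space (the message prints as "...activatesyscall entry trace point"), and reg_prof_syscall_exit() reuses the "entry" wording where it means "exit".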