aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-01-16 13:55:52 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-01-16 13:55:52 -0500
commit23aa4b416afdf37869e53c188944dd7c4c47949a (patch)
tree152beefdd89d3b6af0e67e623d475aaad1883cd5
parentcb59670870d90ff8bc31f5f2efc407c6fe4938c0 (diff)
parentce1039bd3a89e99e4f624e75fb1777fc92d76eb3 (diff)
Merge tag 'trace-fixes-v3.19-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull ftrace fixes from Steven Rostedt: "This holds a few fixes to the ftrace infrastructure as well as the mixture of function graph tracing and kprobes. When jprobes and function graph tracing is enabled at the same time it will crash the system: # modprobe jprobe_example # echo function_graph > /sys/kernel/debug/tracing/current_tracer After the first fork (jprobe_example probes it), the system will crash. This is due to the way jprobes copies the stack frame and does not do a normal function return. This messes up with the function graph tracing accounting which hijacks the return address from the stack and replaces it with a hook function. It saves the return addresses in a separate stack to put back the correct return address when done. But because the jprobe functions do not do a normal return, their stack addresses are not put back until the function they probe is called, which means that the probed function will get the return address of the jprobe handler instead of its own. The simple fix here was to disable function graph tracing while the jprobe handler is being called. While debugging this I found two minor bugs with the function graph tracing. The first was about the function graph tracer sharing its function hash with the function tracer (they both get filtered by the same input). The changing of the set_ftrace_filter would not sync the function recording records after a change if the function tracer was disabled but the function graph tracer was enabled. This was due to the update only checking one of the ops instead of the shared ops to see if they were enabled and should perform the sync. This caused the ftrace accounting to break and a ftrace_bug() would be triggered, disabling ftrace until a reboot. The second was that the check to update records only checked one of the filter hashes. It needs to test both the "filter" and "notrace" hashes. The "filter" hash determines what functions to trace where as the "notrace" hash determines what functions not to trace (trace all but these). Both hashes need to be passed to the update code to find out what change is being done during the update. This also broke the ftrace record accounting and triggered a ftrace_bug(). This patch set also include two more fixes that were reported separately from the kprobe issue. One was that init_ftrace_syscalls() was called twice at boot up. This is not a major bug, but that call performed a rather large kmalloc (NR_syscalls * sizeof(*syscalls_metadata)). The second call made the first one a memory leak, and wastes memory. The other fix is a regression caused by an update in the v3.19 merge window. The moving to enable events early, moved the enabling before PID 1 was created. The syscall events require setting the TIF_SYSCALL_TRACEPOINT for all tasks. But for_each_process_thread() does not include the swapper task (PID 0), and ended up being a nop. A suggested fix was to add the init_task() to have its flag set, but I didn't really want to mess with PID 0 for this minor bug. Instead I disable and re-enable events again at early_initcall() where it use to be enabled. This also handles any other event that might have its own reg function that could break at early boot up" * tag 'trace-fixes-v3.19-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: tracing: Fix enabling of syscall events on the command line tracing: Remove extra call to init_ftrace_syscalls() ftrace/jprobes/x86: Fix conflict between jprobes and function graph tracing ftrace: Check both notrace and filter for old hash ftrace: Fix updating of filters for shared global_ops filters
-rw-r--r--arch/x86/kernel/kprobes/core.c20
-rw-r--r--kernel/trace/ftrace.c53
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_events.c69
4 files changed, 115 insertions, 28 deletions
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f7e3cd50ece0..98f654d466e5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1020,6 +1020,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
1020 regs->flags &= ~X86_EFLAGS_IF; 1020 regs->flags &= ~X86_EFLAGS_IF;
1021 trace_hardirqs_off(); 1021 trace_hardirqs_off();
1022 regs->ip = (unsigned long)(jp->entry); 1022 regs->ip = (unsigned long)(jp->entry);
1023
1024 /*
1025 * jprobes use jprobe_return() which skips the normal return
1026 * path of the function, and this messes up the accounting of the
1027 * function graph tracer to get messed up.
1028 *
1029 * Pause function graph tracing while performing the jprobe function.
1030 */
1031 pause_graph_tracing();
1023 return 1; 1032 return 1;
1024} 1033}
1025NOKPROBE_SYMBOL(setjmp_pre_handler); 1034NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -1048,24 +1057,25 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1048 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1057 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1049 u8 *addr = (u8 *) (regs->ip - 1); 1058 u8 *addr = (u8 *) (regs->ip - 1);
1050 struct jprobe *jp = container_of(p, struct jprobe, kp); 1059 struct jprobe *jp = container_of(p, struct jprobe, kp);
1060 void *saved_sp = kcb->jprobe_saved_sp;
1051 1061
1052 if ((addr > (u8 *) jprobe_return) && 1062 if ((addr > (u8 *) jprobe_return) &&
1053 (addr < (u8 *) jprobe_return_end)) { 1063 (addr < (u8 *) jprobe_return_end)) {
1054 if (stack_addr(regs) != kcb->jprobe_saved_sp) { 1064 if (stack_addr(regs) != saved_sp) {
1055 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; 1065 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
1056 printk(KERN_ERR 1066 printk(KERN_ERR
1057 "current sp %p does not match saved sp %p\n", 1067 "current sp %p does not match saved sp %p\n",
1058 stack_addr(regs), kcb->jprobe_saved_sp); 1068 stack_addr(regs), saved_sp);
1059 printk(KERN_ERR "Saved registers for jprobe %p\n", jp); 1069 printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
1060 show_regs(saved_regs); 1070 show_regs(saved_regs);
1061 printk(KERN_ERR "Current registers\n"); 1071 printk(KERN_ERR "Current registers\n");
1062 show_regs(regs); 1072 show_regs(regs);
1063 BUG(); 1073 BUG();
1064 } 1074 }
1075 /* It's OK to start function graph tracing again */
1076 unpause_graph_tracing();
1065 *regs = kcb->jprobe_saved_regs; 1077 *regs = kcb->jprobe_saved_regs;
1066 memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), 1078 memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
1067 kcb->jprobes_stack,
1068 MIN_STACK_SIZE(kcb->jprobe_saved_sp));
1069 preempt_enable_no_resched(); 1079 preempt_enable_no_resched();
1070 return 1; 1080 return 1;
1071 } 1081 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 929a733d302e..224e768bdc73 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2497,12 +2497,14 @@ static void ftrace_run_update_code(int command)
2497} 2497}
2498 2498
2499static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, 2499static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
2500 struct ftrace_hash *old_hash) 2500 struct ftrace_ops_hash *old_hash)
2501{ 2501{
2502 ops->flags |= FTRACE_OPS_FL_MODIFYING; 2502 ops->flags |= FTRACE_OPS_FL_MODIFYING;
2503 ops->old_hash.filter_hash = old_hash; 2503 ops->old_hash.filter_hash = old_hash->filter_hash;
2504 ops->old_hash.notrace_hash = old_hash->notrace_hash;
2504 ftrace_run_update_code(command); 2505 ftrace_run_update_code(command);
2505 ops->old_hash.filter_hash = NULL; 2506 ops->old_hash.filter_hash = NULL;
2507 ops->old_hash.notrace_hash = NULL;
2506 ops->flags &= ~FTRACE_OPS_FL_MODIFYING; 2508 ops->flags &= ~FTRACE_OPS_FL_MODIFYING;
2507} 2509}
2508 2510
@@ -3579,7 +3581,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly =
3579 3581
3580static int ftrace_probe_registered; 3582static int ftrace_probe_registered;
3581 3583
3582static void __enable_ftrace_function_probe(struct ftrace_hash *old_hash) 3584static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
3583{ 3585{
3584 int ret; 3586 int ret;
3585 int i; 3587 int i;
@@ -3637,6 +3639,7 @@ int
3637register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, 3639register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3638 void *data) 3640 void *data)
3639{ 3641{
3642 struct ftrace_ops_hash old_hash_ops;
3640 struct ftrace_func_probe *entry; 3643 struct ftrace_func_probe *entry;
3641 struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; 3644 struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash;
3642 struct ftrace_hash *old_hash = *orig_hash; 3645 struct ftrace_hash *old_hash = *orig_hash;
@@ -3658,6 +3661,10 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3658 3661
3659 mutex_lock(&trace_probe_ops.func_hash->regex_lock); 3662 mutex_lock(&trace_probe_ops.func_hash->regex_lock);
3660 3663
3664 old_hash_ops.filter_hash = old_hash;
3665 /* Probes only have filters */
3666 old_hash_ops.notrace_hash = NULL;
3667
3661 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); 3668 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
3662 if (!hash) { 3669 if (!hash) {
3663 count = -ENOMEM; 3670 count = -ENOMEM;
@@ -3718,7 +3725,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3718 3725
3719 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); 3726 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
3720 3727
3721 __enable_ftrace_function_probe(old_hash); 3728 __enable_ftrace_function_probe(&old_hash_ops);
3722 3729
3723 if (!ret) 3730 if (!ret)
3724 free_ftrace_hash_rcu(old_hash); 3731 free_ftrace_hash_rcu(old_hash);
@@ -4006,10 +4013,34 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
4006} 4013}
4007 4014
4008static void ftrace_ops_update_code(struct ftrace_ops *ops, 4015static void ftrace_ops_update_code(struct ftrace_ops *ops,
4009 struct ftrace_hash *old_hash) 4016 struct ftrace_ops_hash *old_hash)
4010{ 4017{
4011 if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) 4018 struct ftrace_ops *op;
4019
4020 if (!ftrace_enabled)
4021 return;
4022
4023 if (ops->flags & FTRACE_OPS_FL_ENABLED) {
4012 ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); 4024 ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash);
4025 return;
4026 }
4027
4028 /*
4029 * If this is the shared global_ops filter, then we need to
4030 * check if there is another ops that shares it, is enabled.
4031 * If so, we still need to run the modify code.
4032 */
4033 if (ops->func_hash != &global_ops.local_hash)
4034 return;
4035
4036 do_for_each_ftrace_op(op, ftrace_ops_list) {
4037 if (op->func_hash == &global_ops.local_hash &&
4038 op->flags & FTRACE_OPS_FL_ENABLED) {
4039 ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash);
4040 /* Only need to do this once */
4041 return;
4042 }
4043 } while_for_each_ftrace_op(op);
4013} 4044}
4014 4045
4015static int 4046static int
@@ -4017,6 +4048,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
4017 unsigned long ip, int remove, int reset, int enable) 4048 unsigned long ip, int remove, int reset, int enable)
4018{ 4049{
4019 struct ftrace_hash **orig_hash; 4050 struct ftrace_hash **orig_hash;
4051 struct ftrace_ops_hash old_hash_ops;
4020 struct ftrace_hash *old_hash; 4052 struct ftrace_hash *old_hash;
4021 struct ftrace_hash *hash; 4053 struct ftrace_hash *hash;
4022 int ret; 4054 int ret;
@@ -4053,9 +4085,11 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
4053 4085
4054 mutex_lock(&ftrace_lock); 4086 mutex_lock(&ftrace_lock);
4055 old_hash = *orig_hash; 4087 old_hash = *orig_hash;
4088 old_hash_ops.filter_hash = ops->func_hash->filter_hash;
4089 old_hash_ops.notrace_hash = ops->func_hash->notrace_hash;
4056 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 4090 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
4057 if (!ret) { 4091 if (!ret) {
4058 ftrace_ops_update_code(ops, old_hash); 4092 ftrace_ops_update_code(ops, &old_hash_ops);
4059 free_ftrace_hash_rcu(old_hash); 4093 free_ftrace_hash_rcu(old_hash);
4060 } 4094 }
4061 mutex_unlock(&ftrace_lock); 4095 mutex_unlock(&ftrace_lock);
@@ -4267,6 +4301,7 @@ static void __init set_ftrace_early_filters(void)
4267int ftrace_regex_release(struct inode *inode, struct file *file) 4301int ftrace_regex_release(struct inode *inode, struct file *file)
4268{ 4302{
4269 struct seq_file *m = (struct seq_file *)file->private_data; 4303 struct seq_file *m = (struct seq_file *)file->private_data;
4304 struct ftrace_ops_hash old_hash_ops;
4270 struct ftrace_iterator *iter; 4305 struct ftrace_iterator *iter;
4271 struct ftrace_hash **orig_hash; 4306 struct ftrace_hash **orig_hash;
4272 struct ftrace_hash *old_hash; 4307 struct ftrace_hash *old_hash;
@@ -4300,10 +4335,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
4300 4335
4301 mutex_lock(&ftrace_lock); 4336 mutex_lock(&ftrace_lock);
4302 old_hash = *orig_hash; 4337 old_hash = *orig_hash;
4338 old_hash_ops.filter_hash = iter->ops->func_hash->filter_hash;
4339 old_hash_ops.notrace_hash = iter->ops->func_hash->notrace_hash;
4303 ret = ftrace_hash_move(iter->ops, filter_hash, 4340 ret = ftrace_hash_move(iter->ops, filter_hash,
4304 orig_hash, iter->hash); 4341 orig_hash, iter->hash);
4305 if (!ret) { 4342 if (!ret) {
4306 ftrace_ops_update_code(iter->ops, old_hash); 4343 ftrace_ops_update_code(iter->ops, &old_hash_ops);
4307 free_ftrace_hash_rcu(old_hash); 4344 free_ftrace_hash_rcu(old_hash);
4308 } 4345 }
4309 mutex_unlock(&ftrace_lock); 4346 mutex_unlock(&ftrace_lock);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2e767972e99c..4a9079b9f082 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6918,7 +6918,6 @@ void __init trace_init(void)
6918 tracepoint_printk = 0; 6918 tracepoint_printk = 0;
6919 } 6919 }
6920 tracer_alloc_buffers(); 6920 tracer_alloc_buffers();
6921 init_ftrace_syscalls();
6922 trace_event_init(); 6921 trace_event_init();
6923} 6922}
6924 6923
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 366a78a3e61e..b03a0ea77b99 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2429,12 +2429,39 @@ static __init int event_trace_memsetup(void)
2429 return 0; 2429 return 0;
2430} 2430}
2431 2431
2432static __init void
2433early_enable_events(struct trace_array *tr, bool disable_first)
2434{
2435 char *buf = bootup_event_buf;
2436 char *token;
2437 int ret;
2438
2439 while (true) {
2440 token = strsep(&buf, ",");
2441
2442 if (!token)
2443 break;
2444 if (!*token)
2445 continue;
2446
2447 /* Restarting syscalls requires that we stop them first */
2448 if (disable_first)
2449 ftrace_set_clr_event(tr, token, 0);
2450
2451 ret = ftrace_set_clr_event(tr, token, 1);
2452 if (ret)
2453 pr_warn("Failed to enable trace event: %s\n", token);
2454
2455 /* Put back the comma to allow this to be called again */
2456 if (buf)
2457 *(buf - 1) = ',';
2458 }
2459}
2460
2432static __init int event_trace_enable(void) 2461static __init int event_trace_enable(void)
2433{ 2462{
2434 struct trace_array *tr = top_trace_array(); 2463 struct trace_array *tr = top_trace_array();
2435 struct ftrace_event_call **iter, *call; 2464 struct ftrace_event_call **iter, *call;
2436 char *buf = bootup_event_buf;
2437 char *token;
2438 int ret; 2465 int ret;
2439 2466
2440 if (!tr) 2467 if (!tr)
@@ -2456,18 +2483,7 @@ static __init int event_trace_enable(void)
2456 */ 2483 */
2457 __trace_early_add_events(tr); 2484 __trace_early_add_events(tr);
2458 2485
2459 while (true) { 2486 early_enable_events(tr, false);
2460 token = strsep(&buf, ",");
2461
2462 if (!token)
2463 break;
2464 if (!*token)
2465 continue;
2466
2467 ret = ftrace_set_clr_event(tr, token, 1);
2468 if (ret)
2469 pr_warn("Failed to enable trace event: %s\n", token);
2470 }
2471 2487
2472 trace_printk_start_comm(); 2488 trace_printk_start_comm();
2473 2489
@@ -2478,6 +2494,31 @@ static __init int event_trace_enable(void)
2478 return 0; 2494 return 0;
2479} 2495}
2480 2496
2497/*
2498 * event_trace_enable() is called from trace_event_init() first to
2499 * initialize events and perhaps start any events that are on the
2500 * command line. Unfortunately, there are some events that will not
2501 * start this early, like the system call tracepoints that need
2502 * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
2503 * is called before pid 1 starts, and this flag is never set, making
2504 * the syscall tracepoint never get reached, but the event is enabled
2505 * regardless (and not doing anything).
2506 */
2507static __init int event_trace_enable_again(void)
2508{
2509 struct trace_array *tr;
2510
2511 tr = top_trace_array();
2512 if (!tr)
2513 return -ENODEV;
2514
2515 early_enable_events(tr, true);
2516
2517 return 0;
2518}
2519
2520early_initcall(event_trace_enable_again);
2521
2481static __init int event_trace_init(void) 2522static __init int event_trace_init(void)
2482{ 2523{
2483 struct trace_array *tr; 2524 struct trace_array *tr;