author    Ralf Baechle <ralf@linux-mips.org>  2013-02-21 10:16:55 -0500
committer Ralf Baechle <ralf@linux-mips.org>  2013-02-22 04:07:30 -0500
commit    edb15d83a875a1f4b1576188844db5c330c3267d
tree      74d54eab401b6ccf2a6ad4821227108a8d160f03 /kernel/trace
parent    8bfc245f9ad7bd4e461179e4e7852ef99b8b6144
parent    a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into mips-for-linux-next
Conflicts:
	include/linux/ssb/ssb_driver_gige.h

Also resolves a logical merge conflict in drivers/net/ethernet/broadcom/bgmac.c due to a change of an API.
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  33
-rw-r--r--  kernel/trace/blktrace.c                2
-rw-r--r--  kernel/trace/ftrace.c                 88
-rw-r--r--  kernel/trace/power-traces.c            3
-rw-r--r--  kernel/trace/ring_buffer.c           108
-rw-r--r--  kernel/trace/trace.c                 253
-rw-r--r--  kernel/trace/trace.h                 134
-rw-r--r--  kernel/trace/trace_clock.c             5
-rw-r--r--  kernel/trace/trace_events.c            1
-rw-r--r--  kernel/trace/trace_functions.c        61
-rw-r--r--  kernel/trace/trace_functions_graph.c  68
-rw-r--r--  kernel/trace/trace_probe.h             1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      2
-rw-r--r--  kernel/trace/trace_selftest.c         21
-rw-r--r--  kernel/trace/trace_syscalls.c         18
-rw-r--r--  kernel/trace/trace_uprobe.c          217
16 files changed, 728 insertions(+), 287 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..192473b22799 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
 	help
 	  See Documentation/trace/ftrace-design.txt
 
+config HAVE_DYNAMIC_FTRACE_WITH_REGS
+	bool
+
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
@@ -78,21 +81,6 @@ config EVENT_TRACING
 	select CONTEXT_SWITCH_TRACER
 	bool
 
-config EVENT_POWER_TRACING_DEPRECATED
-	depends on EVENT_TRACING
-	bool "Deprecated power event trace API, to be removed"
-	default y
-	help
-	  Provides old power event types:
-	  C-state/idle accounting events:
-	  power:power_start
-	  power:power_end
-	  and old cpufreq accounting event:
-	  power:power_frequency
-	  This is for userspace compatibility
-	  and will vanish after 5 kernel iterations,
-	  namely 3.1.
-
 config CONTEXT_SWITCH_TRACER
 	bool
 
@@ -250,6 +238,16 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
+config TRACER_SNAPSHOT
+	bool "Create a snapshot trace buffer"
+	select TRACER_MAX_TRACE
+	help
+	  Allow tracing users to take snapshot of the current buffer using the
+	  ftrace interface, e.g.:
+
+	      echo 1 > /sys/kernel/debug/tracing/snapshot
+	      cat snapshot
+
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
@@ -434,6 +432,11 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config DYNAMIC_FTRACE_WITH_REGS
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
+
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741c..71259e2b6b61 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 		return;
 
 	local_irq_save(flags);
-	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+	buf = this_cpu_ptr(bt->msg_data);
 	va_start(args, fmt);
 	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
 	va_end(args);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..ce8c3d68292f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)			\
+	op = rcu_dereference_raw(list);			\
+	do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)				\
+	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
+	       unlikely((op) != &ftrace_list_end))
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
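The macro pair above is used like an open-coded for loop: the loop body sits between the two macros. A minimal sketch of the intended pattern, mirroring __ftrace_ops_list_func later in this patch (the ip, parent_ip, and regs arguments are whatever the caller was handed):

	struct ftrace_ops *op;

	do_for_each_ftrace_op(op, ftrace_ops_list) {
		if (ftrace_ops_test(op, ip))
			op->func(ip, parent_ip, op, regs);
	} while_for_each_ftrace_op(op);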
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
 {
-	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+	int bit;
+
+	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
+	if (bit < 0)
 		return;
 
-	trace_recursion_set(TRACE_GLOBAL_BIT);
-	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_global_list) {
 		op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next); /*see above*/
-	};
-	trace_recursion_clear(TRACE_GLOBAL_BIT);
+	} while_for_each_ftrace_op(op);
+
+	trace_clear_recursion(bit);
 }
 
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
@@ -221,10 +233,24 @@ static void update_global_ops(void)
 	 * registered callers.
 	 */
 	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end)
+	    ftrace_global_list->next == &ftrace_list_end) {
 		func = ftrace_global_list->func;
-	else
+		/*
+		 * As we are calling the function directly.
+		 * If it does not have recursion protection,
+		 * the function_trace_op needs to be updated
+		 * accordingly.
+		 */
+		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
+			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+		else
+			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
+	} else {
 		func = ftrace_global_list_func;
+		/* The list has its own recursion protection. */
+		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+	}
+
 
 	/* If we filter on pids, update to use the pid function */
 	if (!list_empty(&ftrace_pids)) {
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
-	op = rcu_dereference_raw(ftrace_control_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_control_list) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_CONTROL_BIT);
 	preempt_enable_notrace();
 }
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *ignored, struct pt_regs *regs)
 {
 	struct ftrace_ops *op;
+	int bit;
 
 	if (function_trace_stop)
 		return;
 
-	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	if (bit < 0)
 		return;
 
-	trace_recursion_set(TRACE_INTERNAL_BIT);
 	/*
 	 * Some of the ops may be dynamically allocated,
 	 * they must be freed after a synchronize_sched().
 	 */
 	preempt_disable_notrace();
-	op = rcu_dereference_raw(ftrace_ops_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		if (ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	preempt_enable_notrace();
-	trace_recursion_clear(TRACE_INTERNAL_BIT);
+	trace_clear_recursion(bit);
 }
 
 /*
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORT_FTARCE_OPS.
  */
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index f55fcf61b223..1c71382b283d 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,8 +13,5 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-#ifdef EVENT_POWER_TRACING_DEPRECATED
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
-#endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce8514feedcd..7244acde77b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
+#include <linux/ftrace_event.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
+#include <linux/trace_seq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
@@ -21,7 +23,6 @@
 #include <linux/fs.h>
 
 #include <asm/local.h>
-#include "trace.h"
 
 static void update_pages_handler(struct work_struct *work);
 
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 =  NMI context
+ *  bit 1 =  IRQ context
+ *  bit 2 =  SoftIRQ context
+ *  bit 3 =  normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+static DEFINE_PER_CPU(unsigned int, current_context);
 
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
-{
-	/* Disable all tracing before we do anything else */
-	tracing_off_permanent();
-
-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-		    trace_recursion_buffer(),
-		    hardirq_count() >> HARDIRQ_SHIFT,
-		    softirq_count() >> SOFTIRQ_SHIFT,
-		    in_nmi());
-
-	WARN_ON_ONCE(1);
-}
-
-static inline int trace_recursive_lock(void)
+static __always_inline int trace_recursive_lock(void)
 {
-	trace_recursion_inc();
+	unsigned int val = this_cpu_read(current_context);
+	int bit;
 
-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
-		return 0;
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
 
-	trace_recursive_fail();
+	if (unlikely(val & (1 << bit)))
+		return 1;
 
-	return -1;
+	val |= (1 << bit);
+	this_cpu_write(current_context, val);
+
+	return 0;
 }
 
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
 {
-	WARN_ON_ONCE(!trace_recursion_buffer());
+	unsigned int val = this_cpu_read(current_context);
 
-	trace_recursion_dec();
+	val--;
+	val &= this_cpu_read(current_context);
+	this_cpu_write(current_context, val);
 }
 
 #else
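The unlock path above leans on a classic bit trick: subtracting 1 from a value and ANDing with the original clears exactly the least-significant set bit, which, given the nesting order of the contexts, is always the bit the current context set. A standalone sketch of the arithmetic (plain C, not kernel code):

	#include <assert.h>

	int main(void)
	{
		unsigned int val;

		val = 0x5;              /* binary  101: bits 0 and 2 set */
		val = (val - 1) & val;  /*  101 &  100 =  100: bit 0 cleared */
		assert(val == 0x4);

		val = 0xa;              /* binary 1010: bits 1 and 3 set */
		val = (val - 1) & val;  /* 1010 & 1001 = 1000: bit 1 cleared */
		assert(val == 0x8);

		return 0;
	}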
@@ -3067,6 +3103,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
 
 /**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/* check for end of page padding */
 	if ((iter->head >= rb_page_size(iter->head_page)) &&
 	    (iter->head_page != cpu_buffer->commit_page))
-		rb_advance_iter(iter);
+		rb_inc_iter(iter);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d24..c2e2c2310374 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -249,7 +250,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 static struct tracer *trace_types __read_mostly;
 
 /* current_trace points to the tracer that is currently active */
-static struct tracer *current_trace __read_mostly;
+static struct tracer *current_trace __read_mostly = &nop_trace;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
@@ -709,10 +710,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
 	}
+
 	arch_spin_lock(&ftrace_max_lock);
 
 	tr->buffer = max_tr.buffer;
@@ -739,10 +743,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
 		return;
-	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -862,10 +864,13 @@ int register_tracer(struct tracer *type)
 
 	current_trace = type;
 
-	/* If we expanded the buffers, make sure the max is expanded too */
-	if (ring_buffer_expanded && type->use_max_tr)
-		ring_buffer_resize(max_tr.buffer, trace_buf_size,
-				   RING_BUFFER_ALL_CPUS);
+	if (type->use_max_tr) {
+		/* If we expanded the buffers, make sure the max is expanded too */
+		if (ring_buffer_expanded)
+			ring_buffer_resize(max_tr.buffer, trace_buf_size,
+					   RING_BUFFER_ALL_CPUS);
+		type->allocated_snapshot = true;
+	}
 
 	/* the test is responsible for initializing and enabling */
 	pr_info("Testing tracer %s: ", type->name);
@@ -881,10 +886,14 @@ int register_tracer(struct tracer *type)
 	/* Only reset on passing, to avoid touching corrupted buffers */
 	tracing_reset_online_cpus(tr);
 
-	/* Shrink the max buffer again */
-	if (ring_buffer_expanded && type->use_max_tr)
-		ring_buffer_resize(max_tr.buffer, 1,
-				   RING_BUFFER_ALL_CPUS);
+	if (type->use_max_tr) {
+		type->allocated_snapshot = false;
+
+		/* Shrink the max buffer again */
+		if (ring_buffer_expanded)
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+	}
 
 	printk(KERN_CONT "PASSED\n");
 }
@@ -922,6 +931,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
 
+	if (!buffer)
+		return;
+
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -936,6 +948,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 	struct ring_buffer *buffer = tr->buffer;
 	int cpu;
 
+	if (!buffer)
+		return;
+
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -1167,7 +1182,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count = pc & 0xff;
 	entry->pid = (tsk) ? tsk->pid : 0;
-	entry->padding = 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1335,7 +1349,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	preempt_disable_notrace();
 
-	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
 	/*
 	 * We don't need any atomic variables, just a barrier.
 	 * If an interrupt comes in, we don't care, because it would
@@ -1389,7 +1403,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
  out:
 	/* Again, don't let gcc optimize things here */
 	barrier();
-	__get_cpu_var(ftrace_stack_reserve)--;
+	__this_cpu_dec(ftrace_stack_reserve);
 	preempt_enable_notrace();
 
 }
@@ -1517,7 +1531,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 static char *get_trace_buf(void)
 {
 	struct trace_buffer_struct *percpu_buffer;
-	struct trace_buffer_struct *buffer;
 
 	/*
 	 * If we have allocated per cpu buffers, then we do not
@@ -1535,9 +1548,7 @@ static char *get_trace_buf(void)
 	if (!percpu_buffer)
 		return NULL;
 
-	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
-
-	return buffer->buffer;
+	return this_cpu_ptr(&percpu_buffer->buffer[0]);
 }
 
 static int alloc_percpu_trace_buffer(void)
@@ -1942,21 +1953,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
 	struct trace_iterator *iter = m->private;
-	static struct tracer *old_tracer;
 	int cpu_file = iter->cpu_file;
 	void *p = NULL;
 	loff_t l = 0;
 	int cpu;
 
-	/* copy the tracer to avoid using a global lock all around */
+	/*
+	 * copy the tracer to avoid using a global lock all around.
+	 * iter->trace is a copy of current_trace, the pointer to the
+	 * name may be used instead of a strcmp(), as iter->trace->name
+	 * will point to the same string as current_trace->name.
+	 */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(current_trace && iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
-	atomic_inc(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return ERR_PTR(-EBUSY);
+
+	if (!iter->snapshot)
+		atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -1995,7 +2012,11 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	atomic_dec(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return;
+
+	if (!iter->snapshot)
+		atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2080,8 +2101,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	unsigned long total;
 	const char *name = "preemption";
 
-	if (type)
-		name = type->name;
+	name = type->name;
 
 	get_total_entries(tr, &total, &entries);
 
@@ -2430,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2457,16 +2477,16 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!iter->trace)
 		goto fail;
 
-	if (current_trace)
-		*iter->trace = *current_trace;
+	*iter->trace = *current_trace;
 
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace && current_trace->print_max)
+	if (current_trace->print_max || snapshot)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
+	iter->snapshot = snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2483,8 +2503,9 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (trace_clocks[trace_clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	/* stop the trace while dumping */
-	tracing_stop();
+	/* stop the trace while dumping if we are not opening "snapshot" */
+	if (!iter->snapshot)
+		tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2547,8 +2568,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	/* reenable tracing if it was previously enabled */
-	tracing_start();
+	if (!iter->snapshot)
+		/* reenable tracing if it was previously enabled */
+		tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2576,7 +2598,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3014,10 +3036,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 	int r;
 
 	mutex_lock(&trace_types_lock);
-	if (current_trace)
-		r = sprintf(buf, "%s\n", current_trace->name);
-	else
-		r = sprintf(buf, "\n");
+	r = sprintf(buf, "%s\n", current_trace->name);
 	mutex_unlock(&trace_types_lock);
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3183,6 +3202,7 @@ static int tracing_set_tracer(const char *buf)
 	static struct trace_option_dentry *topts;
 	struct trace_array *tr = &global_trace;
 	struct tracer *t;
+	bool had_max_tr;
 	int ret = 0;
 
 	mutex_lock(&trace_types_lock);
@@ -3207,9 +3227,21 @@ static int tracing_set_tracer(const char *buf)
 		goto out;
 
 	trace_branch_disable();
-	if (current_trace && current_trace->reset)
+	if (current_trace->reset)
 		current_trace->reset(tr);
-	if (current_trace && current_trace->use_max_tr) {
+
+	had_max_tr = current_trace->allocated_snapshot;
+	current_trace = &nop_trace;
+
+	if (had_max_tr && !t->use_max_tr) {
+		/*
+		 * We need to make sure that the update_max_tr sees that
+		 * current_trace changed to nop_trace to keep it from
+		 * swapping the buffers after we resize it.
+		 * The update_max_tr is called from interrupts disabled
+		 * so a synchronized_sched() is sufficient.
+		 */
+		synchronize_sched();
 		/*
 		 * We don't free the ring buffer. instead, resize it because
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3217,18 +3249,19 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
+		tracing_reset_online_cpus(&max_tr);
+		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
-	current_trace = &nop_trace;
-
 	topts = create_trace_option_files(t);
-	if (t->use_max_tr) {
+	if (t->use_max_tr && !had_max_tr) {
 		/* we need to make per cpu buffer sizes equivalent */
 		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
+		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -3336,8 +3369,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 		ret = -ENOMEM;
 		goto fail;
 	}
-	if (current_trace)
-		*iter->trace = *current_trace;
+	*iter->trace = *current_trace;
 
 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
 		ret = -ENOMEM;
@@ -3477,7 +3509,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	static struct tracer *old_tracer;
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -3489,10 +3520,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
 	/*
@@ -3648,7 +3677,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		.ops		= &tracing_pipe_buf_ops,
 		.spd_release	= tracing_spd_release_pipe,
 	};
-	static struct tracer *old_tracer;
 	ssize_t ret;
 	size_t rem;
 	unsigned int i;
@@ -3658,10 +3686,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
 	mutex_lock(&iter->mutex);
@@ -4037,8 +4063,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	 * Reset the buffer so that it doesn't have incomparable timestamps.
 	 */
 	tracing_reset_online_cpus(&global_trace);
-	if (max_tr.buffer)
-		tracing_reset_online_cpus(&max_tr);
+	tracing_reset_online_cpus(&max_tr);
 
 	mutex_unlock(&trace_types_lock);
 
@@ -4054,6 +4079,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_iterator *iter;
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, true);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	}
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (val) {
+	case 0:
+		if (current_trace->allocated_snapshot) {
+			/* free spare buffer */
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+			set_buffer_entries(&max_tr, 1);
+			tracing_reset_online_cpus(&max_tr);
+			current_trace->allocated_snapshot = false;
+		}
+		break;
+	case 1:
+		if (!current_trace->allocated_snapshot) {
+			/* allocate spare buffer */
+			ret = resize_buffer_duplicate_size(&max_tr,
+					&global_trace, RING_BUFFER_ALL_CPUS);
+			if (ret < 0)
+				break;
+			current_trace->allocated_snapshot = true;
+		}
+
+		local_irq_disable();
+		/* Now, we're going to swap */
+		update_max_tr(&global_trace, current, smp_processor_id());
+		local_irq_enable();
+		break;
+	default:
+		if (current_trace->allocated_snapshot)
+			tracing_reset_online_cpus(&max_tr);
+		else
+			ret = -EINVAL;
+		break;
+	}
+
+	if (ret >= 0) {
+		*ppos += cnt;
+		ret = cnt;
+	}
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
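The TRACER_SNAPSHOT Kconfig help earlier in this patch drives this file with echo 1 > /sys/kernel/debug/tracing/snapshot followed by cat snapshot. The same sequence from C, as a rough userspace sketch (assumes debugfs is mounted at the usual path; error handling trimmed):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/sys/kernel/debug/tracing/snapshot", O_RDWR);

		if (fd < 0)
			return 1;
		write(fd, "1", 1);       /* val == 1: allocate the spare buffer and swap */
		lseek(fd, 0, SEEK_SET);  /* the write advanced the file offset; rewind */
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);  /* the snapshotted trace */
		close(fd);
		return 0;
	}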
@@ -4110,6 +4216,16 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= seq_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_seek,
+	.release	= tracing_release,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4414,6 +4530,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
+	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "read events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -4490,7 +4609,7 @@ struct dentry *tracing_init_dentry(void)
 
 static struct dentry *d_percpu;
 
-struct dentry *tracing_dentry_percpu(void)
+static struct dentry *tracing_dentry_percpu(void)
 {
 	static int once;
 	struct dentry *d_tracer;
@@ -4906,6 +5025,11 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+	trace_create_file("snapshot", 0644, d_tracer,
+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+#endif
+
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
@@ -5014,6 +5138,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	if (disable_tracing)
 		ftrace_kill();
 
+	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
 
 	for_each_tracing_cpu(cpu) {
@@ -5025,10 +5150,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	/* don't look at user memory in panic mode */
 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
-	/* Simulate the iterator */
-	iter.tr = &global_trace;
-	iter.trace = current_trace;
-
 	switch (oops_dump_mode) {
 	case DUMP_ALL:
 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
@@ -5173,7 +5294,7 @@ __init static int tracer_alloc_buffers(void)
 	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
-	current_trace = &nop_trace;
+
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c75d7988902c..57d7e5397d56 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,20 +287,62 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
+	bool			allocated_snapshot;
 };
 
 
 /* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
 
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT		(1<<11)
-#define TRACE_GLOBAL_BIT		(1<<12)
-#define TRACE_CONTROL_BIT		(1<<13)
+/*
+ * For function tracing recursion:
+ *  The order of these bits are important.
+ *
+ *  When function tracing occurs, the following steps are made:
+ *   If arch does not support a ftrace feature:
+ *    call internal function (uses INTERNAL bits) which calls...
+ *   If callback is registered to the "global" list, the list
+ *    function is called and recursion checks the GLOBAL bits.
+ *    then this function calls...
+ *   The function callback, which can use the FTRACE bits to
+ *    check for recursion.
+ *
+ * Now if the arch does not suppport a feature, and it calls
+ * the global list function which calls the ftrace callback
+ * all three of these steps will do a recursion protection.
+ * There's no reason to do one if the previous caller already
+ * did. The recursion that we are protecting against will
+ * go through the same steps again.
+ *
+ * To prevent the multiple recursion checks, if a recursion
+ * bit is set that is higher than the MAX bit of the current
+ * check, then we know that the check was made by the previous
+ * caller, and we can skip the current check.
+ */
+enum {
+	TRACE_BUFFER_BIT,
+	TRACE_BUFFER_NMI_BIT,
+	TRACE_BUFFER_IRQ_BIT,
+	TRACE_BUFFER_SIRQ_BIT,
+
+	/* Start of function recursion bits */
+	TRACE_FTRACE_BIT,
+	TRACE_FTRACE_NMI_BIT,
+	TRACE_FTRACE_IRQ_BIT,
+	TRACE_FTRACE_SIRQ_BIT,
+
+	/* GLOBAL_BITs must be greater than FTRACE_BITs */
+	TRACE_GLOBAL_BIT,
+	TRACE_GLOBAL_NMI_BIT,
+	TRACE_GLOBAL_IRQ_BIT,
+	TRACE_GLOBAL_SIRQ_BIT,
+
+	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
+	TRACE_INTERNAL_BIT,
+	TRACE_INTERNAL_NMI_BIT,
+	TRACE_INTERNAL_IRQ_BIT,
+	TRACE_INTERNAL_SIRQ_BIT,
+
+	TRACE_CONTROL_BIT,
 
 /*
  * Abuse of the trace_recursion.
@@ -309,11 +351,77 @@ struct tracer {
  * was called in irq context but we have irq tracing off. Since this
  * can only be modified by current, we can reuse trace_recursion.
  */
-#define TRACE_IRQ_BIT			(1<<13)
+	TRACE_IRQ_BIT,
+};
+
+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
+#define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
+
+#define TRACE_CONTEXT_BITS	4
+
+#define TRACE_FTRACE_START	TRACE_FTRACE_BIT
+#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
+#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_LIST_START	TRACE_INTERNAL_BIT
+#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
+
+static __always_inline int trace_get_context_bit(void)
+{
+	int bit;
 
-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
+
+	return bit;
+}
+
+static __always_inline int trace_test_and_set_recursion(int start, int max)
+{
+	unsigned int val = current->trace_recursion;
+	int bit;
+
+	/* A previous recursion check was made */
+	if ((val & TRACE_CONTEXT_MASK) > max)
+		return 0;
+
+	bit = trace_get_context_bit() + start;
+	if (unlikely(val & (1 << bit)))
+		return -1;
+
+	val |= 1 << bit;
+	current->trace_recursion = val;
+	barrier();
+
+	return bit;
+}
+
+static __always_inline void trace_clear_recursion(int bit)
+{
+	unsigned int val = current->trace_recursion;
+
+	if (!bit)
+		return;
+
+	bit = 1 << bit;
+	val &= ~bit;
+
+	barrier();
+	current->trace_recursion = val;
+}
 
 #define TRACE_PIPE_ALL_CPU		-1
 
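The calling convention for these helpers, as the reworked function tracer later in this patch uses them: reserve the context bit for your recursion range on entry, bail out if it is already held, and release it on exit. The pattern, with the callback body elided:

	int bit;

	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
	if (bit < 0)	/* already tracing in this context: drop the event */
		return;

	/* ... do the actual tracing work ... */

	trace_clear_recursion(bit);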
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cbb..aa8f5f48dae6 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
 #include <linux/ktime.h>
 #include <linux/trace_clock.h>
 
-#include "trace.h"
-
 /*
  * trace_clock_local(): the simplest and least coherent tracing clock.
  *
@@ -44,6 +42,7 @@ u64 notrace trace_clock_local(void)
 
 	return clock;
 }
+EXPORT_SYMBOL_GPL(trace_clock_local);
 
 /*
  * trace_clock(): 'between' trace clock. Not completely serialized,
@@ -86,7 +85,7 @@ u64 notrace trace_clock_global(void)
 	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
-	now = cpu_clock(this_cpu);
+	now = sched_clock_cpu(this_cpu);
 	/*
 	 * If in an NMI context then dont risk lockups and return the
 	 * cpu_clock() time:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b946..57e9b284250c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
-	__common_field(int, padding);
 
 	return ret;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e3ad8082ab7..601152523326 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
 	tracing_reset_online_cpus(tr);
 }
 
-static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
-				 struct ftrace_ops *op, struct pt_regs *pt_regs)
-{
-	struct trace_array *tr = func_trace;
-	struct trace_array_cpu *data;
-	unsigned long flags;
-	long disabled;
-	int cpu;
-	int pc;
-
-	if (unlikely(!ftrace_function_enabled))
-		return;
-
-	pc = preempt_count();
-	preempt_disable_notrace();
-	local_save_flags(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
-
-	if (likely(disabled == 1))
-		trace_function(tr, ip, parent_ip, flags, pc);
-
-	atomic_dec(&data->disabled);
-	preempt_enable_notrace();
-}
-
 /* Our option */
 enum {
 	TRACE_FUNC_OPT_STACK = 0x1,
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
 static void
 function_trace_call(unsigned long ip, unsigned long parent_ip,
 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
-
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
+	int bit;
 	int cpu;
 	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
 
-	/*
-	 * Need to use raw, since this must be called before the
-	 * recursive protection is performed.
-	 */
-	local_irq_save(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
+	pc = preempt_count();
+	preempt_disable_notrace();
 
-	if (likely(disabled == 1)) {
-		pc = preempt_count();
+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	if (bit < 0)
+		goto out;
+
+	cpu = smp_processor_id();
+	data = tr->data[cpu];
+	if (!atomic_read(&data->disabled)) {
+		local_save_flags(flags);
 		trace_function(tr, ip, parent_ip, flags, pc);
 	}
+	trace_clear_recursion(bit);
 
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+ out:
+	preempt_enable_notrace();
 }
 
 static void
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
 {
 	ftrace_function_enabled = 0;
 
-	if (trace_flags & TRACE_ITER_PREEMPTONLY)
-		trace_ops.func = function_trace_call_preempt_only;
-	else
-		trace_ops.func = function_trace_call;
-
 	if (func_flags.val & TRACE_FUNC_OPT_STACK)
 		register_ftrace_function(&trace_stack_ops);
 	else
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b74eb7e..39ada66389cc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
 #define TRACE_GRAPH_PRINT_ABS_TIME	0x20
 #define TRACE_GRAPH_PRINT_IRQS		0x40
 
+static unsigned int max_depth;
+
 static struct tracer_opt trace_opts[] = {
 	/* Display overruns? (for self-debug purpose) */
 	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
 	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
-	ftrace_graph_return(&trace);
 	barrier();
 	current->curr_ret_stack--;
 
+	/*
+	 * The trace should run after decrementing the ret counter
+	 * in case an interrupt were to come in. We don't want to
+	 * lose the interrupt if max_depth is set.
+	 */
+	ftrace_graph_return(&trace);
+
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 		return 0;
 
 	/* trace it when it is-nested-in or is a function enabled. */
-	if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
-	      ftrace_graph_ignore_irqs())
+	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
+	     ftrace_graph_ignore_irqs()) ||
+	    (max_depth && trace->depth >= max_depth))
 		return 0;
 
 	local_irq_save(flags);
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
1457#endif 1466#endif
1458}; 1467};
1459 1468
1469
1470static ssize_t
1471graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
1472 loff_t *ppos)
1473{
1474 unsigned long val;
1475 int ret;
1476
1477 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1478 if (ret)
1479 return ret;
1480
1481 max_depth = val;
1482
1483 *ppos += cnt;
1484
1485 return cnt;
1486}
1487
1488static ssize_t
1489graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
1490 loff_t *ppos)
1491{
1492 char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
1493 int n;
1494
1495 n = sprintf(buf, "%d\n", max_depth);
1496
1497 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
1498}
1499
1500static const struct file_operations graph_depth_fops = {
1501 .open = tracing_open_generic,
1502 .write = graph_depth_write,
1503 .read = graph_depth_read,
1504 .llseek = generic_file_llseek,
1505};
1506
1507static __init int init_graph_debugfs(void)
1508{
1509 struct dentry *d_tracer;
1510
1511 d_tracer = tracing_init_dentry();
1512 if (!d_tracer)
1513 return 0;
1514
1515 trace_create_file("max_graph_depth", 0644, d_tracer,
1516 NULL, &graph_depth_fops);
1517
1518 return 0;
1519}
1520fs_initcall(init_graph_debugfs);
1521
1460static __init int init_graph_trace(void) 1522static __init int init_graph_trace(void)
1461{ 1523{
1462 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); 1524 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
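Together with the max_depth check added to trace_graph_entry() above, the new max_graph_depth file lets userspace cap how deep the graph tracer follows calls; a value of 0 leaves the depth unlimited, per the max_depth && guard. A hedged usage sketch, assuming the usual debugfs mount point:

	# record only the top-level calls
	echo 1 > /sys/kernel/debug/tracing/max_graph_depth
	echo function_graph > /sys/kernel/debug/tracing/current_tracer
	cat /sys/kernel/debug/tracing/trace
	# back to unlimited depth
	echo 0 > /sys/kernel/debug/tracing/max_graph_depth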
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 933708677814..5c7e09d10d74 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
66#define TP_FLAG_TRACE 1 66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2 67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4 68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70 69
71 70
72/* data_rloc: data relative location, compatible with u32 */ 71/* data_rloc: data relative location, compatible with u32 */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..75aa97fbe1a1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/sched/rt.h>
18#include <trace/events/sched.h> 19#include <trace/events/sched.h>
19
20#include "trace.h" 20#include "trace.h"
21 21
22static struct trace_array *wakeup_trace; 22static struct trace_array *wakeup_trace;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a815..51c819c12c29 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
415 * The ftrace infrastructure should provide the recursion 415 * The ftrace infrastructure should provide the recursion
416 * protection. If not, this will crash the kernel! 416 * protection. If not, this will crash the kernel!
417 */ 417 */
418 trace_selftest_recursion_cnt++; 418 if (trace_selftest_recursion_cnt++ > 10)
419 return;
419 DYN_FTRACE_TEST_NAME(); 420 DYN_FTRACE_TEST_NAME();
420} 421}
421 422
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
452 char *func_name; 453 char *func_name;
453 int len; 454 int len;
454 int ret; 455 int ret;
455 int cnt;
456 456
457 /* The previous test PASSED */ 457 /* The previous test PASSED */
458 pr_cont("PASSED\n"); 458 pr_cont("PASSED\n");
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
510 510
511 unregister_ftrace_function(&test_recsafe_probe); 511 unregister_ftrace_function(&test_recsafe_probe);
512 512
513 /*
514 * If arch supports all ftrace features, and no other task
515 * was on the list, we should be fine.
516 */
517 if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
518 cnt = 2; /* Should have recursed */
519 else
520 cnt = 1;
521
522 ret = -1; 513 ret = -1;
523 if (trace_selftest_recursion_cnt != cnt) { 514 if (trace_selftest_recursion_cnt != 2) {
524 pr_cont("*callback not called expected %d times (%d)* ", 515 pr_cont("*callback not called expected 2 times (%d)* ",
525 cnt, trace_selftest_recursion_cnt); 516 trace_selftest_recursion_cnt);
526 goto out; 517 goto out;
527 } 518 }
528 519
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
568 int ret; 559 int ret;
569 int supported = 0; 560 int supported = 0;
570 561
571#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS 562#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
572 supported = 1; 563 supported = 1;
573#endif 564#endif
574 565
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7609dd6714c2..5329e13e74a1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
77 return syscalls_metadata[nr]; 77 return syscalls_metadata[nr];
78} 78}
79 79
80enum print_line_t 80static enum print_line_t
81print_syscall_enter(struct trace_iterator *iter, int flags, 81print_syscall_enter(struct trace_iterator *iter, int flags,
82 struct trace_event *event) 82 struct trace_event *event)
83{ 83{
@@ -130,7 +130,7 @@ end:
130 return TRACE_TYPE_HANDLED; 130 return TRACE_TYPE_HANDLED;
131} 131}
132 132
133enum print_line_t 133static enum print_line_t
134print_syscall_exit(struct trace_iterator *iter, int flags, 134print_syscall_exit(struct trace_iterator *iter, int flags,
135 struct trace_event *event) 135 struct trace_event *event)
136{ 136{
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
270 return ret; 270 return ret;
271} 271}
272 272
273void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) 273static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
274{ 274{
275 struct syscall_trace_enter *entry; 275 struct syscall_trace_enter *entry;
276 struct syscall_metadata *sys_data; 276 struct syscall_metadata *sys_data;
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
305 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 305 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
306} 306}
307 307
308void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 308static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
309{ 309{
310 struct syscall_trace_exit *entry; 310 struct syscall_trace_exit *entry;
311 struct syscall_metadata *sys_data; 311 struct syscall_metadata *sys_data;
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
337 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 337 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
338} 338}
339 339
340int reg_event_syscall_enter(struct ftrace_event_call *call) 340static int reg_event_syscall_enter(struct ftrace_event_call *call)
341{ 341{
342 int ret = 0; 342 int ret = 0;
343 int num; 343 int num;
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
356 return ret; 356 return ret;
357} 357}
358 358
359void unreg_event_syscall_enter(struct ftrace_event_call *call) 359static void unreg_event_syscall_enter(struct ftrace_event_call *call)
360{ 360{
361 int num; 361 int num;
362 362
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
371 mutex_unlock(&syscall_trace_lock); 371 mutex_unlock(&syscall_trace_lock);
372} 372}
373 373
374int reg_event_syscall_exit(struct ftrace_event_call *call) 374static int reg_event_syscall_exit(struct ftrace_event_call *call)
375{ 375{
376 int ret = 0; 376 int ret = 0;
377 int num; 377 int num;
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
390 return ret; 390 return ret;
391} 391}
392 392
393void unreg_event_syscall_exit(struct ftrace_event_call *call) 393static void unreg_event_syscall_exit(struct ftrace_event_call *call)
394{ 394{
395 int num; 395 int num;
396 396
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
459 return (unsigned long)sys_call_table[nr]; 459 return (unsigned long)sys_call_table[nr];
460} 460}
461 461
462int __init init_ftrace_syscalls(void) 462static int __init init_ftrace_syscalls(void)
463{ 463{
464 struct syscall_metadata *meta; 464 struct syscall_metadata *meta;
465 unsigned long addr; 465 unsigned long addr;
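None of the symbols made static above had callers outside this file; they are reached through the syscall event registration hooks, i.e. by enabling entries under events/syscalls. A sketch, assuming CONFIG_FTRACE_SYSCALLS is set and the running kernel exposes a sys_enter_open event:

	echo 1 > /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/enable
	cat /sys/kernel/debug/tracing/trace_pipe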
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c86e6d4f67fb..8dad2a92dee9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
28 28
29#define UPROBE_EVENT_SYSTEM "uprobes" 29#define UPROBE_EVENT_SYSTEM "uprobes"
30 30
31struct trace_uprobe_filter {
32 rwlock_t rwlock;
33 int nr_systemwide;
34 struct list_head perf_events;
35};
36
31/* 37/*
32 * uprobe event core functions 38 * uprobe event core functions
33 */ 39 */
34struct trace_uprobe;
35struct uprobe_trace_consumer {
36 struct uprobe_consumer cons;
37 struct trace_uprobe *tu;
38};
39
40struct trace_uprobe { 40struct trace_uprobe {
41 struct list_head list; 41 struct list_head list;
42 struct ftrace_event_class class; 42 struct ftrace_event_class class;
43 struct ftrace_event_call call; 43 struct ftrace_event_call call;
44 struct uprobe_trace_consumer *consumer; 44 struct trace_uprobe_filter filter;
45 struct uprobe_consumer consumer;
45 struct inode *inode; 46 struct inode *inode;
46 char *filename; 47 char *filename;
47 unsigned long offset; 48 unsigned long offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
64 65
65static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); 66static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
66 67
68static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
69{
70 rwlock_init(&filter->rwlock);
71 filter->nr_systemwide = 0;
72 INIT_LIST_HEAD(&filter->perf_events);
73}
74
75static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
76{
77 return !filter->nr_systemwide && list_empty(&filter->perf_events);
78}
79
67/* 80/*
68 * Allocate new trace_uprobe and initialize it (including uprobes). 81 * Allocate new trace_uprobe and initialize it (including uprobes).
69 */ 82 */
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
92 goto error; 105 goto error;
93 106
94 INIT_LIST_HEAD(&tu->list); 107 INIT_LIST_HEAD(&tu->list);
108 tu->consumer.handler = uprobe_dispatcher;
109 init_trace_uprobe_filter(&tu->filter);
95 return tu; 110 return tu;
96 111
97error: 112error:
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
253 if (ret) 268 if (ret)
254 goto fail_address_parse; 269 goto fail_address_parse;
255 270
271 inode = igrab(path.dentry->d_inode);
272 path_put(&path);
273
274 if (!inode || !S_ISREG(inode->i_mode)) {
275 ret = -EINVAL;
276 goto fail_address_parse;
277 }
278
256 ret = kstrtoul(arg, 0, &offset); 279 ret = kstrtoul(arg, 0, &offset);
257 if (ret) 280 if (ret)
258 goto fail_address_parse; 281 goto fail_address_parse;
259 282
260 inode = igrab(path.dentry->d_inode);
261
262 argc -= 2; 283 argc -= 2;
263 argv += 2; 284 argv += 2;
264 285
@@ -356,7 +377,7 @@ fail_address_parse:
356 if (inode) 377 if (inode)
357 iput(inode); 378 iput(inode);
358 379
359 pr_info("Failed to parse address.\n"); 380 pr_info("Failed to parse address or file.\n");
360 381
361 return ret; 382 return ret;
362} 383}
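With the igrab() moved ahead of the offset parse, a path that does not name a regular file now fails with -EINVAL before anything else is attempted. A hypothetical creation sketch (the binary and the 0x4245c0 offset are placeholders, not values from this patch):

	# place a probe at offset 0x4245c0 inside /bin/bash
	echo 'p:uprobes/bash_probe /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
	# per-probe hit counts, served by the uprobe_profile_ops below
	cat /sys/kernel/debug/tracing/uprobe_profile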
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
465}; 486};
466 487
467/* uprobe handler */ 488/* uprobe handler */
468static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) 489static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
469{ 490{
470 struct uprobe_trace_entry_head *entry; 491 struct uprobe_trace_entry_head *entry;
471 struct ring_buffer_event *event; 492 struct ring_buffer_event *event;
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
475 unsigned long irq_flags; 496 unsigned long irq_flags;
476 struct ftrace_event_call *call = &tu->call; 497 struct ftrace_event_call *call = &tu->call;
477 498
478 tu->nhit++;
479
480 local_save_flags(irq_flags); 499 local_save_flags(irq_flags);
481 pc = preempt_count(); 500 pc = preempt_count();
482 501
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
485 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 504 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
486 size, irq_flags, pc); 505 size, irq_flags, pc);
487 if (!event) 506 if (!event)
488 return; 507 return 0;
489 508
490 entry = ring_buffer_event_data(event); 509 entry = ring_buffer_event_data(event);
491 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 510 entry->ip = instruction_pointer(task_pt_regs(current));
492 data = (u8 *)&entry[1]; 511 data = (u8 *)&entry[1];
493 for (i = 0; i < tu->nr_args; i++) 512 for (i = 0; i < tu->nr_args; i++)
494 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 513 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
495 514
496 if (!filter_current_check_discard(buffer, call, entry, event)) 515 if (!filter_current_check_discard(buffer, call, entry, event))
497 trace_buffer_unlock_commit(buffer, event, irq_flags, pc); 516 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
517
518 return 0;
498} 519}
499 520
500/* Event entry printers */ 521/* Event entry printers */
@@ -533,42 +554,43 @@ partial:
533 return TRACE_TYPE_PARTIAL_LINE; 554 return TRACE_TYPE_PARTIAL_LINE;
534} 555}
535 556
536static int probe_event_enable(struct trace_uprobe *tu, int flag) 557static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
537{ 558{
538 struct uprobe_trace_consumer *utc; 559 return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
539 int ret = 0; 560}
540 561
541 if (!tu->inode || tu->consumer) 562typedef bool (*filter_func_t)(struct uprobe_consumer *self,
542 return -EINTR; 563 enum uprobe_filter_ctx ctx,
564 struct mm_struct *mm);
543 565
544 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); 566static int
545 if (!utc) 567probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
568{
569 int ret = 0;
570
571 if (is_trace_uprobe_enabled(tu))
546 return -EINTR; 572 return -EINTR;
547 573
548 utc->cons.handler = uprobe_dispatcher; 574 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
549 utc->cons.filter = NULL;
550 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
551 if (ret) {
552 kfree(utc);
553 return ret;
554 }
555 575
556 tu->flags |= flag; 576 tu->flags |= flag;
557 utc->tu = tu; 577 tu->consumer.filter = filter;
558 tu->consumer = utc; 578 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
579 if (ret)
580 tu->flags &= ~flag;
559 581
560 return 0; 582 return ret;
561} 583}
562 584
563static void probe_event_disable(struct trace_uprobe *tu, int flag) 585static void probe_event_disable(struct trace_uprobe *tu, int flag)
564{ 586{
565 if (!tu->inode || !tu->consumer) 587 if (!is_trace_uprobe_enabled(tu))
566 return; 588 return;
567 589
568 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); 590 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
591
592 uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
569 tu->flags &= ~flag; 593 tu->flags &= ~flag;
570 kfree(tu->consumer);
571 tu->consumer = NULL;
572} 594}
573 595
574static int uprobe_event_define_fields(struct ftrace_event_call *event_call) 596static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
642} 664}
643 665
644#ifdef CONFIG_PERF_EVENTS 666#ifdef CONFIG_PERF_EVENTS
667static bool
668__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
669{
670 struct perf_event *event;
671
672 if (filter->nr_systemwide)
673 return true;
674
675 list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
676 if (event->hw.tp_target->mm == mm)
677 return true;
678 }
679
680 return false;
681}
682
683static inline bool
684uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
685{
686 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
687}
688
689static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
690{
691 bool done;
692
693 write_lock(&tu->filter.rwlock);
694 if (event->hw.tp_target) {
695 /*
696 * event->parent != NULL means copy_process(), we can avoid
697 * uprobe_apply(). current->mm must be probed and we can rely
698 * on dup_mmap() which preserves the already installed bp's.
699 *
700 * attr.enable_on_exec means that exec/mmap will install the
701 * breakpoints we need.
702 */
703 done = tu->filter.nr_systemwide ||
704 event->parent || event->attr.enable_on_exec ||
705 uprobe_filter_event(tu, event);
706 list_add(&event->hw.tp_list, &tu->filter.perf_events);
707 } else {
708 done = tu->filter.nr_systemwide;
709 tu->filter.nr_systemwide++;
710 }
711 write_unlock(&tu->filter.rwlock);
712
713 if (!done)
714 uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
715
716 return 0;
717}
718
719static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
720{
721 bool done;
722
723 write_lock(&tu->filter.rwlock);
724 if (event->hw.tp_target) {
725 list_del(&event->hw.tp_list);
726 done = tu->filter.nr_systemwide ||
727 (event->hw.tp_target->flags & PF_EXITING) ||
728 uprobe_filter_event(tu, event);
729 } else {
730 tu->filter.nr_systemwide--;
731 done = tu->filter.nr_systemwide;
732 }
733 write_unlock(&tu->filter.rwlock);
734
735 if (!done)
736 uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
737
738 return 0;
739}
740
741static bool uprobe_perf_filter(struct uprobe_consumer *uc,
742 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
743{
744 struct trace_uprobe *tu;
745 int ret;
746
747 tu = container_of(uc, struct trace_uprobe, consumer);
748 read_lock(&tu->filter.rwlock);
749 ret = __uprobe_perf_filter(&tu->filter, mm);
750 read_unlock(&tu->filter.rwlock);
751
752 return ret;
753}
754
645/* uprobe profile handler */ 755/* uprobe profile handler */
646static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) 756static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
647{ 757{
648 struct ftrace_event_call *call = &tu->call; 758 struct ftrace_event_call *call = &tu->call;
649 struct uprobe_trace_entry_head *entry; 759 struct uprobe_trace_entry_head *entry;
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
652 int size, __size, i; 762 int size, __size, i;
653 int rctx; 763 int rctx;
654 764
765 if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
766 return UPROBE_HANDLER_REMOVE;
767
655 __size = sizeof(*entry) + tu->size; 768 __size = sizeof(*entry) + tu->size;
656 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 769 size = ALIGN(__size + sizeof(u32), sizeof(u64));
657 size -= sizeof(u32); 770 size -= sizeof(u32);
658 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) 771 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
659 return; 772 return 0;
660 773
661 preempt_disable(); 774 preempt_disable();
662 775
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
664 if (!entry) 777 if (!entry)
665 goto out; 778 goto out;
666 779
667 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 780 entry->ip = instruction_pointer(task_pt_regs(current));
668 data = (u8 *)&entry[1]; 781 data = (u8 *)&entry[1];
669 for (i = 0; i < tu->nr_args; i++) 782 for (i = 0; i < tu->nr_args; i++)
670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 783 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
674 787
675 out: 788 out:
676 preempt_enable(); 789 preempt_enable();
790 return 0;
677} 791}
678#endif /* CONFIG_PERF_EVENTS */ 792#endif /* CONFIG_PERF_EVENTS */
679 793
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
684 798
685 switch (type) { 799 switch (type) {
686 case TRACE_REG_REGISTER: 800 case TRACE_REG_REGISTER:
687 return probe_event_enable(tu, TP_FLAG_TRACE); 801 return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
688 802
689 case TRACE_REG_UNREGISTER: 803 case TRACE_REG_UNREGISTER:
690 probe_event_disable(tu, TP_FLAG_TRACE); 804 probe_event_disable(tu, TP_FLAG_TRACE);
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
692 806
693#ifdef CONFIG_PERF_EVENTS 807#ifdef CONFIG_PERF_EVENTS
694 case TRACE_REG_PERF_REGISTER: 808 case TRACE_REG_PERF_REGISTER:
695 return probe_event_enable(tu, TP_FLAG_PROFILE); 809 return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
696 810
697 case TRACE_REG_PERF_UNREGISTER: 811 case TRACE_REG_PERF_UNREGISTER:
698 probe_event_disable(tu, TP_FLAG_PROFILE); 812 probe_event_disable(tu, TP_FLAG_PROFILE);
699 return 0; 813 return 0;
814
815 case TRACE_REG_PERF_OPEN:
816 return uprobe_perf_open(tu, data);
817
818 case TRACE_REG_PERF_CLOSE:
819 return uprobe_perf_close(tu, data);
820
700#endif 821#endif
701 default: 822 default:
702 return 0; 823 return 0;
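The new TRACE_REG_PERF_OPEN/CLOSE cases hand each perf_event to uprobe_perf_open()/uprobe_perf_close(), which is what makes the per-task filter take effect: a probe opened for a single task only keeps breakpoints installed in that task's mm. A hedged perf-side sketch (event name and PID are illustrative, continuing the bash_probe example):

	# count bash_probe hits in task 1234 only; other tasks run unpatched
	perf record -e uprobes:bash_probe -p 1234 sleep 5
	perf report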
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
706 827
707static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) 828static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
708{ 829{
709 struct uprobe_trace_consumer *utc;
710 struct trace_uprobe *tu; 830 struct trace_uprobe *tu;
831 int ret = 0;
711 832
712 utc = container_of(con, struct uprobe_trace_consumer, cons); 833 tu = container_of(con, struct trace_uprobe, consumer);
713 tu = utc->tu; 834 tu->nhit++;
714 if (!tu || tu->consumer != utc)
715 return 0;
716 835
717 if (tu->flags & TP_FLAG_TRACE) 836 if (tu->flags & TP_FLAG_TRACE)
718 uprobe_trace_func(tu, regs); 837 ret |= uprobe_trace_func(tu, regs);
719 838
720#ifdef CONFIG_PERF_EVENTS 839#ifdef CONFIG_PERF_EVENTS
721 if (tu->flags & TP_FLAG_PROFILE) 840 if (tu->flags & TP_FLAG_PROFILE)
722 uprobe_perf_func(tu, regs); 841 ret |= uprobe_perf_func(tu, regs);
723#endif 842#endif
724 return 0; 843 return ret;
725} 844}
726 845
727static struct trace_event_functions uprobe_funcs = { 846static struct trace_event_functions uprobe_funcs = {