Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  |  55
-rw-r--r--  kernel/trace/blktrace.c               |  30
-rw-r--r--  kernel/trace/ftrace.c                 | 216
-rw-r--r--  kernel/trace/power-traces.c           |   3
-rw-r--r--  kernel/trace/ring_buffer.c            | 114
-rw-r--r--  kernel/trace/trace.c                  | 340
-rw-r--r--  kernel/trace/trace.h                  | 140
-rw-r--r--  kernel/trace/trace_clock.c            |   5
-rw-r--r--  kernel/trace/trace_events.c           |   1
-rw-r--r--  kernel/trace/trace_functions.c        |  61
-rw-r--r--  kernel/trace/trace_functions_graph.c  |  68
-rw-r--r--  kernel/trace/trace_irqsoff.c          |  19
-rw-r--r--  kernel/trace/trace_output.c           |   3
-rw-r--r--  kernel/trace/trace_probe.h            |   1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |  20
-rw-r--r--  kernel/trace/trace_selftest.c         |  21
-rw-r--r--  kernel/trace/trace_stack.c            |   2
-rw-r--r--  kernel/trace/trace_syscalls.c         |  61
-rw-r--r--  kernel/trace/trace_uprobe.c           | 217
19 files changed, 985 insertions, 392 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..fc382d6e2765 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
39 help 39 help
40 See Documentation/trace/ftrace-design.txt 40 See Documentation/trace/ftrace-design.txt
41 41
42config HAVE_DYNAMIC_FTRACE_WITH_REGS
43 bool
44
42config HAVE_FTRACE_MCOUNT_RECORD 45config HAVE_FTRACE_MCOUNT_RECORD
43 bool 46 bool
44 help 47 help
@@ -78,21 +81,6 @@ config EVENT_TRACING
78 select CONTEXT_SWITCH_TRACER 81 select CONTEXT_SWITCH_TRACER
79 bool 82 bool
80 83
81config EVENT_POWER_TRACING_DEPRECATED
82 depends on EVENT_TRACING
83 bool "Deprecated power event trace API, to be removed"
84 default y
85 help
86 Provides old power event types:
87 C-state/idle accounting events:
88 power:power_start
89 power:power_end
90 and old cpufreq accounting event:
91 power:power_frequency
92 This is for userspace compatibility
93 and will vanish after 5 kernel iterations,
94 namely 3.1.
95
96config CONTEXT_SWITCH_TRACER 84config CONTEXT_SWITCH_TRACER
97 bool 85 bool
98 86
@@ -250,6 +238,16 @@ config FTRACE_SYSCALLS
250 help 238 help
251 Basic tracer to catch the syscall entry and exit events. 239 Basic tracer to catch the syscall entry and exit events.
252 240
241config TRACER_SNAPSHOT
242 bool "Create a snapshot trace buffer"
243 select TRACER_MAX_TRACE
244 help
245 Allow tracing users to take snapshot of the current buffer using the
246 ftrace interface, e.g.:
247
248 echo 1 > /sys/kernel/debug/tracing/snapshot
249 cat snapshot
250
253config TRACE_BRANCH_PROFILING 251config TRACE_BRANCH_PROFILING
254 bool 252 bool
255 select GENERIC_TRACER 253 select GENERIC_TRACER
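
[Editor's note] The TRACER_SNAPSHOT help text above already gives the debugfs recipe. The same thing from a small user-space program, as a minimal sketch: it assumes debugfs is mounted at /sys/kernel/debug and that the kernel was built with this option.

#include <stdio.h>
#include <stdlib.h>

static void must(int ok, const char *what)
{
        if (!ok) {
                perror(what);
                exit(1);
        }
}

int main(void)
{
        FILE *f;
        char line[4096];

        /* echo 1 > snapshot: allocate the spare buffer if needed, take a snapshot */
        f = fopen("/sys/kernel/debug/tracing/snapshot", "w");
        must(f != NULL, "open snapshot for write");
        must(fputs("1\n", f) >= 0, "write snapshot");
        fclose(f);

        /* cat snapshot: read back the frozen copy while tracing continues */
        f = fopen("/sys/kernel/debug/tracing/snapshot", "r");
        must(f != NULL, "open snapshot for read");
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}

Per the help text, writing '1' allocates the spare buffer on first use and takes the snapshot, so the read-out stays stable while the live buffer keeps filling.
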
@@ -416,23 +414,32 @@ config PROBE_EVENTS
416 def_bool n 414 def_bool n
417 415
418config DYNAMIC_FTRACE 416config DYNAMIC_FTRACE
419 bool "enable/disable ftrace tracepoints dynamically" 417 bool "enable/disable function tracing dynamically"
420 depends on FUNCTION_TRACER 418 depends on FUNCTION_TRACER
421 depends on HAVE_DYNAMIC_FTRACE 419 depends on HAVE_DYNAMIC_FTRACE
422 default y 420 default y
423 help 421 help
424 This option will modify all the calls to ftrace dynamically 422 This option will modify all the calls to function tracing
425 (will patch them out of the binary image and replace them 423 dynamically (will patch them out of the binary image and
426 with a No-Op instruction) as they are called. A table is 424 replace them with a No-Op instruction) on boot up. During
427 created to dynamically enable them again. 425 compile time, a table is made of all the locations that ftrace
426 can function trace, and this table is linked into the kernel
427 image. When this is enabled, functions can be individually
428 enabled, and the functions not enabled will not affect
429 performance of the system.
430
431 See the files in /sys/kernel/debug/tracing:
432 available_filter_functions
433 set_ftrace_filter
434 set_ftrace_notrace
428 435
429 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but 436 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
430 otherwise has native performance as long as no tracing is active. 437 otherwise has native performance as long as no tracing is active.
431 438
432 The changes to the code are done by a kernel thread that 439config DYNAMIC_FTRACE_WITH_REGS
433 wakes up once a second and checks to see if any ftrace calls 440 def_bool y
434 were made. If so, it runs stop_machine (stops all CPUS) 441 depends on DYNAMIC_FTRACE
435 and modifies the code to jump over the call to ftrace. 442 depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
436 443
437config FUNCTION_PROFILER 444config FUNCTION_PROFILER
438 bool "Kernel function profiler" 445 bool "Kernel function profiler"
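
[Editor's note] The new DYNAMIC_FTRACE_WITH_REGS symbol is what later hunks test instead of the old ARCH_SUPPORTS_FTRACE_SAVE_REGS define. From the ftrace_ops side this is only visible through the SAVE_REGS flags; here is a rough sketch of a callback that wants pt_regs. The module scaffolding, names and callback body are invented; only the flag and the registration calls are assumed from the kernel.

#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/ptrace.h>

static unsigned long hits;      /* racy counter, good enough for a sketch */

static void regs_demo_func(unsigned long ip, unsigned long parent_ip,
                           struct ftrace_ops *op, struct pt_regs *regs)
{
        /* regs may be NULL when the arch could not save a register frame */
        if (regs)
                hits++;
}

static struct ftrace_ops regs_demo_ops = {
        .func   = regs_demo_func,
        .flags  = FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED,
};

static int __init regs_demo_init(void)
{
        /*
         * With plain FTRACE_OPS_FL_SAVE_REGS this registration would be
         * rejected on a kernel lacking CONFIG_DYNAMIC_FTRACE_WITH_REGS (see
         * the __register_ftrace_function() hunk below); the _IF_SUPPORTED
         * variant degrades to regs == NULL instead.
         */
        return register_ftrace_function(&regs_demo_ops);
}

static void __exit regs_demo_exit(void)
{
        unregister_ftrace_function(&regs_demo_ops);
}

module_init(regs_demo_init);
module_exit(regs_demo_exit);
MODULE_LICENSE("GPL");
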
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741c..9e5b8c272eec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
147 return; 147 return;
148 148
149 local_irq_save(flags); 149 local_irq_save(flags);
150 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 150 buf = this_cpu_ptr(bt->msg_data);
151 va_start(args, fmt); 151 va_start(args, fmt);
152 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); 152 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
153 va_end(args); 153 va_end(args);
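
[Editor's note] The per_cpu_ptr(bt->msg_data, smp_processor_id()) to this_cpu_ptr(bt->msg_data) change above is an equivalence, not a behaviour change: interrupts are already off, so both expressions resolve to the local CPU's buffer. The general pattern, with made-up names for the sketch:

#include <linux/irqflags.h>
#include <linux/kernel.h>
#include <linux/percpu.h>

static char __percpu *msg_scratch;      /* e.g. from __alloc_percpu(128, 1) */

static void fill_scratch(const char *msg)
{
        unsigned long flags;
        char *buf;

        local_irq_save(flags);                  /* pins us to this CPU */
        buf = this_cpu_ptr(msg_scratch);        /* old spelling:
                                                 * per_cpu_ptr(msg_scratch, smp_processor_id()) */
        snprintf(buf, 128, "%s", msg);
        local_irq_restore(flags);
}
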
@@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore,
739 struct request_queue *q, 739 struct request_queue *q,
740 struct request *rq) 740 struct request *rq)
741{ 741{
742 struct blk_trace *bt = q->blk_trace;
743
744 /* if control ever passes through here, it's a request based driver */
745 if (unlikely(bt && !bt->rq_based))
746 bt->rq_based = true;
747
742 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 748 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
743} 749}
744 750
@@ -774,15 +780,30 @@ static void blk_add_trace_bio_bounce(void *ignore,
774 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); 780 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
775} 781}
776 782
777static void blk_add_trace_bio_complete(void *ignore, 783static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
778 struct request_queue *q, struct bio *bio,
779 int error)
780{ 784{
785 struct request_queue *q;
786 struct blk_trace *bt;
787
788 if (!bio->bi_bdev)
789 return;
790
791 q = bdev_get_queue(bio->bi_bdev);
792 bt = q->blk_trace;
793
794 /*
795 * Request based drivers will generate both rq and bio completions.
796 * Ignore bio ones.
797 */
798 if (likely(!bt) || bt->rq_based)
799 return;
800
781 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); 801 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
782} 802}
783 803
784static void blk_add_trace_bio_backmerge(void *ignore, 804static void blk_add_trace_bio_backmerge(void *ignore,
785 struct request_queue *q, 805 struct request_queue *q,
806 struct request *rq,
786 struct bio *bio) 807 struct bio *bio)
787{ 808{
788 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); 809 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
@@ -790,6 +811,7 @@ static void blk_add_trace_bio_backmerge(void *ignore,
790 811
791static void blk_add_trace_bio_frontmerge(void *ignore, 812static void blk_add_trace_bio_frontmerge(void *ignore,
792 struct request_queue *q, 813 struct request_queue *q,
814 struct request *rq,
793 struct bio *bio) 815 struct bio *bio)
794{ 816{
795 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); 817 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..b3fde6d7b7fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -66,7 +66,7 @@
66 66
67static struct ftrace_ops ftrace_list_end __read_mostly = { 67static struct ftrace_ops ftrace_list_end __read_mostly = {
68 .func = ftrace_stub, 68 .func = ftrace_stub,
69 .flags = FTRACE_OPS_FL_RECURSION_SAFE, 69 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
70}; 70};
71 71
72/* ftrace_enabled is a method to turn ftrace on or off */ 72/* ftrace_enabled is a method to turn ftrace on or off */
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
111#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) 111#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
112#endif 112#endif
113 113
114/*
115 * Traverse the ftrace_global_list, invoking all entries. The reason that we
116 * can use rcu_dereference_raw() is that elements removed from this list
117 * are simply leaked, so there is no need to interact with a grace-period
118 * mechanism. The rcu_dereference_raw() calls are needed to handle
119 * concurrent insertions into the ftrace_global_list.
120 *
121 * Silly Alpha and silly pointer-speculation compiler optimizations!
122 */
123#define do_for_each_ftrace_op(op, list) \
124 op = rcu_dereference_raw(list); \
125 do
126
127/*
128 * Optimized for just a single item in the list (as that is the normal case).
129 */
130#define while_for_each_ftrace_op(op) \
131 while (likely(op = rcu_dereference_raw((op)->next)) && \
132 unlikely((op) != &ftrace_list_end))
133
114/** 134/**
115 * ftrace_nr_registered_ops - return number of ops registered 135 * ftrace_nr_registered_ops - return number of ops registered
116 * 136 *
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
132 return cnt; 152 return cnt;
133} 153}
134 154
135/*
136 * Traverse the ftrace_global_list, invoking all entries. The reason that we
137 * can use rcu_dereference_raw() is that elements removed from this list
138 * are simply leaked, so there is no need to interact with a grace-period
139 * mechanism. The rcu_dereference_raw() calls are needed to handle
140 * concurrent insertions into the ftrace_global_list.
141 *
142 * Silly Alpha and silly pointer-speculation compiler optimizations!
143 */
144static void 155static void
145ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, 156ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
146 struct ftrace_ops *op, struct pt_regs *regs) 157 struct ftrace_ops *op, struct pt_regs *regs)
147{ 158{
148 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) 159 int bit;
160
161 bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
162 if (bit < 0)
149 return; 163 return;
150 164
151 trace_recursion_set(TRACE_GLOBAL_BIT); 165 do_for_each_ftrace_op(op, ftrace_global_list) {
152 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
153 while (op != &ftrace_list_end) {
154 op->func(ip, parent_ip, op, regs); 166 op->func(ip, parent_ip, op, regs);
155 op = rcu_dereference_raw(op->next); /*see above*/ 167 } while_for_each_ftrace_op(op);
156 }; 168
157 trace_recursion_clear(TRACE_GLOBAL_BIT); 169 trace_clear_recursion(bit);
158} 170}
159 171
160static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, 172static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
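
[Editor's note] The do_for_each_ftrace_op()/while_for_each_ftrace_op() pair introduced above is meant to read like an ordinary do/while, and it always runs the body at least once -- which is why ftrace_list_end is now tagged FTRACE_OPS_FL_STUB, so callers that must not invoke the stub can test for it (see ftrace_ops_control_func() further down). A user-space analogue that keeps only the control flow; the RCU accessors and branch hints are deliberately dropped:

#include <stdio.h>

struct op {
        const char *name;
        struct op *next;
};

/* list layout: first -> second -> list_end (the stub terminator) */
static struct op list_end = { "stub",   NULL };
static struct op second   = { "second", &list_end };
static struct op first    = { "first",  &second };
static struct op *global_list = &first;

#define do_for_each_op(op, list)                \
        op = (list);                            \
        do

#define while_for_each_op(op)                   \
        while ((op = (op)->next) && (op) != &list_end)

int main(void)
{
        struct op *op;

        /* body runs for "first" and "second"; the stub terminator ends the loop */
        do_for_each_op(op, global_list) {
                printf("calling %s\n", op->name);
        } while_for_each_op(op);

        return 0;
}
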
@@ -221,10 +233,24 @@ static void update_global_ops(void)
221 * registered callers. 233 * registered callers.
222 */ 234 */
223 if (ftrace_global_list == &ftrace_list_end || 235 if (ftrace_global_list == &ftrace_list_end ||
224 ftrace_global_list->next == &ftrace_list_end) 236 ftrace_global_list->next == &ftrace_list_end) {
225 func = ftrace_global_list->func; 237 func = ftrace_global_list->func;
226 else 238 /*
239 * As we are calling the function directly.
240 * If it does not have recursion protection,
241 * the function_trace_op needs to be updated
242 * accordingly.
243 */
244 if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
245 global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
246 else
247 global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
248 } else {
227 func = ftrace_global_list_func; 249 func = ftrace_global_list_func;
250 /* The list has its own recursion protection. */
251 global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
252 }
253
228 254
229 /* If we filter on pids, update to use the pid function */ 255 /* If we filter on pids, update to use the pid function */
230 if (!list_empty(&ftrace_pids)) { 256 if (!list_empty(&ftrace_pids)) {
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
337 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) 363 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
338 return -EINVAL; 364 return -EINVAL;
339 365
340#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS 366#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
341 /* 367 /*
342 * If the ftrace_ops specifies SAVE_REGS, then it only can be used 368 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
343 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. 369 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -668,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
668 free_page(tmp); 694 free_page(tmp);
669 } 695 }
670 696
671 free_page((unsigned long)stat->pages);
672 stat->pages = NULL; 697 stat->pages = NULL;
673 stat->start = NULL; 698 stat->start = NULL;
674 699
@@ -736,7 +761,6 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
736{ 761{
737 struct ftrace_profile *rec; 762 struct ftrace_profile *rec;
738 struct hlist_head *hhd; 763 struct hlist_head *hhd;
739 struct hlist_node *n;
740 unsigned long key; 764 unsigned long key;
741 765
742 key = hash_long(ip, ftrace_profile_bits); 766 key = hash_long(ip, ftrace_profile_bits);
@@ -745,7 +769,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
745 if (hlist_empty(hhd)) 769 if (hlist_empty(hhd))
746 return NULL; 770 return NULL;
747 771
748 hlist_for_each_entry_rcu(rec, n, hhd, node) { 772 hlist_for_each_entry_rcu(rec, hhd, node) {
749 if (rec->ip == ip) 773 if (rec->ip == ip)
750 return rec; 774 return rec;
751 } 775 }
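
[Editor's note] This hunk, and several identical ones below, are mechanical fallout of the hlist iterator API change that drops the separate struct hlist_node * cursor; the iterator now yields the containing entry directly. Schematically (struct item and find_item() are invented, the iterator is the kernel's):

#include <linux/list.h>
#include <linux/rculist.h>

struct item {
        unsigned long ip;
        struct hlist_node node;
};

/* caller is expected to hold rcu_read_lock() or otherwise pin the list */
static struct item *find_item(struct hlist_head *head, unsigned long ip)
{
        struct item *it;

        /*
         * Old API needed an extra cursor:
         *      struct hlist_node *n;
         *      hlist_for_each_entry_rcu(it, n, head, node)
         */
        hlist_for_each_entry_rcu(it, head, node) {
                if (it->ip == ip)
                        return it;
        }
        return NULL;
}
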
@@ -1028,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
1028 1052
1029static struct pid * const ftrace_swapper_pid = &init_struct_pid; 1053static struct pid * const ftrace_swapper_pid = &init_struct_pid;
1030 1054
1055loff_t
1056ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
1057{
1058 loff_t ret;
1059
1060 if (file->f_mode & FMODE_READ)
1061 ret = seq_lseek(file, offset, whence);
1062 else
1063 file->f_pos = ret = 1;
1064
1065 return ret;
1066}
1067
1031#ifdef CONFIG_DYNAMIC_FTRACE 1068#ifdef CONFIG_DYNAMIC_FTRACE
1032 1069
1033#ifndef CONFIG_FTRACE_MCOUNT_RECORD 1070#ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -1107,7 +1144,6 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1107 unsigned long key; 1144 unsigned long key;
1108 struct ftrace_func_entry *entry; 1145 struct ftrace_func_entry *entry;
1109 struct hlist_head *hhd; 1146 struct hlist_head *hhd;
1110 struct hlist_node *n;
1111 1147
1112 if (ftrace_hash_empty(hash)) 1148 if (ftrace_hash_empty(hash))
1113 return NULL; 1149 return NULL;
@@ -1119,7 +1155,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1119 1155
1120 hhd = &hash->buckets[key]; 1156 hhd = &hash->buckets[key];
1121 1157
1122 hlist_for_each_entry_rcu(entry, n, hhd, hlist) { 1158 hlist_for_each_entry_rcu(entry, hhd, hlist) {
1123 if (entry->ip == ip) 1159 if (entry->ip == ip)
1124 return entry; 1160 return entry;
1125 } 1161 }
@@ -1176,7 +1212,7 @@ remove_hash_entry(struct ftrace_hash *hash,
1176static void ftrace_hash_clear(struct ftrace_hash *hash) 1212static void ftrace_hash_clear(struct ftrace_hash *hash)
1177{ 1213{
1178 struct hlist_head *hhd; 1214 struct hlist_head *hhd;
1179 struct hlist_node *tp, *tn; 1215 struct hlist_node *tn;
1180 struct ftrace_func_entry *entry; 1216 struct ftrace_func_entry *entry;
1181 int size = 1 << hash->size_bits; 1217 int size = 1 << hash->size_bits;
1182 int i; 1218 int i;
@@ -1186,7 +1222,7 @@ static void ftrace_hash_clear(struct ftrace_hash *hash)
1186 1222
1187 for (i = 0; i < size; i++) { 1223 for (i = 0; i < size; i++) {
1188 hhd = &hash->buckets[i]; 1224 hhd = &hash->buckets[i];
1189 hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) 1225 hlist_for_each_entry_safe(entry, tn, hhd, hlist)
1190 free_hash_entry(hash, entry); 1226 free_hash_entry(hash, entry);
1191 } 1227 }
1192 FTRACE_WARN_ON(hash->count); 1228 FTRACE_WARN_ON(hash->count);
@@ -1249,7 +1285,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1249{ 1285{
1250 struct ftrace_func_entry *entry; 1286 struct ftrace_func_entry *entry;
1251 struct ftrace_hash *new_hash; 1287 struct ftrace_hash *new_hash;
1252 struct hlist_node *tp;
1253 int size; 1288 int size;
1254 int ret; 1289 int ret;
1255 int i; 1290 int i;
@@ -1264,7 +1299,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1264 1299
1265 size = 1 << hash->size_bits; 1300 size = 1 << hash->size_bits;
1266 for (i = 0; i < size; i++) { 1301 for (i = 0; i < size; i++) {
1267 hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) { 1302 hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
1268 ret = add_hash_entry(new_hash, entry->ip); 1303 ret = add_hash_entry(new_hash, entry->ip);
1269 if (ret < 0) 1304 if (ret < 0)
1270 goto free_hash; 1305 goto free_hash;
@@ -1290,7 +1325,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
1290 struct ftrace_hash **dst, struct ftrace_hash *src) 1325 struct ftrace_hash **dst, struct ftrace_hash *src)
1291{ 1326{
1292 struct ftrace_func_entry *entry; 1327 struct ftrace_func_entry *entry;
1293 struct hlist_node *tp, *tn; 1328 struct hlist_node *tn;
1294 struct hlist_head *hhd; 1329 struct hlist_head *hhd;
1295 struct ftrace_hash *old_hash; 1330 struct ftrace_hash *old_hash;
1296 struct ftrace_hash *new_hash; 1331 struct ftrace_hash *new_hash;
@@ -1336,7 +1371,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
1336 size = 1 << src->size_bits; 1371 size = 1 << src->size_bits;
1337 for (i = 0; i < size; i++) { 1372 for (i = 0; i < size; i++) {
1338 hhd = &src->buckets[i]; 1373 hhd = &src->buckets[i];
1339 hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) { 1374 hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
1340 if (bits > 0) 1375 if (bits > 0)
1341 key = hash_long(entry->ip, bits); 1376 key = hash_long(entry->ip, bits);
1342 else 1377 else
@@ -2590,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
2590 * routine, you can use ftrace_filter_write() for the write 2625 * routine, you can use ftrace_filter_write() for the write
2591 * routine if @flag has FTRACE_ITER_FILTER set, or 2626 * routine if @flag has FTRACE_ITER_FILTER set, or
2592 * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. 2627 * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
2593 * ftrace_regex_lseek() should be used as the lseek routine, and 2628 * ftrace_filter_lseek() should be used as the lseek routine, and
2594 * release must call ftrace_regex_release(). 2629 * release must call ftrace_regex_release().
2595 */ 2630 */
2596int 2631int
@@ -2674,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
2674 inode, file); 2709 inode, file);
2675} 2710}
2676 2711
2677loff_t
2678ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
2679{
2680 loff_t ret;
2681
2682 if (file->f_mode & FMODE_READ)
2683 ret = seq_lseek(file, offset, whence);
2684 else
2685 file->f_pos = ret = 1;
2686
2687 return ret;
2688}
2689
2690static int ftrace_match(char *str, char *regex, int len, int type) 2712static int ftrace_match(char *str, char *regex, int len, int type)
2691{ 2713{
2692 int matched = 0; 2714 int matched = 0;
@@ -2875,7 +2897,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
2875{ 2897{
2876 struct ftrace_func_probe *entry; 2898 struct ftrace_func_probe *entry;
2877 struct hlist_head *hhd; 2899 struct hlist_head *hhd;
2878 struct hlist_node *n;
2879 unsigned long key; 2900 unsigned long key;
2880 2901
2881 key = hash_long(ip, FTRACE_HASH_BITS); 2902 key = hash_long(ip, FTRACE_HASH_BITS);
@@ -2891,7 +2912,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
2891 * on the hash. rcu_read_lock is too dangerous here. 2912 * on the hash. rcu_read_lock is too dangerous here.
2892 */ 2913 */
2893 preempt_disable_notrace(); 2914 preempt_disable_notrace();
2894 hlist_for_each_entry_rcu(entry, n, hhd, node) { 2915 hlist_for_each_entry_rcu(entry, hhd, node) {
2895 if (entry->ip == ip) 2916 if (entry->ip == ip)
2896 entry->ops->func(ip, parent_ip, &entry->data); 2917 entry->ops->func(ip, parent_ip, &entry->data);
2897 } 2918 }
@@ -3042,7 +3063,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3042 void *data, int flags) 3063 void *data, int flags)
3043{ 3064{
3044 struct ftrace_func_probe *entry; 3065 struct ftrace_func_probe *entry;
3045 struct hlist_node *n, *tmp; 3066 struct hlist_node *tmp;
3046 char str[KSYM_SYMBOL_LEN]; 3067 char str[KSYM_SYMBOL_LEN];
3047 int type = MATCH_FULL; 3068 int type = MATCH_FULL;
3048 int i, len = 0; 3069 int i, len = 0;
@@ -3065,7 +3086,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3065 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { 3086 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
3066 struct hlist_head *hhd = &ftrace_func_hash[i]; 3087 struct hlist_head *hhd = &ftrace_func_hash[i];
3067 3088
3068 hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { 3089 hlist_for_each_entry_safe(entry, tmp, hhd, node) {
3069 3090
3070 /* break up if statements for readability */ 3091 /* break up if statements for readability */
3071 if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) 3092 if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
@@ -3082,8 +3103,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3082 continue; 3103 continue;
3083 } 3104 }
3084 3105
3085 hlist_del(&entry->node); 3106 hlist_del_rcu(&entry->node);
3086 call_rcu(&entry->rcu, ftrace_free_entry_rcu); 3107 call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
3087 } 3108 }
3088 } 3109 }
3089 __disable_ftrace_function_probe(); 3110 __disable_ftrace_function_probe();
@@ -3419,14 +3440,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
3419 3440
3420static int __init set_ftrace_notrace(char *str) 3441static int __init set_ftrace_notrace(char *str)
3421{ 3442{
3422 strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); 3443 strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
3423 return 1; 3444 return 1;
3424} 3445}
3425__setup("ftrace_notrace=", set_ftrace_notrace); 3446__setup("ftrace_notrace=", set_ftrace_notrace);
3426 3447
3427static int __init set_ftrace_filter(char *str) 3448static int __init set_ftrace_filter(char *str)
3428{ 3449{
3429 strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); 3450 strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
3430 return 1; 3451 return 1;
3431} 3452}
3432__setup("ftrace_filter=", set_ftrace_filter); 3453__setup("ftrace_filter=", set_ftrace_filter);
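
[Editor's note] The strncpy() to strlcpy() conversions here (and in trace.c below) matter because strncpy() does not NUL-terminate when the source fills the destination, whereas strlcpy() always terminates, truncating if necessary. A small sketch with an invented buffer and helper:

#include <linux/string.h>

#define SETUP_BUF_SIZE 8

static char setup_buf[SETUP_BUF_SIZE];

static void save_boot_param(const char *str)
{
        /*
         * strncpy(setup_buf, str, SETUP_BUF_SIZE) copies at most
         * SETUP_BUF_SIZE bytes and leaves setup_buf unterminated whenever
         * strlen(str) >= SETUP_BUF_SIZE; a later "%s" of the buffer then
         * runs off the end.  strlcpy() always writes a trailing NUL,
         * truncating the copy if it has to.
         */
        strlcpy(setup_buf, str, SETUP_BUF_SIZE);
}
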
@@ -3549,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = {
3549 .open = ftrace_filter_open, 3570 .open = ftrace_filter_open,
3550 .read = seq_read, 3571 .read = seq_read,
3551 .write = ftrace_filter_write, 3572 .write = ftrace_filter_write,
3552 .llseek = ftrace_regex_lseek, 3573 .llseek = ftrace_filter_lseek,
3553 .release = ftrace_regex_release, 3574 .release = ftrace_regex_release,
3554}; 3575};
3555 3576
@@ -3557,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = {
3557 .open = ftrace_notrace_open, 3578 .open = ftrace_notrace_open,
3558 .read = seq_read, 3579 .read = seq_read,
3559 .write = ftrace_notrace_write, 3580 .write = ftrace_notrace_write,
3560 .llseek = ftrace_regex_lseek, 3581 .llseek = ftrace_filter_lseek,
3561 .release = ftrace_regex_release, 3582 .release = ftrace_regex_release,
3562}; 3583};
3563 3584
@@ -3762,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = {
3762 .open = ftrace_graph_open, 3783 .open = ftrace_graph_open,
3763 .read = seq_read, 3784 .read = seq_read,
3764 .write = ftrace_graph_write, 3785 .write = ftrace_graph_write,
3786 .llseek = ftrace_filter_lseek,
3765 .release = ftrace_graph_release, 3787 .release = ftrace_graph_release,
3766 .llseek = seq_lseek,
3767}; 3788};
3768#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3789#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3769 3790
@@ -3970,37 +3991,51 @@ static void ftrace_init_module(struct module *mod,
3970 ftrace_process_locs(mod, start, end); 3991 ftrace_process_locs(mod, start, end);
3971} 3992}
3972 3993
3973static int ftrace_module_notify(struct notifier_block *self, 3994static int ftrace_module_notify_enter(struct notifier_block *self,
3974 unsigned long val, void *data) 3995 unsigned long val, void *data)
3975{ 3996{
3976 struct module *mod = data; 3997 struct module *mod = data;
3977 3998
3978 switch (val) { 3999 if (val == MODULE_STATE_COMING)
3979 case MODULE_STATE_COMING:
3980 ftrace_init_module(mod, mod->ftrace_callsites, 4000 ftrace_init_module(mod, mod->ftrace_callsites,
3981 mod->ftrace_callsites + 4001 mod->ftrace_callsites +
3982 mod->num_ftrace_callsites); 4002 mod->num_ftrace_callsites);
3983 break; 4003 return 0;
3984 case MODULE_STATE_GOING: 4004}
4005
4006static int ftrace_module_notify_exit(struct notifier_block *self,
4007 unsigned long val, void *data)
4008{
4009 struct module *mod = data;
4010
4011 if (val == MODULE_STATE_GOING)
3985 ftrace_release_mod(mod); 4012 ftrace_release_mod(mod);
3986 break;
3987 }
3988 4013
3989 return 0; 4014 return 0;
3990} 4015}
3991#else 4016#else
3992static int ftrace_module_notify(struct notifier_block *self, 4017static int ftrace_module_notify_enter(struct notifier_block *self,
3993 unsigned long val, void *data) 4018 unsigned long val, void *data)
4019{
4020 return 0;
4021}
4022static int ftrace_module_notify_exit(struct notifier_block *self,
4023 unsigned long val, void *data)
3994{ 4024{
3995 return 0; 4025 return 0;
3996} 4026}
3997#endif /* CONFIG_MODULES */ 4027#endif /* CONFIG_MODULES */
3998 4028
3999struct notifier_block ftrace_module_nb = { 4029struct notifier_block ftrace_module_enter_nb = {
4000 .notifier_call = ftrace_module_notify, 4030 .notifier_call = ftrace_module_notify_enter,
4001 .priority = INT_MAX, /* Run before anything that can use kprobes */ 4031 .priority = INT_MAX, /* Run before anything that can use kprobes */
4002}; 4032};
4003 4033
4034struct notifier_block ftrace_module_exit_nb = {
4035 .notifier_call = ftrace_module_notify_exit,
4036 .priority = INT_MIN, /* Run after anything that can remove kprobes */
4037};
4038
4004extern unsigned long __start_mcount_loc[]; 4039extern unsigned long __start_mcount_loc[];
4005extern unsigned long __stop_mcount_loc[]; 4040extern unsigned long __stop_mcount_loc[];
4006 4041
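
[Editor's note] Splitting the module notifier in two and giving the enter side INT_MAX and the exit side INT_MIN relies on notifier chains running callbacks in descending priority order: ftrace records a module's call sites before anything that can use kprobes sees the module, and releases them only after everything that can remove kprobes has let go. The registration pattern in isolation; the callbacks here are stand-ins, not the ftrace ones:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int demo_notify_first(struct notifier_block *nb,
                             unsigned long val, void *data)
{
        return 0;       /* sees MODULE_STATE_* events before lower priorities */
}

static int demo_notify_last(struct notifier_block *nb,
                            unsigned long val, void *data)
{
        return 0;       /* sees the same events after everyone else */
}

static struct notifier_block demo_first_nb = {
        .notifier_call  = demo_notify_first,
        .priority       = INT_MAX,
};

static struct notifier_block demo_last_nb = {
        .notifier_call  = demo_notify_last,
        .priority       = INT_MIN,
};

static int __init demo_notifier_init(void)
{
        int ret;

        /* registration order does not matter; the chain sorts by priority */
        ret = register_module_notifier(&demo_first_nb);
        if (!ret)
                ret = register_module_notifier(&demo_last_nb);
        return ret;
}
module_init(demo_notifier_init);
MODULE_LICENSE("GPL");
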
@@ -4032,9 +4067,13 @@ void __init ftrace_init(void)
4032 __start_mcount_loc, 4067 __start_mcount_loc,
4033 __stop_mcount_loc); 4068 __stop_mcount_loc);
4034 4069
4035 ret = register_module_notifier(&ftrace_module_nb); 4070 ret = register_module_notifier(&ftrace_module_enter_nb);
4036 if (ret) 4071 if (ret)
4037 pr_warning("Failed to register trace ftrace module notifier\n"); 4072 pr_warning("Failed to register trace ftrace module enter notifier\n");
4073
4074 ret = register_module_notifier(&ftrace_module_exit_nb);
4075 if (ret)
4076 pr_warning("Failed to register trace ftrace module exit notifier\n");
4038 4077
4039 set_ftrace_early_filters(); 4078 set_ftrace_early_filters();
4040 4079
@@ -4090,14 +4129,12 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
4090 */ 4129 */
4091 preempt_disable_notrace(); 4130 preempt_disable_notrace();
4092 trace_recursion_set(TRACE_CONTROL_BIT); 4131 trace_recursion_set(TRACE_CONTROL_BIT);
4093 op = rcu_dereference_raw(ftrace_control_list); 4132 do_for_each_ftrace_op(op, ftrace_control_list) {
4094 while (op != &ftrace_list_end) { 4133 if (!(op->flags & FTRACE_OPS_FL_STUB) &&
4095 if (!ftrace_function_local_disabled(op) && 4134 !ftrace_function_local_disabled(op) &&
4096 ftrace_ops_test(op, ip)) 4135 ftrace_ops_test(op, ip))
4097 op->func(ip, parent_ip, op, regs); 4136 op->func(ip, parent_ip, op, regs);
4098 4137 } while_for_each_ftrace_op(op);
4099 op = rcu_dereference_raw(op->next);
4100 };
4101 trace_recursion_clear(TRACE_CONTROL_BIT); 4138 trace_recursion_clear(TRACE_CONTROL_BIT);
4102 preempt_enable_notrace(); 4139 preempt_enable_notrace();
4103} 4140}
@@ -4112,27 +4149,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4112 struct ftrace_ops *ignored, struct pt_regs *regs) 4149 struct ftrace_ops *ignored, struct pt_regs *regs)
4113{ 4150{
4114 struct ftrace_ops *op; 4151 struct ftrace_ops *op;
4152 int bit;
4115 4153
4116 if (function_trace_stop) 4154 if (function_trace_stop)
4117 return; 4155 return;
4118 4156
4119 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) 4157 bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
4158 if (bit < 0)
4120 return; 4159 return;
4121 4160
4122 trace_recursion_set(TRACE_INTERNAL_BIT);
4123 /* 4161 /*
4124 * Some of the ops may be dynamically allocated, 4162 * Some of the ops may be dynamically allocated,
4125 * they must be freed after a synchronize_sched(). 4163 * they must be freed after a synchronize_sched().
4126 */ 4164 */
4127 preempt_disable_notrace(); 4165 preempt_disable_notrace();
4128 op = rcu_dereference_raw(ftrace_ops_list); 4166 do_for_each_ftrace_op(op, ftrace_ops_list) {
4129 while (op != &ftrace_list_end) {
4130 if (ftrace_ops_test(op, ip)) 4167 if (ftrace_ops_test(op, ip))
4131 op->func(ip, parent_ip, op, regs); 4168 op->func(ip, parent_ip, op, regs);
4132 op = rcu_dereference_raw(op->next); 4169 } while_for_each_ftrace_op(op);
4133 };
4134 preempt_enable_notrace(); 4170 preempt_enable_notrace();
4135 trace_recursion_clear(TRACE_INTERNAL_BIT); 4171 trace_clear_recursion(bit);
4136} 4172}
4137 4173
4138/* 4174/*
@@ -4143,8 +4179,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4143 * Archs are to support both the regs and ftrace_ops at the same time. 4179 * Archs are to support both the regs and ftrace_ops at the same time.
4144 * If they support ftrace_ops, it is assumed they support regs. 4180 * If they support ftrace_ops, it is assumed they support regs.
4145 * If call backs want to use regs, they must either check for regs 4181 * If call backs want to use regs, they must either check for regs
4146 * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. 4182 * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
4147 * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved. 4183 * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
4148 * An architecture can pass partial regs with ftrace_ops and still 4184 * An architecture can pass partial regs with ftrace_ops and still
4149 * set the ARCH_SUPPORT_FTARCE_OPS. 4185 * set the ARCH_SUPPORT_FTARCE_OPS.
4150 */ 4186 */
@@ -4403,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = {
4403 .open = ftrace_pid_open, 4439 .open = ftrace_pid_open,
4404 .write = ftrace_pid_write, 4440 .write = ftrace_pid_write,
4405 .read = seq_read, 4441 .read = seq_read,
4406 .llseek = seq_lseek, 4442 .llseek = ftrace_filter_lseek,
4407 .release = ftrace_pid_release, 4443 .release = ftrace_pid_release,
4408}; 4444};
4409 4445
@@ -4519,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
4519 ftrace_startup_sysctl(); 4555 ftrace_startup_sysctl();
4520 4556
4521 /* we are starting ftrace again */ 4557 /* we are starting ftrace again */
4522 if (ftrace_ops_list != &ftrace_list_end) { 4558 if (ftrace_ops_list != &ftrace_list_end)
4523 if (ftrace_ops_list->next == &ftrace_list_end) 4559 update_ftrace_function();
4524 ftrace_trace_function = ftrace_ops_list->func;
4525 else
4526 ftrace_trace_function = ftrace_ops_list_func;
4527 }
4528 4560
4529 } else { 4561 } else {
4530 /* stopping ftrace calls (just send to ftrace_stub) */ 4562 /* stopping ftrace calls (just send to ftrace_stub) */
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index f55fcf61b223..1c71382b283d 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,8 +13,5 @@
13#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
14#include <trace/events/power.h> 14#include <trace/events/power.h>
15 15
16#ifdef EVENT_POWER_TRACING_DEPRECATED
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18#endif
19EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); 16EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
20 17
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce8514feedcd..6989df2ba194 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
3 * 3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6#include <linux/ftrace_event.h>
6#include <linux/ring_buffer.h> 7#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h> 8#include <linux/trace_clock.h>
9#include <linux/trace_seq.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/debugfs.h> 11#include <linux/debugfs.h>
10#include <linux/uaccess.h> 12#include <linux/uaccess.h>
@@ -21,7 +23,6 @@
21#include <linux/fs.h> 23#include <linux/fs.h>
22 24
23#include <asm/local.h> 25#include <asm/local.h>
24#include "trace.h"
25 26
26static void update_pages_handler(struct work_struct *work); 27static void update_pages_handler(struct work_struct *work);
27 28
@@ -177,7 +178,7 @@ void tracing_off_permanent(void)
177#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 178#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
178#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 179#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
179 180
180#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 181#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
181# define RB_FORCE_8BYTE_ALIGNMENT 0 182# define RB_FORCE_8BYTE_ALIGNMENT 0
182# define RB_ARCH_ALIGNMENT RB_ALIGNMENT 183# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
183#else 184#else
@@ -185,6 +186,8 @@ void tracing_off_permanent(void)
185# define RB_ARCH_ALIGNMENT 8U 186# define RB_ARCH_ALIGNMENT 8U
186#endif 187#endif
187 188
189#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
190
188/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 191/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
189#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 192#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
190 193
@@ -333,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
333struct buffer_data_page { 336struct buffer_data_page {
334 u64 time_stamp; /* page time stamp */ 337 u64 time_stamp; /* page time stamp */
335 local_t commit; /* write committed index */ 338 local_t commit; /* write committed index */
336 unsigned char data[]; /* data of buffer page */ 339 unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
337}; 340};
338 341
339/* 342/*
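
[Editor's note] RB_ALIGN_DATA forces the flexible data[] array to start on an RB_ARCH_ALIGNMENT boundary, so event payloads can be read with naturally aligned 8-byte accesses on architectures that lack efficient unaligned loads (the new CONFIG_HAVE_64BIT_ALIGNED_ACCESS case). A stand-alone illustration of what the attribute does; the struct is a simplified stand-in for buffer_data_page:

#include <stddef.h>
#include <stdio.h>

#define ARCH_ALIGNMENT 8U

/* simplified stand-in for buffer_data_page; field types differ from the kernel's */
struct data_page {
        unsigned long long      time_stamp;
        long                    commit;
        unsigned char           data[] __attribute__((aligned(ARCH_ALIGNMENT)));
};

int main(void)
{
        /* the attribute pads the header so data[] starts 8-byte aligned */
        printf("data[] begins at offset %zu\n",
               offsetof(struct data_page, data));
        return 0;
}
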
@@ -2432,41 +2435,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2432 2435
2433#ifdef CONFIG_TRACING 2436#ifdef CONFIG_TRACING
2434 2437
2435#define TRACE_RECURSIVE_DEPTH 16 2438/*
2439 * The lock and unlock are done within a preempt disable section.
2440 * The current_context per_cpu variable can only be modified
2441 * by the current task between lock and unlock. But it can
2442 * be modified more than once via an interrupt. To pass this
2443 * information from the lock to the unlock without having to
2444 * access the 'in_interrupt()' functions again (which do show
2445 * a bit of overhead in something as critical as function tracing,
2446 * we use a bitmask trick.
2447 *
2448 * bit 0 = NMI context
2449 * bit 1 = IRQ context
2450 * bit 2 = SoftIRQ context
2451 * bit 3 = normal context.
2452 *
2453 * This works because this is the order of contexts that can
2454 * preempt other contexts. A SoftIRQ never preempts an IRQ
2455 * context.
2456 *
2457 * When the context is determined, the corresponding bit is
2458 * checked and set (if it was set, then a recursion of that context
2459 * happened).
2460 *
2461 * On unlock, we need to clear this bit. To do so, just subtract
2462 * 1 from the current_context and AND it to itself.
2463 *
2464 * (binary)
2465 * 101 - 1 = 100
2466 * 101 & 100 = 100 (clearing bit zero)
2467 *
2468 * 1010 - 1 = 1001
2469 * 1010 & 1001 = 1000 (clearing bit 1)
2470 *
2471 * The least significant bit can be cleared this way, and it
2472 * just so happens that it is the same bit corresponding to
2473 * the current context.
2474 */
2475static DEFINE_PER_CPU(unsigned int, current_context);
2436 2476
2437/* Keep this code out of the fast path cache */ 2477static __always_inline int trace_recursive_lock(void)
2438static noinline void trace_recursive_fail(void)
2439{ 2478{
2440 /* Disable all tracing before we do anything else */ 2479 unsigned int val = this_cpu_read(current_context);
2441 tracing_off_permanent(); 2480 int bit;
2442
2443 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2444 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2445 trace_recursion_buffer(),
2446 hardirq_count() >> HARDIRQ_SHIFT,
2447 softirq_count() >> SOFTIRQ_SHIFT,
2448 in_nmi());
2449
2450 WARN_ON_ONCE(1);
2451}
2452 2481
2453static inline int trace_recursive_lock(void) 2482 if (in_interrupt()) {
2454{ 2483 if (in_nmi())
2455 trace_recursion_inc(); 2484 bit = 0;
2485 else if (in_irq())
2486 bit = 1;
2487 else
2488 bit = 2;
2489 } else
2490 bit = 3;
2456 2491
2457 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) 2492 if (unlikely(val & (1 << bit)))
2458 return 0; 2493 return 1;
2459 2494
2460 trace_recursive_fail(); 2495 val |= (1 << bit);
2496 this_cpu_write(current_context, val);
2461 2497
2462 return -1; 2498 return 0;
2463} 2499}
2464 2500
2465static inline void trace_recursive_unlock(void) 2501static __always_inline void trace_recursive_unlock(void)
2466{ 2502{
2467 WARN_ON_ONCE(!trace_recursion_buffer()); 2503 unsigned int val = this_cpu_read(current_context);
2468 2504
2469 trace_recursion_dec(); 2505 val--;
2506 val &= this_cpu_read(current_context);
2507 this_cpu_write(current_context, val);
2470} 2508}
2471 2509
2472#else 2510#else
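
[Editor's note] The comment block above describes the whole trick; the following user-space rendering shows it in action. Contexts nest NMI inside IRQ inside SoftIRQ inside normal, so the most recently entered context always owns the lowest set bit, and (val - 1) & val clears exactly that bit on unlock. Per-CPU storage is collapsed to a single variable for the sketch:

#include <assert.h>
#include <stdio.h>

static unsigned int current_context;

static int recursive_lock(int bit)      /* bit: 0=NMI 1=IRQ 2=SoftIRQ 3=normal */
{
        if (current_context & (1u << bit))
                return 1;               /* same context re-entered: recursion */
        current_context |= 1u << bit;
        return 0;
}

static void recursive_unlock(void)
{
        current_context &= current_context - 1; /* clear the lowest set bit */
}

int main(void)
{
        assert(recursive_lock(3) == 0); /* normal context takes the lock        */
        assert(recursive_lock(1) == 0); /* an IRQ interrupts and nests          */
        assert(recursive_lock(1) == 1); /* the IRQ handler recursing is caught  */
        recursive_unlock();             /* IRQ side unlocks: clears bit 1       */
        assert(current_context == (1u << 3));
        recursive_unlock();             /* normal context unlocks: clears bit 3 */
        assert(current_context == 0);
        printf("recursion bookkeeping behaves as described\n");
        return 0;
}
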
@@ -3067,6 +3105,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3067EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); 3105EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3068 3106
3069/** 3107/**
3108 * ring_buffer_read_events_cpu - get the number of events successfully read
3109 * @buffer: The ring buffer
3110 * @cpu: The per CPU buffer to get the number of events read
3111 */
3112unsigned long
3113ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3114{
3115 struct ring_buffer_per_cpu *cpu_buffer;
3116
3117 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3118 return 0;
3119
3120 cpu_buffer = buffer->buffers[cpu];
3121 return cpu_buffer->read;
3122}
3123EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3124
3125/**
3070 * ring_buffer_entries - get the number of entries in a buffer 3126 * ring_buffer_entries - get the number of entries in a buffer
3071 * @buffer: The ring buffer 3127 * @buffer: The ring buffer
3072 * 3128 *
@@ -3425,7 +3481,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
3425 /* check for end of page padding */ 3481 /* check for end of page padding */
3426 if ((iter->head >= rb_page_size(iter->head_page)) && 3482 if ((iter->head >= rb_page_size(iter->head_page)) &&
3427 (iter->head_page != cpu_buffer->commit_page)) 3483 (iter->head_page != cpu_buffer->commit_page))
3428 rb_advance_iter(iter); 3484 rb_inc_iter(iter);
3429} 3485}
3430 3486
3431static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) 3487static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d24..66338c4f7f4b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
39#include <linux/poll.h> 39#include <linux/poll.h>
40#include <linux/nmi.h> 40#include <linux/nmi.h>
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/sched/rt.h>
42 43
43#include "trace.h" 44#include "trace.h"
44#include "trace_output.h" 45#include "trace_output.h"
@@ -131,7 +132,7 @@ static char *default_bootup_tracer;
131 132
132static int __init set_cmdline_ftrace(char *str) 133static int __init set_cmdline_ftrace(char *str)
133{ 134{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 135 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 136 default_bootup_tracer = bootup_tracer_buf;
136 /* We are using ftrace early, expand it */ 137 /* We are using ftrace early, expand it */
137 ring_buffer_expanded = 1; 138 ring_buffer_expanded = 1;
@@ -161,7 +162,7 @@ static char *trace_boot_options __initdata;
161 162
162static int __init set_trace_boot_options(char *str) 163static int __init set_trace_boot_options(char *str)
163{ 164{
164 strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); 165 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
165 trace_boot_options = trace_boot_options_buf; 166 trace_boot_options = trace_boot_options_buf;
166 return 0; 167 return 0;
167} 168}
@@ -249,7 +250,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
249static struct tracer *trace_types __read_mostly; 250static struct tracer *trace_types __read_mostly;
250 251
251/* current_trace points to the tracer that is currently active */ 252/* current_trace points to the tracer that is currently active */
252static struct tracer *current_trace __read_mostly; 253static struct tracer *current_trace __read_mostly = &nop_trace;
253 254
254/* 255/*
255 * trace_types_lock is used to protect the trace_types list. 256 * trace_types_lock is used to protect the trace_types list.
@@ -703,18 +704,22 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
703void 704void
704update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 705update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
705{ 706{
706 struct ring_buffer *buf = tr->buffer; 707 struct ring_buffer *buf;
707 708
708 if (trace_stop_count) 709 if (trace_stop_count)
709 return; 710 return;
710 711
711 WARN_ON_ONCE(!irqs_disabled()); 712 WARN_ON_ONCE(!irqs_disabled());
712 if (!current_trace->use_max_tr) { 713
713 WARN_ON_ONCE(1); 714 if (!current_trace->allocated_snapshot) {
715 /* Only the nop tracer should hit this when disabling */
716 WARN_ON_ONCE(current_trace != &nop_trace);
714 return; 717 return;
715 } 718 }
719
716 arch_spin_lock(&ftrace_max_lock); 720 arch_spin_lock(&ftrace_max_lock);
717 721
722 buf = tr->buffer;
718 tr->buffer = max_tr.buffer; 723 tr->buffer = max_tr.buffer;
719 max_tr.buffer = buf; 724 max_tr.buffer = buf;
720 725
@@ -739,8 +744,9 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
739 return; 744 return;
740 745
741 WARN_ON_ONCE(!irqs_disabled()); 746 WARN_ON_ONCE(!irqs_disabled());
742 if (!current_trace->use_max_tr) { 747 if (!current_trace->allocated_snapshot) {
743 WARN_ON_ONCE(1); 748 /* Only the nop tracer should hit this when disabling */
749 WARN_ON_ONCE(current_trace != &nop_trace);
744 return; 750 return;
745 } 751 }
746 752
@@ -862,10 +868,13 @@ int register_tracer(struct tracer *type)
862 868
863 current_trace = type; 869 current_trace = type;
864 870
865 /* If we expanded the buffers, make sure the max is expanded too */ 871 if (type->use_max_tr) {
866 if (ring_buffer_expanded && type->use_max_tr) 872 /* If we expanded the buffers, make sure the max is expanded too */
867 ring_buffer_resize(max_tr.buffer, trace_buf_size, 873 if (ring_buffer_expanded)
868 RING_BUFFER_ALL_CPUS); 874 ring_buffer_resize(max_tr.buffer, trace_buf_size,
875 RING_BUFFER_ALL_CPUS);
876 type->allocated_snapshot = true;
877 }
869 878
870 /* the test is responsible for initializing and enabling */ 879 /* the test is responsible for initializing and enabling */
871 pr_info("Testing tracer %s: ", type->name); 880 pr_info("Testing tracer %s: ", type->name);
@@ -881,10 +890,14 @@ int register_tracer(struct tracer *type)
881 /* Only reset on passing, to avoid touching corrupted buffers */ 890 /* Only reset on passing, to avoid touching corrupted buffers */
882 tracing_reset_online_cpus(tr); 891 tracing_reset_online_cpus(tr);
883 892
884 /* Shrink the max buffer again */ 893 if (type->use_max_tr) {
885 if (ring_buffer_expanded && type->use_max_tr) 894 type->allocated_snapshot = false;
886 ring_buffer_resize(max_tr.buffer, 1, 895
887 RING_BUFFER_ALL_CPUS); 896 /* Shrink the max buffer again */
897 if (ring_buffer_expanded)
898 ring_buffer_resize(max_tr.buffer, 1,
899 RING_BUFFER_ALL_CPUS);
900 }
888 901
889 printk(KERN_CONT "PASSED\n"); 902 printk(KERN_CONT "PASSED\n");
890 } 903 }
@@ -922,6 +935,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
922{ 935{
923 struct ring_buffer *buffer = tr->buffer; 936 struct ring_buffer *buffer = tr->buffer;
924 937
938 if (!buffer)
939 return;
940
925 ring_buffer_record_disable(buffer); 941 ring_buffer_record_disable(buffer);
926 942
927 /* Make sure all commits have finished */ 943 /* Make sure all commits have finished */
@@ -936,6 +952,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
936 struct ring_buffer *buffer = tr->buffer; 952 struct ring_buffer *buffer = tr->buffer;
937 int cpu; 953 int cpu;
938 954
955 if (!buffer)
956 return;
957
939 ring_buffer_record_disable(buffer); 958 ring_buffer_record_disable(buffer);
940 959
941 /* Make sure all commits have finished */ 960 /* Make sure all commits have finished */
@@ -1167,7 +1186,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1167 1186
1168 entry->preempt_count = pc & 0xff; 1187 entry->preempt_count = pc & 0xff;
1169 entry->pid = (tsk) ? tsk->pid : 0; 1188 entry->pid = (tsk) ? tsk->pid : 0;
1170 entry->padding = 0;
1171 entry->flags = 1189 entry->flags =
1172#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1190#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1173 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1191 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1335,7 +1353,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
1335 */ 1353 */
1336 preempt_disable_notrace(); 1354 preempt_disable_notrace();
1337 1355
1338 use_stack = ++__get_cpu_var(ftrace_stack_reserve); 1356 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1339 /* 1357 /*
1340 * We don't need any atomic variables, just a barrier. 1358 * We don't need any atomic variables, just a barrier.
1341 * If an interrupt comes in, we don't care, because it would 1359 * If an interrupt comes in, we don't care, because it would
@@ -1389,7 +1407,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
1389 out: 1407 out:
1390 /* Again, don't let gcc optimize things here */ 1408 /* Again, don't let gcc optimize things here */
1391 barrier(); 1409 barrier();
1392 __get_cpu_var(ftrace_stack_reserve)--; 1410 __this_cpu_dec(ftrace_stack_reserve);
1393 preempt_enable_notrace(); 1411 preempt_enable_notrace();
1394 1412
1395} 1413}
@@ -1517,7 +1535,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1517static char *get_trace_buf(void) 1535static char *get_trace_buf(void)
1518{ 1536{
1519 struct trace_buffer_struct *percpu_buffer; 1537 struct trace_buffer_struct *percpu_buffer;
1520 struct trace_buffer_struct *buffer;
1521 1538
1522 /* 1539 /*
1523 * If we have allocated per cpu buffers, then we do not 1540 * If we have allocated per cpu buffers, then we do not
@@ -1535,9 +1552,7 @@ static char *get_trace_buf(void)
1535 if (!percpu_buffer) 1552 if (!percpu_buffer)
1536 return NULL; 1553 return NULL;
1537 1554
1538 buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); 1555 return this_cpu_ptr(&percpu_buffer->buffer[0]);
1539
1540 return buffer->buffer;
1541} 1556}
1542 1557
1543static int alloc_percpu_trace_buffer(void) 1558static int alloc_percpu_trace_buffer(void)
@@ -1942,21 +1957,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1942static void *s_start(struct seq_file *m, loff_t *pos) 1957static void *s_start(struct seq_file *m, loff_t *pos)
1943{ 1958{
1944 struct trace_iterator *iter = m->private; 1959 struct trace_iterator *iter = m->private;
1945 static struct tracer *old_tracer;
1946 int cpu_file = iter->cpu_file; 1960 int cpu_file = iter->cpu_file;
1947 void *p = NULL; 1961 void *p = NULL;
1948 loff_t l = 0; 1962 loff_t l = 0;
1949 int cpu; 1963 int cpu;
1950 1964
1951 /* copy the tracer to avoid using a global lock all around */ 1965 /*
1966 * copy the tracer to avoid using a global lock all around.
1967 * iter->trace is a copy of current_trace, the pointer to the
1968 * name may be used instead of a strcmp(), as iter->trace->name
1969 * will point to the same string as current_trace->name.
1970 */
1952 mutex_lock(&trace_types_lock); 1971 mutex_lock(&trace_types_lock);
1953 if (unlikely(old_tracer != current_trace && current_trace)) { 1972 if (unlikely(current_trace && iter->trace->name != current_trace->name))
1954 old_tracer = current_trace;
1955 *iter->trace = *current_trace; 1973 *iter->trace = *current_trace;
1956 }
1957 mutex_unlock(&trace_types_lock); 1974 mutex_unlock(&trace_types_lock);
1958 1975
1959 atomic_inc(&trace_record_cmdline_disabled); 1976 if (iter->snapshot && iter->trace->use_max_tr)
1977 return ERR_PTR(-EBUSY);
1978
1979 if (!iter->snapshot)
1980 atomic_inc(&trace_record_cmdline_disabled);
1960 1981
1961 if (*pos != iter->pos) { 1982 if (*pos != iter->pos) {
1962 iter->ent = NULL; 1983 iter->ent = NULL;
@@ -1995,7 +2016,11 @@ static void s_stop(struct seq_file *m, void *p)
1995{ 2016{
1996 struct trace_iterator *iter = m->private; 2017 struct trace_iterator *iter = m->private;
1997 2018
1998 atomic_dec(&trace_record_cmdline_disabled); 2019 if (iter->snapshot && iter->trace->use_max_tr)
2020 return;
2021
2022 if (!iter->snapshot)
2023 atomic_dec(&trace_record_cmdline_disabled);
1999 trace_access_unlock(iter->cpu_file); 2024 trace_access_unlock(iter->cpu_file);
2000 trace_event_read_unlock(); 2025 trace_event_read_unlock();
2001} 2026}
@@ -2080,8 +2105,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2080 unsigned long total; 2105 unsigned long total;
2081 const char *name = "preemption"; 2106 const char *name = "preemption";
2082 2107
2083 if (type) 2108 name = type->name;
2084 name = type->name;
2085 2109
2086 get_total_entries(tr, &total, &entries); 2110 get_total_entries(tr, &total, &entries);
2087 2111
@@ -2380,6 +2404,27 @@ static void test_ftrace_alive(struct seq_file *m)
2380 seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); 2404 seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
2381} 2405}
2382 2406
2407#ifdef CONFIG_TRACER_MAX_TRACE
2408static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2409{
2410 if (iter->trace->allocated_snapshot)
2411 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2412 else
2413 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2414
2415 seq_printf(m, "# Snapshot commands:\n");
2416 seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2417 seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2418 seq_printf(m, "# Takes a snapshot of the main buffer.\n");
2419 seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
2420 seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
2421 seq_printf(m, "# is not a '0' or '1')\n");
2422}
2423#else
2424/* Should never be called */
2425static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2426#endif
2427
2383static int s_show(struct seq_file *m, void *v) 2428static int s_show(struct seq_file *m, void *v)
2384{ 2429{
2385 struct trace_iterator *iter = v; 2430 struct trace_iterator *iter = v;
@@ -2391,7 +2436,9 @@ static int s_show(struct seq_file *m, void *v)
2391 seq_puts(m, "#\n"); 2436 seq_puts(m, "#\n");
2392 test_ftrace_alive(m); 2437 test_ftrace_alive(m);
2393 } 2438 }
2394 if (iter->trace && iter->trace->print_header) 2439 if (iter->snapshot && trace_empty(iter))
2440 print_snapshot_help(m, iter);
2441 else if (iter->trace && iter->trace->print_header)
2395 iter->trace->print_header(m); 2442 iter->trace->print_header(m);
2396 else 2443 else
2397 trace_default_header(m); 2444 trace_default_header(m);
@@ -2430,7 +2477,7 @@ static const struct seq_operations tracer_seq_ops = {
2430}; 2477};
2431 2478
2432static struct trace_iterator * 2479static struct trace_iterator *
2433__tracing_open(struct inode *inode, struct file *file) 2480__tracing_open(struct inode *inode, struct file *file, bool snapshot)
2434{ 2481{
2435 long cpu_file = (long) inode->i_private; 2482 long cpu_file = (long) inode->i_private;
2436 struct trace_iterator *iter; 2483 struct trace_iterator *iter;
@@ -2457,16 +2504,16 @@ __tracing_open(struct inode *inode, struct file *file)
2457 if (!iter->trace) 2504 if (!iter->trace)
2458 goto fail; 2505 goto fail;
2459 2506
2460 if (current_trace) 2507 *iter->trace = *current_trace;
2461 *iter->trace = *current_trace;
2462 2508
2463 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 2509 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2464 goto fail; 2510 goto fail;
2465 2511
2466 if (current_trace && current_trace->print_max) 2512 if (current_trace->print_max || snapshot)
2467 iter->tr = &max_tr; 2513 iter->tr = &max_tr;
2468 else 2514 else
2469 iter->tr = &global_trace; 2515 iter->tr = &global_trace;
2516 iter->snapshot = snapshot;
2470 iter->pos = -1; 2517 iter->pos = -1;
2471 mutex_init(&iter->mutex); 2518 mutex_init(&iter->mutex);
2472 iter->cpu_file = cpu_file; 2519 iter->cpu_file = cpu_file;
@@ -2483,8 +2530,9 @@ __tracing_open(struct inode *inode, struct file *file)
2483 if (trace_clocks[trace_clock_id].in_ns) 2530 if (trace_clocks[trace_clock_id].in_ns)
2484 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 2531 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2485 2532
2486 /* stop the trace while dumping */ 2533 /* stop the trace while dumping if we are not opening "snapshot" */
2487 tracing_stop(); 2534 if (!iter->snapshot)
2535 tracing_stop();
2488 2536
2489 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2537 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2490 for_each_tracing_cpu(cpu) { 2538 for_each_tracing_cpu(cpu) {
@@ -2547,8 +2595,9 @@ static int tracing_release(struct inode *inode, struct file *file)
2547 if (iter->trace && iter->trace->close) 2595 if (iter->trace && iter->trace->close)
2548 iter->trace->close(iter); 2596 iter->trace->close(iter);
2549 2597
2550 /* reenable tracing if it was previously enabled */ 2598 if (!iter->snapshot)
2551 tracing_start(); 2599 /* reenable tracing if it was previously enabled */
2600 tracing_start();
2552 mutex_unlock(&trace_types_lock); 2601 mutex_unlock(&trace_types_lock);
2553 2602
2554 mutex_destroy(&iter->mutex); 2603 mutex_destroy(&iter->mutex);
@@ -2576,7 +2625,7 @@ static int tracing_open(struct inode *inode, struct file *file)
2576 } 2625 }
2577 2626
2578 if (file->f_mode & FMODE_READ) { 2627 if (file->f_mode & FMODE_READ) {
2579 iter = __tracing_open(inode, file); 2628 iter = __tracing_open(inode, file, false);
2580 if (IS_ERR(iter)) 2629 if (IS_ERR(iter))
2581 ret = PTR_ERR(iter); 2630 ret = PTR_ERR(iter);
2582 else if (trace_flags & TRACE_ITER_LATENCY_FMT) 2631 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -2835,11 +2884,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2835 return -EINVAL; 2884 return -EINVAL;
2836} 2885}
2837 2886
2838static void set_tracer_flags(unsigned int mask, int enabled) 2887/* Some tracers require overwrite to stay enabled */
2888int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
2889{
2890 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
2891 return -1;
2892
2893 return 0;
2894}
2895
2896int set_tracer_flag(unsigned int mask, int enabled)
2839{ 2897{
2840 /* do nothing if flag is already set */ 2898 /* do nothing if flag is already set */
2841 if (!!(trace_flags & mask) == !!enabled) 2899 if (!!(trace_flags & mask) == !!enabled)
2842 return; 2900 return 0;
2901
2902 /* Give the tracer a chance to approve the change */
2903 if (current_trace->flag_changed)
2904 if (current_trace->flag_changed(current_trace, mask, !!enabled))
2905 return -EINVAL;
2843 2906
2844 if (enabled) 2907 if (enabled)
2845 trace_flags |= mask; 2908 trace_flags |= mask;
@@ -2849,18 +2912,24 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2849 if (mask == TRACE_ITER_RECORD_CMD) 2912 if (mask == TRACE_ITER_RECORD_CMD)
2850 trace_event_enable_cmd_record(enabled); 2913 trace_event_enable_cmd_record(enabled);
2851 2914
2852 if (mask == TRACE_ITER_OVERWRITE) 2915 if (mask == TRACE_ITER_OVERWRITE) {
2853 ring_buffer_change_overwrite(global_trace.buffer, enabled); 2916 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2917#ifdef CONFIG_TRACER_MAX_TRACE
2918 ring_buffer_change_overwrite(max_tr.buffer, enabled);
2919#endif
2920 }
2854 2921
2855 if (mask == TRACE_ITER_PRINTK) 2922 if (mask == TRACE_ITER_PRINTK)
2856 trace_printk_start_stop_comm(enabled); 2923 trace_printk_start_stop_comm(enabled);
2924
2925 return 0;
2857} 2926}
2858 2927
2859static int trace_set_options(char *option) 2928static int trace_set_options(char *option)
2860{ 2929{
2861 char *cmp; 2930 char *cmp;
2862 int neg = 0; 2931 int neg = 0;
2863 int ret = 0; 2932 int ret = -ENODEV;
2864 int i; 2933 int i;
2865 2934
2866 cmp = strstrip(option); 2935 cmp = strstrip(option);
@@ -2870,19 +2939,20 @@ static int trace_set_options(char *option)
2870 cmp += 2; 2939 cmp += 2;
2871 } 2940 }
2872 2941
2942 mutex_lock(&trace_types_lock);
2943
2873 for (i = 0; trace_options[i]; i++) { 2944 for (i = 0; trace_options[i]; i++) {
2874 if (strcmp(cmp, trace_options[i]) == 0) { 2945 if (strcmp(cmp, trace_options[i]) == 0) {
2875 set_tracer_flags(1 << i, !neg); 2946 ret = set_tracer_flag(1 << i, !neg);
2876 break; 2947 break;
2877 } 2948 }
2878 } 2949 }
2879 2950
2880 /* If no option could be set, test the specific tracer options */ 2951 /* If no option could be set, test the specific tracer options */
2881 if (!trace_options[i]) { 2952 if (!trace_options[i])
2882 mutex_lock(&trace_types_lock);
2883 ret = set_tracer_option(current_trace, cmp, neg); 2953 ret = set_tracer_option(current_trace, cmp, neg);
2884 mutex_unlock(&trace_types_lock); 2954
2885 } 2955 mutex_unlock(&trace_types_lock);
2886 2956
2887 return ret; 2957 return ret;
2888} 2958}
@@ -2892,6 +2962,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2892 size_t cnt, loff_t *ppos) 2962 size_t cnt, loff_t *ppos)
2893{ 2963{
2894 char buf[64]; 2964 char buf[64];
2965 int ret;
2895 2966
2896 if (cnt >= sizeof(buf)) 2967 if (cnt >= sizeof(buf))
2897 return -EINVAL; 2968 return -EINVAL;
@@ -2901,7 +2972,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2901 2972
2902 buf[cnt] = 0; 2973 buf[cnt] = 0;
2903 2974
2904 trace_set_options(buf); 2975 ret = trace_set_options(buf);
2976 if (ret < 0)
2977 return ret;
2905 2978
2906 *ppos += cnt; 2979 *ppos += cnt;
2907 2980
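
With the change above, tracing_trace_options_write() propagates the result of trace_set_options(), so an unknown or rejected option now fails the write instead of being silently ignored. A small sketch of how a user-space caller could observe that; the debugfs path and the "nooverwrite" option name are assumptions used only for illustration.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
    /* assumed path; adjust if debugfs is mounted elsewhere */
    const char *path = "/sys/kernel/debug/tracing/trace_options";
    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* e.g. clearing overwrite is refused while a latency tracer is active */
    if (write(fd, "nooverwrite", strlen("nooverwrite")) < 0)
        fprintf(stderr, "option rejected: %s\n", strerror(errno));
    close(fd);
    return 0;
}
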
@@ -3014,10 +3087,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
3014 int r; 3087 int r;
3015 3088
3016 mutex_lock(&trace_types_lock); 3089 mutex_lock(&trace_types_lock);
3017 if (current_trace) 3090 r = sprintf(buf, "%s\n", current_trace->name);
3018 r = sprintf(buf, "%s\n", current_trace->name);
3019 else
3020 r = sprintf(buf, "\n");
3021 mutex_unlock(&trace_types_lock); 3091 mutex_unlock(&trace_types_lock);
3022 3092
3023 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3093 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3183,6 +3253,7 @@ static int tracing_set_tracer(const char *buf)
3183 static struct trace_option_dentry *topts; 3253 static struct trace_option_dentry *topts;
3184 struct trace_array *tr = &global_trace; 3254 struct trace_array *tr = &global_trace;
3185 struct tracer *t; 3255 struct tracer *t;
3256 bool had_max_tr;
3186 int ret = 0; 3257 int ret = 0;
3187 3258
3188 mutex_lock(&trace_types_lock); 3259 mutex_lock(&trace_types_lock);
@@ -3207,9 +3278,24 @@ static int tracing_set_tracer(const char *buf)
3207 goto out; 3278 goto out;
3208 3279
3209 trace_branch_disable(); 3280 trace_branch_disable();
3210 if (current_trace && current_trace->reset) 3281
3282 current_trace->enabled = false;
3283
3284 if (current_trace->reset)
3211 current_trace->reset(tr); 3285 current_trace->reset(tr);
3212 if (current_trace && current_trace->use_max_tr) { 3286
3287 had_max_tr = current_trace->allocated_snapshot;
3288 current_trace = &nop_trace;
3289
3290 if (had_max_tr && !t->use_max_tr) {
3291 /*
3292 * We need to make sure that the update_max_tr sees that
3293 * current_trace changed to nop_trace to keep it from
3294 * swapping the buffers after we resize it.
 3295 * The update_max_tr is called with interrupts disabled,
 3296 * so a synchronize_sched() is sufficient.
3297 */
3298 synchronize_sched();
3213 /* 3299 /*
3214 * We don't free the ring buffer. instead, resize it because 3300 * We don't free the ring buffer. instead, resize it because
3215 * The max_tr ring buffer has some state (e.g. ring->clock) and 3301 * The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3217,18 +3303,19 @@ static int tracing_set_tracer(const char *buf)
3217 */ 3303 */
3218 ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); 3304 ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
3219 set_buffer_entries(&max_tr, 1); 3305 set_buffer_entries(&max_tr, 1);
3306 tracing_reset_online_cpus(&max_tr);
3307 current_trace->allocated_snapshot = false;
3220 } 3308 }
3221 destroy_trace_option_files(topts); 3309 destroy_trace_option_files(topts);
3222 3310
3223 current_trace = &nop_trace;
3224
3225 topts = create_trace_option_files(t); 3311 topts = create_trace_option_files(t);
3226 if (t->use_max_tr) { 3312 if (t->use_max_tr && !had_max_tr) {
3227 /* we need to make per cpu buffer sizes equivalent */ 3313 /* we need to make per cpu buffer sizes equivalent */
3228 ret = resize_buffer_duplicate_size(&max_tr, &global_trace, 3314 ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
3229 RING_BUFFER_ALL_CPUS); 3315 RING_BUFFER_ALL_CPUS);
3230 if (ret < 0) 3316 if (ret < 0)
3231 goto out; 3317 goto out;
3318 t->allocated_snapshot = true;
3232 } 3319 }
3233 3320
3234 if (t->init) { 3321 if (t->init) {
@@ -3238,6 +3325,7 @@ static int tracing_set_tracer(const char *buf)
3238 } 3325 }
3239 3326
3240 current_trace = t; 3327 current_trace = t;
3328 current_trace->enabled = true;
3241 trace_branch_enable(tr); 3329 trace_branch_enable(tr);
3242 out: 3330 out:
3243 mutex_unlock(&trace_types_lock); 3331 mutex_unlock(&trace_types_lock);
@@ -3336,8 +3424,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3336 ret = -ENOMEM; 3424 ret = -ENOMEM;
3337 goto fail; 3425 goto fail;
3338 } 3426 }
3339 if (current_trace) 3427 *iter->trace = *current_trace;
3340 *iter->trace = *current_trace;
3341 3428
3342 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 3429 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3343 ret = -ENOMEM; 3430 ret = -ENOMEM;
@@ -3477,7 +3564,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
3477 size_t cnt, loff_t *ppos) 3564 size_t cnt, loff_t *ppos)
3478{ 3565{
3479 struct trace_iterator *iter = filp->private_data; 3566 struct trace_iterator *iter = filp->private_data;
3480 static struct tracer *old_tracer;
3481 ssize_t sret; 3567 ssize_t sret;
3482 3568
3483 /* return any leftover data */ 3569 /* return any leftover data */
@@ -3489,10 +3575,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
3489 3575
3490 /* copy the tracer to avoid using a global lock all around */ 3576 /* copy the tracer to avoid using a global lock all around */
3491 mutex_lock(&trace_types_lock); 3577 mutex_lock(&trace_types_lock);
3492 if (unlikely(old_tracer != current_trace && current_trace)) { 3578 if (unlikely(iter->trace->name != current_trace->name))
3493 old_tracer = current_trace;
3494 *iter->trace = *current_trace; 3579 *iter->trace = *current_trace;
3495 }
3496 mutex_unlock(&trace_types_lock); 3580 mutex_unlock(&trace_types_lock);
3497 3581
3498 /* 3582 /*
@@ -3648,7 +3732,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3648 .ops = &tracing_pipe_buf_ops, 3732 .ops = &tracing_pipe_buf_ops,
3649 .spd_release = tracing_spd_release_pipe, 3733 .spd_release = tracing_spd_release_pipe,
3650 }; 3734 };
3651 static struct tracer *old_tracer;
3652 ssize_t ret; 3735 ssize_t ret;
3653 size_t rem; 3736 size_t rem;
3654 unsigned int i; 3737 unsigned int i;
@@ -3658,10 +3741,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3658 3741
3659 /* copy the tracer to avoid using a global lock all around */ 3742 /* copy the tracer to avoid using a global lock all around */
3660 mutex_lock(&trace_types_lock); 3743 mutex_lock(&trace_types_lock);
3661 if (unlikely(old_tracer != current_trace && current_trace)) { 3744 if (unlikely(iter->trace->name != current_trace->name))
3662 old_tracer = current_trace;
3663 *iter->trace = *current_trace; 3745 *iter->trace = *current_trace;
3664 }
3665 mutex_unlock(&trace_types_lock); 3746 mutex_unlock(&trace_types_lock);
3666 3747
3667 mutex_lock(&iter->mutex); 3748 mutex_lock(&iter->mutex);
@@ -4037,8 +4118,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4037 * Reset the buffer so that it doesn't have incomparable timestamps. 4118 * Reset the buffer so that it doesn't have incomparable timestamps.
4038 */ 4119 */
4039 tracing_reset_online_cpus(&global_trace); 4120 tracing_reset_online_cpus(&global_trace);
4040 if (max_tr.buffer) 4121 tracing_reset_online_cpus(&max_tr);
4041 tracing_reset_online_cpus(&max_tr);
4042 4122
4043 mutex_unlock(&trace_types_lock); 4123 mutex_unlock(&trace_types_lock);
4044 4124
@@ -4054,6 +4134,85 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
4054 return single_open(file, tracing_clock_show, NULL); 4134 return single_open(file, tracing_clock_show, NULL);
4055} 4135}
4056 4136
4137#ifdef CONFIG_TRACER_SNAPSHOT
4138static int tracing_snapshot_open(struct inode *inode, struct file *file)
4139{
4140 struct trace_iterator *iter;
4141 int ret = 0;
4142
4143 if (file->f_mode & FMODE_READ) {
4144 iter = __tracing_open(inode, file, true);
4145 if (IS_ERR(iter))
4146 ret = PTR_ERR(iter);
4147 }
4148 return ret;
4149}
4150
4151static ssize_t
4152tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4153 loff_t *ppos)
4154{
4155 unsigned long val;
4156 int ret;
4157
4158 ret = tracing_update_buffers();
4159 if (ret < 0)
4160 return ret;
4161
4162 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4163 if (ret)
4164 return ret;
4165
4166 mutex_lock(&trace_types_lock);
4167
4168 if (current_trace->use_max_tr) {
4169 ret = -EBUSY;
4170 goto out;
4171 }
4172
4173 switch (val) {
4174 case 0:
4175 if (current_trace->allocated_snapshot) {
4176 /* free spare buffer */
4177 ring_buffer_resize(max_tr.buffer, 1,
4178 RING_BUFFER_ALL_CPUS);
4179 set_buffer_entries(&max_tr, 1);
4180 tracing_reset_online_cpus(&max_tr);
4181 current_trace->allocated_snapshot = false;
4182 }
4183 break;
4184 case 1:
4185 if (!current_trace->allocated_snapshot) {
4186 /* allocate spare buffer */
4187 ret = resize_buffer_duplicate_size(&max_tr,
4188 &global_trace, RING_BUFFER_ALL_CPUS);
4189 if (ret < 0)
4190 break;
4191 current_trace->allocated_snapshot = true;
4192 }
4193
4194 local_irq_disable();
4195 /* Now, we're going to swap */
4196 update_max_tr(&global_trace, current, smp_processor_id());
4197 local_irq_enable();
4198 break;
4199 default:
4200 if (current_trace->allocated_snapshot)
4201 tracing_reset_online_cpus(&max_tr);
4202 break;
4203 }
4204
4205 if (ret >= 0) {
4206 *ppos += cnt;
4207 ret = cnt;
4208 }
4209out:
4210 mutex_unlock(&trace_types_lock);
4211 return ret;
4212}
4213#endif /* CONFIG_TRACER_SNAPSHOT */
4214
4215
4057static const struct file_operations tracing_max_lat_fops = { 4216static const struct file_operations tracing_max_lat_fops = {
4058 .open = tracing_open_generic, 4217 .open = tracing_open_generic,
4059 .read = tracing_max_lat_read, 4218 .read = tracing_max_lat_read,
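
The snapshot file operations above implement a simple protocol in tracing_snapshot_write(): writing 1 allocates the spare buffer (if needed) and swaps the live trace into it, writing 0 frees the spare, other values just clear it, and reads show the frozen copy. A minimal user-space sketch follows, under the assumption that debugfs is mounted at /sys/kernel/debug.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
    int fd = open(path, O_WRONLY);
    if (fd < 0)
        return -1;
    ssize_t n = write(fd, s, strlen(s));
    close(fd);
    return n < 0 ? -1 : 0;
}

int main(void)
{
    const char *snap = "/sys/kernel/debug/tracing/snapshot";  /* assumed mount */
    char buf[4096];

    if (write_str(snap, "1"))         /* allocate the spare buffer and swap */
        return 1;

    int fd = open(snap, O_RDONLY);    /* reading shows the frozen snapshot */
    if (fd >= 0) {
        ssize_t n;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, n, stdout);
        close(fd);
    }

    write_str(snap, "0");             /* free the spare buffer again */
    return 0;
}
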
@@ -4110,6 +4269,16 @@ static const struct file_operations trace_clock_fops = {
4110 .write = tracing_clock_write, 4269 .write = tracing_clock_write,
4111}; 4270};
4112 4271
4272#ifdef CONFIG_TRACER_SNAPSHOT
4273static const struct file_operations snapshot_fops = {
4274 .open = tracing_snapshot_open,
4275 .read = seq_read,
4276 .write = tracing_snapshot_write,
4277 .llseek = tracing_seek,
4278 .release = tracing_release,
4279};
4280#endif /* CONFIG_TRACER_SNAPSHOT */
4281
4113struct ftrace_buffer_info { 4282struct ftrace_buffer_info {
4114 struct trace_array *tr; 4283 struct trace_array *tr;
4115 void *spare; 4284 void *spare;
@@ -4414,6 +4583,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
4414 cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); 4583 cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
4415 trace_seq_printf(s, "dropped events: %ld\n", cnt); 4584 trace_seq_printf(s, "dropped events: %ld\n", cnt);
4416 4585
4586 cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
4587 trace_seq_printf(s, "read events: %ld\n", cnt);
4588
4417 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 4589 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
4418 4590
4419 kfree(s); 4591 kfree(s);
@@ -4490,7 +4662,7 @@ struct dentry *tracing_init_dentry(void)
4490 4662
4491static struct dentry *d_percpu; 4663static struct dentry *d_percpu;
4492 4664
4493struct dentry *tracing_dentry_percpu(void) 4665static struct dentry *tracing_dentry_percpu(void)
4494{ 4666{
4495 static int once; 4667 static int once;
4496 struct dentry *d_tracer; 4668 struct dentry *d_tracer;
@@ -4640,7 +4812,13 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4640 4812
4641 if (val != 0 && val != 1) 4813 if (val != 0 && val != 1)
4642 return -EINVAL; 4814 return -EINVAL;
4643 set_tracer_flags(1 << index, val); 4815
4816 mutex_lock(&trace_types_lock);
4817 ret = set_tracer_flag(1 << index, val);
4818 mutex_unlock(&trace_types_lock);
4819
4820 if (ret < 0)
4821 return ret;
4644 4822
4645 *ppos += cnt; 4823 *ppos += cnt;
4646 4824
@@ -4906,6 +5084,11 @@ static __init int tracer_init_debugfs(void)
4906 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 5084 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4907#endif 5085#endif
4908 5086
5087#ifdef CONFIG_TRACER_SNAPSHOT
5088 trace_create_file("snapshot", 0644, d_tracer,
5089 (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
5090#endif
5091
4909 create_trace_options_dir(); 5092 create_trace_options_dir();
4910 5093
4911 for_each_tracing_cpu(cpu) 5094 for_each_tracing_cpu(cpu)
@@ -5014,6 +5197,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5014 if (disable_tracing) 5197 if (disable_tracing)
5015 ftrace_kill(); 5198 ftrace_kill();
5016 5199
5200 /* Simulate the iterator */
5017 trace_init_global_iter(&iter); 5201 trace_init_global_iter(&iter);
5018 5202
5019 for_each_tracing_cpu(cpu) { 5203 for_each_tracing_cpu(cpu) {
@@ -5025,10 +5209,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5025 /* don't look at user memory in panic mode */ 5209 /* don't look at user memory in panic mode */
5026 trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 5210 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
5027 5211
5028 /* Simulate the iterator */
5029 iter.tr = &global_trace;
5030 iter.trace = current_trace;
5031
5032 switch (oops_dump_mode) { 5212 switch (oops_dump_mode) {
5033 case DUMP_ALL: 5213 case DUMP_ALL:
5034 iter.cpu_file = TRACE_PIPE_ALL_CPU; 5214 iter.cpu_file = TRACE_PIPE_ALL_CPU;
@@ -5173,7 +5353,7 @@ __init static int tracer_alloc_buffers(void)
5173 init_irq_work(&trace_work_wakeup, trace_wake_up); 5353 init_irq_work(&trace_work_wakeup, trace_wake_up);
5174 5354
5175 register_tracer(&nop_trace); 5355 register_tracer(&nop_trace);
5176 current_trace = &nop_trace; 5356
5177 /* All seems OK, enable tracing */ 5357 /* All seems OK, enable tracing */
5178 tracing_disabled = 0; 5358 tracing_disabled = 0;
5179 5359
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c75d7988902c..2081971367ea 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -283,24 +283,70 @@ struct tracer {
283 enum print_line_t (*print_line)(struct trace_iterator *iter); 283 enum print_line_t (*print_line)(struct trace_iterator *iter);
284 /* If you handled the flag setting, return 0 */ 284 /* If you handled the flag setting, return 0 */
285 int (*set_flag)(u32 old_flags, u32 bit, int set); 285 int (*set_flag)(u32 old_flags, u32 bit, int set);
286 /* Return 0 if OK with change, else return non-zero */
287 int (*flag_changed)(struct tracer *tracer,
288 u32 mask, int set);
286 struct tracer *next; 289 struct tracer *next;
287 struct tracer_flags *flags; 290 struct tracer_flags *flags;
288 bool print_max; 291 bool print_max;
289 bool use_max_tr; 292 bool use_max_tr;
293 bool allocated_snapshot;
294 bool enabled;
290}; 295};
291 296
292 297
293/* Only current can touch trace_recursion */ 298/* Only current can touch trace_recursion */
294#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
295#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
296 299
297/* Ring buffer has the 10 LSB bits to count */ 300/*
298#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) 301 * For function tracing recursion:
 299 302 * The order of these bits is important.
300/* for function tracing recursion */ 303 *
301#define TRACE_INTERNAL_BIT (1<<11) 304 * When function tracing occurs, the following steps are made:
302#define TRACE_GLOBAL_BIT (1<<12) 305 * If arch does not support a ftrace feature:
303#define TRACE_CONTROL_BIT (1<<13) 306 * call internal function (uses INTERNAL bits) which calls...
307 * If callback is registered to the "global" list, the list
308 * function is called and recursion checks the GLOBAL bits.
309 * then this function calls...
310 * The function callback, which can use the FTRACE bits to
311 * check for recursion.
312 *
 313 * Now if the arch does not support a feature, and it calls
 314 * the global list function which calls the ftrace callback,
 315 * all three of these steps will do a recursion protection.
316 * There's no reason to do one if the previous caller already
317 * did. The recursion that we are protecting against will
318 * go through the same steps again.
319 *
320 * To prevent the multiple recursion checks, if a recursion
321 * bit is set that is higher than the MAX bit of the current
322 * check, then we know that the check was made by the previous
323 * caller, and we can skip the current check.
324 */
325enum {
326 TRACE_BUFFER_BIT,
327 TRACE_BUFFER_NMI_BIT,
328 TRACE_BUFFER_IRQ_BIT,
329 TRACE_BUFFER_SIRQ_BIT,
330
331 /* Start of function recursion bits */
332 TRACE_FTRACE_BIT,
333 TRACE_FTRACE_NMI_BIT,
334 TRACE_FTRACE_IRQ_BIT,
335 TRACE_FTRACE_SIRQ_BIT,
336
337 /* GLOBAL_BITs must be greater than FTRACE_BITs */
338 TRACE_GLOBAL_BIT,
339 TRACE_GLOBAL_NMI_BIT,
340 TRACE_GLOBAL_IRQ_BIT,
341 TRACE_GLOBAL_SIRQ_BIT,
342
343 /* INTERNAL_BITs must be greater than GLOBAL_BITs */
344 TRACE_INTERNAL_BIT,
345 TRACE_INTERNAL_NMI_BIT,
346 TRACE_INTERNAL_IRQ_BIT,
347 TRACE_INTERNAL_SIRQ_BIT,
348
349 TRACE_CONTROL_BIT,
304 350
305/* 351/*
306 * Abuse of the trace_recursion. 352 * Abuse of the trace_recursion.
@@ -309,11 +355,77 @@ struct tracer {
309 * was called in irq context but we have irq tracing off. Since this 355 * was called in irq context but we have irq tracing off. Since this
310 * can only be modified by current, we can reuse trace_recursion. 356 * can only be modified by current, we can reuse trace_recursion.
311 */ 357 */
312#define TRACE_IRQ_BIT (1<<13) 358 TRACE_IRQ_BIT,
359};
360
361#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
362#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
363#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
364
365#define TRACE_CONTEXT_BITS 4
366
367#define TRACE_FTRACE_START TRACE_FTRACE_BIT
368#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
369
370#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT
371#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
372
373#define TRACE_LIST_START TRACE_INTERNAL_BIT
374#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
375
376#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
377
378static __always_inline int trace_get_context_bit(void)
379{
380 int bit;
313 381
314#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) 382 if (in_interrupt()) {
315#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) 383 if (in_nmi())
316#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) 384 bit = 0;
385
386 else if (in_irq())
387 bit = 1;
388 else
389 bit = 2;
390 } else
391 bit = 3;
392
393 return bit;
394}
395
396static __always_inline int trace_test_and_set_recursion(int start, int max)
397{
398 unsigned int val = current->trace_recursion;
399 int bit;
400
401 /* A previous recursion check was made */
402 if ((val & TRACE_CONTEXT_MASK) > max)
403 return 0;
404
405 bit = trace_get_context_bit() + start;
406 if (unlikely(val & (1 << bit)))
407 return -1;
408
409 val |= 1 << bit;
410 current->trace_recursion = val;
411 barrier();
412
413 return bit;
414}
415
416static __always_inline void trace_clear_recursion(int bit)
417{
418 unsigned int val = current->trace_recursion;
419
420 if (!bit)
421 return;
422
423 bit = 1 << bit;
424 val &= ~bit;
425
426 barrier();
427 current->trace_recursion = val;
428}
317 429
318#define TRACE_PIPE_ALL_CPU -1 430#define TRACE_PIPE_ALL_CPU -1
319 431
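
The new recursion-bit layout above gives each checking layer (buffer, ftrace callback, global list, internal) four per-context bits, and a check is skipped when a higher layer already claimed one. Below is a small user-space model of trace_test_and_set_recursion()/trace_clear_recursion(); the context is passed explicitly because in_nmi()/in_irq()/in_interrupt() only exist in the kernel, and the layout is simplified to two ranges.

#include <stdio.h>

enum { CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL, CTX_BITS };

#define FTRACE_START 4                          /* like TRACE_FTRACE_BIT */
#define FTRACE_MAX   ((1u << (FTRACE_START + CTX_BITS)) - 1)
#define GLOBAL_START (FTRACE_START + CTX_BITS)  /* like TRACE_GLOBAL_BIT */
#define GLOBAL_MAX   ((1u << (GLOBAL_START + CTX_BITS)) - 1)
#define CTX_MASK     GLOBAL_MAX                 /* like TRACE_CONTEXT_MASK */

static unsigned int recursion;                  /* stands in for current->trace_recursion */

/* claimed bit on success, -1 on recursion, 0 if a higher layer already checked */
static int test_and_set(int start, unsigned int max, int ctx)
{
    if ((recursion & CTX_MASK) > max)
        return 0;
    int bit = start + ctx;
    if (recursion & (1u << bit))
        return -1;
    recursion |= 1u << bit;
    return bit;
}

static void clear_recursion(int bit)
{
    if (bit > 0)
        recursion &= ~(1u << bit);
}

int main(void)
{
    /* the "global" list function checks first ... */
    int g = test_and_set(GLOBAL_START, GLOBAL_MAX, CTX_NORMAL);   /* claims bit 11 */
    /* ... so the per-callback check is skipped entirely */
    int f = test_and_set(FTRACE_START, FTRACE_MAX, CTX_NORMAL);   /* returns 0 */
    /* re-entering in the same context is detected as recursion */
    int r = test_and_set(GLOBAL_START, GLOBAL_MAX, CTX_NORMAL);   /* returns -1 */
    printf("%d %d %d\n", g, f, r);
    clear_recursion(f);
    clear_recursion(g);
    return 0;
}
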
@@ -835,6 +947,8 @@ extern const char *__stop___trace_bprintk_fmt[];
835 947
836void trace_printk_init_buffers(void); 948void trace_printk_init_buffers(void);
837void trace_printk_start_comm(void); 949void trace_printk_start_comm(void);
950int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
951int set_tracer_flag(unsigned int mask, int enabled);
838 952
839#undef FTRACE_ENTRY 953#undef FTRACE_ENTRY
840#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 954#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cbb..aa8f5f48dae6 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
21#include <linux/ktime.h> 21#include <linux/ktime.h>
22#include <linux/trace_clock.h> 22#include <linux/trace_clock.h>
23 23
24#include "trace.h"
25
26/* 24/*
27 * trace_clock_local(): the simplest and least coherent tracing clock. 25 * trace_clock_local(): the simplest and least coherent tracing clock.
28 * 26 *
@@ -44,6 +42,7 @@ u64 notrace trace_clock_local(void)
44 42
45 return clock; 43 return clock;
46} 44}
45EXPORT_SYMBOL_GPL(trace_clock_local);
47 46
48/* 47/*
49 * trace_clock(): 'between' trace clock. Not completely serialized, 48 * trace_clock(): 'between' trace clock. Not completely serialized,
@@ -86,7 +85,7 @@ u64 notrace trace_clock_global(void)
86 local_irq_save(flags); 85 local_irq_save(flags);
87 86
88 this_cpu = raw_smp_processor_id(); 87 this_cpu = raw_smp_processor_id();
89 now = cpu_clock(this_cpu); 88 now = sched_clock_cpu(this_cpu);
90 /* 89 /*
91 * If in an NMI context then dont risk lockups and return the 90 * If in an NMI context then dont risk lockups and return the
92 * cpu_clock() time: 91 * cpu_clock() time:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b946..57e9b284250c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
116 __common_field(unsigned char, flags); 116 __common_field(unsigned char, flags);
117 __common_field(unsigned char, preempt_count); 117 __common_field(unsigned char, preempt_count);
118 __common_field(int, pid); 118 __common_field(int, pid);
119 __common_field(int, padding);
120 119
121 return ret; 120 return ret;
122} 121}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e3ad8082ab7..601152523326 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
47 tracing_reset_online_cpus(tr); 47 tracing_reset_online_cpus(tr);
48} 48}
49 49
50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
52 struct ftrace_ops *op, struct pt_regs *pt_regs)
53{
54 struct trace_array *tr = func_trace;
55 struct trace_array_cpu *data;
56 unsigned long flags;
57 long disabled;
58 int cpu;
59 int pc;
60
61 if (unlikely(!ftrace_function_enabled))
62 return;
63
64 pc = preempt_count();
65 preempt_disable_notrace();
66 local_save_flags(flags);
67 cpu = raw_smp_processor_id();
68 data = tr->data[cpu];
69 disabled = atomic_inc_return(&data->disabled);
70
71 if (likely(disabled == 1))
72 trace_function(tr, ip, parent_ip, flags, pc);
73
74 atomic_dec(&data->disabled);
75 preempt_enable_notrace();
76}
77
78/* Our option */ 50/* Our option */
79enum { 51enum {
80 TRACE_FUNC_OPT_STACK = 0x1, 52 TRACE_FUNC_OPT_STACK = 0x1,
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
85static void 57static void
86function_trace_call(unsigned long ip, unsigned long parent_ip, 58function_trace_call(unsigned long ip, unsigned long parent_ip,
87 struct ftrace_ops *op, struct pt_regs *pt_regs) 59 struct ftrace_ops *op, struct pt_regs *pt_regs)
88
89{ 60{
90 struct trace_array *tr = func_trace; 61 struct trace_array *tr = func_trace;
91 struct trace_array_cpu *data; 62 struct trace_array_cpu *data;
92 unsigned long flags; 63 unsigned long flags;
93 long disabled; 64 int bit;
94 int cpu; 65 int cpu;
95 int pc; 66 int pc;
96 67
97 if (unlikely(!ftrace_function_enabled)) 68 if (unlikely(!ftrace_function_enabled))
98 return; 69 return;
99 70
100 /* 71 pc = preempt_count();
101 * Need to use raw, since this must be called before the 72 preempt_disable_notrace();
102 * recursive protection is performed.
103 */
104 local_irq_save(flags);
105 cpu = raw_smp_processor_id();
106 data = tr->data[cpu];
107 disabled = atomic_inc_return(&data->disabled);
108 73
109 if (likely(disabled == 1)) { 74 bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
110 pc = preempt_count(); 75 if (bit < 0)
76 goto out;
77
78 cpu = smp_processor_id();
79 data = tr->data[cpu];
80 if (!atomic_read(&data->disabled)) {
81 local_save_flags(flags);
111 trace_function(tr, ip, parent_ip, flags, pc); 82 trace_function(tr, ip, parent_ip, flags, pc);
112 } 83 }
84 trace_clear_recursion(bit);
113 85
114 atomic_dec(&data->disabled); 86 out:
115 local_irq_restore(flags); 87 preempt_enable_notrace();
116} 88}
117 89
118static void 90static void
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
185{ 157{
186 ftrace_function_enabled = 0; 158 ftrace_function_enabled = 0;
187 159
188 if (trace_flags & TRACE_ITER_PREEMPTONLY)
189 trace_ops.func = function_trace_call_preempt_only;
190 else
191 trace_ops.func = function_trace_call;
192
193 if (func_flags.val & TRACE_FUNC_OPT_STACK) 160 if (func_flags.val & TRACE_FUNC_OPT_STACK)
194 register_ftrace_function(&trace_stack_ops); 161 register_ftrace_function(&trace_stack_ops);
195 else 162 else
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b74eb7e..39ada66389cc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40 48#define TRACE_GRAPH_PRINT_IRQS 0x40
49 49
50static unsigned int max_depth;
51
50static struct tracer_opt trace_opts[] = { 52static struct tracer_opt trace_opts[] = {
51 /* Display overruns? (for self-debug purpose) */ 53 /* Display overruns? (for self-debug purpose) */
52 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 54 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
189 191
190 ftrace_pop_return_trace(&trace, &ret, frame_pointer); 192 ftrace_pop_return_trace(&trace, &ret, frame_pointer);
191 trace.rettime = trace_clock_local(); 193 trace.rettime = trace_clock_local();
192 ftrace_graph_return(&trace);
193 barrier(); 194 barrier();
194 current->curr_ret_stack--; 195 current->curr_ret_stack--;
195 196
197 /*
198 * The trace should run after decrementing the ret counter
 199 * in case an interrupt were to come in. We don't want to
 200 * lose the trace of the interrupt if max_depth is set.
201 */
202 ftrace_graph_return(&trace);
203
196 if (unlikely(!ret)) { 204 if (unlikely(!ret)) {
197 ftrace_graph_stop(); 205 ftrace_graph_stop();
198 WARN_ON(1); 206 WARN_ON(1);
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
250 return 0; 258 return 0;
251 259
252 /* trace it when it is-nested-in or is a function enabled. */ 260 /* trace it when it is-nested-in or is a function enabled. */
253 if (!(trace->depth || ftrace_graph_addr(trace->func)) || 261 if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
254 ftrace_graph_ignore_irqs()) 262 ftrace_graph_ignore_irqs()) ||
263 (max_depth && trace->depth >= max_depth))
255 return 0; 264 return 0;
256 265
257 local_irq_save(flags); 266 local_irq_save(flags);
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
1457#endif 1466#endif
1458}; 1467};
1459 1468
1469
1470static ssize_t
1471graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
1472 loff_t *ppos)
1473{
1474 unsigned long val;
1475 int ret;
1476
1477 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1478 if (ret)
1479 return ret;
1480
1481 max_depth = val;
1482
1483 *ppos += cnt;
1484
1485 return cnt;
1486}
1487
1488static ssize_t
1489graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
1490 loff_t *ppos)
1491{
 1492 char buf[15]; /* More than enough to hold UINT_MAX + "\n" */
1493 int n;
1494
1495 n = sprintf(buf, "%d\n", max_depth);
1496
1497 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
1498}
1499
1500static const struct file_operations graph_depth_fops = {
1501 .open = tracing_open_generic,
1502 .write = graph_depth_write,
1503 .read = graph_depth_read,
1504 .llseek = generic_file_llseek,
1505};
1506
1507static __init int init_graph_debugfs(void)
1508{
1509 struct dentry *d_tracer;
1510
1511 d_tracer = tracing_init_dentry();
1512 if (!d_tracer)
1513 return 0;
1514
1515 trace_create_file("max_graph_depth", 0644, d_tracer,
1516 NULL, &graph_depth_fops);
1517
1518 return 0;
1519}
1520fs_initcall(init_graph_debugfs);
1521
1460static __init int init_graph_trace(void) 1522static __init int init_graph_trace(void)
1461{ 1523{
1462 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); 1524 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
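
init_graph_debugfs() above exposes the new limit as max_graph_depth, and trace_graph_entry() stops recording once trace->depth reaches it. A tiny sketch that sets the limit from user space; the debugfs path is an assumption, a value of 1 keeps only the outermost calls, and 0 removes the limit.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
    /* assumed debugfs mount point */
    const char *path = "/sys/kernel/debug/tracing/max_graph_depth";
    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* 1 keeps only the outermost calls; write "0" to remove the limit again */
    if (write(fd, "1", 1) != 1)
        perror("write");
    close(fd);
    return 0;
}
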
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 713a2cac4881..443b25b43b4f 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -32,7 +32,7 @@ enum {
32 32
33static int trace_type __read_mostly; 33static int trace_type __read_mostly;
34 34
35static int save_lat_flag; 35static int save_flags;
36 36
37static void stop_irqsoff_tracer(struct trace_array *tr, int graph); 37static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
38static int start_irqsoff_tracer(struct trace_array *tr, int graph); 38static int start_irqsoff_tracer(struct trace_array *tr, int graph);
@@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
558 558
559static void __irqsoff_tracer_init(struct trace_array *tr) 559static void __irqsoff_tracer_init(struct trace_array *tr)
560{ 560{
561 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; 561 save_flags = trace_flags;
562 trace_flags |= TRACE_ITER_LATENCY_FMT; 562
563 /* non overwrite screws up the latency tracers */
564 set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
565 set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
563 566
564 tracing_max_latency = 0; 567 tracing_max_latency = 0;
565 irqsoff_trace = tr; 568 irqsoff_trace = tr;
@@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
573 576
574static void irqsoff_tracer_reset(struct trace_array *tr) 577static void irqsoff_tracer_reset(struct trace_array *tr)
575{ 578{
579 int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
580 int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
581
576 stop_irqsoff_tracer(tr, is_graph()); 582 stop_irqsoff_tracer(tr, is_graph());
577 583
578 if (!save_lat_flag) 584 set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
579 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 585 set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
580} 586}
581 587
582static void irqsoff_tracer_start(struct trace_array *tr) 588static void irqsoff_tracer_start(struct trace_array *tr)
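
The irqsoff init/reset changes above save the whole flag word, force OVERWRITE and LATENCY_FMT on, and on reset restore exactly the bits that were set beforehand. A compact user-space sketch of that save/restore idiom; the flag values here are illustrative, not the kernel's.

#include <stdio.h>

#define FL_LATENCY_FMT (1u << 0)
#define FL_OVERWRITE   (1u << 1)

static unsigned int trace_flags = FL_OVERWRITE;   /* pretend only overwrite was on */
static unsigned int save_flags;

static void set_flag(unsigned int mask, int enabled)
{
    if (enabled)
        trace_flags |= mask;
    else
        trace_flags &= ~mask;
}

static void tracer_init(void)
{
    save_flags = trace_flags;             /* remember the user's settings */
    set_flag(FL_OVERWRITE, 1);            /* non-overwrite breaks latency tracing */
    set_flag(FL_LATENCY_FMT, 1);
}

static void tracer_reset(void)
{
    /* put back exactly what was set before the tracer started */
    set_flag(FL_LATENCY_FMT, save_flags & FL_LATENCY_FMT);
    set_flag(FL_OVERWRITE, save_flags & FL_OVERWRITE);
}

int main(void)
{
    tracer_init();
    tracer_reset();
    printf("flags after reset: %#x\n", trace_flags);   /* 0x2: only overwrite */
    return 0;
}
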
@@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __read_mostly =
609 .print_line = irqsoff_print_line, 615 .print_line = irqsoff_print_line,
610 .flags = &tracer_flags, 616 .flags = &tracer_flags,
611 .set_flag = irqsoff_set_flag, 617 .set_flag = irqsoff_set_flag,
618 .flag_changed = trace_keep_overwrite,
612#ifdef CONFIG_FTRACE_SELFTEST 619#ifdef CONFIG_FTRACE_SELFTEST
613 .selftest = trace_selftest_startup_irqsoff, 620 .selftest = trace_selftest_startup_irqsoff,
614#endif 621#endif
@@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer __read_mostly =
642 .print_line = irqsoff_print_line, 649 .print_line = irqsoff_print_line,
643 .flags = &tracer_flags, 650 .flags = &tracer_flags,
644 .set_flag = irqsoff_set_flag, 651 .set_flag = irqsoff_set_flag,
652 .flag_changed = trace_keep_overwrite,
645#ifdef CONFIG_FTRACE_SELFTEST 653#ifdef CONFIG_FTRACE_SELFTEST
646 .selftest = trace_selftest_startup_preemptoff, 654 .selftest = trace_selftest_startup_preemptoff,
647#endif 655#endif
@@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
677 .print_line = irqsoff_print_line, 685 .print_line = irqsoff_print_line,
678 .flags = &tracer_flags, 686 .flags = &tracer_flags,
679 .set_flag = irqsoff_set_flag, 687 .set_flag = irqsoff_set_flag,
688 .flag_changed = trace_keep_overwrite,
680#ifdef CONFIG_FTRACE_SELFTEST 689#ifdef CONFIG_FTRACE_SELFTEST
681 .selftest = trace_selftest_startup_preemptirqsoff, 690 .selftest = trace_selftest_startup_preemptirqsoff,
682#endif 691#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 194d79602dc7..697e88d13907 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -739,12 +739,11 @@ static int task_state_char(unsigned long state)
739struct trace_event *ftrace_find_event(int type) 739struct trace_event *ftrace_find_event(int type)
740{ 740{
741 struct trace_event *event; 741 struct trace_event *event;
742 struct hlist_node *n;
743 unsigned key; 742 unsigned key;
744 743
745 key = type & (EVENT_HASHSIZE - 1); 744 key = type & (EVENT_HASHSIZE - 1);
746 745
747 hlist_for_each_entry(event, n, &event_hash[key], node) { 746 hlist_for_each_entry(event, &event_hash[key], node) {
748 if (event->type == type) 747 if (event->type == type)
749 return event; 748 return event;
750 } 749 }
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 933708677814..5c7e09d10d74 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
66#define TP_FLAG_TRACE 1 66#define TP_FLAG_TRACE 1
67#define TP_FLAG_PROFILE 2 67#define TP_FLAG_PROFILE 2
68#define TP_FLAG_REGISTERED 4 68#define TP_FLAG_REGISTERED 4
69#define TP_FLAG_UPROBE 8
70 69
71 70
72/* data_rloc: data relative location, compatible with u32 */ 71/* data_rloc: data relative location, compatible with u32 */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..fde652c9a511 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/sched/rt.h>
18#include <trace/events/sched.h> 19#include <trace/events/sched.h>
19
20#include "trace.h" 20#include "trace.h"
21 21
22static struct trace_array *wakeup_trace; 22static struct trace_array *wakeup_trace;
@@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace); 36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace); 37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
38 38
39static int save_lat_flag; 39static int save_flags;
40 40
41#define TRACE_DISPLAY_GRAPH 1 41#define TRACE_DISPLAY_GRAPH 1
42 42
@@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct trace_array *tr)
540 540
541static int __wakeup_tracer_init(struct trace_array *tr) 541static int __wakeup_tracer_init(struct trace_array *tr)
542{ 542{
543 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; 543 save_flags = trace_flags;
544 trace_flags |= TRACE_ITER_LATENCY_FMT; 544
545 /* non overwrite screws up the latency tracers */
546 set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
547 set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
545 548
546 tracing_max_latency = 0; 549 tracing_max_latency = 0;
547 wakeup_trace = tr; 550 wakeup_trace = tr;
@@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr)
563 566
564static void wakeup_tracer_reset(struct trace_array *tr) 567static void wakeup_tracer_reset(struct trace_array *tr)
565{ 568{
569 int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
570 int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
571
566 stop_wakeup_tracer(tr); 572 stop_wakeup_tracer(tr);
567 /* make sure we put back any tasks we are tracing */ 573 /* make sure we put back any tasks we are tracing */
568 wakeup_reset(tr); 574 wakeup_reset(tr);
569 575
570 if (!save_lat_flag) 576 set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
571 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 577 set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
572} 578}
573 579
574static void wakeup_tracer_start(struct trace_array *tr) 580static void wakeup_tracer_start(struct trace_array *tr)
@@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __read_mostly =
594 .print_line = wakeup_print_line, 600 .print_line = wakeup_print_line,
595 .flags = &tracer_flags, 601 .flags = &tracer_flags,
596 .set_flag = wakeup_set_flag, 602 .set_flag = wakeup_set_flag,
603 .flag_changed = trace_keep_overwrite,
597#ifdef CONFIG_FTRACE_SELFTEST 604#ifdef CONFIG_FTRACE_SELFTEST
598 .selftest = trace_selftest_startup_wakeup, 605 .selftest = trace_selftest_startup_wakeup,
599#endif 606#endif
@@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
615 .print_line = wakeup_print_line, 622 .print_line = wakeup_print_line,
616 .flags = &tracer_flags, 623 .flags = &tracer_flags,
617 .set_flag = wakeup_set_flag, 624 .set_flag = wakeup_set_flag,
625 .flag_changed = trace_keep_overwrite,
618#ifdef CONFIG_FTRACE_SELFTEST 626#ifdef CONFIG_FTRACE_SELFTEST
619 .selftest = trace_selftest_startup_wakeup, 627 .selftest = trace_selftest_startup_wakeup,
620#endif 628#endif
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a815..51c819c12c29 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
415 * The ftrace infrastructure should provide the recursion 415 * The ftrace infrastructure should provide the recursion
416 * protection. If not, this will crash the kernel! 416 * protection. If not, this will crash the kernel!
417 */ 417 */
418 trace_selftest_recursion_cnt++; 418 if (trace_selftest_recursion_cnt++ > 10)
419 return;
419 DYN_FTRACE_TEST_NAME(); 420 DYN_FTRACE_TEST_NAME();
420} 421}
421 422
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
452 char *func_name; 453 char *func_name;
453 int len; 454 int len;
454 int ret; 455 int ret;
455 int cnt;
456 456
457 /* The previous test PASSED */ 457 /* The previous test PASSED */
458 pr_cont("PASSED\n"); 458 pr_cont("PASSED\n");
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
510 510
511 unregister_ftrace_function(&test_recsafe_probe); 511 unregister_ftrace_function(&test_recsafe_probe);
512 512
513 /*
514 * If arch supports all ftrace features, and no other task
515 * was on the list, we should be fine.
516 */
517 if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
518 cnt = 2; /* Should have recursed */
519 else
520 cnt = 1;
521
522 ret = -1; 513 ret = -1;
523 if (trace_selftest_recursion_cnt != cnt) { 514 if (trace_selftest_recursion_cnt != 2) {
524 pr_cont("*callback not called expected %d times (%d)* ", 515 pr_cont("*callback not called expected 2 times (%d)* ",
525 cnt, trace_selftest_recursion_cnt); 516 trace_selftest_recursion_cnt);
526 goto out; 517 goto out;
527 } 518 }
528 519
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
568 int ret; 559 int ret;
569 int supported = 0; 560 int supported = 0;
570 561
571#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS 562#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
572 supported = 1; 563 supported = 1;
573#endif 564#endif
574 565
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 42ca822fc701..83a8b5b7bd35 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = {
322 .open = stack_trace_filter_open, 322 .open = stack_trace_filter_open,
323 .read = seq_read, 323 .read = seq_read,
324 .write = ftrace_filter_write, 324 .write = ftrace_filter_write,
325 .llseek = ftrace_regex_lseek, 325 .llseek = ftrace_filter_lseek,
326 .release = ftrace_regex_release, 326 .release = ftrace_regex_release,
327}; 327};
328 328
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7609dd6714c2..7a809e321058 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/syscalls.h>
3#include <linux/slab.h> 4#include <linux/slab.h>
4#include <linux/kernel.h> 5#include <linux/kernel.h>
5#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ 6#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
@@ -47,6 +48,38 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
47} 48}
48#endif 49#endif
49 50
51#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
52/*
53 * Some architectures that allow for 32bit applications
 54 * to run on a 64bit kernel do not map the syscalls for
55 * the 32bit tasks the same as they do for 64bit tasks.
56 *
57 * *cough*x86*cough*
58 *
59 * In such a case, instead of reporting the wrong syscalls,
60 * simply ignore them.
61 *
62 * For an arch to ignore the compat syscalls it needs to
63 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
64 * define the function arch_trace_is_compat_syscall() to let
 65 * the tracing system know that it should ignore them.
66 */
67static int
68trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
69{
70 if (unlikely(arch_trace_is_compat_syscall(regs)))
71 return -1;
72
73 return syscall_get_nr(task, regs);
74}
75#else
76static inline int
77trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
78{
79 return syscall_get_nr(task, regs);
80}
81#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
82
50static __init struct syscall_metadata * 83static __init struct syscall_metadata *
51find_syscall_meta(unsigned long syscall) 84find_syscall_meta(unsigned long syscall)
52{ 85{
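
trace_get_syscall_nr() above lets an architecture declare ARCH_TRACE_IGNORE_COMPAT_SYSCALLS and drop compat (32-bit) system calls whose numbers would otherwise be reported against the wrong table. A toy user-space model of the two variants follows; struct regs and the helpers are stand-ins, not kernel or arch APIs.

#include <stdio.h>

/* toy model of the two trace_get_syscall_nr() variants above */
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1

struct regs { int nr; int compat; };

static int syscall_nr(const struct regs *r) { return r->nr; }
static int is_compat(const struct regs *r)  { return r->compat; }

#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
static int get_syscall_nr(const struct regs *r)
{
    if (is_compat(r))
        return -1;              /* compat numbering differs: don't report it */
    return syscall_nr(r);
}
#else
static int get_syscall_nr(const struct regs *r)
{
    return syscall_nr(r);
}
#endif

int main(void)
{
    struct regs native = { 42, 0 }, compat = { 42, 1 };
    printf("%d %d\n", get_syscall_nr(&native), get_syscall_nr(&compat)); /* 42 -1 */
    return 0;
}
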
@@ -77,7 +110,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
77 return syscalls_metadata[nr]; 110 return syscalls_metadata[nr];
78} 111}
79 112
80enum print_line_t 113static enum print_line_t
81print_syscall_enter(struct trace_iterator *iter, int flags, 114print_syscall_enter(struct trace_iterator *iter, int flags,
82 struct trace_event *event) 115 struct trace_event *event)
83{ 116{
@@ -130,7 +163,7 @@ end:
130 return TRACE_TYPE_HANDLED; 163 return TRACE_TYPE_HANDLED;
131} 164}
132 165
133enum print_line_t 166static enum print_line_t
134print_syscall_exit(struct trace_iterator *iter, int flags, 167print_syscall_exit(struct trace_iterator *iter, int flags,
135 struct trace_event *event) 168 struct trace_event *event)
136{ 169{
@@ -270,16 +303,16 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
270 return ret; 303 return ret;
271} 304}
272 305
273void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) 306static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
274{ 307{
275 struct syscall_trace_enter *entry; 308 struct syscall_trace_enter *entry;
276 struct syscall_metadata *sys_data; 309 struct syscall_metadata *sys_data;
277 struct ring_buffer_event *event; 310 struct ring_buffer_event *event;
278 struct ring_buffer *buffer; 311 struct ring_buffer *buffer;
279 int size;
280 int syscall_nr; 312 int syscall_nr;
313 int size;
281 314
282 syscall_nr = syscall_get_nr(current, regs); 315 syscall_nr = trace_get_syscall_nr(current, regs);
283 if (syscall_nr < 0) 316 if (syscall_nr < 0)
284 return; 317 return;
285 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 318 if (!test_bit(syscall_nr, enabled_enter_syscalls))
@@ -305,7 +338,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
305 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 338 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
306} 339}
307 340
308void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 341static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
309{ 342{
310 struct syscall_trace_exit *entry; 343 struct syscall_trace_exit *entry;
311 struct syscall_metadata *sys_data; 344 struct syscall_metadata *sys_data;
@@ -313,7 +346,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
313 struct ring_buffer *buffer; 346 struct ring_buffer *buffer;
314 int syscall_nr; 347 int syscall_nr;
315 348
316 syscall_nr = syscall_get_nr(current, regs); 349 syscall_nr = trace_get_syscall_nr(current, regs);
317 if (syscall_nr < 0) 350 if (syscall_nr < 0)
318 return; 351 return;
319 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 352 if (!test_bit(syscall_nr, enabled_exit_syscalls))
@@ -337,7 +370,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
337 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 370 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
338} 371}
339 372
340int reg_event_syscall_enter(struct ftrace_event_call *call) 373static int reg_event_syscall_enter(struct ftrace_event_call *call)
341{ 374{
342 int ret = 0; 375 int ret = 0;
343 int num; 376 int num;
@@ -356,7 +389,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
356 return ret; 389 return ret;
357} 390}
358 391
359void unreg_event_syscall_enter(struct ftrace_event_call *call) 392static void unreg_event_syscall_enter(struct ftrace_event_call *call)
360{ 393{
361 int num; 394 int num;
362 395
@@ -371,7 +404,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
371 mutex_unlock(&syscall_trace_lock); 404 mutex_unlock(&syscall_trace_lock);
372} 405}
373 406
374int reg_event_syscall_exit(struct ftrace_event_call *call) 407static int reg_event_syscall_exit(struct ftrace_event_call *call)
375{ 408{
376 int ret = 0; 409 int ret = 0;
377 int num; 410 int num;
@@ -390,7 +423,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
390 return ret; 423 return ret;
391} 424}
392 425
393void unreg_event_syscall_exit(struct ftrace_event_call *call) 426static void unreg_event_syscall_exit(struct ftrace_event_call *call)
394{ 427{
395 int num; 428 int num;
396 429
@@ -459,7 +492,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
459 return (unsigned long)sys_call_table[nr]; 492 return (unsigned long)sys_call_table[nr];
460} 493}
461 494
462int __init init_ftrace_syscalls(void) 495static int __init init_ftrace_syscalls(void)
463{ 496{
464 struct syscall_metadata *meta; 497 struct syscall_metadata *meta;
465 unsigned long addr; 498 unsigned long addr;
@@ -502,7 +535,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
502 int rctx; 535 int rctx;
503 int size; 536 int size;
504 537
505 syscall_nr = syscall_get_nr(current, regs); 538 syscall_nr = trace_get_syscall_nr(current, regs);
506 if (syscall_nr < 0) 539 if (syscall_nr < 0)
507 return; 540 return;
508 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 541 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
@@ -578,7 +611,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
578 int rctx; 611 int rctx;
579 int size; 612 int size;
580 613
581 syscall_nr = syscall_get_nr(current, regs); 614 syscall_nr = trace_get_syscall_nr(current, regs);
582 if (syscall_nr < 0) 615 if (syscall_nr < 0)
583 return; 616 return;
584 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 617 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c86e6d4f67fb..8dad2a92dee9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
28 28
29#define UPROBE_EVENT_SYSTEM "uprobes" 29#define UPROBE_EVENT_SYSTEM "uprobes"
30 30
31struct trace_uprobe_filter {
32 rwlock_t rwlock;
33 int nr_systemwide;
34 struct list_head perf_events;
35};
36
31/* 37/*
32 * uprobe event core functions 38 * uprobe event core functions
33 */ 39 */
34struct trace_uprobe;
35struct uprobe_trace_consumer {
36 struct uprobe_consumer cons;
37 struct trace_uprobe *tu;
38};
39
40struct trace_uprobe { 40struct trace_uprobe {
41 struct list_head list; 41 struct list_head list;
42 struct ftrace_event_class class; 42 struct ftrace_event_class class;
43 struct ftrace_event_call call; 43 struct ftrace_event_call call;
44 struct uprobe_trace_consumer *consumer; 44 struct trace_uprobe_filter filter;
45 struct uprobe_consumer consumer;
45 struct inode *inode; 46 struct inode *inode;
46 char *filename; 47 char *filename;
47 unsigned long offset; 48 unsigned long offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
64 65
65static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); 66static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
66 67
68static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
69{
70 rwlock_init(&filter->rwlock);
71 filter->nr_systemwide = 0;
72 INIT_LIST_HEAD(&filter->perf_events);
73}
74
75static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
76{
77 return !filter->nr_systemwide && list_empty(&filter->perf_events);
78}
79
67/* 80/*
68 * Allocate new trace_uprobe and initialize it (including uprobes). 81 * Allocate new trace_uprobe and initialize it (including uprobes).
69 */ 82 */
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
92 goto error; 105 goto error;
93 106
94 INIT_LIST_HEAD(&tu->list); 107 INIT_LIST_HEAD(&tu->list);
108 tu->consumer.handler = uprobe_dispatcher;
109 init_trace_uprobe_filter(&tu->filter);
95 return tu; 110 return tu;
96 111
97error: 112error:
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
253 if (ret) 268 if (ret)
254 goto fail_address_parse; 269 goto fail_address_parse;
255 270
271 inode = igrab(path.dentry->d_inode);
272 path_put(&path);
273
274 if (!inode || !S_ISREG(inode->i_mode)) {
275 ret = -EINVAL;
276 goto fail_address_parse;
277 }
278
256 ret = kstrtoul(arg, 0, &offset); 279 ret = kstrtoul(arg, 0, &offset);
257 if (ret) 280 if (ret)
258 goto fail_address_parse; 281 goto fail_address_parse;
259 282
260 inode = igrab(path.dentry->d_inode);
261
262 argc -= 2; 283 argc -= 2;
263 argv += 2; 284 argv += 2;
264 285
@@ -356,7 +377,7 @@ fail_address_parse:
356 if (inode) 377 if (inode)
357 iput(inode); 378 iput(inode);
358 379
359 pr_info("Failed to parse address.\n"); 380 pr_info("Failed to parse address or file.\n");
360 381
361 return ret; 382 return ret;
362} 383}
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
465}; 486};
466 487
467/* uprobe handler */ 488/* uprobe handler */
468static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) 489static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
469{ 490{
470 struct uprobe_trace_entry_head *entry; 491 struct uprobe_trace_entry_head *entry;
471 struct ring_buffer_event *event; 492 struct ring_buffer_event *event;
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
475 unsigned long irq_flags; 496 unsigned long irq_flags;
476 struct ftrace_event_call *call = &tu->call; 497 struct ftrace_event_call *call = &tu->call;
477 498
478 tu->nhit++;
479
480 local_save_flags(irq_flags); 499 local_save_flags(irq_flags);
481 pc = preempt_count(); 500 pc = preempt_count();
482 501
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
485 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 504 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
486 size, irq_flags, pc); 505 size, irq_flags, pc);
487 if (!event) 506 if (!event)
488 return; 507 return 0;
489 508
490 entry = ring_buffer_event_data(event); 509 entry = ring_buffer_event_data(event);
491 entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 510 entry->ip = instruction_pointer(task_pt_regs(current));
492 data = (u8 *)&entry[1]; 511 data = (u8 *)&entry[1];
493 for (i = 0; i < tu->nr_args; i++) 512 for (i = 0; i < tu->nr_args; i++)
494 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 513 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
495 514
496 if (!filter_current_check_discard(buffer, call, entry, event)) 515 if (!filter_current_check_discard(buffer, call, entry, event))
497 trace_buffer_unlock_commit(buffer, event, irq_flags, pc); 516 trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
517
518 return 0;
498} 519}
499 520
500/* Event entry printers */ 521/* Event entry printers */
@@ -533,42 +554,43 @@ partial:
533 return TRACE_TYPE_PARTIAL_LINE; 554 return TRACE_TYPE_PARTIAL_LINE;
534} 555}
535 556
536static int probe_event_enable(struct trace_uprobe *tu, int flag) 557static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
537{ 558{
538 struct uprobe_trace_consumer *utc; 559 return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
539 int ret = 0; 560}
540 561
541 if (!tu->inode || tu->consumer) 562typedef bool (*filter_func_t)(struct uprobe_consumer *self,
542 return -EINTR; 563 enum uprobe_filter_ctx ctx,
564 struct mm_struct *mm);
543 565
544 utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); 566static int
545 if (!utc) 567probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
568{
569 int ret = 0;
570
571 if (is_trace_uprobe_enabled(tu))
546 return -EINTR; 572 return -EINTR;
547 573
548 utc->cons.handler = uprobe_dispatcher; 574 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
549 utc->cons.filter = NULL;
550 ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
551 if (ret) {
552 kfree(utc);
553 return ret;
554 }
555 575
556 tu->flags |= flag; 576 tu->flags |= flag;
557 utc->tu = tu; 577 tu->consumer.filter = filter;
558 tu->consumer = utc; 578 ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
579 if (ret)
580 tu->flags &= ~flag;
559 581
560 return 0; 582 return ret;
561} 583}
562 584
563static void probe_event_disable(struct trace_uprobe *tu, int flag) 585static void probe_event_disable(struct trace_uprobe *tu, int flag)
564{ 586{
565 if (!tu->inode || !tu->consumer) 587 if (!is_trace_uprobe_enabled(tu))
566 return; 588 return;
567 589
568 uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); 590 WARN_ON(!uprobe_filter_is_empty(&tu->filter));
591
592 uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
569 tu->flags &= ~flag; 593 tu->flags &= ~flag;
570 kfree(tu->consumer);
571 tu->consumer = NULL;
572} 594}
573 595
574static int uprobe_event_define_fields(struct ftrace_event_call *event_call) 596static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
 }
 
 #ifdef CONFIG_PERF_EVENTS
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{
+	struct perf_event *event;
+
+	if (filter->nr_systemwide)
+		return true;
+
+	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
+		if (event->hw.tp_target->mm == mm)
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{
+	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+}
+
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		/*
+		 * event->parent != NULL means copy_process(), we can avoid
+		 * uprobe_apply(). current->mm must be probed and we can rely
+		 * on dup_mmap() which preserves the already installed bp's.
+		 *
+		 * attr.enable_on_exec means that exec/mmap will install the
+		 * breakpoints we need.
+		 */
+		done = tu->filter.nr_systemwide ||
+			event->parent || event->attr.enable_on_exec ||
+			uprobe_filter_event(tu, event);
+		list_add(&event->hw.tp_list, &tu->filter.perf_events);
+	} else {
+		done = tu->filter.nr_systemwide;
+		tu->filter.nr_systemwide++;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+
+	return 0;
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		list_del(&event->hw.tp_list);
+		done = tu->filter.nr_systemwide ||
+			(event->hw.tp_target->flags & PF_EXITING) ||
+			uprobe_filter_event(tu, event);
+	} else {
+		tu->filter.nr_systemwide--;
+		done = tu->filter.nr_systemwide;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	struct trace_uprobe *tu;
+	int ret;
+
+	tu = container_of(uc, struct trace_uprobe, consumer);
+	read_lock(&tu->filter.rwlock);
+	ret = __uprobe_perf_filter(&tu->filter, mm);
+	read_unlock(&tu->filter.rwlock);
+
+	return ret;
+}
+
 /* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
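uprobe_perf_open()/uprobe_perf_close() in the hunk above maintain the pre-filter state: per-task perf events are kept on tu->filter.perf_events and matched later by their tp_target->mm, CPU-wide events just bump nr_systemwide, and uprobe_apply() is only called when an open or close actually changes the filter's answer. Below is a rough user-space model of that bookkeeping; the names (filter_model, open_event) are made up and the event->parent/enable_on_exec special cases are left out.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for tu->filter: a system-wide count plus a small
 * array of mm owners in place of the kernel's perf_events list. */
#define MAX_EVENTS 16

struct filter_model {
	int nr_systemwide;
	void *mms[MAX_EVENTS];
	int nr_mms;
};

static bool filter_matches(struct filter_model *f, void *mm)
{
	if (f->nr_systemwide)
		return true;
	for (int i = 0; i < f->nr_mms; i++)
		if (f->mms[i] == mm)
			return true;
	return false;
}

/* Returns true when the breakpoint must actually be (re)applied, i.e.
 * when this open changes the outcome of the filter for that mm. */
static bool open_event(struct filter_model *f, void *mm)
{
	bool done = mm ? filter_matches(f, mm) : f->nr_systemwide != 0;

	if (mm && f->nr_mms < MAX_EVENTS)
		f->mms[f->nr_mms++] = mm;
	else if (!mm)
		f->nr_systemwide++;
	return !done;
}

int main(void)
{
	struct filter_model f = { 0 };
	int a, b;	/* stand-ins for two different mm_structs */

	printf("first per-task open applies bp: %d\n", open_event(&f, &a));
	printf("same mm again is a no-op:       %d\n", open_event(&f, &a));
	printf("different mm applies again:     %d\n", open_event(&f, &b));
	return 0;
}

The kernel version does the same check under tu->filter.rwlock and additionally treats inherited (event->parent) and enable_on_exec events as already done, since their breakpoints arrive via dup_mmap() or exec.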
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	int size, __size, i;
 	int rctx;
 
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
 	__size = sizeof(*entry) + tu->size;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
+		return 0;
 
 	preempt_disable();
 
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	if (!entry)
 		goto out;
 
-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+	entry->ip = instruction_pointer(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
  out:
 	preempt_enable();
+	return 0;
 }
 #endif /* CONFIG_PERF_EVENTS */
 
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_TRACE);
+		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
 
 	case TRACE_REG_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_TRACE);
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_PROFILE);
+		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
 
 	case TRACE_REG_PERF_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_PROFILE);
 		return 0;
+
+	case TRACE_REG_PERF_OPEN:
+		return uprobe_perf_open(tu, data);
+
+	case TRACE_REG_PERF_CLOSE:
+		return uprobe_perf_close(tu, data);
+
 #endif
 	default:
 		return 0;
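Note the asymmetry in the switch above: the ftrace path enables the probe with a NULL filter, while the perf path passes uprobe_perf_filter and additionally gets the new TRACE_REG_PERF_OPEN/CLOSE callbacks. The uprobe core treats a consumer without a filter as matching every mm, so NULL effectively means "install the breakpoint everywhere", which is what the tracefs interface wants. A hedged sketch of that per-mm decision; consumer_wants_mm and only_mm_a are my names, not kernel symbols.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef bool (*filter_fn)(void *mm);

struct consumer_model { filter_fn filter; };

/* Sketch of the per-mm decision: a consumer with no filter matches
 * everything, otherwise the filter callback gets the final say. */
static bool consumer_wants_mm(struct consumer_model *c, void *mm)
{
	return !c->filter || c->filter(mm);
}

static int mm_a, mm_b;	/* stand-ins for two mm_structs */

static bool only_mm_a(void *mm)
{
	return mm == (void *)&mm_a;
}

int main(void)
{
	struct consumer_model ftrace_like = { NULL };		/* TRACE_REG_REGISTER path */
	struct consumer_model perf_like   = { only_mm_a };	/* TRACE_REG_PERF_REGISTER path */

	printf("ftrace consumer, any mm: %d\n", consumer_wants_mm(&ftrace_like, &mm_b));
	printf("perf consumer, mm_a:     %d\n", consumer_wants_mm(&perf_like, &mm_a));
	printf("perf consumer, mm_b:     %d\n", consumer_wants_mm(&perf_like, &mm_b));
	return 0;
}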
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 {
-	struct uprobe_trace_consumer *utc;
 	struct trace_uprobe *tu;
+	int ret = 0;
 
-	utc = container_of(con, struct uprobe_trace_consumer, cons);
-	tu = utc->tu;
-	if (!tu || tu->consumer != utc)
-		return 0;
+	tu = container_of(con, struct trace_uprobe, consumer);
+	tu->nhit++;
 
 	if (tu->flags & TP_FLAG_TRACE)
-		uprobe_trace_func(tu, regs);
+		ret |= uprobe_trace_func(tu, regs);
 
 #ifdef CONFIG_PERF_EVENTS
 	if (tu->flags & TP_FLAG_PROFILE)
-		uprobe_perf_func(tu, regs);
+		ret |= uprobe_perf_func(tu, regs);
 #endif
-	return 0;
+	return ret;
 }
 
 static struct trace_event_functions uprobe_funcs = {
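uprobe_dispatcher() now propagates the handlers' return values by OR-ing them, so a UPROBE_HANDLER_REMOVE vote from uprobe_perf_func() reaches the uprobe core. Because probe_event_enable() above refuses to enable an event whose TP_FLAG_TRACE or TP_FLAG_PROFILE is already set, only one handler runs per hit in this version of the code, and the OR can never mix a "keep" from ftrace with a "remove" from perf. An illustrative model of that combining step; the flag and result names are made up.

#include <stdio.h>

/* Model of the dispatcher's result combination: each active client
 * contributes a result and the bits are OR-ed together. */
enum { KEEP = 0, REMOVE = 1 };

#define FLAG_TRACE	0x1
#define FLAG_PROFILE	0x2

static int trace_handler(void)
{
	return KEEP;			/* ftrace always keeps the breakpoint */
}

static int perf_handler(int interesting)
{
	return interesting ? KEEP : REMOVE;	/* perf may ask for removal */
}

static int dispatcher(unsigned int flags, int perf_interested)
{
	int ret = 0;

	if (flags & FLAG_TRACE)
		ret |= trace_handler();
	if (flags & FLAG_PROFILE)
		ret |= perf_handler(perf_interested);
	return ret;
}

int main(void)
{
	printf("trace only:          %d\n", dispatcher(FLAG_TRACE, 0));
	printf("perf, task matches:  %d\n", dispatcher(FLAG_PROFILE, 1));
	printf("perf, task filtered: %d\n", dispatcher(FLAG_PROFILE, 0));
	return 0;
}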