Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                   |   26
-rw-r--r--  kernel/trace/Makefile                  |    2
-rw-r--r--  kernel/trace/blktrace.c                |  128
-rw-r--r--  kernel/trace/ftrace.c                  | 1457
-rw-r--r--  kernel/trace/power-traces.c            |    5
-rw-r--r--  kernel/trace/ring_buffer.c             |  398
-rw-r--r--  kernel/trace/trace.c                   |   96
-rw-r--r--  kernel/trace/trace.h                   |   62
-rw-r--r--  kernel/trace/trace_clock.c             |    2
-rw-r--r--  kernel/trace/trace_entries.h           |   10
-rw-r--r--  kernel/trace/trace_event_perf.c        |   59
-rw-r--r--  kernel/trace/trace_events.c            |   89
-rw-r--r--  kernel/trace/trace_events_filter.c     |  885
-rw-r--r--  kernel/trace/trace_export.c            |   20
-rw-r--r--  kernel/trace/trace_functions.c         |    2
-rw-r--r--  kernel/trace/trace_functions_graph.c   |  209
-rw-r--r--  kernel/trace/trace_irqsoff.c           |  163
-rw-r--r--  kernel/trace/trace_kdb.c               |    1
-rw-r--r--  kernel/trace/trace_kprobe.c            |  125
-rw-r--r--  kernel/trace/trace_output.c            |   66
-rw-r--r--  kernel/trace/trace_printk.c            |  117
-rw-r--r--  kernel/trace/trace_sched_switch.c      |   48
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      |  257
-rw-r--r--  kernel/trace/trace_selftest.c          |  216
-rw-r--r--  kernel/trace/trace_selftest_dynamic.c  |    6
-rw-r--r--  kernel/trace/trace_stack.c             |    2
-rw-r--r--  kernel/trace/trace_syscalls.c          |   92
-rw-r--r--  kernel/trace/trace_workqueue.c         |   10
28 files changed, 3397 insertions(+), 1156 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c6ea50..2ad39e556cb4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_C_RECORDMCOUNT
53 bool
54 help
55 C version of recordmcount available?
56
52config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
53 bool 58 bool
54 59
@@ -64,6 +69,21 @@ config EVENT_TRACING
64 select CONTEXT_SWITCH_TRACER 69 select CONTEXT_SWITCH_TRACER
65 bool 70 bool
66 71
72config EVENT_POWER_TRACING_DEPRECATED
73 depends on EVENT_TRACING
74 bool "Deprecated power event trace API, to be removed"
75 default y
76 help
77 Provides old power event types:
78 C-state/idle accounting events:
79 power:power_start
80 power:power_end
81 and old cpufreq accounting event:
82 power:power_frequency
83 This is for userspace compatibility
84 and will vanish after 5 kernel iterations,
85 namely 2.6.41.
86
67config CONTEXT_SWITCH_TRACER 87config CONTEXT_SWITCH_TRACER
68 bool 88 bool
69 89
@@ -121,7 +141,7 @@ if FTRACE
121config FUNCTION_TRACER 141config FUNCTION_TRACER
122 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
123 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
124 select FRAME_POINTER 144 select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE
125 select KALLSYMS 145 select KALLSYMS
126 select GENERIC_TRACER 146 select GENERIC_TRACER
127 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
@@ -255,7 +275,7 @@ config PROFILE_ANNOTATED_BRANCHES
255 This tracer profiles all the the likely and unlikely macros 275 This tracer profiles all the the likely and unlikely macros
256 in the kernel. It will display the results in: 276 in the kernel. It will display the results in:
257 277
258 /sys/kernel/debug/tracing/profile_annotated_branch 278 /sys/kernel/debug/tracing/trace_stat/branch_annotated
259 279
260 Note: this will add a significant overhead; only turn this 280 Note: this will add a significant overhead; only turn this
261 on if you need to profile the system's use of these macros. 281 on if you need to profile the system's use of these macros.
@@ -268,7 +288,7 @@ config PROFILE_ALL_BRANCHES
268 taken in the kernel is recorded whether it hit or miss. 288 taken in the kernel is recorded whether it hit or miss.
269 The results will be displayed in: 289 The results will be displayed in:
270 290
271 /sys/kernel/debug/tracing/profile_branch 291 /sys/kernel/debug/tracing/trace_stat/branch_all
272 292
273 This option also enables the likely/unlikely profiler. 293 This option also enables the likely/unlikely profiler.
274 294
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 53f338190b26..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
52endif 52endif
53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
55obj-$(CONFIG_EVENT_TRACING) += power-traces.o 55obj-$(CONFIG_TRACEPOINTS) += power-traces.o
56ifeq ($(CONFIG_TRACING),y) 56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif 58endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 959f8d6c8cc1..6957aa298dfa 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/smp_lock.h>
27#include <linux/time.h> 26#include <linux/time.h>
28#include <linux/uaccess.h> 27#include <linux/uaccess.h>
29 28
@@ -139,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
139 !blk_tracer_enabled)) 138 !blk_tracer_enabled))
140 return; 139 return;
141 140
141 /*
142 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
143 * message to the trace.
144 */
145 if (!(bt->act_mask & BLK_TC_NOTIFY))
146 return;
147
142 local_irq_save(flags); 148 local_irq_save(flags);
143 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 149 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
144 va_start(args, fmt); 150 va_start(args, fmt);
@@ -169,7 +175,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
169static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 175static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
170 BLK_TC_ACT(BLK_TC_WRITE) }; 176 BLK_TC_ACT(BLK_TC_WRITE) };
171 177
172#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
173#define BLK_TC_RAHEAD BLK_TC_AHEAD 178#define BLK_TC_RAHEAD BLK_TC_AHEAD
174 179
175/* The ilog2() calls fall out because they're constant */ 180/* The ilog2() calls fall out because they're constant */
@@ -197,7 +202,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
197 return; 202 return;
198 203
199 what |= ddir_act[rw & WRITE]; 204 what |= ddir_act[rw & WRITE];
200 what |= MASK_TC_BIT(rw, HARDBARRIER);
201 what |= MASK_TC_BIT(rw, SYNC); 205 what |= MASK_TC_BIT(rw, SYNC);
202 what |= MASK_TC_BIT(rw, RAHEAD); 206 what |= MASK_TC_BIT(rw, RAHEAD);
203 what |= MASK_TC_BIT(rw, META); 207 what |= MASK_TC_BIT(rw, META);
@@ -326,6 +330,7 @@ static const struct file_operations blk_dropped_fops = {
326 .owner = THIS_MODULE, 330 .owner = THIS_MODULE,
327 .open = blk_dropped_open, 331 .open = blk_dropped_open,
328 .read = blk_dropped_read, 332 .read = blk_dropped_read,
333 .llseek = default_llseek,
329}; 334};
330 335
331static int blk_msg_open(struct inode *inode, struct file *filp) 336static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -365,6 +370,7 @@ static const struct file_operations blk_msg_fops = {
365 .owner = THIS_MODULE, 370 .owner = THIS_MODULE,
366 .open = blk_msg_open, 371 .open = blk_msg_open,
367 .write = blk_msg_write, 372 .write = blk_msg_write,
373 .llseek = noop_llseek,
368}; 374};
369 375
370/* 376/*
@@ -639,7 +645,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
639 if (!q) 645 if (!q)
640 return -ENXIO; 646 return -ENXIO;
641 647
642 lock_kernel();
643 mutex_lock(&bdev->bd_mutex); 648 mutex_lock(&bdev->bd_mutex);
644 649
645 switch (cmd) { 650 switch (cmd) {
@@ -667,7 +672,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
667 } 672 }
668 673
669 mutex_unlock(&bdev->bd_mutex); 674 mutex_unlock(&bdev->bd_mutex);
670 unlock_kernel();
671 return ret; 675 return ret;
672} 676}
673 677
@@ -699,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
699 * 703 *
700 **/ 704 **/
701static void blk_add_trace_rq(struct request_queue *q, struct request *rq, 705static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
702 u32 what) 706 u32 what)
703{ 707{
704 struct blk_trace *bt = q->blk_trace; 708 struct blk_trace *bt = q->blk_trace;
705 int rw = rq->cmd_flags & 0x03;
706 709
707 if (likely(!bt)) 710 if (likely(!bt))
708 return; 711 return;
709 712
710 if (rq->cmd_flags & REQ_DISCARD)
711 rw |= REQ_DISCARD;
712
713 if (rq->cmd_flags & REQ_SECURE)
714 rw |= REQ_SECURE;
715
716 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
717 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
718 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
719 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
720 } else { 717 } else {
721 what |= BLK_TC_ACT(BLK_TC_FS); 718 what |= BLK_TC_ACT(BLK_TC_FS);
722 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, 719 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
723 what, rq->errors, 0, NULL); 720 rq->cmd_flags, what, rq->errors, 0, NULL);
724 } 721 }
725} 722}
726 723
@@ -761,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore,
761 * @q: queue the io is for 758 * @q: queue the io is for
762 * @bio: the source bio 759 * @bio: the source bio
763 * @what: the action 760 * @what: the action
761 * @error: error, if any
764 * 762 *
765 * Description: 763 * Description:
766 * Records an action against a bio. Will log the bio offset + size. 764 * Records an action against a bio. Will log the bio offset + size.
767 * 765 *
768 **/ 766 **/
769static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, 767static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
770 u32 what) 768 u32 what, int error)
771{ 769{
772 struct blk_trace *bt = q->blk_trace; 770 struct blk_trace *bt = q->blk_trace;
773 771
774 if (likely(!bt)) 772 if (likely(!bt))
775 return; 773 return;
776 774
775 if (!error && !bio_flagged(bio, BIO_UPTODATE))
776 error = EIO;
777
777 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
778 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 779 error, 0, NULL);
779} 780}
780 781
781static void blk_add_trace_bio_bounce(void *ignore, 782static void blk_add_trace_bio_bounce(void *ignore,
782 struct request_queue *q, struct bio *bio) 783 struct request_queue *q, struct bio *bio)
783{ 784{
784 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
785} 786}
786 787
787static void blk_add_trace_bio_complete(void *ignore, 788static void blk_add_trace_bio_complete(void *ignore,
788 struct request_queue *q, struct bio *bio) 789 struct request_queue *q, struct bio *bio,
790 int error)
789{ 791{
790 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
791} 793}
792 794
793static void blk_add_trace_bio_backmerge(void *ignore, 795static void blk_add_trace_bio_backmerge(void *ignore,
794 struct request_queue *q, 796 struct request_queue *q,
795 struct bio *bio) 797 struct bio *bio)
796{ 798{
797 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
798} 800}
799 801
800static void blk_add_trace_bio_frontmerge(void *ignore, 802static void blk_add_trace_bio_frontmerge(void *ignore,
801 struct request_queue *q, 803 struct request_queue *q,
802 struct bio *bio) 804 struct bio *bio)
803{ 805{
804 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
805} 807}
806 808
807static void blk_add_trace_bio_queue(void *ignore, 809static void blk_add_trace_bio_queue(void *ignore,
808 struct request_queue *q, struct bio *bio) 810 struct request_queue *q, struct bio *bio)
809{ 811{
810 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
811} 813}
812 814
813static void blk_add_trace_getrq(void *ignore, 815static void blk_add_trace_getrq(void *ignore,
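The blk_add_trace_bio() change above adds an explicit error argument and, when a caller passes 0, falls back to deriving EIO from the bio's BIO_UPTODATE flag. A minimal userspace sketch of that convention (struct fake_bio is an invented stand-in, not the kernel's struct bio):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Invented stand-in for the one bit of bio state the patch consults. */
struct fake_bio {
        bool uptodate;                  /* mirrors bio_flagged(bio, BIO_UPTODATE) */
};

/* Callers that know the completion status pass it in; callers that do
 * not pass 0 and let the helper infer EIO from the bio itself. */
static int bio_trace_error(const struct fake_bio *bio, int error)
{
        if (!error && !bio->uptodate)
                error = EIO;
        return error;
}

int main(void)
{
        struct fake_bio ok  = { .uptodate = true };
        struct fake_bio bad = { .uptodate = false };

        printf("%d %d %d\n",
               bio_trace_error(&ok, 0),         /* 0: success, nothing inferred */
               bio_trace_error(&bad, 0),        /* EIO inferred from the bio */
               bio_trace_error(&bad, 97));      /* an explicit error wins */
        return 0;
}
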
@@ -815,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore,
815 struct bio *bio, int rw) 817 struct bio *bio, int rw)
816{ 818{
817 if (bio) 819 if (bio)
818 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
819 else { 821 else {
820 struct blk_trace *bt = q->blk_trace; 822 struct blk_trace *bt = q->blk_trace;
821 823
@@ -830,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore,
830 struct bio *bio, int rw) 832 struct bio *bio, int rw)
831{ 833{
832 if (bio) 834 if (bio)
833 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
834 else { 836 else {
835 struct blk_trace *bt = q->blk_trace; 837 struct blk_trace *bt = q->blk_trace;
836 838
@@ -848,29 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
848 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 850 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
849} 851}
850 852
851static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) 853static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
854 unsigned int depth, bool explicit)
852{ 855{
853 struct blk_trace *bt = q->blk_trace; 856 struct blk_trace *bt = q->blk_trace;
854 857
855 if (bt) { 858 if (bt) {
856 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; 859 __be64 rpdu = cpu_to_be64(depth);
857 __be64 rpdu = cpu_to_be64(pdu); 860 u32 what;
858 861
859 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, 862 if (explicit)
860 sizeof(rpdu), &rpdu); 863 what = BLK_TA_UNPLUG_IO;
861 } 864 else
862} 865 what = BLK_TA_UNPLUG_TIMER;
863
864static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
865{
866 struct blk_trace *bt = q->blk_trace;
867
868 if (bt) {
869 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
870 __be64 rpdu = cpu_to_be64(pdu);
871 866
872 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, 867 __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
873 sizeof(rpdu), &rpdu);
874 } 868 }
875} 869}
876 870
@@ -890,7 +884,7 @@ static void blk_add_trace_split(void *ignore,
890} 884}
891 885
892/** 886/**
893 * blk_add_trace_remap - Add a trace for a remap operation 887 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
894 * @ignore: trace callback data parameter (not used) 888 * @ignore: trace callback data parameter (not used)
895 * @q: queue the io is for 889 * @q: queue the io is for
896 * @bio: the source bio 890 * @bio: the source bio
@@ -902,9 +896,9 @@ static void blk_add_trace_split(void *ignore,
902 * it spans a stripe (or similar). Add a trace for that action. 896 * it spans a stripe (or similar). Add a trace for that action.
903 * 897 *
904 **/ 898 **/
905static void blk_add_trace_remap(void *ignore, 899static void blk_add_trace_bio_remap(void *ignore,
906 struct request_queue *q, struct bio *bio, 900 struct request_queue *q, struct bio *bio,
907 dev_t dev, sector_t from) 901 dev_t dev, sector_t from)
908{ 902{
909 struct blk_trace *bt = q->blk_trace; 903 struct blk_trace *bt = q->blk_trace;
910 struct blk_io_trace_remap r; 904 struct blk_io_trace_remap r;
@@ -1013,13 +1007,11 @@ static void blk_register_tracepoints(void)
1013 WARN_ON(ret); 1007 WARN_ON(ret);
1014 ret = register_trace_block_plug(blk_add_trace_plug, NULL); 1008 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
1015 WARN_ON(ret); 1009 WARN_ON(ret);
1016 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); 1010 ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
1017 WARN_ON(ret);
1018 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
1019 WARN_ON(ret); 1011 WARN_ON(ret);
1020 ret = register_trace_block_split(blk_add_trace_split, NULL); 1012 ret = register_trace_block_split(blk_add_trace_split, NULL);
1021 WARN_ON(ret); 1013 WARN_ON(ret);
1022 ret = register_trace_block_remap(blk_add_trace_remap, NULL); 1014 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1023 WARN_ON(ret); 1015 WARN_ON(ret);
1024 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1016 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1025 WARN_ON(ret); 1017 WARN_ON(ret);
@@ -1028,10 +1020,9 @@ static void blk_register_tracepoints(void)
1028static void blk_unregister_tracepoints(void) 1020static void blk_unregister_tracepoints(void)
1029{ 1021{
1030 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1022 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1031 unregister_trace_block_remap(blk_add_trace_remap, NULL); 1023 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1032 unregister_trace_block_split(blk_add_trace_split, NULL); 1024 unregister_trace_block_split(blk_add_trace_split, NULL);
1033 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1025 unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
1034 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
1035 unregister_trace_block_plug(blk_add_trace_plug, NULL); 1026 unregister_trace_block_plug(blk_add_trace_plug, NULL);
1036 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); 1027 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
1037 unregister_trace_block_getrq(blk_add_trace_getrq, NULL); 1028 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
@@ -1652,10 +1643,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1652 struct block_device *bdev; 1643 struct block_device *bdev;
1653 ssize_t ret = -ENXIO; 1644 ssize_t ret = -ENXIO;
1654 1645
1655 lock_kernel();
1656 bdev = bdget(part_devt(p)); 1646 bdev = bdget(part_devt(p));
1657 if (bdev == NULL) 1647 if (bdev == NULL)
1658 goto out_unlock_kernel; 1648 goto out;
1659 1649
1660 q = blk_trace_get_queue(bdev); 1650 q = blk_trace_get_queue(bdev);
1661 if (q == NULL) 1651 if (q == NULL)
@@ -1683,8 +1673,7 @@ out_unlock_bdev:
1683 mutex_unlock(&bdev->bd_mutex); 1673 mutex_unlock(&bdev->bd_mutex);
1684out_bdput: 1674out_bdput:
1685 bdput(bdev); 1675 bdput(bdev);
1686out_unlock_kernel: 1676out:
1687 unlock_kernel();
1688 return ret; 1677 return ret;
1689} 1678}
1690 1679
@@ -1714,11 +1703,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1714 1703
1715 ret = -ENXIO; 1704 ret = -ENXIO;
1716 1705
1717 lock_kernel();
1718 p = dev_to_part(dev); 1706 p = dev_to_part(dev);
1719 bdev = bdget(part_devt(p)); 1707 bdev = bdget(part_devt(p));
1720 if (bdev == NULL) 1708 if (bdev == NULL)
1721 goto out_unlock_kernel; 1709 goto out;
1722 1710
1723 q = blk_trace_get_queue(bdev); 1711 q = blk_trace_get_queue(bdev);
1724 if (q == NULL) 1712 if (q == NULL)
@@ -1753,8 +1741,6 @@ out_unlock_bdev:
1753 mutex_unlock(&bdev->bd_mutex); 1741 mutex_unlock(&bdev->bd_mutex);
1754out_bdput: 1742out_bdput:
1755 bdput(bdev); 1743 bdput(bdev);
1756out_unlock_kernel:
1757 unlock_kernel();
1758out: 1744out:
1759 return ret ? ret : count; 1745 return ret ? ret : count;
1760} 1746}
@@ -1813,8 +1799,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1813 1799
1814 if (rw & REQ_RAHEAD) 1800 if (rw & REQ_RAHEAD)
1815 rwbs[i++] = 'A'; 1801 rwbs[i++] = 'A';
1816 if (rw & REQ_HARDBARRIER)
1817 rwbs[i++] = 'B';
1818 if (rw & REQ_SYNC) 1802 if (rw & REQ_SYNC)
1819 rwbs[i++] = 'S'; 1803 rwbs[i++] = 'S';
1820 if (rw & REQ_META) 1804 if (rw & REQ_META)
@@ -1825,21 +1809,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1825 rwbs[i] = '\0'; 1809 rwbs[i] = '\0';
1826} 1810}
1827 1811
1828void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1829{
1830 int rw = rq->cmd_flags & 0x03;
1831 int bytes;
1832
1833 if (rq->cmd_flags & REQ_DISCARD)
1834 rw |= REQ_DISCARD;
1835
1836 if (rq->cmd_flags & REQ_SECURE)
1837 rw |= REQ_SECURE;
1838
1839 bytes = blk_rq_bytes(rq);
1840
1841 blk_fill_rwbs(rwbs, rw, bytes);
1842}
1843
1844#endif /* CONFIG_EVENT_TRACING */ 1812#endif /* CONFIG_EVENT_TRACING */
1845 1813
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..908038f57440 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -39,20 +39,26 @@
39#include "trace_stat.h" 39#include "trace_stat.h"
40 40
41#define FTRACE_WARN_ON(cond) \ 41#define FTRACE_WARN_ON(cond) \
42 do { \ 42 ({ \
43 if (WARN_ON(cond)) \ 43 int ___r = cond; \
44 if (WARN_ON(___r)) \
44 ftrace_kill(); \ 45 ftrace_kill(); \
45 } while (0) 46 ___r; \
47 })
46 48
47#define FTRACE_WARN_ON_ONCE(cond) \ 49#define FTRACE_WARN_ON_ONCE(cond) \
48 do { \ 50 ({ \
49 if (WARN_ON_ONCE(cond)) \ 51 int ___r = cond; \
52 if (WARN_ON_ONCE(___r)) \
50 ftrace_kill(); \ 53 ftrace_kill(); \
51 } while (0) 54 ___r; \
55 })
52 56
53/* hash bits for specific function selection */ 57/* hash bits for specific function selection */
54#define FTRACE_HASH_BITS 7 58#define FTRACE_HASH_BITS 7
55#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) 59#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
60#define FTRACE_HASH_DEFAULT_BITS 10
61#define FTRACE_HASH_MAX_BITS 12
56 62
57/* ftrace_enabled is a method to turn ftrace on or off */ 63/* ftrace_enabled is a method to turn ftrace on or off */
58int ftrace_enabled __read_mostly; 64int ftrace_enabled __read_mostly;
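The FTRACE_WARN_ON() rewrite above turns the do { } while (0) macros into GCC statement expressions so they evaluate to the tested condition; later hunks in this patch rely on that to write checks like if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL;. A toy sketch of the same pattern outside the kernel (warn_on() below is an invented stub, not the kernel's WARN_ON):

#include <stdio.h>

/* Invented stub: report the failed check and hand the result back. */
static int warn_on(int cond, const char *expr)
{
        if (cond)
                fprintf(stderr, "warning hit: %s\n", expr);
        return cond;
}

/* GCC statement expression: the ({ ... }) block yields ___r, so the
 * macro can be used both as a statement and inside an if () condition. */
#define TOY_WARN_ON(cond)                       \
        ({                                      \
                int ___r = !!(cond);            \
                warn_on(___r, #cond);           \
                ___r;                           \
        })

int main(void)
{
        int registered = 0;

        TOY_WARN_ON(registered);                /* plain statement, as before */
        if (TOY_WARN_ON(!registered))           /* new: warn and bail out in one step */
                return 1;
        return 0;
}
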
@@ -81,28 +87,40 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
81 .func = ftrace_stub, 87 .func = ftrace_stub,
82}; 88};
83 89
84static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 90static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
91static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
85ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 92ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 93ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 94ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
95static struct ftrace_ops global_ops;
96
97static void
98ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
88 99
89/* 100/*
90 * Traverse the ftrace_list, invoking all entries. The reason that we 101 * Traverse the ftrace_global_list, invoking all entries. The reason that we
91 * can use rcu_dereference_raw() is that elements removed from this list 102 * can use rcu_dereference_raw() is that elements removed from this list
92 * are simply leaked, so there is no need to interact with a grace-period 103 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle 104 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list. 105 * concurrent insertions into the ftrace_global_list.
95 * 106 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations! 107 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */ 108 */
98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 109static void ftrace_global_list_func(unsigned long ip,
110 unsigned long parent_ip)
99{ 111{
100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ 112 struct ftrace_ops *op;
101 113
114 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
115 return;
116
117 trace_recursion_set(TRACE_GLOBAL_BIT);
118 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
102 while (op != &ftrace_list_end) { 119 while (op != &ftrace_list_end) {
103 op->func(ip, parent_ip); 120 op->func(ip, parent_ip);
104 op = rcu_dereference_raw(op->next); /*see above*/ 121 op = rcu_dereference_raw(op->next); /*see above*/
105 }; 122 };
123 trace_recursion_clear(TRACE_GLOBAL_BIT);
106} 124}
107 125
108static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) 126static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
@@ -147,46 +165,69 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
147} 165}
148#endif 166#endif
149 167
150static int __register_ftrace_function(struct ftrace_ops *ops) 168static void update_global_ops(void)
151{ 169{
152 ops->next = ftrace_list; 170 ftrace_func_t func;
171
153 /* 172 /*
154 * We are entering ops into the ftrace_list but another 173 * If there's only one function registered, then call that
155 * CPU might be walking that list. We need to make sure 174 * function directly. Otherwise, we need to iterate over the
156 * the ops->next pointer is valid before another CPU sees 175 * registered callers.
157 * the ops pointer included into the ftrace_list.
158 */ 176 */
159 rcu_assign_pointer(ftrace_list, ops); 177 if (ftrace_global_list == &ftrace_list_end ||
178 ftrace_global_list->next == &ftrace_list_end)
179 func = ftrace_global_list->func;
180 else
181 func = ftrace_global_list_func;
160 182
161 if (ftrace_enabled) { 183 /* If we filter on pids, update to use the pid function */
162 ftrace_func_t func; 184 if (!list_empty(&ftrace_pids)) {
185 set_ftrace_pid_function(func);
186 func = ftrace_pid_func;
187 }
163 188
164 if (ops->next == &ftrace_list_end) 189 global_ops.func = func;
165 func = ops->func; 190}
166 else
167 func = ftrace_list_func;
168 191
169 if (!list_empty(&ftrace_pids)) { 192static void update_ftrace_function(void)
170 set_ftrace_pid_function(func); 193{
171 func = ftrace_pid_func; 194 ftrace_func_t func;
172 } 195
196 update_global_ops();
197
198 /*
199 * If we are at the end of the list and this ops is
200 * not dynamic, then have the mcount trampoline call
201 * the function directly
202 */
203 if (ftrace_ops_list == &ftrace_list_end ||
204 (ftrace_ops_list->next == &ftrace_list_end &&
205 !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC)))
206 func = ftrace_ops_list->func;
207 else
208 func = ftrace_ops_list_func;
173 209
174 /*
175 * For one func, simply call it directly.
176 * For more than one func, call the chain.
177 */
178#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 210#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
179 ftrace_trace_function = func; 211 ftrace_trace_function = func;
180#else 212#else
181 __ftrace_trace_function = func; 213 __ftrace_trace_function = func;
182 ftrace_trace_function = ftrace_test_stop_func; 214 ftrace_trace_function = ftrace_test_stop_func;
183#endif 215#endif
184 } 216}
185 217
186 return 0; 218static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
219{
220 ops->next = *list;
221 /*
222 * We are entering ops into the list but another
223 * CPU might be walking that list. We need to make sure
224 * the ops->next pointer is valid before another CPU sees
225 * the ops pointer included into the list.
226 */
227 rcu_assign_pointer(*list, ops);
187} 228}
188 229
189static int __unregister_ftrace_function(struct ftrace_ops *ops) 230static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
190{ 231{
191 struct ftrace_ops **p; 232 struct ftrace_ops **p;
192 233
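update_global_ops()/update_ftrace_function() above choose what the mcount trampoline calls: with a single registered entry the callback is called directly, otherwise a list walker dispatches to every entry, and add_ftrace_ops() publishes new entries with rcu_assign_pointer() so concurrent walkers always see a valid ->next. A simplified single-threaded sketch of that dispatch choice (no RCU, no pid filtering; all names invented):

#include <stdio.h>

typedef void (*trace_fn)(unsigned long ip);

struct toy_ops {
        trace_fn func;
        struct toy_ops *next;
};

/* List terminator, playing the role of ftrace_list_end. */
static void toy_stub(unsigned long ip) { (void)ip; }
static struct toy_ops list_end = { .func = toy_stub };
static struct toy_ops *ops_list = &list_end;

/* Walk every registered callback (the ftrace_ops_list_func role). */
static void toy_list_func(unsigned long ip)
{
        for (struct toy_ops *op = ops_list; op != &list_end; op = op->next)
                op->func(ip);
}

/* One entry: call it directly. Several entries: call the list walker. */
static trace_fn pick_trace_function(void)
{
        if (ops_list == &list_end || ops_list->next == &list_end)
                return ops_list->func;
        return toy_list_func;
}

static void toy_add_ops(struct toy_ops *op)
{
        op->next = ops_list;
        ops_list = op;                  /* the kernel uses rcu_assign_pointer() here */
}

static void hit_a(unsigned long ip) { printf("a 0x%lx\n", ip); }
static void hit_b(unsigned long ip) { printf("b 0x%lx\n", ip); }

int main(void)
{
        static struct toy_ops a = { .func = hit_a };
        static struct toy_ops b = { .func = hit_b };

        toy_add_ops(&a);
        pick_trace_function()(0x1000);  /* one entry: hit_a called directly */
        toy_add_ops(&b);
        pick_trace_function()(0x2000);  /* two entries: the walker runs both */
        return 0;
}
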
@@ -194,13 +235,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
194 * If we are removing the last function, then simply point 235 * If we are removing the last function, then simply point
195 * to the ftrace_stub. 236 * to the ftrace_stub.
196 */ 237 */
197 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 238 if (*list == ops && ops->next == &ftrace_list_end) {
198 ftrace_trace_function = ftrace_stub; 239 *list = &ftrace_list_end;
199 ftrace_list = &ftrace_list_end;
200 return 0; 240 return 0;
201 } 241 }
202 242
203 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 243 for (p = list; *p != &ftrace_list_end; p = &(*p)->next)
204 if (*p == ops) 244 if (*p == ops)
205 break; 245 break;
206 246
@@ -208,53 +248,83 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
208 return -1; 248 return -1;
209 249
210 *p = (*p)->next; 250 *p = (*p)->next;
251 return 0;
252}
211 253
212 if (ftrace_enabled) { 254static int __register_ftrace_function(struct ftrace_ops *ops)
213 /* If we only have one func left, then call that directly */ 255{
214 if (ftrace_list->next == &ftrace_list_end) { 256 if (ftrace_disabled)
215 ftrace_func_t func = ftrace_list->func; 257 return -ENODEV;
216 258
217 if (!list_empty(&ftrace_pids)) { 259 if (FTRACE_WARN_ON(ops == &global_ops))
218 set_ftrace_pid_function(func); 260 return -EINVAL;
219 func = ftrace_pid_func; 261
220 } 262 if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
221#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 263 return -EBUSY;
222 ftrace_trace_function = func; 264
223#else 265 if (!core_kernel_data((unsigned long)ops))
224 __ftrace_trace_function = func; 266 ops->flags |= FTRACE_OPS_FL_DYNAMIC;
225#endif 267
226 } 268 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
227 } 269 int first = ftrace_global_list == &ftrace_list_end;
270 add_ftrace_ops(&ftrace_global_list, ops);
271 ops->flags |= FTRACE_OPS_FL_ENABLED;
272 if (first)
273 add_ftrace_ops(&ftrace_ops_list, &global_ops);
274 } else
275 add_ftrace_ops(&ftrace_ops_list, ops);
276
277 if (ftrace_enabled)
278 update_ftrace_function();
228 279
229 return 0; 280 return 0;
230} 281}
231 282
232static void ftrace_update_pid_func(void) 283static int __unregister_ftrace_function(struct ftrace_ops *ops)
233{ 284{
234 ftrace_func_t func; 285 int ret;
235 286
236 if (ftrace_trace_function == ftrace_stub) 287 if (ftrace_disabled)
237 return; 288 return -ENODEV;
238 289
239#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 290 if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
240 func = ftrace_trace_function; 291 return -EBUSY;
241#else
242 func = __ftrace_trace_function;
243#endif
244 292
245 if (!list_empty(&ftrace_pids)) { 293 if (FTRACE_WARN_ON(ops == &global_ops))
246 set_ftrace_pid_function(func); 294 return -EINVAL;
247 func = ftrace_pid_func;
248 } else {
249 if (func == ftrace_pid_func)
250 func = ftrace_pid_function;
251 }
252 295
253#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 296 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
254 ftrace_trace_function = func; 297 ret = remove_ftrace_ops(&ftrace_global_list, ops);
255#else 298 if (!ret && ftrace_global_list == &ftrace_list_end)
256 __ftrace_trace_function = func; 299 ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
257#endif 300 if (!ret)
301 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
302 } else
303 ret = remove_ftrace_ops(&ftrace_ops_list, ops);
304
305 if (ret < 0)
306 return ret;
307
308 if (ftrace_enabled)
309 update_ftrace_function();
310
311 /*
312 * Dynamic ops may be freed, we must make sure that all
313 * callers are done before leaving this function.
314 */
315 if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
316 synchronize_sched();
317
318 return 0;
319}
320
321static void ftrace_update_pid_func(void)
322{
323 /* Only do something if we are tracing something */
324 if (ftrace_trace_function == ftrace_stub)
325 return;
326
327 update_ftrace_function();
258} 328}
259 329
260#ifdef CONFIG_FUNCTION_PROFILER 330#ifdef CONFIG_FUNCTION_PROFILER
@@ -800,6 +870,7 @@ static const struct file_operations ftrace_profile_fops = {
800 .open = tracing_open_generic, 870 .open = tracing_open_generic,
801 .read = ftrace_profile_read, 871 .read = ftrace_profile_read,
802 .write = ftrace_profile_write, 872 .write = ftrace_profile_write,
873 .llseek = default_llseek,
803}; 874};
804 875
805/* used to initialize the real stat files */ 876/* used to initialize the real stat files */
@@ -884,13 +955,38 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 955 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 956 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 957 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 958 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 959 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5), 960};
890 FTRACE_STOP_FUNC_RET = (1 << 6), 961struct ftrace_func_entry {
962 struct hlist_node hlist;
963 unsigned long ip;
964};
965
966struct ftrace_hash {
967 unsigned long size_bits;
968 struct hlist_head *buckets;
969 unsigned long count;
970 struct rcu_head rcu;
971};
972
973/*
974 * We make these constant because no one should touch them,
975 * but they are used as the default "empty hash", to avoid allocating
976 * it all the time. These are in a read only section such that if
977 * anyone does try to modify it, it will cause an exception.
978 */
979static const struct hlist_head empty_buckets[1];
980static const struct ftrace_hash empty_hash = {
981 .buckets = (struct hlist_head *)empty_buckets,
891}; 982};
983#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash)
892 984
893static int ftrace_filtered; 985static struct ftrace_ops global_ops = {
986 .func = ftrace_stub,
987 .notrace_hash = EMPTY_HASH,
988 .filter_hash = EMPTY_HASH,
989};
894 990
895static struct dyn_ftrace *ftrace_new_addrs; 991static struct dyn_ftrace *ftrace_new_addrs;
896 992
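The empty_hash/EMPTY_HASH definitions above act as a shared read-only sentinel: an ops with no filter or notrace entries points at it instead of owning an allocation, and free_ftrace_hash() later in the patch skips it explicitly. A small sketch of that pattern, with invented types:

#include <stdlib.h>

/* Invented stand-in; only the sentinel idea is being illustrated. */
struct toy_hash {
        unsigned long count;
};

/* One shared, constant "no entries" instance used as the default. */
static const struct toy_hash toy_empty_hash;
#define TOY_EMPTY_HASH ((struct toy_hash *)&toy_empty_hash)

static struct toy_hash *toy_alloc_hash(void)
{
        return calloc(1, sizeof(struct toy_hash));
}

static void toy_free_hash(struct toy_hash *hash)
{
        /* Mirror free_ftrace_hash(): never free or modify the sentinel. */
        if (!hash || hash == TOY_EMPTY_HASH)
                return;
        free(hash);
}

int main(void)
{
        struct toy_hash *filter = TOY_EMPTY_HASH;       /* default: no allocation */

        toy_free_hash(filter);                          /* safe no-op */
        filter = toy_alloc_hash();                      /* allocate only when entries appear */
        if (!filter)
                return 1;
        filter->count = 1;
        toy_free_hash(filter);
        return 0;
}
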
@@ -913,6 +1009,269 @@ static struct ftrace_page *ftrace_pages;
913 1009
914static struct dyn_ftrace *ftrace_free_records; 1010static struct dyn_ftrace *ftrace_free_records;
915 1011
1012static struct ftrace_func_entry *
1013ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1014{
1015 unsigned long key;
1016 struct ftrace_func_entry *entry;
1017 struct hlist_head *hhd;
1018 struct hlist_node *n;
1019
1020 if (!hash->count)
1021 return NULL;
1022
1023 if (hash->size_bits > 0)
1024 key = hash_long(ip, hash->size_bits);
1025 else
1026 key = 0;
1027
1028 hhd = &hash->buckets[key];
1029
1030 hlist_for_each_entry_rcu(entry, n, hhd, hlist) {
1031 if (entry->ip == ip)
1032 return entry;
1033 }
1034 return NULL;
1035}
1036
1037static void __add_hash_entry(struct ftrace_hash *hash,
1038 struct ftrace_func_entry *entry)
1039{
1040 struct hlist_head *hhd;
1041 unsigned long key;
1042
1043 if (hash->size_bits)
1044 key = hash_long(entry->ip, hash->size_bits);
1045 else
1046 key = 0;
1047
1048 hhd = &hash->buckets[key];
1049 hlist_add_head(&entry->hlist, hhd);
1050 hash->count++;
1051}
1052
1053static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
1054{
1055 struct ftrace_func_entry *entry;
1056
1057 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1058 if (!entry)
1059 return -ENOMEM;
1060
1061 entry->ip = ip;
1062 __add_hash_entry(hash, entry);
1063
1064 return 0;
1065}
1066
1067static void
1068free_hash_entry(struct ftrace_hash *hash,
1069 struct ftrace_func_entry *entry)
1070{
1071 hlist_del(&entry->hlist);
1072 kfree(entry);
1073 hash->count--;
1074}
1075
1076static void
1077remove_hash_entry(struct ftrace_hash *hash,
1078 struct ftrace_func_entry *entry)
1079{
1080 hlist_del(&entry->hlist);
1081 hash->count--;
1082}
1083
1084static void ftrace_hash_clear(struct ftrace_hash *hash)
1085{
1086 struct hlist_head *hhd;
1087 struct hlist_node *tp, *tn;
1088 struct ftrace_func_entry *entry;
1089 int size = 1 << hash->size_bits;
1090 int i;
1091
1092 if (!hash->count)
1093 return;
1094
1095 for (i = 0; i < size; i++) {
1096 hhd = &hash->buckets[i];
1097 hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist)
1098 free_hash_entry(hash, entry);
1099 }
1100 FTRACE_WARN_ON(hash->count);
1101}
1102
1103static void free_ftrace_hash(struct ftrace_hash *hash)
1104{
1105 if (!hash || hash == EMPTY_HASH)
1106 return;
1107 ftrace_hash_clear(hash);
1108 kfree(hash->buckets);
1109 kfree(hash);
1110}
1111
1112static void __free_ftrace_hash_rcu(struct rcu_head *rcu)
1113{
1114 struct ftrace_hash *hash;
1115
1116 hash = container_of(rcu, struct ftrace_hash, rcu);
1117 free_ftrace_hash(hash);
1118}
1119
1120static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
1121{
1122 if (!hash || hash == EMPTY_HASH)
1123 return;
1124 call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
1125}
1126
1127static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
1128{
1129 struct ftrace_hash *hash;
1130 int size;
1131
1132 hash = kzalloc(sizeof(*hash), GFP_KERNEL);
1133 if (!hash)
1134 return NULL;
1135
1136 size = 1 << size_bits;
1137 hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL);
1138
1139 if (!hash->buckets) {
1140 kfree(hash);
1141 return NULL;
1142 }
1143
1144 hash->size_bits = size_bits;
1145
1146 return hash;
1147}
1148
1149static struct ftrace_hash *
1150alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1151{
1152 struct ftrace_func_entry *entry;
1153 struct ftrace_hash *new_hash;
1154 struct hlist_node *tp;
1155 int size;
1156 int ret;
1157 int i;
1158
1159 new_hash = alloc_ftrace_hash(size_bits);
1160 if (!new_hash)
1161 return NULL;
1162
1163 /* Empty hash? */
1164 if (!hash || !hash->count)
1165 return new_hash;
1166
1167 size = 1 << hash->size_bits;
1168 for (i = 0; i < size; i++) {
1169 hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) {
1170 ret = add_hash_entry(new_hash, entry->ip);
1171 if (ret < 0)
1172 goto free_hash;
1173 }
1174 }
1175
1176 FTRACE_WARN_ON(new_hash->count != hash->count);
1177
1178 return new_hash;
1179
1180 free_hash:
1181 free_ftrace_hash(new_hash);
1182 return NULL;
1183}
1184
1185static int
1186ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1187{
1188 struct ftrace_func_entry *entry;
1189 struct hlist_node *tp, *tn;
1190 struct hlist_head *hhd;
1191 struct ftrace_hash *old_hash;
1192 struct ftrace_hash *new_hash;
1193 unsigned long key;
1194 int size = src->count;
1195 int bits = 0;
1196 int i;
1197
1198 /*
1199 * If the new source is empty, just free dst and assign it
1200 * the empty_hash.
1201 */
1202 if (!src->count) {
1203 free_ftrace_hash_rcu(*dst);
1204 rcu_assign_pointer(*dst, EMPTY_HASH);
1205 return 0;
1206 }
1207
1208 /*
1209 * Make the hash size about 1/2 the # found
1210 */
1211 for (size /= 2; size; size >>= 1)
1212 bits++;
1213
1214 /* Don't allocate too much */
1215 if (bits > FTRACE_HASH_MAX_BITS)
1216 bits = FTRACE_HASH_MAX_BITS;
1217
1218 new_hash = alloc_ftrace_hash(bits);
1219 if (!new_hash)
1220 return -ENOMEM;
1221
1222 size = 1 << src->size_bits;
1223 for (i = 0; i < size; i++) {
1224 hhd = &src->buckets[i];
1225 hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) {
1226 if (bits > 0)
1227 key = hash_long(entry->ip, bits);
1228 else
1229 key = 0;
1230 remove_hash_entry(src, entry);
1231 __add_hash_entry(new_hash, entry);
1232 }
1233 }
1234
1235 old_hash = *dst;
1236 rcu_assign_pointer(*dst, new_hash);
1237 free_ftrace_hash_rcu(old_hash);
1238
1239 return 0;
1240}
1241
1242/*
1243 * Test the hashes for this ops to see if we want to call
1244 * the ops->func or not.
1245 *
1246 * It's a match if the ip is in the ops->filter_hash or
1247 * the filter_hash does not exist or is empty,
1248 * AND
1249 * the ip is not in the ops->notrace_hash.
1250 *
1251 * This needs to be called with preemption disabled as
1252 * the hashes are freed with call_rcu_sched().
1253 */
1254static int
1255ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1256{
1257 struct ftrace_hash *filter_hash;
1258 struct ftrace_hash *notrace_hash;
1259 int ret;
1260
1261 filter_hash = rcu_dereference_raw(ops->filter_hash);
1262 notrace_hash = rcu_dereference_raw(ops->notrace_hash);
1263
1264 if ((!filter_hash || !filter_hash->count ||
1265 ftrace_lookup_ip(filter_hash, ip)) &&
1266 (!notrace_hash || !notrace_hash->count ||
1267 !ftrace_lookup_ip(notrace_hash, ip)))
1268 ret = 1;
1269 else
1270 ret = 0;
1271
1272 return ret;
1273}
1274
916/* 1275/*
917 * This is a double for. Do not use 'break' to break out of the loop, 1276 * This is a double for. Do not use 'break' to break out of the loop,
918 * you must use a goto. 1277 * you must use a goto.
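ftrace_ops_test() above encodes the per-ops decision: an empty (or absent) filter hash matches every ip, a populated filter hash matches only its entries, and the notrace hash always vetoes. The same rule reduced to booleans, just to make the precedence explicit (illustrative only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* filter_empty: ops has no filter entries (trace everything)
 * in_filter:    ip is listed in the filter hash
 * in_notrace:   ip is listed in the notrace hash */
static bool ops_wants_ip(bool filter_empty, bool in_filter, bool in_notrace)
{
        return (filter_empty || in_filter) && !in_notrace;
}

int main(void)
{
        /* Walk the whole truth table to show that notrace always wins. */
        for (int fe = 0; fe <= 1; fe++)
                for (int f = 0; f <= 1; f++)
                        for (int n = 0; n <= 1; n++)
                                printf("filter_empty=%d in_filter=%d in_notrace=%d -> %s\n",
                                       fe, f, n,
                                       ops_wants_ip(fe, f, n) ? "trace" : "skip");
        return 0;
}
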
@@ -927,6 +1286,105 @@ static struct dyn_ftrace *ftrace_free_records;
927 } \ 1286 } \
928 } 1287 }
929 1288
1289static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1290 int filter_hash,
1291 bool inc)
1292{
1293 struct ftrace_hash *hash;
1294 struct ftrace_hash *other_hash;
1295 struct ftrace_page *pg;
1296 struct dyn_ftrace *rec;
1297 int count = 0;
1298 int all = 0;
1299
1300 /* Only update if the ops has been registered */
1301 if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
1302 return;
1303
1304 /*
1305 * In the filter_hash case:
1306 * If the count is zero, we update all records.
1307 * Otherwise we just update the items in the hash.
1308 *
1309 * In the notrace_hash case:
1310 * We enable the update in the hash.
1311 * As disabling notrace means enabling the tracing,
1312 * and enabling notrace means disabling, the inc variable
1313 * gets inversed.
1314 */
1315 if (filter_hash) {
1316 hash = ops->filter_hash;
1317 other_hash = ops->notrace_hash;
1318 if (!hash || !hash->count)
1319 all = 1;
1320 } else {
1321 inc = !inc;
1322 hash = ops->notrace_hash;
1323 other_hash = ops->filter_hash;
1324 /*
1325 * If the notrace hash has no items,
1326 * then there's nothing to do.
1327 */
1328 if (hash && !hash->count)
1329 return;
1330 }
1331
1332 do_for_each_ftrace_rec(pg, rec) {
1333 int in_other_hash = 0;
1334 int in_hash = 0;
1335 int match = 0;
1336
1337 if (all) {
1338 /*
1339 * Only the filter_hash affects all records.
1340 * Update if the record is not in the notrace hash.
1341 */
1342 if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
1343 match = 1;
1344 } else {
1345 in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
1346 in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
1347
1348 /*
1349 *
1350 */
1351 if (filter_hash && in_hash && !in_other_hash)
1352 match = 1;
1353 else if (!filter_hash && in_hash &&
1354 (in_other_hash || !other_hash->count))
1355 match = 1;
1356 }
1357 if (!match)
1358 continue;
1359
1360 if (inc) {
1361 rec->flags++;
1362 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
1363 return;
1364 } else {
1365 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
1366 return;
1367 rec->flags--;
1368 }
1369 count++;
1370 /* Shortcut, if we handled all records, we are done. */
1371 if (!all && count == hash->count)
1372 return;
1373 } while_for_each_ftrace_rec();
1374}
1375
1376static void ftrace_hash_rec_disable(struct ftrace_ops *ops,
1377 int filter_hash)
1378{
1379 __ftrace_hash_rec_update(ops, filter_hash, 0);
1380}
1381
1382static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
1383 int filter_hash)
1384{
1385 __ftrace_hash_rec_update(ops, filter_hash, 1);
1386}
1387
930static void ftrace_free_rec(struct dyn_ftrace *rec) 1388static void ftrace_free_rec(struct dyn_ftrace *rec)
931{ 1389{
932 rec->freelist = ftrace_free_records; 1390 rec->freelist = ftrace_free_records;
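__ftrace_hash_rec_update() above maintains a per-record reference count in the non-flag bits of rec->flags: every enabled ops that wants a function contributes one reference, the notrace direction subtracts (hence the inverted inc), and a record stays patched only while its count is non-zero. A toy model of the net effect of that bookkeeping, with invented function names and sets (it recomputes counts from scratch rather than updating them incrementally):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static const char *recs[] = { "schedule", "kfree", "vfs_read" };

static bool in_set(const char *name, const char **set, int n)
{
        for (int i = 0; i < n; i++)
                if (!strcmp(name, set[i]))
                        return true;
        return false;
}

/* An enabled ops adds one reference to every record its filter hash
 * selects (empty filter = all records) minus what its notrace hash
 * excludes. */
static void account(int *count, const char **filter, int nfilter,
                    const char **notrace, int nnotrace)
{
        for (int i = 0; i < 3; i++) {
                bool want = (nfilter == 0 || in_set(recs[i], filter, nfilter)) &&
                            !in_set(recs[i], notrace, nnotrace);
                if (want)
                        count[i]++;
        }
}

int main(void)
{
        int count[3] = { 0 };
        const char *a_notrace[] = { "kfree" };          /* ops A: trace all but kfree */
        const char *b_filter[]  = { "schedule" };       /* ops B: trace only schedule */

        account(count, NULL, 0, a_notrace, 1);
        account(count, b_filter, 1, NULL, 0);

        /* A record calls into ftrace iff its count is non-zero, which is
         * what the (rec->flags & ~FTRACE_FL_MASK) test checks. */
        for (int i = 0; i < 3; i++)
                printf("%-10s refs=%d -> %s\n", recs[i], count[i],
                       count[i] ? "enabled" : "nop");
        return 0;
}
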
@@ -1048,18 +1506,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1048 ftrace_addr = (unsigned long)FTRACE_ADDR; 1506 ftrace_addr = (unsigned long)FTRACE_ADDR;
1049 1507
1050 /* 1508 /*
1051 * If this record is not to be traced or we want to disable it, 1509 * If we are enabling tracing:
1052 * then disable it.
1053 * 1510 *
1054 * If we want to enable it and filtering is off, then enable it. 1511 * If the record has a ref count, then we need to enable it
1512 * because someone is using it.
1055 * 1513 *
1056 * If we want to enable it and filtering is on, enable it only if 1514 * Otherwise we make sure its disabled.
1057 * it's filtered 1515 *
1516 * If we are disabling tracing, then disable all records that
1517 * are enabled.
1058 */ 1518 */
1059 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { 1519 if (enable && (rec->flags & ~FTRACE_FL_MASK))
1060 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) 1520 flag = FTRACE_FL_ENABLED;
1061 flag = FTRACE_FL_ENABLED;
1062 }
1063 1521
1064 /* If the state of this record hasn't changed, then do nothing */ 1522 /* If the state of this record hasn't changed, then do nothing */
1065 if ((rec->flags & FTRACE_FL_ENABLED) == flag) 1523 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
@@ -1080,19 +1538,16 @@ static void ftrace_replace_code(int enable)
1080 struct ftrace_page *pg; 1538 struct ftrace_page *pg;
1081 int failed; 1539 int failed;
1082 1540
1541 if (unlikely(ftrace_disabled))
1542 return;
1543
1083 do_for_each_ftrace_rec(pg, rec) { 1544 do_for_each_ftrace_rec(pg, rec) {
1084 /* 1545 /* Skip over free records */
1085 * Skip over free records, records that have 1546 if (rec->flags & FTRACE_FL_FREE)
1086 * failed and not converted.
1087 */
1088 if (rec->flags & FTRACE_FL_FREE ||
1089 rec->flags & FTRACE_FL_FAILED ||
1090 !(rec->flags & FTRACE_FL_CONVERTED))
1091 continue; 1547 continue;
1092 1548
1093 failed = __ftrace_replace_code(rec, enable); 1549 failed = __ftrace_replace_code(rec, enable);
1094 if (failed) { 1550 if (failed) {
1095 rec->flags |= FTRACE_FL_FAILED;
1096 ftrace_bug(failed, rec->ip); 1551 ftrace_bug(failed, rec->ip);
1097 /* Stop processing */ 1552 /* Stop processing */
1098 return; 1553 return;
@@ -1108,10 +1563,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
1108 1563
1109 ip = rec->ip; 1564 ip = rec->ip;
1110 1565
1566 if (unlikely(ftrace_disabled))
1567 return 0;
1568
1111 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); 1569 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
1112 if (ret) { 1570 if (ret) {
1113 ftrace_bug(ret, ip); 1571 ftrace_bug(ret, ip);
1114 rec->flags |= FTRACE_FL_FAILED;
1115 return 0; 1572 return 0;
1116 } 1573 }
1117 return 1; 1574 return 1;
@@ -1172,6 +1629,7 @@ static void ftrace_run_update_code(int command)
1172 1629
1173static ftrace_func_t saved_ftrace_func; 1630static ftrace_func_t saved_ftrace_func;
1174static int ftrace_start_up; 1631static int ftrace_start_up;
1632static int global_start_up;
1175 1633
1176static void ftrace_startup_enable(int command) 1634static void ftrace_startup_enable(int command)
1177{ 1635{
@@ -1186,19 +1644,38 @@ static void ftrace_startup_enable(int command)
1186 ftrace_run_update_code(command); 1644 ftrace_run_update_code(command);
1187} 1645}
1188 1646
1189static void ftrace_startup(int command) 1647static int ftrace_startup(struct ftrace_ops *ops, int command)
1190{ 1648{
1649 bool hash_enable = true;
1650
1191 if (unlikely(ftrace_disabled)) 1651 if (unlikely(ftrace_disabled))
1192 return; 1652 return -ENODEV;
1193 1653
1194 ftrace_start_up++; 1654 ftrace_start_up++;
1195 command |= FTRACE_ENABLE_CALLS; 1655 command |= FTRACE_ENABLE_CALLS;
1196 1656
1657 /* ops marked global share the filter hashes */
1658 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
1659 ops = &global_ops;
1660 /* Don't update hash if global is already set */
1661 if (global_start_up)
1662 hash_enable = false;
1663 global_start_up++;
1664 }
1665
1666 ops->flags |= FTRACE_OPS_FL_ENABLED;
1667 if (hash_enable)
1668 ftrace_hash_rec_enable(ops, 1);
1669
1197 ftrace_startup_enable(command); 1670 ftrace_startup_enable(command);
1671
1672 return 0;
1198} 1673}
1199 1674
1200static void ftrace_shutdown(int command) 1675static void ftrace_shutdown(struct ftrace_ops *ops, int command)
1201{ 1676{
1677 bool hash_disable = true;
1678
1202 if (unlikely(ftrace_disabled)) 1679 if (unlikely(ftrace_disabled))
1203 return; 1680 return;
1204 1681
@@ -1210,6 +1687,23 @@ static void ftrace_shutdown(int command)
1210 */ 1687 */
1211 WARN_ON_ONCE(ftrace_start_up < 0); 1688 WARN_ON_ONCE(ftrace_start_up < 0);
1212 1689
1690 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
1691 ops = &global_ops;
1692 global_start_up--;
1693 WARN_ON_ONCE(global_start_up < 0);
1694 /* Don't update hash if global still has users */
1695 if (global_start_up) {
1696 WARN_ON_ONCE(!ftrace_start_up);
1697 hash_disable = false;
1698 }
1699 }
1700
1701 if (hash_disable)
1702 ftrace_hash_rec_disable(ops, 1);
1703
1704 if (ops != &global_ops || !global_start_up)
1705 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
1706
1213 if (!ftrace_start_up) 1707 if (!ftrace_start_up)
1214 command |= FTRACE_DISABLE_CALLS; 1708 command |= FTRACE_DISABLE_CALLS;
1215 1709
@@ -1226,8 +1720,6 @@ static void ftrace_shutdown(int command)
1226 1720
1227static void ftrace_startup_sysctl(void) 1721static void ftrace_startup_sysctl(void)
1228{ 1722{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1723 if (unlikely(ftrace_disabled))
1232 return; 1724 return;
1233 1725
@@ -1235,23 +1727,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1727 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1728 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1729 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1730 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1731}
1242 1732
1243static void ftrace_shutdown_sysctl(void) 1733static void ftrace_shutdown_sysctl(void)
1244{ 1734{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1735 if (unlikely(ftrace_disabled))
1248 return; 1736 return;
1249 1737
1250 /* ftrace_start_up is true if ftrace is running */ 1738 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1739 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1740 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1741}
1256 1742
1257static cycle_t ftrace_update_time; 1743static cycle_t ftrace_update_time;
@@ -1277,15 +1763,15 @@ static int ftrace_update_code(struct module *mod)
1277 p->flags = 0L; 1763 p->flags = 0L;
1278 1764
1279 /* 1765 /*
1280 * Do the initial record convertion from mcount jump 1766 * Do the initial record conversion from mcount jump
1281 * to the NOP instructions. 1767 * to the NOP instructions.
1282 */ 1768 */
1283 if (!ftrace_code_disable(mod, p)) { 1769 if (!ftrace_code_disable(mod, p)) {
1284 ftrace_free_rec(p); 1770 ftrace_free_rec(p);
1285 continue; 1771 /* Game over */
1772 break;
1286 } 1773 }
1287 1774
1288 p->flags |= FTRACE_FL_CONVERTED;
1289 ftrace_update_cnt++; 1775 ftrace_update_cnt++;
1290 1776
1291 /* 1777 /*
@@ -1360,32 +1846,39 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
1360enum { 1846enum {
1361 FTRACE_ITER_FILTER = (1 << 0), 1847 FTRACE_ITER_FILTER = (1 << 0),
1362 FTRACE_ITER_NOTRACE = (1 << 1), 1848 FTRACE_ITER_NOTRACE = (1 << 1),
1363 FTRACE_ITER_FAILURES = (1 << 2), 1849 FTRACE_ITER_PRINTALL = (1 << 2),
1364 FTRACE_ITER_PRINTALL = (1 << 3), 1850 FTRACE_ITER_HASH = (1 << 3),
1365 FTRACE_ITER_HASH = (1 << 4), 1851 FTRACE_ITER_ENABLED = (1 << 4),
1366}; 1852};
1367 1853
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1854#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1855
1370struct ftrace_iterator { 1856struct ftrace_iterator {
1371 struct ftrace_page *pg; 1857 loff_t pos;
1372 int hidx; 1858 loff_t func_pos;
1373 int idx; 1859 struct ftrace_page *pg;
1374 unsigned flags; 1860 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1861 struct ftrace_func_probe *probe;
1862 struct trace_parser parser;
1863 struct ftrace_hash *hash;
1864 struct ftrace_ops *ops;
1865 int hidx;
1866 int idx;
1867 unsigned flags;
1376}; 1868};
1377 1869
1378static void * 1870static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1871t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1872{
1381 struct ftrace_iterator *iter = m->private; 1873 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1874 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1875 struct hlist_head *hhd;
1384 1876
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1877 (*pos)++;
1878 iter->pos = *pos;
1388 1879
1880 if (iter->probe)
1881 hnd = &iter->probe->node;
1389 retry: 1882 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1883 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1884 return NULL;
@@ -1408,7 +1901,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1901 }
1409 } 1902 }
1410 1903
1411 return hnd; 1904 if (WARN_ON_ONCE(!hnd))
1905 return NULL;
1906
1907 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1908
1909 return iter;
1412} 1910}
1413 1911
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1912static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1915,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1915 void *p = NULL;
1418 loff_t l; 1916 loff_t l;
1419 1917
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1918 if (iter->func_pos > *pos)
1421 *pos = 0; 1919 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1920
1425 iter->hidx = 0; 1921 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1922 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1923 p = t_hash_next(m, &l);
1428 if (!p) 1924 if (!p)
1429 break; 1925 break;
1430 } 1926 }
1431 return p; 1927 if (!p)
1928 return NULL;
1929
1930 /* Only set this if we have an item */
1931 iter->flags |= FTRACE_ITER_HASH;
1932
1933 return iter;
1432} 1934}
1433 1935
1434static int t_hash_show(struct seq_file *m, void *v) 1936static int
1937t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1938{
1436 struct ftrace_func_probe *rec; 1939 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1940
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1941 rec = iter->probe;
1942 if (WARN_ON_ONCE(!rec))
1943 return -EIO;
1440 1944
1441 if (rec->ops->print) 1945 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1946 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
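The set_ftrace_filter seq_file output is really two listings glued together, plain function records followed by probe-hash entries, and the iterator rework above records where the first listing ended in iter->func_pos so t_hash_start() can turn the global position into an offset within the hash (the *pos - iter->func_pos shift). A toy illustration of that offset arithmetic, with arrays standing in for the two record sets:

#include <stdio.h>

/* Invented stand-ins: the "function" records and the "probe hash" entries. */
static const char *funcs[]  = { "f1", "f2", "f3" };
static const char *probes[] = { "p1", "p2" };

/* Return the pos-th element of the concatenated sequence, remembering
 * where the first segment ended (the iter->func_pos role). */
static const char *lookup(long pos, long *func_pos)
{
        long nfuncs  = sizeof(funcs) / sizeof(funcs[0]);
        long nprobes = sizeof(probes) / sizeof(probes[0]);

        if (pos < nfuncs)
                return funcs[pos];

        *func_pos = nfuncs;                     /* first segment is exhausted */
        if (pos - *func_pos < nprobes)          /* same shift t_hash_start() applies */
                return probes[pos - *func_pos];
        return NULL;
}

int main(void)
{
        long func_pos = 0;

        for (long pos = 0; ; pos++) {
                const char *s = lookup(pos, &func_pos);
                if (!s)
                        break;
                printf("%ld: %s\n", pos, s);
        }
        return 0;
}
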
@@ -1454,15 +1958,20 @@ static void *
1454t_next(struct seq_file *m, void *v, loff_t *pos) 1958t_next(struct seq_file *m, void *v, loff_t *pos)
1455{ 1959{
1456 struct ftrace_iterator *iter = m->private; 1960 struct ftrace_iterator *iter = m->private;
1961 struct ftrace_ops *ops = &global_ops;
1457 struct dyn_ftrace *rec = NULL; 1962 struct dyn_ftrace *rec = NULL;
1458 1963
1964 if (unlikely(ftrace_disabled))
1965 return NULL;
1966
1459 if (iter->flags & FTRACE_ITER_HASH) 1967 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1968 return t_hash_next(m, pos);
1461 1969
1462 (*pos)++; 1970 (*pos)++;
1971 iter->pos = iter->func_pos = *pos;
1463 1972
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1973 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1974 return t_hash_start(m, pos);
1466 1975
1467 retry: 1976 retry:
1468 if (iter->idx >= iter->pg->index) { 1977 if (iter->idx >= iter->pg->index) {
@@ -1475,38 +1984,59 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1475 rec = &iter->pg->records[iter->idx++]; 1984 rec = &iter->pg->records[iter->idx++];
1476 if ((rec->flags & FTRACE_FL_FREE) || 1985 if ((rec->flags & FTRACE_FL_FREE) ||
1477 1986
1478 (!(iter->flags & FTRACE_ITER_FAILURES) &&
1479 (rec->flags & FTRACE_FL_FAILED)) ||
1480
1481 ((iter->flags & FTRACE_ITER_FAILURES) &&
1482 !(rec->flags & FTRACE_FL_FAILED)) ||
1483
1484 ((iter->flags & FTRACE_ITER_FILTER) && 1987 ((iter->flags & FTRACE_ITER_FILTER) &&
1485 !(rec->flags & FTRACE_FL_FILTER)) || 1988 !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
1486 1989
1487 ((iter->flags & FTRACE_ITER_NOTRACE) && 1990 ((iter->flags & FTRACE_ITER_NOTRACE) &&
1488 !(rec->flags & FTRACE_FL_NOTRACE))) { 1991 !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) ||
1992
1993 ((iter->flags & FTRACE_ITER_ENABLED) &&
1994 !(rec->flags & ~FTRACE_FL_MASK))) {
1995
1489 rec = NULL; 1996 rec = NULL;
1490 goto retry; 1997 goto retry;
1491 } 1998 }
1492 } 1999 }
1493 2000
1494 return rec; 2001 if (!rec)
2002 return t_hash_start(m, pos);
2003
2004 iter->func = rec;
2005
2006 return iter;
2007}
2008
2009static void reset_iter_read(struct ftrace_iterator *iter)
2010{
2011 iter->pos = 0;
2012 iter->func_pos = 0;
2013 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1495} 2014}
1496 2015
1497static void *t_start(struct seq_file *m, loff_t *pos) 2016static void *t_start(struct seq_file *m, loff_t *pos)
1498{ 2017{
1499 struct ftrace_iterator *iter = m->private; 2018 struct ftrace_iterator *iter = m->private;
2019 struct ftrace_ops *ops = &global_ops;
1500 void *p = NULL; 2020 void *p = NULL;
1501 loff_t l; 2021 loff_t l;
1502 2022
1503 mutex_lock(&ftrace_lock); 2023 mutex_lock(&ftrace_lock);
2024
2025 if (unlikely(ftrace_disabled))
2026 return NULL;
2027
2028 /*
2029 * If an lseek was done, then reset and start from beginning.
2030 */
2031 if (*pos < iter->pos)
2032 reset_iter_read(iter);
2033
1504 /* 2034 /*
1505 * For set_ftrace_filter reading, if we have the filter 2035 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 2036 * off, we can short cut and just print out that all
1507 * functions are enabled. 2037 * functions are enabled.
1508 */ 2038 */
1509 if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { 2039 if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
1510 if (*pos > 0) 2040 if (*pos > 0)
1511 return t_hash_start(m, pos); 2041 return t_hash_start(m, pos);
1512 iter->flags |= FTRACE_ITER_PRINTALL; 2042 iter->flags |= FTRACE_ITER_PRINTALL;
@@ -1518,6 +2048,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 2048 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 2049 return t_hash_start(m, pos);
1520 2050
2051 /*
2052 * Unfortunately, we need to restart at ftrace_pages_start
 2053 * every time we let go of the ftrace_lock. This is because
2054 * those pointers can change without the lock.
2055 */
1521 iter->pg = ftrace_pages_start; 2056 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 2057 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 2058 for (l = 0; l <= *pos; ) {
@@ -1526,10 +2061,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 2061 break;
1527 } 2062 }
1528 2063
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 2064 if (!p) {
1530 return t_hash_start(m, pos); 2065 if (iter->flags & FTRACE_ITER_FILTER)
2066 return t_hash_start(m, pos);
1531 2067
1532 return p; 2068 return NULL;
2069 }
2070
2071 return iter;
1533} 2072}
1534 2073
1535static void t_stop(struct seq_file *m, void *p) 2074static void t_stop(struct seq_file *m, void *p)
@@ -1540,20 +2079,26 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 2079static int t_show(struct seq_file *m, void *v)
1541{ 2080{
1542 struct ftrace_iterator *iter = m->private; 2081 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 2082 struct dyn_ftrace *rec;
1544 2083
1545 if (iter->flags & FTRACE_ITER_HASH) 2084 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 2085 return t_hash_show(m, iter);
1547 2086
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 2087 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 2088 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 2089 return 0;
1551 } 2090 }
1552 2091
2092 rec = iter->func;
2093
1553 if (!rec) 2094 if (!rec)
1554 return 0; 2095 return 0;
1555 2096
1556 seq_printf(m, "%ps\n", (void *)rec->ip); 2097 seq_printf(m, "%ps", (void *)rec->ip);
2098 if (iter->flags & FTRACE_ITER_ENABLED)
2099 seq_printf(m, " (%ld)",
2100 rec->flags & ~FTRACE_FL_MASK);
2101 seq_printf(m, "\n");
1557 2102
1558 return 0; 2103 return 0;
1559} 2104}
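
t_show() now prints rec->flags & ~FTRACE_FL_MASK after each name in enabled_functions: the low bits of dyn_ftrace->flags count how many ftrace_ops currently want that record, while FTRACE_FL_MASK covers the flag bits proper. A hedged sketch of that packed flags-plus-counter layout (the bit positions below are illustrative, not the exact kernel constants):

    #include <stdio.h>

    /* Pretend the top two bits are flags and the rest is a counter. */
    #define FL_ENABLED   (1UL << 31)
    #define FL_FREE      (1UL << 30)
    #define FL_MASK      (FL_ENABLED | FL_FREE)
    #define REF_COUNT(f) ((f) & ~FL_MASK)

    int main(void)
    {
            unsigned long flags = FL_ENABLED | 3; /* enabled, wanted by 3 ops */

            printf("enabled=%d refs=%lu\n",
                   !!(flags & FL_ENABLED), REF_COUNT(flags));
            return 0;
    }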
@@ -1593,44 +2138,46 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1593} 2138}
1594 2139
1595static int 2140static int
1596ftrace_failures_open(struct inode *inode, struct file *file) 2141ftrace_enabled_open(struct inode *inode, struct file *file)
1597{ 2142{
1598 int ret;
1599 struct seq_file *m;
1600 struct ftrace_iterator *iter; 2143 struct ftrace_iterator *iter;
2144 int ret;
2145
2146 if (unlikely(ftrace_disabled))
2147 return -ENODEV;
2148
2149 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2150 if (!iter)
2151 return -ENOMEM;
2152
2153 iter->pg = ftrace_pages_start;
2154 iter->flags = FTRACE_ITER_ENABLED;
1601 2155
1602 ret = ftrace_avail_open(inode, file); 2156 ret = seq_open(file, &show_ftrace_seq_ops);
1603 if (!ret) { 2157 if (!ret) {
1604 m = (struct seq_file *)file->private_data; 2158 struct seq_file *m = file->private_data;
1605 iter = (struct ftrace_iterator *)m->private; 2159
1606 iter->flags = FTRACE_ITER_FAILURES; 2160 m->private = iter;
2161 } else {
2162 kfree(iter);
1607 } 2163 }
1608 2164
1609 return ret; 2165 return ret;
1610} 2166}
1611 2167
1612 2168static void ftrace_filter_reset(struct ftrace_hash *hash)
1613static void ftrace_filter_reset(int enable)
1614{ 2169{
1615 struct ftrace_page *pg;
1616 struct dyn_ftrace *rec;
1617 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1618
1619 mutex_lock(&ftrace_lock); 2170 mutex_lock(&ftrace_lock);
1620 if (enable) 2171 ftrace_hash_clear(hash);
1621 ftrace_filtered = 0;
1622 do_for_each_ftrace_rec(pg, rec) {
1623 if (rec->flags & FTRACE_FL_FAILED)
1624 continue;
1625 rec->flags &= ~type;
1626 } while_for_each_ftrace_rec();
1627 mutex_unlock(&ftrace_lock); 2172 mutex_unlock(&ftrace_lock);
1628} 2173}
1629 2174
1630static int 2175static int
1631ftrace_regex_open(struct inode *inode, struct file *file, int enable) 2176ftrace_regex_open(struct ftrace_ops *ops, int flag,
2177 struct inode *inode, struct file *file)
1632{ 2178{
1633 struct ftrace_iterator *iter; 2179 struct ftrace_iterator *iter;
2180 struct ftrace_hash *hash;
1634 int ret = 0; 2181 int ret = 0;
1635 2182
1636 if (unlikely(ftrace_disabled)) 2183 if (unlikely(ftrace_disabled))
@@ -1645,21 +2192,42 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1645 return -ENOMEM; 2192 return -ENOMEM;
1646 } 2193 }
1647 2194
2195 if (flag & FTRACE_ITER_NOTRACE)
2196 hash = ops->notrace_hash;
2197 else
2198 hash = ops->filter_hash;
2199
2200 iter->ops = ops;
2201 iter->flags = flag;
2202
2203 if (file->f_mode & FMODE_WRITE) {
2204 mutex_lock(&ftrace_lock);
2205 iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash);
2206 mutex_unlock(&ftrace_lock);
2207
2208 if (!iter->hash) {
2209 trace_parser_put(&iter->parser);
2210 kfree(iter);
2211 return -ENOMEM;
2212 }
2213 }
2214
1648 mutex_lock(&ftrace_regex_lock); 2215 mutex_lock(&ftrace_regex_lock);
2216
1649 if ((file->f_mode & FMODE_WRITE) && 2217 if ((file->f_mode & FMODE_WRITE) &&
1650 (file->f_flags & O_TRUNC)) 2218 (file->f_flags & O_TRUNC))
1651 ftrace_filter_reset(enable); 2219 ftrace_filter_reset(iter->hash);
1652 2220
1653 if (file->f_mode & FMODE_READ) { 2221 if (file->f_mode & FMODE_READ) {
1654 iter->pg = ftrace_pages_start; 2222 iter->pg = ftrace_pages_start;
1655 iter->flags = enable ? FTRACE_ITER_FILTER :
1656 FTRACE_ITER_NOTRACE;
1657 2223
1658 ret = seq_open(file, &show_ftrace_seq_ops); 2224 ret = seq_open(file, &show_ftrace_seq_ops);
1659 if (!ret) { 2225 if (!ret) {
1660 struct seq_file *m = file->private_data; 2226 struct seq_file *m = file->private_data;
1661 m->private = iter; 2227 m->private = iter;
1662 } else { 2228 } else {
2229 /* Failed */
2230 free_ftrace_hash(iter->hash);
1663 trace_parser_put(&iter->parser); 2231 trace_parser_put(&iter->parser);
1664 kfree(iter); 2232 kfree(iter);
1665 } 2233 }
@@ -1673,13 +2241,15 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1673static int 2241static int
1674ftrace_filter_open(struct inode *inode, struct file *file) 2242ftrace_filter_open(struct inode *inode, struct file *file)
1675{ 2243{
1676 return ftrace_regex_open(inode, file, 1); 2244 return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
2245 inode, file);
1677} 2246}
1678 2247
1679static int 2248static int
1680ftrace_notrace_open(struct inode *inode, struct file *file) 2249ftrace_notrace_open(struct inode *inode, struct file *file)
1681{ 2250{
1682 return ftrace_regex_open(inode, file, 0); 2251 return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE,
2252 inode, file);
1683} 2253}
1684 2254
1685static loff_t 2255static loff_t
@@ -1724,86 +2294,99 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1724} 2294}
1725 2295
1726static int 2296static int
1727ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) 2297enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not)
2298{
2299 struct ftrace_func_entry *entry;
2300 int ret = 0;
2301
2302 entry = ftrace_lookup_ip(hash, rec->ip);
2303 if (not) {
2304 /* Do nothing if it doesn't exist */
2305 if (!entry)
2306 return 0;
2307
2308 free_hash_entry(hash, entry);
2309 } else {
2310 /* Do nothing if it exists */
2311 if (entry)
2312 return 0;
2313
2314 ret = add_hash_entry(hash, rec->ip);
2315 }
2316 return ret;
2317}
2318
2319static int
2320ftrace_match_record(struct dyn_ftrace *rec, char *mod,
2321 char *regex, int len, int type)
1728{ 2322{
1729 char str[KSYM_SYMBOL_LEN]; 2323 char str[KSYM_SYMBOL_LEN];
2324 char *modname;
2325
2326 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
2327
2328 if (mod) {
2329 /* module lookup requires matching the module */
2330 if (!modname || strcmp(modname, mod))
2331 return 0;
2332
2333 /* blank search means to match all funcs in the mod */
2334 if (!len)
2335 return 1;
2336 }
1730 2337
1731 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1732 return ftrace_match(str, regex, len, type); 2338 return ftrace_match(str, regex, len, type);
1733} 2339}
1734 2340
1735static int ftrace_match_records(char *buff, int len, int enable) 2341static int
2342match_records(struct ftrace_hash *hash, char *buff,
2343 int len, char *mod, int not)
1736{ 2344{
1737 unsigned int search_len; 2345 unsigned search_len = 0;
1738 struct ftrace_page *pg; 2346 struct ftrace_page *pg;
1739 struct dyn_ftrace *rec; 2347 struct dyn_ftrace *rec;
1740 unsigned long flag; 2348 int type = MATCH_FULL;
1741 char *search; 2349 char *search = buff;
1742 int type;
1743 int not;
1744 int found = 0; 2350 int found = 0;
2351 int ret;
1745 2352
1746 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 2353 if (len) {
1747 type = filter_parse_regex(buff, len, &search, &not); 2354 type = filter_parse_regex(buff, len, &search, &not);
1748 2355 search_len = strlen(search);
1749 search_len = strlen(search); 2356 }
1750 2357
1751 mutex_lock(&ftrace_lock); 2358 mutex_lock(&ftrace_lock);
1752 do_for_each_ftrace_rec(pg, rec) {
1753 2359
1754 if (rec->flags & FTRACE_FL_FAILED) 2360 if (unlikely(ftrace_disabled))
1755 continue; 2361 goto out_unlock;
1756 2362
1757 if (ftrace_match_record(rec, search, search_len, type)) { 2363 do_for_each_ftrace_rec(pg, rec) {
1758 if (not) 2364
1759 rec->flags &= ~flag; 2365 if (ftrace_match_record(rec, mod, search, search_len, type)) {
1760 else 2366 ret = enter_record(hash, rec, not);
1761 rec->flags |= flag; 2367 if (ret < 0) {
2368 found = ret;
2369 goto out_unlock;
2370 }
1762 found = 1; 2371 found = 1;
1763 } 2372 }
1764 /*
1765 * Only enable filtering if we have a function that
1766 * is filtered on.
1767 */
1768 if (enable && (rec->flags & FTRACE_FL_FILTER))
1769 ftrace_filtered = 1;
1770 } while_for_each_ftrace_rec(); 2373 } while_for_each_ftrace_rec();
2374 out_unlock:
1771 mutex_unlock(&ftrace_lock); 2375 mutex_unlock(&ftrace_lock);
1772 2376
1773 return found; 2377 return found;
1774} 2378}
1775 2379
1776static int 2380static int
1777ftrace_match_module_record(struct dyn_ftrace *rec, char *mod, 2381ftrace_match_records(struct ftrace_hash *hash, char *buff, int len)
1778 char *regex, int len, int type)
1779{ 2382{
1780 char str[KSYM_SYMBOL_LEN]; 2383 return match_records(hash, buff, len, NULL, 0);
1781 char *modname;
1782
1783 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
1784
1785 if (!modname || strcmp(modname, mod))
1786 return 0;
1787
1788 /* blank search means to match all funcs in the mod */
1789 if (len)
1790 return ftrace_match(str, regex, len, type);
1791 else
1792 return 1;
1793} 2384}
1794 2385
1795static int ftrace_match_module_records(char *buff, char *mod, int enable) 2386static int
2387ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
1796{ 2388{
1797 unsigned search_len = 0;
1798 struct ftrace_page *pg;
1799 struct dyn_ftrace *rec;
1800 int type = MATCH_FULL;
1801 char *search = buff;
1802 unsigned long flag;
1803 int not = 0; 2389 int not = 0;
1804 int found = 0;
1805
1806 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1807 2390
1808 /* blank or '*' mean the same */ 2391 /* blank or '*' mean the same */
1809 if (strcmp(buff, "*") == 0) 2392 if (strcmp(buff, "*") == 0)
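
enter_record() replaces the old per-record flag twiddling with hash membership: a normal match adds the record's ip to the hash, a negated pattern ("!func") removes a matching entry, and both directions are no-ops when the entry is already in the desired state. A simplified user-space model of that add/remove-by-ip behaviour (the tiny fixed-size table stands in for the real ftrace_hash):

    #include <stdio.h>

    #define SLOTS 8
    static unsigned long table[SLOTS];          /* 0 means empty */

    static int lookup(unsigned long ip)
    {
            for (int i = 0; i < SLOTS; i++)
                    if (table[i] == ip)
                            return i;
            return -1;
    }

    /* Mirrors enter_record(): 'not' removes, otherwise add if missing. */
    static int enter_ip(unsigned long ip, int not)
    {
            int i = lookup(ip);

            if (not) {
                    if (i >= 0)
                            table[i] = 0;       /* drop a matched ip */
                    return 0;
            }
            if (i >= 0)
                    return 0;                   /* already present */
            for (int j = 0; j < SLOTS; j++)
                    if (!table[j]) {
                            table[j] = ip;
                            return 0;
                    }
            return -1;                          /* table full */
    }

    int main(void)
    {
            enter_ip(0xc0de, 0);                /* "func"  -> add    */
            enter_ip(0xc0de, 1);                /* "!func" -> remove */
            printf("present: %d\n", lookup(0xc0de) >= 0);
            return 0;
    }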
@@ -1815,32 +2398,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)
1815 not = 1; 2398 not = 1;
1816 } 2399 }
1817 2400
1818 if (strlen(buff)) { 2401 return match_records(hash, buff, strlen(buff), mod, not);
1819 type = filter_parse_regex(buff, strlen(buff), &search, &not);
1820 search_len = strlen(search);
1821 }
1822
1823 mutex_lock(&ftrace_lock);
1824 do_for_each_ftrace_rec(pg, rec) {
1825
1826 if (rec->flags & FTRACE_FL_FAILED)
1827 continue;
1828
1829 if (ftrace_match_module_record(rec, mod,
1830 search, search_len, type)) {
1831 if (not)
1832 rec->flags &= ~flag;
1833 else
1834 rec->flags |= flag;
1835 found = 1;
1836 }
1837 if (enable && (rec->flags & FTRACE_FL_FILTER))
1838 ftrace_filtered = 1;
1839
1840 } while_for_each_ftrace_rec();
1841 mutex_unlock(&ftrace_lock);
1842
1843 return found;
1844} 2402}
1845 2403
1846/* 2404/*
@@ -1851,7 +2409,10 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)
1851static int 2409static int
1852ftrace_mod_callback(char *func, char *cmd, char *param, int enable) 2410ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1853{ 2411{
2412 struct ftrace_ops *ops = &global_ops;
2413 struct ftrace_hash *hash;
1854 char *mod; 2414 char *mod;
2415 int ret = -EINVAL;
1855 2416
1856 /* 2417 /*
1857 * cmd == 'mod' because we only registered this func 2418 * cmd == 'mod' because we only registered this func
@@ -1863,15 +2424,24 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1863 2424
1864 /* we must have a module name */ 2425 /* we must have a module name */
1865 if (!param) 2426 if (!param)
1866 return -EINVAL; 2427 return ret;
1867 2428
1868 mod = strsep(&param, ":"); 2429 mod = strsep(&param, ":");
1869 if (!strlen(mod)) 2430 if (!strlen(mod))
1870 return -EINVAL; 2431 return ret;
1871 2432
1872 if (ftrace_match_module_records(func, mod, enable)) 2433 if (enable)
1873 return 0; 2434 hash = ops->filter_hash;
1874 return -EINVAL; 2435 else
2436 hash = ops->notrace_hash;
2437
2438 ret = ftrace_match_module_records(hash, func, mod);
2439 if (!ret)
2440 ret = -EINVAL;
2441 if (ret < 0)
2442 return ret;
2443
2444 return 0;
1875} 2445}
1876 2446
1877static struct ftrace_func_command ftrace_mod_cmd = { 2447static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1922,6 +2492,7 @@ static int ftrace_probe_registered;
1922 2492
1923static void __enable_ftrace_function_probe(void) 2493static void __enable_ftrace_function_probe(void)
1924{ 2494{
2495 int ret;
1925 int i; 2496 int i;
1926 2497
1927 if (ftrace_probe_registered) 2498 if (ftrace_probe_registered)
@@ -1936,13 +2507,16 @@ static void __enable_ftrace_function_probe(void)
1936 if (i == FTRACE_FUNC_HASHSIZE) 2507 if (i == FTRACE_FUNC_HASHSIZE)
1937 return; 2508 return;
1938 2509
1939 __register_ftrace_function(&trace_probe_ops); 2510 ret = __register_ftrace_function(&trace_probe_ops);
1940 ftrace_startup(0); 2511 if (!ret)
2512 ret = ftrace_startup(&trace_probe_ops, 0);
2513
1941 ftrace_probe_registered = 1; 2514 ftrace_probe_registered = 1;
1942} 2515}
1943 2516
1944static void __disable_ftrace_function_probe(void) 2517static void __disable_ftrace_function_probe(void)
1945{ 2518{
2519 int ret;
1946 int i; 2520 int i;
1947 2521
1948 if (!ftrace_probe_registered) 2522 if (!ftrace_probe_registered)
@@ -1955,8 +2529,10 @@ static void __disable_ftrace_function_probe(void)
1955 } 2529 }
1956 2530
1957 /* no more funcs left */ 2531 /* no more funcs left */
1958 __unregister_ftrace_function(&trace_probe_ops); 2532 ret = __unregister_ftrace_function(&trace_probe_ops);
1959 ftrace_shutdown(0); 2533 if (!ret)
2534 ftrace_shutdown(&trace_probe_ops, 0);
2535
1960 ftrace_probe_registered = 0; 2536 ftrace_probe_registered = 0;
1961} 2537}
1962 2538
@@ -1992,12 +2568,13 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1992 return -EINVAL; 2568 return -EINVAL;
1993 2569
1994 mutex_lock(&ftrace_lock); 2570 mutex_lock(&ftrace_lock);
1995 do_for_each_ftrace_rec(pg, rec) {
1996 2571
1997 if (rec->flags & FTRACE_FL_FAILED) 2572 if (unlikely(ftrace_disabled))
1998 continue; 2573 goto out_unlock;
2574
2575 do_for_each_ftrace_rec(pg, rec) {
1999 2576
2000 if (!ftrace_match_record(rec, search, len, type)) 2577 if (!ftrace_match_record(rec, NULL, search, len, type))
2001 continue; 2578 continue;
2002 2579
2003 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 2580 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
@@ -2158,7 +2735,8 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd)
2158 return ret; 2735 return ret;
2159} 2736}
2160 2737
2161static int ftrace_process_regex(char *buff, int len, int enable) 2738static int ftrace_process_regex(struct ftrace_hash *hash,
2739 char *buff, int len, int enable)
2162{ 2740{
2163 char *func, *command, *next = buff; 2741 char *func, *command, *next = buff;
2164 struct ftrace_func_command *p; 2742 struct ftrace_func_command *p;
@@ -2167,9 +2745,12 @@ static int ftrace_process_regex(char *buff, int len, int enable)
2167 func = strsep(&next, ":"); 2745 func = strsep(&next, ":");
2168 2746
2169 if (!next) { 2747 if (!next) {
2170 if (ftrace_match_records(func, len, enable)) 2748 ret = ftrace_match_records(hash, func, len);
2171 return 0; 2749 if (!ret)
2172 return ret; 2750 ret = -EINVAL;
2751 if (ret < 0)
2752 return ret;
2753 return 0;
2173 } 2754 }
2174 2755
2175 /* command found */ 2756 /* command found */
@@ -2202,6 +2783,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2202 2783
2203 mutex_lock(&ftrace_regex_lock); 2784 mutex_lock(&ftrace_regex_lock);
2204 2785
2786 ret = -ENODEV;
2787 if (unlikely(ftrace_disabled))
2788 goto out_unlock;
2789
2205 if (file->f_mode & FMODE_READ) { 2790 if (file->f_mode & FMODE_READ) {
2206 struct seq_file *m = file->private_data; 2791 struct seq_file *m = file->private_data;
2207 iter = m->private; 2792 iter = m->private;
@@ -2213,7 +2798,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2213 2798
2214 if (read >= 0 && trace_parser_loaded(parser) && 2799 if (read >= 0 && trace_parser_loaded(parser) &&
2215 !trace_parser_cont(parser)) { 2800 !trace_parser_cont(parser)) {
2216 ret = ftrace_process_regex(parser->buffer, 2801 ret = ftrace_process_regex(iter->hash, parser->buffer,
2217 parser->idx, enable); 2802 parser->idx, enable);
2218 trace_parser_clear(parser); 2803 trace_parser_clear(parser);
2219 if (ret) 2804 if (ret)
@@ -2241,22 +2826,49 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
2241 return ftrace_regex_write(file, ubuf, cnt, ppos, 0); 2826 return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
2242} 2827}
2243 2828
2244static void 2829static int
2245ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) 2830ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
2831 int reset, int enable)
2246{ 2832{
2833 struct ftrace_hash **orig_hash;
2834 struct ftrace_hash *hash;
2835 int ret;
2836
2837 /* All global ops uses the global ops filters */
2838 if (ops->flags & FTRACE_OPS_FL_GLOBAL)
2839 ops = &global_ops;
2840
2247 if (unlikely(ftrace_disabled)) 2841 if (unlikely(ftrace_disabled))
2248 return; 2842 return -ENODEV;
2843
2844 if (enable)
2845 orig_hash = &ops->filter_hash;
2846 else
2847 orig_hash = &ops->notrace_hash;
2848
2849 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
2850 if (!hash)
2851 return -ENOMEM;
2249 2852
2250 mutex_lock(&ftrace_regex_lock); 2853 mutex_lock(&ftrace_regex_lock);
2251 if (reset) 2854 if (reset)
2252 ftrace_filter_reset(enable); 2855 ftrace_filter_reset(hash);
2253 if (buf) 2856 if (buf)
2254 ftrace_match_records(buf, len, enable); 2857 ftrace_match_records(hash, buf, len);
2858
2859 mutex_lock(&ftrace_lock);
2860 ret = ftrace_hash_move(orig_hash, hash);
2861 mutex_unlock(&ftrace_lock);
2862
2255 mutex_unlock(&ftrace_regex_lock); 2863 mutex_unlock(&ftrace_regex_lock);
2864
2865 free_ftrace_hash(hash);
2866 return ret;
2256} 2867}
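
ftrace_set_regex() no longer edits the live filter in place: it duplicates the current hash, applies the pattern to the private copy, swaps the copy in with ftrace_hash_move() under ftrace_lock, and frees the scratch hash afterwards. A hedged user-space sketch of the same copy, modify and publish pattern (the struct, the malloc'd copy and the single pthread mutex are stand-ins, not the kernel data structures):

    #include <stdlib.h>
    #include <string.h>
    #include <pthread.h>

    struct filter { size_t nr; unsigned long ips[64]; };

    static struct filter *live;                 /* what readers see */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Build a private copy, edit it, then publish it under the lock. */
    static int update_filter(unsigned long new_ip)
    {
            struct filter *copy = malloc(sizeof(*copy));

            if (!copy)
                    return -1;
            pthread_mutex_lock(&lock);
            if (live)
                    *copy = *live;              /* duplicate current state */
            else
                    memset(copy, 0, sizeof(*copy));
            if (copy->nr < 64)
                    copy->ips[copy->nr++] = new_ip; /* edit the copy only */
            free(live);
            live = copy;                        /* swap the copy in */
            pthread_mutex_unlock(&lock);
            return 0;
    }

    int main(void) { return update_filter(0xc0de); }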
2257 2868
2258/** 2869/**
2259 * ftrace_set_filter - set a function to filter on in ftrace 2870 * ftrace_set_filter - set a function to filter on in ftrace
2871 * @ops - the ops to set the filter with
2260 * @buf - the string that holds the function filter text. 2872 * @buf - the string that holds the function filter text.
2261 * @len - the length of the string. 2873 * @len - the length of the string.
2262 * @reset - non zero to reset all filters before applying this filter. 2874 * @reset - non zero to reset all filters before applying this filter.
@@ -2264,13 +2876,16 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
2264 * Filters denote which functions should be enabled when tracing is enabled. 2876 * Filters denote which functions should be enabled when tracing is enabled.
2265 * If @buf is NULL and reset is set, all functions will be enabled for tracing. 2877 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
2266 */ 2878 */
2267void ftrace_set_filter(unsigned char *buf, int len, int reset) 2879void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
2880 int len, int reset)
2268{ 2881{
2269 ftrace_set_regex(buf, len, reset, 1); 2882 ftrace_set_regex(ops, buf, len, reset, 1);
2270} 2883}
2884EXPORT_SYMBOL_GPL(ftrace_set_filter);
2271 2885
2272/** 2886/**
2273 * ftrace_set_notrace - set a function to not trace in ftrace 2887 * ftrace_set_notrace - set a function to not trace in ftrace
2888 * @ops - the ops to set the notrace filter with
2274 * @buf - the string that holds the function notrace text. 2889 * @buf - the string that holds the function notrace text.
2275 * @len - the length of the string. 2890 * @len - the length of the string.
2276 * @reset - non zero to reset all filters before applying this filter. 2891 * @reset - non zero to reset all filters before applying this filter.
@@ -2279,10 +2894,44 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset)
2279 * is enabled. If @buf is NULL and reset is set, all functions will be enabled 2894 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
2280 * for tracing. 2895 * for tracing.
2281 */ 2896 */
2282void ftrace_set_notrace(unsigned char *buf, int len, int reset) 2897void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
2898 int len, int reset)
2283{ 2899{
2284 ftrace_set_regex(buf, len, reset, 0); 2900 ftrace_set_regex(ops, buf, len, reset, 0);
2285} 2901}
2902EXPORT_SYMBOL_GPL(ftrace_set_notrace);
2903/**
2904 * ftrace_set_filter - set a function to filter on in ftrace
2905 * @ops - the ops to set the filter with
2906 * @buf - the string that holds the function filter text.
2907 * @len - the length of the string.
2908 * @reset - non zero to reset all filters before applying this filter.
2909 *
2910 * Filters denote which functions should be enabled when tracing is enabled.
2911 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
2912 */
2913void ftrace_set_global_filter(unsigned char *buf, int len, int reset)
2914{
2915 ftrace_set_regex(&global_ops, buf, len, reset, 1);
2916}
2917EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
2918
2919/**
2920 * ftrace_set_notrace - set a function to not trace in ftrace
2921 * @ops - the ops to set the notrace filter with
2922 * @buf - the string that holds the function notrace text.
2923 * @len - the length of the string.
2924 * @reset - non zero to reset all filters before applying this filter.
2925 *
2926 * Notrace Filters denote which functions should not be enabled when tracing
2927 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
2928 * for tracing.
2929 */
2930void ftrace_set_global_notrace(unsigned char *buf, int len, int reset)
2931{
2932 ftrace_set_regex(&global_ops, buf, len, reset, 0);
2933}
2934EXPORT_SYMBOL_GPL(ftrace_set_global_notrace);
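
Because filters now live in the ftrace_ops itself, ftrace_set_filter()/ftrace_set_notrace() take the ops whose hash should change, while the old global behaviour survives as ftrace_set_global_filter()/ftrace_set_global_notrace(), thin wrappers around global_ops. A hedged fragment showing how a caller might scope a filter to its own ops under this patch's API (my_ops, my_callback and my_func are hypothetical, and the two-argument callback signature is the one used at this point in the series):

    #include <linux/ftrace.h>
    #include <linux/init.h>

    /* Hypothetical callback using this patch's two-argument signature. */
    static void my_callback(unsigned long ip, unsigned long parent_ip)
    {
    }

    static struct ftrace_ops my_ops = {
            .func = my_callback,
    };

    static unsigned char my_func[] = "do_sys_open";

    static void __init my_set_filters(void)
    {
            /* Only my_ops is narrowed to do_sys_open()... */
            ftrace_set_filter(&my_ops, my_func, sizeof(my_func) - 1, 1);
            /* ...while this keeps filtering the global function tracers. */
            ftrace_set_global_filter(my_func, sizeof(my_func) - 1, 1);
    }

The per-ops filter only takes effect once my_ops is actually registered; see the registration sketch further down, after register_ftrace_function().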
2286 2935
2287/* 2936/*
2288 * command line interface to allow users to set filters on boot up. 2937 * command line interface to allow users to set filters on boot up.
@@ -2333,22 +2982,23 @@ static void __init set_ftrace_early_graph(char *buf)
2333} 2982}
2334#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2983#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2335 2984
2336static void __init set_ftrace_early_filter(char *buf, int enable) 2985static void __init
2986set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
2337{ 2987{
2338 char *func; 2988 char *func;
2339 2989
2340 while (buf) { 2990 while (buf) {
2341 func = strsep(&buf, ","); 2991 func = strsep(&buf, ",");
2342 ftrace_set_regex(func, strlen(func), 0, enable); 2992 ftrace_set_regex(ops, func, strlen(func), 0, enable);
2343 } 2993 }
2344} 2994}
2345 2995
2346static void __init set_ftrace_early_filters(void) 2996static void __init set_ftrace_early_filters(void)
2347{ 2997{
2348 if (ftrace_filter_buf[0]) 2998 if (ftrace_filter_buf[0])
2349 set_ftrace_early_filter(ftrace_filter_buf, 1); 2999 set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
2350 if (ftrace_notrace_buf[0]) 3000 if (ftrace_notrace_buf[0])
2351 set_ftrace_early_filter(ftrace_notrace_buf, 0); 3001 set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
2352#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3002#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2353 if (ftrace_graph_buf[0]) 3003 if (ftrace_graph_buf[0])
2354 set_ftrace_early_graph(ftrace_graph_buf); 3004 set_ftrace_early_graph(ftrace_graph_buf);
@@ -2356,11 +3006,14 @@ static void __init set_ftrace_early_filters(void)
2356} 3006}
2357 3007
2358static int 3008static int
2359ftrace_regex_release(struct inode *inode, struct file *file, int enable) 3009ftrace_regex_release(struct inode *inode, struct file *file)
2360{ 3010{
2361 struct seq_file *m = (struct seq_file *)file->private_data; 3011 struct seq_file *m = (struct seq_file *)file->private_data;
2362 struct ftrace_iterator *iter; 3012 struct ftrace_iterator *iter;
3013 struct ftrace_hash **orig_hash;
2363 struct trace_parser *parser; 3014 struct trace_parser *parser;
3015 int filter_hash;
3016 int ret;
2364 3017
2365 mutex_lock(&ftrace_regex_lock); 3018 mutex_lock(&ftrace_regex_lock);
2366 if (file->f_mode & FMODE_READ) { 3019 if (file->f_mode & FMODE_READ) {
@@ -2373,33 +3026,41 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2373 parser = &iter->parser; 3026 parser = &iter->parser;
2374 if (trace_parser_loaded(parser)) { 3027 if (trace_parser_loaded(parser)) {
2375 parser->buffer[parser->idx] = 0; 3028 parser->buffer[parser->idx] = 0;
2376 ftrace_match_records(parser->buffer, parser->idx, enable); 3029 ftrace_match_records(iter->hash, parser->buffer, parser->idx);
2377 } 3030 }
2378 3031
2379 mutex_lock(&ftrace_lock);
2380 if (ftrace_start_up && ftrace_enabled)
2381 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
2382 mutex_unlock(&ftrace_lock);
2383
2384 trace_parser_put(parser); 3032 trace_parser_put(parser);
3033
3034 if (file->f_mode & FMODE_WRITE) {
3035 filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
3036
3037 if (filter_hash)
3038 orig_hash = &iter->ops->filter_hash;
3039 else
3040 orig_hash = &iter->ops->notrace_hash;
3041
3042 mutex_lock(&ftrace_lock);
3043 /*
3044 * Remove the current set, update the hash and add
3045 * them back.
3046 */
3047 ftrace_hash_rec_disable(iter->ops, filter_hash);
3048 ret = ftrace_hash_move(orig_hash, iter->hash);
3049 if (!ret) {
3050 ftrace_hash_rec_enable(iter->ops, filter_hash);
3051 if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
3052 && ftrace_enabled)
3053 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
3054 }
3055 mutex_unlock(&ftrace_lock);
3056 }
3057 free_ftrace_hash(iter->hash);
2385 kfree(iter); 3058 kfree(iter);
2386 3059
2387 mutex_unlock(&ftrace_regex_lock); 3060 mutex_unlock(&ftrace_regex_lock);
2388 return 0; 3061 return 0;
2389} 3062}
2390 3063
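
Releasing a writable set_ftrace_filter or set_ftrace_notrace file is where the edited hash takes effect: the record counters for the old hash are dropped with ftrace_hash_rec_disable(), the new hash is moved into place, the counters are re-added with ftrace_hash_rec_enable(), and the call sites are only rewritten if the ops is enabled. A loose user-space model of that drop-old, swap, add-new accounting (the arrays merely mimic per-record reference counts, not the real structures):

    #include <stdio.h>

    #define NR_RECS 4
    static int refs[NR_RECS];           /* per-record "wanted by a filter" count */

    static void account(const int *set, int delta)
    {
            for (int i = 0; i < NR_RECS; i++)
                    if (set[i])
                            refs[i] += delta;   /* enable/disable one hash */
    }

    int main(void)
    {
            static int old_set[NR_RECS] = { 1, 1, 0, 0 };
            static int new_set[NR_RECS] = { 0, 1, 1, 0 };

            account(old_set, +1);               /* filter currently live */

            /* Release path: drop the old set, swap, re-add the new one. */
            account(old_set, -1);
            account(new_set, +1);

            for (int i = 0; i < NR_RECS; i++)
                    printf("rec%d refs=%d\n", i, refs[i]);
            return 0;
    }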
2391static int
2392ftrace_filter_release(struct inode *inode, struct file *file)
2393{
2394 return ftrace_regex_release(inode, file, 1);
2395}
2396
2397static int
2398ftrace_notrace_release(struct inode *inode, struct file *file)
2399{
2400 return ftrace_regex_release(inode, file, 0);
2401}
2402
2403static const struct file_operations ftrace_avail_fops = { 3064static const struct file_operations ftrace_avail_fops = {
2404 .open = ftrace_avail_open, 3065 .open = ftrace_avail_open,
2405 .read = seq_read, 3066 .read = seq_read,
@@ -2407,8 +3068,8 @@ static const struct file_operations ftrace_avail_fops = {
2407 .release = seq_release_private, 3068 .release = seq_release_private,
2408}; 3069};
2409 3070
2410static const struct file_operations ftrace_failures_fops = { 3071static const struct file_operations ftrace_enabled_fops = {
2411 .open = ftrace_failures_open, 3072 .open = ftrace_enabled_open,
2412 .read = seq_read, 3073 .read = seq_read,
2413 .llseek = seq_lseek, 3074 .llseek = seq_lseek,
2414 .release = seq_release_private, 3075 .release = seq_release_private,
@@ -2418,8 +3079,8 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 3079 .open = ftrace_filter_open,
2419 .read = seq_read, 3080 .read = seq_read,
2420 .write = ftrace_filter_write, 3081 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 3082 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 3083 .release = ftrace_regex_release,
2423}; 3084};
2424 3085
2425static const struct file_operations ftrace_notrace_fops = { 3086static const struct file_operations ftrace_notrace_fops = {
@@ -2427,7 +3088,7 @@ static const struct file_operations ftrace_notrace_fops = {
2427 .read = seq_read, 3088 .read = seq_read,
2428 .write = ftrace_notrace_write, 3089 .write = ftrace_notrace_write,
2429 .llseek = ftrace_regex_lseek, 3090 .llseek = ftrace_regex_lseek,
2430 .release = ftrace_notrace_release, 3091 .release = ftrace_regex_release,
2431}; 3092};
2432 3093
2433#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3094#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2536,9 +3197,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2536 bool exists; 3197 bool exists;
2537 int i; 3198 int i;
2538 3199
2539 if (ftrace_disabled)
2540 return -ENODEV;
2541
2542 /* decode regex */ 3200 /* decode regex */
2543 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 3201 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2544 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) 3202 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
@@ -2547,12 +3205,18 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2547 search_len = strlen(search); 3205 search_len = strlen(search);
2548 3206
2549 mutex_lock(&ftrace_lock); 3207 mutex_lock(&ftrace_lock);
3208
3209 if (unlikely(ftrace_disabled)) {
3210 mutex_unlock(&ftrace_lock);
3211 return -ENODEV;
3212 }
3213
2550 do_for_each_ftrace_rec(pg, rec) { 3214 do_for_each_ftrace_rec(pg, rec) {
2551 3215
2552 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 3216 if (rec->flags & FTRACE_FL_FREE)
2553 continue; 3217 continue;
2554 3218
2555 if (ftrace_match_record(rec, search, search_len, type)) { 3219 if (ftrace_match_record(rec, NULL, search, search_len, type)) {
2556 /* if it is in the array */ 3220 /* if it is in the array */
2557 exists = false; 3221 exists = false;
2558 for (i = 0; i < *idx; i++) { 3222 for (i = 0; i < *idx; i++) {
@@ -2632,6 +3296,7 @@ static const struct file_operations ftrace_graph_fops = {
2632 .read = seq_read, 3296 .read = seq_read,
2633 .write = ftrace_graph_write, 3297 .write = ftrace_graph_write,
2634 .release = ftrace_graph_release, 3298 .release = ftrace_graph_release,
3299 .llseek = seq_lseek,
2635}; 3300};
2636#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3301#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2637 3302
@@ -2641,8 +3306,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2641 trace_create_file("available_filter_functions", 0444, 3306 trace_create_file("available_filter_functions", 0444,
2642 d_tracer, NULL, &ftrace_avail_fops); 3307 d_tracer, NULL, &ftrace_avail_fops);
2643 3308
2644 trace_create_file("failures", 0444, 3309 trace_create_file("enabled_functions", 0444,
2645 d_tracer, NULL, &ftrace_failures_fops); 3310 d_tracer, NULL, &ftrace_enabled_fops);
2646 3311
2647 trace_create_file("set_ftrace_filter", 0644, d_tracer, 3312 trace_create_file("set_ftrace_filter", 0644, d_tracer,
2648 NULL, &ftrace_filter_fops); 3313 NULL, &ftrace_filter_fops);
@@ -2682,7 +3347,10 @@ static int ftrace_process_locs(struct module *mod,
2682 ftrace_record_ip(addr); 3347 ftrace_record_ip(addr);
2683 } 3348 }
2684 3349
2685 /* disable interrupts to prevent kstop machine */ 3350 /*
3351 * Disable interrupts to prevent interrupts from executing
3352 * code that is being modified.
3353 */
2686 local_irq_save(flags); 3354 local_irq_save(flags);
2687 ftrace_update_code(mod); 3355 ftrace_update_code(mod);
2688 local_irq_restore(flags); 3356 local_irq_restore(flags);
@@ -2697,10 +3365,11 @@ void ftrace_release_mod(struct module *mod)
2697 struct dyn_ftrace *rec; 3365 struct dyn_ftrace *rec;
2698 struct ftrace_page *pg; 3366 struct ftrace_page *pg;
2699 3367
3368 mutex_lock(&ftrace_lock);
3369
2700 if (ftrace_disabled) 3370 if (ftrace_disabled)
2701 return; 3371 goto out_unlock;
2702 3372
2703 mutex_lock(&ftrace_lock);
2704 do_for_each_ftrace_rec(pg, rec) { 3373 do_for_each_ftrace_rec(pg, rec) {
2705 if (within_module_core(rec->ip, mod)) { 3374 if (within_module_core(rec->ip, mod)) {
2706 /* 3375 /*
@@ -2711,6 +3380,7 @@ void ftrace_release_mod(struct module *mod)
2711 ftrace_free_rec(rec); 3380 ftrace_free_rec(rec);
2712 } 3381 }
2713 } while_for_each_ftrace_rec(); 3382 } while_for_each_ftrace_rec();
3383 out_unlock:
2714 mutex_unlock(&ftrace_lock); 3384 mutex_unlock(&ftrace_lock);
2715} 3385}
2716 3386
@@ -2797,6 +3467,10 @@ void __init ftrace_init(void)
2797 3467
2798#else 3468#else
2799 3469
3470static struct ftrace_ops global_ops = {
3471 .func = ftrace_stub,
3472};
3473
2800static int __init ftrace_nodyn_init(void) 3474static int __init ftrace_nodyn_init(void)
2801{ 3475{
2802 ftrace_enabled = 1; 3476 ftrace_enabled = 1;
@@ -2807,12 +3481,47 @@ device_initcall(ftrace_nodyn_init);
2807static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } 3481static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
2808static inline void ftrace_startup_enable(int command) { } 3482static inline void ftrace_startup_enable(int command) { }
2809/* Keep as macros so we do not need to define the commands */ 3483/* Keep as macros so we do not need to define the commands */
2810# define ftrace_startup(command) do { } while (0) 3484# define ftrace_startup(ops, command) \
2811# define ftrace_shutdown(command) do { } while (0) 3485 ({ \
3486 (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
3487 0; \
3488 })
3489# define ftrace_shutdown(ops, command) do { } while (0)
2812# define ftrace_startup_sysctl() do { } while (0) 3490# define ftrace_startup_sysctl() do { } while (0)
2813# define ftrace_shutdown_sysctl() do { } while (0) 3491# define ftrace_shutdown_sysctl() do { } while (0)
3492
3493static inline int
3494ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
3495{
3496 return 1;
3497}
3498
2814#endif /* CONFIG_DYNAMIC_FTRACE */ 3499#endif /* CONFIG_DYNAMIC_FTRACE */
2815 3500
3501static void
3502ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3503{
3504 struct ftrace_ops *op;
3505
3506 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
3507 return;
3508
3509 trace_recursion_set(TRACE_INTERNAL_BIT);
3510 /*
3511 * Some of the ops may be dynamically allocated,
3512 * they must be freed after a synchronize_sched().
3513 */
3514 preempt_disable_notrace();
3515 op = rcu_dereference_raw(ftrace_ops_list);
3516 while (op != &ftrace_list_end) {
3517 if (ftrace_ops_test(op, ip))
3518 op->func(ip, parent_ip);
3519 op = rcu_dereference_raw(op->next);
3520 };
3521 preempt_enable_notrace();
3522 trace_recursion_clear(TRACE_INTERNAL_BIT);
3523}
3524
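
ftrace_ops_list_func() is the multiplexer installed when more than one ftrace_ops is registered: it sets a recursion bit so a callback that itself triggers tracing cannot loop, relies on disabled preemption plus synchronize_sched() instead of a lock so dynamically allocated ops stay valid during the walk, and calls every ops whose ftrace_ops_test() accepts the ip. A small user-space model of the walk and the recursion guard (the list, the accept() hook and the flag are stand-ins for the kernel structures):

    #include <stdio.h>

    struct op {
            void (*func)(unsigned long ip);
            int (*accept)(unsigned long ip);    /* models ftrace_ops_test() */
            struct op *next;
    };

    static struct op list_end = { 0 };
    static struct op *ops_list = &list_end;
    static int in_tracer;                       /* models the recursion bit */

    static void call_all(unsigned long ip)
    {
            if (in_tracer)
                    return;                     /* a callback re-entered us */
            in_tracer = 1;
            for (struct op *op = ops_list; op != &list_end; op = op->next)
                    if (op->accept(ip))
                            op->func(ip);
            in_tracer = 0;
    }

    static int accept_all(unsigned long ip) { return 1; }
    static void hello(unsigned long ip) { printf("traced %#lx\n", ip); }

    int main(void)
    {
            struct op mine = { hello, accept_all, &list_end };

            ops_list = &mine;
            call_all(0xc0de);
            return 0;
    }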
2816static void clear_ftrace_swapper(void) 3525static void clear_ftrace_swapper(void)
2817{ 3526{
2818 struct task_struct *p; 3527 struct task_struct *p;
@@ -3105,19 +3814,23 @@ void ftrace_kill(void)
3105 */ 3814 */
3106int register_ftrace_function(struct ftrace_ops *ops) 3815int register_ftrace_function(struct ftrace_ops *ops)
3107{ 3816{
3108 int ret; 3817 int ret = -1;
3109
3110 if (unlikely(ftrace_disabled))
3111 return -1;
3112 3818
3113 mutex_lock(&ftrace_lock); 3819 mutex_lock(&ftrace_lock);
3114 3820
3821 if (unlikely(ftrace_disabled))
3822 goto out_unlock;
3823
3115 ret = __register_ftrace_function(ops); 3824 ret = __register_ftrace_function(ops);
3116 ftrace_startup(0); 3825 if (!ret)
3826 ret = ftrace_startup(ops, 0);
3117 3827
3828
3829 out_unlock:
3118 mutex_unlock(&ftrace_lock); 3830 mutex_unlock(&ftrace_lock);
3119 return ret; 3831 return ret;
3120} 3832}
3833EXPORT_SYMBOL_GPL(register_ftrace_function);
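
register_ftrace_function() and unregister_ftrace_function() now propagate the ftrace_startup()/ftrace_shutdown() result and are exported GPL-only, so modules can register a callback and actually see failures. A hedged module-style sketch of that usage (my_probe_ops and the probe body are hypothetical; the error handling mirrors the new return-value behaviour):

    #include <linux/module.h>
    #include <linux/ftrace.h>

    static void my_probe(unsigned long ip, unsigned long parent_ip)
    {
            /* runs for every function accepted by my_probe_ops' filters */
    }

    static struct ftrace_ops my_probe_ops = {
            .func = my_probe,
    };

    static int __init my_probe_init(void)
    {
            int ret = register_ftrace_function(&my_probe_ops);

            /* The startup result is now propagated, so failures show up here. */
            if (ret)
                    pr_err("ftrace callback registration failed: %d\n", ret);
            return ret;
    }

    static void __exit my_probe_exit(void)
    {
            unregister_ftrace_function(&my_probe_ops);
    }

    module_init(my_probe_init);
    module_exit(my_probe_exit);
    MODULE_LICENSE("GPL");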
3121 3834
3122/** 3835/**
3123 * unregister_ftrace_function - unregister a function for profiling. 3836 * unregister_ftrace_function - unregister a function for profiling.
@@ -3131,25 +3844,27 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
3131 3844
3132 mutex_lock(&ftrace_lock); 3845 mutex_lock(&ftrace_lock);
3133 ret = __unregister_ftrace_function(ops); 3846 ret = __unregister_ftrace_function(ops);
3134 ftrace_shutdown(0); 3847 if (!ret)
3848 ftrace_shutdown(ops, 0);
3135 mutex_unlock(&ftrace_lock); 3849 mutex_unlock(&ftrace_lock);
3136 3850
3137 return ret; 3851 return ret;
3138} 3852}
3853EXPORT_SYMBOL_GPL(unregister_ftrace_function);
3139 3854
3140int 3855int
3141ftrace_enable_sysctl(struct ctl_table *table, int write, 3856ftrace_enable_sysctl(struct ctl_table *table, int write,
3142 void __user *buffer, size_t *lenp, 3857 void __user *buffer, size_t *lenp,
3143 loff_t *ppos) 3858 loff_t *ppos)
3144{ 3859{
3145 int ret; 3860 int ret = -ENODEV;
3146
3147 if (unlikely(ftrace_disabled))
3148 return -ENODEV;
3149 3861
3150 mutex_lock(&ftrace_lock); 3862 mutex_lock(&ftrace_lock);
3151 3863
3152 ret = proc_dointvec(table, write, buffer, lenp, ppos); 3864 if (unlikely(ftrace_disabled))
3865 goto out;
3866
3867 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3153 3868
3154 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) 3869 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3155 goto out; 3870 goto out;
@@ -3161,11 +3876,11 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3161 ftrace_startup_sysctl(); 3876 ftrace_startup_sysctl();
3162 3877
3163 /* we are starting ftrace again */ 3878 /* we are starting ftrace again */
3164 if (ftrace_list != &ftrace_list_end) { 3879 if (ftrace_ops_list != &ftrace_list_end) {
3165 if (ftrace_list->next == &ftrace_list_end) 3880 if (ftrace_ops_list->next == &ftrace_list_end)
3166 ftrace_trace_function = ftrace_list->func; 3881 ftrace_trace_function = ftrace_ops_list->func;
3167 else 3882 else
3168 ftrace_trace_function = ftrace_list_func; 3883 ftrace_trace_function = ftrace_ops_list_func;
3169 } 3884 }
3170 3885
3171 } else { 3886 } else {
@@ -3289,7 +4004,7 @@ static int start_graph_tracing(void)
3289 /* The cpu_boot init_task->ret_stack will never be freed */ 4004 /* The cpu_boot init_task->ret_stack will never be freed */
3290 for_each_online_cpu(cpu) { 4005 for_each_online_cpu(cpu) {
3291 if (!idle_task(cpu)->ret_stack) 4006 if (!idle_task(cpu)->ret_stack)
3292 ftrace_graph_init_task(idle_task(cpu)); 4007 ftrace_graph_init_idle_task(idle_task(cpu), cpu);
3293 } 4008 }
3294 4009
3295 do { 4010 do {
@@ -3354,7 +4069,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
3354 ftrace_graph_return = retfunc; 4069 ftrace_graph_return = retfunc;
3355 ftrace_graph_entry = entryfunc; 4070 ftrace_graph_entry = entryfunc;
3356 4071
3357 ftrace_startup(FTRACE_START_FUNC_RET); 4072 ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
3358 4073
3359out: 4074out:
3360 mutex_unlock(&ftrace_lock); 4075 mutex_unlock(&ftrace_lock);
@@ -3371,7 +4086,7 @@ void unregister_ftrace_graph(void)
3371 ftrace_graph_active--; 4086 ftrace_graph_active--;
3372 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 4087 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
3373 ftrace_graph_entry = ftrace_graph_entry_stub; 4088 ftrace_graph_entry = ftrace_graph_entry_stub;
3374 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 4089 ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
3375 unregister_pm_notifier(&ftrace_suspend_notifier); 4090 unregister_pm_notifier(&ftrace_suspend_notifier);
3376 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); 4091 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3377 4092
@@ -3379,6 +4094,49 @@ void unregister_ftrace_graph(void)
3379 mutex_unlock(&ftrace_lock); 4094 mutex_unlock(&ftrace_lock);
3380} 4095}
3381 4096
4097static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
4098
4099static void
4100graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
4101{
4102 atomic_set(&t->tracing_graph_pause, 0);
4103 atomic_set(&t->trace_overrun, 0);
4104 t->ftrace_timestamp = 0;
4105 /* make curr_ret_stack visible before we add the ret_stack */
4106 smp_wmb();
4107 t->ret_stack = ret_stack;
4108}
4109
4110/*
4111 * Allocate a return stack for the idle task. May be the first
4112 * time through, or it may be done by CPU hotplug online.
4113 */
4114void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
4115{
4116 t->curr_ret_stack = -1;
4117 /*
4118 * The idle task has no parent, it either has its own
4119 * stack or no stack at all.
4120 */
4121 if (t->ret_stack)
4122 WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
4123
4124 if (ftrace_graph_active) {
4125 struct ftrace_ret_stack *ret_stack;
4126
4127 ret_stack = per_cpu(idle_ret_stack, cpu);
4128 if (!ret_stack) {
4129 ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
4130 * sizeof(struct ftrace_ret_stack),
4131 GFP_KERNEL);
4132 if (!ret_stack)
4133 return;
4134 per_cpu(idle_ret_stack, cpu) = ret_stack;
4135 }
4136 graph_init_task(t, ret_stack);
4137 }
4138}
4139
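
ftrace_graph_init_idle_task() deals with CPU hotplug: an idle task keeps running across offline/online cycles, so its return stack is allocated once into a per-cpu slot and reused every time that CPU comes back, instead of being reallocated like a normal task's. A user-space sketch of that allocate-once, reuse-on-reonline pattern (NR_CPUS, DEPTH and the plain array stand in for the per-cpu variable):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 4
    #define DEPTH   50

    struct ret_frame { unsigned long ret; unsigned long func; };

    /* One lazily allocated stack per CPU, kept across "hotplug" cycles. */
    static struct ret_frame *idle_ret_stack[NR_CPUS];

    static struct ret_frame *idle_stack_for(int cpu)
    {
            if (!idle_ret_stack[cpu])
                    idle_ret_stack[cpu] =
                            calloc(DEPTH, sizeof(struct ret_frame));
            return idle_ret_stack[cpu];         /* reused on every online */
    }

    int main(void)
    {
            struct ret_frame *a = idle_stack_for(1);    /* first online  */
            struct ret_frame *b = idle_stack_for(1);    /* CPU came back */

            printf("same buffer reused: %d\n", a == b);
            return 0;
    }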
3382/* Allocate a return stack for newly created task */ 4140/* Allocate a return stack for newly created task */
3383void ftrace_graph_init_task(struct task_struct *t) 4141void ftrace_graph_init_task(struct task_struct *t)
3384{ 4142{
@@ -3394,12 +4152,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3394 GFP_KERNEL); 4152 GFP_KERNEL);
3395 if (!ret_stack) 4153 if (!ret_stack)
3396 return; 4154 return;
3397 atomic_set(&t->tracing_graph_pause, 0); 4155 graph_init_task(t, ret_stack);
3398 atomic_set(&t->trace_overrun, 0);
3399 t->ftrace_timestamp = 0;
3400 /* make curr_ret_stack visable before we add the ret_stack */
3401 smp_wmb();
3402 t->ret_stack = ret_stack;
3403 } 4156 }
3404} 4157}
3405 4158
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index a22582a06161..f55fcf61b223 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,5 +13,8 @@
13#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
14#include <trace/events/power.h> 14#include <trace/events/power.h>
15 15
16EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 16#ifdef EVENT_POWER_TRACING_DEPRECATED
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18#endif
19EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
17 20
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..b0c7aa407943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h> 7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
9#include <linux/spinlock.h> 8#include <linux/spinlock.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/uaccess.h> 10#include <linux/uaccess.h>
@@ -224,6 +223,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 223 RB_LEN_TIME_STAMP = 16,
225}; 224};
226 225
226#define skip_time_extend(event) \
227 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
228
227static inline int rb_null_event(struct ring_buffer_event *event) 229static inline int rb_null_event(struct ring_buffer_event *event)
228{ 230{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 231 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +250,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 250 return length + RB_EVNT_HDR_SIZE;
249} 251}
250 252
251/* inline for ring buffer fast paths */ 253/*
252static unsigned 254 * Return the length of the given event. Will return
255 * the length of the time extend if the event is a
256 * time extend.
257 */
258static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 259rb_event_length(struct ring_buffer_event *event)
254{ 260{
255 switch (event->type_len) { 261 switch (event->type_len) {
@@ -274,13 +280,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 280 return 0;
275} 281}
276 282
283/*
284 * Return total length of time extend and data,
285 * or just the event length for all other events.
286 */
287static inline unsigned
288rb_event_ts_length(struct ring_buffer_event *event)
289{
290 unsigned len = 0;
291
292 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
293 /* time extends include the data event after it */
294 len = RB_LEN_TIME_EXTEND;
295 event = skip_time_extend(event);
296 }
297 return len + rb_event_length(event);
298}
299
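
With the old timestamp-commit dance removed, an oversized time delta is now written as a TIME_EXTEND event placed directly in front of its data event, so length accounting has to treat the pair as one unit: skip_time_extend() hops over the extend header and rb_event_ts_length() adds its RB_LEN_TIME_EXTEND bytes to the length of the data event behind it. A user-space sketch of that combined-length calculation (the event layout is simplified; only the 8-byte extend size matches the real format):

    #include <stdio.h>

    #define LEN_TIME_EXTEND 8

    struct event {
            int is_time_extend;
            unsigned data_len;  /* header + payload of the data event */
    };

    /* Total space the reader must consume for one logical event. */
    static unsigned event_ts_length(const struct event *e)
    {
            unsigned len = 0;

            if (e->is_time_extend) {
                    len = LEN_TIME_EXTEND;      /* the extend header itself */
                    e++;                        /* the data event follows it */
            }
            return len + e->data_len;
    }

    int main(void)
    {
            struct event pair[2] = {
                    { .is_time_extend = 1 },
                    { .data_len = 24 },
            };

            printf("logical length: %u\n", event_ts_length(pair));
            return 0;
    }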
277/** 300/**
278 * ring_buffer_event_length - return the length of the event 301 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 302 * @event: the event to get the length of
303 *
304 * Returns the size of the data load of a data event.
305 * If the event is something other than a data event, it
306 * returns the size of the event itself. With the exception
307 * of a TIME EXTEND, where it still returns the size of the
308 * data load of the data event after it.
280 */ 309 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 310unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 311{
283 unsigned length = rb_event_length(event); 312 unsigned length;
313
314 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
315 event = skip_time_extend(event);
316
317 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 318 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 319 return length;
286 length -= RB_EVNT_HDR_SIZE; 320 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +328,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 328static void *
295rb_event_data(struct ring_buffer_event *event) 329rb_event_data(struct ring_buffer_event *event)
296{ 330{
331 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
332 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 333 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 334 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 335 if (event->type_len)
@@ -404,9 +440,6 @@ static inline int test_time_stamp(u64 delta)
404/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 440/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
405#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 441#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
406 442
407/* Max number of timestamps that can fit on a page */
408#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
409
410int ring_buffer_print_page_header(struct trace_seq *s) 443int ring_buffer_print_page_header(struct trace_seq *s)
411{ 444{
412 struct buffer_data_page field; 445 struct buffer_data_page field;
@@ -635,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list)
635 * the reader page). But if the next page is a header page, 668 * the reader page). But if the next page is a header page,
636 * its flags will be non zero. 669 * its flags will be non zero.
637 */ 670 */
638static int inline 671static inline int
639rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, 672rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
640 struct buffer_page *page, struct list_head *list) 673 struct buffer_page *page, struct list_head *list)
641{ 674{
@@ -1395,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1395} 1428}
1396EXPORT_SYMBOL_GPL(ring_buffer_resize); 1429EXPORT_SYMBOL_GPL(ring_buffer_resize);
1397 1430
1431void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1432{
1433 mutex_lock(&buffer->mutex);
1434 if (val)
1435 buffer->flags |= RB_FL_OVERWRITE;
1436 else
1437 buffer->flags &= ~RB_FL_OVERWRITE;
1438 mutex_unlock(&buffer->mutex);
1439}
1440EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1441
1398static inline void * 1442static inline void *
1399__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 1443__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1400{ 1444{
@@ -1434,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1434 return local_read(&bpage->entries) & RB_WRITE_MASK; 1478 return local_read(&bpage->entries) & RB_WRITE_MASK;
1435} 1479}
1436 1480
1437/* Size is determined by what has been commited */ 1481/* Size is determined by what has been committed */
1438static inline unsigned rb_page_size(struct buffer_page *bpage) 1482static inline unsigned rb_page_size(struct buffer_page *bpage)
1439{ 1483{
1440 return rb_page_commit(bpage); 1484 return rb_page_commit(bpage);
@@ -1546,6 +1590,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1590 iter->head = 0;
1547} 1591}
1548 1592
1593/* Slow path, do not inline */
1594static noinline struct ring_buffer_event *
1595rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1596{
1597 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1598
1599 /* Not the first event on the page? */
1600 if (rb_event_index(event)) {
1601 event->time_delta = delta & TS_MASK;
1602 event->array[0] = delta >> TS_SHIFT;
1603 } else {
1604 /* nope, just zero it */
1605 event->time_delta = 0;
1606 event->array[0] = 0;
1607 }
1608
1609 return skip_time_extend(event);
1610}
1611
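
rb_add_time_stamp() stores the oversized delta inside the TIME_EXTEND event: the low bits go into time_delta and the remainder is shifted into array[0], unless the extend happens to be the first event on the page, where the page timestamp already carries the value and both fields are zeroed. A small sketch of the split-and-recombine arithmetic (a TS_SHIFT of 27 matches the ring-buffer format of this era, but treat the struct as illustrative):

    #include <stdio.h>

    #define TS_SHIFT 27
    #define TS_MASK  ((1ULL << TS_SHIFT) - 1)

    struct time_extend {
            unsigned long long time_delta;  /* low TS_SHIFT bits of the delta */
            unsigned long long ext;         /* the rest, kept in array[0] */
    };

    int main(void)
    {
            unsigned long long delta = 123456789012ULL;
            struct time_extend ev = {
                    .time_delta = delta & TS_MASK,
                    .ext        = delta >> TS_SHIFT,
            };
            unsigned long long back = (ev.ext << TS_SHIFT) | ev.time_delta;

            printf("round trip ok: %d\n", back == delta);
            return 0;
    }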
1549/** 1612/**
1550 * ring_buffer_update_event - update event type and data 1613 * ring_buffer_update_event - update event type and data
1551 * @event: the even to update                                     1614 * @event: the event to update
@@ -1558,28 +1621,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1621 * data field.
1559 */ 1622 */
1560static void 1623static void
1561rb_update_event(struct ring_buffer_event *event, 1624rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1625 struct ring_buffer_event *event, unsigned length,
1626 int add_timestamp, u64 delta)
1563{ 1627{
1564 event->type_len = type; 1628 /* Only a commit updates the timestamp */
1565 1629 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1630 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1631
1573 case 0: 1632 /*
1574 length -= RB_EVNT_HDR_SIZE; 1633 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1634 * add it to the start of the resevered space.
1576 event->array[0] = length; 1635 */
1577 else 1636 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1637 event = rb_add_time_stamp(event, delta);
1579 break; 1638 length -= RB_LEN_TIME_EXTEND;
1580 default: 1639 delta = 0;
1581 BUG();
1582 } 1640 }
1641
1642 event->time_delta = delta;
1643 length -= RB_EVNT_HDR_SIZE;
1644 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1645 event->type_len = 0;
1646 event->array[0] = length;
1647 } else
1648 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1649}
1584 1650
1585/* 1651/*
@@ -1823,10 +1889,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1823 local_sub(length, &tail_page->write); 1889 local_sub(length, &tail_page->write);
1824} 1890}
1825 1891
1826static struct ring_buffer_event * 1892/*
1893 * This is the slow path, force gcc not to inline it.
1894 */
1895static noinline struct ring_buffer_event *
1827rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1896rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1828 unsigned long length, unsigned long tail, 1897 unsigned long length, unsigned long tail,
1829 struct buffer_page *tail_page, u64 *ts) 1898 struct buffer_page *tail_page, u64 ts)
1830{ 1899{
1831 struct buffer_page *commit_page = cpu_buffer->commit_page; 1900 struct buffer_page *commit_page = cpu_buffer->commit_page;
1832 struct ring_buffer *buffer = cpu_buffer->buffer; 1901 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1978,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1909 * Nested commits always have zero deltas, so 1978 * Nested commits always have zero deltas, so
1910 * just reread the time stamp 1979 * just reread the time stamp
1911 */ 1980 */
1912 *ts = rb_time_stamp(buffer); 1981 ts = rb_time_stamp(buffer);
1913 next_page->page->time_stamp = *ts; 1982 next_page->page->time_stamp = ts;
1914 } 1983 }
1915 1984
1916 out_again: 1985 out_again:
@@ -1929,12 +1998,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1929 1998
1930static struct ring_buffer_event * 1999static struct ring_buffer_event *
1931__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 2000__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1932 unsigned type, unsigned long length, u64 *ts) 2001 unsigned long length, u64 ts,
2002 u64 delta, int add_timestamp)
1933{ 2003{
1934 struct buffer_page *tail_page; 2004 struct buffer_page *tail_page;
1935 struct ring_buffer_event *event; 2005 struct ring_buffer_event *event;
1936 unsigned long tail, write; 2006 unsigned long tail, write;
1937 2007
2008 /*
2009 * If the time delta since the last event is too big to
2010 * hold in the time field of the event, then we append a
2011 * TIME EXTEND event ahead of the data event.
2012 */
2013 if (unlikely(add_timestamp))
2014 length += RB_LEN_TIME_EXTEND;
2015
1938 tail_page = cpu_buffer->tail_page; 2016 tail_page = cpu_buffer->tail_page;
1939 write = local_add_return(length, &tail_page->write); 2017 write = local_add_return(length, &tail_page->write);
1940 2018
@@ -1943,7 +2021,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1943 tail = write - length; 2021 tail = write - length;
1944 2022
1945	/* See if we shot past the end of this buffer page */          2023	/* See if we shot past the end of this buffer page */
1946 if (write > BUF_PAGE_SIZE) 2024 if (unlikely(write > BUF_PAGE_SIZE))
1947 return rb_move_tail(cpu_buffer, length, tail, 2025 return rb_move_tail(cpu_buffer, length, tail,
1948 tail_page, ts); 2026 tail_page, ts);
1949 2027
@@ -1951,18 +2029,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1951 2029
1952 event = __rb_page_index(tail_page, tail); 2030 event = __rb_page_index(tail_page, tail);
1953 kmemcheck_annotate_bitfield(event, bitfield); 2031 kmemcheck_annotate_bitfield(event, bitfield);
1954 rb_update_event(event, type, length); 2032 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1955 2033
1956 /* The passed in type is zero for DATA */ 2034 local_inc(&tail_page->entries);
1957 if (likely(!type))
1958 local_inc(&tail_page->entries);
1959 2035
1960 /* 2036 /*
1961 * If this is the first commit on the page, then update 2037 * If this is the first commit on the page, then update
1962 * its timestamp. 2038 * its timestamp.
1963 */ 2039 */
1964 if (!tail) 2040 if (!tail)
1965 tail_page->page->time_stamp = *ts; 2041 tail_page->page->time_stamp = ts;
1966 2042
1967 return event; 2043 return event;
1968} 2044}
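
__rb_reserve_next() keeps the fast path lock-free: the writer bumps the page's write counter by the event length with local_add_return(), falls into the rb_move_tail() slow path only when the new end runs past BUF_PAGE_SIZE, and simply folds the 8 bytes of a needed TIME_EXTEND into the reserved length up front. A user-space model of that reserve-and-check step (__sync_add_and_fetch stands in for local_add_return, and the page size constant is made up):

    #include <stdio.h>

    #define PAGE_SIZE_BYTES 4096
    #define LEN_TIME_EXTEND 8

    static unsigned long page_write;    /* bytes reserved on the current page */

    /* Returns the offset of the reservation, or -1 if it spilled off the page. */
    static long reserve(unsigned long length, int add_timestamp)
    {
            unsigned long write;

            if (add_timestamp)
                    length += LEN_TIME_EXTEND;  /* extend rides with the data */

            write = __sync_add_and_fetch(&page_write, length);
            if (write > PAGE_SIZE_BYTES)
                    return -1;                  /* slow path: move to a new page */
            return (long)(write - length);      /* start of the reserved region */
    }

    int main(void)
    {
            printf("first event at offset %ld\n", reserve(32, 0));
            printf("big event with extend at offset %ld\n", reserve(64, 1));
            return 0;
    }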
@@ -1977,7 +2053,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1977 unsigned long addr; 2053 unsigned long addr;
1978 2054
1979 new_index = rb_event_index(event); 2055 new_index = rb_event_index(event);
1980 old_index = new_index + rb_event_length(event); 2056 old_index = new_index + rb_event_ts_length(event);
1981 addr = (unsigned long)event; 2057 addr = (unsigned long)event;
1982 addr &= PAGE_MASK; 2058 addr &= PAGE_MASK;
1983 2059
@@ -2003,76 +2079,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2003 return 0; 2079 return 0;
2004} 2080}
2005 2081
2006static int
2007rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2008 u64 *ts, u64 *delta)
2009{
2010 struct ring_buffer_event *event;
2011 int ret;
2012
2013 WARN_ONCE(*delta > (1ULL << 59),
2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2015 (unsigned long long)*delta,
2016 (unsigned long long)*ts,
2017 (unsigned long long)cpu_buffer->write_stamp);
2018
2019 /*
2020 * The delta is too big, we to add a
2021 * new timestamp.
2022 */
2023 event = __rb_reserve_next(cpu_buffer,
2024 RINGBUF_TYPE_TIME_EXTEND,
2025 RB_LEN_TIME_EXTEND,
2026 ts);
2027 if (!event)
2028 return -EBUSY;
2029
2030 if (PTR_ERR(event) == -EAGAIN)
2031 return -EAGAIN;
2032
2033 /* Only a commited time event can update the write stamp */
2034 if (rb_event_is_commit(cpu_buffer, event)) {
2035 /*
2036 * If this is the first on the page, then it was
2037 * updated with the page itself. Try to discard it
2038 * and if we can't just make it zero.
2039 */
2040 if (rb_event_index(event)) {
2041 event->time_delta = *delta & TS_MASK;
2042 event->array[0] = *delta >> TS_SHIFT;
2043 } else {
2044 /* try to discard, since we do not need this */
2045 if (!rb_try_to_discard(cpu_buffer, event)) {
2046 /* nope, just zero it */
2047 event->time_delta = 0;
2048 event->array[0] = 0;
2049 }
2050 }
2051 cpu_buffer->write_stamp = *ts;
2052 /* let the caller know this was the commit */
2053 ret = 1;
2054 } else {
2055 /* Try to discard the event */
2056 if (!rb_try_to_discard(cpu_buffer, event)) {
2057 /* Darn, this is just wasted space */
2058 event->time_delta = 0;
2059 event->array[0] = 0;
2060 }
2061 ret = 0;
2062 }
2063
2064 *delta = 0;
2065
2066 return ret;
2067}
2068
2069static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2082static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2070{ 2083{
2071 local_inc(&cpu_buffer->committing); 2084 local_inc(&cpu_buffer->committing);
2072 local_inc(&cpu_buffer->commits); 2085 local_inc(&cpu_buffer->commits);
2073} 2086}
2074 2087
2075static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2088static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2076{ 2089{
2077 unsigned long commits; 2090 unsigned long commits;
2078 2091
@@ -2110,9 +2123,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2110 unsigned long length) 2123 unsigned long length)
2111{ 2124{
2112 struct ring_buffer_event *event; 2125 struct ring_buffer_event *event;
2113 u64 ts, delta = 0; 2126 u64 ts, delta;
2114 int commit = 0;
2115 int nr_loops = 0; 2127 int nr_loops = 0;
2128 int add_timestamp;
2129 u64 diff;
2116 2130
2117 rb_start_commit(cpu_buffer); 2131 rb_start_commit(cpu_buffer);
2118 2132
@@ -2133,6 +2147,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2133 2147
2134 length = rb_calculate_event_length(length); 2148 length = rb_calculate_event_length(length);
2135 again: 2149 again:
2150 add_timestamp = 0;
2151 delta = 0;
2152
2136 /* 2153 /*
2137 * We allow for interrupts to reenter here and do a trace. 2154 * We allow for interrupts to reenter here and do a trace.
2138 * If one does, it will cause this original code to loop 2155 * If one does, it will cause this original code to loop
@@ -2146,56 +2163,40 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2146 goto out_fail; 2163 goto out_fail;
2147 2164
2148 ts = rb_time_stamp(cpu_buffer->buffer); 2165 ts = rb_time_stamp(cpu_buffer->buffer);
2166 diff = ts - cpu_buffer->write_stamp;
2149 2167
2150 /* 2168 /* make sure this diff is calculated here */
2151 * Only the first commit can update the timestamp. 2169 barrier();
2152 * Yes there is a race here. If an interrupt comes in
2153 * just after the conditional and it traces too, then it
2154 * will also check the deltas. More than one timestamp may
2155 * also be made. But only the entry that did the actual
2156 * commit will be something other than zero.
2157 */
2158 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2159 rb_page_write(cpu_buffer->tail_page) ==
2160 rb_commit_index(cpu_buffer))) {
2161 u64 diff;
2162
2163 diff = ts - cpu_buffer->write_stamp;
2164
2165 /* make sure this diff is calculated here */
2166 barrier();
2167
2168 /* Did the write stamp get updated already? */
2169 if (unlikely(ts < cpu_buffer->write_stamp))
2170 goto get_event;
2171 2170
2171 /* Did the write stamp get updated already? */
2172 if (likely(ts >= cpu_buffer->write_stamp)) {
2172 delta = diff; 2173 delta = diff;
2173 if (unlikely(test_time_stamp(delta))) { 2174 if (unlikely(test_time_stamp(delta))) {
2174 2175 int local_clock_stable = 1;
2175 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2176#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2176 if (commit == -EBUSY) 2177 local_clock_stable = sched_clock_stable;
2177 goto out_fail; 2178#endif
2178 2179 WARN_ONCE(delta > (1ULL << 59),
2179 if (commit == -EAGAIN) 2180 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2180 goto again; 2181 (unsigned long long)delta,
2181 2182 (unsigned long long)ts,
2182 RB_WARN_ON(cpu_buffer, commit < 0); 2183 (unsigned long long)cpu_buffer->write_stamp,
2184 local_clock_stable ? "" :
2185 "If you just came from a suspend/resume,\n"
2186 "please switch to the trace global clock:\n"
2187 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2188 add_timestamp = 1;
2183 } 2189 }
2184 } 2190 }
2185 2191
2186 get_event: 2192 event = __rb_reserve_next(cpu_buffer, length, ts,
2187 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2193 delta, add_timestamp);
2188 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2194 if (unlikely(PTR_ERR(event) == -EAGAIN))
2189 goto again; 2195 goto again;
2190 2196
2191 if (!event) 2197 if (!event)
2192 goto out_fail; 2198 goto out_fail;
2193 2199
2194 if (!rb_event_is_commit(cpu_buffer, event))
2195 delta = 0;
2196
2197 event->time_delta = delta;
2198
2199 return event; 2200 return event;
2200 2201
2201 out_fail: 2202 out_fail:
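With rb_add_time_stamp() gone, rb_reserve_next_event() makes the extend decision inline: take the clock, diff it against the per-cpu write_stamp, and request an extend only when test_time_stamp() reports that the delta no longer fits the event header's delta field; the oversized-delta warning now also hints at an unstable sched_clock after suspend/resume. A compact model of the decision; the constant names below are illustrative and the 27-bit width is an assumption that matches the TS_SHIFT shift used by the commit path later in this patch:

    #include <stdint.h>

    #define TS_DELTA_BITS 27                            /* assumed width of the header delta field */
    #define TS_DELTA_MAX  ((1ULL << TS_DELTA_BITS) - 1)

    /* Decide whether a TIME_EXTEND event must precede the data event. */
    static int needs_time_extend(uint64_t ts, uint64_t write_stamp, uint64_t *delta)
    {
        /* The write stamp may already have been advanced by a nested writer. */
        if (ts < write_stamp) {
            *delta = 0;
            return 0;
        }
        *delta = ts - write_stamp;
        return *delta > TS_DELTA_MAX;
    }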
@@ -2207,32 +2208,39 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2207 2208
2208#define TRACE_RECURSIVE_DEPTH 16 2209#define TRACE_RECURSIVE_DEPTH 16
2209 2210
2210static int trace_recursive_lock(void) 2211/* Keep this code out of the fast path cache */
2212static noinline void trace_recursive_fail(void)
2211{ 2213{
2212 current->trace_recursion++;
2213
2214 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2215 return 0;
2216
2217 /* Disable all tracing before we do anything else */ 2214 /* Disable all tracing before we do anything else */
2218 tracing_off_permanent(); 2215 tracing_off_permanent();
2219 2216
2220 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" 2217 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2221 "HC[%lu]:SC[%lu]:NMI[%lu]\n", 2218 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2222 current->trace_recursion, 2219 trace_recursion_buffer(),
2223 hardirq_count() >> HARDIRQ_SHIFT, 2220 hardirq_count() >> HARDIRQ_SHIFT,
2224 softirq_count() >> SOFTIRQ_SHIFT, 2221 softirq_count() >> SOFTIRQ_SHIFT,
2225 in_nmi()); 2222 in_nmi());
2226 2223
2227 WARN_ON_ONCE(1); 2224 WARN_ON_ONCE(1);
2225}
2226
2227static inline int trace_recursive_lock(void)
2228{
2229 trace_recursion_inc();
2230
2231 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
2232 return 0;
2233
2234 trace_recursive_fail();
2235
2228 return -1; 2236 return -1;
2229} 2237}
2230 2238
2231static void trace_recursive_unlock(void) 2239static inline void trace_recursive_unlock(void)
2232{ 2240{
2233 WARN_ON_ONCE(!current->trace_recursion); 2241 WARN_ON_ONCE(!trace_recursion_buffer());
2234 2242
2235 current->trace_recursion--; 2243 trace_recursion_dec();
2236} 2244}
2237 2245
2238#else 2246#else
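trace_recursive_lock() is reshaped so the common case stays small and inlinable, while the reporting path moves into a noinline trace_recursive_fail() that never sits on the hot path's cache lines. A userspace model of the same shape, with a thread-local counter standing in for current->trace_recursion:

    #include <stdio.h>

    #define TRACE_RECURSIVE_DEPTH 16

    static __thread unsigned long trace_recursion;  /* stand-in for current->trace_recursion */

    /* Slow path: kept out of line so the fast path stays compact. */
    static __attribute__((noinline)) void trace_recursive_fail(void)
    {
        static __thread int warned;

        if (!warned++)
            fprintf(stderr, "tracing recursion: depth %lu\n", trace_recursion);
    }

    static inline int trace_recursive_lock(void)
    {
        if (++trace_recursion < TRACE_RECURSIVE_DEPTH)
            return 0;
        trace_recursive_fail();
        return -1;
    }

    static inline void trace_recursive_unlock(void)
    {
        --trace_recursion;
    }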
@@ -2308,12 +2316,28 @@ static void
2308rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2316rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2309 struct ring_buffer_event *event) 2317 struct ring_buffer_event *event)
2310{ 2318{
2319 u64 delta;
2320
2311 /* 2321 /*
2312 * The event first in the commit queue updates the 2322 * The event first in the commit queue updates the
2313 * time stamp. 2323 * time stamp.
2314 */ 2324 */
2315 if (rb_event_is_commit(cpu_buffer, event)) 2325 if (rb_event_is_commit(cpu_buffer, event)) {
2316 cpu_buffer->write_stamp += event->time_delta; 2326 /*
2327 * A commit event that is first on a page
2328 * updates the write timestamp with the page stamp
2329 */
2330 if (!rb_event_index(event))
2331 cpu_buffer->write_stamp =
2332 cpu_buffer->commit_page->page->time_stamp;
2333 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2334 delta = event->array[0];
2335 delta <<= TS_SHIFT;
2336 delta += event->time_delta;
2337 cpu_buffer->write_stamp += delta;
2338 } else
2339 cpu_buffer->write_stamp += event->time_delta;
2340 }
2317} 2341}
2318 2342
2319static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2343static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
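Because the time extend now travels with its data event, rb_update_write_stamp() has to reassemble the full delta itself: the upper bits live in event->array[0], the low bits in time_delta, and the two are joined with TS_SHIFT before being added to write_stamp, exactly as the hunk above shows. A sketch of that reassembly with the event reduced to plain integers; the struct name is made up and the TS_SHIFT value of 27 is assumed:

    #include <stdint.h>

    #define TS_SHIFT 27    /* assumed value of the kernel's TS_SHIFT */

    struct rb_event_model {        /* reduced stand-in for ring_buffer_event */
        uint32_t time_delta;       /* the low delta bits in the header */
        uint32_t array0;           /* event->array[0]: the upper bits */
    };

    /* Rebuild the delta a TIME_EXTEND event encodes across two fields. */
    static uint64_t rb_extend_delta(const struct rb_event_model *event)
    {
        uint64_t delta = event->array0;

        delta <<= TS_SHIFT;
        delta += event->time_delta;
        return delta;
    }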
@@ -2353,6 +2377,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2353 2377
2354static inline void rb_event_discard(struct ring_buffer_event *event) 2378static inline void rb_event_discard(struct ring_buffer_event *event)
2355{ 2379{
2380 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2381 event = skip_time_extend(event);
2382
2356 /* array[0] holds the actual length for the discarded event */ 2383 /* array[0] holds the actual length for the discarded event */
2357 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2384 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2358 event->type_len = RINGBUF_TYPE_PADDING; 2385 event->type_len = RINGBUF_TYPE_PADDING;
@@ -2606,6 +2633,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2633}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2634EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2635
2636/*
2637 * The total entries in the ring buffer is the running counter
2638 * of entries entered into the ring buffer, minus the sum of
2639 * the entries read from the ring buffer and the number of
2640 * entries that were overwritten.
2641 */
2642static inline unsigned long
2643rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2644{
2645 return local_read(&cpu_buffer->entries) -
2646 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2647}
2648
2609/** 2649/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2650 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2651 * @buffer: The ring buffer
@@ -2614,16 +2654,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2654unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2655{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2656 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2657
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2658 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2659 return 0;
2621 2660
2622 cpu_buffer = buffer->buffers[cpu]; 2661 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2662
2626 return ret; 2663 return rb_num_of_entries(cpu_buffer);
2627} 2664}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2665EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2666
@@ -2684,8 +2721,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2721 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2722 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2723 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2724 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2725 }
2690 2726
2691 return entries; 2727 return entries;
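The two entry counters used to repeat the same subtraction with slightly different grouping; rb_num_of_entries() now states it once: entries still in the buffer are everything ever written minus what was overwritten and what the reader already consumed. The arithmetic as a standalone example, with the per-cpu structure reduced to the three counters involved:

    #include <assert.h>

    struct cpu_buffer_counts {      /* reduced view of ring_buffer_per_cpu */
        unsigned long entries;      /* events ever written */
        unsigned long overrun;      /* events lost to overwrite */
        unsigned long read;         /* events consumed by readers */
    };

    static unsigned long rb_num_of_entries(const struct cpu_buffer_counts *c)
    {
        return c->entries - (c->overrun + c->read);
    }

    int main(void)
    {
        struct cpu_buffer_counts c = { .entries = 1000, .overrun = 120, .read = 300 };

        assert(rb_num_of_entries(&c) == 580);   /* 1000 - (120 + 300) */
        return 0;
    }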
@@ -2896,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2896 /* 2932 /*
2897 * cpu_buffer->pages just needs to point to the buffer, it 2933 * cpu_buffer->pages just needs to point to the buffer, it
2898 * has no specific buffer page to point to. Lets move it out 2934 * has no specific buffer page to point to. Lets move it out
2899 * of our way so we don't accidently swap it. 2935 * of our way so we don't accidentally swap it.
2900 */ 2936 */
2901 cpu_buffer->pages = reader->list.prev; 2937 cpu_buffer->pages = reader->list.prev;
2902 2938
@@ -3040,12 +3076,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3040 3076
3041 again: 3077 again:
3042 /* 3078 /*
3043 * We repeat when a timestamp is encountered. It is possible 3079 * We repeat when a time extend is encountered.
3044 * to get multiple timestamps from an interrupt entering just 3080 * Since the time extend is always attached to a data event,
3045 * as one timestamp is about to be written, or from discarded 3081 * we should never loop more than once.
3046 * commits. The most that we can have is the number on a single page. 3082 * (We never hit the following condition more than twice).
3047 */ 3083 */
3048 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3084 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3049 return NULL; 3085 return NULL;
3050 3086
3051 reader = rb_get_reader_page(cpu_buffer); 3087 reader = rb_get_reader_page(cpu_buffer);
@@ -3121,14 +3157,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3121 return NULL; 3157 return NULL;
3122 3158
3123 /* 3159 /*
3124 * We repeat when a timestamp is encountered. 3160 * We repeat when a time extend is encountered.
3125 * We can get multiple timestamps by nested interrupts or also 3161 * Since the time extend is always attached to a data event,
3126 * if filtering is on (discarding commits). Since discarding 3162 * we should never loop more than once.
3127 * commits can be frequent we can get a lot of timestamps. 3163 * (We never hit the following condition more than twice).
3128 * But we limit them by not adding timestamps if they begin
3129 * at the start of a page.
3130 */ 3164 */
3131 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3165 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3132 return NULL; 3166 return NULL;
3133 3167
3134 if (rb_per_cpu_empty(cpu_buffer)) 3168 if (rb_per_cpu_empty(cpu_buffer))
@@ -3826,7 +3860,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3826 if (len > (commit - read)) 3860 if (len > (commit - read))
3827 len = (commit - read); 3861 len = (commit - read);
3828 3862
3829 size = rb_event_length(event); 3863 /* Always keep the time extend and data together */
3864 size = rb_event_ts_length(event);
3830 3865
3831 if (len < size) 3866 if (len < size)
3832 goto out_unlock; 3867 goto out_unlock;
@@ -3836,6 +3871,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3836 3871
3837 /* Need to copy one event at a time */ 3872 /* Need to copy one event at a time */
3838 do { 3873 do {
3874 /* We need the size of one event, because
3875 * rb_advance_reader only advances by one event,
3876 * whereas rb_event_ts_length may include the size of
3877 * one or two events.
3878 * We have already ensured there's enough space if this
3879 * is a time extend. */
3880 size = rb_event_length(event);
3839 memcpy(bpage->data + pos, rpage->data + rpos, size); 3881 memcpy(bpage->data + pos, rpage->data + rpos, size);
3840 3882
3841 len -= size; 3883 len -= size;
@@ -3848,8 +3890,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3848 break; 3890 break;
3849 3891
3850 event = rb_reader_event(cpu_buffer); 3892 event = rb_reader_event(cpu_buffer);
3851 size = rb_event_length(event); 3893 /* Always keep the time extend and data together */
3852 } while (len > size); 3894 size = rb_event_ts_length(event);
3895 } while (len >= size);
3853 3896
3854 /* update bpage */ 3897 /* update bpage */
3855 local_set(&bpage->commit, pos); 3898 local_set(&bpage->commit, pos);
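ring_buffer_read_page() now uses two different lengths on purpose: rb_event_ts_length() (extend plus data) gates whether the pair still fits in the destination, while rb_event_length() (one event) drives the actual copy and advance, so an extend is never split from its data. A loop skeleton showing the two roles; the helpers are only declared here, and rb_next_event() is a made-up stand-in for the reader-side advance:

    #include <stddef.h>
    #include <string.h>

    size_t rb_event_length(const void *event);      /* size of this one event */
    size_t rb_event_ts_length(const void *event);   /* event plus any leading time extend */
    const void *rb_next_event(const void *event);   /* stand-in for rb_advance_reader() */

    /* Copy whole events into dst without splitting an extend from its data. */
    static size_t copy_events(void *dst, const void *event, size_t len)
    {
        size_t pos = 0;

        while (len >= rb_event_ts_length(event)) {
            size_t size = rb_event_length(event);

            memcpy((char *)dst + pos, event, size);
            pos += size;
            len -= size;
            event = rb_next_event(event);
        }
        return pos;
    }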
@@ -3965,6 +4008,7 @@ static const struct file_operations rb_simple_fops = {
3965 .open = tracing_open_generic, 4008 .open = tracing_open_generic,
3966 .read = rb_simple_read, 4009 .read = rb_simple_read,
3967 .write = rb_simple_write, 4010 .write = rb_simple_write,
4011 .llseek = default_llseek,
3968}; 4012};
3969 4013
3970 4014
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..ee9c921d7f21 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/smp_lock.h>
21#include <linux/notifier.h> 20#include <linux/notifier.h>
22#include <linux/irqflags.h> 21#include <linux/irqflags.h>
23#include <linux/debugfs.h> 22#include <linux/debugfs.h>
@@ -42,8 +41,6 @@
42#include "trace.h" 41#include "trace.h"
43#include "trace_output.h" 42#include "trace_output.h"
44 43
45#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
46
47/* 44/*
48 * On boot up, the ring buffer is set to the minimum size, so that 45 * On boot up, the ring buffer is set to the minimum size, so that
49 * we do not waste memory on systems that are not using tracing. 46 * we do not waste memory on systems that are not using tracing.
@@ -341,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
341/* trace_flags holds trace_options default values */ 338/* trace_flags holds trace_options default values */
342unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
343 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
344 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; 341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
345 342
346static int trace_stop_count; 343static int trace_stop_count;
347static DEFINE_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
@@ -426,6 +423,7 @@ static const char *trace_options[] = {
426 "sleep-time", 423 "sleep-time",
427 "graph-time", 424 "graph-time",
428 "record-cmd", 425 "record-cmd",
426 "overwrite",
429 NULL 427 NULL
430}; 428};
431 429
@@ -781,6 +779,11 @@ __acquires(kernel_lock)
781 tracing_reset_online_cpus(tr); 779 tracing_reset_online_cpus(tr);
782 780
783 current_trace = type; 781 current_trace = type;
782
783 /* If we expanded the buffers, make sure the max is expanded too */
784 if (ring_buffer_expanded && type->use_max_tr)
785 ring_buffer_resize(max_tr.buffer, trace_buf_size);
786
784 /* the test is responsible for initializing and enabling */ 787 /* the test is responsible for initializing and enabling */
785 pr_info("Testing tracer %s: ", type->name); 788 pr_info("Testing tracer %s: ", type->name);
786 ret = type->selftest(type, tr); 789 ret = type->selftest(type, tr);
@@ -793,6 +796,10 @@ __acquires(kernel_lock)
793 /* Only reset on passing, to avoid touching corrupted buffers */ 796 /* Only reset on passing, to avoid touching corrupted buffers */
794 tracing_reset_online_cpus(tr); 797 tracing_reset_online_cpus(tr);
795 798
799 /* Shrink the max buffer again */
800 if (ring_buffer_expanded && type->use_max_tr)
801 ring_buffer_resize(max_tr.buffer, 1);
802
796 printk(KERN_CONT "PASSED\n"); 803 printk(KERN_CONT "PASSED\n");
797 } 804 }
798#endif 805#endif
@@ -1103,7 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1103 1110
1104 entry->preempt_count = pc & 0xff; 1111 entry->preempt_count = pc & 0xff;
1105 entry->pid = (tsk) ? tsk->pid : 0; 1112 entry->pid = (tsk) ? tsk->pid : 0;
1106 entry->lock_depth = (tsk) ? tsk->lock_depth : 0; 1113 entry->padding = 0;
1107 entry->flags = 1114 entry->flags =
1108#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1115#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1109 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1116 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1284,6 +1291,8 @@ void trace_dump_stack(void)
1284 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1291 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1285} 1292}
1286 1293
1294static DEFINE_PER_CPU(int, user_stack_count);
1295
1287void 1296void
1288ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1297ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1289{ 1298{
@@ -1302,10 +1311,20 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1302 if (unlikely(in_nmi())) 1311 if (unlikely(in_nmi()))
1303 return; 1312 return;
1304 1313
1314 /*
1315 * prevent recursion, since the user stack tracing may
1316 * trigger other kernel events.
1317 */
1318 preempt_disable();
1319 if (__this_cpu_read(user_stack_count))
1320 goto out;
1321
1322 __this_cpu_inc(user_stack_count);
1323
1305 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1324 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1306 sizeof(*entry), flags, pc); 1325 sizeof(*entry), flags, pc);
1307 if (!event) 1326 if (!event)
1308 return; 1327 goto out_drop_count;
1309 entry = ring_buffer_event_data(event); 1328 entry = ring_buffer_event_data(event);
1310 1329
1311 entry->tgid = current->tgid; 1330 entry->tgid = current->tgid;
@@ -1319,6 +1338,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1319 save_stack_trace_user(&trace); 1338 save_stack_trace_user(&trace);
1320 if (!filter_check_discard(call, entry, buffer, event)) 1339 if (!filter_check_discard(call, entry, buffer, event))
1321 ring_buffer_unlock_commit(buffer, event); 1340 ring_buffer_unlock_commit(buffer, event);
1341
1342 out_drop_count:
1343 __this_cpu_dec(user_stack_count);
1344 out:
1345 preempt_enable();
1322} 1346}
1323 1347
1324#ifdef UNUSED 1348#ifdef UNUSED
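Walking a user stack can fault and generate more trace events, so ftrace_trace_userstack() now guards itself: preemption is disabled, the per-CPU user_stack_count is checked and bumped, and any nested attempt is dropped rather than recursed into. A simplified model of the guard, with a thread-local flag in place of the per-CPU counter; in the kernel it is the preempt_disable() pairing that makes the per-CPU version safe:

    static __thread int user_stack_in_progress;   /* stand-in for the per-CPU counter */

    void record_user_stack(void);                 /* the work that may recurse; stub only */

    static void trace_userstack(void)
    {
        /* A nested call from inside the recorder is dropped, not recursed into. */
        if (user_stack_in_progress)
            return;

        user_stack_in_progress = 1;
        record_user_stack();
        user_stack_in_progress = 0;
    }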
@@ -1733,10 +1757,9 @@ static void print_lat_help_header(struct seq_file *m)
1733 seq_puts(m, "# | / _----=> need-resched \n"); 1757 seq_puts(m, "# | / _----=> need-resched \n");
1734 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1758 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1735 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1759 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1736 seq_puts(m, "# |||| /_--=> lock-depth \n"); 1760 seq_puts(m, "# |||| / delay \n");
1737 seq_puts(m, "# |||||/ delay \n"); 1761 seq_puts(m, "# cmd pid ||||| time | caller \n");
1738 seq_puts(m, "# cmd pid |||||| time | caller \n"); 1762 seq_puts(m, "# \\ / ||||| \\ | / \n");
1739 seq_puts(m, "# \\ / |||||| \\ | / \n");
1740} 1763}
1741 1764
1742static void print_func_help_header(struct seq_file *m) 1765static void print_func_help_header(struct seq_file *m)
@@ -1991,9 +2014,10 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
1991{ 2014{
1992 enum print_line_t ret; 2015 enum print_line_t ret;
1993 2016
1994 if (iter->lost_events) 2017 if (iter->lost_events &&
1995 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 2018 !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
1996 iter->cpu, iter->lost_events); 2019 iter->cpu, iter->lost_events))
2020 return TRACE_TYPE_PARTIAL_LINE;
1997 2021
1998 if (iter->trace && iter->trace->print_line) { 2022 if (iter->trace && iter->trace->print_line) {
1999 ret = iter->trace->print_line(iter); 2023 ret = iter->trace->print_line(iter);
@@ -2196,7 +2220,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2196 2220
2197static int tracing_release(struct inode *inode, struct file *file) 2221static int tracing_release(struct inode *inode, struct file *file)
2198{ 2222{
2199 struct seq_file *m = (struct seq_file *)file->private_data; 2223 struct seq_file *m = file->private_data;
2200 struct trace_iterator *iter; 2224 struct trace_iterator *iter;
2201 int cpu; 2225 int cpu;
2202 2226
@@ -2320,11 +2344,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
2320 return count; 2344 return count;
2321} 2345}
2322 2346
2347static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2348{
2349 if (file->f_mode & FMODE_READ)
2350 return seq_lseek(file, offset, origin);
2351 else
2352 return 0;
2353}
2354
2323static const struct file_operations tracing_fops = { 2355static const struct file_operations tracing_fops = {
2324 .open = tracing_open, 2356 .open = tracing_open,
2325 .read = seq_read, 2357 .read = seq_read,
2326 .write = tracing_write_stub, 2358 .write = tracing_write_stub,
2327 .llseek = seq_lseek, 2359 .llseek = tracing_seek,
2328 .release = tracing_release, 2360 .release = tracing_release,
2329}; 2361};
2330 2362
@@ -2505,6 +2537,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2505 2537
2506 if (mask == TRACE_ITER_RECORD_CMD) 2538 if (mask == TRACE_ITER_RECORD_CMD)
2507 trace_event_enable_cmd_record(enabled); 2539 trace_event_enable_cmd_record(enabled);
2540
2541 if (mask == TRACE_ITER_OVERWRITE)
2542 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2508} 2543}
2509 2544
2510static ssize_t 2545static ssize_t
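The new 'overwrite' option is just a bit in trace_flags: set_tracer_flags() forwards toggles to ring_buffer_change_overwrite(), and the allocation path later in this patch derives the initial ring buffer flags from the same bit. The translation written out on its own; TRACE_ITER_OVERWRITE is the value added to trace.h in this series, while the RB_FL_OVERWRITE value shown is only an assumption:

    #define TRACE_ITER_OVERWRITE 0x200000   /* bit added to trace.h by this series */
    #define RB_FL_OVERWRITE      (1 << 0)   /* assumed value, for illustration only */

    static unsigned int trace_flags_to_rb_flags(unsigned long trace_flags)
    {
        return (trace_flags & TRACE_ITER_OVERWRITE) ? RB_FL_OVERWRITE : 0;
    }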
@@ -2686,6 +2721,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2686 2721
2687 mutex_lock(&trace_types_lock); 2722 mutex_lock(&trace_types_lock);
2688 if (tracer_enabled ^ val) { 2723 if (tracer_enabled ^ val) {
2724
2725 /* Only need to warn if this is used to change the state */
2726 WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
2727
2689 if (val) { 2728 if (val) {
2690 tracer_enabled = 1; 2729 tracer_enabled = 1;
2691 if (current_trace->start) 2730 if (current_trace->start)
@@ -3192,6 +3231,14 @@ waitagain:
3192 3231
3193 if (iter->seq.len >= cnt) 3232 if (iter->seq.len >= cnt)
3194 break; 3233 break;
3234
3235 /*
3236 * Setting the full flag means we reached the trace_seq buffer
3237 * size and we should leave by partial output condition above.
3238 * One of the trace_seq_* functions is not used properly.
3239 */
3240 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
3241 iter->ent->type);
3195 } 3242 }
3196 trace_access_unlock(iter->cpu_file); 3243 trace_access_unlock(iter->cpu_file);
3197 trace_event_read_unlock(); 3244 trace_event_read_unlock();
@@ -3202,7 +3249,7 @@ waitagain:
3202 trace_seq_init(&iter->seq); 3249 trace_seq_init(&iter->seq);
3203 3250
3204 /* 3251 /*
3205 * If there was nothing to send to user, inspite of consuming trace 3252 * If there was nothing to send to user, in spite of consuming trace
3206 * entries, go back to wait for more entries. 3253 * entries, go back to wait for more entries.
3207 */ 3254 */
3208 if (sret == -EBUSY) 3255 if (sret == -EBUSY)
@@ -3996,13 +4043,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3996{ 4043{
3997 struct dentry *d_percpu = tracing_dentry_percpu(); 4044 struct dentry *d_percpu = tracing_dentry_percpu();
3998 struct dentry *d_cpu; 4045 struct dentry *d_cpu;
3999 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 4046 char cpu_dir[30]; /* 30 characters should be more than enough */
4000 char cpu_dir[7];
4001 4047
4002 if (cpu > 999 || cpu < 0) 4048 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4003 return;
4004
4005 sprintf(cpu_dir, "cpu%ld", cpu);
4006 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4049 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4007 if (!d_cpu) { 4050 if (!d_cpu) {
4008 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4051 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
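The old cpu_dir[7] plus the cpu > 999 bail-out was a size calculation waiting to go stale; the replacement over-allocates and lets snprintf() bound the write, so no range check is needed at all. The fixed pattern as a standalone program:

    #include <stdio.h>

    static void format_cpu_dir(long cpu, char *out, size_t outlen)
    {
        /* snprintf() writes at most outlen bytes and always NUL-terminates. */
        snprintf(out, outlen, "cpu%ld", cpu);
    }

    int main(void)
    {
        char cpu_dir[30];       /* 30 characters is more than enough for any long */

        format_cpu_dir(12345, cpu_dir, sizeof(cpu_dir));
        puts(cpu_dir);          /* prints cpu12345 */
        return 0;
    }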
@@ -4531,9 +4574,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4531__init static int tracer_alloc_buffers(void) 4574__init static int tracer_alloc_buffers(void)
4532{ 4575{
4533 int ring_buf_size; 4576 int ring_buf_size;
4577 enum ring_buffer_flags rb_flags;
4534 int i; 4578 int i;
4535 int ret = -ENOMEM; 4579 int ret = -ENOMEM;
4536 4580
4581
4537 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 4582 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4538 goto out; 4583 goto out;
4539 4584
@@ -4546,12 +4591,13 @@ __init static int tracer_alloc_buffers(void)
4546 else 4591 else
4547 ring_buf_size = 1; 4592 ring_buf_size = 1;
4548 4593
4594 rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
4595
4549 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4596 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4550 cpumask_copy(tracing_cpumask, cpu_all_mask); 4597 cpumask_copy(tracing_cpumask, cpu_all_mask);
4551 4598
4552 /* TODO: make the number of buffers hot pluggable with CPUS */ 4599 /* TODO: make the number of buffers hot pluggable with CPUS */
4553 global_trace.buffer = ring_buffer_alloc(ring_buf_size, 4600 global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
4554 TRACE_BUFFER_FLAGS);
4555 if (!global_trace.buffer) { 4601 if (!global_trace.buffer) {
4556 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 4602 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4557 WARN_ON(1); 4603 WARN_ON(1);
@@ -4561,7 +4607,7 @@ __init static int tracer_alloc_buffers(void)
4561 4607
4562 4608
4563#ifdef CONFIG_TRACER_MAX_TRACE 4609#ifdef CONFIG_TRACER_MAX_TRACE
4564 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); 4610 max_tr.buffer = ring_buffer_alloc(1, rb_flags);
4565 if (!max_tr.buffer) { 4611 if (!max_tr.buffer) {
4566 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4612 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4567 WARN_ON(1); 4613 WARN_ON(1);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d39b3c5454a5..229f8591f61d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,8 +272,8 @@ struct tracer {
272 /* If you handled the flag setting, return 0 */ 272 /* If you handled the flag setting, return 0 */
273 int (*set_flag)(u32 old_flags, u32 bit, int set); 273 int (*set_flag)(u32 old_flags, u32 bit, int set);
274 struct tracer *next; 274 struct tracer *next;
275 int print_max;
276 struct tracer_flags *flags; 275 struct tracer_flags *flags;
276 int print_max;
277 int use_max_tr; 277 int use_max_tr;
278}; 278};
279 279
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr,
343 unsigned long ip, 343 unsigned long ip,
344 unsigned long parent_ip, 344 unsigned long parent_ip,
345 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
346void trace_default_header(struct seq_file *m); 350void trace_default_header(struct seq_file *m);
347void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
348int trace_empty(struct trace_iterator *iter); 352int trace_empty(struct trace_iterator *iter);
@@ -415,6 +419,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
415extern unsigned long ftrace_update_tot_cnt; 419extern unsigned long ftrace_update_tot_cnt;
416#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 420#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
417extern int DYN_FTRACE_TEST_NAME(void); 421extern int DYN_FTRACE_TEST_NAME(void);
422#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
423extern int DYN_FTRACE_TEST_NAME2(void);
418#endif 424#endif
419 425
420extern int ring_buffer_expanded; 426extern int ring_buffer_expanded;
@@ -602,6 +608,7 @@ enum trace_iterator_flags {
602 TRACE_ITER_SLEEP_TIME = 0x40000, 608 TRACE_ITER_SLEEP_TIME = 0x40000,
603 TRACE_ITER_GRAPH_TIME = 0x80000, 609 TRACE_ITER_GRAPH_TIME = 0x80000,
604 TRACE_ITER_RECORD_CMD = 0x100000, 610 TRACE_ITER_RECORD_CMD = 0x100000,
611 TRACE_ITER_OVERWRITE = 0x200000,
605}; 612};
606 613
607/* 614/*
@@ -657,8 +664,10 @@ struct ftrace_event_field {
657}; 664};
658 665
659struct event_filter { 666struct event_filter {
660 int n_preds; 667 int n_preds; /* Number assigned */
661 struct filter_pred **preds; 668 int a_preds; /* allocated */
669 struct filter_pred *preds;
670 struct filter_pred *root;
662 char *filter_string; 671 char *filter_string;
663}; 672};
664 673
@@ -670,11 +679,23 @@ struct event_subsystem {
670 int nr_events; 679 int nr_events;
671}; 680};
672 681
682#define FILTER_PRED_INVALID ((unsigned short)-1)
683#define FILTER_PRED_IS_RIGHT (1 << 15)
684#define FILTER_PRED_FOLD (1 << 15)
685
686/*
687 * The max preds is the size of unsigned short with
688 * two flags at the MSBs. One bit is used for both the IS_RIGHT
689 * and FOLD flags. The other is reserved.
690 *
691 * 2^14 preds is way more than enough.
692 */
693#define MAX_FILTER_PRED 16384
694
673struct filter_pred; 695struct filter_pred;
674struct regex; 696struct regex;
675 697
676typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 698typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
677 int val1, int val2);
678 699
679typedef int (*regex_match_func)(char *str, struct regex *r, int len); 700typedef int (*regex_match_func)(char *str, struct regex *r, int len);
680 701
@@ -696,11 +717,23 @@ struct filter_pred {
696 filter_pred_fn_t fn; 717 filter_pred_fn_t fn;
697 u64 val; 718 u64 val;
698 struct regex regex; 719 struct regex regex;
699 char *field_name; 720 /*
721 * Leaf nodes use field_name, ops is used by AND and OR
722 * nodes. The field_name is always freed when freeing a pred.
723 * We can overload field_name for ops and have it freed
724 * as well.
725 */
726 union {
727 char *field_name;
728 unsigned short *ops;
729 };
700 int offset; 730 int offset;
701 int not; 731 int not;
702 int op; 732 int op;
703 int pop_n; 733 unsigned short index;
734 unsigned short parent;
735 unsigned short left;
736 unsigned short right;
704}; 737};
705 738
706extern struct list_head ftrace_common_fields; 739extern struct list_head ftrace_common_fields;
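struct filter_pred stops being a postfix-stack opcode (pop_n is gone) and becomes a node in one flat preds[] array: index, parent, left and right are 16-bit slots, the parent field borrows its top bit as FILTER_PRED_IS_RIGHT to record which side a child hangs on, and that reserved bit is why MAX_FILTER_PRED tops out at 16384. A stripped-down version of the layout and the parent decoding, with the names shortened:

    #include <stdint.h>

    #define PRED_INVALID  ((uint16_t)-1)     /* no child on this side */
    #define PRED_IS_RIGHT (1u << 15)         /* flag carried in the parent slot */

    struct pred_node {                       /* reduced filter_pred */
        uint16_t index;                      /* my slot in preds[] */
        uint16_t parent;                     /* parent slot, IS_RIGHT set for right children */
        uint16_t left;                       /* PRED_INVALID on leaves */
        uint16_t right;
    };

    /* Walk up one level and report which side we came from. */
    static uint16_t pred_parent(const struct pred_node *node, int *came_from_right)
    {
        *came_from_right = !!(node->parent & PRED_IS_RIGHT);
        return node->parent & ~PRED_IS_RIGHT;
    }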
@@ -751,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[];
751 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
752#include "trace_entries.h" 785#include "trace_entries.h"
753 786
787/* Only current can touch trace_recursion */
788#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
789#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
790
791/* Ring buffer has the 10 LSB bits to count */
792#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
793
794/* for function tracing recursion */
795#define TRACE_INTERNAL_BIT (1<<11)
796#define TRACE_GLOBAL_BIT (1<<12)
797
798#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
799#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
800#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
801
754#endif /* _LINUX_KERNEL_TRACE_H */ 802#endif /* _LINUX_KERNEL_TRACE_H */
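The recursion bookkeeping that ring_buffer.c used to do directly on current->trace_recursion is now wrapped in macros here: the low 10 bits count ring buffer recursion, and dedicated higher bits (TRACE_INTERNAL_BIT, TRACE_GLOBAL_BIT) flag function-tracer recursion. A compilable restatement of the layout on a plain variable, mirroring what each macro does:

    #include <assert.h>

    #define RECURSION_COUNT_MASK 0x3ff       /* low 10 bits: ring buffer depth */
    #define TRACE_INTERNAL_BIT   (1 << 11)
    #define TRACE_GLOBAL_BIT     (1 << 12)

    int main(void)
    {
        unsigned long rec = 0;               /* stand-in for current->trace_recursion */

        rec++;                               /* trace_recursion_inc() */
        rec |= TRACE_INTERNAL_BIT;           /* trace_recursion_set(TRACE_INTERNAL_BIT) */

        assert((rec & RECURSION_COUNT_MASK) == 1);  /* trace_recursion_buffer() */
        assert(rec & TRACE_INTERNAL_BIT);           /* trace_recursion_test() */

        rec &= ~TRACE_INTERNAL_BIT;          /* trace_recursion_clear(TRACE_INTERNAL_BIT) */
        rec--;                               /* trace_recursion_dec() */
        assert(rec == 0);
        return 0;
    }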
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 685a67d55db0..6302747a1398 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -46,7 +46,7 @@ u64 notrace trace_clock_local(void)
46} 46}
47 47
48/* 48/*
49 * trace_clock(): 'inbetween' trace clock. Not completely serialized, 49 * trace_clock(): 'between' trace clock. Not completely serialized,
50 * but not completely incorrect when crossing CPUs either. 50 * but not completely incorrect when crossing CPUs either.
51 * 51 *
52 * This is based on cpu_clock(), which will allow at most ~1 jiffy of 52 * This is based on cpu_clock(), which will allow at most ~1 jiffy of
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e3dfecaf13e6..e32744c84d94 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -27,7 +27,7 @@
27 * in the structure. 27 * in the structure.
28 * 28 *
29 * * for structures within structures, the format of the internal 29 * * for structures within structures, the format of the internal
30 * structure is layed out. This allows the internal structure 30 * structure is laid out. This allows the internal structure
31 * to be deciphered for the format file. Although these macros 31 * to be deciphered for the format file. Although these macros
32 * may become out of sync with the internal structure, they 32 * may become out of sync with the internal structure, they
33 * will create a compile error if it happens. Since the 33 * will create a compile error if it happens. Since the
@@ -53,7 +53,7 @@
53 */ 53 */
54 54
55/* 55/*
56 * Function trace entry - function address and parent function addres: 56 * Function trace entry - function address and parent function address:
57 */ 57 */
58FTRACE_ENTRY(function, ftrace_entry, 58FTRACE_ENTRY(function, ftrace_entry,
59 59
@@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
109 */ 109 */
110#define FTRACE_CTX_FIELDS \ 110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \ 111 __field( unsigned int, prev_pid ) \
112 __field( unsigned int, next_pid ) \
113 __field( unsigned int, next_cpu ) \
112 __field( unsigned char, prev_prio ) \ 114 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \ 115 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \ 116 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \ 117 __field( unsigned char, next_state )
117 __field( unsigned int, next_cpu )
118 118
119FTRACE_ENTRY(context_switch, ctx_switch_entry, 119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120 120
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
21/* Count the events in use (per event id, not per instance) */ 21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count; 22static int total_ref_count;
23 23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 48 struct perf_event *p_event)
26{ 49{
27 struct hlist_head *list; 50 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 51 int ret;
29 int cpu; 52 int cpu;
30 53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
31 p_event->tp_event = tp_event; 58 p_event->tp_event = tp_event;
32 if (tp_event->perf_refcount++ > 0) 59 if (tp_event->perf_refcount++ > 0)
33 return 0; 60 return 0;
34 61
62 ret = -ENOMEM;
63
35 list = alloc_percpu(struct hlist_head); 64 list = alloc_percpu(struct hlist_head);
36 if (!list) 65 if (!list)
37 goto fail; 66 goto fail;
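perf_trace_event_init() now runs every attach through perf_trace_event_perm() first: pure counting (no PERF_SAMPLE_RAW) is always fine, a per-task attach passes if the tracepoint is marked TRACE_EVENT_FL_CAP_ANY, and anything else asking for raw sample data needs CAP_SYS_ADMIN while the paranoid setting is in effect. The same decision condensed to a table of booleans; the struct and helper names here are illustrative, not kernel API:

    #include <errno.h>

    struct perm_query {              /* condensed inputs to the check */
        int wants_raw_samples;       /* PERF_SAMPLE_RAW requested */
        int task_attach;             /* attached to a task rather than a CPU */
        int event_cap_any;           /* TRACE_EVENT_FL_CAP_ANY on the tracepoint */
        int paranoid;                /* perf_paranoid_tracepoint_raw() */
        int has_admin;               /* capable(CAP_SYS_ADMIN) */
    };

    static int tracepoint_perm(const struct perm_query *q)
    {
        if (!q->wants_raw_samples)                /* counting only: nothing to leak */
            return 0;
        if (q->task_attach && q->event_cap_any)   /* explicitly allowed for this event */
            return 0;
        if (q->paranoid && !q->has_admin)         /* raw tracepoint data is root-only */
            return -EPERM;
        return 0;
    }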
@@ -42,11 +71,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 71 tp_event->perf_events = list;
43 72
44 if (!total_ref_count) { 73 if (!total_ref_count) {
45 char *buf; 74 char __percpu *buf;
46 int i; 75 int i;
47 76
48 for (i = 0; i < 4; i++) { 77 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 78 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 79 if (!buf)
51 goto fail; 80 goto fail;
52 81
@@ -65,7 +94,7 @@ fail:
65 if (!total_ref_count) { 94 if (!total_ref_count) {
66 int i; 95 int i;
67 96
68 for (i = 0; i < 4; i++) { 97 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 98 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 99 perf_trace_buf[i] = NULL;
71 } 100 }
@@ -101,22 +130,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 130 return ret;
102} 131}
103 132
104int perf_trace_enable(struct perf_event *p_event) 133int perf_trace_add(struct perf_event *p_event, int flags)
105{ 134{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 135 struct ftrace_event_call *tp_event = p_event->tp_event;
136 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 137 struct hlist_head *list;
108 138
109 list = tp_event->perf_events; 139 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 140 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 141 return -EINVAL;
112 142
113 list = this_cpu_ptr(list); 143 if (!(flags & PERF_EF_START))
144 p_event->hw.state = PERF_HES_STOPPED;
145
146 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 147 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 148
116 return 0; 149 return 0;
117} 150}
118 151
119void perf_trace_disable(struct perf_event *p_event) 152void perf_trace_del(struct perf_event *p_event, int flags)
120{ 153{
121 hlist_del_rcu(&p_event->hlist_entry); 154 hlist_del_rcu(&p_event->hlist_entry);
122} 155}
@@ -142,7 +175,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 175 tp_event->perf_events = NULL;
143 176
144 if (!--total_ref_count) { 177 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 178 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 179 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 180 perf_trace_buf[i] = NULL;
148 } 181 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..686ec399f2a8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
27 27
28DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
29 29
30DEFINE_MUTEX(event_storage_mutex);
31EXPORT_SYMBOL_GPL(event_storage_mutex);
32
33char event_storage[EVENT_STORAGE_SIZE];
34EXPORT_SYMBOL_GPL(event_storage);
35
30LIST_HEAD(ftrace_events); 36LIST_HEAD(ftrace_events);
31LIST_HEAD(ftrace_common_fields); 37LIST_HEAD(ftrace_common_fields);
32 38
@@ -110,7 +116,7 @@ static int trace_define_common_fields(void)
110 __common_field(unsigned char, flags); 116 __common_field(unsigned char, flags);
111 __common_field(unsigned char, preempt_count); 117 __common_field(unsigned char, preempt_count);
112 __common_field(int, pid); 118 __common_field(int, pid);
113 __common_field(int, lock_depth); 119 __common_field(int, padding);
114 120
115 return ret; 121 return ret;
116} 122}
@@ -320,6 +326,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
320{ 326{
321 return __ftrace_set_clr_event(NULL, system, event, set); 327 return __ftrace_set_clr_event(NULL, system, event, set);
322} 328}
329EXPORT_SYMBOL_GPL(trace_set_clr_event);
323 330
324/* 128 should be much more than enough */ 331/* 128 should be much more than enough */
325#define EVENT_BUF_SIZE 127 332#define EVENT_BUF_SIZE 127
@@ -600,21 +607,29 @@ out:
600 607
601enum { 608enum {
602 FORMAT_HEADER = 1, 609 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 610 FORMAT_FIELD_SEPERATOR = 2,
611 FORMAT_PRINTFMT = 3,
604}; 612};
605 613
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 614static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 615{
608 struct ftrace_event_call *call = m->private; 616 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 617 struct ftrace_event_field *field;
610 struct list_head *head; 618 struct list_head *common_head = &ftrace_common_fields;
619 struct list_head *head = trace_get_fields(call);
611 620
612 (*pos)++; 621 (*pos)++;
613 622
614 switch ((unsigned long)v) { 623 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 624 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 625 if (unlikely(list_empty(common_head)))
626 return NULL;
617 627
628 field = list_entry(common_head->prev,
629 struct ftrace_event_field, link);
630 return field;
631
632 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 633 if (unlikely(list_empty(head)))
619 return NULL; 634 return NULL;
620 635
@@ -626,31 +641,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 641 return NULL;
627 } 642 }
628 643
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 644 field = v;
638 /* 645 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 646 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 647 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 648 return (void *)FORMAT_PRINTFMT;
655 649
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 650 field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +682,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 682 seq_printf(m, "format:\n");
689 return 0; 683 return 0;
690 684
685 case FORMAT_FIELD_SEPERATOR:
686 seq_putc(m, '\n');
687 return 0;
688
691 case FORMAT_PRINTFMT: 689 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 690 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 691 call->print_fmt);
694 return 0; 692 return 0;
695 } 693 }
696 694
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 695 field = v;
708 696
709 /* 697 /*
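The format-file iterator drops the pointer-tagging trick (setting the LSB on the first event field) in favour of explicit tokens: FORMAT_HEADER starts the common fields, the new FORMAT_FIELD_SEPERATOR prints the blank line between common and event fields, and FORMAT_PRINTFMT finishes with the print format. A small program that produces the same output order, with the seq_file machinery collapsed into plain loops and made-up field names:

    #include <stdio.h>

    static const char *common_fields[] = { "common_pid", "common_flags", NULL };
    static const char *event_fields[]  = { "prev_pid", "next_pid", NULL };

    /* Same ordering the tokens produce: header, common fields, separator,
     * event fields, then the print format. */
    static void show_format(void)
    {
        int i;

        printf("format:\n");                         /* FORMAT_HEADER */
        for (i = 0; common_fields[i]; i++)
            printf("\tfield: %s;\n", common_fields[i]);
        printf("\n");                                /* FORMAT_FIELD_SEPERATOR */
        for (i = 0; event_fields[i]; i++)
            printf("\tfield: %s;\n", event_fields[i]);
        printf("\nprint fmt: \"...\"\n");            /* FORMAT_PRINTFMT */
    }

    int main(void) { show_format(); return 0; }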
@@ -951,6 +939,7 @@ static const struct file_operations ftrace_enable_fops = {
951 .open = tracing_open_generic, 939 .open = tracing_open_generic,
952 .read = event_enable_read, 940 .read = event_enable_read,
953 .write = event_enable_write, 941 .write = event_enable_write,
942 .llseek = default_llseek,
954}; 943};
955 944
956static const struct file_operations ftrace_event_format_fops = { 945static const struct file_operations ftrace_event_format_fops = {
@@ -963,29 +952,34 @@ static const struct file_operations ftrace_event_format_fops = {
963static const struct file_operations ftrace_event_id_fops = { 952static const struct file_operations ftrace_event_id_fops = {
964 .open = tracing_open_generic, 953 .open = tracing_open_generic,
965 .read = event_id_read, 954 .read = event_id_read,
955 .llseek = default_llseek,
966}; 956};
967 957
968static const struct file_operations ftrace_event_filter_fops = { 958static const struct file_operations ftrace_event_filter_fops = {
969 .open = tracing_open_generic, 959 .open = tracing_open_generic,
970 .read = event_filter_read, 960 .read = event_filter_read,
971 .write = event_filter_write, 961 .write = event_filter_write,
962 .llseek = default_llseek,
972}; 963};
973 964
974static const struct file_operations ftrace_subsystem_filter_fops = { 965static const struct file_operations ftrace_subsystem_filter_fops = {
975 .open = tracing_open_generic, 966 .open = tracing_open_generic,
976 .read = subsystem_filter_read, 967 .read = subsystem_filter_read,
977 .write = subsystem_filter_write, 968 .write = subsystem_filter_write,
969 .llseek = default_llseek,
978}; 970};
979 971
980static const struct file_operations ftrace_system_enable_fops = { 972static const struct file_operations ftrace_system_enable_fops = {
981 .open = tracing_open_generic, 973 .open = tracing_open_generic,
982 .read = system_enable_read, 974 .read = system_enable_read,
983 .write = system_enable_write, 975 .write = system_enable_write,
976 .llseek = default_llseek,
984}; 977};
985 978
986static const struct file_operations ftrace_show_header_fops = { 979static const struct file_operations ftrace_show_header_fops = {
987 .open = tracing_open_generic, 980 .open = tracing_open_generic,
988 .read = show_header, 981 .read = show_header,
982 .llseek = default_llseek,
989}; 983};
990 984
991static struct dentry *event_trace_events_dir(void) 985static struct dentry *event_trace_events_dir(void)
@@ -1291,7 +1285,7 @@ trace_create_file_ops(struct module *mod)
1291static void trace_module_add_events(struct module *mod) 1285static void trace_module_add_events(struct module *mod)
1292{ 1286{
1293 struct ftrace_module_file_ops *file_ops = NULL; 1287 struct ftrace_module_file_ops *file_ops = NULL;
1294 struct ftrace_event_call *call, *start, *end; 1288 struct ftrace_event_call **call, **start, **end;
1295 1289
1296 start = mod->trace_events; 1290 start = mod->trace_events;
1297 end = mod->trace_events + mod->num_trace_events; 1291 end = mod->trace_events + mod->num_trace_events;
@@ -1304,7 +1298,7 @@ static void trace_module_add_events(struct module *mod)
1304 return; 1298 return;
1305 1299
1306 for_each_event(call, start, end) { 1300 for_each_event(call, start, end) {
1307 __trace_add_event_call(call, mod, 1301 __trace_add_event_call(*call, mod,
1308 &file_ops->id, &file_ops->enable, 1302 &file_ops->id, &file_ops->enable,
1309 &file_ops->filter, &file_ops->format); 1303 &file_ops->filter, &file_ops->format);
1310 } 1304 }
@@ -1374,8 +1368,8 @@ static struct notifier_block trace_module_nb = {
1374 .priority = 0, 1368 .priority = 0,
1375}; 1369};
1376 1370
1377extern struct ftrace_event_call __start_ftrace_events[]; 1371extern struct ftrace_event_call *__start_ftrace_events[];
1378extern struct ftrace_event_call __stop_ftrace_events[]; 1372extern struct ftrace_event_call *__stop_ftrace_events[];
1379 1373
1380static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 1374static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1381 1375
@@ -1391,7 +1385,7 @@ __setup("trace_event=", setup_trace_event);
1391 1385
1392static __init int event_trace_init(void) 1386static __init int event_trace_init(void)
1393{ 1387{
1394 struct ftrace_event_call *call; 1388 struct ftrace_event_call **call;
1395 struct dentry *d_tracer; 1389 struct dentry *d_tracer;
1396 struct dentry *entry; 1390 struct dentry *entry;
1397 struct dentry *d_events; 1391 struct dentry *d_events;
@@ -1437,7 +1431,7 @@ static __init int event_trace_init(void)
1437 pr_warning("tracing: Failed to allocate common fields"); 1431 pr_warning("tracing: Failed to allocate common fields");
1438 1432
1439 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1433 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1440 __trace_add_event_call(call, NULL, &ftrace_event_id_fops, 1434 __trace_add_event_call(*call, NULL, &ftrace_event_id_fops,
1441 &ftrace_enable_fops, 1435 &ftrace_enable_fops,
1442 &ftrace_event_filter_fops, 1436 &ftrace_event_filter_fops,
1443 &ftrace_event_format_fops); 1437 &ftrace_event_format_fops);
@@ -1663,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata =
1663 1657
1664static __init void event_trace_self_test_with_function(void) 1658static __init void event_trace_self_test_with_function(void)
1665{ 1659{
1666 register_ftrace_function(&trace_ops); 1660 int ret;
1661 ret = register_ftrace_function(&trace_ops);
1662 if (WARN_ON(ret < 0)) {
1663 pr_info("Failed to enable function tracer for event tests\n");
1664 return;
1665 }
1667 pr_info("Running tests again, along with the function tracer\n"); 1666 pr_info("Running tests again, along with the function tracer\n");
1668 event_trace_self_tests(); 1667 event_trace_self_tests();
1669 unregister_ftrace_function(&trace_ops); 1668 unregister_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 36d40104b17f..8008ddcfbf20 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,9 +123,13 @@ struct filter_parse_state {
123 } operand; 123 } operand;
124}; 124};
125 125
126struct pred_stack {
127 struct filter_pred **preds;
128 int index;
129};
130
126#define DEFINE_COMPARISON_PRED(type) \ 131#define DEFINE_COMPARISON_PRED(type) \
127static int filter_pred_##type(struct filter_pred *pred, void *event, \ 132static int filter_pred_##type(struct filter_pred *pred, void *event) \
128 int val1, int val2) \
129{ \ 133{ \
130 type *addr = (type *)(event + pred->offset); \ 134 type *addr = (type *)(event + pred->offset); \
131 type val = (type)pred->val; \ 135 type val = (type)pred->val; \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
152} 156}
153 157
154#define DEFINE_EQUALITY_PRED(size) \ 158#define DEFINE_EQUALITY_PRED(size) \
155static int filter_pred_##size(struct filter_pred *pred, void *event, \ 159static int filter_pred_##size(struct filter_pred *pred, void *event) \
156 int val1, int val2) \
157{ \ 160{ \
158 u##size *addr = (u##size *)(event + pred->offset); \ 161 u##size *addr = (u##size *)(event + pred->offset); \
159 u##size val = (u##size)pred->val; \ 162 u##size val = (u##size)pred->val; \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
178DEFINE_EQUALITY_PRED(16); 181DEFINE_EQUALITY_PRED(16);
179DEFINE_EQUALITY_PRED(8); 182DEFINE_EQUALITY_PRED(8);
180 183
181static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
182 void *event __attribute((unused)),
183 int val1, int val2)
184{
185 return val1 && val2;
186}
187
188static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
189 void *event __attribute((unused)),
190 int val1, int val2)
191{
192 return val1 || val2;
193}
194
195/* Filter predicate for fixed sized arrays of characters */ 184/* Filter predicate for fixed sized arrays of characters */
196static int filter_pred_string(struct filter_pred *pred, void *event, 185static int filter_pred_string(struct filter_pred *pred, void *event)
197 int val1, int val2)
198{ 186{
199 char *addr = (char *)(event + pred->offset); 187 char *addr = (char *)(event + pred->offset);
200 int cmp, match; 188 int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
207} 195}
208 196
209/* Filter predicate for char * pointers */ 197/* Filter predicate for char * pointers */
210static int filter_pred_pchar(struct filter_pred *pred, void *event, 198static int filter_pred_pchar(struct filter_pred *pred, void *event)
211 int val1, int val2)
212{ 199{
213 char **addr = (char **)(event + pred->offset); 200 char **addr = (char **)(event + pred->offset);
214 int cmp, match; 201 int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
231 * and add it to the address of the entry, and at last we have 218 * and add it to the address of the entry, and at last we have
232 * the address of the string. 219 * the address of the string.
233 */ 220 */
234static int filter_pred_strloc(struct filter_pred *pred, void *event, 221static int filter_pred_strloc(struct filter_pred *pred, void *event)
235 int val1, int val2)
236{ 222{
237 u32 str_item = *(u32 *)(event + pred->offset); 223 u32 str_item = *(u32 *)(event + pred->offset);
238 int str_loc = str_item & 0xffff; 224 int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
247 return match; 233 return match;
248} 234}
249 235
250static int filter_pred_none(struct filter_pred *pred, void *event, 236static int filter_pred_none(struct filter_pred *pred, void *event)
251 int val1, int val2)
252{ 237{
253 return 0; 238 return 0;
254} 239}
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
377 pred->not ^= not; 362 pred->not ^= not;
378} 363}
379 364
365enum move_type {
366 MOVE_DOWN,
367 MOVE_UP_FROM_LEFT,
368 MOVE_UP_FROM_RIGHT
369};
370
371static struct filter_pred *
372get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
373 int index, enum move_type *move)
374{
375 if (pred->parent & FILTER_PRED_IS_RIGHT)
376 *move = MOVE_UP_FROM_RIGHT;
377 else
378 *move = MOVE_UP_FROM_LEFT;
379 pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
380
381 return pred;
382}
383
384/*
385 * A series of AND or ORs where found together. Instead of
386 * climbing up and down the tree branches, an array of the
387 * ops were made in order of checks. We can just move across
388 * the array and short circuit if needed.
389 */
390static int process_ops(struct filter_pred *preds,
391 struct filter_pred *op, void *rec)
392{
393 struct filter_pred *pred;
394 int match = 0;
395 int type;
396 int i;
397
398 /*
399 * Micro-optimization: We set type to true if op
400 * is an OR and false otherwise (AND). Then we
401 * just need to test if the match is equal to
402 * the type, and if it is, we can short circuit the
403 * rest of the checks:
404 *
405 * if ((match && op->op == OP_OR) ||
406 * (!match && op->op == OP_AND))
407 * return match;
408 */
409 type = op->op == OP_OR;
410
411 for (i = 0; i < op->val; i++) {
412 pred = &preds[op->ops[i]];
413 match = pred->fn(pred, rec);
414 if (!!match == type)
415 return match;
416 }
417 return match;
418}
419
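
The micro-optimization in process_ops() is easy to misread, so the following standalone check (ordinary userspace C, not part of the patch; the OP_AND/OP_OR values are local to the sketch) confirms that the compact test "!!match == type" is equivalent to the spelled-out AND/OR short-circuit condition given in the comment above.

#include <assert.h>
#include <stdio.h>

enum { OP_AND, OP_OR };

/* compact form used by process_ops() and filter_match_preds() */
static int compact(int op, int match)
{
        int type = (op == OP_OR);

        return !!match == type;
}

/* spelled-out form from the comment */
static int spelled_out(int op, int match)
{
        return (match && op == OP_OR) || (!match && op == OP_AND);
}

int main(void)
{
        int op, match;

        for (op = OP_AND; op <= OP_OR; op++)
                for (match = 0; match <= 1; match++)
                        assert(compact(op, match) == spelled_out(op, match));
        printf("compact test matches the expanded condition\n");
        return 0;
}
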
380/* return 1 if event matches, 0 otherwise (discard) */ 420/* return 1 if event matches, 0 otherwise (discard) */
381int filter_match_preds(struct event_filter *filter, void *rec) 421int filter_match_preds(struct event_filter *filter, void *rec)
382{ 422{
383 int match, top = 0, val1 = 0, val2 = 0; 423 int match = -1;
384 int stack[MAX_FILTER_PRED]; 424 enum move_type move = MOVE_DOWN;
425 struct filter_pred *preds;
385 struct filter_pred *pred; 426 struct filter_pred *pred;
386 int i; 427 struct filter_pred *root;
428 int n_preds;
429 int done = 0;
430
431 /* no filter is considered a match */
432 if (!filter)
433 return 1;
434
435 n_preds = filter->n_preds;
436
437 if (!n_preds)
438 return 1;
439
440 /*
441 * n_preds, root and filter->preds are protected with preemption disabled.
442 */
443 preds = rcu_dereference_sched(filter->preds);
444 root = rcu_dereference_sched(filter->root);
445 if (!root)
446 return 1;
447
448 pred = root;
387 449
388 for (i = 0; i < filter->n_preds; i++) { 450 /* match is currently meaningless */
389 pred = filter->preds[i]; 451 match = -1;
390 if (!pred->pop_n) { 452
391 match = pred->fn(pred, rec, val1, val2); 453 do {
392 stack[top++] = match; 454 switch (move) {
455 case MOVE_DOWN:
456 /* only AND and OR have children */
457 if (pred->left != FILTER_PRED_INVALID) {
458 /* If ops is set, then it was folded. */
459 if (!pred->ops) {
460 /* keep going down the left side */
461 pred = &preds[pred->left];
462 continue;
463 }
464 /* We can treat a folded op as a leaf node */
465 match = process_ops(preds, pred, rec);
466 } else
467 match = pred->fn(pred, rec);
468 /* If this pred is the only pred */
469 if (pred == root)
470 break;
471 pred = get_pred_parent(pred, preds,
472 pred->parent, &move);
473 continue;
474 case MOVE_UP_FROM_LEFT:
475 /*
476 * Check for short circuits.
477 *
478 * Optimization: !!match == (pred->op == OP_OR)
479 * is the same as:
480 * if ((match && pred->op == OP_OR) ||
481 * (!match && pred->op == OP_AND))
482 */
483 if (!!match == (pred->op == OP_OR)) {
484 if (pred == root)
485 break;
486 pred = get_pred_parent(pred, preds,
487 pred->parent, &move);
488 continue;
489 }
490 /* now go down the right side of the tree. */
491 pred = &preds[pred->right];
492 move = MOVE_DOWN;
493 continue;
494 case MOVE_UP_FROM_RIGHT:
495 /* We finished this equation. */
496 if (pred == root)
497 break;
498 pred = get_pred_parent(pred, preds,
499 pred->parent, &move);
393 continue; 500 continue;
394 } 501 }
395 if (pred->pop_n > top) { 502 done = 1;
396 WARN_ON_ONCE(1); 503 } while (!done);
397 return 0;
398 }
399 val1 = stack[--top];
400 val2 = stack[--top];
401 match = pred->fn(pred, rec, val1, val2);
402 stack[top++] = match;
403 }
404 504
405 return stack[--top]; 505 return match;
406} 506}
407EXPORT_SYMBOL_GPL(filter_match_preds); 507EXPORT_SYMBOL_GPL(filter_match_preds);
408 508
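
For readers who have not seen a parent-pointer tree walk before, here is a compressed userspace model of the same MOVE_DOWN / MOVE_UP_FROM_LEFT / MOVE_UP_FROM_RIGHT state machine, including the short circuit. It is only a sketch: the node layout, the IS_RIGHT bit value and the hard-coded leaf results are invented for illustration and carry none of the kernel's RCU, folding or per-field details.

#include <stdio.h>

#define IS_RIGHT        0x8000
#define INVALID         -1

enum { OP_LEAF, OP_AND, OP_OR };
enum step { MOVE_DOWN, MOVE_UP_FROM_LEFT, MOVE_UP_FROM_RIGHT };

struct node {
        int op;
        int val;                /* result of a leaf "predicate" */
        int left, right;        /* array indexes, INVALID for leaves */
        int parent;             /* parent index, IS_RIGHT set on right children */
};

static int eval(struct node *n, int root)
{
        enum step move = MOVE_DOWN;
        int cur = root, match = 0;

        for (;;) {
                switch (move) {
                case MOVE_DOWN:
                        if (n[cur].left != INVALID) {
                                cur = n[cur].left;      /* keep going down the left side */
                                continue;
                        }
                        match = n[cur].val;             /* leaf: "call" the predicate */
                        break;
                case MOVE_UP_FROM_LEFT:
                        /* short circuit: OR already true, AND already false */
                        if (!!match == (n[cur].op == OP_OR))
                                break;
                        cur = n[cur].right;
                        move = MOVE_DOWN;
                        continue;
                case MOVE_UP_FROM_RIGHT:
                        break;                          /* this subtree is finished */
                }
                if (cur == root)
                        return match;
                move = (n[cur].parent & IS_RIGHT) ?
                        MOVE_UP_FROM_RIGHT : MOVE_UP_FROM_LEFT;
                cur = n[cur].parent & ~IS_RIGHT;
        }
}

int main(void)
{
        /* tree for (1 || 0) && 0 */
        struct node n[] = {
                { OP_AND,  0, 1,       2,       0            }, /* 0: root */
                { OP_OR,   0, 3,       4,       0            }, /* 1: left child of 0 */
                { OP_LEAF, 0, INVALID, INVALID, 0 | IS_RIGHT }, /* 2: right child of 0 */
                { OP_LEAF, 1, INVALID, INVALID, 1            }, /* 3: left child of 1 */
                { OP_LEAF, 0, INVALID, INVALID, 1 | IS_RIGHT }, /* 4: right child of 1 */
        };

        printf("match = %d\n", eval(n, 0));     /* prints 0 */
        return 0;
}
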
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
414 514
415static void remove_filter_string(struct event_filter *filter) 515static void remove_filter_string(struct event_filter *filter)
416{ 516{
517 if (!filter)
518 return;
519
417 kfree(filter->filter_string); 520 kfree(filter->filter_string);
418 filter->filter_string = NULL; 521 filter->filter_string = NULL;
419} 522}
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
473 576
474void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) 577void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
475{ 578{
476 struct event_filter *filter = call->filter; 579 struct event_filter *filter;
477 580
478 mutex_lock(&event_mutex); 581 mutex_lock(&event_mutex);
582 filter = call->filter;
479 if (filter && filter->filter_string) 583 if (filter && filter->filter_string)
480 trace_seq_printf(s, "%s\n", filter->filter_string); 584 trace_seq_printf(s, "%s\n", filter->filter_string);
481 else 585 else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
486void print_subsystem_event_filter(struct event_subsystem *system, 590void print_subsystem_event_filter(struct event_subsystem *system,
487 struct trace_seq *s) 591 struct trace_seq *s)
488{ 592{
489 struct event_filter *filter = system->filter; 593 struct event_filter *filter;
490 594
491 mutex_lock(&event_mutex); 595 mutex_lock(&event_mutex);
596 filter = system->filter;
492 if (filter && filter->filter_string) 597 if (filter && filter->filter_string)
493 trace_seq_printf(s, "%s\n", filter->filter_string); 598 trace_seq_printf(s, "%s\n", filter->filter_string);
494 else 599 else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
539 pred->regex.len = 0; 644 pred->regex.len = 0;
540} 645}
541 646
542static int filter_set_pred(struct filter_pred *dest, 647static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
648{
649 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
650 if (!stack->preds)
651 return -ENOMEM;
652 stack->index = n_preds;
653 return 0;
654}
655
656static void __free_pred_stack(struct pred_stack *stack)
657{
658 kfree(stack->preds);
659 stack->index = 0;
660}
661
662static int __push_pred_stack(struct pred_stack *stack,
663 struct filter_pred *pred)
664{
665 int index = stack->index;
666
667 if (WARN_ON(index == 0))
668 return -ENOSPC;
669
670 stack->preds[--index] = pred;
671 stack->index = index;
672 return 0;
673}
674
675static struct filter_pred *
676__pop_pred_stack(struct pred_stack *stack)
677{
678 struct filter_pred *pred;
679 int index = stack->index;
680
681 pred = stack->preds[index++];
682 if (!pred)
683 return NULL;
684
685 stack->index = index;
686 return pred;
687}
688
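
replace_preds() below walks the postfix list produced by the parser and uses this small stack to wire leaves to their AND/OR parents; the removed event-time code used the same operand-stack discipline, popping two booleans per operator. A throwaway userspace analogue of that discipline, evaluating a postfix boolean string directly (the character encoding is invented for the example):

#include <stdio.h>

static int eval_postfix(const char *s)
{
        int stack[32];
        int top = 0;

        for (; *s; s++) {
                int a, b;

                switch (*s) {
                case '0':
                case '1':
                        stack[top++] = *s - '0';        /* push a leaf value */
                        break;
                case '&':
                case '|':
                        b = stack[--top];               /* pop two operands */
                        a = stack[--top];
                        stack[top++] = (*s == '&') ? (a && b) : (a || b);
                        break;
                }
        }
        return stack[--top];
}

int main(void)
{
        /* "10|0&" is postfix for (1 || 0) && 0 */
        printf("%d\n", eval_postfix("10|0&"));
        return 0;
}
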
689static int filter_set_pred(struct event_filter *filter,
690 int idx,
691 struct pred_stack *stack,
543 struct filter_pred *src, 692 struct filter_pred *src,
544 filter_pred_fn_t fn) 693 filter_pred_fn_t fn)
545{ 694{
695 struct filter_pred *dest = &filter->preds[idx];
696 struct filter_pred *left;
697 struct filter_pred *right;
698
546 *dest = *src; 699 *dest = *src;
547 if (src->field_name) { 700 if (src->field_name) {
548 dest->field_name = kstrdup(src->field_name, GFP_KERNEL); 701 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
550 return -ENOMEM; 703 return -ENOMEM;
551 } 704 }
552 dest->fn = fn; 705 dest->fn = fn;
706 dest->index = idx;
553 707
554 return 0; 708 if (dest->op == OP_OR || dest->op == OP_AND) {
709 right = __pop_pred_stack(stack);
710 left = __pop_pred_stack(stack);
711 if (!left || !right)
712 return -EINVAL;
713 /*
714 * If both children can be folded
715 * and they are the same op as this op or a leaf,
716 * then this op can be folded.
717 */
718 if (left->index & FILTER_PRED_FOLD &&
719 (left->op == dest->op ||
720 left->left == FILTER_PRED_INVALID) &&
721 right->index & FILTER_PRED_FOLD &&
722 (right->op == dest->op ||
723 right->left == FILTER_PRED_INVALID))
724 dest->index |= FILTER_PRED_FOLD;
725
726 dest->left = left->index & ~FILTER_PRED_FOLD;
727 dest->right = right->index & ~FILTER_PRED_FOLD;
728 left->parent = dest->index & ~FILTER_PRED_FOLD;
729 right->parent = dest->index | FILTER_PRED_IS_RIGHT;
730 } else {
731 /*
732 * Make dest->left invalid to be used as a quick
733 * way to know this is a leaf node.
734 */
735 dest->left = FILTER_PRED_INVALID;
736
737 /* All leaves allow folding the parent ops. */
738 dest->index |= FILTER_PRED_FOLD;
739 }
740
741 return __push_pred_stack(stack, dest);
555} 742}
556 743
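
The FILTER_PRED_FOLD bookkeeping above decides, bottom-up, whether a whole subtree can later be flattened into one ops[] array. A small recursive userspace sketch of the same rule, namely that a node folds when each child folds and is either a leaf or uses the same operator; the tree representation is made up for the example and is not the kernel's:

#include <stdio.h>

enum { OP_LEAF, OP_AND, OP_OR };

struct node {
        int op;
        struct node *left, *right;
};

/* A leaf always folds; an AND/OR folds if both children fold and each
 * child is a leaf or uses the same operator as this node. */
static int can_fold(struct node *n)
{
        if (n->op == OP_LEAF)
                return 1;
        if (!can_fold(n->left) || !can_fold(n->right))
                return 0;
        return (n->left->op == OP_LEAF || n->left->op == n->op) &&
               (n->right->op == OP_LEAF || n->right->op == n->op);
}

int main(void)
{
        struct node a = { OP_LEAF }, b = { OP_LEAF }, c = { OP_LEAF };
        struct node ab_and  = { OP_AND, &a, &b };
        struct node ab_or   = { OP_OR,  &a, &b };
        struct node all_and = { OP_AND, &ab_and, &c };  /* (a && b) && c */
        struct node mixed   = { OP_AND, &ab_or,  &c };  /* (a || b) && c */

        printf("(a && b) && c folds: %d\n", can_fold(&all_and));        /* 1 */
        printf("(a || b) && c folds: %d\n", can_fold(&mixed));          /* 0 */
        return 0;
}
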
557static void filter_disable_preds(struct ftrace_event_call *call) 744static void __free_preds(struct event_filter *filter)
558{ 745{
559 struct event_filter *filter = call->filter;
560 int i; 746 int i;
561 747
562 call->flags &= ~TRACE_EVENT_FL_FILTERED; 748 if (filter->preds) {
749 for (i = 0; i < filter->a_preds; i++)
750 kfree(filter->preds[i].field_name);
751 kfree(filter->preds);
752 filter->preds = NULL;
753 }
754 filter->a_preds = 0;
563 filter->n_preds = 0; 755 filter->n_preds = 0;
564
565 for (i = 0; i < MAX_FILTER_PRED; i++)
566 filter->preds[i]->fn = filter_pred_none;
567} 756}
568 757
569static void __free_preds(struct event_filter *filter) 758static void filter_disable(struct ftrace_event_call *call)
570{ 759{
571 int i; 760 call->flags &= ~TRACE_EVENT_FL_FILTERED;
761}
572 762
763static void __free_filter(struct event_filter *filter)
764{
573 if (!filter) 765 if (!filter)
574 return; 766 return;
575 767
576 for (i = 0; i < MAX_FILTER_PRED; i++) { 768 __free_preds(filter);
577 if (filter->preds[i])
578 filter_free_pred(filter->preds[i]);
579 }
580 kfree(filter->preds);
581 kfree(filter->filter_string); 769 kfree(filter->filter_string);
582 kfree(filter); 770 kfree(filter);
583} 771}
584 772
773/*
774 * Called when destroying the ftrace_event_call.
775 * The call is being freed, so we do not need to worry about
776 * the call being currently used. This is for module code removing
777 * the tracepoints from within it.
778 */
585void destroy_preds(struct ftrace_event_call *call) 779void destroy_preds(struct ftrace_event_call *call)
586{ 780{
587 __free_preds(call->filter); 781 __free_filter(call->filter);
588 call->filter = NULL; 782 call->filter = NULL;
589 call->flags &= ~TRACE_EVENT_FL_FILTERED;
590} 783}
591 784
592static struct event_filter *__alloc_preds(void) 785static struct event_filter *__alloc_filter(void)
593{ 786{
594 struct event_filter *filter; 787 struct event_filter *filter;
788
789 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
790 return filter;
791}
792
793static int __alloc_preds(struct event_filter *filter, int n_preds)
794{
595 struct filter_pred *pred; 795 struct filter_pred *pred;
596 int i; 796 int i;
597 797
598 filter = kzalloc(sizeof(*filter), GFP_KERNEL); 798 if (filter->preds)
599 if (!filter) 799 __free_preds(filter);
600 return ERR_PTR(-ENOMEM);
601 800
602 filter->n_preds = 0; 801 filter->preds =
802 kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
603 803
604 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
605 if (!filter->preds) 804 if (!filter->preds)
606 goto oom; 805 return -ENOMEM;
607 806
608 for (i = 0; i < MAX_FILTER_PRED; i++) { 807 filter->a_preds = n_preds;
609 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 808 filter->n_preds = 0;
610 if (!pred) 809
611 goto oom; 810 for (i = 0; i < n_preds; i++) {
811 pred = &filter->preds[i];
612 pred->fn = filter_pred_none; 812 pred->fn = filter_pred_none;
613 filter->preds[i] = pred;
614 } 813 }
615 814
616 return filter;
617
618oom:
619 __free_preds(filter);
620 return ERR_PTR(-ENOMEM);
621}
622
623static int init_preds(struct ftrace_event_call *call)
624{
625 if (call->filter)
626 return 0;
627
628 call->flags &= ~TRACE_EVENT_FL_FILTERED;
629 call->filter = __alloc_preds();
630 if (IS_ERR(call->filter))
631 return PTR_ERR(call->filter);
632
633 return 0; 815 return 0;
634} 816}
635 817
636static int init_subsystem_preds(struct event_subsystem *system) 818static void filter_free_subsystem_preds(struct event_subsystem *system)
637{ 819{
638 struct ftrace_event_call *call; 820 struct ftrace_event_call *call;
639 int err;
640 821
641 list_for_each_entry(call, &ftrace_events, list) { 822 list_for_each_entry(call, &ftrace_events, list) {
642 if (strcmp(call->class->system, system->name) != 0) 823 if (strcmp(call->class->system, system->name) != 0)
643 continue; 824 continue;
644 825
645 err = init_preds(call); 826 filter_disable(call);
646 if (err) 827 remove_filter_string(call->filter);
647 return err;
648 } 828 }
649
650 return 0;
651} 829}
652 830
653static void filter_free_subsystem_preds(struct event_subsystem *system) 831static void filter_free_subsystem_filters(struct event_subsystem *system)
654{ 832{
655 struct ftrace_event_call *call; 833 struct ftrace_event_call *call;
656 834
657 list_for_each_entry(call, &ftrace_events, list) { 835 list_for_each_entry(call, &ftrace_events, list) {
658 if (strcmp(call->class->system, system->name) != 0) 836 if (strcmp(call->class->system, system->name) != 0)
659 continue; 837 continue;
660 838 __free_filter(call->filter);
661 filter_disable_preds(call); 839 call->filter = NULL;
662 remove_filter_string(call->filter);
663 } 840 }
664} 841}
665 842
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
667 struct ftrace_event_call *call, 844 struct ftrace_event_call *call,
668 struct event_filter *filter, 845 struct event_filter *filter,
669 struct filter_pred *pred, 846 struct filter_pred *pred,
847 struct pred_stack *stack,
670 filter_pred_fn_t fn) 848 filter_pred_fn_t fn)
671{ 849{
672 int idx, err; 850 int idx, err;
673 851
674 if (filter->n_preds == MAX_FILTER_PRED) { 852 if (WARN_ON(filter->n_preds == filter->a_preds)) {
675 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 853 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
676 return -ENOSPC; 854 return -ENOSPC;
677 } 855 }
678 856
679 idx = filter->n_preds; 857 idx = filter->n_preds;
680 filter_clear_pred(filter->preds[idx]); 858 filter_clear_pred(&filter->preds[idx]);
681 err = filter_set_pred(filter->preds[idx], pred, fn); 859 err = filter_set_pred(filter, idx, stack, pred, fn);
682 if (err) 860 if (err)
683 return err; 861 return err;
684 862
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
763 struct ftrace_event_call *call, 941 struct ftrace_event_call *call,
764 struct event_filter *filter, 942 struct event_filter *filter,
765 struct filter_pred *pred, 943 struct filter_pred *pred,
944 struct pred_stack *stack,
766 bool dry_run) 945 bool dry_run)
767{ 946{
768 struct ftrace_event_field *field; 947 struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
770 unsigned long long val; 949 unsigned long long val;
771 int ret; 950 int ret;
772 951
773 pred->fn = filter_pred_none; 952 fn = pred->fn = filter_pred_none;
774 953
775 if (pred->op == OP_AND) { 954 if (pred->op == OP_AND)
776 pred->pop_n = 2;
777 fn = filter_pred_and;
778 goto add_pred_fn; 955 goto add_pred_fn;
779 } else if (pred->op == OP_OR) { 956 else if (pred->op == OP_OR)
780 pred->pop_n = 2;
781 fn = filter_pred_or;
782 goto add_pred_fn; 957 goto add_pred_fn;
783 }
784 958
785 field = find_event_field(call, pred->field_name); 959 field = find_event_field(call, pred->field_name);
786 if (!field) { 960 if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
829 1003
830add_pred_fn: 1004add_pred_fn:
831 if (!dry_run) 1005 if (!dry_run)
832 return filter_add_pred_fn(ps, call, filter, pred, fn); 1006 return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
833 return 0; 1007 return 0;
834} 1008}
835 1009
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
1187 return 0; 1361 return 0;
1188} 1362}
1189 1363
1364static int count_preds(struct filter_parse_state *ps)
1365{
1366 struct postfix_elt *elt;
1367 int n_preds = 0;
1368
1369 list_for_each_entry(elt, &ps->postfix, list) {
1370 if (elt->op == OP_NONE)
1371 continue;
1372 n_preds++;
1373 }
1374
1375 return n_preds;
1376}
1377
1378/*
1379 * The tree is walked when an event is filtered. If the tree is not correctly
1380 * built, the walk may loop forever. Check here that the walk does
1381 * indeed terminate.
1382 */
1383static int check_pred_tree(struct event_filter *filter,
1384 struct filter_pred *root)
1385{
1386 struct filter_pred *preds;
1387 struct filter_pred *pred;
1388 enum move_type move = MOVE_DOWN;
1389 int count = 0;
1390 int done = 0;
1391 int max;
1392
1393 /*
1394 * A node can be hit at most three times:
1395 * once going down, once coming up from the left, and
1396 * once coming up from the right. This is more than enough
1397 * since leaves are only hit a single time.
1398 */
1399 max = 3 * filter->n_preds;
1400
1401 preds = filter->preds;
1402 if (!preds)
1403 return -EINVAL;
1404 pred = root;
1405
1406 do {
1407 if (WARN_ON(count++ > max))
1408 return -EINVAL;
1409
1410 switch (move) {
1411 case MOVE_DOWN:
1412 if (pred->left != FILTER_PRED_INVALID) {
1413 pred = &preds[pred->left];
1414 continue;
1415 }
1416 /* A leaf at the root is just a leaf in the tree */
1417 if (pred == root)
1418 break;
1419 pred = get_pred_parent(pred, preds,
1420 pred->parent, &move);
1421 continue;
1422 case MOVE_UP_FROM_LEFT:
1423 pred = &preds[pred->right];
1424 move = MOVE_DOWN;
1425 continue;
1426 case MOVE_UP_FROM_RIGHT:
1427 if (pred == root)
1428 break;
1429 pred = get_pred_parent(pred, preds,
1430 pred->parent, &move);
1431 continue;
1432 }
1433 done = 1;
1434 } while (!done);
1435
1436 /* We are fine. */
1437 return 0;
1438}
1439
1440static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
1441{
1442 struct filter_pred *pred;
1443 enum move_type move = MOVE_DOWN;
1444 int count = 0;
1445 int done = 0;
1446
1447 pred = root;
1448
1449 do {
1450 switch (move) {
1451 case MOVE_DOWN:
1452 if (pred->left != FILTER_PRED_INVALID) {
1453 pred = &preds[pred->left];
1454 continue;
1455 }
1456 /* A leaf at the root is just a leaf in the tree */
1457 if (pred == root)
1458 return 1;
1459 count++;
1460 pred = get_pred_parent(pred, preds,
1461 pred->parent, &move);
1462 continue;
1463 case MOVE_UP_FROM_LEFT:
1464 pred = &preds[pred->right];
1465 move = MOVE_DOWN;
1466 continue;
1467 case MOVE_UP_FROM_RIGHT:
1468 if (pred == root)
1469 break;
1470 pred = get_pred_parent(pred, preds,
1471 pred->parent, &move);
1472 continue;
1473 }
1474 done = 1;
1475 } while (!done);
1476
1477 return count;
1478}
1479
1480static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1481{
1482 struct filter_pred *pred;
1483 enum move_type move = MOVE_DOWN;
1484 int count = 0;
1485 int children;
1486 int done = 0;
1487
1488 /* No need to keep the fold flag */
1489 root->index &= ~FILTER_PRED_FOLD;
1490
1491 /* If the root is a leaf then do nothing */
1492 if (root->left == FILTER_PRED_INVALID)
1493 return 0;
1494
1495 /* count the children */
1496 children = count_leafs(preds, &preds[root->left]);
1497 children += count_leafs(preds, &preds[root->right]);
1498
1499 root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
1500 if (!root->ops)
1501 return -ENOMEM;
1502
1503 root->val = children;
1504
1505 pred = root;
1506 do {
1507 switch (move) {
1508 case MOVE_DOWN:
1509 if (pred->left != FILTER_PRED_INVALID) {
1510 pred = &preds[pred->left];
1511 continue;
1512 }
1513 if (WARN_ON(count == children))
1514 return -EINVAL;
1515 pred->index &= ~FILTER_PRED_FOLD;
1516 root->ops[count++] = pred->index;
1517 pred = get_pred_parent(pred, preds,
1518 pred->parent, &move);
1519 continue;
1520 case MOVE_UP_FROM_LEFT:
1521 pred = &preds[pred->right];
1522 move = MOVE_DOWN;
1523 continue;
1524 case MOVE_UP_FROM_RIGHT:
1525 if (pred == root)
1526 break;
1527 pred = get_pred_parent(pred, preds,
1528 pred->parent, &move);
1529 continue;
1530 }
1531 done = 1;
1532 } while (!done);
1533
1534 return 0;
1535}
1536
1537/*
1538 * To optimize the processing of the ops, if we have several "ors" or
1539 * "ands" together, we can put them in an array and process them all
1540 * together speeding up the filter logic.
1541 */
1542static int fold_pred_tree(struct event_filter *filter,
1543 struct filter_pred *root)
1544{
1545 struct filter_pred *preds;
1546 struct filter_pred *pred;
1547 enum move_type move = MOVE_DOWN;
1548 int done = 0;
1549 int err;
1550
1551 preds = filter->preds;
1552 if (!preds)
1553 return -EINVAL;
1554 pred = root;
1555
1556 do {
1557 switch (move) {
1558 case MOVE_DOWN:
1559 if (pred->index & FILTER_PRED_FOLD) {
1560 err = fold_pred(preds, pred);
1561 if (err)
1562 return err;
1563 /* Folded nodes are like leaves */
1564 } else if (pred->left != FILTER_PRED_INVALID) {
1565 pred = &preds[pred->left];
1566 continue;
1567 }
1568
1569 /* A leaf at the root is just a leaf in the tree */
1570 if (pred == root)
1571 break;
1572 pred = get_pred_parent(pred, preds,
1573 pred->parent, &move);
1574 continue;
1575 case MOVE_UP_FROM_LEFT:
1576 pred = &preds[pred->right];
1577 move = MOVE_DOWN;
1578 continue;
1579 case MOVE_UP_FROM_RIGHT:
1580 if (pred == root)
1581 break;
1582 pred = get_pred_parent(pred, preds,
1583 pred->parent, &move);
1584 continue;
1585 }
1586 done = 1;
1587 } while (!done);
1588
1589 return 0;
1590}
1591
1190static int replace_preds(struct ftrace_event_call *call, 1592static int replace_preds(struct ftrace_event_call *call,
1191 struct event_filter *filter, 1593 struct event_filter *filter,
1192 struct filter_parse_state *ps, 1594 struct filter_parse_state *ps,
@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
1195{ 1597{
1196 char *operand1 = NULL, *operand2 = NULL; 1598 char *operand1 = NULL, *operand2 = NULL;
1197 struct filter_pred *pred; 1599 struct filter_pred *pred;
1600 struct filter_pred *root;
1198 struct postfix_elt *elt; 1601 struct postfix_elt *elt;
1602 struct pred_stack stack = { }; /* init to NULL */
1199 int err; 1603 int err;
1200 int n_preds = 0; 1604 int n_preds = 0;
1201 1605
1606 n_preds = count_preds(ps);
1607 if (n_preds >= MAX_FILTER_PRED) {
1608 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1609 return -ENOSPC;
1610 }
1611
1202 err = check_preds(ps); 1612 err = check_preds(ps);
1203 if (err) 1613 if (err)
1204 return err; 1614 return err;
1205 1615
1616 if (!dry_run) {
1617 err = __alloc_pred_stack(&stack, n_preds);
1618 if (err)
1619 return err;
1620 err = __alloc_preds(filter, n_preds);
1621 if (err)
1622 goto fail;
1623 }
1624
1625 n_preds = 0;
1206 list_for_each_entry(elt, &ps->postfix, list) { 1626 list_for_each_entry(elt, &ps->postfix, list) {
1207 if (elt->op == OP_NONE) { 1627 if (elt->op == OP_NONE) {
1208 if (!operand1) 1628 if (!operand1)
@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
1211 operand2 = elt->operand; 1631 operand2 = elt->operand;
1212 else { 1632 else {
1213 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); 1633 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
1214 return -EINVAL; 1634 err = -EINVAL;
1635 goto fail;
1215 } 1636 }
1216 continue; 1637 continue;
1217 } 1638 }
1218 1639
1219 if (n_preds++ == MAX_FILTER_PRED) { 1640 if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
1220 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 1641 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1221 return -ENOSPC; 1642 err = -ENOSPC;
1643 goto fail;
1222 } 1644 }
1223 1645
1224 if (elt->op == OP_AND || elt->op == OP_OR) { 1646 if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
1228 1650
1229 if (!operand1 || !operand2) { 1651 if (!operand1 || !operand2) {
1230 parse_error(ps, FILT_ERR_MISSING_FIELD, 0); 1652 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1231 return -EINVAL; 1653 err = -EINVAL;
1654 goto fail;
1232 } 1655 }
1233 1656
1234 pred = create_pred(elt->op, operand1, operand2); 1657 pred = create_pred(elt->op, operand1, operand2);
1235add_pred: 1658add_pred:
1236 if (!pred) 1659 if (!pred) {
1237 return -ENOMEM; 1660 err = -ENOMEM;
1238 err = filter_add_pred(ps, call, filter, pred, dry_run); 1661 goto fail;
1662 }
1663 err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
1239 filter_free_pred(pred); 1664 filter_free_pred(pred);
1240 if (err) 1665 if (err)
1241 return err; 1666 goto fail;
1242 1667
1243 operand1 = operand2 = NULL; 1668 operand1 = operand2 = NULL;
1244 } 1669 }
1245 1670
1246 return 0; 1671 if (!dry_run) {
1672 /* We should have one item left on the stack */
1673 pred = __pop_pred_stack(&stack);
1674 if (!pred)
1675 return -EINVAL;
1676 /* This item is where we start from in matching */
1677 root = pred;
1678 /* Make sure the stack is empty */
1679 pred = __pop_pred_stack(&stack);
1680 if (WARN_ON(pred)) {
1681 err = -EINVAL;
1682 filter->root = NULL;
1683 goto fail;
1684 }
1685 err = check_pred_tree(filter, root);
1686 if (err)
1687 goto fail;
1688
1689 /* Optimize the tree */
1690 err = fold_pred_tree(filter, root);
1691 if (err)
1692 goto fail;
1693
1694 /* We don't set root until we know it works */
1695 barrier();
1696 filter->root = root;
1697 }
1698
1699 err = 0;
1700fail:
1701 __free_pred_stack(&stack);
1702 return err;
1247} 1703}
1248 1704
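
The "barrier(); filter->root = root;" at the end of replace_preds() is the usual publish-after-initialize pattern: everything the walker will reach from the root is written before the root itself becomes visible. A stripped-down userspace illustration of that ordering intent, where barrier() is a plain GCC compiler barrier standing in for the kernel macro and the data layout is invented:

#include <stdio.h>

/* stand-in for the kernel's compiler barrier */
#define barrier()       __asm__ __volatile__("" ::: "memory")

struct pred {
        int value;
};

static struct pred pool[4];     /* the preds array a walker would index */
static struct pred *root;       /* readers start from here; NULL = no filter */

static void build_and_publish(void)
{
        /* fully initialize everything reachable from the root ... */
        pool[0].value = 1;
        pool[1].value = 0;

        /* ... keep the compiler from sinking those stores below the
         * store that makes the tree visible ... */
        barrier();

        /* ... and only then publish the entry point */
        root = &pool[0];
}

int main(void)
{
        build_and_publish();
        printf("root value: %d\n", root->value);
        return 0;
}
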
1705struct filter_list {
1706 struct list_head list;
1707 struct event_filter *filter;
1708};
1709
1249static int replace_system_preds(struct event_subsystem *system, 1710static int replace_system_preds(struct event_subsystem *system,
1250 struct filter_parse_state *ps, 1711 struct filter_parse_state *ps,
1251 char *filter_string) 1712 char *filter_string)
1252{ 1713{
1253 struct ftrace_event_call *call; 1714 struct ftrace_event_call *call;
1715 struct filter_list *filter_item;
1716 struct filter_list *tmp;
1717 LIST_HEAD(filter_list);
1254 bool fail = true; 1718 bool fail = true;
1255 int err; 1719 int err;
1256 1720
1257 list_for_each_entry(call, &ftrace_events, list) { 1721 list_for_each_entry(call, &ftrace_events, list) {
1258 struct event_filter *filter = call->filter;
1259 1722
1260 if (strcmp(call->class->system, system->name) != 0) 1723 if (strcmp(call->class->system, system->name) != 0)
1261 continue; 1724 continue;
1262 1725
1263 /* try to see if the filter can be applied */ 1726 /*
1264 err = replace_preds(call, filter, ps, filter_string, true); 1727 * Try to see if the filter can be applied
1728 * (filter arg is ignored on dry_run)
1729 */
1730 err = replace_preds(call, NULL, ps, filter_string, true);
1265 if (err) 1731 if (err)
1732 goto fail;
1733 }
1734
1735 list_for_each_entry(call, &ftrace_events, list) {
1736 struct event_filter *filter;
1737
1738 if (strcmp(call->class->system, system->name) != 0)
1266 continue; 1739 continue;
1267 1740
1268 /* really apply the filter */ 1741 filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
1269 filter_disable_preds(call); 1742 if (!filter_item)
1270 err = replace_preds(call, filter, ps, filter_string, false); 1743 goto fail_mem;
1744
1745 list_add_tail(&filter_item->list, &filter_list);
1746
1747 filter_item->filter = __alloc_filter();
1748 if (!filter_item->filter)
1749 goto fail_mem;
1750 filter = filter_item->filter;
1751
1752 /* Can only fail on no memory */
1753 err = replace_filter_string(filter, filter_string);
1271 if (err) 1754 if (err)
1272 filter_disable_preds(call); 1755 goto fail_mem;
1273 else { 1756
1757 err = replace_preds(call, filter, ps, filter_string, false);
1758 if (err) {
1759 filter_disable(call);
1760 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1761 append_filter_err(ps, filter);
1762 } else
1274 call->flags |= TRACE_EVENT_FL_FILTERED; 1763 call->flags |= TRACE_EVENT_FL_FILTERED;
1275 replace_filter_string(filter, filter_string); 1764 /*
1276 } 1765 * Regardless of if this returned an error, we still
1766 * replace the filter for the call.
1767 */
1768 filter = call->filter;
1769 call->filter = filter_item->filter;
1770 filter_item->filter = filter;
1771
1277 fail = false; 1772 fail = false;
1278 } 1773 }
1279 1774
1280 if (fail) { 1775 if (fail)
1281 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 1776 goto fail;
1282 return -EINVAL; 1777
1778 /*
1779 * The calls can still be using the old filters.
1780 * Do a synchronize_sched() to ensure all calls are
1781 * done with them before we free them.
1782 */
1783 synchronize_sched();
1784 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1785 __free_filter(filter_item->filter);
1786 list_del(&filter_item->list);
1787 kfree(filter_item);
1283 } 1788 }
1284 return 0; 1789 return 0;
1790 fail:
1791 /* No call succeeded */
1792 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1793 list_del(&filter_item->list);
1794 kfree(filter_item);
1795 }
1796 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1797 return -EINVAL;
1798 fail_mem:
1799 /* If any call succeeded, we still need to sync */
1800 if (!fail)
1801 synchronize_sched();
1802 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1803 __free_filter(filter_item->filter);
1804 list_del(&filter_item->list);
1805 kfree(filter_item);
1806 }
1807 return -ENOMEM;
1285} 1808}
1286 1809
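
replace_system_preds(), and apply_event_filter() further down, never edit a filter that tracers may be reading: they build a complete replacement, swap the pointer, wait with synchronize_sched() until anyone still holding the old pointer has finished, and only then free it. The following userspace fragment shows the shape of that lifetime rule with the kernel primitive stubbed out so the sketch stays self-contained; the stub does nothing and is only a placeholder.

#include <stdio.h>
#include <stdlib.h>

struct event_filter {
        const char *filter_string;
};

/* Placeholder: the real synchronize_sched() waits until every CPU has
 * left the preempt-disabled region that filters are matched under. */
static void synchronize_sched_stub(void) { }

static struct event_filter *current_filter;     /* what the "tracer" reads */

static void set_filter(const char *str)
{
        struct event_filter *new_filter, *old;

        new_filter = calloc(1, sizeof(*new_filter));
        if (!new_filter)
                return;
        new_filter->filter_string = str;

        old = current_filter;
        current_filter = new_filter;    /* swap the pointer first ... */

        synchronize_sched_stub();       /* ... wait out the old readers ... */

        free(old);                      /* ... then freeing is safe */
}

int main(void)
{
        set_filter("prev_pid == 0");
        set_filter("common_pid != 0");
        printf("active filter: %s\n", current_filter->filter_string);
        return 0;
}
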
1287int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1810int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1288{ 1811{
1289 int err;
1290 struct filter_parse_state *ps; 1812 struct filter_parse_state *ps;
1813 struct event_filter *filter;
1814 struct event_filter *tmp;
1815 int err = 0;
1291 1816
1292 mutex_lock(&event_mutex); 1817 mutex_lock(&event_mutex);
1293 1818
1294 err = init_preds(call);
1295 if (err)
1296 goto out_unlock;
1297
1298 if (!strcmp(strstrip(filter_string), "0")) { 1819 if (!strcmp(strstrip(filter_string), "0")) {
1299 filter_disable_preds(call); 1820 filter_disable(call);
1300 remove_filter_string(call->filter); 1821 filter = call->filter;
1822 if (!filter)
1823 goto out_unlock;
1824 call->filter = NULL;
1825 /* Make sure the filter is not being used */
1826 synchronize_sched();
1827 __free_filter(filter);
1301 goto out_unlock; 1828 goto out_unlock;
1302 } 1829 }
1303 1830
@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1306 if (!ps) 1833 if (!ps)
1307 goto out_unlock; 1834 goto out_unlock;
1308 1835
1309 filter_disable_preds(call); 1836 filter = __alloc_filter();
1310 replace_filter_string(call->filter, filter_string); 1837 if (!filter) {
1838 kfree(ps);
1839 goto out_unlock;
1840 }
1841
1842 replace_filter_string(filter, filter_string);
1311 1843
1312 parse_init(ps, filter_ops, filter_string); 1844 parse_init(ps, filter_ops, filter_string);
1313 err = filter_parse(ps); 1845 err = filter_parse(ps);
1314 if (err) { 1846 if (err) {
1315 append_filter_err(ps, call->filter); 1847 append_filter_err(ps, filter);
1316 goto out; 1848 goto out;
1317 } 1849 }
1318 1850
1319 err = replace_preds(call, call->filter, ps, filter_string, false); 1851 err = replace_preds(call, filter, ps, filter_string, false);
1320 if (err) 1852 if (err) {
1321 append_filter_err(ps, call->filter); 1853 filter_disable(call);
1322 else 1854 append_filter_err(ps, filter);
1855 } else
1323 call->flags |= TRACE_EVENT_FL_FILTERED; 1856 call->flags |= TRACE_EVENT_FL_FILTERED;
1324out: 1857out:
1858 /*
1859 * Always swap the call filter with the new filter
1860 * even if there was an error. If there was an error
1861 * in the filter, we disable the filter and show the error
1862 * string
1863 */
1864 tmp = call->filter;
1865 call->filter = filter;
1866 if (tmp) {
1867 /* Make sure the call is done with the filter */
1868 synchronize_sched();
1869 __free_filter(tmp);
1870 }
1325 filter_opstack_clear(ps); 1871 filter_opstack_clear(ps);
1326 postfix_clear(ps); 1872 postfix_clear(ps);
1327 kfree(ps); 1873 kfree(ps);
@@ -1334,18 +1880,21 @@ out_unlock:
1334int apply_subsystem_event_filter(struct event_subsystem *system, 1880int apply_subsystem_event_filter(struct event_subsystem *system,
1335 char *filter_string) 1881 char *filter_string)
1336{ 1882{
1337 int err;
1338 struct filter_parse_state *ps; 1883 struct filter_parse_state *ps;
1884 struct event_filter *filter;
1885 int err = 0;
1339 1886
1340 mutex_lock(&event_mutex); 1887 mutex_lock(&event_mutex);
1341 1888
1342 err = init_subsystem_preds(system);
1343 if (err)
1344 goto out_unlock;
1345
1346 if (!strcmp(strstrip(filter_string), "0")) { 1889 if (!strcmp(strstrip(filter_string), "0")) {
1347 filter_free_subsystem_preds(system); 1890 filter_free_subsystem_preds(system);
1348 remove_filter_string(system->filter); 1891 remove_filter_string(system->filter);
1892 filter = system->filter;
1893 system->filter = NULL;
1894 /* Ensure all filters are no longer used */
1895 synchronize_sched();
1896 filter_free_subsystem_filters(system);
1897 __free_filter(filter);
1349 goto out_unlock; 1898 goto out_unlock;
1350 } 1899 }
1351 1900
@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1354 if (!ps) 1903 if (!ps)
1355 goto out_unlock; 1904 goto out_unlock;
1356 1905
1357 replace_filter_string(system->filter, filter_string); 1906 filter = __alloc_filter();
1907 if (!filter)
1908 goto out;
1909
1910 replace_filter_string(filter, filter_string);
1911 /*
1912 * No event actually uses the system filter
1913 * we can free it without synchronize_sched().
1914 */
1915 __free_filter(system->filter);
1916 system->filter = filter;
1358 1917
1359 parse_init(ps, filter_ops, filter_string); 1918 parse_init(ps, filter_ops, filter_string);
1360 err = filter_parse(ps); 1919 err = filter_parse(ps);
@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
1384 struct event_filter *filter = event->filter; 1943 struct event_filter *filter = event->filter;
1385 1944
1386 event->filter = NULL; 1945 event->filter = NULL;
1387 __free_preds(filter); 1946 __free_filter(filter);
1388} 1947}
1389 1948
1390int ftrace_profile_set_filter(struct perf_event *event, int event_id, 1949int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1410 if (event->filter) 1969 if (event->filter)
1411 goto out_unlock; 1970 goto out_unlock;
1412 1971
1413 filter = __alloc_preds(); 1972 filter = __alloc_filter();
1414 if (IS_ERR(filter)) { 1973 if (!filter) {
1415 err = PTR_ERR(filter); 1974 err = PTR_ERR(filter);
1416 goto out_unlock; 1975 goto out_unlock;
1417 } 1976 }
@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1419 err = -ENOMEM; 1978 err = -ENOMEM;
1420 ps = kzalloc(sizeof(*ps), GFP_KERNEL); 1979 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1421 if (!ps) 1980 if (!ps)
1422 goto free_preds; 1981 goto free_filter;
1423 1982
1424 parse_init(ps, filter_ops, filter_str); 1983 parse_init(ps, filter_ops, filter_str);
1425 err = filter_parse(ps); 1984 err = filter_parse(ps);
@@ -1435,9 +1994,9 @@ free_ps:
1435 postfix_clear(ps); 1994 postfix_clear(ps);
1436 kfree(ps); 1995 kfree(ps);
1437 1996
1438free_preds: 1997free_filter:
1439 if (err) 1998 if (err)
1440 __free_preds(filter); 1999 __free_filter(filter);
1441 2000
1442out_unlock: 2001out_unlock:
1443 mutex_unlock(&event_mutex); 2002 mutex_unlock(&event_mutex);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44deaac25..bbeec31e0ae3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \
83 83
84#undef __array 84#undef __array
85#define __array(type, item, len) \ 85#define __array(type, item, len) \
86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 do { \
87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
88 mutex_lock(&event_storage_mutex); \
89 snprintf(event_storage, sizeof(event_storage), \
90 "%s[%d]", #type, len); \
91 ret = trace_define_field(event_call, event_storage, #item, \
88 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
89 sizeof(field.item), \ 93 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \ 94 is_signed_type(type), FILTER_OTHER); \
91 if (ret) \ 95 mutex_unlock(&event_storage_mutex); \
92 return ret; 96 if (ret) \
97 return ret; \
98 } while (0);
93 99
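
A side note on the reshaped __array() macro: multi-statement macros are wrapped in do { ... } while (0) so that they behave as a single statement wherever one is expected. A tiny standalone demonstration (names invented) of why the bare-brace version would break under if/else:

#include <stdio.h>

/* Without the do/while wrapper this macro could not be used safely as
 * the body of an if/else: the semicolon the caller writes after the
 * braces would end the if statement and orphan the else. */
#define REPORT(name, value)                     \
        do {                                    \
                printf("%s = ", name);          \
                printf("%d\n", value);          \
        } while (0)

int main(void)
{
        int ok = 1;

        if (ok)
                REPORT("ok", ok);       /* expands to one statement */
        else
                REPORT("not ok", ok);
        return 0;
}
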
94#undef __array_desc 100#undef __array_desc
95#define __array_desc(type, container, item, len) \ 101#define __array_desc(type, container, item, len) \
@@ -155,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \
155 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
156}; \ 162}; \
157 \ 163 \
158struct ftrace_event_call __used \ 164struct ftrace_event_call __used event_##call = { \
159__attribute__((__aligned__(4))) \
160__attribute__((section("_ftrace_events"))) event_##call = { \
161 .name = #call, \ 165 .name = #call, \
162 .event.type = etype, \ 166 .event.type = etype, \
163 .class = &event_class_ftrace_##call, \ 167 .class = &event_class_ftrace_##call, \
164 .print_fmt = print, \ 168 .print_fmt = print, \
165}; \ 169}; \
170struct ftrace_event_call __used \
171__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
166 172
167#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 16aee4d44e8f..8d0e1cc4e974 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
149static struct ftrace_ops trace_ops __read_mostly = 149static struct ftrace_ops trace_ops __read_mostly =
150{ 150{
151 .func = function_trace_call, 151 .func = function_trace_call,
152 .flags = FTRACE_OPS_FL_GLOBAL,
152}; 153};
153 154
154static struct ftrace_ops trace_stack_ops __read_mostly = 155static struct ftrace_ops trace_stack_ops __read_mostly =
155{ 156{
156 .func = function_stack_trace_call, 157 .func = function_stack_trace_call,
158 .flags = FTRACE_OPS_FL_GLOBAL,
157}; 159};
158 160
159/* Our two options */ 161/* Our two options */
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..962cdb24ed81 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
246 return trace_graph_entry(trace); 262 return trace_graph_entry(trace);
247} 263}
248 264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
249void __trace_graph_return(struct trace_array *tr, 293void __trace_graph_return(struct trace_array *tr,
250 struct ftrace_graph_ret *trace, 294 struct ftrace_graph_ret *trace,
251 unsigned long flags, 295 unsigned long flags,
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
649 693
650 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
651 if (len < 7) { 695 if (len < 7) {
652 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
653 nsecs_rem); 697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
654 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
655 if (!ret) 700 if (!ret)
656 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 900 return 0;
856} 901}
857 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
908 * - we just entered irq code
909 *
910 * returns 0 if
911 * - funcgraph-irqs option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-irqs option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this return leaves it.
987 * Let's not trace it and clear the entry depth, since
988 * we are now out of irq code.
989 *
990 * The '>=' check ensures that we 'leave the irq code' once we
991 * are back out of the entry depth, which protects us if the
992 * RETURN entry itself is lost.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
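
check_irq_entry() and check_irq_return() only remember one number per CPU: the depth at which the trace entered the irq text section. Every event while that marker is set is hidden, and the marker is cleared when a return comes back to (or above) that depth. Below is a userspace simulation of that bookkeeping; the event stream in main() and the is_irq_func flag standing in for the __irqentry_text range check are invented for the example.

#include <stdio.h>

/* -1: not inside irq code; otherwise the depth at which we entered it */
static int depth_irq = -1;

/* returns 1 if the entry event should be hidden */
static int irq_entry(int is_irq_func, int depth)
{
        if (depth_irq >= 0)
                return 1;               /* already inside irq code */
        if (!is_irq_func)
                return 0;
        depth_irq = depth;              /* entering irq code */
        return 1;
}

/* returns 1 if the return event should be hidden */
static int irq_return(int depth)
{
        if (depth_irq == -1)
                return 0;
        if (depth_irq >= depth) {       /* back at (or above) the entry depth */
                depth_irq = -1;
                return 1;
        }
        return 1;                       /* still inside irq code */
}

int main(void)
{
        /* f() at depth 0 calls an irq handler at depth 1, which calls g() */
        printf("f entry hidden?   %d\n", irq_entry(0, 0));      /* 0: shown */
        printf("irq entry hidden? %d\n", irq_entry(1, 1));      /* 1: hidden */
        printf("g entry hidden?   %d\n", irq_entry(0, 2));      /* 1: hidden */
        printf("g return hidden?  %d\n", irq_return(2));        /* 1: hidden */
        printf("irq ret hidden?   %d\n", irq_return(1));        /* 1: hidden, leaves irq */
        printf("f return hidden?  %d\n", irq_return(0));        /* 0: shown */
        return 0;
}
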
858static enum print_line_t 1005static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 1007 struct trace_iterator *iter, u32 flags)
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 1012 static enum print_line_t ret;
866 int cpu = iter->cpu; 1013 int cpu = iter->cpu;
867 1014
1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
1017
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
870 1020
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1052 int ret;
903 int i; 1053 int i;
904 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
905 if (data) { 1058 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1059 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1054 1207
1055 1208
1056enum print_line_t 1209enum print_line_t
1057print_graph_function_flags(struct trace_iterator *iter, u32 flags) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1058{ 1211{
1059 struct ftrace_graph_ent_entry *field; 1212 struct ftrace_graph_ent_entry *field;
1060 struct fgraph_data *data = iter->private; 1213 struct fgraph_data *data = iter->private;
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1117static enum print_line_t 1270static enum print_line_t
1118print_graph_function(struct trace_iterator *iter) 1271print_graph_function(struct trace_iterator *iter)
1119{ 1272{
1120 return print_graph_function_flags(iter, tracer_flags.val); 1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1121} 1285}
1122 1286
1123static enum print_line_t 1287static enum print_line_t
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1149 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1150} 1314}
1151 1315
1152void print_graph_headers_flags(struct seq_file *s, u32 flags) 1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1153{ 1317{
1154 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1155 1319
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
1190 print_graph_headers_flags(s, tracer_flags.val); 1354 print_graph_headers_flags(s, tracer_flags.val);
1191} 1355}
1192 1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1193void graph_trace_open(struct trace_iterator *iter) 1374void graph_trace_open(struct trace_iterator *iter)
1194{ 1375{
1195 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1213 *pid = -1; 1396 *pid = -1;
1214 *depth = 0; 1397 *depth = 0;
1215 *ignore = 0; 1398 *ignore = 0;
1399 *depth_irq = -1;
1216 } 1400 }
1217 1401
1218 iter->private = data; 1402 iter->private = data;
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1419 }
1236} 1420}
1237 1421
1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1423{
1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
1428}
1429
1238static struct trace_event_functions graph_functions = { 1430static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1431 .trace = print_graph_function_event,
1240}; 1432};
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
1266#endif 1459#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 73a6b0601f2e..c77424be284d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -80,21 +80,29 @@ static struct tracer_flags tracer_flags = {
80 * skip the latency if the sequence has changed - some other section 80 * skip the latency if the sequence has changed - some other section
81 * did a maximum and could disturb our measurement with serial console 81 * did a maximum and could disturb our measurement with serial console
82 * printouts, etc. Truly coinciding maximum latencies should be rare 82 * printouts, etc. Truly coinciding maximum latencies should be rare
83 * and what happens together happens separately as well, so this doesnt 83 * and what happens together happens separately as well, so this doesn't
84 * decrease the validity of the maximum found: 84 * decrease the validity of the maximum found:
85 */ 85 */
86static __cacheline_aligned_in_smp unsigned long max_sequence; 86static __cacheline_aligned_in_smp unsigned long max_sequence;
87 87
88#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
89/* 89/*
90 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * Returns 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
91 */ 101 */
92static void 102static int func_prolog_dec(struct trace_array *tr,
93irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
94{ 105{
95 struct trace_array *tr = irqsoff_trace;
96 struct trace_array_cpu *data;
97 unsigned long flags;
98 long disabled; 106 long disabled;
99 int cpu; 107 int cpu;
100 108
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
106 */ 114 */
107 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
108 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
109 return; 117 return 0;
110 118
111 local_save_flags(flags); 119 local_save_flags(*flags);
112 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
113 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
114 return; 122 return 0;
115 123
116 data = tr->data[cpu]; 124 *data = tr->data[cpu];
117 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
118 126
119 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
120 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
121 149
122 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
123} 151}
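
func_prolog_dec() centralizes the boilerplate the two graph callbacks below also drop: check the per-cpu tracing_cpu flag, confirm interrupts really are off, then take a reference on the per-cpu disabled counter and trace only if it is the first one. A toy single-threaded userspace rendering of that increment-and-check guard, with the per-cpu machinery and the flag checks stripped away:

#include <stdio.h>

static int disabled;    /* stands in for the per-cpu data->disabled counter */

/* returns 1 if the caller may trace; it must call epilog() afterwards */
static int prolog(void)
{
        if (++disabled == 1)
                return 1;
        --disabled;             /* nested entry: back out and skip tracing */
        return 0;
}

static void epilog(void)
{
        --disabled;
}

static void trace_call(const char *who)
{
        if (!prolog())
                return;
        printf("traced from %s\n", who);
        epilog();
}

int main(void)
{
        trace_call("first level");      /* traced */

        /* simulate re-entering the tracer while it is already active */
        disabled++;
        trace_call("nested level");     /* silently skipped */
        disabled--;

        return 0;
}
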
@@ -125,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
125static struct ftrace_ops trace_ops __read_mostly = 153static struct ftrace_ops trace_ops __read_mostly =
126{ 154{
127 .func = irqsoff_tracer_call, 155 .func = irqsoff_tracer_call,
156 .flags = FTRACE_OPS_FL_GLOBAL,
128}; 157};
129#endif /* CONFIG_FUNCTION_TRACER */ 158#endif /* CONFIG_FUNCTION_TRACER */
130 159
@@ -155,30 +184,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
155 struct trace_array *tr = irqsoff_trace; 184 struct trace_array *tr = irqsoff_trace;
156 struct trace_array_cpu *data; 185 struct trace_array_cpu *data;
157 unsigned long flags; 186 unsigned long flags;
158 long disabled;
159 int ret; 187 int ret;
160 int cpu;
161 int pc; 188 int pc;
162 189
163 cpu = raw_smp_processor_id(); 190 if (!func_prolog_dec(tr, &data, &flags))
164 if (likely(!per_cpu(tracing_cpu, cpu)))
165 return 0;
166
167 local_save_flags(flags);
168 /* slight chance to get a false positive on tracing_cpu */
169 if (!irqs_disabled_flags(flags))
170 return 0; 191 return 0;
171 192
172 data = tr->data[cpu]; 193 pc = preempt_count();
173 disabled = atomic_inc_return(&data->disabled); 194 ret = __trace_graph_entry(tr, trace, flags, pc);
174
175 if (likely(disabled == 1)) {
176 pc = preempt_count();
177 ret = __trace_graph_entry(tr, trace, flags, pc);
178 } else
179 ret = 0;
180
181 atomic_dec(&data->disabled); 195 atomic_dec(&data->disabled);
196
182 return ret; 197 return ret;
183} 198}
184 199
@@ -187,27 +202,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
187 struct trace_array *tr = irqsoff_trace; 202 struct trace_array *tr = irqsoff_trace;
188 struct trace_array_cpu *data; 203 struct trace_array_cpu *data;
189 unsigned long flags; 204 unsigned long flags;
190 long disabled;
191 int cpu;
192 int pc; 205 int pc;
193 206
194 cpu = raw_smp_processor_id(); 207 if (!func_prolog_dec(tr, &data, &flags))
195 if (likely(!per_cpu(tracing_cpu, cpu)))
196 return;
197
198 local_save_flags(flags);
199 /* slight chance to get a false positive on tracing_cpu */
200 if (!irqs_disabled_flags(flags))
201 return; 208 return;
202 209
203 data = tr->data[cpu]; 210 pc = preempt_count();
204 disabled = atomic_inc_return(&data->disabled); 211 __trace_graph_return(tr, trace, flags, pc);
205
206 if (likely(disabled == 1)) {
207 pc = preempt_count();
208 __trace_graph_return(tr, trace, flags, pc);
209 }
210
211 atomic_dec(&data->disabled); 212 atomic_dec(&data->disabled);
212} 213}
213 214
@@ -229,75 +230,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
229 230
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 231static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{ 232{
232 u32 flags = GRAPH_TRACER_FLAGS;
233
234 if (trace_flags & TRACE_ITER_LATENCY_FMT)
235 flags |= TRACE_GRAPH_PRINT_DURATION;
236 else
237 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
238
239 /* 233 /*
240 * In graph mode call the graph tracer output function, 234 * In graph mode call the graph tracer output function,
241 * otherwise go with the TRACE_FN event handler 235 * otherwise go with the TRACE_FN event handler
242 */ 236 */
243 if (is_graph()) 237 if (is_graph())
244 return print_graph_function_flags(iter, flags); 238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
245 239
246 return TRACE_TYPE_UNHANDLED; 240 return TRACE_TYPE_UNHANDLED;
247} 241}
248 242
249static void irqsoff_print_header(struct seq_file *s) 243static void irqsoff_print_header(struct seq_file *s)
250{ 244{
251 if (is_graph()) { 245 if (is_graph())
252 struct trace_iterator *iter = s->private; 246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
253 u32 flags = GRAPH_TRACER_FLAGS; 247 else
254
255 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
256 /* print nothing if the buffers are empty */
257 if (trace_empty(iter))
258 return;
259
260 print_trace_header(s, iter);
261 flags |= TRACE_GRAPH_PRINT_DURATION;
262 } else
263 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
264
265 print_graph_headers_flags(s, flags);
266 } else
267 trace_default_header(s); 248 trace_default_header(s);
268} 249}
269 250
270static void 251static void
271trace_graph_function(struct trace_array *tr,
272 unsigned long ip, unsigned long flags, int pc)
273{
274 u64 time = trace_clock_local();
275 struct ftrace_graph_ent ent = {
276 .func = ip,
277 .depth = 0,
278 };
279 struct ftrace_graph_ret ret = {
280 .func = ip,
281 .depth = 0,
282 .calltime = time,
283 .rettime = time,
284 };
285
286 __trace_graph_entry(tr, &ent, flags, pc);
287 __trace_graph_return(tr, &ret, flags, pc);
288}
289
290static void
291__trace_function(struct trace_array *tr, 252__trace_function(struct trace_array *tr,
292 unsigned long ip, unsigned long parent_ip, 253 unsigned long ip, unsigned long parent_ip,
293 unsigned long flags, int pc) 254 unsigned long flags, int pc)
294{ 255{
295 if (!is_graph()) 256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
296 trace_function(tr, ip, parent_ip, flags, pc); 259 trace_function(tr, ip, parent_ip, flags, pc);
297 else {
298 trace_graph_function(tr, parent_ip, flags, pc);
299 trace_graph_function(tr, ip, flags, pc);
300 }
301} 260}
302 261
303#else 262#else
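The local trace_graph_function()/__trace_function() pair removed above is replaced by a call into a shared helper that now also receives parent_ip. A sketch of what that shared helper presumably does, modeled on the code deleted here; whether parent_ip gets its own entry/return pair is an assumption.

/*
 * Sketch of the shared trace_graph_function() helper, based on the
 * local version removed above: it records a zero-duration graph
 * entry/return pair so plain function events can be rendered by the
 * graph output.  The extra parent_ip parameter may be recorded the
 * same way; that part is an assumption.
 */
void trace_graph_function(struct trace_array *tr,
                          unsigned long ip, unsigned long parent_ip,
                          unsigned long flags, int pc)
{
    u64 time = trace_clock_local();
    struct ftrace_graph_ent ent = { .func = ip, .depth = 0 };
    struct ftrace_graph_ret ret = {
        .func     = ip,
        .depth    = 0,
        .calltime = time,
        .rettime  = time,
    };

    __trace_graph_entry(tr, &ent, flags, pc);
    __trace_graph_return(tr, &ret, flags, pc);
}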
@@ -495,14 +454,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
495 * Stubs: 454 * Stubs:
496 */ 455 */
497 456
498void early_boot_irqs_off(void)
499{
500}
501
502void early_boot_irqs_on(void)
503{
504}
505
506void trace_softirqs_on(unsigned long ip) 457void trace_softirqs_on(unsigned long ip)
507{ 458{
508} 459}
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 7b8ecd751d93..3c5c5dfea0b3 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -13,7 +13,6 @@
13#include <linux/kdb.h> 13#include <linux/kdb.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15 15
16#include "../debug/kdb/kdb_private.h"
17#include "trace.h" 16#include "trace.h"
18#include "trace_output.h" 17#include "trace_output.h"
19 18
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 544301d29dee..27d13b36b8be 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -31,7 +31,6 @@
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h> 33#include <linux/limits.h>
34#include <linux/uaccess.h>
35#include <asm/bitsperlong.h> 34#include <asm/bitsperlong.h>
36 35
37#include "trace.h" 36#include "trace.h"
@@ -54,7 +53,6 @@ const char *reserved_field_names[] = {
54 "common_preempt_count", 53 "common_preempt_count",
55 "common_pid", 54 "common_pid",
56 "common_tgid", 55 "common_tgid",
57 "common_lock_depth",
58 FIELD_STRING_IP, 56 FIELD_STRING_IP,
59 FIELD_STRING_RETIP, 57 FIELD_STRING_RETIP,
60 FIELD_STRING_FUNC, 58 FIELD_STRING_FUNC,
@@ -354,6 +352,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
354 kfree(data); 352 kfree(data);
355} 353}
356 354
355/* Bitfield fetch function */
356struct bitfield_fetch_param {
357 struct fetch_param orig;
358 unsigned char hi_shift;
359 unsigned char low_shift;
360};
361
362#define DEFINE_FETCH_bitfield(type) \
363static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
364 void *data, void *dest) \
365{ \
366 struct bitfield_fetch_param *bprm = data; \
367 type buf = 0; \
368 call_fetch(&bprm->orig, regs, &buf); \
369 if (buf) { \
370 buf <<= bprm->hi_shift; \
371 buf >>= bprm->low_shift; \
372 } \
373 *(type *)dest = buf; \
374}
375DEFINE_BASIC_FETCH_FUNCS(bitfield)
376#define fetch_bitfield_string NULL
377#define fetch_bitfield_string_size NULL
378
379static __kprobes void
380free_bitfield_fetch_param(struct bitfield_fetch_param *data)
381{
382 /*
383 * Don't check the bitfield itself, because this must be the
384 * last fetch function.
385 */
386 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
387 free_deref_fetch_param(data->orig.data);
388 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
389 free_symbol_cache(data->orig.data);
390 kfree(data);
391}
357/* Default (unsigned long) fetch type */ 392/* Default (unsigned long) fetch type */
358#define __DEFAULT_FETCH_TYPE(t) u##t 393#define __DEFAULT_FETCH_TYPE(t) u##t
359#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 394#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -368,6 +403,7 @@ enum {
368 FETCH_MTD_memory, 403 FETCH_MTD_memory,
369 FETCH_MTD_symbol, 404 FETCH_MTD_symbol,
370 FETCH_MTD_deref, 405 FETCH_MTD_deref,
406 FETCH_MTD_bitfield,
371 FETCH_MTD_END, 407 FETCH_MTD_END,
372}; 408};
373 409
@@ -388,6 +424,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \
388ASSIGN_FETCH_FUNC(memory, ftype), \ 424ASSIGN_FETCH_FUNC(memory, ftype), \
389ASSIGN_FETCH_FUNC(symbol, ftype), \ 425ASSIGN_FETCH_FUNC(symbol, ftype), \
390ASSIGN_FETCH_FUNC(deref, ftype), \ 426ASSIGN_FETCH_FUNC(deref, ftype), \
427ASSIGN_FETCH_FUNC(bitfield, ftype), \
391 } \ 428 } \
392 } 429 }
393 430
@@ -431,9 +468,33 @@ static const struct fetch_type *find_fetch_type(const char *type)
431 if (!type) 468 if (!type)
432 type = DEFAULT_FETCH_TYPE_STR; 469 type = DEFAULT_FETCH_TYPE_STR;
433 470
471 /* Special case: bitfield */
472 if (*type == 'b') {
473 unsigned long bs;
474 type = strchr(type, '/');
475 if (!type)
476 goto fail;
477 type++;
478 if (strict_strtoul(type, 0, &bs))
479 goto fail;
480 switch (bs) {
481 case 8:
482 return find_fetch_type("u8");
483 case 16:
484 return find_fetch_type("u16");
485 case 32:
486 return find_fetch_type("u32");
487 case 64:
488 return find_fetch_type("u64");
489 default:
490 goto fail;
491 }
492 }
493
434 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) 494 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
435 if (strcmp(type, fetch_type_table[i].name) == 0) 495 if (strcmp(type, fetch_type_table[i].name) == 0)
436 return &fetch_type_table[i]; 496 return &fetch_type_table[i];
497fail:
437 return NULL; 498 return NULL;
438} 499}
439 500
@@ -587,7 +648,9 @@ error:
587 648
588static void free_probe_arg(struct probe_arg *arg) 649static void free_probe_arg(struct probe_arg *arg)
589{ 650{
590 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) 651 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
652 free_bitfield_fetch_param(arg->fetch.data);
653 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
591 free_deref_fetch_param(arg->fetch.data); 654 free_deref_fetch_param(arg->fetch.data);
592 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) 655 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
593 free_symbol_cache(arg->fetch.data); 656 free_symbol_cache(arg->fetch.data);
@@ -648,7 +711,7 @@ static int register_trace_probe(struct trace_probe *tp)
648 } 711 }
649 ret = register_probe_event(tp); 712 ret = register_probe_event(tp);
650 if (ret) { 713 if (ret) {
651 pr_warning("Faild to register probe event(%d)\n", ret); 714 pr_warning("Failed to register probe event(%d)\n", ret);
652 goto end; 715 goto end;
653 } 716 }
654 717
@@ -768,16 +831,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
768 } 831 }
769 break; 832 break;
770 case '+': /* deref memory */ 833 case '+': /* deref memory */
834 arg++; /* Skip '+', because strict_strtol() rejects it. */
771 case '-': 835 case '-':
772 tmp = strchr(arg, '('); 836 tmp = strchr(arg, '(');
773 if (!tmp) 837 if (!tmp)
774 break; 838 break;
775 *tmp = '\0'; 839 *tmp = '\0';
776 ret = strict_strtol(arg + 1, 0, &offset); 840 ret = strict_strtol(arg, 0, &offset);
777 if (ret) 841 if (ret)
778 break; 842 break;
779 if (arg[0] == '-')
780 offset = -offset;
781 arg = tmp + 1; 843 arg = tmp + 1;
782 tmp = strrchr(arg, ')'); 844 tmp = strrchr(arg, ')');
783 if (tmp) { 845 if (tmp) {
@@ -808,6 +870,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
808 return ret; 870 return ret;
809} 871}
810 872
873#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
874
875/* Bitfield type needs to be parsed into a fetch function */
876static int __parse_bitfield_probe_arg(const char *bf,
877 const struct fetch_type *t,
878 struct fetch_param *f)
879{
880 struct bitfield_fetch_param *bprm;
881 unsigned long bw, bo;
882 char *tail;
883
884 if (*bf != 'b')
885 return 0;
886
887 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
888 if (!bprm)
889 return -ENOMEM;
890 bprm->orig = *f;
891 f->fn = t->fetch[FETCH_MTD_bitfield];
892 f->data = (void *)bprm;
893
894 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
895 if (bw == 0 || *tail != '@')
896 return -EINVAL;
897
898 bf = tail + 1;
899 bo = simple_strtoul(bf, &tail, 0);
900 if (tail == bf || *tail != '/')
901 return -EINVAL;
902
903 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
904 bprm->low_shift = bprm->hi_shift + bo;
905 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
906}
907
811/* String length checking wrapper */ 908/* String length checking wrapper */
812static int parse_probe_arg(char *arg, struct trace_probe *tp, 909static int parse_probe_arg(char *arg, struct trace_probe *tp,
813 struct probe_arg *parg, int is_return) 910 struct probe_arg *parg, int is_return)
@@ -837,6 +934,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
837 parg->offset = tp->size; 934 parg->offset = tp->size;
838 tp->size += parg->type->size; 935 tp->size += parg->type->size;
839 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 936 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
937 if (ret >= 0 && t != NULL)
938 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
840 if (ret >= 0) { 939 if (ret >= 0) {
841 parg->fetch_size.fn = get_fetch_size_function(parg->type, 940 parg->fetch_size.fn = get_fetch_size_function(parg->type,
842 parg->fetch.fn); 941 parg->fetch.fn);
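The bitfield fetch added above isolates bw bits at bit offset bo inside a container of t->size bytes with a left shift followed by a right shift. Below is a self-contained sketch of that arithmetic with a worked example; the argument spec in the comment is a hypothetical illustration of the b<bit-width>@<bit-offset>/<container-size> type string handled by find_fetch_type() and __parse_bitfield_probe_arg() above.

/*
 * Userspace illustration of the shift arithmetic used by the bitfield
 * fetch.  For a hypothetical spec such as  flags=+0(%si):b4@4/32
 * the parser yields bw = 4, bo = 4 on a 32-bit container, so
 *     hi_shift  = 32 - (4 + 4) = 24
 *     low_shift = 24 + 4       = 28
 * and the extraction keeps bits [7:4] of the fetched word.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t extract_bitfield32(uint32_t buf, unsigned int bw, unsigned int bo)
{
    unsigned int hi_shift  = 32 - (bw + bo);
    unsigned int low_shift = hi_shift + bo;

    buf <<= hi_shift;   /* discard bits above the field */
    buf >>= low_shift;  /* discard bits below the field */
    return buf;
}

int main(void)
{
    /* low byte of 0xABCD is 1100 1101b, so bits [7:4] are 0xC */
    printf("0x%x\n", extract_bitfield32(0xABCD, 4, 4));
    return 0;
}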
@@ -1131,7 +1230,7 @@ static int command_trace_probe(const char *buf)
1131 return ret; 1230 return ret;
1132} 1231}
1133 1232
1134#define WRITE_BUFSIZE 128 1233#define WRITE_BUFSIZE 4096
1135 1234
1136static ssize_t probes_write(struct file *file, const char __user *buffer, 1235static ssize_t probes_write(struct file *file, const char __user *buffer,
1137 size_t count, loff_t *ppos) 1236 size_t count, loff_t *ppos)
@@ -1739,7 +1838,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1739 kfree(tp->call.print_fmt); 1838 kfree(tp->call.print_fmt);
1740} 1839}
1741 1840
1742/* Make a debugfs interface for controling probe points */ 1841/* Make a debugfs interface for controlling probe points */
1743static __init int init_kprobe_trace(void) 1842static __init int init_kprobe_trace(void)
1744{ 1843{
1745 struct dentry *d_tracer; 1844 struct dentry *d_tracer;
@@ -1771,8 +1870,12 @@ fs_initcall(init_kprobe_trace);
1771 1870
1772#ifdef CONFIG_FTRACE_STARTUP_TEST 1871#ifdef CONFIG_FTRACE_STARTUP_TEST
1773 1872
1774static int kprobe_trace_selftest_target(int a1, int a2, int a3, 1873/*
1775 int a4, int a5, int a6) 1874 * The "__used" keeps gcc from removing the function symbol
1875 * from the kallsyms table.
1876 */
1877static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
1878 int a4, int a5, int a6)
1776{ 1879{
1777 return a1 + a2 + a3 + a4 + a5 + a6; 1880 return a1 + a2 + a3 + a4 + a5 + a6;
1778} 1881}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 02272baa2206..e37de492a9e1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
353} 353}
354EXPORT_SYMBOL(ftrace_print_symbols_seq); 354EXPORT_SYMBOL(ftrace_print_symbols_seq);
355 355
356#if BITS_PER_LONG == 32
357const char *
358ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
359 const struct trace_print_flags_u64 *symbol_array)
360{
361 int i;
362 const char *ret = p->buffer + p->len;
363
364 for (i = 0; symbol_array[i].name; i++) {
365
366 if (val != symbol_array[i].mask)
367 continue;
368
369 trace_seq_puts(p, symbol_array[i].name);
370 break;
371 }
372
373 if (!p->len)
374 trace_seq_printf(p, "0x%llx", val);
375
376 trace_seq_putc(p, 0);
377
378 return ret;
379}
380EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
381#endif
382
356const char * 383const char *
357ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) 384ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
358{ 385{
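ftrace_print_symbols_seq_u64(), added above for 32-bit builds, resolves a full 64-bit value against a name table where the plain unsigned-long variant would truncate the value. A minimal usage sketch follows; the table contents and the assumed { mask, name } layout of struct trace_print_flags_u64 are illustrative, and event code would normally reach this through a __print_symbolic_u64()-style wrapper rather than calling it directly.

/*
 * Minimal usage sketch for the helper above (32-bit kernels only).
 * The table values and the assumed { mask, name } layout of
 * struct trace_print_flags_u64 are illustrative.
 */
#include <linux/ftrace_event.h>

static const struct trace_print_flags_u64 sample_reasons[] = {
    { 0x0000000000000001ULL, "EXTERNAL_INTERRUPT" },
    { 0x0000000080000021ULL, "EXCEPTION" },
    { 0, NULL }                 /* name == NULL terminates the walk */
};

/* s is the event seq, tmp a scratch trace_seq for the symbol lookup */
static void show_reason(struct trace_seq *s, struct trace_seq *tmp,
                        unsigned long long reason)
{
    trace_seq_printf(s, "reason %s",
                     ftrace_print_symbols_seq_u64(tmp, reason,
                                                  sample_reasons));
}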
@@ -529,24 +556,34 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
529 * @entry: The trace entry field from the ring buffer 556 * @entry: The trace entry field from the ring buffer
530 * 557 *
531 * Prints the generic fields of irqs off, in hard or softirq, preempt 558 * Prints the generic fields of irqs off, in hard or softirq, preempt
532 * count and lock depth. 559 * count.
533 */ 560 */
534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) 561int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
535{ 562{
536 int hardirq, softirq; 563 char hardsoft_irq;
564 char need_resched;
565 char irqs_off;
566 int hardirq;
567 int softirq;
537 int ret; 568 int ret;
538 569
539 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 570 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
540 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 571 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
541 572
573 irqs_off =
574 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
575 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
576 '.';
577 need_resched =
578 (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
579 hardsoft_irq =
580 (hardirq && softirq) ? 'H' :
581 hardirq ? 'h' :
582 softirq ? 's' :
583 '.';
584
542 if (!trace_seq_printf(s, "%c%c%c", 585 if (!trace_seq_printf(s, "%c%c%c",
543 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 586 irqs_off, need_resched, hardsoft_irq))
544 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
545 'X' : '.',
546 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
547 'N' : '.',
548 (hardirq && softirq) ? 'H' :
549 hardirq ? 'h' : softirq ? 's' : '.'))
550 return 0; 587 return 0;
551 588
552 if (entry->preempt_count) 589 if (entry->preempt_count)
@@ -554,13 +591,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
554 else 591 else
555 ret = trace_seq_putc(s, '.'); 592 ret = trace_seq_putc(s, '.');
556 593
557 if (!ret) 594 return ret;
558 return 0;
559
560 if (entry->lock_depth < 0)
561 return trace_seq_putc(s, '.');
562
563 return trace_seq_printf(s, "%d", entry->lock_depth);
564} 595}
565 596
566static int 597static int
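With the lock-depth column removed above, trace_print_lat_fmt() now emits a fixed four-character prefix: irqs-off, need-resched, hard/soft-irq state, and the preempt count. A small standalone sketch of that encoding with a worked sample; the flag bit values are local stand-ins rather than the kernel's TRACE_FLAG_* constants, and the IRQS_NOSUPPORT 'X' case is left out.

/*
 * Standalone sketch of the four-character latency prefix: irqs-off,
 * need-resched, hard/soft-irq, preempt count.  Flag bits are local
 * stand-ins, not the kernel's TRACE_FLAG_* values.
 */
#include <stdio.h>

#define F_IRQS_OFF      0x01
#define F_NEED_RESCHED  0x02
#define F_HARDIRQ       0x04
#define F_SOFTIRQ       0x08

static void print_lat_fmt(unsigned int flags, int preempt_count)
{
    int hardirq = flags & F_HARDIRQ;
    int softirq = flags & F_SOFTIRQ;

    char irqs_off     = (flags & F_IRQS_OFF) ? 'd' : '.';
    char need_resched = (flags & F_NEED_RESCHED) ? 'N' : '.';
    char hardsoft_irq = (hardirq && softirq) ? 'H' :
                        hardirq ? 'h' :
                        softirq ? 's' : '.';

    printf("%c%c%c", irqs_off, need_resched, hardsoft_irq);
    if (preempt_count)
        printf("%x\n", preempt_count);   /* non-zero count as a digit */
    else
        printf(".\n");
}

int main(void)
{
    /* irqs off, need-resched, in hardirq, preempt_count 1 -> "dNh1" */
    print_lat_fmt(F_IRQS_OFF | F_NEED_RESCHED | F_HARDIRQ, 1);
    return 0;
}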
@@ -826,6 +857,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
826enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, 857enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
827 struct trace_event *event) 858 struct trace_event *event)
828{ 859{
860 if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type))
861 return TRACE_TYPE_PARTIAL_LINE;
862
829 return TRACE_TYPE_HANDLED; 863 return TRACE_TYPE_HANDLED;
830} 864}
831 865
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 2547d8813cf0..1f06468a10d7 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -32,7 +32,7 @@ static DEFINE_MUTEX(btrace_mutex);
32 32
33struct trace_bprintk_fmt { 33struct trace_bprintk_fmt {
34 struct list_head list; 34 struct list_head list;
35 char fmt[0]; 35 const char *fmt;
36}; 36};
37 37
38static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) 38static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
@@ -49,6 +49,7 @@ static
49void hold_module_trace_bprintk_format(const char **start, const char **end) 49void hold_module_trace_bprintk_format(const char **start, const char **end)
50{ 50{
51 const char **iter; 51 const char **iter;
52 char *fmt;
52 53
53 mutex_lock(&btrace_mutex); 54 mutex_lock(&btrace_mutex);
54 for (iter = start; iter < end; iter++) { 55 for (iter = start; iter < end; iter++) {
@@ -58,14 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
58 continue; 59 continue;
59 } 60 }
60 61
61 tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) 62 tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
62 + strlen(*iter) + 1, GFP_KERNEL); 63 if (tb_fmt)
63 if (tb_fmt) { 64 fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
65 if (tb_fmt && fmt) {
64 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); 66 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
65 strcpy(tb_fmt->fmt, *iter); 67 strcpy(fmt, *iter);
68 tb_fmt->fmt = fmt;
66 *iter = tb_fmt->fmt; 69 *iter = tb_fmt->fmt;
67 } else 70 } else {
71 kfree(tb_fmt);
68 *iter = NULL; 72 *iter = NULL;
73 }
69 } 74 }
70 mutex_unlock(&btrace_mutex); 75 mutex_unlock(&btrace_mutex);
71} 76}
@@ -84,6 +89,76 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
84 return 0; 89 return 0;
85} 90}
86 91
92/*
 93 * The debugfs/tracing/printk_formats file maps the addresses to
 94 * the ASCII formats that are used in the bprintk events in the
 95 * buffer. For userspace tools to be able to decode the events from
 96 * the buffer, they need to be able to map the address to the format.
 97 *
 98 * The addresses of the bprintk formats are in their own section
 99 * __trace_printk_fmt. But for modules we copy them into a linked list.
 100 * The code to print the formats and their addresses passes around the
 101 * address of the fmt string. If the fmt address passed into the seq
 102 * functions is within the kernel core __trace_printk_fmt section, then
 103 * it simply advances to the next format pointer in the section.
 104 *
 105 * When the fmt pointer is outside the kernel core __trace_printk_fmt
 106 * section, then we need to read the linked list pointers. The trick is
 107 * we pass the address of the string to the seq function just like
 108 * we do for the kernel core formats. To get back the structure that
 109 * holds the format, we simply use container_of() and then go to the
110 * next format in the list.
111 */
112static const char **
113find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
114{
115 struct trace_bprintk_fmt *mod_fmt;
116
117 if (list_empty(&trace_bprintk_fmt_list))
118 return NULL;
119
120 /*
121 * v will point to the address of the fmt record from t_next
122 * v will be NULL from t_start.
123 * If this is the first pointer or called from start
124 * then we need to walk the list.
125 */
126 if (!v || start_index == *pos) {
127 struct trace_bprintk_fmt *p;
128
129 /* search the module list */
130 list_for_each_entry(p, &trace_bprintk_fmt_list, list) {
131 if (start_index == *pos)
132 return &p->fmt;
133 start_index++;
134 }
135 /* pos > index */
136 return NULL;
137 }
138
139 /*
140 * v points to the address of the fmt field in the mod list
141 * structure that holds the module print format.
142 */
143 mod_fmt = container_of(v, typeof(*mod_fmt), fmt);
144 if (mod_fmt->list.next == &trace_bprintk_fmt_list)
145 return NULL;
146
147 mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list);
148
149 return &mod_fmt->fmt;
150}
151
152static void format_mod_start(void)
153{
154 mutex_lock(&btrace_mutex);
155}
156
157static void format_mod_stop(void)
158{
159 mutex_unlock(&btrace_mutex);
160}
161
87#else /* !CONFIG_MODULES */ 162#else /* !CONFIG_MODULES */
88__init static int 163__init static int
89module_trace_bprintk_format_notify(struct notifier_block *self, 164module_trace_bprintk_format_notify(struct notifier_block *self,
@@ -91,6 +166,13 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
91{ 166{
92 return 0; 167 return 0;
93} 168}
169static inline const char **
170find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
171{
172 return NULL;
173}
174static inline void format_mod_start(void) { }
175static inline void format_mod_stop(void) { }
94#endif /* CONFIG_MODULES */ 176#endif /* CONFIG_MODULES */
95 177
96 178
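The comment above describes passing around the address of a format pointer and using container_of() to get back to the owning record when the pointer lives in the module list rather than in the core __trace_printk_fmt section. Here is a small userspace sketch of that pointer trick; the struct and the list handling are simplified stand-ins for struct trace_bprintk_fmt and the kernel list API.

/*
 * Userspace sketch of the container_of() trick: the iterator hands
 * around &entry->fmt, and container_of() recovers the entry so the
 * walk can continue at the next record.
 */
#include <stddef.h>
#include <stdio.h>

struct entry {
    struct entry *next;
    const char *fmt;
};

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static const char **next_fmt(const char **v)
{
    struct entry *e = container_of(v, struct entry, fmt);

    return e->next ? &e->next->fmt : NULL;
}

int main(void)
{
    struct entry b = { NULL, "mod fmt B" };
    struct entry a = { &b,   "mod fmt A" };
    const char **v = &a.fmt;

    while (v) {
        printf("%s\n", *v);
        v = next_fmt(v);
    }
    return 0;
}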
@@ -153,20 +235,30 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
153} 235}
154EXPORT_SYMBOL_GPL(__ftrace_vprintk); 236EXPORT_SYMBOL_GPL(__ftrace_vprintk);
155 237
238static const char **find_next(void *v, loff_t *pos)
239{
240 const char **fmt = v;
241 int start_index;
242
243 start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
244
245 if (*pos < start_index)
246 return __start___trace_bprintk_fmt + *pos;
247
248 return find_next_mod_format(start_index, v, fmt, pos);
249}
250
156static void * 251static void *
157t_start(struct seq_file *m, loff_t *pos) 252t_start(struct seq_file *m, loff_t *pos)
158{ 253{
159 const char **fmt = __start___trace_bprintk_fmt + *pos; 254 format_mod_start();
160 255 return find_next(NULL, pos);
161 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
162 return NULL;
163 return fmt;
164} 256}
165 257
166static void *t_next(struct seq_file *m, void * v, loff_t *pos) 258static void *t_next(struct seq_file *m, void * v, loff_t *pos)
167{ 259{
168 (*pos)++; 260 (*pos)++;
169 return t_start(m, pos); 261 return find_next(v, pos);
170} 262}
171 263
172static int t_show(struct seq_file *m, void *v) 264static int t_show(struct seq_file *m, void *v)
@@ -205,6 +297,7 @@ static int t_show(struct seq_file *m, void *v)
205 297
206static void t_stop(struct seq_file *m, void *p) 298static void t_stop(struct seq_file *m, void *p)
207{ 299{
300 format_mod_stop();
208} 301}
209 302
210static const struct seq_operations show_format_seq_ops = { 303static const struct seq_operations show_format_seq_ops = {
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 8f758d070c43..7e62c0a18456 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
247 ctx_trace = tr; 247 ctx_trace = tr;
248} 248}
249 249
250static void stop_sched_trace(struct trace_array *tr)
251{
252 tracing_stop_sched_switch_record();
253}
254
255static int sched_switch_trace_init(struct trace_array *tr)
256{
257 ctx_trace = tr;
258 tracing_reset_online_cpus(tr);
259 tracing_start_sched_switch_record();
260 return 0;
261}
262
263static void sched_switch_trace_reset(struct trace_array *tr)
264{
265 if (sched_ref)
266 stop_sched_trace(tr);
267}
268
269static void sched_switch_trace_start(struct trace_array *tr)
270{
271 sched_stopped = 0;
272}
273
274static void sched_switch_trace_stop(struct trace_array *tr)
275{
276 sched_stopped = 1;
277}
278
279static struct tracer sched_switch_trace __read_mostly =
280{
281 .name = "sched_switch",
282 .init = sched_switch_trace_init,
283 .reset = sched_switch_trace_reset,
284 .start = sched_switch_trace_start,
285 .stop = sched_switch_trace_stop,
286 .wait_pipe = poll_wait_pipe,
287#ifdef CONFIG_FTRACE_SELFTEST
288 .selftest = trace_selftest_startup_sched_switch,
289#endif
290};
291
292__init static int init_sched_switch_trace(void)
293{
294 return register_tracer(&sched_switch_trace);
295}
296device_initcall(init_sched_switch_trace);
297
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4086eae6e81b..f029dd4fd2ca 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,57 +31,258 @@ static int wakeup_rt;
31static arch_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void wakeup_reset(struct trace_array *tr);
34static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
35 38
36static int save_lat_flag; 39static int save_lat_flag;
37 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
38#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
39/* 60/*
40 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
 63 * Returns 1 if it is OK to continue; in that case preemption is
 64 * disabled and data->disabled is incremented.
 65 * Returns 0 if the trace is to be ignored; in that case preemption
 66 * is not disabled and data->disabled is
 67 * left unchanged.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
41 */ 73 */
42static void 74static int
43wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
44{ 78{
45 struct trace_array *tr = wakeup_trace;
46 struct trace_array_cpu *data;
47 unsigned long flags;
48 long disabled; 79 long disabled;
49 int cpu; 80 int cpu;
50 int pc;
51 81
52 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
53 return; 83 return 0;
54 84
55 pc = preempt_count(); 85 *pc = preempt_count();
56 preempt_disable_notrace(); 86 preempt_disable_notrace();
57 87
58 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
59 if (cpu != wakeup_current_cpu) 89 if (cpu != wakeup_current_cpu)
60 goto out_enable; 90 goto out_enable;
61 91
62 data = tr->data[cpu]; 92 *data = tr->data[cpu];
63 disabled = atomic_inc_return(&data->disabled); 93 disabled = atomic_inc_return(&(*data)->disabled);
64 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
65 goto out; 95 goto out;
66 96
67 local_irq_save(flags); 97 return 1;
68 98
69 trace_function(tr, ip, parent_ip, flags, pc); 99out:
100 atomic_dec(&(*data)->disabled);
101
102out_enable:
103 preempt_enable_notrace();
104 return 0;
105}
70 106
107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
120
121 local_irq_save(flags);
122 trace_function(tr, ip, parent_ip, flags, pc);
71 local_irq_restore(flags); 123 local_irq_restore(flags);
72 124
73 out:
74 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
75 out_enable:
76 preempt_enable_notrace(); 126 preempt_enable_notrace();
77} 127}
78 128
79static struct ftrace_ops trace_ops __read_mostly = 129static struct ftrace_ops trace_ops __read_mostly =
80{ 130{
81 .func = wakeup_tracer_call, 131 .func = wakeup_tracer_call,
132 .flags = FTRACE_OPS_FL_GLOBAL,
82}; 133};
83#endif /* CONFIG_FUNCTION_TRACER */ 134#endif /* CONFIG_FUNCTION_TRACER */
84 135
136static int start_func_tracer(int graph)
137{
138 int ret;
139
140 if (!graph)
141 ret = register_ftrace_function(&trace_ops);
142 else
143 ret = register_ftrace_graph(&wakeup_graph_return,
144 &wakeup_graph_entry);
145
146 if (!ret && tracing_is_enabled())
147 tracer_enabled = 1;
148 else
149 tracer_enabled = 0;
150
151 return ret;
152}
153
154static void stop_func_tracer(int graph)
155{
156 tracer_enabled = 0;
157
158 if (!graph)
159 unregister_ftrace_function(&trace_ops);
160 else
161 unregister_ftrace_graph();
162}
163
164#ifdef CONFIG_FUNCTION_GRAPH_TRACER
165static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
166{
167
168 if (!(bit & TRACE_DISPLAY_GRAPH))
169 return -EINVAL;
170
171 if (!(is_graph() ^ set))
172 return 0;
173
174 stop_func_tracer(!set);
175
176 wakeup_reset(wakeup_trace);
177 tracing_max_latency = 0;
178
179 return start_func_tracer(set);
180}
181
182static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
183{
184 struct trace_array *tr = wakeup_trace;
185 struct trace_array_cpu *data;
186 unsigned long flags;
187 int pc, ret = 0;
188
189 if (!func_prolog_preempt_disable(tr, &data, &pc))
190 return 0;
191
192 local_save_flags(flags);
193 ret = __trace_graph_entry(tr, trace, flags, pc);
194 atomic_dec(&data->disabled);
195 preempt_enable_notrace();
196
197 return ret;
198}
199
200static void wakeup_graph_return(struct ftrace_graph_ret *trace)
201{
202 struct trace_array *tr = wakeup_trace;
203 struct trace_array_cpu *data;
204 unsigned long flags;
205 int pc;
206
207 if (!func_prolog_preempt_disable(tr, &data, &pc))
208 return;
209
210 local_save_flags(flags);
211 __trace_graph_return(tr, trace, flags, pc);
212 atomic_dec(&data->disabled);
213
214 preempt_enable_notrace();
215 return;
216}
217
218static void wakeup_trace_open(struct trace_iterator *iter)
219{
220 if (is_graph())
221 graph_trace_open(iter);
222}
223
224static void wakeup_trace_close(struct trace_iterator *iter)
225{
226 if (iter->private)
227 graph_trace_close(iter);
228}
229
230#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
231
232static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
233{
234 /*
235 * In graph mode call the graph tracer output function,
236 * otherwise go with the TRACE_FN event handler
237 */
238 if (is_graph())
239 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
240
241 return TRACE_TYPE_UNHANDLED;
242}
243
244static void wakeup_print_header(struct seq_file *s)
245{
246 if (is_graph())
247 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
248 else
249 trace_default_header(s);
250}
251
252static void
253__trace_function(struct trace_array *tr,
254 unsigned long ip, unsigned long parent_ip,
255 unsigned long flags, int pc)
256{
257 if (is_graph())
258 trace_graph_function(tr, ip, parent_ip, flags, pc);
259 else
260 trace_function(tr, ip, parent_ip, flags, pc);
261}
262#else
263#define __trace_function trace_function
264
265static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
266{
267 return -EINVAL;
268}
269
270static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
271{
272 return -1;
273}
274
275static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
276{
277 return TRACE_TYPE_UNHANDLED;
278}
279
280static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
281static void wakeup_print_header(struct seq_file *s) { }
282static void wakeup_trace_open(struct trace_iterator *iter) { }
283static void wakeup_trace_close(struct trace_iterator *iter) { }
284#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
285
85/* 286/*
86 * Should this new latency be reported/recorded? 287 * Should this new latency be reported/recorded?
87 */ 288 */
@@ -152,7 +353,7 @@ probe_wakeup_sched_switch(void *ignore,
152 /* The task we are waiting for is waking up */ 353 /* The task we are waiting for is waking up */
153 data = wakeup_trace->data[wakeup_cpu]; 354 data = wakeup_trace->data[wakeup_cpu];
154 355
155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 356 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 357 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
157 358
158 T0 = data->preempt_timestamp; 359 T0 = data->preempt_timestamp;
@@ -252,7 +453,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
252 * is not called by an assembly function (where as schedule is) 453 * is not called by an assembly function (where as schedule is)
253 * it should be safe to use it here. 454 * it should be safe to use it here.
254 */ 455 */
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 456 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 457
257out_locked: 458out_locked:
258 arch_spin_unlock(&wakeup_lock); 459 arch_spin_unlock(&wakeup_lock);
@@ -303,12 +504,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
303 */ 504 */
304 smp_wmb(); 505 smp_wmb();
305 506
306 register_ftrace_function(&trace_ops); 507 if (start_func_tracer(is_graph()))
307 508 printk(KERN_ERR "failed to start wakeup tracer\n");
308 if (tracing_is_enabled())
309 tracer_enabled = 1;
310 else
311 tracer_enabled = 0;
312 509
313 return; 510 return;
314fail_deprobe_wake_new: 511fail_deprobe_wake_new:
@@ -320,7 +517,7 @@ fail_deprobe:
320static void stop_wakeup_tracer(struct trace_array *tr) 517static void stop_wakeup_tracer(struct trace_array *tr)
321{ 518{
322 tracer_enabled = 0; 519 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 520 stop_func_tracer(is_graph());
324 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); 521 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup, NULL); 522 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup, NULL); 523 unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -379,9 +576,15 @@ static struct tracer wakeup_tracer __read_mostly =
379 .start = wakeup_tracer_start, 576 .start = wakeup_tracer_start,
380 .stop = wakeup_tracer_stop, 577 .stop = wakeup_tracer_stop,
381 .print_max = 1, 578 .print_max = 1,
579 .print_header = wakeup_print_header,
580 .print_line = wakeup_print_line,
581 .flags = &tracer_flags,
582 .set_flag = wakeup_set_flag,
382#ifdef CONFIG_FTRACE_SELFTEST 583#ifdef CONFIG_FTRACE_SELFTEST
383 .selftest = trace_selftest_startup_wakeup, 584 .selftest = trace_selftest_startup_wakeup,
384#endif 585#endif
586 .open = wakeup_trace_open,
587 .close = wakeup_trace_close,
385 .use_max_tr = 1, 588 .use_max_tr = 1,
386}; 589};
387 590
@@ -394,9 +597,15 @@ static struct tracer wakeup_rt_tracer __read_mostly =
394 .stop = wakeup_tracer_stop, 597 .stop = wakeup_tracer_stop,
395 .wait_pipe = poll_wait_pipe, 598 .wait_pipe = poll_wait_pipe,
396 .print_max = 1, 599 .print_max = 1,
600 .print_header = wakeup_print_header,
601 .print_line = wakeup_print_line,
602 .flags = &tracer_flags,
603 .set_flag = wakeup_set_flag,
397#ifdef CONFIG_FTRACE_SELFTEST 604#ifdef CONFIG_FTRACE_SELFTEST
398 .selftest = trace_selftest_startup_wakeup, 605 .selftest = trace_selftest_startup_wakeup,
399#endif 606#endif
607 .open = wakeup_trace_open,
608 .close = wakeup_trace_close,
400 .use_max_tr = 1, 609 .use_max_tr = 1,
401}; 610};
402 611
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 155a415b3209..288541f977fb 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -101,6 +101,206 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
101 101
102#ifdef CONFIG_DYNAMIC_FTRACE 102#ifdef CONFIG_DYNAMIC_FTRACE
103 103
104static int trace_selftest_test_probe1_cnt;
105static void trace_selftest_test_probe1_func(unsigned long ip,
106 unsigned long pip)
107{
108 trace_selftest_test_probe1_cnt++;
109}
110
111static int trace_selftest_test_probe2_cnt;
112static void trace_selftest_test_probe2_func(unsigned long ip,
113 unsigned long pip)
114{
115 trace_selftest_test_probe2_cnt++;
116}
117
118static int trace_selftest_test_probe3_cnt;
119static void trace_selftest_test_probe3_func(unsigned long ip,
120 unsigned long pip)
121{
122 trace_selftest_test_probe3_cnt++;
123}
124
125static int trace_selftest_test_global_cnt;
126static void trace_selftest_test_global_func(unsigned long ip,
127 unsigned long pip)
128{
129 trace_selftest_test_global_cnt++;
130}
131
132static int trace_selftest_test_dyn_cnt;
133static void trace_selftest_test_dyn_func(unsigned long ip,
134 unsigned long pip)
135{
136 trace_selftest_test_dyn_cnt++;
137}
138
139static struct ftrace_ops test_probe1 = {
140 .func = trace_selftest_test_probe1_func,
141};
142
143static struct ftrace_ops test_probe2 = {
144 .func = trace_selftest_test_probe2_func,
145};
146
147static struct ftrace_ops test_probe3 = {
148 .func = trace_selftest_test_probe3_func,
149};
150
151static struct ftrace_ops test_global = {
152 .func = trace_selftest_test_global_func,
153 .flags = FTRACE_OPS_FL_GLOBAL,
154};
155
156static void print_counts(void)
157{
158 printk("(%d %d %d %d %d) ",
159 trace_selftest_test_probe1_cnt,
160 trace_selftest_test_probe2_cnt,
161 trace_selftest_test_probe3_cnt,
162 trace_selftest_test_global_cnt,
163 trace_selftest_test_dyn_cnt);
164}
165
166static void reset_counts(void)
167{
168 trace_selftest_test_probe1_cnt = 0;
169 trace_selftest_test_probe2_cnt = 0;
170 trace_selftest_test_probe3_cnt = 0;
171 trace_selftest_test_global_cnt = 0;
172 trace_selftest_test_dyn_cnt = 0;
173}
174
175static int trace_selftest_ops(int cnt)
176{
177 int save_ftrace_enabled = ftrace_enabled;
178 struct ftrace_ops *dyn_ops;
179 char *func1_name;
180 char *func2_name;
181 int len1;
182 int len2;
183 int ret = -1;
184
185 printk(KERN_CONT "PASSED\n");
186 pr_info("Testing dynamic ftrace ops #%d: ", cnt);
187
188 ftrace_enabled = 1;
189 reset_counts();
190
191 /* Handle PPC64 '.' name */
192 func1_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
193 func2_name = "*" __stringify(DYN_FTRACE_TEST_NAME2);
194 len1 = strlen(func1_name);
195 len2 = strlen(func2_name);
196
197 /*
198 * Probe 1 will trace function 1.
199 * Probe 2 will trace function 2.
200 * Probe 3 will trace functions 1 and 2.
201 */
202 ftrace_set_filter(&test_probe1, func1_name, len1, 1);
203 ftrace_set_filter(&test_probe2, func2_name, len2, 1);
204 ftrace_set_filter(&test_probe3, func1_name, len1, 1);
205 ftrace_set_filter(&test_probe3, func2_name, len2, 0);
206
207 register_ftrace_function(&test_probe1);
208 register_ftrace_function(&test_probe2);
209 register_ftrace_function(&test_probe3);
210 register_ftrace_function(&test_global);
211
212 DYN_FTRACE_TEST_NAME();
213
214 print_counts();
215
216 if (trace_selftest_test_probe1_cnt != 1)
217 goto out;
218 if (trace_selftest_test_probe2_cnt != 0)
219 goto out;
220 if (trace_selftest_test_probe3_cnt != 1)
221 goto out;
222 if (trace_selftest_test_global_cnt == 0)
223 goto out;
224
225 DYN_FTRACE_TEST_NAME2();
226
227 print_counts();
228
229 if (trace_selftest_test_probe1_cnt != 1)
230 goto out;
231 if (trace_selftest_test_probe2_cnt != 1)
232 goto out;
233 if (trace_selftest_test_probe3_cnt != 2)
234 goto out;
235
236 /* Add a dynamic probe */
237 dyn_ops = kzalloc(sizeof(*dyn_ops), GFP_KERNEL);
238 if (!dyn_ops) {
239 printk("MEMORY ERROR ");
240 goto out;
241 }
242
243 dyn_ops->func = trace_selftest_test_dyn_func;
244
245 register_ftrace_function(dyn_ops);
246
247 trace_selftest_test_global_cnt = 0;
248
249 DYN_FTRACE_TEST_NAME();
250
251 print_counts();
252
253 if (trace_selftest_test_probe1_cnt != 2)
254 goto out_free;
255 if (trace_selftest_test_probe2_cnt != 1)
256 goto out_free;
257 if (trace_selftest_test_probe3_cnt != 3)
258 goto out_free;
259 if (trace_selftest_test_global_cnt == 0)
260 goto out;
261 if (trace_selftest_test_dyn_cnt == 0)
262 goto out_free;
263
264 DYN_FTRACE_TEST_NAME2();
265
266 print_counts();
267
268 if (trace_selftest_test_probe1_cnt != 2)
269 goto out_free;
270 if (trace_selftest_test_probe2_cnt != 2)
271 goto out_free;
272 if (trace_selftest_test_probe3_cnt != 4)
273 goto out_free;
274
275 ret = 0;
276 out_free:
277 unregister_ftrace_function(dyn_ops);
278 kfree(dyn_ops);
279
280 out:
281 /* Purposely unregister in the same order */
282 unregister_ftrace_function(&test_probe1);
283 unregister_ftrace_function(&test_probe2);
284 unregister_ftrace_function(&test_probe3);
285 unregister_ftrace_function(&test_global);
286
287 /* Make sure everything is off */
288 reset_counts();
289 DYN_FTRACE_TEST_NAME();
290 DYN_FTRACE_TEST_NAME();
291
292 if (trace_selftest_test_probe1_cnt ||
293 trace_selftest_test_probe2_cnt ||
294 trace_selftest_test_probe3_cnt ||
295 trace_selftest_test_global_cnt ||
296 trace_selftest_test_dyn_cnt)
297 ret = -1;
298
299 ftrace_enabled = save_ftrace_enabled;
300
301 return ret;
302}
303
104/* Test dynamic code modification and ftrace filters */ 304/* Test dynamic code modification and ftrace filters */
105int trace_selftest_startup_dynamic_tracing(struct tracer *trace, 305int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
106 struct trace_array *tr, 306 struct trace_array *tr,
@@ -131,7 +331,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
131 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); 331 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
132 332
133 /* filter only on our function */ 333 /* filter only on our function */
134 ftrace_set_filter(func_name, strlen(func_name), 1); 334 ftrace_set_global_filter(func_name, strlen(func_name), 1);
135 335
136 /* enable tracing */ 336 /* enable tracing */
137 ret = tracer_init(trace, tr); 337 ret = tracer_init(trace, tr);
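The selftest above exercises per-ops filtering with ftrace_set_filter(&ops, ...), while the pre-existing global path is now reached through ftrace_set_global_filter(). A sketch of the prototypes this implies; the exact signatures are assumptions inferred from the call sites shown in this hunk and in trace_selftest_ops().

/*
 * Prototypes assumed from the call sites above (sketch only):
 * filtering is now attached to an individual ftrace_ops, and the old
 * behaviour lives behind the *_global_* variant.
 */
void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
                       int len, int reset);
void ftrace_set_global_filter(unsigned char *buf, int len, int reset);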
@@ -166,22 +366,30 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
166 366
167 /* check the trace buffer */ 367 /* check the trace buffer */
168 ret = trace_test_buffer(tr, &count); 368 ret = trace_test_buffer(tr, &count);
169 trace->reset(tr);
170 tracing_start(); 369 tracing_start();
171 370
172 /* we should only have one item */ 371 /* we should only have one item */
173 if (!ret && count != 1) { 372 if (!ret && count != 1) {
373 trace->reset(tr);
174 printk(KERN_CONT ".. filter failed count=%ld ..", count); 374 printk(KERN_CONT ".. filter failed count=%ld ..", count);
175 ret = -1; 375 ret = -1;
176 goto out; 376 goto out;
177 } 377 }
178 378
379 /* Test the ops with global tracing running */
380 ret = trace_selftest_ops(1);
381 trace->reset(tr);
382
179 out: 383 out:
180 ftrace_enabled = save_ftrace_enabled; 384 ftrace_enabled = save_ftrace_enabled;
181 tracer_enabled = save_tracer_enabled; 385 tracer_enabled = save_tracer_enabled;
182 386
183 /* Enable tracing on all functions again */ 387 /* Enable tracing on all functions again */
184 ftrace_set_filter(NULL, 0, 1); 388 ftrace_set_global_filter(NULL, 0, 1);
389
390 /* Test the ops with global tracing off */
391 if (!ret)
392 ret = trace_selftest_ops(2);
185 393
186 return ret; 394 return ret;
187} 395}
@@ -558,7 +766,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
558static int trace_wakeup_test_thread(void *data) 766static int trace_wakeup_test_thread(void *data)
559{ 767{
560 /* Make this a RT thread, doesn't need to be too high */ 768 /* Make this a RT thread, doesn't need to be too high */
561 struct sched_param param = { .sched_priority = 5 }; 769 static const struct sched_param param = { .sched_priority = 5 };
562 struct completion *x = data; 770 struct completion *x = data;
563 771
564 sched_setscheduler(current, SCHED_FIFO, &param); 772 sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
index 54dd77cce5bf..b4c475a0a48b 100644
--- a/kernel/trace/trace_selftest_dynamic.c
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -5,3 +5,9 @@ int DYN_FTRACE_TEST_NAME(void)
5 /* used to call mcount */ 5 /* used to call mcount */
6 return 0; 6 return 0;
7} 7}
8
9int DYN_FTRACE_TEST_NAME2(void)
10{
11 /* used to call mcount */
12 return 0;
13}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a6b7e0e0f3eb..b0b53b8e4c25 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
134{ 134{
135 .func = stack_trace_call, 135 .func = stack_trace_call,
136 .flags = FTRACE_OPS_FL_GLOBAL,
136}; 137};
137 138
138static ssize_t 139static ssize_t
@@ -195,6 +196,7 @@ static const struct file_operations stack_max_size_fops = {
195 .open = tracing_open_generic, 196 .open = tracing_open_generic,
196 .read = stack_max_size_read, 197 .read = stack_max_size_read,
197 .write = stack_max_size_write, 198 .write = stack_max_size_write,
199 .llseek = default_llseek,
198}; 200};
199 201
200static void * 202static void *
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index bac752f0cfb5..ee7b5a0bb9f8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event,
23static int syscall_enter_define_fields(struct ftrace_event_call *call); 23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call); 24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25 25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
29static struct list_head * 26static struct list_head *
30syscall_get_enter_fields(struct ftrace_event_call *call) 27syscall_get_enter_fields(struct ftrace_event_call *call)
31{ 28{
@@ -34,61 +31,66 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
34 return &entry->enter_fields; 31 return &entry->enter_fields;
35} 32}
36 33
37static struct list_head *
38syscall_get_exit_fields(struct ftrace_event_call *call)
39{
40 return &syscall_exit_fields;
41}
42
43struct trace_event_functions enter_syscall_print_funcs = { 34struct trace_event_functions enter_syscall_print_funcs = {
44 .trace = print_syscall_enter, 35 .trace = print_syscall_enter,
45}; 36};
46 37
47struct trace_event_functions exit_syscall_print_funcs = { 38struct trace_event_functions exit_syscall_print_funcs = {
48 .trace = print_syscall_exit, 39 .trace = print_syscall_exit,
49}; 40};
50 41
51struct ftrace_event_class event_class_syscall_enter = { 42struct ftrace_event_class event_class_syscall_enter = {
52 .system = "syscalls", 43 .system = "syscalls",
53 .reg = syscall_enter_register, 44 .reg = syscall_enter_register,
54 .define_fields = syscall_enter_define_fields, 45 .define_fields = syscall_enter_define_fields,
55 .get_fields = syscall_get_enter_fields, 46 .get_fields = syscall_get_enter_fields,
56 .raw_init = init_syscall_trace, 47 .raw_init = init_syscall_trace,
57}; 48};
58 49
59struct ftrace_event_class event_class_syscall_exit = { 50struct ftrace_event_class event_class_syscall_exit = {
60 .system = "syscalls", 51 .system = "syscalls",
61 .reg = syscall_exit_register, 52 .reg = syscall_exit_register,
62 .define_fields = syscall_exit_define_fields, 53 .define_fields = syscall_exit_define_fields,
63 .get_fields = syscall_get_exit_fields, 54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
64 .raw_init = init_syscall_trace, 55 .raw_init = init_syscall_trace,
65}; 56};
66 57
67extern unsigned long __start_syscalls_metadata[]; 58extern struct syscall_metadata *__start_syscalls_metadata[];
68extern unsigned long __stop_syscalls_metadata[]; 59extern struct syscall_metadata *__stop_syscalls_metadata[];
69 60
70static struct syscall_metadata **syscalls_metadata; 61static struct syscall_metadata **syscalls_metadata;
71 62
72static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 63#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
64static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
65{
66 /*
67 * Only compare after the "sys" prefix. Archs that use
68 * syscall wrappers may have syscalls symbols aliases prefixed
69 * with "SyS" instead of "sys", leading to an unwanted
70 * mismatch.
71 */
72 return !strcmp(sym + 3, name + 3);
73}
74#endif
75
76static __init struct syscall_metadata *
77find_syscall_meta(unsigned long syscall)
73{ 78{
74 struct syscall_metadata *start; 79 struct syscall_metadata **start;
75 struct syscall_metadata *stop; 80 struct syscall_metadata **stop;
76 char str[KSYM_SYMBOL_LEN]; 81 char str[KSYM_SYMBOL_LEN];
77 82
78 83
79 start = (struct syscall_metadata *)__start_syscalls_metadata; 84 start = __start_syscalls_metadata;
80 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 85 stop = __stop_syscalls_metadata;
81 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 86 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
82 87
88 if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
89 return NULL;
90
83 for ( ; start < stop; start++) { 91 for ( ; start < stop; start++) {
84 /* 92 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
85 * Only compare after the "sys" prefix. Archs that use 93 return *start;
86 * syscall wrappers may have syscalls symbols aliases prefixed
87 * with "SyS" instead of "sys", leading to an unwanted
88 * mismatch.
89 */
90 if (start->name && !strcmp(start->name + 3, str + 3))
91 return start;
92 } 94 }
93 return NULL; 95 return NULL;
94} 96}
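find_syscall_meta() above now matches symbols through arch_syscall_match_sym_name(), with the generic version skipping the three-character "sys"/"SyS" prefix. A hypothetical sketch of how an architecture whose syscall symbols carry an extra leading dot (".sys_foo" in kallsyms) might override it; the define name comes from the #ifndef above, while the offsets in the body are illustrative.

/*
 * Hypothetical arch override of the weak default above, for an arch
 * whose syscall symbols are listed as ".sys_foo" in kallsyms.  The
 * offsets below are illustrative.
 */
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME

static inline bool arch_syscall_match_sym_name(const char *sym,
                                               const char *name)
{
    /* skip ".sys" in sym and "sys" in name before comparing */
    return !strcmp(sym + 4, name + 3);
}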
@@ -367,7 +369,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
367 int num; 369 int num;
368 370
369 num = ((struct syscall_metadata *)call->data)->syscall_nr; 371 num = ((struct syscall_metadata *)call->data)->syscall_nr;
370 if (num < 0 || num >= NR_syscalls) 372 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
371 return -ENOSYS; 373 return -ENOSYS;
372 mutex_lock(&syscall_trace_lock); 374 mutex_lock(&syscall_trace_lock);
373 if (!sys_refcount_enter) 375 if (!sys_refcount_enter)
@@ -385,7 +387,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
385 int num; 387 int num;
386 388
387 num = ((struct syscall_metadata *)call->data)->syscall_nr; 389 num = ((struct syscall_metadata *)call->data)->syscall_nr;
388 if (num < 0 || num >= NR_syscalls) 390 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
389 return; 391 return;
390 mutex_lock(&syscall_trace_lock); 392 mutex_lock(&syscall_trace_lock);
391 sys_refcount_enter--; 393 sys_refcount_enter--;
@@ -401,7 +403,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
401 int num; 403 int num;
402 404
403 num = ((struct syscall_metadata *)call->data)->syscall_nr; 405 num = ((struct syscall_metadata *)call->data)->syscall_nr;
404 if (num < 0 || num >= NR_syscalls) 406 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
405 return -ENOSYS; 407 return -ENOSYS;
406 mutex_lock(&syscall_trace_lock); 408 mutex_lock(&syscall_trace_lock);
407 if (!sys_refcount_exit) 409 if (!sys_refcount_exit)
@@ -419,7 +421,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
419 int num; 421 int num;
420 422
421 num = ((struct syscall_metadata *)call->data)->syscall_nr; 423 num = ((struct syscall_metadata *)call->data)->syscall_nr;
422 if (num < 0 || num >= NR_syscalls) 424 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
423 return; 425 return;
424 mutex_lock(&syscall_trace_lock); 426 mutex_lock(&syscall_trace_lock);
425 sys_refcount_exit--; 427 sys_refcount_exit--;
@@ -432,6 +434,14 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
432int init_syscall_trace(struct ftrace_event_call *call) 434int init_syscall_trace(struct ftrace_event_call *call)
433{ 435{
434 int id; 436 int id;
437 int num;
438
439 num = ((struct syscall_metadata *)call->data)->syscall_nr;
440 if (num < 0 || num >= NR_syscalls) {
441 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
442 ((struct syscall_metadata *)call->data)->name);
443 return -ENOSYS;
444 }
435 445
436 if (set_syscall_print_fmt(call) < 0) 446 if (set_syscall_print_fmt(call) < 0)
437 return -ENOMEM; 447 return -ENOMEM;
@@ -446,7 +456,7 @@ int init_syscall_trace(struct ftrace_event_call *call)
446 return id; 456 return id;
447} 457}
448 458
449unsigned long __init arch_syscall_addr(int nr) 459unsigned long __init __weak arch_syscall_addr(int nr)
450{ 460{
451 return (unsigned long)sys_call_table[nr]; 461 return (unsigned long)sys_call_table[nr];
452} 462}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
263{ 263{
264 int ret, cpu; 264 int ret, cpu;
265 265
266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); 271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
267 if (ret) 272 if (ret)
268 goto out; 273 goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
279 if (ret) 284 if (ret)
280 goto no_creation; 285 goto no_creation;
281 286
282 for_each_possible_cpu(cpu) {
283 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
284 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
285 }
286
287 return 0; 287 return 0;
288 288
289no_creation: 289no_creation: