Diffstat:
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/powerpc/Kconfig | 4
-rw-r--r--  arch/powerpc/kernel/Makefile | 14
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 130
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 62
-rw-r--r--  arch/powerpc/kernel/ftrace.c | 165
-rw-r--r--  arch/powerpc/kernel/io.c | 3
-rw-r--r--  arch/powerpc/kernel/irq.c | 6
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 11
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 5
-rw-r--r--  arch/powerpc/platforms/powermac/Makefile | 5
-rw-r--r--  arch/sparc64/Kconfig | 2
-rw-r--r--  arch/sparc64/Kconfig.debug | 2
-rw-r--r--  arch/sparc64/kernel/Makefile | 1
-rw-r--r--  arch/sparc64/kernel/ftrace.c | 99
-rw-r--r--  arch/sparc64/lib/mcount.S | 58
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/Kconfig.debug | 8
-rw-r--r--  arch/x86/kernel/Makefile | 8
-rw-r--r--  arch/x86/kernel/alternative.c | 22
-rw-r--r--  arch/x86/kernel/entry_32.S | 68
-rw-r--r--  arch/x86/kernel/entry_64.S | 102
-rw-r--r--  arch/x86/kernel/ftrace.c | 159
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 9
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 4
-rw-r--r--  arch/x86/kernel/process_32.c | 3
-rw-r--r--  arch/x86/kernel/process_64.c | 3
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 3
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 11
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/thunk_32.S | 47
-rw-r--r--  arch/x86/lib/thunk_64.S | 19
-rw-r--r--  arch/x86/mm/fault.c | 56
-rw-r--r--  arch/x86/mm/init_32.c | 4
-rw-r--r--  arch/x86/mm/init_64.c | 10
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 15
-rw-r--r--  arch/x86/vdso/vgetcpu.c | 3
-rw-r--r--  include/asm-powerpc/hw_irq.h | 10
-rw-r--r--  include/asm-x86/alternative.h | 2
-rw-r--r--  include/asm-x86/irqflags.h | 24
-rw-r--r--  include/asm-x86/kdebug.h | 9
-rw-r--r--  include/asm-x86/vsyscall.h | 3
-rw-r--r--  include/linux/ftrace.h | 132
-rw-r--r--  include/linux/irqflags.h | 13
-rw-r--r--  include/linux/linkage.h | 2
-rw-r--r--  include/linux/marker.h | 40
-rw-r--r--  include/linux/preempt.h | 34
-rw-r--r--  include/linux/sched.h | 16
-rw-r--r--  include/linux/writeback.h | 2
-rw-r--r--  kernel/Makefile | 14
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  kernel/lockdep.c | 33
-rw-r--r--  kernel/marker.c | 30
-rw-r--r--  kernel/printk.c | 2
-rw-r--r--  kernel/sched.c | 55
-rw-r--r--  kernel/semaphore.c | 2
-rw-r--r--  kernel/spinlock.c | 2
-rw-r--r--  kernel/sysctl.c | 11
-rw-r--r--  kernel/trace/Kconfig | 135
-rw-r--r--  kernel/trace/Makefile | 23
-rw-r--r--  kernel/trace/ftrace.c | 1398
-rw-r--r--  kernel/trace/trace.c | 3034
-rw-r--r--  kernel/trace/trace.h | 319
-rw-r--r--  kernel/trace/trace_functions.c | 78
-rw-r--r--  kernel/trace/trace_irqsoff.c | 502
-rw-r--r--  kernel/trace/trace_sched_switch.c | 301
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 382
-rw-r--r--  kernel/trace/trace_selftest.c | 562
-rw-r--r--  kernel/trace/trace_selftest_dynamic.c | 7
-rw-r--r--  kernel/trace/trace_sysprof.c | 363
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/Makefile | 9
-rw-r--r--  lib/smp_processor_id.c | 6
-rw-r--r--  mm/page-writeback.c | 10
-rw-r--r--  scripts/Makefile.lib | 3
76 files changed, 8593 insertions, 111 deletions
diff --git a/Makefile b/Makefile
index 6aff5f47c21d..ff2c681fb5dd 100644
--- a/Makefile
+++ b/Makefile
@@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g
528KBUILD_AFLAGS += -gdwarf-2 528KBUILD_AFLAGS += -gdwarf-2
529endif 529endif
530 530
531ifdef CONFIG_FTRACE
532KBUILD_CFLAGS += -pg
533endif
534
531# We trigger additional mismatches with less inlining 535# We trigger additional mismatches with less inlining
532ifdef CONFIG_DEBUG_SECTION_MISMATCH 536ifdef CONFIG_DEBUG_SECTION_MISMATCH
533KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) 537KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
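
The -pg flag added here makes gcc instrument every kernel function entry with a call to mcount, which the per-architecture stubs added later in this series hook into. As a rough illustration only (not part of this patch), a function built with -pg behaves roughly as if it began like this:

/* Illustration only: approximate effect of building with gcc -pg. */
extern void mcount(void);

void some_traced_function(void)
{
        mcount();       /* inserted implicitly by the compiler; never written in C */
        /* ... normal function body ... */
}

This is also why several Makefiles below strip -pg again with CFLAGS_REMOVE_<file>.o: early boot code on powerpc and the time/debug helpers on x86 must not call into the tracer.
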
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e2659407..a5e9912e2d37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -105,11 +105,13 @@ config ARCH_NO_VIRT_TO_BUS
105config PPC 105config PPC
106 bool 106 bool
107 default y 107 default y
108 select HAVE_DYNAMIC_FTRACE
109 select HAVE_FTRACE
108 select HAVE_IDE 110 select HAVE_IDE
109 select HAVE_OPROFILE
110 select HAVE_KPROBES 111 select HAVE_KPROBES
111 select HAVE_KRETPROBES 112 select HAVE_KRETPROBES
112 select HAVE_LMB 113 select HAVE_LMB
114 select HAVE_OPROFILE
113 115
114config EARLY_PRINTK 116config EARLY_PRINTK
115 bool 117 bool
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2346d271fbfd..f3f5e2641432 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
12CFLAGS_btext.o += -fPIC 12CFLAGS_btext.o += -fPIC
13endif 13endif
14 14
15ifdef CONFIG_FTRACE
16# Do not trace early boot code
17CFLAGS_REMOVE_cputable.o = -pg
18CFLAGS_REMOVE_prom_init.o = -pg
19
20ifdef CONFIG_DYNAMIC_FTRACE
21# dynamic ftrace setup.
22CFLAGS_REMOVE_ftrace.o = -pg
23endif
24
25endif
26
15obj-y := cputable.o ptrace.o syscalls.o \ 27obj-y := cputable.o ptrace.o syscalls.o \
16 irq.o align.o signal_32.o pmc.o vdso.o \ 28 irq.o align.o signal_32.o pmc.o vdso.o \
17 init_task.o process.o systbl.o idle.o \ 29 init_task.o process.o systbl.o idle.o \
@@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
78obj-$(CONFIG_AUDIT) += audit.o 90obj-$(CONFIG_AUDIT) += audit.o
79obj64-$(CONFIG_AUDIT) += compat_audit.o 91obj64-$(CONFIG_AUDIT) += compat_audit.o
80 92
93obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
94
81obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 95obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
82 96
83ifneq ($(CONFIG_PPC_INDIRECT_IO),y) 97ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d9875c..0e6221889ca9 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1035,3 +1035,133 @@ machine_check_in_rtas:
1035 /* XXX load up BATs and panic */ 1035 /* XXX load up BATs and panic */
1036 1036
1037#endif /* CONFIG_PPC_RTAS */ 1037#endif /* CONFIG_PPC_RTAS */
1038
1039#ifdef CONFIG_FTRACE
1040#ifdef CONFIG_DYNAMIC_FTRACE
1041_GLOBAL(mcount)
1042_GLOBAL(_mcount)
1043 stwu r1,-48(r1)
1044 stw r3, 12(r1)
1045 stw r4, 16(r1)
1046 stw r5, 20(r1)
1047 stw r6, 24(r1)
1048 mflr r3
1049 stw r7, 28(r1)
1050 mfcr r5
1051 stw r8, 32(r1)
1052 stw r9, 36(r1)
1053 stw r10,40(r1)
1054 stw r3, 44(r1)
1055 stw r5, 8(r1)
1056 .globl mcount_call
1057mcount_call:
1058 bl ftrace_stub
1059 nop
1060 lwz r6, 8(r1)
1061 lwz r0, 44(r1)
1062 lwz r3, 12(r1)
1063 mtctr r0
1064 lwz r4, 16(r1)
1065 mtcr r6
1066 lwz r5, 20(r1)
1067 lwz r6, 24(r1)
1068 lwz r0, 52(r1)
1069 lwz r7, 28(r1)
1070 lwz r8, 32(r1)
1071 mtlr r0
1072 lwz r9, 36(r1)
1073 lwz r10,40(r1)
1074 addi r1, r1, 48
1075 bctr
1076
1077_GLOBAL(ftrace_caller)
1078 /* Based on objdump output from glibc */
1079 stwu r1,-48(r1)
1080 stw r3, 12(r1)
1081 stw r4, 16(r1)
1082 stw r5, 20(r1)
1083 stw r6, 24(r1)
1084 mflr r3
1085 lwz r4, 52(r1)
1086 mfcr r5
1087 stw r7, 28(r1)
1088 stw r8, 32(r1)
1089 stw r9, 36(r1)
1090 stw r10,40(r1)
1091 stw r3, 44(r1)
1092 stw r5, 8(r1)
1093.globl ftrace_call
1094ftrace_call:
1095 bl ftrace_stub
1096 nop
1097 lwz r6, 8(r1)
1098 lwz r0, 44(r1)
1099 lwz r3, 12(r1)
1100 mtctr r0
1101 lwz r4, 16(r1)
1102 mtcr r6
1103 lwz r5, 20(r1)
1104 lwz r6, 24(r1)
1105 lwz r0, 52(r1)
1106 lwz r7, 28(r1)
1107 lwz r8, 32(r1)
1108 mtlr r0
1109 lwz r9, 36(r1)
1110 lwz r10,40(r1)
1111 addi r1, r1, 48
1112 bctr
1113#else
1114_GLOBAL(mcount)
1115_GLOBAL(_mcount)
1116 stwu r1,-48(r1)
1117 stw r3, 12(r1)
1118 stw r4, 16(r1)
1119 stw r5, 20(r1)
1120 stw r6, 24(r1)
1121 mflr r3
1122 lwz r4, 52(r1)
1123 mfcr r5
1124 stw r7, 28(r1)
1125 stw r8, 32(r1)
1126 stw r9, 36(r1)
1127 stw r10,40(r1)
1128 stw r3, 44(r1)
1129 stw r5, 8(r1)
1130
1131 LOAD_REG_ADDR(r5, ftrace_trace_function)
1132#if 0
1133 mtctr r3
1134 mr r1, r5
1135 bctrl
1136#endif
1137 lwz r5,0(r5)
1138#if 1
1139 mtctr r5
1140 bctrl
1141#else
1142 bl ftrace_stub
1143#endif
1144 nop
1145
1146 lwz r6, 8(r1)
1147 lwz r0, 44(r1)
1148 lwz r3, 12(r1)
1149 mtctr r0
1150 lwz r4, 16(r1)
1151 mtcr r6
1152 lwz r5, 20(r1)
1153 lwz r6, 24(r1)
1154 lwz r0, 52(r1)
1155 lwz r7, 28(r1)
1156 lwz r8, 32(r1)
1157 mtlr r0
1158 lwz r9, 36(r1)
1159 lwz r10,40(r1)
1160 addi r1, r1, 48
1161 bctr
1162#endif
1163
1164_GLOBAL(ftrace_stub)
1165 blr
1166
1167#endif /* CONFIG_FTRACE */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b769e55..2c4d9e056ead 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -870,3 +870,65 @@ _GLOBAL(enter_prom)
870 ld r0,16(r1) 870 ld r0,16(r1)
871 mtlr r0 871 mtlr r0
872 blr 872 blr
873
874#ifdef CONFIG_FTRACE
875#ifdef CONFIG_DYNAMIC_FTRACE
876_GLOBAL(mcount)
877_GLOBAL(_mcount)
878 /* Taken from output of objdump from lib64/glibc */
879 mflr r3
880 stdu r1, -112(r1)
881 std r3, 128(r1)
882 .globl mcount_call
883mcount_call:
884 bl ftrace_stub
885 nop
886 ld r0, 128(r1)
887 mtlr r0
888 addi r1, r1, 112
889 blr
890
891_GLOBAL(ftrace_caller)
892 /* Taken from output of objdump from lib64/glibc */
893 mflr r3
894 ld r11, 0(r1)
895 stdu r1, -112(r1)
896 std r3, 128(r1)
897 ld r4, 16(r11)
898.globl ftrace_call
899ftrace_call:
900 bl ftrace_stub
901 nop
902 ld r0, 128(r1)
903 mtlr r0
904 addi r1, r1, 112
905_GLOBAL(ftrace_stub)
906 blr
907#else
908_GLOBAL(mcount)
909 blr
910
911_GLOBAL(_mcount)
912 /* Taken from output of objdump from lib64/glibc */
913 mflr r3
914 ld r11, 0(r1)
915 stdu r1, -112(r1)
916 std r3, 128(r1)
917 ld r4, 16(r11)
918
919
920 LOAD_REG_ADDR(r5,ftrace_trace_function)
921 ld r5,0(r5)
922 ld r5,0(r5)
923 mtctr r5
924 bctrl
925
926 nop
927 ld r0, 128(r1)
928 mtlr r0
929 addi r1, r1, 112
930_GLOBAL(ftrace_stub)
931 blr
932
933#endif
934#endif
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 000000000000..5a4993fefa45
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,165 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks go out to P.A. Semi, Inc. for supplying me with a PPC64 box.
7 *
8 */
9
10#include <linux/spinlock.h>
11#include <linux/hardirq.h>
12#include <linux/ftrace.h>
13#include <linux/percpu.h>
14#include <linux/init.h>
15#include <linux/list.h>
16
17#include <asm/cacheflush.h>
18
19#define CALL_BACK 4
20
21static unsigned int ftrace_nop = 0x60000000;
22
23#ifdef CONFIG_PPC32
24# define GET_ADDR(addr) addr
25#else
26/* PowerPC64's functions are data that points to the functions */
27# define GET_ADDR(addr) *(unsigned long *)addr
28#endif
29
30notrace int ftrace_ip_converted(unsigned long ip)
31{
32 unsigned int save;
33
34 ip -= CALL_BACK;
35 save = *(unsigned int *)ip;
36
37 return save == ftrace_nop;
38}
39
40static unsigned int notrace ftrace_calc_offset(long ip, long addr)
41{
42 return (int)((addr + CALL_BACK) - ip);
43}
44
45notrace unsigned char *ftrace_nop_replace(void)
46{
47 return (char *)&ftrace_nop;
48}
49
50notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
51{
52 static unsigned int op;
53
54 addr = GET_ADDR(addr);
55
56 /* Set to "bl addr" */
57 op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe);
58
59 /*
60 * No locking needed, this must be called via kstop_machine
61 * which in essence is like running on a uniprocessor machine.
62 */
63 return (unsigned char *)&op;
64}
65
66#ifdef CONFIG_PPC64
67# define _ASM_ALIGN " .align 3 "
68# define _ASM_PTR " .llong "
69#else
70# define _ASM_ALIGN " .align 2 "
71# define _ASM_PTR " .long "
72#endif
73
74notrace int
75ftrace_modify_code(unsigned long ip, unsigned char *old_code,
76 unsigned char *new_code)
77{
78 unsigned replaced;
79 unsigned old = *(unsigned *)old_code;
80 unsigned new = *(unsigned *)new_code;
81 int faulted = 0;
82
83 /* move the IP back to the start of the call */
84 ip -= CALL_BACK;
85
86 /*
87 * Note: Due to modules and __init, code can
88 * disappear and change, we need to protect against faulting
89 * as well as code changing.
90 *
91 * No real locking needed, this code is run through
92 * kstop_machine.
93 */
94 asm volatile (
95 "1: lwz %1, 0(%2)\n"
96 " cmpw %1, %5\n"
97 " bne 2f\n"
98 " stwu %3, 0(%2)\n"
99 "2:\n"
100 ".section .fixup, \"ax\"\n"
101 "3: li %0, 1\n"
102 " b 2b\n"
103 ".previous\n"
104 ".section __ex_table,\"a\"\n"
105 _ASM_ALIGN "\n"
106 _ASM_PTR "1b, 3b\n"
107 ".previous"
108 : "=r"(faulted), "=r"(replaced)
109 : "r"(ip), "r"(new),
110 "0"(faulted), "r"(old)
111 : "memory");
112
113 if (replaced != old && replaced != new)
114 faulted = 2;
115
116 if (!faulted)
117 flush_icache_range(ip, ip + 8);
118
119 return faulted;
120}
121
122notrace int ftrace_update_ftrace_func(ftrace_func_t func)
123{
124 unsigned long ip = (unsigned long)(&ftrace_call);
125 unsigned char old[4], *new;
126 int ret;
127
128 ip += CALL_BACK;
129
130 memcpy(old, &ftrace_call, 4);
131 new = ftrace_call_replace(ip, (unsigned long)func);
132 ret = ftrace_modify_code(ip, old, new);
133
134 return ret;
135}
136
137notrace int ftrace_mcount_set(unsigned long *data)
138{
139 unsigned long ip = (long)(&mcount_call);
140 unsigned long *addr = data;
141 unsigned char old[4], *new;
142
143 /* ip is at the location, but modify code will subtract this */
144 ip += CALL_BACK;
145
146 /*
147 * Replace the mcount stub with a pointer to the
148 * ip recorder function.
149 */
150 memcpy(old, &mcount_call, 4);
151 new = ftrace_call_replace(ip, *addr);
152 *addr = ftrace_modify_code(ip, old, new);
153
154 return 0;
155}
156
157int __init ftrace_dyn_arch_init(void *data)
158{
159 /* This is running in kstop_machine */
160
161 ftrace_mcount_set(data);
162
163 return 0;
164}
165
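
A worked example of the branch encoding built by ftrace_call_replace() above, using made-up addresses: suppose the "bl _mcount" to be patched sits at 0xc0001000, so the ip handed to ftrace_call_replace() (the address just past the instruction, before ftrace_modify_code() subtracts CALL_BACK) is 0xc0001004, and the resolved target is 0xc0002000. Then:

        offset = (0xc0002000 + 4) - 0xc0001004      = 0x1000
        op     = 0x48000001 | (0x1000 & 0x03fffffe) = 0x48001001   /* bl +0x1000 */

which is a relative "bl" with the link bit set, branching from 0xc0001000 to 0xc0002000. The GET_ADDR() indirection matters only on PPC64, where a function symbol refers to a function descriptor rather than to the code itself.
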
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e31aca9208eb..1882bf419fa6 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
120 120
121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) 121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
122 122
123void _memset_io(volatile void __iomem *addr, int c, unsigned long n) 123notrace void
124_memset_io(volatile void __iomem *addr, int c, unsigned long n)
124{ 125{
125 void *p = (void __force *)addr; 126 void *p = (void __force *)addr;
126 u32 lc = c; 127 u32 lc = c;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bcc249d90c4d..dcc946e67099 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
98 98
99int distribute_irqs = 1; 99int distribute_irqs = 1;
100 100
101static inline unsigned long get_hard_enabled(void) 101static inline notrace unsigned long get_hard_enabled(void)
102{ 102{
103 unsigned long enabled; 103 unsigned long enabled;
104 104
@@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
108 return enabled; 108 return enabled;
109} 109}
110 110
111static inline void set_soft_enabled(unsigned long enable) 111static inline notrace void set_soft_enabled(unsigned long enable)
112{ 112{
113 __asm__ __volatile__("stb %0,%1(13)" 113 __asm__ __volatile__("stb %0,%1(13)"
114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); 114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
115} 115}
116 116
117void raw_local_irq_restore(unsigned long en) 117notrace void raw_local_irq_restore(unsigned long en)
118{ 118{
119 /* 119 /*
120 * get_paca()->soft_enabled = en; 120 * get_paca()->soft_enabled = en;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5112a4aa801d..22f8e2bacd32 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -47,6 +47,11 @@
47#include <asm/kgdb.h> 47#include <asm/kgdb.h>
48#endif 48#endif
49 49
50#ifdef CONFIG_FTRACE
51extern void _mcount(void);
52EXPORT_SYMBOL(_mcount);
53#endif
54
50extern void bootx_init(unsigned long r4, unsigned long phys); 55extern void bootx_init(unsigned long r4, unsigned long phys);
51 56
52int boot_cpuid; 57int boot_cpuid;
@@ -81,7 +86,7 @@ int ucache_bsize;
81 * from the address that it was linked at, so we must use RELOC/PTRRELOC 86 * from the address that it was linked at, so we must use RELOC/PTRRELOC
82 * to access static data (including strings). -- paulus 87 * to access static data (including strings). -- paulus
83 */ 88 */
84unsigned long __init early_init(unsigned long dt_ptr) 89notrace unsigned long __init early_init(unsigned long dt_ptr)
85{ 90{
86 unsigned long offset = reloc_offset(); 91 unsigned long offset = reloc_offset();
87 struct cpu_spec *spec; 92 struct cpu_spec *spec;
@@ -111,7 +116,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
111 * This is called very early on the boot process, after a minimal 116 * This is called very early on the boot process, after a minimal
112 * MMU environment has been set up but before MMU_init is called. 117 * MMU environment has been set up but before MMU_init is called.
113 */ 118 */
114void __init machine_init(unsigned long dt_ptr, unsigned long phys) 119notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
115{ 120{
116 /* Enable early debugging if any specified (see udbg.h) */ 121 /* Enable early debugging if any specified (see udbg.h) */
117 udbg_early_init(); 122 udbg_early_init();
@@ -133,7 +138,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
133 138
134#ifdef CONFIG_BOOKE_WDT 139#ifdef CONFIG_BOOKE_WDT
135/* Checks wdt=x and wdt_period=xx command-line option */ 140/* Checks wdt=x and wdt_period=xx command-line option */
136int __init early_parse_wdt(char *p) 141notrace int __init early_parse_wdt(char *p)
137{ 142{
138 if (p && strncmp(p, "0", 1) != 0) 143 if (p && strncmp(p, "0", 1) != 0)
139 booke_wdt_enabled = 1; 144 booke_wdt_enabled = 1;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 098fd96a394a..277bf18cbbcc 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -85,6 +85,11 @@ struct ppc64_caches ppc64_caches = {
85}; 85};
86EXPORT_SYMBOL_GPL(ppc64_caches); 86EXPORT_SYMBOL_GPL(ppc64_caches);
87 87
88#ifdef CONFIG_FTRACE
89extern void _mcount(void);
90EXPORT_SYMBOL(_mcount);
91#endif
92
88/* 93/*
89 * These are used in binfmt_elf.c to put aux entries on the stack 94 * These are used in binfmt_elf.c to put aux entries on the stack
90 * for each elf executable being started. 95 * for each elf executable being started.
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4d72c8f72159..89774177b209 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,5 +1,10 @@
1CFLAGS_bootx_init.o += -fPIC 1CFLAGS_bootx_init.o += -fPIC
2 2
3ifdef CONFIG_FTRACE
4# Do not trace early boot code
5CFLAGS_REMOVE_bootx_init.o = -pg
6endif
7
3obj-y += pic.o setup.o time.o feature.o pci.o \ 8obj-y += pic.o setup.o time.o feature.o pci.o \
4 sleep.o low_i2c.o cache.o pfunc_core.o \ 9 sleep.o low_i2c.o cache.o pfunc_core.o \
5 pfunc_base.o 10 pfunc_base.o
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index eb36f3b746b8..fca9246470b1 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -11,6 +11,8 @@ config SPARC
11config SPARC64 11config SPARC64
12 bool 12 bool
13 default y 13 default y
14 select HAVE_DYNAMIC_FTRACE
15 select HAVE_FTRACE
14 select HAVE_IDE 16 select HAVE_IDE
15 select HAVE_LMB 17 select HAVE_LMB
16 select HAVE_ARCH_KGDB 18 select HAVE_ARCH_KGDB
diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index 6a4d28a4076d..d6d32d178fc8 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
33 33
34config MCOUNT 34config MCOUNT
35 bool 35 bool
36 depends on STACK_DEBUG 36 depends on STACK_DEBUG || FTRACE
37 default y 37 default y
38 38
39config FRAME_POINTER 39config FRAME_POINTER
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index ec4f5ebb1ca6..418b5782096e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \
14 power.o sbus.o sparc64_ksyms.o chmc.o \ 14 power.o sbus.o sparc64_ksyms.o chmc.o \
15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o 15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
16 16
17obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
17obj-$(CONFIG_STACKTRACE) += stacktrace.o 18obj-$(CONFIG_STACKTRACE) += stacktrace.o
18obj-$(CONFIG_PCI) += ebus.o pci_common.o \ 19obj-$(CONFIG_PCI) += ebus.o pci_common.o \
19 pci_psycho.o pci_sabre.o pci_schizo.o \ 20 pci_psycho.o pci_sabre.o pci_schizo.o \
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
new file mode 100644
index 000000000000..f449e6df6c4a
--- /dev/null
+++ b/arch/sparc64/kernel/ftrace.c
@@ -0,0 +1,99 @@
1#include <linux/spinlock.h>
2#include <linux/hardirq.h>
3#include <linux/ftrace.h>
4#include <linux/percpu.h>
5#include <linux/init.h>
6#include <linux/list.h>
7
8static const u32 ftrace_nop = 0x01000000;
9
10notrace int ftrace_ip_converted(unsigned long ip)
11{
12 u32 insn = *(u32 *) ip;
13
14 return (insn == ftrace_nop);
15}
16
17notrace unsigned char *ftrace_nop_replace(void)
18{
19 return (char *)&ftrace_nop;
20}
21
22notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
23{
24 static u32 call;
25 s32 off;
26
27 off = ((s32)addr - (s32)ip);
28 call = 0x40000000 | ((u32)off >> 2);
29
30 return (unsigned char *) &call;
31}
32
33notrace int
34ftrace_modify_code(unsigned long ip, unsigned char *old_code,
35 unsigned char *new_code)
36{
37 u32 old = *(u32 *)old_code;
38 u32 new = *(u32 *)new_code;
39 u32 replaced;
40 int faulted;
41
42 __asm__ __volatile__(
43 "1: cas [%[ip]], %[old], %[new]\n"
44 " flush %[ip]\n"
45 " mov 0, %[faulted]\n"
46 "2:\n"
47 " .section .fixup,#alloc,#execinstr\n"
48 " .align 4\n"
49 "3: sethi %%hi(2b), %[faulted]\n"
50 " jmpl %[faulted] + %%lo(2b), %%g0\n"
51 " mov 1, %[faulted]\n"
52 " .previous\n"
53 " .section __ex_table,\"a\"\n"
54 " .align 4\n"
55 " .word 1b, 3b\n"
56 " .previous\n"
57 : "=r" (replaced), [faulted] "=r" (faulted)
58 : [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
59 : "memory");
60
61 if (replaced != old && replaced != new)
62 faulted = 2;
63
64 return faulted;
65}
66
67notrace int ftrace_update_ftrace_func(ftrace_func_t func)
68{
69 unsigned long ip = (unsigned long)(&ftrace_call);
70 unsigned char old[4], *new;
71
72 memcpy(old, &ftrace_call, 4);
73 new = ftrace_call_replace(ip, (unsigned long)func);
74 return ftrace_modify_code(ip, old, new);
75}
76
77notrace int ftrace_mcount_set(unsigned long *data)
78{
79 unsigned long ip = (long)(&mcount_call);
80 unsigned long *addr = data;
81 unsigned char old[4], *new;
82
83 /*
84 * Replace the mcount stub with a pointer to the
85 * ip recorder function.
86 */
87 memcpy(old, &mcount_call, 4);
88 new = ftrace_call_replace(ip, *addr);
89 *addr = ftrace_modify_code(ip, old, new);
90
91 return 0;
92}
93
94
95int __init ftrace_dyn_arch_init(void *data)
96{
97 ftrace_mcount_set(data);
98 return 0;
99}
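
The same replacement scheme, worked through for the sparc64 encoding above with made-up addresses; there is no CALL_BACK adjustment here because the recorded ip is already the address of the call instruction itself (sparc leaves the call's own PC in %o7). For ip = 0x00500000 and addr = 0x00600000:

        off  = 0x00600000 - 0x00500000        = 0x00100000
        call = 0x40000000 | (0x00100000 >> 2) = 0x40040000   /* "call", word displacement */

The cas-based ftrace_modify_code() then swaps that word in atomically and flushes it from the instruction cache.
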
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 9e4534b485c7..7735a7a60533 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -28,10 +28,13 @@ ovstack:
28 .skip OVSTACKSIZE 28 .skip OVSTACKSIZE
29#endif 29#endif
30 .text 30 .text
31 .align 32 31 .align 32
32 .globl mcount, _mcount 32 .globl _mcount
33mcount: 33 .type _mcount,#function
34 .globl mcount
35 .type mcount,#function
34_mcount: 36_mcount:
37mcount:
35#ifdef CONFIG_STACK_DEBUG 38#ifdef CONFIG_STACK_DEBUG
36 /* 39 /*
37 * Check whether %sp is dangerously low. 40 * Check whether %sp is dangerously low.
@@ -55,6 +58,53 @@ _mcount:
55 or %g3, %lo(panicstring), %o0 58 or %g3, %lo(panicstring), %o0
56 call prom_halt 59 call prom_halt
57 nop 60 nop
611:
62#endif
63#ifdef CONFIG_FTRACE
64#ifdef CONFIG_DYNAMIC_FTRACE
65 mov %o7, %o0
66 .globl mcount_call
67mcount_call:
68 call ftrace_stub
69 mov %o0, %o7
70#else
71 sethi %hi(ftrace_trace_function), %g1
72 sethi %hi(ftrace_stub), %g2
73 ldx [%g1 + %lo(ftrace_trace_function)], %g1
74 or %g2, %lo(ftrace_stub), %g2
75 cmp %g1, %g2
76 be,pn %icc, 1f
77 mov %i7, %o1
78 jmpl %g1, %g0
79 mov %o7, %o0
80 /* not reached */
811:
58#endif 82#endif
591: retl 83#endif
84 retl
60 nop 85 nop
86 .size _mcount,.-_mcount
87 .size mcount,.-mcount
88
89#ifdef CONFIG_FTRACE
90 .globl ftrace_stub
91 .type ftrace_stub,#function
92ftrace_stub:
93 retl
94 nop
95 .size ftrace_stub,.-ftrace_stub
96#ifdef CONFIG_DYNAMIC_FTRACE
97 .globl ftrace_caller
98 .type ftrace_caller,#function
99ftrace_caller:
100 mov %i7, %o1
101 mov %o7, %o0
102 .globl ftrace_call
103ftrace_call:
104 call ftrace_stub
105 mov %o0, %o7
106 retl
107 nop
108 .size ftrace_caller,.-ftrace_caller
109#endif
110#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e0edaaa6920a..400135148555 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_KPROBES 24 select HAVE_KPROBES
25 select HAVE_KRETPROBES 25 select HAVE_KRETPROBES
26 select HAVE_DYNAMIC_FTRACE
27 select HAVE_FTRACE
26 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 28 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
27 select HAVE_ARCH_KGDB if !X86_VOYAGER 29 select HAVE_ARCH_KGDB if !X86_VOYAGER
28 30
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..f395fd537c5c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -172,6 +172,14 @@ config IOMMU_LEAK
172 Add a simple leak tracer to the IOMMU code. This is useful when you 172 Add a simple leak tracer to the IOMMU code. This is useful when you
173 are debugging a buggy device driver that leaks IOMMU mappings. 173 are debugging a buggy device driver that leaks IOMMU mappings.
174 174
175config PAGE_FAULT_HANDLERS
176 bool "Custom page fault handlers"
177 depends on DEBUG_KERNEL
178 help
179 Allow the use of custom page fault handlers. A kernel module may
180 register a function that is called on every page fault. Custom
181 handlers are used by some debugging and reverse engineering tools.
182
175# 183#
176# IO delay types: 184# IO delay types:
177# 185#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4769c9..5ff67208d4ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE
10# Do not profile debug utilities
11CFLAGS_REMOVE_tsc_64.o = -pg
12CFLAGS_REMOVE_tsc_32.o = -pg
13CFLAGS_REMOVE_rtc.o = -pg
14endif
15
9# 16#
10# vsyscalls (which work on the user stack) should have 17# vsyscalls (which work on the user stack) should have
11# no stack-protector checks: 18# no stack-protector checks:
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
56obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
57obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
58obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
59obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
60obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 68obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
61obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/spinlock.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144 144
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146static inline const unsigned char*const * find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
162 { -1, NULL } 162 { -1, NULL }
163}; 163};
164 164
165static const unsigned char*const * find_nop_table(void) 165const unsigned char *const *find_nop_table(void)
166{ 166{
167 const unsigned char *const *noptable = intel_nops; 167 const unsigned char *const *noptable = intel_nops;
168 int i; 168 int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
279 struct list_head next; 279 struct list_head next;
280}; 280};
281static LIST_HEAD(smp_alt_modules); 281static LIST_HEAD(smp_alt_modules);
282static DEFINE_SPINLOCK(smp_alt); 282static DEFINE_MUTEX(smp_alt);
283static int smp_mode = 1; /* protected by smp_alt */ 283static int smp_mode = 1; /* protected by smp_alt */
284 284
285void alternatives_smp_module_add(struct module *mod, char *name, 285void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
312 __func__, smp->locks, smp->locks_end, 312 __func__, smp->locks, smp->locks_end,
313 smp->text, smp->text_end, smp->name); 313 smp->text, smp->text_end, smp->name);
314 314
315 spin_lock(&smp_alt); 315 mutex_lock(&smp_alt);
316 list_add_tail(&smp->next, &smp_alt_modules); 316 list_add_tail(&smp->next, &smp_alt_modules);
317 if (boot_cpu_has(X86_FEATURE_UP)) 317 if (boot_cpu_has(X86_FEATURE_UP))
318 alternatives_smp_unlock(smp->locks, smp->locks_end, 318 alternatives_smp_unlock(smp->locks, smp->locks_end,
319 smp->text, smp->text_end); 319 smp->text, smp->text_end);
320 spin_unlock(&smp_alt); 320 mutex_unlock(&smp_alt);
321} 321}
322 322
323void alternatives_smp_module_del(struct module *mod) 323void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
327 if (smp_alt_once || noreplace_smp) 327 if (smp_alt_once || noreplace_smp)
328 return; 328 return;
329 329
330 spin_lock(&smp_alt); 330 mutex_lock(&smp_alt);
331 list_for_each_entry(item, &smp_alt_modules, next) { 331 list_for_each_entry(item, &smp_alt_modules, next) {
332 if (mod != item->mod) 332 if (mod != item->mod)
333 continue; 333 continue;
334 list_del(&item->next); 334 list_del(&item->next);
335 spin_unlock(&smp_alt); 335 mutex_unlock(&smp_alt);
336 DPRINTK("%s: %s\n", __func__, item->name); 336 DPRINTK("%s: %s\n", __func__, item->name);
337 kfree(item); 337 kfree(item);
338 return; 338 return;
339 } 339 }
340 spin_unlock(&smp_alt); 340 mutex_unlock(&smp_alt);
341} 341}
342 342
343void alternatives_smp_switch(int smp) 343void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
359 return; 359 return;
360 BUG_ON(!smp && (num_online_cpus() > 1)); 360 BUG_ON(!smp && (num_online_cpus() > 1));
361 361
362 spin_lock(&smp_alt); 362 mutex_lock(&smp_alt);
363 363
364 /* 364 /*
365 * Avoid unnecessary switches because it forces JIT based VMs to 365 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
383 mod->text, mod->text_end); 383 mod->text, mod->text_end);
384 } 384 }
385 smp_mode = smp; 385 smp_mode = smp;
386 spin_unlock(&smp_alt); 386 mutex_unlock(&smp_alt);
387} 387}
388 388
389#endif 389#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4fa55a2..04ea83ccb979 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1110,6 +1110,74 @@ ENDPROC(xen_failsafe_callback)
1110 1110
1111#endif /* CONFIG_XEN */ 1111#endif /* CONFIG_XEN */
1112 1112
1113#ifdef CONFIG_FTRACE
1114#ifdef CONFIG_DYNAMIC_FTRACE
1115
1116ENTRY(mcount)
1117 pushl %eax
1118 pushl %ecx
1119 pushl %edx
1120 movl 0xc(%esp), %eax
1121
1122.globl mcount_call
1123mcount_call:
1124 call ftrace_stub
1125
1126 popl %edx
1127 popl %ecx
1128 popl %eax
1129
1130 ret
1131END(mcount)
1132
1133ENTRY(ftrace_caller)
1134 pushl %eax
1135 pushl %ecx
1136 pushl %edx
1137 movl 0xc(%esp), %eax
1138 movl 0x4(%ebp), %edx
1139
1140.globl ftrace_call
1141ftrace_call:
1142 call ftrace_stub
1143
1144 popl %edx
1145 popl %ecx
1146 popl %eax
1147
1148.globl ftrace_stub
1149ftrace_stub:
1150 ret
1151END(ftrace_caller)
1152
1153#else /* ! CONFIG_DYNAMIC_FTRACE */
1154
1155ENTRY(mcount)
1156 cmpl $ftrace_stub, ftrace_trace_function
1157 jnz trace
1158.globl ftrace_stub
1159ftrace_stub:
1160 ret
1161
1162 /* taken from glibc */
1163trace:
1164 pushl %eax
1165 pushl %ecx
1166 pushl %edx
1167 movl 0xc(%esp), %eax
1168 movl 0x4(%ebp), %edx
1169
1170 call *ftrace_trace_function
1171
1172 popl %edx
1173 popl %ecx
1174 popl %eax
1175
1176 jmp ftrace_stub
1177END(mcount)
1178#endif /* CONFIG_DYNAMIC_FTRACE */
1179#endif /* CONFIG_FTRACE */
1180
1113.section .rodata,"a" 1181.section .rodata,"a"
1114#include "syscall_table_32.S" 1182#include "syscall_table_32.S"
1115 1183
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..fe25e5febca3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -54,6 +54,108 @@
54 54
55 .code64 55 .code64
56 56
57#ifdef CONFIG_FTRACE
58#ifdef CONFIG_DYNAMIC_FTRACE
59ENTRY(mcount)
60
61 subq $0x38, %rsp
62 movq %rax, (%rsp)
63 movq %rcx, 8(%rsp)
64 movq %rdx, 16(%rsp)
65 movq %rsi, 24(%rsp)
66 movq %rdi, 32(%rsp)
67 movq %r8, 40(%rsp)
68 movq %r9, 48(%rsp)
69
70 movq 0x38(%rsp), %rdi
71
72.globl mcount_call
73mcount_call:
74 call ftrace_stub
75
76 movq 48(%rsp), %r9
77 movq 40(%rsp), %r8
78 movq 32(%rsp), %rdi
79 movq 24(%rsp), %rsi
80 movq 16(%rsp), %rdx
81 movq 8(%rsp), %rcx
82 movq (%rsp), %rax
83 addq $0x38, %rsp
84
85 retq
86END(mcount)
87
88ENTRY(ftrace_caller)
89
90 /* taken from glibc */
91 subq $0x38, %rsp
92 movq %rax, (%rsp)
93 movq %rcx, 8(%rsp)
94 movq %rdx, 16(%rsp)
95 movq %rsi, 24(%rsp)
96 movq %rdi, 32(%rsp)
97 movq %r8, 40(%rsp)
98 movq %r9, 48(%rsp)
99
100 movq 0x38(%rsp), %rdi
101 movq 8(%rbp), %rsi
102
103.globl ftrace_call
104ftrace_call:
105 call ftrace_stub
106
107 movq 48(%rsp), %r9
108 movq 40(%rsp), %r8
109 movq 32(%rsp), %rdi
110 movq 24(%rsp), %rsi
111 movq 16(%rsp), %rdx
112 movq 8(%rsp), %rcx
113 movq (%rsp), %rax
114 addq $0x38, %rsp
115
116.globl ftrace_stub
117ftrace_stub:
118 retq
119END(ftrace_caller)
120
121#else /* ! CONFIG_DYNAMIC_FTRACE */
122ENTRY(mcount)
123 cmpq $ftrace_stub, ftrace_trace_function
124 jnz trace
125.globl ftrace_stub
126ftrace_stub:
127 retq
128
129trace:
130 /* taken from glibc */
131 subq $0x38, %rsp
132 movq %rax, (%rsp)
133 movq %rcx, 8(%rsp)
134 movq %rdx, 16(%rsp)
135 movq %rsi, 24(%rsp)
136 movq %rdi, 32(%rsp)
137 movq %r8, 40(%rsp)
138 movq %r9, 48(%rsp)
139
140 movq 0x38(%rsp), %rdi
141 movq 8(%rbp), %rsi
142
143 call *ftrace_trace_function
144
145 movq 48(%rsp), %r9
146 movq 40(%rsp), %r8
147 movq 32(%rsp), %rdi
148 movq 24(%rsp), %rsi
149 movq 16(%rsp), %rdx
150 movq 8(%rsp), %rcx
151 movq (%rsp), %rax
152 addq $0x38, %rsp
153
154 jmp ftrace_stub
155END(mcount)
156#endif /* CONFIG_DYNAMIC_FTRACE */
157#endif /* CONFIG_FTRACE */
158
57#ifndef CONFIG_PREEMPT 159#ifndef CONFIG_PREEMPT
58#define retint_kernel retint_restore_args 160#define retint_kernel retint_restore_args
59#endif 161#endif
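
In both variants above the stub passes two arguments in the usual x86-64 registers: %rdi receives the mcount call site inside the instrumented function (its return address, pulled from the stack at 0x38(%rsp)) and %rsi receives the address that function was called from (read from 8(%rbp)); the 32-bit stubs in entry_32.S do the same through %eax and %edx. A minimal sketch of a tracer callback with that (ip, parent_ip) shape follows; it assumes the ftrace_func_t, struct ftrace_ops and register_ftrace_function() declarations from include/linux/ftrace.h, which is part of this series but not shown in this hunk.

/* Sketch only: a tracer with the signature the mcount stubs above expect. */
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>

static void notrace my_tracer(unsigned long ip, unsigned long parent_ip)
{
        /* ip        = call site inside the traced function
         * parent_ip = the function that called it */
}

static struct ftrace_ops my_ops = {
        .func = my_tracer,
};

static int __init my_tracer_init(void)
{
        register_ftrace_function(&my_ops);
        return 0;
}

static void __exit my_tracer_exit(void)
{
        unregister_ftrace_function(&my_ops);
}

module_init(my_tracer_init);
module_exit(my_tracer_exit);
MODULE_LICENSE("GPL");
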
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..498608c015fb
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,159 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/alternative.h>
20
21#define CALL_BACK 5
22
23/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop;
25
26union ftrace_code_union {
27 char code[5];
28 struct {
29 char e8;
30 int offset;
31 } __attribute__((packed));
32};
33
34notrace int ftrace_ip_converted(unsigned long ip)
35{
36 unsigned long save;
37
38 ip -= CALL_BACK;
39 save = *(long *)ip;
40
41 return save == *ftrace_nop;
42}
43
44static int notrace ftrace_calc_offset(long ip, long addr)
45{
46 return (int)(addr - ip);
47}
48
49notrace unsigned char *ftrace_nop_replace(void)
50{
51 return (char *)ftrace_nop;
52}
53
54notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
55{
56 static union ftrace_code_union calc;
57
58 calc.e8 = 0xe8;
59 calc.offset = ftrace_calc_offset(ip, addr);
60
61 /*
62 * No locking needed, this must be called via kstop_machine
63 * which in essence is like running on a uniprocessor machine.
64 */
65 return calc.code;
66}
67
68notrace int
69ftrace_modify_code(unsigned long ip, unsigned char *old_code,
70 unsigned char *new_code)
71{
72 unsigned replaced;
73 unsigned old = *(unsigned *)old_code; /* 4 bytes */
74 unsigned new = *(unsigned *)new_code; /* 4 bytes */
75 unsigned char newch = new_code[4];
76 int faulted = 0;
77
78 /* move the IP back to the start of the call */
79 ip -= CALL_BACK;
80
81 /*
82 * Note: Due to modules and __init, code can
83 * disappear and change, we need to protect against faulting
84 * as well as code changing.
85 *
86 * No real locking needed, this code is run through
87 * kstop_machine.
88 */
89 asm volatile (
90 "1: lock\n"
91 " cmpxchg %3, (%2)\n"
92 " jnz 2f\n"
93 " movb %b4, 4(%2)\n"
94 "2:\n"
95 ".section .fixup, \"ax\"\n"
96 "3: movl $1, %0\n"
97 " jmp 2b\n"
98 ".previous\n"
99 _ASM_EXTABLE(1b, 3b)
100 : "=r"(faulted), "=a"(replaced)
101 : "r"(ip), "r"(new), "r"(newch),
102 "0"(faulted), "a"(old)
103 : "memory");
104 sync_core();
105
106 if (replaced != old && replaced != new)
107 faulted = 2;
108
109 return faulted;
110}
111
112notrace int ftrace_update_ftrace_func(ftrace_func_t func)
113{
114 unsigned long ip = (unsigned long)(&ftrace_call);
115 unsigned char old[5], *new;
116 int ret;
117
118 ip += CALL_BACK;
119
120 memcpy(old, &ftrace_call, 5);
121 new = ftrace_call_replace(ip, (unsigned long)func);
122 ret = ftrace_modify_code(ip, old, new);
123
124 return ret;
125}
126
127notrace int ftrace_mcount_set(unsigned long *data)
128{
129 unsigned long ip = (long)(&mcount_call);
130 unsigned long *addr = data;
131 unsigned char old[5], *new;
132
133 /* ip is at the location, but modify code will subtract this */
134 ip += CALL_BACK;
135
136 /*
137 * Replace the mcount stub with a pointer to the
138 * ip recorder function.
139 */
140 memcpy(old, &mcount_call, 5);
141 new = ftrace_call_replace(ip, *addr);
142 *addr = ftrace_modify_code(ip, old, new);
143
144 return 0;
145}
146
147int __init ftrace_dyn_arch_init(void *data)
148{
149 const unsigned char *const *noptable = find_nop_table();
150
151 /* This is running in kstop_machine */
152
153 ftrace_mcount_set(data);
154
155 ftrace_nop = (unsigned long *)noptable[CALL_BACK];
156
157 return 0;
158}
159
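
A worked example of the call encoding built by ftrace_call_replace() above, with made-up addresses. An e8 (call rel32) displacement is relative to the next instruction, which is exactly what the ip passed in here points at (the mcount return address); ftrace_modify_code() only backs up by CALL_BACK to find where to write the five bytes. For a call at 0x08048100 targeting 0x08049000:

        offset = 0x08049000 - (0x08048100 + 5) = 0x00000efb
        bytes  = e8 fb 0e 00 00                 /* call +0xefb, little-endian rel32 */

ftrace_nop_replace() likewise hands back the 5-byte nop picked from find_nop_table() at boot, so a patched site is always exactly five bytes either way.
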
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..29999dbb754c 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
1#include <linux/ftrace.h>
1#include <linux/module.h> 2#include <linux/module.h>
3
2#include <asm/checksum.h> 4#include <asm/checksum.h>
3#include <asm/desc.h>
4#include <asm/pgtable.h> 5#include <asm/pgtable.h>
6#include <asm/desc.h>
7
8#ifdef CONFIG_FTRACE
9/* mcount is defined in assembly */
10EXPORT_SYMBOL(mcount);
11#endif
5 12
6/* Networking helper routines. */ 13/* Networking helper routines. */
7EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..88923fd7a6fc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
16#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
107 unsigned long page_list[PAGES_NR]; 109 unsigned long page_list[PAGES_NR];
108 void *control_page; 110 void *control_page;
109 111
112 tracer_disable();
113
110 /* Interrupts aren't acceptable while we reboot */ 114 /* Interrupts aren't acceptable while we reboot */
111 local_irq_disable(); 115 local_irq_disable();
112 116
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db4511..1558fdc174f9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
16#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
184 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
185 void *control_page; 187 void *control_page;
186 188
189 tracer_disable();
190
187 /* Interrupts aren't acceptable while we reboot */ 191 /* Interrupts aren't acceptable while we reboot */
188 local_irq_disable(); 192 local_irq_disable();
189 193
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e2db9ac5c61c..347a7aba8b16 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -185,7 +185,10 @@ void cpu_idle(void)
185 185
186 local_irq_disable(); 186 local_irq_disable();
187 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 187 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
188 /* Don't trace irqs off for idle */
189 stop_critical_timings();
188 idle(); 190 idle();
191 start_critical_timings();
189 } 192 }
190 tick_nohz_restart_sched_tick(); 193 tick_nohz_restart_sched_tick();
191 preempt_enable_no_resched(); 194 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c6eb5c91e5f6..ea090e6cfe39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -165,7 +165,10 @@ void cpu_idle(void)
165 */ 165 */
166 local_irq_disable(); 166 local_irq_disable();
167 enter_idle(); 167 enter_idle();
168 /* Don't trace irqs off for idle */
169 stop_critical_timings();
168 idle(); 170 idle();
171 start_critical_timings();
169 /* In many cases the interrupt that ended idle 172 /* In many cases the interrupt that ended idle
170 has already called exit_idle. But some idle 173 has already called exit_idle. But some idle
171 loops can be woken up without interrupt. */ 174 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d564..4063dfa2a02d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
42#include <asm/topology.h> 42#include <asm/topology.h>
43#include <asm/vgtod.h> 43#include <asm/vgtod.h>
44 44
45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 45#define __vsyscall(nr) \
46 __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
46#define __syscall_clobber "r11","cx","memory" 47#define __syscall_clobber "r11","cx","memory"
47 48
48/* 49/*
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410fb..122885bc5f3b 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -1,15 +1,22 @@
1/* Exports for assembly files. 1/* Exports for assembly files.
2 All C exports should go in the respective C files. */ 2 All C exports should go in the respective C files. */
3 3
4#include <linux/ftrace.h>
4#include <linux/module.h> 5#include <linux/module.h>
5#include <net/checksum.h>
6#include <linux/smp.h> 6#include <linux/smp.h>
7 7
8#include <net/checksum.h>
9
8#include <asm/processor.h> 10#include <asm/processor.h>
9#include <asm/uaccess.h>
10#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/uaccess.h>
11#include <asm/desc.h> 13#include <asm/desc.h>
12 14
15#ifdef CONFIG_FTRACE
16/* mcount is defined in assembly */
17EXPORT_SYMBOL(mcount);
18#endif
19
13EXPORT_SYMBOL(kernel_thread); 20EXPORT_SYMBOL(kernel_thread);
14 21
15EXPORT_SYMBOL(__get_user_1); 22EXPORT_SYMBOL(__get_user_1);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..84aa2883fe15 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay_$(BITS).o 7lib-y := delay_$(BITS).o
8lib-y += thunk_$(BITS).o
8lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o 9lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
9lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
10 11
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
1/*
2 * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
3 * Copyright 2008 by Steven Rostedt, Red Hat, Inc
4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
8 #include <linux/linkage.h>
9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func
31 .globl \name
32\name:
33 pushl %eax
34 pushl %ecx
35 pushl %edx
36 /* Place EIP in the arg1 */
37 movl 3*4(%esp), %eax
38 call \func
39 popl %edx
40 popl %ecx
41 popl %eax
42 ret
43 .endm
44
45 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
46 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 7 */
7 8
@@ -42,8 +43,22 @@
42#endif 43#endif
43 44
44#ifdef CONFIG_TRACE_IRQFLAGS 45#ifdef CONFIG_TRACE_IRQFLAGS
45 thunk trace_hardirqs_on_thunk,trace_hardirqs_on 46 /* put return address in rdi (arg1) */
46 thunk trace_hardirqs_off_thunk,trace_hardirqs_off 47 .macro thunk_ra name,func
48 .globl \name
49\name:
50 CFI_STARTPROC
51 SAVE_ARGS
52 /* SAVE_ARGS pushes 9 elements */
53 /* the next element would be the rip */
54 movq 9*8(%rsp), %rdi
55 call \func
56 jmp restore
57 CFI_ENDPROC
58 .endm
59
60 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
61 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif 62#endif
48 63
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8bcb6f40ccb6..42394b353c6a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,6 +49,60 @@
49#define PF_RSVD (1<<3) 49#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 50#define PF_INSTR (1<<4)
51 51
52#ifdef CONFIG_PAGE_FAULT_HANDLERS
53static HLIST_HEAD(pf_handlers); /* protected by RCU */
54static DEFINE_SPINLOCK(pf_handlers_writer);
55
56void register_page_fault_handler(struct pf_handler *new_pfh)
57{
58 unsigned long flags;
59 spin_lock_irqsave(&pf_handlers_writer, flags);
60 hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
61 spin_unlock_irqrestore(&pf_handlers_writer, flags);
62}
63EXPORT_SYMBOL_GPL(register_page_fault_handler);
64
65/**
66 * unregister_page_fault_handler:
67 * The caller must ensure @old_pfh is not in use anymore before freeing it.
68 * This function does not guarantee it. The list of handlers is protected by
69 * RCU, so you can do this by e.g. calling synchronize_rcu().
70 */
71void unregister_page_fault_handler(struct pf_handler *old_pfh)
72{
73 unsigned long flags;
74 spin_lock_irqsave(&pf_handlers_writer, flags);
75 hlist_del_rcu(&old_pfh->hlist);
76 spin_unlock_irqrestore(&pf_handlers_writer, flags);
77}
78EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
79#endif
80
81/* returns non-zero if do_page_fault() should return */
82static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
83 unsigned long address)
84{
85#ifdef CONFIG_PAGE_FAULT_HANDLERS
86 int ret = 0;
87 struct pf_handler *cur;
88 struct hlist_node *ncur;
89
90 if (hlist_empty(&pf_handlers))
91 return 0;
92
93 rcu_read_lock();
94 hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
95 ret = cur->handler(regs, error_code, address);
96 if (ret)
97 break;
98 }
99 rcu_read_unlock();
100 return ret;
101#else
102 return 0;
103#endif
104}
105
52static inline int notify_page_fault(struct pt_regs *regs) 106static inline int notify_page_fault(struct pt_regs *regs)
53{ 107{
54#ifdef CONFIG_KPROBES 108#ifdef CONFIG_KPROBES
@@ -606,6 +660,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
606 660
607 if (notify_page_fault(regs)) 661 if (notify_page_fault(regs))
608 return; 662 return;
663 if (handle_custom_pf(regs, error_code, address))
664 return;
609 665
610 /* 666 /*
611 * We fault-in kernel-space virtual memory on-demand. The 667 * We fault-in kernel-space virtual memory on-demand. The
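
The PAGE_FAULT_HANDLERS help text and the register/unregister pair above define a small module-facing hook: a callback run on every fault before normal handling, returning non-zero to make do_page_fault() bail out early. A rough sketch of a client module follows; it assumes struct pf_handler (declared in include/asm-x86/kdebug.h in this series, not shown in this hunk) carries the .hlist and .handler members that handle_custom_pf() uses.

/* Sketch only; the struct pf_handler layout is assumed from its usage above. */
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <asm/kdebug.h>
#include <asm/ptrace.h>

static int my_fault_handler(struct pt_regs *regs, unsigned long error_code,
                            unsigned long address)
{
        /* Non-zero would make do_page_fault() return without further handling. */
        return 0;
}

static struct pf_handler my_pfh = {
        .handler = my_fault_handler,
};

static int __init my_pfh_init(void)
{
        register_page_fault_handler(&my_pfh);
        return 0;
}

static void __exit my_pfh_exit(void)
{
        unregister_page_fault_handler(&my_pfh);
        /* Per the kernel-doc above: wait out in-flight handlers before freeing. */
        synchronize_rcu();
}

module_init(my_pfh_init);
module_exit(my_pfh_exit);
MODULE_LICENSE("GPL");
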
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b6..f96eca21ad8f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -710,6 +710,8 @@ void mark_rodata_ro(void)
710 unsigned long start = PFN_ALIGN(_text); 710 unsigned long start = PFN_ALIGN(_text);
711 unsigned long size = PFN_ALIGN(_etext) - start; 711 unsigned long size = PFN_ALIGN(_etext) - start;
712 712
713#ifndef CONFIG_DYNAMIC_FTRACE
714 /* Dynamic tracing modifies the kernel text section */
713 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 715 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
714 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 716 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
715 size >> 10); 717 size >> 10);
@@ -722,6 +724,8 @@ void mark_rodata_ro(void)
722 printk(KERN_INFO "Testing CPA: write protecting again\n"); 724 printk(KERN_INFO "Testing CPA: write protecting again\n");
723 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 725 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
724#endif 726#endif
727#endif /* CONFIG_DYNAMIC_FTRACE */
728
725 start += size; 729 start += size;
726 size = (unsigned long)__end_rodata - start; 730 size = (unsigned long)__end_rodata - start;
727 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 731 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 156e6d7b0e32..a5fd2e06f5c9 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -766,6 +766,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
766void mark_rodata_ro(void) 766void mark_rodata_ro(void)
767{ 767{
768 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 768 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
769 unsigned long rodata_start =
770 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
771
772#ifdef CONFIG_DYNAMIC_FTRACE
773 /* Dynamic tracing modifies the kernel text section */
774 start = rodata_start;
775#endif
769 776
770 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 777 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
771 (end - start) >> 10); 778 (end - start) >> 10);
@@ -775,8 +782,7 @@ void mark_rodata_ro(void)
775 * The rodata section (but not the kernel text!) should also be 782 * The rodata section (but not the kernel text!) should also be
776 * not-executable. 783 * not-executable.
777 */ 784 */
778 start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 785 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
779 set_memory_nx(start, (end - start) >> PAGE_SHIFT);
780 786
781 rodata_test(); 787 rodata_test();
782 788
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
23 23
24#define gtod vdso_vsyscall_gtod_data 24#define gtod vdso_vsyscall_gtod_data
25 25
26static long vdso_fallback_gettime(long clock, struct timespec *ts) 26notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
27{ 27{
28 long ret; 28 long ret;
29 asm("syscall" : "=a" (ret) : 29 asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
31 return ret; 31 return ret;
32} 32}
33 33
34static inline long vgetns(void) 34notrace static inline long vgetns(void)
35{ 35{
36 long v; 36 long v;
37 cycles_t (*vread)(void); 37 cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
40 return (v * gtod->clock.mult) >> gtod->clock.shift; 40 return (v * gtod->clock.mult) >> gtod->clock.shift;
41} 41}
42 42
43static noinline int do_realtime(struct timespec *ts) 43notrace static noinline int do_realtime(struct timespec *ts)
44{ 44{
45 unsigned long seq, ns; 45 unsigned long seq, ns;
46 do { 46 do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
54} 54}
55 55
56/* Copy of the version in kernel/time.c which we cannot directly access */ 56/* Copy of the version in kernel/time.c which we cannot directly access */
57static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) 57notrace static void
58vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
58{ 59{
59 while (nsec >= NSEC_PER_SEC) { 60 while (nsec >= NSEC_PER_SEC) {
60 nsec -= NSEC_PER_SEC; 61 nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
68 ts->tv_nsec = nsec; 69 ts->tv_nsec = nsec;
69} 70}
70 71
71static noinline int do_monotonic(struct timespec *ts) 72notrace static noinline int do_monotonic(struct timespec *ts)
72{ 73{
73 unsigned long seq, ns, secs; 74 unsigned long seq, ns, secs;
74 do { 75 do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
82 return 0; 83 return 0;
83} 84}
84 85
85int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 86notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
86{ 87{
87 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) 88 if (likely(gtod->sysctl_enabled && gtod->clock.vread))
88 switch (clock) { 89 switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
96int clock_gettime(clockid_t, struct timespec *) 97int clock_gettime(clockid_t, struct timespec *)
97 __attribute__((weak, alias("__vdso_clock_gettime"))); 98 __attribute__((weak, alias("__vdso_clock_gettime")));
98 99
99int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 100notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
100{ 101{
101 long ret; 102 long ret;
102 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { 103 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
13#include <asm/vgtod.h> 13#include <asm/vgtod.h>
14#include "vextern.h" 14#include "vextern.h"
15 15
16long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) 16notrace long
17__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
17{ 18{
18 unsigned int p; 19 unsigned int p;
19 20
diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h
index ad8c9f7fd0e3..f75a5fc64d2e 100644
--- a/include/asm-powerpc/hw_irq.h
+++ b/include/asm-powerpc/hw_irq.h
@@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void);
59 get_paca()->hard_enabled = 0; \ 59 get_paca()->hard_enabled = 0; \
60 } while(0) 60 } while(0)
61 61
62static inline int irqs_disabled_flags(unsigned long flags)
63{
64 return flags == 0;
65}
66
62#else 67#else
63 68
64#if defined(CONFIG_BOOKE) 69#if defined(CONFIG_BOOKE)
@@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags)
113#define hard_irq_enable() local_irq_enable() 118#define hard_irq_enable() local_irq_enable()
114#define hard_irq_disable() local_irq_disable() 119#define hard_irq_disable() local_irq_disable()
115 120
121static inline int irqs_disabled_flags(unsigned long flags)
122{
123 return (flags & MSR_EE) == 0;
124}
125
116#endif /* CONFIG_PPC64 */ 126#endif /* CONFIG_PPC64 */
117 127
118/* 128/*
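The irqs_disabled_flags() helpers added above let common code ask whether a previously saved flags word had interrupts disabled, which the irqsoff tracer relies on. A minimal sketch of the intended use (the wrapper function name is illustrative, not part of the patch):

	static int example_irqs_were_off(void)
	{
		unsigned long flags;

		/* inspect the saved state without changing the current one */
		local_save_flags(flags);
		return irqs_disabled_flags(flags);
	}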
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 1f6a9ca10126..f6aa18eadf71 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
72static inline void alternatives_smp_switch(int smp) {} 72static inline void alternatives_smp_switch(int smp) {}
73#endif /* CONFIG_SMP */ 73#endif /* CONFIG_SMP */
74 74
75const unsigned char *const *find_nop_table(void);
76
75/* 77/*
76 * Alternative instructions for different CPU types or capabilities. 78 * Alternative instructions for different CPU types or capabilities.
77 * 79 *
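find_nop_table() is exported here so the new arch/x86/kernel/ftrace.c can pick NOPs for patched call sites. A rough sketch of a caller; the indexing is an assumption based on how the NOP tables in alternative.c are laid out:

	const unsigned char *const *noptable = find_nop_table();

	/* noptable[n] points at an n-byte NOP sequence; a 5-byte NOP can
	 * overwrite a patched-out "call mcount" on x86 */
	const unsigned char *nop5 = noptable[5];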
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970e..24d71b1eb189 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void)
179 * have a reliable stack. x86_64 only. 179 * have a reliable stack. x86_64 only.
180 */ 180 */
181#define SWAPGS_UNSAFE_STACK swapgs 181#define SWAPGS_UNSAFE_STACK swapgs
182#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
183#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
184#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk 182#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
185#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 183#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
186 TRACE_IRQS_ON; \ 184 TRACE_IRQS_ON; \
@@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void)
192 TRACE_IRQS_OFF; 190 TRACE_IRQS_OFF;
193 191
194#else 192#else
195#define ARCH_TRACE_IRQS_ON \
196 pushl %eax; \
197 pushl %ecx; \
198 pushl %edx; \
199 call trace_hardirqs_on; \
200 popl %edx; \
201 popl %ecx; \
202 popl %eax;
203
204#define ARCH_TRACE_IRQS_OFF \
205 pushl %eax; \
206 pushl %ecx; \
207 pushl %edx; \
208 call trace_hardirqs_off; \
209 popl %edx; \
210 popl %ecx; \
211 popl %eax;
212
213#define ARCH_LOCKDEP_SYS_EXIT \ 193#define ARCH_LOCKDEP_SYS_EXIT \
214 pushl %eax; \ 194 pushl %eax; \
215 pushl %ecx; \ 195 pushl %ecx; \
@@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void)
223#endif 203#endif
224 204
225#ifdef CONFIG_TRACE_IRQFLAGS 205#ifdef CONFIG_TRACE_IRQFLAGS
226# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON 206# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
227# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF 207# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
228#else 208#else
229# define TRACE_IRQS_ON 209# define TRACE_IRQS_ON
230# define TRACE_IRQS_OFF 210# define TRACE_IRQS_OFF
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb59ba1..a80f2d6cc737 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs);
35extern unsigned long oops_begin(void); 35extern unsigned long oops_begin(void);
36extern void oops_end(unsigned long, struct pt_regs *, int signr); 36extern void oops_end(unsigned long, struct pt_regs *, int signr);
37 37
38struct pf_handler {
39 struct hlist_node hlist;
40 int (*handler)(struct pt_regs *regs, unsigned long error_code,
41 unsigned long address);
42};
43
44extern void register_page_fault_handler(struct pf_handler *new_pfh);
45extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
46
38#endif 47#endif
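A module can hook page faults through this new interface roughly as follows; the handler and variable names are illustrative, not part of the patch:

	static int my_fault_notifier(struct pt_regs *regs,
				     unsigned long error_code,
				     unsigned long address)
	{
		/* return non-zero to report that the fault was handled here */
		return 0;
	}

	static struct pf_handler my_pf = {
		.handler = my_fault_notifier,
	};

	/* typically from module init/exit */
	register_page_fault_handler(&my_pf);
	/* ... */
	unregister_page_fault_handler(&my_pf);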
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 17b3700949bf..6b66ff905af0 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -24,7 +24,8 @@ enum vsyscall_num {
24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) 24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
25#define __section_vsyscall_clock __attribute__ \ 25#define __section_vsyscall_clock __attribute__ \
26 ((unused, __section__ (".vsyscall_clock"),aligned(16))) 26 ((unused, __section__ (".vsyscall_clock"),aligned(16)))
27#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) 27#define __vsyscall_fn \
28 __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
28 29
29#define VGETCPU_RDTSCP 1 30#define VGETCPU_RDTSCP 1
30#define VGETCPU_LSL 2 31#define VGETCPU_LSL 2
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
new file mode 100644
index 000000000000..922e23d0196f
--- /dev/null
+++ b/include/linux/ftrace.h
@@ -0,0 +1,132 @@
1#ifndef _LINUX_FTRACE_H
2#define _LINUX_FTRACE_H
3
4#ifdef CONFIG_FTRACE
5
6#include <linux/linkage.h>
7#include <linux/fs.h>
8
9extern int ftrace_enabled;
10extern int
11ftrace_enable_sysctl(struct ctl_table *table, int write,
12 struct file *filp, void __user *buffer, size_t *lenp,
13 loff_t *ppos);
14
15typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
16
17struct ftrace_ops {
18 ftrace_func_t func;
19 struct ftrace_ops *next;
20};
21
22/*
 23 * The ftrace_ops must be static and should also
 24 * be read_mostly. These functions do modify read_mostly variables
 25 * so use them sparingly. Never free an ftrace_ops or modify the
26 * next pointer after it has been registered. Even after unregistering
27 * it, the next pointer may still be used internally.
28 */
29int register_ftrace_function(struct ftrace_ops *ops);
30int unregister_ftrace_function(struct ftrace_ops *ops);
31void clear_ftrace_function(void);
32
33extern void ftrace_stub(unsigned long a0, unsigned long a1);
34extern void mcount(void);
35
36#else /* !CONFIG_FTRACE */
37# define register_ftrace_function(ops) do { } while (0)
38# define unregister_ftrace_function(ops) do { } while (0)
39# define clear_ftrace_function(ops) do { } while (0)
40#endif /* CONFIG_FTRACE */
41
42#ifdef CONFIG_DYNAMIC_FTRACE
43# define FTRACE_HASHBITS 10
44# define FTRACE_HASHSIZE (1<<FTRACE_HASHBITS)
45
46enum {
47 FTRACE_FL_FREE = (1 << 0),
48 FTRACE_FL_FAILED = (1 << 1),
49 FTRACE_FL_FILTER = (1 << 2),
50 FTRACE_FL_ENABLED = (1 << 3),
51};
52
53struct dyn_ftrace {
54 struct hlist_node node;
55 unsigned long ip;
56 unsigned long flags;
57};
58
59int ftrace_force_update(void);
60void ftrace_set_filter(unsigned char *buf, int len, int reset);
61
62/* defined in arch */
63extern int ftrace_ip_converted(unsigned long ip);
64extern unsigned char *ftrace_nop_replace(void);
65extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
66extern int ftrace_dyn_arch_init(void *data);
67extern int ftrace_mcount_set(unsigned long *data);
68extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
69 unsigned char *new_code);
70extern int ftrace_update_ftrace_func(ftrace_func_t func);
71extern void ftrace_caller(void);
72extern void ftrace_call(void);
73extern void mcount_call(void);
74#else
75# define ftrace_force_update() ({ 0; })
76# define ftrace_set_filter(buf, len, reset) do { } while (0)
77#endif
78
 79/* totally disable ftrace - cannot be re-enabled after this */
80void ftrace_kill(void);
81
82static inline void tracer_disable(void)
83{
84#ifdef CONFIG_FTRACE
85 ftrace_enabled = 0;
86#endif
87}
88
89#ifdef CONFIG_FRAME_POINTER
90/* TODO: need to fix this for ARM */
91# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
92# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
93# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
94# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
95# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
96# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
97# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
98#else
99# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
100# define CALLER_ADDR1 0UL
101# define CALLER_ADDR2 0UL
102# define CALLER_ADDR3 0UL
103# define CALLER_ADDR4 0UL
104# define CALLER_ADDR5 0UL
105# define CALLER_ADDR6 0UL
106#endif
107
108#ifdef CONFIG_IRQSOFF_TRACER
109 extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
110 extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
111#else
112# define time_hardirqs_on(a0, a1) do { } while (0)
113# define time_hardirqs_off(a0, a1) do { } while (0)
114#endif
115
116#ifdef CONFIG_PREEMPT_TRACER
117 extern void trace_preempt_on(unsigned long a0, unsigned long a1);
118 extern void trace_preempt_off(unsigned long a0, unsigned long a1);
119#else
120# define trace_preempt_on(a0, a1) do { } while (0)
121# define trace_preempt_off(a0, a1) do { } while (0)
122#endif
123
124#ifdef CONFIG_CONTEXT_SWITCH_TRACER
125extern void
126ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
127#else
128static inline void
129ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
130#endif
131
132#endif /* _LINUX_FTRACE_H */
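A minimal sketch of a tracer plugging into this interface; as the comment above notes, the ops structure must be static and must never be freed after registration (my_trace_func and my_ops are illustrative names):

	static void my_trace_func(unsigned long ip, unsigned long parent_ip)
	{
		/* called on every traced function while registered and
		 * ftrace_enabled is set; ip is the traced function,
		 * parent_ip its caller */
	}

	static struct ftrace_ops my_ops __read_mostly = {
		.func = my_trace_func,
	};

	register_ftrace_function(&my_ops);
	/* ... tracing active ... */
	unregister_ftrace_function(&my_ops);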
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index e600c4e9b8c5..2b1c2e58566e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,10 +12,10 @@
12#define _LINUX_TRACE_IRQFLAGS_H 12#define _LINUX_TRACE_IRQFLAGS_H
13 13
14#ifdef CONFIG_TRACE_IRQFLAGS 14#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_hardirqs_on(void);
16 extern void trace_hardirqs_off(void);
17 extern void trace_softirqs_on(unsigned long ip); 15 extern void trace_softirqs_on(unsigned long ip);
18 extern void trace_softirqs_off(unsigned long ip); 16 extern void trace_softirqs_off(unsigned long ip);
17 extern void trace_hardirqs_on(void);
18 extern void trace_hardirqs_off(void);
19# define trace_hardirq_context(p) ((p)->hardirq_context) 19# define trace_hardirq_context(p) ((p)->hardirq_context)
20# define trace_softirq_context(p) ((p)->softirq_context) 20# define trace_softirq_context(p) ((p)->softirq_context)
21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) 21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
@@ -41,6 +41,15 @@
41# define INIT_TRACE_IRQFLAGS 41# define INIT_TRACE_IRQFLAGS
42#endif 42#endif
43 43
44#if defined(CONFIG_IRQSOFF_TRACER) || \
45 defined(CONFIG_PREEMPT_TRACER)
46 extern void stop_critical_timings(void);
47 extern void start_critical_timings(void);
48#else
49# define stop_critical_timings() do { } while (0)
50# define start_critical_timings() do { } while (0)
51#endif
52
44#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 53#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
45 54
46#include <asm/irqflags.h> 55#include <asm/irqflags.h>
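stop_critical_timings()/start_critical_timings() bracket a region whose latency should not be charged to the irqs-off or preempt-off tracers; the kernel/printk.c hunk later in this patch uses exactly this pattern:

	stop_critical_timings();	/* don't trace print latency */
	call_console_drivers(_con_start, _log_end);
	start_critical_timings();
	local_irq_restore(flags);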
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 2119610b24f8..14f329c64ba8 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -3,6 +3,8 @@
3 3
4#include <asm/linkage.h> 4#include <asm/linkage.h>
5 5
6#define notrace __attribute__((no_instrument_function))
7
6#ifdef __cplusplus 8#ifdef __cplusplus
7#define CPP_ASMLINKAGE extern "C" 9#define CPP_ASMLINKAGE extern "C"
8#else 10#else
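Since CONFIG_FTRACE builds the whole kernel with -pg, every function normally gets an mcount call; notrace is the per-function opt-out, because gcc's no_instrument_function attribute suppresses that profiling call. This is what the vDSO and vsyscall annotations above rely on. Marking a function is simply (the function itself is illustrative):

	notrace static int never_traced(void)
	{
		return 0;	/* no mcount call is emitted for this function */
	}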
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 430f6adf9762..1290653f9241 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -44,8 +44,8 @@ struct marker {
44 */ 44 */
45 char state; /* Marker state. */ 45 char state; /* Marker state. */
46 char ptype; /* probe type : 0 : single, 1 : multi */ 46 char ptype; /* probe type : 0 : single, 1 : multi */
47 void (*call)(const struct marker *mdata, /* Probe wrapper */ 47 /* Probe wrapper */
48 void *call_private, const char *fmt, ...); 48 void (*call)(const struct marker *mdata, void *call_private, ...);
49 struct marker_probe_closure single; 49 struct marker_probe_closure single;
50 struct marker_probe_closure *multi; 50 struct marker_probe_closure *multi;
51} __attribute__((aligned(8))); 51} __attribute__((aligned(8)));
@@ -58,8 +58,12 @@ struct marker {
58 * Make sure the alignment of the structure in the __markers section will 58 * Make sure the alignment of the structure in the __markers section will
59 * not add unwanted padding between the beginning of the section and the 59 * not add unwanted padding between the beginning of the section and the
60 * structure. Force alignment to the same alignment as the section start. 60 * structure. Force alignment to the same alignment as the section start.
61 *
62 * The "generic" argument controls which marker enabling mechanism must be used.
63 * If generic is true, a variable read is used.
64 * If generic is false, immediate values are used.
61 */ 65 */
62#define __trace_mark(name, call_private, format, args...) \ 66#define __trace_mark(generic, name, call_private, format, args...) \
63 do { \ 67 do { \
64 static const char __mstrtab_##name[] \ 68 static const char __mstrtab_##name[] \
65 __attribute__((section("__markers_strings"))) \ 69 __attribute__((section("__markers_strings"))) \
@@ -72,15 +76,14 @@ struct marker {
72 __mark_check_format(format, ## args); \ 76 __mark_check_format(format, ## args); \
73 if (unlikely(__mark_##name.state)) { \ 77 if (unlikely(__mark_##name.state)) { \
74 (*__mark_##name.call) \ 78 (*__mark_##name.call) \
75 (&__mark_##name, call_private, \ 79 (&__mark_##name, call_private, ## args);\
76 format, ## args); \
77 } \ 80 } \
78 } while (0) 81 } while (0)
79 82
80extern void marker_update_probe_range(struct marker *begin, 83extern void marker_update_probe_range(struct marker *begin,
81 struct marker *end); 84 struct marker *end);
82#else /* !CONFIG_MARKERS */ 85#else /* !CONFIG_MARKERS */
83#define __trace_mark(name, call_private, format, args...) \ 86#define __trace_mark(generic, name, call_private, format, args...) \
84 __mark_check_format(format, ## args) 87 __mark_check_format(format, ## args)
85static inline void marker_update_probe_range(struct marker *begin, 88static inline void marker_update_probe_range(struct marker *begin,
86 struct marker *end) 89 struct marker *end)
@@ -88,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin,
88#endif /* CONFIG_MARKERS */ 91#endif /* CONFIG_MARKERS */
89 92
90/** 93/**
91 * trace_mark - Marker 94 * trace_mark - Marker using code patching
92 * @name: marker name, not quoted. 95 * @name: marker name, not quoted.
93 * @format: format string 96 * @format: format string
94 * @args...: variable argument list 97 * @args...: variable argument list
95 * 98 *
96 * Places a marker. 99 * Places a marker using optimized code patching technique (imv_read())
100 * to be enabled when immediate values are present.
97 */ 101 */
98#define trace_mark(name, format, args...) \ 102#define trace_mark(name, format, args...) \
99 __trace_mark(name, NULL, format, ## args) 103 __trace_mark(0, name, NULL, format, ## args)
104
105/**
106 * _trace_mark - Marker using variable read
107 * @name: marker name, not quoted.
108 * @format: format string
109 * @args...: variable argument list
110 *
111 * Places a marker using a standard memory read (_imv_read()) to be
112 * enabled. Should be used for markers in code paths where instruction
113 * modification based enabling is not welcome. (__init and __exit functions,
114 * lockdep, some traps, printk).
115 */
116#define _trace_mark(name, format, args...) \
117 __trace_mark(1, name, NULL, format, ## args)
100 118
101/** 119/**
102 * MARK_NOARGS - Format string for a marker with no argument. 120 * MARK_NOARGS - Format string for a marker with no argument.
@@ -117,9 +135,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
117extern marker_probe_func __mark_empty_function; 135extern marker_probe_func __mark_empty_function;
118 136
119extern void marker_probe_cb(const struct marker *mdata, 137extern void marker_probe_cb(const struct marker *mdata,
120 void *call_private, const char *fmt, ...); 138 void *call_private, ...);
121extern void marker_probe_cb_noarg(const struct marker *mdata, 139extern void marker_probe_cb_noarg(const struct marker *mdata,
122 void *call_private, const char *fmt, ...); 140 void *call_private, ...);
123 141
124/* 142/*
125 * Connect a probe to a marker. 143 * Connect a probe to a marker.
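Call sites do not see the new "generic" parameter; they keep using trace_mark(), while _trace_mark() is for paths where code patching is unwelcome. Illustrative calls (the marker names and arguments are made up):

	/* normal case: optimized, code-patching based enabling */
	trace_mark(subsys_example_event, "pid %d count %d",
		   current->pid, count);

	/* __init/__exit code, lockdep, printk, trap paths: variable read */
	_trace_mark(subsys_early_event, "value %d", value);

	/* marker that carries no arguments */
	trace_mark(subsys_noarg_event, MARK_NOARGS);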
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 23f0c54175cd..72b1a10a59b6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,7 +10,7 @@
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11#include <linux/list.h> 11#include <linux/list.h>
12 12
13#ifdef CONFIG_DEBUG_PREEMPT 13#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
14 extern void add_preempt_count(int val); 14 extern void add_preempt_count(int val);
15 extern void sub_preempt_count(int val); 15 extern void sub_preempt_count(int val);
16#else 16#else
@@ -52,6 +52,34 @@ do { \
52 preempt_check_resched(); \ 52 preempt_check_resched(); \
53} while (0) 53} while (0)
54 54
55/* For debugging and tracer internals only! */
56#define add_preempt_count_notrace(val) \
57 do { preempt_count() += (val); } while (0)
58#define sub_preempt_count_notrace(val) \
59 do { preempt_count() -= (val); } while (0)
60#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
61#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
62
63#define preempt_disable_notrace() \
64do { \
65 inc_preempt_count_notrace(); \
66 barrier(); \
67} while (0)
68
69#define preempt_enable_no_resched_notrace() \
70do { \
71 barrier(); \
72 dec_preempt_count_notrace(); \
73} while (0)
74
75/* preempt_check_resched is OK to trace */
76#define preempt_enable_notrace() \
77do { \
78 preempt_enable_no_resched_notrace(); \
79 barrier(); \
80 preempt_check_resched(); \
81} while (0)
82
55#else 83#else
56 84
57#define preempt_disable() do { } while (0) 85#define preempt_disable() do { } while (0)
@@ -59,6 +87,10 @@ do { \
59#define preempt_enable() do { } while (0) 87#define preempt_enable() do { } while (0)
60#define preempt_check_resched() do { } while (0) 88#define preempt_check_resched() do { } while (0)
61 89
90#define preempt_disable_notrace() do { } while (0)
91#define preempt_enable_no_resched_notrace() do { } while (0)
92#define preempt_enable_notrace() do { } while (0)
93
62#endif 94#endif
63 95
64#ifdef CONFIG_PREEMPT_NOTIFIERS 96#ifdef CONFIG_PREEMPT_NOTIFIERS
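The _notrace variants let the tracers themselves toggle preemption without recursing back into trace_preempt_off()/trace_preempt_on(); the typical internal pattern is:

	preempt_disable_notrace();
	/* touch per-CPU tracer state; no preempt-off event is recorded */
	preempt_enable_notrace();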
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d3f847ca8d..aa609858aef0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
246extern void init_idle(struct task_struct *idle, int cpu); 246extern void init_idle(struct task_struct *idle, int cpu);
247extern void init_idle_bootup_task(struct task_struct *idle); 247extern void init_idle_bootup_task(struct task_struct *idle);
248 248
249extern int runqueue_is_locked(void);
250
249extern cpumask_t nohz_cpu_mask; 251extern cpumask_t nohz_cpu_mask;
250#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 252#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
251extern int select_nohz_load_balancer(int cpu); 253extern int select_nohz_load_balancer(int cpu);
@@ -2131,6 +2133,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
2131} 2133}
2132#endif 2134#endif
2133 2135
2136#ifdef CONFIG_TRACING
2137extern void
2138__trace_special(void *__tr, void *__data,
2139 unsigned long arg1, unsigned long arg2, unsigned long arg3);
2140#else
2141static inline void
2142__trace_special(void *__tr, void *__data,
2143 unsigned long arg1, unsigned long arg2, unsigned long arg3)
2144{
2145}
2146#endif
2147
2134extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); 2148extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
2135extern long sched_getaffinity(pid_t pid, cpumask_t *mask); 2149extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
2136 2150
@@ -2225,6 +2239,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
2225} 2239}
2226#endif /* CONFIG_MM_OWNER */ 2240#endif /* CONFIG_MM_OWNER */
2227 2241
2242#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
2243
2228#endif /* __KERNEL__ */ 2244#endif /* __KERNEL__ */
2229 2245
2230#endif 2246#endif
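runqueue_is_locked() (implemented in kernel/sched.c below) lets printk-style code decide whether a wakeup is safe while the runqueue lock might be held. A hedged sketch of that kind of check:

	if (!runqueue_is_locked()) {
		/* safe to wake a logging daemon here without risking a
		 * deadlock on rq->lock */
	}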
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f462439cc288..bd91987c065f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable;
105extern int block_dump; 105extern int block_dump;
106extern int laptop_mode; 106extern int laptop_mode;
107 107
108extern unsigned long determine_dirtyable_memory(void);
109
108extern int dirty_ratio_handler(struct ctl_table *table, int write, 110extern int dirty_ratio_handler(struct ctl_table *table, int write,
109 struct file *filp, void __user *buffer, size_t *lenp, 111 struct file *filp, void __user *buffer, size_t *lenp,
110 loff_t *ppos); 112 loff_t *ppos);
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..ca2433e84873 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -pg -mno-spe
15
16ifdef CONFIG_FTRACE
17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg
20CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg
24endif
25
14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 26obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15obj-$(CONFIG_STACKTRACE) += stacktrace.o 27obj-$(CONFIG_STACKTRACE) += stacktrace.o
16obj-y += time/ 28obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 81obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
70obj-$(CONFIG_MARKERS) += marker.o 82obj-$(CONFIG_MARKERS) += marker.o
71obj-$(CONFIG_LATENCYTOP) += latencytop.o 83obj-$(CONFIG_LATENCYTOP) += latencytop.o
84obj-$(CONFIG_FTRACE) += trace/
85obj-$(CONFIG_TRACING) += trace/
72 86
73ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 87ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
74# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 88# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..d66d676dc362 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
909 909
910 rt_mutex_init_task(p); 910 rt_mutex_init_task(p);
911 911
912#ifdef CONFIG_TRACE_IRQFLAGS 912#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
915#endif 915#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f087..65548eff029e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
39#include <linux/irqflags.h> 39#include <linux/irqflags.h>
40#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/sections.h> 44#include <asm/sections.h>
44 45
@@ -81,6 +82,8 @@ static int graph_lock(void)
81 __raw_spin_unlock(&lockdep_lock); 82 __raw_spin_unlock(&lockdep_lock);
82 return 0; 83 return 0;
83 } 84 }
85 /* prevent any recursions within lockdep from causing deadlocks */
86 current->lockdep_recursion++;
84 return 1; 87 return 1;
85} 88}
86 89
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
89 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 92 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
90 return DEBUG_LOCKS_WARN_ON(1); 93 return DEBUG_LOCKS_WARN_ON(1);
91 94
95 current->lockdep_recursion--;
92 __raw_spin_unlock(&lockdep_lock); 96 __raw_spin_unlock(&lockdep_lock);
93 return 0; 97 return 0;
94} 98}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
982 return 1; 986 return 1;
983} 987}
984 988
985#ifdef CONFIG_TRACE_IRQFLAGS 989#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
986/* 990/*
987 * Forwards and backwards subgraph searching, for the purposes of 991 * Forwards and backwards subgraph searching, for the purposes of
988 * proving that two subgraphs can be connected by a new dependency 992 * proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
1680static int mark_lock(struct task_struct *curr, struct held_lock *this, 1684static int mark_lock(struct task_struct *curr, struct held_lock *this,
1681 enum lock_usage_bit new_bit); 1685 enum lock_usage_bit new_bit);
1682 1686
1683#ifdef CONFIG_TRACE_IRQFLAGS 1687#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1684 1688
1685/* 1689/*
1686 * print irq inversion bug: 1690 * print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
2013/* 2017/*
2014 * Hardirqs will be enabled: 2018 * Hardirqs will be enabled:
2015 */ 2019 */
2016void trace_hardirqs_on(void) 2020void trace_hardirqs_on_caller(unsigned long a0)
2017{ 2021{
2018 struct task_struct *curr = current; 2022 struct task_struct *curr = current;
2019 unsigned long ip; 2023 unsigned long ip;
2020 2024
2025 time_hardirqs_on(CALLER_ADDR0, a0);
2026
2021 if (unlikely(!debug_locks || current->lockdep_recursion)) 2027 if (unlikely(!debug_locks || current->lockdep_recursion))
2022 return; 2028 return;
2023 2029
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
2055 curr->hardirq_enable_event = ++curr->irq_events; 2061 curr->hardirq_enable_event = ++curr->irq_events;
2056 debug_atomic_inc(&hardirqs_on_events); 2062 debug_atomic_inc(&hardirqs_on_events);
2057} 2063}
2064EXPORT_SYMBOL(trace_hardirqs_on_caller);
2058 2065
2066void trace_hardirqs_on(void)
2067{
2068 trace_hardirqs_on_caller(CALLER_ADDR0);
2069}
2059EXPORT_SYMBOL(trace_hardirqs_on); 2070EXPORT_SYMBOL(trace_hardirqs_on);
2060 2071
2061/* 2072/*
2062 * Hardirqs were disabled: 2073 * Hardirqs were disabled:
2063 */ 2074 */
2064void trace_hardirqs_off(void) 2075void trace_hardirqs_off_caller(unsigned long a0)
2065{ 2076{
2066 struct task_struct *curr = current; 2077 struct task_struct *curr = current;
2067 2078
2079 time_hardirqs_off(CALLER_ADDR0, a0);
2080
2068 if (unlikely(!debug_locks || current->lockdep_recursion)) 2081 if (unlikely(!debug_locks || current->lockdep_recursion))
2069 return; 2082 return;
2070 2083
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
2082 } else 2095 } else
2083 debug_atomic_inc(&redundant_hardirqs_off); 2096 debug_atomic_inc(&redundant_hardirqs_off);
2084} 2097}
2098EXPORT_SYMBOL(trace_hardirqs_off_caller);
2085 2099
2100void trace_hardirqs_off(void)
2101{
2102 trace_hardirqs_off_caller(CALLER_ADDR0);
2103}
2086EXPORT_SYMBOL(trace_hardirqs_off); 2104EXPORT_SYMBOL(trace_hardirqs_off);
2087 2105
2088/* 2106/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
2246 * Mark a lock with a usage bit, and validate the state transition: 2264 * Mark a lock with a usage bit, and validate the state transition:
2247 */ 2265 */
2248static int mark_lock(struct task_struct *curr, struct held_lock *this, 2266static int mark_lock(struct task_struct *curr, struct held_lock *this,
2249 enum lock_usage_bit new_bit) 2267 enum lock_usage_bit new_bit)
2250{ 2268{
2251 unsigned int new_mask = 1 << new_bit, ret = 1; 2269 unsigned int new_mask = 1 << new_bit, ret = 1;
2252 2270
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
2686 * and also avoid lockdep recursion: 2704 * and also avoid lockdep recursion:
2687 */ 2705 */
2688void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2706void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2689 int trylock, int read, int check, unsigned long ip) 2707 int trylock, int read, int check, unsigned long ip)
2690{ 2708{
2691 unsigned long flags; 2709 unsigned long flags;
2692 2710
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2708 2726
2709EXPORT_SYMBOL_GPL(lock_acquire); 2727EXPORT_SYMBOL_GPL(lock_acquire);
2710 2728
2711void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) 2729void lock_release(struct lockdep_map *lock, int nested,
2730 unsigned long ip)
2712{ 2731{
2713 unsigned long flags; 2732 unsigned long flags;
2714 2733
diff --git a/kernel/marker.c b/kernel/marker.c
index b5a9fe1d50d5..1abfb923b761 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
55struct marker_entry { 55struct marker_entry {
56 struct hlist_node hlist; 56 struct hlist_node hlist;
57 char *format; 57 char *format;
58 void (*call)(const struct marker *mdata, /* Probe wrapper */ 58 /* Probe wrapper */
59 void *call_private, const char *fmt, ...); 59 void (*call)(const struct marker *mdata, void *call_private, ...);
60 struct marker_probe_closure single; 60 struct marker_probe_closure single;
61 struct marker_probe_closure *multi; 61 struct marker_probe_closure *multi;
62 int refcount; /* Number of times armed. 0 if disarmed. */ 62 int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
91 * marker_probe_cb Callback that prepares the variable argument list for probes. 91 * marker_probe_cb Callback that prepares the variable argument list for probes.
92 * @mdata: pointer of type struct marker 92 * @mdata: pointer of type struct marker
93 * @call_private: caller site private data 93 * @call_private: caller site private data
94 * @fmt: format string
95 * @...: Variable argument list. 94 * @...: Variable argument list.
96 * 95 *
97 * Since we do not use "typical" pointer based RCU in the 1 argument case, we 96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
98 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
99 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
100 */ 99 */
101void marker_probe_cb(const struct marker *mdata, void *call_private, 100void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
102 const char *fmt, ...)
103{ 101{
104 va_list args; 102 va_list args;
105 char ptype; 103 char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
120 /* Must read the ptr before private data. They are not data 118 /* Must read the ptr before private data. They are not data
121 * dependant, so we put an explicit smp_rmb() here. */ 119 * dependant, so we put an explicit smp_rmb() here. */
122 smp_rmb(); 120 smp_rmb();
123 va_start(args, fmt); 121 va_start(args, call_private);
124 func(mdata->single.probe_private, call_private, fmt, &args); 122 func(mdata->single.probe_private, call_private, mdata->format,
123 &args);
125 va_end(args); 124 va_end(args);
126 } else { 125 } else {
127 struct marker_probe_closure *multi; 126 struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
136 smp_read_barrier_depends(); 135 smp_read_barrier_depends();
137 multi = mdata->multi; 136 multi = mdata->multi;
138 for (i = 0; multi[i].func; i++) { 137 for (i = 0; multi[i].func; i++) {
139 va_start(args, fmt); 138 va_start(args, call_private);
140 multi[i].func(multi[i].probe_private, call_private, fmt, 139 multi[i].func(multi[i].probe_private, call_private,
141 &args); 140 mdata->format, &args);
142 va_end(args); 141 va_end(args);
143 } 142 }
144 } 143 }
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
150 * marker_probe_cb Callback that does not prepare the variable argument list. 149 * marker_probe_cb Callback that does not prepare the variable argument list.
151 * @mdata: pointer of type struct marker 150 * @mdata: pointer of type struct marker
152 * @call_private: caller site private data 151 * @call_private: caller site private data
153 * @fmt: format string
154 * @...: Variable argument list. 152 * @...: Variable argument list.
155 * 153 *
156 * Should be connected to markers "MARK_NOARGS". 154 * Should be connected to markers "MARK_NOARGS".
157 */ 155 */
158void marker_probe_cb_noarg(const struct marker *mdata, 156void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
159 void *call_private, const char *fmt, ...)
160{ 157{
161 va_list args; /* not initialized */ 158 va_list args; /* not initialized */
162 char ptype; 159 char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
172 /* Must read the ptr before private data. They are not data 169 /* Must read the ptr before private data. They are not data
173 * dependant, so we put an explicit smp_rmb() here. */ 170 * dependant, so we put an explicit smp_rmb() here. */
174 smp_rmb(); 171 smp_rmb();
175 func(mdata->single.probe_private, call_private, fmt, &args); 172 func(mdata->single.probe_private, call_private, mdata->format,
173 &args);
176 } else { 174 } else {
177 struct marker_probe_closure *multi; 175 struct marker_probe_closure *multi;
178 int i; 176 int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
186 smp_read_barrier_depends(); 184 smp_read_barrier_depends();
187 multi = mdata->multi; 185 multi = mdata->multi;
188 for (i = 0; multi[i].func; i++) 186 for (i = 0; multi[i].func; i++)
189 multi[i].func(multi[i].probe_private, call_private, fmt, 187 multi[i].func(multi[i].probe_private, call_private,
190 &args); 188 mdata->format, &args);
191 } 189 }
192 preempt_enable(); 190 preempt_enable();
193} 191}
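Probe functions keep the same shape after this change; only the wrapper differs, since the format string now comes from the marker itself rather than from the call site. A sketch of a matching probe (the name is illustrative, and the signature is assumed from the calls above):

	static void probe_subsys_example(void *probe_private, void *call_private,
					 const char *fmt, va_list *args)
	{
		int pid = va_arg(*args, int);

		/* consume the remaining arguments according to fmt */
		(void)pid;
	}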
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c32aa3b..ae7d5b9e535d 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1041,7 +1041,9 @@ void release_console_sem(void)
1041 _log_end = log_end; 1041 _log_end = log_end;
1042 con_start = log_end; /* Flush */ 1042 con_start = log_end; /* Flush */
1043 spin_unlock(&logbuf_lock); 1043 spin_unlock(&logbuf_lock);
1044 stop_critical_timings(); /* don't trace print latency */
1044 call_console_drivers(_con_start, _log_end); 1045 call_console_drivers(_con_start, _log_end);
1046 start_critical_timings();
1045 local_irq_restore(flags); 1047 local_irq_restore(flags);
1046 } 1048 }
1047 console_locked = 0; 1049 console_locked = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 3aaa5c8cb421..2a7ad35ea79b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
70#include <linux/bootmem.h> 70#include <linux/bootmem.h>
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h>
73 74
74#include <asm/tlb.h> 75#include <asm/tlb.h>
75#include <asm/irq_regs.h> 76#include <asm/irq_regs.h>
@@ -607,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
607# define const_debug static const 608# define const_debug static const
608#endif 609#endif
609 610
611/**
612 * runqueue_is_locked
613 *
614 * Returns true if the current cpu runqueue is locked.
615 * This interface allows printk to be called with the runqueue lock
616 * held and know whether or not it is OK to wake up the klogd.
617 */
618int runqueue_is_locked(void)
619{
620 int cpu = get_cpu();
621 struct rq *rq = cpu_rq(cpu);
622 int ret;
623
624 ret = spin_is_locked(&rq->lock);
625 put_cpu();
626 return ret;
627}
628
610/* 629/*
611 * Debugging: various feature bits 630 * Debugging: various feature bits
612 */ 631 */
@@ -2149,6 +2168,9 @@ out_activate:
2149 success = 1; 2168 success = 1;
2150 2169
2151out_running: 2170out_running:
2171 trace_mark(kernel_sched_wakeup,
2172 "pid %d state %ld ## rq %p task %p rq->curr %p",
2173 p->pid, p->state, rq, p, rq->curr);
2152 check_preempt_curr(rq, p); 2174 check_preempt_curr(rq, p);
2153 2175
2154 p->state = TASK_RUNNING; 2176 p->state = TASK_RUNNING;
@@ -2279,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2279 p->sched_class->task_new(rq, p); 2301 p->sched_class->task_new(rq, p);
2280 inc_nr_running(p, rq); 2302 inc_nr_running(p, rq);
2281 } 2303 }
2304 trace_mark(kernel_sched_wakeup_new,
2305 "pid %d state %ld ## rq %p task %p rq->curr %p",
2306 p->pid, p->state, rq, p, rq->curr);
2282 check_preempt_curr(rq, p); 2307 check_preempt_curr(rq, p);
2283#ifdef CONFIG_SMP 2308#ifdef CONFIG_SMP
2284 if (p->sched_class->task_wake_up) 2309 if (p->sched_class->task_wake_up)
@@ -2451,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
2451 struct mm_struct *mm, *oldmm; 2476 struct mm_struct *mm, *oldmm;
2452 2477
2453 prepare_task_switch(rq, prev, next); 2478 prepare_task_switch(rq, prev, next);
2479 trace_mark(kernel_sched_schedule,
2480 "prev_pid %d next_pid %d prev_state %ld "
2481 "## rq %p prev %p next %p",
2482 prev->pid, next->pid, prev->state,
2483 rq, prev, next);
2454 mm = next->mm; 2484 mm = next->mm;
2455 oldmm = prev->active_mm; 2485 oldmm = prev->active_mm;
2456 /* 2486 /*
@@ -4021,26 +4051,44 @@ void scheduler_tick(void)
4021#endif 4051#endif
4022} 4052}
4023 4053
4024#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 4054#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4055 defined(CONFIG_PREEMPT_TRACER))
4056
4057static inline unsigned long get_parent_ip(unsigned long addr)
4058{
4059 if (in_lock_functions(addr)) {
4060 addr = CALLER_ADDR2;
4061 if (in_lock_functions(addr))
4062 addr = CALLER_ADDR3;
4063 }
4064 return addr;
4065}
4025 4066
4026void __kprobes add_preempt_count(int val) 4067void __kprobes add_preempt_count(int val)
4027{ 4068{
4069#ifdef CONFIG_DEBUG_PREEMPT
4028 /* 4070 /*
4029 * Underflow? 4071 * Underflow?
4030 */ 4072 */
4031 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 4073 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
4032 return; 4074 return;
4075#endif
4033 preempt_count() += val; 4076 preempt_count() += val;
4077#ifdef CONFIG_DEBUG_PREEMPT
4034 /* 4078 /*
4035 * Spinlock count overflowing soon? 4079 * Spinlock count overflowing soon?
4036 */ 4080 */
4037 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 4081 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
4038 PREEMPT_MASK - 10); 4082 PREEMPT_MASK - 10);
4083#endif
4084 if (preempt_count() == val)
4085 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4039} 4086}
4040EXPORT_SYMBOL(add_preempt_count); 4087EXPORT_SYMBOL(add_preempt_count);
4041 4088
4042void __kprobes sub_preempt_count(int val) 4089void __kprobes sub_preempt_count(int val)
4043{ 4090{
4091#ifdef CONFIG_DEBUG_PREEMPT
4044 /* 4092 /*
4045 * Underflow? 4093 * Underflow?
4046 */ 4094 */
@@ -4052,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
4052 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && 4100 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
4053 !(preempt_count() & PREEMPT_MASK))) 4101 !(preempt_count() & PREEMPT_MASK)))
4054 return; 4102 return;
4103#endif
4055 4104
4105 if (preempt_count() == val)
4106 trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4056 preempt_count() -= val; 4107 preempt_count() -= val;
4057} 4108}
4058EXPORT_SYMBOL(sub_preempt_count); 4109EXPORT_SYMBOL(sub_preempt_count);
@@ -5384,7 +5435,7 @@ out_unlock:
5384 return retval; 5435 return retval;
5385} 5436}
5386 5437
5387static const char stat_nam[] = "RSDTtZX"; 5438static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5388 5439
5389void sched_show_task(struct task_struct *p) 5440void sched_show_task(struct task_struct *p)
5390{ 5441{
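A tracer such as the context-switch tracer added in this series can attach to these scheduler markers through the existing marker API, roughly like this (probe_sched_wakeup is an illustrative name):

	int ret;

	ret = marker_probe_register("kernel_sched_wakeup",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			probe_sched_wakeup, NULL);
	if (ret)
		printk(KERN_INFO "wakeup trace: Couldn't add marker\n");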
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e768cd..1a064adab658 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -31,6 +31,7 @@
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h>
34 35
35static noinline void __down(struct semaphore *sem); 36static noinline void __down(struct semaphore *sem);
36static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
@@ -53,6 +54,7 @@ void down(struct semaphore *sem)
53{ 54{
54 unsigned long flags; 55 unsigned long flags;
55 56
57 ftrace_special(sem->count, 0, __LINE__);
56 spin_lock_irqsave(&sem->lock, flags); 58 spin_lock_irqsave(&sem->lock, flags);
57 if (likely(sem->count > 0)) 59 if (likely(sem->count > 0))
58 sem->count--; 60 sem->count--;
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index ae28c8245123..a1fb54c93cdd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
436} 436}
437EXPORT_SYMBOL(_spin_trylock_bh); 437EXPORT_SYMBOL(_spin_trylock_bh);
438 438
439int in_lock_functions(unsigned long addr) 439notrace int in_lock_functions(unsigned long addr)
440{ 440{
441 /* Linker adds these: start and end of __lockfunc functions */ 441 /* Linker adds these: start and end of __lockfunc functions */
442 extern char __lock_text_start[], __lock_text_end[]; 442 extern char __lock_text_start[], __lock_text_end[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..efaf7c5500e9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -46,6 +46,7 @@
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 47#include <linux/acpi.h>
48#include <linux/reboot.h> 48#include <linux/reboot.h>
49#include <linux/ftrace.h>
49 50
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
51#include <asm/processor.h> 52#include <asm/processor.h>
@@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = {
455 .mode = 0644, 456 .mode = 0644,
456 .proc_handler = &proc_dointvec, 457 .proc_handler = &proc_dointvec,
457 }, 458 },
459#ifdef CONFIG_FTRACE
460 {
461 .ctl_name = CTL_UNNUMBERED,
462 .procname = "ftrace_enabled",
463 .data = &ftrace_enabled,
464 .maxlen = sizeof(int),
465 .mode = 0644,
466 .proc_handler = &ftrace_enable_sysctl,
467 },
468#endif
458#ifdef CONFIG_KMOD 469#ifdef CONFIG_KMOD
459 { 470 {
460 .ctl_name = KERN_MODPROBE, 471 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..263e9e6bbd60
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,135 @@
1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3#
4config HAVE_FTRACE
5 bool
6
7config HAVE_DYNAMIC_FTRACE
8 bool
9
10config TRACER_MAX_TRACE
11 bool
12
13config TRACING
14 bool
15 select DEBUG_FS
16 select STACKTRACE
17
18config FTRACE
19 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE
21 select FRAME_POINTER
22 select TRACING
23 select CONTEXT_SWITCH_TRACER
24 help
25 Enable the kernel to trace every kernel function. This is done
26 by using a compiler feature to insert a small, 5-byte No-Operation
27 instruction to the beginning of every kernel function, which NOP
28 sequence is then dynamically patched into a tracer call when
29 tracing is enabled by the administrator. If it's runtime disabled
30 (the bootup default), then the overhead of the instructions is very
31 small and not measurable even in micro-benchmarks.
32
33config IRQSOFF_TRACER
34 bool "Interrupts-off Latency Tracer"
35 default n
36 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME
38 depends on HAVE_FTRACE
39 select TRACE_IRQFLAGS
40 select TRACING
41 select TRACER_MAX_TRACE
42 help
43 This option measures the time spent in irqs-off critical
44 sections, with microsecond accuracy.
45
46 The default measurement method is a maximum search, which is
47 disabled by default and can be runtime (re-)started
48 via:
49
50 echo 0 > /debugfs/tracing/tracing_max_latency
51
 52 (Note that kernel size and overhead increase with this option
53 enabled. This option and the preempt-off timing option can be
54 used together or separately.)
55
56config PREEMPT_TRACER
57 bool "Preemption-off Latency Tracer"
58 default n
59 depends on GENERIC_TIME
60 depends on PREEMPT
61 depends on HAVE_FTRACE
62 select TRACING
63 select TRACER_MAX_TRACE
64 help
65 This option measures the time spent in preemption off critical
66 sections, with microsecond accuracy.
67
68 The default measurement method is a maximum search, which is
69 disabled by default and can be runtime (re-)started
70 via:
71
72 echo 0 > /debugfs/tracing/tracing_max_latency
73
 74 (Note that kernel size and overhead increase with this option
75 enabled. This option and the irqs-off timing option can be
76 used together or separately.)
77
78config SYSPROF_TRACER
79 bool "Sysprof Tracer"
80 depends on X86
81 select TRACING
82 help
83 This tracer provides the trace needed by the 'Sysprof' userspace
84 tool.
85
86config SCHED_TRACER
87 bool "Scheduling Latency Tracer"
88 depends on HAVE_FTRACE
89 select TRACING
90 select CONTEXT_SWITCH_TRACER
91 select TRACER_MAX_TRACE
92 help
93 This tracer tracks the latency of the highest priority task
94 to be scheduled in, starting from the point it has woken up.
95
96config CONTEXT_SWITCH_TRACER
97 bool "Trace process context switches"
98 depends on HAVE_FTRACE
99 select TRACING
100 select MARKERS
101 help
102 This tracer gets called from the context switch and records
103 all switching of tasks.
104
105config DYNAMIC_FTRACE
106 bool "enable/disable ftrace tracepoints dynamically"
107 depends on FTRACE
108 depends on HAVE_DYNAMIC_FTRACE
109 default y
110 help
111 This option will modify all the calls to ftrace dynamically
 112 (will patch them out of the binary image and replace them
113 with a No-Op instruction) as they are called. A table is
114 created to dynamically enable them again.
115
116 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
117 has native performance as long as no tracing is active.
118
119 The changes to the code are done by a kernel thread that
120 wakes up once a second and checks to see if any ftrace calls
121 were made. If so, it runs stop_machine (stops all CPUS)
122 and modifies the code to jump over the call to ftrace.
123
124config FTRACE_SELFTEST
125 bool
126
127config FTRACE_STARTUP_TEST
128 bool "Perform a startup test on ftrace"
129 depends on TRACING
130 select FTRACE_SELFTEST
131 help
132 This option performs a series of startup tests on ftrace. On bootup
 133 a series of tests is run to verify that the tracer is
 134 functioning properly. It will run tests on all the configured
135 tracers of ftrace.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..7aec123ec1d8
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,23 @@
1
2# Do not instrument the tracer itself:
3
4ifdef CONFIG_FTRACE
5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7
8# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o
11endif
12
13obj-$(CONFIG_FTRACE) += libftrace.o
14
15obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
18obj-$(CONFIG_FTRACE) += trace_functions.o
19obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
22
23libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..89bd9a6f52ec
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1398 @@
1/*
2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code in the latency_tracer, that is:
11 *
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III
14 */
15
16#include <linux/stop_machine.h>
17#include <linux/clocksource.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/debugfs.h>
21#include <linux/hardirq.h>
22#include <linux/kthread.h>
23#include <linux/uaccess.h>
24#include <linux/ftrace.h>
25#include <linux/sysctl.h>
26#include <linux/ctype.h>
27#include <linux/hash.h>
28#include <linux/list.h>
29
30#include "trace.h"
31
32/* ftrace_enabled is a method to turn ftrace on or off */
33int ftrace_enabled __read_mostly;
34static int last_ftrace_enabled;
35
36/*
37 * ftrace_disabled is set when an anomaly is discovered.
38 * ftrace_disabled is much stronger than ftrace_enabled.
39 */
40static int ftrace_disabled __read_mostly;
41
42static DEFINE_SPINLOCK(ftrace_lock);
43static DEFINE_MUTEX(ftrace_sysctl_lock);
44
45static struct ftrace_ops ftrace_list_end __read_mostly =
46{
47 .func = ftrace_stub,
48};
49
50static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
51ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
52
53void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
54{
55 struct ftrace_ops *op = ftrace_list;
56
57 /* in case someone actually ports this to alpha! */
58 read_barrier_depends();
59
60 while (op != &ftrace_list_end) {
61 /* silly alpha */
62 read_barrier_depends();
63 op->func(ip, parent_ip);
64 op = op->next;
 65 }
66}
67
68/**
69 * clear_ftrace_function - reset the ftrace function
70 *
71 * This NULLs the ftrace function and in essence stops
 72 * tracing. There may be a lag before all CPUs see the change.
73 */
74void clear_ftrace_function(void)
75{
76 ftrace_trace_function = ftrace_stub;
77}
78
79static int __register_ftrace_function(struct ftrace_ops *ops)
80{
81 /* Should never be called by interrupts */
82 spin_lock(&ftrace_lock);
83
84 ops->next = ftrace_list;
85 /*
86 * We are entering ops into the ftrace_list but another
87 * CPU might be walking that list. We need to make sure
88 * the ops->next pointer is valid before another CPU sees
89 * the ops pointer included into the ftrace_list.
90 */
91 smp_wmb();
92 ftrace_list = ops;
93
94 if (ftrace_enabled) {
95 /*
96 * For one func, simply call it directly.
97 * For more than one func, call the chain.
98 */
99 if (ops->next == &ftrace_list_end)
100 ftrace_trace_function = ops->func;
101 else
102 ftrace_trace_function = ftrace_list_func;
103 }
104
105 spin_unlock(&ftrace_lock);
106
107 return 0;
108}
109
110static int __unregister_ftrace_function(struct ftrace_ops *ops)
111{
112 struct ftrace_ops **p;
113 int ret = 0;
114
115 spin_lock(&ftrace_lock);
116
117 /*
118 * If we are removing the last function, then simply point
119 * to the ftrace_stub.
120 */
121 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
122 ftrace_trace_function = ftrace_stub;
123 ftrace_list = &ftrace_list_end;
124 goto out;
125 }
126
127 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
128 if (*p == ops)
129 break;
130
131 if (*p != ops) {
132 ret = -1;
133 goto out;
134 }
135
136 *p = (*p)->next;
137
138 if (ftrace_enabled) {
139 /* If we only have one func left, then call that directly */
140 if (ftrace_list == &ftrace_list_end ||
141 ftrace_list->next == &ftrace_list_end)
142 ftrace_trace_function = ftrace_list->func;
143 }
144
145 out:
146 spin_unlock(&ftrace_lock);
147
148 return ret;
149}
150
151#ifdef CONFIG_DYNAMIC_FTRACE
152
153static struct task_struct *ftraced_task;
154static DECLARE_WAIT_QUEUE_HEAD(ftraced_waiters);
155static unsigned long ftraced_iteration_counter;
156
157enum {
158 FTRACE_ENABLE_CALLS = (1 << 0),
159 FTRACE_DISABLE_CALLS = (1 << 1),
160 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
161 FTRACE_ENABLE_MCOUNT = (1 << 3),
162 FTRACE_DISABLE_MCOUNT = (1 << 4),
163};
164
165static int ftrace_filtered;
166
167static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
168
169static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
170
171static DEFINE_SPINLOCK(ftrace_shutdown_lock);
172static DEFINE_MUTEX(ftraced_lock);
173static DEFINE_MUTEX(ftrace_filter_lock);
174
175struct ftrace_page {
176 struct ftrace_page *next;
177 unsigned long index;
178 struct dyn_ftrace records[];
179};
180
181#define ENTRIES_PER_PAGE \
182 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
183
184/* estimate from running different kernels */
185#define NR_TO_INIT 10000
186
187static struct ftrace_page *ftrace_pages_start;
188static struct ftrace_page *ftrace_pages;
189
190static int ftraced_trigger;
191static int ftraced_suspend;
192
193static int ftrace_record_suspend;
194
195static struct dyn_ftrace *ftrace_free_records;
196
197static inline int
198ftrace_ip_in_hash(unsigned long ip, unsigned long key)
199{
200 struct dyn_ftrace *p;
201 struct hlist_node *t;
202 int found = 0;
203
204 hlist_for_each_entry(p, t, &ftrace_hash[key], node) {
205 if (p->ip == ip) {
206 found = 1;
207 break;
208 }
209 }
210
211 return found;
212}
213
214static inline void
215ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
216{
217 hlist_add_head(&node->node, &ftrace_hash[key]);
218}
219
220static void ftrace_free_rec(struct dyn_ftrace *rec)
221{
222 /* no locking, only called from kstop_machine */
223
224 rec->ip = (unsigned long)ftrace_free_records;
225 ftrace_free_records = rec;
226 rec->flags |= FTRACE_FL_FREE;
227}
228
229static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
230{
231 struct dyn_ftrace *rec;
232
233 /* First check for freed records */
234 if (ftrace_free_records) {
235 rec = ftrace_free_records;
236
237 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
238 WARN_ON_ONCE(1);
239 ftrace_free_records = NULL;
240 ftrace_disabled = 1;
241 ftrace_enabled = 0;
242 return NULL;
243 }
244
245 ftrace_free_records = (void *)rec->ip;
246 memset(rec, 0, sizeof(*rec));
247 return rec;
248 }
249
250 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
251 if (!ftrace_pages->next)
252 return NULL;
253 ftrace_pages = ftrace_pages->next;
254 }
255
256 return &ftrace_pages->records[ftrace_pages->index++];
257}
258
259static void
260ftrace_record_ip(unsigned long ip)
261{
262 struct dyn_ftrace *node;
263 unsigned long flags;
264 unsigned long key;
265 int resched;
266 int atomic;
267 int cpu;
268
269 if (!ftrace_enabled || ftrace_disabled)
270 return;
271
272 resched = need_resched();
273 preempt_disable_notrace();
274
275 /*
276 * We simply need to protect against recursion.
 277 * Use the raw version of smp_processor_id and not
278 * __get_cpu_var which can call debug hooks that can
279 * cause a recursive crash here.
280 */
281 cpu = raw_smp_processor_id();
282 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
283 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
284 goto out;
285
286 if (unlikely(ftrace_record_suspend))
287 goto out;
288
289 key = hash_long(ip, FTRACE_HASHBITS);
290
291 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
292
293 if (ftrace_ip_in_hash(ip, key))
294 goto out;
295
296 atomic = irqs_disabled();
297
298 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
299
300 /* This ip may have hit the hash before the lock */
301 if (ftrace_ip_in_hash(ip, key))
302 goto out_unlock;
303
304 /*
305 * There's a slight race that the ftraced will update the
306 * hash and reset here. If it is already converted, skip it.
307 */
308 if (ftrace_ip_converted(ip))
309 goto out_unlock;
310
311 node = ftrace_alloc_dyn_node(ip);
312 if (!node)
313 goto out_unlock;
314
315 node->ip = ip;
316
317 ftrace_add_hash(node, key);
318
319 ftraced_trigger = 1;
320
321 out_unlock:
322 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
323 out:
324 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
325
326 /* prevent recursion with scheduler */
327 if (resched)
328 preempt_enable_no_resched_notrace();
329 else
330 preempt_enable_notrace();
331}
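The core of ftrace_record_ip() is a per-CPU recursion guard: a depth counter is bumped on entry and only the outermost call on a CPU does any work, with raw_smp_processor_id() used so the guard itself cannot recurse through instrumented debug code. A stripped-down sketch of that pattern, using a thread-local counter as a stand-in for the per-CPU variable (illustrative names, and without the preemption handling of the real function):

/* Hedged sketch of the recursion guard in ftrace_record_ip().
 * __thread stands in for per_cpu(); the real code also saves
 * need_resched() and disables preemption around the whole sequence. */
static __thread int trace_depth;

static void record_ip(unsigned long ip)
{
	if (++trace_depth != 1)
		goto out;	/* nested entry on this CPU: do nothing */

	/* ... hash lookup and record allocation happen here ... */
	(void)ip;
out:
	trace_depth--;
}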
332
333#define FTRACE_ADDR ((long)(ftrace_caller))
334#define MCOUNT_ADDR ((long)(mcount))
335
336static void
337__ftrace_replace_code(struct dyn_ftrace *rec,
338 unsigned char *old, unsigned char *new, int enable)
339{
340 unsigned long ip;
341 int failed;
342
343 ip = rec->ip;
344
345 if (ftrace_filtered && enable) {
346 unsigned long fl;
347 /*
348 * If filtering is on:
349 *
350 * If this record is set to be filtered and
351 * is enabled then do nothing.
352 *
353 * If this record is set to be filtered and
354 * it is not enabled, enable it.
355 *
356 * If this record is not set to be filtered
357 * and it is not enabled do nothing.
358 *
359 * If this record is not set to be filtered and
360 * it is enabled, disable it.
361 */
362 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
363
364 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
365 (fl == 0))
366 return;
367
368 /*
369 * If it is enabled disable it,
370 * otherwise enable it!
371 */
372 if (fl == FTRACE_FL_ENABLED) {
373 /* swap new and old */
374 new = old;
375 old = ftrace_call_replace(ip, FTRACE_ADDR);
376 rec->flags &= ~FTRACE_FL_ENABLED;
377 } else {
378 new = ftrace_call_replace(ip, FTRACE_ADDR);
379 rec->flags |= FTRACE_FL_ENABLED;
380 }
381 } else {
382
383 if (enable)
384 new = ftrace_call_replace(ip, FTRACE_ADDR);
385 else
386 old = ftrace_call_replace(ip, FTRACE_ADDR);
387
388 if (enable) {
389 if (rec->flags & FTRACE_FL_ENABLED)
390 return;
391 rec->flags |= FTRACE_FL_ENABLED;
392 } else {
393 if (!(rec->flags & FTRACE_FL_ENABLED))
394 return;
395 rec->flags &= ~FTRACE_FL_ENABLED;
396 }
397 }
398
399 failed = ftrace_modify_code(ip, old, new);
400 if (failed) {
401 unsigned long key;
402 /* It is possible that the function hasn't been converted yet */
403 key = hash_long(ip, FTRACE_HASHBITS);
404 if (!ftrace_ip_in_hash(ip, key)) {
405 rec->flags |= FTRACE_FL_FAILED;
406 ftrace_free_rec(rec);
407 }
408
409 }
410}
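The four-case comment in __ftrace_replace_code() can be read as a single predicate: with filtering enabled, a record's desired state is simply its FTRACE_FL_FILTER bit; without filtering, it is the enable argument, and code is patched only when that desired state differs from the current FTRACE_FL_ENABLED bit. A hedged restatement in C (a truth-table sketch, not the kernel's code):

/* Hedged sketch: the enable/disable decision of __ftrace_replace_code()
 * reduced to one predicate. FL_FILTER/FL_ENABLED mirror the FTRACE_FL_*
 * bits; "filtered" mirrors ftrace_filtered. */
#define FL_FILTER  0x1U
#define FL_ENABLED 0x2U

static int rec_should_be_enabled(unsigned int flags, int filtered, int enable)
{
	if (filtered && enable)
		return !!(flags & FL_FILTER);	/* follow the filter bit   */
	return enable;				/* otherwise follow enable */
}
/* A record is patched only when rec_should_be_enabled() differs from the
 * current FL_ENABLED bit, which is what the if/else ladder above does. */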
411
412static void ftrace_replace_code(int enable)
413{
414 unsigned char *new = NULL, *old = NULL;
415 struct dyn_ftrace *rec;
416 struct ftrace_page *pg;
417 int i;
418
419 if (enable)
420 old = ftrace_nop_replace();
421 else
422 new = ftrace_nop_replace();
423
424 for (pg = ftrace_pages_start; pg; pg = pg->next) {
425 for (i = 0; i < pg->index; i++) {
426 rec = &pg->records[i];
427
428 /* don't modify code that has already faulted */
429 if (rec->flags & FTRACE_FL_FAILED)
430 continue;
431
432 __ftrace_replace_code(rec, old, new, enable);
433 }
434 }
435}
436
437static void ftrace_shutdown_replenish(void)
438{
439 if (ftrace_pages->next)
440 return;
441
442 /* allocate another page */
443 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
444}
445
446static void
447ftrace_code_disable(struct dyn_ftrace *rec)
448{
449 unsigned long ip;
450 unsigned char *nop, *call;
451 int failed;
452
453 ip = rec->ip;
454
455 nop = ftrace_nop_replace();
456 call = ftrace_call_replace(ip, MCOUNT_ADDR);
457
458 failed = ftrace_modify_code(ip, call, nop);
459 if (failed) {
460 rec->flags |= FTRACE_FL_FAILED;
461 ftrace_free_rec(rec);
462 }
463}
464
465static int __ftrace_modify_code(void *data)
466{
467 unsigned long addr;
468 int *command = data;
469
470 if (*command & FTRACE_ENABLE_CALLS)
471 ftrace_replace_code(1);
472 else if (*command & FTRACE_DISABLE_CALLS)
473 ftrace_replace_code(0);
474
475 if (*command & FTRACE_UPDATE_TRACE_FUNC)
476 ftrace_update_ftrace_func(ftrace_trace_function);
477
478 if (*command & FTRACE_ENABLE_MCOUNT) {
479 addr = (unsigned long)ftrace_record_ip;
480 ftrace_mcount_set(&addr);
481 } else if (*command & FTRACE_DISABLE_MCOUNT) {
482 addr = (unsigned long)ftrace_stub;
483 ftrace_mcount_set(&addr);
484 }
485
486 return 0;
487}
488
489static void ftrace_run_update_code(int command)
490{
491 stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
492}
493
494static ftrace_func_t saved_ftrace_func;
495
496static void ftrace_startup(void)
497{
498 int command = 0;
499
500 if (unlikely(ftrace_disabled))
501 return;
502
503 mutex_lock(&ftraced_lock);
504 ftraced_suspend++;
505 if (ftraced_suspend == 1)
506 command |= FTRACE_ENABLE_CALLS;
507
508 if (saved_ftrace_func != ftrace_trace_function) {
509 saved_ftrace_func = ftrace_trace_function;
510 command |= FTRACE_UPDATE_TRACE_FUNC;
511 }
512
513 if (!command || !ftrace_enabled)
514 goto out;
515
516 ftrace_run_update_code(command);
517 out:
518 mutex_unlock(&ftraced_lock);
519}
520
521static void ftrace_shutdown(void)
522{
523 int command = 0;
524
525 if (unlikely(ftrace_disabled))
526 return;
527
528 mutex_lock(&ftraced_lock);
529 ftraced_suspend--;
530 if (!ftraced_suspend)
531 command |= FTRACE_DISABLE_CALLS;
532
533 if (saved_ftrace_func != ftrace_trace_function) {
534 saved_ftrace_func = ftrace_trace_function;
535 command |= FTRACE_UPDATE_TRACE_FUNC;
536 }
537
538 if (!command || !ftrace_enabled)
539 goto out;
540
541 ftrace_run_update_code(command);
542 out:
543 mutex_unlock(&ftraced_lock);
544}
545
546static void ftrace_startup_sysctl(void)
547{
548 int command = FTRACE_ENABLE_MCOUNT;
549
550 if (unlikely(ftrace_disabled))
551 return;
552
553 mutex_lock(&ftraced_lock);
554 /* Force update next time */
555 saved_ftrace_func = NULL;
556 /* ftraced_suspend is true if we want ftrace running */
557 if (ftraced_suspend)
558 command |= FTRACE_ENABLE_CALLS;
559
560 ftrace_run_update_code(command);
561 mutex_unlock(&ftraced_lock);
562}
563
564static void ftrace_shutdown_sysctl(void)
565{
566 int command = FTRACE_DISABLE_MCOUNT;
567
568 if (unlikely(ftrace_disabled))
569 return;
570
571 mutex_lock(&ftraced_lock);
572 /* ftraced_suspend is true if ftrace is running */
573 if (ftraced_suspend)
574 command |= FTRACE_DISABLE_CALLS;
575
576 ftrace_run_update_code(command);
577 mutex_unlock(&ftraced_lock);
578}
579
580static cycle_t ftrace_update_time;
581static unsigned long ftrace_update_cnt;
582unsigned long ftrace_update_tot_cnt;
583
584static int __ftrace_update_code(void *ignore)
585{
586 struct dyn_ftrace *p;
587 struct hlist_head head;
588 struct hlist_node *t;
589 int save_ftrace_enabled;
590 cycle_t start, stop;
591 int i;
592
593	/* Don't record functions now */
594 save_ftrace_enabled = ftrace_enabled;
595 ftrace_enabled = 0;
596
597 start = ftrace_now(raw_smp_processor_id());
598 ftrace_update_cnt = 0;
599
600 /* No locks needed, the machine is stopped! */
601 for (i = 0; i < FTRACE_HASHSIZE; i++) {
602 if (hlist_empty(&ftrace_hash[i]))
603 continue;
604
605 head = ftrace_hash[i];
606 INIT_HLIST_HEAD(&ftrace_hash[i]);
607
608 /* all CPUS are stopped, we are safe to modify code */
609 hlist_for_each_entry(p, t, &head, node) {
610 ftrace_code_disable(p);
611 ftrace_update_cnt++;
612 }
613
614 }
615
616 stop = ftrace_now(raw_smp_processor_id());
617 ftrace_update_time = stop - start;
618 ftrace_update_tot_cnt += ftrace_update_cnt;
619
620 ftrace_enabled = save_ftrace_enabled;
621
622 return 0;
623}
624
625static void ftrace_update_code(void)
626{
627 if (unlikely(ftrace_disabled))
628 return;
629
630 stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
631}
632
633static int ftraced(void *ignore)
634{
635 unsigned long usecs;
636
637 while (!kthread_should_stop()) {
638
639 set_current_state(TASK_INTERRUPTIBLE);
640
641 /* check once a second */
642 schedule_timeout(HZ);
643
644 if (unlikely(ftrace_disabled))
645 continue;
646
647 mutex_lock(&ftrace_sysctl_lock);
648 mutex_lock(&ftraced_lock);
649 if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) {
650 ftrace_record_suspend++;
651 ftrace_update_code();
652 usecs = nsecs_to_usecs(ftrace_update_time);
653 if (ftrace_update_tot_cnt > 100000) {
654 ftrace_update_tot_cnt = 0;
655				pr_info("hm, ftrace overflow: %lu change%s"
656 " (%lu total) in %lu usec%s\n",
657 ftrace_update_cnt,
658 ftrace_update_cnt != 1 ? "s" : "",
659 ftrace_update_tot_cnt,
660 usecs, usecs != 1 ? "s" : "");
661 ftrace_disabled = 1;
662 WARN_ON_ONCE(1);
663 }
664 ftraced_trigger = 0;
665 ftrace_record_suspend--;
666 }
667 ftraced_iteration_counter++;
668 mutex_unlock(&ftraced_lock);
669 mutex_unlock(&ftrace_sysctl_lock);
670
671 wake_up_interruptible(&ftraced_waiters);
672
673 ftrace_shutdown_replenish();
674 }
675 __set_current_state(TASK_RUNNING);
676 return 0;
677}
678
679static int __init ftrace_dyn_table_alloc(void)
680{
681 struct ftrace_page *pg;
682 int cnt;
683 int i;
684
685 /* allocate a few pages */
686 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
687 if (!ftrace_pages_start)
688 return -1;
689
690 /*
691 * Allocate a few more pages.
692 *
693 * TODO: have some parser search vmlinux before
694 * final linking to find all calls to ftrace.
695 * Then we can:
696 * a) know how many pages to allocate.
697 * and/or
698 * b) set up the table then.
699 *
700 * The dynamic code is still necessary for
701 * modules.
702 */
703
704 pg = ftrace_pages = ftrace_pages_start;
705
706 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
707
708 for (i = 0; i < cnt; i++) {
709 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
710
711 /* If we fail, we'll try later anyway */
712 if (!pg->next)
713 break;
714
715 pg = pg->next;
716 }
717
718 return 0;
719}
720
721enum {
722 FTRACE_ITER_FILTER = (1 << 0),
723 FTRACE_ITER_CONT = (1 << 1),
724};
725
726#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
727
728struct ftrace_iterator {
729 loff_t pos;
730 struct ftrace_page *pg;
731 unsigned idx;
732 unsigned flags;
733 unsigned char buffer[FTRACE_BUFF_MAX+1];
734 unsigned buffer_idx;
735 unsigned filtered;
736};
737
738static void *
739t_next(struct seq_file *m, void *v, loff_t *pos)
740{
741 struct ftrace_iterator *iter = m->private;
742 struct dyn_ftrace *rec = NULL;
743
744 (*pos)++;
745
746 retry:
747 if (iter->idx >= iter->pg->index) {
748 if (iter->pg->next) {
749 iter->pg = iter->pg->next;
750 iter->idx = 0;
751 goto retry;
752 }
753 } else {
754 rec = &iter->pg->records[iter->idx++];
755 if ((rec->flags & FTRACE_FL_FAILED) ||
756 ((iter->flags & FTRACE_ITER_FILTER) &&
757 !(rec->flags & FTRACE_FL_FILTER))) {
758 rec = NULL;
759 goto retry;
760 }
761 }
762
763 iter->pos = *pos;
764
765 return rec;
766}
767
768static void *t_start(struct seq_file *m, loff_t *pos)
769{
770 struct ftrace_iterator *iter = m->private;
771 void *p = NULL;
772 loff_t l = -1;
773
774 if (*pos != iter->pos) {
775 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
776 ;
777 } else {
778 l = *pos;
779 p = t_next(m, p, &l);
780 }
781
782 return p;
783}
784
785static void t_stop(struct seq_file *m, void *p)
786{
787}
788
789static int t_show(struct seq_file *m, void *v)
790{
791 struct dyn_ftrace *rec = v;
792 char str[KSYM_SYMBOL_LEN];
793
794 if (!rec)
795 return 0;
796
797 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
798
799 seq_printf(m, "%s\n", str);
800
801 return 0;
802}
803
804static struct seq_operations show_ftrace_seq_ops = {
805 .start = t_start,
806 .next = t_next,
807 .stop = t_stop,
808 .show = t_show,
809};
810
811static int
812ftrace_avail_open(struct inode *inode, struct file *file)
813{
814 struct ftrace_iterator *iter;
815 int ret;
816
817 if (unlikely(ftrace_disabled))
818 return -ENODEV;
819
820 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
821 if (!iter)
822 return -ENOMEM;
823
824 iter->pg = ftrace_pages_start;
825 iter->pos = -1;
826
827 ret = seq_open(file, &show_ftrace_seq_ops);
828 if (!ret) {
829 struct seq_file *m = file->private_data;
830
831 m->private = iter;
832 } else {
833 kfree(iter);
834 }
835
836 return ret;
837}
838
839int ftrace_avail_release(struct inode *inode, struct file *file)
840{
841 struct seq_file *m = (struct seq_file *)file->private_data;
842 struct ftrace_iterator *iter = m->private;
843
844 seq_release(inode, file);
845 kfree(iter);
846
847 return 0;
848}
849
850static void ftrace_filter_reset(void)
851{
852 struct ftrace_page *pg;
853 struct dyn_ftrace *rec;
854 unsigned i;
855
856	/* keep kstop_machine from running */
857 preempt_disable();
858 ftrace_filtered = 0;
859 pg = ftrace_pages_start;
860 while (pg) {
861 for (i = 0; i < pg->index; i++) {
862 rec = &pg->records[i];
863 if (rec->flags & FTRACE_FL_FAILED)
864 continue;
865 rec->flags &= ~FTRACE_FL_FILTER;
866 }
867 pg = pg->next;
868 }
869 preempt_enable();
870}
871
872static int
873ftrace_filter_open(struct inode *inode, struct file *file)
874{
875 struct ftrace_iterator *iter;
876 int ret = 0;
877
878 if (unlikely(ftrace_disabled))
879 return -ENODEV;
880
881 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
882 if (!iter)
883 return -ENOMEM;
884
885 mutex_lock(&ftrace_filter_lock);
886 if ((file->f_mode & FMODE_WRITE) &&
887 !(file->f_flags & O_APPEND))
888 ftrace_filter_reset();
889
890 if (file->f_mode & FMODE_READ) {
891 iter->pg = ftrace_pages_start;
892 iter->pos = -1;
893 iter->flags = FTRACE_ITER_FILTER;
894
895 ret = seq_open(file, &show_ftrace_seq_ops);
896 if (!ret) {
897 struct seq_file *m = file->private_data;
898 m->private = iter;
899 } else
900 kfree(iter);
901 } else
902 file->private_data = iter;
903 mutex_unlock(&ftrace_filter_lock);
904
905 return ret;
906}
907
908static ssize_t
909ftrace_filter_read(struct file *file, char __user *ubuf,
910 size_t cnt, loff_t *ppos)
911{
912 if (file->f_mode & FMODE_READ)
913 return seq_read(file, ubuf, cnt, ppos);
914 else
915 return -EPERM;
916}
917
918static loff_t
919ftrace_filter_lseek(struct file *file, loff_t offset, int origin)
920{
921 loff_t ret;
922
923 if (file->f_mode & FMODE_READ)
924 ret = seq_lseek(file, offset, origin);
925 else
926 file->f_pos = ret = 1;
927
928 return ret;
929}
930
931enum {
932 MATCH_FULL,
933 MATCH_FRONT_ONLY,
934 MATCH_MIDDLE_ONLY,
935 MATCH_END_ONLY,
936};
937
938static void
939ftrace_match(unsigned char *buff, int len)
940{
941 char str[KSYM_SYMBOL_LEN];
942 char *search = NULL;
943 struct ftrace_page *pg;
944 struct dyn_ftrace *rec;
945 int type = MATCH_FULL;
946 unsigned i, match = 0, search_len = 0;
947
948 for (i = 0; i < len; i++) {
949 if (buff[i] == '*') {
950 if (!i) {
951 search = buff + i + 1;
952 type = MATCH_END_ONLY;
953 search_len = len - (i + 1);
954 } else {
955 if (type == MATCH_END_ONLY) {
956 type = MATCH_MIDDLE_ONLY;
957 } else {
958 match = i;
959 type = MATCH_FRONT_ONLY;
960 }
961 buff[i] = 0;
962 break;
963 }
964 }
965 }
966
967	/* keep kstop_machine from running */
968 preempt_disable();
969 ftrace_filtered = 1;
970 pg = ftrace_pages_start;
971 while (pg) {
972 for (i = 0; i < pg->index; i++) {
973 int matched = 0;
974 char *ptr;
975
976 rec = &pg->records[i];
977 if (rec->flags & FTRACE_FL_FAILED)
978 continue;
979 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
980 switch (type) {
981 case MATCH_FULL:
982 if (strcmp(str, buff) == 0)
983 matched = 1;
984 break;
985 case MATCH_FRONT_ONLY:
986 if (memcmp(str, buff, match) == 0)
987 matched = 1;
988 break;
989 case MATCH_MIDDLE_ONLY:
990 if (strstr(str, search))
991 matched = 1;
992 break;
993 case MATCH_END_ONLY:
994 ptr = strstr(str, search);
995 if (ptr && (ptr[search_len] == 0))
996 matched = 1;
997 break;
998 }
999 if (matched)
1000 rec->flags |= FTRACE_FL_FILTER;
1001 }
1002 pg = pg->next;
1003 }
1004 preempt_enable();
1005}
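The parsing loop at the top of ftrace_match() classifies a pattern by the position of its first '*': "foo" is MATCH_FULL, "foo*" becomes MATCH_FRONT_ONLY, "*foo" MATCH_END_ONLY and "*foo*" MATCH_MIDDLE_ONLY. A self-contained sketch of that classification (standalone C written for illustration, not the kernel routine):

/* Hedged sketch: classify a filter pattern the way ftrace_match() does,
 * by the position of the first '*'. */
#include <stdio.h>
#include <string.h>

enum match_type { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

static enum match_type classify(const char *pat)
{
	const char *star = strchr(pat, '*');

	if (!star)
		return MATCH_FULL;		/* "schedule" : exact match     */
	if (star != pat)
		return MATCH_FRONT_ONLY;	/* "sched*"   : prefix match    */
	if (strchr(star + 1, '*'))
		return MATCH_MIDDLE_ONLY;	/* "*sched*"  : substring match */
	return MATCH_END_ONLY;			/* "*lock"    : suffix match    */
}

int main(void)
{
	const char *pats[] = { "schedule", "sched*", "*lock", "*sched*" };
	unsigned int i;

	for (i = 0; i < sizeof(pats) / sizeof(pats[0]); i++)
		printf("%-10s -> %d\n", pats[i], classify(pats[i]));
	return 0;
}

Note that the kernel function additionally truncates the buffer at the '*' in place, which is why callers must pass writable storage.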
1006
1007static ssize_t
1008ftrace_filter_write(struct file *file, const char __user *ubuf,
1009 size_t cnt, loff_t *ppos)
1010{
1011 struct ftrace_iterator *iter;
1012 char ch;
1013 size_t read = 0;
1014 ssize_t ret;
1015
1016 if (!cnt || cnt < 0)
1017 return 0;
1018
1019 mutex_lock(&ftrace_filter_lock);
1020
1021 if (file->f_mode & FMODE_READ) {
1022 struct seq_file *m = file->private_data;
1023 iter = m->private;
1024 } else
1025 iter = file->private_data;
1026
1027 if (!*ppos) {
1028 iter->flags &= ~FTRACE_ITER_CONT;
1029 iter->buffer_idx = 0;
1030 }
1031
1032 ret = get_user(ch, ubuf++);
1033 if (ret)
1034 goto out;
1035 read++;
1036 cnt--;
1037
1038 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1039 /* skip white space */
1040 while (cnt && isspace(ch)) {
1041 ret = get_user(ch, ubuf++);
1042 if (ret)
1043 goto out;
1044 read++;
1045 cnt--;
1046 }
1047
1048
1049 if (isspace(ch)) {
1050 file->f_pos += read;
1051 ret = read;
1052 goto out;
1053 }
1054
1055 iter->buffer_idx = 0;
1056 }
1057
1058 while (cnt && !isspace(ch)) {
1059 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1060 iter->buffer[iter->buffer_idx++] = ch;
1061 else {
1062 ret = -EINVAL;
1063 goto out;
1064 }
1065 ret = get_user(ch, ubuf++);
1066 if (ret)
1067 goto out;
1068 read++;
1069 cnt--;
1070 }
1071
1072 if (isspace(ch)) {
1073 iter->filtered++;
1074 iter->buffer[iter->buffer_idx] = 0;
1075 ftrace_match(iter->buffer, iter->buffer_idx);
1076 iter->buffer_idx = 0;
1077 } else
1078 iter->flags |= FTRACE_ITER_CONT;
1079
1080
1081 file->f_pos += read;
1082
1083 ret = read;
1084 out:
1085 mutex_unlock(&ftrace_filter_lock);
1086
1087 return ret;
1088}
1089
1090/**
1091 * ftrace_set_filter - set a function to filter on in ftrace
1092 * @buf - the string that holds the function filter text.
1093 * @len - the length of the string.
1094 * @reset - non-zero to reset all filters before applying this filter.
1095 *
1096 * Filters denote which functions should be enabled when tracing is enabled.
1097 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
1098 */
1099void ftrace_set_filter(unsigned char *buf, int len, int reset)
1100{
1101 if (unlikely(ftrace_disabled))
1102 return;
1103
1104 mutex_lock(&ftrace_filter_lock);
1105 if (reset)
1106 ftrace_filter_reset();
1107 if (buf)
1108 ftrace_match(buf, len);
1109 mutex_unlock(&ftrace_filter_lock);
1110}
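A hedged usage sketch for the API documented above, roughly how an in-kernel tracer could restrict tracing to scheduler functions before enabling itself; the pattern string is purely illustrative and the declaration is assumed to come from <linux/ftrace.h>.

/* Hedged sketch: limit function tracing to functions matching "sched*".
 * The pattern must live in writable storage because ftrace_match()
 * truncates it in place at the first '*'. */
#include <linux/ftrace.h>

static void my_tracer_limit_to_sched(void)
{
	unsigned char pattern[] = "sched*";

	ftrace_set_filter(pattern, sizeof(pattern) - 1, 1 /* reset old filters */);
}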
1111
1112static int
1113ftrace_filter_release(struct inode *inode, struct file *file)
1114{
1115 struct seq_file *m = (struct seq_file *)file->private_data;
1116 struct ftrace_iterator *iter;
1117
1118 mutex_lock(&ftrace_filter_lock);
1119 if (file->f_mode & FMODE_READ) {
1120 iter = m->private;
1121
1122 seq_release(inode, file);
1123 } else
1124 iter = file->private_data;
1125
1126 if (iter->buffer_idx) {
1127 iter->filtered++;
1128 iter->buffer[iter->buffer_idx] = 0;
1129 ftrace_match(iter->buffer, iter->buffer_idx);
1130 }
1131
1132 mutex_lock(&ftrace_sysctl_lock);
1133 mutex_lock(&ftraced_lock);
1134 if (iter->filtered && ftraced_suspend && ftrace_enabled)
1135 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1136 mutex_unlock(&ftraced_lock);
1137 mutex_unlock(&ftrace_sysctl_lock);
1138
1139 kfree(iter);
1140 mutex_unlock(&ftrace_filter_lock);
1141 return 0;
1142}
1143
1144static struct file_operations ftrace_avail_fops = {
1145 .open = ftrace_avail_open,
1146 .read = seq_read,
1147 .llseek = seq_lseek,
1148 .release = ftrace_avail_release,
1149};
1150
1151static struct file_operations ftrace_filter_fops = {
1152 .open = ftrace_filter_open,
1153 .read = ftrace_filter_read,
1154 .write = ftrace_filter_write,
1155 .llseek = ftrace_filter_lseek,
1156 .release = ftrace_filter_release,
1157};
1158
1159/**
1160 * ftrace_force_update - force an update to all recording ftrace functions
1161 *
1162 * The ftrace dynamic update daemon only wakes up once a second.
1163 * There may be cases where an update needs to be done immediately
1164 * for tests or internal kernel tracing to begin. This function
1165 * wakes the daemon to do an update and will not return until the
1166 * update is complete.
1167 */
1168int ftrace_force_update(void)
1169{
1170 unsigned long last_counter;
1171 DECLARE_WAITQUEUE(wait, current);
1172 int ret = 0;
1173
1174 if (unlikely(ftrace_disabled))
1175 return -ENODEV;
1176
1177 mutex_lock(&ftraced_lock);
1178 last_counter = ftraced_iteration_counter;
1179
1180 set_current_state(TASK_INTERRUPTIBLE);
1181 add_wait_queue(&ftraced_waiters, &wait);
1182
1183 if (unlikely(!ftraced_task)) {
1184 ret = -ENODEV;
1185 goto out;
1186 }
1187
1188 do {
1189 mutex_unlock(&ftraced_lock);
1190 wake_up_process(ftraced_task);
1191 schedule();
1192 mutex_lock(&ftraced_lock);
1193 if (signal_pending(current)) {
1194 ret = -EINTR;
1195 break;
1196 }
1197 set_current_state(TASK_INTERRUPTIBLE);
1198 } while (last_counter == ftraced_iteration_counter);
1199
1200 out:
1201 mutex_unlock(&ftraced_lock);
1202 remove_wait_queue(&ftraced_waiters, &wait);
1203 set_current_state(TASK_RUNNING);
1204
1205 return ret;
1206}
1207
1208static void ftrace_force_shutdown(void)
1209{
1210 struct task_struct *task;
1211 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1212
1213 mutex_lock(&ftraced_lock);
1214 task = ftraced_task;
1215 ftraced_task = NULL;
1216 ftraced_suspend = -1;
1217 ftrace_run_update_code(command);
1218 mutex_unlock(&ftraced_lock);
1219
1220 if (task)
1221 kthread_stop(task);
1222}
1223
1224static __init int ftrace_init_debugfs(void)
1225{
1226 struct dentry *d_tracer;
1227 struct dentry *entry;
1228
1229 d_tracer = tracing_init_dentry();
1230
1231 entry = debugfs_create_file("available_filter_functions", 0444,
1232 d_tracer, NULL, &ftrace_avail_fops);
1233 if (!entry)
1234 pr_warning("Could not create debugfs "
1235 "'available_filter_functions' entry\n");
1236
1237 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1238 NULL, &ftrace_filter_fops);
1239 if (!entry)
1240 pr_warning("Could not create debugfs "
1241 "'set_ftrace_filter' entry\n");
1242 return 0;
1243}
1244
1245fs_initcall(ftrace_init_debugfs);
1246
1247static int __init ftrace_dynamic_init(void)
1248{
1249 struct task_struct *p;
1250 unsigned long addr;
1251 int ret;
1252
1253 addr = (unsigned long)ftrace_record_ip;
1254
1255 stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
1256
1257 /* ftrace_dyn_arch_init places the return code in addr */
1258 if (addr) {
1259 ret = (int)addr;
1260 goto failed;
1261 }
1262
1263 ret = ftrace_dyn_table_alloc();
1264 if (ret)
1265 goto failed;
1266
1267 p = kthread_run(ftraced, NULL, "ftraced");
1268 if (IS_ERR(p)) {
1269 ret = -1;
1270 goto failed;
1271 }
1272
1273 last_ftrace_enabled = ftrace_enabled = 1;
1274 ftraced_task = p;
1275
1276 return 0;
1277
1278 failed:
1279 ftrace_disabled = 1;
1280 return ret;
1281}
1282
1283core_initcall(ftrace_dynamic_init);
1284#else
1285# define ftrace_startup() do { } while (0)
1286# define ftrace_shutdown() do { } while (0)
1287# define ftrace_startup_sysctl() do { } while (0)
1288# define ftrace_shutdown_sysctl() do { } while (0)
1289# define ftrace_force_shutdown() do { } while (0)
1290#endif /* CONFIG_DYNAMIC_FTRACE */
1291
1292/**
1293 * ftrace_kill - totally shutdown ftrace
1294 *
1295 * This is a safety measure. If something that seems wrong is
1296 * detected, calling this function keeps ftrace from doing any
1297 * more code modifications or updates. It is used when something
1298 * has gone wrong.
1299 */
1300void ftrace_kill(void)
1301{
1302 mutex_lock(&ftrace_sysctl_lock);
1303 ftrace_disabled = 1;
1304 ftrace_enabled = 0;
1305
1306 clear_ftrace_function();
1307 mutex_unlock(&ftrace_sysctl_lock);
1308
1309 /* Try to totally disable ftrace */
1310 ftrace_force_shutdown();
1311}
1312
1313/**
1314 * register_ftrace_function - register a function for profiling
1315 * @ops - ops structure that holds the function for profiling.
1316 *
1317 * Register a function to be called by all functions in the
1318 * kernel.
1319 *
1320 * Note: @ops->func and all the functions it calls must be labeled
1321 * with "notrace", otherwise it will go into a
1322 * recursive loop.
1323 */
1324int register_ftrace_function(struct ftrace_ops *ops)
1325{
1326 int ret;
1327
1328 if (unlikely(ftrace_disabled))
1329 return -1;
1330
1331 mutex_lock(&ftrace_sysctl_lock);
1332 ret = __register_ftrace_function(ops);
1333 ftrace_startup();
1334 mutex_unlock(&ftrace_sysctl_lock);
1335
1336 return ret;
1337}
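A hedged sketch of using the registration API documented above from a module: the callback is marked notrace so that it (and anything it calls) is not itself traced, which would otherwise recurse, and the two-argument callback signature matches the .func user shown later in trace.c (function_trace_call). Module and symbol names are illustrative.

/* Hedged sketch: registering a function-trace callback. The callback and
 * everything it calls must be notrace, as the kerneldoc above warns. */
#include <linux/ftrace.h>
#include <linux/module.h>

static notrace void my_trace_call(unsigned long ip, unsigned long parent_ip)
{
	/* called for every traced kernel function:
	 * ip is the traced function, parent_ip its caller */
}

static struct ftrace_ops my_ops __read_mostly = {
	.func = my_trace_call,
};

static int __init my_tracer_init(void)
{
	return register_ftrace_function(&my_ops);
}

static void __exit my_tracer_exit(void)
{
	unregister_ftrace_function(&my_ops);
}

module_init(my_tracer_init);
module_exit(my_tracer_exit);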
1338
1339/**
1340 * unregister_ftrace_function - unregister a function from profiling.
1341 * @ops - ops structure that holds the function to unregister
1342 *
1343 * Unregister a function that was added to be called by ftrace profiling.
1344 */
1345int unregister_ftrace_function(struct ftrace_ops *ops)
1346{
1347 int ret;
1348
1349 mutex_lock(&ftrace_sysctl_lock);
1350 ret = __unregister_ftrace_function(ops);
1351 ftrace_shutdown();
1352 mutex_unlock(&ftrace_sysctl_lock);
1353
1354 return ret;
1355}
1356
1357int
1358ftrace_enable_sysctl(struct ctl_table *table, int write,
1359 struct file *file, void __user *buffer, size_t *lenp,
1360 loff_t *ppos)
1361{
1362 int ret;
1363
1364 if (unlikely(ftrace_disabled))
1365 return -ENODEV;
1366
1367 mutex_lock(&ftrace_sysctl_lock);
1368
1369 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1370
1371 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1372 goto out;
1373
1374 last_ftrace_enabled = ftrace_enabled;
1375
1376 if (ftrace_enabled) {
1377
1378 ftrace_startup_sysctl();
1379
1380 /* we are starting ftrace again */
1381 if (ftrace_list != &ftrace_list_end) {
1382 if (ftrace_list->next == &ftrace_list_end)
1383 ftrace_trace_function = ftrace_list->func;
1384 else
1385 ftrace_trace_function = ftrace_list_func;
1386 }
1387
1388 } else {
1389 /* stopping ftrace calls (just send to ftrace_stub) */
1390 ftrace_trace_function = ftrace_stub;
1391
1392 ftrace_shutdown_sysctl();
1393 }
1394
1395 out:
1396 mutex_unlock(&ftrace_sysctl_lock);
1397 return ret;
1398}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..95b7c48a9a1d
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3034 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III
13 */
14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
19#include <linux/hardirq.h>
20#include <linux/linkage.h>
21#include <linux/uaccess.h>
22#include <linux/ftrace.h>
23#include <linux/module.h>
24#include <linux/percpu.h>
25#include <linux/ctype.h>
26#include <linux/init.h>
27#include <linux/poll.h>
28#include <linux/gfp.h>
29#include <linux/fs.h>
30#include <linux/writeback.h>
31
32#include <linux/stacktrace.h>
33
34#include "trace.h"
35
36unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
37unsigned long __read_mostly tracing_thresh;
38
39static unsigned long __read_mostly tracing_nr_buffers;
40static cpumask_t __read_mostly tracing_buffer_mask;
41
42#define for_each_tracing_cpu(cpu) \
43 for_each_cpu_mask(cpu, tracing_buffer_mask)
44
45/* dummy tracer used to disable tracing */
46static struct tracer no_tracer __read_mostly = {
47 .name = "none",
48};
49
50static int trace_alloc_page(void);
51static int trace_free_page(void);
52
53static int tracing_disabled = 1;
54
55static unsigned long tracing_pages_allocated;
56
57long
58ns2usecs(cycle_t nsec)
59{
60 nsec += 500;
61 do_div(nsec, 1000);
62 return nsec;
63}
64
65cycle_t ftrace_now(int cpu)
66{
67 return cpu_clock(cpu);
68}
69
70/*
71 * The global_trace is the descriptor that holds the tracing
72 * buffers for live tracing. For each CPU, it contains
73 * a linked list of pages that store the trace entries. The
74 * page descriptors of those pages hold the list itself: their
75 * lru items are chained together, linking every page of that
76 * CPU's buffer.
77 *
78 * For each active CPU there is a data field that holds the
79 * pages for the buffer for that CPU. Each CPU has the same number
80 * of pages allocated for its buffer.
81 */
82static struct trace_array global_trace;
83
84static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
85
86/*
87 * The max_tr is used to snapshot the global_trace when a maximum
88 * latency is reached. Some tracers will use this to store a maximum
89 * trace while it continues examining live traces.
90 *
91 * The buffers for the max_tr are set up the same as the global_trace.
92 * When a snapshot is taken, the linked list of the max_tr is swapped
93 * with the linked list of the global_trace, and the buffers of the
94 * global_trace are reset so that tracing can continue.
95 */
96static struct trace_array max_tr;
97
98static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
99
100/* tracer_enabled is used to toggle activation of a tracer */
101static int tracer_enabled = 1;
102
103/*
104 * trace_nr_entries is the number of entries that is allocated
105 * for a buffer. Note, the number of entries is always rounded
106 * to ENTRIES_PER_PAGE.
107 */
108static unsigned long trace_nr_entries = 65536UL;
109
110/* trace_types holds a linked list of available tracers. */
111static struct tracer *trace_types __read_mostly;
112
113/* current_trace points to the tracer that is currently active */
114static struct tracer *current_trace __read_mostly;
115
116/*
117 * max_tracer_type_len is used to simplify allocating the
118 * buffers that read tracer names from userspace. We keep track of
119 * the longest tracer name registered.
120 */
121static int max_tracer_type_len;
122
123/*
124 * trace_types_lock is used to protect the trace_types list.
125 * This lock is also used to keep user access serialized.
126 * Accesses from userspace will grab this lock while userspace
127 * activities happen inside the kernel.
128 */
129static DEFINE_MUTEX(trace_types_lock);
130
131/* trace_wait is a waitqueue for tasks blocked on trace_poll */
132static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
133
134/* trace_flags holds iter_ctrl options */
135unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
136
137/**
138 * trace_wake_up - wake up tasks waiting for trace input
139 *
140 * Simply wakes up any task that is blocked on the trace_wait
141 * queue. This is used with trace_poll for tasks polling the trace.
142 */
143void trace_wake_up(void)
144{
145 /*
146 * The runqueue_is_locked() can fail, but this is the best we
147 * have for now:
148 */
149 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
150 wake_up(&trace_wait);
151}
152
153#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
154
155static int __init set_nr_entries(char *str)
156{
157 unsigned long nr_entries;
158 int ret;
159
160 if (!str)
161 return 0;
162 ret = strict_strtoul(str, 0, &nr_entries);
163	/* nr_entries cannot be zero */
164 if (ret < 0 || nr_entries == 0)
165 return 0;
166 trace_nr_entries = nr_entries;
167 return 1;
168}
169__setup("trace_entries=", set_nr_entries);
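Because set_nr_entries() is registered with __setup(), the buffer size can only be chosen on the kernel command line, before the trace buffers are allocated. For example (the value is illustrative; it is rounded to whole pages, as the comment above trace_nr_entries notes):

	trace_entries=131072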
170
171unsigned long nsecs_to_usecs(unsigned long nsecs)
172{
173 return nsecs / 1000;
174}
175
176/*
177 * trace_flag_type is an enumeration that holds different
178 * states when a trace occurs. These are:
179 * IRQS_OFF - interrupts were disabled
180 * NEED_RESCHED - reschedule is requested
181 * HARDIRQ - inside an interrupt handler
182 * SOFTIRQ - inside a softirq handler
183 */
184enum trace_flag_type {
185 TRACE_FLAG_IRQS_OFF = 0x01,
186 TRACE_FLAG_NEED_RESCHED = 0x02,
187 TRACE_FLAG_HARDIRQ = 0x04,
188 TRACE_FLAG_SOFTIRQ = 0x08,
189};
190
191/*
192 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
193 * control the output of kernel symbols.
194 */
195#define TRACE_ITER_SYM_MASK \
196 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
197
198/* These must match the bit positions in trace_iterator_flags */
199static const char *trace_options[] = {
200 "print-parent",
201 "sym-offset",
202 "sym-addr",
203 "verbose",
204 "raw",
205 "hex",
206 "bin",
207 "block",
208 "stacktrace",
209 "sched-tree",
210 NULL
211};
212
213/*
214 * ftrace_max_lock is used to protect the swapping of buffers
215 * when taking a max snapshot. The buffers themselves are
216 * protected by per_cpu spinlocks. But the action of the swap
217 * needs its own lock.
218 *
219 * This is defined as a raw_spinlock_t in order to help
220 * with performance when lockdep debugging is enabled.
221 */
222static raw_spinlock_t ftrace_max_lock =
223 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
224
225/*
226 * Copy the new maximum trace into the separate maximum-trace
227 * structure. (this way the maximum trace is permanently saved,
228 * for later retrieval via /debugfs/tracing/latency_trace)
229 */
230static void
231__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
232{
233 struct trace_array_cpu *data = tr->data[cpu];
234
235 max_tr.cpu = cpu;
236 max_tr.time_start = data->preempt_timestamp;
237
238 data = max_tr.data[cpu];
239 data->saved_latency = tracing_max_latency;
240
241 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
242 data->pid = tsk->pid;
243 data->uid = tsk->uid;
244 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
245 data->policy = tsk->policy;
246 data->rt_priority = tsk->rt_priority;
247
248	/* record this task's comm */
249 tracing_record_cmdline(current);
250}
251
252/**
253 * check_pages - integrity check of trace buffers
254 *
255 * As a safety measure, we check to make sure the data pages have not
256 * been corrupted. TODO: configure to disable this because it adds
257 * a bit of overhead.
258 */
259void check_pages(struct trace_array_cpu *data)
260{
261 struct page *page, *tmp;
262
263 BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
264 BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
265
266 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
267 BUG_ON(page->lru.next->prev != &page->lru);
268 BUG_ON(page->lru.prev->next != &page->lru);
269 }
270}
271
272/**
273 * head_page - page address of the first page in per_cpu buffer.
274 *
275 * head_page returns the page address of the first page in
276 * a per_cpu buffer. This also performs various consistency
277 * checks to make sure the buffer has not been corrupted.
278 */
279void *head_page(struct trace_array_cpu *data)
280{
281 struct page *page;
282
283 check_pages(data);
284 if (list_empty(&data->trace_pages))
285 return NULL;
286
287 page = list_entry(data->trace_pages.next, struct page, lru);
288 BUG_ON(&page->lru == &data->trace_pages);
289
290 return page_address(page);
291}
292
293/**
294 * trace_seq_printf - sequence printing of trace information
295 * @s: trace sequence descriptor
296 * @fmt: printf format string
297 *
298 * The tracer may use either sequence operations or its own
299 * copy-to-user routines. To simplify formatting of a trace,
300 * trace_seq_printf() is used to store strings into a special
301 * buffer (@s). Then the output may be either used by
302 * the sequencer or pulled into another buffer.
303 */
304int
305trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
306{
307 int len = (PAGE_SIZE - 1) - s->len;
308 va_list ap;
309 int ret;
310
311 if (!len)
312 return 0;
313
314 va_start(ap, fmt);
315 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
316 va_end(ap);
317
318 /* If we can't write it all, don't bother writing anything */
319 if (ret >= len)
320 return 0;
321
322 s->len += ret;
323
324 return len;
325}
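A hedged sketch of how a print handler typically uses these helpers: format into the trace_seq and propagate the 0 = "did not fit" convention so the caller can retry on a fresh buffer. The entry fields used (pid, fn.ip, fn.parent_ip) appear in the entry-recording code later in this file; the function itself is illustrative.

/* Hedged sketch: format one function-trace entry into a trace_seq,
 * returning 0 when the sequence buffer is full (as the helpers above do). */
static int print_fn_entry(struct trace_seq *s, struct trace_entry *entry)
{
	if (!trace_seq_printf(s, "%5d: ", entry->pid))
		return 0;
	return trace_seq_printf(s, "0x%lx <- 0x%lx\n",
				entry->fn.ip, entry->fn.parent_ip);
}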
326
327/**
328 * trace_seq_puts - trace sequence printing of simple string
329 * @s: trace sequence descriptor
330 * @str: simple string to record
331 *
332 * The tracer may use either the sequence operations or its own
333 * copy-to-user routines. This function records a simple string
334 * into a special buffer (@s) for later retrieval by a sequencer
335 * or other mechanism.
336 */
337static int
338trace_seq_puts(struct trace_seq *s, const char *str)
339{
340 int len = strlen(str);
341
342 if (len > ((PAGE_SIZE - 1) - s->len))
343 return 0;
344
345 memcpy(s->buffer + s->len, str, len);
346 s->len += len;
347
348 return len;
349}
350
351static int
352trace_seq_putc(struct trace_seq *s, unsigned char c)
353{
354 if (s->len >= (PAGE_SIZE - 1))
355 return 0;
356
357 s->buffer[s->len++] = c;
358
359 return 1;
360}
361
362static int
363trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
364{
365 if (len > ((PAGE_SIZE - 1) - s->len))
366 return 0;
367
368 memcpy(s->buffer + s->len, mem, len);
369 s->len += len;
370
371 return len;
372}
373
374#define HEX_CHARS 17
375static const char hex2asc[] = "0123456789abcdef";
376
377static int
378trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
379{
380 unsigned char hex[HEX_CHARS];
381 unsigned char *data = mem;
382 unsigned char byte;
383 int i, j;
384
385 BUG_ON(len >= HEX_CHARS);
386
387#ifdef __BIG_ENDIAN
388 for (i = 0, j = 0; i < len; i++) {
389#else
390 for (i = len-1, j = 0; i >= 0; i--) {
391#endif
392 byte = data[i];
393
394 hex[j++] = hex2asc[byte & 0x0f];
395 hex[j++] = hex2asc[byte >> 4];
396 }
397 hex[j++] = ' ';
398
399 return trace_seq_putmem(s, hex, j);
400}
401
402static void
403trace_seq_reset(struct trace_seq *s)
404{
405 s->len = 0;
406 s->readpos = 0;
407}
408
409ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
410{
411 int len;
412 int ret;
413
414 if (s->len <= s->readpos)
415 return -EBUSY;
416
417 len = s->len - s->readpos;
418 if (cnt > len)
419 cnt = len;
420 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
421 if (ret)
422 return -EFAULT;
423
424	s->readpos += cnt;
425 return cnt;
426}
427
428static void
429trace_print_seq(struct seq_file *m, struct trace_seq *s)
430{
431 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
432
433 s->buffer[len] = 0;
434 seq_puts(m, s->buffer);
435
436 trace_seq_reset(s);
437}
438
439/*
440 * flip the trace buffers between two trace descriptors.
441 * This is usually done between the global_trace and
442 * the max_tr, to record a snapshot of the current trace.
443 *
444 * The ftrace_max_lock must be held.
445 */
446static void
447flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
448{
449 struct list_head flip_pages;
450
451 INIT_LIST_HEAD(&flip_pages);
452
453 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
454 sizeof(struct trace_array_cpu) -
455 offsetof(struct trace_array_cpu, trace_head_idx));
456
457 check_pages(tr1);
458 check_pages(tr2);
459 list_splice_init(&tr1->trace_pages, &flip_pages);
460 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
461 list_splice_init(&flip_pages, &tr2->trace_pages);
462 BUG_ON(!list_empty(&flip_pages));
463 check_pages(tr1);
464 check_pages(tr2);
465}
466
467/**
468 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
469 * @tr: tracer
470 * @tsk: the task with the latency
471 * @cpu: The cpu that initiated the trace.
472 *
473 * Flip the buffers between the @tr and the max_tr and record information
474 * about which task was the cause of this latency.
475 */
476void
477update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
478{
479 struct trace_array_cpu *data;
480 int i;
481
482 WARN_ON_ONCE(!irqs_disabled());
483 __raw_spin_lock(&ftrace_max_lock);
484 /* clear out all the previous traces */
485 for_each_tracing_cpu(i) {
486 data = tr->data[i];
487 flip_trace(max_tr.data[i], data);
488 tracing_reset(data);
489 }
490
491 __update_max_tr(tr, tsk, cpu);
492 __raw_spin_unlock(&ftrace_max_lock);
493}
494
495/**
496 * update_max_tr_single - only copy one trace over, and reset the rest
497 * @tr - tracer
498 * @tsk - task with the latency
499 * @cpu - the cpu of the buffer to copy.
500 *
501 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
502 */
503void
504update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
505{
506 struct trace_array_cpu *data = tr->data[cpu];
507 int i;
508
509 WARN_ON_ONCE(!irqs_disabled());
510 __raw_spin_lock(&ftrace_max_lock);
511 for_each_tracing_cpu(i)
512 tracing_reset(max_tr.data[i]);
513
514 flip_trace(max_tr.data[cpu], data);
515 tracing_reset(data);
516
517 __update_max_tr(tr, tsk, cpu);
518 __raw_spin_unlock(&ftrace_max_lock);
519}
520
521/**
522 * register_tracer - register a tracer with the ftrace system.
523 * @type - the plugin for the tracer
524 *
525 * Register a new plugin tracer.
526 */
527int register_tracer(struct tracer *type)
528{
529 struct tracer *t;
530 int len;
531 int ret = 0;
532
533 if (!type->name) {
534 pr_info("Tracer must have a name\n");
535 return -1;
536 }
537
538 mutex_lock(&trace_types_lock);
539 for (t = trace_types; t; t = t->next) {
540 if (strcmp(type->name, t->name) == 0) {
541 /* already found */
542 pr_info("Trace %s already registered\n",
543 type->name);
544 ret = -1;
545 goto out;
546 }
547 }
548
549#ifdef CONFIG_FTRACE_STARTUP_TEST
550 if (type->selftest) {
551 struct tracer *saved_tracer = current_trace;
552 struct trace_array_cpu *data;
553 struct trace_array *tr = &global_trace;
554 int saved_ctrl = tr->ctrl;
555 int i;
556 /*
557 * Run a selftest on this tracer.
558 * Here we reset the trace buffer, and set the current
559 * tracer to be this tracer. The tracer can then run some
560 * internal tracing to verify that everything is in order.
561 * If we fail, we do not register this tracer.
562 */
563 for_each_tracing_cpu(i) {
564 data = tr->data[i];
565 if (!head_page(data))
566 continue;
567 tracing_reset(data);
568 }
569 current_trace = type;
570 tr->ctrl = 0;
571 /* the test is responsible for initializing and enabling */
572 pr_info("Testing tracer %s: ", type->name);
573 ret = type->selftest(type, tr);
574 /* the test is responsible for resetting too */
575 current_trace = saved_tracer;
576 tr->ctrl = saved_ctrl;
577 if (ret) {
578 printk(KERN_CONT "FAILED!\n");
579 goto out;
580 }
581 /* Only reset on passing, to avoid touching corrupted buffers */
582 for_each_tracing_cpu(i) {
583 data = tr->data[i];
584 if (!head_page(data))
585 continue;
586 tracing_reset(data);
587 }
588 printk(KERN_CONT "PASSED\n");
589 }
590#endif
591
592 type->next = trace_types;
593 trace_types = type;
594 len = strlen(type->name);
595 if (len > max_tracer_type_len)
596 max_tracer_type_len = len;
597
598 out:
599 mutex_unlock(&trace_types_lock);
600
601 return ret;
602}
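A hedged sketch of the smallest tracer plugin register_tracer() will accept, modeled on the no_tracer definition near the top of this file; real tracers also fill in the callbacks declared in trace.h (such as the start/stop hooks used by the iterator code below and, with CONFIG_FTRACE_STARTUP_TEST, a selftest). The initcall and the names are illustrative.

/* Hedged sketch: a do-nothing tracer plugin, registered at boot.
 * Real tracers also provide the callbacks declared in trace.h. */
#include <linux/init.h>
#include "trace.h"

static struct tracer nop_example_tracer __read_mostly = {
	.name = "nop-example",
};

static __init int init_nop_example_tracer(void)
{
	return register_tracer(&nop_example_tracer);
}
device_initcall(init_nop_example_tracer);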
603
604void unregister_tracer(struct tracer *type)
605{
606 struct tracer **t;
607 int len;
608
609 mutex_lock(&trace_types_lock);
610 for (t = &trace_types; *t; t = &(*t)->next) {
611 if (*t == type)
612 goto found;
613 }
614 pr_info("Trace %s not registered\n", type->name);
615 goto out;
616
617 found:
618 *t = (*t)->next;
619 if (strlen(type->name) != max_tracer_type_len)
620 goto out;
621
622 max_tracer_type_len = 0;
623 for (t = &trace_types; *t; t = &(*t)->next) {
624 len = strlen((*t)->name);
625 if (len > max_tracer_type_len)
626 max_tracer_type_len = len;
627 }
628 out:
629 mutex_unlock(&trace_types_lock);
630}
631
632void tracing_reset(struct trace_array_cpu *data)
633{
634 data->trace_idx = 0;
635 data->overrun = 0;
636 data->trace_head = data->trace_tail = head_page(data);
637 data->trace_head_idx = 0;
638 data->trace_tail_idx = 0;
639}
640
641#define SAVED_CMDLINES 128
642static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
643static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
644static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
645static int cmdline_idx;
646static DEFINE_SPINLOCK(trace_cmdline_lock);
647
648/* trace in all context switches */
649atomic_t trace_record_cmdline_enabled __read_mostly;
650
651/* temporarily disable recording */
652atomic_t trace_record_cmdline_disabled __read_mostly;
653
654static void trace_init_cmdlines(void)
655{
656 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
657 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
658 cmdline_idx = 0;
659}
660
661void trace_stop_cmdline_recording(void);
662
663static void trace_save_cmdline(struct task_struct *tsk)
664{
665 unsigned map;
666 unsigned idx;
667
668 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
669 return;
670
671 /*
672 * It's not the end of the world if we don't get
673 * the lock, but we also don't want to spin
674 * nor do we want to disable interrupts,
675 * so if we miss here, then better luck next time.
676 */
677 if (!spin_trylock(&trace_cmdline_lock))
678 return;
679
680 idx = map_pid_to_cmdline[tsk->pid];
681 if (idx >= SAVED_CMDLINES) {
682 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
683
684 map = map_cmdline_to_pid[idx];
685 if (map <= PID_MAX_DEFAULT)
686 map_pid_to_cmdline[map] = (unsigned)-1;
687
688 map_pid_to_cmdline[tsk->pid] = idx;
689
690 cmdline_idx = idx;
691 }
692
693 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
694
695 spin_unlock(&trace_cmdline_lock);
696}
697
698static char *trace_find_cmdline(int pid)
699{
700 char *cmdline = "<...>";
701 unsigned map;
702
703 if (!pid)
704 return "<idle>";
705
706 if (pid > PID_MAX_DEFAULT)
707 goto out;
708
709 map = map_pid_to_cmdline[pid];
710 if (map >= SAVED_CMDLINES)
711 goto out;
712
713 cmdline = saved_cmdlines[map];
714
715 out:
716 return cmdline;
717}
718
719void tracing_record_cmdline(struct task_struct *tsk)
720{
721 if (atomic_read(&trace_record_cmdline_disabled))
722 return;
723
724 trace_save_cmdline(tsk);
725}
726
727static inline struct list_head *
728trace_next_list(struct trace_array_cpu *data, struct list_head *next)
729{
730 /*
731	 * Round-robin - but skip the head (which is not a real page):
732 */
733 next = next->next;
734 if (unlikely(next == &data->trace_pages))
735 next = next->next;
736 BUG_ON(next == &data->trace_pages);
737
738 return next;
739}
740
741static inline void *
742trace_next_page(struct trace_array_cpu *data, void *addr)
743{
744 struct list_head *next;
745 struct page *page;
746
747 page = virt_to_page(addr);
748
749 next = trace_next_list(data, &page->lru);
750 page = list_entry(next, struct page, lru);
751
752 return page_address(page);
753}
754
755static inline struct trace_entry *
756tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
757{
758 unsigned long idx, idx_next;
759 struct trace_entry *entry;
760
761 data->trace_idx++;
762 idx = data->trace_head_idx;
763 idx_next = idx + 1;
764
765 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
766
767 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
768
769 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
770 data->trace_head = trace_next_page(data, data->trace_head);
771 idx_next = 0;
772 }
773
774 if (data->trace_head == data->trace_tail &&
775 idx_next == data->trace_tail_idx) {
776 /* overrun */
777 data->overrun++;
778 data->trace_tail_idx++;
779 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
780 data->trace_tail =
781 trace_next_page(data, data->trace_tail);
782 data->trace_tail_idx = 0;
783 }
784 }
785
786 data->trace_head_idx = idx_next;
787
788 return entry;
789}
790
791static inline void
792tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
793{
794 struct task_struct *tsk = current;
795 unsigned long pc;
796
797 pc = preempt_count();
798
799 entry->preempt_count = pc & 0xff;
800 entry->pid = (tsk) ? tsk->pid : 0;
801 entry->t = ftrace_now(raw_smp_processor_id());
802 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
803 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
804 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
805 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
806}
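Every entry carries its context in these flag bits plus the low byte of the preempt count; the latency output further down renders them as the irqs-off / need-resched / hardirq-softirq columns of the header. A hedged decoding sketch (the single characters chosen here are illustrative, not necessarily the exact ones the output code prints):

/* Hedged sketch: turn the trace_flag_type bits set above back into a
 * short column string. The characters are illustrative. */
static void decode_entry_flags(unsigned long flags, char out[4])
{
	out[0] = (flags & TRACE_FLAG_IRQS_OFF)     ? 'd' : '.';
	out[1] = (flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
	out[2] = (flags & TRACE_FLAG_HARDIRQ)      ? 'h' :
		 (flags & TRACE_FLAG_SOFTIRQ)      ? 's' : '.';
	out[3] = '\0';
}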
807
808void
809trace_function(struct trace_array *tr, struct trace_array_cpu *data,
810 unsigned long ip, unsigned long parent_ip, unsigned long flags)
811{
812 struct trace_entry *entry;
813 unsigned long irq_flags;
814
815 raw_local_irq_save(irq_flags);
816 __raw_spin_lock(&data->lock);
817 entry = tracing_get_trace_entry(tr, data);
818 tracing_generic_entry_update(entry, flags);
819 entry->type = TRACE_FN;
820 entry->fn.ip = ip;
821 entry->fn.parent_ip = parent_ip;
822 __raw_spin_unlock(&data->lock);
823 raw_local_irq_restore(irq_flags);
824}
825
826void
827ftrace(struct trace_array *tr, struct trace_array_cpu *data,
828 unsigned long ip, unsigned long parent_ip, unsigned long flags)
829{
830 if (likely(!atomic_read(&data->disabled)))
831 trace_function(tr, data, ip, parent_ip, flags);
832}
833
834void __trace_stack(struct trace_array *tr,
835 struct trace_array_cpu *data,
836 unsigned long flags,
837 int skip)
838{
839 struct trace_entry *entry;
840 struct stack_trace trace;
841
842 if (!(trace_flags & TRACE_ITER_STACKTRACE))
843 return;
844
845 entry = tracing_get_trace_entry(tr, data);
846 tracing_generic_entry_update(entry, flags);
847 entry->type = TRACE_STACK;
848
849 memset(&entry->stack, 0, sizeof(entry->stack));
850
851 trace.nr_entries = 0;
852 trace.max_entries = FTRACE_STACK_ENTRIES;
853 trace.skip = skip;
854 trace.entries = entry->stack.caller;
855
856 save_stack_trace(&trace);
857}
858
859void
860__trace_special(void *__tr, void *__data,
861 unsigned long arg1, unsigned long arg2, unsigned long arg3)
862{
863 struct trace_array_cpu *data = __data;
864 struct trace_array *tr = __tr;
865 struct trace_entry *entry;
866 unsigned long irq_flags;
867
868 raw_local_irq_save(irq_flags);
869 __raw_spin_lock(&data->lock);
870 entry = tracing_get_trace_entry(tr, data);
871 tracing_generic_entry_update(entry, 0);
872 entry->type = TRACE_SPECIAL;
873 entry->special.arg1 = arg1;
874 entry->special.arg2 = arg2;
875 entry->special.arg3 = arg3;
876 __trace_stack(tr, data, irq_flags, 4);
877 __raw_spin_unlock(&data->lock);
878 raw_local_irq_restore(irq_flags);
879
880 trace_wake_up();
881}
882
883void
884tracing_sched_switch_trace(struct trace_array *tr,
885 struct trace_array_cpu *data,
886 struct task_struct *prev,
887 struct task_struct *next,
888 unsigned long flags)
889{
890 struct trace_entry *entry;
891 unsigned long irq_flags;
892
893 raw_local_irq_save(irq_flags);
894 __raw_spin_lock(&data->lock);
895 entry = tracing_get_trace_entry(tr, data);
896 tracing_generic_entry_update(entry, flags);
897 entry->type = TRACE_CTX;
898 entry->ctx.prev_pid = prev->pid;
899 entry->ctx.prev_prio = prev->prio;
900 entry->ctx.prev_state = prev->state;
901 entry->ctx.next_pid = next->pid;
902 entry->ctx.next_prio = next->prio;
903 entry->ctx.next_state = next->state;
904 __trace_stack(tr, data, flags, 5);
905 __raw_spin_unlock(&data->lock);
906 raw_local_irq_restore(irq_flags);
907}
908
909void
910tracing_sched_wakeup_trace(struct trace_array *tr,
911 struct trace_array_cpu *data,
912 struct task_struct *wakee,
913 struct task_struct *curr,
914 unsigned long flags)
915{
916 struct trace_entry *entry;
917 unsigned long irq_flags;
918
919 raw_local_irq_save(irq_flags);
920 __raw_spin_lock(&data->lock);
921 entry = tracing_get_trace_entry(tr, data);
922 tracing_generic_entry_update(entry, flags);
923 entry->type = TRACE_WAKE;
924 entry->ctx.prev_pid = curr->pid;
925 entry->ctx.prev_prio = curr->prio;
926 entry->ctx.prev_state = curr->state;
927 entry->ctx.next_pid = wakee->pid;
928 entry->ctx.next_prio = wakee->prio;
929 entry->ctx.next_state = wakee->state;
930 __trace_stack(tr, data, flags, 6);
931 __raw_spin_unlock(&data->lock);
932 raw_local_irq_restore(irq_flags);
933
934 trace_wake_up();
935}
936
937#ifdef CONFIG_FTRACE
938static void
939function_trace_call(unsigned long ip, unsigned long parent_ip)
940{
941 struct trace_array *tr = &global_trace;
942 struct trace_array_cpu *data;
943 unsigned long flags;
944 long disabled;
945 int cpu;
946
947 if (unlikely(!tracer_enabled))
948 return;
949
950 local_irq_save(flags);
951 cpu = raw_smp_processor_id();
952 data = tr->data[cpu];
953 disabled = atomic_inc_return(&data->disabled);
954
955 if (likely(disabled == 1))
956 trace_function(tr, data, ip, parent_ip, flags);
957
958 atomic_dec(&data->disabled);
959 local_irq_restore(flags);
960}
961
962static struct ftrace_ops trace_ops __read_mostly =
963{
964 .func = function_trace_call,
965};
966
967void tracing_start_function_trace(void)
968{
969 register_ftrace_function(&trace_ops);
970}
971
972void tracing_stop_function_trace(void)
973{
974 unregister_ftrace_function(&trace_ops);
975}
976#endif
977
978enum trace_file_type {
979 TRACE_FILE_LAT_FMT = 1,
980};
981
982static struct trace_entry *
983trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
984 struct trace_iterator *iter, int cpu)
985{
986 struct page *page;
987 struct trace_entry *array;
988
989 if (iter->next_idx[cpu] >= tr->entries ||
990 iter->next_idx[cpu] >= data->trace_idx ||
991 (data->trace_head == data->trace_tail &&
992 data->trace_head_idx == data->trace_tail_idx))
993 return NULL;
994
995 if (!iter->next_page[cpu]) {
996 /* Initialize the iterator for this cpu trace buffer */
997 WARN_ON(!data->trace_tail);
998 page = virt_to_page(data->trace_tail);
999 iter->next_page[cpu] = &page->lru;
1000 iter->next_page_idx[cpu] = data->trace_tail_idx;
1001 }
1002
1003 page = list_entry(iter->next_page[cpu], struct page, lru);
1004 BUG_ON(&data->trace_pages == &page->lru);
1005
1006 array = page_address(page);
1007
1008 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1009 return &array[iter->next_page_idx[cpu]];
1010}
1011
1012static struct trace_entry *
1013find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1014{
1015 struct trace_array *tr = iter->tr;
1016 struct trace_entry *ent, *next = NULL;
1017 int next_cpu = -1;
1018 int cpu;
1019
1020 for_each_tracing_cpu(cpu) {
1021 if (!head_page(tr->data[cpu]))
1022 continue;
1023 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1024 /*
1025 * Pick the entry with the smallest timestamp:
1026 */
1027 if (ent && (!next || ent->t < next->t)) {
1028 next = ent;
1029 next_cpu = cpu;
1030 }
1031 }
1032
1033 if (ent_cpu)
1034 *ent_cpu = next_cpu;
1035
1036 return next;
1037}
1038
1039static void trace_iterator_increment(struct trace_iterator *iter)
1040{
1041 iter->idx++;
1042 iter->next_idx[iter->cpu]++;
1043 iter->next_page_idx[iter->cpu]++;
1044
1045 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1046 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1047
1048 iter->next_page_idx[iter->cpu] = 0;
1049 iter->next_page[iter->cpu] =
1050 trace_next_list(data, iter->next_page[iter->cpu]);
1051 }
1052}
1053
1054static void trace_consume(struct trace_iterator *iter)
1055{
1056 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1057
1058 data->trace_tail_idx++;
1059 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1060 data->trace_tail = trace_next_page(data, data->trace_tail);
1061 data->trace_tail_idx = 0;
1062 }
1063
1064	/* If we have emptied the buffer, reset the index */
1065 if (data->trace_head == data->trace_tail &&
1066 data->trace_head_idx == data->trace_tail_idx)
1067 data->trace_idx = 0;
1068}
1069
1070static void *find_next_entry_inc(struct trace_iterator *iter)
1071{
1072 struct trace_entry *next;
1073 int next_cpu = -1;
1074
1075 next = find_next_entry(iter, &next_cpu);
1076
1077 iter->prev_ent = iter->ent;
1078 iter->prev_cpu = iter->cpu;
1079
1080 iter->ent = next;
1081 iter->cpu = next_cpu;
1082
1083 if (next)
1084 trace_iterator_increment(iter);
1085
1086 return next ? iter : NULL;
1087}
1088
1089static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1090{
1091 struct trace_iterator *iter = m->private;
1092 void *last_ent = iter->ent;
1093 int i = (int)*pos;
1094 void *ent;
1095
1096 (*pos)++;
1097
1098 /* can't go backwards */
1099 if (iter->idx > i)
1100 return NULL;
1101
1102 if (iter->idx < 0)
1103 ent = find_next_entry_inc(iter);
1104 else
1105 ent = iter;
1106
1107 while (ent && iter->idx < i)
1108 ent = find_next_entry_inc(iter);
1109
1110 iter->pos = *pos;
1111
1112 if (last_ent && !ent)
1113 seq_puts(m, "\n\nvim:ft=help\n");
1114
1115 return ent;
1116}
1117
1118static void *s_start(struct seq_file *m, loff_t *pos)
1119{
1120 struct trace_iterator *iter = m->private;
1121 void *p = NULL;
1122 loff_t l = 0;
1123 int i;
1124
1125 mutex_lock(&trace_types_lock);
1126
1127 if (!current_trace || current_trace != iter->trace) {
1128 mutex_unlock(&trace_types_lock);
1129 return NULL;
1130 }
1131
1132 atomic_inc(&trace_record_cmdline_disabled);
1133
1134 /* let the tracer grab locks here if needed */
1135 if (current_trace->start)
1136 current_trace->start(iter);
1137
1138 if (*pos != iter->pos) {
1139 iter->ent = NULL;
1140 iter->cpu = 0;
1141 iter->idx = -1;
1142 iter->prev_ent = NULL;
1143 iter->prev_cpu = -1;
1144
1145 for_each_tracing_cpu(i) {
1146 iter->next_idx[i] = 0;
1147 iter->next_page[i] = NULL;
1148 }
1149
1150 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1151 ;
1152
1153 } else {
1154 l = *pos - 1;
1155 p = s_next(m, p, &l);
1156 }
1157
1158 return p;
1159}
1160
1161static void s_stop(struct seq_file *m, void *p)
1162{
1163 struct trace_iterator *iter = m->private;
1164
1165 atomic_dec(&trace_record_cmdline_disabled);
1166
1167 /* let the tracer release locks here if needed */
1168 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1169 iter->trace->stop(iter);
1170
1171 mutex_unlock(&trace_types_lock);
1172}
1173
1174static int
1175seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1176{
1177#ifdef CONFIG_KALLSYMS
1178 char str[KSYM_SYMBOL_LEN];
1179
1180 kallsyms_lookup(address, NULL, NULL, NULL, str);
1181
1182 return trace_seq_printf(s, fmt, str);
1183#endif
1184 return 1;
1185}
1186
1187static int
1188seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1189 unsigned long address)
1190{
1191#ifdef CONFIG_KALLSYMS
1192 char str[KSYM_SYMBOL_LEN];
1193
1194 sprint_symbol(str, address);
1195 return trace_seq_printf(s, fmt, str);
1196#endif
1197 return 1;
1198}
1199
1200#ifndef CONFIG_64BIT
1201# define IP_FMT "%08lx"
1202#else
1203# define IP_FMT "%016lx"
1204#endif
1205
1206static int
1207seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1208{
1209 int ret;
1210
1211 if (!ip)
1212 return trace_seq_printf(s, "0");
1213
1214 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1215 ret = seq_print_sym_offset(s, "%s", ip);
1216 else
1217 ret = seq_print_sym_short(s, "%s", ip);
1218
1219 if (!ret)
1220 return 0;
1221
1222 if (sym_flags & TRACE_ITER_SYM_ADDR)
1223 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1224 return ret;
1225}
1226
1227static void print_lat_help_header(struct seq_file *m)
1228{
1229 seq_puts(m, "# _------=> CPU# \n");
1230 seq_puts(m, "# / _-----=> irqs-off \n");
1231 seq_puts(m, "# | / _----=> need-resched \n");
1232 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1233 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1234 seq_puts(m, "# |||| / \n");
1235 seq_puts(m, "# ||||| delay \n");
1236 seq_puts(m, "# cmd pid ||||| time | caller \n");
1237 seq_puts(m, "# \\ / ||||| \\ | / \n");
1238}
1239
1240static void print_func_help_header(struct seq_file *m)
1241{
1242 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1243 seq_puts(m, "# | | | | |\n");
1244}
1245
1246
1247static void
1248print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1249{
1250 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1251 struct trace_array *tr = iter->tr;
1252 struct trace_array_cpu *data = tr->data[tr->cpu];
1253 struct tracer *type = current_trace;
1254 unsigned long total = 0;
1255 unsigned long entries = 0;
1256 int cpu;
1257 const char *name = "preemption";
1258
1259 if (type)
1260 name = type->name;
1261
1262 for_each_tracing_cpu(cpu) {
1263 if (head_page(tr->data[cpu])) {
1264 total += tr->data[cpu]->trace_idx;
1265 if (tr->data[cpu]->trace_idx > tr->entries)
1266 entries += tr->entries;
1267 else
1268 entries += tr->data[cpu]->trace_idx;
1269 }
1270 }
1271
1272 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1273 name, UTS_RELEASE);
1274 seq_puts(m, "-----------------------------------"
1275 "---------------------------------\n");
1276 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1277 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1278 nsecs_to_usecs(data->saved_latency),
1279 entries,
1280 total,
1281 tr->cpu,
1282#if defined(CONFIG_PREEMPT_NONE)
1283 "server",
1284#elif defined(CONFIG_PREEMPT_VOLUNTARY)
1285 "desktop",
1286#elif defined(CONFIG_PREEMPT_DESKTOP)
1287 "preempt",
1288#else
1289 "unknown",
1290#endif
1291 /* These are reserved for later use */
1292 0, 0, 0, 0);
1293#ifdef CONFIG_SMP
1294 seq_printf(m, " #P:%d)\n", num_online_cpus());
1295#else
1296 seq_puts(m, ")\n");
1297#endif
1298 seq_puts(m, " -----------------\n");
1299 seq_printf(m, " | task: %.16s-%d "
1300 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1301 data->comm, data->pid, data->uid, data->nice,
1302 data->policy, data->rt_priority);
1303 seq_puts(m, " -----------------\n");
1304
1305 if (data->critical_start) {
1306 seq_puts(m, " => started at: ");
1307 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1308 trace_print_seq(m, &iter->seq);
1309 seq_puts(m, "\n => ended at: ");
1310 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1311 trace_print_seq(m, &iter->seq);
1312 seq_puts(m, "\n");
1313 }
1314
1315 seq_puts(m, "\n");
1316}
1317
1318static void
1319lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1320{
1321 int hardirq, softirq;
1322 char *comm;
1323
1324 comm = trace_find_cmdline(entry->pid);
1325
1326 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1327 trace_seq_printf(s, "%d", cpu);
1328 trace_seq_printf(s, "%c%c",
1329 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1330 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1331
1332 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1333 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1334 if (hardirq && softirq) {
1335 trace_seq_putc(s, 'H');
1336 } else {
1337 if (hardirq) {
1338 trace_seq_putc(s, 'h');
1339 } else {
1340 if (softirq)
1341 trace_seq_putc(s, 's');
1342 else
1343 trace_seq_putc(s, '.');
1344 }
1345 }
1346
1347 if (entry->preempt_count)
1348 trace_seq_printf(s, "%x", entry->preempt_count);
1349 else
1350 trace_seq_puts(s, ".");
1351}
1352
1353unsigned long preempt_mark_thresh = 100;
1354
1355static void
1356lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1357 unsigned long rel_usecs)
1358{
1359 trace_seq_printf(s, " %4lldus", abs_usecs);
1360 if (rel_usecs > preempt_mark_thresh)
1361 trace_seq_puts(s, "!: ");
1362 else if (rel_usecs > 1)
1363 trace_seq_puts(s, "+: ");
1364 else
1365 trace_seq_puts(s, " : ");
1366}
1367
1368static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1369
1370static int
1371print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1372{
1373 struct trace_seq *s = &iter->seq;
1374 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1375 struct trace_entry *next_entry = find_next_entry(iter, NULL);
1376 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1377 struct trace_entry *entry = iter->ent;
1378 unsigned long abs_usecs;
1379 unsigned long rel_usecs;
1380 char *comm;
1381 int S, T;
1382 int i;
1383 unsigned state;
1384
1385 if (!next_entry)
1386 next_entry = entry;
1387 rel_usecs = ns2usecs(next_entry->t - entry->t);
1388 abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1389
1390 if (verbose) {
1391 comm = trace_find_cmdline(entry->pid);
1392 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1393 " %ld.%03ldms (+%ld.%03ldms): ",
1394 comm,
1395 entry->pid, cpu, entry->flags,
1396 entry->preempt_count, trace_idx,
1397 ns2usecs(entry->t),
1398 abs_usecs/1000,
1399 abs_usecs % 1000, rel_usecs/1000,
1400 rel_usecs % 1000);
1401 } else {
1402 lat_print_generic(s, entry, cpu);
1403 lat_print_timestamp(s, abs_usecs, rel_usecs);
1404 }
1405 switch (entry->type) {
1406 case TRACE_FN:
1407 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1408 trace_seq_puts(s, " (");
1409 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1410 trace_seq_puts(s, ")\n");
1411 break;
1412 case TRACE_CTX:
1413 case TRACE_WAKE:
1414 T = entry->ctx.next_state < sizeof(state_to_char) ?
1415 state_to_char[entry->ctx.next_state] : 'X';
1416
1417 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1418 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1419 comm = trace_find_cmdline(entry->ctx.next_pid);
1420 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1421 entry->ctx.prev_pid,
1422 entry->ctx.prev_prio,
1423 S, entry->type == TRACE_CTX ? "==>" : " +",
1424 entry->ctx.next_pid,
1425 entry->ctx.next_prio,
1426 T, comm);
1427 break;
1428 case TRACE_SPECIAL:
1429 trace_seq_printf(s, "# %ld %ld %ld\n",
1430 entry->special.arg1,
1431 entry->special.arg2,
1432 entry->special.arg3);
1433 break;
1434 case TRACE_STACK:
1435 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1436 if (i)
1437 trace_seq_puts(s, " <= ");
1438 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1439 }
1440 trace_seq_puts(s, "\n");
1441 break;
1442 default:
1443 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1444 }
1445 return 1;
1446}
1447
1448static int print_trace_fmt(struct trace_iterator *iter)
1449{
1450 struct trace_seq *s = &iter->seq;
1451 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1452 struct trace_entry *entry;
1453 unsigned long usec_rem;
1454 unsigned long long t;
1455 unsigned long secs;
1456 char *comm;
1457 int ret;
1458 int S, T;
1459 int i;
1460
1461 entry = iter->ent;
1462
1463 comm = trace_find_cmdline(iter->ent->pid);
1464
1465 t = ns2usecs(entry->t);
1466 usec_rem = do_div(t, 1000000ULL);
1467 secs = (unsigned long)t;
1468
1469 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1470 if (!ret)
1471 return 0;
1472 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1473 if (!ret)
1474 return 0;
1475 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1476 if (!ret)
1477 return 0;
1478
1479 switch (entry->type) {
1480 case TRACE_FN:
1481 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1482 if (!ret)
1483 return 0;
1484 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1485 entry->fn.parent_ip) {
1486 ret = trace_seq_printf(s, " <-");
1487 if (!ret)
1488 return 0;
1489 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1490 sym_flags);
1491 if (!ret)
1492 return 0;
1493 }
1494 ret = trace_seq_printf(s, "\n");
1495 if (!ret)
1496 return 0;
1497 break;
1498 case TRACE_CTX:
1499 case TRACE_WAKE:
1500 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1501 state_to_char[entry->ctx.prev_state] : 'X';
1502 T = entry->ctx.next_state < sizeof(state_to_char) ?
1503 state_to_char[entry->ctx.next_state] : 'X';
1504 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1505 entry->ctx.prev_pid,
1506 entry->ctx.prev_prio,
1507 S,
1508 entry->type == TRACE_CTX ? "==>" : " +",
1509 entry->ctx.next_pid,
1510 entry->ctx.next_prio,
1511 T);
1512 if (!ret)
1513 return 0;
1514 break;
1515 case TRACE_SPECIAL:
1516 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1517 entry->special.arg1,
1518 entry->special.arg2,
1519 entry->special.arg3);
1520 if (!ret)
1521 return 0;
1522 break;
1523 case TRACE_STACK:
1524 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1525 if (i) {
1526 ret = trace_seq_puts(s, " <= ");
1527 if (!ret)
1528 return 0;
1529 }
1530 ret = seq_print_ip_sym(s, entry->stack.caller[i],
1531 sym_flags);
1532 if (!ret)
1533 return 0;
1534 }
1535 ret = trace_seq_puts(s, "\n");
1536 if (!ret)
1537 return 0;
1538 break;
1539 }
1540 return 1;
1541}
1542
1543static int print_raw_fmt(struct trace_iterator *iter)
1544{
1545 struct trace_seq *s = &iter->seq;
1546 struct trace_entry *entry;
1547 int ret;
1548 int S, T;
1549
1550 entry = iter->ent;
1551
1552 ret = trace_seq_printf(s, "%d %d %llu ",
1553 entry->pid, iter->cpu, entry->t);
1554 if (!ret)
1555 return 0;
1556
1557 switch (entry->type) {
1558 case TRACE_FN:
1559 ret = trace_seq_printf(s, "%x %x\n",
1560 entry->fn.ip, entry->fn.parent_ip);
1561 if (!ret)
1562 return 0;
1563 break;
1564 case TRACE_CTX:
1565 case TRACE_WAKE:
1566 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1567 state_to_char[entry->ctx.prev_state] : 'X';
1568 T = entry->ctx.next_state < sizeof(state_to_char) ?
1569 state_to_char[entry->ctx.next_state] : 'X';
1570 if (entry->type == TRACE_WAKE)
1571 S = '+';
1572 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1573 entry->ctx.prev_pid,
1574 entry->ctx.prev_prio,
1575 S,
1576 entry->ctx.next_pid,
1577 entry->ctx.next_prio,
1578 T);
1579 if (!ret)
1580 return 0;
1581 break;
1582 case TRACE_SPECIAL:
1583 case TRACE_STACK:
1584 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1585 entry->special.arg1,
1586 entry->special.arg2,
1587 entry->special.arg3);
1588 if (!ret)
1589 return 0;
1590 break;
1591 }
1592 return 1;
1593}
1594
1595#define SEQ_PUT_FIELD_RET(s, x) \
1596do { \
1597 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
1598 return 0; \
1599} while (0)
1600
1601#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1602do { \
1603 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1604 return 0; \
1605} while (0)
1606
1607static int print_hex_fmt(struct trace_iterator *iter)
1608{
1609 struct trace_seq *s = &iter->seq;
1610 unsigned char newline = '\n';
1611 struct trace_entry *entry;
1612 int S, T;
1613
1614 entry = iter->ent;
1615
1616 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1617 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1618 SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1619
1620 switch (entry->type) {
1621 case TRACE_FN:
1622 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1623 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1624 break;
1625 case TRACE_CTX:
1626 case TRACE_WAKE:
1627 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1628 state_to_char[entry->ctx.prev_state] : 'X';
1629 T = entry->ctx.next_state < sizeof(state_to_char) ?
1630 state_to_char[entry->ctx.next_state] : 'X';
1631 if (entry->type == TRACE_WAKE)
1632 S = '+';
1633 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1634 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1635 SEQ_PUT_HEX_FIELD_RET(s, S);
1636 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1637 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1638 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1639 SEQ_PUT_HEX_FIELD_RET(s, T);
1640 break;
1641 case TRACE_SPECIAL:
1642 case TRACE_STACK:
1643 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1644 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1645 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1646 break;
1647 }
1648 SEQ_PUT_FIELD_RET(s, newline);
1649
1650 return 1;
1651}
1652
1653static int print_bin_fmt(struct trace_iterator *iter)
1654{
1655 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry;
1657
1658 entry = iter->ent;
1659
1660 SEQ_PUT_FIELD_RET(s, entry->pid);
1661 SEQ_PUT_FIELD_RET(s, entry->cpu);
1662 SEQ_PUT_FIELD_RET(s, entry->t);
1663
1664 switch (entry->type) {
1665 case TRACE_FN:
1666 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1667 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1668 break;
1669 case TRACE_CTX:
1670 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1671 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1672 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1673 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1674 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1675 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1676 break;
1677 case TRACE_SPECIAL:
1678 case TRACE_STACK:
1679 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1680 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1681 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1682 break;
1683 }
1684 return 1;
1685}
1686
1687static int trace_empty(struct trace_iterator *iter)
1688{
1689 struct trace_array_cpu *data;
1690 int cpu;
1691
1692 for_each_tracing_cpu(cpu) {
1693 data = iter->tr->data[cpu];
1694
1695 if (head_page(data) && data->trace_idx &&
1696 (data->trace_tail != data->trace_head ||
1697 data->trace_tail_idx != data->trace_head_idx))
1698 return 0;
1699 }
1700 return 1;
1701}
1702
1703static int print_trace_line(struct trace_iterator *iter)
1704{
1705 if (iter->trace && iter->trace->print_line)
1706 return iter->trace->print_line(iter);
1707
1708 if (trace_flags & TRACE_ITER_BIN)
1709 return print_bin_fmt(iter);
1710
1711 if (trace_flags & TRACE_ITER_HEX)
1712 return print_hex_fmt(iter);
1713
1714 if (trace_flags & TRACE_ITER_RAW)
1715 return print_raw_fmt(iter);
1716
1717 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1718 return print_lat_fmt(iter, iter->idx, iter->cpu);
1719
1720 return print_trace_fmt(iter);
1721}
1722
1723static int s_show(struct seq_file *m, void *v)
1724{
1725 struct trace_iterator *iter = v;
1726
1727 if (iter->ent == NULL) {
1728 if (iter->tr) {
1729 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1730 seq_puts(m, "#\n");
1731 }
1732 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1733 /* print nothing if the buffers are empty */
1734 if (trace_empty(iter))
1735 return 0;
1736 print_trace_header(m, iter);
1737 if (!(trace_flags & TRACE_ITER_VERBOSE))
1738 print_lat_help_header(m);
1739 } else {
1740 if (!(trace_flags & TRACE_ITER_VERBOSE))
1741 print_func_help_header(m);
1742 }
1743 } else {
1744 print_trace_line(iter);
1745 trace_print_seq(m, &iter->seq);
1746 }
1747
1748 return 0;
1749}
1750
1751static struct seq_operations tracer_seq_ops = {
1752 .start = s_start,
1753 .next = s_next,
1754 .stop = s_stop,
1755 .show = s_show,
1756};
1757
1758static struct trace_iterator *
1759__tracing_open(struct inode *inode, struct file *file, int *ret)
1760{
1761 struct trace_iterator *iter;
1762
1763 if (tracing_disabled) {
1764 *ret = -ENODEV;
1765 return NULL;
1766 }
1767
1768 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1769 if (!iter) {
1770 *ret = -ENOMEM;
1771 goto out;
1772 }
1773
1774 mutex_lock(&trace_types_lock);
1775 if (current_trace && current_trace->print_max)
1776 iter->tr = &max_tr;
1777 else
1778 iter->tr = inode->i_private;
1779 iter->trace = current_trace;
1780 iter->pos = -1;
1781
1782 /* TODO stop tracer */
1783 *ret = seq_open(file, &tracer_seq_ops);
1784 if (!*ret) {
1785 struct seq_file *m = file->private_data;
1786 m->private = iter;
1787
1788 /* stop the trace while dumping */
1789 if (iter->tr->ctrl)
1790 tracer_enabled = 0;
1791
1792 if (iter->trace && iter->trace->open)
1793 iter->trace->open(iter);
1794 } else {
1795 kfree(iter);
1796 iter = NULL;
1797 }
1798 mutex_unlock(&trace_types_lock);
1799
1800 out:
1801 return iter;
1802}
1803
1804int tracing_open_generic(struct inode *inode, struct file *filp)
1805{
1806 if (tracing_disabled)
1807 return -ENODEV;
1808
1809 filp->private_data = inode->i_private;
1810 return 0;
1811}
1812
1813int tracing_release(struct inode *inode, struct file *file)
1814{
1815 struct seq_file *m = (struct seq_file *)file->private_data;
1816 struct trace_iterator *iter = m->private;
1817
1818 mutex_lock(&trace_types_lock);
1819 if (iter->trace && iter->trace->close)
1820 iter->trace->close(iter);
1821
1822 /* reenable tracing if it was previously enabled */
1823 if (iter->tr->ctrl)
1824 tracer_enabled = 1;
1825 mutex_unlock(&trace_types_lock);
1826
1827 seq_release(inode, file);
1828 kfree(iter);
1829 return 0;
1830}
1831
1832static int tracing_open(struct inode *inode, struct file *file)
1833{
1834 int ret;
1835
1836 __tracing_open(inode, file, &ret);
1837
1838 return ret;
1839}
1840
1841static int tracing_lt_open(struct inode *inode, struct file *file)
1842{
1843 struct trace_iterator *iter;
1844 int ret;
1845
1846 iter = __tracing_open(inode, file, &ret);
1847
1848 if (!ret)
1849 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1850
1851 return ret;
1852}
1853
1854
1855static void *
1856t_next(struct seq_file *m, void *v, loff_t *pos)
1857{
1858 struct tracer *t = m->private;
1859
1860 (*pos)++;
1861
1862 if (t)
1863 t = t->next;
1864
1865 m->private = t;
1866
1867 return t;
1868}
1869
1870static void *t_start(struct seq_file *m, loff_t *pos)
1871{
1872 struct tracer *t = m->private;
1873 loff_t l = 0;
1874
1875 mutex_lock(&trace_types_lock);
1876 for (; t && l < *pos; t = t_next(m, t, &l))
1877 ;
1878
1879 return t;
1880}
1881
1882static void t_stop(struct seq_file *m, void *p)
1883{
1884 mutex_unlock(&trace_types_lock);
1885}
1886
1887static int t_show(struct seq_file *m, void *v)
1888{
1889 struct tracer *t = v;
1890
1891 if (!t)
1892 return 0;
1893
1894 seq_printf(m, "%s", t->name);
1895 if (t->next)
1896 seq_putc(m, ' ');
1897 else
1898 seq_putc(m, '\n');
1899
1900 return 0;
1901}
1902
1903static struct seq_operations show_traces_seq_ops = {
1904 .start = t_start,
1905 .next = t_next,
1906 .stop = t_stop,
1907 .show = t_show,
1908};
1909
1910static int show_traces_open(struct inode *inode, struct file *file)
1911{
1912 int ret;
1913
1914 if (tracing_disabled)
1915 return -ENODEV;
1916
1917 ret = seq_open(file, &show_traces_seq_ops);
1918 if (!ret) {
1919 struct seq_file *m = file->private_data;
1920 m->private = trace_types;
1921 }
1922
1923 return ret;
1924}
1925
1926static struct file_operations tracing_fops = {
1927 .open = tracing_open,
1928 .read = seq_read,
1929 .llseek = seq_lseek,
1930 .release = tracing_release,
1931};
1932
1933static struct file_operations tracing_lt_fops = {
1934 .open = tracing_lt_open,
1935 .read = seq_read,
1936 .llseek = seq_lseek,
1937 .release = tracing_release,
1938};
1939
1940static struct file_operations show_traces_fops = {
1941 .open = show_traces_open,
1942 .read = seq_read,
1943 .release = seq_release,
1944};
1945
1946/*
1947 * Only trace on a CPU if the bitmask is set:
1948 */
1949static cpumask_t tracing_cpumask = CPU_MASK_ALL;
1950
1951/*
1952 * When tracing/tracing_cpu_mask is modified then this holds
1953 * the new bitmask we are about to install:
1954 */
1955static cpumask_t tracing_cpumask_new;
1956
1957/*
1958 * The tracer itself will not take this lock, but still we want
1959 * to provide a consistent cpumask to user-space:
1960 */
1961static DEFINE_MUTEX(tracing_cpumask_update_lock);
1962
1963/*
1964 * Temporary storage for the character representation of the
1965 * CPU bitmask (and one more byte for the newline):
1966 */
1967static char mask_str[NR_CPUS + 1];
1968
1969static ssize_t
1970tracing_cpumask_read(struct file *filp, char __user *ubuf,
1971 size_t count, loff_t *ppos)
1972{
1973 int len;
1974
1975 mutex_lock(&tracing_cpumask_update_lock);
1976
1977 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
1978 if (count - len < 2) {
1979 count = -EINVAL;
1980 goto out_err;
1981 }
1982 len += sprintf(mask_str + len, "\n");
1983 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
1984
1985out_err:
1986 mutex_unlock(&tracing_cpumask_update_lock);
1987
1988 return count;
1989}
1990
1991static ssize_t
1992tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1993 size_t count, loff_t *ppos)
1994{
1995 int err, cpu;
1996
1997 mutex_lock(&tracing_cpumask_update_lock);
1998 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
1999 if (err)
2000 goto err_unlock;
2001
2002 raw_local_irq_disable();
2003 __raw_spin_lock(&ftrace_max_lock);
2004 for_each_tracing_cpu(cpu) {
2005 /*
2006 * Increase/decrease the disabled counter if we are
2007 * about to flip a bit in the cpumask:
2008 */
2009 if (cpu_isset(cpu, tracing_cpumask) &&
2010 !cpu_isset(cpu, tracing_cpumask_new)) {
2011 atomic_inc(&global_trace.data[cpu]->disabled);
2012 }
2013 if (!cpu_isset(cpu, tracing_cpumask) &&
2014 cpu_isset(cpu, tracing_cpumask_new)) {
2015 atomic_dec(&global_trace.data[cpu]->disabled);
2016 }
2017 }
2018 __raw_spin_unlock(&ftrace_max_lock);
2019 raw_local_irq_enable();
2020
2021 tracing_cpumask = tracing_cpumask_new;
2022
2023 mutex_unlock(&tracing_cpumask_update_lock);
2024
2025 return count;
2026
2027err_unlock:
2028 mutex_unlock(&tracing_cpumask_update_lock);
2029
2030 return err;
2031}
2032
2033static struct file_operations tracing_cpumask_fops = {
2034 .open = tracing_open_generic,
2035 .read = tracing_cpumask_read,
2036 .write = tracing_cpumask_write,
2037};
2038
2039static ssize_t
2040tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2041 size_t cnt, loff_t *ppos)
2042{
2043 char *buf;
2044 int r = 0;
2045 int len = 0;
2046 int i;
2047
2048 /* calculate max size */
2049 for (i = 0; trace_options[i]; i++) {
2050 len += strlen(trace_options[i]);
2051 len += 3; /* "no" and space */
2052 }
2053
2054 /* +2 for \n and \0 */
2055 buf = kmalloc(len + 2, GFP_KERNEL);
2056 if (!buf)
2057 return -ENOMEM;
2058
2059 for (i = 0; trace_options[i]; i++) {
2060 if (trace_flags & (1 << i))
2061 r += sprintf(buf + r, "%s ", trace_options[i]);
2062 else
2063 r += sprintf(buf + r, "no%s ", trace_options[i]);
2064 }
2065
2066 r += sprintf(buf + r, "\n");
2067 WARN_ON(r >= len + 2);
2068
2069 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2070
2071 kfree(buf);
2072
2073 return r;
2074}
2075
2076static ssize_t
2077tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2078 size_t cnt, loff_t *ppos)
2079{
2080 char buf[64];
2081 char *cmp = buf;
2082 int neg = 0;
2083 int i;
2084
2085 if (cnt >= sizeof(buf))
2086 return -EINVAL;
2087
2088 if (copy_from_user(&buf, ubuf, cnt))
2089 return -EFAULT;
2090
2091 buf[cnt] = 0;
2092
2093 if (strncmp(buf, "no", 2) == 0) {
2094 neg = 1;
2095 cmp += 2;
2096 }
2097
2098 for (i = 0; trace_options[i]; i++) {
2099 int len = strlen(trace_options[i]);
2100
2101 if (strncmp(cmp, trace_options[i], len) == 0) {
2102 if (neg)
2103 trace_flags &= ~(1 << i);
2104 else
2105 trace_flags |= (1 << i);
2106 break;
2107 }
2108 }
2109 /*
2110 * If no option could be set, return an error:
2111 */
2112 if (!trace_options[i])
2113 return -EINVAL;
2114
2115 filp->f_pos += cnt;
2116
2117 return cnt;
2118}
2119
2120static struct file_operations tracing_iter_fops = {
2121 .open = tracing_open_generic,
2122 .read = tracing_iter_ctrl_read,
2123 .write = tracing_iter_ctrl_write,
2124};
2125
2126static const char readme_msg[] =
2127 "tracing mini-HOWTO:\n\n"
2128 "# mkdir /debug\n"
2129 "# mount -t debugfs nodev /debug\n\n"
2130 "# cat /debug/tracing/available_tracers\n"
2131 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2132 "# cat /debug/tracing/current_tracer\n"
2133 "none\n"
2134 "# echo sched_switch > /debug/tracing/current_tracer\n"
2135 "# cat /debug/tracing/current_tracer\n"
2136 "sched_switch\n"
2137 "# cat /debug/tracing/iter_ctrl\n"
2138 "noprint-parent nosym-offset nosym-addr noverbose\n"
2139 "# echo print-parent > /debug/tracing/iter_ctrl\n"
2140 "# echo 1 > /debug/tracing/tracing_enabled\n"
2141 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2142 "echo 0 > /debug/tracing/tracing_enabled\n"
2143;
2144
2145static ssize_t
2146tracing_readme_read(struct file *filp, char __user *ubuf,
2147 size_t cnt, loff_t *ppos)
2148{
2149 return simple_read_from_buffer(ubuf, cnt, ppos,
2150 readme_msg, strlen(readme_msg));
2151}
2152
2153static struct file_operations tracing_readme_fops = {
2154 .open = tracing_open_generic,
2155 .read = tracing_readme_read,
2156};
2157
2158static ssize_t
2159tracing_ctrl_read(struct file *filp, char __user *ubuf,
2160 size_t cnt, loff_t *ppos)
2161{
2162 struct trace_array *tr = filp->private_data;
2163 char buf[64];
2164 int r;
2165
2166 r = sprintf(buf, "%ld\n", tr->ctrl);
2167 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2168}
2169
2170static ssize_t
2171tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2172 size_t cnt, loff_t *ppos)
2173{
2174 struct trace_array *tr = filp->private_data;
2175 char buf[64];
2176 long val;
2177 int ret;
2178
2179 if (cnt >= sizeof(buf))
2180 return -EINVAL;
2181
2182 if (copy_from_user(&buf, ubuf, cnt))
2183 return -EFAULT;
2184
2185 buf[cnt] = 0;
2186
2187 ret = strict_strtoul(buf, 10, &val);
2188 if (ret < 0)
2189 return ret;
2190
2191 val = !!val;
2192
2193 mutex_lock(&trace_types_lock);
2194 if (tr->ctrl ^ val) {
2195 if (val)
2196 tracer_enabled = 1;
2197 else
2198 tracer_enabled = 0;
2199
2200 tr->ctrl = val;
2201
2202 if (current_trace && current_trace->ctrl_update)
2203 current_trace->ctrl_update(tr);
2204 }
2205 mutex_unlock(&trace_types_lock);
2206
2207 filp->f_pos += cnt;
2208
2209 return cnt;
2210}
2211
2212static ssize_t
2213tracing_set_trace_read(struct file *filp, char __user *ubuf,
2214 size_t cnt, loff_t *ppos)
2215{
2216 char buf[max_tracer_type_len+2];
2217 int r;
2218
2219 mutex_lock(&trace_types_lock);
2220 if (current_trace)
2221 r = sprintf(buf, "%s\n", current_trace->name);
2222 else
2223 r = sprintf(buf, "\n");
2224 mutex_unlock(&trace_types_lock);
2225
2226 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2227}
2228
2229static ssize_t
2230tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2231 size_t cnt, loff_t *ppos)
2232{
2233 struct trace_array *tr = &global_trace;
2234 struct tracer *t;
2235 char buf[max_tracer_type_len+1];
2236 int i;
2237
2238 if (cnt > max_tracer_type_len)
2239 cnt = max_tracer_type_len;
2240
2241 if (copy_from_user(&buf, ubuf, cnt))
2242 return -EFAULT;
2243
2244 buf[cnt] = 0;
2245
2246 /* strip ending whitespace. */
2247 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2248 buf[i] = 0;
2249
2250 mutex_lock(&trace_types_lock);
2251 for (t = trace_types; t; t = t->next) {
2252 if (strcmp(t->name, buf) == 0)
2253 break;
2254 }
2255 if (!t || t == current_trace)
2256 goto out;
2257
2258 if (current_trace && current_trace->reset)
2259 current_trace->reset(tr);
2260
2261 current_trace = t;
2262 if (t->init)
2263 t->init(tr);
2264
2265 out:
2266 mutex_unlock(&trace_types_lock);
2267
2268 filp->f_pos += cnt;
2269
2270 return cnt;
2271}
2272
2273static ssize_t
2274tracing_max_lat_read(struct file *filp, char __user *ubuf,
2275 size_t cnt, loff_t *ppos)
2276{
2277 unsigned long *ptr = filp->private_data;
2278 char buf[64];
2279 int r;
2280
2281 r = snprintf(buf, sizeof(buf), "%ld\n",
2282 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2283 if (r > sizeof(buf))
2284 r = sizeof(buf);
2285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2286}
2287
2288static ssize_t
2289tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2290 size_t cnt, loff_t *ppos)
2291{
2292 long *ptr = filp->private_data;
2293 char buf[64];
2294 long val;
2295 int ret;
2296
2297 if (cnt >= sizeof(buf))
2298 return -EINVAL;
2299
2300 if (copy_from_user(&buf, ubuf, cnt))
2301 return -EFAULT;
2302
2303 buf[cnt] = 0;
2304
2305 ret = strict_strtoul(buf, 10, &val);
2306 if (ret < 0)
2307 return ret;
2308
2309 *ptr = val * 1000;
2310
2311 return cnt;
2312}
2313
2314static atomic_t tracing_reader;
2315
2316static int tracing_open_pipe(struct inode *inode, struct file *filp)
2317{
2318 struct trace_iterator *iter;
2319
2320 if (tracing_disabled)
2321 return -ENODEV;
2322
2323 /* We only allow one reader of the pipe */
2324 if (atomic_inc_return(&tracing_reader) != 1) {
2325 atomic_dec(&tracing_reader);
2326 return -EBUSY;
2327 }
2328
2329 /* create a buffer to store the information to pass to userspace */
2330 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2331 if (!iter)
2332 return -ENOMEM;
2333
2334 mutex_lock(&trace_types_lock);
2335 iter->tr = &global_trace;
2336 iter->trace = current_trace;
2337 filp->private_data = iter;
2338
2339 if (iter->trace->pipe_open)
2340 iter->trace->pipe_open(iter);
2341 mutex_unlock(&trace_types_lock);
2342
2343 return 0;
2344}
2345
2346static int tracing_release_pipe(struct inode *inode, struct file *file)
2347{
2348 struct trace_iterator *iter = file->private_data;
2349
2350 kfree(iter);
2351 atomic_dec(&tracing_reader);
2352
2353 return 0;
2354}
2355
2356static unsigned int
2357tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2358{
2359 struct trace_iterator *iter = filp->private_data;
2360
2361 if (trace_flags & TRACE_ITER_BLOCK) {
2362 /*
2363 * Always select as readable when in blocking mode
2364 */
2365 return POLLIN | POLLRDNORM;
2366 } else {
2367 if (!trace_empty(iter))
2368 return POLLIN | POLLRDNORM;
2369 poll_wait(filp, &trace_wait, poll_table);
2370 if (!trace_empty(iter))
2371 return POLLIN | POLLRDNORM;
2372
2373 return 0;
2374 }
2375}
2376
2377/*
2378 * Consumer reader.
2379 */
2380static ssize_t
2381tracing_read_pipe(struct file *filp, char __user *ubuf,
2382 size_t cnt, loff_t *ppos)
2383{
2384 struct trace_iterator *iter = filp->private_data;
2385 struct trace_array_cpu *data;
2386 static cpumask_t mask;
2387 unsigned long flags;
2388#ifdef CONFIG_FTRACE
2389 int ftrace_save;
2390#endif
2391 int cpu;
2392 ssize_t sret;
2393
2394 /* return any leftover data */
2395 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2396 if (sret != -EBUSY)
2397 return sret;
2398 sret = 0;
2399
2400 trace_seq_reset(&iter->seq);
2401
2402 mutex_lock(&trace_types_lock);
2403 if (iter->trace->read) {
2404 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2405 if (sret)
2406 goto out;
2407 }
2408
2409 while (trace_empty(iter)) {
2410
2411 if ((filp->f_flags & O_NONBLOCK)) {
2412 sret = -EAGAIN;
2413 goto out;
2414 }
2415
2416 /*
2417 * This is a make-shift waitqueue. The reason we don't use
2418 * an actual wait queue is because:
2419 * 1) we only ever have one waiter
2420 * 2) the tracer traces all functions, so we don't want
2421 * the overhead of calling wake_up and friends
2422 * (and tracing them too)
2423 * Anyway, this is a really primitive wakeup.
2424 */
2425 set_current_state(TASK_INTERRUPTIBLE);
2426 iter->tr->waiter = current;
2427
2428 mutex_unlock(&trace_types_lock);
2429
2430 /* sleep for 100 msecs, and try again. */
2431 schedule_timeout(HZ/10);
2432
2433 mutex_lock(&trace_types_lock);
2434
2435 iter->tr->waiter = NULL;
2436
2437 if (signal_pending(current)) {
2438 sret = -EINTR;
2439 goto out;
2440 }
2441
2442 if (iter->trace != current_trace)
2443 goto out;
2444
2445 /*
2446 * We break out of this loop only once we have read something and tracing is disabled.
2447 * We still block if tracing is disabled, but we have never
2448 * read anything. This allows a user to cat this file, and
2449 * then enable tracing. But after we have read something,
2450 * we give an EOF when tracing is again disabled.
2451 *
2452 * iter->pos will be 0 if we haven't read anything.
2453 */
2454 if (!tracer_enabled && iter->pos)
2455 break;
2456
2457 continue;
2458 }
2459
2460 /* stop when tracing is finished */
2461 if (trace_empty(iter))
2462 goto out;
2463
2464 if (cnt >= PAGE_SIZE)
2465 cnt = PAGE_SIZE - 1;
2466
2467 /* reset all but tr, trace, and overruns */
2468 memset(&iter->seq, 0,
2469 sizeof(struct trace_iterator) -
2470 offsetof(struct trace_iterator, seq));
2471 iter->pos = -1;
2472
2473 /*
2474 * We need to stop all tracing on all CPUs to read
2475 * the next buffer. This is a bit expensive, but is
2476 * not done often. We fill in all that we can read,
2477 * and then release the locks again.
2478 */
2479
2480 cpus_clear(mask);
2481 local_irq_save(flags);
2482#ifdef CONFIG_FTRACE
2483 ftrace_save = ftrace_enabled;
2484 ftrace_enabled = 0;
2485#endif
2486 smp_wmb();
2487 for_each_tracing_cpu(cpu) {
2488 data = iter->tr->data[cpu];
2489
2490 if (!head_page(data) || !data->trace_idx)
2491 continue;
2492
2493 atomic_inc(&data->disabled);
2494 cpu_set(cpu, mask);
2495 }
2496
2497 for_each_cpu_mask(cpu, mask) {
2498 data = iter->tr->data[cpu];
2499 __raw_spin_lock(&data->lock);
2500
2501 if (data->overrun > iter->last_overrun[cpu])
2502 iter->overrun[cpu] +=
2503 data->overrun - iter->last_overrun[cpu];
2504 iter->last_overrun[cpu] = data->overrun;
2505 }
2506
2507 while (find_next_entry_inc(iter) != NULL) {
2508 int ret;
2509 int len = iter->seq.len;
2510
2511 ret = print_trace_line(iter);
2512 if (!ret) {
2513 /* don't print partial lines */
2514 iter->seq.len = len;
2515 break;
2516 }
2517
2518 trace_consume(iter);
2519
2520 if (iter->seq.len >= cnt)
2521 break;
2522 }
2523
2524 for_each_cpu_mask(cpu, mask) {
2525 data = iter->tr->data[cpu];
2526 __raw_spin_unlock(&data->lock);
2527 }
2528
2529 for_each_cpu_mask(cpu, mask) {
2530 data = iter->tr->data[cpu];
2531 atomic_dec(&data->disabled);
2532 }
2533#ifdef CONFIG_FTRACE
2534 ftrace_enabled = ftrace_save;
2535#endif
2536 local_irq_restore(flags);
2537
2538 /* Now copy what we have to the user */
2539 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2540 if (iter->seq.readpos >= iter->seq.len)
2541 trace_seq_reset(&iter->seq);
2542 if (sret == -EBUSY)
2543 sret = 0;
2544
2545out:
2546 mutex_unlock(&trace_types_lock);
2547
2548 return sret;
2549}
2550
2551static ssize_t
2552tracing_entries_read(struct file *filp, char __user *ubuf,
2553 size_t cnt, loff_t *ppos)
2554{
2555 struct trace_array *tr = filp->private_data;
2556 char buf[64];
2557 int r;
2558
2559 r = sprintf(buf, "%lu\n", tr->entries);
2560 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2561}
2562
2563static ssize_t
2564tracing_entries_write(struct file *filp, const char __user *ubuf,
2565 size_t cnt, loff_t *ppos)
2566{
2567 unsigned long val;
2568 char buf[64];
2569 int ret;
2570
2571 if (cnt >= sizeof(buf))
2572 return -EINVAL;
2573
2574 if (copy_from_user(&buf, ubuf, cnt))
2575 return -EFAULT;
2576
2577 buf[cnt] = 0;
2578
2579 ret = strict_strtoul(buf, 10, &val);
2580 if (ret < 0)
2581 return ret;
2582
2583 /* must have at least 1 entry */
2584 if (!val)
2585 return -EINVAL;
2586
2587 mutex_lock(&trace_types_lock);
2588
2589 if (current_trace != &no_tracer) {
2590 cnt = -EBUSY;
2591 pr_info("ftrace: set current_tracer to none"
2592 " before modifying buffer size\n");
2593 goto out;
2594 }
2595
2596 if (val > global_trace.entries) {
2597 long pages_requested;
2598 unsigned long freeable_pages;
2599
2600 /* make sure we have enough memory before mapping */
2601 pages_requested =
2602 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2603
2604 /* account for each buffer (and max_tr) */
2605 pages_requested *= tracing_nr_buffers * 2;
2606
2607 /* Check for overflow */
2608 if (pages_requested < 0) {
2609 cnt = -ENOMEM;
2610 goto out;
2611 }
2612
2613 freeable_pages = determine_dirtyable_memory();
2614
2615 /* we only allow requests of up to 1/4 of usable memory */
2616 if (pages_requested >
2617 ((freeable_pages + tracing_pages_allocated) / 4)) {
2618 cnt = -ENOMEM;
2619 goto out;
2620 }
2621
2622 while (global_trace.entries < val) {
2623 if (trace_alloc_page()) {
2624 cnt = -ENOMEM;
2625 goto out;
2626 }
2627 /* double check that we don't go over the known pages */
2628 if (tracing_pages_allocated > pages_requested)
2629 break;
2630 }
2631
2632 } else {
2633 /* free pages until we are within one page of the requested val */
2634 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2635 trace_free_page();
2636 }
2637
2638 filp->f_pos += cnt;
2639
2640 out:
2641 max_tr.entries = global_trace.entries;
2642 mutex_unlock(&trace_types_lock);
2643
2644 return cnt;
2645}
2646
2647static struct file_operations tracing_max_lat_fops = {
2648 .open = tracing_open_generic,
2649 .read = tracing_max_lat_read,
2650 .write = tracing_max_lat_write,
2651};
2652
2653static struct file_operations tracing_ctrl_fops = {
2654 .open = tracing_open_generic,
2655 .read = tracing_ctrl_read,
2656 .write = tracing_ctrl_write,
2657};
2658
2659static struct file_operations set_tracer_fops = {
2660 .open = tracing_open_generic,
2661 .read = tracing_set_trace_read,
2662 .write = tracing_set_trace_write,
2663};
2664
2665static struct file_operations tracing_pipe_fops = {
2666 .open = tracing_open_pipe,
2667 .poll = tracing_poll_pipe,
2668 .read = tracing_read_pipe,
2669 .release = tracing_release_pipe,
2670};
2671
2672static struct file_operations tracing_entries_fops = {
2673 .open = tracing_open_generic,
2674 .read = tracing_entries_read,
2675 .write = tracing_entries_write,
2676};
2677
2678#ifdef CONFIG_DYNAMIC_FTRACE
2679
2680static ssize_t
2681tracing_read_long(struct file *filp, char __user *ubuf,
2682 size_t cnt, loff_t *ppos)
2683{
2684 unsigned long *p = filp->private_data;
2685 char buf[64];
2686 int r;
2687
2688 r = sprintf(buf, "%ld\n", *p);
2689
2690 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2691}
2692
2693static struct file_operations tracing_read_long_fops = {
2694 .open = tracing_open_generic,
2695 .read = tracing_read_long,
2696};
2697#endif
2698
2699static struct dentry *d_tracer;
2700
2701struct dentry *tracing_init_dentry(void)
2702{
2703 static int once;
2704
2705 if (d_tracer)
2706 return d_tracer;
2707
2708 d_tracer = debugfs_create_dir("tracing", NULL);
2709
2710 if (!d_tracer && !once) {
2711 once = 1;
2712 pr_warning("Could not create debugfs directory 'tracing'\n");
2713 return NULL;
2714 }
2715
2716 return d_tracer;
2717}
2718
2719#ifdef CONFIG_FTRACE_SELFTEST
2720/* Let selftest have access to static functions in this file */
2721#include "trace_selftest.c"
2722#endif
2723
2724static __init void tracer_init_debugfs(void)
2725{
2726 struct dentry *d_tracer;
2727 struct dentry *entry;
2728
2729 d_tracer = tracing_init_dentry();
2730
2731 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2732 &global_trace, &tracing_ctrl_fops);
2733 if (!entry)
2734 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2735
2736 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2737 NULL, &tracing_iter_fops);
2738 if (!entry)
2739 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2740
2741 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2742 NULL, &tracing_cpumask_fops);
2743 if (!entry)
2744 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2745
2746 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2747 &global_trace, &tracing_lt_fops);
2748 if (!entry)
2749 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2750
2751 entry = debugfs_create_file("trace", 0444, d_tracer,
2752 &global_trace, &tracing_fops);
2753 if (!entry)
2754 pr_warning("Could not create debugfs 'trace' entry\n");
2755
2756 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2757 &global_trace, &show_traces_fops);
2758 if (!entry)
2759 pr_warning("Could not create debugfs 'trace' entry\n");
2760
2761 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2762 &global_trace, &set_tracer_fops);
2763 if (!entry)
2764 pr_warning("Could not create debugfs 'trace' entry\n");
2765
2766 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2767 &tracing_max_latency,
2768 &tracing_max_lat_fops);
2769 if (!entry)
2770 pr_warning("Could not create debugfs "
2771 "'tracing_max_latency' entry\n");
2772
2773 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2774 &tracing_thresh, &tracing_max_lat_fops);
2775 if (!entry)
2776 pr_warning("Could not create debugfs "
2777 "'tracing_threash' entry\n");
2778 entry = debugfs_create_file("README", 0644, d_tracer,
2779 NULL, &tracing_readme_fops);
2780 if (!entry)
2781 pr_warning("Could not create debugfs 'README' entry\n");
2782
2783 entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2784 NULL, &tracing_pipe_fops);
2785 if (!entry)
2786 pr_warning("Could not create debugfs "
2787 "'tracing_threash' entry\n");
2788
2789 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2790 &global_trace, &tracing_entries_fops);
2791 if (!entry)
2792 pr_warning("Could not create debugfs "
2793 "'tracing_threash' entry\n");
2794
2795#ifdef CONFIG_DYNAMIC_FTRACE
2796 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2797 &ftrace_update_tot_cnt,
2798 &tracing_read_long_fops);
2799 if (!entry)
2800 pr_warning("Could not create debugfs "
2801 "'dyn_ftrace_total_info' entry\n");
2802#endif
2803#ifdef CONFIG_SYSPROF_TRACER
2804 init_tracer_sysprof_debugfs(d_tracer);
2805#endif
2806}
2807
2808static int trace_alloc_page(void)
2809{
2810 struct trace_array_cpu *data;
2811 struct page *page, *tmp;
2812 LIST_HEAD(pages);
2813 void *array;
2814 unsigned pages_allocated = 0;
2815 int i;
2816
2817 /* first allocate a page for each CPU */
2818 for_each_tracing_cpu(i) {
2819 array = (void *)__get_free_page(GFP_KERNEL);
2820 if (array == NULL) {
2821 printk(KERN_ERR "tracer: failed to allocate page "
2822 "for trace buffer!\n");
2823 goto free_pages;
2824 }
2825
2826 pages_allocated++;
2827 page = virt_to_page(array);
2828 list_add(&page->lru, &pages);
2829
2830/* Only allocate if we are actually using the max trace */
2831#ifdef CONFIG_TRACER_MAX_TRACE
2832 array = (void *)__get_free_page(GFP_KERNEL);
2833 if (array == NULL) {
2834 printk(KERN_ERR "tracer: failed to allocate page "
2835 "for trace buffer!\n");
2836 goto free_pages;
2837 }
2838 pages_allocated++;
2839 page = virt_to_page(array);
2840 list_add(&page->lru, &pages);
2841#endif
2842 }
2843
2844 /* Now that we have successfully allocated a page per CPU, add them */
2845 for_each_tracing_cpu(i) {
2846 data = global_trace.data[i];
2847 page = list_entry(pages.next, struct page, lru);
2848 list_del_init(&page->lru);
2849 list_add_tail(&page->lru, &data->trace_pages);
2850 ClearPageLRU(page);
2851
2852#ifdef CONFIG_TRACER_MAX_TRACE
2853 data = max_tr.data[i];
2854 page = list_entry(pages.next, struct page, lru);
2855 list_del_init(&page->lru);
2856 list_add_tail(&page->lru, &data->trace_pages);
2857 SetPageLRU(page);
2858#endif
2859 }
2860 tracing_pages_allocated += pages_allocated;
2861 global_trace.entries += ENTRIES_PER_PAGE;
2862
2863 return 0;
2864
2865 free_pages:
2866 list_for_each_entry_safe(page, tmp, &pages, lru) {
2867 list_del_init(&page->lru);
2868 __free_page(page);
2869 }
2870 return -ENOMEM;
2871}
2872
2873static int trace_free_page(void)
2874{
2875 struct trace_array_cpu *data;
2876 struct page *page;
2877 struct list_head *p;
2878 int i;
2879 int ret = 0;
2880
2881 /* free one page from each buffer */
2882 for_each_tracing_cpu(i) {
2883 data = global_trace.data[i];
2884 p = data->trace_pages.next;
2885 if (p == &data->trace_pages) {
2886 /* should never happen */
2887 WARN_ON(1);
2888 tracing_disabled = 1;
2889 ret = -1;
2890 break;
2891 }
2892 page = list_entry(p, struct page, lru);
2893 ClearPageLRU(page);
2894 list_del(&page->lru);
2895 tracing_pages_allocated--;
2896 tracing_pages_allocated--;
2897 __free_page(page);
2898
2899 tracing_reset(data);
2900
2901#ifdef CONFIG_TRACER_MAX_TRACE
2902 data = max_tr.data[i];
2903 p = data->trace_pages.next;
2904 if (p == &data->trace_pages) {
2905 /* should never happen */
2906 WARN_ON(1);
2907 tracing_disabled = 1;
2908 ret = -1;
2909 break;
2910 }
2911 page = list_entry(p, struct page, lru);
2912 ClearPageLRU(page);
2913 list_del(&page->lru);
2914 __free_page(page);
2915
2916 tracing_reset(data);
2917#endif
2918 }
2919 global_trace.entries -= ENTRIES_PER_PAGE;
2920
2921 return ret;
2922}
2923
2924__init static int tracer_alloc_buffers(void)
2925{
2926 struct trace_array_cpu *data;
2927 void *array;
2928 struct page *page;
2929 int pages = 0;
2930 int ret = -ENOMEM;
2931 int i;
2932
2933 global_trace.ctrl = tracer_enabled;
2934
2935 /* TODO: make the number of buffers hot pluggable with CPUS */
2936 tracing_nr_buffers = num_possible_cpus();
2937 tracing_buffer_mask = cpu_possible_map;
2938
2939 /* Allocate the first page for all buffers */
2940 for_each_tracing_cpu(i) {
2941 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2942 max_tr.data[i] = &per_cpu(max_data, i);
2943
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page "
2947 "for trace buffer!\n");
2948 goto free_buffers;
2949 }
2950
2951 /* set the array to the list */
2952 INIT_LIST_HEAD(&data->trace_pages);
2953 page = virt_to_page(array);
2954 list_add(&page->lru, &data->trace_pages);
2955 /* use the LRU flag to differentiate the two buffers */
2956 ClearPageLRU(page);
2957
2958 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2959 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2960
2961/* Only allocate if we are actually using the max trace */
2962#ifdef CONFIG_TRACER_MAX_TRACE
2963 array = (void *)__get_free_page(GFP_KERNEL);
2964 if (array == NULL) {
2965 printk(KERN_ERR "tracer: failed to allocate page "
2966 "for trace buffer!\n");
2967 goto free_buffers;
2968 }
2969
2970 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
2971 page = virt_to_page(array);
2972 list_add(&page->lru, &max_tr.data[i]->trace_pages);
2973 SetPageLRU(page);
2974#endif
2975 }
2976
2977 /*
2978 * Since we allocate by orders of pages, we may be able to
2979 * round up a bit.
2980 */
2981 global_trace.entries = ENTRIES_PER_PAGE;
2982 pages++;
2983
2984 while (global_trace.entries < trace_nr_entries) {
2985 if (trace_alloc_page())
2986 break;
2987 pages++;
2988 }
2989 max_tr.entries = global_trace.entries;
2990
2991 pr_info("tracer: %d pages allocated for %ld",
2992 pages, trace_nr_entries);
2993 pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
2994 pr_info(" actual entries %ld\n", global_trace.entries);
2995
2996 tracer_init_debugfs();
2997
2998 trace_init_cmdlines();
2999
3000 register_tracer(&no_tracer);
3001 current_trace = &no_tracer;
3002
3003 /* All seems OK, enable tracing */
3004 tracing_disabled = 0;
3005
3006 return 0;
3007
3008 free_buffers:
3009 for (i-- ; i >= 0; i--) {
3010 struct page *page, *tmp;
3011 struct trace_array_cpu *data = global_trace.data[i];
3012
3013 if (data) {
3014 list_for_each_entry_safe(page, tmp,
3015 &data->trace_pages, lru) {
3016 list_del_init(&page->lru);
3017 __free_page(page);
3018 }
3019 }
3020
3021#ifdef CONFIG_TRACER_MAX_TRACE
3022 data = max_tr.data[i];
3023 if (data) {
3024 list_for_each_entry_safe(page, tmp,
3025 &data->trace_pages, lru) {
3026 list_del_init(&page->lru);
3027 __free_page(page);
3028 }
3029 }
3030#endif
3031 }
3032 return ret;
3033}
3034fs_initcall(tracer_alloc_buffers);
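
The consumer path above (tracing_open_pipe() and tracing_read_pipe()) is what backs the trace_pipe file: reads drain entries through trace_consume() and block while the buffer is empty, unless O_NONBLOCK is set. As an illustrative sketch only, assuming debugfs is mounted at /debug as in the mini-HOWTO string and that a tracer has already been selected through current_tracer, a minimal user-space consumer could look like this:

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                char buf[4096];
                ssize_t n;
                /* trace_pipe reads are consuming: entries are removed as they are read */
                int fd = open("/debug/tracing/trace_pipe", O_RDONLY);

                if (fd < 0) {
                        perror("open /debug/tracing/trace_pipe");
                        return 1;
                }
                /* read() blocks while the buffer is empty, matching tracing_read_pipe() above */
                while ((n = read(fd, buf, sizeof(buf))) > 0)
                        write(STDOUT_FILENO, buf, n);
                close(fd);
                return 0;
        }
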
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..b7f85d9c80d7
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,319 @@
1#ifndef _LINUX_KERNEL_TRACE_H
2#define _LINUX_KERNEL_TRACE_H
3
4#include <linux/fs.h>
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/clocksource.h>
8
9enum trace_type {
10 __TRACE_FIRST_TYPE = 0,
11
12 TRACE_FN,
13 TRACE_CTX,
14 TRACE_WAKE,
15 TRACE_STACK,
16 TRACE_SPECIAL,
17
18 __TRACE_LAST_TYPE
19};
20
21/*
22 * Function trace entry - function address and parent function address:
23 */
24struct ftrace_entry {
25 unsigned long ip;
26 unsigned long parent_ip;
27};
28
29/*
30 * Context switch trace entry - which task (and prio) we switched from/to:
31 */
32struct ctx_switch_entry {
33 unsigned int prev_pid;
34 unsigned char prev_prio;
35 unsigned char prev_state;
36 unsigned int next_pid;
37 unsigned char next_prio;
38 unsigned char next_state;
39};
40
41/*
42 * Special (free-form) trace entry:
43 */
44struct special_entry {
45 unsigned long arg1;
46 unsigned long arg2;
47 unsigned long arg3;
48};
49
50/*
51 * Stack-trace entry:
52 */
53
54#define FTRACE_STACK_ENTRIES 8
55
56struct stack_entry {
57 unsigned long caller[FTRACE_STACK_ENTRIES];
58};
59
60/*
61 * The trace entry - the most basic unit of tracing. This is what
62 * is printed in the end as a single line in the trace output, such as:
63 *
64 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
65 */
66struct trace_entry {
67 char type;
68 char cpu;
69 char flags;
70 char preempt_count;
71 int pid;
72 cycle_t t;
73 union {
74 struct ftrace_entry fn;
75 struct ctx_switch_entry ctx;
76 struct special_entry special;
77 struct stack_entry stack;
78 };
79};
80
81#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
82
83/*
84 * The CPU trace array - it consists of thousands of trace entries
85 * plus some other descriptor data: (for example which task started
86 * the trace, etc.)
87 */
88struct trace_array_cpu {
89 struct list_head trace_pages;
90 atomic_t disabled;
91 raw_spinlock_t lock;
92 struct lock_class_key lock_key;
93
94 /* these fields get copied into max-trace: */
95 unsigned trace_head_idx;
96 unsigned trace_tail_idx;
97 void *trace_head; /* producer */
98 void *trace_tail; /* consumer */
99 unsigned long trace_idx;
100 unsigned long overrun;
101 unsigned long saved_latency;
102 unsigned long critical_start;
103 unsigned long critical_end;
104 unsigned long critical_sequence;
105 unsigned long nice;
106 unsigned long policy;
107 unsigned long rt_priority;
108 cycle_t preempt_timestamp;
109 pid_t pid;
110 uid_t uid;
111 char comm[TASK_COMM_LEN];
112};
113
114struct trace_iterator;
115
116/*
117 * The trace array - an array of per-CPU trace arrays. This is the
118 * highest level data structure that individual tracers deal with.
119 * They have on/off state as well:
120 */
121struct trace_array {
122 unsigned long entries;
123 long ctrl;
124 int cpu;
125 cycle_t time_start;
126 struct task_struct *waiter;
127 struct trace_array_cpu *data[NR_CPUS];
128};
129
130/*
131 * A specific tracer, represented by methods that operate on a trace array:
132 */
133struct tracer {
134 const char *name;
135 void (*init)(struct trace_array *tr);
136 void (*reset)(struct trace_array *tr);
137 void (*open)(struct trace_iterator *iter);
138 void (*pipe_open)(struct trace_iterator *iter);
139 void (*close)(struct trace_iterator *iter);
140 void (*start)(struct trace_iterator *iter);
141 void (*stop)(struct trace_iterator *iter);
142 ssize_t (*read)(struct trace_iterator *iter,
143 struct file *filp, char __user *ubuf,
144 size_t cnt, loff_t *ppos);
145 void (*ctrl_update)(struct trace_array *tr);
146#ifdef CONFIG_FTRACE_STARTUP_TEST
147 int (*selftest)(struct tracer *trace,
148 struct trace_array *tr);
149#endif
150 int (*print_line)(struct trace_iterator *iter);
151 struct tracer *next;
152 int print_max;
153};
154
155struct trace_seq {
156 unsigned char buffer[PAGE_SIZE];
157 unsigned int len;
158 unsigned int readpos;
159};
160
161/*
162 * Trace iterator - used by printout routines that present trace
163 * results to users and that might sleep, etc.:
164 */
165struct trace_iterator {
166 struct trace_array *tr;
167 struct tracer *trace;
168 void *private;
169 long last_overrun[NR_CPUS];
170 long overrun[NR_CPUS];
171
172 /* The below is zeroed out in pipe_read */
173 struct trace_seq seq;
174 struct trace_entry *ent;
175 int cpu;
176
177 struct trace_entry *prev_ent;
178 int prev_cpu;
179
180 unsigned long iter_flags;
181 loff_t pos;
182 unsigned long next_idx[NR_CPUS];
183 struct list_head *next_page[NR_CPUS];
184 unsigned next_page_idx[NR_CPUS];
185 long idx;
186};
187
188void tracing_reset(struct trace_array_cpu *data);
189int tracing_open_generic(struct inode *inode, struct file *filp);
190struct dentry *tracing_init_dentry(void);
191void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
192
193void ftrace(struct trace_array *tr,
194 struct trace_array_cpu *data,
195 unsigned long ip,
196 unsigned long parent_ip,
197 unsigned long flags);
198void tracing_sched_switch_trace(struct trace_array *tr,
199 struct trace_array_cpu *data,
200 struct task_struct *prev,
201 struct task_struct *next,
202 unsigned long flags);
203void tracing_record_cmdline(struct task_struct *tsk);
204
205void tracing_sched_wakeup_trace(struct trace_array *tr,
206 struct trace_array_cpu *data,
207 struct task_struct *wakee,
208 struct task_struct *cur,
209 unsigned long flags);
210void trace_special(struct trace_array *tr,
211 struct trace_array_cpu *data,
212 unsigned long arg1,
213 unsigned long arg2,
214 unsigned long arg3);
215void trace_function(struct trace_array *tr,
216 struct trace_array_cpu *data,
217 unsigned long ip,
218 unsigned long parent_ip,
219 unsigned long flags);
220
221void tracing_start_function_trace(void);
222void tracing_stop_function_trace(void);
223int register_tracer(struct tracer *type);
224void unregister_tracer(struct tracer *type);
225
226extern unsigned long nsecs_to_usecs(unsigned long nsecs);
227
228extern unsigned long tracing_max_latency;
229extern unsigned long tracing_thresh;
230
231extern atomic_t trace_record_cmdline_enabled;
232
233void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
234void update_max_tr_single(struct trace_array *tr,
235 struct task_struct *tsk, int cpu);
236
237extern cycle_t ftrace_now(int cpu);
238
239#ifdef CONFIG_CONTEXT_SWITCH_TRACER
240typedef void
241(*tracer_switch_func_t)(void *private,
242 void *__rq,
243 struct task_struct *prev,
244 struct task_struct *next);
245
246struct tracer_switch_ops {
247 tracer_switch_func_t func;
248 void *private;
249 struct tracer_switch_ops *next;
250};
251
252#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
253
254#ifdef CONFIG_DYNAMIC_FTRACE
255extern unsigned long ftrace_update_tot_cnt;
256#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
257extern int DYN_FTRACE_TEST_NAME(void);
258#endif
259
260#ifdef CONFIG_FTRACE_STARTUP_TEST
261#ifdef CONFIG_FTRACE
262extern int trace_selftest_startup_function(struct tracer *trace,
263 struct trace_array *tr);
264#endif
265#ifdef CONFIG_IRQSOFF_TRACER
266extern int trace_selftest_startup_irqsoff(struct tracer *trace,
267 struct trace_array *tr);
268#endif
269#ifdef CONFIG_PREEMPT_TRACER
270extern int trace_selftest_startup_preemptoff(struct tracer *trace,
271 struct trace_array *tr);
272#endif
273#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
274extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
275 struct trace_array *tr);
276#endif
277#ifdef CONFIG_SCHED_TRACER
278extern int trace_selftest_startup_wakeup(struct tracer *trace,
279 struct trace_array *tr);
280#endif
281#ifdef CONFIG_CONTEXT_SWITCH_TRACER
282extern int trace_selftest_startup_sched_switch(struct tracer *trace,
283 struct trace_array *tr);
284#endif
285#ifdef CONFIG_SYSPROF_TRACER
286extern int trace_selftest_startup_sysprof(struct tracer *trace,
287 struct trace_array *tr);
288#endif
289#endif /* CONFIG_FTRACE_STARTUP_TEST */
290
291extern void *head_page(struct trace_array_cpu *data);
292extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
293extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
294 size_t cnt);
295extern long ns2usecs(cycle_t nsec);
296
297extern unsigned long trace_flags;
298
299/*
300 * trace_iterator_flags is an enumeration that defines bit
301 * positions into trace_flags that control the output.
302 *
303 * NOTE: These bits must match the trace_options array in
304 * trace.c.
305 */
306enum trace_iterator_flags {
307 TRACE_ITER_PRINT_PARENT = 0x01,
308 TRACE_ITER_SYM_OFFSET = 0x02,
309 TRACE_ITER_SYM_ADDR = 0x04,
310 TRACE_ITER_VERBOSE = 0x08,
311 TRACE_ITER_RAW = 0x10,
312 TRACE_ITER_HEX = 0x20,
313 TRACE_ITER_BIN = 0x40,
314 TRACE_ITER_BLOCK = 0x80,
315 TRACE_ITER_STACKTRACE = 0x100,
316 TRACE_ITER_SCHED_TREE = 0x200,
317};
318
319#endif /* _LINUX_KERNEL_TRACE_H */
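
Every record in the ring buffer is a fixed-size struct trace_entry: the union above makes a function entry, a context-switch entry and an eight-slot stack trace all occupy the same slot, so per-page capacity is simply the page size divided by TRACE_ENTRY_SIZE (the actual ENTRIES_PER_PAGE definition lives earlier in trace.c and is not shown in this hunk). The fragment below is only a user-space approximation of that arithmetic, with a hypothetical sample_entry standing in for the kernel structure and a 4096-byte page assumed:

        #include <stdio.h>

        /* user-space stand-in for struct trace_entry; field widths are approximate */
        struct sample_entry {
                char type, cpu, flags, preempt_count;
                int pid;
                unsigned long long t;                   /* cycle_t */
                union {
                        unsigned long fn[2];            /* ftrace_entry: ip, parent_ip */
                        unsigned long caller[8];        /* stack_entry */
                } u;
        };

        int main(void)
        {
                unsigned long page_size = 4096;         /* assumed page size */

                printf("entry size ~%zu bytes, ~%lu entries per page\n",
                       sizeof(struct sample_entry),
                       (unsigned long)(page_size / sizeof(struct sample_entry)));
                return 0;
        }
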
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..0a084656d7cf
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,78 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/debugfs.h>
13#include <linux/uaccess.h>
14#include <linux/ftrace.h>
15#include <linux/fs.h>
16
17#include "trace.h"
18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
29static void start_function_trace(struct trace_array *tr)
30{
31 function_reset(tr);
32 atomic_inc(&trace_record_cmdline_enabled);
33 tracing_start_function_trace();
34}
35
36static void stop_function_trace(struct trace_array *tr)
37{
38 tracing_stop_function_trace();
39 atomic_dec(&trace_record_cmdline_enabled);
40}
41
42static void function_trace_init(struct trace_array *tr)
43{
44 if (tr->ctrl)
45 start_function_trace(tr);
46}
47
48static void function_trace_reset(struct trace_array *tr)
49{
50 if (tr->ctrl)
51 stop_function_trace(tr);
52}
53
54static void function_trace_ctrl_update(struct trace_array *tr)
55{
56 if (tr->ctrl)
57 start_function_trace(tr);
58 else
59 stop_function_trace(tr);
60}
61
62static struct tracer function_trace __read_mostly =
63{
64 .name = "ftrace",
65 .init = function_trace_init,
66 .reset = function_trace_reset,
67 .ctrl_update = function_trace_ctrl_update,
68#ifdef CONFIG_FTRACE_SELFTEST
69 .selftest = trace_selftest_startup_function,
70#endif
71};
72
73static __init int init_function_trace(void)
74{
75 return register_tracer(&function_trace);
76}
77
78device_initcall(init_function_trace);
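
The function tracer above is controlled entirely through the debugfs files that the tracer core (trace.c) creates. A minimal user-space sketch of selecting it and reading the buffer, assuming debugfs is mounted at /sys/kernel/debug and using the "current_tracer"/"trace" files provided by trace.c:

#include <stdio.h>
#include <unistd.h>

/* assumed debugfs mount point; adjust to wherever debugfs is mounted */
#define TRACE_DIR "/sys/kernel/debug/tracing/"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[256];
	FILE *trace;

	/* select the plugin registered as "ftrace" above */
	if (write_str(TRACE_DIR "current_tracer", "ftrace"))
		return 1;

	sleep(1);	/* let it record some function calls */

	trace = fopen(TRACE_DIR "trace", "r");
	if (!trace)
		return 1;
	while (fgets(line, sizeof(line), trace))
		fputs(line, stdout);
	fclose(trace);
	return 0;
}
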
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..761f3ec66c50
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,502 @@
1/*
2 * trace irqs off critical timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * From code in the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h>
15#include <linux/module.h>
16#include <linux/ftrace.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21static struct trace_array *irqsoff_trace __read_mostly;
22static int tracer_enabled __read_mostly;
23
24static DEFINE_PER_CPU(int, tracing_cpu);
25
26static DEFINE_SPINLOCK(max_trace_lock);
27
28enum {
29 TRACER_IRQS_OFF = (1 << 1),
30 TRACER_PREEMPT_OFF = (1 << 2),
31};
32
33static int trace_type __read_mostly;
34
35#ifdef CONFIG_PREEMPT_TRACER
36static inline int
37preempt_trace(void)
38{
39 return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
40}
41#else
42# define preempt_trace() (0)
43#endif
44
45#ifdef CONFIG_IRQSOFF_TRACER
46static inline int
47irq_trace(void)
48{
49 return ((trace_type & TRACER_IRQS_OFF) &&
50 irqs_disabled());
51}
52#else
53# define irq_trace() (0)
54#endif
55
56/*
57 * Sequence count - we record it when starting a measurement and
58 * skip the latency if the sequence has changed - some other section
59 * did a maximum and could disturb our measurement with serial console
60 * printouts, etc. Truly coinciding maximum latencies should be rare
61 * and what happens together happens separately as well, so this doesn't
62 * decrease the validity of the maximum found:
63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence;
65
66#ifdef CONFIG_FTRACE
67/*
68 * irqsoff uses its own tracer function to keep the overhead down:
69 */
70static void
71irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
72{
73 struct trace_array *tr = irqsoff_trace;
74 struct trace_array_cpu *data;
75 unsigned long flags;
76 long disabled;
77 int cpu;
78
79 /*
80 * Does not matter if we preempt. We test the flags
81 * afterward, to see if irqs are disabled or not.
82 * If we preempt and get a false positive, the flags
83 * test will fail.
84 */
85 cpu = raw_smp_processor_id();
86 if (likely(!per_cpu(tracing_cpu, cpu)))
87 return;
88
89 local_save_flags(flags);
90 /* slight chance to get a false positive on tracing_cpu */
91 if (!irqs_disabled_flags(flags))
92 return;
93
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags);
99
100 atomic_dec(&data->disabled);
101}
102
103static struct ftrace_ops trace_ops __read_mostly =
104{
105 .func = irqsoff_tracer_call,
106};
107#endif /* CONFIG_FTRACE */
108
109/*
110 * Should this new latency be reported/recorded?
111 */
112static int report_latency(cycle_t delta)
113{
114 if (tracing_thresh) {
115 if (delta < tracing_thresh)
116 return 0;
117 } else {
118 if (delta <= tracing_max_latency)
119 return 0;
120 }
121 return 1;
122}
123
124static void
125check_critical_timing(struct trace_array *tr,
126 struct trace_array_cpu *data,
127 unsigned long parent_ip,
128 int cpu)
129{
130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta;
132 unsigned long flags;
133
134 /*
135 * usecs conversion is slow so we try to delay the conversion
136 * as long as possible:
137 */
138 T0 = data->preempt_timestamp;
139 T1 = ftrace_now(cpu);
140 delta = T1-T0;
141
142 local_save_flags(flags);
143
144 if (!report_latency(delta))
145 goto out;
146
147 spin_lock_irqsave(&max_trace_lock, flags);
148
149 /* check if we are still the max latency */
150 if (!report_latency(delta))
151 goto out_unlock;
152
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
154
155 latency = nsecs_to_usecs(delta);
156
157 if (data->critical_sequence != max_sequence)
158 goto out_unlock;
159
160 tracing_max_latency = delta;
161 t0 = nsecs_to_usecs(T0);
162 t1 = nsecs_to_usecs(T1);
163
164 data->critical_end = parent_ip;
165
166 update_max_tr_single(tr, current, cpu);
167
168 if (!runqueue_is_locked()) {
169 if (tracing_thresh) {
170 printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical"
171 " section violates %lu us threshold.\n",
172 current->comm, current->pid,
173 raw_smp_processor_id(),
174 latency, nsecs_to_usecs(tracing_thresh));
175 } else {
176 printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us"
177 " maximum-latency critical section.\n",
178 current->comm, current->pid,
179 raw_smp_processor_id(),
180 latency);
181 }
182 }
183
184 max_sequence++;
185
186out_unlock:
187 spin_unlock_irqrestore(&max_trace_lock, flags);
188
189out:
190 data->critical_sequence = max_sequence;
191 data->preempt_timestamp = ftrace_now(cpu);
192 tracing_reset(data);
193 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
194}
195
196static inline void
197start_critical_timing(unsigned long ip, unsigned long parent_ip)
198{
199 int cpu;
200 struct trace_array *tr = irqsoff_trace;
201 struct trace_array_cpu *data;
202 unsigned long flags;
203
204 if (likely(!tracer_enabled))
205 return;
206
207 cpu = raw_smp_processor_id();
208
209 if (per_cpu(tracing_cpu, cpu))
210 return;
211
212 data = tr->data[cpu];
213
214 if (unlikely(!data) || atomic_read(&data->disabled))
215 return;
216
217 atomic_inc(&data->disabled);
218
219 data->critical_sequence = max_sequence;
220 data->preempt_timestamp = ftrace_now(cpu);
221 data->critical_start = parent_ip ? : ip;
222 tracing_reset(data);
223
224 local_save_flags(flags);
225
226 trace_function(tr, data, ip, parent_ip, flags);
227
228 per_cpu(tracing_cpu, cpu) = 1;
229
230 atomic_dec(&data->disabled);
231}
232
233static inline void
234stop_critical_timing(unsigned long ip, unsigned long parent_ip)
235{
236 int cpu;
237 struct trace_array *tr = irqsoff_trace;
238 struct trace_array_cpu *data;
239 unsigned long flags;
240
241 cpu = raw_smp_processor_id();
242 /* Always clear the tracing cpu on stopping the trace */
243 if (unlikely(per_cpu(tracing_cpu, cpu)))
244 per_cpu(tracing_cpu, cpu) = 0;
245 else
246 return;
247
248 if (!tracer_enabled)
249 return;
250
251 data = tr->data[cpu];
252
253 if (unlikely(!data) || unlikely(!head_page(data)) ||
254 !data->critical_start || atomic_read(&data->disabled))
255 return;
256
257 atomic_inc(&data->disabled);
258
259 local_save_flags(flags);
260 trace_function(tr, data, ip, parent_ip, flags);
261 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
262 data->critical_start = 0;
263 atomic_dec(&data->disabled);
264}
265
266/* start and stop critical timings, used to stop the measurement while in idle */
267void start_critical_timings(void)
268{
269 if (preempt_trace() || irq_trace())
270 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
271}
272
273void stop_critical_timings(void)
274{
275 if (preempt_trace() || irq_trace())
276 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
277}
278
279#ifdef CONFIG_IRQSOFF_TRACER
280#ifdef CONFIG_PROVE_LOCKING
281void time_hardirqs_on(unsigned long a0, unsigned long a1)
282{
283 if (!preempt_trace() && irq_trace())
284 stop_critical_timing(a0, a1);
285}
286
287void time_hardirqs_off(unsigned long a0, unsigned long a1)
288{
289 if (!preempt_trace() && irq_trace())
290 start_critical_timing(a0, a1);
291}
292
293#else /* !CONFIG_PROVE_LOCKING */
294
295/*
296 * Stubs:
297 */
298
299void early_boot_irqs_off(void)
300{
301}
302
303void early_boot_irqs_on(void)
304{
305}
306
307void trace_softirqs_on(unsigned long ip)
308{
309}
310
311void trace_softirqs_off(unsigned long ip)
312{
313}
314
315inline void print_irqtrace_events(struct task_struct *curr)
316{
317}
318
319/*
320 * We are only interested in hardirq on/off events:
321 */
322void trace_hardirqs_on(void)
323{
324 if (!preempt_trace() && irq_trace())
325 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
326}
327EXPORT_SYMBOL(trace_hardirqs_on);
328
329void trace_hardirqs_off(void)
330{
331 if (!preempt_trace() && irq_trace())
332 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
333}
334EXPORT_SYMBOL(trace_hardirqs_off);
335
336void trace_hardirqs_on_caller(unsigned long caller_addr)
337{
338 if (!preempt_trace() && irq_trace())
339 stop_critical_timing(CALLER_ADDR0, caller_addr);
340}
341EXPORT_SYMBOL(trace_hardirqs_on_caller);
342
343void trace_hardirqs_off_caller(unsigned long caller_addr)
344{
345 if (!preempt_trace() && irq_trace())
346 start_critical_timing(CALLER_ADDR0, caller_addr);
347}
348EXPORT_SYMBOL(trace_hardirqs_off_caller);
349
350#endif /* CONFIG_PROVE_LOCKING */
351#endif /* CONFIG_IRQSOFF_TRACER */
352
353#ifdef CONFIG_PREEMPT_TRACER
354void trace_preempt_on(unsigned long a0, unsigned long a1)
355{
356 stop_critical_timing(a0, a1);
357}
358
359void trace_preempt_off(unsigned long a0, unsigned long a1)
360{
361 start_critical_timing(a0, a1);
362}
363#endif /* CONFIG_PREEMPT_TRACER */
364
365static void start_irqsoff_tracer(struct trace_array *tr)
366{
367 register_ftrace_function(&trace_ops);
368 tracer_enabled = 1;
369}
370
371static void stop_irqsoff_tracer(struct trace_array *tr)
372{
373 tracer_enabled = 0;
374 unregister_ftrace_function(&trace_ops);
375}
376
377static void __irqsoff_tracer_init(struct trace_array *tr)
378{
379 irqsoff_trace = tr;
380 /* make sure that the tracer is visible */
381 smp_wmb();
382
383 if (tr->ctrl)
384 start_irqsoff_tracer(tr);
385}
386
387static void irqsoff_tracer_reset(struct trace_array *tr)
388{
389 if (tr->ctrl)
390 stop_irqsoff_tracer(tr);
391}
392
393static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
394{
395 if (tr->ctrl)
396 start_irqsoff_tracer(tr);
397 else
398 stop_irqsoff_tracer(tr);
399}
400
401static void irqsoff_tracer_open(struct trace_iterator *iter)
402{
403 /* stop the trace while dumping */
404 if (iter->tr->ctrl)
405 stop_irqsoff_tracer(iter->tr);
406}
407
408static void irqsoff_tracer_close(struct trace_iterator *iter)
409{
410 if (iter->tr->ctrl)
411 start_irqsoff_tracer(iter->tr);
412}
413
414#ifdef CONFIG_IRQSOFF_TRACER
415static void irqsoff_tracer_init(struct trace_array *tr)
416{
417 trace_type = TRACER_IRQS_OFF;
418
419 __irqsoff_tracer_init(tr);
420}
421static struct tracer irqsoff_tracer __read_mostly =
422{
423 .name = "irqsoff",
424 .init = irqsoff_tracer_init,
425 .reset = irqsoff_tracer_reset,
426 .open = irqsoff_tracer_open,
427 .close = irqsoff_tracer_close,
428 .ctrl_update = irqsoff_tracer_ctrl_update,
429 .print_max = 1,
430#ifdef CONFIG_FTRACE_SELFTEST
431 .selftest = trace_selftest_startup_irqsoff,
432#endif
433};
434# define register_irqsoff(trace) register_tracer(&trace)
435#else
436# define register_irqsoff(trace) do { } while (0)
437#endif
438
439#ifdef CONFIG_PREEMPT_TRACER
440static void preemptoff_tracer_init(struct trace_array *tr)
441{
442 trace_type = TRACER_PREEMPT_OFF;
443
444 __irqsoff_tracer_init(tr);
445}
446
447static struct tracer preemptoff_tracer __read_mostly =
448{
449 .name = "preemptoff",
450 .init = preemptoff_tracer_init,
451 .reset = irqsoff_tracer_reset,
452 .open = irqsoff_tracer_open,
453 .close = irqsoff_tracer_close,
454 .ctrl_update = irqsoff_tracer_ctrl_update,
455 .print_max = 1,
456#ifdef CONFIG_FTRACE_SELFTEST
457 .selftest = trace_selftest_startup_preemptoff,
458#endif
459};
460# define register_preemptoff(trace) register_tracer(&trace)
461#else
462# define register_preemptoff(trace) do { } while (0)
463#endif
464
465#if defined(CONFIG_IRQSOFF_TRACER) && \
466 defined(CONFIG_PREEMPT_TRACER)
467
468static void preemptirqsoff_tracer_init(struct trace_array *tr)
469{
470 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
471
472 __irqsoff_tracer_init(tr);
473}
474
475static struct tracer preemptirqsoff_tracer __read_mostly =
476{
477 .name = "preemptirqsoff",
478 .init = preemptirqsoff_tracer_init,
479 .reset = irqsoff_tracer_reset,
480 .open = irqsoff_tracer_open,
481 .close = irqsoff_tracer_close,
482 .ctrl_update = irqsoff_tracer_ctrl_update,
483 .print_max = 1,
484#ifdef CONFIG_FTRACE_SELFTEST
485 .selftest = trace_selftest_startup_preemptirqsoff,
486#endif
487};
488
489# define register_preemptirqsoff(trace) register_tracer(&trace)
490#else
491# define register_preemptirqsoff(trace) do { } while (0)
492#endif
493
494__init static int init_irqsoff_tracer(void)
495{
496 register_irqsoff(irqsoff_tracer);
497 register_preemptoff(preemptoff_tracer);
498 register_preemptirqsoff(preemptirqsoff_tracer);
499
500 return 0;
501}
502device_initcall(init_irqsoff_tracer);
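
The irqsoff tracer does not annotate critical sections itself; it is driven by the trace_hardirqs_on()/off() (or, under lockdep, time_hardirqs_*()) hooks that the generic irq-flags helpers call. A simplified sketch of that pairing, with the helper expansion assumed rather than copied from include/linux/irqflags.h:

#include <linux/irqflags.h>
#include <linux/delay.h>

/*
 * Simplified assumption of how local_irq_disable()/local_irq_enable() expand
 * when CONFIG_TRACE_IRQFLAGS is set: the raw_* calls do the real flag
 * manipulation, the trace_* calls land in the hooks defined above.
 */
#define sketch_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
#define sketch_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)

static void sketch_critical_section(void)
{
	sketch_irq_disable();	/* start_critical_timing() runs here */
	udelay(150);		/* work done with irqs off gets timed */
	sketch_irq_enable();	/* stop_critical_timing() checks/records the latency */
}
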
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..d25ffa5eaf2b
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,301 @@
1/*
2 * trace context switch
3 *
4 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h>
11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h>
14
15#include "trace.h"
16
17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref;
20
21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev,
23 struct task_struct *next)
24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data;
28 unsigned long flags;
29 long disabled;
30 int cpu;
31
32 if (!tracer_enabled)
33 return;
34
35 local_irq_save(flags);
36 cpu = raw_smp_processor_id();
37 data = tr->data[cpu];
38 disabled = atomic_inc_return(&data->disabled);
39
40 if (likely(disabled == 1))
41 tracing_sched_switch_trace(tr, data, prev, next, flags);
42
43 atomic_dec(&data->disabled);
44 local_irq_restore(flags);
45}
46
47static notrace void
48sched_switch_callback(void *probe_data, void *call_data,
49 const char *format, va_list *args)
50{
51 struct task_struct *prev;
52 struct task_struct *next;
53 struct rq *__rq;
54
55 if (!atomic_read(&sched_ref))
56 return;
57
58 /* skip prev_pid %d next_pid %d prev_state %ld */
59 (void)va_arg(*args, int);
60 (void)va_arg(*args, int);
61 (void)va_arg(*args, long);
62 __rq = va_arg(*args, typeof(__rq));
63 prev = va_arg(*args, typeof(prev));
64 next = va_arg(*args, typeof(next));
65
66 tracing_record_cmdline(prev);
67
68 /*
69 * If tracer_switch_func only points to the local
70 * switch func, it still needs the ptr passed to it.
71 */
72 sched_switch_func(probe_data, __rq, prev, next);
73}
74
75static void
76wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
77 task_struct *curr)
78{
79 struct trace_array **ptr = private;
80 struct trace_array *tr = *ptr;
81 struct trace_array_cpu *data;
82 unsigned long flags;
83 long disabled;
84 int cpu;
85
86 if (!tracer_enabled)
87 return;
88
89 tracing_record_cmdline(curr);
90
91 local_irq_save(flags);
92 cpu = raw_smp_processor_id();
93 data = tr->data[cpu];
94 disabled = atomic_inc_return(&data->disabled);
95
96 if (likely(disabled == 1))
97 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
98
99 atomic_dec(&data->disabled);
100 local_irq_restore(flags);
101}
102
103static notrace void
104wake_up_callback(void *probe_data, void *call_data,
105 const char *format, va_list *args)
106{
107 struct task_struct *curr;
108 struct task_struct *task;
109 struct rq *__rq;
110
111 if (likely(!tracer_enabled))
112 return;
113
114 /* Skip pid %d state %ld */
115 (void)va_arg(*args, int);
116 (void)va_arg(*args, long);
117 /* now get the meat: "rq %p task %p rq->curr %p" */
118 __rq = va_arg(*args, typeof(__rq));
119 task = va_arg(*args, typeof(task));
120 curr = va_arg(*args, typeof(curr));
121
122 tracing_record_cmdline(task);
123 tracing_record_cmdline(curr);
124
125 wakeup_func(probe_data, __rq, task, curr);
126}
127
128void
129ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
130{
131 struct trace_array *tr = ctx_trace;
132 struct trace_array_cpu *data;
133 unsigned long flags;
134 long disabled;
135 int cpu;
136
137 if (!tracer_enabled)
138 return;
139
140 local_irq_save(flags);
141 cpu = raw_smp_processor_id();
142 data = tr->data[cpu];
143 disabled = atomic_inc_return(&data->disabled);
144
145 if (likely(disabled == 1))
146 __trace_special(tr, data, arg1, arg2, arg3);
147
148 atomic_dec(&data->disabled);
149 local_irq_restore(flags);
150}
151
152static void sched_switch_reset(struct trace_array *tr)
153{
154 int cpu;
155
156 tr->time_start = ftrace_now(tr->cpu);
157
158 for_each_online_cpu(cpu)
159 tracing_reset(tr->data[cpu]);
160}
161
162static int tracing_sched_register(void)
163{
164 int ret;
165
166 ret = marker_probe_register("kernel_sched_wakeup",
167 "pid %d state %ld ## rq %p task %p rq->curr %p",
168 wake_up_callback,
169 &ctx_trace);
170 if (ret) {
171 pr_info("wakeup trace: Couldn't add marker"
172 " probe to kernel_sched_wakeup\n");
173 return ret;
174 }
175
176 ret = marker_probe_register("kernel_sched_wakeup_new",
177 "pid %d state %ld ## rq %p task %p rq->curr %p",
178 wake_up_callback,
179 &ctx_trace);
180 if (ret) {
181 pr_info("wakeup trace: Couldn't add marker"
182 " probe to kernel_sched_wakeup_new\n");
183 goto fail_deprobe;
184 }
185
186 ret = marker_probe_register("kernel_sched_schedule",
187 "prev_pid %d next_pid %d prev_state %ld "
188 "## rq %p prev %p next %p",
189 sched_switch_callback,
190 &ctx_trace);
191 if (ret) {
192 pr_info("sched trace: Couldn't add marker"
193 " probe to kernel_sched_schedule\n");
194 goto fail_deprobe_wake_new;
195 }
196
197 return ret;
198fail_deprobe_wake_new:
199 marker_probe_unregister("kernel_sched_wakeup_new",
200 wake_up_callback,
201 &ctx_trace);
202fail_deprobe:
203 marker_probe_unregister("kernel_sched_wakeup",
204 wake_up_callback,
205 &ctx_trace);
206 return ret;
207}
208
209static void tracing_sched_unregister(void)
210{
211 marker_probe_unregister("kernel_sched_schedule",
212 sched_switch_callback,
213 &ctx_trace);
214 marker_probe_unregister("kernel_sched_wakeup_new",
215 wake_up_callback,
216 &ctx_trace);
217 marker_probe_unregister("kernel_sched_wakeup",
218 wake_up_callback,
219 &ctx_trace);
220}
221
222void tracing_start_sched_switch(void)
223{
224 long ref;
225
226 ref = atomic_inc_return(&sched_ref);
227 if (ref == 1)
228 tracing_sched_register();
229}
230
231void tracing_stop_sched_switch(void)
232{
233 long ref;
234
235 ref = atomic_dec_and_test(&sched_ref);
236 if (ref)
237 tracing_sched_unregister();
238}
239
240static void start_sched_trace(struct trace_array *tr)
241{
242 sched_switch_reset(tr);
243 atomic_inc(&trace_record_cmdline_enabled);
244 tracer_enabled = 1;
245 tracing_start_sched_switch();
246}
247
248static void stop_sched_trace(struct trace_array *tr)
249{
250 tracing_stop_sched_switch();
251 atomic_dec(&trace_record_cmdline_enabled);
252 tracer_enabled = 0;
253}
254
255static void sched_switch_trace_init(struct trace_array *tr)
256{
257 ctx_trace = tr;
258
259 if (tr->ctrl)
260 start_sched_trace(tr);
261}
262
263static void sched_switch_trace_reset(struct trace_array *tr)
264{
265 if (tr->ctrl)
266 stop_sched_trace(tr);
267}
268
269static void sched_switch_trace_ctrl_update(struct trace_array *tr)
270{
271 /* When starting a new trace, reset the buffers */
272 if (tr->ctrl)
273 start_sched_trace(tr);
274 else
275 stop_sched_trace(tr);
276}
277
278static struct tracer sched_switch_trace __read_mostly =
279{
280 .name = "sched_switch",
281 .init = sched_switch_trace_init,
282 .reset = sched_switch_trace_reset,
283 .ctrl_update = sched_switch_trace_ctrl_update,
284#ifdef CONFIG_FTRACE_SELFTEST
285 .selftest = trace_selftest_startup_sched_switch,
286#endif
287};
288
289__init static int init_sched_switch_trace(void)
290{
291 int ret = 0;
292
293 if (atomic_read(&sched_ref))
294 ret = tracing_sched_register();
295 if (ret) {
296 pr_info("error registering scheduler trace\n");
297 return ret;
298 }
299 return register_tracer(&sched_switch_trace);
300}
301device_initcall(init_sched_switch_trace);
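
Both callbacks above rely on pulling arguments off the va_list in exactly the order given by the format string passed to marker_probe_register(). A minimal sketch of that pattern for a made-up marker (name and format are hypothetical; only the kernel_sched_* markers are used by this patch):

#include <linux/module.h>
#include <linux/marker.h>

static void example_probe(void *probe_private, void *call_private,
			  const char *format, va_list *args)
{
	int pid;
	void *ptr;

	/* must match "pid %d ptr %p" below, in that order */
	pid = va_arg(*args, int);
	ptr = va_arg(*args, void *);

	pr_debug("example marker: pid=%d ptr=%p\n", pid, ptr);
}

static int __init example_init(void)
{
	return marker_probe_register("example_marker", "pid %d ptr %p",
				     example_probe, NULL);
}

static void __exit example_exit(void)
{
	marker_probe_unregister("example_marker", example_probe, NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
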
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..5d2fb48e47f8
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,382 @@
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
18#include <linux/marker.h>
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static DEFINE_SPINLOCK(wakeup_lock);
30
31static void __wakeup_reset(struct trace_array *tr);
32
33/*
34 * Should this new latency be reported/recorded?
35 */
36static int report_latency(cycle_t delta)
37{
38 if (tracing_thresh) {
39 if (delta < tracing_thresh)
40 return 0;
41 } else {
42 if (delta <= tracing_max_latency)
43 return 0;
44 }
45 return 1;
46}
47
48static void notrace
49wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
50 struct task_struct *next)
51{
52 unsigned long latency = 0, t0 = 0, t1 = 0;
53 struct trace_array **ptr = private;
54 struct trace_array *tr = *ptr;
55 struct trace_array_cpu *data;
56 cycle_t T0, T1, delta;
57 unsigned long flags;
58 long disabled;
59 int cpu;
60
61 if (unlikely(!tracer_enabled))
62 return;
63
64 /*
65 * When we start a new trace, we set wakeup_task to NULL
66 * and then set tracer_enabled = 1. We want to make sure
67 * that another CPU does not see the tracer_enabled = 1
68 * and the wakeup_task with an older task, that might
69 * actually be the same as next.
70 */
71 smp_rmb();
72
73 if (next != wakeup_task)
74 return;
75
76 /* The task we are waiting for is waking up */
77 data = tr->data[wakeup_cpu];
78
79 /* disable local data, not wakeup_cpu data */
80 cpu = raw_smp_processor_id();
81 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
82 if (likely(disabled != 1))
83 goto out;
84
85 spin_lock_irqsave(&wakeup_lock, flags);
86
87 /* We could race with grabbing wakeup_lock */
88 if (unlikely(!tracer_enabled || next != wakeup_task))
89 goto out_unlock;
90
91 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
92
93 /*
94 * usecs conversion is slow so we try to delay the conversion
95 * as long as possible:
96 */
97 T0 = data->preempt_timestamp;
98 T1 = ftrace_now(cpu);
99 delta = T1-T0;
100
101 if (!report_latency(delta))
102 goto out_unlock;
103
104 latency = nsecs_to_usecs(delta);
105
106 tracing_max_latency = delta;
107 t0 = nsecs_to_usecs(T0);
108 t1 = nsecs_to_usecs(T1);
109
110 update_max_tr(tr, wakeup_task, wakeup_cpu);
111
112out_unlock:
113 __wakeup_reset(tr);
114 spin_unlock_irqrestore(&wakeup_lock, flags);
115out:
116 atomic_dec(&tr->data[cpu]->disabled);
117}
118
119static notrace void
120sched_switch_callback(void *probe_data, void *call_data,
121 const char *format, va_list *args)
122{
123 struct task_struct *prev;
124 struct task_struct *next;
125 struct rq *__rq;
126
127 /* skip prev_pid %d next_pid %d prev_state %ld */
128 (void)va_arg(*args, int);
129 (void)va_arg(*args, int);
130 (void)va_arg(*args, long);
131 __rq = va_arg(*args, typeof(__rq));
132 prev = va_arg(*args, typeof(prev));
133 next = va_arg(*args, typeof(next));
134
135 tracing_record_cmdline(prev);
136
137 /*
138 * If tracer_switch_func only points to the local
139 * switch func, it still needs the ptr passed to it.
140 */
141 wakeup_sched_switch(probe_data, __rq, prev, next);
142}
143
144static void __wakeup_reset(struct trace_array *tr)
145{
146 struct trace_array_cpu *data;
147 int cpu;
148
149 assert_spin_locked(&wakeup_lock);
150
151 for_each_possible_cpu(cpu) {
152 data = tr->data[cpu];
153 tracing_reset(data);
154 }
155
156 wakeup_cpu = -1;
157 wakeup_prio = -1;
158
159 if (wakeup_task)
160 put_task_struct(wakeup_task);
161
162 wakeup_task = NULL;
163}
164
165static void wakeup_reset(struct trace_array *tr)
166{
167 unsigned long flags;
168
169 spin_lock_irqsave(&wakeup_lock, flags);
170 __wakeup_reset(tr);
171 spin_unlock_irqrestore(&wakeup_lock, flags);
172}
173
174static void
175wakeup_check_start(struct trace_array *tr, struct task_struct *p,
176 struct task_struct *curr)
177{
178 int cpu = smp_processor_id();
179 unsigned long flags;
180 long disabled;
181
182 if (likely(!rt_task(p)) ||
183 p->prio >= wakeup_prio ||
184 p->prio >= curr->prio)
185 return;
186
187 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
188 if (unlikely(disabled != 1))
189 goto out;
190
191 /* interrupts should be off from try_to_wake_up */
192 spin_lock(&wakeup_lock);
193
194 /* check for races. */
195 if (!tracer_enabled || p->prio >= wakeup_prio)
196 goto out_locked;
197
198 /* reset the trace */
199 __wakeup_reset(tr);
200
201 wakeup_cpu = task_cpu(p);
202 wakeup_prio = p->prio;
203
204 wakeup_task = p;
205 get_task_struct(wakeup_task);
206
207 local_save_flags(flags);
208
209 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
210 trace_function(tr, tr->data[wakeup_cpu],
211 CALLER_ADDR1, CALLER_ADDR2, flags);
212
213out_locked:
214 spin_unlock(&wakeup_lock);
215out:
216 atomic_dec(&tr->data[cpu]->disabled);
217}
218
219static notrace void
220wake_up_callback(void *probe_data, void *call_data,
221 const char *format, va_list *args)
222{
223 struct trace_array **ptr = probe_data;
224 struct trace_array *tr = *ptr;
225 struct task_struct *curr;
226 struct task_struct *task;
227 struct rq *__rq;
228
229 if (likely(!tracer_enabled))
230 return;
231
232 /* Skip pid %d state %ld */
233 (void)va_arg(*args, int);
234 (void)va_arg(*args, long);
235 /* now get the meat: "rq %p task %p rq->curr %p" */
236 __rq = va_arg(*args, typeof(__rq));
237 task = va_arg(*args, typeof(task));
238 curr = va_arg(*args, typeof(curr));
239
240 tracing_record_cmdline(task);
241 tracing_record_cmdline(curr);
242
243 wakeup_check_start(tr, task, curr);
244}
245
246static void start_wakeup_tracer(struct trace_array *tr)
247{
248 int ret;
249
250 ret = marker_probe_register("kernel_sched_wakeup",
251 "pid %d state %ld ## rq %p task %p rq->curr %p",
252 wake_up_callback,
253 &wakeup_trace);
254 if (ret) {
255 pr_info("wakeup trace: Couldn't add marker"
256 " probe to kernel_sched_wakeup\n");
257 return;
258 }
259
260 ret = marker_probe_register("kernel_sched_wakeup_new",
261 "pid %d state %ld ## rq %p task %p rq->curr %p",
262 wake_up_callback,
263 &wakeup_trace);
264 if (ret) {
265 pr_info("wakeup trace: Couldn't add marker"
266 " probe to kernel_sched_wakeup_new\n");
267 goto fail_deprobe;
268 }
269
270 ret = marker_probe_register("kernel_sched_schedule",
271 "prev_pid %d next_pid %d prev_state %ld "
272 "## rq %p prev %p next %p",
273 sched_switch_callback,
274 &wakeup_trace);
275 if (ret) {
276 pr_info("sched trace: Couldn't add marker"
277 " probe to kernel_sched_schedule\n");
278 goto fail_deprobe_wake_new;
279 }
280
281 wakeup_reset(tr);
282
283 /*
284 * Don't let the tracer_enabled = 1 show up before
285 * the wakeup_task is reset. This may be overkill since
286 * wakeup_reset does a spin_unlock after setting the
287 * wakeup_task to NULL, but I want to be safe.
288 * This is a slow path anyway.
289 */
290 smp_wmb();
291
292 tracer_enabled = 1;
293
294 return;
295fail_deprobe_wake_new:
296 marker_probe_unregister("kernel_sched_wakeup_new",
297 wake_up_callback,
298 &wakeup_trace);
299fail_deprobe:
300 marker_probe_unregister("kernel_sched_wakeup",
301 wake_up_callback,
302 &wakeup_trace);
303}
304
305static void stop_wakeup_tracer(struct trace_array *tr)
306{
307 tracer_enabled = 0;
308 marker_probe_unregister("kernel_sched_schedule",
309 sched_switch_callback,
310 &wakeup_trace);
311 marker_probe_unregister("kernel_sched_wakeup_new",
312 wake_up_callback,
313 &wakeup_trace);
314 marker_probe_unregister("kernel_sched_wakeup",
315 wake_up_callback,
316 &wakeup_trace);
317}
318
319static void wakeup_tracer_init(struct trace_array *tr)
320{
321 wakeup_trace = tr;
322
323 if (tr->ctrl)
324 start_wakeup_tracer(tr);
325}
326
327static void wakeup_tracer_reset(struct trace_array *tr)
328{
329 if (tr->ctrl) {
330 stop_wakeup_tracer(tr);
331 /* make sure we put back any tasks we are tracing */
332 wakeup_reset(tr);
333 }
334}
335
336static void wakeup_tracer_ctrl_update(struct trace_array *tr)
337{
338 if (tr->ctrl)
339 start_wakeup_tracer(tr);
340 else
341 stop_wakeup_tracer(tr);
342}
343
344static void wakeup_tracer_open(struct trace_iterator *iter)
345{
346 /* stop the trace while dumping */
347 if (iter->tr->ctrl)
348 stop_wakeup_tracer(iter->tr);
349}
350
351static void wakeup_tracer_close(struct trace_iterator *iter)
352{
353 /* forget about any processes we were recording */
354 if (iter->tr->ctrl)
355 start_wakeup_tracer(iter->tr);
356}
357
358static struct tracer wakeup_tracer __read_mostly =
359{
360 .name = "wakeup",
361 .init = wakeup_tracer_init,
362 .reset = wakeup_tracer_reset,
363 .open = wakeup_tracer_open,
364 .close = wakeup_tracer_close,
365 .ctrl_update = wakeup_tracer_ctrl_update,
366 .print_max = 1,
367#ifdef CONFIG_FTRACE_SELFTEST
368 .selftest = trace_selftest_startup_wakeup,
369#endif
370};
371
372__init static int init_wakeup_tracer(void)
373{
374 int ret;
375
376 ret = register_tracer(&wakeup_tracer);
377 if (ret)
378 return ret;
379
380 return 0;
381}
382device_initcall(init_wakeup_tracer);
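
wakeup_check_start() above only arms the measurement for rt_task() wakeups, so exercising this tracer needs a task running with an RT policy. A small user-space sketch, with the priority value chosen arbitrarily:

#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct sched_param param = { .sched_priority = 10 };

	/* pid 0 == current process; needs privilege to set SCHED_FIFO */
	if (sched_setscheduler(0, SCHED_FIFO, &param)) {
		perror("sched_setscheduler");
		return 1;
	}

	/* every sleep/wake cycle from here on is a candidate wakeup latency */
	for (;;)
		usleep(10 * 1000);
}
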
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..5588ecc40985
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,562 @@
1/* Include in trace.c */
2
3#include <linux/kthread.h>
4#include <linux/delay.h>
5
6static inline int trace_valid_entry(struct trace_entry *entry)
7{
8 switch (entry->type) {
9 case TRACE_FN:
10 case TRACE_CTX:
11 case TRACE_WAKE:
12 case TRACE_STACK:
13 case TRACE_SPECIAL:
14 return 1;
15 }
16 return 0;
17}
18
19static int
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{
22 struct trace_entry *entries;
23 struct page *page;
24 int idx = 0;
25 int i;
26
27 BUG_ON(list_empty(&data->trace_pages));
28 page = list_entry(data->trace_pages.next, struct page, lru);
29 entries = page_address(page);
30
31 if (head_page(data) != entries)
32 goto failed;
33
34 /*
35 * The starting trace buffer always has valid elements,
36 * if any element exists.
37 */
38 entries = head_page(data);
39
40 for (i = 0; i < tr->entries; i++) {
41
42 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
43 printk(KERN_CONT ".. invalid entry %d ",
44 entries[idx].type);
45 goto failed;
46 }
47
48 idx++;
49 if (idx >= ENTRIES_PER_PAGE) {
50 page = virt_to_page(entries);
51 if (page->lru.next == &data->trace_pages) {
52 if (i != tr->entries - 1) {
53 printk(KERN_CONT ".. entries buffer mismatch");
54 goto failed;
55 }
56 } else {
57 page = list_entry(page->lru.next, struct page, lru);
58 entries = page_address(page);
59 }
60 idx = 0;
61 }
62 }
63
64 page = virt_to_page(entries);
65 if (page->lru.next != &data->trace_pages) {
66 printk(KERN_CONT ".. too many entries");
67 goto failed;
68 }
69
70 return 0;
71
72 failed:
73 /* disable tracing */
74 tracing_disabled = 1;
75 printk(KERN_CONT ".. corrupted trace buffer .. ");
76 return -1;
77}
78
79/*
80 * Test the trace buffer to see if all the elements
81 * are still sane.
82 */
83static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84{
85 unsigned long flags, cnt = 0;
86 int cpu, ret = 0;
87
88 /* Don't allow flipping of max traces now */
89 raw_local_irq_save(flags);
90 __raw_spin_lock(&ftrace_max_lock);
91 for_each_possible_cpu(cpu) {
92 if (!head_page(tr->data[cpu]))
93 continue;
94
95 cnt += tr->data[cpu]->trace_idx;
96
97 ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
98 if (ret)
99 break;
100 }
101 __raw_spin_unlock(&ftrace_max_lock);
102 raw_local_irq_restore(flags);
103
104 if (count)
105 *count = cnt;
106
107 return ret;
108}
109
110#ifdef CONFIG_FTRACE
111
112#ifdef CONFIG_DYNAMIC_FTRACE
113
114#define __STR(x) #x
115#define STR(x) __STR(x)
116
117/* Test dynamic code modification and ftrace filters */
118int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
119 struct trace_array *tr,
120 int (*func)(void))
121{
122 unsigned long count;
123 int ret;
124 int save_ftrace_enabled = ftrace_enabled;
125 int save_tracer_enabled = tracer_enabled;
126 char *func_name;
127
128 /* The ftrace test PASSED */
129 printk(KERN_CONT "PASSED\n");
130 pr_info("Testing dynamic ftrace: ");
131
132 /* enable tracing, and record the filter function */
133 ftrace_enabled = 1;
134 tracer_enabled = 1;
135
136 /* passed in by parameter to keep gcc from optimizing it away */
137 func();
138
139 /* update the records */
140 ret = ftrace_force_update();
141 if (ret) {
142 printk(KERN_CONT ".. ftraced failed .. ");
143 return ret;
144 }
145
146 /*
147 * Some archs *cough*PowerPC*cough* add characters to the
148 * start of the function names. We simply put a '*' to
149 * accommodate them.
150 */
151 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
152
153 /* filter only on our function */
154 ftrace_set_filter(func_name, strlen(func_name), 1);
155
156 /* enable tracing */
157 tr->ctrl = 1;
158 trace->init(tr);
159 /* Sleep for 1/10 of a second */
160 msleep(100);
161
162 /* we should have nothing in the buffer */
163 ret = trace_test_buffer(tr, &count);
164 if (ret)
165 goto out;
166
167 if (count) {
168 ret = -1;
169 printk(KERN_CONT ".. filter did not filter .. ");
170 goto out;
171 }
172
173 /* call our function again */
174 func();
175
176 /* sleep again */
177 msleep(100);
178
179 /* stop the tracing. */
180 tr->ctrl = 0;
181 trace->ctrl_update(tr);
182 ftrace_enabled = 0;
183
184 /* check the trace buffer */
185 ret = trace_test_buffer(tr, &count);
186 trace->reset(tr);
187
188 /* we should only have one item */
189 if (!ret && count != 1) {
190 printk(KERN_CONT ".. filter failed count=%ld ..", count);
191 ret = -1;
192 goto out;
193 }
194 out:
195 ftrace_enabled = save_ftrace_enabled;
196 tracer_enabled = save_tracer_enabled;
197
198 /* Enable tracing on all functions again */
199 ftrace_set_filter(NULL, 0, 1);
200
201 return ret;
202}
203#else
204# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
205#endif /* CONFIG_DYNAMIC_FTRACE */
206/*
207 * Simple verification test of ftrace function tracer.
208 * Enable ftrace, sleep 1/10 second, and then read the trace
209 * buffer to see if all is in order.
210 */
211int
212trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
213{
214 unsigned long count;
215 int ret;
216 int save_ftrace_enabled = ftrace_enabled;
217 int save_tracer_enabled = tracer_enabled;
218
219 /* make sure msleep has been recorded */
220 msleep(1);
221
222 /* force the recorded functions to be traced */
223 ret = ftrace_force_update();
224 if (ret) {
225 printk(KERN_CONT ".. ftraced failed .. ");
226 return ret;
227 }
228
229 /* start the tracing */
230 ftrace_enabled = 1;
231 tracer_enabled = 1;
232
233 tr->ctrl = 1;
234 trace->init(tr);
235 /* Sleep for 1/10 of a second */
236 msleep(100);
237 /* stop the tracing. */
238 tr->ctrl = 0;
239 trace->ctrl_update(tr);
240 ftrace_enabled = 0;
241
242 /* check the trace buffer */
243 ret = trace_test_buffer(tr, &count);
244 trace->reset(tr);
245
246 if (!ret && !count) {
247 printk(KERN_CONT ".. no entries found ..");
248 ret = -1;
249 goto out;
250 }
251
252 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
253 DYN_FTRACE_TEST_NAME);
254
255 out:
256 ftrace_enabled = save_ftrace_enabled;
257 tracer_enabled = save_tracer_enabled;
258
259 /* kill ftrace totally if we failed */
260 if (ret)
261 ftrace_kill();
262
263 return ret;
264}
265#endif /* CONFIG_FTRACE */
266
267#ifdef CONFIG_IRQSOFF_TRACER
268int
269trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
270{
271 unsigned long save_max = tracing_max_latency;
272 unsigned long count;
273 int ret;
274
275 /* start the tracing */
276 tr->ctrl = 1;
277 trace->init(tr);
278 /* reset the max latency */
279 tracing_max_latency = 0;
280 /* disable interrupts for a bit */
281 local_irq_disable();
282 udelay(100);
283 local_irq_enable();
284 /* stop the tracing. */
285 tr->ctrl = 0;
286 trace->ctrl_update(tr);
287 /* check both trace buffers */
288 ret = trace_test_buffer(tr, NULL);
289 if (!ret)
290 ret = trace_test_buffer(&max_tr, &count);
291 trace->reset(tr);
292
293 if (!ret && !count) {
294 printk(KERN_CONT ".. no entries found ..");
295 ret = -1;
296 }
297
298 tracing_max_latency = save_max;
299
300 return ret;
301}
302#endif /* CONFIG_IRQSOFF_TRACER */
303
304#ifdef CONFIG_PREEMPT_TRACER
305int
306trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
307{
308 unsigned long save_max = tracing_max_latency;
309 unsigned long count;
310 int ret;
311
312 /* start the tracing */
313 tr->ctrl = 1;
314 trace->init(tr);
315 /* reset the max latency */
316 tracing_max_latency = 0;
317 /* disable preemption for a bit */
318 preempt_disable();
319 udelay(100);
320 preempt_enable();
321 /* stop the tracing. */
322 tr->ctrl = 0;
323 trace->ctrl_update(tr);
324 /* check both trace buffers */
325 ret = trace_test_buffer(tr, NULL);
326 if (!ret)
327 ret = trace_test_buffer(&max_tr, &count);
328 trace->reset(tr);
329
330 if (!ret && !count) {
331 printk(KERN_CONT ".. no entries found ..");
332 ret = -1;
333 }
334
335 tracing_max_latency = save_max;
336
337 return ret;
338}
339#endif /* CONFIG_PREEMPT_TRACER */
340
341#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
342int
343trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
344{
345 unsigned long save_max = tracing_max_latency;
346 unsigned long count;
347 int ret;
348
349 /* start the tracing */
350 tr->ctrl = 1;
351 trace->init(tr);
352
353 /* reset the max latency */
354 tracing_max_latency = 0;
355
356 /* disable preemption and interrupts for a bit */
357 preempt_disable();
358 local_irq_disable();
359 udelay(100);
360 preempt_enable();
361 /* reverse the order of preempt vs irqs */
362 local_irq_enable();
363
364 /* stop the tracing. */
365 tr->ctrl = 0;
366 trace->ctrl_update(tr);
367 /* check both trace buffers */
368 ret = trace_test_buffer(tr, NULL);
369 if (ret)
370 goto out;
371
372 ret = trace_test_buffer(&max_tr, &count);
373 if (ret)
374 goto out;
375
376 if (!ret && !count) {
377 printk(KERN_CONT ".. no entries found ..");
378 ret = -1;
379 goto out;
380 }
381
382 /* do the test by disabling interrupts first this time */
383 tracing_max_latency = 0;
384 tr->ctrl = 1;
385 trace->ctrl_update(tr);
386 preempt_disable();
387 local_irq_disable();
388 udelay(100);
389 preempt_enable();
390 /* reverse the order of preempt vs irqs */
391 local_irq_enable();
392
393 /* stop the tracing. */
394 tr->ctrl = 0;
395 trace->ctrl_update(tr);
396 /* check both trace buffers */
397 ret = trace_test_buffer(tr, NULL);
398 if (ret)
399 goto out;
400
401 ret = trace_test_buffer(&max_tr, &count);
402
403 if (!ret && !count) {
404 printk(KERN_CONT ".. no entries found ..");
405 ret = -1;
406 goto out;
407 }
408
409 out:
410 trace->reset(tr);
411 tracing_max_latency = save_max;
412
413 return ret;
414}
415#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
416
417#ifdef CONFIG_SCHED_TRACER
418static int trace_wakeup_test_thread(void *data)
419{
420 /* Make this an RT thread, doesn't need to be too high */
421 struct sched_param param = { .sched_priority = 5 };
422 struct completion *x = data;
423
424 sched_setscheduler(current, SCHED_FIFO, &param);
425
426 /* Let the test know we now have our new prio */
427 complete(x);
428
429 /* now go to sleep and let the test wake us up */
430 set_current_state(TASK_INTERRUPTIBLE);
431 schedule();
432
433 /* we are awake, now wait to disappear */
434 while (!kthread_should_stop()) {
435 /*
436 * This is an RT task, do short sleeps to let
437 * others run.
438 */
439 msleep(100);
440 }
441
442 return 0;
443}
444
445int
446trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
447{
448 unsigned long save_max = tracing_max_latency;
449 struct task_struct *p;
450 struct completion isrt;
451 unsigned long count;
452 int ret;
453
454 init_completion(&isrt);
455
456 /* create a high prio thread */
457 p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
458 if (IS_ERR(p)) {
459 printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
460 return -1;
461 }
462
463 /* make sure the thread is running at an RT prio */
464 wait_for_completion(&isrt);
465
466 /* start the tracing */
467 tr->ctrl = 1;
468 trace->init(tr);
469 /* reset the max latency */
470 tracing_max_latency = 0;
471
472 /* sleep to let the RT thread sleep too */
473 msleep(100);
474
475 /*
476 * Yes this is slightly racy. It is possible that for some
477 * strange reason that the RT thread we created, did not
478 * call schedule for 100ms after doing the completion,
479 * and we do a wakeup on a task that already is awake.
480 * But that is extremely unlikely, and the worst thing that
481 * happens in such a case, is that we disable tracing.
482 * Honestly, if this race does happen something is horribly
483 * wrong with the system.
484 */
485
486 wake_up_process(p);
487
488 /* stop the tracing. */
489 tr->ctrl = 0;
490 trace->ctrl_update(tr);
491 /* check both trace buffers */
492 ret = trace_test_buffer(tr, NULL);
493 if (!ret)
494 ret = trace_test_buffer(&max_tr, &count);
495
496
497 trace->reset(tr);
498
499 tracing_max_latency = save_max;
500
501 /* kill the thread */
502 kthread_stop(p);
503
504 if (!ret && !count) {
505 printk(KERN_CONT ".. no entries found ..");
506 ret = -1;
507 }
508
509 return ret;
510}
511#endif /* CONFIG_SCHED_TRACER */
512
513#ifdef CONFIG_CONTEXT_SWITCH_TRACER
514int
515trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
516{
517 unsigned long count;
518 int ret;
519
520 /* start the tracing */
521 tr->ctrl = 1;
522 trace->init(tr);
523 /* Sleep for 1/10 of a second */
524 msleep(100);
525 /* stop the tracing. */
526 tr->ctrl = 0;
527 trace->ctrl_update(tr);
528 /* check the trace buffer */
529 ret = trace_test_buffer(tr, &count);
530 trace->reset(tr);
531
532 if (!ret && !count) {
533 printk(KERN_CONT ".. no entries found ..");
534 ret = -1;
535 }
536
537 return ret;
538}
539#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
540
541#ifdef CONFIG_SYSPROF_TRACER
542int
543trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
544{
545 unsigned long count;
546 int ret;
547
548 /* start the tracing */
549 tr->ctrl = 1;
550 trace->init(tr);
551 /* Sleep for 1/10 of a second */
552 msleep(100);
553 /* stop the tracing. */
554 tr->ctrl = 0;
555 trace->ctrl_update(tr);
556 /* check the trace buffer */
557 ret = trace_test_buffer(tr, &count);
558 trace->reset(tr);
559
560 return ret;
561}
562#endif /* CONFIG_SYSPROF_TRACER */
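
Every selftest above follows the same skeleton: arm the tracer through ctrl/init, provoke whatever it is meant to catch, stop it through ctrl_update, then validate the buffer with trace_test_buffer(). Condensed into one sketch that would sit alongside the tests above; provoke_event() is a hypothetical stand-in for the tracer-specific stimulus:

/* hypothetical stimulus: disable irqs, wake an RT task, force a switch, ... */
static void provoke_event(void);

static int
trace_selftest_startup_example(struct tracer *trace, struct trace_array *tr)
{
	unsigned long count;
	int ret;

	tr->ctrl = 1;
	trace->init(tr);			/* start tracing */

	provoke_event();			/* generate the events to catch */

	tr->ctrl = 0;
	trace->ctrl_update(tr);			/* stop tracing */

	ret = trace_test_buffer(tr, &count);	/* sanity-check every entry */
	trace->reset(tr);

	if (!ret && !count) {
		printk(KERN_CONT ".. no entries found ..");
		ret = -1;
	}
	return ret;
}
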
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
1#include "trace.h"
2
3int DYN_FTRACE_TEST_NAME(void)
4{
5 /* used to call mcount */
6 return 0;
7}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
new file mode 100644
index 000000000000..2301e1e7c606
--- /dev/null
+++ b/kernel/trace/trace_sysprof.c
@@ -0,0 +1,363 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
21static struct trace_array *sysprof_trace;
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
27static unsigned long sample_period = 1000000;
28static const unsigned int sample_max_depth = 512;
29
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
63static void
64backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
65{
66 /* Ignore warnings */
67}
68
69static void backtrace_warning(void *data, char *msg)
70{
71 /* Ignore warnings */
72}
73
74static int backtrace_stack(void *data, char *name)
75{
76 /* Don't bother with IRQ stacks for now */
77 return -1;
78}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96};
97
98static int
99trace_kernel(struct pt_regs *regs, struct trace_array *tr,
100 struct trace_array_cpu *data)
101{
102 struct backtrace_info info;
103 unsigned long bp;
104 char *stack;
105
106 info.tr = tr;
107 info.data = data;
108 info.pos = 1;
109
110 __trace_special(info.tr, info.data, 1, regs->ip, 0);
111
112 stack = ((char *)regs + sizeof(struct pt_regs));
113#ifdef CONFIG_FRAME_POINTER
114 bp = regs->bp;
115#else
116 bp = 0;
117#endif
118
119 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
120
121 return info.pos;
122}
123
124static void timer_notify(struct pt_regs *regs, int cpu)
125{
126 struct trace_array_cpu *data;
127 struct stack_frame frame;
128 struct trace_array *tr;
129 const void __user *fp;
130 int is_user;
131 int i;
132
133 if (!regs)
134 return;
135
136 tr = sysprof_trace;
137 data = tr->data[cpu];
138 is_user = user_mode(regs);
139
140 if (!current || current->pid == 0)
141 return;
142
143 if (is_user && current->state != TASK_RUNNING)
144 return;
145
146 __trace_special(tr, data, 0, 0, current->pid);
147
148 if (!is_user)
149 i = trace_kernel(regs, tr, data);
150 else
151 i = 0;
152
153 /*
154 * Trace user stack if we are not a kernel thread
155 */
156 if (current->mm && i < sample_max_depth) {
157 regs = (struct pt_regs *)current->thread.sp0 - 1;
158
159 fp = (void __user *)regs->bp;
160
161 __trace_special(tr, data, 2, regs->ip, 0);
162
163 while (i < sample_max_depth) {
164 frame.next_fp = 0;
165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame))
167 break;
168 if ((unsigned long)fp < regs->sp)
169 break;
170
171 __trace_special(tr, data, 2, frame.return_address,
172 (unsigned long)fp);
173 fp = frame.next_fp;
174
175 i++;
176 }
177
178 }
179
180 /*
181 * Special trace entry if we overflow the max depth:
182 */
183 if (i == sample_max_depth)
184 __trace_special(tr, data, -1, -1, -1);
185
186 __trace_special(tr, data, 3, current->pid, i);
187}
188
189static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
190{
191 /* trace here */
192 timer_notify(get_irq_regs(), smp_processor_id());
193
194 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
195
196 return HRTIMER_RESTART;
197}
198
199static void start_stack_timer(int cpu)
200{
201 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208}
209
210static void start_stack_timers(void)
211{
212 cpumask_t saved_mask = current->cpus_allowed;
213 int cpu;
214
215 for_each_online_cpu(cpu) {
216 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
217 start_stack_timer(cpu);
218 }
219 set_cpus_allowed_ptr(current, &saved_mask);
220}
221
222static void stop_stack_timer(int cpu)
223{
224 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
225
226 hrtimer_cancel(hrtimer);
227}
228
229static void stop_stack_timers(void)
230{
231 int cpu;
232
233 for_each_online_cpu(cpu)
234 stop_stack_timer(cpu);
235}
236
237static void stack_reset(struct trace_array *tr)
238{
239 int cpu;
240
241 tr->time_start = ftrace_now(tr->cpu);
242
243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]);
245}
246
247static void start_stack_trace(struct trace_array *tr)
248{
249 mutex_lock(&sample_timer_lock);
250 stack_reset(tr);
251 start_stack_timers();
252 tracer_enabled = 1;
253 mutex_unlock(&sample_timer_lock);
254}
255
256static void stop_stack_trace(struct trace_array *tr)
257{
258 mutex_lock(&sample_timer_lock);
259 stop_stack_timers();
260 tracer_enabled = 0;
261 mutex_unlock(&sample_timer_lock);
262}
263
264static void stack_trace_init(struct trace_array *tr)
265{
266 sysprof_trace = tr;
267
268 if (tr->ctrl)
269 start_stack_trace(tr);
270}
271
272static void stack_trace_reset(struct trace_array *tr)
273{
274 if (tr->ctrl)
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285}
286
287static struct tracer stack_trace __read_mostly =
288{
289 .name = "sysprof",
290 .init = stack_trace_init,
291 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof,
295#endif
296};
297
298__init static int init_stack_trace(void)
299{
300 return register_tracer(&stack_trace);
301}
302device_initcall(init_stack_trace);
303
304#define MAX_LONG_DIGITS 22
305
306static ssize_t
307sysprof_sample_read(struct file *filp, char __user *ubuf,
308 size_t cnt, loff_t *ppos)
309{
310 char buf[MAX_LONG_DIGITS];
311 int r;
312
313 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
314
315 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
316}
317
318static ssize_t
319sysprof_sample_write(struct file *filp, const char __user *ubuf,
320 size_t cnt, loff_t *ppos)
321{
322 char buf[MAX_LONG_DIGITS];
323 unsigned long val;
324
325 if (cnt > MAX_LONG_DIGITS-1)
326 cnt = MAX_LONG_DIGITS-1;
327
328 if (copy_from_user(&buf, ubuf, cnt))
329 return -EFAULT;
330
331 buf[cnt] = 0;
332
333 val = simple_strtoul(buf, NULL, 10);
334 /*
335 * Enforce a minimum sample period of 100 usecs:
336 */
337 if (val < 100)
338 val = 100;
339
340 mutex_lock(&sample_timer_lock);
341 stop_stack_timers();
342 sample_period = val * 1000;
343 start_stack_timers();
344 mutex_unlock(&sample_timer_lock);
345
346 return cnt;
347}
348
349static struct file_operations sysprof_sample_fops = {
350 .read = sysprof_sample_read,
351 .write = sysprof_sample_write,
352};
353
354void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
355{
356 struct dentry *entry;
357
358 entry = debugfs_create_file("sysprof_sample_period", 0644,
359 d_tracer, NULL, &sysprof_sample_fops);
360 if (entry)
361 return;
362 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
363}
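
sysprof_sample_write() above reads the value as microseconds, clamps it to a 100 us minimum, and stores it in nanoseconds (val * 1000). A quick user-space sketch of changing the period, assuming the usual debugfs mount point for the file created by init_tracer_sysprof_debugfs():

#include <stdio.h>

int main(void)
{
	/* assumed location of the 'sysprof_sample_period' debugfs file */
	FILE *f = fopen("/sys/kernel/debug/tracing/sysprof_sample_period", "w");

	if (!f)
		return 1;

	/* request 250 us between samples; stored as 250 * 1000 = 250000 ns */
	fprintf(f, "250\n");
	fclose(f);
	return 0;
}
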
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..d8b6279a9b42 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -634,6 +634,8 @@ config LATENCYTOP
634 Enable this option if you want to use the LatencyTOP tool 634 Enable this option if you want to use the LatencyTOP tool
635 to find out which userspace is blocking on what kernel operations. 635 to find out which userspace is blocking on what kernel operations.
636 636
637source kernel/trace/Kconfig
638
637config PROVIDE_OHCI1394_DMA_INIT 639config PROVIDE_OHCI1394_DMA_INIT
638 bool "Remote debugging over FireWire early on boot" 640 bool "Remote debugging over FireWire early on boot"
639 depends on PCI && X86 641 depends on PCI && X86
diff --git a/lib/Makefile b/lib/Makefile
index 74b0cfb1fcc3..4b836a53c08f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,6 +8,15 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
9 proportions.o prio_heap.o ratelimit.o 9 proportions.o prio_heap.o ratelimit.o
10 10
11ifdef CONFIG_FTRACE
12# Do not profile string.o, since it may be used in early boot or vdso
13CFLAGS_REMOVE_string.o = -pg
14# Also do not profile any debug utilities
15CFLAGS_REMOVE_spinlock_debug.o = -pg
16CFLAGS_REMOVE_list_debug.o = -pg
17CFLAGS_REMOVE_debugobjects.o = -pg
18endif
19
11lib-$(CONFIG_MMU) += ioremap.o 20lib-$(CONFIG_MMU) += ioremap.o
12lib-$(CONFIG_SMP) += cpumask.o 21lib-$(CONFIG_SMP) += cpumask.o
13 22
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 6c90fb90e19c..3b4dc098181e 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,7 @@
7#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9 9
10unsigned int debug_smp_processor_id(void) 10notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count(); 12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 13 int this_cpu = raw_smp_processor_id();
@@ -37,7 +37,7 @@ unsigned int debug_smp_processor_id(void)
37 /* 37 /*
38 * Avoid recursion: 38 * Avoid recursion:
39 */ 39 */
40 preempt_disable(); 40 preempt_disable_notrace();
41 41
42 if (!printk_ratelimit()) 42 if (!printk_ratelimit())
43 goto out_enable; 43 goto out_enable;
@@ -49,7 +49,7 @@ unsigned int debug_smp_processor_id(void)
49 dump_stack(); 49 dump_stack();
50 50
51out_enable: 51out_enable:
52 preempt_enable_no_resched(); 52 preempt_enable_no_resched_notrace();
53out: 53out:
54 return this_cpu; 54 return this_cpu;
55} 55}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 789b6adbef37..b38f700825fc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
126static struct prop_descriptor vm_completions; 126static struct prop_descriptor vm_completions;
127static struct prop_descriptor vm_dirties; 127static struct prop_descriptor vm_dirties;
128 128
129static unsigned long determine_dirtyable_memory(void);
130
131/* 129/*
132 * couple the period to the dirty_ratio: 130 * couple the period to the dirty_ratio:
133 * 131 *
@@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
347#endif 345#endif
348} 346}
349 347
350static unsigned long determine_dirtyable_memory(void) 348/**
349 * determine_dirtyable_memory - amount of memory that may be used
350 *
351 * Returns the number of pages that can currently be freed and used
352 * by the kernel for direct mappings.
353 */
354unsigned long determine_dirtyable_memory(void)
351{ 355{
352 unsigned long x; 356 unsigned long x;
353 357
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8e440233c27d..ea48b82a3707 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
96modname_flags = $(if $(filter 1,$(words $(modname))),\ 96modname_flags = $(if $(filter 1,$(words $(modname))),\
97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") 97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
98 98
99_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) 99orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
100_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
100_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o) 101_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
101_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F)) 102_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))
102 103