-rw-r--r--  Makefile | 4
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/boot/compressed/Makefile | 6
-rw-r--r--  arch/arm/kernel/Makefile | 5
-rw-r--r--  arch/arm/kernel/armksyms.c | 5
-rw-r--r--  arch/arm/kernel/entry-common.S | 51
-rw-r--r--  arch/arm/kernel/ftrace.c | 116
-rw-r--r--  arch/arm/kernel/kprobes.c | 2
-rw-r--r--  arch/powerpc/Kconfig | 4
-rw-r--r--  arch/powerpc/kernel/Makefile | 14
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 127
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 65
-rw-r--r--  arch/powerpc/kernel/ftrace.c | 154
-rw-r--r--  arch/powerpc/kernel/io.c | 3
-rw-r--r--  arch/powerpc/kernel/irq.c | 6
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c | 5
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/Makefile | 5
-rw-r--r--  arch/sparc64/Kconfig | 2
-rw-r--r--  arch/sparc64/Kconfig.debug | 2
-rw-r--r--  arch/sparc64/kernel/Makefile | 1
-rw-r--r--  arch/sparc64/kernel/ftrace.c | 94
-rw-r--r--  arch/sparc64/kernel/sparc64_ksyms.c | 2
-rw-r--r--  arch/sparc64/lib/mcount.S | 58
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/Kconfig.debug | 8
-rw-r--r--  arch/x86/kernel/Makefile | 8
-rw-r--r--  arch/x86/kernel/alternative.c | 22
-rw-r--r--  arch/x86/kernel/entry_32.S | 72
-rw-r--r--  arch/x86/kernel/entry_64.S | 106
-rw-r--r--  arch/x86/kernel/ftrace.c | 141
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 9
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 4
-rw-r--r--  arch/x86/kernel/process_32.c | 3
-rw-r--r--  arch/x86/kernel/process_64.c | 3
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 3
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 11
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/thunk_32.S | 47
-rw-r--r--  arch/x86/lib/thunk_64.S | 19
-rw-r--r--  arch/x86/mm/fault.c | 56
-rw-r--r--  arch/x86/mm/init_32.c | 4
-rw-r--r--  arch/x86/mm/init_64.c | 10
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 15
-rw-r--r--  arch/x86/vdso/vgetcpu.c | 3
-rw-r--r--  include/asm-arm/ftrace.h | 14
-rw-r--r--  include/asm-arm/kprobes.h | 1
-rw-r--r--  include/asm-powerpc/ftrace.h | 14
-rw-r--r--  include/asm-powerpc/hw_irq.h | 10
-rw-r--r--  include/asm-sparc64/ftrace.h | 14
-rw-r--r--  include/asm-x86/alternative.h | 2
-rw-r--r--  include/asm-x86/ftrace.h | 14
-rw-r--r--  include/asm-x86/irqflags.h | 24
-rw-r--r--  include/asm-x86/kdebug.h | 9
-rw-r--r--  include/asm-x86/vsyscall.h | 3
-rw-r--r--  include/linux/ftrace.h | 143
-rw-r--r--  include/linux/irqflags.h | 13
-rw-r--r--  include/linux/kprobes.h | 4
-rw-r--r--  include/linux/linkage.h | 2
-rw-r--r--  include/linux/marker.h | 40
-rw-r--r--  include/linux/preempt.h | 34
-rw-r--r--  include/linux/sched.h | 16
-rw-r--r--  include/linux/writeback.h | 2
-rw-r--r--  kernel/Makefile | 14
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  kernel/lockdep.c | 33
-rw-r--r--  kernel/marker.c | 30
-rw-r--r--  kernel/printk.c | 2
-rw-r--r--  kernel/sched.c | 57
-rw-r--r--  kernel/semaphore.c | 2
-rw-r--r--  kernel/spinlock.c | 2
-rw-r--r--  kernel/sysctl.c | 11
-rw-r--r--  kernel/trace/Kconfig | 127
-rw-r--r--  kernel/trace/Makefile | 22
-rw-r--r--  kernel/trace/ftrace.c | 1703
-rw-r--r--  kernel/trace/trace.c | 3100
-rw-r--r--  kernel/trace/trace.h | 313
-rw-r--r--  kernel/trace/trace_functions.c | 78
-rw-r--r--  kernel/trace/trace_irqsoff.c | 486
-rw-r--r--  kernel/trace/trace_sched_switch.c | 286
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 447
-rw-r--r--  kernel/trace/trace_selftest.c | 540
-rw-r--r--  kernel/trace/trace_selftest_dynamic.c | 7
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/Makefile | 9
-rw-r--r--  lib/smp_processor_id.c | 6
-rw-r--r--  mm/page-writeback.c | 10
-rw-r--r--  scripts/Makefile.lib | 3
89 files changed, 8827 insertions(+), 114 deletions(-)
diff --git a/Makefile b/Makefile
index 6aff5f47c21d..ff2c681fb5dd 100644
--- a/Makefile
+++ b/Makefile
@@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g
528KBUILD_AFLAGS += -gdwarf-2 528KBUILD_AFLAGS += -gdwarf-2
529endif 529endif
530 530
531ifdef CONFIG_FTRACE
532KBUILD_CFLAGS += -pg
533endif
534
531# We trigger additional mismatches with less inlining 535# We trigger additional mismatches with less inlining
532ifdef CONFIG_DEBUG_SECTION_MISMATCH 536ifdef CONFIG_DEBUG_SECTION_MISMATCH
533KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) 537KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
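
The hunk above is the whole top-level hook: when CONFIG_FTRACE is set, every object is built with -pg, which makes gcc emit a call to the profiling hook at the start of each function; the per-architecture mcount/_mcount code added later in this series supplies that hook. As a rough illustration only (user-space, with a hand-written stand-in for mcount rather than the kernel's), the instrumentation amounts to:

#include <stdio.h>

/* Stand-in for the profiling hook. In a real -pg build the compiler emits
 * the call itself, and mcount lives in libc or, for the kernel, in the
 * per-arch assembly added by this series. */
void mcount(void)
{
	/* a real hook would record the instrumented function and its
	 * caller, e.g. via __builtin_return_address() */
}

void traced_function(void)
{
	mcount();	/* roughly what gcc -pg inserts at function entry */
	puts("function body runs after the hook");
}

int main(void)
{
	traced_function();
	return 0;
}
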
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b786e68914d4..3845e5c8a34f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,8 @@ config ARM
14 select HAVE_OPROFILE 14 select HAVE_OPROFILE
15 select HAVE_KPROBES if (!XIP_KERNEL) 15 select HAVE_KPROBES if (!XIP_KERNEL)
16 select HAVE_KRETPROBES if (HAVE_KPROBES) 16 select HAVE_KRETPROBES if (HAVE_KPROBES)
17 select HAVE_FTRACE if (!XIP_KERNEL)
18 select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
17 help 19 help
18 The ARM series is a line of low-power-consumption RISC chip designs 20 The ARM series is a line of low-power-consumption RISC chip designs
19 licensed by ARM Ltd and targeted at embedded applications and 21 licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index de9d9ee50958..95baac4939e0 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
69 69
70targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ 70targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
71 head.o misc.o $(OBJS) 71 head.o misc.o $(OBJS)
72
73ifeq ($(CONFIG_FTRACE),y)
74ORIG_CFLAGS := $(KBUILD_CFLAGS)
75KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
76endif
77
72EXTRA_CFLAGS := -fpic -fno-builtin 78EXTRA_CFLAGS := -fpic -fno-builtin
73EXTRA_AFLAGS := 79EXTRA_AFLAGS :=
74 80
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad455ff5aebe..eb9092ca8008 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -4,6 +4,10 @@
4 4
5AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) 5AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
6 6
7ifdef CONFIG_DYNAMIC_FTRACE
8CFLAGS_REMOVE_ftrace.o = -pg
9endif
10
7# Object file lists. 11# Object file lists.
8 12
9obj-y := compat.o entry-armv.o entry-common.o irq.o \ 13obj-y := compat.o entry-armv.o entry-common.o irq.o \
@@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o
18obj-$(CONFIG_ISA_DMA) += dma-isa.o 22obj-$(CONFIG_ISA_DMA) += dma-isa.o
19obj-$(CONFIG_PCI) += bios32.o isa.o 23obj-$(CONFIG_PCI) += bios32.o isa.o
20obj-$(CONFIG_SMP) += smp.o 24obj-$(CONFIG_SMP) += smp.o
25obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
21obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
22obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o 27obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
23obj-$(CONFIG_ATAGS_PROC) += atags.o 28obj-$(CONFIG_ATAGS_PROC) += atags.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 688b7b1ee416..cc7b246e9652 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -18,6 +18,7 @@
18#include <asm/io.h> 18#include <asm/io.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include <asm/ftrace.h>
21 22
22/* 23/*
23 * libgcc functions - functions that are used internally by the 24 * libgcc functions - functions that are used internally by the
@@ -181,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be);
181#endif 182#endif
182 183
183EXPORT_SYMBOL(copy_page); 184EXPORT_SYMBOL(copy_page);
185
186#ifdef CONFIG_FTRACE
187EXPORT_SYMBOL(mcount);
188#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 597ed00a08d8..84694e88b428 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <asm/unistd.h> 11#include <asm/unistd.h>
12#include <asm/ftrace.h>
12#include <asm/arch/entry-macro.S> 13#include <asm/arch/entry-macro.S>
13 14
14#include "entry-header.S" 15#include "entry-header.S"
@@ -99,6 +100,56 @@ ENTRY(ret_from_fork)
99#undef CALL 100#undef CALL
100#define CALL(x) .long x 101#define CALL(x) .long x
101 102
103#ifdef CONFIG_FTRACE
104#ifdef CONFIG_DYNAMIC_FTRACE
105ENTRY(mcount)
106 stmdb sp!, {r0-r3, lr}
107 mov r0, lr
108 sub r0, r0, #MCOUNT_INSN_SIZE
109
110 .globl mcount_call
111mcount_call:
112 bl ftrace_stub
113 ldmia sp!, {r0-r3, pc}
114
115ENTRY(ftrace_caller)
116 stmdb sp!, {r0-r3, lr}
117 ldr r1, [fp, #-4]
118 mov r0, lr
119 sub r0, r0, #MCOUNT_INSN_SIZE
120
121 .globl ftrace_call
122ftrace_call:
123 bl ftrace_stub
124 ldmia sp!, {r0-r3, pc}
125
126#else
127
128ENTRY(mcount)
129 stmdb sp!, {r0-r3, lr}
130 ldr r0, =ftrace_trace_function
131 ldr r2, [r0]
132 adr r0, ftrace_stub
133 cmp r0, r2
134 bne trace
135 ldmia sp!, {r0-r3, pc}
136
137trace:
138 ldr r1, [fp, #-4]
139 mov r0, lr
140 sub r0, r0, #MCOUNT_INSN_SIZE
141 mov lr, pc
142 mov pc, r2
143 ldmia sp!, {r0-r3, pc}
144
145#endif /* CONFIG_DYNAMIC_FTRACE */
146
147 .globl ftrace_stub
148ftrace_stub:
149 mov pc, lr
150
151#endif /* CONFIG_FTRACE */
152
102/*============================================================================= 153/*=============================================================================
103 * SWI handler 154 * SWI handler
104 *----------------------------------------------------------------------------- 155 *-----------------------------------------------------------------------------
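
The non-dynamic mcount above reduces to a simple dispatch: if ftrace_trace_function still points at the empty ftrace_stub, return immediately; otherwise call the registered tracer with the instrumented function's address and its caller's address. The same logic in illustrative C (names mirror the patch, but this is a user-space sketch, not the real register-saving assembly):

#include <stdio.h>

typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);

/* Default hook: tracing disabled. */
static void ftrace_stub(unsigned long ip, unsigned long parent_ip)
{
	(void)ip;
	(void)parent_ip;
}

static ftrace_func_t ftrace_trace_function = ftrace_stub;

static void print_tracer(unsigned long ip, unsigned long parent_ip)
{
	printf("traced ip=%#lx parent=%#lx\n", ip, parent_ip);
}

/* What the assembly mcount does, minus the register save/restore. */
static void mcount_sketch(unsigned long ip, unsigned long parent_ip)
{
	if (ftrace_trace_function == ftrace_stub)
		return;				/* fast path: tracing is off */
	ftrace_trace_function(ip, parent_ip);
}

int main(void)
{
	mcount_sketch(0x1000, 0x2000);		/* no output, stub installed */
	ftrace_trace_function = print_tracer;	/* a tracer registers itself */
	mcount_sketch(0x1000, 0x2000);		/* now dispatches to the tracer */
	return 0;
}
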
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
new file mode 100644
index 000000000000..76d50e6091bc
--- /dev/null
+++ b/arch/arm/kernel/ftrace.c
@@ -0,0 +1,116 @@
1/*
2 * Dynamic function tracing support.
3 *
4 * Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
5 *
6 * For licensing details, see COPYING.
7 *
8 * Defines low-level handling of mcount calls when the kernel
9 * is compiled with the -pg flag. When using dynamic ftrace, the
10 * mcount call-sites get patched lazily with NOP till they are
11 * enabled. All code mutation routines here take effect atomically.
12 */
13
14#include <linux/ftrace.h>
15
16#include <asm/cacheflush.h>
17#include <asm/ftrace.h>
18
19#define PC_OFFSET 8
20#define BL_OPCODE 0xeb000000
21#define BL_OFFSET_MASK 0x00ffffff
22
23static unsigned long bl_insn;
24static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */
25
26unsigned char *ftrace_nop_replace(void)
27{
28 return (char *)&NOP;
29}
30
31/* construct a branch (BL) instruction to addr */
32unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
33{
34 long offset;
35
36 offset = (long)addr - (long)(pc + PC_OFFSET);
37 if (unlikely(offset < -33554432 || offset > 33554428)) {
38 /* Can't generate branches that far (from ARM ARM). Ftrace
39 * doesn't generate branches outside of kernel text.
40 */
41 WARN_ON_ONCE(1);
42 return NULL;
43 }
44 offset = (offset >> 2) & BL_OFFSET_MASK;
45 bl_insn = BL_OPCODE | offset;
46 return (unsigned char *)&bl_insn;
47}
48
49int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
50 unsigned char *new_code)
51{
52 unsigned long err = 0, replaced = 0, old, new;
53
54 old = *(unsigned long *)old_code;
55 new = *(unsigned long *)new_code;
56
57 __asm__ __volatile__ (
58 "1: ldr %1, [%2] \n"
59 " cmp %1, %4 \n"
60 "2: streq %3, [%2] \n"
61 " cmpne %1, %3 \n"
62 " movne %0, #2 \n"
63 "3:\n"
64
65 ".section .fixup, \"ax\"\n"
66 "4: mov %0, #1 \n"
67 " b 3b \n"
68 ".previous\n"
69
70 ".section __ex_table, \"a\"\n"
71 " .long 1b, 4b \n"
72 " .long 2b, 4b \n"
73 ".previous\n"
74
75 : "=r"(err), "=r"(replaced)
76 : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced)
77 : "memory");
78
79 if (!err && (replaced == old))
80 flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
81
82 return err;
83}
84
85int ftrace_update_ftrace_func(ftrace_func_t func)
86{
87 int ret;
88 unsigned long pc, old;
89 unsigned char *new;
90
91 pc = (unsigned long)&ftrace_call;
92 memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
93 new = ftrace_call_replace(pc, (unsigned long)func);
94 ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
95 return ret;
96}
97
98int ftrace_mcount_set(unsigned long *data)
99{
100 unsigned long pc, old;
101 unsigned long *addr = data;
102 unsigned char *new;
103
104 pc = (unsigned long)&mcount_call;
105 memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
106 new = ftrace_call_replace(pc, *addr);
107 *addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
108 return 0;
109}
110
111/* run from kstop_machine */
112int __init ftrace_dyn_arch_init(void *data)
113{
114 ftrace_mcount_set(data);
115 return 0;
116}
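
For reference, the BL encoding built by ftrace_call_replace() above can be reproduced on its own: the branch offset is taken relative to pc+8 (the value the ARM pipeline presents as the program counter), shifted right by two and packed into the low 24 bits of the 0xeb000000 BL opcode, with the same +/-32 MB range check. A self-contained sketch with made-up addresses:

#include <stdio.h>
#include <stdint.h>

#define PC_OFFSET	8
#define BL_OPCODE	0xeb000000u
#define BL_OFFSET_MASK	0x00ffffffu

/* Build an ARM "bl addr" instruction for a call site at pc, or return 0
 * if the target is outside the encodable +/-32 MB range. */
static uint32_t arm_bl(uint32_t pc, uint32_t addr)
{
	int32_t offset = (int32_t)(addr - (pc + PC_OFFSET));

	if (offset < -33554432 || offset > 33554428)
		return 0;
	return BL_OPCODE | (((uint32_t)offset >> 2) & BL_OFFSET_MASK);
}

int main(void)
{
	/* hypothetical call site and mcount address */
	printf("%#010x\n", arm_bl(0xc0008000, 0xc0100000));
	return 0;
}
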
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5593dd207216..5ee39e10c8d1 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
274 * for kretprobe handlers which should normally be interested in r0 only 274 * for kretprobe handlers which should normally be interested in r0 only
275 * anyway. 275 * anyway.
276 */ 276 */
277static void __attribute__((naked)) __kprobes kretprobe_trampoline(void) 277void __naked __kprobes kretprobe_trampoline(void)
278{ 278{
279 __asm__ __volatile__ ( 279 __asm__ __volatile__ (
280 "stmdb sp!, {r0 - r11} \n\t" 280 "stmdb sp!, {r0 - r11} \n\t"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e2659407..a5e9912e2d37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -105,11 +105,13 @@ config ARCH_NO_VIRT_TO_BUS
105config PPC 105config PPC
106 bool 106 bool
107 default y 107 default y
108 select HAVE_DYNAMIC_FTRACE
109 select HAVE_FTRACE
108 select HAVE_IDE 110 select HAVE_IDE
109 select HAVE_OPROFILE
110 select HAVE_KPROBES 111 select HAVE_KPROBES
111 select HAVE_KRETPROBES 112 select HAVE_KRETPROBES
112 select HAVE_LMB 113 select HAVE_LMB
114 select HAVE_OPROFILE
113 115
114config EARLY_PRINTK 116config EARLY_PRINTK
115 bool 117 bool
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2346d271fbfd..f3f5e2641432 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
12CFLAGS_btext.o += -fPIC 12CFLAGS_btext.o += -fPIC
13endif 13endif
14 14
15ifdef CONFIG_FTRACE
16# Do not trace early boot code
17CFLAGS_REMOVE_cputable.o = -pg
18CFLAGS_REMOVE_prom_init.o = -pg
19
20ifdef CONFIG_DYNAMIC_FTRACE
21# dynamic ftrace setup.
22CFLAGS_REMOVE_ftrace.o = -pg
23endif
24
25endif
26
15obj-y := cputable.o ptrace.o syscalls.o \ 27obj-y := cputable.o ptrace.o syscalls.o \
16 irq.o align.o signal_32.o pmc.o vdso.o \ 28 irq.o align.o signal_32.o pmc.o vdso.o \
17 init_task.o process.o systbl.o idle.o \ 29 init_task.o process.o systbl.o idle.o \
@@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
78obj-$(CONFIG_AUDIT) += audit.o 90obj-$(CONFIG_AUDIT) += audit.o
79obj64-$(CONFIG_AUDIT) += compat_audit.o 91obj64-$(CONFIG_AUDIT) += compat_audit.o
80 92
93obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
94
81obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 95obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
82 96
83ifneq ($(CONFIG_PPC_INDIRECT_IO),y) 97ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d9875c..7231a708af0d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,7 @@
30#include <asm/ppc_asm.h> 30#include <asm/ppc_asm.h>
31#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
32#include <asm/unistd.h> 32#include <asm/unistd.h>
33#include <asm/ftrace.h>
33 34
34#undef SHOW_SYSCALLS 35#undef SHOW_SYSCALLS
35#undef SHOW_SYSCALLS_TASK 36#undef SHOW_SYSCALLS_TASK
@@ -1035,3 +1036,129 @@ machine_check_in_rtas:
1035 /* XXX load up BATs and panic */ 1036 /* XXX load up BATs and panic */
1036 1037
1037#endif /* CONFIG_PPC_RTAS */ 1038#endif /* CONFIG_PPC_RTAS */
1039
1040#ifdef CONFIG_FTRACE
1041#ifdef CONFIG_DYNAMIC_FTRACE
1042_GLOBAL(mcount)
1043_GLOBAL(_mcount)
1044 stwu r1,-48(r1)
1045 stw r3, 12(r1)
1046 stw r4, 16(r1)
1047 stw r5, 20(r1)
1048 stw r6, 24(r1)
1049 mflr r3
1050 stw r7, 28(r1)
1051 mfcr r5
1052 stw r8, 32(r1)
1053 stw r9, 36(r1)
1054 stw r10,40(r1)
1055 stw r3, 44(r1)
1056 stw r5, 8(r1)
1057 subi r3, r3, MCOUNT_INSN_SIZE
1058 .globl mcount_call
1059mcount_call:
1060 bl ftrace_stub
1061 nop
1062 lwz r6, 8(r1)
1063 lwz r0, 44(r1)
1064 lwz r3, 12(r1)
1065 mtctr r0
1066 lwz r4, 16(r1)
1067 mtcr r6
1068 lwz r5, 20(r1)
1069 lwz r6, 24(r1)
1070 lwz r0, 52(r1)
1071 lwz r7, 28(r1)
1072 lwz r8, 32(r1)
1073 mtlr r0
1074 lwz r9, 36(r1)
1075 lwz r10,40(r1)
1076 addi r1, r1, 48
1077 bctr
1078
1079_GLOBAL(ftrace_caller)
1080	/* Based off of objdump output from glibc */
1081 stwu r1,-48(r1)
1082 stw r3, 12(r1)
1083 stw r4, 16(r1)
1084 stw r5, 20(r1)
1085 stw r6, 24(r1)
1086 mflr r3
1087 lwz r4, 52(r1)
1088 mfcr r5
1089 stw r7, 28(r1)
1090 stw r8, 32(r1)
1091 stw r9, 36(r1)
1092 stw r10,40(r1)
1093 stw r3, 44(r1)
1094 stw r5, 8(r1)
1095 subi r3, r3, MCOUNT_INSN_SIZE
1096.globl ftrace_call
1097ftrace_call:
1098 bl ftrace_stub
1099 nop
1100 lwz r6, 8(r1)
1101 lwz r0, 44(r1)
1102 lwz r3, 12(r1)
1103 mtctr r0
1104 lwz r4, 16(r1)
1105 mtcr r6
1106 lwz r5, 20(r1)
1107 lwz r6, 24(r1)
1108 lwz r0, 52(r1)
1109 lwz r7, 28(r1)
1110 lwz r8, 32(r1)
1111 mtlr r0
1112 lwz r9, 36(r1)
1113 lwz r10,40(r1)
1114 addi r1, r1, 48
1115 bctr
1116#else
1117_GLOBAL(mcount)
1118_GLOBAL(_mcount)
1119 stwu r1,-48(r1)
1120 stw r3, 12(r1)
1121 stw r4, 16(r1)
1122 stw r5, 20(r1)
1123 stw r6, 24(r1)
1124 mflr r3
1125 lwz r4, 52(r1)
1126 mfcr r5
1127 stw r7, 28(r1)
1128 stw r8, 32(r1)
1129 stw r9, 36(r1)
1130 stw r10,40(r1)
1131 stw r3, 44(r1)
1132 stw r5, 8(r1)
1133
1134 subi r3, r3, MCOUNT_INSN_SIZE
1135 LOAD_REG_ADDR(r5, ftrace_trace_function)
1136 lwz r5,0(r5)
1137
1138 mtctr r5
1139 bctrl
1140
1141 nop
1142
1143 lwz r6, 8(r1)
1144 lwz r0, 44(r1)
1145 lwz r3, 12(r1)
1146 mtctr r0
1147 lwz r4, 16(r1)
1148 mtcr r6
1149 lwz r5, 20(r1)
1150 lwz r6, 24(r1)
1151 lwz r0, 52(r1)
1152 lwz r7, 28(r1)
1153 lwz r8, 32(r1)
1154 mtlr r0
1155 lwz r9, 36(r1)
1156 lwz r10,40(r1)
1157 addi r1, r1, 48
1158 bctr
1159#endif
1160
1161_GLOBAL(ftrace_stub)
1162 blr
1163
1164#endif /* CONFIG_FTRACE */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b769e55..2f511a969d2c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -31,6 +31,7 @@
31#include <asm/bug.h> 31#include <asm/bug.h>
32#include <asm/ptrace.h> 32#include <asm/ptrace.h>
33#include <asm/irqflags.h> 33#include <asm/irqflags.h>
34#include <asm/ftrace.h>
34 35
35/* 36/*
36 * System calls. 37 * System calls.
@@ -870,3 +871,67 @@ _GLOBAL(enter_prom)
870 ld r0,16(r1) 871 ld r0,16(r1)
871 mtlr r0 872 mtlr r0
872 blr 873 blr
874
875#ifdef CONFIG_FTRACE
876#ifdef CONFIG_DYNAMIC_FTRACE
877_GLOBAL(mcount)
878_GLOBAL(_mcount)
879 /* Taken from output of objdump from lib64/glibc */
880 mflr r3
881 stdu r1, -112(r1)
882 std r3, 128(r1)
883 subi r3, r3, MCOUNT_INSN_SIZE
884 .globl mcount_call
885mcount_call:
886 bl ftrace_stub
887 nop
888 ld r0, 128(r1)
889 mtlr r0
890 addi r1, r1, 112
891 blr
892
893_GLOBAL(ftrace_caller)
894 /* Taken from output of objdump from lib64/glibc */
895 mflr r3
896 ld r11, 0(r1)
897 stdu r1, -112(r1)
898 std r3, 128(r1)
899 ld r4, 16(r11)
900 subi r3, r3, MCOUNT_INSN_SIZE
901.globl ftrace_call
902ftrace_call:
903 bl ftrace_stub
904 nop
905 ld r0, 128(r1)
906 mtlr r0
907 addi r1, r1, 112
908_GLOBAL(ftrace_stub)
909 blr
910#else
911_GLOBAL(mcount)
912 blr
913
914_GLOBAL(_mcount)
915 /* Taken from output of objdump from lib64/glibc */
916 mflr r3
917 ld r11, 0(r1)
918 stdu r1, -112(r1)
919 std r3, 128(r1)
920 ld r4, 16(r11)
921
922 subi r3, r3, MCOUNT_INSN_SIZE
923 LOAD_REG_ADDR(r5,ftrace_trace_function)
924 ld r5,0(r5)
925 ld r5,0(r5)
926 mtctr r5
927 bctrl
928
929 nop
930 ld r0, 128(r1)
931 mtlr r0
932 addi r1, r1, 112
933_GLOBAL(ftrace_stub)
934 blr
935
936#endif
937#endif
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 000000000000..3855ceb937b0
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,154 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
7 *
8 */
9
10#include <linux/spinlock.h>
11#include <linux/hardirq.h>
12#include <linux/ftrace.h>
13#include <linux/percpu.h>
14#include <linux/init.h>
15#include <linux/list.h>
16
17#include <asm/cacheflush.h>
18#include <asm/ftrace.h>
19
20
21static unsigned int ftrace_nop = 0x60000000;
22
23#ifdef CONFIG_PPC32
24# define GET_ADDR(addr) addr
25#else
26/* PowerPC64's functions are data that points to the functions */
27# define GET_ADDR(addr) *(unsigned long *)addr
28#endif
29
30
31static unsigned int notrace ftrace_calc_offset(long ip, long addr)
32{
33 return (int)(addr - ip);
34}
35
36notrace unsigned char *ftrace_nop_replace(void)
37{
38 return (char *)&ftrace_nop;
39}
40
41notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
42{
43 static unsigned int op;
44
45 /*
46 * It would be nice to just use create_function_call, but that will
47 * update the code itself. Here we need to just return the
48 * instruction that is going to be modified, without modifying the
49 * code.
50 */
51 addr = GET_ADDR(addr);
52
53 /* Set to "bl addr" */
54 op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc);
55
56 /*
57 * No locking needed, this must be called via kstop_machine
58 * which in essence is like running on a uniprocessor machine.
59 */
60 return (unsigned char *)&op;
61}
62
63#ifdef CONFIG_PPC64
64# define _ASM_ALIGN " .align 3 "
65# define _ASM_PTR " .llong "
66#else
67# define _ASM_ALIGN " .align 2 "
68# define _ASM_PTR " .long "
69#endif
70
71notrace int
72ftrace_modify_code(unsigned long ip, unsigned char *old_code,
73 unsigned char *new_code)
74{
75 unsigned replaced;
76 unsigned old = *(unsigned *)old_code;
77 unsigned new = *(unsigned *)new_code;
78 int faulted = 0;
79
80 /*
81 * Note: Due to modules and __init, code can
82 * disappear and change, we need to protect against faulting
83 * as well as code changing.
84 *
85 * No real locking needed, this code is run through
86 * kstop_machine.
87 */
88 asm volatile (
89 "1: lwz %1, 0(%2)\n"
90 " cmpw %1, %5\n"
91 " bne 2f\n"
92 " stwu %3, 0(%2)\n"
93 "2:\n"
94 ".section .fixup, \"ax\"\n"
95 "3: li %0, 1\n"
96 " b 2b\n"
97 ".previous\n"
98 ".section __ex_table,\"a\"\n"
99 _ASM_ALIGN "\n"
100 _ASM_PTR "1b, 3b\n"
101 ".previous"
102 : "=r"(faulted), "=r"(replaced)
103 : "r"(ip), "r"(new),
104 "0"(faulted), "r"(old)
105 : "memory");
106
107 if (replaced != old && replaced != new)
108 faulted = 2;
109
110 if (!faulted)
111 flush_icache_range(ip, ip + 8);
112
113 return faulted;
114}
115
116notrace int ftrace_update_ftrace_func(ftrace_func_t func)
117{
118 unsigned long ip = (unsigned long)(&ftrace_call);
119 unsigned char old[MCOUNT_INSN_SIZE], *new;
120 int ret;
121
122 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
123 new = ftrace_call_replace(ip, (unsigned long)func);
124 ret = ftrace_modify_code(ip, old, new);
125
126 return ret;
127}
128
129notrace int ftrace_mcount_set(unsigned long *data)
130{
131 unsigned long ip = (long)(&mcount_call);
132 unsigned long *addr = data;
133 unsigned char old[MCOUNT_INSN_SIZE], *new;
134
135 /*
136 * Replace the mcount stub with a pointer to the
137 * ip recorder function.
138 */
139 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
140 new = ftrace_call_replace(ip, *addr);
141 *addr = ftrace_modify_code(ip, old, new);
142
143 return 0;
144}
145
146int __init ftrace_dyn_arch_init(void *data)
147{
148 /* This is running in kstop_machine */
149
150 ftrace_mcount_set(data);
151
152 return 0;
153}
154
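
The "bl addr" opcode that ftrace_call_replace() assembles above is PowerPC opcode 18 with the link bit set: 0x48000001 plus the relative byte offset masked to 0x03fffffc. A stand-alone sketch of that arithmetic (addresses are invented, and the PPC64 function-descriptor indirection done by GET_ADDR is left out):

#include <stdio.h>
#include <stdint.h>

/* Build a PowerPC "bl addr" instruction for a call site at ip: opcode 18
 * with LK=1 (0x48000001), relative byte offset in bits 2..25. */
static uint32_t ppc_bl(uint32_t ip, uint32_t addr)
{
	return 0x48000001u | ((addr - ip) & 0x03fffffcu);
}

int main(void)
{
	/* hypothetical call site and _mcount address */
	printf("%#010x\n", ppc_bl(0xc0002000, 0xc0040000));
	return 0;
}
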
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e31aca9208eb..1882bf419fa6 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
120 120
121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) 121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
122 122
123void _memset_io(volatile void __iomem *addr, int c, unsigned long n) 123notrace void
124_memset_io(volatile void __iomem *addr, int c, unsigned long n)
124{ 125{
125 void *p = (void __force *)addr; 126 void *p = (void __force *)addr;
126 u32 lc = c; 127 u32 lc = c;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bcc249d90c4d..dcc946e67099 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
98 98
99int distribute_irqs = 1; 99int distribute_irqs = 1;
100 100
101static inline unsigned long get_hard_enabled(void) 101static inline notrace unsigned long get_hard_enabled(void)
102{ 102{
103 unsigned long enabled; 103 unsigned long enabled;
104 104
@@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
108 return enabled; 108 return enabled;
109} 109}
110 110
111static inline void set_soft_enabled(unsigned long enable) 111static inline notrace void set_soft_enabled(unsigned long enable)
112{ 112{
113 __asm__ __volatile__("stb %0,%1(13)" 113 __asm__ __volatile__("stb %0,%1(13)"
114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); 114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
115} 115}
116 116
117void raw_local_irq_restore(unsigned long en) 117notrace void raw_local_irq_restore(unsigned long en)
118{ 118{
119 /* 119 /*
120 * get_paca()->soft_enabled = en; 120 * get_paca()->soft_enabled = en;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d3ac631cbd26..a8d02506468a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -42,6 +42,7 @@
42#include <asm/div64.h> 42#include <asm/div64.h>
43#include <asm/signal.h> 43#include <asm/signal.h>
44#include <asm/dcr.h> 44#include <asm/dcr.h>
45#include <asm/ftrace.h>
45 46
46#ifdef CONFIG_PPC32 47#ifdef CONFIG_PPC32
47extern void transfer_to_handler(void); 48extern void transfer_to_handler(void);
@@ -67,6 +68,10 @@ EXPORT_SYMBOL(single_step_exception);
67EXPORT_SYMBOL(sys_sigreturn); 68EXPORT_SYMBOL(sys_sigreturn);
68#endif 69#endif
69 70
71#ifdef CONFIG_FTRACE
72EXPORT_SYMBOL(_mcount);
73#endif
74
70EXPORT_SYMBOL(strcpy); 75EXPORT_SYMBOL(strcpy);
71EXPORT_SYMBOL(strncpy); 76EXPORT_SYMBOL(strncpy);
72EXPORT_SYMBOL(strcat); 77EXPORT_SYMBOL(strcat);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5112a4aa801d..19e8fcb9cea8 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -81,7 +81,7 @@ int ucache_bsize;
81 * from the address that it was linked at, so we must use RELOC/PTRRELOC 81 * from the address that it was linked at, so we must use RELOC/PTRRELOC
82 * to access static data (including strings). -- paulus 82 * to access static data (including strings). -- paulus
83 */ 83 */
84unsigned long __init early_init(unsigned long dt_ptr) 84notrace unsigned long __init early_init(unsigned long dt_ptr)
85{ 85{
86 unsigned long offset = reloc_offset(); 86 unsigned long offset = reloc_offset();
87 struct cpu_spec *spec; 87 struct cpu_spec *spec;
@@ -111,7 +111,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
111 * This is called very early on the boot process, after a minimal 111 * This is called very early on the boot process, after a minimal
112 * MMU environment has been set up but before MMU_init is called. 112 * MMU environment has been set up but before MMU_init is called.
113 */ 113 */
114void __init machine_init(unsigned long dt_ptr, unsigned long phys) 114notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
115{ 115{
116 /* Enable early debugging if any specified (see udbg.h) */ 116 /* Enable early debugging if any specified (see udbg.h) */
117 udbg_early_init(); 117 udbg_early_init();
@@ -133,7 +133,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
133 133
134#ifdef CONFIG_BOOKE_WDT 134#ifdef CONFIG_BOOKE_WDT
135/* Checks wdt=x and wdt_period=xx command-line option */ 135/* Checks wdt=x and wdt_period=xx command-line option */
136int __init early_parse_wdt(char *p) 136notrace int __init early_parse_wdt(char *p)
137{ 137{
138 if (p && strncmp(p, "0", 1) != 0) 138 if (p && strncmp(p, "0", 1) != 0)
139 booke_wdt_enabled = 1; 139 booke_wdt_enabled = 1;
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4d72c8f72159..89774177b209 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,5 +1,10 @@
1CFLAGS_bootx_init.o += -fPIC 1CFLAGS_bootx_init.o += -fPIC
2 2
3ifdef CONFIG_FTRACE
4# Do not trace early boot code
5CFLAGS_REMOVE_bootx_init.o = -pg
6endif
7
3obj-y += pic.o setup.o time.o feature.o pci.o \ 8obj-y += pic.o setup.o time.o feature.o pci.o \
4 sleep.o low_i2c.o cache.o pfunc_core.o \ 9 sleep.o low_i2c.o cache.o pfunc_core.o \
5 pfunc_base.o 10 pfunc_base.o
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index eb36f3b746b8..fca9246470b1 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -11,6 +11,8 @@ config SPARC
11config SPARC64 11config SPARC64
12 bool 12 bool
13 default y 13 default y
14 select HAVE_DYNAMIC_FTRACE
15 select HAVE_FTRACE
14 select HAVE_IDE 16 select HAVE_IDE
15 select HAVE_LMB 17 select HAVE_LMB
16 select HAVE_ARCH_KGDB 18 select HAVE_ARCH_KGDB
diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index 6a4d28a4076d..d6d32d178fc8 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
33 33
34config MCOUNT 34config MCOUNT
35 bool 35 bool
36 depends on STACK_DEBUG 36 depends on STACK_DEBUG || FTRACE
37 default y 37 default y
38 38
39config FRAME_POINTER 39config FRAME_POINTER
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index ec4f5ebb1ca6..418b5782096e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \
14 power.o sbus.o sparc64_ksyms.o chmc.o \ 14 power.o sbus.o sparc64_ksyms.o chmc.o \
15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o 15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
16 16
17obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
17obj-$(CONFIG_STACKTRACE) += stacktrace.o 18obj-$(CONFIG_STACKTRACE) += stacktrace.o
18obj-$(CONFIG_PCI) += ebus.o pci_common.o \ 19obj-$(CONFIG_PCI) += ebus.o pci_common.o \
19 pci_psycho.o pci_sabre.o pci_schizo.o \ 20 pci_psycho.o pci_sabre.o pci_schizo.o \
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
new file mode 100644
index 000000000000..4298d0aee713
--- /dev/null
+++ b/arch/sparc64/kernel/ftrace.c
@@ -0,0 +1,94 @@
1#include <linux/spinlock.h>
2#include <linux/hardirq.h>
3#include <linux/ftrace.h>
4#include <linux/percpu.h>
5#include <linux/init.h>
6#include <linux/list.h>
7
8#include <asm/ftrace.h>
9
10static const u32 ftrace_nop = 0x01000000;
11
12notrace unsigned char *ftrace_nop_replace(void)
13{
14 return (char *)&ftrace_nop;
15}
16
17notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
18{
19 static u32 call;
20 s32 off;
21
22 off = ((s32)addr - (s32)ip);
23 call = 0x40000000 | ((u32)off >> 2);
24
25 return (unsigned char *) &call;
26}
27
28notrace int
29ftrace_modify_code(unsigned long ip, unsigned char *old_code,
30 unsigned char *new_code)
31{
32 u32 old = *(u32 *)old_code;
33 u32 new = *(u32 *)new_code;
34 u32 replaced;
35 int faulted;
36
37 __asm__ __volatile__(
38 "1: cas [%[ip]], %[old], %[new]\n"
39 " flush %[ip]\n"
40 " mov 0, %[faulted]\n"
41 "2:\n"
42 " .section .fixup,#alloc,#execinstr\n"
43 " .align 4\n"
44 "3: sethi %%hi(2b), %[faulted]\n"
45 " jmpl %[faulted] + %%lo(2b), %%g0\n"
46 " mov 1, %[faulted]\n"
47 " .previous\n"
48 " .section __ex_table,\"a\"\n"
49 " .align 4\n"
50 " .word 1b, 3b\n"
51 " .previous\n"
52 : "=r" (replaced), [faulted] "=r" (faulted)
53 : [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
54 : "memory");
55
56 if (replaced != old && replaced != new)
57 faulted = 2;
58
59 return faulted;
60}
61
62notrace int ftrace_update_ftrace_func(ftrace_func_t func)
63{
64 unsigned long ip = (unsigned long)(&ftrace_call);
65 unsigned char old[MCOUNT_INSN_SIZE], *new;
66
67 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
68 new = ftrace_call_replace(ip, (unsigned long)func);
69 return ftrace_modify_code(ip, old, new);
70}
71
72notrace int ftrace_mcount_set(unsigned long *data)
73{
74 unsigned long ip = (long)(&mcount_call);
75 unsigned long *addr = data;
76 unsigned char old[MCOUNT_INSN_SIZE], *new;
77
78 /*
79 * Replace the mcount stub with a pointer to the
80 * ip recorder function.
81 */
82 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
83 new = ftrace_call_replace(ip, *addr);
84 *addr = ftrace_modify_code(ip, old, new);
85
86 return 0;
87}
88
89
90int __init ftrace_dyn_arch_init(void *data)
91{
92 ftrace_mcount_set(data);
93 return 0;
94}
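
Two details of the sparc64 code above are worth pulling out: the call opcode is just 0x40000000 plus the word offset from the call site, and ftrace_modify_code() installs it with a compare-and-swap so a site whose contents changed underneath is left alone. Both can be sketched in plain C with a GCC builtin standing in for the "cas" instruction (the kernel's fault handling and the post-store "flush" are omitted, and the values are invented):

#include <stdio.h>
#include <stdint.h>

/* sparc64 "call addr": 0x40000000 | (byte offset >> 2), with the offset
 * measured from the call site itself. */
static uint32_t sparc_call(uint32_t ip, uint32_t addr)
{
	return 0x40000000u | ((addr - ip) >> 2);
}

/* Replace *ip with "new" only if it still holds "old"; return 0 on
 * success, 2 if the word contained something unexpected (mirroring the
 * patch's "faulted = 2" case). */
static int modify_code(uint32_t *ip, uint32_t old, uint32_t new)
{
	uint32_t replaced = __sync_val_compare_and_swap(ip, old, new);

	if (replaced != old && replaced != new)
		return 2;
	return 0;
}

int main(void)
{
	uint32_t insn = 0x01000000;	/* the sparc nop used above */
	uint32_t call = sparc_call(0x1000, 0x2000);

	printf("ret=%d insn=%#010x\n",
	       modify_code(&insn, 0x01000000, call), insn);
	return 0;
}
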
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 8ac0b99f2c55..b80d982a29c6 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -53,6 +53,7 @@
53#include <asm/ns87303.h> 53#include <asm/ns87303.h>
54#include <asm/timer.h> 54#include <asm/timer.h>
55#include <asm/cpudata.h> 55#include <asm/cpudata.h>
56#include <asm/ftrace.h>
56 57
57struct poll { 58struct poll {
58 int fd; 59 int fd;
@@ -112,7 +113,6 @@ EXPORT_SYMBOL(smp_call_function);
112#endif /* CONFIG_SMP */ 113#endif /* CONFIG_SMP */
113 114
114#if defined(CONFIG_MCOUNT) 115#if defined(CONFIG_MCOUNT)
115extern void _mcount(void);
116EXPORT_SYMBOL(_mcount); 116EXPORT_SYMBOL(_mcount);
117#endif 117#endif
118 118
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 9e4534b485c7..7735a7a60533 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -28,10 +28,13 @@ ovstack:
28 .skip OVSTACKSIZE 28 .skip OVSTACKSIZE
29#endif 29#endif
30 .text 30 .text
31 .align 32 31 .align 32
32 .globl mcount, _mcount 32 .globl _mcount
33mcount: 33 .type _mcount,#function
34 .globl mcount
35 .type mcount,#function
34_mcount: 36_mcount:
37mcount:
35#ifdef CONFIG_STACK_DEBUG 38#ifdef CONFIG_STACK_DEBUG
36 /* 39 /*
37 * Check whether %sp is dangerously low. 40 * Check whether %sp is dangerously low.
@@ -55,6 +58,53 @@ _mcount:
55 or %g3, %lo(panicstring), %o0 58 or %g3, %lo(panicstring), %o0
56 call prom_halt 59 call prom_halt
57 nop 60 nop
611:
62#endif
63#ifdef CONFIG_FTRACE
64#ifdef CONFIG_DYNAMIC_FTRACE
65 mov %o7, %o0
66 .globl mcount_call
67mcount_call:
68 call ftrace_stub
69 mov %o0, %o7
70#else
71 sethi %hi(ftrace_trace_function), %g1
72 sethi %hi(ftrace_stub), %g2
73 ldx [%g1 + %lo(ftrace_trace_function)], %g1
74 or %g2, %lo(ftrace_stub), %g2
75 cmp %g1, %g2
76 be,pn %icc, 1f
77 mov %i7, %o1
78 jmpl %g1, %g0
79 mov %o7, %o0
80 /* not reached */
811:
58#endif 82#endif
591: retl 83#endif
84 retl
60 nop 85 nop
86 .size _mcount,.-_mcount
87 .size mcount,.-mcount
88
89#ifdef CONFIG_FTRACE
90 .globl ftrace_stub
91 .type ftrace_stub,#function
92ftrace_stub:
93 retl
94 nop
95 .size ftrace_stub,.-ftrace_stub
96#ifdef CONFIG_DYNAMIC_FTRACE
97 .globl ftrace_caller
98 .type ftrace_caller,#function
99ftrace_caller:
100 mov %i7, %o1
101 mov %o7, %o0
102 .globl ftrace_call
103ftrace_call:
104 call ftrace_stub
105 mov %o0, %o7
106 retl
107 nop
108 .size ftrace_caller,.-ftrace_caller
109#endif
110#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e0edaaa6920a..400135148555 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_KPROBES 24 select HAVE_KPROBES
25 select HAVE_KRETPROBES 25 select HAVE_KRETPROBES
26 select HAVE_DYNAMIC_FTRACE
27 select HAVE_FTRACE
26 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 28 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
27 select HAVE_ARCH_KGDB if !X86_VOYAGER 29 select HAVE_ARCH_KGDB if !X86_VOYAGER
28 30
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..f395fd537c5c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -172,6 +172,14 @@ config IOMMU_LEAK
172 Add a simple leak tracer to the IOMMU code. This is useful when you 172 Add a simple leak tracer to the IOMMU code. This is useful when you
173 are debugging a buggy device driver that leaks IOMMU mappings. 173 are debugging a buggy device driver that leaks IOMMU mappings.
174 174
175config PAGE_FAULT_HANDLERS
176 bool "Custom page fault handlers"
177 depends on DEBUG_KERNEL
178 help
179 Allow the use of custom page fault handlers. A kernel module may
180 register a function that is called on every page fault. Custom
181 handlers are used by some debugging and reverse engineering tools.
182
175# 183#
176# IO delay types: 184# IO delay types:
177# 185#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4769c9..5ff67208d4ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE
10# Do not profile debug utilities
11CFLAGS_REMOVE_tsc_64.o = -pg
12CFLAGS_REMOVE_tsc_32.o = -pg
13CFLAGS_REMOVE_rtc.o = -pg
14endif
15
9# 16#
10# vsyscalls (which work on the user stack) should have 17# vsyscalls (which work on the user stack) should have
11# no stack-protector checks: 18# no stack-protector checks:
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
56obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
57obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
58obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
59obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
60obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 68obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
61obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/spinlock.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144 144
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146static inline const unsigned char*const * find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
162 { -1, NULL } 162 { -1, NULL }
163}; 163};
164 164
165static const unsigned char*const * find_nop_table(void) 165const unsigned char *const *find_nop_table(void)
166{ 166{
167 const unsigned char *const *noptable = intel_nops; 167 const unsigned char *const *noptable = intel_nops;
168 int i; 168 int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
279 struct list_head next; 279 struct list_head next;
280}; 280};
281static LIST_HEAD(smp_alt_modules); 281static LIST_HEAD(smp_alt_modules);
282static DEFINE_SPINLOCK(smp_alt); 282static DEFINE_MUTEX(smp_alt);
283static int smp_mode = 1; /* protected by smp_alt */ 283static int smp_mode = 1; /* protected by smp_alt */
284 284
285void alternatives_smp_module_add(struct module *mod, char *name, 285void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
312 __func__, smp->locks, smp->locks_end, 312 __func__, smp->locks, smp->locks_end,
313 smp->text, smp->text_end, smp->name); 313 smp->text, smp->text_end, smp->name);
314 314
315 spin_lock(&smp_alt); 315 mutex_lock(&smp_alt);
316 list_add_tail(&smp->next, &smp_alt_modules); 316 list_add_tail(&smp->next, &smp_alt_modules);
317 if (boot_cpu_has(X86_FEATURE_UP)) 317 if (boot_cpu_has(X86_FEATURE_UP))
318 alternatives_smp_unlock(smp->locks, smp->locks_end, 318 alternatives_smp_unlock(smp->locks, smp->locks_end,
319 smp->text, smp->text_end); 319 smp->text, smp->text_end);
320 spin_unlock(&smp_alt); 320 mutex_unlock(&smp_alt);
321} 321}
322 322
323void alternatives_smp_module_del(struct module *mod) 323void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
327 if (smp_alt_once || noreplace_smp) 327 if (smp_alt_once || noreplace_smp)
328 return; 328 return;
329 329
330 spin_lock(&smp_alt); 330 mutex_lock(&smp_alt);
331 list_for_each_entry(item, &smp_alt_modules, next) { 331 list_for_each_entry(item, &smp_alt_modules, next) {
332 if (mod != item->mod) 332 if (mod != item->mod)
333 continue; 333 continue;
334 list_del(&item->next); 334 list_del(&item->next);
335 spin_unlock(&smp_alt); 335 mutex_unlock(&smp_alt);
336 DPRINTK("%s: %s\n", __func__, item->name); 336 DPRINTK("%s: %s\n", __func__, item->name);
337 kfree(item); 337 kfree(item);
338 return; 338 return;
339 } 339 }
340 spin_unlock(&smp_alt); 340 mutex_unlock(&smp_alt);
341} 341}
342 342
343void alternatives_smp_switch(int smp) 343void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
359 return; 359 return;
360 BUG_ON(!smp && (num_online_cpus() > 1)); 360 BUG_ON(!smp && (num_online_cpus() > 1));
361 361
362 spin_lock(&smp_alt); 362 mutex_lock(&smp_alt);
363 363
364 /* 364 /*
365 * Avoid unnecessary switches because it forces JIT based VMs to 365 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
383 mod->text, mod->text_end); 383 mod->text, mod->text_end);
384 } 384 }
385 smp_mode = smp; 385 smp_mode = smp;
386 spin_unlock(&smp_alt); 386 mutex_unlock(&smp_alt);
387} 387}
388 388
389#endif 389#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4fa55a2..95e6bbe3665e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
51#include <asm/percpu.h> 51#include <asm/percpu.h>
52#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
53#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
54#include <asm/ftrace.h>
54#include "irq_vectors.h" 55#include "irq_vectors.h"
55 56
56/* 57/*
@@ -1110,6 +1111,77 @@ ENDPROC(xen_failsafe_callback)
1110 1111
1111#endif /* CONFIG_XEN */ 1112#endif /* CONFIG_XEN */
1112 1113
1114#ifdef CONFIG_FTRACE
1115#ifdef CONFIG_DYNAMIC_FTRACE
1116
1117ENTRY(mcount)
1118 pushl %eax
1119 pushl %ecx
1120 pushl %edx
1121 movl 0xc(%esp), %eax
1122 subl $MCOUNT_INSN_SIZE, %eax
1123
1124.globl mcount_call
1125mcount_call:
1126 call ftrace_stub
1127
1128 popl %edx
1129 popl %ecx
1130 popl %eax
1131
1132 ret
1133END(mcount)
1134
1135ENTRY(ftrace_caller)
1136 pushl %eax
1137 pushl %ecx
1138 pushl %edx
1139 movl 0xc(%esp), %eax
1140 movl 0x4(%ebp), %edx
1141 subl $MCOUNT_INSN_SIZE, %eax
1142
1143.globl ftrace_call
1144ftrace_call:
1145 call ftrace_stub
1146
1147 popl %edx
1148 popl %ecx
1149 popl %eax
1150
1151.globl ftrace_stub
1152ftrace_stub:
1153 ret
1154END(ftrace_caller)
1155
1156#else /* ! CONFIG_DYNAMIC_FTRACE */
1157
1158ENTRY(mcount)
1159 cmpl $ftrace_stub, ftrace_trace_function
1160 jnz trace
1161.globl ftrace_stub
1162ftrace_stub:
1163 ret
1164
1165 /* taken from glibc */
1166trace:
1167 pushl %eax
1168 pushl %ecx
1169 pushl %edx
1170 movl 0xc(%esp), %eax
1171 movl 0x4(%ebp), %edx
1172 subl $MCOUNT_INSN_SIZE, %eax
1173
1174 call *ftrace_trace_function
1175
1176 popl %edx
1177 popl %ecx
1178 popl %eax
1179
1180 jmp ftrace_stub
1181END(mcount)
1182#endif /* CONFIG_DYNAMIC_FTRACE */
1183#endif /* CONFIG_FTRACE */
1184
1113.section .rodata,"a" 1185.section .rodata,"a"
1114#include "syscall_table_32.S" 1186#include "syscall_table_32.S"
1115 1187
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..b0f7308f78a6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
51#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h>
54 55
55 .code64 56 .code64
56 57
58#ifdef CONFIG_FTRACE
59#ifdef CONFIG_DYNAMIC_FTRACE
60ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74.globl mcount_call
75mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88END(mcount)
89
90ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106.globl ftrace_call
107ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119.globl ftrace_stub
120ftrace_stub:
121 retq
122END(ftrace_caller)
123
124#else /* ! CONFIG_DYNAMIC_FTRACE */
125ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128.globl ftrace_stub
129ftrace_stub:
130 retq
131
132trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159END(mcount)
160#endif /* CONFIG_DYNAMIC_FTRACE */
161#endif /* CONFIG_FTRACE */
162
57#ifndef CONFIG_PREEMPT 163#ifndef CONFIG_PREEMPT
58#define retint_kernel retint_restore_args 164#define retint_kernel retint_restore_args
59#endif 165#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..ab115cd15fdf
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/alternative.h>
20#include <asm/ftrace.h>
21
22
23/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop;
25
26union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE];
28 struct {
29 char e8;
30 int offset;
31 } __attribute__((packed));
32};
33
34
35static int notrace ftrace_calc_offset(long ip, long addr)
36{
37 return (int)(addr - ip);
38}
39
40notrace unsigned char *ftrace_nop_replace(void)
41{
42 return (char *)ftrace_nop;
43}
44
45notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{
47 static union ftrace_code_union calc;
48
49 calc.e8 = 0xe8;
50 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
51
52 /*
53 * No locking needed, this must be called via kstop_machine
54 * which in essence is like running on a uniprocessor machine.
55 */
56 return calc.code;
57}
58
59notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code)
62{
63 unsigned replaced;
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68
69 /*
70 * Note: Due to modules and __init, code can
71 * disappear and change, we need to protect against faulting
72 * as well as code changing.
73 *
74 * No real locking needed, this code is run through
75 * kstop_machine.
76 */
77 asm volatile (
78 "1: lock\n"
79 " cmpxchg %3, (%2)\n"
80 " jnz 2f\n"
81 " movb %b4, 4(%2)\n"
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93
94 if (replaced != old && replaced != new)
95 faulted = 2;
96
97 return faulted;
98}
99
100notrace int ftrace_update_ftrace_func(ftrace_func_t func)
101{
102 unsigned long ip = (unsigned long)(&ftrace_call);
103 unsigned char old[MCOUNT_INSN_SIZE], *new;
104 int ret;
105
106 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
107 new = ftrace_call_replace(ip, (unsigned long)func);
108 ret = ftrace_modify_code(ip, old, new);
109
110 return ret;
111}
112
113notrace int ftrace_mcount_set(unsigned long *data)
114{
115 unsigned long ip = (long)(&mcount_call);
116 unsigned long *addr = data;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0;
128}
129
130int __init ftrace_dyn_arch_init(void *data)
131{
132 const unsigned char *const *noptable = find_nop_table();
133
134 /* This is running in kstop_machine */
135
136 ftrace_mcount_set(data);
137
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
139
140 return 0;
141}
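
The union above simply lays out the five bytes of an x86 near call: opcode 0xe8 followed by a 32-bit displacement measured from the end of the instruction (ip + MCOUNT_INSN_SIZE). A stand-alone sketch of the same construction, using invented addresses and assuming a little-endian host as on x86:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define MCOUNT_INSN_SIZE 5	/* 0xe8 plus a 32-bit displacement */

/* Fill in the 5-byte "call rel32" that would sit at ip and target addr. */
static void x86_call(uint8_t code[MCOUNT_INSN_SIZE], uint32_t ip, uint32_t addr)
{
	int32_t offset = (int32_t)(addr - (ip + MCOUNT_INSN_SIZE));

	code[0] = 0xe8;
	memcpy(&code[1], &offset, sizeof(offset));	/* little-endian host assumed */
}

int main(void)
{
	uint8_t code[MCOUNT_INSN_SIZE];
	int i;

	x86_call(code, 0x1000, 0x2000);
	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
		printf("%02x ", code[i]);
	printf("\n");
	return 0;
}
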
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..dd7ebee446af 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
1#include <linux/module.h> 1#include <linux/module.h>
2
2#include <asm/checksum.h> 3#include <asm/checksum.h>
3#include <asm/desc.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable.h>
5#include <asm/desc.h>
6#include <asm/ftrace.h>
7
8#ifdef CONFIG_FTRACE
9/* mcount is defined in assembly */
10EXPORT_SYMBOL(mcount);
11#endif
5 12
6/* Networking helper routines. */ 13/* Networking helper routines. */
7EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..88923fd7a6fc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
16#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
107 unsigned long page_list[PAGES_NR]; 109 unsigned long page_list[PAGES_NR];
108 void *control_page; 110 void *control_page;
109 111
112 tracer_disable();
113
110 /* Interrupts aren't acceptable while we reboot */ 114 /* Interrupts aren't acceptable while we reboot */
111 local_irq_disable(); 115 local_irq_disable();
112 116
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db4511..1558fdc174f9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
16#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
184 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
185 void *control_page; 187 void *control_page;
186 188
189 tracer_disable();
190
187 /* Interrupts aren't acceptable while we reboot */ 191 /* Interrupts aren't acceptable while we reboot */
188 local_irq_disable(); 192 local_irq_disable();
189 193
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e2db9ac5c61c..347a7aba8b16 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -185,7 +185,10 @@ void cpu_idle(void)
185 185
186 local_irq_disable(); 186 local_irq_disable();
187 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 187 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
188 /* Don't trace irqs off for idle */
189 stop_critical_timings();
188 idle(); 190 idle();
191 start_critical_timings();
189 } 192 }
190 tick_nohz_restart_sched_tick(); 193 tick_nohz_restart_sched_tick();
191 preempt_enable_no_resched(); 194 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c6eb5c91e5f6..ea090e6cfe39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -165,7 +165,10 @@ void cpu_idle(void)
165 */ 165 */
166 local_irq_disable(); 166 local_irq_disable();
167 enter_idle(); 167 enter_idle();
168 /* Don't trace irqs off for idle */
169 stop_critical_timings();
168 idle(); 170 idle();
171 start_critical_timings();
169 /* In many cases the interrupt that ended idle 172 /* In many cases the interrupt that ended idle
170 has already called exit_idle. But some idle 173 has already called exit_idle. But some idle
171 loops can be woken up without interrupt. */ 174 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d564..4063dfa2a02d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
42#include <asm/topology.h> 42#include <asm/topology.h>
43#include <asm/vgtod.h> 43#include <asm/vgtod.h>
44 44
45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 45#define __vsyscall(nr) \
46 __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
46#define __syscall_clobber "r11","cx","memory" 47#define __syscall_clobber "r11","cx","memory"
47 48
48/* 49/*
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410fb..16ff4bf418d9 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
2 All C exports should go in the respective C files. */ 2 All C exports should go in the respective C files. */
3 3
4#include <linux/module.h> 4#include <linux/module.h>
5#include <net/checksum.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7 6
7#include <net/checksum.h>
8
8#include <asm/processor.h> 9#include <asm/processor.h>
9#include <asm/uaccess.h>
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/uaccess.h>
11#include <asm/desc.h> 12#include <asm/desc.h>
13#include <asm/ftrace.h>
14
15#ifdef CONFIG_FTRACE
16/* mcount is defined in assembly */
17EXPORT_SYMBOL(mcount);
18#endif
12 19
13EXPORT_SYMBOL(kernel_thread); 20EXPORT_SYMBOL(kernel_thread);
14 21
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..84aa2883fe15 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay_$(BITS).o 7lib-y := delay_$(BITS).o
8lib-y += thunk_$(BITS).o
8lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o 9lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
9lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
10 11
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
1/*
2 * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
3 * Copyright 2008 by Steven Rostedt, Red Hat, Inc
4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
 8#include <linux/linkage.h>
9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func
31 .globl \name
32\name:
33 pushl %eax
34 pushl %ecx
35 pushl %edx
 36 /* place EIP in arg1 */
37 movl 3*4(%esp), %eax
38 call \func
39 popl %edx
40 popl %ecx
41 popl %eax
42 ret
43 .endm
44
45 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
46 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif
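A rough C model of what the 32-bit thunk above does, assuming only the trace_hardirqs_on_caller()/trace_hardirqs_off_caller() prototypes introduced by the lockdep change later in this series. The real code must stay in assembly so it can preserve the caller-clobbered registers, but the data flow is simply "forward my own return address as the first argument":

    /* illustrative sketch only -- never built into the kernel like this */
    extern void trace_hardirqs_on_caller(unsigned long caller_addr);

    static void trace_hardirqs_on_thunk_model(void)
    {
            /* 3*4(%esp) in the asm above is exactly this return address */
            trace_hardirqs_on_caller((unsigned long)__builtin_return_address(0));
    }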
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 7 */
7 8
@@ -42,8 +43,22 @@
42#endif 43#endif
43 44
44#ifdef CONFIG_TRACE_IRQFLAGS 45#ifdef CONFIG_TRACE_IRQFLAGS
45 thunk trace_hardirqs_on_thunk,trace_hardirqs_on 46 /* put return address in rdi (arg1) */
46 thunk trace_hardirqs_off_thunk,trace_hardirqs_off 47 .macro thunk_ra name,func
48 .globl \name
49\name:
50 CFI_STARTPROC
51 SAVE_ARGS
 52 /* SAVE_ARGS pushes 9 elements */
53 /* the next element would be the rip */
54 movq 9*8(%rsp), %rdi
55 call \func
56 jmp restore
57 CFI_ENDPROC
58 .endm
59
60 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
61 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif 62#endif
48 63
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8bcb6f40ccb6..42394b353c6a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,6 +49,60 @@
49#define PF_RSVD (1<<3) 49#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 50#define PF_INSTR (1<<4)
51 51
52#ifdef CONFIG_PAGE_FAULT_HANDLERS
53static HLIST_HEAD(pf_handlers); /* protected by RCU */
54static DEFINE_SPINLOCK(pf_handlers_writer);
55
56void register_page_fault_handler(struct pf_handler *new_pfh)
57{
58 unsigned long flags;
59 spin_lock_irqsave(&pf_handlers_writer, flags);
60 hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
61 spin_unlock_irqrestore(&pf_handlers_writer, flags);
62}
63EXPORT_SYMBOL_GPL(register_page_fault_handler);
64
65/**
66 * unregister_page_fault_handler:
67 * The caller must ensure @old_pfh is not in use anymore before freeing it.
68 * This function does not guarantee it. The list of handlers is protected by
69 * RCU, so you can do this by e.g. calling synchronize_rcu().
70 */
71void unregister_page_fault_handler(struct pf_handler *old_pfh)
72{
73 unsigned long flags;
74 spin_lock_irqsave(&pf_handlers_writer, flags);
75 hlist_del_rcu(&old_pfh->hlist);
76 spin_unlock_irqrestore(&pf_handlers_writer, flags);
77}
78EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
79#endif
80
81/* returns non-zero if do_page_fault() should return */
82static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
83 unsigned long address)
84{
85#ifdef CONFIG_PAGE_FAULT_HANDLERS
86 int ret = 0;
87 struct pf_handler *cur;
88 struct hlist_node *ncur;
89
90 if (hlist_empty(&pf_handlers))
91 return 0;
92
93 rcu_read_lock();
94 hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
95 ret = cur->handler(regs, error_code, address);
96 if (ret)
97 break;
98 }
99 rcu_read_unlock();
100 return ret;
101#else
102 return 0;
103#endif
104}
105
52static inline int notify_page_fault(struct pt_regs *regs) 106static inline int notify_page_fault(struct pt_regs *regs)
53{ 107{
54#ifdef CONFIG_KPROBES 108#ifdef CONFIG_KPROBES
@@ -606,6 +660,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
606 660
607 if (notify_page_fault(regs)) 661 if (notify_page_fault(regs))
608 return; 662 return;
663 if (handle_custom_pf(regs, error_code, address))
664 return;
609 665
610 /* 666 /*
611 * We fault-in kernel-space virtual memory on-demand. The 667 * We fault-in kernel-space virtual memory on-demand. The
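A minimal sketch of how a module would use the new CONFIG_PAGE_FAULT_HANDLERS hooks above; my_pf_handler and my_pfh are illustrative names, and the handler's return value follows the convention in handle_custom_pf() (non-zero means the fault was consumed and do_page_fault() returns):

    #include <linux/kdebug.h>
    #include <linux/module.h>
    #include <linux/rcupdate.h>
    #include <asm/ptrace.h>

    static int my_pf_handler(struct pt_regs *regs, unsigned long error_code,
                             unsigned long address)
    {
            /* inspect the fault here; returning 0 falls through to the
             * normal page fault path */
            return 0;
    }

    static struct pf_handler my_pfh = {
            .handler = my_pf_handler,
    };

    static int __init my_pf_init(void)
    {
            register_page_fault_handler(&my_pfh);
            return 0;
    }

    static void __exit my_pf_exit(void)
    {
            unregister_page_fault_handler(&my_pfh);
            synchronize_rcu();      /* wait out RCU readers, as the comment above requires */
    }

    module_init(my_pf_init);
    module_exit(my_pf_exit);
    MODULE_LICENSE("GPL");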
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b6..f96eca21ad8f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -710,6 +710,8 @@ void mark_rodata_ro(void)
710 unsigned long start = PFN_ALIGN(_text); 710 unsigned long start = PFN_ALIGN(_text);
711 unsigned long size = PFN_ALIGN(_etext) - start; 711 unsigned long size = PFN_ALIGN(_etext) - start;
712 712
713#ifndef CONFIG_DYNAMIC_FTRACE
714 /* Dynamic tracing modifies the kernel text section */
713 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 715 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
714 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 716 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
715 size >> 10); 717 size >> 10);
@@ -722,6 +724,8 @@ void mark_rodata_ro(void)
722 printk(KERN_INFO "Testing CPA: write protecting again\n"); 724 printk(KERN_INFO "Testing CPA: write protecting again\n");
723 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 725 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
724#endif 726#endif
727#endif /* CONFIG_DYNAMIC_FTRACE */
728
725 start += size; 729 start += size;
726 size = (unsigned long)__end_rodata - start; 730 size = (unsigned long)__end_rodata - start;
727 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 731 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 156e6d7b0e32..a5fd2e06f5c9 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -766,6 +766,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
766void mark_rodata_ro(void) 766void mark_rodata_ro(void)
767{ 767{
768 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 768 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
769 unsigned long rodata_start =
770 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
771
772#ifdef CONFIG_DYNAMIC_FTRACE
773 /* Dynamic tracing modifies the kernel text section */
774 start = rodata_start;
775#endif
769 776
770 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 777 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
771 (end - start) >> 10); 778 (end - start) >> 10);
@@ -775,8 +782,7 @@ void mark_rodata_ro(void)
775 * The rodata section (but not the kernel text!) should also be 782 * The rodata section (but not the kernel text!) should also be
776 * not-executable. 783 * not-executable.
777 */ 784 */
778 start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 785 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
779 set_memory_nx(start, (end - start) >> PAGE_SHIFT);
780 786
781 rodata_test(); 787 rodata_test();
782 788
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
23 23
24#define gtod vdso_vsyscall_gtod_data 24#define gtod vdso_vsyscall_gtod_data
25 25
26static long vdso_fallback_gettime(long clock, struct timespec *ts) 26notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
27{ 27{
28 long ret; 28 long ret;
29 asm("syscall" : "=a" (ret) : 29 asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
31 return ret; 31 return ret;
32} 32}
33 33
34static inline long vgetns(void) 34notrace static inline long vgetns(void)
35{ 35{
36 long v; 36 long v;
37 cycles_t (*vread)(void); 37 cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
40 return (v * gtod->clock.mult) >> gtod->clock.shift; 40 return (v * gtod->clock.mult) >> gtod->clock.shift;
41} 41}
42 42
43static noinline int do_realtime(struct timespec *ts) 43notrace static noinline int do_realtime(struct timespec *ts)
44{ 44{
45 unsigned long seq, ns; 45 unsigned long seq, ns;
46 do { 46 do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
54} 54}
55 55
56/* Copy of the version in kernel/time.c which we cannot directly access */ 56/* Copy of the version in kernel/time.c which we cannot directly access */
57static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) 57notrace static void
58vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
58{ 59{
59 while (nsec >= NSEC_PER_SEC) { 60 while (nsec >= NSEC_PER_SEC) {
60 nsec -= NSEC_PER_SEC; 61 nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
68 ts->tv_nsec = nsec; 69 ts->tv_nsec = nsec;
69} 70}
70 71
71static noinline int do_monotonic(struct timespec *ts) 72notrace static noinline int do_monotonic(struct timespec *ts)
72{ 73{
73 unsigned long seq, ns, secs; 74 unsigned long seq, ns, secs;
74 do { 75 do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
82 return 0; 83 return 0;
83} 84}
84 85
85int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 86notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
86{ 87{
87 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) 88 if (likely(gtod->sysctl_enabled && gtod->clock.vread))
88 switch (clock) { 89 switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
96int clock_gettime(clockid_t, struct timespec *) 97int clock_gettime(clockid_t, struct timespec *)
97 __attribute__((weak, alias("__vdso_clock_gettime"))); 98 __attribute__((weak, alias("__vdso_clock_gettime")));
98 99
99int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 100notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
100{ 101{
101 long ret; 102 long ret;
102 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { 103 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
13#include <asm/vgtod.h> 13#include <asm/vgtod.h>
14#include "vextern.h" 14#include "vextern.h"
15 15
16long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) 16notrace long
17__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
17{ 18{
18 unsigned int p; 19 unsigned int p;
19 20
diff --git a/include/asm-arm/ftrace.h b/include/asm-arm/ftrace.h
new file mode 100644
index 000000000000..584ef9a8e5a5
--- /dev/null
+++ b/include/asm-arm/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_ARM_FTRACE
2#define _ASM_ARM_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_ARM_FTRACE */
diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h
index c042194d3ab5..b1a37876942d 100644
--- a/include/asm-arm/kprobes.h
+++ b/include/asm-arm/kprobes.h
@@ -59,6 +59,7 @@ struct kprobe_ctlblk {
59}; 59};
60 60
61void arch_remove_kprobe(struct kprobe *); 61void arch_remove_kprobe(struct kprobe *);
62void kretprobe_trampoline(void);
62 63
63int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr); 64int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr);
64int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); 65int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h
new file mode 100644
index 000000000000..de921326cca8
--- /dev/null
+++ b/include/asm-powerpc/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_POWERPC_FTRACE
2#define _ASM_POWERPC_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void _mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_POWERPC_FTRACE */
diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h
index ad8c9f7fd0e3..f75a5fc64d2e 100644
--- a/include/asm-powerpc/hw_irq.h
+++ b/include/asm-powerpc/hw_irq.h
@@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void);
59 get_paca()->hard_enabled = 0; \ 59 get_paca()->hard_enabled = 0; \
60 } while(0) 60 } while(0)
61 61
62static inline int irqs_disabled_flags(unsigned long flags)
63{
64 return flags == 0;
65}
66
62#else 67#else
63 68
64#if defined(CONFIG_BOOKE) 69#if defined(CONFIG_BOOKE)
@@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags)
113#define hard_irq_enable() local_irq_enable() 118#define hard_irq_enable() local_irq_enable()
114#define hard_irq_disable() local_irq_disable() 119#define hard_irq_disable() local_irq_disable()
115 120
121static inline int irqs_disabled_flags(unsigned long flags)
122{
123 return (flags & MSR_EE) == 0;
124}
125
116#endif /* CONFIG_PPC64 */ 126#endif /* CONFIG_PPC64 */
117 127
118/* 128/*
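The new irqs_disabled_flags() helper lets generic code ask whether a previously saved flags word had interrupts disabled, without re-reading live state. A hedged sketch of the pattern (entry_irqs_were_off() is an illustrative name):

    #include <linux/irqflags.h>

    static int entry_irqs_were_off(void)
    {
            unsigned long flags;

            local_save_flags(flags);            /* capture once */
            return irqs_disabled_flags(flags);  /* query the snapshot later */
    }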
diff --git a/include/asm-sparc64/ftrace.h b/include/asm-sparc64/ftrace.h
new file mode 100644
index 000000000000..f76a40a338bb
--- /dev/null
+++ b/include/asm-sparc64/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_SPARC64_FTRACE
2#define _ASM_SPARC64_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void _mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_SPARC64_FTRACE */
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 1f6a9ca10126..f6aa18eadf71 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
72static inline void alternatives_smp_switch(int smp) {} 72static inline void alternatives_smp_switch(int smp) {}
73#endif /* CONFIG_SMP */ 73#endif /* CONFIG_SMP */
74 74
75const unsigned char *const *find_nop_table(void);
76
75/* 77/*
76 * Alternative instructions for different CPU types or capabilities. 78 * Alternative instructions for different CPU types or capabilities.
77 * 79 *
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
new file mode 100644
index 000000000000..c184441133f2
--- /dev/null
+++ b/include/asm-x86/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_X86_FTRACE
 2#define _ASM_X86_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void mcount(void);
10#endif
11
12#endif /* CONFIG_FTRACE */
13
14#endif /* _ASM_X86_FTRACE */
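MCOUNT_INSN_SIZE is 5 on x86 because the compiler-emitted "call mcount" is a one-byte 0xE8 opcode plus a rel32 displacement. A hedged sketch of how a dynamic-ftrace arch backend typically rebuilds such a call site (example_call_replace() and the union layout are illustrative; the in-tree arch code may differ in detail):

    #include <linux/types.h>

    union ftrace_code_union {
            char code[5];                   /* MCOUNT_INSN_SIZE bytes */
            struct {
                    char e8;                /* 0xe8: near relative call */
                    int offset;             /* rel32, relative to the next insn */
            } __attribute__((packed));
    };

    static unsigned char *example_call_replace(unsigned long ip, unsigned long addr)
    {
            static union ftrace_code_union calc;

            calc.e8     = 0xe8;
            /* displacement is measured from the end of the 5-byte call */
            calc.offset = (int)(addr - (ip + 5 /* MCOUNT_INSN_SIZE */));
            return calc.code;
    }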
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970e..24d71b1eb189 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void)
179 * have a reliable stack. x86_64 only. 179 * have a reliable stack. x86_64 only.
180 */ 180 */
181#define SWAPGS_UNSAFE_STACK swapgs 181#define SWAPGS_UNSAFE_STACK swapgs
182#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
183#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
184#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk 182#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
185#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 183#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
186 TRACE_IRQS_ON; \ 184 TRACE_IRQS_ON; \
@@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void)
192 TRACE_IRQS_OFF; 190 TRACE_IRQS_OFF;
193 191
194#else 192#else
195#define ARCH_TRACE_IRQS_ON \
196 pushl %eax; \
197 pushl %ecx; \
198 pushl %edx; \
199 call trace_hardirqs_on; \
200 popl %edx; \
201 popl %ecx; \
202 popl %eax;
203
204#define ARCH_TRACE_IRQS_OFF \
205 pushl %eax; \
206 pushl %ecx; \
207 pushl %edx; \
208 call trace_hardirqs_off; \
209 popl %edx; \
210 popl %ecx; \
211 popl %eax;
212
213#define ARCH_LOCKDEP_SYS_EXIT \ 193#define ARCH_LOCKDEP_SYS_EXIT \
214 pushl %eax; \ 194 pushl %eax; \
215 pushl %ecx; \ 195 pushl %ecx; \
@@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void)
223#endif 203#endif
224 204
225#ifdef CONFIG_TRACE_IRQFLAGS 205#ifdef CONFIG_TRACE_IRQFLAGS
226# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON 206# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
227# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF 207# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
228#else 208#else
229# define TRACE_IRQS_ON 209# define TRACE_IRQS_ON
230# define TRACE_IRQS_OFF 210# define TRACE_IRQS_OFF
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb59ba1..a80f2d6cc737 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs);
35extern unsigned long oops_begin(void); 35extern unsigned long oops_begin(void);
36extern void oops_end(unsigned long, struct pt_regs *, int signr); 36extern void oops_end(unsigned long, struct pt_regs *, int signr);
37 37
38struct pf_handler {
39 struct hlist_node hlist;
40 int (*handler)(struct pt_regs *regs, unsigned long error_code,
41 unsigned long address);
42};
43
44extern void register_page_fault_handler(struct pf_handler *new_pfh);
45extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
46
38#endif 47#endif
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 17b3700949bf..6b66ff905af0 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -24,7 +24,8 @@ enum vsyscall_num {
24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) 24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
25#define __section_vsyscall_clock __attribute__ \ 25#define __section_vsyscall_clock __attribute__ \
26 ((unused, __section__ (".vsyscall_clock"),aligned(16))) 26 ((unused, __section__ (".vsyscall_clock"),aligned(16)))
27#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) 27#define __vsyscall_fn \
28 __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
28 29
29#define VGETCPU_RDTSCP 1 30#define VGETCPU_RDTSCP 1
30#define VGETCPU_LSL 2 31#define VGETCPU_LSL 2
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
new file mode 100644
index 000000000000..3121b95443d9
--- /dev/null
+++ b/include/linux/ftrace.h
@@ -0,0 +1,143 @@
1#ifndef _LINUX_FTRACE_H
2#define _LINUX_FTRACE_H
3
4#ifdef CONFIG_FTRACE
5
6#include <linux/linkage.h>
7#include <linux/fs.h>
8
9extern int ftrace_enabled;
10extern int
11ftrace_enable_sysctl(struct ctl_table *table, int write,
12 struct file *filp, void __user *buffer, size_t *lenp,
13 loff_t *ppos);
14
15typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
16
17struct ftrace_ops {
18 ftrace_func_t func;
19 struct ftrace_ops *next;
20};
21
22/*
 23 * The ftrace_ops must be static and should also
 24 * be read_mostly. These functions do modify read_mostly variables
 25 * so use them sparingly. Never free an ftrace_ops or modify its
26 * next pointer after it has been registered. Even after unregistering
27 * it, the next pointer may still be used internally.
28 */
29int register_ftrace_function(struct ftrace_ops *ops);
30int unregister_ftrace_function(struct ftrace_ops *ops);
31void clear_ftrace_function(void);
32
33extern void ftrace_stub(unsigned long a0, unsigned long a1);
34
35#else /* !CONFIG_FTRACE */
36# define register_ftrace_function(ops) do { } while (0)
37# define unregister_ftrace_function(ops) do { } while (0)
38# define clear_ftrace_function(ops) do { } while (0)
39#endif /* CONFIG_FTRACE */
40
41#ifdef CONFIG_DYNAMIC_FTRACE
42# define FTRACE_HASHBITS 10
43# define FTRACE_HASHSIZE (1<<FTRACE_HASHBITS)
44
45enum {
46 FTRACE_FL_FREE = (1 << 0),
47 FTRACE_FL_FAILED = (1 << 1),
48 FTRACE_FL_FILTER = (1 << 2),
49 FTRACE_FL_ENABLED = (1 << 3),
50 FTRACE_FL_NOTRACE = (1 << 4),
51 FTRACE_FL_CONVERTED = (1 << 5),
52 FTRACE_FL_FROZEN = (1 << 6),
53};
54
55struct dyn_ftrace {
56 struct hlist_node node;
57 unsigned long ip; /* address of mcount call-site */
58 unsigned long flags;
59};
60
61int ftrace_force_update(void);
62void ftrace_set_filter(unsigned char *buf, int len, int reset);
63
64/* defined in arch */
65extern int ftrace_ip_converted(unsigned long ip);
66extern unsigned char *ftrace_nop_replace(void);
67extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
68extern int ftrace_dyn_arch_init(void *data);
69extern int ftrace_mcount_set(unsigned long *data);
70extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
71 unsigned char *new_code);
72extern int ftrace_update_ftrace_func(ftrace_func_t func);
73extern void ftrace_caller(void);
74extern void ftrace_call(void);
75extern void mcount_call(void);
76
77extern int skip_trace(unsigned long ip);
78
79void ftrace_disable_daemon(void);
80void ftrace_enable_daemon(void);
81
82#else
83# define skip_trace(ip) ({ 0; })
84# define ftrace_force_update() ({ 0; })
85# define ftrace_set_filter(buf, len, reset) do { } while (0)
86# define ftrace_disable_daemon() do { } while (0)
87# define ftrace_enable_daemon() do { } while (0)
88#endif /* CONFIG_DYNAMIC_FTRACE */
89
 90/* totally disable ftrace - cannot be re-enabled after this */
91void ftrace_kill(void);
92
93static inline void tracer_disable(void)
94{
95#ifdef CONFIG_FTRACE
96 ftrace_enabled = 0;
97#endif
98}
99
100#ifdef CONFIG_FRAME_POINTER
101/* TODO: need to fix this for ARM */
102# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
103# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
104# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
105# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
106# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
107# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
108# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
109#else
110# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
111# define CALLER_ADDR1 0UL
112# define CALLER_ADDR2 0UL
113# define CALLER_ADDR3 0UL
114# define CALLER_ADDR4 0UL
115# define CALLER_ADDR5 0UL
116# define CALLER_ADDR6 0UL
117#endif
118
119#ifdef CONFIG_IRQSOFF_TRACER
120 extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
121 extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
122#else
123# define time_hardirqs_on(a0, a1) do { } while (0)
124# define time_hardirqs_off(a0, a1) do { } while (0)
125#endif
126
127#ifdef CONFIG_PREEMPT_TRACER
128 extern void trace_preempt_on(unsigned long a0, unsigned long a1);
129 extern void trace_preempt_off(unsigned long a0, unsigned long a1);
130#else
131# define trace_preempt_on(a0, a1) do { } while (0)
132# define trace_preempt_off(a0, a1) do { } while (0)
133#endif
134
135#ifdef CONFIG_TRACING
136extern void
137ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
138#else
139static inline void
140ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
141#endif
142
143#endif /* _LINUX_FTRACE_H */
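A minimal sketch of the ftrace_ops interface declared above, as a module would use it; my_trace_func and my_ops are illustrative names. Per the header comment, the ops structure is static, marked __read_mostly, and never freed:

    #include <linux/ftrace.h>
    #include <linux/module.h>

    static void my_trace_func(unsigned long ip, unsigned long parent_ip)
    {
            /* runs on every traced function call: ip is the callee,
             * parent_ip its caller -- keep this path as light as possible */
    }

    static struct ftrace_ops my_ops __read_mostly = {
            .func = my_trace_func,
    };

    static int __init my_tracer_init(void)
    {
            return register_ftrace_function(&my_ops);
    }

    static void __exit my_tracer_exit(void)
    {
            unregister_ftrace_function(&my_ops);
    }

    module_init(my_tracer_init);
    module_exit(my_tracer_exit);
    MODULE_LICENSE("GPL");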
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index e600c4e9b8c5..2b1c2e58566e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,10 +12,10 @@
12#define _LINUX_TRACE_IRQFLAGS_H 12#define _LINUX_TRACE_IRQFLAGS_H
13 13
14#ifdef CONFIG_TRACE_IRQFLAGS 14#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_hardirqs_on(void);
16 extern void trace_hardirqs_off(void);
17 extern void trace_softirqs_on(unsigned long ip); 15 extern void trace_softirqs_on(unsigned long ip);
18 extern void trace_softirqs_off(unsigned long ip); 16 extern void trace_softirqs_off(unsigned long ip);
17 extern void trace_hardirqs_on(void);
18 extern void trace_hardirqs_off(void);
19# define trace_hardirq_context(p) ((p)->hardirq_context) 19# define trace_hardirq_context(p) ((p)->hardirq_context)
20# define trace_softirq_context(p) ((p)->softirq_context) 20# define trace_softirq_context(p) ((p)->softirq_context)
21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) 21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
@@ -41,6 +41,15 @@
41# define INIT_TRACE_IRQFLAGS 41# define INIT_TRACE_IRQFLAGS
42#endif 42#endif
43 43
44#if defined(CONFIG_IRQSOFF_TRACER) || \
45 defined(CONFIG_PREEMPT_TRACER)
46 extern void stop_critical_timings(void);
47 extern void start_critical_timings(void);
48#else
49# define stop_critical_timings() do { } while (0)
50# define start_critical_timings() do { } while (0)
51#endif
52
44#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 53#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
45 54
46#include <asm/irqflags.h> 55#include <asm/irqflags.h>
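stop_critical_timings()/start_critical_timings() let code exclude a known-long irqs-off section from the irqsoff/preemptoff maximum search, which is exactly what the idle-loop and printk hunks in this series do. A hedged sketch of the same pattern in driver-style code (slow_poll_with_irqs_off() is illustrative):

    #include <linux/irqflags.h>

    static void slow_poll_with_irqs_off(void)
    {
            stop_critical_timings();        /* don't charge this to max latency */
            /* ... deliberately long polling with interrupts disabled ... */
            start_critical_timings();
    }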
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1036631ff4fa..04a3556bdea6 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
259struct jprobe; 259struct jprobe;
260struct kretprobe; 260struct kretprobe;
261 261
262static inline struct kprobe *get_kprobe(void *addr)
263{
264 return NULL;
265}
262static inline struct kprobe *kprobe_running(void) 266static inline struct kprobe *kprobe_running(void)
263{ 267{
264 return NULL; 268 return NULL;
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 2119610b24f8..14f329c64ba8 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -3,6 +3,8 @@
3 3
4#include <asm/linkage.h> 4#include <asm/linkage.h>
5 5
6#define notrace __attribute__((no_instrument_function))
7
6#ifdef __cplusplus 8#ifdef __cplusplus
7#define CPP_ASMLINKAGE extern "C" 9#define CPP_ASMLINKAGE extern "C"
8#else 10#else
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 430f6adf9762..1290653f9241 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -44,8 +44,8 @@ struct marker {
44 */ 44 */
45 char state; /* Marker state. */ 45 char state; /* Marker state. */
46 char ptype; /* probe type : 0 : single, 1 : multi */ 46 char ptype; /* probe type : 0 : single, 1 : multi */
47 void (*call)(const struct marker *mdata, /* Probe wrapper */ 47 /* Probe wrapper */
48 void *call_private, const char *fmt, ...); 48 void (*call)(const struct marker *mdata, void *call_private, ...);
49 struct marker_probe_closure single; 49 struct marker_probe_closure single;
50 struct marker_probe_closure *multi; 50 struct marker_probe_closure *multi;
51} __attribute__((aligned(8))); 51} __attribute__((aligned(8)));
@@ -58,8 +58,12 @@ struct marker {
58 * Make sure the alignment of the structure in the __markers section will 58 * Make sure the alignment of the structure in the __markers section will
59 * not add unwanted padding between the beginning of the section and the 59 * not add unwanted padding between the beginning of the section and the
60 * structure. Force alignment to the same alignment as the section start. 60 * structure. Force alignment to the same alignment as the section start.
61 *
62 * The "generic" argument controls which marker enabling mechanism must be used.
63 * If generic is true, a variable read is used.
64 * If generic is false, immediate values are used.
61 */ 65 */
62#define __trace_mark(name, call_private, format, args...) \ 66#define __trace_mark(generic, name, call_private, format, args...) \
63 do { \ 67 do { \
64 static const char __mstrtab_##name[] \ 68 static const char __mstrtab_##name[] \
65 __attribute__((section("__markers_strings"))) \ 69 __attribute__((section("__markers_strings"))) \
@@ -72,15 +76,14 @@ struct marker {
72 __mark_check_format(format, ## args); \ 76 __mark_check_format(format, ## args); \
73 if (unlikely(__mark_##name.state)) { \ 77 if (unlikely(__mark_##name.state)) { \
74 (*__mark_##name.call) \ 78 (*__mark_##name.call) \
75 (&__mark_##name, call_private, \ 79 (&__mark_##name, call_private, ## args);\
76 format, ## args); \
77 } \ 80 } \
78 } while (0) 81 } while (0)
79 82
80extern void marker_update_probe_range(struct marker *begin, 83extern void marker_update_probe_range(struct marker *begin,
81 struct marker *end); 84 struct marker *end);
82#else /* !CONFIG_MARKERS */ 85#else /* !CONFIG_MARKERS */
83#define __trace_mark(name, call_private, format, args...) \ 86#define __trace_mark(generic, name, call_private, format, args...) \
84 __mark_check_format(format, ## args) 87 __mark_check_format(format, ## args)
85static inline void marker_update_probe_range(struct marker *begin, 88static inline void marker_update_probe_range(struct marker *begin,
86 struct marker *end) 89 struct marker *end)
@@ -88,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin,
88#endif /* CONFIG_MARKERS */ 91#endif /* CONFIG_MARKERS */
89 92
90/** 93/**
91 * trace_mark - Marker 94 * trace_mark - Marker using code patching
92 * @name: marker name, not quoted. 95 * @name: marker name, not quoted.
93 * @format: format string 96 * @format: format string
94 * @args...: variable argument list 97 * @args...: variable argument list
95 * 98 *
 96 * Places a marker. 99 * Places a marker using an optimized code patching technique (imv_read())
100 * to be enabled when immediate values are present.
97 */ 101 */
98#define trace_mark(name, format, args...) \ 102#define trace_mark(name, format, args...) \
99 __trace_mark(name, NULL, format, ## args) 103 __trace_mark(0, name, NULL, format, ## args)
104
105/**
106 * _trace_mark - Marker using variable read
107 * @name: marker name, not quoted.
108 * @format: format string
109 * @args...: variable argument list
110 *
111 * Places a marker using a standard memory read (_imv_read()) to be
112 * enabled. Should be used for markers in code paths where instruction
113 * modification based enabling is not welcome. (__init and __exit functions,
114 * lockdep, some traps, printk).
115 */
116#define _trace_mark(name, format, args...) \
117 __trace_mark(1, name, NULL, format, ## args)
100 118
101/** 119/**
102 * MARK_NOARGS - Format string for a marker with no argument. 120 * MARK_NOARGS - Format string for a marker with no argument.
@@ -117,9 +135,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
117extern marker_probe_func __mark_empty_function; 135extern marker_probe_func __mark_empty_function;
118 136
119extern void marker_probe_cb(const struct marker *mdata, 137extern void marker_probe_cb(const struct marker *mdata,
120 void *call_private, const char *fmt, ...); 138 void *call_private, ...);
121extern void marker_probe_cb_noarg(const struct marker *mdata, 139extern void marker_probe_cb_noarg(const struct marker *mdata,
122 void *call_private, const char *fmt, ...); 140 void *call_private, ...);
123 141
124/* 142/*
125 * Connect a probe to a marker. 143 * Connect a probe to a marker.
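A hedged sketch of the updated marker usage: the probe wrapper now supplies the format from mdata->format instead of passing it at the call site, so a probe's C prototype takes (probe_private, call_private, fmt, va_list *). The marker_probe_register() prototype shown is an assumption about this kernel series, and subsys_event/my_probe are illustrative names:

    #include <linux/marker.h>
    #include <linux/module.h>

    static void my_probe(void *probe_private, void *call_private,
                         const char *fmt, va_list *args)
    {
            /* decode *args according to fmt ("value %d") if needed */
    }

    static int __init my_marker_init(void)
    {
            return marker_probe_register("subsys_event", "value %d",
                                         my_probe, NULL);
    }

    void subsys_do_work(int value)
    {
            trace_mark(subsys_event, "value %d", value);
    }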
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 23f0c54175cd..72b1a10a59b6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,7 +10,7 @@
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11#include <linux/list.h> 11#include <linux/list.h>
12 12
13#ifdef CONFIG_DEBUG_PREEMPT 13#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
14 extern void add_preempt_count(int val); 14 extern void add_preempt_count(int val);
15 extern void sub_preempt_count(int val); 15 extern void sub_preempt_count(int val);
16#else 16#else
@@ -52,6 +52,34 @@ do { \
52 preempt_check_resched(); \ 52 preempt_check_resched(); \
53} while (0) 53} while (0)
54 54
55/* For debugging and tracer internals only! */
56#define add_preempt_count_notrace(val) \
57 do { preempt_count() += (val); } while (0)
58#define sub_preempt_count_notrace(val) \
59 do { preempt_count() -= (val); } while (0)
60#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
61#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
62
63#define preempt_disable_notrace() \
64do { \
65 inc_preempt_count_notrace(); \
66 barrier(); \
67} while (0)
68
69#define preempt_enable_no_resched_notrace() \
70do { \
71 barrier(); \
72 dec_preempt_count_notrace(); \
73} while (0)
74
75/* preempt_check_resched is OK to trace */
76#define preempt_enable_notrace() \
77do { \
78 preempt_enable_no_resched_notrace(); \
79 barrier(); \
80 preempt_check_resched(); \
81} while (0)
82
55#else 83#else
56 84
57#define preempt_disable() do { } while (0) 85#define preempt_disable() do { } while (0)
@@ -59,6 +87,10 @@ do { \
59#define preempt_enable() do { } while (0) 87#define preempt_enable() do { } while (0)
60#define preempt_check_resched() do { } while (0) 88#define preempt_check_resched() do { } while (0)
61 89
90#define preempt_disable_notrace() do { } while (0)
91#define preempt_enable_no_resched_notrace() do { } while (0)
92#define preempt_enable_notrace() do { } while (0)
93
62#endif 94#endif
63 95
64#ifdef CONFIG_PREEMPT_NOTIFIERS 96#ifdef CONFIG_PREEMPT_NOTIFIERS
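The _notrace variants exist so that tracer-internal code can adjust the preempt count without generating further preempt-off/preempt-on events, which would recurse into the preempt tracer. A hedged sketch (tracer_internal_section() is illustrative):

    #include <linux/preempt.h>

    static void tracer_internal_section(void)
    {
            preempt_disable_notrace();
            /* ... record an event into a per-cpu buffer ... */
            preempt_enable_notrace();
    }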
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d3f847ca8d..aa609858aef0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
246extern void init_idle(struct task_struct *idle, int cpu); 246extern void init_idle(struct task_struct *idle, int cpu);
247extern void init_idle_bootup_task(struct task_struct *idle); 247extern void init_idle_bootup_task(struct task_struct *idle);
248 248
249extern int runqueue_is_locked(void);
250
249extern cpumask_t nohz_cpu_mask; 251extern cpumask_t nohz_cpu_mask;
250#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 252#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
251extern int select_nohz_load_balancer(int cpu); 253extern int select_nohz_load_balancer(int cpu);
@@ -2131,6 +2133,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
2131} 2133}
2132#endif 2134#endif
2133 2135
2136#ifdef CONFIG_TRACING
2137extern void
2138__trace_special(void *__tr, void *__data,
2139 unsigned long arg1, unsigned long arg2, unsigned long arg3);
2140#else
2141static inline void
2142__trace_special(void *__tr, void *__data,
2143 unsigned long arg1, unsigned long arg2, unsigned long arg3)
2144{
2145}
2146#endif
2147
2134extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); 2148extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
2135extern long sched_getaffinity(pid_t pid, cpumask_t *mask); 2149extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
2136 2150
@@ -2225,6 +2239,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
2225} 2239}
2226#endif /* CONFIG_MM_OWNER */ 2240#endif /* CONFIG_MM_OWNER */
2227 2241
2242#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
2243
2228#endif /* __KERNEL__ */ 2244#endif /* __KERNEL__ */
2229 2245
2230#endif 2246#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f462439cc288..bd91987c065f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable;
105extern int block_dump; 105extern int block_dump;
106extern int laptop_mode; 106extern int laptop_mode;
107 107
108extern unsigned long determine_dirtyable_memory(void);
109
108extern int dirty_ratio_handler(struct ctl_table *table, int write, 110extern int dirty_ratio_handler(struct ctl_table *table, int write,
109 struct file *filp, void __user *buffer, size_t *lenp, 111 struct file *filp, void __user *buffer, size_t *lenp,
110 loff_t *ppos); 112 loff_t *ppos);
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..ca2433e84873 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -pg -mno-spe
15
16ifdef CONFIG_FTRACE
17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg
20CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg
24endif
25
14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 26obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15obj-$(CONFIG_STACKTRACE) += stacktrace.o 27obj-$(CONFIG_STACKTRACE) += stacktrace.o
16obj-y += time/ 28obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 81obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
70obj-$(CONFIG_MARKERS) += marker.o 82obj-$(CONFIG_MARKERS) += marker.o
71obj-$(CONFIG_LATENCYTOP) += latencytop.o 83obj-$(CONFIG_LATENCYTOP) += latencytop.o
84obj-$(CONFIG_FTRACE) += trace/
85obj-$(CONFIG_TRACING) += trace/
72 86
73ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 87ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
74# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 88# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..d66d676dc362 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
909 909
910 rt_mutex_init_task(p); 910 rt_mutex_init_task(p);
911 911
912#ifdef CONFIG_TRACE_IRQFLAGS 912#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
915#endif 915#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f087..65548eff029e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
39#include <linux/irqflags.h> 39#include <linux/irqflags.h>
40#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/sections.h> 44#include <asm/sections.h>
44 45
@@ -81,6 +82,8 @@ static int graph_lock(void)
81 __raw_spin_unlock(&lockdep_lock); 82 __raw_spin_unlock(&lockdep_lock);
82 return 0; 83 return 0;
83 } 84 }
85 /* prevent any recursions within lockdep from causing deadlocks */
86 current->lockdep_recursion++;
84 return 1; 87 return 1;
85} 88}
86 89
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
89 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 92 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
90 return DEBUG_LOCKS_WARN_ON(1); 93 return DEBUG_LOCKS_WARN_ON(1);
91 94
95 current->lockdep_recursion--;
92 __raw_spin_unlock(&lockdep_lock); 96 __raw_spin_unlock(&lockdep_lock);
93 return 0; 97 return 0;
94} 98}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
982 return 1; 986 return 1;
983} 987}
984 988
985#ifdef CONFIG_TRACE_IRQFLAGS 989#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
986/* 990/*
987 * Forwards and backwards subgraph searching, for the purposes of 991 * Forwards and backwards subgraph searching, for the purposes of
988 * proving that two subgraphs can be connected by a new dependency 992 * proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
1680static int mark_lock(struct task_struct *curr, struct held_lock *this, 1684static int mark_lock(struct task_struct *curr, struct held_lock *this,
1681 enum lock_usage_bit new_bit); 1685 enum lock_usage_bit new_bit);
1682 1686
1683#ifdef CONFIG_TRACE_IRQFLAGS 1687#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1684 1688
1685/* 1689/*
1686 * print irq inversion bug: 1690 * print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
2013/* 2017/*
2014 * Hardirqs will be enabled: 2018 * Hardirqs will be enabled:
2015 */ 2019 */
2016void trace_hardirqs_on(void) 2020void trace_hardirqs_on_caller(unsigned long a0)
2017{ 2021{
2018 struct task_struct *curr = current; 2022 struct task_struct *curr = current;
2019 unsigned long ip; 2023 unsigned long ip;
2020 2024
2025 time_hardirqs_on(CALLER_ADDR0, a0);
2026
2021 if (unlikely(!debug_locks || current->lockdep_recursion)) 2027 if (unlikely(!debug_locks || current->lockdep_recursion))
2022 return; 2028 return;
2023 2029
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
2055 curr->hardirq_enable_event = ++curr->irq_events; 2061 curr->hardirq_enable_event = ++curr->irq_events;
2056 debug_atomic_inc(&hardirqs_on_events); 2062 debug_atomic_inc(&hardirqs_on_events);
2057} 2063}
2064EXPORT_SYMBOL(trace_hardirqs_on_caller);
2058 2065
2066void trace_hardirqs_on(void)
2067{
2068 trace_hardirqs_on_caller(CALLER_ADDR0);
2069}
2059EXPORT_SYMBOL(trace_hardirqs_on); 2070EXPORT_SYMBOL(trace_hardirqs_on);
2060 2071
2061/* 2072/*
2062 * Hardirqs were disabled: 2073 * Hardirqs were disabled:
2063 */ 2074 */
2064void trace_hardirqs_off(void) 2075void trace_hardirqs_off_caller(unsigned long a0)
2065{ 2076{
2066 struct task_struct *curr = current; 2077 struct task_struct *curr = current;
2067 2078
2079 time_hardirqs_off(CALLER_ADDR0, a0);
2080
2068 if (unlikely(!debug_locks || current->lockdep_recursion)) 2081 if (unlikely(!debug_locks || current->lockdep_recursion))
2069 return; 2082 return;
2070 2083
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
2082 } else 2095 } else
2083 debug_atomic_inc(&redundant_hardirqs_off); 2096 debug_atomic_inc(&redundant_hardirqs_off);
2084} 2097}
2098EXPORT_SYMBOL(trace_hardirqs_off_caller);
2085 2099
2100void trace_hardirqs_off(void)
2101{
2102 trace_hardirqs_off_caller(CALLER_ADDR0);
2103}
2086EXPORT_SYMBOL(trace_hardirqs_off); 2104EXPORT_SYMBOL(trace_hardirqs_off);
2087 2105
2088/* 2106/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
2246 * Mark a lock with a usage bit, and validate the state transition: 2264 * Mark a lock with a usage bit, and validate the state transition:
2247 */ 2265 */
2248static int mark_lock(struct task_struct *curr, struct held_lock *this, 2266static int mark_lock(struct task_struct *curr, struct held_lock *this,
2249 enum lock_usage_bit new_bit) 2267 enum lock_usage_bit new_bit)
2250{ 2268{
2251 unsigned int new_mask = 1 << new_bit, ret = 1; 2269 unsigned int new_mask = 1 << new_bit, ret = 1;
2252 2270
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
2686 * and also avoid lockdep recursion: 2704 * and also avoid lockdep recursion:
2687 */ 2705 */
2688void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2706void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2689 int trylock, int read, int check, unsigned long ip) 2707 int trylock, int read, int check, unsigned long ip)
2690{ 2708{
2691 unsigned long flags; 2709 unsigned long flags;
2692 2710
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2708 2726
2709EXPORT_SYMBOL_GPL(lock_acquire); 2727EXPORT_SYMBOL_GPL(lock_acquire);
2710 2728
2711void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) 2729void lock_release(struct lockdep_map *lock, int nested,
2730 unsigned long ip)
2712{ 2731{
2713 unsigned long flags; 2732 unsigned long flags;
2714 2733
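lockdep now forwards every hardirqs-on/off transition to time_hardirqs_on()/time_hardirqs_off(), passing both its own caller (CALLER_ADDR0) and the address forwarded from the architecture thunk. A hedged, minimal stand-in for a consumer of those hooks (this is not the in-tree irqsoff tracer):

    #include <linux/ftrace.h>

    static unsigned long last_off_ip;

    void time_hardirqs_off(unsigned long a0, unsigned long a1)
    {
            last_off_ip = a1;       /* where interrupts were disabled */
    }

    void time_hardirqs_on(unsigned long a0, unsigned long a1)
    {
            /* a real tracer timestamps both edges and reports the
             * (last_off_ip, a1) pair when a new maximum latency is seen */
    }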
diff --git a/kernel/marker.c b/kernel/marker.c
index b5a9fe1d50d5..1abfb923b761 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
55struct marker_entry { 55struct marker_entry {
56 struct hlist_node hlist; 56 struct hlist_node hlist;
57 char *format; 57 char *format;
58 void (*call)(const struct marker *mdata, /* Probe wrapper */ 58 /* Probe wrapper */
59 void *call_private, const char *fmt, ...); 59 void (*call)(const struct marker *mdata, void *call_private, ...);
60 struct marker_probe_closure single; 60 struct marker_probe_closure single;
61 struct marker_probe_closure *multi; 61 struct marker_probe_closure *multi;
62 int refcount; /* Number of times armed. 0 if disarmed. */ 62 int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
91 * marker_probe_cb Callback that prepares the variable argument list for probes. 91 * marker_probe_cb Callback that prepares the variable argument list for probes.
92 * @mdata: pointer of type struct marker 92 * @mdata: pointer of type struct marker
93 * @call_private: caller site private data 93 * @call_private: caller site private data
94 * @fmt: format string
95 * @...: Variable argument list. 94 * @...: Variable argument list.
96 * 95 *
97 * Since we do not use "typical" pointer based RCU in the 1 argument case, we 96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
98 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
99 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
100 */ 99 */
101void marker_probe_cb(const struct marker *mdata, void *call_private, 100void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
102 const char *fmt, ...)
103{ 101{
104 va_list args; 102 va_list args;
105 char ptype; 103 char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
120 /* Must read the ptr before private data. They are not data 118 /* Must read the ptr before private data. They are not data
121 * dependant, so we put an explicit smp_rmb() here. */ 119 * dependant, so we put an explicit smp_rmb() here. */
122 smp_rmb(); 120 smp_rmb();
123 va_start(args, fmt); 121 va_start(args, call_private);
124 func(mdata->single.probe_private, call_private, fmt, &args); 122 func(mdata->single.probe_private, call_private, mdata->format,
123 &args);
125 va_end(args); 124 va_end(args);
126 } else { 125 } else {
127 struct marker_probe_closure *multi; 126 struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
136 smp_read_barrier_depends(); 135 smp_read_barrier_depends();
137 multi = mdata->multi; 136 multi = mdata->multi;
138 for (i = 0; multi[i].func; i++) { 137 for (i = 0; multi[i].func; i++) {
139 va_start(args, fmt); 138 va_start(args, call_private);
140 multi[i].func(multi[i].probe_private, call_private, fmt, 139 multi[i].func(multi[i].probe_private, call_private,
141 &args); 140 mdata->format, &args);
142 va_end(args); 141 va_end(args);
143 } 142 }
144 } 143 }
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
150 * marker_probe_cb Callback that does not prepare the variable argument list. 149 * marker_probe_cb Callback that does not prepare the variable argument list.
151 * @mdata: pointer of type struct marker 150 * @mdata: pointer of type struct marker
152 * @call_private: caller site private data 151 * @call_private: caller site private data
153 * @fmt: format string
154 * @...: Variable argument list. 152 * @...: Variable argument list.
155 * 153 *
156 * Should be connected to markers "MARK_NOARGS". 154 * Should be connected to markers "MARK_NOARGS".
157 */ 155 */
158void marker_probe_cb_noarg(const struct marker *mdata, 156void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
159 void *call_private, const char *fmt, ...)
160{ 157{
161 va_list args; /* not initialized */ 158 va_list args; /* not initialized */
162 char ptype; 159 char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
172 /* Must read the ptr before private data. They are not data 169 /* Must read the ptr before private data. They are not data
173 * dependant, so we put an explicit smp_rmb() here. */ 170 * dependant, so we put an explicit smp_rmb() here. */
174 smp_rmb(); 171 smp_rmb();
175 func(mdata->single.probe_private, call_private, fmt, &args); 172 func(mdata->single.probe_private, call_private, mdata->format,
173 &args);
176 } else { 174 } else {
177 struct marker_probe_closure *multi; 175 struct marker_probe_closure *multi;
178 int i; 176 int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
186 smp_read_barrier_depends(); 184 smp_read_barrier_depends();
187 multi = mdata->multi; 185 multi = mdata->multi;
188 for (i = 0; multi[i].func; i++) 186 for (i = 0; multi[i].func; i++)
189 multi[i].func(multi[i].probe_private, call_private, fmt, 187 multi[i].func(multi[i].probe_private, call_private,
190 &args); 188 mdata->format, &args);
191 } 189 }
192 preempt_enable(); 190 preempt_enable();
193} 191}
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c32aa3b..ae7d5b9e535d 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1041,7 +1041,9 @@ void release_console_sem(void)
1041 _log_end = log_end; 1041 _log_end = log_end;
1042 con_start = log_end; /* Flush */ 1042 con_start = log_end; /* Flush */
1043 spin_unlock(&logbuf_lock); 1043 spin_unlock(&logbuf_lock);
1044 stop_critical_timings(); /* don't trace print latency */
1044 call_console_drivers(_con_start, _log_end); 1045 call_console_drivers(_con_start, _log_end);
1046 start_critical_timings();
1045 local_irq_restore(flags); 1047 local_irq_restore(flags);
1046 } 1048 }
1047 console_locked = 0; 1049 console_locked = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 3aaa5c8cb421..1ffa76813a01 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
70#include <linux/bootmem.h> 70#include <linux/bootmem.h>
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h>
73 74
74#include <asm/tlb.h> 75#include <asm/tlb.h>
75#include <asm/irq_regs.h> 76#include <asm/irq_regs.h>
@@ -607,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
607# define const_debug static const 608# define const_debug static const
608#endif 609#endif
609 610
611/**
612 * runqueue_is_locked
613 *
614 * Returns true if the current cpu runqueue is locked.
615 * This interface allows printk to be called with the runqueue lock
616 * held and know whether or not it is OK to wake up the klogd.
617 */
618int runqueue_is_locked(void)
619{
620 int cpu = get_cpu();
621 struct rq *rq = cpu_rq(cpu);
622 int ret;
623
624 ret = spin_is_locked(&rq->lock);
625 put_cpu();
626 return ret;
627}
628
610/* 629/*
611 * Debugging: various feature bits 630 * Debugging: various feature bits
612 */ 631 */
@@ -831,7 +850,7 @@ static unsigned long long __cpu_clock(int cpu)
831 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu 850 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
832 * clock constructed from sched_clock(): 851 * clock constructed from sched_clock():
833 */ 852 */
834unsigned long long cpu_clock(int cpu) 853unsigned long long notrace cpu_clock(int cpu)
835{ 854{
836 unsigned long long prev_cpu_time, time, delta_time; 855 unsigned long long prev_cpu_time, time, delta_time;
837 unsigned long flags; 856 unsigned long flags;
@@ -2149,6 +2168,9 @@ out_activate:
2149 success = 1; 2168 success = 1;
2150 2169
2151out_running: 2170out_running:
2171 trace_mark(kernel_sched_wakeup,
2172 "pid %d state %ld ## rq %p task %p rq->curr %p",
2173 p->pid, p->state, rq, p, rq->curr);
2152 check_preempt_curr(rq, p); 2174 check_preempt_curr(rq, p);
2153 2175
2154 p->state = TASK_RUNNING; 2176 p->state = TASK_RUNNING;
@@ -2279,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2279 p->sched_class->task_new(rq, p); 2301 p->sched_class->task_new(rq, p);
2280 inc_nr_running(p, rq); 2302 inc_nr_running(p, rq);
2281 } 2303 }
2304 trace_mark(kernel_sched_wakeup_new,
2305 "pid %d state %ld ## rq %p task %p rq->curr %p",
2306 p->pid, p->state, rq, p, rq->curr);
2282 check_preempt_curr(rq, p); 2307 check_preempt_curr(rq, p);
2283#ifdef CONFIG_SMP 2308#ifdef CONFIG_SMP
2284 if (p->sched_class->task_wake_up) 2309 if (p->sched_class->task_wake_up)
@@ -2451,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
2451 struct mm_struct *mm, *oldmm; 2476 struct mm_struct *mm, *oldmm;
2452 2477
2453 prepare_task_switch(rq, prev, next); 2478 prepare_task_switch(rq, prev, next);
2479 trace_mark(kernel_sched_schedule,
2480 "prev_pid %d next_pid %d prev_state %ld "
2481 "## rq %p prev %p next %p",
2482 prev->pid, next->pid, prev->state,
2483 rq, prev, next);
2454 mm = next->mm; 2484 mm = next->mm;
2455 oldmm = prev->active_mm; 2485 oldmm = prev->active_mm;
2456 /* 2486 /*
@@ -4021,26 +4051,44 @@ void scheduler_tick(void)
4021#endif 4051#endif
4022} 4052}
4023 4053
4024#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 4054#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4055 defined(CONFIG_PREEMPT_TRACER))
4056
4057static inline unsigned long get_parent_ip(unsigned long addr)
4058{
4059 if (in_lock_functions(addr)) {
4060 addr = CALLER_ADDR2;
4061 if (in_lock_functions(addr))
4062 addr = CALLER_ADDR3;
4063 }
4064 return addr;
4065}
4025 4066
4026void __kprobes add_preempt_count(int val) 4067void __kprobes add_preempt_count(int val)
4027{ 4068{
4069#ifdef CONFIG_DEBUG_PREEMPT
4028 /* 4070 /*
4029 * Underflow? 4071 * Underflow?
4030 */ 4072 */
4031 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 4073 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
4032 return; 4074 return;
4075#endif
4033 preempt_count() += val; 4076 preempt_count() += val;
4077#ifdef CONFIG_DEBUG_PREEMPT
4034 /* 4078 /*
4035 * Spinlock count overflowing soon? 4079 * Spinlock count overflowing soon?
4036 */ 4080 */
4037 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 4081 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
4038 PREEMPT_MASK - 10); 4082 PREEMPT_MASK - 10);
4083#endif
4084 if (preempt_count() == val)
4085 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4039} 4086}
4040EXPORT_SYMBOL(add_preempt_count); 4087EXPORT_SYMBOL(add_preempt_count);
4041 4088
4042void __kprobes sub_preempt_count(int val) 4089void __kprobes sub_preempt_count(int val)
4043{ 4090{
4091#ifdef CONFIG_DEBUG_PREEMPT
4044 /* 4092 /*
4045 * Underflow? 4093 * Underflow?
4046 */ 4094 */
@@ -4052,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
4052 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && 4100 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
4053 !(preempt_count() & PREEMPT_MASK))) 4101 !(preempt_count() & PREEMPT_MASK)))
4054 return; 4102 return;
4103#endif
4055 4104
4105 if (preempt_count() == val)
4106 trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4056 preempt_count() -= val; 4107 preempt_count() -= val;
4057} 4108}
4058EXPORT_SYMBOL(sub_preempt_count); 4109EXPORT_SYMBOL(sub_preempt_count);
@@ -5384,7 +5435,7 @@ out_unlock:
5384 return retval; 5435 return retval;
5385} 5436}
5386 5437
5387static const char stat_nam[] = "RSDTtZX"; 5438static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5388 5439
5389void sched_show_task(struct task_struct *p) 5440void sched_show_task(struct task_struct *p)
5390{ 5441{
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e768cd..1a064adab658 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -31,6 +31,7 @@
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h>
34 35
35static noinline void __down(struct semaphore *sem); 36static noinline void __down(struct semaphore *sem);
36static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
@@ -53,6 +54,7 @@ void down(struct semaphore *sem)
53{ 54{
54 unsigned long flags; 55 unsigned long flags;
55 56
57 ftrace_special(sem->count, 0, __LINE__);
56 spin_lock_irqsave(&sem->lock, flags); 58 spin_lock_irqsave(&sem->lock, flags);
57 if (likely(sem->count > 0)) 59 if (likely(sem->count > 0))
58 sem->count--; 60 sem->count--;
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index ae28c8245123..a1fb54c93cdd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
436} 436}
437EXPORT_SYMBOL(_spin_trylock_bh); 437EXPORT_SYMBOL(_spin_trylock_bh);
438 438
439int in_lock_functions(unsigned long addr) 439notrace int in_lock_functions(unsigned long addr)
440{ 440{
441 /* Linker adds these: start and end of __lockfunc functions */ 441 /* Linker adds these: start and end of __lockfunc functions */
442 extern char __lock_text_start[], __lock_text_end[]; 442 extern char __lock_text_start[], __lock_text_end[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..efaf7c5500e9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -46,6 +46,7 @@
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 47#include <linux/acpi.h>
48#include <linux/reboot.h> 48#include <linux/reboot.h>
49#include <linux/ftrace.h>
49 50
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
51#include <asm/processor.h> 52#include <asm/processor.h>
@@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = {
455 .mode = 0644, 456 .mode = 0644,
456 .proc_handler = &proc_dointvec, 457 .proc_handler = &proc_dointvec,
457 }, 458 },
459#ifdef CONFIG_FTRACE
460 {
461 .ctl_name = CTL_UNNUMBERED,
462 .procname = "ftrace_enabled",
463 .data = &ftrace_enabled,
464 .maxlen = sizeof(int),
465 .mode = 0644,
466 .proc_handler = &ftrace_enable_sysctl,
467 },
468#endif
458#ifdef CONFIG_KMOD 469#ifdef CONFIG_KMOD
459 { 470 {
460 .ctl_name = KERN_MODPROBE, 471 .ctl_name = KERN_MODPROBE,
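The entry added above exposes the ftrace_enabled flag as /proc/sys/kernel/ftrace_enabled, routed through ftrace_enable_sysctl() defined in kernel/trace/ftrace.c below. A minimal user-space sketch of flipping it at run time (the program and its error handling are illustrative only):

#include <fcntl.h>
#include <unistd.h>

/* Turn the function tracer off via the new sysctl; write "1" to turn it back on. */
int main(void)
{
        int fd = open("/proc/sys/kernel/ftrace_enabled", O_WRONLY);

        if (fd < 0)
                return 1;
        write(fd, "0", 1);
        close(fd);
        return 0;
}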
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..5c2295b29f2c
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,127 @@
1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3#
4config HAVE_FTRACE
5 bool
6
7config HAVE_DYNAMIC_FTRACE
8 bool
9
10config TRACER_MAX_TRACE
11 bool
12
13config TRACING
14 bool
15 select DEBUG_FS
16 select STACKTRACE
17
18config FTRACE
19 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE
21 select FRAME_POINTER
22 select TRACING
23 select CONTEXT_SWITCH_TRACER
24 help
25 Enable the kernel to trace every kernel function. This is done
26 by using a compiler feature to insert a small, 5-byte No-Operation
27 instruction at the beginning of every kernel function; this NOP
28 sequence is then dynamically patched into a tracer call when
29 tracing is enabled by the administrator. If it's runtime disabled
30 (the bootup default), then the overhead of the instructions is very
31 small and not measurable even in micro-benchmarks.
32
33config IRQSOFF_TRACER
34 bool "Interrupts-off Latency Tracer"
35 default n
36 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME
38 depends on HAVE_FTRACE
39 select TRACE_IRQFLAGS
40 select TRACING
41 select TRACER_MAX_TRACE
42 help
43 This option measures the time spent in irqs-off critical
44 sections, with microsecond accuracy.
45
46 The default measurement method is a maximum search, which is
47 disabled by default and can be runtime (re-)started
48 via:
49
50 echo 0 > /debugfs/tracing/tracing_max_latency
51
52 (Note that kernel size and overhead increase with this option
53 enabled. This option and the preempt-off timing option can be
54 used together or separately.)
55
56config PREEMPT_TRACER
57 bool "Preemption-off Latency Tracer"
58 default n
59 depends on GENERIC_TIME
60 depends on PREEMPT
61 depends on HAVE_FTRACE
62 select TRACING
63 select TRACER_MAX_TRACE
64 help
65 This option measures the time spent in preemption off critical
66 sections, with microsecond accuracy.
67
68 The default measurement method is a maximum search, which is
69 disabled by default and can be runtime (re-)started
70 via:
71
72 echo 0 > /debugfs/tracing/tracing_max_latency
73
74 (Note that kernel size and overhead increase with this option
75 enabled. This option and the irqs-off timing option can be
76 used together or separately.)
77
78config SCHED_TRACER
79 bool "Scheduling Latency Tracer"
80 depends on HAVE_FTRACE
81 select TRACING
82 select CONTEXT_SWITCH_TRACER
83 select TRACER_MAX_TRACE
84 help
85 This tracer tracks the latency of the highest priority task
86 to be scheduled in, starting from the point it has woken up.
87
88config CONTEXT_SWITCH_TRACER
89 bool "Trace process context switches"
90 depends on HAVE_FTRACE
91 select TRACING
92 select MARKERS
93 help
94 This tracer gets called from the context switch and records
95 all switching of tasks.
96
97config DYNAMIC_FTRACE
98 bool "enable/disable ftrace tracepoints dynamically"
99 depends on FTRACE
100 depends on HAVE_DYNAMIC_FTRACE
101 default y
102 help
103 This option will modify all the calls to ftrace dynamically
104 (will patch them out of the binary image and replace them
105 with a No-Op instruction) as they are called. A table is
106 created to dynamically enable them again.
107
108 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
109 has native performance as long as no tracing is active.
110
111 The changes to the code are done by a kernel thread that
112 wakes up once a second and checks to see if any ftrace calls
113 were made. If so, it runs stop_machine (stops all CPUS)
114 and modifies the code to jump over the call to ftrace.
115
116config FTRACE_SELFTEST
117 bool
118
119config FTRACE_STARTUP_TEST
120 bool "Perform a startup test on ftrace"
121 depends on TRACING
122 select FTRACE_SELFTEST
123 help
124 This option performs a series of startup tests on ftrace. On bootup
125 a series of tests is run to verify that the tracer is
126 functioning properly. It will do tests on all the configured
127 tracers of ftrace.
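Both latency tracer help texts above point at /debugfs/tracing/tracing_max_latency for restarting the maximum search. A user-space sketch of that workflow (the /debugfs prefix is the mount point assumed by the help text; the program itself is illustrative):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/debugfs/tracing/tracing_max_latency";
        char buf[64];
        int fd, n;

        fd = open(path, O_WRONLY);              /* reset: restart the maximum search */
        if (fd < 0)
                return 1;
        write(fd, "0", 1);
        close(fd);

        fd = open(path, O_RDONLY);              /* later: read the worst latency seen */
        if (fd < 0)
                return 1;
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
                buf[n] = '\0';
                printf("max latency: %s", buf);
        }
        close(fd);
        return 0;
}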
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..d9efbbfa2bdf
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,22 @@
1
2# Do not instrument the tracer itself:
3
4ifdef CONFIG_FTRACE
5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7
8# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o
11endif
12
13obj-$(CONFIG_FTRACE) += libftrace.o
14
15obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_FTRACE) += trace_functions.o
18obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
19obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
21
22libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..85e841335417
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1703 @@
1/*
2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code in the latency_tracer, that is:
11 *
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III
14 */
15
16#include <linux/stop_machine.h>
17#include <linux/clocksource.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/debugfs.h>
21#include <linux/hardirq.h>
22#include <linux/kthread.h>
23#include <linux/uaccess.h>
24#include <linux/kprobes.h>
25#include <linux/ftrace.h>
26#include <linux/sysctl.h>
27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h>
30
31#include <asm/ftrace.h>
32
33#include "trace.h"
34
35/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled;
38
39/*
40 * ftrace_disabled is set when an anomaly is discovered.
41 * ftrace_disabled is much stronger than ftrace_enabled.
42 */
43static int ftrace_disabled __read_mostly;
44
45static DEFINE_SPINLOCK(ftrace_lock);
46static DEFINE_MUTEX(ftrace_sysctl_lock);
47
48static struct ftrace_ops ftrace_list_end __read_mostly =
49{
50 .func = ftrace_stub,
51};
52
53static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
54ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
55
56static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
57{
58 struct ftrace_ops *op = ftrace_list;
59
60 /* in case someone actually ports this to alpha! */
61 read_barrier_depends();
62
63 while (op != &ftrace_list_end) {
64 /* silly alpha */
65 read_barrier_depends();
66 op->func(ip, parent_ip);
67 op = op->next;
68 };
69}
70
71/**
72 * clear_ftrace_function - reset the ftrace function
73 *
74 * This NULLs the ftrace function and in essence stops
75 * tracing. There may be a lag before every CPU stops calling the old function.
76 */
77void clear_ftrace_function(void)
78{
79 ftrace_trace_function = ftrace_stub;
80}
81
82static int __register_ftrace_function(struct ftrace_ops *ops)
83{
84 /* Should never be called by interrupts */
85 spin_lock(&ftrace_lock);
86
87 ops->next = ftrace_list;
88 /*
89 * We are entering ops into the ftrace_list but another
90 * CPU might be walking that list. We need to make sure
91 * the ops->next pointer is valid before another CPU sees
92 * the ops pointer included into the ftrace_list.
93 */
94 smp_wmb();
95 ftrace_list = ops;
96
97 if (ftrace_enabled) {
98 /*
99 * For one func, simply call it directly.
100 * For more than one func, call the chain.
101 */
102 if (ops->next == &ftrace_list_end)
103 ftrace_trace_function = ops->func;
104 else
105 ftrace_trace_function = ftrace_list_func;
106 }
107
108 spin_unlock(&ftrace_lock);
109
110 return 0;
111}
112
113static int __unregister_ftrace_function(struct ftrace_ops *ops)
114{
115 struct ftrace_ops **p;
116 int ret = 0;
117
118 spin_lock(&ftrace_lock);
119
120 /*
121 * If we are removing the last function, then simply point
122 * to the ftrace_stub.
123 */
124 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
125 ftrace_trace_function = ftrace_stub;
126 ftrace_list = &ftrace_list_end;
127 goto out;
128 }
129
130 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
131 if (*p == ops)
132 break;
133
134 if (*p != ops) {
135 ret = -1;
136 goto out;
137 }
138
139 *p = (*p)->next;
140
141 if (ftrace_enabled) {
142 /* If we only have one func left, then call that directly */
143 if (ftrace_list == &ftrace_list_end ||
144 ftrace_list->next == &ftrace_list_end)
145 ftrace_trace_function = ftrace_list->func;
146 }
147
148 out:
149 spin_unlock(&ftrace_lock);
150
151 return ret;
152}
153
154#ifdef CONFIG_DYNAMIC_FTRACE
155
156static struct task_struct *ftraced_task;
157
158enum {
159 FTRACE_ENABLE_CALLS = (1 << 0),
160 FTRACE_DISABLE_CALLS = (1 << 1),
161 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
162 FTRACE_ENABLE_MCOUNT = (1 << 3),
163 FTRACE_DISABLE_MCOUNT = (1 << 4),
164};
165
166static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock);
177
178struct ftrace_page {
179 struct ftrace_page *next;
180 unsigned long index;
181 struct dyn_ftrace records[];
182};
183
184#define ENTRIES_PER_PAGE \
185 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
186
187/* estimate from running different kernels */
188#define NR_TO_INIT 10000
189
190static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages;
192
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records;
200
201
202#ifdef CONFIG_KPROBES
203static inline void freeze_record(struct dyn_ftrace *rec)
204{
205 if (!(rec->flags & FTRACE_FL_FROZEN)) {
206 rec->flags |= FTRACE_FL_FROZEN;
207 frozen_record_count++;
208 }
209}
210
211static inline void unfreeze_record(struct dyn_ftrace *rec)
212{
213 if (rec->flags & FTRACE_FL_FROZEN) {
214 rec->flags &= ~FTRACE_FL_FROZEN;
215 frozen_record_count--;
216 }
217}
218
219static inline int record_frozen(struct dyn_ftrace *rec)
220{
221 return rec->flags & FTRACE_FL_FROZEN;
222}
223#else
224# define freeze_record(rec) ({ 0; })
225# define unfreeze_record(rec) ({ 0; })
226# define record_frozen(rec) ({ 0; })
227#endif /* CONFIG_KPROBES */
228
229int skip_trace(unsigned long ip)
230{
231 unsigned long fl;
232 struct dyn_ftrace *rec;
233 struct hlist_node *t;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264}
265
266static inline int
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{
269 struct dyn_ftrace *p;
270 struct hlist_node *t;
271 int found = 0;
272
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282
283static inline void
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288
289/* called from kstop_machine */
290static inline void ftrace_del_hash(struct dyn_ftrace *node)
291{
292 hlist_del(&node->node);
293}
294
295static void ftrace_free_rec(struct dyn_ftrace *rec)
296{
297 /* no locking, only called from kstop_machine */
298
299 rec->ip = (unsigned long)ftrace_free_records;
300 ftrace_free_records = rec;
301 rec->flags |= FTRACE_FL_FREE;
302}
303
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
305{
306 struct dyn_ftrace *rec;
307
308 /* First check for freed records */
309 if (ftrace_free_records) {
310 rec = ftrace_free_records;
311
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1);
314 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL;
318 }
319
320 ftrace_free_records = (void *)rec->ip;
321 memset(rec, 0, sizeof(*rec));
322 return rec;
323 }
324
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next)
327 return NULL;
328 ftrace_pages = ftrace_pages->next;
329 }
330
331 return &ftrace_pages->records[ftrace_pages->index++];
332}
333
334static void
335ftrace_record_ip(unsigned long ip)
336{
337 struct dyn_ftrace *node;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343
344 if (!ftrace_enabled || ftrace_disabled)
345 return;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382
383 node->ip = ip;
384
385 ftrace_add_hash(node, key);
386
387 ftraced_trigger = 1;
388
389 out_unlock:
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393
394 /* prevent recursion with scheduler */
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399}
400
401#define FTRACE_ADDR ((long)(ftrace_caller))
402
403static int
404__ftrace_replace_code(struct dyn_ftrace *rec,
405 unsigned char *old, unsigned char *new, int enable)
406{
407 unsigned long ip, fl;
408
409 ip = rec->ip;
410
411 if (ftrace_filtered && enable) {
412 /*
413 * If filtering is on:
414 *
415 * If this record is set to be filtered and
416 * is enabled then do nothing.
417 *
418 * If this record is set to be filtered and
419 * it is not enabled, enable it.
420 *
421 * If this record is not set to be filtered
422 * and it is not enabled do nothing.
423 *
424 * If this record is set not to trace then
425 * do nothing.
426 *
427 * If this record is set not to trace and
428 * it is enabled then disable it.
429 *
430 * If this record is not set to be filtered and
431 * it is enabled, disable it.
432 */
433
434 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
435 FTRACE_FL_ENABLED);
436
437 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
438 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
439 !fl || (fl == FTRACE_FL_NOTRACE))
440 return 0;
441
442 /*
443 * If it is enabled disable it,
444 * otherwise enable it!
445 */
446 if (fl & FTRACE_FL_ENABLED) {
447 /* swap new and old */
448 new = old;
449 old = ftrace_call_replace(ip, FTRACE_ADDR);
450 rec->flags &= ~FTRACE_FL_ENABLED;
451 } else {
452 new = ftrace_call_replace(ip, FTRACE_ADDR);
453 rec->flags |= FTRACE_FL_ENABLED;
454 }
455 } else {
456
457 if (enable) {
458 /*
459 * If this record is set not to trace and is
460 * not enabled, do nothing.
461 */
462 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
463 if (fl == FTRACE_FL_NOTRACE)
464 return 0;
465
466 new = ftrace_call_replace(ip, FTRACE_ADDR);
467 } else
468 old = ftrace_call_replace(ip, FTRACE_ADDR);
469
470 if (enable) {
471 if (rec->flags & FTRACE_FL_ENABLED)
472 return 0;
473 rec->flags |= FTRACE_FL_ENABLED;
474 } else {
475 if (!(rec->flags & FTRACE_FL_ENABLED))
476 return 0;
477 rec->flags &= ~FTRACE_FL_ENABLED;
478 }
479 }
480
481 return ftrace_modify_code(ip, old, new);
482}
483
484static void ftrace_replace_code(int enable)
485{
486 int i, failed;
487 unsigned char *new = NULL, *old = NULL;
488 struct dyn_ftrace *rec;
489 struct ftrace_page *pg;
490
491 if (enable)
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495
496 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) {
498 rec = &pg->records[i];
499
500 /* don't modify code that has already faulted */
501 if (rec->flags & FTRACE_FL_FAILED)
502 continue;
503
504 /* ignore updates to this record's mcount site */
505 if (get_kprobe((void *)rec->ip))
506 continue;
507
508 failed = __ftrace_replace_code(rec, old, new, enable);
509 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
510 rec->flags |= FTRACE_FL_FAILED;
511 if ((system_state == SYSTEM_BOOTING) ||
512 !core_kernel_text(rec->ip)) {
513 ftrace_del_hash(rec);
514 ftrace_free_rec(rec);
515 }
516 }
517 }
518 }
519}
520
521static void ftrace_shutdown_replenish(void)
522{
523 if (ftrace_pages->next)
524 return;
525
526 /* allocate another page */
527 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
528}
529
530static int
531ftrace_code_disable(struct dyn_ftrace *rec)
532{
533 unsigned long ip;
534 unsigned char *nop, *call;
535 int failed;
536
537 ip = rec->ip;
538
539 nop = ftrace_nop_replace();
540 call = ftrace_call_replace(ip, MCOUNT_ADDR);
541
542 failed = ftrace_modify_code(ip, call, nop);
543 if (failed) {
544 rec->flags |= FTRACE_FL_FAILED;
545 return 0;
546 }
547 return 1;
548}
549
550static int __ftrace_update_code(void *ignore);
551
552static int __ftrace_modify_code(void *data)
553{
554 unsigned long addr;
555 int *command = data;
556
557 if (*command & FTRACE_ENABLE_CALLS) {
558 /*
559 * Update any recorded ips now that we have the
560 * machine stopped
561 */
562 __ftrace_update_code(NULL);
563 ftrace_replace_code(1);
564 tracing_on = 1;
565 } else if (*command & FTRACE_DISABLE_CALLS) {
566 ftrace_replace_code(0);
567 tracing_on = 0;
568 }
569
570 if (*command & FTRACE_UPDATE_TRACE_FUNC)
571 ftrace_update_ftrace_func(ftrace_trace_function);
572
573 if (*command & FTRACE_ENABLE_MCOUNT) {
574 addr = (unsigned long)ftrace_record_ip;
575 ftrace_mcount_set(&addr);
576 } else if (*command & FTRACE_DISABLE_MCOUNT) {
577 addr = (unsigned long)ftrace_stub;
578 ftrace_mcount_set(&addr);
579 }
580
581 return 0;
582}
583
584static void ftrace_run_update_code(int command)
585{
586 stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
587}
588
589void ftrace_disable_daemon(void)
590{
591 /* Stop the daemon from calling kstop_machine */
592 mutex_lock(&ftraced_lock);
593 ftraced_stop = 1;
594 mutex_unlock(&ftraced_lock);
595
596 ftrace_force_update();
597}
598
599void ftrace_enable_daemon(void)
600{
601 mutex_lock(&ftraced_lock);
602 ftraced_stop = 0;
603 mutex_unlock(&ftraced_lock);
604
605 ftrace_force_update();
606}
607
608static ftrace_func_t saved_ftrace_func;
609
610static void ftrace_startup(void)
611{
612 int command = 0;
613
614 if (unlikely(ftrace_disabled))
615 return;
616
617 mutex_lock(&ftraced_lock);
618 ftraced_suspend++;
619 if (ftraced_suspend == 1)
620 command |= FTRACE_ENABLE_CALLS;
621
622 if (saved_ftrace_func != ftrace_trace_function) {
623 saved_ftrace_func = ftrace_trace_function;
624 command |= FTRACE_UPDATE_TRACE_FUNC;
625 }
626
627 if (!command || !ftrace_enabled)
628 goto out;
629
630 ftrace_run_update_code(command);
631 out:
632 mutex_unlock(&ftraced_lock);
633}
634
635static void ftrace_shutdown(void)
636{
637 int command = 0;
638
639 if (unlikely(ftrace_disabled))
640 return;
641
642 mutex_lock(&ftraced_lock);
643 ftraced_suspend--;
644 if (!ftraced_suspend)
645 command |= FTRACE_DISABLE_CALLS;
646
647 if (saved_ftrace_func != ftrace_trace_function) {
648 saved_ftrace_func = ftrace_trace_function;
649 command |= FTRACE_UPDATE_TRACE_FUNC;
650 }
651
652 if (!command || !ftrace_enabled)
653 goto out;
654
655 ftrace_run_update_code(command);
656 out:
657 mutex_unlock(&ftraced_lock);
658}
659
660static void ftrace_startup_sysctl(void)
661{
662 int command = FTRACE_ENABLE_MCOUNT;
663
664 if (unlikely(ftrace_disabled))
665 return;
666
667 mutex_lock(&ftraced_lock);
668 /* Force update next time */
669 saved_ftrace_func = NULL;
670 /* ftraced_suspend is true if we want ftrace running */
671 if (ftraced_suspend)
672 command |= FTRACE_ENABLE_CALLS;
673
674 ftrace_run_update_code(command);
675 mutex_unlock(&ftraced_lock);
676}
677
678static void ftrace_shutdown_sysctl(void)
679{
680 int command = FTRACE_DISABLE_MCOUNT;
681
682 if (unlikely(ftrace_disabled))
683 return;
684
685 mutex_lock(&ftraced_lock);
686 /* ftraced_suspend is true if ftrace is running */
687 if (ftraced_suspend)
688 command |= FTRACE_DISABLE_CALLS;
689
690 ftrace_run_update_code(command);
691 mutex_unlock(&ftraced_lock);
692}
693
694static cycle_t ftrace_update_time;
695static unsigned long ftrace_update_cnt;
696unsigned long ftrace_update_tot_cnt;
697
698static int __ftrace_update_code(void *ignore)
699{
700 int i, save_ftrace_enabled;
701 cycle_t start, stop;
702 struct dyn_ftrace *p;
703 struct hlist_node *t, *n;
704 struct hlist_head *head, temp_list;
705
706 /* Don't be recording funcs now */
707 ftrace_record_suspend++;
708 save_ftrace_enabled = ftrace_enabled;
709 ftrace_enabled = 0;
710
711 start = ftrace_now(raw_smp_processor_id());
712 ftrace_update_cnt = 0;
713
714 /* No locks needed, the machine is stopped! */
715 for (i = 0; i < FTRACE_HASHSIZE; i++) {
716 INIT_HLIST_HEAD(&temp_list);
717 head = &ftrace_hash[i];
718
719 /* all CPUS are stopped, we are safe to modify code */
720 hlist_for_each_entry_safe(p, t, n, head, node) {
721 /* Skip over failed records which have not been
722 * freed. */
723 if (p->flags & FTRACE_FL_FAILED)
724 continue;
725
726 /* Unconverted records are always at the head of the
727 * hash bucket. Once we encounter a converted record,
728 * simply skip over to the next bucket. Saves ftraced
729 * some processor cycles (ftrace does its bit for
730 * global warming :-p ). */
731 if (p->flags & (FTRACE_FL_CONVERTED))
732 break;
733
734 /* Ignore updates to this record's mcount site.
735 * Reintroduce this record at the head of this
736 * bucket to attempt to "convert" it again if
737 * the kprobe on it is unregistered before the
738 * next run. */
739 if (get_kprobe((void *)p->ip)) {
740 ftrace_del_hash(p);
741 INIT_HLIST_NODE(&p->node);
742 hlist_add_head(&p->node, &temp_list);
743 continue;
744 }
745
746 /* convert record (i.e, patch mcount-call with NOP) */
747 if (ftrace_code_disable(p)) {
748 p->flags |= FTRACE_FL_CONVERTED;
749 ftrace_update_cnt++;
750 } else {
751 if ((system_state == SYSTEM_BOOTING) ||
752 !core_kernel_text(p->ip)) {
753 ftrace_del_hash(p);
754 ftrace_free_rec(p);
755 }
756 }
757 }
758
759 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
760 hlist_del(&p->node);
761 INIT_HLIST_NODE(&p->node);
762 hlist_add_head(&p->node, head);
763 }
764 }
765
766 stop = ftrace_now(raw_smp_processor_id());
767 ftrace_update_time = stop - start;
768 ftrace_update_tot_cnt += ftrace_update_cnt;
769 ftraced_trigger = 0;
770
771 ftrace_enabled = save_ftrace_enabled;
772 ftrace_record_suspend--;
773
774 return 0;
775}
776
777static int ftrace_update_code(void)
778{
779 if (unlikely(ftrace_disabled) ||
780 !ftrace_enabled || !ftraced_trigger)
781 return 0;
782
783 stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
784
785 return 1;
786}
787
788static int ftraced(void *ignore)
789{
790 unsigned long usecs;
791
792 while (!kthread_should_stop()) {
793
794 set_current_state(TASK_INTERRUPTIBLE);
795
796 /* check once a second */
797 schedule_timeout(HZ);
798
799 if (unlikely(ftrace_disabled))
800 continue;
801
802 mutex_lock(&ftrace_sysctl_lock);
803 mutex_lock(&ftraced_lock);
804 if (!ftraced_suspend && !ftraced_stop &&
805 ftrace_update_code()) {
806 usecs = nsecs_to_usecs(ftrace_update_time);
807 if (ftrace_update_tot_cnt > 100000) {
808 ftrace_update_tot_cnt = 0;
809 pr_info("hm, dftrace overflow: %lu change%s"
810 " (%lu total) in %lu usec%s\n",
811 ftrace_update_cnt,
812 ftrace_update_cnt != 1 ? "s" : "",
813 ftrace_update_tot_cnt,
814 usecs, usecs != 1 ? "s" : "");
815 ftrace_disabled = 1;
816 WARN_ON_ONCE(1);
817 }
818 }
819 mutex_unlock(&ftraced_lock);
820 mutex_unlock(&ftrace_sysctl_lock);
821
822 ftrace_shutdown_replenish();
823 }
824 __set_current_state(TASK_RUNNING);
825 return 0;
826}
827
828static int __init ftrace_dyn_table_alloc(void)
829{
830 struct ftrace_page *pg;
831 int cnt;
832 int i;
833
834 /* allocate a few pages */
835 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
836 if (!ftrace_pages_start)
837 return -1;
838
839 /*
840 * Allocate a few more pages.
841 *
842 * TODO: have some parser search vmlinux before
843 * final linking to find all calls to ftrace.
844 * Then we can:
845 * a) know how many pages to allocate.
846 * and/or
847 * b) set up the table then.
848 *
849 * The dynamic code is still necessary for
850 * modules.
851 */
852
853 pg = ftrace_pages = ftrace_pages_start;
854
855 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
856
857 for (i = 0; i < cnt; i++) {
858 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
859
860 /* If we fail, we'll try later anyway */
861 if (!pg->next)
862 break;
863
864 pg = pg->next;
865 }
866
867 return 0;
868}
869
870enum {
871 FTRACE_ITER_FILTER = (1 << 0),
872 FTRACE_ITER_CONT = (1 << 1),
873 FTRACE_ITER_NOTRACE = (1 << 2),
874 FTRACE_ITER_FAILURES = (1 << 3),
875};
876
877#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
878
879struct ftrace_iterator {
880 loff_t pos;
881 struct ftrace_page *pg;
882 unsigned idx;
883 unsigned flags;
884 unsigned char buffer[FTRACE_BUFF_MAX+1];
885 unsigned buffer_idx;
886 unsigned filtered;
887};
888
889static void *
890t_next(struct seq_file *m, void *v, loff_t *pos)
891{
892 struct ftrace_iterator *iter = m->private;
893 struct dyn_ftrace *rec = NULL;
894
895 (*pos)++;
896
897 retry:
898 if (iter->idx >= iter->pg->index) {
899 if (iter->pg->next) {
900 iter->pg = iter->pg->next;
901 iter->idx = 0;
902 goto retry;
903 }
904 } else {
905 rec = &iter->pg->records[iter->idx++];
906 if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
907 (rec->flags & FTRACE_FL_FAILED)) ||
908
909 ((iter->flags & FTRACE_ITER_FAILURES) &&
910 (!(rec->flags & FTRACE_FL_FAILED) ||
911 (rec->flags & FTRACE_FL_FREE))) ||
912
913 ((iter->flags & FTRACE_ITER_FILTER) &&
914 !(rec->flags & FTRACE_FL_FILTER)) ||
915
916 ((iter->flags & FTRACE_ITER_NOTRACE) &&
917 !(rec->flags & FTRACE_FL_NOTRACE))) {
918 rec = NULL;
919 goto retry;
920 }
921 }
922
923 iter->pos = *pos;
924
925 return rec;
926}
927
928static void *t_start(struct seq_file *m, loff_t *pos)
929{
930 struct ftrace_iterator *iter = m->private;
931 void *p = NULL;
932 loff_t l = -1;
933
934 if (*pos != iter->pos) {
935 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
936 ;
937 } else {
938 l = *pos;
939 p = t_next(m, p, &l);
940 }
941
942 return p;
943}
944
945static void t_stop(struct seq_file *m, void *p)
946{
947}
948
949static int t_show(struct seq_file *m, void *v)
950{
951 struct dyn_ftrace *rec = v;
952 char str[KSYM_SYMBOL_LEN];
953
954 if (!rec)
955 return 0;
956
957 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
958
959 seq_printf(m, "%s\n", str);
960
961 return 0;
962}
963
964static struct seq_operations show_ftrace_seq_ops = {
965 .start = t_start,
966 .next = t_next,
967 .stop = t_stop,
968 .show = t_show,
969};
970
971static int
972ftrace_avail_open(struct inode *inode, struct file *file)
973{
974 struct ftrace_iterator *iter;
975 int ret;
976
977 if (unlikely(ftrace_disabled))
978 return -ENODEV;
979
980 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
981 if (!iter)
982 return -ENOMEM;
983
984 iter->pg = ftrace_pages_start;
985 iter->pos = -1;
986
987 ret = seq_open(file, &show_ftrace_seq_ops);
988 if (!ret) {
989 struct seq_file *m = file->private_data;
990
991 m->private = iter;
992 } else {
993 kfree(iter);
994 }
995
996 return ret;
997}
998
999int ftrace_avail_release(struct inode *inode, struct file *file)
1000{
1001 struct seq_file *m = (struct seq_file *)file->private_data;
1002 struct ftrace_iterator *iter = m->private;
1003
1004 seq_release(inode, file);
1005 kfree(iter);
1006
1007 return 0;
1008}
1009
1010static int
1011ftrace_failures_open(struct inode *inode, struct file *file)
1012{
1013 int ret;
1014 struct seq_file *m;
1015 struct ftrace_iterator *iter;
1016
1017 ret = ftrace_avail_open(inode, file);
1018 if (!ret) {
1019 m = (struct seq_file *)file->private_data;
1020 iter = (struct ftrace_iterator *)m->private;
1021 iter->flags = FTRACE_ITER_FAILURES;
1022 }
1023
1024 return ret;
1025}
1026
1027
1028static void ftrace_filter_reset(int enable)
1029{
1030 struct ftrace_page *pg;
1031 struct dyn_ftrace *rec;
1032 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1033 unsigned i;
1034
1035 /* keep kstop machine from running */
1036 preempt_disable();
1037 if (enable)
1038 ftrace_filtered = 0;
1039 pg = ftrace_pages_start;
1040 while (pg) {
1041 for (i = 0; i < pg->index; i++) {
1042 rec = &pg->records[i];
1043 if (rec->flags & FTRACE_FL_FAILED)
1044 continue;
1045 rec->flags &= ~type;
1046 }
1047 pg = pg->next;
1048 }
1049 preempt_enable();
1050}
1051
1052static int
1053ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1054{
1055 struct ftrace_iterator *iter;
1056 int ret = 0;
1057
1058 if (unlikely(ftrace_disabled))
1059 return -ENODEV;
1060
1061 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1062 if (!iter)
1063 return -ENOMEM;
1064
1065 mutex_lock(&ftrace_regex_lock);
1066 if ((file->f_mode & FMODE_WRITE) &&
1067 !(file->f_flags & O_APPEND))
1068 ftrace_filter_reset(enable);
1069
1070 if (file->f_mode & FMODE_READ) {
1071 iter->pg = ftrace_pages_start;
1072 iter->pos = -1;
1073 iter->flags = enable ? FTRACE_ITER_FILTER :
1074 FTRACE_ITER_NOTRACE;
1075
1076 ret = seq_open(file, &show_ftrace_seq_ops);
1077 if (!ret) {
1078 struct seq_file *m = file->private_data;
1079 m->private = iter;
1080 } else
1081 kfree(iter);
1082 } else
1083 file->private_data = iter;
1084 mutex_unlock(&ftrace_regex_lock);
1085
1086 return ret;
1087}
1088
1089static int
1090ftrace_filter_open(struct inode *inode, struct file *file)
1091{
1092 return ftrace_regex_open(inode, file, 1);
1093}
1094
1095static int
1096ftrace_notrace_open(struct inode *inode, struct file *file)
1097{
1098 return ftrace_regex_open(inode, file, 0);
1099}
1100
1101static ssize_t
1102ftrace_regex_read(struct file *file, char __user *ubuf,
1103 size_t cnt, loff_t *ppos)
1104{
1105 if (file->f_mode & FMODE_READ)
1106 return seq_read(file, ubuf, cnt, ppos);
1107 else
1108 return -EPERM;
1109}
1110
1111static loff_t
1112ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1113{
1114 loff_t ret;
1115
1116 if (file->f_mode & FMODE_READ)
1117 ret = seq_lseek(file, offset, origin);
1118 else
1119 file->f_pos = ret = 1;
1120
1121 return ret;
1122}
1123
1124enum {
1125 MATCH_FULL,
1126 MATCH_FRONT_ONLY,
1127 MATCH_MIDDLE_ONLY,
1128 MATCH_END_ONLY,
1129};
1130
1131static void
1132ftrace_match(unsigned char *buff, int len, int enable)
1133{
1134 char str[KSYM_SYMBOL_LEN];
1135 char *search = NULL;
1136 struct ftrace_page *pg;
1137 struct dyn_ftrace *rec;
1138 int type = MATCH_FULL;
1139 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1140 unsigned i, match = 0, search_len = 0;
1141
1142 for (i = 0; i < len; i++) {
1143 if (buff[i] == '*') {
1144 if (!i) {
1145 search = buff + i + 1;
1146 type = MATCH_END_ONLY;
1147 search_len = len - (i + 1);
1148 } else {
1149 if (type == MATCH_END_ONLY) {
1150 type = MATCH_MIDDLE_ONLY;
1151 } else {
1152 match = i;
1153 type = MATCH_FRONT_ONLY;
1154 }
1155 buff[i] = 0;
1156 break;
1157 }
1158 }
1159 }
1160
1161 /* keep kstop machine from running */
1162 preempt_disable();
1163 if (enable)
1164 ftrace_filtered = 1;
1165 pg = ftrace_pages_start;
1166 while (pg) {
1167 for (i = 0; i < pg->index; i++) {
1168 int matched = 0;
1169 char *ptr;
1170
1171 rec = &pg->records[i];
1172 if (rec->flags & FTRACE_FL_FAILED)
1173 continue;
1174 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1175 switch (type) {
1176 case MATCH_FULL:
1177 if (strcmp(str, buff) == 0)
1178 matched = 1;
1179 break;
1180 case MATCH_FRONT_ONLY:
1181 if (memcmp(str, buff, match) == 0)
1182 matched = 1;
1183 break;
1184 case MATCH_MIDDLE_ONLY:
1185 if (strstr(str, search))
1186 matched = 1;
1187 break;
1188 case MATCH_END_ONLY:
1189 ptr = strstr(str, search);
1190 if (ptr && (ptr[search_len] == 0))
1191 matched = 1;
1192 break;
1193 }
1194 if (matched)
1195 rec->flags |= flag;
1196 }
1197 pg = pg->next;
1198 }
1199 preempt_enable();
1200}
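For reference, the four MATCH_ types map onto the glob forms a user can write into the set_ftrace_filter file created further down in this file: "schedule" (MATCH_FULL), "sched_*" (MATCH_FRONT_ONLY), "*_unlock" (MATCH_END_ONLY) and "*rcu*" (MATCH_MIDDLE_ONLY). A user-space sketch, assuming debugfs is mounted on /debugfs as in the Kconfig help; note that ftrace_regex_write() consumes one whitespace-terminated pattern per write(), and that opening the file for write without O_APPEND clears the previous filter first:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/debugfs/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0)
                return 1;
        /* one newline-terminated pattern per write() */
        write(fd, "sched_*\n", 8);              /* prefix match */
        write(fd, "*_unlock\n", 9);             /* suffix match */
        close(fd);
        return 0;
}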
1201
1202static ssize_t
1203ftrace_regex_write(struct file *file, const char __user *ubuf,
1204 size_t cnt, loff_t *ppos, int enable)
1205{
1206 struct ftrace_iterator *iter;
1207 char ch;
1208 size_t read = 0;
1209 ssize_t ret;
1210
1211 if (!cnt || cnt < 0)
1212 return 0;
1213
1214 mutex_lock(&ftrace_regex_lock);
1215
1216 if (file->f_mode & FMODE_READ) {
1217 struct seq_file *m = file->private_data;
1218 iter = m->private;
1219 } else
1220 iter = file->private_data;
1221
1222 if (!*ppos) {
1223 iter->flags &= ~FTRACE_ITER_CONT;
1224 iter->buffer_idx = 0;
1225 }
1226
1227 ret = get_user(ch, ubuf++);
1228 if (ret)
1229 goto out;
1230 read++;
1231 cnt--;
1232
1233 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1234 /* skip white space */
1235 while (cnt && isspace(ch)) {
1236 ret = get_user(ch, ubuf++);
1237 if (ret)
1238 goto out;
1239 read++;
1240 cnt--;
1241 }
1242
1243 if (isspace(ch)) {
1244 file->f_pos += read;
1245 ret = read;
1246 goto out;
1247 }
1248
1249 iter->buffer_idx = 0;
1250 }
1251
1252 while (cnt && !isspace(ch)) {
1253 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1254 iter->buffer[iter->buffer_idx++] = ch;
1255 else {
1256 ret = -EINVAL;
1257 goto out;
1258 }
1259 ret = get_user(ch, ubuf++);
1260 if (ret)
1261 goto out;
1262 read++;
1263 cnt--;
1264 }
1265
1266 if (isspace(ch)) {
1267 iter->filtered++;
1268 iter->buffer[iter->buffer_idx] = 0;
1269 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1270 iter->buffer_idx = 0;
1271 } else
1272 iter->flags |= FTRACE_ITER_CONT;
1273
1274
1275 file->f_pos += read;
1276
1277 ret = read;
1278 out:
1279 mutex_unlock(&ftrace_regex_lock);
1280
1281 return ret;
1282}
1283
1284static ssize_t
1285ftrace_filter_write(struct file *file, const char __user *ubuf,
1286 size_t cnt, loff_t *ppos)
1287{
1288 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
1289}
1290
1291static ssize_t
1292ftrace_notrace_write(struct file *file, const char __user *ubuf,
1293 size_t cnt, loff_t *ppos)
1294{
1295 return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
1296}
1297
1298static void
1299ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1300{
1301 if (unlikely(ftrace_disabled))
1302 return;
1303
1304 mutex_lock(&ftrace_regex_lock);
1305 if (reset)
1306 ftrace_filter_reset(enable);
1307 if (buf)
1308 ftrace_match(buf, len, enable);
1309 mutex_unlock(&ftrace_regex_lock);
1310}
1311
1312/**
1313 * ftrace_set_filter - set a function to filter on in ftrace
1314 * @buf - the string that holds the function filter text.
1315 * @len - the length of the string.
1316 * @reset - non zero to reset all filters before applying this filter.
1317 *
1318 * Filters denote which functions should be enabled when tracing is enabled.
1319 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
1320 */
1321void ftrace_set_filter(unsigned char *buf, int len, int reset)
1322{
1323 ftrace_set_regex(buf, len, reset, 1);
1324}
1325
1326/**
1327 * ftrace_set_notrace - set a function to not trace in ftrace
1328 * @buf - the string that holds the function notrace text.
1329 * @len - the length of the string.
1330 * @reset - non zero to reset all filters before applying this filter.
1331 *
1332 * Notrace Filters denote which functions should not be enabled when tracing
1333 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
1334 * for tracing.
1335 */
1336void ftrace_set_notrace(unsigned char *buf, int len, int reset)
1337{
1338 ftrace_set_regex(buf, len, reset, 0);
1339}
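In-kernel users can install the same filters directly through the two helpers above. A minimal sketch (example_set_filters() and the patterns are illustrative; the declarations are assumed to come from <linux/ftrace.h>); note that ftrace_match() writes a NUL over the '*' it finds, so the buffers passed in must be writable:

#include <linux/ftrace.h>

static void example_set_filters(void)
{
        /* writable buffers: ftrace_match() edits them in place */
        static unsigned char flt[] = "sched_*";
        static unsigned char ntr[] = "*_unlock";

        ftrace_set_filter(flt, sizeof(flt) - 1, 1);     /* reset = 1: drop old filters */
        ftrace_set_notrace(ntr, sizeof(ntr) - 1, 1);
}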
1340
1341static int
1342ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1343{
1344 struct seq_file *m = (struct seq_file *)file->private_data;
1345 struct ftrace_iterator *iter;
1346
1347 mutex_lock(&ftrace_regex_lock);
1348 if (file->f_mode & FMODE_READ) {
1349 iter = m->private;
1350
1351 seq_release(inode, file);
1352 } else
1353 iter = file->private_data;
1354
1355 if (iter->buffer_idx) {
1356 iter->filtered++;
1357 iter->buffer[iter->buffer_idx] = 0;
1358 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1359 }
1360
1361 mutex_lock(&ftrace_sysctl_lock);
1362 mutex_lock(&ftraced_lock);
1363 if (iter->filtered && ftraced_suspend && ftrace_enabled)
1364 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1365 mutex_unlock(&ftraced_lock);
1366 mutex_unlock(&ftrace_sysctl_lock);
1367
1368 kfree(iter);
1369 mutex_unlock(&ftrace_regex_lock);
1370 return 0;
1371}
1372
1373static int
1374ftrace_filter_release(struct inode *inode, struct file *file)
1375{
1376 return ftrace_regex_release(inode, file, 1);
1377}
1378
1379static int
1380ftrace_notrace_release(struct inode *inode, struct file *file)
1381{
1382 return ftrace_regex_release(inode, file, 0);
1383}
1384
1385static ssize_t
1386ftraced_read(struct file *filp, char __user *ubuf,
1387 size_t cnt, loff_t *ppos)
1388{
1389 /* don't worry about races */
1390 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1391 int r = strlen(buf);
1392
1393 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1394}
1395
1396static ssize_t
1397ftraced_write(struct file *filp, const char __user *ubuf,
1398 size_t cnt, loff_t *ppos)
1399{
1400 char buf[64];
1401 long val;
1402 int ret;
1403
1404 if (cnt >= sizeof(buf))
1405 return -EINVAL;
1406
1407 if (copy_from_user(&buf, ubuf, cnt))
1408 return -EFAULT;
1409
1410 if (strncmp(buf, "enable", 6) == 0)
1411 val = 1;
1412 else if (strncmp(buf, "disable", 7) == 0)
1413 val = 0;
1414 else {
1415 buf[cnt] = 0;
1416
1417 ret = strict_strtoul(buf, 10, &val);
1418 if (ret < 0)
1419 return ret;
1420
1421 val = !!val;
1422 }
1423
1424 if (val)
1425 ftrace_enable_daemon();
1426 else
1427 ftrace_disable_daemon();
1428
1429 filp->f_pos += cnt;
1430
1431 return cnt;
1432}
1433
1434static struct file_operations ftrace_avail_fops = {
1435 .open = ftrace_avail_open,
1436 .read = seq_read,
1437 .llseek = seq_lseek,
1438 .release = ftrace_avail_release,
1439};
1440
1441static struct file_operations ftrace_failures_fops = {
1442 .open = ftrace_failures_open,
1443 .read = seq_read,
1444 .llseek = seq_lseek,
1445 .release = ftrace_avail_release,
1446};
1447
1448static struct file_operations ftrace_filter_fops = {
1449 .open = ftrace_filter_open,
1450 .read = ftrace_regex_read,
1451 .write = ftrace_filter_write,
1452 .llseek = ftrace_regex_lseek,
1453 .release = ftrace_filter_release,
1454};
1455
1456static struct file_operations ftrace_notrace_fops = {
1457 .open = ftrace_notrace_open,
1458 .read = ftrace_regex_read,
1459 .write = ftrace_notrace_write,
1460 .llseek = ftrace_regex_lseek,
1461 .release = ftrace_notrace_release,
1462};
1463
1464static struct file_operations ftraced_fops = {
1465 .open = tracing_open_generic,
1466 .read = ftraced_read,
1467 .write = ftraced_write,
1468};
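ftraced_write() above accepts either an integer or the literal strings "enable"/"disable" and maps them onto ftrace_enable_daemon()/ftrace_disable_daemon(). A user-space sketch of stopping the once-a-second update daemon (path again assumes debugfs mounted on /debugfs):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/debugfs/tracing/ftraced_enabled", O_WRONLY);
        const char *cmd = "disable";            /* "enable", "0" or "1" also work */

        if (fd < 0)
                return 1;
        write(fd, cmd, strlen(cmd));
        close(fd);
        return 0;
}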
1469
1470/**
1471 * ftrace_force_update - force an update to all recording ftrace functions
1472 */
1473int ftrace_force_update(void)
1474{
1475 int ret = 0;
1476
1477 if (unlikely(ftrace_disabled))
1478 return -ENODEV;
1479
1480 mutex_lock(&ftrace_sysctl_lock);
1481 mutex_lock(&ftraced_lock);
1482
1483 /*
1484 * If ftraced_trigger is not set, then there is nothing
1485 * to update.
1486 */
1487 if (ftraced_trigger && !ftrace_update_code())
1488 ret = -EBUSY;
1489
1490 mutex_unlock(&ftraced_lock);
1491 mutex_unlock(&ftrace_sysctl_lock);
1492
1493 return ret;
1494}
1495
1496static void ftrace_force_shutdown(void)
1497{
1498 struct task_struct *task;
1499 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1500
1501 mutex_lock(&ftraced_lock);
1502 task = ftraced_task;
1503 ftraced_task = NULL;
1504 ftraced_suspend = -1;
1505 ftrace_run_update_code(command);
1506 mutex_unlock(&ftraced_lock);
1507
1508 if (task)
1509 kthread_stop(task);
1510}
1511
1512static __init int ftrace_init_debugfs(void)
1513{
1514 struct dentry *d_tracer;
1515 struct dentry *entry;
1516
1517 d_tracer = tracing_init_dentry();
1518
1519 entry = debugfs_create_file("available_filter_functions", 0444,
1520 d_tracer, NULL, &ftrace_avail_fops);
1521 if (!entry)
1522 pr_warning("Could not create debugfs "
1523 "'available_filter_functions' entry\n");
1524
1525 entry = debugfs_create_file("failures", 0444,
1526 d_tracer, NULL, &ftrace_failures_fops);
1527 if (!entry)
1528 pr_warning("Could not create debugfs 'failures' entry\n");
1529
1530 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1531 NULL, &ftrace_filter_fops);
1532 if (!entry)
1533 pr_warning("Could not create debugfs "
1534 "'set_ftrace_filter' entry\n");
1535
1536 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
1537 NULL, &ftrace_notrace_fops);
1538 if (!entry)
1539 pr_warning("Could not create debugfs "
1540 "'set_ftrace_notrace' entry\n");
1541
1542 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1543 NULL, &ftraced_fops);
1544 if (!entry)
1545 pr_warning("Could not create debugfs "
1546 "'ftraced_enabled' entry\n");
1547 return 0;
1548}
1549
1550fs_initcall(ftrace_init_debugfs);
1551
1552static int __init ftrace_dynamic_init(void)
1553{
1554 struct task_struct *p;
1555 unsigned long addr;
1556 int ret;
1557
1558 addr = (unsigned long)ftrace_record_ip;
1559
1560 stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
1561
1562 /* ftrace_dyn_arch_init places the return code in addr */
1563 if (addr) {
1564 ret = (int)addr;
1565 goto failed;
1566 }
1567
1568 ret = ftrace_dyn_table_alloc();
1569 if (ret)
1570 goto failed;
1571
1572 p = kthread_run(ftraced, NULL, "ftraced");
1573 if (IS_ERR(p)) {
1574 ret = -1;
1575 goto failed;
1576 }
1577
1578 last_ftrace_enabled = ftrace_enabled = 1;
1579 ftraced_task = p;
1580
1581 return 0;
1582
1583 failed:
1584 ftrace_disabled = 1;
1585 return ret;
1586}
1587
1588core_initcall(ftrace_dynamic_init);
1589#else
1590# define ftrace_startup() do { } while (0)
1591# define ftrace_shutdown() do { } while (0)
1592# define ftrace_startup_sysctl() do { } while (0)
1593# define ftrace_shutdown_sysctl() do { } while (0)
1594# define ftrace_force_shutdown() do { } while (0)
1595#endif /* CONFIG_DYNAMIC_FTRACE */
1596
1597/**
1598 * ftrace_kill - totally shutdown ftrace
1599 *
1600 * This is a safety measure. If something was detected that seems
1601 * wrong, calling this function will keep ftrace from doing
1602 * any more modifications or updates. It is used when
1603 * something has gone wrong.
1604 */
1605void ftrace_kill(void)
1606{
1607 mutex_lock(&ftrace_sysctl_lock);
1608 ftrace_disabled = 1;
1609 ftrace_enabled = 0;
1610
1611 clear_ftrace_function();
1612 mutex_unlock(&ftrace_sysctl_lock);
1613
1614 /* Try to totally disable ftrace */
1615 ftrace_force_shutdown();
1616}
1617
1618/**
1619 * register_ftrace_function - register a function for profiling
1620 * @ops - ops structure that holds the function for profiling.
1621 *
1622 * Register a function to be called by all functions in the
1623 * kernel.
1624 *
1625 * Note: @ops->func and all the functions it calls must be labeled
1626 * with "notrace", otherwise it will go into a
1627 * recursive loop.
1628 */
1629int register_ftrace_function(struct ftrace_ops *ops)
1630{
1631 int ret;
1632
1633 if (unlikely(ftrace_disabled))
1634 return -1;
1635
1636 mutex_lock(&ftrace_sysctl_lock);
1637 ret = __register_ftrace_function(ops);
1638 ftrace_startup();
1639 mutex_unlock(&ftrace_sysctl_lock);
1640
1641 return ret;
1642}
1643
1644/**
1645 * unregister_ftrace_function - unregister a function for profiling.
1646 * @ops - ops structure that holds the function to unregister
1647 *
1648 * Unregister a function that was added to be called by ftrace profiling.
1649 */
1650int unregister_ftrace_function(struct ftrace_ops *ops)
1651{
1652 int ret;
1653
1654 mutex_lock(&ftrace_sysctl_lock);
1655 ret = __unregister_ftrace_function(ops);
1656 ftrace_shutdown();
1657 mutex_unlock(&ftrace_sysctl_lock);
1658
1659 return ret;
1660}
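Putting the two kernel-doc'd entry points together, a registration sketch (my_trace_func, my_ops and my_tracer_init are illustrative names, not part of this patch). As the comment above warns, the callback and everything it calls must be notrace, or the tracer would trace itself recursively:

#include <linux/ftrace.h>
#include <linux/init.h>

static void notrace my_trace_func(unsigned long ip, unsigned long parent_ip)
{
        /* runs for (nearly) every traced function entry while registered */
}

static struct ftrace_ops my_ops = {
        .func = my_trace_func,
};

static int __init my_tracer_init(void)
{
        /* pair with unregister_ftrace_function(&my_ops) on teardown */
        return register_ftrace_function(&my_ops);
}
device_initcall(my_tracer_init);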
1661
1662int
1663ftrace_enable_sysctl(struct ctl_table *table, int write,
1664 struct file *file, void __user *buffer, size_t *lenp,
1665 loff_t *ppos)
1666{
1667 int ret;
1668
1669 if (unlikely(ftrace_disabled))
1670 return -ENODEV;
1671
1672 mutex_lock(&ftrace_sysctl_lock);
1673
1674 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1675
1676 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1677 goto out;
1678
1679 last_ftrace_enabled = ftrace_enabled;
1680
1681 if (ftrace_enabled) {
1682
1683 ftrace_startup_sysctl();
1684
1685 /* we are starting ftrace again */
1686 if (ftrace_list != &ftrace_list_end) {
1687 if (ftrace_list->next == &ftrace_list_end)
1688 ftrace_trace_function = ftrace_list->func;
1689 else
1690 ftrace_trace_function = ftrace_list_func;
1691 }
1692
1693 } else {
1694 /* stopping ftrace calls (just send to ftrace_stub) */
1695 ftrace_trace_function = ftrace_stub;
1696
1697 ftrace_shutdown_sysctl();
1698 }
1699
1700 out:
1701 mutex_unlock(&ftrace_sysctl_lock);
1702 return ret;
1703}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..9ade79369bfb
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3100 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III
13 */
14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
19#include <linux/hardirq.h>
20#include <linux/linkage.h>
21#include <linux/uaccess.h>
22#include <linux/ftrace.h>
23#include <linux/module.h>
24#include <linux/percpu.h>
25#include <linux/ctype.h>
26#include <linux/init.h>
27#include <linux/poll.h>
28#include <linux/gfp.h>
29#include <linux/fs.h>
30#include <linux/kprobes.h>
31#include <linux/writeback.h>
32
33#include <linux/stacktrace.h>
34
35#include "trace.h"
36
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh;
39
40static unsigned long __read_mostly tracing_nr_buffers;
41static cpumask_t __read_mostly tracing_buffer_mask;
42
43#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask)
45
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1;
50
51static unsigned long tracing_pages_allocated;
52
53long
54ns2usecs(cycle_t nsec)
55{
56 nsec += 500;
57 do_div(nsec, 1000);
58 return nsec;
59}
60
61cycle_t ftrace_now(int cpu)
62{
63 return cpu_clock(cpu);
64}
65
66/*
67 * The global_trace is the descriptor that holds the tracing
68 * buffers for the live tracing. For each CPU, it contains
69 * a linked list of pages that will store trace entries. The
70 * page descriptor of the pages in memory is used to hold
71 * the linked list by linking the lru item in the page descriptor
72 * to each of the pages in the buffer per CPU.
73 *
74 * For each active CPU there is a data field that holds the
75 * pages for the buffer for that CPU. Each CPU has the same number
76 * of pages allocated for its buffer.
77 */
78static struct trace_array global_trace;
79
80static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
81
82/*
83 * The max_tr is used to snapshot the global_trace when a maximum
84 * latency is reached. Some tracers will use this to store a maximum
85 * trace while it continues examining live traces.
86 *
87 * The buffers for the max_tr are set up the same as the global_trace.
88 * When a snapshot is taken, the linked list of the max_tr is swapped
89 * with the linked list of the global_trace and the buffers are reset for
90 * the global_trace so the tracing can continue.
91 */
92static struct trace_array max_tr;
93
94static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
95
96/* tracer_enabled is used to toggle activation of a tracer */
97static int tracer_enabled = 1;
98
99/*
100 * trace_nr_entries is the number of entries that is allocated
101 * for a buffer. Note, the number of entries is always rounded
102 * to ENTRIES_PER_PAGE.
103 */
104static unsigned long trace_nr_entries = 65536UL;
105
106/* trace_types holds a linked list of available tracers. */
107static struct tracer *trace_types __read_mostly;
108
109/* current_trace points to the tracer that is currently active */
110static struct tracer *current_trace __read_mostly;
111
112/*
113 * max_tracer_type_len is used to simplify the allocating of
114 * buffers to read userspace tracer names. We keep track of
115 * the longest tracer name registered.
116 */
117static int max_tracer_type_len;
118
119/*
120 * trace_types_lock is used to protect the trace_types list.
121 * This lock is also used to keep user access serialized.
122 * Accesses from userspace will grab this lock while userspace
123 * activities happen inside the kernel.
124 */
125static DEFINE_MUTEX(trace_types_lock);
126
127/* trace_wait is a waitqueue for tasks blocked on trace_poll */
128static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
129
130/* trace_flags holds iter_ctrl options */
131unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
132
133static notrace void no_trace_init(struct trace_array *tr)
134{
135 int cpu;
136
137 if(tr->ctrl)
138 for_each_online_cpu(cpu)
139 tracing_reset(tr->data[cpu]);
140 tracer_enabled = 0;
141}
142
143/* dummy trace to disable tracing */
144static struct tracer no_tracer __read_mostly = {
145 .name = "none",
146 .init = no_trace_init
147};
148
149
150/**
151 * trace_wake_up - wake up tasks waiting for trace input
152 *
153 * Simply wakes up any task that is blocked on the trace_wait
154 * queue. This is used with trace_poll for tasks polling the trace.
155 */
156void trace_wake_up(void)
157{
158 /*
159 * The runqueue_is_locked() can fail, but this is the best we
160 * have for now:
161 */
162 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
163 wake_up(&trace_wait);
164}
165
166#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
167
168static int __init set_nr_entries(char *str)
169{
170 unsigned long nr_entries;
171 int ret;
172
173 if (!str)
174 return 0;
175 ret = strict_strtoul(str, 0, &nr_entries);
176 /* nr_entries can not be zero */
177 if (ret < 0 || nr_entries == 0)
178 return 0;
179 trace_nr_entries = nr_entries;
180 return 1;
181}
182__setup("trace_entries=", set_nr_entries);
183
184unsigned long nsecs_to_usecs(unsigned long nsecs)
185{
186 return nsecs / 1000;
187}
188
189/*
190 * trace_flag_type is an enumeration that holds different
191 * states when a trace occurs. These are:
192 * IRQS_OFF - interrupts were disabled
193 * NEED_RESCHED - reschedule is requested
194 * HARDIRQ - inside an interrupt handler
195 * SOFTIRQ - inside a softirq handler
196 */
197enum trace_flag_type {
198 TRACE_FLAG_IRQS_OFF = 0x01,
199 TRACE_FLAG_NEED_RESCHED = 0x02,
200 TRACE_FLAG_HARDIRQ = 0x04,
201 TRACE_FLAG_SOFTIRQ = 0x08,
202};
203
204/*
205 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
206 * control the output of kernel symbols.
207 */
208#define TRACE_ITER_SYM_MASK \
209 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
210
211/* These must match the bit positions in trace_iterator_flags */
212static const char *trace_options[] = {
213 "print-parent",
214 "sym-offset",
215 "sym-addr",
216 "verbose",
217 "raw",
218 "hex",
219 "bin",
220 "block",
221 "stacktrace",
222 "sched-tree",
223 NULL
224};
225
226/*
227 * ftrace_max_lock is used to protect the swapping of buffers
228 * when taking a max snapshot. The buffers themselves are
229 * protected by per_cpu spinlocks. But the action of the swap
230 * needs its own lock.
231 *
232 * This is defined as a raw_spinlock_t in order to help
233 * with performance when lockdep debugging is enabled.
234 */
235static raw_spinlock_t ftrace_max_lock =
236 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
237
238/*
239 * Copy the new maximum trace into the separate maximum-trace
240 * structure. (this way the maximum trace is permanently saved,
241 * for later retrieval via /debugfs/tracing/latency_trace)
242 */
243static void
244__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
245{
246 struct trace_array_cpu *data = tr->data[cpu];
247
248 max_tr.cpu = cpu;
249 max_tr.time_start = data->preempt_timestamp;
250
251 data = max_tr.data[cpu];
252 data->saved_latency = tracing_max_latency;
253
254 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
255 data->pid = tsk->pid;
256 data->uid = tsk->uid;
257 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
258 data->policy = tsk->policy;
259 data->rt_priority = tsk->rt_priority;
260
261 /* record this task's comm */
262 tracing_record_cmdline(current);
263}
264
265#define CHECK_COND(cond) \
266 if (unlikely(cond)) { \
267 tracing_disabled = 1; \
268 WARN_ON(1); \
269 return -1; \
270 }
271
272/**
273 * check_pages - integrity check of trace buffers
274 *
275 * As a safety measure we check to make sure the data pages have not
276 * been corrupted.
277 */
278int check_pages(struct trace_array_cpu *data)
279{
280 struct page *page, *tmp;
281
282 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
283 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
284
285 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
286 CHECK_COND(page->lru.next->prev != &page->lru);
287 CHECK_COND(page->lru.prev->next != &page->lru);
288 }
289
290 return 0;
291}
292
293/**
294 * head_page - page address of the first page in per_cpu buffer.
295 *
296 * head_page returns the page address of the first page in
297 * a per_cpu buffer. This also performs various consistency
298 * checks to make sure the buffer has not been corrupted.
299 */
300void *head_page(struct trace_array_cpu *data)
301{
302 struct page *page;
303
304 if (list_empty(&data->trace_pages))
305 return NULL;
306
307 page = list_entry(data->trace_pages.next, struct page, lru);
308 BUG_ON(&page->lru == &data->trace_pages);
309
310 return page_address(page);
311}
312
313/**
314 * trace_seq_printf - sequence printing of trace information
315 * @s: trace sequence descriptor
316 * @fmt: printf format string
317 *
318 * The tracer may use either sequence operations or its own
319 * copy to user routines. To simplify formatting of a trace,
320 * trace_seq_printf is used to store strings into a special
321 * buffer (@s). Then the output may be either used by
322 * the sequencer or pulled into another buffer.
323 */
324int
325trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
326{
327 int len = (PAGE_SIZE - 1) - s->len;
328 va_list ap;
329 int ret;
330
331 if (!len)
332 return 0;
333
334 va_start(ap, fmt);
335 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
336 va_end(ap);
337
338 /* If we can't write it all, don't bother writing anything */
339 if (ret >= len)
340 return 0;
341
342 s->len += ret;
343
344 return len;
345}
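
/*
 * Editorial aside (not part of this file): a minimal user-space sketch
 * of the trace_seq idea above -- accumulate formatted text into a
 * page-sized buffer, drop any write that would not fit completely, and
 * flush the whole buffer at once.  All names below are hypothetical;
 * builds stand-alone with a C99 compiler.
 */
#include <stdarg.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096	/* stands in for PAGE_SIZE */

struct demo_seq {
	char buffer[DEMO_PAGE_SIZE];
	unsigned int len;
};

/* Append to the buffer only if the whole formatted string fits. */
static int demo_seq_printf(struct demo_seq *s, const char *fmt, ...)
{
	int left = (DEMO_PAGE_SIZE - 1) - s->len;
	va_list ap;
	int ret;

	if (left <= 0)
		return 0;

	va_start(ap, fmt);
	ret = vsnprintf(s->buffer + s->len, left, fmt, ap);
	va_end(ap);

	if (ret >= left)		/* would have been truncated: drop it */
		return 0;

	s->len += ret;
	return ret;
}

int main(void)
{
	struct demo_seq s = { .len = 0 };

	demo_seq_printf(&s, "%16s-%-5d ", "bash", 1234);
	demo_seq_printf(&s, "[%02d] %5lu.%06lu: do_fork\n", 0, 5ul, 123456ul);
	fwrite(s.buffer, 1, s.len, stdout);	/* flush in one go */
	return 0;
}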
346
347/**
348 * trace_seq_puts - trace sequence printing of simple string
349 * @s: trace sequence descriptor
350 * @str: simple string to record
351 *
352 * The tracer may use either the sequence operations or its own
353 * copy to user routines. This function records a simple string
354 * into a special buffer (@s) for later retrieval by a sequencer
355 * or other mechanism.
356 */
357static int
358trace_seq_puts(struct trace_seq *s, const char *str)
359{
360 int len = strlen(str);
361
362 if (len > ((PAGE_SIZE - 1) - s->len))
363 return 0;
364
365 memcpy(s->buffer + s->len, str, len);
366 s->len += len;
367
368 return len;
369}
370
371static int
372trace_seq_putc(struct trace_seq *s, unsigned char c)
373{
374 if (s->len >= (PAGE_SIZE - 1))
375 return 0;
376
377 s->buffer[s->len++] = c;
378
379 return 1;
380}
381
382static int
383trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
384{
385 if (len > ((PAGE_SIZE - 1) - s->len))
386 return 0;
387
388 memcpy(s->buffer + s->len, mem, len);
389 s->len += len;
390
391 return len;
392}
393
394#define HEX_CHARS 17
395static const char hex2asc[] = "0123456789abcdef";
396
397static int
398trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
399{
400 unsigned char hex[HEX_CHARS];
401 unsigned char *data = mem;
402 unsigned char byte;
403 int i, j;
404
405 BUG_ON(len >= HEX_CHARS);
406
407#ifdef __BIG_ENDIAN
408 for (i = 0, j = 0; i < len; i++) {
409#else
410 for (i = len-1, j = 0; i >= 0; i--) {
411#endif
412 byte = data[i];
413
414 hex[j++] = hex2asc[byte & 0x0f];
415 hex[j++] = hex2asc[byte >> 4];
416 }
417 hex[j++] = ' ';
418
419 return trace_seq_putmem(s, hex, j);
420}
421
422static void
423trace_seq_reset(struct trace_seq *s)
424{
425 s->len = 0;
426 s->readpos = 0;
427}
428
429ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
430{
431 int len;
432 int ret;
433
434 if (s->len <= s->readpos)
435 return -EBUSY;
436
437 len = s->len - s->readpos;
438 if (cnt > len)
439 cnt = len;
440 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
441 if (ret)
442 return -EFAULT;
443
444 s->readpos += len;
445 return cnt;
446}
447
448static void
449trace_print_seq(struct seq_file *m, struct trace_seq *s)
450{
451 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
452
453 s->buffer[len] = 0;
454 seq_puts(m, s->buffer);
455
456 trace_seq_reset(s);
457}
458
459/*
460 * Flip the trace buffers between two trace descriptors.
461 * These are usually the buffers of the global_trace and
462 * the max_tr, used to record a snapshot of the current trace.
463 *
464 * The ftrace_max_lock must be held.
465 */
466static void
467flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
468{
469 struct list_head flip_pages;
470
471 INIT_LIST_HEAD(&flip_pages);
472
473 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
474 sizeof(struct trace_array_cpu) -
475 offsetof(struct trace_array_cpu, trace_head_idx));
476
477 check_pages(tr1);
478 check_pages(tr2);
479 list_splice_init(&tr1->trace_pages, &flip_pages);
480 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
481 list_splice_init(&flip_pages, &tr2->trace_pages);
482 BUG_ON(!list_empty(&flip_pages));
483 check_pages(tr1);
484 check_pages(tr2);
485}
486
487/**
488 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
489 * @tr: tracer
490 * @tsk: the task with the latency
491 * @cpu: The cpu that initiated the trace.
492 *
493 * Flip the buffers between the @tr and the max_tr and record information
494 * about which task was the cause of this latency.
495 */
496void
497update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
498{
499 struct trace_array_cpu *data;
500 int i;
501
502 WARN_ON_ONCE(!irqs_disabled());
503 __raw_spin_lock(&ftrace_max_lock);
504 /* clear out all the previous traces */
505 for_each_tracing_cpu(i) {
506 data = tr->data[i];
507 flip_trace(max_tr.data[i], data);
508 tracing_reset(data);
509 }
510
511 __update_max_tr(tr, tsk, cpu);
512 __raw_spin_unlock(&ftrace_max_lock);
513}
514
515/**
516 * update_max_tr_single - only copy one trace over, and reset the rest
517 * @tr: tracer
518 * @tsk: task with the latency
519 * @cpu: the cpu of the buffer to copy.
520 *
521 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
522 */
523void
524update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
525{
526 struct trace_array_cpu *data = tr->data[cpu];
527 int i;
528
529 WARN_ON_ONCE(!irqs_disabled());
530 __raw_spin_lock(&ftrace_max_lock);
531 for_each_tracing_cpu(i)
532 tracing_reset(max_tr.data[i]);
533
534 flip_trace(max_tr.data[cpu], data);
535 tracing_reset(data);
536
537 __update_max_tr(tr, tsk, cpu);
538 __raw_spin_unlock(&ftrace_max_lock);
539}
540
541/**
542 * register_tracer - register a tracer with the ftrace system.
543 * @type: the plugin for the tracer
544 *
545 * Register a new plugin tracer.
546 */
547int register_tracer(struct tracer *type)
548{
549 struct tracer *t;
550 int len;
551 int ret = 0;
552
553 if (!type->name) {
554 pr_info("Tracer must have a name\n");
555 return -1;
556 }
557
558 mutex_lock(&trace_types_lock);
559 for (t = trace_types; t; t = t->next) {
560 if (strcmp(type->name, t->name) == 0) {
561 /* already found */
562 pr_info("Trace %s already registered\n",
563 type->name);
564 ret = -1;
565 goto out;
566 }
567 }
568
569#ifdef CONFIG_FTRACE_STARTUP_TEST
570 if (type->selftest) {
571 struct tracer *saved_tracer = current_trace;
572 struct trace_array_cpu *data;
573 struct trace_array *tr = &global_trace;
574 int saved_ctrl = tr->ctrl;
575 int i;
576 /*
577 * Run a selftest on this tracer.
578 * Here we reset the trace buffer, and set the current
579 * tracer to be this tracer. The tracer can then run some
580 * internal tracing to verify that everything is in order.
581 * If we fail, we do not register this tracer.
582 */
583 for_each_tracing_cpu(i) {
584 data = tr->data[i];
585 if (!head_page(data))
586 continue;
587 tracing_reset(data);
588 }
589 current_trace = type;
590 tr->ctrl = 0;
591 /* the test is responsible for initializing and enabling */
592 pr_info("Testing tracer %s: ", type->name);
593 ret = type->selftest(type, tr);
594 /* the test is responsible for resetting too */
595 current_trace = saved_tracer;
596 tr->ctrl = saved_ctrl;
597 if (ret) {
598 printk(KERN_CONT "FAILED!\n");
599 goto out;
600 }
601 /* Only reset on passing, to avoid touching corrupted buffers */
602 for_each_tracing_cpu(i) {
603 data = tr->data[i];
604 if (!head_page(data))
605 continue;
606 tracing_reset(data);
607 }
608 printk(KERN_CONT "PASSED\n");
609 }
610#endif
611
612 type->next = trace_types;
613 trace_types = type;
614 len = strlen(type->name);
615 if (len > max_tracer_type_len)
616 max_tracer_type_len = len;
617
618 out:
619 mutex_unlock(&trace_types_lock);
620
621 return ret;
622}
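
/*
 * Editorial sketch (not part of this file): how a tracer plugin might
 * register itself with the code above.  The name "example" and the
 * empty callbacks are hypothetical; the init/reset signatures are
 * assumed from how t->init(tr) and ->reset(tr) are invoked here, and a
 * real plugin would be built into kernel/trace/ alongside this file.
 */
static void example_trace_init(struct trace_array *tr)
{
	/* reset the buffers and start recording */
}

static void example_trace_reset(struct trace_array *tr)
{
	/* undo whatever example_trace_init() set up */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static int __init init_example_tracer(void)
{
	return register_tracer(&example_tracer);	/* -1 on a name clash */
}
device_initcall(init_example_tracer);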
623
624void unregister_tracer(struct tracer *type)
625{
626 struct tracer **t;
627 int len;
628
629 mutex_lock(&trace_types_lock);
630 for (t = &trace_types; *t; t = &(*t)->next) {
631 if (*t == type)
632 goto found;
633 }
634 pr_info("Trace %s not registered\n", type->name);
635 goto out;
636
637 found:
638 *t = (*t)->next;
639 if (strlen(type->name) != max_tracer_type_len)
640 goto out;
641
642 max_tracer_type_len = 0;
643 for (t = &trace_types; *t; t = &(*t)->next) {
644 len = strlen((*t)->name);
645 if (len > max_tracer_type_len)
646 max_tracer_type_len = len;
647 }
648 out:
649 mutex_unlock(&trace_types_lock);
650}
651
652void tracing_reset(struct trace_array_cpu *data)
653{
654 data->trace_idx = 0;
655 data->overrun = 0;
656 data->trace_head = data->trace_tail = head_page(data);
657 data->trace_head_idx = 0;
658 data->trace_tail_idx = 0;
659}
660
661#define SAVED_CMDLINES 128
662static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
663static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
664static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
665static int cmdline_idx;
666static DEFINE_SPINLOCK(trace_cmdline_lock);
667
668/* temporarily disable recording */
669atomic_t trace_record_cmdline_disabled __read_mostly;
670
671static void trace_init_cmdlines(void)
672{
673 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
674 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
675 cmdline_idx = 0;
676}
677
678void trace_stop_cmdline_recording(void);
679
680static void trace_save_cmdline(struct task_struct *tsk)
681{
682 unsigned map;
683 unsigned idx;
684
685 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
686 return;
687
688 /*
689 * It's not the end of the world if we don't get
690	 * the lock, but we don't want to spin,
691	 * nor do we want to disable interrupts,
692	 * so if we miss here, better luck next time.
693 */
694 if (!spin_trylock(&trace_cmdline_lock))
695 return;
696
697 idx = map_pid_to_cmdline[tsk->pid];
698 if (idx >= SAVED_CMDLINES) {
699 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
700
701 map = map_cmdline_to_pid[idx];
702 if (map <= PID_MAX_DEFAULT)
703 map_pid_to_cmdline[map] = (unsigned)-1;
704
705 map_pid_to_cmdline[tsk->pid] = idx;
706
707 cmdline_idx = idx;
708 }
709
710 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
711
712 spin_unlock(&trace_cmdline_lock);
713}
714
715static char *trace_find_cmdline(int pid)
716{
717 char *cmdline = "<...>";
718 unsigned map;
719
720 if (!pid)
721 return "<idle>";
722
723 if (pid > PID_MAX_DEFAULT)
724 goto out;
725
726 map = map_pid_to_cmdline[pid];
727 if (map >= SAVED_CMDLINES)
728 goto out;
729
730 cmdline = saved_cmdlines[map];
731
732 out:
733 return cmdline;
734}
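
/*
 * Editorial aside (not part of this file): a stand-alone sketch of the
 * pid <-> comm bookkeeping above -- a small ring of saved comms plus a
 * pid-indexed table, recycling the oldest slot when the ring is full.
 * It is simplified (no locking, signed indices, and it keeps the
 * reverse map up to date on every save); all names are hypothetical.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_SLOTS	4		/* stands in for SAVED_CMDLINES */
#define DEMO_MAX_PID	32767		/* stands in for PID_MAX_DEFAULT */
#define DEMO_COMM_LEN	16		/* stands in for TASK_COMM_LEN */

static int map_pid_to_slot[DEMO_MAX_PID + 1];
static int map_slot_to_pid[DEMO_SLOTS];
static char saved[DEMO_SLOTS][DEMO_COMM_LEN];
static int slot_idx;

static void demo_init(void)
{
	memset(map_pid_to_slot, -1, sizeof(map_pid_to_slot));
	memset(map_slot_to_pid, -1, sizeof(map_slot_to_pid));
}

/* Remember "pid ran comm", recycling the oldest slot when full. */
static void demo_save(int pid, const char *comm)
{
	int idx = map_pid_to_slot[pid];

	if (idx < 0) {
		idx = (slot_idx + 1) % DEMO_SLOTS;
		if (map_slot_to_pid[idx] >= 0)		/* evict old owner */
			map_pid_to_slot[map_slot_to_pid[idx]] = -1;
		map_slot_to_pid[idx] = pid;
		map_pid_to_slot[pid] = idx;
		slot_idx = idx;
	}
	snprintf(saved[idx], DEMO_COMM_LEN, "%s", comm);
}

static const char *demo_find(int pid)
{
	int idx = map_pid_to_slot[pid];
	return idx < 0 ? "<...>" : saved[idx];
}

int main(void)
{
	demo_init();
	demo_save(100, "bash");
	demo_save(200, "sshd");
	printf("%d -> %s\n", 100, demo_find(100));
	printf("%d -> %s\n", 999, demo_find(999));	/* unknown pid */
	return 0;
}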
735
736void tracing_record_cmdline(struct task_struct *tsk)
737{
738 if (atomic_read(&trace_record_cmdline_disabled))
739 return;
740
741 trace_save_cmdline(tsk);
742}
743
744static inline struct list_head *
745trace_next_list(struct trace_array_cpu *data, struct list_head *next)
746{
747 /*
748	 * Round-robin - but skip the head (which is not a real page):
749 */
750 next = next->next;
751 if (unlikely(next == &data->trace_pages))
752 next = next->next;
753 BUG_ON(next == &data->trace_pages);
754
755 return next;
756}
757
758static inline void *
759trace_next_page(struct trace_array_cpu *data, void *addr)
760{
761 struct list_head *next;
762 struct page *page;
763
764 page = virt_to_page(addr);
765
766 next = trace_next_list(data, &page->lru);
767 page = list_entry(next, struct page, lru);
768
769 return page_address(page);
770}
771
772static inline struct trace_entry *
773tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
774{
775 unsigned long idx, idx_next;
776 struct trace_entry *entry;
777
778 data->trace_idx++;
779 idx = data->trace_head_idx;
780 idx_next = idx + 1;
781
782 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
783
784 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
785
786 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
787 data->trace_head = trace_next_page(data, data->trace_head);
788 idx_next = 0;
789 }
790
791 if (data->trace_head == data->trace_tail &&
792 idx_next == data->trace_tail_idx) {
793 /* overrun */
794 data->overrun++;
795 data->trace_tail_idx++;
796 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
797 data->trace_tail =
798 trace_next_page(data, data->trace_tail);
799 data->trace_tail_idx = 0;
800 }
801 }
802
803 data->trace_head_idx = idx_next;
804
805 return entry;
806}
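
/*
 * Editorial aside (not part of this file): the head/tail arithmetic
 * above, reduced to a single fixed-size ring.  When the writer catches
 * up with the tail, the oldest entry is overwritten and counted as an
 * overrun, which is what the kernel code does across its list of
 * page-sized blocks.  Stand-alone C99; names are hypothetical.
 */
#include <stdio.h>

#define SLOTS 4		/* stands in for the entries in the ring */

static int head, tail, overrun, count;

/* Reserve the next write slot, overwriting the oldest entry when full. */
static int reserve_slot(void)
{
	int idx = head;

	head = (head + 1) % SLOTS;
	if (count == SLOTS) {		/* buffer already full: overrun */
		overrun++;
		tail = (tail + 1) % SLOTS;
	} else {
		count++;
	}
	return idx;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("write %d -> slot %d\n", i, reserve_slot());
	printf("overruns=%d, oldest entry now at tail=%d\n", overrun, tail);
	return 0;
}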
807
808static inline void
809tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
810{
811 struct task_struct *tsk = current;
812 unsigned long pc;
813
814 pc = preempt_count();
815
816 entry->preempt_count = pc & 0xff;
817 entry->pid = (tsk) ? tsk->pid : 0;
818 entry->t = ftrace_now(raw_smp_processor_id());
819 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
820 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
821 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
822 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
823}
824
825void
826trace_function(struct trace_array *tr, struct trace_array_cpu *data,
827 unsigned long ip, unsigned long parent_ip, unsigned long flags)
828{
829 struct trace_entry *entry;
830 unsigned long irq_flags;
831
832 raw_local_irq_save(irq_flags);
833 __raw_spin_lock(&data->lock);
834 entry = tracing_get_trace_entry(tr, data);
835 tracing_generic_entry_update(entry, flags);
836 entry->type = TRACE_FN;
837 entry->fn.ip = ip;
838 entry->fn.parent_ip = parent_ip;
839 __raw_spin_unlock(&data->lock);
840 raw_local_irq_restore(irq_flags);
841}
842
843void
844ftrace(struct trace_array *tr, struct trace_array_cpu *data,
845 unsigned long ip, unsigned long parent_ip, unsigned long flags)
846{
847 if (likely(!atomic_read(&data->disabled)))
848 trace_function(tr, data, ip, parent_ip, flags);
849}
850
851void __trace_stack(struct trace_array *tr,
852 struct trace_array_cpu *data,
853 unsigned long flags,
854 int skip)
855{
856 struct trace_entry *entry;
857 struct stack_trace trace;
858
859 if (!(trace_flags & TRACE_ITER_STACKTRACE))
860 return;
861
862 entry = tracing_get_trace_entry(tr, data);
863 tracing_generic_entry_update(entry, flags);
864 entry->type = TRACE_STACK;
865
866 memset(&entry->stack, 0, sizeof(entry->stack));
867
868 trace.nr_entries = 0;
869 trace.max_entries = FTRACE_STACK_ENTRIES;
870 trace.skip = skip;
871 trace.entries = entry->stack.caller;
872
873 save_stack_trace(&trace);
874}
875
876void
877__trace_special(void *__tr, void *__data,
878 unsigned long arg1, unsigned long arg2, unsigned long arg3)
879{
880 struct trace_array_cpu *data = __data;
881 struct trace_array *tr = __tr;
882 struct trace_entry *entry;
883 unsigned long irq_flags;
884
885 raw_local_irq_save(irq_flags);
886 __raw_spin_lock(&data->lock);
887 entry = tracing_get_trace_entry(tr, data);
888 tracing_generic_entry_update(entry, 0);
889 entry->type = TRACE_SPECIAL;
890 entry->special.arg1 = arg1;
891 entry->special.arg2 = arg2;
892 entry->special.arg3 = arg3;
893 __trace_stack(tr, data, irq_flags, 4);
894 __raw_spin_unlock(&data->lock);
895 raw_local_irq_restore(irq_flags);
896
897 trace_wake_up();
898}
899
900void
901tracing_sched_switch_trace(struct trace_array *tr,
902 struct trace_array_cpu *data,
903 struct task_struct *prev,
904 struct task_struct *next,
905 unsigned long flags)
906{
907 struct trace_entry *entry;
908 unsigned long irq_flags;
909
910 raw_local_irq_save(irq_flags);
911 __raw_spin_lock(&data->lock);
912 entry = tracing_get_trace_entry(tr, data);
913 tracing_generic_entry_update(entry, flags);
914 entry->type = TRACE_CTX;
915 entry->ctx.prev_pid = prev->pid;
916 entry->ctx.prev_prio = prev->prio;
917 entry->ctx.prev_state = prev->state;
918 entry->ctx.next_pid = next->pid;
919 entry->ctx.next_prio = next->prio;
920 entry->ctx.next_state = next->state;
921 __trace_stack(tr, data, flags, 5);
922 __raw_spin_unlock(&data->lock);
923 raw_local_irq_restore(irq_flags);
924}
925
926void
927tracing_sched_wakeup_trace(struct trace_array *tr,
928 struct trace_array_cpu *data,
929 struct task_struct *wakee,
930 struct task_struct *curr,
931 unsigned long flags)
932{
933 struct trace_entry *entry;
934 unsigned long irq_flags;
935
936 raw_local_irq_save(irq_flags);
937 __raw_spin_lock(&data->lock);
938 entry = tracing_get_trace_entry(tr, data);
939 tracing_generic_entry_update(entry, flags);
940 entry->type = TRACE_WAKE;
941 entry->ctx.prev_pid = curr->pid;
942 entry->ctx.prev_prio = curr->prio;
943 entry->ctx.prev_state = curr->state;
944 entry->ctx.next_pid = wakee->pid;
945 entry->ctx.next_prio = wakee->prio;
946 entry->ctx.next_state = wakee->state;
947 __trace_stack(tr, data, flags, 6);
948 __raw_spin_unlock(&data->lock);
949 raw_local_irq_restore(irq_flags);
950
951 trace_wake_up();
952}
953
954void
955ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
956{
957 struct trace_array *tr = &global_trace;
958 struct trace_array_cpu *data;
959 unsigned long flags;
960 long disabled;
961 int cpu;
962
963 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
964 return;
965
966 local_irq_save(flags);
967 cpu = raw_smp_processor_id();
968 data = tr->data[cpu];
969 disabled = atomic_inc_return(&data->disabled);
970
971 if (likely(disabled == 1))
972 __trace_special(tr, data, arg1, arg2, arg3);
973
974 atomic_dec(&data->disabled);
975 local_irq_restore(flags);
976}
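
/*
 * Editorial aside (not part of this file): the "disabled" counter
 * pattern used above -- atomically bump a per-buffer counter and only
 * record when we are the first (non-nested) entry, so the tracer never
 * re-enters its own buffer.  Stand-alone C11 sketch using <stdatomic.h>;
 * names are hypothetical.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int disabled;	/* stands in for data->disabled */

/* Record only if we are the first (non-nested) entry into the buffer. */
static void demo_trace(const char *what)
{
	int level = atomic_fetch_add(&disabled, 1) + 1;

	if (level == 1)
		printf("recorded: %s\n", what);
	else
		printf("skipped (nested): %s\n", what);

	atomic_fetch_sub(&disabled, 1);
}

static void pretend_nested_event(void)
{
	/* imagine this running from inside the tracer itself */
	demo_trace("nested event");
}

int main(void)
{
	demo_trace("outer event");

	atomic_fetch_add(&disabled, 1);	/* simulate being inside the tracer */
	pretend_nested_event();
	atomic_fetch_sub(&disabled, 1);

	return 0;
}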
977
978#ifdef CONFIG_FTRACE
979static void
980function_trace_call(unsigned long ip, unsigned long parent_ip)
981{
982 struct trace_array *tr = &global_trace;
983 struct trace_array_cpu *data;
984 unsigned long flags;
985 long disabled;
986 int cpu;
987
988 if (unlikely(!tracer_enabled))
989 return;
990
991 if (skip_trace(ip))
992 return;
993
994 local_irq_save(flags);
995 cpu = raw_smp_processor_id();
996 data = tr->data[cpu];
997 disabled = atomic_inc_return(&data->disabled);
998
999 if (likely(disabled == 1))
1000 trace_function(tr, data, ip, parent_ip, flags);
1001
1002 atomic_dec(&data->disabled);
1003 local_irq_restore(flags);
1004}
1005
1006static struct ftrace_ops trace_ops __read_mostly =
1007{
1008 .func = function_trace_call,
1009};
1010
1011void tracing_start_function_trace(void)
1012{
1013 register_ftrace_function(&trace_ops);
1014}
1015
1016void tracing_stop_function_trace(void)
1017{
1018 unregister_ftrace_function(&trace_ops);
1019}
1020#endif
1021
1022enum trace_file_type {
1023 TRACE_FILE_LAT_FMT = 1,
1024};
1025
1026static struct trace_entry *
1027trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1028 struct trace_iterator *iter, int cpu)
1029{
1030 struct page *page;
1031 struct trace_entry *array;
1032
1033 if (iter->next_idx[cpu] >= tr->entries ||
1034 iter->next_idx[cpu] >= data->trace_idx ||
1035 (data->trace_head == data->trace_tail &&
1036 data->trace_head_idx == data->trace_tail_idx))
1037 return NULL;
1038
1039 if (!iter->next_page[cpu]) {
1040 /* Initialize the iterator for this cpu trace buffer */
1041 WARN_ON(!data->trace_tail);
1042 page = virt_to_page(data->trace_tail);
1043 iter->next_page[cpu] = &page->lru;
1044 iter->next_page_idx[cpu] = data->trace_tail_idx;
1045 }
1046
1047 page = list_entry(iter->next_page[cpu], struct page, lru);
1048 BUG_ON(&data->trace_pages == &page->lru);
1049
1050 array = page_address(page);
1051
1052 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1053 return &array[iter->next_page_idx[cpu]];
1054}
1055
1056static struct trace_entry *
1057find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1058{
1059 struct trace_array *tr = iter->tr;
1060 struct trace_entry *ent, *next = NULL;
1061 int next_cpu = -1;
1062 int cpu;
1063
1064 for_each_tracing_cpu(cpu) {
1065 if (!head_page(tr->data[cpu]))
1066 continue;
1067 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1068 /*
1069 * Pick the entry with the smallest timestamp:
1070 */
1071 if (ent && (!next || ent->t < next->t)) {
1072 next = ent;
1073 next_cpu = cpu;
1074 }
1075 }
1076
1077 if (ent_cpu)
1078 *ent_cpu = next_cpu;
1079
1080 return next;
1081}
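
/*
 * Editorial aside (not part of this file): the timestamp merge done by
 * find_next_entry(), reduced to two fixed, already-ordered per-cpu
 * streams.  Unlike the kernel code, this sketch also consumes the
 * winning entry (the job of find_next_entry_inc()).  Stand-alone C99;
 * the sample data and names are made up.
 */
#include <stdio.h>

#define NCPUS 2

struct demo_entry { unsigned long long t; const char *what; };

/* Two already-ordered per-cpu streams, as the per-cpu buffers would be. */
static struct demo_entry stream[NCPUS][3] = {
	{ { 10, "cpu0: fork" },   { 30, "cpu0: exec" },   { 50, "cpu0: exit" } },
	{ { 20, "cpu1: wakeup" }, { 40, "cpu1: switch" }, { 60, "cpu1: idle" } },
};
static int pos[NCPUS];

/* Pick the pending entry with the smallest timestamp, then consume it. */
static struct demo_entry *next_entry(void)
{
	struct demo_entry *best = NULL;
	int best_cpu = -1;

	for (int cpu = 0; cpu < NCPUS; cpu++) {
		if (pos[cpu] >= 3)
			continue;
		struct demo_entry *e = &stream[cpu][pos[cpu]];
		if (!best || e->t < best->t) {
			best = e;
			best_cpu = cpu;
		}
	}
	if (best)
		pos[best_cpu]++;	/* consume from the winning cpu */
	return best;
}

int main(void)
{
	for (struct demo_entry *e = next_entry(); e; e = next_entry())
		printf("%llu %s\n", e->t, e->what);
	return 0;
}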
1082
1083static void trace_iterator_increment(struct trace_iterator *iter)
1084{
1085 iter->idx++;
1086 iter->next_idx[iter->cpu]++;
1087 iter->next_page_idx[iter->cpu]++;
1088
1089 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1090 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1091
1092 iter->next_page_idx[iter->cpu] = 0;
1093 iter->next_page[iter->cpu] =
1094 trace_next_list(data, iter->next_page[iter->cpu]);
1095 }
1096}
1097
1098static void trace_consume(struct trace_iterator *iter)
1099{
1100 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1101
1102 data->trace_tail_idx++;
1103 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1104 data->trace_tail = trace_next_page(data, data->trace_tail);
1105 data->trace_tail_idx = 0;
1106 }
1107
1108	/* If we emptied it, reset the index */
1109 if (data->trace_head == data->trace_tail &&
1110 data->trace_head_idx == data->trace_tail_idx)
1111 data->trace_idx = 0;
1112}
1113
1114static void *find_next_entry_inc(struct trace_iterator *iter)
1115{
1116 struct trace_entry *next;
1117 int next_cpu = -1;
1118
1119 next = find_next_entry(iter, &next_cpu);
1120
1121 iter->prev_ent = iter->ent;
1122 iter->prev_cpu = iter->cpu;
1123
1124 iter->ent = next;
1125 iter->cpu = next_cpu;
1126
1127 if (next)
1128 trace_iterator_increment(iter);
1129
1130 return next ? iter : NULL;
1131}
1132
1133static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1134{
1135 struct trace_iterator *iter = m->private;
1136 void *last_ent = iter->ent;
1137 int i = (int)*pos;
1138 void *ent;
1139
1140 (*pos)++;
1141
1142 /* can't go backwards */
1143 if (iter->idx > i)
1144 return NULL;
1145
1146 if (iter->idx < 0)
1147 ent = find_next_entry_inc(iter);
1148 else
1149 ent = iter;
1150
1151 while (ent && iter->idx < i)
1152 ent = find_next_entry_inc(iter);
1153
1154 iter->pos = *pos;
1155
1156 if (last_ent && !ent)
1157 seq_puts(m, "\n\nvim:ft=help\n");
1158
1159 return ent;
1160}
1161
1162static void *s_start(struct seq_file *m, loff_t *pos)
1163{
1164 struct trace_iterator *iter = m->private;
1165 void *p = NULL;
1166 loff_t l = 0;
1167 int i;
1168
1169 mutex_lock(&trace_types_lock);
1170
1171 if (!current_trace || current_trace != iter->trace) {
1172 mutex_unlock(&trace_types_lock);
1173 return NULL;
1174 }
1175
1176 atomic_inc(&trace_record_cmdline_disabled);
1177
1178 /* let the tracer grab locks here if needed */
1179 if (current_trace->start)
1180 current_trace->start(iter);
1181
1182 if (*pos != iter->pos) {
1183 iter->ent = NULL;
1184 iter->cpu = 0;
1185 iter->idx = -1;
1186 iter->prev_ent = NULL;
1187 iter->prev_cpu = -1;
1188
1189 for_each_tracing_cpu(i) {
1190 iter->next_idx[i] = 0;
1191 iter->next_page[i] = NULL;
1192 }
1193
1194 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1195 ;
1196
1197 } else {
1198 l = *pos - 1;
1199 p = s_next(m, p, &l);
1200 }
1201
1202 return p;
1203}
1204
1205static void s_stop(struct seq_file *m, void *p)
1206{
1207 struct trace_iterator *iter = m->private;
1208
1209 atomic_dec(&trace_record_cmdline_disabled);
1210
1211 /* let the tracer release locks here if needed */
1212 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1213 iter->trace->stop(iter);
1214
1215 mutex_unlock(&trace_types_lock);
1216}
1217
1218#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1219
1220#ifdef CONFIG_KRETPROBES
1221static inline int kretprobed(unsigned long addr)
1222{
1223 return addr == (unsigned long)kretprobe_trampoline;
1224}
1225#else
1226static inline int kretprobed(unsigned long addr)
1227{
1228 return 0;
1229}
1230#endif /* CONFIG_KRETPROBES */
1231
1232static int
1233seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1234{
1235#ifdef CONFIG_KALLSYMS
1236 char str[KSYM_SYMBOL_LEN];
1237
1238 kallsyms_lookup(address, NULL, NULL, NULL, str);
1239
1240 return trace_seq_printf(s, fmt, str);
1241#endif
1242 return 1;
1243}
1244
1245static int
1246seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1247 unsigned long address)
1248{
1249#ifdef CONFIG_KALLSYMS
1250 char str[KSYM_SYMBOL_LEN];
1251
1252 sprint_symbol(str, address);
1253 return trace_seq_printf(s, fmt, str);
1254#endif
1255 return 1;
1256}
1257
1258#ifndef CONFIG_64BIT
1259# define IP_FMT "%08lx"
1260#else
1261# define IP_FMT "%016lx"
1262#endif
1263
1264static int
1265seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1266{
1267 int ret;
1268
1269 if (!ip)
1270 return trace_seq_printf(s, "0");
1271
1272 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1273 ret = seq_print_sym_offset(s, "%s", ip);
1274 else
1275 ret = seq_print_sym_short(s, "%s", ip);
1276
1277 if (!ret)
1278 return 0;
1279
1280 if (sym_flags & TRACE_ITER_SYM_ADDR)
1281 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1282 return ret;
1283}
1284
1285static void print_lat_help_header(struct seq_file *m)
1286{
1287 seq_puts(m, "# _------=> CPU# \n");
1288 seq_puts(m, "# / _-----=> irqs-off \n");
1289 seq_puts(m, "# | / _----=> need-resched \n");
1290 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1291 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1292 seq_puts(m, "# |||| / \n");
1293 seq_puts(m, "# ||||| delay \n");
1294 seq_puts(m, "# cmd pid ||||| time | caller \n");
1295 seq_puts(m, "# \\ / ||||| \\ | / \n");
1296}
1297
1298static void print_func_help_header(struct seq_file *m)
1299{
1300 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1301 seq_puts(m, "# | | | | |\n");
1302}
1303
1304
1305static void
1306print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1307{
1308 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1309 struct trace_array *tr = iter->tr;
1310 struct trace_array_cpu *data = tr->data[tr->cpu];
1311 struct tracer *type = current_trace;
1312 unsigned long total = 0;
1313 unsigned long entries = 0;
1314 int cpu;
1315 const char *name = "preemption";
1316
1317 if (type)
1318 name = type->name;
1319
1320 for_each_tracing_cpu(cpu) {
1321 if (head_page(tr->data[cpu])) {
1322 total += tr->data[cpu]->trace_idx;
1323 if (tr->data[cpu]->trace_idx > tr->entries)
1324 entries += tr->entries;
1325 else
1326 entries += tr->data[cpu]->trace_idx;
1327 }
1328 }
1329
1330 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1331 name, UTS_RELEASE);
1332 seq_puts(m, "-----------------------------------"
1333 "---------------------------------\n");
1334 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1335 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1336 nsecs_to_usecs(data->saved_latency),
1337 entries,
1338 total,
1339 tr->cpu,
1340#if defined(CONFIG_PREEMPT_NONE)
1341 "server",
1342#elif defined(CONFIG_PREEMPT_VOLUNTARY)
1343 "desktop",
1344#elif defined(CONFIG_PREEMPT_DESKTOP)
1345 "preempt",
1346#else
1347 "unknown",
1348#endif
1349 /* These are reserved for later use */
1350 0, 0, 0, 0);
1351#ifdef CONFIG_SMP
1352 seq_printf(m, " #P:%d)\n", num_online_cpus());
1353#else
1354 seq_puts(m, ")\n");
1355#endif
1356 seq_puts(m, " -----------------\n");
1357 seq_printf(m, " | task: %.16s-%d "
1358 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1359 data->comm, data->pid, data->uid, data->nice,
1360 data->policy, data->rt_priority);
1361 seq_puts(m, " -----------------\n");
1362
1363 if (data->critical_start) {
1364 seq_puts(m, " => started at: ");
1365 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1366 trace_print_seq(m, &iter->seq);
1367 seq_puts(m, "\n => ended at: ");
1368 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1369 trace_print_seq(m, &iter->seq);
1370 seq_puts(m, "\n");
1371 }
1372
1373 seq_puts(m, "\n");
1374}
1375
1376static void
1377lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1378{
1379 int hardirq, softirq;
1380 char *comm;
1381
1382 comm = trace_find_cmdline(entry->pid);
1383
1384 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1385 trace_seq_printf(s, "%d", cpu);
1386 trace_seq_printf(s, "%c%c",
1387 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1388 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1389
1390 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1391 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1392 if (hardirq && softirq) {
1393 trace_seq_putc(s, 'H');
1394 } else {
1395 if (hardirq) {
1396 trace_seq_putc(s, 'h');
1397 } else {
1398 if (softirq)
1399 trace_seq_putc(s, 's');
1400 else
1401 trace_seq_putc(s, '.');
1402 }
1403 }
1404
1405 if (entry->preempt_count)
1406 trace_seq_printf(s, "%x", entry->preempt_count);
1407 else
1408 trace_seq_puts(s, ".");
1409}
1410
1411unsigned long preempt_mark_thresh = 100;
1412
1413static void
1414lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1415 unsigned long rel_usecs)
1416{
1417 trace_seq_printf(s, " %4lldus", abs_usecs);
1418 if (rel_usecs > preempt_mark_thresh)
1419 trace_seq_puts(s, "!: ");
1420 else if (rel_usecs > 1)
1421 trace_seq_puts(s, "+: ");
1422 else
1423 trace_seq_puts(s, " : ");
1424}
1425
1426static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1427
1428static int
1429print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1430{
1431 struct trace_seq *s = &iter->seq;
1432 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1433 struct trace_entry *next_entry = find_next_entry(iter, NULL);
1434 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1435 struct trace_entry *entry = iter->ent;
1436 unsigned long abs_usecs;
1437 unsigned long rel_usecs;
1438 char *comm;
1439 int S, T;
1440 int i;
1441 unsigned state;
1442
1443 if (!next_entry)
1444 next_entry = entry;
1445 rel_usecs = ns2usecs(next_entry->t - entry->t);
1446 abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1447
1448 if (verbose) {
1449 comm = trace_find_cmdline(entry->pid);
1450 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1451 " %ld.%03ldms (+%ld.%03ldms): ",
1452 comm,
1453 entry->pid, cpu, entry->flags,
1454 entry->preempt_count, trace_idx,
1455 ns2usecs(entry->t),
1456 abs_usecs/1000,
1457 abs_usecs % 1000, rel_usecs/1000,
1458 rel_usecs % 1000);
1459 } else {
1460 lat_print_generic(s, entry, cpu);
1461 lat_print_timestamp(s, abs_usecs, rel_usecs);
1462 }
1463 switch (entry->type) {
1464 case TRACE_FN:
1465 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1466 trace_seq_puts(s, " (");
1467 if (kretprobed(entry->fn.parent_ip))
1468 trace_seq_puts(s, KRETPROBE_MSG);
1469 else
1470 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1471 trace_seq_puts(s, ")\n");
1472 break;
1473 case TRACE_CTX:
1474 case TRACE_WAKE:
1475 T = entry->ctx.next_state < sizeof(state_to_char) ?
1476 state_to_char[entry->ctx.next_state] : 'X';
1477
1478 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1479 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1480 comm = trace_find_cmdline(entry->ctx.next_pid);
1481 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1482 entry->ctx.prev_pid,
1483 entry->ctx.prev_prio,
1484 S, entry->type == TRACE_CTX ? "==>" : " +",
1485 entry->ctx.next_pid,
1486 entry->ctx.next_prio,
1487 T, comm);
1488 break;
1489 case TRACE_SPECIAL:
1490 trace_seq_printf(s, "# %ld %ld %ld\n",
1491 entry->special.arg1,
1492 entry->special.arg2,
1493 entry->special.arg3);
1494 break;
1495 case TRACE_STACK:
1496 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1497 if (i)
1498 trace_seq_puts(s, " <= ");
1499 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1500 }
1501 trace_seq_puts(s, "\n");
1502 break;
1503 default:
1504 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1505 }
1506 return 1;
1507}
1508
1509static int print_trace_fmt(struct trace_iterator *iter)
1510{
1511 struct trace_seq *s = &iter->seq;
1512 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1513 struct trace_entry *entry;
1514 unsigned long usec_rem;
1515 unsigned long long t;
1516 unsigned long secs;
1517 char *comm;
1518 int ret;
1519 int S, T;
1520 int i;
1521
1522 entry = iter->ent;
1523
1524 comm = trace_find_cmdline(iter->ent->pid);
1525
1526 t = ns2usecs(entry->t);
1527 usec_rem = do_div(t, 1000000ULL);
1528 secs = (unsigned long)t;
1529
1530 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1531 if (!ret)
1532 return 0;
1533 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1534 if (!ret)
1535 return 0;
1536 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1537 if (!ret)
1538 return 0;
1539
1540 switch (entry->type) {
1541 case TRACE_FN:
1542 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1543 if (!ret)
1544 return 0;
1545 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1546 entry->fn.parent_ip) {
1547 ret = trace_seq_printf(s, " <-");
1548 if (!ret)
1549 return 0;
1550 if (kretprobed(entry->fn.parent_ip))
1551 ret = trace_seq_puts(s, KRETPROBE_MSG);
1552 else
1553 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1554 sym_flags);
1555 if (!ret)
1556 return 0;
1557 }
1558 ret = trace_seq_printf(s, "\n");
1559 if (!ret)
1560 return 0;
1561 break;
1562 case TRACE_CTX:
1563 case TRACE_WAKE:
1564 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1565 state_to_char[entry->ctx.prev_state] : 'X';
1566 T = entry->ctx.next_state < sizeof(state_to_char) ?
1567 state_to_char[entry->ctx.next_state] : 'X';
1568 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1569 entry->ctx.prev_pid,
1570 entry->ctx.prev_prio,
1571 S,
1572 entry->type == TRACE_CTX ? "==>" : " +",
1573 entry->ctx.next_pid,
1574 entry->ctx.next_prio,
1575 T);
1576 if (!ret)
1577 return 0;
1578 break;
1579 case TRACE_SPECIAL:
1580 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1581 entry->special.arg1,
1582 entry->special.arg2,
1583 entry->special.arg3);
1584 if (!ret)
1585 return 0;
1586 break;
1587 case TRACE_STACK:
1588 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1589 if (i) {
1590 ret = trace_seq_puts(s, " <= ");
1591 if (!ret)
1592 return 0;
1593 }
1594 ret = seq_print_ip_sym(s, entry->stack.caller[i],
1595 sym_flags);
1596 if (!ret)
1597 return 0;
1598 }
1599 ret = trace_seq_puts(s, "\n");
1600 if (!ret)
1601 return 0;
1602 break;
1603 }
1604 return 1;
1605}
1606
1607static int print_raw_fmt(struct trace_iterator *iter)
1608{
1609 struct trace_seq *s = &iter->seq;
1610 struct trace_entry *entry;
1611 int ret;
1612 int S, T;
1613
1614 entry = iter->ent;
1615
1616 ret = trace_seq_printf(s, "%d %d %llu ",
1617 entry->pid, iter->cpu, entry->t);
1618 if (!ret)
1619 return 0;
1620
1621 switch (entry->type) {
1622 case TRACE_FN:
1623 ret = trace_seq_printf(s, "%x %x\n",
1624 entry->fn.ip, entry->fn.parent_ip);
1625 if (!ret)
1626 return 0;
1627 break;
1628 case TRACE_CTX:
1629 case TRACE_WAKE:
1630 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1631 state_to_char[entry->ctx.prev_state] : 'X';
1632 T = entry->ctx.next_state < sizeof(state_to_char) ?
1633 state_to_char[entry->ctx.next_state] : 'X';
1634 if (entry->type == TRACE_WAKE)
1635 S = '+';
1636 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1637 entry->ctx.prev_pid,
1638 entry->ctx.prev_prio,
1639 S,
1640 entry->ctx.next_pid,
1641 entry->ctx.next_prio,
1642 T);
1643 if (!ret)
1644 return 0;
1645 break;
1646 case TRACE_SPECIAL:
1647 case TRACE_STACK:
1648 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1649 entry->special.arg1,
1650 entry->special.arg2,
1651 entry->special.arg3);
1652 if (!ret)
1653 return 0;
1654 break;
1655 }
1656 return 1;
1657}
1658
1659#define SEQ_PUT_FIELD_RET(s, x) \
1660do { \
1661 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
1662 return 0; \
1663} while (0)
1664
1665#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1666do { \
1667 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1668 return 0; \
1669} while (0)
1670
1671static int print_hex_fmt(struct trace_iterator *iter)
1672{
1673 struct trace_seq *s = &iter->seq;
1674 unsigned char newline = '\n';
1675 struct trace_entry *entry;
1676 int S, T;
1677
1678 entry = iter->ent;
1679
1680 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1681 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1682 SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1683
1684 switch (entry->type) {
1685 case TRACE_FN:
1686 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1687 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1688 break;
1689 case TRACE_CTX:
1690 case TRACE_WAKE:
1691 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1692 state_to_char[entry->ctx.prev_state] : 'X';
1693 T = entry->ctx.next_state < sizeof(state_to_char) ?
1694 state_to_char[entry->ctx.next_state] : 'X';
1695 if (entry->type == TRACE_WAKE)
1696 S = '+';
1697 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1698 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1699 SEQ_PUT_HEX_FIELD_RET(s, S);
1700 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1701 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1702 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1703 SEQ_PUT_HEX_FIELD_RET(s, T);
1704 break;
1705 case TRACE_SPECIAL:
1706 case TRACE_STACK:
1707 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1708 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1709 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1710 break;
1711 }
1712 SEQ_PUT_FIELD_RET(s, newline);
1713
1714 return 1;
1715}
1716
1717static int print_bin_fmt(struct trace_iterator *iter)
1718{
1719 struct trace_seq *s = &iter->seq;
1720 struct trace_entry *entry;
1721
1722 entry = iter->ent;
1723
1724 SEQ_PUT_FIELD_RET(s, entry->pid);
1725 SEQ_PUT_FIELD_RET(s, entry->cpu);
1726 SEQ_PUT_FIELD_RET(s, entry->t);
1727
1728 switch (entry->type) {
1729 case TRACE_FN:
1730 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1731 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1732 break;
1733 case TRACE_CTX:
1734 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1735 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1736 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1737 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1738 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1739 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1740 break;
1741 case TRACE_SPECIAL:
1742 case TRACE_STACK:
1743 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1744 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1745 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1746 break;
1747 }
1748 return 1;
1749}
1750
1751static int trace_empty(struct trace_iterator *iter)
1752{
1753 struct trace_array_cpu *data;
1754 int cpu;
1755
1756 for_each_tracing_cpu(cpu) {
1757 data = iter->tr->data[cpu];
1758
1759 if (head_page(data) && data->trace_idx &&
1760 (data->trace_tail != data->trace_head ||
1761 data->trace_tail_idx != data->trace_head_idx))
1762 return 0;
1763 }
1764 return 1;
1765}
1766
1767static int print_trace_line(struct trace_iterator *iter)
1768{
1769 if (iter->trace && iter->trace->print_line)
1770 return iter->trace->print_line(iter);
1771
1772 if (trace_flags & TRACE_ITER_BIN)
1773 return print_bin_fmt(iter);
1774
1775 if (trace_flags & TRACE_ITER_HEX)
1776 return print_hex_fmt(iter);
1777
1778 if (trace_flags & TRACE_ITER_RAW)
1779 return print_raw_fmt(iter);
1780
1781 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1782 return print_lat_fmt(iter, iter->idx, iter->cpu);
1783
1784 return print_trace_fmt(iter);
1785}
1786
1787static int s_show(struct seq_file *m, void *v)
1788{
1789 struct trace_iterator *iter = v;
1790
1791 if (iter->ent == NULL) {
1792 if (iter->tr) {
1793 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1794 seq_puts(m, "#\n");
1795 }
1796 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1797 /* print nothing if the buffers are empty */
1798 if (trace_empty(iter))
1799 return 0;
1800 print_trace_header(m, iter);
1801 if (!(trace_flags & TRACE_ITER_VERBOSE))
1802 print_lat_help_header(m);
1803 } else {
1804 if (!(trace_flags & TRACE_ITER_VERBOSE))
1805 print_func_help_header(m);
1806 }
1807 } else {
1808 print_trace_line(iter);
1809 trace_print_seq(m, &iter->seq);
1810 }
1811
1812 return 0;
1813}
1814
1815static struct seq_operations tracer_seq_ops = {
1816 .start = s_start,
1817 .next = s_next,
1818 .stop = s_stop,
1819 .show = s_show,
1820};
1821
1822static struct trace_iterator *
1823__tracing_open(struct inode *inode, struct file *file, int *ret)
1824{
1825 struct trace_iterator *iter;
1826
1827 if (tracing_disabled) {
1828 *ret = -ENODEV;
1829 return NULL;
1830 }
1831
1832 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1833 if (!iter) {
1834 *ret = -ENOMEM;
1835 goto out;
1836 }
1837
1838 mutex_lock(&trace_types_lock);
1839 if (current_trace && current_trace->print_max)
1840 iter->tr = &max_tr;
1841 else
1842 iter->tr = inode->i_private;
1843 iter->trace = current_trace;
1844 iter->pos = -1;
1845
1846 /* TODO stop tracer */
1847 *ret = seq_open(file, &tracer_seq_ops);
1848 if (!*ret) {
1849 struct seq_file *m = file->private_data;
1850 m->private = iter;
1851
1852 /* stop the trace while dumping */
1853 if (iter->tr->ctrl)
1854 tracer_enabled = 0;
1855
1856 if (iter->trace && iter->trace->open)
1857 iter->trace->open(iter);
1858 } else {
1859 kfree(iter);
1860 iter = NULL;
1861 }
1862 mutex_unlock(&trace_types_lock);
1863
1864 out:
1865 return iter;
1866}
1867
1868int tracing_open_generic(struct inode *inode, struct file *filp)
1869{
1870 if (tracing_disabled)
1871 return -ENODEV;
1872
1873 filp->private_data = inode->i_private;
1874 return 0;
1875}
1876
1877int tracing_release(struct inode *inode, struct file *file)
1878{
1879 struct seq_file *m = (struct seq_file *)file->private_data;
1880 struct trace_iterator *iter = m->private;
1881
1882 mutex_lock(&trace_types_lock);
1883 if (iter->trace && iter->trace->close)
1884 iter->trace->close(iter);
1885
1886 /* reenable tracing if it was previously enabled */
1887 if (iter->tr->ctrl)
1888 tracer_enabled = 1;
1889 mutex_unlock(&trace_types_lock);
1890
1891 seq_release(inode, file);
1892 kfree(iter);
1893 return 0;
1894}
1895
1896static int tracing_open(struct inode *inode, struct file *file)
1897{
1898 int ret;
1899
1900 __tracing_open(inode, file, &ret);
1901
1902 return ret;
1903}
1904
1905static int tracing_lt_open(struct inode *inode, struct file *file)
1906{
1907 struct trace_iterator *iter;
1908 int ret;
1909
1910 iter = __tracing_open(inode, file, &ret);
1911
1912 if (!ret)
1913 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1914
1915 return ret;
1916}
1917
1918
1919static void *
1920t_next(struct seq_file *m, void *v, loff_t *pos)
1921{
1922 struct tracer *t = m->private;
1923
1924 (*pos)++;
1925
1926 if (t)
1927 t = t->next;
1928
1929 m->private = t;
1930
1931 return t;
1932}
1933
1934static void *t_start(struct seq_file *m, loff_t *pos)
1935{
1936 struct tracer *t = m->private;
1937 loff_t l = 0;
1938
1939 mutex_lock(&trace_types_lock);
1940 for (; t && l < *pos; t = t_next(m, t, &l))
1941 ;
1942
1943 return t;
1944}
1945
1946static void t_stop(struct seq_file *m, void *p)
1947{
1948 mutex_unlock(&trace_types_lock);
1949}
1950
1951static int t_show(struct seq_file *m, void *v)
1952{
1953 struct tracer *t = v;
1954
1955 if (!t)
1956 return 0;
1957
1958 seq_printf(m, "%s", t->name);
1959 if (t->next)
1960 seq_putc(m, ' ');
1961 else
1962 seq_putc(m, '\n');
1963
1964 return 0;
1965}
1966
1967static struct seq_operations show_traces_seq_ops = {
1968 .start = t_start,
1969 .next = t_next,
1970 .stop = t_stop,
1971 .show = t_show,
1972};
1973
1974static int show_traces_open(struct inode *inode, struct file *file)
1975{
1976 int ret;
1977
1978 if (tracing_disabled)
1979 return -ENODEV;
1980
1981 ret = seq_open(file, &show_traces_seq_ops);
1982 if (!ret) {
1983 struct seq_file *m = file->private_data;
1984 m->private = trace_types;
1985 }
1986
1987 return ret;
1988}
1989
1990static struct file_operations tracing_fops = {
1991 .open = tracing_open,
1992 .read = seq_read,
1993 .llseek = seq_lseek,
1994 .release = tracing_release,
1995};
1996
1997static struct file_operations tracing_lt_fops = {
1998 .open = tracing_lt_open,
1999 .read = seq_read,
2000 .llseek = seq_lseek,
2001 .release = tracing_release,
2002};
2003
2004static struct file_operations show_traces_fops = {
2005 .open = show_traces_open,
2006 .read = seq_read,
2007 .release = seq_release,
2008};
2009
2010/*
2011 * Only trace on a CPU if the bitmask is set:
2012 */
2013static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2014
2015/*
2016 * When tracing/tracing_cpumask is modified, this holds
2017 * the new bitmask we are about to install:
2018 */
2019static cpumask_t tracing_cpumask_new;
2020
2021/*
2022 * The tracer itself will not take this lock, but we still want
2023 * to provide a consistent cpumask to user-space:
2024 */
2025static DEFINE_MUTEX(tracing_cpumask_update_lock);
2026
2027/*
2028 * Temporary storage for the character representation of the
2029 * CPU bitmask (and one more byte for the newline):
2030 */
2031static char mask_str[NR_CPUS + 1];
2032
2033static ssize_t
2034tracing_cpumask_read(struct file *filp, char __user *ubuf,
2035 size_t count, loff_t *ppos)
2036{
2037 int len;
2038
2039 mutex_lock(&tracing_cpumask_update_lock);
2040
2041 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2042 if (count - len < 2) {
2043 count = -EINVAL;
2044 goto out_err;
2045 }
2046 len += sprintf(mask_str + len, "\n");
2047 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2048
2049out_err:
2050 mutex_unlock(&tracing_cpumask_update_lock);
2051
2052 return count;
2053}
2054
2055static ssize_t
2056tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2057 size_t count, loff_t *ppos)
2058{
2059 int err, cpu;
2060
2061 mutex_lock(&tracing_cpumask_update_lock);
2062 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2063 if (err)
2064 goto err_unlock;
2065
2066 raw_local_irq_disable();
2067 __raw_spin_lock(&ftrace_max_lock);
2068 for_each_tracing_cpu(cpu) {
2069 /*
2070 * Increase/decrease the disabled counter if we are
2071 * about to flip a bit in the cpumask:
2072 */
2073 if (cpu_isset(cpu, tracing_cpumask) &&
2074 !cpu_isset(cpu, tracing_cpumask_new)) {
2075 atomic_inc(&global_trace.data[cpu]->disabled);
2076 }
2077 if (!cpu_isset(cpu, tracing_cpumask) &&
2078 cpu_isset(cpu, tracing_cpumask_new)) {
2079 atomic_dec(&global_trace.data[cpu]->disabled);
2080 }
2081 }
2082 __raw_spin_unlock(&ftrace_max_lock);
2083 raw_local_irq_enable();
2084
2085 tracing_cpumask = tracing_cpumask_new;
2086
2087 mutex_unlock(&tracing_cpumask_update_lock);
2088
2089 return count;
2090
2091err_unlock:
2092 mutex_unlock(&tracing_cpumask_update_lock);
2093
2094 return err;
2095}
2096
2097static struct file_operations tracing_cpumask_fops = {
2098 .open = tracing_open_generic,
2099 .read = tracing_cpumask_read,
2100 .write = tracing_cpumask_write,
2101};
2102
2103static ssize_t
2104tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2105 size_t cnt, loff_t *ppos)
2106{
2107 char *buf;
2108 int r = 0;
2109 int len = 0;
2110 int i;
2111
2112	/* calculate max size */
2113 for (i = 0; trace_options[i]; i++) {
2114 len += strlen(trace_options[i]);
2115 len += 3; /* "no" and space */
2116 }
2117
2118 /* +2 for \n and \0 */
2119 buf = kmalloc(len + 2, GFP_KERNEL);
2120 if (!buf)
2121 return -ENOMEM;
2122
2123 for (i = 0; trace_options[i]; i++) {
2124 if (trace_flags & (1 << i))
2125 r += sprintf(buf + r, "%s ", trace_options[i]);
2126 else
2127 r += sprintf(buf + r, "no%s ", trace_options[i]);
2128 }
2129
2130 r += sprintf(buf + r, "\n");
2131 WARN_ON(r >= len + 2);
2132
2133 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2134
2135 kfree(buf);
2136
2137 return r;
2138}
2139
2140static ssize_t
2141tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2142 size_t cnt, loff_t *ppos)
2143{
2144 char buf[64];
2145 char *cmp = buf;
2146 int neg = 0;
2147 int i;
2148
2149 if (cnt >= sizeof(buf))
2150 return -EINVAL;
2151
2152 if (copy_from_user(&buf, ubuf, cnt))
2153 return -EFAULT;
2154
2155 buf[cnt] = 0;
2156
2157 if (strncmp(buf, "no", 2) == 0) {
2158 neg = 1;
2159 cmp += 2;
2160 }
2161
2162 for (i = 0; trace_options[i]; i++) {
2163 int len = strlen(trace_options[i]);
2164
2165 if (strncmp(cmp, trace_options[i], len) == 0) {
2166 if (neg)
2167 trace_flags &= ~(1 << i);
2168 else
2169 trace_flags |= (1 << i);
2170 break;
2171 }
2172 }
2173 /*
2174 * If no option could be set, return an error:
2175 */
2176 if (!trace_options[i])
2177 return -EINVAL;
2178
2179 filp->f_pos += cnt;
2180
2181 return cnt;
2182}
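
/*
 * Editorial aside (not part of this file): the "opt"/"noopt" parsing
 * done by tracing_iter_ctrl_write(), as a stand-alone sketch.  It uses
 * strcmp() where the kernel uses strncmp() on the option length (so a
 * trailing newline from echo still matches); names are hypothetical.
 */
#include <stdio.h>
#include <string.h>

/* Must match the bit positions of the flags they control. */
static const char *demo_options[] = {
	"print-parent", "sym-offset", "sym-addr", "verbose", NULL
};
static unsigned long demo_flags = 0x1;	/* print-parent on by default */

/* Accept "opt" to set a bit or "noopt" to clear it; -1 if unknown. */
static int demo_set_option(const char *buf)
{
	const char *cmp = buf;
	int neg = 0;

	if (strncmp(buf, "no", 2) == 0) {
		neg = 1;
		cmp += 2;
	}

	for (int i = 0; demo_options[i]; i++) {
		if (strcmp(cmp, demo_options[i]) == 0) {
			if (neg)
				demo_flags &= ~(1ul << i);
			else
				demo_flags |= 1ul << i;
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	demo_set_option("verbose");		/* sets bit 3 */
	demo_set_option("noprint-parent");	/* clears bit 0 */
	printf("flags = %#lx\n", demo_flags);	/* prints 0x8 */
	return 0;
}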
2183
2184static struct file_operations tracing_iter_fops = {
2185 .open = tracing_open_generic,
2186 .read = tracing_iter_ctrl_read,
2187 .write = tracing_iter_ctrl_write,
2188};
2189
2190static const char readme_msg[] =
2191 "tracing mini-HOWTO:\n\n"
2192 "# mkdir /debug\n"
2193 "# mount -t debugfs nodev /debug\n\n"
2194 "# cat /debug/tracing/available_tracers\n"
2195 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2196 "# cat /debug/tracing/current_tracer\n"
2197 "none\n"
2198 "# echo sched_switch > /debug/tracing/current_tracer\n"
2199 "# cat /debug/tracing/current_tracer\n"
2200 "sched_switch\n"
2201 "# cat /debug/tracing/iter_ctrl\n"
2202 "noprint-parent nosym-offset nosym-addr noverbose\n"
2203 "# echo print-parent > /debug/tracing/iter_ctrl\n"
2204 "# echo 1 > /debug/tracing/tracing_enabled\n"
2205 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2206 "echo 0 > /debug/tracing/tracing_enabled\n"
2207;
2208
2209static ssize_t
2210tracing_readme_read(struct file *filp, char __user *ubuf,
2211 size_t cnt, loff_t *ppos)
2212{
2213 return simple_read_from_buffer(ubuf, cnt, ppos,
2214 readme_msg, strlen(readme_msg));
2215}
2216
2217static struct file_operations tracing_readme_fops = {
2218 .open = tracing_open_generic,
2219 .read = tracing_readme_read,
2220};
2221
2222static ssize_t
2223tracing_ctrl_read(struct file *filp, char __user *ubuf,
2224 size_t cnt, loff_t *ppos)
2225{
2226 struct trace_array *tr = filp->private_data;
2227 char buf[64];
2228 int r;
2229
2230 r = sprintf(buf, "%ld\n", tr->ctrl);
2231 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2232}
2233
2234static ssize_t
2235tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2236 size_t cnt, loff_t *ppos)
2237{
2238 struct trace_array *tr = filp->private_data;
2239 char buf[64];
2240 long val;
2241 int ret;
2242
2243 if (cnt >= sizeof(buf))
2244 return -EINVAL;
2245
2246 if (copy_from_user(&buf, ubuf, cnt))
2247 return -EFAULT;
2248
2249 buf[cnt] = 0;
2250
2251 ret = strict_strtoul(buf, 10, &val);
2252 if (ret < 0)
2253 return ret;
2254
2255 val = !!val;
2256
2257 mutex_lock(&trace_types_lock);
2258 if (tr->ctrl ^ val) {
2259 if (val)
2260 tracer_enabled = 1;
2261 else
2262 tracer_enabled = 0;
2263
2264 tr->ctrl = val;
2265
2266 if (current_trace && current_trace->ctrl_update)
2267 current_trace->ctrl_update(tr);
2268 }
2269 mutex_unlock(&trace_types_lock);
2270
2271 filp->f_pos += cnt;
2272
2273 return cnt;
2274}
2275
2276static ssize_t
2277tracing_set_trace_read(struct file *filp, char __user *ubuf,
2278 size_t cnt, loff_t *ppos)
2279{
2280 char buf[max_tracer_type_len+2];
2281 int r;
2282
2283 mutex_lock(&trace_types_lock);
2284 if (current_trace)
2285 r = sprintf(buf, "%s\n", current_trace->name);
2286 else
2287 r = sprintf(buf, "\n");
2288 mutex_unlock(&trace_types_lock);
2289
2290 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2291}
2292
2293static ssize_t
2294tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{
2297 struct trace_array *tr = &global_trace;
2298 struct tracer *t;
2299 char buf[max_tracer_type_len+1];
2300 int i;
2301
2302 if (cnt > max_tracer_type_len)
2303 cnt = max_tracer_type_len;
2304
2305 if (copy_from_user(&buf, ubuf, cnt))
2306 return -EFAULT;
2307
2308 buf[cnt] = 0;
2309
2310	/* strip trailing whitespace. */
2311 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2312 buf[i] = 0;
2313
2314 mutex_lock(&trace_types_lock);
2315 for (t = trace_types; t; t = t->next) {
2316 if (strcmp(t->name, buf) == 0)
2317 break;
2318 }
2319 if (!t || t == current_trace)
2320 goto out;
2321
2322 if (current_trace && current_trace->reset)
2323 current_trace->reset(tr);
2324
2325 current_trace = t;
2326 if (t->init)
2327 t->init(tr);
2328
2329 out:
2330 mutex_unlock(&trace_types_lock);
2331
2332 filp->f_pos += cnt;
2333
2334 return cnt;
2335}
2336
2337static ssize_t
2338tracing_max_lat_read(struct file *filp, char __user *ubuf,
2339 size_t cnt, loff_t *ppos)
2340{
2341 unsigned long *ptr = filp->private_data;
2342 char buf[64];
2343 int r;
2344
2345 r = snprintf(buf, sizeof(buf), "%ld\n",
2346 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2347 if (r > sizeof(buf))
2348 r = sizeof(buf);
2349 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2350}
2351
2352static ssize_t
2353tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2354 size_t cnt, loff_t *ppos)
2355{
2356 long *ptr = filp->private_data;
2357 char buf[64];
2358 long val;
2359 int ret;
2360
2361 if (cnt >= sizeof(buf))
2362 return -EINVAL;
2363
2364 if (copy_from_user(&buf, ubuf, cnt))
2365 return -EFAULT;
2366
2367 buf[cnt] = 0;
2368
2369 ret = strict_strtoul(buf, 10, &val);
2370 if (ret < 0)
2371 return ret;
2372
2373 *ptr = val * 1000;
2374
2375 return cnt;
2376}
2377
2378static atomic_t tracing_reader;
2379
2380static int tracing_open_pipe(struct inode *inode, struct file *filp)
2381{
2382 struct trace_iterator *iter;
2383
2384 if (tracing_disabled)
2385 return -ENODEV;
2386
2387	/* We only allow one reader of the pipe */
2388 if (atomic_inc_return(&tracing_reader) != 1) {
2389 atomic_dec(&tracing_reader);
2390 return -EBUSY;
2391 }
2392
2393 /* create a buffer to store the information to pass to userspace */
2394 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2395 if (!iter)
2396 return -ENOMEM;
2397
2398 mutex_lock(&trace_types_lock);
2399 iter->tr = &global_trace;
2400 iter->trace = current_trace;
2401 filp->private_data = iter;
2402
2403 if (iter->trace->pipe_open)
2404 iter->trace->pipe_open(iter);
2405 mutex_unlock(&trace_types_lock);
2406
2407 return 0;
2408}
2409
2410static int tracing_release_pipe(struct inode *inode, struct file *file)
2411{
2412 struct trace_iterator *iter = file->private_data;
2413
2414 kfree(iter);
2415 atomic_dec(&tracing_reader);
2416
2417 return 0;
2418}
2419
2420static unsigned int
2421tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2422{
2423 struct trace_iterator *iter = filp->private_data;
2424
2425 if (trace_flags & TRACE_ITER_BLOCK) {
2426 /*
2427 * Always select as readable when in blocking mode
2428 */
2429 return POLLIN | POLLRDNORM;
2430 } else {
2431 if (!trace_empty(iter))
2432 return POLLIN | POLLRDNORM;
2433 poll_wait(filp, &trace_wait, poll_table);
2434 if (!trace_empty(iter))
2435 return POLLIN | POLLRDNORM;
2436
2437 return 0;
2438 }
2439}
2440
2441/*
2442 * Consumer reader.
2443 */
2444static ssize_t
2445tracing_read_pipe(struct file *filp, char __user *ubuf,
2446 size_t cnt, loff_t *ppos)
2447{
2448 struct trace_iterator *iter = filp->private_data;
2449 struct trace_array_cpu *data;
2450 static cpumask_t mask;
2451 unsigned long flags;
2452#ifdef CONFIG_FTRACE
2453 int ftrace_save;
2454#endif
2455 int cpu;
2456 ssize_t sret;
2457
2458 /* return any leftover data */
2459 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2460 if (sret != -EBUSY)
2461 return sret;
2462 sret = 0;
2463
2464 trace_seq_reset(&iter->seq);
2465
2466 mutex_lock(&trace_types_lock);
2467 if (iter->trace->read) {
2468 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2469 if (sret)
2470 goto out;
2471 }
2472
2473 while (trace_empty(iter)) {
2474
2475 if ((filp->f_flags & O_NONBLOCK)) {
2476 sret = -EAGAIN;
2477 goto out;
2478 }
2479
2480 /*
2481		 * This is a makeshift waitqueue. The reasons we don't use
2482		 * an actual wait queue are:
2483		 * 1) we only ever have one waiter
2484		 * 2) tracing traces all functions, and we don't want
2485		 *    the overhead of calling wake_up and friends
2486		 *    (and tracing them too)
2487		 * Anyway, this is a very primitive wakeup.
2488 */
2489 set_current_state(TASK_INTERRUPTIBLE);
2490 iter->tr->waiter = current;
2491
2492 mutex_unlock(&trace_types_lock);
2493
2494 /* sleep for 100 msecs, and try again. */
2495 schedule_timeout(HZ/10);
2496
2497 mutex_lock(&trace_types_lock);
2498
2499 iter->tr->waiter = NULL;
2500
2501 if (signal_pending(current)) {
2502 sret = -EINTR;
2503 goto out;
2504 }
2505
2506 if (iter->trace != current_trace)
2507 goto out;
2508
2509 /*
2510		 * We block until we have read something and tracing is
2511		 * disabled. We keep blocking while tracing is disabled if we
2512		 * have not yet read anything. This allows a user to cat this
2513		 * file and then enable tracing. But after we have read
2514		 * something, we give an EOF when tracing is again disabled.
2515 *
2516 * iter->pos will be 0 if we haven't read anything.
2517 */
2518 if (!tracer_enabled && iter->pos)
2519 break;
2520
2521 continue;
2522 }
2523
2524 /* stop when tracing is finished */
2525 if (trace_empty(iter))
2526 goto out;
2527
2528 if (cnt >= PAGE_SIZE)
2529 cnt = PAGE_SIZE - 1;
2530
2531 /* reset all but tr, trace, and overruns */
2532 memset(&iter->seq, 0,
2533 sizeof(struct trace_iterator) -
2534 offsetof(struct trace_iterator, seq));
2535 iter->pos = -1;
2536
2537 /*
2538		 * We need to stop all tracing on all CPUs to read
2539		 * the next buffer. This is a bit expensive, but is
2540		 * not done often. We fill in all that we can read,
2541		 * and then release the locks again.
2542 */
2543
2544 cpus_clear(mask);
2545 local_irq_save(flags);
2546#ifdef CONFIG_FTRACE
2547 ftrace_save = ftrace_enabled;
2548 ftrace_enabled = 0;
2549#endif
2550 smp_wmb();
2551 for_each_tracing_cpu(cpu) {
2552 data = iter->tr->data[cpu];
2553
2554 if (!head_page(data) || !data->trace_idx)
2555 continue;
2556
2557 atomic_inc(&data->disabled);
2558 cpu_set(cpu, mask);
2559 }
2560
2561 for_each_cpu_mask(cpu, mask) {
2562 data = iter->tr->data[cpu];
2563 __raw_spin_lock(&data->lock);
2564
2565 if (data->overrun > iter->last_overrun[cpu])
2566 iter->overrun[cpu] +=
2567 data->overrun - iter->last_overrun[cpu];
2568 iter->last_overrun[cpu] = data->overrun;
2569 }
2570
2571 while (find_next_entry_inc(iter) != NULL) {
2572 int ret;
2573 int len = iter->seq.len;
2574
2575 ret = print_trace_line(iter);
2576 if (!ret) {
2577 /* don't print partial lines */
2578 iter->seq.len = len;
2579 break;
2580 }
2581
2582 trace_consume(iter);
2583
2584 if (iter->seq.len >= cnt)
2585 break;
2586 }
2587
2588 for_each_cpu_mask(cpu, mask) {
2589 data = iter->tr->data[cpu];
2590 __raw_spin_unlock(&data->lock);
2591 }
2592
2593 for_each_cpu_mask(cpu, mask) {
2594 data = iter->tr->data[cpu];
2595 atomic_dec(&data->disabled);
2596 }
2597#ifdef CONFIG_FTRACE
2598 ftrace_enabled = ftrace_save;
2599#endif
2600 local_irq_restore(flags);
2601
2602 /* Now copy what we have to the user */
2603 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2604 if (iter->seq.readpos >= iter->seq.len)
2605 trace_seq_reset(&iter->seq);
2606 if (sret == -EBUSY)
2607 sret = 0;
2608
2609out:
2610 mutex_unlock(&trace_types_lock);
2611
2612 return sret;
2613}
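
tracing_read_pipe() is the consuming read: it blocks in 100 ms steps until entries show up (or until tracing is disabled after something has already been read), briefly disables per-CPU tracing while it drains the buffers, and hands back at most a page per read. A hedged userspace sketch of a continuous consumer, again assuming the debugfs path:

/* Minimal sketch: continuously consume trace_pipe (path is an assumption). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);

	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	/* Reads block until entries are available; each read consumes them. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	return 0;
}
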
2614
2615static ssize_t
2616tracing_entries_read(struct file *filp, char __user *ubuf,
2617 size_t cnt, loff_t *ppos)
2618{
2619 struct trace_array *tr = filp->private_data;
2620 char buf[64];
2621 int r;
2622
2623 r = sprintf(buf, "%lu\n", tr->entries);
2624 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2625}
2626
2627static ssize_t
2628tracing_entries_write(struct file *filp, const char __user *ubuf,
2629 size_t cnt, loff_t *ppos)
2630{
2631 unsigned long val;
2632 char buf[64];
2633 int i, ret;
2634
2635 if (cnt >= sizeof(buf))
2636 return -EINVAL;
2637
2638 if (copy_from_user(&buf, ubuf, cnt))
2639 return -EFAULT;
2640
2641 buf[cnt] = 0;
2642
2643 ret = strict_strtoul(buf, 10, &val);
2644 if (ret < 0)
2645 return ret;
2646
2647 /* must have at least 1 entry */
2648 if (!val)
2649 return -EINVAL;
2650
2651 mutex_lock(&trace_types_lock);
2652
2653 if (current_trace != &no_tracer) {
2654 cnt = -EBUSY;
2655 pr_info("ftrace: set current_tracer to none"
2656 " before modifying buffer size\n");
2657 goto out;
2658 }
2659
2660 if (val > global_trace.entries) {
2661 long pages_requested;
2662 unsigned long freeable_pages;
2663
2664 /* make sure we have enough memory before mapping */
2665 pages_requested =
2666 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2667
2668 /* account for each buffer (and max_tr) */
2669 pages_requested *= tracing_nr_buffers * 2;
2670
2671 /* Check for overflow */
2672 if (pages_requested < 0) {
2673 cnt = -ENOMEM;
2674 goto out;
2675 }
2676
2677 freeable_pages = determine_dirtyable_memory();
2678
2679		/* we only allow requesting 1/4 of usable memory */
2680 if (pages_requested >
2681 ((freeable_pages + tracing_pages_allocated) / 4)) {
2682 cnt = -ENOMEM;
2683 goto out;
2684 }
2685
2686 while (global_trace.entries < val) {
2687 if (trace_alloc_page()) {
2688 cnt = -ENOMEM;
2689 goto out;
2690 }
2691 /* double check that we don't go over the known pages */
2692 if (tracing_pages_allocated > pages_requested)
2693 break;
2694 }
2695
2696 } else {
2697		/* shrink, keeping at least val entries (rounded up to whole pages) */
2698 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2699 trace_free_page();
2700 }
2701
2702 /* check integrity */
2703 for_each_tracing_cpu(i)
2704 check_pages(global_trace.data[i]);
2705
2706 filp->f_pos += cnt;
2707
2708 /* If check pages failed, return ENOMEM */
2709 if (tracing_disabled)
2710 cnt = -ENOMEM;
2711 out:
2712 max_tr.entries = global_trace.entries;
2713 mutex_unlock(&trace_types_lock);
2714
2715 return cnt;
2716}
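
tracing_entries_write() only resizes the buffers while current_tracer is "none", refuses requests above roughly a quarter of dirtyable memory, and grows or shrinks one page (ENTRIES_PER_PAGE entries) at a time. A hedged sketch of driving the resize from userspace follows; the debugfs path and the write_str() helper are illustrative, not part of the patch:

/* Minimal sketch: set the tracer to "none" and resize trace_entries. */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(int argc, char **argv)
{
	const char *dir = "/sys/kernel/debug/tracing";	/* assumed mount */
	char path[256], cur[64];
	FILE *f;

	/* The kernel refuses to resize unless current_tracer is "none". */
	snprintf(path, sizeof(path), "%s/current_tracer", dir);
	if (write_str(path, "none\n")) {
		perror("current_tracer");
		return 1;
	}

	snprintf(path, sizeof(path), "%s/trace_entries", dir);
	if (write_str(path, argc > 1 ? argv[1] : "65536")) {
		perror("trace_entries");
		return 1;
	}

	/* Read back the (page-rounded) size the kernel actually granted. */
	f = fopen(path, "r");
	if (f && fgets(cur, sizeof(cur), f))
		printf("trace_entries is now %s", cur);
	if (f)
		fclose(f);
	return 0;
}
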
2717
2718static struct file_operations tracing_max_lat_fops = {
2719 .open = tracing_open_generic,
2720 .read = tracing_max_lat_read,
2721 .write = tracing_max_lat_write,
2722};
2723
2724static struct file_operations tracing_ctrl_fops = {
2725 .open = tracing_open_generic,
2726 .read = tracing_ctrl_read,
2727 .write = tracing_ctrl_write,
2728};
2729
2730static struct file_operations set_tracer_fops = {
2731 .open = tracing_open_generic,
2732 .read = tracing_set_trace_read,
2733 .write = tracing_set_trace_write,
2734};
2735
2736static struct file_operations tracing_pipe_fops = {
2737 .open = tracing_open_pipe,
2738 .poll = tracing_poll_pipe,
2739 .read = tracing_read_pipe,
2740 .release = tracing_release_pipe,
2741};
2742
2743static struct file_operations tracing_entries_fops = {
2744 .open = tracing_open_generic,
2745 .read = tracing_entries_read,
2746 .write = tracing_entries_write,
2747};
2748
2749#ifdef CONFIG_DYNAMIC_FTRACE
2750
2751static ssize_t
2752tracing_read_long(struct file *filp, char __user *ubuf,
2753 size_t cnt, loff_t *ppos)
2754{
2755 unsigned long *p = filp->private_data;
2756 char buf[64];
2757 int r;
2758
2759 r = sprintf(buf, "%ld\n", *p);
2760
2761 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2762}
2763
2764static struct file_operations tracing_read_long_fops = {
2765 .open = tracing_open_generic,
2766 .read = tracing_read_long,
2767};
2768#endif
2769
2770static struct dentry *d_tracer;
2771
2772struct dentry *tracing_init_dentry(void)
2773{
2774 static int once;
2775
2776 if (d_tracer)
2777 return d_tracer;
2778
2779 d_tracer = debugfs_create_dir("tracing", NULL);
2780
2781 if (!d_tracer && !once) {
2782 once = 1;
2783 pr_warning("Could not create debugfs directory 'tracing'\n");
2784 return NULL;
2785 }
2786
2787 return d_tracer;
2788}
2789
2790#ifdef CONFIG_FTRACE_SELFTEST
2791/* Let selftest have access to static functions in this file */
2792#include "trace_selftest.c"
2793#endif
2794
2795static __init void tracer_init_debugfs(void)
2796{
2797 struct dentry *d_tracer;
2798 struct dentry *entry;
2799
2800 d_tracer = tracing_init_dentry();
2801
2802 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2803 &global_trace, &tracing_ctrl_fops);
2804 if (!entry)
2805 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2806
2807 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2808 NULL, &tracing_iter_fops);
2809 if (!entry)
2810 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2811
2812 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2813 NULL, &tracing_cpumask_fops);
2814 if (!entry)
2815 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2816
2817 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2818 &global_trace, &tracing_lt_fops);
2819 if (!entry)
2820 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2821
2822 entry = debugfs_create_file("trace", 0444, d_tracer,
2823 &global_trace, &tracing_fops);
2824 if (!entry)
2825 pr_warning("Could not create debugfs 'trace' entry\n");
2826
2827 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2828 &global_trace, &show_traces_fops);
2829 if (!entry)
2830		pr_warning("Could not create debugfs 'available_tracers' entry\n");
2831
2832 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2833 &global_trace, &set_tracer_fops);
2834 if (!entry)
2835		pr_warning("Could not create debugfs 'current_tracer' entry\n");
2836
2837 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2838 &tracing_max_latency,
2839 &tracing_max_lat_fops);
2840 if (!entry)
2841 pr_warning("Could not create debugfs "
2842 "'tracing_max_latency' entry\n");
2843
2844 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2845 &tracing_thresh, &tracing_max_lat_fops);
2846 if (!entry)
2847 pr_warning("Could not create debugfs "
2848			   "'tracing_thresh' entry\n");
2849 entry = debugfs_create_file("README", 0644, d_tracer,
2850 NULL, &tracing_readme_fops);
2851 if (!entry)
2852 pr_warning("Could not create debugfs 'README' entry\n");
2853
2854 entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2855 NULL, &tracing_pipe_fops);
2856 if (!entry)
2857 pr_warning("Could not create debugfs "
2858			   "'trace_pipe' entry\n");
2859
2860 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2861 &global_trace, &tracing_entries_fops);
2862 if (!entry)
2863 pr_warning("Could not create debugfs "
2864			   "'trace_entries' entry\n");
2865
2866#ifdef CONFIG_DYNAMIC_FTRACE
2867 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2868 &ftrace_update_tot_cnt,
2869 &tracing_read_long_fops);
2870 if (!entry)
2871 pr_warning("Could not create debugfs "
2872 "'dyn_ftrace_total_info' entry\n");
2873#endif
2874}
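
tracer_init_debugfs() creates the control surface under the debugfs "tracing" directory: tracing_enabled, current_tracer, available_tracers, iter_ctrl, tracing_cpumask, trace, latency_trace, trace_pipe, trace_entries, tracing_max_latency and tracing_thresh, among others. The sketch below lists the registered tracers and flips the global enable switch; the debugfs path is an assumption.

/* Minimal sketch: list tracers and enable tracing via debugfs (paths assumed). */
#include <stdio.h>

int main(void)
{
	const char *dir = "/sys/kernel/debug/tracing";
	char path[256], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/available_tracers", dir);
	f = fopen(path, "r");
	if (!f) {
		perror("available_tracers");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);		/* names of registered tracers */
	fclose(f);

	snprintf(path, sizeof(path), "%s/tracing_enabled", dir);
	f = fopen(path, "w");
	if (!f) {
		perror("tracing_enabled");
		return 1;
	}
	fputs("1\n", f);			/* global on/off switch */
	fclose(f);
	return 0;
}
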
2875
2876static int trace_alloc_page(void)
2877{
2878 struct trace_array_cpu *data;
2879 struct page *page, *tmp;
2880 LIST_HEAD(pages);
2881 void *array;
2882 unsigned pages_allocated = 0;
2883 int i;
2884
2885 /* first allocate a page for each CPU */
2886 for_each_tracing_cpu(i) {
2887 array = (void *)__get_free_page(GFP_KERNEL);
2888 if (array == NULL) {
2889			printk(KERN_ERR "tracer: failed to allocate page "
2890 "for trace buffer!\n");
2891 goto free_pages;
2892 }
2893
2894 pages_allocated++;
2895 page = virt_to_page(array);
2896 list_add(&page->lru, &pages);
2897
2898/* Only allocate if we are actually using the max trace */
2899#ifdef CONFIG_TRACER_MAX_TRACE
2900 array = (void *)__get_free_page(GFP_KERNEL);
2901 if (array == NULL) {
2902			printk(KERN_ERR "tracer: failed to allocate page "
2903 "for trace buffer!\n");
2904 goto free_pages;
2905 }
2906 pages_allocated++;
2907 page = virt_to_page(array);
2908 list_add(&page->lru, &pages);
2909#endif
2910 }
2911
2912	/* Now that we have successfully allocated a page per CPU, add them */
2913 for_each_tracing_cpu(i) {
2914 data = global_trace.data[i];
2915 page = list_entry(pages.next, struct page, lru);
2916 list_del_init(&page->lru);
2917 list_add_tail(&page->lru, &data->trace_pages);
2918 ClearPageLRU(page);
2919
2920#ifdef CONFIG_TRACER_MAX_TRACE
2921 data = max_tr.data[i];
2922 page = list_entry(pages.next, struct page, lru);
2923 list_del_init(&page->lru);
2924 list_add_tail(&page->lru, &data->trace_pages);
2925 SetPageLRU(page);
2926#endif
2927 }
2928 tracing_pages_allocated += pages_allocated;
2929 global_trace.entries += ENTRIES_PER_PAGE;
2930
2931 return 0;
2932
2933 free_pages:
2934 list_for_each_entry_safe(page, tmp, &pages, lru) {
2935 list_del_init(&page->lru);
2936 __free_page(page);
2937 }
2938 return -ENOMEM;
2939}
2940
2941static int trace_free_page(void)
2942{
2943 struct trace_array_cpu *data;
2944 struct page *page;
2945 struct list_head *p;
2946 int i;
2947 int ret = 0;
2948
2949 /* free one page from each buffer */
2950 for_each_tracing_cpu(i) {
2951 data = global_trace.data[i];
2952 p = data->trace_pages.next;
2953 if (p == &data->trace_pages) {
2954 /* should never happen */
2955 WARN_ON(1);
2956 tracing_disabled = 1;
2957 ret = -1;
2958 break;
2959 }
2960 page = list_entry(p, struct page, lru);
2961 ClearPageLRU(page);
2962 list_del(&page->lru);
2963 tracing_pages_allocated--;
2964 tracing_pages_allocated--;
2965 __free_page(page);
2966
2967 tracing_reset(data);
2968
2969#ifdef CONFIG_TRACER_MAX_TRACE
2970 data = max_tr.data[i];
2971 p = data->trace_pages.next;
2972 if (p == &data->trace_pages) {
2973 /* should never happen */
2974 WARN_ON(1);
2975 tracing_disabled = 1;
2976 ret = -1;
2977 break;
2978 }
2979 page = list_entry(p, struct page, lru);
2980 ClearPageLRU(page);
2981 list_del(&page->lru);
2982 __free_page(page);
2983
2984 tracing_reset(data);
2985#endif
2986 }
2987 global_trace.entries -= ENTRIES_PER_PAGE;
2988
2989 return ret;
2990}
2991
2992__init static int tracer_alloc_buffers(void)
2993{
2994 struct trace_array_cpu *data;
2995 void *array;
2996 struct page *page;
2997 int pages = 0;
2998 int ret = -ENOMEM;
2999 int i;
3000
3001	/* TODO: make the number of buffers hot-pluggable with CPUs */
3002 tracing_nr_buffers = num_possible_cpus();
3003 tracing_buffer_mask = cpu_possible_map;
3004
3005 /* Allocate the first page for all buffers */
3006 for_each_tracing_cpu(i) {
3007 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3008 max_tr.data[i] = &per_cpu(max_data, i);
3009
3010 array = (void *)__get_free_page(GFP_KERNEL);
3011 if (array == NULL) {
3012			printk(KERN_ERR "tracer: failed to allocate page "
3013 "for trace buffer!\n");
3014 goto free_buffers;
3015 }
3016
3017 /* set the array to the list */
3018 INIT_LIST_HEAD(&data->trace_pages);
3019 page = virt_to_page(array);
3020 list_add(&page->lru, &data->trace_pages);
3021 /* use the LRU flag to differentiate the two buffers */
3022 ClearPageLRU(page);
3023
3024 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3025 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3026
3027/* Only allocate if we are actually using the max trace */
3028#ifdef CONFIG_TRACER_MAX_TRACE
3029 array = (void *)__get_free_page(GFP_KERNEL);
3030 if (array == NULL) {
3031			printk(KERN_ERR "tracer: failed to allocate page "
3032 "for trace buffer!\n");
3033 goto free_buffers;
3034 }
3035
3036 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3037 page = virt_to_page(array);
3038 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3039 SetPageLRU(page);
3040#endif
3041 }
3042
3043 /*
3044 * Since we allocate by orders of pages, we may be able to
3045 * round up a bit.
3046 */
3047 global_trace.entries = ENTRIES_PER_PAGE;
3048 pages++;
3049
3050 while (global_trace.entries < trace_nr_entries) {
3051 if (trace_alloc_page())
3052 break;
3053 pages++;
3054 }
3055 max_tr.entries = global_trace.entries;
3056
3057 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3058 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3059 pr_info(" actual entries %ld\n", global_trace.entries);
3060
3061 tracer_init_debugfs();
3062
3063 trace_init_cmdlines();
3064
3065 register_tracer(&no_tracer);
3066 current_trace = &no_tracer;
3067
3068 /* All seems OK, enable tracing */
3069 global_trace.ctrl = tracer_enabled;
3070 tracing_disabled = 0;
3071
3072 return 0;
3073
3074 free_buffers:
3075 for (i-- ; i >= 0; i--) {
3076 struct page *page, *tmp;
3077 struct trace_array_cpu *data = global_trace.data[i];
3078
3079 if (data) {
3080 list_for_each_entry_safe(page, tmp,
3081 &data->trace_pages, lru) {
3082 list_del_init(&page->lru);
3083 __free_page(page);
3084 }
3085 }
3086
3087#ifdef CONFIG_TRACER_MAX_TRACE
3088 data = max_tr.data[i];
3089 if (data) {
3090 list_for_each_entry_safe(page, tmp,
3091 &data->trace_pages, lru) {
3092 list_del_init(&page->lru);
3093 __free_page(page);
3094 }
3095 }
3096#endif
3097 }
3098 return ret;
3099}
3100fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..6b8bd8800d04
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,313 @@
1#ifndef _LINUX_KERNEL_TRACE_H
2#define _LINUX_KERNEL_TRACE_H
3
4#include <linux/fs.h>
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/clocksource.h>
8
9enum trace_type {
10 __TRACE_FIRST_TYPE = 0,
11
12 TRACE_FN,
13 TRACE_CTX,
14 TRACE_WAKE,
15 TRACE_STACK,
16 TRACE_SPECIAL,
17
18 __TRACE_LAST_TYPE
19};
20
21/*
22 * Function trace entry - function address and parent function address:
23 */
24struct ftrace_entry {
25 unsigned long ip;
26 unsigned long parent_ip;
27};
28
29/*
30 * Context switch trace entry - which task (and prio) we switched from/to:
31 */
32struct ctx_switch_entry {
33 unsigned int prev_pid;
34 unsigned char prev_prio;
35 unsigned char prev_state;
36 unsigned int next_pid;
37 unsigned char next_prio;
38 unsigned char next_state;
39};
40
41/*
42 * Special (free-form) trace entry:
43 */
44struct special_entry {
45 unsigned long arg1;
46 unsigned long arg2;
47 unsigned long arg3;
48};
49
50/*
51 * Stack-trace entry:
52 */
53
54#define FTRACE_STACK_ENTRIES 8
55
56struct stack_entry {
57 unsigned long caller[FTRACE_STACK_ENTRIES];
58};
59
60/*
61 * The trace entry - the most basic unit of tracing. This is what
62 * is printed in the end as a single line in the trace output, such as:
63 *
64 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
65 */
66struct trace_entry {
67 char type;
68 char cpu;
69 char flags;
70 char preempt_count;
71 int pid;
72 cycle_t t;
73 union {
74 struct ftrace_entry fn;
75 struct ctx_switch_entry ctx;
76 struct special_entry special;
77 struct stack_entry stack;
78 };
79};
80
81#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
82
83/*
84 * The CPU trace array - it consists of thousands of trace entries
85 * plus some other descriptor data: (for example which task started
86 * the trace, etc.)
87 */
88struct trace_array_cpu {
89 struct list_head trace_pages;
90 atomic_t disabled;
91 raw_spinlock_t lock;
92 struct lock_class_key lock_key;
93
94 /* these fields get copied into max-trace: */
95 unsigned trace_head_idx;
96 unsigned trace_tail_idx;
97 void *trace_head; /* producer */
98 void *trace_tail; /* consumer */
99 unsigned long trace_idx;
100 unsigned long overrun;
101 unsigned long saved_latency;
102 unsigned long critical_start;
103 unsigned long critical_end;
104 unsigned long critical_sequence;
105 unsigned long nice;
106 unsigned long policy;
107 unsigned long rt_priority;
108 cycle_t preempt_timestamp;
109 pid_t pid;
110 uid_t uid;
111 char comm[TASK_COMM_LEN];
112};
113
114struct trace_iterator;
115
116/*
117 * The trace array - an array of per-CPU trace arrays. This is the
118 * highest level data structure that individual tracers deal with.
119 * They have on/off state as well:
120 */
121struct trace_array {
122 unsigned long entries;
123 long ctrl;
124 int cpu;
125 cycle_t time_start;
126 struct task_struct *waiter;
127 struct trace_array_cpu *data[NR_CPUS];
128};
129
130/*
131 * A specific tracer, represented by methods that operate on a trace array:
132 */
133struct tracer {
134 const char *name;
135 void (*init)(struct trace_array *tr);
136 void (*reset)(struct trace_array *tr);
137 void (*open)(struct trace_iterator *iter);
138 void (*pipe_open)(struct trace_iterator *iter);
139 void (*close)(struct trace_iterator *iter);
140 void (*start)(struct trace_iterator *iter);
141 void (*stop)(struct trace_iterator *iter);
142 ssize_t (*read)(struct trace_iterator *iter,
143 struct file *filp, char __user *ubuf,
144 size_t cnt, loff_t *ppos);
145 void (*ctrl_update)(struct trace_array *tr);
146#ifdef CONFIG_FTRACE_STARTUP_TEST
147 int (*selftest)(struct tracer *trace,
148 struct trace_array *tr);
149#endif
150 int (*print_line)(struct trace_iterator *iter);
151 struct tracer *next;
152 int print_max;
153};
154
155struct trace_seq {
156 unsigned char buffer[PAGE_SIZE];
157 unsigned int len;
158 unsigned int readpos;
159};
160
161/*
162 * Trace iterator - used by printout routines that present trace
163 * results to users and that might sleep, etc.:
164 */
165struct trace_iterator {
166 struct trace_array *tr;
167 struct tracer *trace;
168 void *private;
169 long last_overrun[NR_CPUS];
170 long overrun[NR_CPUS];
171
172 /* The below is zeroed out in pipe_read */
173 struct trace_seq seq;
174 struct trace_entry *ent;
175 int cpu;
176
177 struct trace_entry *prev_ent;
178 int prev_cpu;
179
180 unsigned long iter_flags;
181 loff_t pos;
182 unsigned long next_idx[NR_CPUS];
183 struct list_head *next_page[NR_CPUS];
184 unsigned next_page_idx[NR_CPUS];
185 long idx;
186};
187
188void tracing_reset(struct trace_array_cpu *data);
189int tracing_open_generic(struct inode *inode, struct file *filp);
190struct dentry *tracing_init_dentry(void);
191void ftrace(struct trace_array *tr,
192 struct trace_array_cpu *data,
193 unsigned long ip,
194 unsigned long parent_ip,
195 unsigned long flags);
196void tracing_sched_switch_trace(struct trace_array *tr,
197 struct trace_array_cpu *data,
198 struct task_struct *prev,
199 struct task_struct *next,
200 unsigned long flags);
201void tracing_record_cmdline(struct task_struct *tsk);
202
203void tracing_sched_wakeup_trace(struct trace_array *tr,
204 struct trace_array_cpu *data,
205 struct task_struct *wakee,
206 struct task_struct *cur,
207 unsigned long flags);
208void trace_special(struct trace_array *tr,
209 struct trace_array_cpu *data,
210 unsigned long arg1,
211 unsigned long arg2,
212 unsigned long arg3);
213void trace_function(struct trace_array *tr,
214 struct trace_array_cpu *data,
215 unsigned long ip,
216 unsigned long parent_ip,
217 unsigned long flags);
218
219void tracing_start_function_trace(void);
220void tracing_stop_function_trace(void);
221void tracing_start_cmdline_record(void);
222void tracing_stop_cmdline_record(void);
223int register_tracer(struct tracer *type);
224void unregister_tracer(struct tracer *type);
225
226extern unsigned long nsecs_to_usecs(unsigned long nsecs);
227
228extern unsigned long tracing_max_latency;
229extern unsigned long tracing_thresh;
230
231void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
232void update_max_tr_single(struct trace_array *tr,
233 struct task_struct *tsk, int cpu);
234
235extern cycle_t ftrace_now(int cpu);
236
237#ifdef CONFIG_CONTEXT_SWITCH_TRACER
238typedef void
239(*tracer_switch_func_t)(void *private,
240 void *__rq,
241 struct task_struct *prev,
242 struct task_struct *next);
243
244struct tracer_switch_ops {
245 tracer_switch_func_t func;
246 void *private;
247 struct tracer_switch_ops *next;
248};
249
250#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
251
252#ifdef CONFIG_DYNAMIC_FTRACE
253extern unsigned long ftrace_update_tot_cnt;
254#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
255extern int DYN_FTRACE_TEST_NAME(void);
256#endif
257
258#ifdef CONFIG_FTRACE_STARTUP_TEST
259#ifdef CONFIG_FTRACE
260extern int trace_selftest_startup_function(struct tracer *trace,
261 struct trace_array *tr);
262#endif
263#ifdef CONFIG_IRQSOFF_TRACER
264extern int trace_selftest_startup_irqsoff(struct tracer *trace,
265 struct trace_array *tr);
266#endif
267#ifdef CONFIG_PREEMPT_TRACER
268extern int trace_selftest_startup_preemptoff(struct tracer *trace,
269 struct trace_array *tr);
270#endif
271#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
272extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
273 struct trace_array *tr);
274#endif
275#ifdef CONFIG_SCHED_TRACER
276extern int trace_selftest_startup_wakeup(struct tracer *trace,
277 struct trace_array *tr);
278#endif
279#ifdef CONFIG_CONTEXT_SWITCH_TRACER
280extern int trace_selftest_startup_sched_switch(struct tracer *trace,
281 struct trace_array *tr);
282#endif
283#endif /* CONFIG_FTRACE_STARTUP_TEST */
284
285extern void *head_page(struct trace_array_cpu *data);
286extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
287extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
288 size_t cnt);
289extern long ns2usecs(cycle_t nsec);
290
291extern unsigned long trace_flags;
292
293/*
294 * trace_iterator_flags is an enumeration that defines bit
295 * positions in trace_flags that control the output.
296 *
297 * NOTE: These bits must match the trace_options array in
298 * trace.c.
299 */
300enum trace_iterator_flags {
301 TRACE_ITER_PRINT_PARENT = 0x01,
302 TRACE_ITER_SYM_OFFSET = 0x02,
303 TRACE_ITER_SYM_ADDR = 0x04,
304 TRACE_ITER_VERBOSE = 0x08,
305 TRACE_ITER_RAW = 0x10,
306 TRACE_ITER_HEX = 0x20,
307 TRACE_ITER_BIN = 0x40,
308 TRACE_ITER_BLOCK = 0x80,
309 TRACE_ITER_STACKTRACE = 0x100,
310 TRACE_ITER_SCHED_TREE = 0x200,
311};
312
313#endif /* _LINUX_KERNEL_TRACE_H */
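
The trace_iterator_flags values are plain bit positions in the global trace_flags word, and the note above ties their order to the trace_options strings parsed through iter_ctrl. The standalone sketch below is illustrative only: it copies the constants from the header to show how options are set, cleared and tested as a bitmask.

/* Illustrative only: how trace_flags output options combine as a bitmask. */
#include <stdio.h>

enum trace_iterator_flags {
	TRACE_ITER_PRINT_PARENT	= 0x01,
	TRACE_ITER_SYM_OFFSET	= 0x02,
	TRACE_ITER_SYM_ADDR	= 0x04,
	TRACE_ITER_VERBOSE	= 0x08,
	TRACE_ITER_RAW		= 0x10,
	TRACE_ITER_HEX		= 0x20,
	TRACE_ITER_BIN		= 0x40,
	TRACE_ITER_BLOCK	= 0x80,
	TRACE_ITER_STACKTRACE	= 0x100,
	TRACE_ITER_SCHED_TREE	= 0x200,
};

int main(void)
{
	/* Start with the print-parent option set (illustrative choice). */
	unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;

	/* Turning an option on or off is a bit set/clear, as iter_ctrl does. */
	trace_flags |= TRACE_ITER_SYM_OFFSET;
	trace_flags &= ~TRACE_ITER_VERBOSE;

	printf("print-parent: %s\n",
	       (trace_flags & TRACE_ITER_PRINT_PARENT) ? "on" : "off");
	printf("sym-offset:   %s\n",
	       (trace_flags & TRACE_ITER_SYM_OFFSET) ? "on" : "off");
	return 0;
}
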
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..7ee7dcd76b7d
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,78 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/debugfs.h>
13#include <linux/uaccess.h>
14#include <linux/ftrace.h>
15#include <linux/fs.h>
16
17#include "trace.h"
18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
29static void start_function_trace(struct trace_array *tr)
30{
31 function_reset(tr);
32 tracing_start_cmdline_record();
33 tracing_start_function_trace();
34}
35
36static void stop_function_trace(struct trace_array *tr)
37{
38 tracing_stop_function_trace();
39 tracing_stop_cmdline_record();
40}
41
42static void function_trace_init(struct trace_array *tr)
43{
44 if (tr->ctrl)
45 start_function_trace(tr);
46}
47
48static void function_trace_reset(struct trace_array *tr)
49{
50 if (tr->ctrl)
51 stop_function_trace(tr);
52}
53
54static void function_trace_ctrl_update(struct trace_array *tr)
55{
56 if (tr->ctrl)
57 start_function_trace(tr);
58 else
59 stop_function_trace(tr);
60}
61
62static struct tracer function_trace __read_mostly =
63{
64 .name = "ftrace",
65 .init = function_trace_init,
66 .reset = function_trace_reset,
67 .ctrl_update = function_trace_ctrl_update,
68#ifdef CONFIG_FTRACE_SELFTEST
69 .selftest = trace_selftest_startup_function,
70#endif
71};
72
73static __init int init_function_trace(void)
74{
75 return register_tracer(&function_trace);
76}
77
78device_initcall(init_function_trace);
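
The function tracer registers itself under the name "ftrace", so it is selected like any other tracer through current_tracer. A hedged sketch that turns it on and reads a few lines from the non-consuming "trace" view; the debugfs path, the write_str() helper and the one-second settle time are assumptions:

/* Minimal sketch: enable the "ftrace" function tracer and peek at the output. */
#include <stdio.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *dir = "/sys/kernel/debug/tracing";	/* assumed mount */
	char path[256], line[256];
	FILE *f;
	int i;

	snprintf(path, sizeof(path), "%s/current_tracer", dir);
	if (write_str(path, "ftrace\n"))
		return 1;
	snprintf(path, sizeof(path), "%s/tracing_enabled", dir);
	if (write_str(path, "1\n"))
		return 1;

	sleep(1);	/* let the function tracer collect some entries */

	/* "trace" is the non-consuming snapshot view of the buffer. */
	snprintf(path, sizeof(path), "%s/trace", dir);
	f = fopen(path, "r");
	if (!f)
		return 1;
	for (i = 0; i < 20 && fgets(line, sizeof(line), f); i++)
		fputs(line, stdout);
	fclose(f);
	return 0;
}
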
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..421d6fe3650e
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,486 @@
1/*
2 * trace irqs off critical timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * From code in the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h>
15#include <linux/module.h>
16#include <linux/ftrace.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21static struct trace_array *irqsoff_trace __read_mostly;
22static int tracer_enabled __read_mostly;
23
24static DEFINE_PER_CPU(int, tracing_cpu);
25
26static DEFINE_SPINLOCK(max_trace_lock);
27
28enum {
29 TRACER_IRQS_OFF = (1 << 1),
30 TRACER_PREEMPT_OFF = (1 << 2),
31};
32
33static int trace_type __read_mostly;
34
35#ifdef CONFIG_PREEMPT_TRACER
36static inline int
37preempt_trace(void)
38{
39 return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
40}
41#else
42# define preempt_trace() (0)
43#endif
44
45#ifdef CONFIG_IRQSOFF_TRACER
46static inline int
47irq_trace(void)
48{
49 return ((trace_type & TRACER_IRQS_OFF) &&
50 irqs_disabled());
51}
52#else
53# define irq_trace() (0)
54#endif
55
56/*
57 * Sequence count - we record it when starting a measurement and
58 * skip the latency if the sequence has changed - some other section
59 * did a maximum and could disturb our measurement with serial console
60 * printouts, etc. Truly coinciding maximum latencies should be rare
61 * and what happens together happens separately as well, so this doesn't
62 * decrease the validity of the maximum found:
63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence;
65
66#ifdef CONFIG_FTRACE
67/*
68 * irqsoff uses its own tracer function to keep the overhead down:
69 */
70static void
71irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
72{
73 struct trace_array *tr = irqsoff_trace;
74 struct trace_array_cpu *data;
75 unsigned long flags;
76 long disabled;
77 int cpu;
78
79 /*
80 * Does not matter if we preempt. We test the flags
81 * afterward, to see if irqs are disabled or not.
82 * If we preempt and get a false positive, the flags
83 * test will fail.
84 */
85 cpu = raw_smp_processor_id();
86 if (likely(!per_cpu(tracing_cpu, cpu)))
87 return;
88
89 local_save_flags(flags);
90 /* slight chance to get a false positive on tracing_cpu */
91 if (!irqs_disabled_flags(flags))
92 return;
93
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags);
99
100 atomic_dec(&data->disabled);
101}
102
103static struct ftrace_ops trace_ops __read_mostly =
104{
105 .func = irqsoff_tracer_call,
106};
107#endif /* CONFIG_FTRACE */
108
109/*
110 * Should this new latency be reported/recorded?
111 */
112static int report_latency(cycle_t delta)
113{
114 if (tracing_thresh) {
115 if (delta < tracing_thresh)
116 return 0;
117 } else {
118 if (delta <= tracing_max_latency)
119 return 0;
120 }
121 return 1;
122}
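
report_latency() has two modes: if tracing_thresh is set, every critical section at or above the threshold is recorded; otherwise only a delta that beats tracing_max_latency is. The standalone sketch below copies that logic to make the two modes concrete (cycle_t is typedef'd locally just for the illustration):

/* Illustrative only: the two report_latency() modes (threshold vs. new-max). */
#include <stdio.h>

typedef unsigned long long cycle_t;

static cycle_t tracing_thresh;			/* 0 means "max latency" mode */
static cycle_t tracing_max_latency = 1000;

static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

int main(void)
{
	/* Max-latency mode: only a strictly larger delta is recorded. */
	printf("delta 900 -> %d, delta 1500 -> %d\n",
	       report_latency(900), report_latency(1500));

	/* Threshold mode: everything at or above the threshold is recorded. */
	tracing_thresh = 500;
	printf("delta 400 -> %d, delta 500 -> %d\n",
	       report_latency(400), report_latency(500));
	return 0;
}
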
123
124static void
125check_critical_timing(struct trace_array *tr,
126 struct trace_array_cpu *data,
127 unsigned long parent_ip,
128 int cpu)
129{
130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta;
132 unsigned long flags;
133
134 /*
135 * usecs conversion is slow so we try to delay the conversion
136 * as long as possible:
137 */
138 T0 = data->preempt_timestamp;
139 T1 = ftrace_now(cpu);
140 delta = T1-T0;
141
142 local_save_flags(flags);
143
144 if (!report_latency(delta))
145 goto out;
146
147 spin_lock_irqsave(&max_trace_lock, flags);
148
149 /* check if we are still the max latency */
150 if (!report_latency(delta))
151 goto out_unlock;
152
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
154
155 latency = nsecs_to_usecs(delta);
156
157 if (data->critical_sequence != max_sequence)
158 goto out_unlock;
159
160 tracing_max_latency = delta;
161 t0 = nsecs_to_usecs(T0);
162 t1 = nsecs_to_usecs(T1);
163
164 data->critical_end = parent_ip;
165
166 update_max_tr_single(tr, current, cpu);
167
168 max_sequence++;
169
170out_unlock:
171 spin_unlock_irqrestore(&max_trace_lock, flags);
172
173out:
174 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
178}
179
180static inline void
181start_critical_timing(unsigned long ip, unsigned long parent_ip)
182{
183 int cpu;
184 struct trace_array *tr = irqsoff_trace;
185 struct trace_array_cpu *data;
186 unsigned long flags;
187
188 if (likely(!tracer_enabled))
189 return;
190
191 cpu = raw_smp_processor_id();
192
193 if (per_cpu(tracing_cpu, cpu))
194 return;
195
196 data = tr->data[cpu];
197
198 if (unlikely(!data) || atomic_read(&data->disabled))
199 return;
200
201 atomic_inc(&data->disabled);
202
203 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data);
207
208 local_save_flags(flags);
209
210 trace_function(tr, data, ip, parent_ip, flags);
211
212 per_cpu(tracing_cpu, cpu) = 1;
213
214 atomic_dec(&data->disabled);
215}
216
217static inline void
218stop_critical_timing(unsigned long ip, unsigned long parent_ip)
219{
220 int cpu;
221 struct trace_array *tr = irqsoff_trace;
222 struct trace_array_cpu *data;
223 unsigned long flags;
224
225 cpu = raw_smp_processor_id();
226 /* Always clear the tracing cpu on stopping the trace */
227 if (unlikely(per_cpu(tracing_cpu, cpu)))
228 per_cpu(tracing_cpu, cpu) = 0;
229 else
230 return;
231
232 if (!tracer_enabled)
233 return;
234
235 data = tr->data[cpu];
236
237 if (unlikely(!data) || unlikely(!head_page(data)) ||
238 !data->critical_start || atomic_read(&data->disabled))
239 return;
240
241 atomic_inc(&data->disabled);
242
243 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags);
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0;
247 atomic_dec(&data->disabled);
248}
249
250/* start and stop critical timings, used to mark stoppage (in idle) */
251void start_critical_timings(void)
252{
253 if (preempt_trace() || irq_trace())
254 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
255}
256
257void stop_critical_timings(void)
258{
259 if (preempt_trace() || irq_trace())
260 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
261}
262
263#ifdef CONFIG_IRQSOFF_TRACER
264#ifdef CONFIG_PROVE_LOCKING
265void time_hardirqs_on(unsigned long a0, unsigned long a1)
266{
267 if (!preempt_trace() && irq_trace())
268 stop_critical_timing(a0, a1);
269}
270
271void time_hardirqs_off(unsigned long a0, unsigned long a1)
272{
273 if (!preempt_trace() && irq_trace())
274 start_critical_timing(a0, a1);
275}
276
277#else /* !CONFIG_PROVE_LOCKING */
278
279/*
280 * Stubs:
281 */
282
283void early_boot_irqs_off(void)
284{
285}
286
287void early_boot_irqs_on(void)
288{
289}
290
291void trace_softirqs_on(unsigned long ip)
292{
293}
294
295void trace_softirqs_off(unsigned long ip)
296{
297}
298
299inline void print_irqtrace_events(struct task_struct *curr)
300{
301}
302
303/*
304 * We are only interested in hardirq on/off events:
305 */
306void trace_hardirqs_on(void)
307{
308 if (!preempt_trace() && irq_trace())
309 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
310}
311EXPORT_SYMBOL(trace_hardirqs_on);
312
313void trace_hardirqs_off(void)
314{
315 if (!preempt_trace() && irq_trace())
316 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
317}
318EXPORT_SYMBOL(trace_hardirqs_off);
319
320void trace_hardirqs_on_caller(unsigned long caller_addr)
321{
322 if (!preempt_trace() && irq_trace())
323 stop_critical_timing(CALLER_ADDR0, caller_addr);
324}
325EXPORT_SYMBOL(trace_hardirqs_on_caller);
326
327void trace_hardirqs_off_caller(unsigned long caller_addr)
328{
329 if (!preempt_trace() && irq_trace())
330 start_critical_timing(CALLER_ADDR0, caller_addr);
331}
332EXPORT_SYMBOL(trace_hardirqs_off_caller);
333
334#endif /* CONFIG_PROVE_LOCKING */
335#endif /* CONFIG_IRQSOFF_TRACER */
336
337#ifdef CONFIG_PREEMPT_TRACER
338void trace_preempt_on(unsigned long a0, unsigned long a1)
339{
340 stop_critical_timing(a0, a1);
341}
342
343void trace_preempt_off(unsigned long a0, unsigned long a1)
344{
345 start_critical_timing(a0, a1);
346}
347#endif /* CONFIG_PREEMPT_TRACER */
348
349static void start_irqsoff_tracer(struct trace_array *tr)
350{
351 register_ftrace_function(&trace_ops);
352 tracer_enabled = 1;
353}
354
355static void stop_irqsoff_tracer(struct trace_array *tr)
356{
357 tracer_enabled = 0;
358 unregister_ftrace_function(&trace_ops);
359}
360
361static void __irqsoff_tracer_init(struct trace_array *tr)
362{
363 irqsoff_trace = tr;
364 /* make sure that the tracer is visible */
365 smp_wmb();
366
367 if (tr->ctrl)
368 start_irqsoff_tracer(tr);
369}
370
371static void irqsoff_tracer_reset(struct trace_array *tr)
372{
373 if (tr->ctrl)
374 stop_irqsoff_tracer(tr);
375}
376
377static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
378{
379 if (tr->ctrl)
380 start_irqsoff_tracer(tr);
381 else
382 stop_irqsoff_tracer(tr);
383}
384
385static void irqsoff_tracer_open(struct trace_iterator *iter)
386{
387 /* stop the trace while dumping */
388 if (iter->tr->ctrl)
389 stop_irqsoff_tracer(iter->tr);
390}
391
392static void irqsoff_tracer_close(struct trace_iterator *iter)
393{
394 if (iter->tr->ctrl)
395 start_irqsoff_tracer(iter->tr);
396}
397
398#ifdef CONFIG_IRQSOFF_TRACER
399static void irqsoff_tracer_init(struct trace_array *tr)
400{
401 trace_type = TRACER_IRQS_OFF;
402
403 __irqsoff_tracer_init(tr);
404}
405static struct tracer irqsoff_tracer __read_mostly =
406{
407 .name = "irqsoff",
408 .init = irqsoff_tracer_init,
409 .reset = irqsoff_tracer_reset,
410 .open = irqsoff_tracer_open,
411 .close = irqsoff_tracer_close,
412 .ctrl_update = irqsoff_tracer_ctrl_update,
413 .print_max = 1,
414#ifdef CONFIG_FTRACE_SELFTEST
415 .selftest = trace_selftest_startup_irqsoff,
416#endif
417};
418# define register_irqsoff(trace) register_tracer(&trace)
419#else
420# define register_irqsoff(trace) do { } while (0)
421#endif
422
423#ifdef CONFIG_PREEMPT_TRACER
424static void preemptoff_tracer_init(struct trace_array *tr)
425{
426 trace_type = TRACER_PREEMPT_OFF;
427
428 __irqsoff_tracer_init(tr);
429}
430
431static struct tracer preemptoff_tracer __read_mostly =
432{
433 .name = "preemptoff",
434 .init = preemptoff_tracer_init,
435 .reset = irqsoff_tracer_reset,
436 .open = irqsoff_tracer_open,
437 .close = irqsoff_tracer_close,
438 .ctrl_update = irqsoff_tracer_ctrl_update,
439 .print_max = 1,
440#ifdef CONFIG_FTRACE_SELFTEST
441 .selftest = trace_selftest_startup_preemptoff,
442#endif
443};
444# define register_preemptoff(trace) register_tracer(&trace)
445#else
446# define register_preemptoff(trace) do { } while (0)
447#endif
448
449#if defined(CONFIG_IRQSOFF_TRACER) && \
450 defined(CONFIG_PREEMPT_TRACER)
451
452static void preemptirqsoff_tracer_init(struct trace_array *tr)
453{
454 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
455
456 __irqsoff_tracer_init(tr);
457}
458
459static struct tracer preemptirqsoff_tracer __read_mostly =
460{
461 .name = "preemptirqsoff",
462 .init = preemptirqsoff_tracer_init,
463 .reset = irqsoff_tracer_reset,
464 .open = irqsoff_tracer_open,
465 .close = irqsoff_tracer_close,
466 .ctrl_update = irqsoff_tracer_ctrl_update,
467 .print_max = 1,
468#ifdef CONFIG_FTRACE_SELFTEST
469 .selftest = trace_selftest_startup_preemptirqsoff,
470#endif
471};
472
473# define register_preemptirqsoff(trace) register_tracer(&trace)
474#else
475# define register_preemptirqsoff(trace) do { } while (0)
476#endif
477
478__init static int init_irqsoff_tracer(void)
479{
480 register_irqsoff(irqsoff_tracer);
481 register_preemptoff(preemptoff_tracer);
482 register_preemptirqsoff(preemptirqsoff_tracer);
483
484 return 0;
485}
486device_initcall(init_irqsoff_tracer);
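
Putting the irqsoff tracer to work from userspace means selecting it via current_tracer, clearing tracing_max_latency, letting the system run, and then reading the maximum back (the full worst-case trace is available from latency_trace). A hedged sketch; the debugfs path, the write_str() helper and the five-second sampling window are assumptions:

/* Minimal sketch: run the irqsoff tracer and read back the worst latency. */
#include <stdio.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *dir = "/sys/kernel/debug/tracing";	/* assumed mount */
	char path[256], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "%s/tracing_max_latency", dir);
	write_str(path, "0\n");			/* start from a clean maximum */

	snprintf(path, sizeof(path), "%s/current_tracer", dir);
	if (write_str(path, "irqsoff\n"))
		return 1;
	snprintf(path, sizeof(path), "%s/tracing_enabled", dir);
	if (write_str(path, "1\n"))
		return 1;

	sleep(5);			/* let some irqs-off sections happen */

	snprintf(path, sizeof(path), "%s/tracing_max_latency", dir);
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("worst irqs-off latency reported: %s", buf);
	if (f)
		fclose(f);
	return 0;
}
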
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..93a662009151
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,286 @@
1/*
2 * trace context switch
3 *
4 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h>
11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h>
14
15#include "trace.h"
16
17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref;
20
21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev,
23 struct task_struct *next)
24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data;
28 unsigned long flags;
29 long disabled;
30 int cpu;
31
32 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next);
34
35 if (!tracer_enabled)
36 return;
37
38 local_irq_save(flags);
39 cpu = raw_smp_processor_id();
40 data = tr->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42
43 if (likely(disabled == 1))
44 tracing_sched_switch_trace(tr, data, prev, next, flags);
45
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags);
48}
49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
78 task_struct *curr)
79{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data;
83 unsigned long flags;
84 long disabled;
85 int cpu;
86
87 if (!tracer_enabled)
88 return;
89
90 tracing_record_cmdline(curr);
91
92 local_irq_save(flags);
93 cpu = raw_smp_processor_id();
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
99
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags);
102}
103
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr)
130{
131 int cpu;
132
133 tr->time_start = ftrace_now(tr->cpu);
134
135 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]);
137}
138
139static int tracing_sched_register(void)
140{
141 int ret;
142
143 ret = marker_probe_register("kernel_sched_wakeup",
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker"
149 " probe to kernel_sched_wakeup\n");
150 return ret;
151 }
152
153 ret = marker_probe_register("kernel_sched_wakeup_new",
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker"
159 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe;
161 }
162
163 ret = marker_probe_register("kernel_sched_schedule",
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) {
169 pr_info("sched trace: Couldn't add marker"
170 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new;
172 }
173
174 return ret;
175fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new",
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup",
181 wake_up_callback,
182 &ctx_trace);
183 return ret;
184}
185
186static void tracing_sched_unregister(void)
187{
188 marker_probe_unregister("kernel_sched_schedule",
189 sched_switch_callback,
190 &ctx_trace);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197}
198
199static void tracing_start_sched_switch(void)
200{
201 long ref;
202
203 ref = atomic_inc_return(&sched_ref);
204 if (ref == 1)
205 tracing_sched_register();
206}
207
208static void tracing_stop_sched_switch(void)
209{
210 long ref;
211
212 ref = atomic_dec_and_test(&sched_ref);
213 if (ref)
214 tracing_sched_unregister();
215}
216
217void tracing_start_cmdline_record(void)
218{
219 tracing_start_sched_switch();
220}
221
222void tracing_stop_cmdline_record(void)
223{
224 tracing_stop_sched_switch();
225}
226
227static void start_sched_trace(struct trace_array *tr)
228{
229 sched_switch_reset(tr);
230 tracer_enabled = 1;
231 tracing_start_cmdline_record();
232}
233
234static void stop_sched_trace(struct trace_array *tr)
235{
236 tracing_stop_cmdline_record();
237 tracer_enabled = 0;
238}
239
240static void sched_switch_trace_init(struct trace_array *tr)
241{
242 ctx_trace = tr;
243
244 if (tr->ctrl)
245 start_sched_trace(tr);
246}
247
248static void sched_switch_trace_reset(struct trace_array *tr)
249{
250 if (tr->ctrl)
251 stop_sched_trace(tr);
252}
253
254static void sched_switch_trace_ctrl_update(struct trace_array *tr)
255{
256 /* When starting a new trace, reset the buffers */
257 if (tr->ctrl)
258 start_sched_trace(tr);
259 else
260 stop_sched_trace(tr);
261}
262
263static struct tracer sched_switch_trace __read_mostly =
264{
265 .name = "sched_switch",
266 .init = sched_switch_trace_init,
267 .reset = sched_switch_trace_reset,
268 .ctrl_update = sched_switch_trace_ctrl_update,
269#ifdef CONFIG_FTRACE_SELFTEST
270 .selftest = trace_selftest_startup_sched_switch,
271#endif
272};
273
274__init static int init_sched_switch_trace(void)
275{
276 int ret = 0;
277
278 if (atomic_read(&sched_ref))
279 ret = tracing_sched_register();
280 if (ret) {
281 pr_info("error registering scheduler trace\n");
282 return ret;
283 }
284 return register_tracer(&sched_switch_trace);
285}
286device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..bf7e91caef57
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,447 @@
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
18#include <linux/marker.h>
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static DEFINE_SPINLOCK(wakeup_lock);
30
31static void __wakeup_reset(struct trace_array *tr);
32
33#ifdef CONFIG_FTRACE
34/*
35 * wakeup uses its own tracer function to keep the overhead down:
36 */
37static void
38wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
39{
40 struct trace_array *tr = wakeup_trace;
41 struct trace_array_cpu *data;
42 unsigned long flags;
43 long disabled;
44 int resched;
45 int cpu;
46
47 if (likely(!wakeup_task))
48 return;
49
50 resched = need_resched();
51 preempt_disable_notrace();
52
53 cpu = raw_smp_processor_id();
54 data = tr->data[cpu];
55 disabled = atomic_inc_return(&data->disabled);
56 if (unlikely(disabled != 1))
57 goto out;
58
59 spin_lock_irqsave(&wakeup_lock, flags);
60
61 if (unlikely(!wakeup_task))
62 goto unlock;
63
64 /*
65 * The task can't disappear because it needs to
66 * wake up first, and we have the wakeup_lock.
67 */
68 if (task_cpu(wakeup_task) != cpu)
69 goto unlock;
70
71 trace_function(tr, data, ip, parent_ip, flags);
72
73 unlock:
74 spin_unlock_irqrestore(&wakeup_lock, flags);
75
76 out:
77 atomic_dec(&data->disabled);
78
79 /*
80 * To prevent recursion from the scheduler, if the
81 * resched flag was set before we entered, then
82 * don't reschedule.
83 */
84 if (resched)
85 preempt_enable_no_resched_notrace();
86 else
87 preempt_enable_notrace();
88}
89
90static struct ftrace_ops trace_ops __read_mostly =
91{
92 .func = wakeup_tracer_call,
93};
94#endif /* CONFIG_FTRACE */
95
96/*
97 * Should this new latency be reported/recorded?
98 */
99static int report_latency(cycle_t delta)
100{
101 if (tracing_thresh) {
102 if (delta < tracing_thresh)
103 return 0;
104 } else {
105 if (delta <= tracing_max_latency)
106 return 0;
107 }
108 return 1;
109}
110
111static void notrace
112wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
113 struct task_struct *next)
114{
115 unsigned long latency = 0, t0 = 0, t1 = 0;
116 struct trace_array **ptr = private;
117 struct trace_array *tr = *ptr;
118 struct trace_array_cpu *data;
119 cycle_t T0, T1, delta;
120 unsigned long flags;
121 long disabled;
122 int cpu;
123
124 if (unlikely(!tracer_enabled))
125 return;
126
127 /*
128 * When we start a new trace, we set wakeup_task to NULL
129 * and then set tracer_enabled = 1. We want to make sure
130 * that another CPU does not see the tracer_enabled = 1
131 * and the wakeup_task with an older task, that might
132 * actually be the same as next.
133 */
134 smp_rmb();
135
136 if (next != wakeup_task)
137 return;
138
139 /* The task we are waiting for is waking up */
140 data = tr->data[wakeup_cpu];
141
142 /* disable local data, not wakeup_cpu data */
143 cpu = raw_smp_processor_id();
144 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
145 if (likely(disabled != 1))
146 goto out;
147
148 spin_lock_irqsave(&wakeup_lock, flags);
149
150 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock;
153
154 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
155
156 /*
157 * usecs conversion is slow so we try to delay the conversion
158 * as long as possible:
159 */
160 T0 = data->preempt_timestamp;
161 T1 = ftrace_now(cpu);
162 delta = T1-T0;
163
164 if (!report_latency(delta))
165 goto out_unlock;
166
167 latency = nsecs_to_usecs(delta);
168
169 tracing_max_latency = delta;
170 t0 = nsecs_to_usecs(T0);
171 t1 = nsecs_to_usecs(T1);
172
173 update_max_tr(tr, wakeup_task, wakeup_cpu);
174
175out_unlock:
176 __wakeup_reset(tr);
177 spin_unlock_irqrestore(&wakeup_lock, flags);
178out:
179 atomic_dec(&tr->data[cpu]->disabled);
180}
181
182static notrace void
183sched_switch_callback(void *probe_data, void *call_data,
184 const char *format, va_list *args)
185{
186 struct task_struct *prev;
187 struct task_struct *next;
188 struct rq *__rq;
189
190 /* skip prev_pid %d next_pid %d prev_state %ld */
191 (void)va_arg(*args, int);
192 (void)va_arg(*args, int);
193 (void)va_arg(*args, long);
194 __rq = va_arg(*args, typeof(__rq));
195 prev = va_arg(*args, typeof(prev));
196 next = va_arg(*args, typeof(next));
197
198 tracing_record_cmdline(prev);
199
200 /*
201 * If tracer_switch_func only points to the local
202 * switch func, it still needs the ptr passed to it.
203 */
204 wakeup_sched_switch(probe_data, __rq, prev, next);
205}
206
207static void __wakeup_reset(struct trace_array *tr)
208{
209 struct trace_array_cpu *data;
210 int cpu;
211
212 assert_spin_locked(&wakeup_lock);
213
214 for_each_possible_cpu(cpu) {
215 data = tr->data[cpu];
216 tracing_reset(data);
217 }
218
219 wakeup_cpu = -1;
220 wakeup_prio = -1;
221
222 if (wakeup_task)
223 put_task_struct(wakeup_task);
224
225 wakeup_task = NULL;
226}
227
228static void wakeup_reset(struct trace_array *tr)
229{
230 unsigned long flags;
231
232 spin_lock_irqsave(&wakeup_lock, flags);
233 __wakeup_reset(tr);
234 spin_unlock_irqrestore(&wakeup_lock, flags);
235}
236
237static void
238wakeup_check_start(struct trace_array *tr, struct task_struct *p,
239 struct task_struct *curr)
240{
241 int cpu = smp_processor_id();
242 unsigned long flags;
243 long disabled;
244
245 if (likely(!rt_task(p)) ||
246 p->prio >= wakeup_prio ||
247 p->prio >= curr->prio)
248 return;
249
250 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
251 if (unlikely(disabled != 1))
252 goto out;
253
254 /* interrupts should be off from try_to_wake_up */
255 spin_lock(&wakeup_lock);
256
257 /* check for races. */
258 if (!tracer_enabled || p->prio >= wakeup_prio)
259 goto out_locked;
260
261 /* reset the trace */
262 __wakeup_reset(tr);
263
264 wakeup_cpu = task_cpu(p);
265 wakeup_prio = p->prio;
266
267 wakeup_task = p;
268 get_task_struct(wakeup_task);
269
270 local_save_flags(flags);
271
272 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
273 trace_function(tr, tr->data[wakeup_cpu],
274 CALLER_ADDR1, CALLER_ADDR2, flags);
275
276out_locked:
277 spin_unlock(&wakeup_lock);
278out:
279 atomic_dec(&tr->data[cpu]->disabled);
280}
281
282static notrace void
283wake_up_callback(void *probe_data, void *call_data,
284 const char *format, va_list *args)
285{
286 struct trace_array **ptr = probe_data;
287 struct trace_array *tr = *ptr;
288 struct task_struct *curr;
289 struct task_struct *task;
290 struct rq *__rq;
291
292 if (likely(!tracer_enabled))
293 return;
294
295 /* Skip pid %d state %ld */
296 (void)va_arg(*args, int);
297 (void)va_arg(*args, long);
298 /* now get the meat: "rq %p task %p rq->curr %p" */
299 __rq = va_arg(*args, typeof(__rq));
300 task = va_arg(*args, typeof(task));
301 curr = va_arg(*args, typeof(curr));
302
303 tracing_record_cmdline(task);
304 tracing_record_cmdline(curr);
305
306 wakeup_check_start(tr, task, curr);
307}
308
309static void start_wakeup_tracer(struct trace_array *tr)
310{
311 int ret;
312
313 ret = marker_probe_register("kernel_sched_wakeup",
314 "pid %d state %ld ## rq %p task %p rq->curr %p",
315 wake_up_callback,
316 &wakeup_trace);
317 if (ret) {
318 pr_info("wakeup trace: Couldn't add marker"
319 " probe to kernel_sched_wakeup\n");
320 return;
321 }
322
323 ret = marker_probe_register("kernel_sched_wakeup_new",
324 "pid %d state %ld ## rq %p task %p rq->curr %p",
325 wake_up_callback,
326 &wakeup_trace);
327 if (ret) {
328 pr_info("wakeup trace: Couldn't add marker"
329 " probe to kernel_sched_wakeup_new\n");
330 goto fail_deprobe;
331 }
332
333 ret = marker_probe_register("kernel_sched_schedule",
334 "prev_pid %d next_pid %d prev_state %ld "
335 "## rq %p prev %p next %p",
336 sched_switch_callback,
337 &wakeup_trace);
338 if (ret) {
339 pr_info("sched trace: Couldn't add marker"
340 " probe to kernel_sched_schedule\n");
341 goto fail_deprobe_wake_new;
342 }
343
344 wakeup_reset(tr);
345
346 /*
347 * Don't let the tracer_enabled = 1 show up before
348 * the wakeup_task is reset. This may be overkill since
349 * wakeup_reset does a spin_unlock after setting the
350 * wakeup_task to NULL, but I want to be safe.
351 * This is a slow path anyway.
352 */
353 smp_wmb();
354
355 tracer_enabled = 1;
356 register_ftrace_function(&trace_ops);
357
358 return;
359fail_deprobe_wake_new:
360 marker_probe_unregister("kernel_sched_wakeup_new",
361 wake_up_callback,
362 &wakeup_trace);
363fail_deprobe:
364 marker_probe_unregister("kernel_sched_wakeup",
365 wake_up_callback,
366 &wakeup_trace);
367}
368
369static void stop_wakeup_tracer(struct trace_array *tr)
370{
371 tracer_enabled = 0;
372 unregister_ftrace_function(&trace_ops);
373 marker_probe_unregister("kernel_sched_schedule",
374 sched_switch_callback,
375 &wakeup_trace);
376 marker_probe_unregister("kernel_sched_wakeup_new",
377 wake_up_callback,
378 &wakeup_trace);
379 marker_probe_unregister("kernel_sched_wakeup",
380 wake_up_callback,
381 &wakeup_trace);
382}
383
384static void wakeup_tracer_init(struct trace_array *tr)
385{
386 wakeup_trace = tr;
387
388 if (tr->ctrl)
389 start_wakeup_tracer(tr);
390}
391
392static void wakeup_tracer_reset(struct trace_array *tr)
393{
394 if (tr->ctrl) {
395 stop_wakeup_tracer(tr);
396 /* make sure we put back any tasks we are tracing */
397 wakeup_reset(tr);
398 }
399}
400
401static void wakeup_tracer_ctrl_update(struct trace_array *tr)
402{
403 if (tr->ctrl)
404 start_wakeup_tracer(tr);
405 else
406 stop_wakeup_tracer(tr);
407}
408
409static void wakeup_tracer_open(struct trace_iterator *iter)
410{
411 /* stop the trace while dumping */
412 if (iter->tr->ctrl)
413 stop_wakeup_tracer(iter->tr);
414}
415
416static void wakeup_tracer_close(struct trace_iterator *iter)
417{
418 /* forget about any processes we were recording */
419 if (iter->tr->ctrl)
420 start_wakeup_tracer(iter->tr);
421}
422
423static struct tracer wakeup_tracer __read_mostly =
424{
425 .name = "wakeup",
426 .init = wakeup_tracer_init,
427 .reset = wakeup_tracer_reset,
428 .open = wakeup_tracer_open,
429 .close = wakeup_tracer_close,
430 .ctrl_update = wakeup_tracer_ctrl_update,
431 .print_max = 1,
432#ifdef CONFIG_FTRACE_SELFTEST
433 .selftest = trace_selftest_startup_wakeup,
434#endif
435};
436
437__init static int init_wakeup_tracer(void)
438{
439 int ret;
440
441 ret = register_tracer(&wakeup_tracer);
442 if (ret)
443 return ret;
444
445 return 0;
446}
447device_initcall(init_wakeup_tracer);
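
The wakeup tracer only measures wakeups of RT tasks (note the rt_task() check in wakeup_check_start()), so an ordinary test program has to make itself SCHED_FIFO for a moment to generate something to measure. A hedged sketch; the priority value, the sleep pattern and the debugfs path are all assumptions, and RT scheduling requires root:

/*
 * Minimal sketch: exercise the "wakeup" tracer. It only measures RT task
 * wakeups, so the program briefly makes itself SCHED_FIFO (needs root).
 */
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *dir = "/sys/kernel/debug/tracing";	/* assumed mount */
	struct sched_param sp = { .sched_priority = 50 };
	struct sched_param normal = { .sched_priority = 0 };
	char path[256], buf[64];
	FILE *f;
	int i;

	snprintf(path, sizeof(path), "%s/current_tracer", dir);
	if (write_str(path, "wakeup\n"))
		return 1;
	snprintf(path, sizeof(path), "%s/tracing_enabled", dir);
	if (write_str(path, "1\n"))
		return 1;

	/* Become an RT task and sleep/wake a few times to generate wakeups. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == 0) {
		for (i = 0; i < 10; i++)
			usleep(10000);
		sched_setscheduler(0, SCHED_OTHER, &normal);
	}

	snprintf(path, sizeof(path), "%s/tracing_max_latency", dir);
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("worst wakeup latency reported: %s", buf);
	if (f)
		fclose(f);
	return 0;
}
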
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..18c5423bc977
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,540 @@
1/* Include in trace.c */
2
3#include <linux/kthread.h>
4#include <linux/delay.h>
5
6static inline int trace_valid_entry(struct trace_entry *entry)
7{
8 switch (entry->type) {
9 case TRACE_FN:
10 case TRACE_CTX:
11 case TRACE_WAKE:
12 case TRACE_STACK:
13 case TRACE_SPECIAL:
14 return 1;
15 }
16 return 0;
17}
18
19static int
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{
22 struct trace_entry *entries;
23 struct page *page;
24 int idx = 0;
25 int i;
26
27 BUG_ON(list_empty(&data->trace_pages));
28 page = list_entry(data->trace_pages.next, struct page, lru);
29 entries = page_address(page);
30
31 check_pages(data);
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type);
46 goto failed;
47 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0;
72
73 failed:
74 /* disable tracing */
75 tracing_disabled = 1;
76 printk(KERN_CONT ".. corrupted trace buffer .. ");
77 return -1;
78}
79
80/*
81 * Test the trace buffer to see if all the elements
82 * are still sane.
83 */
84static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85{
86 unsigned long flags, cnt = 0;
87 int cpu, ret = 0;
88
89 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95
96 cnt += tr->data[cpu]->trace_idx;
97
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
99 if (ret)
100 break;
101 }
102 __raw_spin_unlock(&ftrace_max_lock);
103 raw_local_irq_restore(flags);
104
105 if (count)
106 *count = cnt;
107
108 return ret;
109}
110
111#ifdef CONFIG_FTRACE
112
113#ifdef CONFIG_DYNAMIC_FTRACE
114
115#define __STR(x) #x
116#define STR(x) __STR(x)
117
118/* Test dynamic code modification and ftrace filters */
119int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr,
121 int (*func)(void))
122{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled;
127 char *func_name;
128
129 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n");
131 pr_info("Testing dynamic ftrace: ");
132
133 /* enable tracing, and record the filter function */
134 ftrace_enabled = 1;
135 tracer_enabled = 1;
136
137	/* passed in by parameter to keep gcc from optimizing it away */
138 func();
139
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /*
148	 * Some archs *cough*PowerPC*cough* add characters to the
149	 * start of the function names. We simply put a '*' to
150	 * accommodate them.
151 */
152 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
153
154 /* filter only on our function */
155 ftrace_set_filter(func_name, strlen(func_name), 1);
156
157 /* enable tracing */
158 tr->ctrl = 1;
159 trace->init(tr);
160	/* Sleep for 1/10 of a second */
161 msleep(100);
162
163 /* we should have nothing in the buffer */
164 ret = trace_test_buffer(tr, &count);
165 if (ret)
166 goto out;
167
168 if (count) {
169 ret = -1;
170 printk(KERN_CONT ".. filter did not filter .. ");
171 goto out;
172 }
173
174 /* call our function again */
175 func();
176
177 /* sleep again */
178 msleep(100);
179
180 /* stop the tracing. */
181 tr->ctrl = 0;
182 trace->ctrl_update(tr);
183 ftrace_enabled = 0;
184
185 /* check the trace buffer */
186 ret = trace_test_buffer(tr, &count);
187 trace->reset(tr);
188
189 /* we should only have one item */
190 if (!ret && count != 1) {
191 printk(KERN_CONT ".. filter failed count=%ld ..", count);
192 ret = -1;
193 goto out;
194 }
195 out:
196 ftrace_enabled = save_ftrace_enabled;
197 tracer_enabled = save_tracer_enabled;
198
199 /* Enable tracing on all functions again */
200 ftrace_set_filter(NULL, 0, 1);
201
202 return ret;
203}
204#else
205# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
206#endif /* CONFIG_DYNAMIC_FTRACE */
207/*
208 * Simple verification test of the ftrace function tracer.
209 * Enable ftrace, sleep 1/10 second, and then read the trace
210 * buffer to see if all is in order.
211 */
212int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled;
219
220 /* make sure msleep has been recorded */
221 msleep(1);
222
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */
231 ftrace_enabled = 1;
232 tracer_enabled = 1;
233
234 tr->ctrl = 1;
235 trace->init(tr);
236	/* Sleep for 1/10 of a second */
237 msleep(100);
238 /* stop the tracing. */
239 tr->ctrl = 0;
240 trace->ctrl_update(tr);
241 ftrace_enabled = 0;
242
243 /* check the trace buffer */
244 ret = trace_test_buffer(tr, &count);
245 trace->reset(tr);
246
247 if (!ret && !count) {
248 printk(KERN_CONT ".. no entries found ..");
249 ret = -1;
250 goto out;
251 }
252
253 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
254 DYN_FTRACE_TEST_NAME);
255
256 out:
257 ftrace_enabled = save_ftrace_enabled;
258 tracer_enabled = save_tracer_enabled;
259
260 /* kill ftrace totally if we failed */
261 if (ret)
262 ftrace_kill();
263
264 return ret;
265}
266#endif /* CONFIG_FTRACE */
267
268#ifdef CONFIG_IRQSOFF_TRACER
269int
270trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
271{
272 unsigned long save_max = tracing_max_latency;
273 unsigned long count;
274 int ret;
275
276 /* start the tracing */
277 tr->ctrl = 1;
278 trace->init(tr);
279 /* reset the max latency */
280 tracing_max_latency = 0;
281 /* disable interrupts for a bit */
282 local_irq_disable();
283 udelay(100);
284 local_irq_enable();
285 /* stop the tracing. */
286 tr->ctrl = 0;
287 trace->ctrl_update(tr);
288 /* check both trace buffers */
289 ret = trace_test_buffer(tr, NULL);
290 if (!ret)
291 ret = trace_test_buffer(&max_tr, &count);
292 trace->reset(tr);
293
294 if (!ret && !count) {
295 printk(KERN_CONT ".. no entries found ..");
296 ret = -1;
297 }
298
299 tracing_max_latency = save_max;
300
301 return ret;
302}
303#endif /* CONFIG_IRQSOFF_TRACER */
304
305#ifdef CONFIG_PREEMPT_TRACER
306int
307trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
308{
309 unsigned long save_max = tracing_max_latency;
310 unsigned long count;
311 int ret;
312
313 /* start the tracing */
314 tr->ctrl = 1;
315 trace->init(tr);
316 /* reset the max latency */
317 tracing_max_latency = 0;
318 /* disable preemption for a bit */
319 preempt_disable();
320 udelay(100);
321 preempt_enable();
322 /* stop the tracing. */
323 tr->ctrl = 0;
324 trace->ctrl_update(tr);
325 /* check both trace buffers */
326 ret = trace_test_buffer(tr, NULL);
327 if (!ret)
328 ret = trace_test_buffer(&max_tr, &count);
329 trace->reset(tr);
330
331 if (!ret && !count) {
332 printk(KERN_CONT ".. no entries found ..");
333 ret = -1;
334 }
335
336 tracing_max_latency = save_max;
337
338 return ret;
339}
340#endif /* CONFIG_PREEMPT_TRACER */
341
342#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
343int
344trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
345{
346 unsigned long save_max = tracing_max_latency;
347 unsigned long count;
348 int ret;
349
350 /* start the tracing */
351 tr->ctrl = 1;
352 trace->init(tr);
353
354 /* reset the max latency */
355 tracing_max_latency = 0;
356
357 /* disable preemption and interrupts for a bit */
358 preempt_disable();
359 local_irq_disable();
360 udelay(100);
361 preempt_enable();
362 /* reverse the order of preempt vs irqs */
363 local_irq_enable();
364
365 /* stop the tracing. */
366 tr->ctrl = 0;
367 trace->ctrl_update(tr);
368 /* check both trace buffers */
369 ret = trace_test_buffer(tr, NULL);
370 if (ret)
371 goto out;
372
373 ret = trace_test_buffer(&max_tr, &count);
374 if (ret)
375 goto out;
376
377 if (!ret && !count) {
378 printk(KERN_CONT ".. no entries found ..");
379 ret = -1;
380 goto out;
381 }
382
383 /* do the test by disabling interrupts first this time */
384 tracing_max_latency = 0;
385 tr->ctrl = 1;
386 trace->ctrl_update(tr);
387 preempt_disable();
388 local_irq_disable();
389 udelay(100);
390 preempt_enable();
391 /* reverse the order of preempt vs irqs */
392 local_irq_enable();
393
394 /* stop the tracing. */
395 tr->ctrl = 0;
396 trace->ctrl_update(tr);
397 /* check both trace buffers */
398 ret = trace_test_buffer(tr, NULL);
399 if (ret)
400 goto out;
401
402 ret = trace_test_buffer(&max_tr, &count);
403
404 if (!ret && !count) {
405 printk(KERN_CONT ".. no entries found ..");
406 ret = -1;
407 goto out;
408 }
409
410 out:
411 trace->reset(tr);
412 tracing_max_latency = save_max;
413
414 return ret;
415}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417
418#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data)
420{
421	/* Make this an RT thread, doesn't need to be too high */
422 struct sched_param param = { .sched_priority = 5 };
423 struct completion *x = data;
424
425 sched_setscheduler(current, SCHED_FIFO, &param);
426
427	/* Make it known that we have a new prio */
428 complete(x);
429
430 /* now go to sleep and let the test wake us up */
431 set_current_state(TASK_INTERRUPTIBLE);
432 schedule();
433
434 /* we are awake, now wait to disappear */
435 while (!kthread_should_stop()) {
436 /*
437 * This is an RT task, do short sleeps to let
438 * others run.
439 */
440 msleep(100);
441 }
442
443 return 0;
444}
445
446int
447trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448{
449 unsigned long save_max = tracing_max_latency;
450 struct task_struct *p;
451 struct completion isrt;
452 unsigned long count;
453 int ret;
454
455 init_completion(&isrt);
456
457 /* create a high prio thread */
458 p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
459 if (IS_ERR(p)) {
460 printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
461 return -1;
462 }
463
464 /* make sure the thread is running at an RT prio */
465 wait_for_completion(&isrt);
466
467 /* start the tracing */
468 tr->ctrl = 1;
469 trace->init(tr);
470 /* reset the max latency */
471 tracing_max_latency = 0;
472
473 /* sleep to let the RT thread sleep too */
474 msleep(100);
475
476 /*
477	 * Yes, this is slightly racy. It is possible that, for some
478	 * strange reason, the RT thread we created did not call
479	 * schedule for 100ms after doing the completion, and we do
480	 * a wakeup on a task that is already awake. But that is
481	 * extremely unlikely, and the worst thing that happens in
482	 * such a case is that we disable tracing. Honestly, if this
483	 * race does happen, something is horribly wrong with the
484	 * system.
485 */
486
487 wake_up_process(p);
488
489 /* stop the tracing. */
490 tr->ctrl = 0;
491 trace->ctrl_update(tr);
492 /* check both trace buffers */
493 ret = trace_test_buffer(tr, NULL);
494 if (!ret)
495 ret = trace_test_buffer(&max_tr, &count);
496
497
498 trace->reset(tr);
499
500 tracing_max_latency = save_max;
501
502 /* kill the thread */
503 kthread_stop(p);
504
505 if (!ret && !count) {
506 printk(KERN_CONT ".. no entries found ..");
507 ret = -1;
508 }
509
510 return ret;
511}
512#endif /* CONFIG_SCHED_TRACER */
513
514#ifdef CONFIG_CONTEXT_SWITCH_TRACER
515int
516trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
517{
518 unsigned long count;
519 int ret;
520
521 /* start the tracing */
522 tr->ctrl = 1;
523 trace->init(tr);
524	/* Sleep for 1/10 of a second */
525 msleep(100);
526 /* stop the tracing. */
527 tr->ctrl = 0;
528 trace->ctrl_update(tr);
529 /* check the trace buffer */
530 ret = trace_test_buffer(tr, &count);
531 trace->reset(tr);
532
533 if (!ret && !count) {
534 printk(KERN_CONT ".. no entries found ..");
535 ret = -1;
536 }
537
538 return ret;
539}
540#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
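
All of the startup tests above share one sequence: enable the tracer through tr->ctrl and trace->init(), exercise the code path it is supposed to record, disable it through ctrl_update(), then validate the buffers with trace_test_buffer() and reset. Below is a sketch of that skeleton for a hypothetical tracer; it assumes the same context as trace_selftest.c above (it is included into trace.c), and only the helpers defined earlier in this file are used. The demo naming is illustrative.

/*
 * Illustrative selftest skeleton (hypothetical "demo" tracer); follows the
 * enable -> exercise -> disable -> verify pattern of the selftests above.
 */
int
trace_selftest_startup_demo(struct tracer *trace, struct trace_array *tr)
{
	unsigned long count;
	int ret;

	/* start the tracing */
	tr->ctrl = 1;
	trace->init(tr);

	/* exercise whatever the tracer is supposed to record */
	msleep(100);

	/* stop the tracing */
	tr->ctrl = 0;
	trace->ctrl_update(tr);

	/* verify the buffer is sane and non-empty */
	ret = trace_test_buffer(tr, &count);
	trace->reset(tr);

	if (!ret && !count) {
		printk(KERN_CONT ".. no entries found ..");
		ret = -1;
	}

	return ret;
}

A tracer opts in by pointing its .selftest member at such a function under CONFIG_FTRACE_SELFTEST, as the wakeup tracer does above.
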
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
1#include "trace.h"
2
3int DYN_FTRACE_TEST_NAME(void)
4{
5 /* used to call mcount */
6 return 0;
7}
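
DYN_FTRACE_TEST_NAME gets its own translation unit so that it keeps an mcount call site: the tracing core's own objects are built without -pg so the tracer does not trace itself, while this one file stays instrumented (see the kernel/trace/Makefile change earlier in this patch). That single call site is what the dynamic-ftrace selftest filters on. The sketch below reuses the STR() helper and ftrace_set_filter() call exactly as trace_selftest_startup_dynamic_tracing() does; the wrapper name is hypothetical.

/*
 * Illustrative only: restrict tracing to the test symbol and generate one
 * mcount-instrumented call, as the dynamic selftest does.
 */
static int demo_filter_on_test_function(void)
{
	/* the leading '*' tolerates arch-added name prefixes, as noted above */
	char *name = "*" STR(DYN_FTRACE_TEST_NAME);

	ftrace_set_filter(name, strlen(name), 1);
	return DYN_FTRACE_TEST_NAME();	/* the only call site that should be traced */
}
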
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..d8b6279a9b42 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -634,6 +634,8 @@ config LATENCYTOP
634 Enable this option if you want to use the LatencyTOP tool 634 Enable this option if you want to use the LatencyTOP tool
635 to find out which userspace is blocking on what kernel operations. 635 to find out which userspace is blocking on what kernel operations.
636 636
637source kernel/trace/Kconfig
638
637config PROVIDE_OHCI1394_DMA_INIT 639config PROVIDE_OHCI1394_DMA_INIT
638 bool "Remote debugging over FireWire early on boot" 640 bool "Remote debugging over FireWire early on boot"
639 depends on PCI && X86 641 depends on PCI && X86
diff --git a/lib/Makefile b/lib/Makefile
index 74b0cfb1fcc3..4b836a53c08f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,6 +8,15 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
9 proportions.o prio_heap.o ratelimit.o 9 proportions.o prio_heap.o ratelimit.o
10 10
11ifdef CONFIG_FTRACE
12# Do not profile string.o, since it may be used in early boot or vdso
13CFLAGS_REMOVE_string.o = -pg
14# Also do not profile any debug utilities
15CFLAGS_REMOVE_spinlock_debug.o = -pg
16CFLAGS_REMOVE_list_debug.o = -pg
17CFLAGS_REMOVE_debugobjects.o = -pg
18endif
19
11lib-$(CONFIG_MMU) += ioremap.o 20lib-$(CONFIG_MMU) += ioremap.o
12lib-$(CONFIG_SMP) += cpumask.o 21lib-$(CONFIG_SMP) += cpumask.o
13 22
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 6c90fb90e19c..3b4dc098181e 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,7 @@
7#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9 9
10unsigned int debug_smp_processor_id(void) 10notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count(); 12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 13 int this_cpu = raw_smp_processor_id();
@@ -37,7 +37,7 @@ unsigned int debug_smp_processor_id(void)
37 /* 37 /*
38 * Avoid recursion: 38 * Avoid recursion:
39 */ 39 */
40 preempt_disable(); 40 preempt_disable_notrace();
41 41
42 if (!printk_ratelimit()) 42 if (!printk_ratelimit())
43 goto out_enable; 43 goto out_enable;
@@ -49,7 +49,7 @@ unsigned int debug_smp_processor_id(void)
49 dump_stack(); 49 dump_stack();
50 50
51out_enable: 51out_enable:
52 preempt_enable_no_resched(); 52 preempt_enable_no_resched_notrace();
53out: 53out:
54 return this_cpu; 54 return this_cpu;
55} 55}
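
debug_smp_processor_id() is marked notrace and switched to the _notrace preemption helpers because the function tracer can end up calling it on its own path; if that call were itself traced, or bumped the traced preempt counters, the tracer would recurse. A sketch of the same pattern for a hypothetical helper that tracer-side code might call, using only the annotations visible in this hunk:

#include <linux/preempt.h>
#include <linux/smp.h>

/*
 * Illustrative only: demo_current_cpu() is hypothetical.  The notrace
 * annotation and the *_notrace preempt calls mirror the
 * debug_smp_processor_id() change above.
 */
static notrace unsigned int demo_current_cpu(void)
{
	unsigned int cpu;

	preempt_disable_notrace();		/* no mcount hook, no preempt tracing */
	cpu = raw_smp_processor_id();
	preempt_enable_no_resched_notrace();	/* matching untraced enable */

	return cpu;
}
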
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 789b6adbef37..b38f700825fc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
126static struct prop_descriptor vm_completions; 126static struct prop_descriptor vm_completions;
127static struct prop_descriptor vm_dirties; 127static struct prop_descriptor vm_dirties;
128 128
129static unsigned long determine_dirtyable_memory(void);
130
131/* 129/*
132 * couple the period to the dirty_ratio: 130 * couple the period to the dirty_ratio:
133 * 131 *
@@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
347#endif 345#endif
348} 346}
349 347
350static unsigned long determine_dirtyable_memory(void) 348/**
349 * determine_dirtyable_memory - amount of memory that may be used
350 *
351 * Returns the number of pages that can currently be freed and used
352 * by the kernel for direct mappings.
353 */
354unsigned long determine_dirtyable_memory(void)
351{ 355{
352 unsigned long x; 356 unsigned long x;
353 357
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8e440233c27d..ea48b82a3707 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
96modname_flags = $(if $(filter 1,$(words $(modname))),\ 96modname_flags = $(if $(filter 1,$(words $(modname))),\
97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") 97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
98 98
99_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) 99orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
100_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
100_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o) 101_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
101_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F)) 102_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))
102 103