aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 17:49:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 17:49:54 -0400
commite18425a0abc8eafa8e98ecffac517bb0c0904f4b (patch)
tree5eb743e7201850de19496183554bb34c766948c4
parentd1794f2c5b5817eb79ccc5e00701ca748d1b073a (diff)
parent5806b81ac1c0c52665b91723fd4146a4f86e386b (diff)
Merge branch 'tracing/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (228 commits) ftrace: build fix for ftraced_suspend ftrace: separate out the function enabled variable ftrace: add ftrace_kill_atomic ftrace: use current CPU for function startup ftrace: start wakeup tracing after setting function tracer ftrace: check proper config for preempt type ftrace: trace schedule ftrace: define function trace nop ftrace: move sched_switch enable after markers ftrace: prevent ftrace modifications while being kprobe'd, v2 fix "ftrace: store mcount address in rec->ip" mmiotrace broken in linux-next (8-bit writes only) ftrace: avoid modifying kprobe'd records ftrace: freeze kprobe'd records kprobes: enable clean usage of get_kprobe ftrace: store mcount address in rec->ip ftrace: build fix with gcc 4.3 namespacecheck: fixes ftrace: fix "notrace" filtering priority ftrace: fix printout ...
-rw-r--r--Documentation/tracers/mmiotrace.txt164
-rw-r--r--Makefile4
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/boot/compressed/Makefile6
-rw-r--r--arch/arm/kernel/Makefile5
-rw-r--r--arch/arm/kernel/armksyms.c5
-rw-r--r--arch/arm/kernel/entry-common.S51
-rw-r--r--arch/arm/kernel/ftrace.c116
-rw-r--r--arch/arm/kernel/kprobes.c2
-rw-r--r--arch/powerpc/Kconfig4
-rw-r--r--arch/powerpc/kernel/Makefile14
-rw-r--r--arch/powerpc/kernel/entry_32.S127
-rw-r--r--arch/powerpc/kernel/entry_64.S65
-rw-r--r--arch/powerpc/kernel/ftrace.c154
-rw-r--r--arch/powerpc/kernel/io.c3
-rw-r--r--arch/powerpc/kernel/irq.c6
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/setup_32.c6
-rw-r--r--arch/powerpc/platforms/powermac/Makefile5
-rw-r--r--arch/sparc64/Kconfig2
-rw-r--r--arch/sparc64/Kconfig.debug2
-rw-r--r--arch/sparc64/kernel/Makefile1
-rw-r--r--arch/sparc64/kernel/ftrace.c94
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c4
-rw-r--r--arch/sparc64/lib/mcount.S58
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug28
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/alternative.c22
-rw-r--r--arch/x86/kernel/entry_32.S72
-rw-r--r--arch/x86/kernel/entry_64.S106
-rw-r--r--arch/x86/kernel/ftrace.c141
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c9
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c11
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/thunk_32.S47
-rw-r--r--arch/x86/lib/thunk_64.S19
-rw-r--r--arch/x86/mm/Makefile5
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/mm/ioremap.c11
-rw-r--r--arch/x86/mm/kmmio.c510
-rw-r--r--arch/x86/mm/mmio-mod.c515
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/pf_in.c489
-rw-r--r--arch/x86/mm/pf_in.h39
-rw-r--r--arch/x86/mm/testmmiotrace.c71
-rw-r--r--arch/x86/vdso/vclock_gettime.c15
-rw-r--r--arch/x86/vdso/vgetcpu.c3
-rw-r--r--include/asm-arm/ftrace.h14
-rw-r--r--include/asm-arm/kprobes.h1
-rw-r--r--include/asm-powerpc/ftrace.h14
-rw-r--r--include/asm-powerpc/hw_irq.h10
-rw-r--r--include/asm-sparc64/ftrace.h14
-rw-r--r--include/asm-x86/alternative.h2
-rw-r--r--include/asm-x86/ftrace.h14
-rw-r--r--include/asm-x86/irqflags.h24
-rw-r--r--include/asm-x86/vsyscall.h3
-rw-r--r--include/linux/ftrace.h144
-rw-r--r--include/linux/irqflags.h13
-rw-r--r--include/linux/kprobes.h4
-rw-r--r--include/linux/linkage.h2
-rw-r--r--include/linux/marker.h40
-rw-r--r--include/linux/mmiotrace.h85
-rw-r--r--include/linux/preempt.h34
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/writeback.h2
-rw-r--r--kernel/Makefile14
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/lockdep.c33
-rw-r--r--kernel/marker.c30
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/sched.c55
-rw-r--r--kernel/semaphore.c1
-rw-r--r--kernel/spinlock.c2
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/trace/Kconfig135
-rw-r--r--kernel/trace/Makefile24
-rw-r--r--kernel/trace/ftrace.c1727
-rw-r--r--kernel/trace/trace.c3161
-rw-r--r--kernel/trace/trace.h339
-rw-r--r--kernel/trace/trace_functions.c81
-rw-r--r--kernel/trace/trace_irqsoff.c486
-rw-r--r--kernel/trace/trace_mmiotrace.c295
-rw-r--r--kernel/trace/trace_sched_switch.c286
-rw-r--r--kernel/trace/trace_sched_wakeup.c448
-rw-r--r--kernel/trace/trace_selftest.c563
-rw-r--r--kernel/trace/trace_selftest_dynamic.c7
-rw-r--r--kernel/trace/trace_sysprof.c363
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile9
-rw-r--r--lib/smp_processor_id.c6
-rw-r--r--mm/page-writeback.c10
-rw-r--r--scripts/Makefile.lib3
100 files changed, 11490 insertions, 115 deletions
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
new file mode 100644
index 000000000000..a4afb560a45b
--- /dev/null
+++ b/Documentation/tracers/mmiotrace.txt
@@ -0,0 +1,164 @@
1 In-kernel memory-mapped I/O tracing
2
3
4Home page and links to optional user space tools:
5
6 http://nouveau.freedesktop.org/wiki/MmioTrace
7
8MMIO tracing was originally developed by Intel around 2003 for their Fault
9Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel,
10Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau
11project in mind. Since then many people have contributed.
12
13Mmiotrace was built for reverse engineering any memory-mapped IO device with
14the Nouveau project as the first real user. Only x86 and x86_64 architectures
15are supported.
16
17Out-of-tree mmiotrace was originally modified for mainline inclusion and
18ftrace framework by Pekka Paalanen <pq@iki.fi>.
19
20
21Preparation
22-----------
23
24Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is
25disabled by default, so it is safe to have this set to yes. SMP systems are
26supported, but tracing is unreliable and may miss events if more than one CPU
27is on-line, therefore mmiotrace takes all but one CPU off-line during run-time
28activation. You can re-enable CPUs by hand, but you have been warned, there
29is no way to automatically detect if you are losing events due to CPUs racing.
30
31
32Usage Quick Reference
33---------------------
34
35$ mount -t debugfs debugfs /debug
36$ echo mmiotrace > /debug/tracing/current_tracer
37$ cat /debug/tracing/trace_pipe > mydump.txt &
38Start X or whatever.
39$ echo "X is up" > /debug/tracing/marker
40$ echo none > /debug/tracing/current_tracer
41Check for lost events.
42
43
44Usage
45-----
46
47Make sure debugfs is mounted to /debug. If not, (requires root privileges)
48$ mount -t debugfs debugfs /debug
49
50Check that the driver you are about to trace is not loaded.
51
52Activate mmiotrace (requires root privileges):
53$ echo mmiotrace > /debug/tracing/current_tracer
54
55Start storing the trace:
56$ cat /debug/tracing/trace_pipe > mydump.txt &
57The 'cat' process should stay running (sleeping) in the background.
58
59Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
60accesses to areas that are ioremapped while mmiotrace is active.
61
62[Unimplemented feature:]
63During tracing you can place comments (markers) into the trace by
64$ echo "X is up" > /debug/tracing/marker
65This makes it easier to see which part of the (huge) trace corresponds to
66which action. It is recommended to place descriptive markers about what you
67do.
68
69Shut down mmiotrace (requires root privileges):
70$ echo none > /debug/tracing/current_tracer
71The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
72pressing ctrl+c.
73
74Check that mmiotrace did not lose events due to a buffer filling up. Either
75$ grep -i lost mydump.txt
76which tells you exactly how many events were lost, or use
77$ dmesg
78to view your kernel log and look for "mmiotrace has lost events" warning. If
79events were lost, the trace is incomplete. You should enlarge the buffers and
80try again. Buffers are enlarged by first seeing how large the current buffers
81are:
82$ cat /debug/tracing/trace_entries
83gives you a number. Approximately double this number and write it back, for
84instance:
85$ echo 128000 > /debug/tracing/trace_entries
86Then start again from the top.
87
88If you are doing a trace for a driver project, e.g. Nouveau, you should also
89do the following before sending your results:
90$ lspci -vvv > lspci.txt
91$ dmesg > dmesg.txt
92$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
93and then send the .tar.gz file. The trace compresses considerably. Replace
94"pciid" and "nick" with the PCI ID or model name of your piece of hardware
95under investigation and your nick name.
96
97
98How Mmiotrace Works
99-------------------
100
101Access to hardware IO-memory is gained by mapping addresses from PCI bus by
102calling one of the ioremap_*() functions. Mmiotrace is hooked into the
103__ioremap() function and gets called whenever a mapping is created. Mapping is
104an event that is recorded into the trace log. Note, that ISA range mappings
105are not caught, since the mapping always exists and is returned directly.
106
107MMIO accesses are recorded via page faults. Just before __ioremap() returns,
108the mapped pages are marked as not present. Any access to the pages causes a
109fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace
110marks the page present, sets TF flag to achieve single stepping and exits the
111fault handler. The instruction that faulted is executed and debug trap is
112entered. Here mmiotrace again marks the page as not present. The instruction
113is decoded to get the type of operation (read/write), data width and the value
114read or written. These are stored to the trace log.
115
116Setting the page present in the page fault handler has a race condition on SMP
117machines. During the single stepping other CPUs may run freely on that page
118and events can be missed without a notice. Re-enabling other CPUs during
119tracing is discouraged.
120
121
122Trace Log Format
123----------------
124
125The raw log is text and easily filtered with e.g. grep and awk. One record is
126one line in the log. A record starts with a keyword, followed by keyword
127dependant arguments. Arguments are separated by a space, or continue until the
128end of line. The format for version 20070824 is as follows:
129
130Explanation Keyword Space separated arguments
131---------------------------------------------------------------------------
132
133read event R width, timestamp, map id, physical, value, PC, PID
134write event W width, timestamp, map id, physical, value, PC, PID
135ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID
136iounmap event UNMAP timestamp, map id, PC, PID
137marker MARK timestamp, text
138version VERSION the string "20070824"
139info for reader LSPCI one line from lspci -v
140PCI address map PCIDEV space separated /proc/bus/pci/devices data
141unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
142
143Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
144is a kernel virtual address. Width is the data width in bytes and value is the
145data value. Map id is an arbitrary id number identifying the mapping that was
146used in an operation. PC is the program counter and PID is process id. PC is
147zero if it is not recorded. PID is always zero as tracing MMIO accesses
148originating in user space memory is not yet supported.
149
150For instance, the following awk filter will pass all 32-bit writes that target
151physical addresses in the range [0xfb73ce40, 0xfb800000[
152
153$ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 &&
154adr < 0xfb800000) print; }'
155
156
157Tools for Developers
158--------------------
159
160The user space tools include utilities for:
161- replacing numeric addresses and values with hardware register names
162- replaying MMIO logs, i.e., re-executing the recorded writes
163
164
diff --git a/Makefile b/Makefile
index e3c5eb66ec52..4ac1d2f71ac3 100644
--- a/Makefile
+++ b/Makefile
@@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g
528KBUILD_AFLAGS += -gdwarf-2 528KBUILD_AFLAGS += -gdwarf-2
529endif 529endif
530 530
531ifdef CONFIG_FTRACE
532KBUILD_CFLAGS += -pg
533endif
534
531# We trigger additional mismatches with less inlining 535# We trigger additional mismatches with less inlining
532ifdef CONFIG_DEBUG_SECTION_MISMATCH 536ifdef CONFIG_DEBUG_SECTION_MISMATCH
533KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) 537KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b786e68914d4..3845e5c8a34f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,8 @@ config ARM
14 select HAVE_OPROFILE 14 select HAVE_OPROFILE
15 select HAVE_KPROBES if (!XIP_KERNEL) 15 select HAVE_KPROBES if (!XIP_KERNEL)
16 select HAVE_KRETPROBES if (HAVE_KPROBES) 16 select HAVE_KRETPROBES if (HAVE_KPROBES)
17 select HAVE_FTRACE if (!XIP_KERNEL)
18 select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
17 help 19 help
18 The ARM series is a line of low-power-consumption RISC chip designs 20 The ARM series is a line of low-power-consumption RISC chip designs
19 licensed by ARM Ltd and targeted at embedded applications and 21 licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index de9d9ee50958..95baac4939e0 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
69 69
70targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ 70targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
71 head.o misc.o $(OBJS) 71 head.o misc.o $(OBJS)
72
73ifeq ($(CONFIG_FTRACE),y)
74ORIG_CFLAGS := $(KBUILD_CFLAGS)
75KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
76endif
77
72EXTRA_CFLAGS := -fpic -fno-builtin 78EXTRA_CFLAGS := -fpic -fno-builtin
73EXTRA_AFLAGS := 79EXTRA_AFLAGS :=
74 80
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad455ff5aebe..eb9092ca8008 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -4,6 +4,10 @@
4 4
5AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) 5AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
6 6
7ifdef CONFIG_DYNAMIC_FTRACE
8CFLAGS_REMOVE_ftrace.o = -pg
9endif
10
7# Object file lists. 11# Object file lists.
8 12
9obj-y := compat.o entry-armv.o entry-common.o irq.o \ 13obj-y := compat.o entry-armv.o entry-common.o irq.o \
@@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o
18obj-$(CONFIG_ISA_DMA) += dma-isa.o 22obj-$(CONFIG_ISA_DMA) += dma-isa.o
19obj-$(CONFIG_PCI) += bios32.o isa.o 23obj-$(CONFIG_PCI) += bios32.o isa.o
20obj-$(CONFIG_SMP) += smp.o 24obj-$(CONFIG_SMP) += smp.o
25obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
21obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
22obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o 27obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
23obj-$(CONFIG_ATAGS_PROC) += atags.o 28obj-$(CONFIG_ATAGS_PROC) += atags.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 688b7b1ee416..cc7b246e9652 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -18,6 +18,7 @@
18#include <asm/io.h> 18#include <asm/io.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include <asm/ftrace.h>
21 22
22/* 23/*
23 * libgcc functions - functions that are used internally by the 24 * libgcc functions - functions that are used internally by the
@@ -181,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be);
181#endif 182#endif
182 183
183EXPORT_SYMBOL(copy_page); 184EXPORT_SYMBOL(copy_page);
185
186#ifdef CONFIG_FTRACE
187EXPORT_SYMBOL(mcount);
188#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 597ed00a08d8..84694e88b428 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <asm/unistd.h> 11#include <asm/unistd.h>
12#include <asm/ftrace.h>
12#include <asm/arch/entry-macro.S> 13#include <asm/arch/entry-macro.S>
13 14
14#include "entry-header.S" 15#include "entry-header.S"
@@ -99,6 +100,56 @@ ENTRY(ret_from_fork)
99#undef CALL 100#undef CALL
100#define CALL(x) .long x 101#define CALL(x) .long x
101 102
103#ifdef CONFIG_FTRACE
104#ifdef CONFIG_DYNAMIC_FTRACE
105ENTRY(mcount)
106 stmdb sp!, {r0-r3, lr}
107 mov r0, lr
108 sub r0, r0, #MCOUNT_INSN_SIZE
109
110 .globl mcount_call
111mcount_call:
112 bl ftrace_stub
113 ldmia sp!, {r0-r3, pc}
114
115ENTRY(ftrace_caller)
116 stmdb sp!, {r0-r3, lr}
117 ldr r1, [fp, #-4]
118 mov r0, lr
119 sub r0, r0, #MCOUNT_INSN_SIZE
120
121 .globl ftrace_call
122ftrace_call:
123 bl ftrace_stub
124 ldmia sp!, {r0-r3, pc}
125
126#else
127
128ENTRY(mcount)
129 stmdb sp!, {r0-r3, lr}
130 ldr r0, =ftrace_trace_function
131 ldr r2, [r0]
132 adr r0, ftrace_stub
133 cmp r0, r2
134 bne trace
135 ldmia sp!, {r0-r3, pc}
136
137trace:
138 ldr r1, [fp, #-4]
139 mov r0, lr
140 sub r0, r0, #MCOUNT_INSN_SIZE
141 mov lr, pc
142 mov pc, r2
143 ldmia sp!, {r0-r3, pc}
144
145#endif /* CONFIG_DYNAMIC_FTRACE */
146
147 .globl ftrace_stub
148ftrace_stub:
149 mov pc, lr
150
151#endif /* CONFIG_FTRACE */
152
102/*============================================================================= 153/*=============================================================================
103 * SWI handler 154 * SWI handler
104 *----------------------------------------------------------------------------- 155 *-----------------------------------------------------------------------------
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
new file mode 100644
index 000000000000..76d50e6091bc
--- /dev/null
+++ b/arch/arm/kernel/ftrace.c
@@ -0,0 +1,116 @@
1/*
2 * Dynamic function tracing support.
3 *
4 * Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
5 *
6 * For licencing details, see COPYING.
7 *
8 * Defines low-level handling of mcount calls when the kernel
9 * is compiled with the -pg flag. When using dynamic ftrace, the
10 * mcount call-sites get patched lazily with NOP till they are
11 * enabled. All code mutation routines here take effect atomically.
12 */
13
14#include <linux/ftrace.h>
15
16#include <asm/cacheflush.h>
17#include <asm/ftrace.h>
18
19#define PC_OFFSET 8
20#define BL_OPCODE 0xeb000000
21#define BL_OFFSET_MASK 0x00ffffff
22
23static unsigned long bl_insn;
24static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */
25
26unsigned char *ftrace_nop_replace(void)
27{
28 return (char *)&NOP;
29}
30
31/* construct a branch (BL) instruction to addr */
32unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
33{
34 long offset;
35
36 offset = (long)addr - (long)(pc + PC_OFFSET);
37 if (unlikely(offset < -33554432 || offset > 33554428)) {
38 /* Can't generate branches that far (from ARM ARM). Ftrace
39 * doesn't generate branches outside of kernel text.
40 */
41 WARN_ON_ONCE(1);
42 return NULL;
43 }
44 offset = (offset >> 2) & BL_OFFSET_MASK;
45 bl_insn = BL_OPCODE | offset;
46 return (unsigned char *)&bl_insn;
47}
48
49int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
50 unsigned char *new_code)
51{
52 unsigned long err = 0, replaced = 0, old, new;
53
54 old = *(unsigned long *)old_code;
55 new = *(unsigned long *)new_code;
56
57 __asm__ __volatile__ (
58 "1: ldr %1, [%2] \n"
59 " cmp %1, %4 \n"
60 "2: streq %3, [%2] \n"
61 " cmpne %1, %3 \n"
62 " movne %0, #2 \n"
63 "3:\n"
64
65 ".section .fixup, \"ax\"\n"
66 "4: mov %0, #1 \n"
67 " b 3b \n"
68 ".previous\n"
69
70 ".section __ex_table, \"a\"\n"
71 " .long 1b, 4b \n"
72 " .long 2b, 4b \n"
73 ".previous\n"
74
75 : "=r"(err), "=r"(replaced)
76 : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced)
77 : "memory");
78
79 if (!err && (replaced == old))
80 flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
81
82 return err;
83}
84
85int ftrace_update_ftrace_func(ftrace_func_t func)
86{
87 int ret;
88 unsigned long pc, old;
89 unsigned char *new;
90
91 pc = (unsigned long)&ftrace_call;
92 memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
93 new = ftrace_call_replace(pc, (unsigned long)func);
94 ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
95 return ret;
96}
97
98int ftrace_mcount_set(unsigned long *data)
99{
100 unsigned long pc, old;
101 unsigned long *addr = data;
102 unsigned char *new;
103
104 pc = (unsigned long)&mcount_call;
105 memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
106 new = ftrace_call_replace(pc, *addr);
107 *addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
108 return 0;
109}
110
111/* run from kstop_machine */
112int __init ftrace_dyn_arch_init(void *data)
113{
114 ftrace_mcount_set(data);
115 return 0;
116}
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5593dd207216..5ee39e10c8d1 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
274 * for kretprobe handlers which should normally be interested in r0 only 274 * for kretprobe handlers which should normally be interested in r0 only
275 * anyway. 275 * anyway.
276 */ 276 */
277static void __attribute__((naked)) __kprobes kretprobe_trampoline(void) 277void __naked __kprobes kretprobe_trampoline(void)
278{ 278{
279 __asm__ __volatile__ ( 279 __asm__ __volatile__ (
280 "stmdb sp!, {r0 - r11} \n\t" 280 "stmdb sp!, {r0 - r11} \n\t"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e2659407..a5e9912e2d37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -105,11 +105,13 @@ config ARCH_NO_VIRT_TO_BUS
105config PPC 105config PPC
106 bool 106 bool
107 default y 107 default y
108 select HAVE_DYNAMIC_FTRACE
109 select HAVE_FTRACE
108 select HAVE_IDE 110 select HAVE_IDE
109 select HAVE_OPROFILE
110 select HAVE_KPROBES 111 select HAVE_KPROBES
111 select HAVE_KRETPROBES 112 select HAVE_KRETPROBES
112 select HAVE_LMB 113 select HAVE_LMB
114 select HAVE_OPROFILE
113 115
114config EARLY_PRINTK 116config EARLY_PRINTK
115 bool 117 bool
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2346d271fbfd..f3f5e2641432 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
12CFLAGS_btext.o += -fPIC 12CFLAGS_btext.o += -fPIC
13endif 13endif
14 14
15ifdef CONFIG_FTRACE
16# Do not trace early boot code
17CFLAGS_REMOVE_cputable.o = -pg
18CFLAGS_REMOVE_prom_init.o = -pg
19
20ifdef CONFIG_DYNAMIC_FTRACE
21# dynamic ftrace setup.
22CFLAGS_REMOVE_ftrace.o = -pg
23endif
24
25endif
26
15obj-y := cputable.o ptrace.o syscalls.o \ 27obj-y := cputable.o ptrace.o syscalls.o \
16 irq.o align.o signal_32.o pmc.o vdso.o \ 28 irq.o align.o signal_32.o pmc.o vdso.o \
17 init_task.o process.o systbl.o idle.o \ 29 init_task.o process.o systbl.o idle.o \
@@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
78obj-$(CONFIG_AUDIT) += audit.o 90obj-$(CONFIG_AUDIT) += audit.o
79obj64-$(CONFIG_AUDIT) += compat_audit.o 91obj64-$(CONFIG_AUDIT) += compat_audit.o
80 92
93obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
94
81obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 95obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
82 96
83ifneq ($(CONFIG_PPC_INDIRECT_IO),y) 97ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d9875c..7231a708af0d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,7 @@
30#include <asm/ppc_asm.h> 30#include <asm/ppc_asm.h>
31#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
32#include <asm/unistd.h> 32#include <asm/unistd.h>
33#include <asm/ftrace.h>
33 34
34#undef SHOW_SYSCALLS 35#undef SHOW_SYSCALLS
35#undef SHOW_SYSCALLS_TASK 36#undef SHOW_SYSCALLS_TASK
@@ -1035,3 +1036,129 @@ machine_check_in_rtas:
1035 /* XXX load up BATs and panic */ 1036 /* XXX load up BATs and panic */
1036 1037
1037#endif /* CONFIG_PPC_RTAS */ 1038#endif /* CONFIG_PPC_RTAS */
1039
1040#ifdef CONFIG_FTRACE
1041#ifdef CONFIG_DYNAMIC_FTRACE
1042_GLOBAL(mcount)
1043_GLOBAL(_mcount)
1044 stwu r1,-48(r1)
1045 stw r3, 12(r1)
1046 stw r4, 16(r1)
1047 stw r5, 20(r1)
1048 stw r6, 24(r1)
1049 mflr r3
1050 stw r7, 28(r1)
1051 mfcr r5
1052 stw r8, 32(r1)
1053 stw r9, 36(r1)
1054 stw r10,40(r1)
1055 stw r3, 44(r1)
1056 stw r5, 8(r1)
1057 subi r3, r3, MCOUNT_INSN_SIZE
1058 .globl mcount_call
1059mcount_call:
1060 bl ftrace_stub
1061 nop
1062 lwz r6, 8(r1)
1063 lwz r0, 44(r1)
1064 lwz r3, 12(r1)
1065 mtctr r0
1066 lwz r4, 16(r1)
1067 mtcr r6
1068 lwz r5, 20(r1)
1069 lwz r6, 24(r1)
1070 lwz r0, 52(r1)
1071 lwz r7, 28(r1)
1072 lwz r8, 32(r1)
1073 mtlr r0
1074 lwz r9, 36(r1)
1075 lwz r10,40(r1)
1076 addi r1, r1, 48
1077 bctr
1078
1079_GLOBAL(ftrace_caller)
1080 /* Based off of objdump optput from glibc */
1081 stwu r1,-48(r1)
1082 stw r3, 12(r1)
1083 stw r4, 16(r1)
1084 stw r5, 20(r1)
1085 stw r6, 24(r1)
1086 mflr r3
1087 lwz r4, 52(r1)
1088 mfcr r5
1089 stw r7, 28(r1)
1090 stw r8, 32(r1)
1091 stw r9, 36(r1)
1092 stw r10,40(r1)
1093 stw r3, 44(r1)
1094 stw r5, 8(r1)
1095 subi r3, r3, MCOUNT_INSN_SIZE
1096.globl ftrace_call
1097ftrace_call:
1098 bl ftrace_stub
1099 nop
1100 lwz r6, 8(r1)
1101 lwz r0, 44(r1)
1102 lwz r3, 12(r1)
1103 mtctr r0
1104 lwz r4, 16(r1)
1105 mtcr r6
1106 lwz r5, 20(r1)
1107 lwz r6, 24(r1)
1108 lwz r0, 52(r1)
1109 lwz r7, 28(r1)
1110 lwz r8, 32(r1)
1111 mtlr r0
1112 lwz r9, 36(r1)
1113 lwz r10,40(r1)
1114 addi r1, r1, 48
1115 bctr
1116#else
1117_GLOBAL(mcount)
1118_GLOBAL(_mcount)
1119 stwu r1,-48(r1)
1120 stw r3, 12(r1)
1121 stw r4, 16(r1)
1122 stw r5, 20(r1)
1123 stw r6, 24(r1)
1124 mflr r3
1125 lwz r4, 52(r1)
1126 mfcr r5
1127 stw r7, 28(r1)
1128 stw r8, 32(r1)
1129 stw r9, 36(r1)
1130 stw r10,40(r1)
1131 stw r3, 44(r1)
1132 stw r5, 8(r1)
1133
1134 subi r3, r3, MCOUNT_INSN_SIZE
1135 LOAD_REG_ADDR(r5, ftrace_trace_function)
1136 lwz r5,0(r5)
1137
1138 mtctr r5
1139 bctrl
1140
1141 nop
1142
1143 lwz r6, 8(r1)
1144 lwz r0, 44(r1)
1145 lwz r3, 12(r1)
1146 mtctr r0
1147 lwz r4, 16(r1)
1148 mtcr r6
1149 lwz r5, 20(r1)
1150 lwz r6, 24(r1)
1151 lwz r0, 52(r1)
1152 lwz r7, 28(r1)
1153 lwz r8, 32(r1)
1154 mtlr r0
1155 lwz r9, 36(r1)
1156 lwz r10,40(r1)
1157 addi r1, r1, 48
1158 bctr
1159#endif
1160
1161_GLOBAL(ftrace_stub)
1162 blr
1163
1164#endif /* CONFIG_MCOUNT */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b769e55..2f511a969d2c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -31,6 +31,7 @@
31#include <asm/bug.h> 31#include <asm/bug.h>
32#include <asm/ptrace.h> 32#include <asm/ptrace.h>
33#include <asm/irqflags.h> 33#include <asm/irqflags.h>
34#include <asm/ftrace.h>
34 35
35/* 36/*
36 * System calls. 37 * System calls.
@@ -870,3 +871,67 @@ _GLOBAL(enter_prom)
870 ld r0,16(r1) 871 ld r0,16(r1)
871 mtlr r0 872 mtlr r0
872 blr 873 blr
874
875#ifdef CONFIG_FTRACE
876#ifdef CONFIG_DYNAMIC_FTRACE
877_GLOBAL(mcount)
878_GLOBAL(_mcount)
879 /* Taken from output of objdump from lib64/glibc */
880 mflr r3
881 stdu r1, -112(r1)
882 std r3, 128(r1)
883 subi r3, r3, MCOUNT_INSN_SIZE
884 .globl mcount_call
885mcount_call:
886 bl ftrace_stub
887 nop
888 ld r0, 128(r1)
889 mtlr r0
890 addi r1, r1, 112
891 blr
892
893_GLOBAL(ftrace_caller)
894 /* Taken from output of objdump from lib64/glibc */
895 mflr r3
896 ld r11, 0(r1)
897 stdu r1, -112(r1)
898 std r3, 128(r1)
899 ld r4, 16(r11)
900 subi r3, r3, MCOUNT_INSN_SIZE
901.globl ftrace_call
902ftrace_call:
903 bl ftrace_stub
904 nop
905 ld r0, 128(r1)
906 mtlr r0
907 addi r1, r1, 112
908_GLOBAL(ftrace_stub)
909 blr
910#else
911_GLOBAL(mcount)
912 blr
913
914_GLOBAL(_mcount)
915 /* Taken from output of objdump from lib64/glibc */
916 mflr r3
917 ld r11, 0(r1)
918 stdu r1, -112(r1)
919 std r3, 128(r1)
920 ld r4, 16(r11)
921
922 subi r3, r3, MCOUNT_INSN_SIZE
923 LOAD_REG_ADDR(r5,ftrace_trace_function)
924 ld r5,0(r5)
925 ld r5,0(r5)
926 mtctr r5
927 bctrl
928
929 nop
930 ld r0, 128(r1)
931 mtlr r0
932 addi r1, r1, 112
933_GLOBAL(ftrace_stub)
934 blr
935
936#endif
937#endif
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 000000000000..3855ceb937b0
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,154 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
7 *
8 */
9
10#include <linux/spinlock.h>
11#include <linux/hardirq.h>
12#include <linux/ftrace.h>
13#include <linux/percpu.h>
14#include <linux/init.h>
15#include <linux/list.h>
16
17#include <asm/cacheflush.h>
18#include <asm/ftrace.h>
19
20
21static unsigned int ftrace_nop = 0x60000000;
22
23#ifdef CONFIG_PPC32
24# define GET_ADDR(addr) addr
25#else
26/* PowerPC64's functions are data that points to the functions */
27# define GET_ADDR(addr) *(unsigned long *)addr
28#endif
29
30
31static unsigned int notrace ftrace_calc_offset(long ip, long addr)
32{
33 return (int)(addr - ip);
34}
35
36notrace unsigned char *ftrace_nop_replace(void)
37{
38 return (char *)&ftrace_nop;
39}
40
41notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
42{
43 static unsigned int op;
44
45 /*
46 * It would be nice to just use create_function_call, but that will
47 * update the code itself. Here we need to just return the
48 * instruction that is going to be modified, without modifying the
49 * code.
50 */
51 addr = GET_ADDR(addr);
52
53 /* Set to "bl addr" */
54 op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc);
55
56 /*
57 * No locking needed, this must be called via kstop_machine
58 * which in essence is like running on a uniprocessor machine.
59 */
60 return (unsigned char *)&op;
61}
62
63#ifdef CONFIG_PPC64
64# define _ASM_ALIGN " .align 3 "
65# define _ASM_PTR " .llong "
66#else
67# define _ASM_ALIGN " .align 2 "
68# define _ASM_PTR " .long "
69#endif
70
71notrace int
72ftrace_modify_code(unsigned long ip, unsigned char *old_code,
73 unsigned char *new_code)
74{
75 unsigned replaced;
76 unsigned old = *(unsigned *)old_code;
77 unsigned new = *(unsigned *)new_code;
78 int faulted = 0;
79
80 /*
81 * Note: Due to modules and __init, code can
82 * disappear and change, we need to protect against faulting
83 * as well as code changing.
84 *
85 * No real locking needed, this code is run through
86 * kstop_machine.
87 */
88 asm volatile (
89 "1: lwz %1, 0(%2)\n"
90 " cmpw %1, %5\n"
91 " bne 2f\n"
92 " stwu %3, 0(%2)\n"
93 "2:\n"
94 ".section .fixup, \"ax\"\n"
95 "3: li %0, 1\n"
96 " b 2b\n"
97 ".previous\n"
98 ".section __ex_table,\"a\"\n"
99 _ASM_ALIGN "\n"
100 _ASM_PTR "1b, 3b\n"
101 ".previous"
102 : "=r"(faulted), "=r"(replaced)
103 : "r"(ip), "r"(new),
104 "0"(faulted), "r"(old)
105 : "memory");
106
107 if (replaced != old && replaced != new)
108 faulted = 2;
109
110 if (!faulted)
111 flush_icache_range(ip, ip + 8);
112
113 return faulted;
114}
115
116notrace int ftrace_update_ftrace_func(ftrace_func_t func)
117{
118 unsigned long ip = (unsigned long)(&ftrace_call);
119 unsigned char old[MCOUNT_INSN_SIZE], *new;
120 int ret;
121
122 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
123 new = ftrace_call_replace(ip, (unsigned long)func);
124 ret = ftrace_modify_code(ip, old, new);
125
126 return ret;
127}
128
129notrace int ftrace_mcount_set(unsigned long *data)
130{
131 unsigned long ip = (long)(&mcount_call);
132 unsigned long *addr = data;
133 unsigned char old[MCOUNT_INSN_SIZE], *new;
134
135 /*
136 * Replace the mcount stub with a pointer to the
137 * ip recorder function.
138 */
139 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
140 new = ftrace_call_replace(ip, *addr);
141 *addr = ftrace_modify_code(ip, old, new);
142
143 return 0;
144}
145
146int __init ftrace_dyn_arch_init(void *data)
147{
148 /* This is running in kstop_machine */
149
150 ftrace_mcount_set(data);
151
152 return 0;
153}
154
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e31aca9208eb..1882bf419fa6 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
120 120
121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) 121#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
122 122
123void _memset_io(volatile void __iomem *addr, int c, unsigned long n) 123notrace void
124_memset_io(volatile void __iomem *addr, int c, unsigned long n)
124{ 125{
125 void *p = (void __force *)addr; 126 void *p = (void __force *)addr;
126 u32 lc = c; 127 u32 lc = c;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bcc249d90c4d..dcc946e67099 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
98 98
99int distribute_irqs = 1; 99int distribute_irqs = 1;
100 100
101static inline unsigned long get_hard_enabled(void) 101static inline notrace unsigned long get_hard_enabled(void)
102{ 102{
103 unsigned long enabled; 103 unsigned long enabled;
104 104
@@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
108 return enabled; 108 return enabled;
109} 109}
110 110
111static inline void set_soft_enabled(unsigned long enable) 111static inline notrace void set_soft_enabled(unsigned long enable)
112{ 112{
113 __asm__ __volatile__("stb %0,%1(13)" 113 __asm__ __volatile__("stb %0,%1(13)"
114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); 114 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
115} 115}
116 116
117void raw_local_irq_restore(unsigned long en) 117notrace void raw_local_irq_restore(unsigned long en)
118{ 118{
119 /* 119 /*
120 * get_paca()->soft_enabled = en; 120 * get_paca()->soft_enabled = en;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d3ac631cbd26..a8d02506468a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -42,6 +42,7 @@
42#include <asm/div64.h> 42#include <asm/div64.h>
43#include <asm/signal.h> 43#include <asm/signal.h>
44#include <asm/dcr.h> 44#include <asm/dcr.h>
45#include <asm/ftrace.h>
45 46
46#ifdef CONFIG_PPC32 47#ifdef CONFIG_PPC32
47extern void transfer_to_handler(void); 48extern void transfer_to_handler(void);
@@ -67,6 +68,10 @@ EXPORT_SYMBOL(single_step_exception);
67EXPORT_SYMBOL(sys_sigreturn); 68EXPORT_SYMBOL(sys_sigreturn);
68#endif 69#endif
69 70
71#ifdef CONFIG_FTRACE
72EXPORT_SYMBOL(_mcount);
73#endif
74
70EXPORT_SYMBOL(strcpy); 75EXPORT_SYMBOL(strcpy);
71EXPORT_SYMBOL(strncpy); 76EXPORT_SYMBOL(strncpy);
72EXPORT_SYMBOL(strcat); 77EXPORT_SYMBOL(strcat);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5112a4aa801d..19e8fcb9cea8 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -81,7 +81,7 @@ int ucache_bsize;
81 * from the address that it was linked at, so we must use RELOC/PTRRELOC 81 * from the address that it was linked at, so we must use RELOC/PTRRELOC
82 * to access static data (including strings). -- paulus 82 * to access static data (including strings). -- paulus
83 */ 83 */
84unsigned long __init early_init(unsigned long dt_ptr) 84notrace unsigned long __init early_init(unsigned long dt_ptr)
85{ 85{
86 unsigned long offset = reloc_offset(); 86 unsigned long offset = reloc_offset();
87 struct cpu_spec *spec; 87 struct cpu_spec *spec;
@@ -111,7 +111,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
111 * This is called very early on the boot process, after a minimal 111 * This is called very early on the boot process, after a minimal
112 * MMU environment has been set up but before MMU_init is called. 112 * MMU environment has been set up but before MMU_init is called.
113 */ 113 */
114void __init machine_init(unsigned long dt_ptr, unsigned long phys) 114notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
115{ 115{
116 /* Enable early debugging if any specified (see udbg.h) */ 116 /* Enable early debugging if any specified (see udbg.h) */
117 udbg_early_init(); 117 udbg_early_init();
@@ -133,7 +133,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
133 133
134#ifdef CONFIG_BOOKE_WDT 134#ifdef CONFIG_BOOKE_WDT
135/* Checks wdt=x and wdt_period=xx command-line option */ 135/* Checks wdt=x and wdt_period=xx command-line option */
136int __init early_parse_wdt(char *p) 136notrace int __init early_parse_wdt(char *p)
137{ 137{
138 if (p && strncmp(p, "0", 1) != 0) 138 if (p && strncmp(p, "0", 1) != 0)
139 booke_wdt_enabled = 1; 139 booke_wdt_enabled = 1;
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4d72c8f72159..89774177b209 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,5 +1,10 @@
1CFLAGS_bootx_init.o += -fPIC 1CFLAGS_bootx_init.o += -fPIC
2 2
3ifdef CONFIG_FTRACE
4# Do not trace early boot code
5CFLAGS_REMOVE_bootx_init.o = -pg
6endif
7
3obj-y += pic.o setup.o time.o feature.o pci.o \ 8obj-y += pic.o setup.o time.o feature.o pci.o \
4 sleep.o low_i2c.o cache.o pfunc_core.o \ 9 sleep.o low_i2c.o cache.o pfunc_core.o \
5 pfunc_base.o 10 pfunc_base.o
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index eb36f3b746b8..fca9246470b1 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -11,6 +11,8 @@ config SPARC
11config SPARC64 11config SPARC64
12 bool 12 bool
13 default y 13 default y
14 select HAVE_DYNAMIC_FTRACE
15 select HAVE_FTRACE
14 select HAVE_IDE 16 select HAVE_IDE
15 select HAVE_LMB 17 select HAVE_LMB
16 select HAVE_ARCH_KGDB 18 select HAVE_ARCH_KGDB
diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index 6a4d28a4076d..d6d32d178fc8 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
33 33
34config MCOUNT 34config MCOUNT
35 bool 35 bool
36 depends on STACK_DEBUG 36 depends on STACK_DEBUG || FTRACE
37 default y 37 default y
38 38
39config FRAME_POINTER 39config FRAME_POINTER
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index ec4f5ebb1ca6..418b5782096e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \
14 power.o sbus.o sparc64_ksyms.o chmc.o \ 14 power.o sbus.o sparc64_ksyms.o chmc.o \
15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o 15 visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
16 16
17obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
17obj-$(CONFIG_STACKTRACE) += stacktrace.o 18obj-$(CONFIG_STACKTRACE) += stacktrace.o
18obj-$(CONFIG_PCI) += ebus.o pci_common.o \ 19obj-$(CONFIG_PCI) += ebus.o pci_common.o \
19 pci_psycho.o pci_sabre.o pci_schizo.o \ 20 pci_psycho.o pci_sabre.o pci_schizo.o \
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
new file mode 100644
index 000000000000..4298d0aee713
--- /dev/null
+++ b/arch/sparc64/kernel/ftrace.c
@@ -0,0 +1,94 @@
1#include <linux/spinlock.h>
2#include <linux/hardirq.h>
3#include <linux/ftrace.h>
4#include <linux/percpu.h>
5#include <linux/init.h>
6#include <linux/list.h>
7
8#include <asm/ftrace.h>
9
10static const u32 ftrace_nop = 0x01000000;
11
12notrace unsigned char *ftrace_nop_replace(void)
13{
14 return (char *)&ftrace_nop;
15}
16
17notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
18{
19 static u32 call;
20 s32 off;
21
22 off = ((s32)addr - (s32)ip);
23 call = 0x40000000 | ((u32)off >> 2);
24
25 return (unsigned char *) &call;
26}
27
28notrace int
29ftrace_modify_code(unsigned long ip, unsigned char *old_code,
30 unsigned char *new_code)
31{
32 u32 old = *(u32 *)old_code;
33 u32 new = *(u32 *)new_code;
34 u32 replaced;
35 int faulted;
36
37 __asm__ __volatile__(
38 "1: cas [%[ip]], %[old], %[new]\n"
39 " flush %[ip]\n"
40 " mov 0, %[faulted]\n"
41 "2:\n"
42 " .section .fixup,#alloc,#execinstr\n"
43 " .align 4\n"
44 "3: sethi %%hi(2b), %[faulted]\n"
45 " jmpl %[faulted] + %%lo(2b), %%g0\n"
46 " mov 1, %[faulted]\n"
47 " .previous\n"
48 " .section __ex_table,\"a\"\n"
49 " .align 4\n"
50 " .word 1b, 3b\n"
51 " .previous\n"
52 : "=r" (replaced), [faulted] "=r" (faulted)
53 : [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
54 : "memory");
55
56 if (replaced != old && replaced != new)
57 faulted = 2;
58
59 return faulted;
60}
61
62notrace int ftrace_update_ftrace_func(ftrace_func_t func)
63{
64 unsigned long ip = (unsigned long)(&ftrace_call);
65 unsigned char old[MCOUNT_INSN_SIZE], *new;
66
67 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
68 new = ftrace_call_replace(ip, (unsigned long)func);
69 return ftrace_modify_code(ip, old, new);
70}
71
72notrace int ftrace_mcount_set(unsigned long *data)
73{
74 unsigned long ip = (long)(&mcount_call);
75 unsigned long *addr = data;
76 unsigned char old[MCOUNT_INSN_SIZE], *new;
77
78 /*
79 * Replace the mcount stub with a pointer to the
80 * ip recorder function.
81 */
82 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
83 new = ftrace_call_replace(ip, *addr);
84 *addr = ftrace_modify_code(ip, old, new);
85
86 return 0;
87}
88
89
90int __init ftrace_dyn_arch_init(void *data)
91{
92 ftrace_mcount_set(data);
93 return 0;
94}
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 8ac0b99f2c55..49d3ea50c247 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -53,6 +53,7 @@
53#include <asm/ns87303.h> 53#include <asm/ns87303.h>
54#include <asm/timer.h> 54#include <asm/timer.h>
55#include <asm/cpudata.h> 55#include <asm/cpudata.h>
56#include <asm/ftrace.h>
56 57
57struct poll { 58struct poll {
58 int fd; 59 int fd;
@@ -111,8 +112,7 @@ EXPORT_SYMBOL(__write_trylock);
111EXPORT_SYMBOL(smp_call_function); 112EXPORT_SYMBOL(smp_call_function);
112#endif /* CONFIG_SMP */ 113#endif /* CONFIG_SMP */
113 114
114#if defined(CONFIG_MCOUNT) 115#ifdef CONFIG_MCOUNT
115extern void _mcount(void);
116EXPORT_SYMBOL(_mcount); 116EXPORT_SYMBOL(_mcount);
117#endif 117#endif
118 118
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 9e4534b485c7..7735a7a60533 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -28,10 +28,13 @@ ovstack:
28 .skip OVSTACKSIZE 28 .skip OVSTACKSIZE
29#endif 29#endif
30 .text 30 .text
31 .align 32 31 .align 32
32 .globl mcount, _mcount 32 .globl _mcount
33mcount: 33 .type _mcount,#function
34 .globl mcount
35 .type mcount,#function
34_mcount: 36_mcount:
37mcount:
35#ifdef CONFIG_STACK_DEBUG 38#ifdef CONFIG_STACK_DEBUG
36 /* 39 /*
37 * Check whether %sp is dangerously low. 40 * Check whether %sp is dangerously low.
@@ -55,6 +58,53 @@ _mcount:
55 or %g3, %lo(panicstring), %o0 58 or %g3, %lo(panicstring), %o0
56 call prom_halt 59 call prom_halt
57 nop 60 nop
611:
62#endif
63#ifdef CONFIG_FTRACE
64#ifdef CONFIG_DYNAMIC_FTRACE
65 mov %o7, %o0
66 .globl mcount_call
67mcount_call:
68 call ftrace_stub
69 mov %o0, %o7
70#else
71 sethi %hi(ftrace_trace_function), %g1
72 sethi %hi(ftrace_stub), %g2
73 ldx [%g1 + %lo(ftrace_trace_function)], %g1
74 or %g2, %lo(ftrace_stub), %g2
75 cmp %g1, %g2
76 be,pn %icc, 1f
77 mov %i7, %o1
78 jmpl %g1, %g0
79 mov %o7, %o0
80 /* not reached */
811:
58#endif 82#endif
591: retl 83#endif
84 retl
60 nop 85 nop
86 .size _mcount,.-_mcount
87 .size mcount,.-mcount
88
89#ifdef CONFIG_FTRACE
90 .globl ftrace_stub
91 .type ftrace_stub,#function
92ftrace_stub:
93 retl
94 nop
95 .size ftrace_stub,.-ftrace_stub
96#ifdef CONFIG_DYNAMIC_FTRACE
97 .globl ftrace_caller
98 .type ftrace_caller,#function
99ftrace_caller:
100 mov %i7, %o1
101 mov %o7, %o0
102 .globl ftrace_call
103ftrace_call:
104 call ftrace_stub
105 mov %o0, %o7
106 retl
107 nop
108 .size ftrace_caller,.-ftrace_caller
109#endif
110#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2cfccc987a26..6958d6bcaf70 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_KPROBES 24 select HAVE_KPROBES
25 select HAVE_KRETPROBES 25 select HAVE_KRETPROBES
26 select HAVE_DYNAMIC_FTRACE
27 select HAVE_FTRACE
26 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 28 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
27 select HAVE_ARCH_KGDB if !X86_VOYAGER 29 select HAVE_ARCH_KGDB if !X86_VOYAGER
28 30
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index acc0271920f2..5236621350bc 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,6 +171,34 @@ config IOMMU_LEAK
171 Add a simple leak tracer to the IOMMU code. This is useful when you 171 Add a simple leak tracer to the IOMMU code. This is useful when you
172 are debugging a buggy device driver that leaks IOMMU mappings. 172 are debugging a buggy device driver that leaks IOMMU mappings.
173 173
174config MMIOTRACE_HOOKS
175 bool
176
177config MMIOTRACE
178 bool "Memory mapped IO tracing"
179 depends on DEBUG_KERNEL && PCI
180 select TRACING
181 select MMIOTRACE_HOOKS
182 default y
183 help
184 Mmiotrace traces Memory Mapped I/O access and is meant for
185 debugging and reverse engineering. It is called from the ioremap
186 implementation and works via page faults. Tracing is disabled by
187 default and can be enabled at run-time.
188
189 See Documentation/tracers/mmiotrace.txt.
190 If you are not helping to develop drivers, say N.
191
192config MMIOTRACE_TEST
193 tristate "Test module for mmiotrace"
194 depends on MMIOTRACE && m
195 help
196 This is a dumb module for testing mmiotrace. It is very dangerous
197 as it will write garbage to IO memory starting at a given address.
198 However, it should be safe to use on e.g. unused portion of VRAM.
199
200 Say N, unless you absolutely know what you are doing.
201
174# 202#
175# IO delay types: 203# IO delay types:
176# 204#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 55ff016e9f69..5112c84f5421 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE
10# Do not profile debug utilities
11CFLAGS_REMOVE_tsc_64.o = -pg
12CFLAGS_REMOVE_tsc_32.o = -pg
13CFLAGS_REMOVE_rtc.o = -pg
14endif
15
9# 16#
10# vsyscalls (which work on the user stack) should have 17# vsyscalls (which work on the user stack) should have
11# no stack-protector checks: 18# no stack-protector checks:
@@ -57,6 +64,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
57obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o 64obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
58obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 65obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
59obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 66obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
67obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
60obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 68obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
61obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
62obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/spinlock.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144 144
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146static inline const unsigned char*const * find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
162 { -1, NULL } 162 { -1, NULL }
163}; 163};
164 164
165static const unsigned char*const * find_nop_table(void) 165const unsigned char *const *find_nop_table(void)
166{ 166{
167 const unsigned char *const *noptable = intel_nops; 167 const unsigned char *const *noptable = intel_nops;
168 int i; 168 int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
279 struct list_head next; 279 struct list_head next;
280}; 280};
281static LIST_HEAD(smp_alt_modules); 281static LIST_HEAD(smp_alt_modules);
282static DEFINE_SPINLOCK(smp_alt); 282static DEFINE_MUTEX(smp_alt);
283static int smp_mode = 1; /* protected by smp_alt */ 283static int smp_mode = 1; /* protected by smp_alt */
284 284
285void alternatives_smp_module_add(struct module *mod, char *name, 285void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
312 __func__, smp->locks, smp->locks_end, 312 __func__, smp->locks, smp->locks_end,
313 smp->text, smp->text_end, smp->name); 313 smp->text, smp->text_end, smp->name);
314 314
315 spin_lock(&smp_alt); 315 mutex_lock(&smp_alt);
316 list_add_tail(&smp->next, &smp_alt_modules); 316 list_add_tail(&smp->next, &smp_alt_modules);
317 if (boot_cpu_has(X86_FEATURE_UP)) 317 if (boot_cpu_has(X86_FEATURE_UP))
318 alternatives_smp_unlock(smp->locks, smp->locks_end, 318 alternatives_smp_unlock(smp->locks, smp->locks_end,
319 smp->text, smp->text_end); 319 smp->text, smp->text_end);
320 spin_unlock(&smp_alt); 320 mutex_unlock(&smp_alt);
321} 321}
322 322
323void alternatives_smp_module_del(struct module *mod) 323void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
327 if (smp_alt_once || noreplace_smp) 327 if (smp_alt_once || noreplace_smp)
328 return; 328 return;
329 329
330 spin_lock(&smp_alt); 330 mutex_lock(&smp_alt);
331 list_for_each_entry(item, &smp_alt_modules, next) { 331 list_for_each_entry(item, &smp_alt_modules, next) {
332 if (mod != item->mod) 332 if (mod != item->mod)
333 continue; 333 continue;
334 list_del(&item->next); 334 list_del(&item->next);
335 spin_unlock(&smp_alt); 335 mutex_unlock(&smp_alt);
336 DPRINTK("%s: %s\n", __func__, item->name); 336 DPRINTK("%s: %s\n", __func__, item->name);
337 kfree(item); 337 kfree(item);
338 return; 338 return;
339 } 339 }
340 spin_unlock(&smp_alt); 340 mutex_unlock(&smp_alt);
341} 341}
342 342
343void alternatives_smp_switch(int smp) 343void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
359 return; 359 return;
360 BUG_ON(!smp && (num_online_cpus() > 1)); 360 BUG_ON(!smp && (num_online_cpus() > 1));
361 361
362 spin_lock(&smp_alt); 362 mutex_lock(&smp_alt);
363 363
364 /* 364 /*
365 * Avoid unnecessary switches because it forces JIT based VMs to 365 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
383 mod->text, mod->text_end); 383 mod->text, mod->text_end);
384 } 384 }
385 smp_mode = smp; 385 smp_mode = smp;
386 spin_unlock(&smp_alt); 386 mutex_unlock(&smp_alt);
387} 387}
388 388
389#endif 389#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cfe28a715434..6bc07f0f1202 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
51#include <asm/percpu.h> 51#include <asm/percpu.h>
52#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
53#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
54#include <asm/ftrace.h>
54#include <asm/irq_vectors.h> 55#include <asm/irq_vectors.h>
55 56
56/* 57/*
@@ -1111,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
1111 1112
1112#endif /* CONFIG_XEN */ 1113#endif /* CONFIG_XEN */
1113 1114
1115#ifdef CONFIG_FTRACE
1116#ifdef CONFIG_DYNAMIC_FTRACE
1117
1118ENTRY(mcount)
1119 pushl %eax
1120 pushl %ecx
1121 pushl %edx
1122 movl 0xc(%esp), %eax
1123 subl $MCOUNT_INSN_SIZE, %eax
1124
1125.globl mcount_call
1126mcount_call:
1127 call ftrace_stub
1128
1129 popl %edx
1130 popl %ecx
1131 popl %eax
1132
1133 ret
1134END(mcount)
1135
1136ENTRY(ftrace_caller)
1137 pushl %eax
1138 pushl %ecx
1139 pushl %edx
1140 movl 0xc(%esp), %eax
1141 movl 0x4(%ebp), %edx
1142 subl $MCOUNT_INSN_SIZE, %eax
1143
1144.globl ftrace_call
1145ftrace_call:
1146 call ftrace_stub
1147
1148 popl %edx
1149 popl %ecx
1150 popl %eax
1151
1152.globl ftrace_stub
1153ftrace_stub:
1154 ret
1155END(ftrace_caller)
1156
1157#else /* ! CONFIG_DYNAMIC_FTRACE */
1158
1159ENTRY(mcount)
1160 cmpl $ftrace_stub, ftrace_trace_function
1161 jnz trace
1162.globl ftrace_stub
1163ftrace_stub:
1164 ret
1165
1166 /* taken from glibc */
1167trace:
1168 pushl %eax
1169 pushl %ecx
1170 pushl %edx
1171 movl 0xc(%esp), %eax
1172 movl 0x4(%ebp), %edx
1173 subl $MCOUNT_INSN_SIZE, %eax
1174
1175 call *ftrace_trace_function
1176
1177 popl %edx
1178 popl %ecx
1179 popl %eax
1180
1181 jmp ftrace_stub
1182END(mcount)
1183#endif /* CONFIG_DYNAMIC_FTRACE */
1184#endif /* CONFIG_FTRACE */
1185
1114.section .rodata,"a" 1186.section .rodata,"a"
1115#include "syscall_table_32.S" 1187#include "syscall_table_32.S"
1116 1188
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bb4e22f4892f..ba41bf42748d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
51#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h>
54 55
55 .code64 56 .code64
56 57
58#ifdef CONFIG_FTRACE
59#ifdef CONFIG_DYNAMIC_FTRACE
60ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74.globl mcount_call
75mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88END(mcount)
89
90ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106.globl ftrace_call
107ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119.globl ftrace_stub
120ftrace_stub:
121 retq
122END(ftrace_caller)
123
124#else /* ! CONFIG_DYNAMIC_FTRACE */
125ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128.globl ftrace_stub
129ftrace_stub:
130 retq
131
132trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159END(mcount)
160#endif /* CONFIG_DYNAMIC_FTRACE */
161#endif /* CONFIG_FTRACE */
162
57#ifndef CONFIG_PREEMPT 163#ifndef CONFIG_PREEMPT
58#define retint_kernel retint_restore_args 164#define retint_kernel retint_restore_args
59#endif 165#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..ab115cd15fdf
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/alternative.h>
20#include <asm/ftrace.h>
21
22
23/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop;
25
26union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE];
28 struct {
29 char e8;
30 int offset;
31 } __attribute__((packed));
32};
33
34
35static int notrace ftrace_calc_offset(long ip, long addr)
36{
37 return (int)(addr - ip);
38}
39
40notrace unsigned char *ftrace_nop_replace(void)
41{
42 return (char *)ftrace_nop;
43}
44
45notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{
47 static union ftrace_code_union calc;
48
49 calc.e8 = 0xe8;
50 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
51
52 /*
53 * No locking needed, this must be called via kstop_machine
54 * which in essence is like running on a uniprocessor machine.
55 */
56 return calc.code;
57}
58
59notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code)
62{
63 unsigned replaced;
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68
69 /*
70 * Note: Due to modules and __init, code can
71 * disappear and change, we need to protect against faulting
72 * as well as code changing.
73 *
74 * No real locking needed, this code is run through
75 * kstop_machine.
76 */
77 asm volatile (
78 "1: lock\n"
79 " cmpxchg %3, (%2)\n"
80 " jnz 2f\n"
81 " movb %b4, 4(%2)\n"
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93
94 if (replaced != old && replaced != new)
95 faulted = 2;
96
97 return faulted;
98}
99
100notrace int ftrace_update_ftrace_func(ftrace_func_t func)
101{
102 unsigned long ip = (unsigned long)(&ftrace_call);
103 unsigned char old[MCOUNT_INSN_SIZE], *new;
104 int ret;
105
106 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
107 new = ftrace_call_replace(ip, (unsigned long)func);
108 ret = ftrace_modify_code(ip, old, new);
109
110 return ret;
111}
112
113notrace int ftrace_mcount_set(unsigned long *data)
114{
115 unsigned long ip = (long)(&mcount_call);
116 unsigned long *addr = data;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0;
128}
129
130int __init ftrace_dyn_arch_init(void *data)
131{
132 const unsigned char *const *noptable = find_nop_table();
133
134 /* This is running in kstop_machine */
135
136 ftrace_mcount_set(data);
137
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
139
140 return 0;
141}
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..dd7ebee446af 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
1#include <linux/module.h> 1#include <linux/module.h>
2
2#include <asm/checksum.h> 3#include <asm/checksum.h>
3#include <asm/desc.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable.h>
5#include <asm/desc.h>
6#include <asm/ftrace.h>
7
8#ifdef CONFIG_FTRACE
9/* mcount is defined in assembly */
10EXPORT_SYMBOL(mcount);
11#endif
5 12
6/* Networking helper routines. */ 13/* Networking helper routines. */
7EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f4960171bc66..8864230d55af 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
16#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
107 unsigned long page_list[PAGES_NR]; 109 unsigned long page_list[PAGES_NR];
108 void *control_page; 110 void *control_page;
109 111
112 tracer_disable();
113
110 /* Interrupts aren't acceptable while we reboot */ 114 /* Interrupts aren't acceptable while we reboot */
111 local_irq_disable(); 115 local_irq_disable();
112 116
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4a8380..9dd9262693a3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
16#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
184 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
185 void *control_page; 187 void *control_page;
186 188
189 tracer_disable();
190
187 /* Interrupts aren't acceptable while we reboot */ 191 /* Interrupts aren't acceptable while we reboot */
188 local_irq_disable(); 192 local_irq_disable();
189 193
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6c9df3..0c3927accb00 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
142 142
143 local_irq_disable(); 143 local_irq_disable();
144 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 144 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
145 /* Don't trace irqs off for idle */
146 stop_critical_timings();
145 pm_idle(); 147 pm_idle();
148 start_critical_timings();
146 } 149 }
147 tick_nohz_restart_sched_tick(); 150 tick_nohz_restart_sched_tick();
148 preempt_enable_no_resched(); 151 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb963e4df..a8e53626ac9a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
134 */ 134 */
135 local_irq_disable(); 135 local_irq_disable();
136 enter_idle(); 136 enter_idle();
137 /* Don't trace irqs off for idle */
138 stop_critical_timings();
137 pm_idle(); 139 pm_idle();
140 start_critical_timings();
138 /* In many cases the interrupt that ended idle 141 /* In many cases the interrupt that ended idle
139 has already called exit_idle. But some idle 142 has already called exit_idle. But some idle
140 loops can be woken up without interrupt. */ 143 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd84c3e5..e50740d32314 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
42#include <asm/topology.h> 42#include <asm/topology.h>
43#include <asm/vgtod.h> 43#include <asm/vgtod.h>
44 44
45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 45#define __vsyscall(nr) \
46 __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
46#define __syscall_clobber "r11","cx","memory" 47#define __syscall_clobber "r11","cx","memory"
47 48
48/* 49/*
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a826897..b545f371b5f5 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
2 All C exports should go in the respective C files. */ 2 All C exports should go in the respective C files. */
3 3
4#include <linux/module.h> 4#include <linux/module.h>
5#include <net/checksum.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7 6
7#include <net/checksum.h>
8
8#include <asm/processor.h> 9#include <asm/processor.h>
9#include <asm/uaccess.h>
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/uaccess.h>
11#include <asm/desc.h> 12#include <asm/desc.h>
13#include <asm/ftrace.h>
14
15#ifdef CONFIG_FTRACE
16/* mcount is defined in assembly */
17EXPORT_SYMBOL(mcount);
18#endif
12 19
13EXPORT_SYMBOL(kernel_thread); 20EXPORT_SYMBOL(kernel_thread);
14 21
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 83226e0a7ce4..aa3fa4119424 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay.o 7lib-y := delay.o
8lib-y += thunk_$(BITS).o
8lib-y += usercopy_$(BITS).o getuser.o putuser.o 9lib-y += usercopy_$(BITS).o getuser.o putuser.o
9lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
10 11
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
1/*
2 * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
3 * Copyright 2008 by Steven Rostedt, Red Hat, Inc
4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
8 #include <linux/linkage.h>
9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func
31 .globl \name
32\name:
33 pushl %eax
34 pushl %ecx
35 pushl %edx
36 /* Place EIP in the arg1 */
37 movl 3*4(%esp), %eax
38 call \func
39 popl %edx
40 popl %ecx
41 popl %eax
42 ret
43 .endm
44
45 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
46 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 7 */
7 8
@@ -42,8 +43,22 @@
42#endif 43#endif
43 44
44#ifdef CONFIG_TRACE_IRQFLAGS 45#ifdef CONFIG_TRACE_IRQFLAGS
45 thunk trace_hardirqs_on_thunk,trace_hardirqs_on 46 /* put return address in rdi (arg1) */
46 thunk trace_hardirqs_off_thunk,trace_hardirqs_off 47 .macro thunk_ra name,func
48 .globl \name
49\name:
50 CFI_STARTPROC
51 SAVE_ARGS
52 /* SAVE_ARGS pushs 9 elements */
53 /* the next element would be the rip */
54 movq 9*8(%rsp), %rdi
55 call \func
56 jmp restore
57 CFI_ENDPROC
58 .endm
59
60 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
61 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif 62#endif
48 63
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c107641cd39b..9873716e9f76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15
11ifeq ($(CONFIG_X86_32),y) 16ifeq ($(CONFIG_X86_32),y)
12obj-$(CONFIG_NUMA) += discontig_32.o 17obj-$(CONFIG_NUMA) += discontig_32.o
13else 18else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0f5fce77d95..455f3fe67b42 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/mmiotrace.h>
13#include <linux/mman.h> 14#include <linux/mman.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/smp.h> 16#include <linux/smp.h>
@@ -49,6 +50,16 @@
49#define PF_RSVD (1<<3) 50#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 51#define PF_INSTR (1<<4)
51 52
53static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
54{
55#ifdef CONFIG_MMIOTRACE_HOOKS
56 if (unlikely(is_kmmio_active()))
57 if (kmmio_handler(regs, addr) == 1)
58 return -1;
59#endif
60 return 0;
61}
62
52static inline int notify_page_fault(struct pt_regs *regs) 63static inline int notify_page_fault(struct pt_regs *regs)
53{ 64{
54#ifdef CONFIG_KPROBES 65#ifdef CONFIG_KPROBES
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
598 609
599 if (notify_page_fault(regs)) 610 if (notify_page_fault(regs))
600 return; 611 return;
612 if (unlikely(kmmio_fault(regs, address)))
613 return;
601 614
602 /* 615 /*
603 * We fault-in kernel-space virtual memory on-demand. The 616 * We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 029e8cffca9e..9689a5138e64 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1035,6 +1035,8 @@ void mark_rodata_ro(void)
1035 unsigned long start = PFN_ALIGN(_text); 1035 unsigned long start = PFN_ALIGN(_text);
1036 unsigned long size = PFN_ALIGN(_etext) - start; 1036 unsigned long size = PFN_ALIGN(_etext) - start;
1037 1037
1038#ifndef CONFIG_DYNAMIC_FTRACE
1039 /* Dynamic tracing modifies the kernel text section */
1038 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1040 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1039 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 1041 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
1040 size >> 10); 1042 size >> 10);
@@ -1047,6 +1049,8 @@ void mark_rodata_ro(void)
1047 printk(KERN_INFO "Testing CPA: write protecting again\n"); 1049 printk(KERN_INFO "Testing CPA: write protecting again\n");
1048 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 1050 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
1049#endif 1051#endif
1052#endif /* CONFIG_DYNAMIC_FTRACE */
1053
1050 start += size; 1054 start += size;
1051 size = (unsigned long)__end_rodata - start; 1055 size = (unsigned long)__end_rodata - start;
1052 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1056 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a25cc6fa2207..27de2435e008 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -991,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
991void mark_rodata_ro(void) 991void mark_rodata_ro(void)
992{ 992{
993 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 993 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
994 unsigned long rodata_start =
995 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
996
997#ifdef CONFIG_DYNAMIC_FTRACE
998 /* Dynamic tracing modifies the kernel text section */
999 start = rodata_start;
1000#endif
994 1001
995 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 1002 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
996 (end - start) >> 10); 1003 (end - start) >> 10);
@@ -1000,8 +1007,7 @@ void mark_rodata_ro(void)
1000 * The rodata section (but not the kernel text!) should also be 1007 * The rodata section (but not the kernel text!) should also be
1001 * not-executable. 1008 * not-executable.
1002 */ 1009 */
1003 start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 1010 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
1004 set_memory_nx(start, (end - start) >> PAGE_SHIFT);
1005 1011
1006 rodata_test(); 1012 rodata_test();
1007 1013
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 115f13ee40c9..24c1d3c30186 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/mmiotrace.h>
15 16
16#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
17#include <asm/e820.h> 18#include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
122{ 123{
123 unsigned long pfn, offset, vaddr; 124 unsigned long pfn, offset, vaddr;
124 resource_size_t last_addr; 125 resource_size_t last_addr;
126 const resource_size_t unaligned_phys_addr = phys_addr;
127 const unsigned long unaligned_size = size;
125 struct vm_struct *area; 128 struct vm_struct *area;
126 unsigned long new_prot_val; 129 unsigned long new_prot_val;
127 pgprot_t prot; 130 pgprot_t prot;
128 int retval; 131 int retval;
132 void __iomem *ret_addr;
129 133
130 /* Don't allow wraparound or zero size */ 134 /* Don't allow wraparound or zero size */
131 last_addr = phys_addr + size - 1; 135 last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
233 return NULL; 237 return NULL;
234 } 238 }
235 239
236 return (void __iomem *) (vaddr + offset); 240 ret_addr = (void __iomem *) (vaddr + offset);
241 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
242
243 return ret_addr;
237} 244}
238 245
239/** 246/**
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr)
348 addr = (volatile void __iomem *) 355 addr = (volatile void __iomem *)
349 (PAGE_MASK & (unsigned long __force)addr); 356 (PAGE_MASK & (unsigned long __force)addr);
350 357
358 mmiotrace_iounmap(addr);
359
351 /* Use the vm area unlocked, assuming the caller 360 /* Use the vm area unlocked, assuming the caller
352 ensures there isn't another iounmap for the same address 361 ensures there isn't another iounmap for the same address
353 in parallel. Reuse of the virtual address is prevented by 362 in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 000000000000..93d82038af4b
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
1/* Support for MMIO probes.
2 * Benfit many code from kprobes
3 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
4 * 2007 Alexander Eichner
5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */
7
8#include <linux/list.h>
9#include <linux/rculist.h>
10#include <linux/spinlock.h>
11#include <linux/hash.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/uaccess.h>
16#include <linux/ptrace.h>
17#include <linux/preempt.h>
18#include <linux/percpu.h>
19#include <linux/kdebug.h>
20#include <linux/mutex.h>
21#include <linux/io.h>
22#include <asm/cacheflush.h>
23#include <asm/tlbflush.h>
24#include <linux/errno.h>
25#include <asm/debugreg.h>
26#include <linux/mmiotrace.h>
27
28#define KMMIO_PAGE_HASH_BITS 4
29#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
30
31struct kmmio_fault_page {
32 struct list_head list;
33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */
35
36 /*
37 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU).
40 */
41 int count;
42};
43
44struct kmmio_delayed_release {
45 struct rcu_head rcu;
46 struct kmmio_fault_page *release_list;
47};
48
49struct kmmio_context {
50 struct kmmio_fault_page *fpage;
51 struct kmmio_probe *probe;
52 unsigned long saved_flags;
53 unsigned long addr;
54 int active;
55};
56
57static DEFINE_SPINLOCK(kmmio_lock);
58
59/* Protected by kmmio_lock */
60unsigned int kmmio_count;
61
62/* Read-protected by RCU, write-protected by kmmio_lock. */
63static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
64static LIST_HEAD(kmmio_probes);
65
66static struct list_head *kmmio_page_list(unsigned long page)
67{
68 return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
69}
70
71/* Accessed per-cpu */
72static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
73
74/*
75 * this is basically a dynamic stabbing problem:
76 * Could use the existing prio tree code or
77 * Possible better implementations:
78 * The Interval Skip List: A Data Structure for Finding All Intervals That
79 * Overlap a Point (might be simple)
80 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
81 */
82/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
83static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
84{
85 struct kmmio_probe *p;
86 list_for_each_entry_rcu(p, &kmmio_probes, list) {
87 if (addr >= p->addr && addr <= (p->addr + p->len))
88 return p;
89 }
90 return NULL;
91}
92
93/* You must be holding RCU read lock. */
94static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
95{
96 struct list_head *head;
97 struct kmmio_fault_page *p;
98
99 page &= PAGE_MASK;
100 head = kmmio_page_list(page);
101 list_for_each_entry_rcu(p, head, list) {
102 if (p->page == page)
103 return p;
104 }
105 return NULL;
106}
107
108static void set_page_present(unsigned long addr, bool present,
109 unsigned int *pglevel)
110{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level);
116
117 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return;
120 }
121
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) {
126 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte;
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break;
133
134 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT;
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break;
140
141 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return;
144 }
145
146 __flush_tlb_one(addr);
147}
148
149/** Mark the given page as not present. Access to it will trigger a fault. */
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
151{
152 set_page_present(page & PAGE_MASK, false, pglevel);
153}
154
155/** Mark the given page as present. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
157{
158 set_page_present(page & PAGE_MASK, true, pglevel);
159}
160
161/*
162 * This is being called from do_page_fault().
163 *
164 * We may be in an interrupt or a critical section. Also prefecthing may
165 * trigger a page fault. We may be in the middle of process switch.
166 * We cannot take any locks, because we could be executing especially
167 * within a kmmio critical section.
168 *
169 * Local interrupts are disabled, so preemption cannot happen.
170 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
171 */
172/*
173 * Interrupts are disabled on entry as trap3 is an interrupt gate
174 * and they remain disabled thorough out this function.
175 */
176int kmmio_handler(struct pt_regs *regs, unsigned long addr)
177{
178 struct kmmio_context *ctx;
179 struct kmmio_fault_page *faultpage;
180 int ret = 0; /* default to fault not handled */
181
182 /*
183 * Preemption is now disabled to prevent process switch during
184 * single stepping. We can only handle one active kmmio trace
185 * per cpu, so ensure that we finish it before something else
186 * gets to run. We also hold the RCU read lock over single
187 * stepping to avoid looking up the probe and kmmio_fault_page
188 * again.
189 */
190 preempt_disable();
191 rcu_read_lock();
192
193 faultpage = get_kmmio_fault_page(addr);
194 if (!faultpage) {
195 /*
196 * Either this page fault is not caused by kmmio, or
197 * another CPU just pulled the kmmio probe from under
198 * our feet. The latter case should not be possible.
199 */
200 goto no_kmmio;
201 }
202
203 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) {
207 /*
208 * On SMP we sometimes get recursive probe hits on the
209 * same address. Context is already saved, fall out.
210 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for "
212 "address 0x%08lx.\n",
213 smp_processor_id(), addr);
214 ret = 1;
215 goto no_kmmio_ctx;
216 }
217 /*
218 * Prevent overwriting already in-flight context.
219 * This should not happen, let's hope disarming at least
220 * prevents a panic.
221 */
222 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr);
227 goto no_kmmio_ctx;
228 }
229 ctx->active++;
230
231 ctx->fpage = faultpage;
232 ctx->probe = get_kmmio_probe(addr);
233 ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
234 ctx->addr = addr;
235
236 if (ctx->probe && ctx->probe->pre_handler)
237 ctx->probe->pre_handler(ctx->probe, regs, addr);
238
239 /*
240 * Enable single-stepping and disable interrupts for the faulting
241 * context. Local interrupts must not get enabled during stepping.
242 */
243 regs->flags |= X86_EFLAGS_TF;
244 regs->flags &= ~X86_EFLAGS_IF;
245
246 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL);
248
249 /*
250 * If another cpu accesses the same page while we are stepping,
251 * the access will not be caught. It will simply succeed and the
252 * only downside is we lose the event. If this becomes a problem,
253 * the user should drop to single cpu before tracing.
254 */
255
256 put_cpu_var(kmmio_ctx);
257 return 1; /* fault handled */
258
259no_kmmio_ctx:
260 put_cpu_var(kmmio_ctx);
261no_kmmio:
262 rcu_read_unlock();
263 preempt_enable_no_resched();
264 return ret;
265}
266
267/*
268 * Interrupts are disabled on entry as trap1 is an interrupt gate
269 * and they remain disabled thorough out this function.
270 * This must always get called as the pair to kmmio_handler().
271 */
272static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
273{
274 int ret = 0;
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276
277 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id());
280 goto out;
281 }
282
283 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs);
285
286 arm_kmmio_fault_page(ctx->fpage->page, NULL);
287
288 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags;
290
291 /* These were acquired in kmmio_handler(). */
292 ctx->active--;
293 BUG_ON(ctx->active);
294 rcu_read_unlock();
295 preempt_enable_no_resched();
296
297 /*
298 * if somebody else is singlestepping across a probe point, flags
299 * will have TF set, in which case, continue the remaining processing
300 * of do_debug, as if this is not a probe hit.
301 */
302 if (!(regs->flags & X86_EFLAGS_TF))
303 ret = 1;
304out:
305 put_cpu_var(kmmio_ctx);
306 return ret;
307}
308
309/* You must be holding kmmio_lock. */
310static int add_kmmio_fault_page(unsigned long page)
311{
312 struct kmmio_fault_page *f;
313
314 page &= PAGE_MASK;
315 f = get_kmmio_fault_page(page);
316 if (f) {
317 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL);
319 f->count++;
320 return 0;
321 }
322
323 f = kmalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f)
325 return -1;
326
327 f->count = 1;
328 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330
331 arm_kmmio_fault_page(f->page, NULL);
332
333 return 0;
334}
335
336/* You must be holding kmmio_lock. */
337static void release_kmmio_fault_page(unsigned long page,
338 struct kmmio_fault_page **release_list)
339{
340 struct kmmio_fault_page *f;
341
342 page &= PAGE_MASK;
343 f = get_kmmio_fault_page(page);
344 if (!f)
345 return;
346
347 f->count--;
348 BUG_ON(f->count < 0);
349 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL);
351 f->release_next = *release_list;
352 *release_list = f;
353 }
354}
355
356/*
357 * With page-unaligned ioremaps, one or two armed pages may contain
358 * addresses from outside the intended mapping. Events for these addresses
359 * are currently silently dropped. The events may result only from programming
360 * mistakes by accessing addresses before the beginning or past the end of a
361 * mapping.
362 */
363int register_kmmio_probe(struct kmmio_probe *p)
364{
365 unsigned long flags;
366 int ret = 0;
367 unsigned long size = 0;
368 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
369
370 spin_lock_irqsave(&kmmio_lock, flags);
371 if (get_kmmio_probe(p->addr)) {
372 ret = -EEXIST;
373 goto out;
374 }
375 kmmio_count++;
376 list_add_rcu(&p->list, &kmmio_probes);
377 while (size < size_lim) {
378 if (add_kmmio_fault_page(p->addr + size))
379 pr_err("kmmio: Unable to set page fault.\n");
380 size += PAGE_SIZE;
381 }
382out:
383 spin_unlock_irqrestore(&kmmio_lock, flags);
384 /*
385 * XXX: What should I do here?
386 * Here was a call to global_flush_tlb(), but it does not exist
387 * anymore. It seems it's not needed after all.
388 */
389 return ret;
390}
391EXPORT_SYMBOL(register_kmmio_probe);
392
393static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
394{
395 struct kmmio_delayed_release *dr = container_of(
396 head,
397 struct kmmio_delayed_release,
398 rcu);
399 struct kmmio_fault_page *p = dr->release_list;
400 while (p) {
401 struct kmmio_fault_page *next = p->release_next;
402 BUG_ON(p->count);
403 kfree(p);
404 p = next;
405 }
406 kfree(dr);
407}
408
409static void remove_kmmio_fault_pages(struct rcu_head *head)
410{
411 struct kmmio_delayed_release *dr = container_of(
412 head,
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags;
418 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) {
420 if (!p->count)
421 list_del_rcu(&p->list);
422 else
423 *prevp = p->release_next;
424 prevp = &p->release_next;
425 p = p->release_next;
426 }
427 spin_unlock_irqrestore(&kmmio_lock, flags);
428 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430}
431
432/*
433 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
434 * sure that the callbacks will not be called anymore. Only after that
435 * you may actually release your struct kmmio_probe.
436 *
437 * Unregistering a kmmio fault page has three steps:
438 * 1. release_kmmio_fault_page()
439 * Disarm the page, wait a grace period to let all faults finish.
440 * 2. remove_kmmio_fault_pages()
441 * Remove the pages from kmmio_page_table.
442 * 3. rcu_free_kmmio_fault_pages()
443 * Actally free the kmmio_fault_page structs as with RCU.
444 */
445void unregister_kmmio_probe(struct kmmio_probe *p)
446{
447 unsigned long flags;
448 unsigned long size = 0;
449 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
450 struct kmmio_fault_page *release_list = NULL;
451 struct kmmio_delayed_release *drelease;
452
453 spin_lock_irqsave(&kmmio_lock, flags);
454 while (size < size_lim) {
455 release_kmmio_fault_page(p->addr + size, &release_list);
456 size += PAGE_SIZE;
457 }
458 list_del_rcu(&p->list);
459 kmmio_count--;
460 spin_unlock_irqrestore(&kmmio_lock, flags);
461
462 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
463 if (!drelease) {
464 pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
465 return;
466 }
467 drelease->release_list = release_list;
468
469 /*
470 * This is not really RCU here. We have just disarmed a set of
471 * pages so that they cannot trigger page faults anymore. However,
472 * we cannot remove the pages from kmmio_page_table,
473 * because a probe hit might be in flight on another CPU. The
474 * pages are collected into a list, and they will be removed from
475 * kmmio_page_table when it is certain that no probe hit related to
476 * these pages can be in flight. RCU grace period sounds like a
477 * good choice.
478 *
479 * If we removed the pages too early, kmmio page fault handler might
480 * not find the respective kmmio_fault_page and determine it's not
481 * a kmmio fault, when it actually is. This would lead to madness.
482 */
483 call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
484}
485EXPORT_SYMBOL(unregister_kmmio_probe);
486
487static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
488 void *args)
489{
490 struct die_args *arg = args;
491
492 if (val == DIE_DEBUG && (arg->err & DR_STEP))
493 if (post_kmmio_handler(arg->err, arg->regs) == 1)
494 return NOTIFY_STOP;
495
496 return NOTIFY_DONE;
497}
498
499static struct notifier_block nb_die = {
500 .notifier_call = kmmio_die_notifier
501};
502
503static int __init init_kmmio(void)
504{
505 int i;
506 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
507 INIT_LIST_HEAD(&kmmio_page_table[i]);
508 return register_die_notifier(&nb_die);
509}
510fs_initcall(init_kmmio); /* should be before device_initcall() */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 000000000000..e7397e108beb
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2005
17 * Jeff Muizelaar, 2006, 2007
18 * Pekka Paalanen, 2008 <pq@iki.fi>
19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */
22#define DEBUG 1
23
24#include <linux/module.h>
25#include <linux/debugfs.h>
26#include <linux/uaccess.h>
27#include <linux/io.h>
28#include <linux/version.h>
29#include <linux/kallsyms.h>
30#include <asm/pgtable.h>
31#include <linux/mmiotrace.h>
32#include <asm/e820.h> /* for ISA_START_ADDRESS */
33#include <asm/atomic.h>
34#include <linux/percpu.h>
35#include <linux/cpu.h>
36
37#include "pf_in.h"
38
39#define NAME "mmiotrace: "
40
41struct trap_reason {
42 unsigned long addr;
43 unsigned long ip;
44 enum reason_type type;
45 int active_traces;
46};
47
48struct remap_trace {
49 struct list_head list;
50 struct kmmio_probe probe;
51 resource_size_t phys;
52 unsigned long id;
53};
54
55/* Accessed per-cpu. */
56static DEFINE_PER_CPU(struct trap_reason, pf_reason);
57static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
58
59#if 0 /* XXX: no way gather this info anymore */
60/* Access to this is not per-cpu. */
61static DEFINE_PER_CPU(atomic_t, dropped);
62#endif
63
64static struct dentry *marker_file;
65
66static DEFINE_MUTEX(mmiotrace_mutex);
67static DEFINE_SPINLOCK(trace_lock);
68static atomic_t mmiotrace_enabled;
69static LIST_HEAD(trace_list); /* struct remap_trace */
70
71/*
72 * Locking in this file:
73 * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
74 * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
75 * and trace_lock.
76 * - Routines depending on is_enabled() must take trace_lock.
77 * - trace_list users must hold trace_lock.
78 * - is_enabled() guarantees that mmio_trace_record is allowed.
79 * - pre/post callbacks assume the effect of is_enabled() being true.
80 */
81
82/* module parameters */
83static unsigned long filter_offset;
84static int nommiotrace;
85static int trace_pc;
86
87module_param(filter_offset, ulong, 0);
88module_param(nommiotrace, bool, 0);
89module_param(trace_pc, bool, 0);
90
91MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
92MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
93MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
94
95static bool is_enabled(void)
96{
97 return atomic_read(&mmiotrace_enabled);
98}
99
100#if 0 /* XXX: needs rewrite */
101/*
102 * Write callback for the debugfs entry:
103 * Read a marker and write it to the mmio trace log
104 */
105static ssize_t write_marker(struct file *file, const char __user *buffer,
106 size_t count, loff_t *ppos)
107{
108 char *event = NULL;
109 struct mm_io_header *headp;
110 ssize_t len = (count > 65535) ? 65535 : count;
111
112 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
113 if (!event)
114 return -ENOMEM;
115
116 headp = (struct mm_io_header *)event;
117 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
118 headp->data_len = len;
119
120 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
121 kfree(event);
122 return -EFAULT;
123 }
124
125 spin_lock_irq(&trace_lock);
126#if 0 /* XXX: convert this to use tracing */
127 if (is_enabled())
128 relay_write(chan, event, sizeof(*headp) + len);
129 else
130#endif
131 len = -EINVAL;
132 spin_unlock_irq(&trace_lock);
133 kfree(event);
134 return len;
135}
136#endif
137
138static void print_pte(unsigned long address)
139{
140 unsigned int level;
141 pte_t *pte = lookup_address(address, &level);
142
143 if (!pte) {
144 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
145 __func__, address);
146 return;
147 }
148
149 if (level == PG_LEVEL_2M) {
150 pr_emerg(NAME "4MB pages are not currently supported: "
151 "0x%08lx\n", address);
152 BUG();
153 }
154 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
155 (unsigned long long)pte_val(*pte),
156 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
157}
158
159/*
160 * For some reason the pre/post pairs have been called in an
161 * unmatched order. Report and die.
162 */
163static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
164{
165 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
166 pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
167 "last fault for address: 0x%08lx\n",
168 addr, my_reason->addr);
169 print_pte(addr);
170 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
171 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
172#ifdef __i386__
173 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
174 regs->ax, regs->bx, regs->cx, regs->dx);
175 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
176 regs->si, regs->di, regs->bp, regs->sp);
177#else
178 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
179 regs->ax, regs->cx, regs->dx);
180 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
181 regs->si, regs->di, regs->bp, regs->sp);
182#endif
183 put_cpu_var(pf_reason);
184 BUG();
185}
186
187static void pre(struct kmmio_probe *p, struct pt_regs *regs,
188 unsigned long addr)
189{
190 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
191 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
192 const unsigned long instptr = instruction_pointer(regs);
193 const enum reason_type type = get_ins_type(instptr);
194 struct remap_trace *trace = p->private;
195
196 /* it doesn't make sense to have more than one active trace per cpu */
197 if (my_reason->active_traces)
198 die_kmmio_nesting_error(regs, addr);
199 else
200 my_reason->active_traces++;
201
202 my_reason->type = type;
203 my_reason->addr = addr;
204 my_reason->ip = instptr;
205
206 my_trace->phys = addr - trace->probe.addr + trace->phys;
207 my_trace->map_id = trace->id;
208
209 /*
210 * Only record the program counter when requested.
211 * It may taint clean-room reverse engineering.
212 */
213 if (trace_pc)
214 my_trace->pc = instptr;
215 else
216 my_trace->pc = 0;
217
218 /*
219 * XXX: the timestamp recorded will be *after* the tracing has been
220 * done, not at the time we hit the instruction. SMP implications
221 * on event ordering?
222 */
223
224 switch (type) {
225 case REG_READ:
226 my_trace->opcode = MMIO_READ;
227 my_trace->width = get_ins_mem_width(instptr);
228 break;
229 case REG_WRITE:
230 my_trace->opcode = MMIO_WRITE;
231 my_trace->width = get_ins_mem_width(instptr);
232 my_trace->value = get_ins_reg_val(instptr, regs);
233 break;
234 case IMM_WRITE:
235 my_trace->opcode = MMIO_WRITE;
236 my_trace->width = get_ins_mem_width(instptr);
237 my_trace->value = get_ins_imm_val(instptr);
238 break;
239 default:
240 {
241 unsigned char *ip = (unsigned char *)instptr;
242 my_trace->opcode = MMIO_UNKNOWN_OP;
243 my_trace->width = 0;
244 my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
245 *(ip + 2);
246 }
247 }
248 put_cpu_var(cpu_trace);
249 put_cpu_var(pf_reason);
250}
251
252static void post(struct kmmio_probe *p, unsigned long condition,
253 struct pt_regs *regs)
254{
255 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
256 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
257
258 /* this should always return the active_trace count to 0 */
259 my_reason->active_traces--;
260 if (my_reason->active_traces) {
261 pr_emerg(NAME "unexpected post handler");
262 BUG();
263 }
264
265 switch (my_reason->type) {
266 case REG_READ:
267 my_trace->value = get_ins_reg_val(my_reason->ip, regs);
268 break;
269 default:
270 break;
271 }
272
273 mmio_trace_rw(my_trace);
274 put_cpu_var(cpu_trace);
275 put_cpu_var(pf_reason);
276}
277
278static void ioremap_trace_core(resource_size_t offset, unsigned long size,
279 void __iomem *addr)
280{
281 static atomic_t next_id;
282 struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
283 /* These are page-unaligned. */
284 struct mmiotrace_map map = {
285 .phys = offset,
286 .virt = (unsigned long)addr,
287 .len = size,
288 .opcode = MMIO_PROBE
289 };
290
291 if (!trace) {
292 pr_err(NAME "kmalloc failed in ioremap\n");
293 return;
294 }
295
296 *trace = (struct remap_trace) {
297 .probe = {
298 .addr = (unsigned long)addr,
299 .len = size,
300 .pre_handler = pre,
301 .post_handler = post,
302 .private = trace
303 },
304 .phys = offset,
305 .id = atomic_inc_return(&next_id)
306 };
307 map.map_id = trace->id;
308
309 spin_lock_irq(&trace_lock);
310 if (!is_enabled())
311 goto not_enabled;
312
313 mmio_trace_mapping(&map);
314 list_add_tail(&trace->list, &trace_list);
315 if (!nommiotrace)
316 register_kmmio_probe(&trace->probe);
317
318not_enabled:
319 spin_unlock_irq(&trace_lock);
320}
321
322void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
323 void __iomem *addr)
324{
325 if (!is_enabled()) /* recheck and proper locking in *_core() */
326 return;
327
328 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
329 (unsigned long long)offset, size, addr);
330 if ((filter_offset) && (offset != filter_offset))
331 return;
332 ioremap_trace_core(offset, size, addr);
333}
334
335static void iounmap_trace_core(volatile void __iomem *addr)
336{
337 struct mmiotrace_map map = {
338 .phys = 0,
339 .virt = (unsigned long)addr,
340 .len = 0,
341 .opcode = MMIO_UNPROBE
342 };
343 struct remap_trace *trace;
344 struct remap_trace *tmp;
345 struct remap_trace *found_trace = NULL;
346
347 pr_debug(NAME "Unmapping %p.\n", addr);
348
349 spin_lock_irq(&trace_lock);
350 if (!is_enabled())
351 goto not_enabled;
352
353 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
354 if ((unsigned long)addr == trace->probe.addr) {
355 if (!nommiotrace)
356 unregister_kmmio_probe(&trace->probe);
357 list_del(&trace->list);
358 found_trace = trace;
359 break;
360 }
361 }
362 map.map_id = (found_trace) ? found_trace->id : -1;
363 mmio_trace_mapping(&map);
364
365not_enabled:
366 spin_unlock_irq(&trace_lock);
367 if (found_trace) {
368 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
369 kfree(found_trace);
370 }
371}
372
373void mmiotrace_iounmap(volatile void __iomem *addr)
374{
375 might_sleep();
376 if (is_enabled()) /* recheck and proper locking in *_core() */
377 iounmap_trace_core(addr);
378}
379
380static void clear_trace_list(void)
381{
382 struct remap_trace *trace;
383 struct remap_trace *tmp;
384
385 /*
386 * No locking required, because the caller ensures we are in a
387 * critical section via mutex, and is_enabled() is false,
388 * i.e. nothing can traverse or modify this list.
389 * Caller also ensures is_enabled() cannot change.
390 */
391 list_for_each_entry(trace, &trace_list, list) {
392 pr_notice(NAME "purging non-iounmapped "
393 "trace @0x%08lx, size 0x%lx.\n",
394 trace->probe.addr, trace->probe.len);
395 if (!nommiotrace)
396 unregister_kmmio_probe(&trace->probe);
397 }
398 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
399
400 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
401 list_del(&trace->list);
402 kfree(trace);
403 }
404}
405
406#ifdef CONFIG_HOTPLUG_CPU
407static cpumask_t downed_cpus;
408
409static void enter_uniprocessor(void)
410{
411 int cpu;
412 int err;
413
414 get_online_cpus();
415 downed_cpus = cpu_online_map;
416 cpu_clear(first_cpu(cpu_online_map), downed_cpus);
417 if (num_online_cpus() > 1)
418 pr_notice(NAME "Disabling non-boot CPUs...\n");
419 put_online_cpus();
420
421 for_each_cpu_mask(cpu, downed_cpus) {
422 err = cpu_down(cpu);
423 if (!err)
424 pr_info(NAME "CPU%d is down.\n", cpu);
425 else
426 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
427 }
428 if (num_online_cpus() > 1)
429 pr_warning(NAME "multiple CPUs still online, "
430 "may miss events.\n");
431}
432
433static void leave_uniprocessor(void)
434{
435 int cpu;
436 int err;
437
438 if (cpus_weight(downed_cpus) == 0)
439 return;
440 pr_notice(NAME "Re-enabling CPUs...\n");
441 for_each_cpu_mask(cpu, downed_cpus) {
442 err = cpu_up(cpu);
443 if (!err)
444 pr_info(NAME "enabled CPU%d.\n", cpu);
445 else
446 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
447 }
448}
449
450#else /* !CONFIG_HOTPLUG_CPU */
451static void enter_uniprocessor(void)
452{
453 if (num_online_cpus() > 1)
454 pr_warning(NAME "multiple CPUs are online, may miss events. "
455 "Suggest booting with maxcpus=1 kernel argument.\n");
456}
457
458static void leave_uniprocessor(void)
459{
460}
461#endif
462
463#if 0 /* XXX: out of order */
464static struct file_operations fops_marker = {
465 .owner = THIS_MODULE,
466 .write = write_marker
467};
468#endif
469
470void enable_mmiotrace(void)
471{
472 mutex_lock(&mmiotrace_mutex);
473 if (is_enabled())
474 goto out;
475
476#if 0 /* XXX: tracing does not support text entries */
477 marker_file = debugfs_create_file("marker", 0660, dir, NULL,
478 &fops_marker);
479 if (!marker_file)
480 pr_err(NAME "marker file creation failed.\n");
481#endif
482
483 if (nommiotrace)
484 pr_info(NAME "MMIO tracing disabled.\n");
485 enter_uniprocessor();
486 spin_lock_irq(&trace_lock);
487 atomic_inc(&mmiotrace_enabled);
488 spin_unlock_irq(&trace_lock);
489 pr_info(NAME "enabled.\n");
490out:
491 mutex_unlock(&mmiotrace_mutex);
492}
493
494void disable_mmiotrace(void)
495{
496 mutex_lock(&mmiotrace_mutex);
497 if (!is_enabled())
498 goto out;
499
500 spin_lock_irq(&trace_lock);
501 atomic_dec(&mmiotrace_enabled);
502 BUG_ON(is_enabled());
503 spin_unlock_irq(&trace_lock);
504
505 clear_trace_list(); /* guarantees: no more kmmio callbacks */
506 leave_uniprocessor();
507 if (marker_file) {
508 debugfs_remove(marker_file);
509 marker_file = NULL;
510 }
511
512 pr_info(NAME "disabled.\n");
513out:
514 mutex_unlock(&mmiotrace_mutex);
515}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fb6f2ab40dda..47f4e2e4a096 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
262 262
263 return pte_offset_kernel(pmd, address); 263 return pte_offset_kernel(pmd, address);
264} 264}
265EXPORT_SYMBOL_GPL(lookup_address);
265 266
266/* 267/*
267 * Set the new pmd in all the pgds we know about: 268 * Set the new pmd in all the pgds we know about:
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 000000000000..efa1911e20ca
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
23 * Copyright by Intel Crop., 2002
24 * Louis Zhuang (louis.zhuang@intel.com)
25 *
26 * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
27 */
28
29#include <linux/module.h>
30#include <linux/ptrace.h> /* struct pt_regs */
31#include "pf_in.h"
32
33#ifdef __i386__
34/* IA32 Manual 3, 2-1 */
35static unsigned char prefix_codes[] = {
36 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
37 0x65, 0x2E, 0x3E, 0x66, 0x67
38};
39/* IA32 Manual 3, 3-432*/
40static unsigned int reg_rop[] = {
41 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
42};
43static unsigned int reg_wop[] = { 0x88, 0x89 };
44static unsigned int imm_wop[] = { 0xC6, 0xC7 };
45/* IA32 Manual 3, 3-432*/
46static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
47static unsigned int rw32[] = {
48 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
49};
50static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
51static unsigned int mw16[] = { 0xB70F, 0xBF0F };
52static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
53static unsigned int mw64[] = {};
54#else /* not __i386__ */
55static unsigned char prefix_codes[] = {
56 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
57 0xF0, 0xF3, 0xF2,
58 /* REX Prefixes */
59 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
60 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
61};
62/* AMD64 Manual 3, Appendix A*/
63static unsigned int reg_rop[] = {
64 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
65};
66static unsigned int reg_wop[] = { 0x88, 0x89 };
67static unsigned int imm_wop[] = { 0xC6, 0xC7 };
68static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
69static unsigned int rw32[] = {
70 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
71};
72/* 8 bit only */
73static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
74/* 16 bit only */
75static unsigned int mw16[] = { 0xB70F, 0xBF0F };
76/* 16 or 32 bit */
77static unsigned int mw32[] = { 0xC7 };
78/* 16, 32 or 64 bit */
79static unsigned int mw64[] = { 0x89, 0x8B };
80#endif /* not __i386__ */
81
82static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
83 int *rexr)
84{
85 int i;
86 unsigned char *p = addr;
87 *shorted = 0;
88 *enlarged = 0;
89 *rexr = 0;
90
91restart:
92 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
93 if (*p == prefix_codes[i]) {
94 if (*p == 0x66)
95 *shorted = 1;
96#ifdef __amd64__
97 if ((*p & 0xf8) == 0x48)
98 *enlarged = 1;
99 if ((*p & 0xf4) == 0x44)
100 *rexr = 1;
101#endif
102 p++;
103 goto restart;
104 }
105 }
106
107 return (p - addr);
108}
109
110static int get_opcode(unsigned char *addr, unsigned int *opcode)
111{
112 int len;
113
114 if (*addr == 0x0F) {
115 /* 0x0F is extension instruction */
116 *opcode = *(unsigned short *)addr;
117 len = 2;
118 } else {
119 *opcode = *addr;
120 len = 1;
121 }
122
123 return len;
124}
125
126#define CHECK_OP_TYPE(opcode, array, type) \
127 for (i = 0; i < ARRAY_SIZE(array); i++) { \
128 if (array[i] == opcode) { \
129 rv = type; \
130 goto exit; \
131 } \
132 }
133
134enum reason_type get_ins_type(unsigned long ins_addr)
135{
136 unsigned int opcode;
137 unsigned char *p;
138 int shorted, enlarged, rexr;
139 int i;
140 enum reason_type rv = OTHERS;
141
142 p = (unsigned char *)ins_addr;
143 p += skip_prefix(p, &shorted, &enlarged, &rexr);
144 p += get_opcode(p, &opcode);
145
146 CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
147 CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
148 CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
149
150exit:
151 return rv;
152}
153#undef CHECK_OP_TYPE
154
155static unsigned int get_ins_reg_width(unsigned long ins_addr)
156{
157 unsigned int opcode;
158 unsigned char *p;
159 int i, shorted, enlarged, rexr;
160
161 p = (unsigned char *)ins_addr;
162 p += skip_prefix(p, &shorted, &enlarged, &rexr);
163 p += get_opcode(p, &opcode);
164
165 for (i = 0; i < ARRAY_SIZE(rw8); i++)
166 if (rw8[i] == opcode)
167 return 1;
168
169 for (i = 0; i < ARRAY_SIZE(rw32); i++)
170 if (rw32[i] == opcode)
171 return (shorted ? 2 : (enlarged ? 8 : 4));
172
173 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
174 return 0;
175}
176
177unsigned int get_ins_mem_width(unsigned long ins_addr)
178{
179 unsigned int opcode;
180 unsigned char *p;
181 int i, shorted, enlarged, rexr;
182
183 p = (unsigned char *)ins_addr;
184 p += skip_prefix(p, &shorted, &enlarged, &rexr);
185 p += get_opcode(p, &opcode);
186
187 for (i = 0; i < ARRAY_SIZE(mw8); i++)
188 if (mw8[i] == opcode)
189 return 1;
190
191 for (i = 0; i < ARRAY_SIZE(mw16); i++)
192 if (mw16[i] == opcode)
193 return 2;
194
195 for (i = 0; i < ARRAY_SIZE(mw32); i++)
196 if (mw32[i] == opcode)
197 return shorted ? 2 : 4;
198
199 for (i = 0; i < ARRAY_SIZE(mw64); i++)
200 if (mw64[i] == opcode)
201 return shorted ? 2 : (enlarged ? 8 : 4);
202
203 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
204 return 0;
205}
206
207/*
208 * Define register ident in mod/rm byte.
209 * Note: these are NOT the same as in ptrace-abi.h.
210 */
211enum {
212 arg_AL = 0,
213 arg_CL = 1,
214 arg_DL = 2,
215 arg_BL = 3,
216 arg_AH = 4,
217 arg_CH = 5,
218 arg_DH = 6,
219 arg_BH = 7,
220
221 arg_AX = 0,
222 arg_CX = 1,
223 arg_DX = 2,
224 arg_BX = 3,
225 arg_SP = 4,
226 arg_BP = 5,
227 arg_SI = 6,
228 arg_DI = 7,
229#ifdef __amd64__
230 arg_R8 = 8,
231 arg_R9 = 9,
232 arg_R10 = 10,
233 arg_R11 = 11,
234 arg_R12 = 12,
235 arg_R13 = 13,
236 arg_R14 = 14,
237 arg_R15 = 15
238#endif
239};
240
241static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
242{
243 unsigned char *rv = NULL;
244
245 switch (no) {
246 case arg_AL:
247 rv = (unsigned char *)&regs->ax;
248 break;
249 case arg_BL:
250 rv = (unsigned char *)&regs->bx;
251 break;
252 case arg_CL:
253 rv = (unsigned char *)&regs->cx;
254 break;
255 case arg_DL:
256 rv = (unsigned char *)&regs->dx;
257 break;
258 case arg_AH:
259 rv = 1 + (unsigned char *)&regs->ax;
260 break;
261 case arg_BH:
262 rv = 1 + (unsigned char *)&regs->bx;
263 break;
264 case arg_CH:
265 rv = 1 + (unsigned char *)&regs->cx;
266 break;
267 case arg_DH:
268 rv = 1 + (unsigned char *)&regs->dx;
269 break;
270#ifdef __amd64__
271 case arg_R8:
272 rv = (unsigned char *)&regs->r8;
273 break;
274 case arg_R9:
275 rv = (unsigned char *)&regs->r9;
276 break;
277 case arg_R10:
278 rv = (unsigned char *)&regs->r10;
279 break;
280 case arg_R11:
281 rv = (unsigned char *)&regs->r11;
282 break;
283 case arg_R12:
284 rv = (unsigned char *)&regs->r12;
285 break;
286 case arg_R13:
287 rv = (unsigned char *)&regs->r13;
288 break;
289 case arg_R14:
290 rv = (unsigned char *)&regs->r14;
291 break;
292 case arg_R15:
293 rv = (unsigned char *)&regs->r15;
294 break;
295#endif
296 default:
297 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
298 break;
299 }
300 return rv;
301}
302
303static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
304{
305 unsigned long *rv = NULL;
306
307 switch (no) {
308 case arg_AX:
309 rv = &regs->ax;
310 break;
311 case arg_BX:
312 rv = &regs->bx;
313 break;
314 case arg_CX:
315 rv = &regs->cx;
316 break;
317 case arg_DX:
318 rv = &regs->dx;
319 break;
320 case arg_SP:
321 rv = &regs->sp;
322 break;
323 case arg_BP:
324 rv = &regs->bp;
325 break;
326 case arg_SI:
327 rv = &regs->si;
328 break;
329 case arg_DI:
330 rv = &regs->di;
331 break;
332#ifdef __amd64__
333 case arg_R8:
334 rv = &regs->r8;
335 break;
336 case arg_R9:
337 rv = &regs->r9;
338 break;
339 case arg_R10:
340 rv = &regs->r10;
341 break;
342 case arg_R11:
343 rv = &regs->r11;
344 break;
345 case arg_R12:
346 rv = &regs->r12;
347 break;
348 case arg_R13:
349 rv = &regs->r13;
350 break;
351 case arg_R14:
352 rv = &regs->r14;
353 break;
354 case arg_R15:
355 rv = &regs->r15;
356 break;
357#endif
358 default:
359 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
360 }
361
362 return rv;
363}
364
365unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
366{
367 unsigned int opcode;
368 unsigned char mod_rm;
369 int reg;
370 unsigned char *p;
371 int i, shorted, enlarged, rexr;
372 unsigned long rv;
373
374 p = (unsigned char *)ins_addr;
375 p += skip_prefix(p, &shorted, &enlarged, &rexr);
376 p += get_opcode(p, &opcode);
377 for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
378 if (reg_rop[i] == opcode) {
379 rv = REG_READ;
380 goto do_work;
381 }
382
383 for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
384 if (reg_wop[i] == opcode) {
385 rv = REG_WRITE;
386 goto do_work;
387 }
388
389 printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
390 "0x%02x\n", opcode);
391 goto err;
392
393do_work:
394 mod_rm = *p;
395 reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
396 switch (get_ins_reg_width(ins_addr)) {
397 case 1:
398 return *get_reg_w8(reg, regs);
399
400 case 2:
401 return *(unsigned short *)get_reg_w32(reg, regs);
402
403 case 4:
404 return *(unsigned int *)get_reg_w32(reg, regs);
405
406#ifdef __amd64__
407 case 8:
408 return *(unsigned long *)get_reg_w32(reg, regs);
409#endif
410
411 default:
412 printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
413 }
414
415err:
416 return 0;
417}
418
419unsigned long get_ins_imm_val(unsigned long ins_addr)
420{
421 unsigned int opcode;
422 unsigned char mod_rm;
423 unsigned char mod;
424 unsigned char *p;
425 int i, shorted, enlarged, rexr;
426 unsigned long rv;
427
428 p = (unsigned char *)ins_addr;
429 p += skip_prefix(p, &shorted, &enlarged, &rexr);
430 p += get_opcode(p, &opcode);
431 for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
432 if (imm_wop[i] == opcode) {
433 rv = IMM_WRITE;
434 goto do_work;
435 }
436
437 printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
438 "0x%02x\n", opcode);
439 goto err;
440
441do_work:
442 mod_rm = *p;
443 mod = mod_rm >> 6;
444 p++;
445 switch (mod) {
446 case 0:
447 /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */
448 /* AMD64: XXX Check for address size prefix? */
449 if ((mod_rm & 0x7) == 0x5)
450 p += 4;
451 break;
452
453 case 1:
454 p += 1;
455 break;
456
457 case 2:
458 p += 4;
459 break;
460
461 case 3:
462 default:
463 printk(KERN_ERR "mmiotrace: not a memory access instruction "
464 "at 0x%lx, rm_mod=0x%02x\n",
465 ins_addr, mod_rm);
466 }
467
468 switch (get_ins_reg_width(ins_addr)) {
469 case 1:
470 return *(unsigned char *)p;
471
472 case 2:
473 return *(unsigned short *)p;
474
475 case 4:
476 return *(unsigned int *)p;
477
478#ifdef __amd64__
479 case 8:
480 return *(unsigned long *)p;
481#endif
482
483 default:
484 printk(KERN_ERR "mmiotrace: Error: width.\n");
485 }
486
487err:
488 return 0;
489}
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 000000000000..e05341a51a27
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22#ifndef __PF_H_
23#define __PF_H_
24
25enum reason_type {
26 NOT_ME, /* page fault is not in regions */
27 NOTHING, /* access others point in regions */
28 REG_READ, /* read from addr to reg */
29 REG_WRITE, /* write from reg to addr */
30 IMM_WRITE, /* write from imm to addr */
31 OTHERS /* Other instructions can not intercept */
32};
33
34enum reason_type get_ins_type(unsigned long ins_addr);
35unsigned int get_ins_mem_width(unsigned long ins_addr);
36unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
37unsigned long get_ins_imm_val(unsigned long ins_addr);
38
39#endif /* __PF_H_ */
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 000000000000..d877c5b423ef
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi>
3 */
4#include <linux/module.h>
5#include <linux/io.h>
6
7#define MODULE_NAME "testmmiotrace"
8
9static unsigned long mmio_address;
10module_param(mmio_address, ulong, 0);
11MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
12
13static void do_write_test(void __iomem *p)
14{
15 unsigned int i;
16 for (i = 0; i < 256; i++)
17 iowrite8(i, p + i);
18 for (i = 1024; i < (5 * 1024); i += 2)
19 iowrite16(i * 12 + 7, p + i);
20 for (i = (5 * 1024); i < (16 * 1024); i += 4)
21 iowrite32(i * 212371 + 13, p + i);
22}
23
24static void do_read_test(void __iomem *p)
25{
26 unsigned int i;
27 for (i = 0; i < 256; i++)
28 ioread8(p + i);
29 for (i = 1024; i < (5 * 1024); i += 2)
30 ioread16(p + i);
31 for (i = (5 * 1024); i < (16 * 1024); i += 4)
32 ioread32(p + i);
33}
34
35static void do_test(void)
36{
37 void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
38 if (!p) {
39 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
40 return;
41 }
42 do_write_test(p);
43 do_read_test(p);
44 iounmap(p);
45}
46
47static int __init init(void)
48{
49 if (mmio_address == 0) {
50 pr_err(MODULE_NAME ": you have to use the module argument "
51 "mmio_address.\n");
52 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
53 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
54 return -ENXIO;
55 }
56
57 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
58 "in PCI address space, and writing "
59 "rubbish in there.\n", mmio_address);
60 do_test();
61 return 0;
62}
63
64static void __exit cleanup(void)
65{
66 pr_debug(MODULE_NAME ": unloaded.\n");
67}
68
69module_init(init);
70module_exit(cleanup);
71MODULE_LICENSE("GPL");
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
23 23
24#define gtod vdso_vsyscall_gtod_data 24#define gtod vdso_vsyscall_gtod_data
25 25
26static long vdso_fallback_gettime(long clock, struct timespec *ts) 26notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
27{ 27{
28 long ret; 28 long ret;
29 asm("syscall" : "=a" (ret) : 29 asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
31 return ret; 31 return ret;
32} 32}
33 33
34static inline long vgetns(void) 34notrace static inline long vgetns(void)
35{ 35{
36 long v; 36 long v;
37 cycles_t (*vread)(void); 37 cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
40 return (v * gtod->clock.mult) >> gtod->clock.shift; 40 return (v * gtod->clock.mult) >> gtod->clock.shift;
41} 41}
42 42
43static noinline int do_realtime(struct timespec *ts) 43notrace static noinline int do_realtime(struct timespec *ts)
44{ 44{
45 unsigned long seq, ns; 45 unsigned long seq, ns;
46 do { 46 do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
54} 54}
55 55
56/* Copy of the version in kernel/time.c which we cannot directly access */ 56/* Copy of the version in kernel/time.c which we cannot directly access */
57static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) 57notrace static void
58vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
58{ 59{
59 while (nsec >= NSEC_PER_SEC) { 60 while (nsec >= NSEC_PER_SEC) {
60 nsec -= NSEC_PER_SEC; 61 nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
68 ts->tv_nsec = nsec; 69 ts->tv_nsec = nsec;
69} 70}
70 71
71static noinline int do_monotonic(struct timespec *ts) 72notrace static noinline int do_monotonic(struct timespec *ts)
72{ 73{
73 unsigned long seq, ns, secs; 74 unsigned long seq, ns, secs;
74 do { 75 do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
82 return 0; 83 return 0;
83} 84}
84 85
85int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 86notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
86{ 87{
87 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) 88 if (likely(gtod->sysctl_enabled && gtod->clock.vread))
88 switch (clock) { 89 switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
96int clock_gettime(clockid_t, struct timespec *) 97int clock_gettime(clockid_t, struct timespec *)
97 __attribute__((weak, alias("__vdso_clock_gettime"))); 98 __attribute__((weak, alias("__vdso_clock_gettime")));
98 99
99int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 100notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
100{ 101{
101 long ret; 102 long ret;
102 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { 103 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
13#include <asm/vgtod.h> 13#include <asm/vgtod.h>
14#include "vextern.h" 14#include "vextern.h"
15 15
16long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) 16notrace long
17__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
17{ 18{
18 unsigned int p; 19 unsigned int p;
19 20
diff --git a/include/asm-arm/ftrace.h b/include/asm-arm/ftrace.h
new file mode 100644
index 000000000000..584ef9a8e5a5
--- /dev/null
+++ b/include/asm-arm/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_ARM_FTRACE
2#define _ASM_ARM_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_ARM_FTRACE */
diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h
index c042194d3ab5..b1a37876942d 100644
--- a/include/asm-arm/kprobes.h
+++ b/include/asm-arm/kprobes.h
@@ -59,6 +59,7 @@ struct kprobe_ctlblk {
59}; 59};
60 60
61void arch_remove_kprobe(struct kprobe *); 61void arch_remove_kprobe(struct kprobe *);
62void kretprobe_trampoline(void);
62 63
63int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr); 64int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr);
64int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); 65int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h
new file mode 100644
index 000000000000..de921326cca8
--- /dev/null
+++ b/include/asm-powerpc/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_POWERPC_FTRACE
2#define _ASM_POWERPC_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void _mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_POWERPC_FTRACE */
diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h
index ad8c9f7fd0e3..f75a5fc64d2e 100644
--- a/include/asm-powerpc/hw_irq.h
+++ b/include/asm-powerpc/hw_irq.h
@@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void);
59 get_paca()->hard_enabled = 0; \ 59 get_paca()->hard_enabled = 0; \
60 } while(0) 60 } while(0)
61 61
62static inline int irqs_disabled_flags(unsigned long flags)
63{
64 return flags == 0;
65}
66
62#else 67#else
63 68
64#if defined(CONFIG_BOOKE) 69#if defined(CONFIG_BOOKE)
@@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags)
113#define hard_irq_enable() local_irq_enable() 118#define hard_irq_enable() local_irq_enable()
114#define hard_irq_disable() local_irq_disable() 119#define hard_irq_disable() local_irq_disable()
115 120
121static inline int irqs_disabled_flags(unsigned long flags)
122{
123 return (flags & MSR_EE) == 0;
124}
125
116#endif /* CONFIG_PPC64 */ 126#endif /* CONFIG_PPC64 */
117 127
118/* 128/*
diff --git a/include/asm-sparc64/ftrace.h b/include/asm-sparc64/ftrace.h
new file mode 100644
index 000000000000..d27716cd38c1
--- /dev/null
+++ b/include/asm-sparc64/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_SPARC64_FTRACE
2#define _ASM_SPARC64_FTRACE
3
4#ifdef CONFIG_MCOUNT
5#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void _mcount(void);
10#endif
11
12#endif
13
14#endif /* _ASM_SPARC64_FTRACE */
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 1f6a9ca10126..f6aa18eadf71 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
72static inline void alternatives_smp_switch(int smp) {} 72static inline void alternatives_smp_switch(int smp) {}
73#endif /* CONFIG_SMP */ 73#endif /* CONFIG_SMP */
74 74
75const unsigned char *const *find_nop_table(void);
76
75/* 77/*
76 * Alternative instructions for different CPU types or capabilities. 78 * Alternative instructions for different CPU types or capabilities.
77 * 79 *
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
new file mode 100644
index 000000000000..c184441133f2
--- /dev/null
+++ b/include/asm-x86/ftrace.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_X86_FTRACE
2#define _ASM_SPARC64_FTRACE
3
4#ifdef CONFIG_FTRACE
5#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
7
8#ifndef __ASSEMBLY__
9extern void mcount(void);
10#endif
11
12#endif /* CONFIG_FTRACE */
13
14#endif /* _ASM_X86_FTRACE */
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 17e7a1701c97..424acb48cd61 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -190,8 +190,6 @@ static inline void trace_hardirqs_fixup(void)
190#else 190#else
191 191
192#ifdef CONFIG_X86_64 192#ifdef CONFIG_X86_64
193#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
194#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
195#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk 193#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
196#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 194#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
197 TRACE_IRQS_ON; \ 195 TRACE_IRQS_ON; \
@@ -203,24 +201,6 @@ static inline void trace_hardirqs_fixup(void)
203 TRACE_IRQS_OFF; 201 TRACE_IRQS_OFF;
204 202
205#else 203#else
206#define ARCH_TRACE_IRQS_ON \
207 pushl %eax; \
208 pushl %ecx; \
209 pushl %edx; \
210 call trace_hardirqs_on; \
211 popl %edx; \
212 popl %ecx; \
213 popl %eax;
214
215#define ARCH_TRACE_IRQS_OFF \
216 pushl %eax; \
217 pushl %ecx; \
218 pushl %edx; \
219 call trace_hardirqs_off; \
220 popl %edx; \
221 popl %ecx; \
222 popl %eax;
223
224#define ARCH_LOCKDEP_SYS_EXIT \ 204#define ARCH_LOCKDEP_SYS_EXIT \
225 pushl %eax; \ 205 pushl %eax; \
226 pushl %ecx; \ 206 pushl %ecx; \
@@ -234,8 +214,8 @@ static inline void trace_hardirqs_fixup(void)
234#endif 214#endif
235 215
236#ifdef CONFIG_TRACE_IRQFLAGS 216#ifdef CONFIG_TRACE_IRQFLAGS
237# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON 217# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
238# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF 218# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
239#else 219#else
240# define TRACE_IRQS_ON 220# define TRACE_IRQS_ON
241# define TRACE_IRQS_OFF 221# define TRACE_IRQS_OFF
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 17b3700949bf..6b66ff905af0 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -24,7 +24,8 @@ enum vsyscall_num {
24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) 24 ((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
25#define __section_vsyscall_clock __attribute__ \ 25#define __section_vsyscall_clock __attribute__ \
26 ((unused, __section__ (".vsyscall_clock"),aligned(16))) 26 ((unused, __section__ (".vsyscall_clock"),aligned(16)))
27#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) 27#define __vsyscall_fn \
28 __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
28 29
29#define VGETCPU_RDTSCP 1 30#define VGETCPU_RDTSCP 1
30#define VGETCPU_LSL 2 31#define VGETCPU_LSL 2
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
new file mode 100644
index 000000000000..f368d041e02d
--- /dev/null
+++ b/include/linux/ftrace.h
@@ -0,0 +1,144 @@
1#ifndef _LINUX_FTRACE_H
2#define _LINUX_FTRACE_H
3
4#ifdef CONFIG_FTRACE
5
6#include <linux/linkage.h>
7#include <linux/fs.h>
8
9extern int ftrace_enabled;
10extern int
11ftrace_enable_sysctl(struct ctl_table *table, int write,
12 struct file *filp, void __user *buffer, size_t *lenp,
13 loff_t *ppos);
14
15typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
16
17struct ftrace_ops {
18 ftrace_func_t func;
19 struct ftrace_ops *next;
20};
21
22/*
23 * The ftrace_ops must be a static and should also
24 * be read_mostly. These functions do modify read_mostly variables
25 * so use them sparely. Never free an ftrace_op or modify the
26 * next pointer after it has been registered. Even after unregistering
27 * it, the next pointer may still be used internally.
28 */
29int register_ftrace_function(struct ftrace_ops *ops);
30int unregister_ftrace_function(struct ftrace_ops *ops);
31void clear_ftrace_function(void);
32
33extern void ftrace_stub(unsigned long a0, unsigned long a1);
34
35#else /* !CONFIG_FTRACE */
36# define register_ftrace_function(ops) do { } while (0)
37# define unregister_ftrace_function(ops) do { } while (0)
38# define clear_ftrace_function(ops) do { } while (0)
39#endif /* CONFIG_FTRACE */
40
41#ifdef CONFIG_DYNAMIC_FTRACE
42# define FTRACE_HASHBITS 10
43# define FTRACE_HASHSIZE (1<<FTRACE_HASHBITS)
44
45enum {
46 FTRACE_FL_FREE = (1 << 0),
47 FTRACE_FL_FAILED = (1 << 1),
48 FTRACE_FL_FILTER = (1 << 2),
49 FTRACE_FL_ENABLED = (1 << 3),
50 FTRACE_FL_NOTRACE = (1 << 4),
51 FTRACE_FL_CONVERTED = (1 << 5),
52 FTRACE_FL_FROZEN = (1 << 6),
53};
54
55struct dyn_ftrace {
56 struct hlist_node node;
57 unsigned long ip; /* address of mcount call-site */
58 unsigned long flags;
59};
60
61int ftrace_force_update(void);
62void ftrace_set_filter(unsigned char *buf, int len, int reset);
63
64/* defined in arch */
65extern int ftrace_ip_converted(unsigned long ip);
66extern unsigned char *ftrace_nop_replace(void);
67extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
68extern int ftrace_dyn_arch_init(void *data);
69extern int ftrace_mcount_set(unsigned long *data);
70extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
71 unsigned char *new_code);
72extern int ftrace_update_ftrace_func(ftrace_func_t func);
73extern void ftrace_caller(void);
74extern void ftrace_call(void);
75extern void mcount_call(void);
76
77extern int skip_trace(unsigned long ip);
78
79void ftrace_disable_daemon(void);
80void ftrace_enable_daemon(void);
81
82#else
83# define skip_trace(ip) ({ 0; })
84# define ftrace_force_update() ({ 0; })
85# define ftrace_set_filter(buf, len, reset) do { } while (0)
86# define ftrace_disable_daemon() do { } while (0)
87# define ftrace_enable_daemon() do { } while (0)
88#endif /* CONFIG_DYNAMIC_FTRACE */
89
90/* totally disable ftrace - can not re-enable after this */
91void ftrace_kill(void);
92void ftrace_kill_atomic(void);
93
94static inline void tracer_disable(void)
95{
96#ifdef CONFIG_FTRACE
97 ftrace_enabled = 0;
98#endif
99}
100
101#ifdef CONFIG_FRAME_POINTER
102/* TODO: need to fix this for ARM */
103# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
104# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
105# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
106# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
107# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
108# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
109# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
110#else
111# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
112# define CALLER_ADDR1 0UL
113# define CALLER_ADDR2 0UL
114# define CALLER_ADDR3 0UL
115# define CALLER_ADDR4 0UL
116# define CALLER_ADDR5 0UL
117# define CALLER_ADDR6 0UL
118#endif
119
120#ifdef CONFIG_IRQSOFF_TRACER
121 extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
122 extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
123#else
124# define time_hardirqs_on(a0, a1) do { } while (0)
125# define time_hardirqs_off(a0, a1) do { } while (0)
126#endif
127
128#ifdef CONFIG_PREEMPT_TRACER
129 extern void trace_preempt_on(unsigned long a0, unsigned long a1);
130 extern void trace_preempt_off(unsigned long a0, unsigned long a1);
131#else
132# define trace_preempt_on(a0, a1) do { } while (0)
133# define trace_preempt_off(a0, a1) do { } while (0)
134#endif
135
136#ifdef CONFIG_TRACING
137extern void
138ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
139#else
140static inline void
141ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
142#endif
143
144#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index e600c4e9b8c5..2b1c2e58566e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,10 +12,10 @@
12#define _LINUX_TRACE_IRQFLAGS_H 12#define _LINUX_TRACE_IRQFLAGS_H
13 13
14#ifdef CONFIG_TRACE_IRQFLAGS 14#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_hardirqs_on(void);
16 extern void trace_hardirqs_off(void);
17 extern void trace_softirqs_on(unsigned long ip); 15 extern void trace_softirqs_on(unsigned long ip);
18 extern void trace_softirqs_off(unsigned long ip); 16 extern void trace_softirqs_off(unsigned long ip);
17 extern void trace_hardirqs_on(void);
18 extern void trace_hardirqs_off(void);
19# define trace_hardirq_context(p) ((p)->hardirq_context) 19# define trace_hardirq_context(p) ((p)->hardirq_context)
20# define trace_softirq_context(p) ((p)->softirq_context) 20# define trace_softirq_context(p) ((p)->softirq_context)
21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) 21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
@@ -41,6 +41,15 @@
41# define INIT_TRACE_IRQFLAGS 41# define INIT_TRACE_IRQFLAGS
42#endif 42#endif
43 43
44#if defined(CONFIG_IRQSOFF_TRACER) || \
45 defined(CONFIG_PREEMPT_TRACER)
46 extern void stop_critical_timings(void);
47 extern void start_critical_timings(void);
48#else
49# define stop_critical_timings() do { } while (0)
50# define start_critical_timings() do { } while (0)
51#endif
52
44#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 53#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
45 54
46#include <asm/irqflags.h> 55#include <asm/irqflags.h>
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1036631ff4fa..04a3556bdea6 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
259struct jprobe; 259struct jprobe;
260struct kretprobe; 260struct kretprobe;
261 261
262static inline struct kprobe *get_kprobe(void *addr)
263{
264 return NULL;
265}
262static inline struct kprobe *kprobe_running(void) 266static inline struct kprobe *kprobe_running(void)
263{ 267{
264 return NULL; 268 return NULL;
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9fd1f859021b..56ba37394656 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -4,6 +4,8 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/linkage.h> 5#include <asm/linkage.h>
6 6
7#define notrace __attribute__((no_instrument_function))
8
7#ifdef __cplusplus 9#ifdef __cplusplus
8#define CPP_ASMLINKAGE extern "C" 10#define CPP_ASMLINKAGE extern "C"
9#else 11#else
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 430f6adf9762..1290653f9241 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -44,8 +44,8 @@ struct marker {
44 */ 44 */
45 char state; /* Marker state. */ 45 char state; /* Marker state. */
46 char ptype; /* probe type : 0 : single, 1 : multi */ 46 char ptype; /* probe type : 0 : single, 1 : multi */
47 void (*call)(const struct marker *mdata, /* Probe wrapper */ 47 /* Probe wrapper */
48 void *call_private, const char *fmt, ...); 48 void (*call)(const struct marker *mdata, void *call_private, ...);
49 struct marker_probe_closure single; 49 struct marker_probe_closure single;
50 struct marker_probe_closure *multi; 50 struct marker_probe_closure *multi;
51} __attribute__((aligned(8))); 51} __attribute__((aligned(8)));
@@ -58,8 +58,12 @@ struct marker {
58 * Make sure the alignment of the structure in the __markers section will 58 * Make sure the alignment of the structure in the __markers section will
59 * not add unwanted padding between the beginning of the section and the 59 * not add unwanted padding between the beginning of the section and the
60 * structure. Force alignment to the same alignment as the section start. 60 * structure. Force alignment to the same alignment as the section start.
61 *
62 * The "generic" argument controls which marker enabling mechanism must be used.
63 * If generic is true, a variable read is used.
64 * If generic is false, immediate values are used.
61 */ 65 */
62#define __trace_mark(name, call_private, format, args...) \ 66#define __trace_mark(generic, name, call_private, format, args...) \
63 do { \ 67 do { \
64 static const char __mstrtab_##name[] \ 68 static const char __mstrtab_##name[] \
65 __attribute__((section("__markers_strings"))) \ 69 __attribute__((section("__markers_strings"))) \
@@ -72,15 +76,14 @@ struct marker {
72 __mark_check_format(format, ## args); \ 76 __mark_check_format(format, ## args); \
73 if (unlikely(__mark_##name.state)) { \ 77 if (unlikely(__mark_##name.state)) { \
74 (*__mark_##name.call) \ 78 (*__mark_##name.call) \
75 (&__mark_##name, call_private, \ 79 (&__mark_##name, call_private, ## args);\
76 format, ## args); \
77 } \ 80 } \
78 } while (0) 81 } while (0)
79 82
80extern void marker_update_probe_range(struct marker *begin, 83extern void marker_update_probe_range(struct marker *begin,
81 struct marker *end); 84 struct marker *end);
82#else /* !CONFIG_MARKERS */ 85#else /* !CONFIG_MARKERS */
83#define __trace_mark(name, call_private, format, args...) \ 86#define __trace_mark(generic, name, call_private, format, args...) \
84 __mark_check_format(format, ## args) 87 __mark_check_format(format, ## args)
85static inline void marker_update_probe_range(struct marker *begin, 88static inline void marker_update_probe_range(struct marker *begin,
86 struct marker *end) 89 struct marker *end)
@@ -88,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin,
88#endif /* CONFIG_MARKERS */ 91#endif /* CONFIG_MARKERS */
89 92
90/** 93/**
91 * trace_mark - Marker 94 * trace_mark - Marker using code patching
92 * @name: marker name, not quoted. 95 * @name: marker name, not quoted.
93 * @format: format string 96 * @format: format string
94 * @args...: variable argument list 97 * @args...: variable argument list
95 * 98 *
96 * Places a marker. 99 * Places a marker using optimized code patching technique (imv_read())
100 * to be enabled when immediate values are present.
97 */ 101 */
98#define trace_mark(name, format, args...) \ 102#define trace_mark(name, format, args...) \
99 __trace_mark(name, NULL, format, ## args) 103 __trace_mark(0, name, NULL, format, ## args)
104
105/**
106 * _trace_mark - Marker using variable read
107 * @name: marker name, not quoted.
108 * @format: format string
109 * @args...: variable argument list
110 *
111 * Places a marker using a standard memory read (_imv_read()) to be
112 * enabled. Should be used for markers in code paths where instruction
113 * modification based enabling is not welcome. (__init and __exit functions,
114 * lockdep, some traps, printk).
115 */
116#define _trace_mark(name, format, args...) \
117 __trace_mark(1, name, NULL, format, ## args)
100 118
101/** 119/**
102 * MARK_NOARGS - Format string for a marker with no argument. 120 * MARK_NOARGS - Format string for a marker with no argument.
@@ -117,9 +135,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
117extern marker_probe_func __mark_empty_function; 135extern marker_probe_func __mark_empty_function;
118 136
119extern void marker_probe_cb(const struct marker *mdata, 137extern void marker_probe_cb(const struct marker *mdata,
120 void *call_private, const char *fmt, ...); 138 void *call_private, ...);
121extern void marker_probe_cb_noarg(const struct marker *mdata, 139extern void marker_probe_cb_noarg(const struct marker *mdata,
122 void *call_private, const char *fmt, ...); 140 void *call_private, ...);
123 141
124/* 142/*
125 * Connect a probe to a marker. 143 * Connect a probe to a marker.
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
new file mode 100644
index 000000000000..61d19e1b7a0b
--- /dev/null
+++ b/include/linux/mmiotrace.h
@@ -0,0 +1,85 @@
1#ifndef MMIOTRACE_H
2#define MMIOTRACE_H
3
4#include <linux/types.h>
5#include <linux/list.h>
6
7struct kmmio_probe;
8struct pt_regs;
9
10typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
11 struct pt_regs *, unsigned long addr);
12typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
13 unsigned long condition, struct pt_regs *);
14
15struct kmmio_probe {
16 struct list_head list; /* kmmio internal list */
17 unsigned long addr; /* start location of the probe point */
18 unsigned long len; /* length of the probe region */
19 kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
20 kmmio_post_handler_t post_handler; /* Called after addr is executed */
21 void *private;
22};
23
24/* kmmio is active by some kmmio_probes? */
25static inline int is_kmmio_active(void)
26{
27 extern unsigned int kmmio_count;
28 return kmmio_count;
29}
30
31extern int register_kmmio_probe(struct kmmio_probe *p);
32extern void unregister_kmmio_probe(struct kmmio_probe *p);
33
34/* Called from page fault handler. */
35extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
36
37/* Called from ioremap.c */
38#ifdef CONFIG_MMIOTRACE
39extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
40 void __iomem *addr);
41extern void mmiotrace_iounmap(volatile void __iomem *addr);
42#else
43static inline void mmiotrace_ioremap(resource_size_t offset,
44 unsigned long size, void __iomem *addr)
45{
46}
47
48static inline void mmiotrace_iounmap(volatile void __iomem *addr)
49{
50}
51#endif /* CONFIG_MMIOTRACE_HOOKS */
52
53enum mm_io_opcode {
54 MMIO_READ = 0x1, /* struct mmiotrace_rw */
55 MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
56 MMIO_PROBE = 0x3, /* struct mmiotrace_map */
57 MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
58 MMIO_MARKER = 0x5, /* raw char data */
59 MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
60};
61
62struct mmiotrace_rw {
63 resource_size_t phys; /* PCI address of register */
64 unsigned long value;
65 unsigned long pc; /* optional program counter */
66 int map_id;
67 unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
68 unsigned char width; /* size of register access in bytes */
69};
70
71struct mmiotrace_map {
72 resource_size_t phys; /* base address in PCI space */
73 unsigned long virt; /* base virtual address */
74 unsigned long len; /* mapping size */
75 int map_id;
76 unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
77};
78
79/* in kernel/trace/trace_mmiotrace.c */
80extern void enable_mmiotrace(void);
81extern void disable_mmiotrace(void);
82extern void mmio_trace_rw(struct mmiotrace_rw *rw);
83extern void mmio_trace_mapping(struct mmiotrace_map *map);
84
85#endif /* MMIOTRACE_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 23f0c54175cd..72b1a10a59b6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,7 +10,7 @@
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11#include <linux/list.h> 11#include <linux/list.h>
12 12
13#ifdef CONFIG_DEBUG_PREEMPT 13#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
14 extern void add_preempt_count(int val); 14 extern void add_preempt_count(int val);
15 extern void sub_preempt_count(int val); 15 extern void sub_preempt_count(int val);
16#else 16#else
@@ -52,6 +52,34 @@ do { \
52 preempt_check_resched(); \ 52 preempt_check_resched(); \
53} while (0) 53} while (0)
54 54
55/* For debugging and tracer internals only! */
56#define add_preempt_count_notrace(val) \
57 do { preempt_count() += (val); } while (0)
58#define sub_preempt_count_notrace(val) \
59 do { preempt_count() -= (val); } while (0)
60#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
61#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
62
63#define preempt_disable_notrace() \
64do { \
65 inc_preempt_count_notrace(); \
66 barrier(); \
67} while (0)
68
69#define preempt_enable_no_resched_notrace() \
70do { \
71 barrier(); \
72 dec_preempt_count_notrace(); \
73} while (0)
74
75/* preempt_check_resched is OK to trace */
76#define preempt_enable_notrace() \
77do { \
78 preempt_enable_no_resched_notrace(); \
79 barrier(); \
80 preempt_check_resched(); \
81} while (0)
82
55#else 83#else
56 84
57#define preempt_disable() do { } while (0) 85#define preempt_disable() do { } while (0)
@@ -59,6 +87,10 @@ do { \
59#define preempt_enable() do { } while (0) 87#define preempt_enable() do { } while (0)
60#define preempt_check_resched() do { } while (0) 88#define preempt_check_resched() do { } while (0)
61 89
90#define preempt_disable_notrace() do { } while (0)
91#define preempt_enable_no_resched_notrace() do { } while (0)
92#define preempt_enable_notrace() do { } while (0)
93
62#endif 94#endif
63 95
64#ifdef CONFIG_PREEMPT_NOTIFIERS 96#ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f6cd60f2de63..5d1af10b90c3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -245,6 +245,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
245extern void init_idle(struct task_struct *idle, int cpu); 245extern void init_idle(struct task_struct *idle, int cpu);
246extern void init_idle_bootup_task(struct task_struct *idle); 246extern void init_idle_bootup_task(struct task_struct *idle);
247 247
248extern int runqueue_is_locked(void);
249
248extern cpumask_t nohz_cpu_mask; 250extern cpumask_t nohz_cpu_mask;
249#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 251#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
250extern int select_nohz_load_balancer(int cpu); 252extern int select_nohz_load_balancer(int cpu);
@@ -2132,6 +2134,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
2132} 2134}
2133#endif 2135#endif
2134 2136
2137#ifdef CONFIG_TRACING
2138extern void
2139__trace_special(void *__tr, void *__data,
2140 unsigned long arg1, unsigned long arg2, unsigned long arg3);
2141#else
2142static inline void
2143__trace_special(void *__tr, void *__data,
2144 unsigned long arg1, unsigned long arg2, unsigned long arg3)
2145{
2146}
2147#endif
2148
2135extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); 2149extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
2136extern long sched_getaffinity(pid_t pid, cpumask_t *mask); 2150extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
2137 2151
@@ -2226,6 +2240,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
2226} 2240}
2227#endif /* CONFIG_MM_OWNER */ 2241#endif /* CONFIG_MM_OWNER */
2228 2242
2243#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
2244
2229#endif /* __KERNEL__ */ 2245#endif /* __KERNEL__ */
2230 2246
2231#endif 2247#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f462439cc288..bd91987c065f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable;
105extern int block_dump; 105extern int block_dump;
106extern int laptop_mode; 106extern int laptop_mode;
107 107
108extern unsigned long determine_dirtyable_memory(void);
109
108extern int dirty_ratio_handler(struct ctl_table *table, int write, 110extern int dirty_ratio_handler(struct ctl_table *table, int write,
109 struct file *filp, void __user *buffer, size_t *lenp, 111 struct file *filp, void __user *buffer, size_t *lenp,
110 loff_t *ppos); 112 loff_t *ppos);
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c55301112e0..f6328e16dfdd 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe
15
16ifdef CONFIG_FTRACE
17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg
20CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg
24endif
25
14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 26obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15obj-$(CONFIG_STACKTRACE) += stacktrace.o 27obj-$(CONFIG_STACKTRACE) += stacktrace.o
16obj-y += time/ 28obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 81obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
70obj-$(CONFIG_MARKERS) += marker.o 82obj-$(CONFIG_MARKERS) += marker.o
71obj-$(CONFIG_LATENCYTOP) += latencytop.o 83obj-$(CONFIG_LATENCYTOP) += latencytop.o
84obj-$(CONFIG_FTRACE) += trace/
85obj-$(CONFIG_TRACING) += trace/
72obj-$(CONFIG_SMP) += sched_cpupri.o 86obj-$(CONFIG_SMP) += sched_cpupri.o
73 87
74ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 88ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
diff --git a/kernel/fork.c b/kernel/fork.c
index b71ccd09fc8d..16fd412c756d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -910,7 +910,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
910 910
911 rt_mutex_init_task(p); 911 rt_mutex_init_task(p);
912 912
913#ifdef CONFIG_TRACE_IRQFLAGS 913#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
914 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 914 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
915 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 915 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
916#endif 916#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f087..65548eff029e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
39#include <linux/irqflags.h> 39#include <linux/irqflags.h>
40#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/sections.h> 44#include <asm/sections.h>
44 45
@@ -81,6 +82,8 @@ static int graph_lock(void)
81 __raw_spin_unlock(&lockdep_lock); 82 __raw_spin_unlock(&lockdep_lock);
82 return 0; 83 return 0;
83 } 84 }
85 /* prevent any recursions within lockdep from causing deadlocks */
86 current->lockdep_recursion++;
84 return 1; 87 return 1;
85} 88}
86 89
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
89 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 92 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
90 return DEBUG_LOCKS_WARN_ON(1); 93 return DEBUG_LOCKS_WARN_ON(1);
91 94
95 current->lockdep_recursion--;
92 __raw_spin_unlock(&lockdep_lock); 96 __raw_spin_unlock(&lockdep_lock);
93 return 0; 97 return 0;
94} 98}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
982 return 1; 986 return 1;
983} 987}
984 988
985#ifdef CONFIG_TRACE_IRQFLAGS 989#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
986/* 990/*
987 * Forwards and backwards subgraph searching, for the purposes of 991 * Forwards and backwards subgraph searching, for the purposes of
988 * proving that two subgraphs can be connected by a new dependency 992 * proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
1680static int mark_lock(struct task_struct *curr, struct held_lock *this, 1684static int mark_lock(struct task_struct *curr, struct held_lock *this,
1681 enum lock_usage_bit new_bit); 1685 enum lock_usage_bit new_bit);
1682 1686
1683#ifdef CONFIG_TRACE_IRQFLAGS 1687#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1684 1688
1685/* 1689/*
1686 * print irq inversion bug: 1690 * print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
2013/* 2017/*
2014 * Hardirqs will be enabled: 2018 * Hardirqs will be enabled:
2015 */ 2019 */
2016void trace_hardirqs_on(void) 2020void trace_hardirqs_on_caller(unsigned long a0)
2017{ 2021{
2018 struct task_struct *curr = current; 2022 struct task_struct *curr = current;
2019 unsigned long ip; 2023 unsigned long ip;
2020 2024
2025 time_hardirqs_on(CALLER_ADDR0, a0);
2026
2021 if (unlikely(!debug_locks || current->lockdep_recursion)) 2027 if (unlikely(!debug_locks || current->lockdep_recursion))
2022 return; 2028 return;
2023 2029
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
2055 curr->hardirq_enable_event = ++curr->irq_events; 2061 curr->hardirq_enable_event = ++curr->irq_events;
2056 debug_atomic_inc(&hardirqs_on_events); 2062 debug_atomic_inc(&hardirqs_on_events);
2057} 2063}
2064EXPORT_SYMBOL(trace_hardirqs_on_caller);
2058 2065
2066void trace_hardirqs_on(void)
2067{
2068 trace_hardirqs_on_caller(CALLER_ADDR0);
2069}
2059EXPORT_SYMBOL(trace_hardirqs_on); 2070EXPORT_SYMBOL(trace_hardirqs_on);
2060 2071
2061/* 2072/*
2062 * Hardirqs were disabled: 2073 * Hardirqs were disabled:
2063 */ 2074 */
2064void trace_hardirqs_off(void) 2075void trace_hardirqs_off_caller(unsigned long a0)
2065{ 2076{
2066 struct task_struct *curr = current; 2077 struct task_struct *curr = current;
2067 2078
2079 time_hardirqs_off(CALLER_ADDR0, a0);
2080
2068 if (unlikely(!debug_locks || current->lockdep_recursion)) 2081 if (unlikely(!debug_locks || current->lockdep_recursion))
2069 return; 2082 return;
2070 2083
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
2082 } else 2095 } else
2083 debug_atomic_inc(&redundant_hardirqs_off); 2096 debug_atomic_inc(&redundant_hardirqs_off);
2084} 2097}
2098EXPORT_SYMBOL(trace_hardirqs_off_caller);
2085 2099
2100void trace_hardirqs_off(void)
2101{
2102 trace_hardirqs_off_caller(CALLER_ADDR0);
2103}
2086EXPORT_SYMBOL(trace_hardirqs_off); 2104EXPORT_SYMBOL(trace_hardirqs_off);
2087 2105
2088/* 2106/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
2246 * Mark a lock with a usage bit, and validate the state transition: 2264 * Mark a lock with a usage bit, and validate the state transition:
2247 */ 2265 */
2248static int mark_lock(struct task_struct *curr, struct held_lock *this, 2266static int mark_lock(struct task_struct *curr, struct held_lock *this,
2249 enum lock_usage_bit new_bit) 2267 enum lock_usage_bit new_bit)
2250{ 2268{
2251 unsigned int new_mask = 1 << new_bit, ret = 1; 2269 unsigned int new_mask = 1 << new_bit, ret = 1;
2252 2270
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
2686 * and also avoid lockdep recursion: 2704 * and also avoid lockdep recursion:
2687 */ 2705 */
2688void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2706void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2689 int trylock, int read, int check, unsigned long ip) 2707 int trylock, int read, int check, unsigned long ip)
2690{ 2708{
2691 unsigned long flags; 2709 unsigned long flags;
2692 2710
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2708 2726
2709EXPORT_SYMBOL_GPL(lock_acquire); 2727EXPORT_SYMBOL_GPL(lock_acquire);
2710 2728
2711void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) 2729void lock_release(struct lockdep_map *lock, int nested,
2730 unsigned long ip)
2712{ 2731{
2713 unsigned long flags; 2732 unsigned long flags;
2714 2733
diff --git a/kernel/marker.c b/kernel/marker.c
index b5a9fe1d50d5..1abfb923b761 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
55struct marker_entry { 55struct marker_entry {
56 struct hlist_node hlist; 56 struct hlist_node hlist;
57 char *format; 57 char *format;
58 void (*call)(const struct marker *mdata, /* Probe wrapper */ 58 /* Probe wrapper */
59 void *call_private, const char *fmt, ...); 59 void (*call)(const struct marker *mdata, void *call_private, ...);
60 struct marker_probe_closure single; 60 struct marker_probe_closure single;
61 struct marker_probe_closure *multi; 61 struct marker_probe_closure *multi;
62 int refcount; /* Number of times armed. 0 if disarmed. */ 62 int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
91 * marker_probe_cb Callback that prepares the variable argument list for probes. 91 * marker_probe_cb Callback that prepares the variable argument list for probes.
92 * @mdata: pointer of type struct marker 92 * @mdata: pointer of type struct marker
93 * @call_private: caller site private data 93 * @call_private: caller site private data
94 * @fmt: format string
95 * @...: Variable argument list. 94 * @...: Variable argument list.
96 * 95 *
97 * Since we do not use "typical" pointer based RCU in the 1 argument case, we 96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
98 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
99 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
100 */ 99 */
101void marker_probe_cb(const struct marker *mdata, void *call_private, 100void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
102 const char *fmt, ...)
103{ 101{
104 va_list args; 102 va_list args;
105 char ptype; 103 char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
120 /* Must read the ptr before private data. They are not data 118 /* Must read the ptr before private data. They are not data
121 * dependant, so we put an explicit smp_rmb() here. */ 119 * dependant, so we put an explicit smp_rmb() here. */
122 smp_rmb(); 120 smp_rmb();
123 va_start(args, fmt); 121 va_start(args, call_private);
124 func(mdata->single.probe_private, call_private, fmt, &args); 122 func(mdata->single.probe_private, call_private, mdata->format,
123 &args);
125 va_end(args); 124 va_end(args);
126 } else { 125 } else {
127 struct marker_probe_closure *multi; 126 struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
136 smp_read_barrier_depends(); 135 smp_read_barrier_depends();
137 multi = mdata->multi; 136 multi = mdata->multi;
138 for (i = 0; multi[i].func; i++) { 137 for (i = 0; multi[i].func; i++) {
139 va_start(args, fmt); 138 va_start(args, call_private);
140 multi[i].func(multi[i].probe_private, call_private, fmt, 139 multi[i].func(multi[i].probe_private, call_private,
141 &args); 140 mdata->format, &args);
142 va_end(args); 141 va_end(args);
143 } 142 }
144 } 143 }
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
150 * marker_probe_cb Callback that does not prepare the variable argument list. 149 * marker_probe_cb Callback that does not prepare the variable argument list.
151 * @mdata: pointer of type struct marker 150 * @mdata: pointer of type struct marker
152 * @call_private: caller site private data 151 * @call_private: caller site private data
153 * @fmt: format string
154 * @...: Variable argument list. 152 * @...: Variable argument list.
155 * 153 *
156 * Should be connected to markers "MARK_NOARGS". 154 * Should be connected to markers "MARK_NOARGS".
157 */ 155 */
158void marker_probe_cb_noarg(const struct marker *mdata, 156void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
159 void *call_private, const char *fmt, ...)
160{ 157{
161 va_list args; /* not initialized */ 158 va_list args; /* not initialized */
162 char ptype; 159 char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
172 /* Must read the ptr before private data. They are not data 169 /* Must read the ptr before private data. They are not data
173 * dependant, so we put an explicit smp_rmb() here. */ 170 * dependant, so we put an explicit smp_rmb() here. */
174 smp_rmb(); 171 smp_rmb();
175 func(mdata->single.probe_private, call_private, fmt, &args); 172 func(mdata->single.probe_private, call_private, mdata->format,
173 &args);
176 } else { 174 } else {
177 struct marker_probe_closure *multi; 175 struct marker_probe_closure *multi;
178 int i; 176 int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
186 smp_read_barrier_depends(); 184 smp_read_barrier_depends();
187 multi = mdata->multi; 185 multi = mdata->multi;
188 for (i = 0; multi[i].func; i++) 186 for (i = 0; multi[i].func; i++)
189 multi[i].func(multi[i].probe_private, call_private, fmt, 187 multi[i].func(multi[i].probe_private, call_private,
190 &args); 188 mdata->format, &args);
191 } 189 }
192 preempt_enable(); 190 preempt_enable();
193} 191}
diff --git a/kernel/printk.c b/kernel/printk.c
index 625d240d7ada..5d81a11321fd 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1046,7 +1046,9 @@ void release_console_sem(void)
1046 _log_end = log_end; 1046 _log_end = log_end;
1047 con_start = log_end; /* Flush */ 1047 con_start = log_end; /* Flush */
1048 spin_unlock(&logbuf_lock); 1048 spin_unlock(&logbuf_lock);
1049 stop_critical_timings(); /* don't trace print latency */
1049 call_console_drivers(_con_start, _log_end); 1050 call_console_drivers(_con_start, _log_end);
1051 start_critical_timings();
1050 local_irq_restore(flags); 1052 local_irq_restore(flags);
1051 } 1053 }
1052 console_locked = 0; 1054 console_locked = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 591d5e7f757a..c74b0d23c752 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
70#include <linux/bootmem.h> 70#include <linux/bootmem.h>
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h>
73 74
74#include <asm/tlb.h> 75#include <asm/tlb.h>
75#include <asm/irq_regs.h> 76#include <asm/irq_regs.h>
@@ -645,6 +646,24 @@ static inline void update_rq_clock(struct rq *rq)
645# define const_debug static const 646# define const_debug static const
646#endif 647#endif
647 648
649/**
650 * runqueue_is_locked
651 *
652 * Returns true if the current cpu runqueue is locked.
653 * This interface allows printk to be called with the runqueue lock
654 * held and know whether or not it is OK to wake up the klogd.
655 */
656int runqueue_is_locked(void)
657{
658 int cpu = get_cpu();
659 struct rq *rq = cpu_rq(cpu);
660 int ret;
661
662 ret = spin_is_locked(&rq->lock);
663 put_cpu();
664 return ret;
665}
666
648/* 667/*
649 * Debugging: various feature bits 668 * Debugging: various feature bits
650 */ 669 */
@@ -2318,6 +2337,9 @@ out_activate:
2318 success = 1; 2337 success = 1;
2319 2338
2320out_running: 2339out_running:
2340 trace_mark(kernel_sched_wakeup,
2341 "pid %d state %ld ## rq %p task %p rq->curr %p",
2342 p->pid, p->state, rq, p, rq->curr);
2321 check_preempt_curr(rq, p); 2343 check_preempt_curr(rq, p);
2322 2344
2323 p->state = TASK_RUNNING; 2345 p->state = TASK_RUNNING;
@@ -2450,6 +2472,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2450 p->sched_class->task_new(rq, p); 2472 p->sched_class->task_new(rq, p);
2451 inc_nr_running(rq); 2473 inc_nr_running(rq);
2452 } 2474 }
2475 trace_mark(kernel_sched_wakeup_new,
2476 "pid %d state %ld ## rq %p task %p rq->curr %p",
2477 p->pid, p->state, rq, p, rq->curr);
2453 check_preempt_curr(rq, p); 2478 check_preempt_curr(rq, p);
2454#ifdef CONFIG_SMP 2479#ifdef CONFIG_SMP
2455 if (p->sched_class->task_wake_up) 2480 if (p->sched_class->task_wake_up)
@@ -2622,6 +2647,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
2622 struct mm_struct *mm, *oldmm; 2647 struct mm_struct *mm, *oldmm;
2623 2648
2624 prepare_task_switch(rq, prev, next); 2649 prepare_task_switch(rq, prev, next);
2650 trace_mark(kernel_sched_schedule,
2651 "prev_pid %d next_pid %d prev_state %ld "
2652 "## rq %p prev %p next %p",
2653 prev->pid, next->pid, prev->state,
2654 rq, prev, next);
2625 mm = next->mm; 2655 mm = next->mm;
2626 oldmm = prev->active_mm; 2656 oldmm = prev->active_mm;
2627 /* 2657 /*
@@ -4221,26 +4251,44 @@ void scheduler_tick(void)
4221#endif 4251#endif
4222} 4252}
4223 4253
4224#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 4254#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4255 defined(CONFIG_PREEMPT_TRACER))
4256
4257static inline unsigned long get_parent_ip(unsigned long addr)
4258{
4259 if (in_lock_functions(addr)) {
4260 addr = CALLER_ADDR2;
4261 if (in_lock_functions(addr))
4262 addr = CALLER_ADDR3;
4263 }
4264 return addr;
4265}
4225 4266
4226void __kprobes add_preempt_count(int val) 4267void __kprobes add_preempt_count(int val)
4227{ 4268{
4269#ifdef CONFIG_DEBUG_PREEMPT
4228 /* 4270 /*
4229 * Underflow? 4271 * Underflow?
4230 */ 4272 */
4231 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 4273 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
4232 return; 4274 return;
4275#endif
4233 preempt_count() += val; 4276 preempt_count() += val;
4277#ifdef CONFIG_DEBUG_PREEMPT
4234 /* 4278 /*
4235 * Spinlock count overflowing soon? 4279 * Spinlock count overflowing soon?
4236 */ 4280 */
4237 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 4281 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
4238 PREEMPT_MASK - 10); 4282 PREEMPT_MASK - 10);
4283#endif
4284 if (preempt_count() == val)
4285 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4239} 4286}
4240EXPORT_SYMBOL(add_preempt_count); 4287EXPORT_SYMBOL(add_preempt_count);
4241 4288
4242void __kprobes sub_preempt_count(int val) 4289void __kprobes sub_preempt_count(int val)
4243{ 4290{
4291#ifdef CONFIG_DEBUG_PREEMPT
4244 /* 4292 /*
4245 * Underflow? 4293 * Underflow?
4246 */ 4294 */
@@ -4252,7 +4300,10 @@ void __kprobes sub_preempt_count(int val)
4252 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && 4300 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
4253 !(preempt_count() & PREEMPT_MASK))) 4301 !(preempt_count() & PREEMPT_MASK)))
4254 return; 4302 return;
4303#endif
4255 4304
4305 if (preempt_count() == val)
4306 trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4256 preempt_count() -= val; 4307 preempt_count() -= val;
4257} 4308}
4258EXPORT_SYMBOL(sub_preempt_count); 4309EXPORT_SYMBOL(sub_preempt_count);
@@ -5566,7 +5617,7 @@ out_unlock:
5566 return retval; 5617 return retval;
5567} 5618}
5568 5619
5569static const char stat_nam[] = "RSDTtZX"; 5620static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5570 5621
5571void sched_show_task(struct task_struct *p) 5622void sched_show_task(struct task_struct *p)
5572{ 5623{
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e768cd..aaaeae8244e7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -31,6 +31,7 @@
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h>
34 35
35static noinline void __down(struct semaphore *sem); 36static noinline void __down(struct semaphore *sem);
36static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index ae28c8245123..a1fb54c93cdd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
436} 436}
437EXPORT_SYMBOL(_spin_trylock_bh); 437EXPORT_SYMBOL(_spin_trylock_bh);
438 438
439int in_lock_functions(unsigned long addr) 439notrace int in_lock_functions(unsigned long addr)
440{ 440{
441 /* Linker adds these: start and end of __lockfunc functions */ 441 /* Linker adds these: start and end of __lockfunc functions */
442 extern char __lock_text_start[], __lock_text_end[]; 442 extern char __lock_text_start[], __lock_text_end[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index fe8cdc80ff02..18943985ddee 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -46,6 +46,7 @@
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 47#include <linux/acpi.h>
48#include <linux/reboot.h> 48#include <linux/reboot.h>
49#include <linux/ftrace.h>
49 50
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
51#include <asm/processor.h> 52#include <asm/processor.h>
@@ -463,6 +464,16 @@ static struct ctl_table kern_table[] = {
463 .mode = 0644, 464 .mode = 0644,
464 .proc_handler = &proc_dointvec, 465 .proc_handler = &proc_dointvec,
465 }, 466 },
467#ifdef CONFIG_FTRACE
468 {
469 .ctl_name = CTL_UNNUMBERED,
470 .procname = "ftrace_enabled",
471 .data = &ftrace_enabled,
472 .maxlen = sizeof(int),
473 .mode = 0644,
474 .proc_handler = &ftrace_enable_sysctl,
475 },
476#endif
466#ifdef CONFIG_KMOD 477#ifdef CONFIG_KMOD
467 { 478 {
468 .ctl_name = KERN_MODPROBE, 479 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..263e9e6bbd60
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,135 @@
1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3#
4config HAVE_FTRACE
5 bool
6
7config HAVE_DYNAMIC_FTRACE
8 bool
9
10config TRACER_MAX_TRACE
11 bool
12
13config TRACING
14 bool
15 select DEBUG_FS
16 select STACKTRACE
17
18config FTRACE
19 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE
21 select FRAME_POINTER
22 select TRACING
23 select CONTEXT_SWITCH_TRACER
24 help
25 Enable the kernel to trace every kernel function. This is done
26 by using a compiler feature to insert a small, 5-byte No-Operation
27 instruction to the beginning of every kernel function, which NOP
28 sequence is then dynamically patched into a tracer call when
29 tracing is enabled by the administrator. If it's runtime disabled
30 (the bootup default), then the overhead of the instructions is very
31 small and not measurable even in micro-benchmarks.
32
33config IRQSOFF_TRACER
34 bool "Interrupts-off Latency Tracer"
35 default n
36 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME
38 depends on HAVE_FTRACE
39 select TRACE_IRQFLAGS
40 select TRACING
41 select TRACER_MAX_TRACE
42 help
43 This option measures the time spent in irqs-off critical
44 sections, with microsecond accuracy.
45
46 The default measurement method is a maximum search, which is
47 disabled by default and can be runtime (re-)started
48 via:
49
50 echo 0 > /debugfs/tracing/tracing_max_latency
51
52 (Note that kernel size and overhead increases with this option
53 enabled. This option and the preempt-off timing option can be
54 used together or separately.)
55
56config PREEMPT_TRACER
57 bool "Preemption-off Latency Tracer"
58 default n
59 depends on GENERIC_TIME
60 depends on PREEMPT
61 depends on HAVE_FTRACE
62 select TRACING
63 select TRACER_MAX_TRACE
64 help
65 This option measures the time spent in preemption off critical
66 sections, with microsecond accuracy.
67
68 The default measurement method is a maximum search, which is
69 disabled by default and can be runtime (re-)started
70 via:
71
72 echo 0 > /debugfs/tracing/tracing_max_latency
73
74 (Note that kernel size and overhead increases with this option
75 enabled. This option and the irqs-off timing option can be
76 used together or separately.)
77
78config SYSPROF_TRACER
79 bool "Sysprof Tracer"
80 depends on X86
81 select TRACING
82 help
83 This tracer provides the trace needed by the 'Sysprof' userspace
84 tool.
85
86config SCHED_TRACER
87 bool "Scheduling Latency Tracer"
88 depends on HAVE_FTRACE
89 select TRACING
90 select CONTEXT_SWITCH_TRACER
91 select TRACER_MAX_TRACE
92 help
93 This tracer tracks the latency of the highest priority task
94 to be scheduled in, starting from the point it has woken up.
95
96config CONTEXT_SWITCH_TRACER
97 bool "Trace process context switches"
98 depends on HAVE_FTRACE
99 select TRACING
100 select MARKERS
101 help
102 This tracer gets called from the context switch and records
103 all switching of tasks.
104
105config DYNAMIC_FTRACE
106 bool "enable/disable ftrace tracepoints dynamically"
107 depends on FTRACE
108 depends on HAVE_DYNAMIC_FTRACE
109 default y
110 help
111 This option will modify all the calls to ftrace dynamically
112 (will patch them out of the binary image and replaces them
113 with a No-Op instruction) as they are called. A table is
114 created to dynamically enable them again.
115
116 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
117 has native performance as long as no tracing is active.
118
119 The changes to the code are done by a kernel thread that
120 wakes up once a second and checks to see if any ftrace calls
121 were made. If so, it runs stop_machine (stops all CPUS)
122 and modifies the code to jump over the call to ftrace.
123
124config FTRACE_SELFTEST
125 bool
126
127config FTRACE_STARTUP_TEST
128 bool "Perform a startup test on ftrace"
129 depends on TRACING
130 select FTRACE_SELFTEST
131 help
132 This option performs a series of startup tests on ftrace. On bootup
133 a series of tests are made to verify that the tracer is
134 functioning properly. It will do tests on all the configured
135 tracers of ftrace.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..71d17de17288
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,24 @@
1
2# Do not instrument the tracer itself:
3
4ifdef CONFIG_FTRACE
5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7
8# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o
11endif
12
13obj-$(CONFIG_FTRACE) += libftrace.o
14
15obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
18obj-$(CONFIG_FTRACE) += trace_functions.o
19obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
22obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
23
24libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..4231a3dc224a
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1727 @@
1/*
2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code in the latency_tracer, that is:
11 *
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III
14 */
15
16#include <linux/stop_machine.h>
17#include <linux/clocksource.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/debugfs.h>
21#include <linux/hardirq.h>
22#include <linux/kthread.h>
23#include <linux/uaccess.h>
24#include <linux/kprobes.h>
25#include <linux/ftrace.h>
26#include <linux/sysctl.h>
27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h>
30
31#include <asm/ftrace.h>
32
33#include "trace.h"
34
35/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled;
38
39/*
40 * ftrace_disabled is set when an anomaly is discovered.
41 * ftrace_disabled is much stronger than ftrace_enabled.
42 */
43static int ftrace_disabled __read_mostly;
44
45static DEFINE_SPINLOCK(ftrace_lock);
46static DEFINE_MUTEX(ftrace_sysctl_lock);
47
48static struct ftrace_ops ftrace_list_end __read_mostly =
49{
50 .func = ftrace_stub,
51};
52
53static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
54ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
55
56static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
57{
58 struct ftrace_ops *op = ftrace_list;
59
60 /* in case someone actually ports this to alpha! */
61 read_barrier_depends();
62
63 while (op != &ftrace_list_end) {
64 /* silly alpha */
65 read_barrier_depends();
66 op->func(ip, parent_ip);
67 op = op->next;
68 };
69}
70
71/**
72 * clear_ftrace_function - reset the ftrace function
73 *
74 * This NULLs the ftrace function and in essence stops
75 * tracing. There may be lag
76 */
77void clear_ftrace_function(void)
78{
79 ftrace_trace_function = ftrace_stub;
80}
81
82static int __register_ftrace_function(struct ftrace_ops *ops)
83{
84 /* Should never be called by interrupts */
85 spin_lock(&ftrace_lock);
86
87 ops->next = ftrace_list;
88 /*
89 * We are entering ops into the ftrace_list but another
90 * CPU might be walking that list. We need to make sure
91 * the ops->next pointer is valid before another CPU sees
92 * the ops pointer included into the ftrace_list.
93 */
94 smp_wmb();
95 ftrace_list = ops;
96
97 if (ftrace_enabled) {
98 /*
99 * For one func, simply call it directly.
100 * For more than one func, call the chain.
101 */
102 if (ops->next == &ftrace_list_end)
103 ftrace_trace_function = ops->func;
104 else
105 ftrace_trace_function = ftrace_list_func;
106 }
107
108 spin_unlock(&ftrace_lock);
109
110 return 0;
111}
112
113static int __unregister_ftrace_function(struct ftrace_ops *ops)
114{
115 struct ftrace_ops **p;
116 int ret = 0;
117
118 spin_lock(&ftrace_lock);
119
120 /*
121 * If we are removing the last function, then simply point
122 * to the ftrace_stub.
123 */
124 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
125 ftrace_trace_function = ftrace_stub;
126 ftrace_list = &ftrace_list_end;
127 goto out;
128 }
129
130 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
131 if (*p == ops)
132 break;
133
134 if (*p != ops) {
135 ret = -1;
136 goto out;
137 }
138
139 *p = (*p)->next;
140
141 if (ftrace_enabled) {
142 /* If we only have one func left, then call that directly */
143 if (ftrace_list == &ftrace_list_end ||
144 ftrace_list->next == &ftrace_list_end)
145 ftrace_trace_function = ftrace_list->func;
146 }
147
148 out:
149 spin_unlock(&ftrace_lock);
150
151 return ret;
152}
153
154#ifdef CONFIG_DYNAMIC_FTRACE
155
156static struct task_struct *ftraced_task;
157
158enum {
159 FTRACE_ENABLE_CALLS = (1 << 0),
160 FTRACE_DISABLE_CALLS = (1 << 1),
161 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
162 FTRACE_ENABLE_MCOUNT = (1 << 3),
163 FTRACE_DISABLE_MCOUNT = (1 << 4),
164};
165
166static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock);
177
178struct ftrace_page {
179 struct ftrace_page *next;
180 unsigned long index;
181 struct dyn_ftrace records[];
182};
183
184#define ENTRIES_PER_PAGE \
185 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
186
187/* estimate from running different kernels */
188#define NR_TO_INIT 10000
189
190static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages;
192
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records;
200
201
202#ifdef CONFIG_KPROBES
203static inline void freeze_record(struct dyn_ftrace *rec)
204{
205 if (!(rec->flags & FTRACE_FL_FROZEN)) {
206 rec->flags |= FTRACE_FL_FROZEN;
207 frozen_record_count++;
208 }
209}
210
211static inline void unfreeze_record(struct dyn_ftrace *rec)
212{
213 if (rec->flags & FTRACE_FL_FROZEN) {
214 rec->flags &= ~FTRACE_FL_FROZEN;
215 frozen_record_count--;
216 }
217}
218
219static inline int record_frozen(struct dyn_ftrace *rec)
220{
221 return rec->flags & FTRACE_FL_FROZEN;
222}
223#else
224# define freeze_record(rec) ({ 0; })
225# define unfreeze_record(rec) ({ 0; })
226# define record_frozen(rec) ({ 0; })
227#endif /* CONFIG_KPROBES */
228
229int skip_trace(unsigned long ip)
230{
231 unsigned long fl;
232 struct dyn_ftrace *rec;
233 struct hlist_node *t;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264}
265
266static inline int
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{
269 struct dyn_ftrace *p;
270 struct hlist_node *t;
271 int found = 0;
272
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282
283static inline void
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288
289/* called from kstop_machine */
290static inline void ftrace_del_hash(struct dyn_ftrace *node)
291{
292 hlist_del(&node->node);
293}
294
295static void ftrace_free_rec(struct dyn_ftrace *rec)
296{
297 /* no locking, only called from kstop_machine */
298
299 rec->ip = (unsigned long)ftrace_free_records;
300 ftrace_free_records = rec;
301 rec->flags |= FTRACE_FL_FREE;
302}
303
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
305{
306 struct dyn_ftrace *rec;
307
308 /* First check for freed records */
309 if (ftrace_free_records) {
310 rec = ftrace_free_records;
311
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1);
314 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL;
318 }
319
320 ftrace_free_records = (void *)rec->ip;
321 memset(rec, 0, sizeof(*rec));
322 return rec;
323 }
324
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next)
327 return NULL;
328 ftrace_pages = ftrace_pages->next;
329 }
330
331 return &ftrace_pages->records[ftrace_pages->index++];
332}
333
334static void
335ftrace_record_ip(unsigned long ip)
336{
337 struct dyn_ftrace *node;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343
344 if (!ftrace_enabled || ftrace_disabled)
345 return;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382
383 node->ip = ip;
384
385 ftrace_add_hash(node, key);
386
387 ftraced_trigger = 1;
388
389 out_unlock:
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393
394 /* prevent recursion with scheduler */
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399}
400
401#define FTRACE_ADDR ((long)(ftrace_caller))
402
403static int
404__ftrace_replace_code(struct dyn_ftrace *rec,
405 unsigned char *old, unsigned char *new, int enable)
406{
407 unsigned long ip, fl;
408
409 ip = rec->ip;
410
411 if (ftrace_filtered && enable) {
412 /*
413 * If filtering is on:
414 *
415 * If this record is set to be filtered and
416 * is enabled then do nothing.
417 *
418 * If this record is set to be filtered and
419 * it is not enabled, enable it.
420 *
421 * If this record is not set to be filtered
422 * and it is not enabled do nothing.
423 *
424 * If this record is set not to trace then
425 * do nothing.
426 *
427 * If this record is set not to trace and
428 * it is enabled then disable it.
429 *
430 * If this record is not set to be filtered and
431 * it is enabled, disable it.
432 */
433
434 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
435 FTRACE_FL_ENABLED);
436
437 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
438 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
439 !fl || (fl == FTRACE_FL_NOTRACE))
440 return 0;
441
442 /*
443 * If it is enabled disable it,
444 * otherwise enable it!
445 */
446 if (fl & FTRACE_FL_ENABLED) {
447 /* swap new and old */
448 new = old;
449 old = ftrace_call_replace(ip, FTRACE_ADDR);
450 rec->flags &= ~FTRACE_FL_ENABLED;
451 } else {
452 new = ftrace_call_replace(ip, FTRACE_ADDR);
453 rec->flags |= FTRACE_FL_ENABLED;
454 }
455 } else {
456
457 if (enable) {
458 /*
459 * If this record is set not to trace and is
460 * not enabled, do nothing.
461 */
462 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
463 if (fl == FTRACE_FL_NOTRACE)
464 return 0;
465
466 new = ftrace_call_replace(ip, FTRACE_ADDR);
467 } else
468 old = ftrace_call_replace(ip, FTRACE_ADDR);
469
470 if (enable) {
471 if (rec->flags & FTRACE_FL_ENABLED)
472 return 0;
473 rec->flags |= FTRACE_FL_ENABLED;
474 } else {
475 if (!(rec->flags & FTRACE_FL_ENABLED))
476 return 0;
477 rec->flags &= ~FTRACE_FL_ENABLED;
478 }
479 }
480
481 return ftrace_modify_code(ip, old, new);
482}
483
484static void ftrace_replace_code(int enable)
485{
486 int i, failed;
487 unsigned char *new = NULL, *old = NULL;
488 struct dyn_ftrace *rec;
489 struct ftrace_page *pg;
490
491 if (enable)
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495
496 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) {
498 rec = &pg->records[i];
499
500 /* don't modify code that has already faulted */
501 if (rec->flags & FTRACE_FL_FAILED)
502 continue;
503
504 /* ignore updates to this record's mcount site */
505 if (get_kprobe((void *)rec->ip)) {
506 freeze_record(rec);
507 continue;
508 } else {
509 unfreeze_record(rec);
510 }
511
512 failed = __ftrace_replace_code(rec, old, new, enable);
513 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
514 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec);
519 }
520 }
521 }
522 }
523}
524
525static void ftrace_shutdown_replenish(void)
526{
527 if (ftrace_pages->next)
528 return;
529
530 /* allocate another page */
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
532}
533
534static int
535ftrace_code_disable(struct dyn_ftrace *rec)
536{
537 unsigned long ip;
538 unsigned char *nop, *call;
539 int failed;
540
541 ip = rec->ip;
542
543 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR);
545
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED;
549 return 0;
550 }
551 return 1;
552}
553
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data)
557{
558 unsigned long addr;
559 int *command = data;
560
561 if (*command & FTRACE_ENABLE_CALLS) {
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1);
568 tracing_on = 1;
569 } else if (*command & FTRACE_DISABLE_CALLS) {
570 ftrace_replace_code(0);
571 tracing_on = 0;
572 }
573
574 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function);
576
577 if (*command & FTRACE_ENABLE_MCOUNT) {
578 addr = (unsigned long)ftrace_record_ip;
579 ftrace_mcount_set(&addr);
580 } else if (*command & FTRACE_DISABLE_MCOUNT) {
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584
585 return 0;
586}
587
588static void ftrace_run_update_code(int command)
589{
590 stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
591}
592
593void ftrace_disable_daemon(void)
594{
595 /* Stop the daemon from calling kstop_machine */
596 mutex_lock(&ftraced_lock);
597 ftraced_stop = 1;
598 mutex_unlock(&ftraced_lock);
599
600 ftrace_force_update();
601}
602
603void ftrace_enable_daemon(void)
604{
605 mutex_lock(&ftraced_lock);
606 ftraced_stop = 0;
607 mutex_unlock(&ftraced_lock);
608
609 ftrace_force_update();
610}
611
612static ftrace_func_t saved_ftrace_func;
613
614static void ftrace_startup(void)
615{
616 int command = 0;
617
618 if (unlikely(ftrace_disabled))
619 return;
620
621 mutex_lock(&ftraced_lock);
622 ftraced_suspend++;
623 if (ftraced_suspend == 1)
624 command |= FTRACE_ENABLE_CALLS;
625
626 if (saved_ftrace_func != ftrace_trace_function) {
627 saved_ftrace_func = ftrace_trace_function;
628 command |= FTRACE_UPDATE_TRACE_FUNC;
629 }
630
631 if (!command || !ftrace_enabled)
632 goto out;
633
634 ftrace_run_update_code(command);
635 out:
636 mutex_unlock(&ftraced_lock);
637}
638
639static void ftrace_shutdown(void)
640{
641 int command = 0;
642
643 if (unlikely(ftrace_disabled))
644 return;
645
646 mutex_lock(&ftraced_lock);
647 ftraced_suspend--;
648 if (!ftraced_suspend)
649 command |= FTRACE_DISABLE_CALLS;
650
651 if (saved_ftrace_func != ftrace_trace_function) {
652 saved_ftrace_func = ftrace_trace_function;
653 command |= FTRACE_UPDATE_TRACE_FUNC;
654 }
655
656 if (!command || !ftrace_enabled)
657 goto out;
658
659 ftrace_run_update_code(command);
660 out:
661 mutex_unlock(&ftraced_lock);
662}
663
664static void ftrace_startup_sysctl(void)
665{
666 int command = FTRACE_ENABLE_MCOUNT;
667
668 if (unlikely(ftrace_disabled))
669 return;
670
671 mutex_lock(&ftraced_lock);
672 /* Force update next time */
673 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */
675 if (ftraced_suspend)
676 command |= FTRACE_ENABLE_CALLS;
677
678 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock);
680}
681
682static void ftrace_shutdown_sysctl(void)
683{
684 int command = FTRACE_DISABLE_MCOUNT;
685
686 if (unlikely(ftrace_disabled))
687 return;
688
689 mutex_lock(&ftraced_lock);
690 /* ftraced_suspend is true if ftrace is running */
691 if (ftraced_suspend)
692 command |= FTRACE_DISABLE_CALLS;
693
694 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock);
696}
697
698static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt;
701
702static int __ftrace_update_code(void *ignore)
703{
704 int i, save_ftrace_enabled;
705 cycle_t start, stop;
706 struct dyn_ftrace *p;
707 struct hlist_node *t, *n;
708 struct hlist_head *head, temp_list;
709
710 /* Don't be recording funcs now */
711 ftrace_record_suspend++;
712 save_ftrace_enabled = ftrace_enabled;
713 ftrace_enabled = 0;
714
715 start = ftrace_now(raw_smp_processor_id());
716 ftrace_update_cnt = 0;
717
718 /* No locks needed, the machine is stopped! */
719 for (i = 0; i < FTRACE_HASHSIZE; i++) {
720 INIT_HLIST_HEAD(&temp_list);
721 head = &ftrace_hash[i];
722
723 /* all CPUS are stopped, we are safe to modify code */
724 hlist_for_each_entry_safe(p, t, n, head, node) {
725 /* Skip over failed records which have not been
726 * freed. */
727 if (p->flags & FTRACE_FL_FAILED)
728 continue;
729
730 /* Unconverted records are always at the head of the
731 * hash bucket. Once we encounter a converted record,
732 * simply skip over to the next bucket. Saves ftraced
733 * some processor cycles (ftrace does its bid for
734 * global warming :-p ). */
735 if (p->flags & (FTRACE_FL_CONVERTED))
736 break;
737
738 /* Ignore updates to this record's mcount site.
739 * Reintroduce this record at the head of this
740 * bucket to attempt to "convert" it again if
741 * the kprobe on it is unregistered before the
742 * next run. */
743 if (get_kprobe((void *)p->ip)) {
744 ftrace_del_hash(p);
745 INIT_HLIST_NODE(&p->node);
746 hlist_add_head(&p->node, &temp_list);
747 freeze_record(p);
748 continue;
749 } else {
750 unfreeze_record(p);
751 }
752
753 /* convert record (i.e, patch mcount-call with NOP) */
754 if (ftrace_code_disable(p)) {
755 p->flags |= FTRACE_FL_CONVERTED;
756 ftrace_update_cnt++;
757 } else {
758 if ((system_state == SYSTEM_BOOTING) ||
759 !core_kernel_text(p->ip)) {
760 ftrace_del_hash(p);
761 ftrace_free_rec(p);
762 }
763 }
764 }
765
766 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
767 hlist_del(&p->node);
768 INIT_HLIST_NODE(&p->node);
769 hlist_add_head(&p->node, head);
770 }
771 }
772
773 stop = ftrace_now(raw_smp_processor_id());
774 ftrace_update_time = stop - start;
775 ftrace_update_tot_cnt += ftrace_update_cnt;
776 ftraced_trigger = 0;
777
778 ftrace_enabled = save_ftrace_enabled;
779 ftrace_record_suspend--;
780
781 return 0;
782}
783
784static int ftrace_update_code(void)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{
837 struct ftrace_page *pg;
838 int cnt;
839 int i;
840
841 /* allocate a few pages */
842 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
843 if (!ftrace_pages_start)
844 return -1;
845
846 /*
847 * Allocate a few more pages.
848 *
849 * TODO: have some parser search vmlinux before
850 * final linking to find all calls to ftrace.
851 * Then we can:
852 * a) know how many pages to allocate.
853 * and/or
854 * b) set up the table then.
855 *
856 * The dynamic code is still necessary for
857 * modules.
858 */
859
860 pg = ftrace_pages = ftrace_pages_start;
861
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
863
864 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
866
867 /* If we fail, we'll try later anyway */
868 if (!pg->next)
869 break;
870
871 pg = pg->next;
872 }
873
874 return 0;
875}
876
877enum {
878 FTRACE_ITER_FILTER = (1 << 0),
879 FTRACE_ITER_CONT = (1 << 1),
880 FTRACE_ITER_NOTRACE = (1 << 2),
881 FTRACE_ITER_FAILURES = (1 << 3),
882};
883
884#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
885
886struct ftrace_iterator {
887 loff_t pos;
888 struct ftrace_page *pg;
889 unsigned idx;
890 unsigned flags;
891 unsigned char buffer[FTRACE_BUFF_MAX+1];
892 unsigned buffer_idx;
893 unsigned filtered;
894};
895
896static void *
897t_next(struct seq_file *m, void *v, loff_t *pos)
898{
899 struct ftrace_iterator *iter = m->private;
900 struct dyn_ftrace *rec = NULL;
901
902 (*pos)++;
903
904 retry:
905 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) {
907 iter->pg = iter->pg->next;
908 iter->idx = 0;
909 goto retry;
910 }
911 } else {
912 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) ||
915
916 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919
920 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) ||
922
923 ((iter->flags & FTRACE_ITER_NOTRACE) &&
924 !(rec->flags & FTRACE_FL_NOTRACE))) {
925 rec = NULL;
926 goto retry;
927 }
928 }
929
930 iter->pos = *pos;
931
932 return rec;
933}
934
935static void *t_start(struct seq_file *m, loff_t *pos)
936{
937 struct ftrace_iterator *iter = m->private;
938 void *p = NULL;
939 loff_t l = -1;
940
941 if (*pos != iter->pos) {
942 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
943 ;
944 } else {
945 l = *pos;
946 p = t_next(m, p, &l);
947 }
948
949 return p;
950}
951
952static void t_stop(struct seq_file *m, void *p)
953{
954}
955
956static int t_show(struct seq_file *m, void *v)
957{
958 struct dyn_ftrace *rec = v;
959 char str[KSYM_SYMBOL_LEN];
960
961 if (!rec)
962 return 0;
963
964 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
965
966 seq_printf(m, "%s\n", str);
967
968 return 0;
969}
970
971static struct seq_operations show_ftrace_seq_ops = {
972 .start = t_start,
973 .next = t_next,
974 .stop = t_stop,
975 .show = t_show,
976};
977
978static int
979ftrace_avail_open(struct inode *inode, struct file *file)
980{
981 struct ftrace_iterator *iter;
982 int ret;
983
984 if (unlikely(ftrace_disabled))
985 return -ENODEV;
986
987 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
988 if (!iter)
989 return -ENOMEM;
990
991 iter->pg = ftrace_pages_start;
992 iter->pos = -1;
993
994 ret = seq_open(file, &show_ftrace_seq_ops);
995 if (!ret) {
996 struct seq_file *m = file->private_data;
997
998 m->private = iter;
999 } else {
1000 kfree(iter);
1001 }
1002
1003 return ret;
1004}
1005
1006int ftrace_avail_release(struct inode *inode, struct file *file)
1007{
1008 struct seq_file *m = (struct seq_file *)file->private_data;
1009 struct ftrace_iterator *iter = m->private;
1010
1011 seq_release(inode, file);
1012 kfree(iter);
1013
1014 return 0;
1015}
1016
1017static int
1018ftrace_failures_open(struct inode *inode, struct file *file)
1019{
1020 int ret;
1021 struct seq_file *m;
1022 struct ftrace_iterator *iter;
1023
1024 ret = ftrace_avail_open(inode, file);
1025 if (!ret) {
1026 m = (struct seq_file *)file->private_data;
1027 iter = (struct ftrace_iterator *)m->private;
1028 iter->flags = FTRACE_ITER_FAILURES;
1029 }
1030
1031 return ret;
1032}
1033
1034
1035static void ftrace_filter_reset(int enable)
1036{
1037 struct ftrace_page *pg;
1038 struct dyn_ftrace *rec;
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i;
1041
1042 /* keep kstop machine from running */
1043 preempt_disable();
1044 if (enable)
1045 ftrace_filtered = 0;
1046 pg = ftrace_pages_start;
1047 while (pg) {
1048 for (i = 0; i < pg->index; i++) {
1049 rec = &pg->records[i];
1050 if (rec->flags & FTRACE_FL_FAILED)
1051 continue;
1052 rec->flags &= ~type;
1053 }
1054 pg = pg->next;
1055 }
1056 preempt_enable();
1057}
1058
1059static int
1060ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1061{
1062 struct ftrace_iterator *iter;
1063 int ret = 0;
1064
1065 if (unlikely(ftrace_disabled))
1066 return -ENODEV;
1067
1068 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1069 if (!iter)
1070 return -ENOMEM;
1071
1072 mutex_lock(&ftrace_regex_lock);
1073 if ((file->f_mode & FMODE_WRITE) &&
1074 !(file->f_flags & O_APPEND))
1075 ftrace_filter_reset(enable);
1076
1077 if (file->f_mode & FMODE_READ) {
1078 iter->pg = ftrace_pages_start;
1079 iter->pos = -1;
1080 iter->flags = enable ? FTRACE_ITER_FILTER :
1081 FTRACE_ITER_NOTRACE;
1082
1083 ret = seq_open(file, &show_ftrace_seq_ops);
1084 if (!ret) {
1085 struct seq_file *m = file->private_data;
1086 m->private = iter;
1087 } else
1088 kfree(iter);
1089 } else
1090 file->private_data = iter;
1091 mutex_unlock(&ftrace_regex_lock);
1092
1093 return ret;
1094}
1095
1096static int
1097ftrace_filter_open(struct inode *inode, struct file *file)
1098{
1099 return ftrace_regex_open(inode, file, 1);
1100}
1101
1102static int
1103ftrace_notrace_open(struct inode *inode, struct file *file)
1104{
1105 return ftrace_regex_open(inode, file, 0);
1106}
1107
1108static ssize_t
1109ftrace_regex_read(struct file *file, char __user *ubuf,
1110 size_t cnt, loff_t *ppos)
1111{
1112 if (file->f_mode & FMODE_READ)
1113 return seq_read(file, ubuf, cnt, ppos);
1114 else
1115 return -EPERM;
1116}
1117
1118static loff_t
1119ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1120{
1121 loff_t ret;
1122
1123 if (file->f_mode & FMODE_READ)
1124 ret = seq_lseek(file, offset, origin);
1125 else
1126 file->f_pos = ret = 1;
1127
1128 return ret;
1129}
1130
1131enum {
1132 MATCH_FULL,
1133 MATCH_FRONT_ONLY,
1134 MATCH_MIDDLE_ONLY,
1135 MATCH_END_ONLY,
1136};
1137
1138static void
1139ftrace_match(unsigned char *buff, int len, int enable)
1140{
1141 char str[KSYM_SYMBOL_LEN];
1142 char *search = NULL;
1143 struct ftrace_page *pg;
1144 struct dyn_ftrace *rec;
1145 int type = MATCH_FULL;
1146 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1147 unsigned i, match = 0, search_len = 0;
1148
1149 for (i = 0; i < len; i++) {
1150 if (buff[i] == '*') {
1151 if (!i) {
1152 search = buff + i + 1;
1153 type = MATCH_END_ONLY;
1154 search_len = len - (i + 1);
1155 } else {
1156 if (type == MATCH_END_ONLY) {
1157 type = MATCH_MIDDLE_ONLY;
1158 } else {
1159 match = i;
1160 type = MATCH_FRONT_ONLY;
1161 }
1162 buff[i] = 0;
1163 break;
1164 }
1165 }
1166 }
1167
1168 /* keep kstop machine from running */
1169 preempt_disable();
1170 if (enable)
1171 ftrace_filtered = 1;
1172 pg = ftrace_pages_start;
1173 while (pg) {
1174 for (i = 0; i < pg->index; i++) {
1175 int matched = 0;
1176 char *ptr;
1177
1178 rec = &pg->records[i];
1179 if (rec->flags & FTRACE_FL_FAILED)
1180 continue;
1181 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1182 switch (type) {
1183 case MATCH_FULL:
1184 if (strcmp(str, buff) == 0)
1185 matched = 1;
1186 break;
1187 case MATCH_FRONT_ONLY:
1188 if (memcmp(str, buff, match) == 0)
1189 matched = 1;
1190 break;
1191 case MATCH_MIDDLE_ONLY:
1192 if (strstr(str, search))
1193 matched = 1;
1194 break;
1195 case MATCH_END_ONLY:
1196 ptr = strstr(str, search);
1197 if (ptr && (ptr[search_len] == 0))
1198 matched = 1;
1199 break;
1200 }
1201 if (matched)
1202 rec->flags |= flag;
1203 }
1204 pg = pg->next;
1205 }
1206 preempt_enable();
1207}
1208
1209static ssize_t
1210ftrace_regex_write(struct file *file, const char __user *ubuf,
1211 size_t cnt, loff_t *ppos, int enable)
1212{
1213 struct ftrace_iterator *iter;
1214 char ch;
1215 size_t read = 0;
1216 ssize_t ret;
1217
1218 if (!cnt || cnt < 0)
1219 return 0;
1220
1221 mutex_lock(&ftrace_regex_lock);
1222
1223 if (file->f_mode & FMODE_READ) {
1224 struct seq_file *m = file->private_data;
1225 iter = m->private;
1226 } else
1227 iter = file->private_data;
1228
1229 if (!*ppos) {
1230 iter->flags &= ~FTRACE_ITER_CONT;
1231 iter->buffer_idx = 0;
1232 }
1233
1234 ret = get_user(ch, ubuf++);
1235 if (ret)
1236 goto out;
1237 read++;
1238 cnt--;
1239
1240 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1241 /* skip white space */
1242 while (cnt && isspace(ch)) {
1243 ret = get_user(ch, ubuf++);
1244 if (ret)
1245 goto out;
1246 read++;
1247 cnt--;
1248 }
1249
1250 if (isspace(ch)) {
1251 file->f_pos += read;
1252 ret = read;
1253 goto out;
1254 }
1255
1256 iter->buffer_idx = 0;
1257 }
1258
1259 while (cnt && !isspace(ch)) {
1260 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1261 iter->buffer[iter->buffer_idx++] = ch;
1262 else {
1263 ret = -EINVAL;
1264 goto out;
1265 }
1266 ret = get_user(ch, ubuf++);
1267 if (ret)
1268 goto out;
1269 read++;
1270 cnt--;
1271 }
1272
1273 if (isspace(ch)) {
1274 iter->filtered++;
1275 iter->buffer[iter->buffer_idx] = 0;
1276 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1277 iter->buffer_idx = 0;
1278 } else
1279 iter->flags |= FTRACE_ITER_CONT;
1280
1281
1282 file->f_pos += read;
1283
1284 ret = read;
1285 out:
1286 mutex_unlock(&ftrace_regex_lock);
1287
1288 return ret;
1289}
1290
1291static ssize_t
1292ftrace_filter_write(struct file *file, const char __user *ubuf,
1293 size_t cnt, loff_t *ppos)
1294{
1295 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
1296}
1297
1298static ssize_t
1299ftrace_notrace_write(struct file *file, const char __user *ubuf,
1300 size_t cnt, loff_t *ppos)
1301{
1302 return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
1303}
1304
1305static void
1306ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1307{
1308 if (unlikely(ftrace_disabled))
1309 return;
1310
1311 mutex_lock(&ftrace_regex_lock);
1312 if (reset)
1313 ftrace_filter_reset(enable);
1314 if (buf)
1315 ftrace_match(buf, len, enable);
1316 mutex_unlock(&ftrace_regex_lock);
1317}
1318
1319/**
1320 * ftrace_set_filter - set a function to filter on in ftrace
1321 * @buf - the string that holds the function filter text.
1322 * @len - the length of the string.
1323 * @reset - non zero to reset all filters before applying this filter.
1324 *
1325 * Filters denote which functions should be enabled when tracing is enabled.
1326 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
1327 */
1328void ftrace_set_filter(unsigned char *buf, int len, int reset)
1329{
1330 ftrace_set_regex(buf, len, reset, 1);
1331}
1332
1333/**
1334 * ftrace_set_notrace - set a function to not trace in ftrace
1335 * @buf - the string that holds the function notrace text.
1336 * @len - the length of the string.
1337 * @reset - non zero to reset all filters before applying this filter.
1338 *
1339 * Notrace Filters denote which functions should not be enabled when tracing
1340 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
1341 * for tracing.
1342 */
1343void ftrace_set_notrace(unsigned char *buf, int len, int reset)
1344{
1345 ftrace_set_regex(buf, len, reset, 0);
1346}
1347
1348static int
1349ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1350{
1351 struct seq_file *m = (struct seq_file *)file->private_data;
1352 struct ftrace_iterator *iter;
1353
1354 mutex_lock(&ftrace_regex_lock);
1355 if (file->f_mode & FMODE_READ) {
1356 iter = m->private;
1357
1358 seq_release(inode, file);
1359 } else
1360 iter = file->private_data;
1361
1362 if (iter->buffer_idx) {
1363 iter->filtered++;
1364 iter->buffer[iter->buffer_idx] = 0;
1365 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1366 }
1367
1368 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock);
1373 mutex_unlock(&ftrace_sysctl_lock);
1374
1375 kfree(iter);
1376 mutex_unlock(&ftrace_regex_lock);
1377 return 0;
1378}
1379
1380static int
1381ftrace_filter_release(struct inode *inode, struct file *file)
1382{
1383 return ftrace_regex_release(inode, file, 1);
1384}
1385
1386static int
1387ftrace_notrace_release(struct inode *inode, struct file *file)
1388{
1389 return ftrace_regex_release(inode, file, 0);
1390}
1391
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open,
1443 .read = seq_read,
1444 .llseek = seq_lseek,
1445 .release = ftrace_avail_release,
1446};
1447
1448static struct file_operations ftrace_failures_fops = {
1449 .open = ftrace_failures_open,
1450 .read = seq_read,
1451 .llseek = seq_lseek,
1452 .release = ftrace_avail_release,
1453};
1454
1455static struct file_operations ftrace_filter_fops = {
1456 .open = ftrace_filter_open,
1457 .read = ftrace_regex_read,
1458 .write = ftrace_filter_write,
1459 .llseek = ftrace_regex_lseek,
1460 .release = ftrace_filter_release,
1461};
1462
1463static struct file_operations ftrace_notrace_fops = {
1464 .open = ftrace_notrace_open,
1465 .read = ftrace_regex_read,
1466 .write = ftrace_notrace_write,
1467 .llseek = ftrace_regex_lseek,
1468 .release = ftrace_notrace_release,
1469};
1470
1471static struct file_operations ftraced_fops = {
1472 .open = tracing_open_generic,
1473 .read = ftraced_read,
1474 .write = ftraced_write,
1475};
1476
1477/**
1478 * ftrace_force_update - force an update to all recording ftrace functions
1479 */
1480int ftrace_force_update(void)
1481{
1482 int ret = 0;
1483
1484 if (unlikely(ftrace_disabled))
1485 return -ENODEV;
1486
1487 mutex_lock(&ftrace_sysctl_lock);
1488 mutex_lock(&ftraced_lock);
1489
1490 /*
1491 * If ftraced_trigger is not set, then there is nothing
1492 * to update.
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496
1497 mutex_unlock(&ftraced_lock);
1498 mutex_unlock(&ftrace_sysctl_lock);
1499
1500 return ret;
1501}
1502
1503static void ftrace_force_shutdown(void)
1504{
1505 struct task_struct *task;
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1507
1508 mutex_lock(&ftraced_lock);
1509 task = ftraced_task;
1510 ftraced_task = NULL;
1511 ftraced_suspend = -1;
1512 ftrace_run_update_code(command);
1513 mutex_unlock(&ftraced_lock);
1514
1515 if (task)
1516 kthread_stop(task);
1517}
1518
1519static __init int ftrace_init_debugfs(void)
1520{
1521 struct dentry *d_tracer;
1522 struct dentry *entry;
1523
1524 d_tracer = tracing_init_dentry();
1525
1526 entry = debugfs_create_file("available_filter_functions", 0444,
1527 d_tracer, NULL, &ftrace_avail_fops);
1528 if (!entry)
1529 pr_warning("Could not create debugfs "
1530 "'available_filter_functions' entry\n");
1531
1532 entry = debugfs_create_file("failures", 0444,
1533 d_tracer, NULL, &ftrace_failures_fops);
1534 if (!entry)
1535 pr_warning("Could not create debugfs 'failures' entry\n");
1536
1537 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1538 NULL, &ftrace_filter_fops);
1539 if (!entry)
1540 pr_warning("Could not create debugfs "
1541 "'set_ftrace_filter' entry\n");
1542
1543 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
1544 NULL, &ftrace_notrace_fops);
1545 if (!entry)
1546 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n");
1548
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1550 NULL, &ftraced_fops);
1551 if (!entry)
1552 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n");
1554 return 0;
1555}
1556
1557fs_initcall(ftrace_init_debugfs);
1558
1559static int __init ftrace_dynamic_init(void)
1560{
1561 struct task_struct *p;
1562 unsigned long addr;
1563 int ret;
1564
1565 addr = (unsigned long)ftrace_record_ip;
1566
1567 stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
1568
1569 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) {
1571 ret = (int)addr;
1572 goto failed;
1573 }
1574
1575 ret = ftrace_dyn_table_alloc();
1576 if (ret)
1577 goto failed;
1578
1579 p = kthread_run(ftraced, NULL, "ftraced");
1580 if (IS_ERR(p)) {
1581 ret = -1;
1582 goto failed;
1583 }
1584
1585 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587
1588 return 0;
1589
1590 failed:
1591 ftrace_disabled = 1;
1592 return ret;
1593}
1594
1595core_initcall(ftrace_dynamic_init);
1596#else
1597# define ftrace_startup() do { } while (0)
1598# define ftrace_shutdown() do { } while (0)
1599# define ftrace_startup_sysctl() do { } while (0)
1600# define ftrace_shutdown_sysctl() do { } while (0)
1601# define ftrace_force_shutdown() do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */
1603
1604/**
1605 * ftrace_kill_atomic - kill ftrace from critical sections
1606 *
1607 * This function should be used by panic code. It stops ftrace
1608 * but in a not so nice way. If you need to simply kill ftrace
1609 * from a non-atomic section, use ftrace_kill.
1610 */
1611void ftrace_kill_atomic(void)
1612{
1613 ftrace_disabled = 1;
1614 ftrace_enabled = 0;
1615#ifdef CONFIG_DYNAMIC_FTRACE
1616 ftraced_suspend = -1;
1617#endif
1618 clear_ftrace_function();
1619}
1620
1621/**
1622 * ftrace_kill - totally shutdown ftrace
1623 *
1624 * This is a safety measure. If something was detected that seems
1625 * wrong, calling this function will keep ftrace from doing
1626 * any more modifications, and updates.
1627 * used when something went wrong.
1628 */
1629void ftrace_kill(void)
1630{
1631 mutex_lock(&ftrace_sysctl_lock);
1632 ftrace_disabled = 1;
1633 ftrace_enabled = 0;
1634
1635 clear_ftrace_function();
1636 mutex_unlock(&ftrace_sysctl_lock);
1637
1638 /* Try to totally disable ftrace */
1639 ftrace_force_shutdown();
1640}
1641
1642/**
1643 * register_ftrace_function - register a function for profiling
1644 * @ops - ops structure that holds the function for profiling.
1645 *
1646 * Register a function to be called by all functions in the
1647 * kernel.
1648 *
1649 * Note: @ops->func and all the functions it calls must be labeled
1650 * with "notrace", otherwise it will go into a
1651 * recursive loop.
1652 */
1653int register_ftrace_function(struct ftrace_ops *ops)
1654{
1655 int ret;
1656
1657 if (unlikely(ftrace_disabled))
1658 return -1;
1659
1660 mutex_lock(&ftrace_sysctl_lock);
1661 ret = __register_ftrace_function(ops);
1662 ftrace_startup();
1663 mutex_unlock(&ftrace_sysctl_lock);
1664
1665 return ret;
1666}
1667
1668/**
1669 * unregister_ftrace_function - unresgister a function for profiling.
1670 * @ops - ops structure that holds the function to unregister
1671 *
1672 * Unregister a function that was added to be called by ftrace profiling.
1673 */
1674int unregister_ftrace_function(struct ftrace_ops *ops)
1675{
1676 int ret;
1677
1678 mutex_lock(&ftrace_sysctl_lock);
1679 ret = __unregister_ftrace_function(ops);
1680 ftrace_shutdown();
1681 mutex_unlock(&ftrace_sysctl_lock);
1682
1683 return ret;
1684}
1685
1686int
1687ftrace_enable_sysctl(struct ctl_table *table, int write,
1688 struct file *file, void __user *buffer, size_t *lenp,
1689 loff_t *ppos)
1690{
1691 int ret;
1692
1693 if (unlikely(ftrace_disabled))
1694 return -ENODEV;
1695
1696 mutex_lock(&ftrace_sysctl_lock);
1697
1698 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1699
1700 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1701 goto out;
1702
1703 last_ftrace_enabled = ftrace_enabled;
1704
1705 if (ftrace_enabled) {
1706
1707 ftrace_startup_sysctl();
1708
1709 /* we are starting ftrace again */
1710 if (ftrace_list != &ftrace_list_end) {
1711 if (ftrace_list->next == &ftrace_list_end)
1712 ftrace_trace_function = ftrace_list->func;
1713 else
1714 ftrace_trace_function = ftrace_list_func;
1715 }
1716
1717 } else {
1718 /* stopping ftrace calls (just send to ftrace_stub) */
1719 ftrace_trace_function = ftrace_stub;
1720
1721 ftrace_shutdown_sysctl();
1722 }
1723
1724 out:
1725 mutex_unlock(&ftrace_sysctl_lock);
1726 return ret;
1727}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..868e121c8e38
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3161 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III
13 */
14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
19#include <linux/hardirq.h>
20#include <linux/linkage.h>
21#include <linux/uaccess.h>
22#include <linux/ftrace.h>
23#include <linux/module.h>
24#include <linux/percpu.h>
25#include <linux/ctype.h>
26#include <linux/init.h>
27#include <linux/poll.h>
28#include <linux/gfp.h>
29#include <linux/fs.h>
30#include <linux/kprobes.h>
31#include <linux/writeback.h>
32
33#include <linux/stacktrace.h>
34
35#include "trace.h"
36
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh;
39
40static unsigned long __read_mostly tracing_nr_buffers;
41static cpumask_t __read_mostly tracing_buffer_mask;
42
43#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask)
45
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1;
50
51static unsigned long tracing_pages_allocated;
52
53long
54ns2usecs(cycle_t nsec)
55{
56 nsec += 500;
57 do_div(nsec, 1000);
58 return nsec;
59}
60
61cycle_t ftrace_now(int cpu)
62{
63 return cpu_clock(cpu);
64}
65
66/*
67 * The global_trace is the descriptor that holds the tracing
68 * buffers for the live tracing. For each CPU, it contains
69 * a link list of pages that will store trace entries. The
70 * page descriptor of the pages in the memory is used to hold
71 * the link list by linking the lru item in the page descriptor
72 * to each of the pages in the buffer per CPU.
73 *
74 * For each active CPU there is a data field that holds the
75 * pages for the buffer for that CPU. Each CPU has the same number
76 * of pages allocated for its buffer.
77 */
78static struct trace_array global_trace;
79
80static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
81
82/*
83 * The max_tr is used to snapshot the global_trace when a maximum
84 * latency is reached. Some tracers will use this to store a maximum
85 * trace while it continues examining live traces.
86 *
87 * The buffers for the max_tr are set up the same as the global_trace.
88 * When a snapshot is taken, the link list of the max_tr is swapped
89 * with the link list of the global_trace and the buffers are reset for
90 * the global_trace so the tracing can continue.
91 */
92static struct trace_array max_tr;
93
94static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
95
96/* tracer_enabled is used to toggle activation of a tracer */
97static int tracer_enabled = 1;
98
99/* function tracing enabled */
100int ftrace_function_enabled;
101
102/*
103 * trace_nr_entries is the number of entries that is allocated
104 * for a buffer. Note, the number of entries is always rounded
105 * to ENTRIES_PER_PAGE.
106 */
107static unsigned long trace_nr_entries = 65536UL;
108
109/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly;
111
112/* current_trace points to the tracer that is currently active */
113static struct tracer *current_trace __read_mostly;
114
115/*
116 * max_tracer_type_len is used to simplify the allocating of
117 * buffers to read userspace tracer names. We keep track of
118 * the longest tracer name registered.
119 */
120static int max_tracer_type_len;
121
122/*
123 * trace_types_lock is used to protect the trace_types list.
124 * This lock is also used to keep user access serialized.
125 * Accesses from userspace will grab this lock while userspace
126 * activities happen inside the kernel.
127 */
128static DEFINE_MUTEX(trace_types_lock);
129
130/* trace_wait is a waitqueue for tasks blocked on trace_poll */
131static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
132
133/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/**
155 * trace_wake_up - wake up tasks waiting for trace input
156 *
157 * Simply wakes up any task that is blocked on the trace_wait
158 * queue. These is used with trace_poll for tasks polling the trace.
159 */
160void trace_wake_up(void)
161{
162 /*
163 * The runqueue_is_locked() can fail, but this is the best we
164 * have for now:
165 */
166 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
167 wake_up(&trace_wait);
168}
169
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
171
172static int __init set_nr_entries(char *str)
173{
174 unsigned long nr_entries;
175 int ret;
176
177 if (!str)
178 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries);
180 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0)
182 return 0;
183 trace_nr_entries = nr_entries;
184 return 1;
185}
186__setup("trace_entries=", set_nr_entries);
187
188unsigned long nsecs_to_usecs(unsigned long nsecs)
189{
190 return nsecs / 1000;
191}
192
193/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols.
211 */
212#define TRACE_ITER_SYM_MASK \
213 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
214
215/* These must match the bit postions in trace_iterator_flags */
216static const char *trace_options[] = {
217 "print-parent",
218 "sym-offset",
219 "sym-addr",
220 "verbose",
221 "raw",
222 "hex",
223 "bin",
224 "block",
225 "stacktrace",
226 "sched-tree",
227 NULL
228};
229
230/*
231 * ftrace_max_lock is used to protect the swapping of buffers
232 * when taking a max snapshot. The buffers themselves are
233 * protected by per_cpu spinlocks. But the action of the swap
234 * needs its own lock.
235 *
236 * This is defined as a raw_spinlock_t in order to help
237 * with performance when lockdep debugging is enabled.
238 */
239static raw_spinlock_t ftrace_max_lock =
240 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
241
242/*
243 * Copy the new maximum trace into the separate maximum-trace
244 * structure. (this way the maximum trace is permanently saved,
245 * for later retrieval via /debugfs/tracing/latency_trace)
246 */
247static void
248__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
249{
250 struct trace_array_cpu *data = tr->data[cpu];
251
252 max_tr.cpu = cpu;
253 max_tr.time_start = data->preempt_timestamp;
254
255 data = max_tr.data[cpu];
256 data->saved_latency = tracing_max_latency;
257
258 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
259 data->pid = tsk->pid;
260 data->uid = tsk->uid;
261 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
262 data->policy = tsk->policy;
263 data->rt_priority = tsk->rt_priority;
264
265 /* record this tasks comm */
266 tracing_record_cmdline(current);
267}
268
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/**
318 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor
320 * @fmt: printf format string
321 *
322 * The tracer may use either sequence operations or its own
323 * copy to user routines. To simplify formating of a trace
324 * trace_seq_printf is used to store strings into a special
325 * buffer (@s). Then the output may be either used by
326 * the sequencer or pulled into another buffer.
327 */
328int
329trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
330{
331 int len = (PAGE_SIZE - 1) - s->len;
332 va_list ap;
333 int ret;
334
335 if (!len)
336 return 0;
337
338 va_start(ap, fmt);
339 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
340 va_end(ap);
341
342 /* If we can't write it all, don't bother writing anything */
343 if (ret >= len)
344 return 0;
345
346 s->len += ret;
347
348 return len;
349}
350
351/**
352 * trace_seq_puts - trace sequence printing of simple string
353 * @s: trace sequence descriptor
354 * @str: simple string to record
355 *
356 * The tracer may use either the sequence operations or its own
357 * copy to user routines. This function records a simple string
358 * into a special buffer (@s) for later retrieval by a sequencer
359 * or other mechanism.
360 */
361static int
362trace_seq_puts(struct trace_seq *s, const char *str)
363{
364 int len = strlen(str);
365
366 if (len > ((PAGE_SIZE - 1) - s->len))
367 return 0;
368
369 memcpy(s->buffer + s->len, str, len);
370 s->len += len;
371
372 return len;
373}
374
375static int
376trace_seq_putc(struct trace_seq *s, unsigned char c)
377{
378 if (s->len >= (PAGE_SIZE - 1))
379 return 0;
380
381 s->buffer[s->len++] = c;
382
383 return 1;
384}
385
386static int
387trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
388{
389 if (len > ((PAGE_SIZE - 1) - s->len))
390 return 0;
391
392 memcpy(s->buffer + s->len, mem, len);
393 s->len += len;
394
395 return len;
396}
397
398#define HEX_CHARS 17
399static const char hex2asc[] = "0123456789abcdef";
400
401static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{
404 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j;
408
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) {
413#else
414 for (i = len-1, j = 0; i >= 0; i--) {
415#endif
416 byte = data[i];
417
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 }
421 hex[j++] = ' ';
422
423 return trace_seq_putmem(s, hex, j);
424}
425
426static void
427trace_seq_reset(struct trace_seq *s)
428{
429 s->len = 0;
430 s->readpos = 0;
431}
432
433ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
434{
435 int len;
436 int ret;
437
438 if (s->len <= s->readpos)
439 return -EBUSY;
440
441 len = s->len - s->readpos;
442 if (cnt > len)
443 cnt = len;
444 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
445 if (ret)
446 return -EFAULT;
447
448 s->readpos += len;
449 return cnt;
450}
451
452static void
453trace_print_seq(struct seq_file *m, struct trace_seq *s)
454{
455 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
456
457 s->buffer[len] = 0;
458 seq_puts(m, s->buffer);
459
460 trace_seq_reset(s);
461}
462
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer
494 * @tsk: the task with the latency
495 * @cpu: The cpu that initiated the trace.
496 *
497 * Flip the buffers between the @tr and the max_tr and record information
498 * about which task was the cause of this latency.
499 */
500void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{
503 struct trace_array_cpu *data;
504 int i;
505
506 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */
509 for_each_tracing_cpu(i) {
510 data = tr->data[i];
511 flip_trace(max_tr.data[i], data);
512 tracing_reset(data);
513 }
514
515 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock);
517}
518
519/**
520 * update_max_tr_single - only copy one trace over, and reset the rest
521 * @tr - tracer
522 * @tsk - task with the latency
523 * @cpu - the cpu of the buffer to copy.
524 *
525 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
526 */
527void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{
530 struct trace_array_cpu *data = tr->data[cpu];
531 int i;
532
533 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537
538 flip_trace(max_tr.data[cpu], data);
539 tracing_reset(data);
540
541 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock);
543}
544
545/**
546 * register_tracer - register a tracer with the ftrace system.
547 * @type - the plugin for the tracer
548 *
549 * Register a new plugin tracer.
550 */
551int register_tracer(struct tracer *type)
552{
553 struct tracer *t;
554 int len;
555 int ret = 0;
556
557 if (!type->name) {
558 pr_info("Tracer must have a name\n");
559 return -1;
560 }
561
562 mutex_lock(&trace_types_lock);
563 for (t = trace_types; t; t = t->next) {
564 if (strcmp(type->name, t->name) == 0) {
565 /* already found */
566 pr_info("Trace %s already registered\n",
567 type->name);
568 ret = -1;
569 goto out;
570 }
571 }
572
573#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl;
579 int i;
580 /*
581 * Run a selftest on this tracer.
582 * Here we reset the trace buffer, and set the current
583 * tracer to be this tracer. The tracer can then run some
584 * internal tracing to verify that everything is in order.
585 * If we fail, we do not register this tracer.
586 */
587 for_each_tracing_cpu(i) {
588 data = tr->data[i];
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 }
593 current_trace = type;
594 tr->ctrl = 0;
595 /* the test is responsible for initializing and enabling */
596 pr_info("Testing tracer %s: ", type->name);
597 ret = type->selftest(type, tr);
598 /* the test is responsible for resetting too */
599 current_trace = saved_tracer;
600 tr->ctrl = saved_ctrl;
601 if (ret) {
602 printk(KERN_CONT "FAILED!\n");
603 goto out;
604 }
605 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) {
607 data = tr->data[i];
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 }
612 printk(KERN_CONT "PASSED\n");
613 }
614#endif
615
616 type->next = trace_types;
617 trace_types = type;
618 len = strlen(type->name);
619 if (len > max_tracer_type_len)
620 max_tracer_type_len = len;
621
622 out:
623 mutex_unlock(&trace_types_lock);
624
625 return ret;
626}
627
628void unregister_tracer(struct tracer *type)
629{
630 struct tracer **t;
631 int len;
632
633 mutex_lock(&trace_types_lock);
634 for (t = &trace_types; *t; t = &(*t)->next) {
635 if (*t == type)
636 goto found;
637 }
638 pr_info("Trace %s not registered\n", type->name);
639 goto out;
640
641 found:
642 *t = (*t)->next;
643 if (strlen(type->name) != max_tracer_type_len)
644 goto out;
645
646 max_tracer_type_len = 0;
647 for (t = &trace_types; *t; t = &(*t)->next) {
648 len = strlen((*t)->name);
649 if (len > max_tracer_type_len)
650 max_tracer_type_len = len;
651 }
652 out:
653 mutex_unlock(&trace_types_lock);
654}
655
656void tracing_reset(struct trace_array_cpu *data)
657{
658 data->trace_idx = 0;
659 data->overrun = 0;
660 data->trace_head = data->trace_tail = head_page(data);
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663}
664
665#define SAVED_CMDLINES 128
666static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
667static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
668static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
669static int cmdline_idx;
670static DEFINE_SPINLOCK(trace_cmdline_lock);
671
672/* temporary disable recording */
673atomic_t trace_record_cmdline_disabled __read_mostly;
674
675static void trace_init_cmdlines(void)
676{
677 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
678 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
679 cmdline_idx = 0;
680}
681
682void trace_stop_cmdline_recording(void);
683
684static void trace_save_cmdline(struct task_struct *tsk)
685{
686 unsigned map;
687 unsigned idx;
688
689 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
690 return;
691
692 /*
693 * It's not the end of the world if we don't get
694 * the lock, but we also don't want to spin
695 * nor do we want to disable interrupts,
696 * so if we miss here, then better luck next time.
697 */
698 if (!spin_trylock(&trace_cmdline_lock))
699 return;
700
701 idx = map_pid_to_cmdline[tsk->pid];
702 if (idx >= SAVED_CMDLINES) {
703 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
704
705 map = map_cmdline_to_pid[idx];
706 if (map <= PID_MAX_DEFAULT)
707 map_pid_to_cmdline[map] = (unsigned)-1;
708
709 map_pid_to_cmdline[tsk->pid] = idx;
710
711 cmdline_idx = idx;
712 }
713
714 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
715
716 spin_unlock(&trace_cmdline_lock);
717}
718
719static char *trace_find_cmdline(int pid)
720{
721 char *cmdline = "<...>";
722 unsigned map;
723
724 if (!pid)
725 return "<idle>";
726
727 if (pid > PID_MAX_DEFAULT)
728 goto out;
729
730 map = map_pid_to_cmdline[pid];
731 if (map >= SAVED_CMDLINES)
732 goto out;
733
734 cmdline = saved_cmdlines[map];
735
736 out:
737 return cmdline;
738}
739
740void tracing_record_cmdline(struct task_struct *tsk)
741{
742 if (atomic_read(&trace_record_cmdline_disabled))
743 return;
744
745 trace_save_cmdline(tsk);
746}
747
748static inline struct list_head *
749trace_next_list(struct trace_array_cpu *data, struct list_head *next)
750{
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{
815 struct task_struct *tsk = current;
816 unsigned long pc;
817
818 pc = preempt_count();
819
820 entry->preempt_count = pc & 0xff;
821 entry->pid = (tsk) ? tsk->pid : 0;
822 entry->t = ftrace_now(raw_smp_processor_id());
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
827}
828
829void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags)
832{
833 struct trace_entry *entry;
834 unsigned long irq_flags;
835
836 raw_local_irq_save(irq_flags);
837 __raw_spin_lock(&data->lock);
838 entry = tracing_get_trace_entry(tr, data);
839 tracing_generic_entry_update(entry, flags);
840 entry->type = TRACE_FN;
841 entry->fn.ip = ip;
842 entry->fn.parent_ip = parent_ip;
843 __raw_spin_unlock(&data->lock);
844 raw_local_irq_restore(irq_flags);
845}
846
847void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags)
850{
851 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags);
853}
854
855#ifdef CONFIG_MMIOTRACE
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw)
858{
859 struct trace_entry *entry;
860 unsigned long irq_flags;
861
862 raw_local_irq_save(irq_flags);
863 __raw_spin_lock(&data->lock);
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881
882 raw_local_irq_save(irq_flags);
883 __raw_spin_lock(&data->lock);
884
885 entry = tracing_get_trace_entry(tr, data);
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889
890 __raw_spin_unlock(&data->lock);
891 raw_local_irq_restore(irq_flags);
892
893 trace_wake_up();
894}
895#endif
896
897void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data,
899 unsigned long flags,
900 int skip)
901{
902 struct trace_entry *entry;
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920}
921
922void
923__trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3)
925{
926 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr;
928 struct trace_entry *entry;
929 unsigned long irq_flags;
930
931 raw_local_irq_save(irq_flags);
932 __raw_spin_lock(&data->lock);
933 entry = tracing_get_trace_entry(tr, data);
934 tracing_generic_entry_update(entry, 0);
935 entry->type = TRACE_SPECIAL;
936 entry->special.arg1 = arg1;
937 entry->special.arg2 = arg2;
938 entry->special.arg3 = arg3;
939 __trace_stack(tr, data, irq_flags, 4);
940 __raw_spin_unlock(&data->lock);
941 raw_local_irq_restore(irq_flags);
942
943 trace_wake_up();
944}
945
946void
947tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data,
949 struct task_struct *prev,
950 struct task_struct *next,
951 unsigned long flags)
952{
953 struct trace_entry *entry;
954 unsigned long irq_flags;
955
956 raw_local_irq_save(irq_flags);
957 __raw_spin_lock(&data->lock);
958 entry = tracing_get_trace_entry(tr, data);
959 tracing_generic_entry_update(entry, flags);
960 entry->type = TRACE_CTX;
961 entry->ctx.prev_pid = prev->pid;
962 entry->ctx.prev_prio = prev->prio;
963 entry->ctx.prev_state = prev->state;
964 entry->ctx.next_pid = next->pid;
965 entry->ctx.next_prio = next->prio;
966 entry->ctx.next_state = next->state;
967 __trace_stack(tr, data, flags, 5);
968 __raw_spin_unlock(&data->lock);
969 raw_local_irq_restore(irq_flags);
970}
971
972void
973tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data,
975 struct task_struct *wakee,
976 struct task_struct *curr,
977 unsigned long flags)
978{
979 struct trace_entry *entry;
980 unsigned long irq_flags;
981
982 raw_local_irq_save(irq_flags);
983 __raw_spin_lock(&data->lock);
984 entry = tracing_get_trace_entry(tr, data);
985 tracing_generic_entry_update(entry, flags);
986 entry->type = TRACE_WAKE;
987 entry->ctx.prev_pid = curr->pid;
988 entry->ctx.prev_prio = curr->prio;
989 entry->ctx.prev_state = curr->state;
990 entry->ctx.next_pid = wakee->pid;
991 entry->ctx.next_prio = wakee->prio;
992 entry->ctx.next_state = wakee->state;
993 __trace_stack(tr, data, flags, 6);
994 __raw_spin_unlock(&data->lock);
995 raw_local_irq_restore(irq_flags);
996
997 trace_wake_up();
998}
999
1000void
1001ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{
1003 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu;
1008
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
1010 return;
1011
1012 local_irq_save(flags);
1013 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016
1017 if (likely(disabled == 1))
1018 __trace_special(tr, data, arg1, arg2, arg3);
1019
1020 atomic_dec(&data->disabled);
1021 local_irq_restore(flags);
1022}
1023
1024#ifdef CONFIG_FTRACE
1025static void
1026function_trace_call(unsigned long ip, unsigned long parent_ip)
1027{
1028 struct trace_array *tr = &global_trace;
1029 struct trace_array_cpu *data;
1030 unsigned long flags;
1031 long disabled;
1032 int cpu;
1033
1034 if (unlikely(!ftrace_function_enabled))
1035 return;
1036
1037 if (skip_trace(ip))
1038 return;
1039
1040 local_irq_save(flags);
1041 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled);
1044
1045 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags);
1047
1048 atomic_dec(&data->disabled);
1049 local_irq_restore(flags);
1050}
1051
1052static struct ftrace_ops trace_ops __read_mostly =
1053{
1054 .func = function_trace_call,
1055};
1056
1057void tracing_start_function_trace(void)
1058{
1059 ftrace_function_enabled = 0;
1060 register_ftrace_function(&trace_ops);
1061 if (tracer_enabled)
1062 ftrace_function_enabled = 1;
1063}
1064
1065void tracing_stop_function_trace(void)
1066{
1067 ftrace_function_enabled = 0;
1068 unregister_ftrace_function(&trace_ops);
1069}
1070#endif
1071
1072enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1,
1074};
1075
1076static struct trace_entry *
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{
1080 struct page *page;
1081 struct trace_entry *array;
1082
1083 if (iter->next_idx[cpu] >= tr->entries ||
1084 iter->next_idx[cpu] >= data->trace_idx ||
1085 (data->trace_head == data->trace_tail &&
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088
1089 if (!iter->next_page[cpu]) {
1090 /* Initialize the iterator for this cpu trace buffer */
1091 WARN_ON(!data->trace_tail);
1092 page = virt_to_page(data->trace_tail);
1093 iter->next_page[cpu] = &page->lru;
1094 iter->next_page_idx[cpu] = data->trace_tail_idx;
1095 }
1096
1097 page = list_entry(iter->next_page[cpu], struct page, lru);
1098 BUG_ON(&data->trace_pages == &page->lru);
1099
1100 array = page_address(page);
1101
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1103 return &array[iter->next_page_idx[cpu]];
1104}
1105
1106static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1108{
1109 struct trace_array *tr = iter->tr;
1110 struct trace_entry *ent, *next = NULL;
1111 int next_cpu = -1;
1112 int cpu;
1113
1114 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu]))
1116 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1118 /*
1119 * Pick the entry with the smallest timestamp:
1120 */
1121 if (ent && (!next || ent->t < next->t)) {
1122 next = ent;
1123 next_cpu = cpu;
1124 }
1125 }
1126
1127 if (ent_cpu)
1128 *ent_cpu = next_cpu;
1129
1130 return next;
1131}
1132
1133static void trace_iterator_increment(struct trace_iterator *iter)
1134{
1135 iter->idx++;
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146}
1147
1148static void trace_consume(struct trace_iterator *iter)
1149{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1151
1152 data->trace_tail_idx++;
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157
1158 /* Check if we empty it, then reset the index */
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162}
1163
1164static void *find_next_entry_inc(struct trace_iterator *iter)
1165{
1166 struct trace_entry *next;
1167 int next_cpu = -1;
1168
1169 next = find_next_entry(iter, &next_cpu);
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181}
1182
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1184{
1185 struct trace_iterator *iter = m->private;
1186 void *last_ent = iter->ent;
1187 int i = (int)*pos;
1188 void *ent;
1189
1190 (*pos)++;
1191
1192 /* can't go backwards */
1193 if (iter->idx > i)
1194 return NULL;
1195
1196 if (iter->idx < 0)
1197 ent = find_next_entry_inc(iter);
1198 else
1199 ent = iter;
1200
1201 while (ent && iter->idx < i)
1202 ent = find_next_entry_inc(iter);
1203
1204 iter->pos = *pos;
1205
1206 if (last_ent && !ent)
1207 seq_puts(m, "\n\nvim:ft=help\n");
1208
1209 return ent;
1210}
1211
1212static void *s_start(struct seq_file *m, loff_t *pos)
1213{
1214 struct trace_iterator *iter = m->private;
1215 void *p = NULL;
1216 loff_t l = 0;
1217 int i;
1218
1219 mutex_lock(&trace_types_lock);
1220
1221 if (!current_trace || current_trace != iter->trace) {
1222 mutex_unlock(&trace_types_lock);
1223 return NULL;
1224 }
1225
1226 atomic_inc(&trace_record_cmdline_disabled);
1227
1228 /* let the tracer grab locks here if needed */
1229 if (current_trace->start)
1230 current_trace->start(iter);
1231
1232 if (*pos != iter->pos) {
1233 iter->ent = NULL;
1234 iter->cpu = 0;
1235 iter->idx = -1;
1236 iter->prev_ent = NULL;
1237 iter->prev_cpu = -1;
1238
1239 for_each_tracing_cpu(i) {
1240 iter->next_idx[i] = 0;
1241 iter->next_page[i] = NULL;
1242 }
1243
1244 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1245 ;
1246
1247 } else {
1248 l = *pos - 1;
1249 p = s_next(m, p, &l);
1250 }
1251
1252 return p;
1253}
1254
1255static void s_stop(struct seq_file *m, void *p)
1256{
1257 struct trace_iterator *iter = m->private;
1258
1259 atomic_dec(&trace_record_cmdline_disabled);
1260
1261 /* let the tracer release locks here if needed */
1262 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1263 iter->trace->stop(iter);
1264
1265 mutex_unlock(&trace_types_lock);
1266}
1267
1268#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1269
1270#ifdef CONFIG_KRETPROBES
1271static inline int kretprobed(unsigned long addr)
1272{
1273 return addr == (unsigned long)kretprobe_trampoline;
1274}
1275#else
1276static inline int kretprobed(unsigned long addr)
1277{
1278 return 0;
1279}
1280#endif /* CONFIG_KRETPROBES */
1281
1282static int
1283seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1284{
1285#ifdef CONFIG_KALLSYMS
1286 char str[KSYM_SYMBOL_LEN];
1287
1288 kallsyms_lookup(address, NULL, NULL, NULL, str);
1289
1290 return trace_seq_printf(s, fmt, str);
1291#endif
1292 return 1;
1293}
1294
1295static int
1296seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1297 unsigned long address)
1298{
1299#ifdef CONFIG_KALLSYMS
1300 char str[KSYM_SYMBOL_LEN];
1301
1302 sprint_symbol(str, address);
1303 return trace_seq_printf(s, fmt, str);
1304#endif
1305 return 1;
1306}
1307
1308#ifndef CONFIG_64BIT
1309# define IP_FMT "%08lx"
1310#else
1311# define IP_FMT "%016lx"
1312#endif
1313
1314static int
1315seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1316{
1317 int ret;
1318
1319 if (!ip)
1320 return trace_seq_printf(s, "0");
1321
1322 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1323 ret = seq_print_sym_offset(s, "%s", ip);
1324 else
1325 ret = seq_print_sym_short(s, "%s", ip);
1326
1327 if (!ret)
1328 return 0;
1329
1330 if (sym_flags & TRACE_ITER_SYM_ADDR)
1331 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1332 return ret;
1333}
1334
1335static void print_lat_help_header(struct seq_file *m)
1336{
1337 seq_puts(m, "# _------=> CPU# \n");
1338 seq_puts(m, "# / _-----=> irqs-off \n");
1339 seq_puts(m, "# | / _----=> need-resched \n");
1340 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1341 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1342 seq_puts(m, "# |||| / \n");
1343 seq_puts(m, "# ||||| delay \n");
1344 seq_puts(m, "# cmd pid ||||| time | caller \n");
1345 seq_puts(m, "# \\ / ||||| \\ | / \n");
1346}
1347
1348static void print_func_help_header(struct seq_file *m)
1349{
1350 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1351 seq_puts(m, "# | | | | |\n");
1352}
1353
1354
1355static void
1356print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1357{
1358 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1359 struct trace_array *tr = iter->tr;
1360 struct trace_array_cpu *data = tr->data[tr->cpu];
1361 struct tracer *type = current_trace;
1362 unsigned long total = 0;
1363 unsigned long entries = 0;
1364 int cpu;
1365 const char *name = "preemption";
1366
1367 if (type)
1368 name = type->name;
1369
1370 for_each_tracing_cpu(cpu) {
1371 if (head_page(tr->data[cpu])) {
1372 total += tr->data[cpu]->trace_idx;
1373 if (tr->data[cpu]->trace_idx > tr->entries)
1374 entries += tr->entries;
1375 else
1376 entries += tr->data[cpu]->trace_idx;
1377 }
1378 }
1379
1380 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1381 name, UTS_RELEASE);
1382 seq_puts(m, "-----------------------------------"
1383 "---------------------------------\n");
1384 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1385 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1386 nsecs_to_usecs(data->saved_latency),
1387 entries,
1388 total,
1389 tr->cpu,
1390#if defined(CONFIG_PREEMPT_NONE)
1391 "server",
1392#elif defined(CONFIG_PREEMPT_VOLUNTARY)
1393 "desktop",
1394#elif defined(CONFIG_PREEMPT)
1395 "preempt",
1396#else
1397 "unknown",
1398#endif
1399 /* These are reserved for later use */
1400 0, 0, 0, 0);
1401#ifdef CONFIG_SMP
1402 seq_printf(m, " #P:%d)\n", num_online_cpus());
1403#else
1404 seq_puts(m, ")\n");
1405#endif
1406 seq_puts(m, " -----------------\n");
1407 seq_printf(m, " | task: %.16s-%d "
1408 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1409 data->comm, data->pid, data->uid, data->nice,
1410 data->policy, data->rt_priority);
1411 seq_puts(m, " -----------------\n");
1412
1413 if (data->critical_start) {
1414 seq_puts(m, " => started at: ");
1415 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1416 trace_print_seq(m, &iter->seq);
1417 seq_puts(m, "\n => ended at: ");
1418 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1419 trace_print_seq(m, &iter->seq);
1420 seq_puts(m, "\n");
1421 }
1422
1423 seq_puts(m, "\n");
1424}
1425
1426static void
1427lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1428{
1429 int hardirq, softirq;
1430 char *comm;
1431
1432 comm = trace_find_cmdline(entry->pid);
1433
1434 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1435 trace_seq_printf(s, "%d", cpu);
1436 trace_seq_printf(s, "%c%c",
1437 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1438 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1439
1440 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1441 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1442 if (hardirq && softirq) {
1443 trace_seq_putc(s, 'H');
1444 } else {
1445 if (hardirq) {
1446 trace_seq_putc(s, 'h');
1447 } else {
1448 if (softirq)
1449 trace_seq_putc(s, 's');
1450 else
1451 trace_seq_putc(s, '.');
1452 }
1453 }
1454
1455 if (entry->preempt_count)
1456 trace_seq_printf(s, "%x", entry->preempt_count);
1457 else
1458 trace_seq_puts(s, ".");
1459}
1460
1461unsigned long preempt_mark_thresh = 100;
1462
1463static void
1464lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1465 unsigned long rel_usecs)
1466{
1467 trace_seq_printf(s, " %4lldus", abs_usecs);
1468 if (rel_usecs > preempt_mark_thresh)
1469 trace_seq_puts(s, "!: ");
1470 else if (rel_usecs > 1)
1471 trace_seq_puts(s, "+: ");
1472 else
1473 trace_seq_puts(s, " : ");
1474}
1475
1476static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1477
1478static int
1479print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1480{
1481 struct trace_seq *s = &iter->seq;
1482 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1483 struct trace_entry *next_entry = find_next_entry(iter, NULL);
1484 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1485 struct trace_entry *entry = iter->ent;
1486 unsigned long abs_usecs;
1487 unsigned long rel_usecs;
1488 char *comm;
1489 int S, T;
1490 int i;
1491 unsigned state;
1492
1493 if (!next_entry)
1494 next_entry = entry;
1495 rel_usecs = ns2usecs(next_entry->t - entry->t);
1496 abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1497
1498 if (verbose) {
1499 comm = trace_find_cmdline(entry->pid);
1500 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1501 " %ld.%03ldms (+%ld.%03ldms): ",
1502 comm,
1503 entry->pid, cpu, entry->flags,
1504 entry->preempt_count, trace_idx,
1505 ns2usecs(entry->t),
1506 abs_usecs/1000,
1507 abs_usecs % 1000, rel_usecs/1000,
1508 rel_usecs % 1000);
1509 } else {
1510 lat_print_generic(s, entry, cpu);
1511 lat_print_timestamp(s, abs_usecs, rel_usecs);
1512 }
1513 switch (entry->type) {
1514 case TRACE_FN:
1515 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1516 trace_seq_puts(s, " (");
1517 if (kretprobed(entry->fn.parent_ip))
1518 trace_seq_puts(s, KRETPROBE_MSG);
1519 else
1520 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1521 trace_seq_puts(s, ")\n");
1522 break;
1523 case TRACE_CTX:
1524 case TRACE_WAKE:
1525 T = entry->ctx.next_state < sizeof(state_to_char) ?
1526 state_to_char[entry->ctx.next_state] : 'X';
1527
1528 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1529 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1530 comm = trace_find_cmdline(entry->ctx.next_pid);
1531 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1532 entry->ctx.prev_pid,
1533 entry->ctx.prev_prio,
1534 S, entry->type == TRACE_CTX ? "==>" : " +",
1535 entry->ctx.next_pid,
1536 entry->ctx.next_prio,
1537 T, comm);
1538 break;
1539 case TRACE_SPECIAL:
1540 trace_seq_printf(s, "# %ld %ld %ld\n",
1541 entry->special.arg1,
1542 entry->special.arg2,
1543 entry->special.arg3);
1544 break;
1545 case TRACE_STACK:
1546 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1547 if (i)
1548 trace_seq_puts(s, " <= ");
1549 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1550 }
1551 trace_seq_puts(s, "\n");
1552 break;
1553 default:
1554 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1555 }
1556 return 1;
1557}
1558
1559static int print_trace_fmt(struct trace_iterator *iter)
1560{
1561 struct trace_seq *s = &iter->seq;
1562 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1563 struct trace_entry *entry;
1564 unsigned long usec_rem;
1565 unsigned long long t;
1566 unsigned long secs;
1567 char *comm;
1568 int ret;
1569 int S, T;
1570 int i;
1571
1572 entry = iter->ent;
1573
1574 comm = trace_find_cmdline(iter->ent->pid);
1575
1576 t = ns2usecs(entry->t);
1577 usec_rem = do_div(t, 1000000ULL);
1578 secs = (unsigned long)t;
1579
1580 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1581 if (!ret)
1582 return 0;
1583 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1584 if (!ret)
1585 return 0;
1586 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1587 if (!ret)
1588 return 0;
1589
1590 switch (entry->type) {
1591 case TRACE_FN:
1592 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1593 if (!ret)
1594 return 0;
1595 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1596 entry->fn.parent_ip) {
1597 ret = trace_seq_printf(s, " <-");
1598 if (!ret)
1599 return 0;
1600 if (kretprobed(entry->fn.parent_ip))
1601 ret = trace_seq_puts(s, KRETPROBE_MSG);
1602 else
1603 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1604 sym_flags);
1605 if (!ret)
1606 return 0;
1607 }
1608 ret = trace_seq_printf(s, "\n");
1609 if (!ret)
1610 return 0;
1611 break;
1612 case TRACE_CTX:
1613 case TRACE_WAKE:
1614 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1615 state_to_char[entry->ctx.prev_state] : 'X';
1616 T = entry->ctx.next_state < sizeof(state_to_char) ?
1617 state_to_char[entry->ctx.next_state] : 'X';
1618 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1619 entry->ctx.prev_pid,
1620 entry->ctx.prev_prio,
1621 S,
1622 entry->type == TRACE_CTX ? "==>" : " +",
1623 entry->ctx.next_pid,
1624 entry->ctx.next_prio,
1625 T);
1626 if (!ret)
1627 return 0;
1628 break;
1629 case TRACE_SPECIAL:
1630 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1631 entry->special.arg1,
1632 entry->special.arg2,
1633 entry->special.arg3);
1634 if (!ret)
1635 return 0;
1636 break;
1637 case TRACE_STACK:
1638 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1639 if (i) {
1640 ret = trace_seq_puts(s, " <= ");
1641 if (!ret)
1642 return 0;
1643 }
1644 ret = seq_print_ip_sym(s, entry->stack.caller[i],
1645 sym_flags);
1646 if (!ret)
1647 return 0;
1648 }
1649 ret = trace_seq_puts(s, "\n");
1650 if (!ret)
1651 return 0;
1652 break;
1653 }
1654 return 1;
1655}
1656
1657static int print_raw_fmt(struct trace_iterator *iter)
1658{
1659 struct trace_seq *s = &iter->seq;
1660 struct trace_entry *entry;
1661 int ret;
1662 int S, T;
1663
1664 entry = iter->ent;
1665
1666 ret = trace_seq_printf(s, "%d %d %llu ",
1667 entry->pid, iter->cpu, entry->t);
1668 if (!ret)
1669 return 0;
1670
1671 switch (entry->type) {
1672 case TRACE_FN:
1673 ret = trace_seq_printf(s, "%x %x\n",
1674 entry->fn.ip, entry->fn.parent_ip);
1675 if (!ret)
1676 return 0;
1677 break;
1678 case TRACE_CTX:
1679 case TRACE_WAKE:
1680 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1681 state_to_char[entry->ctx.prev_state] : 'X';
1682 T = entry->ctx.next_state < sizeof(state_to_char) ?
1683 state_to_char[entry->ctx.next_state] : 'X';
1684 if (entry->type == TRACE_WAKE)
1685 S = '+';
1686 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1687 entry->ctx.prev_pid,
1688 entry->ctx.prev_prio,
1689 S,
1690 entry->ctx.next_pid,
1691 entry->ctx.next_prio,
1692 T);
1693 if (!ret)
1694 return 0;
1695 break;
1696 case TRACE_SPECIAL:
1697 case TRACE_STACK:
1698 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1699 entry->special.arg1,
1700 entry->special.arg2,
1701 entry->special.arg3);
1702 if (!ret)
1703 return 0;
1704 break;
1705 }
1706 return 1;
1707}
1708
1709#define SEQ_PUT_FIELD_RET(s, x) \
1710do { \
1711 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
1712 return 0; \
1713} while (0)
1714
1715#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1716do { \
1717 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1718 return 0; \
1719} while (0)
1720
1721static int print_hex_fmt(struct trace_iterator *iter)
1722{
1723 struct trace_seq *s = &iter->seq;
1724 unsigned char newline = '\n';
1725 struct trace_entry *entry;
1726 int S, T;
1727
1728 entry = iter->ent;
1729
1730 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1731 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1733
1734 switch (entry->type) {
1735 case TRACE_FN:
1736 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1737 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1738 break;
1739 case TRACE_CTX:
1740 case TRACE_WAKE:
1741 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1742 state_to_char[entry->ctx.prev_state] : 'X';
1743 T = entry->ctx.next_state < sizeof(state_to_char) ?
1744 state_to_char[entry->ctx.next_state] : 'X';
1745 if (entry->type == TRACE_WAKE)
1746 S = '+';
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, S);
1750 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1751 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1752 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1753 SEQ_PUT_HEX_FIELD_RET(s, T);
1754 break;
1755 case TRACE_SPECIAL:
1756 case TRACE_STACK:
1757 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1758 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1759 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1760 break;
1761 }
1762 SEQ_PUT_FIELD_RET(s, newline);
1763
1764 return 1;
1765}
1766
1767static int print_bin_fmt(struct trace_iterator *iter)
1768{
1769 struct trace_seq *s = &iter->seq;
1770 struct trace_entry *entry;
1771
1772 entry = iter->ent;
1773
1774 SEQ_PUT_FIELD_RET(s, entry->pid);
1775 SEQ_PUT_FIELD_RET(s, entry->cpu);
1776 SEQ_PUT_FIELD_RET(s, entry->t);
1777
1778 switch (entry->type) {
1779 case TRACE_FN:
1780 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1781 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1782 break;
1783 case TRACE_CTX:
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1786 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1787 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1788 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1789 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1790 break;
1791 case TRACE_SPECIAL:
1792 case TRACE_STACK:
1793 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1794 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1795 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1796 break;
1797 }
1798 return 1;
1799}
1800
1801static int trace_empty(struct trace_iterator *iter)
1802{
1803 struct trace_array_cpu *data;
1804 int cpu;
1805
1806 for_each_tracing_cpu(cpu) {
1807 data = iter->tr->data[cpu];
1808
1809 if (head_page(data) && data->trace_idx &&
1810 (data->trace_tail != data->trace_head ||
1811 data->trace_tail_idx != data->trace_head_idx))
1812 return 0;
1813 }
1814 return 1;
1815}
1816
1817static int print_trace_line(struct trace_iterator *iter)
1818{
1819 if (iter->trace && iter->trace->print_line)
1820 return iter->trace->print_line(iter);
1821
1822 if (trace_flags & TRACE_ITER_BIN)
1823 return print_bin_fmt(iter);
1824
1825 if (trace_flags & TRACE_ITER_HEX)
1826 return print_hex_fmt(iter);
1827
1828 if (trace_flags & TRACE_ITER_RAW)
1829 return print_raw_fmt(iter);
1830
1831 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1832 return print_lat_fmt(iter, iter->idx, iter->cpu);
1833
1834 return print_trace_fmt(iter);
1835}
1836
1837static int s_show(struct seq_file *m, void *v)
1838{
1839 struct trace_iterator *iter = v;
1840
1841 if (iter->ent == NULL) {
1842 if (iter->tr) {
1843 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1844 seq_puts(m, "#\n");
1845 }
1846 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1847 /* print nothing if the buffers are empty */
1848 if (trace_empty(iter))
1849 return 0;
1850 print_trace_header(m, iter);
1851 if (!(trace_flags & TRACE_ITER_VERBOSE))
1852 print_lat_help_header(m);
1853 } else {
1854 if (!(trace_flags & TRACE_ITER_VERBOSE))
1855 print_func_help_header(m);
1856 }
1857 } else {
1858 print_trace_line(iter);
1859 trace_print_seq(m, &iter->seq);
1860 }
1861
1862 return 0;
1863}
1864
1865static struct seq_operations tracer_seq_ops = {
1866 .start = s_start,
1867 .next = s_next,
1868 .stop = s_stop,
1869 .show = s_show,
1870};
1871
1872static struct trace_iterator *
1873__tracing_open(struct inode *inode, struct file *file, int *ret)
1874{
1875 struct trace_iterator *iter;
1876
1877 if (tracing_disabled) {
1878 *ret = -ENODEV;
1879 return NULL;
1880 }
1881
1882 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1883 if (!iter) {
1884 *ret = -ENOMEM;
1885 goto out;
1886 }
1887
1888 mutex_lock(&trace_types_lock);
1889 if (current_trace && current_trace->print_max)
1890 iter->tr = &max_tr;
1891 else
1892 iter->tr = inode->i_private;
1893 iter->trace = current_trace;
1894 iter->pos = -1;
1895
1896 /* TODO stop tracer */
1897 *ret = seq_open(file, &tracer_seq_ops);
1898 if (!*ret) {
1899 struct seq_file *m = file->private_data;
1900 m->private = iter;
1901
1902 /* stop the trace while dumping */
1903 if (iter->tr->ctrl) {
1904 tracer_enabled = 0;
1905 ftrace_function_enabled = 0;
1906 }
1907
1908 if (iter->trace && iter->trace->open)
1909 iter->trace->open(iter);
1910 } else {
1911 kfree(iter);
1912 iter = NULL;
1913 }
1914 mutex_unlock(&trace_types_lock);
1915
1916 out:
1917 return iter;
1918}
1919
1920int tracing_open_generic(struct inode *inode, struct file *filp)
1921{
1922 if (tracing_disabled)
1923 return -ENODEV;
1924
1925 filp->private_data = inode->i_private;
1926 return 0;
1927}
1928
1929int tracing_release(struct inode *inode, struct file *file)
1930{
1931 struct seq_file *m = (struct seq_file *)file->private_data;
1932 struct trace_iterator *iter = m->private;
1933
1934 mutex_lock(&trace_types_lock);
1935 if (iter->trace && iter->trace->close)
1936 iter->trace->close(iter);
1937
1938 /* reenable tracing if it was previously enabled */
1939 if (iter->tr->ctrl) {
1940 tracer_enabled = 1;
1941 /*
1942 * It is safe to enable function tracing even if it
1943 * isn't used
1944 */
1945 ftrace_function_enabled = 1;
1946 }
1947 mutex_unlock(&trace_types_lock);
1948
1949 seq_release(inode, file);
1950 kfree(iter);
1951 return 0;
1952}
1953
1954static int tracing_open(struct inode *inode, struct file *file)
1955{
1956 int ret;
1957
1958 __tracing_open(inode, file, &ret);
1959
1960 return ret;
1961}
1962
1963static int tracing_lt_open(struct inode *inode, struct file *file)
1964{
1965 struct trace_iterator *iter;
1966 int ret;
1967
1968 iter = __tracing_open(inode, file, &ret);
1969
1970 if (!ret)
1971 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1972
1973 return ret;
1974}
1975
1976
1977static void *
1978t_next(struct seq_file *m, void *v, loff_t *pos)
1979{
1980 struct tracer *t = m->private;
1981
1982 (*pos)++;
1983
1984 if (t)
1985 t = t->next;
1986
1987 m->private = t;
1988
1989 return t;
1990}
1991
1992static void *t_start(struct seq_file *m, loff_t *pos)
1993{
1994 struct tracer *t = m->private;
1995 loff_t l = 0;
1996
1997 mutex_lock(&trace_types_lock);
1998 for (; t && l < *pos; t = t_next(m, t, &l))
1999 ;
2000
2001 return t;
2002}
2003
2004static void t_stop(struct seq_file *m, void *p)
2005{
2006 mutex_unlock(&trace_types_lock);
2007}
2008
2009static int t_show(struct seq_file *m, void *v)
2010{
2011 struct tracer *t = v;
2012
2013 if (!t)
2014 return 0;
2015
2016 seq_printf(m, "%s", t->name);
2017 if (t->next)
2018 seq_putc(m, ' ');
2019 else
2020 seq_putc(m, '\n');
2021
2022 return 0;
2023}
2024
2025static struct seq_operations show_traces_seq_ops = {
2026 .start = t_start,
2027 .next = t_next,
2028 .stop = t_stop,
2029 .show = t_show,
2030};
2031
2032static int show_traces_open(struct inode *inode, struct file *file)
2033{
2034 int ret;
2035
2036 if (tracing_disabled)
2037 return -ENODEV;
2038
2039 ret = seq_open(file, &show_traces_seq_ops);
2040 if (!ret) {
2041 struct seq_file *m = file->private_data;
2042 m->private = trace_types;
2043 }
2044
2045 return ret;
2046}
2047
2048static struct file_operations tracing_fops = {
2049 .open = tracing_open,
2050 .read = seq_read,
2051 .llseek = seq_lseek,
2052 .release = tracing_release,
2053};
2054
2055static struct file_operations tracing_lt_fops = {
2056 .open = tracing_lt_open,
2057 .read = seq_read,
2058 .llseek = seq_lseek,
2059 .release = tracing_release,
2060};
2061
2062static struct file_operations show_traces_fops = {
2063 .open = show_traces_open,
2064 .read = seq_read,
2065 .release = seq_release,
2066};
2067
2068/*
2069 * Only trace on a CPU if the bitmask is set:
2070 */
2071static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2072
2073/*
2074 * When tracing/tracing_cpu_mask is modified then this holds
2075 * the new bitmask we are about to install:
2076 */
2077static cpumask_t tracing_cpumask_new;
2078
2079/*
2080 * The tracer itself will not take this lock, but still we want
2081 * to provide a consistent cpumask to user-space:
2082 */
2083static DEFINE_MUTEX(tracing_cpumask_update_lock);
2084
2085/*
2086 * Temporary storage for the character representation of the
2087 * CPU bitmask (and one more byte for the newline):
2088 */
2089static char mask_str[NR_CPUS + 1];
2090
2091static ssize_t
2092tracing_cpumask_read(struct file *filp, char __user *ubuf,
2093 size_t count, loff_t *ppos)
2094{
2095 int len;
2096
2097 mutex_lock(&tracing_cpumask_update_lock);
2098
2099 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2100 if (count - len < 2) {
2101 count = -EINVAL;
2102 goto out_err;
2103 }
2104 len += sprintf(mask_str + len, "\n");
2105 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2106
2107out_err:
2108 mutex_unlock(&tracing_cpumask_update_lock);
2109
2110 return count;
2111}
2112
2113static ssize_t
2114tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2115 size_t count, loff_t *ppos)
2116{
2117 int err, cpu;
2118
2119 mutex_lock(&tracing_cpumask_update_lock);
2120 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2121 if (err)
2122 goto err_unlock;
2123
2124 raw_local_irq_disable();
2125 __raw_spin_lock(&ftrace_max_lock);
2126 for_each_tracing_cpu(cpu) {
2127 /*
2128 * Increase/decrease the disabled counter if we are
2129 * about to flip a bit in the cpumask:
2130 */
2131 if (cpu_isset(cpu, tracing_cpumask) &&
2132 !cpu_isset(cpu, tracing_cpumask_new)) {
2133 atomic_inc(&global_trace.data[cpu]->disabled);
2134 }
2135 if (!cpu_isset(cpu, tracing_cpumask) &&
2136 cpu_isset(cpu, tracing_cpumask_new)) {
2137 atomic_dec(&global_trace.data[cpu]->disabled);
2138 }
2139 }
2140 __raw_spin_unlock(&ftrace_max_lock);
2141 raw_local_irq_enable();
2142
2143 tracing_cpumask = tracing_cpumask_new;
2144
2145 mutex_unlock(&tracing_cpumask_update_lock);
2146
2147 return count;
2148
2149err_unlock:
2150 mutex_unlock(&tracing_cpumask_update_lock);
2151
2152 return err;
2153}
2154
2155static struct file_operations tracing_cpumask_fops = {
2156 .open = tracing_open_generic,
2157 .read = tracing_cpumask_read,
2158 .write = tracing_cpumask_write,
2159};
2160
2161static ssize_t
2162tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2163 size_t cnt, loff_t *ppos)
2164{
2165 char *buf;
2166 int r = 0;
2167 int len = 0;
2168 int i;
2169
2170 /* calulate max size */
2171 for (i = 0; trace_options[i]; i++) {
2172 len += strlen(trace_options[i]);
2173 len += 3; /* "no" and space */
2174 }
2175
2176 /* +2 for \n and \0 */
2177 buf = kmalloc(len + 2, GFP_KERNEL);
2178 if (!buf)
2179 return -ENOMEM;
2180
2181 for (i = 0; trace_options[i]; i++) {
2182 if (trace_flags & (1 << i))
2183 r += sprintf(buf + r, "%s ", trace_options[i]);
2184 else
2185 r += sprintf(buf + r, "no%s ", trace_options[i]);
2186 }
2187
2188 r += sprintf(buf + r, "\n");
2189 WARN_ON(r >= len + 2);
2190
2191 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2192
2193 kfree(buf);
2194
2195 return r;
2196}
2197
2198static ssize_t
2199tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2200 size_t cnt, loff_t *ppos)
2201{
2202 char buf[64];
2203 char *cmp = buf;
2204 int neg = 0;
2205 int i;
2206
2207 if (cnt >= sizeof(buf))
2208 return -EINVAL;
2209
2210 if (copy_from_user(&buf, ubuf, cnt))
2211 return -EFAULT;
2212
2213 buf[cnt] = 0;
2214
2215 if (strncmp(buf, "no", 2) == 0) {
2216 neg = 1;
2217 cmp += 2;
2218 }
2219
2220 for (i = 0; trace_options[i]; i++) {
2221 int len = strlen(trace_options[i]);
2222
2223 if (strncmp(cmp, trace_options[i], len) == 0) {
2224 if (neg)
2225 trace_flags &= ~(1 << i);
2226 else
2227 trace_flags |= (1 << i);
2228 break;
2229 }
2230 }
2231 /*
2232 * If no option could be set, return an error:
2233 */
2234 if (!trace_options[i])
2235 return -EINVAL;
2236
2237 filp->f_pos += cnt;
2238
2239 return cnt;
2240}
2241
2242static struct file_operations tracing_iter_fops = {
2243 .open = tracing_open_generic,
2244 .read = tracing_iter_ctrl_read,
2245 .write = tracing_iter_ctrl_write,
2246};
2247
2248static const char readme_msg[] =
2249 "tracing mini-HOWTO:\n\n"
2250 "# mkdir /debug\n"
2251 "# mount -t debugfs nodev /debug\n\n"
2252 "# cat /debug/tracing/available_tracers\n"
2253 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2254 "# cat /debug/tracing/current_tracer\n"
2255 "none\n"
2256 "# echo sched_switch > /debug/tracing/current_tracer\n"
2257 "# cat /debug/tracing/current_tracer\n"
2258 "sched_switch\n"
2259 "# cat /debug/tracing/iter_ctrl\n"
2260 "noprint-parent nosym-offset nosym-addr noverbose\n"
2261 "# echo print-parent > /debug/tracing/iter_ctrl\n"
2262 "# echo 1 > /debug/tracing/tracing_enabled\n"
2263 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2264 "echo 0 > /debug/tracing/tracing_enabled\n"
2265;
2266
2267static ssize_t
2268tracing_readme_read(struct file *filp, char __user *ubuf,
2269 size_t cnt, loff_t *ppos)
2270{
2271 return simple_read_from_buffer(ubuf, cnt, ppos,
2272 readme_msg, strlen(readme_msg));
2273}
2274
2275static struct file_operations tracing_readme_fops = {
2276 .open = tracing_open_generic,
2277 .read = tracing_readme_read,
2278};
2279
2280static ssize_t
2281tracing_ctrl_read(struct file *filp, char __user *ubuf,
2282 size_t cnt, loff_t *ppos)
2283{
2284 struct trace_array *tr = filp->private_data;
2285 char buf[64];
2286 int r;
2287
2288 r = sprintf(buf, "%ld\n", tr->ctrl);
2289 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2290}
2291
2292static ssize_t
2293tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2294 size_t cnt, loff_t *ppos)
2295{
2296 struct trace_array *tr = filp->private_data;
2297 char buf[64];
2298 long val;
2299 int ret;
2300
2301 if (cnt >= sizeof(buf))
2302 return -EINVAL;
2303
2304 if (copy_from_user(&buf, ubuf, cnt))
2305 return -EFAULT;
2306
2307 buf[cnt] = 0;
2308
2309 ret = strict_strtoul(buf, 10, &val);
2310 if (ret < 0)
2311 return ret;
2312
2313 val = !!val;
2314
2315 mutex_lock(&trace_types_lock);
2316 if (tr->ctrl ^ val) {
2317 if (val)
2318 tracer_enabled = 1;
2319 else
2320 tracer_enabled = 0;
2321
2322 tr->ctrl = val;
2323
2324 if (current_trace && current_trace->ctrl_update)
2325 current_trace->ctrl_update(tr);
2326 }
2327 mutex_unlock(&trace_types_lock);
2328
2329 filp->f_pos += cnt;
2330
2331 return cnt;
2332}
2333
2334static ssize_t
2335tracing_set_trace_read(struct file *filp, char __user *ubuf,
2336 size_t cnt, loff_t *ppos)
2337{
2338 char buf[max_tracer_type_len+2];
2339 int r;
2340
2341 mutex_lock(&trace_types_lock);
2342 if (current_trace)
2343 r = sprintf(buf, "%s\n", current_trace->name);
2344 else
2345 r = sprintf(buf, "\n");
2346 mutex_unlock(&trace_types_lock);
2347
2348 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2349}
2350
2351static ssize_t
2352tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2353 size_t cnt, loff_t *ppos)
2354{
2355 struct trace_array *tr = &global_trace;
2356 struct tracer *t;
2357 char buf[max_tracer_type_len+1];
2358 int i;
2359
2360 if (cnt > max_tracer_type_len)
2361 cnt = max_tracer_type_len;
2362
2363 if (copy_from_user(&buf, ubuf, cnt))
2364 return -EFAULT;
2365
2366 buf[cnt] = 0;
2367
2368 /* strip ending whitespace. */
2369 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2370 buf[i] = 0;
2371
2372 mutex_lock(&trace_types_lock);
2373 for (t = trace_types; t; t = t->next) {
2374 if (strcmp(t->name, buf) == 0)
2375 break;
2376 }
2377 if (!t || t == current_trace)
2378 goto out;
2379
2380 if (current_trace && current_trace->reset)
2381 current_trace->reset(tr);
2382
2383 current_trace = t;
2384 if (t->init)
2385 t->init(tr);
2386
2387 out:
2388 mutex_unlock(&trace_types_lock);
2389
2390 filp->f_pos += cnt;
2391
2392 return cnt;
2393}
2394
2395static ssize_t
2396tracing_max_lat_read(struct file *filp, char __user *ubuf,
2397 size_t cnt, loff_t *ppos)
2398{
2399 unsigned long *ptr = filp->private_data;
2400 char buf[64];
2401 int r;
2402
2403 r = snprintf(buf, sizeof(buf), "%ld\n",
2404 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2405 if (r > sizeof(buf))
2406 r = sizeof(buf);
2407 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2408}
2409
2410static ssize_t
2411tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2412 size_t cnt, loff_t *ppos)
2413{
2414 long *ptr = filp->private_data;
2415 char buf[64];
2416 long val;
2417 int ret;
2418
2419 if (cnt >= sizeof(buf))
2420 return -EINVAL;
2421
2422 if (copy_from_user(&buf, ubuf, cnt))
2423 return -EFAULT;
2424
2425 buf[cnt] = 0;
2426
2427 ret = strict_strtoul(buf, 10, &val);
2428 if (ret < 0)
2429 return ret;
2430
2431 *ptr = val * 1000;
2432
2433 return cnt;
2434}
2435
2436static atomic_t tracing_reader;
2437
2438static int tracing_open_pipe(struct inode *inode, struct file *filp)
2439{
2440 struct trace_iterator *iter;
2441
2442 if (tracing_disabled)
2443 return -ENODEV;
2444
2445 /* We only allow for reader of the pipe */
2446 if (atomic_inc_return(&tracing_reader) != 1) {
2447 atomic_dec(&tracing_reader);
2448 return -EBUSY;
2449 }
2450
2451 /* create a buffer to store the information to pass to userspace */
2452 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2453 if (!iter)
2454 return -ENOMEM;
2455
2456 mutex_lock(&trace_types_lock);
2457 iter->tr = &global_trace;
2458 iter->trace = current_trace;
2459 filp->private_data = iter;
2460
2461 if (iter->trace->pipe_open)
2462 iter->trace->pipe_open(iter);
2463 mutex_unlock(&trace_types_lock);
2464
2465 return 0;
2466}
2467
2468static int tracing_release_pipe(struct inode *inode, struct file *file)
2469{
2470 struct trace_iterator *iter = file->private_data;
2471
2472 kfree(iter);
2473 atomic_dec(&tracing_reader);
2474
2475 return 0;
2476}
2477
2478static unsigned int
2479tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2480{
2481 struct trace_iterator *iter = filp->private_data;
2482
2483 if (trace_flags & TRACE_ITER_BLOCK) {
2484 /*
2485 * Always select as readable when in blocking mode
2486 */
2487 return POLLIN | POLLRDNORM;
2488 } else {
2489 if (!trace_empty(iter))
2490 return POLLIN | POLLRDNORM;
2491 poll_wait(filp, &trace_wait, poll_table);
2492 if (!trace_empty(iter))
2493 return POLLIN | POLLRDNORM;
2494
2495 return 0;
2496 }
2497}
2498
2499/*
2500 * Consumer reader.
2501 */
2502static ssize_t
2503tracing_read_pipe(struct file *filp, char __user *ubuf,
2504 size_t cnt, loff_t *ppos)
2505{
2506 struct trace_iterator *iter = filp->private_data;
2507 struct trace_array_cpu *data;
2508 static cpumask_t mask;
2509 unsigned long flags;
2510#ifdef CONFIG_FTRACE
2511 int ftrace_save;
2512#endif
2513 int cpu;
2514 ssize_t sret;
2515
2516 /* return any leftover data */
2517 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2518 if (sret != -EBUSY)
2519 return sret;
2520 sret = 0;
2521
2522 trace_seq_reset(&iter->seq);
2523
2524 mutex_lock(&trace_types_lock);
2525 if (iter->trace->read) {
2526 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2527 if (sret)
2528 goto out;
2529 }
2530
2531 while (trace_empty(iter)) {
2532
2533 if ((filp->f_flags & O_NONBLOCK)) {
2534 sret = -EAGAIN;
2535 goto out;
2536 }
2537
2538 /*
2539 * This is a make-shift waitqueue. The reason we don't use
2540 * an actual wait queue is because:
2541 * 1) we only ever have one waiter
2542 * 2) the tracing, traces all functions, we don't want
2543 * the overhead of calling wake_up and friends
2544 * (and tracing them too)
2545 * Anyway, this is really very primitive wakeup.
2546 */
2547 set_current_state(TASK_INTERRUPTIBLE);
2548 iter->tr->waiter = current;
2549
2550 mutex_unlock(&trace_types_lock);
2551
2552 /* sleep for 100 msecs, and try again. */
2553 schedule_timeout(HZ/10);
2554
2555 mutex_lock(&trace_types_lock);
2556
2557 iter->tr->waiter = NULL;
2558
2559 if (signal_pending(current)) {
2560 sret = -EINTR;
2561 goto out;
2562 }
2563
2564 if (iter->trace != current_trace)
2565 goto out;
2566
2567 /*
2568 * We block until we read something and tracing is disabled.
2569 * We still block if tracing is disabled, but we have never
2570 * read anything. This allows a user to cat this file, and
2571 * then enable tracing. But after we have read something,
2572 * we give an EOF when tracing is again disabled.
2573 *
2574 * iter->pos will be 0 if we haven't read anything.
2575 */
2576 if (!tracer_enabled && iter->pos)
2577 break;
2578
2579 continue;
2580 }
2581
2582 /* stop when tracing is finished */
2583 if (trace_empty(iter))
2584 goto out;
2585
2586 if (cnt >= PAGE_SIZE)
2587 cnt = PAGE_SIZE - 1;
2588
2589 /* reset all but tr, trace, and overruns */
2590 memset(&iter->seq, 0,
2591 sizeof(struct trace_iterator) -
2592 offsetof(struct trace_iterator, seq));
2593 iter->pos = -1;
2594
2595 /*
2596 * We need to stop all tracing on all CPUS to read the
2597 * the next buffer. This is a bit expensive, but is
2598 * not done often. We fill all what we can read,
2599 * and then release the locks again.
2600 */
2601
2602 cpus_clear(mask);
2603 local_irq_save(flags);
2604#ifdef CONFIG_FTRACE
2605 ftrace_save = ftrace_enabled;
2606 ftrace_enabled = 0;
2607#endif
2608 smp_wmb();
2609 for_each_tracing_cpu(cpu) {
2610 data = iter->tr->data[cpu];
2611
2612 if (!head_page(data) || !data->trace_idx)
2613 continue;
2614
2615 atomic_inc(&data->disabled);
2616 cpu_set(cpu, mask);
2617 }
2618
2619 for_each_cpu_mask(cpu, mask) {
2620 data = iter->tr->data[cpu];
2621 __raw_spin_lock(&data->lock);
2622
2623 if (data->overrun > iter->last_overrun[cpu])
2624 iter->overrun[cpu] +=
2625 data->overrun - iter->last_overrun[cpu];
2626 iter->last_overrun[cpu] = data->overrun;
2627 }
2628
2629 while (find_next_entry_inc(iter) != NULL) {
2630 int ret;
2631 int len = iter->seq.len;
2632
2633 ret = print_trace_line(iter);
2634 if (!ret) {
2635 /* don't print partial lines */
2636 iter->seq.len = len;
2637 break;
2638 }
2639
2640 trace_consume(iter);
2641
2642 if (iter->seq.len >= cnt)
2643 break;
2644 }
2645
2646 for_each_cpu_mask(cpu, mask) {
2647 data = iter->tr->data[cpu];
2648 __raw_spin_unlock(&data->lock);
2649 }
2650
2651 for_each_cpu_mask(cpu, mask) {
2652 data = iter->tr->data[cpu];
2653 atomic_dec(&data->disabled);
2654 }
2655#ifdef CONFIG_FTRACE
2656 ftrace_enabled = ftrace_save;
2657#endif
2658 local_irq_restore(flags);
2659
2660 /* Now copy what we have to the user */
2661 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2662 if (iter->seq.readpos >= iter->seq.len)
2663 trace_seq_reset(&iter->seq);
2664 if (sret == -EBUSY)
2665 sret = 0;
2666
2667out:
2668 mutex_unlock(&trace_types_lock);
2669
2670 return sret;
2671}
2672
2673static ssize_t
2674tracing_entries_read(struct file *filp, char __user *ubuf,
2675 size_t cnt, loff_t *ppos)
2676{
2677 struct trace_array *tr = filp->private_data;
2678 char buf[64];
2679 int r;
2680
2681 r = sprintf(buf, "%lu\n", tr->entries);
2682 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2683}
2684
2685static ssize_t
2686tracing_entries_write(struct file *filp, const char __user *ubuf,
2687 size_t cnt, loff_t *ppos)
2688{
2689 unsigned long val;
2690 char buf[64];
2691 int i, ret;
2692
2693 if (cnt >= sizeof(buf))
2694 return -EINVAL;
2695
2696 if (copy_from_user(&buf, ubuf, cnt))
2697 return -EFAULT;
2698
2699 buf[cnt] = 0;
2700
2701 ret = strict_strtoul(buf, 10, &val);
2702 if (ret < 0)
2703 return ret;
2704
2705 /* must have at least 1 entry */
2706 if (!val)
2707 return -EINVAL;
2708
2709 mutex_lock(&trace_types_lock);
2710
2711 if (current_trace != &no_tracer) {
2712 cnt = -EBUSY;
2713 pr_info("ftrace: set current_tracer to none"
2714 " before modifying buffer size\n");
2715 goto out;
2716 }
2717
2718 if (val > global_trace.entries) {
2719 long pages_requested;
2720 unsigned long freeable_pages;
2721
2722 /* make sure we have enough memory before mapping */
2723 pages_requested =
2724 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2725
2726 /* account for each buffer (and max_tr) */
2727 pages_requested *= tracing_nr_buffers * 2;
2728
2729 /* Check for overflow */
2730 if (pages_requested < 0) {
2731 cnt = -ENOMEM;
2732 goto out;
2733 }
2734
2735 freeable_pages = determine_dirtyable_memory();
2736
2737 /* we only allow to request 1/4 of useable memory */
2738 if (pages_requested >
2739 ((freeable_pages + tracing_pages_allocated) / 4)) {
2740 cnt = -ENOMEM;
2741 goto out;
2742 }
2743
2744 while (global_trace.entries < val) {
2745 if (trace_alloc_page()) {
2746 cnt = -ENOMEM;
2747 goto out;
2748 }
2749 /* double check that we don't go over the known pages */
2750 if (tracing_pages_allocated > pages_requested)
2751 break;
2752 }
2753
2754 } else {
2755 /* include the number of entries in val (inc of page entries) */
2756 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2757 trace_free_page();
2758 }
2759
2760 /* check integrity */
2761 for_each_tracing_cpu(i)
2762 check_pages(global_trace.data[i]);
2763
2764 filp->f_pos += cnt;
2765
2766 /* If check pages failed, return ENOMEM */
2767 if (tracing_disabled)
2768 cnt = -ENOMEM;
2769 out:
2770 max_tr.entries = global_trace.entries;
2771 mutex_unlock(&trace_types_lock);
2772
2773 return cnt;
2774}
2775
2776static struct file_operations tracing_max_lat_fops = {
2777 .open = tracing_open_generic,
2778 .read = tracing_max_lat_read,
2779 .write = tracing_max_lat_write,
2780};
2781
2782static struct file_operations tracing_ctrl_fops = {
2783 .open = tracing_open_generic,
2784 .read = tracing_ctrl_read,
2785 .write = tracing_ctrl_write,
2786};
2787
2788static struct file_operations set_tracer_fops = {
2789 .open = tracing_open_generic,
2790 .read = tracing_set_trace_read,
2791 .write = tracing_set_trace_write,
2792};
2793
2794static struct file_operations tracing_pipe_fops = {
2795 .open = tracing_open_pipe,
2796 .poll = tracing_poll_pipe,
2797 .read = tracing_read_pipe,
2798 .release = tracing_release_pipe,
2799};
2800
2801static struct file_operations tracing_entries_fops = {
2802 .open = tracing_open_generic,
2803 .read = tracing_entries_read,
2804 .write = tracing_entries_write,
2805};
2806
2807#ifdef CONFIG_DYNAMIC_FTRACE
2808
2809static ssize_t
2810tracing_read_long(struct file *filp, char __user *ubuf,
2811 size_t cnt, loff_t *ppos)
2812{
2813 unsigned long *p = filp->private_data;
2814 char buf[64];
2815 int r;
2816
2817 r = sprintf(buf, "%ld\n", *p);
2818
2819 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2820}
2821
2822static struct file_operations tracing_read_long_fops = {
2823 .open = tracing_open_generic,
2824 .read = tracing_read_long,
2825};
2826#endif
2827
2828static struct dentry *d_tracer;
2829
2830struct dentry *tracing_init_dentry(void)
2831{
2832 static int once;
2833
2834 if (d_tracer)
2835 return d_tracer;
2836
2837 d_tracer = debugfs_create_dir("tracing", NULL);
2838
2839 if (!d_tracer && !once) {
2840 once = 1;
2841 pr_warning("Could not create debugfs directory 'tracing'\n");
2842 return NULL;
2843 }
2844
2845 return d_tracer;
2846}
2847
2848#ifdef CONFIG_FTRACE_SELFTEST
2849/* Let selftest have access to static functions in this file */
2850#include "trace_selftest.c"
2851#endif
2852
2853static __init void tracer_init_debugfs(void)
2854{
2855 struct dentry *d_tracer;
2856 struct dentry *entry;
2857
2858 d_tracer = tracing_init_dentry();
2859
2860 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2861 &global_trace, &tracing_ctrl_fops);
2862 if (!entry)
2863 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2864
2865 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2866 NULL, &tracing_iter_fops);
2867 if (!entry)
2868 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2869
2870 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2871 NULL, &tracing_cpumask_fops);
2872 if (!entry)
2873 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2874
2875 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2876 &global_trace, &tracing_lt_fops);
2877 if (!entry)
2878 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2879
2880 entry = debugfs_create_file("trace", 0444, d_tracer,
2881 &global_trace, &tracing_fops);
2882 if (!entry)
2883 pr_warning("Could not create debugfs 'trace' entry\n");
2884
2885 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2886 &global_trace, &show_traces_fops);
2887 if (!entry)
2888 pr_warning("Could not create debugfs 'trace' entry\n");
2889
2890 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2891 &global_trace, &set_tracer_fops);
2892 if (!entry)
2893 pr_warning("Could not create debugfs 'trace' entry\n");
2894
2895 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2896 &tracing_max_latency,
2897 &tracing_max_lat_fops);
2898 if (!entry)
2899 pr_warning("Could not create debugfs "
2900 "'tracing_max_latency' entry\n");
2901
2902 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2903 &tracing_thresh, &tracing_max_lat_fops);
2904 if (!entry)
2905 pr_warning("Could not create debugfs "
2906 "'tracing_threash' entry\n");
2907 entry = debugfs_create_file("README", 0644, d_tracer,
2908 NULL, &tracing_readme_fops);
2909 if (!entry)
2910 pr_warning("Could not create debugfs 'README' entry\n");
2911
2912 entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2913 NULL, &tracing_pipe_fops);
2914 if (!entry)
2915 pr_warning("Could not create debugfs "
2916 "'tracing_threash' entry\n");
2917
2918 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2919 &global_trace, &tracing_entries_fops);
2920 if (!entry)
2921 pr_warning("Could not create debugfs "
2922 "'tracing_threash' entry\n");
2923
2924#ifdef CONFIG_DYNAMIC_FTRACE
2925 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2926 &ftrace_update_tot_cnt,
2927 &tracing_read_long_fops);
2928 if (!entry)
2929 pr_warning("Could not create debugfs "
2930 "'dyn_ftrace_total_info' entry\n");
2931#endif
2932#ifdef CONFIG_SYSPROF_TRACER
2933 init_tracer_sysprof_debugfs(d_tracer);
2934#endif
2935}
2936
2937static int trace_alloc_page(void)
2938{
2939 struct trace_array_cpu *data;
2940 struct page *page, *tmp;
2941 LIST_HEAD(pages);
2942 void *array;
2943 unsigned pages_allocated = 0;
2944 int i;
2945
2946 /* first allocate a page for each CPU */
2947 for_each_tracing_cpu(i) {
2948 array = (void *)__get_free_page(GFP_KERNEL);
2949 if (array == NULL) {
2950 printk(KERN_ERR "tracer: failed to allocate page"
2951 "for trace buffer!\n");
2952 goto free_pages;
2953 }
2954
2955 pages_allocated++;
2956 page = virt_to_page(array);
2957 list_add(&page->lru, &pages);
2958
2959/* Only allocate if we are actually using the max trace */
2960#ifdef CONFIG_TRACER_MAX_TRACE
2961 array = (void *)__get_free_page(GFP_KERNEL);
2962 if (array == NULL) {
2963 printk(KERN_ERR "tracer: failed to allocate page"
2964 "for trace buffer!\n");
2965 goto free_pages;
2966 }
2967 pages_allocated++;
2968 page = virt_to_page(array);
2969 list_add(&page->lru, &pages);
2970#endif
2971 }
2972
2973 /* Now that we successfully allocate a page per CPU, add them */
2974 for_each_tracing_cpu(i) {
2975 data = global_trace.data[i];
2976 page = list_entry(pages.next, struct page, lru);
2977 list_del_init(&page->lru);
2978 list_add_tail(&page->lru, &data->trace_pages);
2979 ClearPageLRU(page);
2980
2981#ifdef CONFIG_TRACER_MAX_TRACE
2982 data = max_tr.data[i];
2983 page = list_entry(pages.next, struct page, lru);
2984 list_del_init(&page->lru);
2985 list_add_tail(&page->lru, &data->trace_pages);
2986 SetPageLRU(page);
2987#endif
2988 }
2989 tracing_pages_allocated += pages_allocated;
2990 global_trace.entries += ENTRIES_PER_PAGE;
2991
2992 return 0;
2993
2994 free_pages:
2995 list_for_each_entry_safe(page, tmp, &pages, lru) {
2996 list_del_init(&page->lru);
2997 __free_page(page);
2998 }
2999 return -ENOMEM;
3000}
3001
3002static int trace_free_page(void)
3003{
3004 struct trace_array_cpu *data;
3005 struct page *page;
3006 struct list_head *p;
3007 int i;
3008 int ret = 0;
3009
3010 /* free one page from each buffer */
3011 for_each_tracing_cpu(i) {
3012 data = global_trace.data[i];
3013 p = data->trace_pages.next;
3014 if (p == &data->trace_pages) {
3015 /* should never happen */
3016 WARN_ON(1);
3017 tracing_disabled = 1;
3018 ret = -1;
3019 break;
3020 }
3021 page = list_entry(p, struct page, lru);
3022 ClearPageLRU(page);
3023 list_del(&page->lru);
3024 tracing_pages_allocated--;
3025 tracing_pages_allocated--;
3026 __free_page(page);
3027
3028 tracing_reset(data);
3029
3030#ifdef CONFIG_TRACER_MAX_TRACE
3031 data = max_tr.data[i];
3032 p = data->trace_pages.next;
3033 if (p == &data->trace_pages) {
3034 /* should never happen */
3035 WARN_ON(1);
3036 tracing_disabled = 1;
3037 ret = -1;
3038 break;
3039 }
3040 page = list_entry(p, struct page, lru);
3041 ClearPageLRU(page);
3042 list_del(&page->lru);
3043 __free_page(page);
3044
3045 tracing_reset(data);
3046#endif
3047 }
3048 global_trace.entries -= ENTRIES_PER_PAGE;
3049
3050 return ret;
3051}
3052
3053__init static int tracer_alloc_buffers(void)
3054{
3055 struct trace_array_cpu *data;
3056 void *array;
3057 struct page *page;
3058 int pages = 0;
3059 int ret = -ENOMEM;
3060 int i;
3061
3062 /* TODO: make the number of buffers hot pluggable with CPUS */
3063 tracing_nr_buffers = num_possible_cpus();
3064 tracing_buffer_mask = cpu_possible_map;
3065
3066 /* Allocate the first page for all buffers */
3067 for_each_tracing_cpu(i) {
3068 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3069 max_tr.data[i] = &per_cpu(max_data, i);
3070
3071 array = (void *)__get_free_page(GFP_KERNEL);
3072 if (array == NULL) {
3073 printk(KERN_ERR "tracer: failed to allocate page"
3074 "for trace buffer!\n");
3075 goto free_buffers;
3076 }
3077
3078 /* set the array to the list */
3079 INIT_LIST_HEAD(&data->trace_pages);
3080 page = virt_to_page(array);
3081 list_add(&page->lru, &data->trace_pages);
3082 /* use the LRU flag to differentiate the two buffers */
3083 ClearPageLRU(page);
3084
3085 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3086 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3087
3088/* Only allocate if we are actually using the max trace */
3089#ifdef CONFIG_TRACER_MAX_TRACE
3090 array = (void *)__get_free_page(GFP_KERNEL);
3091 if (array == NULL) {
3092 printk(KERN_ERR "tracer: failed to allocate page"
3093 "for trace buffer!\n");
3094 goto free_buffers;
3095 }
3096
3097 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3098 page = virt_to_page(array);
3099 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3100 SetPageLRU(page);
3101#endif
3102 }
3103
3104 /*
3105 * Since we allocate by orders of pages, we may be able to
3106 * round up a bit.
3107 */
3108 global_trace.entries = ENTRIES_PER_PAGE;
3109 pages++;
3110
3111 while (global_trace.entries < trace_nr_entries) {
3112 if (trace_alloc_page())
3113 break;
3114 pages++;
3115 }
3116 max_tr.entries = global_trace.entries;
3117
3118 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3119 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3120 pr_info(" actual entries %ld\n", global_trace.entries);
3121
3122 tracer_init_debugfs();
3123
3124 trace_init_cmdlines();
3125
3126 register_tracer(&no_tracer);
3127 current_trace = &no_tracer;
3128
3129 /* All seems OK, enable tracing */
3130 global_trace.ctrl = tracer_enabled;
3131 tracing_disabled = 0;
3132
3133 return 0;
3134
3135 free_buffers:
3136 for (i-- ; i >= 0; i--) {
3137 struct page *page, *tmp;
3138 struct trace_array_cpu *data = global_trace.data[i];
3139
3140 if (data) {
3141 list_for_each_entry_safe(page, tmp,
3142 &data->trace_pages, lru) {
3143 list_del_init(&page->lru);
3144 __free_page(page);
3145 }
3146 }
3147
3148#ifdef CONFIG_TRACER_MAX_TRACE
3149 data = max_tr.data[i];
3150 if (data) {
3151 list_for_each_entry_safe(page, tmp,
3152 &data->trace_pages, lru) {
3153 list_del_init(&page->lru);
3154 __free_page(page);
3155 }
3156 }
3157#endif
3158 }
3159 return ret;
3160}
3161fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..f69f86788c2b
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,339 @@
1#ifndef _LINUX_KERNEL_TRACE_H
2#define _LINUX_KERNEL_TRACE_H
3
4#include <linux/fs.h>
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/clocksource.h>
8#include <linux/mmiotrace.h>
9
10enum trace_type {
11 __TRACE_FIRST_TYPE = 0,
12
13 TRACE_FN,
14 TRACE_CTX,
15 TRACE_WAKE,
16 TRACE_STACK,
17 TRACE_SPECIAL,
18 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP,
20
21 __TRACE_LAST_TYPE
22};
23
24/*
25 * Function trace entry - function address and parent function addres:
26 */
27struct ftrace_entry {
28 unsigned long ip;
29 unsigned long parent_ip;
30};
31
32/*
33 * Context switch trace entry - which task (and prio) we switched from/to:
34 */
35struct ctx_switch_entry {
36 unsigned int prev_pid;
37 unsigned char prev_prio;
38 unsigned char prev_state;
39 unsigned int next_pid;
40 unsigned char next_prio;
41 unsigned char next_state;
42};
43
44/*
45 * Special (free-form) trace entry:
46 */
47struct special_entry {
48 unsigned long arg1;
49 unsigned long arg2;
50 unsigned long arg3;
51};
52
53/*
54 * Stack-trace entry:
55 */
56
57#define FTRACE_STACK_ENTRIES 8
58
59struct stack_entry {
60 unsigned long caller[FTRACE_STACK_ENTRIES];
61};
62
63/*
64 * The trace entry - the most basic unit of tracing. This is what
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */
69struct trace_entry {
70 char type;
71 char cpu;
72 char flags;
73 char preempt_count;
74 int pid;
75 cycle_t t;
76 union {
77 struct ftrace_entry fn;
78 struct ctx_switch_entry ctx;
79 struct special_entry special;
80 struct stack_entry stack;
81 struct mmiotrace_rw mmiorw;
82 struct mmiotrace_map mmiomap;
83 };
84};
85
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
87
88/*
89 * The CPU trace array - it consists of thousands of trace entries
90 * plus some other descriptor data: (for example which task started
91 * the trace, etc.)
92 */
93struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98
99 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx;
105 unsigned long overrun;
106 unsigned long saved_latency;
107 unsigned long critical_start;
108 unsigned long critical_end;
109 unsigned long critical_sequence;
110 unsigned long nice;
111 unsigned long policy;
112 unsigned long rt_priority;
113 cycle_t preempt_timestamp;
114 pid_t pid;
115 uid_t uid;
116 char comm[TASK_COMM_LEN];
117};
118
119struct trace_iterator;
120
121/*
122 * The trace array - an array of per-CPU trace arrays. This is the
123 * highest level data structure that individual tracers deal with.
124 * They have on/off state as well:
125 */
126struct trace_array {
127 unsigned long entries;
128 long ctrl;
129 int cpu;
130 cycle_t time_start;
131 struct task_struct *waiter;
132 struct trace_array_cpu *data[NR_CPUS];
133};
134
135/*
136 * A specific tracer, represented by methods that operate on a trace array:
137 */
138struct tracer {
139 const char *name;
140 void (*init)(struct trace_array *tr);
141 void (*reset)(struct trace_array *tr);
142 void (*open)(struct trace_iterator *iter);
143 void (*pipe_open)(struct trace_iterator *iter);
144 void (*close)(struct trace_iterator *iter);
145 void (*start)(struct trace_iterator *iter);
146 void (*stop)(struct trace_iterator *iter);
147 ssize_t (*read)(struct trace_iterator *iter,
148 struct file *filp, char __user *ubuf,
149 size_t cnt, loff_t *ppos);
150 void (*ctrl_update)(struct trace_array *tr);
151#ifdef CONFIG_FTRACE_STARTUP_TEST
152 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr);
154#endif
155 int (*print_line)(struct trace_iterator *iter);
156 struct tracer *next;
157 int print_max;
158};
159
160struct trace_seq {
161 unsigned char buffer[PAGE_SIZE];
162 unsigned int len;
163 unsigned int readpos;
164};
165
166/*
167 * Trace iterator - used by printout routines who present trace
168 * results to users and which routines might sleep, etc:
169 */
170struct trace_iterator {
171 struct trace_array *tr;
172 struct tracer *trace;
173 void *private;
174 long last_overrun[NR_CPUS];
175 long overrun[NR_CPUS];
176
177 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq;
179 struct trace_entry *ent;
180 int cpu;
181
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184
185 unsigned long iter_flags;
186 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx;
191};
192
193void tracing_reset(struct trace_array_cpu *data);
194int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197
198void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data,
200 unsigned long ip,
201 unsigned long parent_ip,
202 unsigned long flags);
203void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data,
205 struct task_struct *prev,
206 struct task_struct *next,
207 unsigned long flags);
208void tracing_record_cmdline(struct task_struct *tsk);
209
210void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data,
212 struct task_struct *wakee,
213 struct task_struct *cur,
214 unsigned long flags);
215void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data,
217 unsigned long arg1,
218 unsigned long arg2,
219 unsigned long arg3);
220void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data,
222 unsigned long ip,
223 unsigned long parent_ip,
224 unsigned long flags);
225
226void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void);
228int register_tracer(struct tracer *type);
229void unregister_tracer(struct tracer *type);
230
231extern unsigned long nsecs_to_usecs(unsigned long nsecs);
232
233extern unsigned long tracing_max_latency;
234extern unsigned long tracing_thresh;
235
236void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
237void update_max_tr_single(struct trace_array *tr,
238 struct task_struct *tsk, int cpu);
239
240extern cycle_t ftrace_now(int cpu);
241
242#ifdef CONFIG_FTRACE
243void tracing_start_function_trace(void);
244void tracing_stop_function_trace(void);
245#else
246# define tracing_start_function_trace() do { } while (0)
247# define tracing_stop_function_trace() do { } while (0)
248#endif
249
250#ifdef CONFIG_CONTEXT_SWITCH_TRACER
251typedef void
252(*tracer_switch_func_t)(void *private,
253 void *__rq,
254 struct task_struct *prev,
255 struct task_struct *next);
256
257struct tracer_switch_ops {
258 tracer_switch_func_t func;
259 void *private;
260 struct tracer_switch_ops *next;
261};
262
263#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
264
265#ifdef CONFIG_DYNAMIC_FTRACE
266extern unsigned long ftrace_update_tot_cnt;
267#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
268extern int DYN_FTRACE_TEST_NAME(void);
269#endif
270
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr);
300#endif
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER
302extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr);
308#endif
309#endif /* CONFIG_FTRACE_STARTUP_TEST */
310
311extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt);
315extern long ns2usecs(cycle_t nsec);
316
317extern unsigned long trace_flags;
318
319/*
320 * trace_iterator_flags is an enumeration that defines bit
321 * positions into trace_flags that controls the output.
322 *
323 * NOTE: These bits must match the trace_options array in
324 * trace.c.
325 */
326enum trace_iterator_flags {
327 TRACE_ITER_PRINT_PARENT = 0x01,
328 TRACE_ITER_SYM_OFFSET = 0x02,
329 TRACE_ITER_SYM_ADDR = 0x04,
330 TRACE_ITER_VERBOSE = 0x08,
331 TRACE_ITER_RAW = 0x10,
332 TRACE_ITER_HEX = 0x20,
333 TRACE_ITER_BIN = 0x40,
334 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200,
337};
338
339#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..312144897970
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,81 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/debugfs.h>
13#include <linux/uaccess.h>
14#include <linux/ftrace.h>
15#include <linux/fs.h>
16
17#include "trace.h"
18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
29static void start_function_trace(struct trace_array *tr)
30{
31 tr->cpu = get_cpu();
32 function_reset(tr);
33 put_cpu();
34
35 tracing_start_cmdline_record();
36 tracing_start_function_trace();
37}
38
39static void stop_function_trace(struct trace_array *tr)
40{
41 tracing_stop_function_trace();
42 tracing_stop_cmdline_record();
43}
44
45static void function_trace_init(struct trace_array *tr)
46{
47 if (tr->ctrl)
48 start_function_trace(tr);
49}
50
51static void function_trace_reset(struct trace_array *tr)
52{
53 if (tr->ctrl)
54 stop_function_trace(tr);
55}
56
57static void function_trace_ctrl_update(struct trace_array *tr)
58{
59 if (tr->ctrl)
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63}
64
65static struct tracer function_trace __read_mostly =
66{
67 .name = "ftrace",
68 .init = function_trace_init,
69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update,
71#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function,
73#endif
74};
75
76static __init int init_function_trace(void)
77{
78 return register_tracer(&function_trace);
79}
80
81device_initcall(init_function_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..421d6fe3650e
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,486 @@
1/*
2 * trace irqs off criticall timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * From code in the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h>
15#include <linux/module.h>
16#include <linux/ftrace.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21static struct trace_array *irqsoff_trace __read_mostly;
22static int tracer_enabled __read_mostly;
23
24static DEFINE_PER_CPU(int, tracing_cpu);
25
26static DEFINE_SPINLOCK(max_trace_lock);
27
28enum {
29 TRACER_IRQS_OFF = (1 << 1),
30 TRACER_PREEMPT_OFF = (1 << 2),
31};
32
33static int trace_type __read_mostly;
34
35#ifdef CONFIG_PREEMPT_TRACER
36static inline int
37preempt_trace(void)
38{
39 return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
40}
41#else
42# define preempt_trace() (0)
43#endif
44
45#ifdef CONFIG_IRQSOFF_TRACER
46static inline int
47irq_trace(void)
48{
49 return ((trace_type & TRACER_IRQS_OFF) &&
50 irqs_disabled());
51}
52#else
53# define irq_trace() (0)
54#endif
55
56/*
57 * Sequence count - we record it when starting a measurement and
58 * skip the latency if the sequence has changed - some other section
59 * did a maximum and could disturb our measurement with serial console
60 * printouts, etc. Truly coinciding maximum latencies should be rare
61 * and what happens together happens separately as well, so this doesnt
62 * decrease the validity of the maximum found:
63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence;
65
66#ifdef CONFIG_FTRACE
67/*
68 * irqsoff uses its own tracer function to keep the overhead down:
69 */
70static void
71irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
72{
73 struct trace_array *tr = irqsoff_trace;
74 struct trace_array_cpu *data;
75 unsigned long flags;
76 long disabled;
77 int cpu;
78
79 /*
80 * Does not matter if we preempt. We test the flags
81 * afterward, to see if irqs are disabled or not.
82 * If we preempt and get a false positive, the flags
83 * test will fail.
84 */
85 cpu = raw_smp_processor_id();
86 if (likely(!per_cpu(tracing_cpu, cpu)))
87 return;
88
89 local_save_flags(flags);
90 /* slight chance to get a false positive on tracing_cpu */
91 if (!irqs_disabled_flags(flags))
92 return;
93
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags);
99
100 atomic_dec(&data->disabled);
101}
102
103static struct ftrace_ops trace_ops __read_mostly =
104{
105 .func = irqsoff_tracer_call,
106};
107#endif /* CONFIG_FTRACE */
108
109/*
110 * Should this new latency be reported/recorded?
111 */
112static int report_latency(cycle_t delta)
113{
114 if (tracing_thresh) {
115 if (delta < tracing_thresh)
116 return 0;
117 } else {
118 if (delta <= tracing_max_latency)
119 return 0;
120 }
121 return 1;
122}
123
124static void
125check_critical_timing(struct trace_array *tr,
126 struct trace_array_cpu *data,
127 unsigned long parent_ip,
128 int cpu)
129{
130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta;
132 unsigned long flags;
133
134 /*
135 * usecs conversion is slow so we try to delay the conversion
136 * as long as possible:
137 */
138 T0 = data->preempt_timestamp;
139 T1 = ftrace_now(cpu);
140 delta = T1-T0;
141
142 local_save_flags(flags);
143
144 if (!report_latency(delta))
145 goto out;
146
147 spin_lock_irqsave(&max_trace_lock, flags);
148
149 /* check if we are still the max latency */
150 if (!report_latency(delta))
151 goto out_unlock;
152
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
154
155 latency = nsecs_to_usecs(delta);
156
157 if (data->critical_sequence != max_sequence)
158 goto out_unlock;
159
160 tracing_max_latency = delta;
161 t0 = nsecs_to_usecs(T0);
162 t1 = nsecs_to_usecs(T1);
163
164 data->critical_end = parent_ip;
165
166 update_max_tr_single(tr, current, cpu);
167
168 max_sequence++;
169
170out_unlock:
171 spin_unlock_irqrestore(&max_trace_lock, flags);
172
173out:
174 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
178}
179
180static inline void
181start_critical_timing(unsigned long ip, unsigned long parent_ip)
182{
183 int cpu;
184 struct trace_array *tr = irqsoff_trace;
185 struct trace_array_cpu *data;
186 unsigned long flags;
187
188 if (likely(!tracer_enabled))
189 return;
190
191 cpu = raw_smp_processor_id();
192
193 if (per_cpu(tracing_cpu, cpu))
194 return;
195
196 data = tr->data[cpu];
197
198 if (unlikely(!data) || atomic_read(&data->disabled))
199 return;
200
201 atomic_inc(&data->disabled);
202
203 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data);
207
208 local_save_flags(flags);
209
210 trace_function(tr, data, ip, parent_ip, flags);
211
212 per_cpu(tracing_cpu, cpu) = 1;
213
214 atomic_dec(&data->disabled);
215}
216
217static inline void
218stop_critical_timing(unsigned long ip, unsigned long parent_ip)
219{
220 int cpu;
221 struct trace_array *tr = irqsoff_trace;
222 struct trace_array_cpu *data;
223 unsigned long flags;
224
225 cpu = raw_smp_processor_id();
226 /* Always clear the tracing cpu on stopping the trace */
227 if (unlikely(per_cpu(tracing_cpu, cpu)))
228 per_cpu(tracing_cpu, cpu) = 0;
229 else
230 return;
231
232 if (!tracer_enabled)
233 return;
234
235 data = tr->data[cpu];
236
237 if (unlikely(!data) || unlikely(!head_page(data)) ||
238 !data->critical_start || atomic_read(&data->disabled))
239 return;
240
241 atomic_inc(&data->disabled);
242
243 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags);
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0;
247 atomic_dec(&data->disabled);
248}
249
250/* start and stop critical timings used to for stoppage (in idle) */
251void start_critical_timings(void)
252{
253 if (preempt_trace() || irq_trace())
254 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
255}
256
257void stop_critical_timings(void)
258{
259 if (preempt_trace() || irq_trace())
260 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
261}
262
263#ifdef CONFIG_IRQSOFF_TRACER
264#ifdef CONFIG_PROVE_LOCKING
265void time_hardirqs_on(unsigned long a0, unsigned long a1)
266{
267 if (!preempt_trace() && irq_trace())
268 stop_critical_timing(a0, a1);
269}
270
271void time_hardirqs_off(unsigned long a0, unsigned long a1)
272{
273 if (!preempt_trace() && irq_trace())
274 start_critical_timing(a0, a1);
275}
276
277#else /* !CONFIG_PROVE_LOCKING */
278
279/*
280 * Stubs:
281 */
282
283void early_boot_irqs_off(void)
284{
285}
286
287void early_boot_irqs_on(void)
288{
289}
290
291void trace_softirqs_on(unsigned long ip)
292{
293}
294
295void trace_softirqs_off(unsigned long ip)
296{
297}
298
299inline void print_irqtrace_events(struct task_struct *curr)
300{
301}
302
303/*
304 * We are only interested in hardirq on/off events:
305 */
306void trace_hardirqs_on(void)
307{
308 if (!preempt_trace() && irq_trace())
309 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
310}
311EXPORT_SYMBOL(trace_hardirqs_on);
312
313void trace_hardirqs_off(void)
314{
315 if (!preempt_trace() && irq_trace())
316 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
317}
318EXPORT_SYMBOL(trace_hardirqs_off);
319
320void trace_hardirqs_on_caller(unsigned long caller_addr)
321{
322 if (!preempt_trace() && irq_trace())
323 stop_critical_timing(CALLER_ADDR0, caller_addr);
324}
325EXPORT_SYMBOL(trace_hardirqs_on_caller);
326
327void trace_hardirqs_off_caller(unsigned long caller_addr)
328{
329 if (!preempt_trace() && irq_trace())
330 start_critical_timing(CALLER_ADDR0, caller_addr);
331}
332EXPORT_SYMBOL(trace_hardirqs_off_caller);
333
334#endif /* CONFIG_PROVE_LOCKING */
335#endif /* CONFIG_IRQSOFF_TRACER */
336
337#ifdef CONFIG_PREEMPT_TRACER
338void trace_preempt_on(unsigned long a0, unsigned long a1)
339{
340 stop_critical_timing(a0, a1);
341}
342
343void trace_preempt_off(unsigned long a0, unsigned long a1)
344{
345 start_critical_timing(a0, a1);
346}
347#endif /* CONFIG_PREEMPT_TRACER */
348
349static void start_irqsoff_tracer(struct trace_array *tr)
350{
351 register_ftrace_function(&trace_ops);
352 tracer_enabled = 1;
353}
354
355static void stop_irqsoff_tracer(struct trace_array *tr)
356{
357 tracer_enabled = 0;
358 unregister_ftrace_function(&trace_ops);
359}
360
361static void __irqsoff_tracer_init(struct trace_array *tr)
362{
363 irqsoff_trace = tr;
364 /* make sure that the tracer is visible */
365 smp_wmb();
366
367 if (tr->ctrl)
368 start_irqsoff_tracer(tr);
369}
370
371static void irqsoff_tracer_reset(struct trace_array *tr)
372{
373 if (tr->ctrl)
374 stop_irqsoff_tracer(tr);
375}
376
377static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
378{
379 if (tr->ctrl)
380 start_irqsoff_tracer(tr);
381 else
382 stop_irqsoff_tracer(tr);
383}
384
385static void irqsoff_tracer_open(struct trace_iterator *iter)
386{
387 /* stop the trace while dumping */
388 if (iter->tr->ctrl)
389 stop_irqsoff_tracer(iter->tr);
390}
391
392static void irqsoff_tracer_close(struct trace_iterator *iter)
393{
394 if (iter->tr->ctrl)
395 start_irqsoff_tracer(iter->tr);
396}
397
398#ifdef CONFIG_IRQSOFF_TRACER
399static void irqsoff_tracer_init(struct trace_array *tr)
400{
401 trace_type = TRACER_IRQS_OFF;
402
403 __irqsoff_tracer_init(tr);
404}
405static struct tracer irqsoff_tracer __read_mostly =
406{
407 .name = "irqsoff",
408 .init = irqsoff_tracer_init,
409 .reset = irqsoff_tracer_reset,
410 .open = irqsoff_tracer_open,
411 .close = irqsoff_tracer_close,
412 .ctrl_update = irqsoff_tracer_ctrl_update,
413 .print_max = 1,
414#ifdef CONFIG_FTRACE_SELFTEST
415 .selftest = trace_selftest_startup_irqsoff,
416#endif
417};
418# define register_irqsoff(trace) register_tracer(&trace)
419#else
420# define register_irqsoff(trace) do { } while (0)
421#endif
422
423#ifdef CONFIG_PREEMPT_TRACER
424static void preemptoff_tracer_init(struct trace_array *tr)
425{
426 trace_type = TRACER_PREEMPT_OFF;
427
428 __irqsoff_tracer_init(tr);
429}
430
431static struct tracer preemptoff_tracer __read_mostly =
432{
433 .name = "preemptoff",
434 .init = preemptoff_tracer_init,
435 .reset = irqsoff_tracer_reset,
436 .open = irqsoff_tracer_open,
437 .close = irqsoff_tracer_close,
438 .ctrl_update = irqsoff_tracer_ctrl_update,
439 .print_max = 1,
440#ifdef CONFIG_FTRACE_SELFTEST
441 .selftest = trace_selftest_startup_preemptoff,
442#endif
443};
444# define register_preemptoff(trace) register_tracer(&trace)
445#else
446# define register_preemptoff(trace) do { } while (0)
447#endif
448
449#if defined(CONFIG_IRQSOFF_TRACER) && \
450 defined(CONFIG_PREEMPT_TRACER)
451
452static void preemptirqsoff_tracer_init(struct trace_array *tr)
453{
454 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
455
456 __irqsoff_tracer_init(tr);
457}
458
459static struct tracer preemptirqsoff_tracer __read_mostly =
460{
461 .name = "preemptirqsoff",
462 .init = preemptirqsoff_tracer_init,
463 .reset = irqsoff_tracer_reset,
464 .open = irqsoff_tracer_open,
465 .close = irqsoff_tracer_close,
466 .ctrl_update = irqsoff_tracer_ctrl_update,
467 .print_max = 1,
468#ifdef CONFIG_FTRACE_SELFTEST
469 .selftest = trace_selftest_startup_preemptirqsoff,
470#endif
471};
472
473# define register_preemptirqsoff(trace) register_tracer(&trace)
474#else
475# define register_preemptirqsoff(trace) do { } while (0)
476#endif
477
478__init static int init_irqsoff_tracer(void)
479{
480 register_irqsoff(irqsoff_tracer);
481 register_preemptoff(preemptoff_tracer);
482 register_preemptirqsoff(preemptirqsoff_tracer);
483
484 return 0;
485}
486device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
new file mode 100644
index 000000000000..b13dc19dcbb4
--- /dev/null
+++ b/kernel/trace/trace_mmiotrace.c
@@ -0,0 +1,295 @@
1/*
2 * Memory mapped I/O tracing
3 *
4 * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
5 */
6
7#define DEBUG 1
8
9#include <linux/kernel.h>
10#include <linux/mmiotrace.h>
11#include <linux/pci.h>
12
13#include "trace.h"
14
15struct header_iter {
16 struct pci_dev *dev;
17};
18
19static struct trace_array *mmio_trace_array;
20static bool overrun_detected;
21
22static void mmio_reset_data(struct trace_array *tr)
23{
24 int cpu;
25
26 overrun_detected = false;
27 tr->time_start = ftrace_now(tr->cpu);
28
29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]);
31}
32
33static void mmio_trace_init(struct trace_array *tr)
34{
35 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr;
37 if (tr->ctrl) {
38 mmio_reset_data(tr);
39 enable_mmiotrace();
40 }
41}
42
43static void mmio_trace_reset(struct trace_array *tr)
44{
45 pr_debug("in %s\n", __func__);
46 if (tr->ctrl)
47 disable_mmiotrace();
48 mmio_reset_data(tr);
49 mmio_trace_array = NULL;
50}
51
52static void mmio_trace_ctrl_update(struct trace_array *tr)
53{
54 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) {
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61}
62
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
64{
65 int ret = 0;
66 int i;
67 resource_size_t start, end;
68 const struct pci_driver *drv = pci_dev_driver(dev);
69
70 /* XXX: incomplete checks for trace_seq_printf() return value */
71 ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
72 dev->bus->number, dev->devfn,
73 dev->vendor, dev->device, dev->irq);
74 /*
75 * XXX: is pci_resource_to_user() appropriate, since we are
76 * supposed to interpret the __ioremap() phys_addr argument based on
77 * these printed values?
78 */
79 for (i = 0; i < 7; i++) {
80 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
81 ret += trace_seq_printf(s, " %llx",
82 (unsigned long long)(start |
83 (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
84 }
85 for (i = 0; i < 7; i++) {
86 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
87 ret += trace_seq_printf(s, " %llx",
88 dev->resource[i].start < dev->resource[i].end ?
89 (unsigned long long)(end - start) + 1 : 0);
90 }
91 if (drv)
92 ret += trace_seq_printf(s, " %s\n", drv->name);
93 else
94 ret += trace_seq_printf(s, " \n");
95 return ret;
96}
97
98static void destroy_header_iter(struct header_iter *hiter)
99{
100 if (!hiter)
101 return;
102 pci_dev_put(hiter->dev);
103 kfree(hiter);
104}
105
106static void mmio_pipe_open(struct trace_iterator *iter)
107{
108 struct header_iter *hiter;
109 struct trace_seq *s = &iter->seq;
110
111 trace_seq_printf(s, "VERSION 20070824\n");
112
113 hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
114 if (!hiter)
115 return;
116
117 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
118 iter->private = hiter;
119}
120
121/* XXX: This is not called when the pipe is closed! */
122static void mmio_close(struct trace_iterator *iter)
123{
124 struct header_iter *hiter = iter->private;
125 destroy_header_iter(hiter);
126 iter->private = NULL;
127}
128
129static unsigned long count_overruns(struct trace_iterator *iter)
130{
131 int cpu;
132 unsigned long cnt = 0;
133 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0;
136 }
137 return cnt;
138}
139
140static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
141 char __user *ubuf, size_t cnt, loff_t *ppos)
142{
143 ssize_t ret;
144 struct header_iter *hiter = iter->private;
145 struct trace_seq *s = &iter->seq;
146 unsigned long n;
147
148 n = count_overruns(iter);
149 if (n) {
150 /* XXX: This is later than where events were lost. */
151 trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
152 if (!overrun_detected)
153 pr_warning("mmiotrace has lost events.\n");
154 overrun_detected = true;
155 goto print_out;
156 }
157
158 if (!hiter)
159 return 0;
160
161 mmio_print_pcidev(s, hiter->dev);
162 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
163
164 if (!hiter->dev) {
165 destroy_header_iter(hiter);
166 iter->private = NULL;
167 }
168
169print_out:
170 ret = trace_seq_to_user(s, ubuf, cnt);
171 return (ret == -EBUSY) ? 0 : ret;
172}
173
174static int mmio_print_rw(struct trace_iterator *iter)
175{
176 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw;
178 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t);
180 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t;
182 int ret = 1;
183
184 switch (entry->mmiorw.opcode) {
185 case MMIO_READ:
186 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
188 rw->width, secs, usec_rem, rw->map_id,
189 (unsigned long long)rw->phys,
190 rw->value, rw->pc, 0);
191 break;
192 case MMIO_WRITE:
193 ret = trace_seq_printf(s,
194 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
195 rw->width, secs, usec_rem, rw->map_id,
196 (unsigned long long)rw->phys,
197 rw->value, rw->pc, 0);
198 break;
199 case MMIO_UNKNOWN_OP:
200 ret = trace_seq_printf(s,
201 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
202 secs, usec_rem, rw->map_id,
203 (unsigned long long)rw->phys,
204 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
205 (rw->value >> 0) & 0xff, rw->pc, 0);
206 break;
207 default:
208 ret = trace_seq_printf(s, "rw what?\n");
209 break;
210 }
211 if (ret)
212 return 1;
213 return 0;
214}
215
216static int mmio_print_map(struct trace_iterator *iter)
217{
218 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap;
220 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t);
222 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t;
224 int ret = 1;
225
226 switch (entry->mmiorw.opcode) {
227 case MMIO_PROBE:
228 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
230 secs, usec_rem, m->map_id,
231 (unsigned long long)m->phys, m->virt, m->len,
232 0UL, 0);
233 break;
234 case MMIO_UNPROBE:
235 ret = trace_seq_printf(s,
236 "UNMAP %lu.%06lu %d 0x%lx %d\n",
237 secs, usec_rem, m->map_id, 0UL, 0);
238 break;
239 default:
240 ret = trace_seq_printf(s, "map what?\n");
241 break;
242 }
243 if (ret)
244 return 1;
245 return 0;
246}
247
248/* return 0 to abort printing without consuming current entry in pipe mode */
249static int mmio_print_line(struct trace_iterator *iter)
250{
251 switch (iter->ent->type) {
252 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter);
256 default:
257 return 1; /* ignore unknown entries */
258 }
259}
260
261static struct tracer mmio_tracer __read_mostly =
262{
263 .name = "mmiotrace",
264 .init = mmio_trace_init,
265 .reset = mmio_trace_reset,
266 .pipe_open = mmio_pipe_open,
267 .close = mmio_close,
268 .read = mmio_read,
269 .ctrl_update = mmio_trace_ctrl_update,
270 .print_line = mmio_print_line,
271};
272
273__init static int init_mmio_trace(void)
274{
275 return register_tracer(&mmio_tracer);
276}
277device_initcall(init_mmio_trace);
278
279void mmio_trace_rw(struct mmiotrace_rw *rw)
280{
281 struct trace_array *tr = mmio_trace_array;
282 struct trace_array_cpu *data = tr->data[smp_processor_id()];
283 __trace_mmiotrace_rw(tr, data, rw);
284}
285
286void mmio_trace_mapping(struct mmiotrace_map *map)
287{
288 struct trace_array *tr = mmio_trace_array;
289 struct trace_array_cpu *data;
290
291 preempt_disable();
292 data = tr->data[smp_processor_id()];
293 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable();
295}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..cb817a209aa0
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,286 @@
1/*
2 * trace context switch
3 *
4 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h>
11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h>
14
15#include "trace.h"
16
17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref;
20
21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev,
23 struct task_struct *next)
24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data;
28 unsigned long flags;
29 long disabled;
30 int cpu;
31
32 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next);
34
35 if (!tracer_enabled)
36 return;
37
38 local_irq_save(flags);
39 cpu = raw_smp_processor_id();
40 data = tr->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42
43 if (likely(disabled == 1))
44 tracing_sched_switch_trace(tr, data, prev, next, flags);
45
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags);
48}
49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
78 task_struct *curr)
79{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data;
83 unsigned long flags;
84 long disabled;
85 int cpu;
86
87 if (!tracer_enabled)
88 return;
89
90 tracing_record_cmdline(curr);
91
92 local_irq_save(flags);
93 cpu = raw_smp_processor_id();
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
99
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags);
102}
103
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr)
130{
131 int cpu;
132
133 tr->time_start = ftrace_now(tr->cpu);
134
135 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]);
137}
138
139static int tracing_sched_register(void)
140{
141 int ret;
142
143 ret = marker_probe_register("kernel_sched_wakeup",
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker"
149 " probe to kernel_sched_wakeup\n");
150 return ret;
151 }
152
153 ret = marker_probe_register("kernel_sched_wakeup_new",
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker"
159 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe;
161 }
162
163 ret = marker_probe_register("kernel_sched_schedule",
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) {
169 pr_info("sched trace: Couldn't add marker"
170 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new;
172 }
173
174 return ret;
175fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new",
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup",
181 wake_up_callback,
182 &ctx_trace);
183 return ret;
184}
185
186static void tracing_sched_unregister(void)
187{
188 marker_probe_unregister("kernel_sched_schedule",
189 sched_switch_callback,
190 &ctx_trace);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197}
198
199static void tracing_start_sched_switch(void)
200{
201 long ref;
202
203 ref = atomic_inc_return(&sched_ref);
204 if (ref == 1)
205 tracing_sched_register();
206}
207
208static void tracing_stop_sched_switch(void)
209{
210 long ref;
211
212 ref = atomic_dec_and_test(&sched_ref);
213 if (ref)
214 tracing_sched_unregister();
215}
216
217void tracing_start_cmdline_record(void)
218{
219 tracing_start_sched_switch();
220}
221
222void tracing_stop_cmdline_record(void)
223{
224 tracing_stop_sched_switch();
225}
226
227static void start_sched_trace(struct trace_array *tr)
228{
229 sched_switch_reset(tr);
230 tracing_start_cmdline_record();
231 tracer_enabled = 1;
232}
233
234static void stop_sched_trace(struct trace_array *tr)
235{
236 tracer_enabled = 0;
237 tracing_stop_cmdline_record();
238}
239
240static void sched_switch_trace_init(struct trace_array *tr)
241{
242 ctx_trace = tr;
243
244 if (tr->ctrl)
245 start_sched_trace(tr);
246}
247
248static void sched_switch_trace_reset(struct trace_array *tr)
249{
250 if (tr->ctrl)
251 stop_sched_trace(tr);
252}
253
254static void sched_switch_trace_ctrl_update(struct trace_array *tr)
255{
256 /* When starting a new trace, reset the buffers */
257 if (tr->ctrl)
258 start_sched_trace(tr);
259 else
260 stop_sched_trace(tr);
261}
262
263static struct tracer sched_switch_trace __read_mostly =
264{
265 .name = "sched_switch",
266 .init = sched_switch_trace_init,
267 .reset = sched_switch_trace_reset,
268 .ctrl_update = sched_switch_trace_ctrl_update,
269#ifdef CONFIG_FTRACE_SELFTEST
270 .selftest = trace_selftest_startup_sched_switch,
271#endif
272};
273
274__init static int init_sched_switch_trace(void)
275{
276 int ret = 0;
277
278 if (atomic_read(&sched_ref))
279 ret = tracing_sched_register();
280 if (ret) {
281 pr_info("error registering scheduler trace\n");
282 return ret;
283 }
284 return register_tracer(&sched_switch_trace);
285}
286device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..3c8d61df4474
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,448 @@
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
18#include <linux/marker.h>
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static DEFINE_SPINLOCK(wakeup_lock);
30
31static void __wakeup_reset(struct trace_array *tr);
32
33#ifdef CONFIG_FTRACE
34/*
35 * irqsoff uses its own tracer function to keep the overhead down:
36 */
37static void
38wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
39{
40 struct trace_array *tr = wakeup_trace;
41 struct trace_array_cpu *data;
42 unsigned long flags;
43 long disabled;
44 int resched;
45 int cpu;
46
47 if (likely(!wakeup_task))
48 return;
49
50 resched = need_resched();
51 preempt_disable_notrace();
52
53 cpu = raw_smp_processor_id();
54 data = tr->data[cpu];
55 disabled = atomic_inc_return(&data->disabled);
56 if (unlikely(disabled != 1))
57 goto out;
58
59 spin_lock_irqsave(&wakeup_lock, flags);
60
61 if (unlikely(!wakeup_task))
62 goto unlock;
63
64 /*
65 * The task can't disappear because it needs to
66 * wake up first, and we have the wakeup_lock.
67 */
68 if (task_cpu(wakeup_task) != cpu)
69 goto unlock;
70
71 trace_function(tr, data, ip, parent_ip, flags);
72
73 unlock:
74 spin_unlock_irqrestore(&wakeup_lock, flags);
75
76 out:
77 atomic_dec(&data->disabled);
78
79 /*
80 * To prevent recursion from the scheduler, if the
81 * resched flag was set before we entered, then
82 * don't reschedule.
83 */
84 if (resched)
85 preempt_enable_no_resched_notrace();
86 else
87 preempt_enable_notrace();
88}
89
90static struct ftrace_ops trace_ops __read_mostly =
91{
92 .func = wakeup_tracer_call,
93};
94#endif /* CONFIG_FTRACE */
95
96/*
97 * Should this new latency be reported/recorded?
98 */
99static int report_latency(cycle_t delta)
100{
101 if (tracing_thresh) {
102 if (delta < tracing_thresh)
103 return 0;
104 } else {
105 if (delta <= tracing_max_latency)
106 return 0;
107 }
108 return 1;
109}
110
111static void notrace
112wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
113 struct task_struct *next)
114{
115 unsigned long latency = 0, t0 = 0, t1 = 0;
116 struct trace_array **ptr = private;
117 struct trace_array *tr = *ptr;
118 struct trace_array_cpu *data;
119 cycle_t T0, T1, delta;
120 unsigned long flags;
121 long disabled;
122 int cpu;
123
124 if (unlikely(!tracer_enabled))
125 return;
126
127 /*
128 * When we start a new trace, we set wakeup_task to NULL
129 * and then set tracer_enabled = 1. We want to make sure
130 * that another CPU does not see the tracer_enabled = 1
131 * and the wakeup_task with an older task, that might
132 * actually be the same as next.
133 */
134 smp_rmb();
135
136 if (next != wakeup_task)
137 return;
138
139 /* The task we are waiting for is waking up */
140 data = tr->data[wakeup_cpu];
141
142 /* disable local data, not wakeup_cpu data */
143 cpu = raw_smp_processor_id();
144 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
145 if (likely(disabled != 1))
146 goto out;
147
148 spin_lock_irqsave(&wakeup_lock, flags);
149
150 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock;
153
154 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
155
156 /*
157 * usecs conversion is slow so we try to delay the conversion
158 * as long as possible:
159 */
160 T0 = data->preempt_timestamp;
161 T1 = ftrace_now(cpu);
162 delta = T1-T0;
163
164 if (!report_latency(delta))
165 goto out_unlock;
166
167 latency = nsecs_to_usecs(delta);
168
169 tracing_max_latency = delta;
170 t0 = nsecs_to_usecs(T0);
171 t1 = nsecs_to_usecs(T1);
172
173 update_max_tr(tr, wakeup_task, wakeup_cpu);
174
175out_unlock:
176 __wakeup_reset(tr);
177 spin_unlock_irqrestore(&wakeup_lock, flags);
178out:
179 atomic_dec(&tr->data[cpu]->disabled);
180}
181
182static notrace void
183sched_switch_callback(void *probe_data, void *call_data,
184 const char *format, va_list *args)
185{
186 struct task_struct *prev;
187 struct task_struct *next;
188 struct rq *__rq;
189
190 /* skip prev_pid %d next_pid %d prev_state %ld */
191 (void)va_arg(*args, int);
192 (void)va_arg(*args, int);
193 (void)va_arg(*args, long);
194 __rq = va_arg(*args, typeof(__rq));
195 prev = va_arg(*args, typeof(prev));
196 next = va_arg(*args, typeof(next));
197
198 tracing_record_cmdline(prev);
199
200 /*
201 * If tracer_switch_func only points to the local
202 * switch func, it still needs the ptr passed to it.
203 */
204 wakeup_sched_switch(probe_data, __rq, prev, next);
205}
206
207static void __wakeup_reset(struct trace_array *tr)
208{
209 struct trace_array_cpu *data;
210 int cpu;
211
212 assert_spin_locked(&wakeup_lock);
213
214 for_each_possible_cpu(cpu) {
215 data = tr->data[cpu];
216 tracing_reset(data);
217 }
218
219 wakeup_cpu = -1;
220 wakeup_prio = -1;
221
222 if (wakeup_task)
223 put_task_struct(wakeup_task);
224
225 wakeup_task = NULL;
226}
227
228static void wakeup_reset(struct trace_array *tr)
229{
230 unsigned long flags;
231
232 spin_lock_irqsave(&wakeup_lock, flags);
233 __wakeup_reset(tr);
234 spin_unlock_irqrestore(&wakeup_lock, flags);
235}
236
237static void
238wakeup_check_start(struct trace_array *tr, struct task_struct *p,
239 struct task_struct *curr)
240{
241 int cpu = smp_processor_id();
242 unsigned long flags;
243 long disabled;
244
245 if (likely(!rt_task(p)) ||
246 p->prio >= wakeup_prio ||
247 p->prio >= curr->prio)
248 return;
249
250 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
251 if (unlikely(disabled != 1))
252 goto out;
253
254 /* interrupts should be off from try_to_wake_up */
255 spin_lock(&wakeup_lock);
256
257 /* check for races. */
258 if (!tracer_enabled || p->prio >= wakeup_prio)
259 goto out_locked;
260
261 /* reset the trace */
262 __wakeup_reset(tr);
263
264 wakeup_cpu = task_cpu(p);
265 wakeup_prio = p->prio;
266
267 wakeup_task = p;
268 get_task_struct(wakeup_task);
269
270 local_save_flags(flags);
271
272 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
273 trace_function(tr, tr->data[wakeup_cpu],
274 CALLER_ADDR1, CALLER_ADDR2, flags);
275
276out_locked:
277 spin_unlock(&wakeup_lock);
278out:
279 atomic_dec(&tr->data[cpu]->disabled);
280}
281
282static notrace void
283wake_up_callback(void *probe_data, void *call_data,
284 const char *format, va_list *args)
285{
286 struct trace_array **ptr = probe_data;
287 struct trace_array *tr = *ptr;
288 struct task_struct *curr;
289 struct task_struct *task;
290 struct rq *__rq;
291
292 if (likely(!tracer_enabled))
293 return;
294
295 /* Skip pid %d state %ld */
296 (void)va_arg(*args, int);
297 (void)va_arg(*args, long);
298 /* now get the meat: "rq %p task %p rq->curr %p" */
299 __rq = va_arg(*args, typeof(__rq));
300 task = va_arg(*args, typeof(task));
301 curr = va_arg(*args, typeof(curr));
302
303 tracing_record_cmdline(task);
304 tracing_record_cmdline(curr);
305
306 wakeup_check_start(tr, task, curr);
307}
308
309static void start_wakeup_tracer(struct trace_array *tr)
310{
311 int ret;
312
313 ret = marker_probe_register("kernel_sched_wakeup",
314 "pid %d state %ld ## rq %p task %p rq->curr %p",
315 wake_up_callback,
316 &wakeup_trace);
317 if (ret) {
318 pr_info("wakeup trace: Couldn't add marker"
319 " probe to kernel_sched_wakeup\n");
320 return;
321 }
322
323 ret = marker_probe_register("kernel_sched_wakeup_new",
324 "pid %d state %ld ## rq %p task %p rq->curr %p",
325 wake_up_callback,
326 &wakeup_trace);
327 if (ret) {
328 pr_info("wakeup trace: Couldn't add marker"
329 " probe to kernel_sched_wakeup_new\n");
330 goto fail_deprobe;
331 }
332
333 ret = marker_probe_register("kernel_sched_schedule",
334 "prev_pid %d next_pid %d prev_state %ld "
335 "## rq %p prev %p next %p",
336 sched_switch_callback,
337 &wakeup_trace);
338 if (ret) {
339 pr_info("sched trace: Couldn't add marker"
340 " probe to kernel_sched_schedule\n");
341 goto fail_deprobe_wake_new;
342 }
343
344 wakeup_reset(tr);
345
346 /*
347 * Don't let the tracer_enabled = 1 show up before
348 * the wakeup_task is reset. This may be overkill since
349 * wakeup_reset does a spin_unlock after setting the
350 * wakeup_task to NULL, but I want to be safe.
351 * This is a slow path anyway.
352 */
353 smp_wmb();
354
355 register_ftrace_function(&trace_ops);
356
357 tracer_enabled = 1;
358
359 return;
360fail_deprobe_wake_new:
361 marker_probe_unregister("kernel_sched_wakeup_new",
362 wake_up_callback,
363 &wakeup_trace);
364fail_deprobe:
365 marker_probe_unregister("kernel_sched_wakeup",
366 wake_up_callback,
367 &wakeup_trace);
368}
369
370static void stop_wakeup_tracer(struct trace_array *tr)
371{
372 tracer_enabled = 0;
373 unregister_ftrace_function(&trace_ops);
374 marker_probe_unregister("kernel_sched_schedule",
375 sched_switch_callback,
376 &wakeup_trace);
377 marker_probe_unregister("kernel_sched_wakeup_new",
378 wake_up_callback,
379 &wakeup_trace);
380 marker_probe_unregister("kernel_sched_wakeup",
381 wake_up_callback,
382 &wakeup_trace);
383}
384
385static void wakeup_tracer_init(struct trace_array *tr)
386{
387 wakeup_trace = tr;
388
389 if (tr->ctrl)
390 start_wakeup_tracer(tr);
391}
392
393static void wakeup_tracer_reset(struct trace_array *tr)
394{
395 if (tr->ctrl) {
396 stop_wakeup_tracer(tr);
397 /* make sure we put back any tasks we are tracing */
398 wakeup_reset(tr);
399 }
400}
401
402static void wakeup_tracer_ctrl_update(struct trace_array *tr)
403{
404 if (tr->ctrl)
405 start_wakeup_tracer(tr);
406 else
407 stop_wakeup_tracer(tr);
408}
409
410static void wakeup_tracer_open(struct trace_iterator *iter)
411{
412 /* stop the trace while dumping */
413 if (iter->tr->ctrl)
414 stop_wakeup_tracer(iter->tr);
415}
416
417static void wakeup_tracer_close(struct trace_iterator *iter)
418{
419 /* forget about any processes we were recording */
420 if (iter->tr->ctrl)
421 start_wakeup_tracer(iter->tr);
422}
423
424static struct tracer wakeup_tracer __read_mostly =
425{
426 .name = "wakeup",
427 .init = wakeup_tracer_init,
428 .reset = wakeup_tracer_reset,
429 .open = wakeup_tracer_open,
430 .close = wakeup_tracer_close,
431 .ctrl_update = wakeup_tracer_ctrl_update,
432 .print_max = 1,
433#ifdef CONFIG_FTRACE_SELFTEST
434 .selftest = trace_selftest_startup_wakeup,
435#endif
436};
437
438__init static int init_wakeup_tracer(void)
439{
440 int ret;
441
442 ret = register_tracer(&wakeup_tracer);
443 if (ret)
444 return ret;
445
446 return 0;
447}
448device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..0911b7e073bf
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,563 @@
1/* Include in trace.c */
2
3#include <linux/kthread.h>
4#include <linux/delay.h>
5
6static inline int trace_valid_entry(struct trace_entry *entry)
7{
8 switch (entry->type) {
9 case TRACE_FN:
10 case TRACE_CTX:
11 case TRACE_WAKE:
12 case TRACE_STACK:
13 case TRACE_SPECIAL:
14 return 1;
15 }
16 return 0;
17}
18
19static int
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{
22 struct trace_entry *entries;
23 struct page *page;
24 int idx = 0;
25 int i;
26
27 BUG_ON(list_empty(&data->trace_pages));
28 page = list_entry(data->trace_pages.next, struct page, lru);
29 entries = page_address(page);
30
31 check_pages(data);
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type);
46 goto failed;
47 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0;
72
73 failed:
74 /* disable tracing */
75 tracing_disabled = 1;
76 printk(KERN_CONT ".. corrupted trace buffer .. ");
77 return -1;
78}
79
80/*
81 * Test the trace buffer to see if all the elements
82 * are still sane.
83 */
84static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85{
86 unsigned long flags, cnt = 0;
87 int cpu, ret = 0;
88
89 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95
96 cnt += tr->data[cpu]->trace_idx;
97
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
99 if (ret)
100 break;
101 }
102 __raw_spin_unlock(&ftrace_max_lock);
103 raw_local_irq_restore(flags);
104
105 if (count)
106 *count = cnt;
107
108 return ret;
109}
110
111#ifdef CONFIG_FTRACE
112
113#ifdef CONFIG_DYNAMIC_FTRACE
114
115#define __STR(x) #x
116#define STR(x) __STR(x)
117
118/* Test dynamic code modification and ftrace filters */
119int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr,
121 int (*func)(void))
122{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled;
127 char *func_name;
128
129 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n");
131 pr_info("Testing dynamic ftrace: ");
132
133 /* enable tracing, and record the filter function */
134 ftrace_enabled = 1;
135 tracer_enabled = 1;
136
137 /* passed in by parameter to fool gcc from optimizing */
138 func();
139
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /*
148 * Some archs *cough*PowerPC*cough* add charachters to the
149 * start of the function names. We simply put a '*' to
150 * accomodate them.
151 */
152 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
153
154 /* filter only on our function */
155 ftrace_set_filter(func_name, strlen(func_name), 1);
156
157 /* enable tracing */
158 tr->ctrl = 1;
159 trace->init(tr);
160 /* Sleep for a 1/10 of a second */
161 msleep(100);
162
163 /* we should have nothing in the buffer */
164 ret = trace_test_buffer(tr, &count);
165 if (ret)
166 goto out;
167
168 if (count) {
169 ret = -1;
170 printk(KERN_CONT ".. filter did not filter .. ");
171 goto out;
172 }
173
174 /* call our function again */
175 func();
176
177 /* sleep again */
178 msleep(100);
179
180 /* stop the tracing. */
181 tr->ctrl = 0;
182 trace->ctrl_update(tr);
183 ftrace_enabled = 0;
184
185 /* check the trace buffer */
186 ret = trace_test_buffer(tr, &count);
187 trace->reset(tr);
188
189 /* we should only have one item */
190 if (!ret && count != 1) {
191 printk(KERN_CONT ".. filter failed count=%ld ..", count);
192 ret = -1;
193 goto out;
194 }
195 out:
196 ftrace_enabled = save_ftrace_enabled;
197 tracer_enabled = save_tracer_enabled;
198
199 /* Enable tracing on all functions again */
200 ftrace_set_filter(NULL, 0, 1);
201
202 return ret;
203}
204#else
205# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
206#endif /* CONFIG_DYNAMIC_FTRACE */
207/*
208 * Simple verification test of ftrace function tracer.
209 * Enable ftrace, sleep 1/10 second, and then read the trace
210 * buffer to see if all is in order.
211 */
212int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled;
219
220 /* make sure msleep has been recorded */
221 msleep(1);
222
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */
231 ftrace_enabled = 1;
232 tracer_enabled = 1;
233
234 tr->ctrl = 1;
235 trace->init(tr);
236 /* Sleep for a 1/10 of a second */
237 msleep(100);
238 /* stop the tracing. */
239 tr->ctrl = 0;
240 trace->ctrl_update(tr);
241 ftrace_enabled = 0;
242
243 /* check the trace buffer */
244 ret = trace_test_buffer(tr, &count);
245 trace->reset(tr);
246
247 if (!ret && !count) {
248 printk(KERN_CONT ".. no entries found ..");
249 ret = -1;
250 goto out;
251 }
252
253 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
254 DYN_FTRACE_TEST_NAME);
255
256 out:
257 ftrace_enabled = save_ftrace_enabled;
258 tracer_enabled = save_tracer_enabled;
259
260 /* kill ftrace totally if we failed */
261 if (ret)
262 ftrace_kill();
263
264 return ret;
265}
266#endif /* CONFIG_FTRACE */
267
268#ifdef CONFIG_IRQSOFF_TRACER
269int
270trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
271{
272 unsigned long save_max = tracing_max_latency;
273 unsigned long count;
274 int ret;
275
276 /* start the tracing */
277 tr->ctrl = 1;
278 trace->init(tr);
279 /* reset the max latency */
280 tracing_max_latency = 0;
281 /* disable interrupts for a bit */
282 local_irq_disable();
283 udelay(100);
284 local_irq_enable();
285 /* stop the tracing. */
286 tr->ctrl = 0;
287 trace->ctrl_update(tr);
288 /* check both trace buffers */
289 ret = trace_test_buffer(tr, NULL);
290 if (!ret)
291 ret = trace_test_buffer(&max_tr, &count);
292 trace->reset(tr);
293
294 if (!ret && !count) {
295 printk(KERN_CONT ".. no entries found ..");
296 ret = -1;
297 }
298
299 tracing_max_latency = save_max;
300
301 return ret;
302}
303#endif /* CONFIG_IRQSOFF_TRACER */
304
305#ifdef CONFIG_PREEMPT_TRACER
306int
307trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
308{
309 unsigned long save_max = tracing_max_latency;
310 unsigned long count;
311 int ret;
312
313 /* start the tracing */
314 tr->ctrl = 1;
315 trace->init(tr);
316 /* reset the max latency */
317 tracing_max_latency = 0;
318 /* disable preemption for a bit */
319 preempt_disable();
320 udelay(100);
321 preempt_enable();
322 /* stop the tracing. */
323 tr->ctrl = 0;
324 trace->ctrl_update(tr);
325 /* check both trace buffers */
326 ret = trace_test_buffer(tr, NULL);
327 if (!ret)
328 ret = trace_test_buffer(&max_tr, &count);
329 trace->reset(tr);
330
331 if (!ret && !count) {
332 printk(KERN_CONT ".. no entries found ..");
333 ret = -1;
334 }
335
336 tracing_max_latency = save_max;
337
338 return ret;
339}
340#endif /* CONFIG_PREEMPT_TRACER */
341
342#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
343int
344trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
345{
346 unsigned long save_max = tracing_max_latency;
347 unsigned long count;
348 int ret;
349
350 /* start the tracing */
351 tr->ctrl = 1;
352 trace->init(tr);
353
354 /* reset the max latency */
355 tracing_max_latency = 0;
356
357 /* disable preemption and interrupts for a bit */
358 preempt_disable();
359 local_irq_disable();
360 udelay(100);
361 preempt_enable();
362 /* reverse the order of preempt vs irqs */
363 local_irq_enable();
364
365 /* stop the tracing. */
366 tr->ctrl = 0;
367 trace->ctrl_update(tr);
368 /* check both trace buffers */
369 ret = trace_test_buffer(tr, NULL);
370 if (ret)
371 goto out;
372
373 ret = trace_test_buffer(&max_tr, &count);
374 if (ret)
375 goto out;
376
377 if (!ret && !count) {
378 printk(KERN_CONT ".. no entries found ..");
379 ret = -1;
380 goto out;
381 }
382
383 /* do the test by disabling interrupts first this time */
384 tracing_max_latency = 0;
385 tr->ctrl = 1;
386 trace->ctrl_update(tr);
387 preempt_disable();
388 local_irq_disable();
389 udelay(100);
390 preempt_enable();
391 /* reverse the order of preempt vs irqs */
392 local_irq_enable();
393
394 /* stop the tracing. */
395 tr->ctrl = 0;
396 trace->ctrl_update(tr);
397 /* check both trace buffers */
398 ret = trace_test_buffer(tr, NULL);
399 if (ret)
400 goto out;
401
402 ret = trace_test_buffer(&max_tr, &count);
403
404 if (!ret && !count) {
405 printk(KERN_CONT ".. no entries found ..");
406 ret = -1;
407 goto out;
408 }
409
410 out:
411 trace->reset(tr);
412 tracing_max_latency = save_max;
413
414 return ret;
415}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417
418#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data)
420{
421 /* Make this a RT thread, doesn't need to be too high */
422 struct sched_param param = { .sched_priority = 5 };
423 struct completion *x = data;
424
425 sched_setscheduler(current, SCHED_FIFO, &param);
426
427 /* Make it know we have a new prio */
428 complete(x);
429
430 /* now go to sleep and let the test wake us up */
431 set_current_state(TASK_INTERRUPTIBLE);
432 schedule();
433
434 /* we are awake, now wait to disappear */
435 while (!kthread_should_stop()) {
436 /*
437 * This is an RT task, do short sleeps to let
438 * others run.
439 */
440 msleep(100);
441 }
442
443 return 0;
444}
445
446int
447trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448{
449 unsigned long save_max = tracing_max_latency;
450 struct task_struct *p;
451 struct completion isrt;
452 unsigned long count;
453 int ret;
454
455 init_completion(&isrt);
456
457 /* create a high prio thread */
458 p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
459 if (IS_ERR(p)) {
460 printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
461 return -1;
462 }
463
464 /* make sure the thread is running at an RT prio */
465 wait_for_completion(&isrt);
466
467 /* start the tracing */
468 tr->ctrl = 1;
469 trace->init(tr);
470 /* reset the max latency */
471 tracing_max_latency = 0;
472
473 /* sleep to let the RT thread sleep too */
474 msleep(100);
475
476 /*
477 * Yes this is slightly racy. It is possible that for some
478 * strange reason that the RT thread we created, did not
479 * call schedule for 100ms after doing the completion,
480 * and we do a wakeup on a task that already is awake.
481 * But that is extremely unlikely, and the worst thing that
482 * happens in such a case, is that we disable tracing.
483 * Honestly, if this race does happen something is horrible
484 * wrong with the system.
485 */
486
487 wake_up_process(p);
488
489 /* stop the tracing. */
490 tr->ctrl = 0;
491 trace->ctrl_update(tr);
492 /* check both trace buffers */
493 ret = trace_test_buffer(tr, NULL);
494 if (!ret)
495 ret = trace_test_buffer(&max_tr, &count);
496
497
498 trace->reset(tr);
499
500 tracing_max_latency = save_max;
501
502 /* kill the thread */
503 kthread_stop(p);
504
505 if (!ret && !count) {
506 printk(KERN_CONT ".. no entries found ..");
507 ret = -1;
508 }
509
510 return ret;
511}
512#endif /* CONFIG_SCHED_TRACER */
513
514#ifdef CONFIG_CONTEXT_SWITCH_TRACER
515int
516trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
517{
518 unsigned long count;
519 int ret;
520
521 /* start the tracing */
522 tr->ctrl = 1;
523 trace->init(tr);
524 /* Sleep for a 1/10 of a second */
525 msleep(100);
526 /* stop the tracing. */
527 tr->ctrl = 0;
528 trace->ctrl_update(tr);
529 /* check the trace buffer */
530 ret = trace_test_buffer(tr, &count);
531 trace->reset(tr);
532
533 if (!ret && !count) {
534 printk(KERN_CONT ".. no entries found ..");
535 ret = -1;
536 }
537
538 return ret;
539}
540#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
541
542#ifdef CONFIG_SYSPROF_TRACER
543int
544trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
545{
546 unsigned long count;
547 int ret;
548
549 /* start the tracing */
550 tr->ctrl = 1;
551 trace->init(tr);
552 /* Sleep for a 1/10 of a second */
553 msleep(100);
554 /* stop the tracing. */
555 tr->ctrl = 0;
556 trace->ctrl_update(tr);
557 /* check the trace buffer */
558 ret = trace_test_buffer(tr, &count);
559 trace->reset(tr);
560
561 return ret;
562}
563#endif /* CONFIG_SYSPROF_TRACER */
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
1#include "trace.h"
2
3int DYN_FTRACE_TEST_NAME(void)
4{
5 /* used to call mcount */
6 return 0;
7}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
new file mode 100644
index 000000000000..2301e1e7c606
--- /dev/null
+++ b/kernel/trace/trace_sysprof.c
@@ -0,0 +1,363 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
21static struct trace_array *sysprof_trace;
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
27static unsigned long sample_period = 1000000;
28static const unsigned int sample_max_depth = 512;
29
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
63static void
64backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
65{
66 /* Ignore warnings */
67}
68
69static void backtrace_warning(void *data, char *msg)
70{
71 /* Ignore warnings */
72}
73
74static int backtrace_stack(void *data, char *name)
75{
76 /* Don't bother with IRQ stacks for now */
77 return -1;
78}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91const static struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96};
97
98static int
99trace_kernel(struct pt_regs *regs, struct trace_array *tr,
100 struct trace_array_cpu *data)
101{
102 struct backtrace_info info;
103 unsigned long bp;
104 char *stack;
105
106 info.tr = tr;
107 info.data = data;
108 info.pos = 1;
109
110 __trace_special(info.tr, info.data, 1, regs->ip, 0);
111
112 stack = ((char *)regs + sizeof(struct pt_regs));
113#ifdef CONFIG_FRAME_POINTER
114 bp = regs->bp;
115#else
116 bp = 0;
117#endif
118
119 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
120
121 return info.pos;
122}
123
124static void timer_notify(struct pt_regs *regs, int cpu)
125{
126 struct trace_array_cpu *data;
127 struct stack_frame frame;
128 struct trace_array *tr;
129 const void __user *fp;
130 int is_user;
131 int i;
132
133 if (!regs)
134 return;
135
136 tr = sysprof_trace;
137 data = tr->data[cpu];
138 is_user = user_mode(regs);
139
140 if (!current || current->pid == 0)
141 return;
142
143 if (is_user && current->state != TASK_RUNNING)
144 return;
145
146 __trace_special(tr, data, 0, 0, current->pid);
147
148 if (!is_user)
149 i = trace_kernel(regs, tr, data);
150 else
151 i = 0;
152
153 /*
154 * Trace user stack if we are not a kernel thread
155 */
156 if (current->mm && i < sample_max_depth) {
157 regs = (struct pt_regs *)current->thread.sp0 - 1;
158
159 fp = (void __user *)regs->bp;
160
161 __trace_special(tr, data, 2, regs->ip, 0);
162
163 while (i < sample_max_depth) {
164 frame.next_fp = 0;
165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame))
167 break;
168 if ((unsigned long)fp < regs->sp)
169 break;
170
171 __trace_special(tr, data, 2, frame.return_address,
172 (unsigned long)fp);
173 fp = frame.next_fp;
174
175 i++;
176 }
177
178 }
179
180 /*
181 * Special trace entry if we overflow the max depth:
182 */
183 if (i == sample_max_depth)
184 __trace_special(tr, data, -1, -1, -1);
185
186 __trace_special(tr, data, 3, current->pid, i);
187}
188
189static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
190{
191 /* trace here */
192 timer_notify(get_irq_regs(), smp_processor_id());
193
194 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
195
196 return HRTIMER_RESTART;
197}
198
199static void start_stack_timer(int cpu)
200{
201 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208}
209
210static void start_stack_timers(void)
211{
212 cpumask_t saved_mask = current->cpus_allowed;
213 int cpu;
214
215 for_each_online_cpu(cpu) {
216 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
217 start_stack_timer(cpu);
218 }
219 set_cpus_allowed_ptr(current, &saved_mask);
220}
221
222static void stop_stack_timer(int cpu)
223{
224 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
225
226 hrtimer_cancel(hrtimer);
227}
228
229static void stop_stack_timers(void)
230{
231 int cpu;
232
233 for_each_online_cpu(cpu)
234 stop_stack_timer(cpu);
235}
236
237static void stack_reset(struct trace_array *tr)
238{
239 int cpu;
240
241 tr->time_start = ftrace_now(tr->cpu);
242
243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]);
245}
246
247static void start_stack_trace(struct trace_array *tr)
248{
249 mutex_lock(&sample_timer_lock);
250 stack_reset(tr);
251 start_stack_timers();
252 tracer_enabled = 1;
253 mutex_unlock(&sample_timer_lock);
254}
255
256static void stop_stack_trace(struct trace_array *tr)
257{
258 mutex_lock(&sample_timer_lock);
259 stop_stack_timers();
260 tracer_enabled = 0;
261 mutex_unlock(&sample_timer_lock);
262}
263
264static void stack_trace_init(struct trace_array *tr)
265{
266 sysprof_trace = tr;
267
268 if (tr->ctrl)
269 start_stack_trace(tr);
270}
271
272static void stack_trace_reset(struct trace_array *tr)
273{
274 if (tr->ctrl)
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285}
286
287static struct tracer stack_trace __read_mostly =
288{
289 .name = "sysprof",
290 .init = stack_trace_init,
291 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof,
295#endif
296};
297
298__init static int init_stack_trace(void)
299{
300 return register_tracer(&stack_trace);
301}
302device_initcall(init_stack_trace);
303
304#define MAX_LONG_DIGITS 22
305
306static ssize_t
307sysprof_sample_read(struct file *filp, char __user *ubuf,
308 size_t cnt, loff_t *ppos)
309{
310 char buf[MAX_LONG_DIGITS];
311 int r;
312
313 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
314
315 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
316}
317
318static ssize_t
319sysprof_sample_write(struct file *filp, const char __user *ubuf,
320 size_t cnt, loff_t *ppos)
321{
322 char buf[MAX_LONG_DIGITS];
323 unsigned long val;
324
325 if (cnt > MAX_LONG_DIGITS-1)
326 cnt = MAX_LONG_DIGITS-1;
327
328 if (copy_from_user(&buf, ubuf, cnt))
329 return -EFAULT;
330
331 buf[cnt] = 0;
332
333 val = simple_strtoul(buf, NULL, 10);
334 /*
335 * Enforce a minimum sample period of 100 usecs:
336 */
337 if (val < 100)
338 val = 100;
339
340 mutex_lock(&sample_timer_lock);
341 stop_stack_timers();
342 sample_period = val * 1000;
343 start_stack_timers();
344 mutex_unlock(&sample_timer_lock);
345
346 return cnt;
347}
348
349static struct file_operations sysprof_sample_fops = {
350 .read = sysprof_sample_read,
351 .write = sysprof_sample_write,
352};
353
354void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
355{
356 struct dentry *entry;
357
358 entry = debugfs_create_file("sysprof_sample_period", 0644,
359 d_tracer, NULL, &sysprof_sample_fops);
360 if (entry)
361 return;
362 pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
363}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..d8b6279a9b42 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -634,6 +634,8 @@ config LATENCYTOP
634 Enable this option if you want to use the LatencyTOP tool 634 Enable this option if you want to use the LatencyTOP tool
635 to find out which userspace is blocking on what kernel operations. 635 to find out which userspace is blocking on what kernel operations.
636 636
637source kernel/trace/Kconfig
638
637config PROVIDE_OHCI1394_DMA_INIT 639config PROVIDE_OHCI1394_DMA_INIT
638 bool "Remote debugging over FireWire early on boot" 640 bool "Remote debugging over FireWire early on boot"
639 depends on PCI && X86 641 depends on PCI && X86
diff --git a/lib/Makefile b/lib/Makefile
index 74b0cfb1fcc3..4b836a53c08f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,6 +8,15 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
9 proportions.o prio_heap.o ratelimit.o 9 proportions.o prio_heap.o ratelimit.o
10 10
11ifdef CONFIG_FTRACE
12# Do not profile string.o, since it may be used in early boot or vdso
13CFLAGS_REMOVE_string.o = -pg
14# Also do not profile any debug utilities
15CFLAGS_REMOVE_spinlock_debug.o = -pg
16CFLAGS_REMOVE_list_debug.o = -pg
17CFLAGS_REMOVE_debugobjects.o = -pg
18endif
19
11lib-$(CONFIG_MMU) += ioremap.o 20lib-$(CONFIG_MMU) += ioremap.o
12lib-$(CONFIG_SMP) += cpumask.o 21lib-$(CONFIG_SMP) += cpumask.o
13 22
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 6c90fb90e19c..3b4dc098181e 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,7 @@
7#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9 9
10unsigned int debug_smp_processor_id(void) 10notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count(); 12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 13 int this_cpu = raw_smp_processor_id();
@@ -37,7 +37,7 @@ unsigned int debug_smp_processor_id(void)
37 /* 37 /*
38 * Avoid recursion: 38 * Avoid recursion:
39 */ 39 */
40 preempt_disable(); 40 preempt_disable_notrace();
41 41
42 if (!printk_ratelimit()) 42 if (!printk_ratelimit())
43 goto out_enable; 43 goto out_enable;
@@ -49,7 +49,7 @@ unsigned int debug_smp_processor_id(void)
49 dump_stack(); 49 dump_stack();
50 50
51out_enable: 51out_enable:
52 preempt_enable_no_resched(); 52 preempt_enable_no_resched_notrace();
53out: 53out:
54 return this_cpu; 54 return this_cpu;
55} 55}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 789b6adbef37..b38f700825fc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
126static struct prop_descriptor vm_completions; 126static struct prop_descriptor vm_completions;
127static struct prop_descriptor vm_dirties; 127static struct prop_descriptor vm_dirties;
128 128
129static unsigned long determine_dirtyable_memory(void);
130
131/* 129/*
132 * couple the period to the dirty_ratio: 130 * couple the period to the dirty_ratio:
133 * 131 *
@@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
347#endif 345#endif
348} 346}
349 347
350static unsigned long determine_dirtyable_memory(void) 348/**
349 * determine_dirtyable_memory - amount of memory that may be used
350 *
351 * Returns the numebr of pages that can currently be freed and used
352 * by the kernel for direct mappings.
353 */
354unsigned long determine_dirtyable_memory(void)
351{ 355{
352 unsigned long x; 356 unsigned long x;
353 357
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8e440233c27d..ea48b82a3707 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
96modname_flags = $(if $(filter 1,$(words $(modname))),\ 96modname_flags = $(if $(filter 1,$(words $(modname))),\
97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") 97 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
98 98
99_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) 99orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
100_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
100_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o) 101_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
101_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F)) 102_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))
102 103