-rw-r--r-- Documentation/ABI/testing/debugfs-kmemtrace | 71
-rw-r--r-- Documentation/filesystems/ext2.txt | 10
-rw-r--r-- Documentation/filesystems/ext3.txt | 4
-rw-r--r-- Documentation/ftrace.txt | 1134
-rw-r--r-- Documentation/hwmon/lm90 | 10
-rw-r--r-- Documentation/kernel-parameters.txt | 12
-rw-r--r-- Documentation/lockdep-design.txt | 30
-rw-r--r-- Documentation/sysrq.txt | 2
-rw-r--r-- Documentation/tracepoints.txt | 8
-rw-r--r-- Documentation/vm/kmemtrace.txt | 126
-rw-r--r-- MAINTAINERS | 10
-rw-r--r-- Makefile | 18
-rw-r--r-- arch/Kconfig | 1
-rw-r--r-- arch/alpha/include/asm/ftrace.h | 1
-rw-r--r-- arch/alpha/include/asm/hardirq.h | 13
-rw-r--r-- arch/avr32/include/asm/ftrace.h | 1
-rw-r--r-- arch/avr32/include/asm/hardirq.h | 11
-rw-r--r-- arch/blackfin/include/asm/ftrace.h | 1
-rw-r--r-- arch/cris/include/asm/ftrace.h | 1
-rw-r--r-- arch/h8300/include/asm/ftrace.h | 1
-rw-r--r-- arch/ia64/Kconfig | 3
-rw-r--r-- arch/ia64/include/asm/ftrace.h | 28
-rw-r--r-- arch/ia64/include/asm/hardirq.h | 10
-rw-r--r-- arch/ia64/kernel/Makefile | 5
-rw-r--r-- arch/ia64/kernel/entry.S | 100
-rw-r--r-- arch/ia64/kernel/ftrace.c | 206
-rw-r--r-- arch/ia64/kernel/ia64_ksyms.c | 6
-rw-r--r-- arch/m68k/include/asm/ftrace.h | 1
-rw-r--r-- arch/mips/configs/ip27_defconfig | 2
-rw-r--r-- arch/mips/configs/jmr3927_defconfig | 265
-rw-r--r-- arch/mips/configs/rbtx49xx_defconfig | 319
-rw-r--r-- arch/mips/include/asm/ftrace.h | 1
-rw-r--r-- arch/mips/include/asm/hazards.h | 3
-rw-r--r-- arch/mips/include/asm/prefetch.h | 2
-rw-r--r-- arch/mips/kernel/cpu-probe.c | 1
-rw-r--r-- arch/mips/kernel/linux32.c | 2
-rw-r--r-- arch/mips/mm/page.c | 3
-rw-r--r-- arch/mips/mm/tlbex.c | 1
-rw-r--r-- arch/parisc/include/asm/ftrace.h | 1
-rw-r--r-- arch/sparc/kernel/irq_64.c | 29
-rw-r--r-- arch/um/include/asm/ftrace.h | 1
-rw-r--r-- arch/um/kernel/ptrace.c | 5
-rw-r--r-- arch/um/os-Linux/user_syms.c | 5
-rw-r--r-- arch/x86/Kconfig | 2
-rw-r--r-- arch/x86/include/asm/cacheflush.h | 5
-rw-r--r-- arch/x86/include/asm/fixmap.h | 2
-rw-r--r-- arch/x86/include/asm/ftrace.h | 32
-rw-r--r-- arch/x86/include/asm/ptrace-abi.h | 3
-rw-r--r-- arch/x86/include/asm/thread_info.h | 9
-rw-r--r-- arch/x86/kernel/Makefile | 3
-rw-r--r-- arch/x86/kernel/alternative.c | 29
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 8
-rw-r--r-- arch/x86/kernel/dumpstack.c | 7
-rw-r--r-- arch/x86/kernel/ftrace.c | 206
-rw-r--r-- arch/x86/kernel/process.c | 5
-rw-r--r-- arch/x86/kernel/ptrace.c | 7
-rw-r--r-- arch/x86/kvm/Kconfig | 3
-rw-r--r-- arch/x86/mm/init_32.c | 35
-rw-r--r-- arch/x86/mm/init_64.c | 37
-rw-r--r-- arch/xtensa/include/asm/ftrace.h | 1
-rw-r--r-- block/Kconfig | 16
-rw-r--r-- block/Makefile | 1
-rw-r--r-- drivers/char/sysrq.c | 2
-rw-r--r-- drivers/hwmon/Kconfig | 4
-rw-r--r-- drivers/hwmon/abituguru3.c | 7
-rw-r--r-- drivers/hwmon/f75375s.c | 2
-rw-r--r-- drivers/hwmon/it87.c | 8
-rw-r--r-- drivers/hwmon/lm90.c | 8
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c | 39
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 2
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.h | 1
-rw-r--r-- drivers/mfd/wm8350-core.c | 5
-rw-r--r-- drivers/mmc/host/s3cmci.c | 3
-rw-r--r-- drivers/net/sunhme.c | 2
-rw-r--r-- drivers/oprofile/cpu_buffer.c | 5
-rw-r--r-- drivers/pci/hotplug/Kconfig | 2
-rw-r--r-- drivers/pci/pcie/aer/aerdrv_core.c | 3
-rw-r--r-- drivers/pci/pcie/portdrv_pci.c | 1
-rw-r--r-- drivers/pci/quirks.c | 31
-rw-r--r-- drivers/platform/x86/acer-wmi.c | 2
-rw-r--r-- drivers/power/ds2760_battery.c | 11
-rw-r--r-- drivers/sbus/char/bbc_i2c.c | 2
-rw-r--r-- drivers/sbus/char/jsflash.c | 3
-rw-r--r-- drivers/w1/masters/w1-gpio.c | 2
-rw-r--r-- fs/fs-writeback.c | 9
-rw-r--r-- fs/inode.c | 7
-rw-r--r-- fs/partitions/check.c | 4
-rw-r--r-- fs/pipe.c | 8
-rw-r--r-- fs/squashfs/block.c | 21
-rw-r--r-- fs/super.c | 5
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c | 12
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h | 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c | 10
-rw-r--r-- fs/xfs/xfs_iget.c | 15
-rw-r--r-- fs/xfs/xfs_log_recover.c | 17
-rw-r--r-- include/asm-frv/ftrace.h | 1
-rw-r--r-- include/asm-generic/vmlinux.lds.h | 29
-rw-r--r-- include/asm-m32r/ftrace.h | 1
-rw-r--r-- include/asm-mn10300/ftrace.h | 1
-rw-r--r-- include/linux/blktrace_api.h | 5
-rw-r--r-- include/linux/ftrace.h | 252
-rw-r--r-- include/linux/ftrace_irq.h | 2
-rw-r--r-- include/linux/hardirq.h | 73
-rw-r--r-- include/linux/irqflags.h | 8
-rw-r--r-- include/linux/kernel.h | 136
-rw-r--r-- include/linux/lockdep.h | 50
-rw-r--r-- include/linux/memory.h | 6
-rw-r--r-- include/linux/mm_types.h | 3
-rw-r--r-- include/linux/module.h | 5
-rw-r--r-- include/linux/mutex.h | 5
-rw-r--r-- include/linux/ring_buffer.h | 20
-rw-r--r-- include/linux/sched.h | 18
-rw-r--r-- include/linux/slab_def.h | 68
-rw-r--r-- include/linux/slob_def.h | 9
-rw-r--r-- include/linux/slub_def.h | 72
-rw-r--r-- include/linux/string.h | 7
-rw-r--r-- include/linux/syscalls.h | 60
-rw-r--r-- include/linux/timer.h | 93
-rw-r--r-- include/linux/trace_clock.h | 19
-rw-r--r-- include/linux/tracepoint.h | 116
-rw-r--r-- include/trace/block.h | 70
-rw-r--r-- include/trace/irq.h | 9
-rw-r--r-- include/trace/irq_event_types.h | 43
-rw-r--r-- include/trace/kmemtrace.h | 75
-rw-r--r-- include/trace/lockdep.h | 9
-rw-r--r-- include/trace/lockdep_event_types.h | 44
-rw-r--r-- include/trace/power.h | 32
-rw-r--r-- include/trace/sched.h | 49
-rw-r--r-- include/trace/sched_event_types.h | 337
-rw-r--r-- include/trace/trace_event_types.h | 5
-rw-r--r-- include/trace/trace_events.h | 5
-rw-r--r-- include/trace/workqueue.h | 25
-rw-r--r-- init/Kconfig | 2
-rw-r--r-- init/main.c | 2
-rw-r--r-- kernel/extable.c | 4
-rw-r--r-- kernel/irq/handle.c | 6
-rw-r--r-- kernel/kprobes.c | 15
-rw-r--r-- kernel/lockdep.c | 538
-rw-r--r-- kernel/lockdep_internals.h | 45
-rw-r--r-- kernel/lockdep_proc.c | 22
-rw-r--r-- kernel/lockdep_states.h | 9
-rw-r--r-- kernel/module.c | 2
-rw-r--r-- kernel/mutex-debug.c | 9
-rw-r--r-- kernel/mutex-debug.h | 18
-rw-r--r-- kernel/mutex.c | 121
-rw-r--r-- kernel/mutex.h | 22
-rw-r--r-- kernel/relay.c | 4
-rw-r--r-- kernel/sched.c | 79
-rw-r--r-- kernel/sched_clock.c | 53
-rw-r--r-- kernel/sched_features.h | 1
-rw-r--r-- kernel/softirq.c | 17
-rw-r--r-- kernel/timer.c | 68
-rw-r--r-- kernel/trace/Kconfig | 118
-rw-r--r-- kernel/trace/Makefile | 11
-rw-r--r-- kernel/trace/blktrace.c (renamed from block/blktrace.c) | 719
-rw-r--r-- kernel/trace/events.c | 15
-rw-r--r-- kernel/trace/ftrace.c | 1029
-rw-r--r-- kernel/trace/kmemtrace.c | 339
-rw-r--r-- kernel/trace/ring_buffer.c | 281
-rw-r--r-- kernel/trace/trace.c | 2639
-rw-r--r-- kernel/trace/trace.h | 208
-rw-r--r-- kernel/trace/trace_boot.c | 36
-rw-r--r-- kernel/trace/trace_branch.c | 278
-rw-r--r-- kernel/trace/trace_clock.c | 108
-rw-r--r-- kernel/trace/trace_event_types.h | 166
-rw-r--r-- kernel/trace/trace_events.c | 606
-rw-r--r-- kernel/trace/trace_events_stage_1.h | 39
-rw-r--r-- kernel/trace/trace_events_stage_2.h | 131
-rw-r--r-- kernel/trace/trace_events_stage_3.h | 217
-rw-r--r-- kernel/trace/trace_export.c | 102
-rw-r--r-- kernel/trace/trace_functions.c | 369
-rw-r--r-- kernel/trace/trace_functions_graph.c | 486
-rw-r--r-- kernel/trace/trace_hw_branches.c | 185
-rw-r--r-- kernel/trace/trace_irqsoff.c | 54
-rw-r--r-- kernel/trace/trace_mmiotrace.c | 46
-rw-r--r-- kernel/trace/trace_nop.c | 5
-rw-r--r-- kernel/trace/trace_output.c | 916
-rw-r--r-- kernel/trace/trace_output.h | 63
-rw-r--r-- kernel/trace/trace_power.c | 188
-rw-r--r-- kernel/trace/trace_printk.c | 138
-rw-r--r-- kernel/trace/trace_sched_switch.c | 15
-rw-r--r-- kernel/trace/trace_sched_wakeup.c | 96
-rw-r--r-- kernel/trace/trace_selftest.c | 91
-rw-r--r-- kernel/trace/trace_stat.c | 319
-rw-r--r-- kernel/trace/trace_stat.h | 31
-rw-r--r-- kernel/trace/trace_syscalls.c | 243
-rw-r--r-- kernel/trace/trace_sysprof.c | 23
-rw-r--r-- kernel/trace/trace_workqueue.c | 280
-rw-r--r-- kernel/workqueue.c | 16
-rw-r--r-- lib/Kconfig | 3
-rw-r--r-- lib/Kconfig.debug | 2
-rw-r--r-- lib/locking-selftest.c | 4
-rw-r--r-- lib/vsprintf.c | 1005
-rw-r--r-- mm/memory.c | 10
-rw-r--r-- mm/page_alloc.c | 5
-rw-r--r-- mm/slab.c | 75
-rw-r--r-- mm/slob.c | 39
-rw-r--r-- mm/slub.c | 98
-rw-r--r-- mm/vmscan.c | 4
-rw-r--r-- samples/tracepoints/tp-samples-trace.h | 8
-rw-r--r-- scripts/Makefile.build | 13
-rw-r--r-- scripts/package/Makefile | 3
-rwxr-xr-x scripts/package/mkspec | 2
-rwxr-xr-x scripts/recordmcount.pl | 37
-rw-r--r-- scripts/unifdef.c | 6
206 files changed, 13811 insertions, 4042 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 000000000000..5e6a92a02d85
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
+What:		/sys/kernel/debug/kmemtrace/
+Date:		July 2008
+Contact:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+	cpu<n>		(0400)	Per-CPU tracing data, see below. (binary)
+	total_overruns	(0400)	Total number of bytes which were dropped from
+				cpu<n> files because of full buffer condition,
+				non-binary. (text)
+	abi_version	(0400)	Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+	Event ID	(1 byte)	Unsigned integer, one of:
+		0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+		1 - represents a freeing of previously allocated memory
+		    (KMEMTRACE_EVENT_FREE)
+	Type ID		(1 byte)	Unsigned integer, one of:
+		0 - this is a kmalloc() / kfree()
+		1 - this is a kmem_cache_alloc() / kmem_cache_free()
+		2 - this is a __get_free_pages() et al.
+	Event size	(2 bytes)	Unsigned integer representing the
+					size of this event. Used to extend
+					kmemtrace. Discard the bytes you
+					don't know about.
+	Sequence number	(4 bytes)	Signed integer used to reorder data
+					logged on SMP machines. Wraparound
+					must be taken into account, although
+					it is unlikely.
+	Caller address	(8 bytes)	Return address to the caller.
+	Pointer to mem	(8 bytes)	Pointer to target memory area. Can be
+					NULL, but not all such calls might be
+					recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+	Requested bytes	(8 bytes)	Total number of requested bytes,
+					unsigned, must not be zero.
+	Allocated bytes	(8 bytes)	Total number of actually allocated
+					bytes, unsigned, must not be lower
+					than requested bytes.
+	Requested flags	(4 bytes)	GFP flags supplied by the caller.
+	Target CPU	(4 bytes)	Signed integer, valid for event id 1.
+					If equal to -1, target CPU is the same
+					as origin CPU, but the reverse might
+					not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, are followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+	Feature size	(2 byte)
+	Feature ID	(1 byte)
+	Feature data	(Feature size - 3 bytes)
+
+
+Users:
+	kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
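For reference, here is a minimal C sketch of the record layout described above. Only the field order and sizes come from this ABI text; the struct and field names are illustrative, not taken from the kernel headers, and a reader must still honour the event-size field and the host endianness rather than any fixed layout.

#include <stdint.h>

/*
 * Illustrative layout of the core kmemtrace record described above.
 * Always advance by 'event_size' and discard trailing bytes you do
 * not understand.
 */
struct kmemtrace_core_record {
	uint8_t  event_id;    /* 0 = KMEMTRACE_EVENT_ALLOC, 1 = KMEMTRACE_EVENT_FREE */
	uint8_t  type_id;     /* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = __get_free_pages et al. */
	uint16_t event_size;  /* total size of this event, used to skip unknown data */
	int32_t  seq;         /* sequence number for reordering SMP data, may wrap */
	uint64_t caller;      /* return address to the caller */
	uint64_t ptr;         /* pointer to the target memory area, may be NULL */
};

/* Additional fields that follow the core record for allocation events. */
struct kmemtrace_alloc_record {
	uint64_t bytes_req;   /* requested bytes, never zero */
	uint64_t bytes_alloc; /* actually allocated bytes, >= bytes_req */
	uint32_t gfp_flags;   /* GFP flags supplied by the caller */
	int32_t  target_cpu;  /* -1 means same as the origin CPU */
};
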
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index 4333e836c495..23448551cabe 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -373,10 +373,10 @@ Filesystem Resizing http://ext2resize.sourceforge.net/
 Compression (*)         http://e2compr.sourceforge.net/
 
 Implementations for:
-Windows 95/98/NT/2000   http://uranus.it.swin.edu.au/~jn/linux/Explore2fs.htm
-Windows 95 (*)          http://www.yipton.demon.co.uk/content.html#FSDEXT2
+Windows 95/98/NT/2000   http://www.chrysocome.net/explore2fs
+Windows 95 (*)          http://www.yipton.net/content.html#FSDEXT2
 DOS client (*)          ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
-OS/2                    http://perso.wanadoo.fr/matthieu.willm/ext2-os2/
-RISC OS client          ftp://ftp.barnet.ac.uk/pub/acorn/armlinux/iscafs/
+OS/2 (*)                ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+RISC OS client          http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/
 
-(*) no longer actively developed/supported (as of Apr 2001)
+(*) no longer actively developed/supported (as of Mar 2009)
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 9dd2a3bb2acc..e5f3833a6ef8 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -198,5 +198,5 @@ kernel source: <file:fs/ext3/>
 programs:       http://e2fsprogs.sourceforge.net/
                 http://ext2resize.sourceforge.net
 
-useful links:   http://www-106.ibm.com/developerworks/linux/library/l-fs7/
-                http://www-106.ibm.com/developerworks/linux/library/l-fs8/
+useful links:   http://www.ibm.com/developerworks/library/l-fs7.html
+                http://www.ibm.com/developerworks/library/l-fs8.html
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 803b1318b13d..fd9a3e693813 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -15,31 +15,31 @@ Introduction
15 15
16Ftrace is an internal tracer designed to help out developers and 16Ftrace is an internal tracer designed to help out developers and
17designers of systems to find what is going on inside the kernel. 17designers of systems to find what is going on inside the kernel.
18It can be used for debugging or analyzing latencies and performance 18It can be used for debugging or analyzing latencies and
19issues that take place outside of user-space. 19performance issues that take place outside of user-space.
20 20
21Although ftrace is the function tracer, it also includes an 21Although ftrace is the function tracer, it also includes an
22infrastructure that allows for other types of tracing. Some of the 22infrastructure that allows for other types of tracing. Some of
23tracers that are currently in ftrace include a tracer to trace 23the tracers that are currently in ftrace include a tracer to
24context switches, the time it takes for a high priority task to 24trace context switches, the time it takes for a high priority
25run after it was woken up, the time interrupts are disabled, and 25task to run after it was woken up, the time interrupts are
26more (ftrace allows for tracer plugins, which means that the list of 26disabled, and more (ftrace allows for tracer plugins, which
27tracers can always grow). 27means that the list of tracers can always grow).
28 28
29 29
30The File System 30The File System
31--------------- 31---------------
32 32
33Ftrace uses the debugfs file system to hold the control files as well 33Ftrace uses the debugfs file system to hold the control files as
34as the files to display output. 34well as the files to display output.
35 35
36To mount the debugfs system: 36To mount the debugfs system:
37 37
38 # mkdir /debug 38 # mkdir /debug
39 # mount -t debugfs nodev /debug 39 # mount -t debugfs nodev /debug
40 40
41(Note: it is more common to mount at /sys/kernel/debug, but for simplicity 41( Note: it is more common to mount at /sys/kernel/debug, but for
42 this document will use /debug) 42 simplicity this document will use /debug)
43 43
44That's it! (assuming that you have ftrace configured into your kernel) 44That's it! (assuming that you have ftrace configured into your kernel)
45 45
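The two shell commands above also have a direct C equivalent for tools that want to set this up themselves. A minimal sketch follows; the /debug mount point is only the convention used in this document, and error handling is kept to a bare minimum.

#include <stdio.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Equivalent of "mkdir /debug; mount -t debugfs nodev /debug". */
static int mount_debugfs(void)
{
	/* The directory may already exist; mount() below reports real failures. */
	mkdir("/debug", 0755);

	if (mount("nodev", "/debug", "debugfs", 0, NULL) < 0) {
		perror("mount debugfs");
		return -1;
	}
	return 0;
}
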
@@ -50,90 +50,124 @@ of ftrace. Here is a list of some of the key files:
50 50
51 Note: all time values are in microseconds. 51 Note: all time values are in microseconds.
52 52
53 current_tracer: This is used to set or display the current tracer 53 current_tracer:
54 that is configured. 54
55 55 This is used to set or display the current tracer
56 available_tracers: This holds the different types of tracers that 56 that is configured.
57 have been compiled into the kernel. The tracers 57
58 listed here can be configured by echoing their name 58 available_tracers:
59 into current_tracer. 59
60 60 This holds the different types of tracers that
61 tracing_enabled: This sets or displays whether the current_tracer 61 have been compiled into the kernel. The
62 is activated and tracing or not. Echo 0 into this 62 tracers listed here can be configured by
63 file to disable the tracer or 1 to enable it. 63 echoing their name into current_tracer.
64 64
65 trace: This file holds the output of the trace in a human readable 65 tracing_enabled:
66 format (described below). 66
67 67 This sets or displays whether the current_tracer
68 latency_trace: This file shows the same trace but the information 68 is activated and tracing or not. Echo 0 into this
69 is organized more to display possible latencies 69 file to disable the tracer or 1 to enable it.
70 in the system (described below). 70
71 71 trace:
72 trace_pipe: The output is the same as the "trace" file but this 72
73 file is meant to be streamed with live tracing. 73 This file holds the output of the trace in a human
74 Reads from this file will block until new data 74 readable format (described below).
75 is retrieved. Unlike the "trace" and "latency_trace" 75
76 files, this file is a consumer. This means reading 76 latency_trace:
77 from this file causes sequential reads to display 77
78 more current data. Once data is read from this 78 This file shows the same trace but the information
79 file, it is consumed, and will not be read 79 is organized more to display possible latencies
80 again with a sequential read. The "trace" and 80 in the system (described below).
81 "latency_trace" files are static, and if the 81
82 tracer is not adding more data, they will display 82 trace_pipe:
83 the same information every time they are read. 83
84 84 The output is the same as the "trace" file but this
85 trace_options: This file lets the user control the amount of data 85 file is meant to be streamed with live tracing.
86 that is displayed in one of the above output 86 Reads from this file will block until new data
87 files. 87 is retrieved. Unlike the "trace" and "latency_trace"
88 88 files, this file is a consumer. This means reading
89 trace_max_latency: Some of the tracers record the max latency. 89 from this file causes sequential reads to display
90 For example, the time interrupts are disabled. 90 more current data. Once data is read from this
91 This time is saved in this file. The max trace 91 file, it is consumed, and will not be read
92 will also be stored, and displayed by either 92 again with a sequential read. The "trace" and
93 "trace" or "latency_trace". A new max trace will 93 "latency_trace" files are static, and if the
94 only be recorded if the latency is greater than 94 tracer is not adding more data, they will display
95 the value in this file. (in microseconds) 95 the same information every time they are read.
96 96
97 buffer_size_kb: This sets or displays the number of kilobytes each CPU 97 trace_options:
98 buffer can hold. The tracer buffers are the same size 98
99 for each CPU. The displayed number is the size of the 99 This file lets the user control the amount of data
100 CPU buffer and not total size of all buffers. The 100 that is displayed in one of the above output
101 trace buffers are allocated in pages (blocks of memory 101 files.
102 that the kernel uses for allocation, usually 4 KB in size). 102
103 If the last page allocated has room for more bytes 103 tracing_max_latency:
104 than requested, the rest of the page will be used, 104
105 making the actual allocation bigger than requested. 105 Some of the tracers record the max latency.
106 (Note, the size may not be a multiple of the page size due 106 For example, the time interrupts are disabled.
107 to buffer managment overhead.) 107 This time is saved in this file. The max trace
108 108 will also be stored, and displayed by either
109 This can only be updated when the current_tracer 109 "trace" or "latency_trace". A new max trace will
110 is set to "nop". 110 only be recorded if the latency is greater than
111 111 the value in this file. (in microseconds)
112 tracing_cpumask: This is a mask that lets the user only trace 112
113 on specified CPUS. The format is a hex string 113 buffer_size_kb:
114 representing the CPUS. 114
115 115 This sets or displays the number of kilobytes each CPU
116 set_ftrace_filter: When dynamic ftrace is configured in (see the 116 buffer can hold. The tracer buffers are the same size
117 section below "dynamic ftrace"), the code is dynamically 117 for each CPU. The displayed number is the size of the
118 modified (code text rewrite) to disable calling of the 118 CPU buffer and not total size of all buffers. The
119 function profiler (mcount). This lets tracing be configured 119 trace buffers are allocated in pages (blocks of memory
120 in with practically no overhead in performance. This also 120 that the kernel uses for allocation, usually 4 KB in size).
121 has a side effect of enabling or disabling specific functions 121 If the last page allocated has room for more bytes
122 to be traced. Echoing names of functions into this file 122 than requested, the rest of the page will be used,
123 will limit the trace to only those functions. 123 making the actual allocation bigger than requested.
124 124 ( Note, the size may not be a multiple of the page size
125 set_ftrace_notrace: This has an effect opposite to that of 125 due to buffer managment overhead. )
126 set_ftrace_filter. Any function that is added here will not 126
127 be traced. If a function exists in both set_ftrace_filter 127 This can only be updated when the current_tracer
128 and set_ftrace_notrace, the function will _not_ be traced. 128 is set to "nop".
129 129
130 set_ftrace_pid: Have the function tracer only trace a single thread. 130 tracing_cpumask:
131 131
132 available_filter_functions: This lists the functions that ftrace 132 This is a mask that lets the user only trace
133 has processed and can trace. These are the function 133 on specified CPUS. The format is a hex string
134 names that you can pass to "set_ftrace_filter" or 134 representing the CPUS.
135 "set_ftrace_notrace". (See the section "dynamic ftrace" 135
136 below for more details.) 136 set_ftrace_filter:
137
138 When dynamic ftrace is configured in (see the
139 section below "dynamic ftrace"), the code is dynamically
140 modified (code text rewrite) to disable calling of the
141 function profiler (mcount). This lets tracing be configured
142 in with practically no overhead in performance. This also
143 has a side effect of enabling or disabling specific functions
144 to be traced. Echoing names of functions into this file
145 will limit the trace to only those functions.
146
147 set_ftrace_notrace:
148
149 This has an effect opposite to that of
150 set_ftrace_filter. Any function that is added here will not
151 be traced. If a function exists in both set_ftrace_filter
152 and set_ftrace_notrace, the function will _not_ be traced.
153
154 set_ftrace_pid:
155
156 Have the function tracer only trace a single thread.
157
158 set_graph_function:
159
160 Set a "trigger" function where tracing should start
161 with the function graph tracer (See the section
162 "dynamic ftrace" for more details).
163
164 available_filter_functions:
165
166 This lists the functions that ftrace
167 has processed and can trace. These are the function
168 names that you can pass to "set_ftrace_filter" or
169 "set_ftrace_notrace". (See the section "dynamic ftrace"
170 below for more details.)
137 171
138 172
139The Tracers 173The Tracers
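All of the control files listed above are ordinary debugfs text files, so they can be driven from a program just as easily as from the shell. The sketch below is not part of ftrace itself: it assumes the /debug mount point used throughout this document, selects a tracer by writing its name into current_tracer, and then streams trace_pipe, whose reads block until new data arrives.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define TRACE_DIR "/debug/tracing"

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Same effect as: echo function > /debug/tracing/current_tracer */
	fd = open(TRACE_DIR "/current_tracer", O_WRONLY);
	if (fd < 0 || write(fd, "function\n", 9) < 0) {
		perror("current_tracer");
		return 1;
	}
	close(fd);

	/* trace_pipe is a consumer: reads block until new entries show up. */
	fd = open(TRACE_DIR "/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	return 0;
}
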
@@ -141,36 +175,66 @@ The Tracers
141 175
142Here is the list of current tracers that may be configured. 176Here is the list of current tracers that may be configured.
143 177
144 function - function tracer that uses mcount to trace all functions. 178 "function"
179
180 Function call tracer to trace all kernel functions.
181
182 "function_graph_tracer"
183
184 Similar to the function tracer except that the
185 function tracer probes the functions on their entry
186 whereas the function graph tracer traces on both entry
187 and exit of the functions. It then provides the ability
188 to draw a graph of function calls similar to C code
189 source.
145 190
146 sched_switch - traces the context switches between tasks. 191 "sched_switch"
147 192
148 irqsoff - traces the areas that disable interrupts and saves 193 Traces the context switches and wakeups between tasks.
149 the trace with the longest max latency.
150 See tracing_max_latency. When a new max is recorded,
151 it replaces the old trace. It is best to view this
152 trace via the latency_trace file.
153 194
154 preemptoff - Similar to irqsoff but traces and records the amount of 195 "irqsoff"
155 time for which preemption is disabled.
156 196
157 preemptirqsoff - Similar to irqsoff and preemptoff, but traces and 197 Traces the areas that disable interrupts and saves
158 records the largest time for which irqs and/or preemption 198 the trace with the longest max latency.
159 is disabled. 199 See tracing_max_latency. When a new max is recorded,
200 it replaces the old trace. It is best to view this
201 trace via the latency_trace file.
160 202
161 wakeup - Traces and records the max latency that it takes for 203 "preemptoff"
162 the highest priority task to get scheduled after
163 it has been woken up.
164 204
165 nop - This is not a tracer. To remove all tracers from tracing 205 Similar to irqsoff but traces and records the amount of
166 simply echo "nop" into current_tracer. 206 time for which preemption is disabled.
207
208 "preemptirqsoff"
209
210 Similar to irqsoff and preemptoff, but traces and
211 records the largest time for which irqs and/or preemption
212 is disabled.
213
214 "wakeup"
215
216 Traces and records the max latency that it takes for
217 the highest priority task to get scheduled after
218 it has been woken up.
219
220 "hw-branch-tracer"
221
222 Uses the BTS CPU feature on x86 CPUs to traces all
223 branches executed.
224
225 "nop"
226
227 This is the "trace nothing" tracer. To remove all
228 tracers from tracing simply echo "nop" into
229 current_tracer.
167 230
168 231
169Examples of using the tracer 232Examples of using the tracer
170---------------------------- 233----------------------------
171 234
172Here are typical examples of using the tracers when controlling them only 235Here are typical examples of using the tracers when controlling
173with the debugfs interface (without using any user-land utilities). 236them only with the debugfs interface (without using any
237user-land utilities).
174 238
175Output format: 239Output format:
176-------------- 240--------------
@@ -187,16 +251,16 @@ Here is an example of the output format of the file "trace"
187 bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput 251 bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput
188 -------- 252 --------
189 253
190A header is printed with the tracer name that is represented by the trace. 254A header is printed with the tracer name that is represented by
191In this case the tracer is "function". Then a header showing the format. Task 255the trace. In this case the tracer is "function". Then a header
192name "bash", the task PID "4251", the CPU that it was running on 256showing the format. Task name "bash", the task PID "4251", the
193"01", the timestamp in <secs>.<usecs> format, the function name that was 257CPU that it was running on "01", the timestamp in <secs>.<usecs>
194traced "path_put" and the parent function that called this function 258format, the function name that was traced "path_put" and the
195"path_walk". The timestamp is the time at which the function was 259parent function that called this function "path_walk". The
196entered. 260timestamp is the time at which the function was entered.
197 261
198The sched_switch tracer also includes tracing of task wakeups and 262The sched_switch tracer also includes tracing of task wakeups
199context switches. 263and context switches.
200 264
201 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S 265 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
202 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S 266 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
@@ -205,8 +269,8 @@ context switches.
205 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R 269 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
206 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R 270 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
207 271
208Wake ups are represented by a "+" and the context switches are shown as 272Wake ups are represented by a "+" and the context switches are
209"==>". The format is: 273shown as "==>". The format is:
210 274
211 Context switches: 275 Context switches:
212 276
@@ -220,19 +284,20 @@ Wake ups are represented by a "+" and the context switches are shown as
220 284
221 <pid>:<prio>:<state> + <pid>:<prio>:<state> 285 <pid>:<prio>:<state> + <pid>:<prio>:<state>
222 286
223The prio is the internal kernel priority, which is the inverse of the 287The prio is the internal kernel priority, which is the inverse
224priority that is usually displayed by user-space tools. Zero represents 288of the priority that is usually displayed by user-space tools.
225the highest priority (99). Prio 100 starts the "nice" priorities with 289Zero represents the highest priority (99). Prio 100 starts the
226100 being equal to nice -20 and 139 being nice 19. The prio "140" is 290"nice" priorities with 100 being equal to nice -20 and 139 being
227reserved for the idle task which is the lowest priority thread (pid 0). 291nice 19. The prio "140" is reserved for the idle task which is
292the lowest priority thread (pid 0).
228 293
229 294
230Latency trace format 295Latency trace format
231-------------------- 296--------------------
232 297
233For traces that display latency times, the latency_trace file gives 298For traces that display latency times, the latency_trace file
234somewhat more information to see why a latency happened. Here is a typical 299gives somewhat more information to see why a latency happened.
235trace. 300Here is a typical trace.
236 301
237# tracer: irqsoff 302# tracer: irqsoff
238# 303#
@@ -259,20 +324,20 @@ irqsoff latency trace v1.1.5 on 2.6.26-rc8
259 <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) 324 <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq)
260 325
261 326
327This shows that the current tracer is "irqsoff" tracing the time
328for which interrupts were disabled. It gives the trace version
329and the version of the kernel upon which this was executed on
330(2.6.26-rc8). Then it displays the max latency in microsecs (97
331us). The number of trace entries displayed and the total number
332recorded (both are three: #3/3). The type of preemption that was
333used (PREEMPT). VP, KP, SP, and HP are always zero and are
334reserved for later use. #P is the number of online CPUS (#P:2).
262 335
263This shows that the current tracer is "irqsoff" tracing the time for which 336The task is the process that was running when the latency
264interrupts were disabled. It gives the trace version and the version 337occurred. (swapper pid: 0).
265of the kernel upon which this was executed on (2.6.26-rc8). Then it displays
266the max latency in microsecs (97 us). The number of trace entries displayed
267and the total number recorded (both are three: #3/3). The type of
268preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero
269and are reserved for later use. #P is the number of online CPUS (#P:2).
270
271The task is the process that was running when the latency occurred.
272(swapper pid: 0).
273 338
274The start and stop (the functions in which the interrupts were disabled and 339The start and stop (the functions in which the interrupts were
275enabled respectively) that caused the latencies: 340disabled and enabled respectively) that caused the latencies:
276 341
277 apic_timer_interrupt is where the interrupts were disabled. 342 apic_timer_interrupt is where the interrupts were disabled.
278 do_softirq is where they were enabled again. 343 do_softirq is where they were enabled again.
@@ -308,12 +373,12 @@ The above is mostly meaningful for kernel developers.
308 latency_trace file is relative to the start of the trace. 373 latency_trace file is relative to the start of the trace.
309 374
310 delay: This is just to help catch your eye a bit better. And 375 delay: This is just to help catch your eye a bit better. And
311 needs to be fixed to be only relative to the same CPU. 376 needs to be fixed to be only relative to the same CPU.
312 The marks are determined by the difference between this 377 The marks are determined by the difference between this
313 current trace and the next trace. 378 current trace and the next trace.
314 '!' - greater than preempt_mark_thresh (default 100) 379 '!' - greater than preempt_mark_thresh (default 100)
315 '+' - greater than 1 microsecond 380 '+' - greater than 1 microsecond
316 ' ' - less than or equal to 1 microsecond. 381 ' ' - less than or equal to 1 microsecond.
317 382
318 The rest is the same as the 'trace' file. 383 The rest is the same as the 'trace' file.
319 384
@@ -321,14 +386,15 @@ The above is mostly meaningful for kernel developers.
321trace_options 386trace_options
322------------- 387-------------
323 388
324The trace_options file is used to control what gets printed in the trace 389The trace_options file is used to control what gets printed in
325output. To see what is available, simply cat the file: 390the trace output. To see what is available, simply cat the file:
326 391
327 cat /debug/tracing/trace_options 392 cat /debug/tracing/trace_options
328 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ 393 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
329 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj 394 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
330 395
331To disable one of the options, echo in the option prepended with "no". 396To disable one of the options, echo in the option prepended with
397"no".
332 398
333 echo noprint-parent > /debug/tracing/trace_options 399 echo noprint-parent > /debug/tracing/trace_options
334 400
@@ -338,8 +404,8 @@ To enable an option, leave off the "no".
338 404
339Here are the available options: 405Here are the available options:
340 406
341 print-parent - On function traces, display the calling function 407 print-parent - On function traces, display the calling (parent)
342 as well as the function being traced. 408 function as well as the function being traced.
343 409
344 print-parent: 410 print-parent:
345 bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul 411 bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul
@@ -348,15 +414,16 @@ Here are the available options:
348 bash-4000 [01] 1477.606694: simple_strtoul 414 bash-4000 [01] 1477.606694: simple_strtoul
349 415
350 416
351 sym-offset - Display not only the function name, but also the offset 417 sym-offset - Display not only the function name, but also the
352 in the function. For example, instead of seeing just 418 offset in the function. For example, instead of
353 "ktime_get", you will see "ktime_get+0xb/0x20". 419 seeing just "ktime_get", you will see
420 "ktime_get+0xb/0x20".
354 421
355 sym-offset: 422 sym-offset:
356 bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 423 bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0
357 424
358 sym-addr - this will also display the function address as well as 425 sym-addr - this will also display the function address as well
359 the function name. 426 as the function name.
360 427
361 sym-addr: 428 sym-addr:
362 bash-4000 [01] 1477.606694: simple_strtoul <c0339346> 429 bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
@@ -366,35 +433,41 @@ Here are the available options:
366 bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ 433 bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
367 (+0.000ms): simple_strtoul (strict_strtoul) 434 (+0.000ms): simple_strtoul (strict_strtoul)
368 435
369 raw - This will display raw numbers. This option is best for use with 436 raw - This will display raw numbers. This option is best for
370 user applications that can translate the raw numbers better than 437 use with user applications that can translate the raw
371 having it done in the kernel. 438 numbers better than having it done in the kernel.
372 439
373 hex - Similar to raw, but the numbers will be in a hexadecimal format. 440 hex - Similar to raw, but the numbers will be in a hexadecimal
441 format.
374 442
375 bin - This will print out the formats in raw binary. 443 bin - This will print out the formats in raw binary.
376 444
377 block - TBD (needs update) 445 block - TBD (needs update)
378 446
379 stacktrace - This is one of the options that changes the trace itself. 447 stacktrace - This is one of the options that changes the trace
380 When a trace is recorded, so is the stack of functions. 448 itself. When a trace is recorded, so is the stack
381 This allows for back traces of trace sites. 449 of functions. This allows for back traces of
450 trace sites.
382 451
383 userstacktrace - This option changes the trace. 452 userstacktrace - This option changes the trace. It records a
384 It records a stacktrace of the current userspace thread. 453 stacktrace of the current userspace thread.
385 454
386 sym-userobj - when user stacktrace are enabled, look up which object the 455 sym-userobj - when user stacktrace are enabled, look up which
387 address belongs to, and print a relative address 456 object the address belongs to, and print a
388 This is especially useful when ASLR is on, otherwise you don't 457 relative address. This is especially useful when
389 get a chance to resolve the address to object/file/line after the app is no 458 ASLR is on, otherwise you don't get a chance to
390 longer running 459 resolve the address to object/file/line after
460 the app is no longer running
391 461
392 The lookup is performed when you read trace,trace_pipe,latency_trace. Example: 462 The lookup is performed when you read
463 trace,trace_pipe,latency_trace. Example:
393 464
394 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 465 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
395x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] 466x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
396 467
397 sched-tree - TBD (any users??) 468 sched-tree - trace all tasks that are on the runqueue, at
469 every scheduling event. Will add overhead if
470 there's a lot of tasks running at once.
398 471
399 472
400sched_switch 473sched_switch
@@ -431,18 +504,19 @@ of how to use it.
431 [...] 504 [...]
432 505
433 506
434As we have discussed previously about this format, the header shows 507As we have discussed previously about this format, the header
435the name of the trace and points to the options. The "FUNCTION" 508shows the name of the trace and points to the options. The
436is a misnomer since here it represents the wake ups and context 509"FUNCTION" is a misnomer since here it represents the wake ups
437switches. 510and context switches.
438 511
439The sched_switch file only lists the wake ups (represented with '+') 512The sched_switch file only lists the wake ups (represented with
440and context switches ('==>') with the previous task or current task 513'+') and context switches ('==>') with the previous task or
441first followed by the next task or task waking up. The format for both 514current task first followed by the next task or task waking up.
442of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO 515The format for both of these is PID:KERNEL-PRIO:TASK-STATE.
443is the inverse of the actual priority with zero (0) being the highest 516Remember that the KERNEL-PRIO is the inverse of the actual
444priority and the nice values starting at 100 (nice -20). Below is 517priority with zero (0) being the highest priority and the nice
445a quick chart to map the kernel priority to user land priorities. 518values starting at 100 (nice -20). Below is a quick chart to map
519the kernel priority to user land priorities.
446 520
447 Kernel priority: 0 to 99 ==> user RT priority 99 to 0 521 Kernel priority: 0 to 99 ==> user RT priority 99 to 0
448 Kernel priority: 100 to 139 ==> user nice -20 to 19 522 Kernel priority: 100 to 139 ==> user nice -20 to 19
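Since the mapping in this chart is purely mechanical, a post-processing tool can translate the priorities shown in the trace. Here is a small illustrative helper; the function is ours, not part of ftrace.

#include <stdio.h>

/*
 * Translate the kernel-internal priority printed in the trace to the
 * user-visible value, following the chart above:
 *   0..99    -> RT priority 99..0
 *   100..139 -> nice -20..19
 *   140      -> reserved for the idle task
 */
static void print_user_prio(int kernel_prio)
{
	if (kernel_prio < 100)
		printf("RT priority %d\n", 99 - kernel_prio);
	else if (kernel_prio < 140)
		printf("nice %d\n", kernel_prio - 120);
	else
		printf("idle task\n");
}
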
@@ -463,10 +537,10 @@ The task states are:
463ftrace_enabled 537ftrace_enabled
464-------------- 538--------------
465 539
466The following tracers (listed below) give different output depending 540The following tracers (listed below) give different output
467on whether or not the sysctl ftrace_enabled is set. To set ftrace_enabled, 541depending on whether or not the sysctl ftrace_enabled is set. To
468one can either use the sysctl function or set it via the proc 542set ftrace_enabled, one can either use the sysctl function or
469file system interface. 543set it via the proc file system interface.
470 544
471 sysctl kernel.ftrace_enabled=1 545 sysctl kernel.ftrace_enabled=1
472 546
@@ -474,12 +548,12 @@ file system interface.
474 548
475 echo 1 > /proc/sys/kernel/ftrace_enabled 549 echo 1 > /proc/sys/kernel/ftrace_enabled
476 550
477To disable ftrace_enabled simply replace the '1' with '0' in 551To disable ftrace_enabled simply replace the '1' with '0' in the
478the above commands. 552above commands.
479 553
480When ftrace_enabled is set the tracers will also record the functions 554When ftrace_enabled is set the tracers will also record the
481that are within the trace. The descriptions of the tracers 555functions that are within the trace. The descriptions of the
482will also show an example with ftrace enabled. 556tracers will also show an example with ftrace enabled.
483 557
484 558
485irqsoff 559irqsoff
@@ -487,17 +561,18 @@ irqsoff
487 561
488When interrupts are disabled, the CPU can not react to any other 562When interrupts are disabled, the CPU can not react to any other
489external event (besides NMIs and SMIs). This prevents the timer 563external event (besides NMIs and SMIs). This prevents the timer
490interrupt from triggering or the mouse interrupt from letting the 564interrupt from triggering or the mouse interrupt from letting
491kernel know of a new mouse event. The result is a latency with the 565the kernel know of a new mouse event. The result is a latency
492reaction time. 566with the reaction time.
493 567
494The irqsoff tracer tracks the time for which interrupts are disabled. 568The irqsoff tracer tracks the time for which interrupts are
495When a new maximum latency is hit, the tracer saves the trace leading up 569disabled. When a new maximum latency is hit, the tracer saves
496to that latency point so that every time a new maximum is reached, the old 570the trace leading up to that latency point so that every time a
497saved trace is discarded and the new trace is saved. 571new maximum is reached, the old saved trace is discarded and the
572new trace is saved.
498 573
499To reset the maximum, echo 0 into tracing_max_latency. Here is an 574To reset the maximum, echo 0 into tracing_max_latency. Here is
500example: 575an example:
501 576
502 # echo irqsoff > /debug/tracing/current_tracer 577 # echo irqsoff > /debug/tracing/current_tracer
503 # echo 0 > /debug/tracing/tracing_max_latency 578 # echo 0 > /debug/tracing/tracing_max_latency
@@ -532,10 +607,11 @@ irqsoff latency trace v1.1.5 on 2.6.26
532 607
533 608
534Here we see that that we had a latency of 12 microsecs (which is 609Here we see that that we had a latency of 12 microsecs (which is
535very good). The _write_lock_irq in sys_setpgid disabled interrupts. 610very good). The _write_lock_irq in sys_setpgid disabled
536The difference between the 12 and the displayed timestamp 14us occurred 611interrupts. The difference between the 12 and the displayed
537because the clock was incremented between the time of recording the max 612timestamp 14us occurred because the clock was incremented
538latency and the time of recording the function that had that latency. 613between the time of recording the max latency and the time of
614recording the function that had that latency.
539 615
540Note the above example had ftrace_enabled not set. If we set the 616Note the above example had ftrace_enabled not set. If we set the
541ftrace_enabled, we get a much larger output: 617ftrace_enabled, we get a much larger output:
@@ -586,24 +662,24 @@ irqsoff latency trace v1.1.5 on 2.6.26-rc8
586 662
587 663
588Here we traced a 50 microsecond latency. But we also see all the 664Here we traced a 50 microsecond latency. But we also see all the
589functions that were called during that time. Note that by enabling 665functions that were called during that time. Note that by
590function tracing, we incur an added overhead. This overhead may 666enabling function tracing, we incur an added overhead. This
591extend the latency times. But nevertheless, this trace has provided 667overhead may extend the latency times. But nevertheless, this
592some very helpful debugging information. 668trace has provided some very helpful debugging information.
593 669
594 670
595preemptoff 671preemptoff
596---------- 672----------
597 673
598When preemption is disabled, we may be able to receive interrupts but 674When preemption is disabled, we may be able to receive
599the task cannot be preempted and a higher priority task must wait 675interrupts but the task cannot be preempted and a higher
600for preemption to be enabled again before it can preempt a lower 676priority task must wait for preemption to be enabled again
601priority task. 677before it can preempt a lower priority task.
602 678
603The preemptoff tracer traces the places that disable preemption. 679The preemptoff tracer traces the places that disable preemption.
604Like the irqsoff tracer, it records the maximum latency for which preemption 680Like the irqsoff tracer, it records the maximum latency for
605was disabled. The control of preemptoff tracer is much like the irqsoff 681which preemption was disabled. The control of preemptoff tracer
606tracer. 682is much like the irqsoff tracer.
607 683
608 # echo preemptoff > /debug/tracing/current_tracer 684 # echo preemptoff > /debug/tracing/current_tracer
609 # echo 0 > /debug/tracing/tracing_max_latency 685 # echo 0 > /debug/tracing/tracing_max_latency
@@ -637,11 +713,12 @@ preemptoff latency trace v1.1.5 on 2.6.26-rc8
637 sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) 713 sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq)
638 714
639 715
640This has some more changes. Preemption was disabled when an interrupt 716This has some more changes. Preemption was disabled when an
641came in (notice the 'h'), and was enabled while doing a softirq. 717interrupt came in (notice the 'h'), and was enabled while doing
642(notice the 's'). But we also see that interrupts have been disabled 718a softirq. (notice the 's'). But we also see that interrupts
643when entering the preempt off section and leaving it (the 'd'). 719have been disabled when entering the preempt off section and
644We do not know if interrupts were enabled in the mean time. 720leaving it (the 'd'). We do not know if interrupts were enabled
721in the mean time.
645 722
646# tracer: preemptoff 723# tracer: preemptoff
647# 724#
@@ -700,28 +777,30 @@ preemptoff latency trace v1.1.5 on 2.6.26-rc8
700 sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) 777 sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq)
701 778
702 779
703The above is an example of the preemptoff trace with ftrace_enabled 780The above is an example of the preemptoff trace with
704set. Here we see that interrupts were disabled the entire time. 781ftrace_enabled set. Here we see that interrupts were disabled
705The irq_enter code lets us know that we entered an interrupt 'h'. 782the entire time. The irq_enter code lets us know that we entered
706Before that, the functions being traced still show that it is not 783an interrupt 'h'. Before that, the functions being traced still
707in an interrupt, but we can see from the functions themselves that 784show that it is not in an interrupt, but we can see from the
708this is not the case. 785functions themselves that this is not the case.
709 786
710Notice that __do_softirq when called does not have a preempt_count. 787Notice that __do_softirq when called does not have a
711It may seem that we missed a preempt enabling. What really happened 788preempt_count. It may seem that we missed a preempt enabling.
712is that the preempt count is held on the thread's stack and we 789What really happened is that the preempt count is held on the
713switched to the softirq stack (4K stacks in effect). The code 790thread's stack and we switched to the softirq stack (4K stacks
714does not copy the preempt count, but because interrupts are disabled, 791in effect). The code does not copy the preempt count, but
715we do not need to worry about it. Having a tracer like this is good 792because interrupts are disabled, we do not need to worry about
716for letting people know what really happens inside the kernel. 793it. Having a tracer like this is good for letting people know
794what really happens inside the kernel.
717 795
718 796
719preemptirqsoff 797preemptirqsoff
720-------------- 798--------------
721 799
722Knowing the locations that have interrupts disabled or preemption 800Knowing the locations that have interrupts disabled or
723disabled for the longest times is helpful. But sometimes we would 801preemption disabled for the longest times is helpful. But
724like to know when either preemption and/or interrupts are disabled. 802sometimes we would like to know when either preemption and/or
803interrupts are disabled.
725 804
726Consider the following code: 805Consider the following code:
727 806
@@ -741,11 +820,13 @@ The preemptoff tracer will record the total length of
741call_function_with_irqs_and_preemption_off() and 820call_function_with_irqs_and_preemption_off() and
742call_function_with_preemption_off(). 821call_function_with_preemption_off().
743 822
744But neither will trace the time that interrupts and/or preemption 823But neither will trace the time that interrupts and/or
745is disabled. This total time is the time that we can not schedule. 824preemption is disabled. This total time is the time that we can
746To record this time, use the preemptirqsoff tracer. 825not schedule. To record this time, use the preemptirqsoff
826tracer.
747 827
748Again, using this trace is much like the irqsoff and preemptoff tracers. 828Again, using this trace is much like the irqsoff and preemptoff
829tracers.
749 830
750 # echo preemptirqsoff > /debug/tracing/current_tracer 831 # echo preemptirqsoff > /debug/tracing/current_tracer
751 # echo 0 > /debug/tracing/tracing_max_latency 832 # echo 0 > /debug/tracing/tracing_max_latency
@@ -781,9 +862,10 @@ preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
781 862
782 863
783The trace_hardirqs_off_thunk is called from assembly on x86 when 864The trace_hardirqs_off_thunk is called from assembly on x86 when
784interrupts are disabled in the assembly code. Without the function 865interrupts are disabled in the assembly code. Without the
785tracing, we do not know if interrupts were enabled within the preemption 866function tracing, we do not know if interrupts were enabled
786points. We do see that it started with preemption enabled. 867within the preemption points. We do see that it started with
868preemption enabled.
787 869
788Here is a trace with ftrace_enabled set: 870Here is a trace with ftrace_enabled set:
789 871
@@ -871,40 +953,42 @@ preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
871 sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) 953 sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq)
872 954
873 955
874This is a very interesting trace. It started with the preemption of 956This is a very interesting trace. It started with the preemption
875the ls task. We see that the task had the "need_resched" bit set 957of the ls task. We see that the task had the "need_resched" bit
876via the 'N' in the trace. Interrupts were disabled before the spin_lock 958set via the 'N' in the trace. Interrupts were disabled before
877at the beginning of the trace. We see that a schedule took place to run 959the spin_lock at the beginning of the trace. We see that a
878sshd. When the interrupts were enabled, we took an interrupt. 960schedule took place to run sshd. When the interrupts were
879On return from the interrupt handler, the softirq ran. We took another 961enabled, we took an interrupt. On return from the interrupt
880interrupt while running the softirq as we see from the capital 'H'. 962handler, the softirq ran. We took another interrupt while
963running the softirq as we see from the capital 'H'.
881 964
882 965
883wakeup 966wakeup
884------ 967------
885 968
886In a Real-Time environment it is very important to know the wakeup 969In a Real-Time environment it is very important to know the
887time it takes for the highest priority task that is woken up to the 970wakeup time it takes for the highest priority task that is woken
888time that it executes. This is also known as "schedule latency". 971up to the time that it executes. This is also known as "schedule
889I stress the point that this is about RT tasks. It is also important 972latency". I stress the point that this is about RT tasks. It is
890to know the scheduling latency of non-RT tasks, but the average 973also important to know the scheduling latency of non-RT tasks,
891schedule latency is better for non-RT tasks. Tools like 974but the average schedule latency is better for non-RT tasks.
892LatencyTop are more appropriate for such measurements. 975Tools like LatencyTop are more appropriate for such
976measurements.
893 977
894Real-Time environments are interested in the worst case latency. 978Real-Time environments are interested in the worst case latency.
895That is the longest latency it takes for something to happen, and 979That is the longest latency it takes for something to happen,
896not the average. We can have a very fast scheduler that may only 980and not the average. We can have a very fast scheduler that may
897have a large latency once in a while, but that would not work well 981only have a large latency once in a while, but that would not
898with Real-Time tasks. The wakeup tracer was designed to record 982work well with Real-Time tasks. The wakeup tracer was designed
899the worst case wakeups of RT tasks. Non-RT tasks are not recorded 983to record the worst case wakeups of RT tasks. Non-RT tasks are
900because the tracer only records one worst case and tracing non-RT 984not recorded because the tracer only records one worst case and
901tasks that are unpredictable will overwrite the worst case latency 985tracing non-RT tasks that are unpredictable will overwrite the
902of RT tasks. 986worst case latency of RT tasks.
903 987
904Since this tracer only deals with RT tasks, we will run this slightly 988Since this tracer only deals with RT tasks, we will run this
905differently than we did with the previous tracers. Instead of performing 989slightly differently than we did with the previous tracers.
906an 'ls', we will run 'sleep 1' under 'chrt' which changes the 990Instead of performing an 'ls', we will run 'sleep 1' under
907priority of the task. 991'chrt' which changes the priority of the task.
908 992
909 # echo wakeup > /debug/tracing/current_tracer 993 # echo wakeup > /debug/tracing/current_tracer
910 # echo 0 > /debug/tracing/tracing_max_latency 994 # echo 0 > /debug/tracing/tracing_max_latency
@@ -934,17 +1018,16 @@ wakeup latency trace v1.1.5 on 2.6.26-rc8
1018 <idle>-0 1d..4 4us : schedule (cpu_idle)
1019
1020
1021Running this on an idle system, we see that it only took 4
1022microseconds to perform the task switch. Note, since the trace
1023marker in the schedule is before the actual "switch", we stop
1024the tracing when the recorded task is about to schedule in. This
1025may change if we add a new marker at the end of the scheduler.
1026
1027Notice that the recorded task is 'sleep' with the PID of 4901
1028and it has an rt_prio of 5. This priority is user-space priority
1029and not the internal kernel priority. The policy is 1 for
1030SCHED_FIFO and 2 for SCHED_RR.
1031
1032Doing the same with chrt -r 5 and ftrace_enabled set.
1033
@@ -1001,24 +1084,25 @@ ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline)
1084ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock)
1085ksoftirq-7 1d..4 50us : schedule (__cond_resched)
1086
1087The interrupt went off while running ksoftirqd. This task runs
1088at SCHED_OTHER. Why did we not see the 'N' set early? This may
1089be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K
1090stacks configured, the interrupt and softirq run with their own
1091stack. Some information is held on the top of the task's stack
1092(need_resched and preempt_count are both stored there). The
1093setting of the NEED_RESCHED bit is done directly to the task's
1094stack, but the reading of the NEED_RESCHED is done by looking at
1095the current stack, which in this case is the stack for the hard
1096interrupt. This hides the fact that NEED_RESCHED has been set.
1097We do not see the 'N' until we switch back to the task's
1098assigned stack.
1099
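A rough user-space illustration of why the flag read is missed follows
(a sketch only: THREAD_SIZE, the structure layout and the stack-masking
trick are simplified assumptions about the x86_32 4K-stack scheme, not
kernel code):

/*
 * Sketch: thread_info is found by masking the current stack pointer.
 * If we are running on a separate interrupt stack, this yields the IRQ
 * stack's thread_info, so a NEED_RESCHED bit set in the *task's*
 * thread_info is not visible until we are back on the task's stack.
 */
#define THREAD_SIZE 4096

struct thread_info_sketch {
        unsigned long flags;            /* TIF_NEED_RESCHED would live here */
};

static struct thread_info_sketch *current_thread_info_sketch(void)
{
        unsigned long sp;

        __asm__ ("mov %%esp, %0" : "=r" (sp));  /* x86_32 only */
        return (struct thread_info_sketch *)(sp & ~(THREAD_SIZE - 1));
}
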
1100function
1101--------
1102
1103This tracer is the function tracer. Enabling the function tracer
1104can be done from the debug file system. Make sure the
1105ftrace_enabled is set; otherwise this tracer is a nop.
1106
1107 # sysctl kernel.ftrace_enabled=1
1108 # echo function > /debug/tracing/current_tracer
@@ -1048,14 +1132,15 @@ set; otherwise this tracer is a nop.
1132[...]
1133
1134
1135Note: the function tracer uses ring buffers to store the above
1136entries. The newest data may overwrite the oldest data.
1137Sometimes using echo to stop the trace is not sufficient because
1138the tracing could have overwritten the data that you wanted to
1139record. For this reason, it is sometimes better to disable
1140tracing directly from a program. This allows you to stop the
1141tracing at the point that you hit the part that you are
1142interested in. To disable the tracing directly from a C program,
1143something like the following code snippet can be used:
1144
1145int trace_fd;
1146[...]
@@ -1070,10 +1155,10 @@ int main(int argc, char *argv[]) {
1155}
1156
1157Note: Here we hard coded the path name. The debugfs mount is not
1158guaranteed to be at /debug (and is more commonly at
1159/sys/kernel/debug). For simple one-time traces, the above is
1160sufficient. For anything else, a search through /proc/mounts may
1161be needed to find where the debugfs file-system is mounted.
1162
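Such a search could look like the sketch below (illustrative only; it uses
the standard getmntent() interface and keeps error handling minimal):

/*
 * Hedged sketch: locate the debugfs mount point instead of hard coding it.
 */
#include <mntent.h>
#include <stdio.h>
#include <string.h>

static int find_debugfs(char *path, size_t len)
{
        FILE *fp = setmntent("/proc/mounts", "r");
        struct mntent *m;

        if (!fp)
                return -1;
        while ((m = getmntent(fp)) != NULL) {
                if (strcmp(m->mnt_type, "debugfs") == 0) {
                        snprintf(path, len, "%s", m->mnt_dir);
                        endmntent(fp);
                        return 0;
                }
        }
        endmntent(fp);
        return -1;
}

find_debugfs() would then yield, for example, /sys/kernel/debug, to which
"/tracing/..." can be appended before opening the tracing files.
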
1163
1164Single thread tracing
@@ -1152,49 +1237,297 @@ int main (int argc, char **argv)
1237 return 0;
1238}
1239
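The end of that example program is shown above. A self-contained sketch of
the same idea, written independently of the document's program, is given
below (the /debug mount point and the set_ftrace_pid file of the tracing
directory are assumed, as in the rest of this document):

/*
 * Hedged sketch: limit function tracing to the current thread by writing
 * its pid into set_ftrace_pid before the tracer is enabled.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        char line[64];
        int fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        snprintf(line, sizeof(line), "%d\n", getpid());
        write(fd, line, strlen(line));
        close(fd);

        /* ... enable the function tracer and run the code of interest ... */
        return 0;
}
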
1240
1241hw-branch-tracer (x86 only)
1242---------------------------
1243
1244This tracer uses the x86 last branch tracing hardware feature to
1245collect a branch trace on all cpus with relatively low overhead.
1246
1247The tracer uses a fixed-size circular buffer per cpu and only
1248traces ring 0 branches. The trace file dumps that buffer in the
1249following format:
1250
1251# tracer: hw-branch-tracer
1252#
1253# CPU# TO <- FROM
1254 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
1255 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
1256 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
1257 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
1258 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
1259 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
1260
1261
1262The tracer may be used to dump the trace for the oops'ing cpu on
1263a kernel oops into the system log. To enable this,
1264ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
1265can either use the sysctl command or set it via the proc file
1266system interface.
1267
1268 sysctl kernel.ftrace_dump_on_oops=1
1269
1270or
1271
1272 echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
1273
1274
1275Here's an example of such a dump after a null pointer
1276dereference in a kernel module:
1277
1278[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
1279[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
1280[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
1281[57848.106019] Oops: 0002 [#1] SMP
1282[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
1283[57848.106019] Dumping ftrace buffer:
1284[57848.106019] ---------------------------------
1285[...]
1286[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
1287[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
1288[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
1289[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
1290[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
1291[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
1292[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
1293[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
1294[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
1295[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
1296[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
1297[...]
1298[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
1299[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
1300[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
1301[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
1302[...]
1303[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
1304[57848.106019] ---------------------------------
1305[57848.106019] CPU 0
1306[57848.106019] Modules linked in: oops
1307[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
1308[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
1309[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
1310[...]
1311
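For completeness, a hypothetical test module of the kind that could produce
such a dump is sketched below (a made-up character device whose open
handler writes through a NULL pointer; this is not the 'oops' module
referenced above and is not part of this patch):

#include <linux/module.h>
#include <linux/fs.h>

static int oops_major;

static int oops_open(struct inode *inode, struct file *file)
{
        *(int *)0 = 0;          /* deliberate NULL pointer dereference */
        return 0;
}

static const struct file_operations oops_fops = {
        .owner = THIS_MODULE,
        .open  = oops_open,
};

static int __init oops_init(void)
{
        oops_major = register_chrdev(0, "oops", &oops_fops);
        return oops_major < 0 ? oops_major : 0;
}

static void __exit oops_exit(void)
{
        unregister_chrdev(oops_major, "oops");
}

module_init(oops_init);
module_exit(oops_exit);
MODULE_LICENSE("GPL");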
1312
1313function graph tracer
1314---------------------------
1315
1316This tracer is similar to the function tracer except that it
1317probes a function on its entry and its exit. This is done by
1318using a dynamically allocated stack of return addresses in each
1319task_struct. On function entry the tracer overwrites the return
1320address of each function traced to set a custom probe. Thus the
1321original return address is stored on the stack of return
1322addresses in the task_struct.
1323
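A rough user-space sketch of that bookkeeping is shown below (the names,
the entry layout and the fixed depth are illustrative assumptions, not the
kernel's actual structures):

/*
 * Sketch of the idea: a per-task stack of saved return addresses lets an
 * exit probe run and then return to the real caller.
 */
#define RET_STACK_DEPTH 64

struct ret_entry {
        unsigned long ret;              /* original return address */
        unsigned long long calltime;
};

struct task_ret_stack {
        int idx;
        struct ret_entry stack[RET_STACK_DEPTH];
};

static int push_return_trace(struct task_ret_stack *t,
                             unsigned long ret, unsigned long long now)
{
        if (t->idx >= RET_STACK_DEPTH)
                return -1;              /* too deep: skip tracing this call */
        t->stack[t->idx].ret = ret;
        t->stack[t->idx].calltime = now;
        t->idx++;
        return 0;
}

static unsigned long pop_return_trace(struct task_ret_stack *t,
                                      unsigned long long now,
                                      unsigned long long *duration)
{
        t->idx--;
        *duration = now - t->stack[t->idx].calltime;
        return t->stack[t->idx].ret;    /* where to really return to */
}
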
1324Probing on both ends of a function leads to special features
1325such as:
1326
1327- measurement of a function's execution time
1328- having a reliable call stack to draw function calls graph
1329
1330This tracer is useful in several situations:
1331
1332- you want to find the reason for some strange kernel behavior
1333  and need to see in detail what happens in any area (or in
1334  specific ones).
1335
1336- you are experiencing weird latencies but it's difficult to
1337  find their origin.
1338
1339- you want to find quickly which path is taken by a specific
1340 function
1341
1342- you just want to peek inside a working kernel and see what
1343  happens there.
1344
1345# tracer: function_graph
1346#
1347# CPU DURATION FUNCTION CALLS
1348# | | | | | | |
1349
1350 0) | sys_open() {
1351 0) | do_sys_open() {
1352 0) | getname() {
1353 0) | kmem_cache_alloc() {
1354 0) 1.382 us | __might_sleep();
1355 0) 2.478 us | }
1356 0) | strncpy_from_user() {
1357 0) | might_fault() {
1358 0) 1.389 us | __might_sleep();
1359 0) 2.553 us | }
1360 0) 3.807 us | }
1361 0) 7.876 us | }
1362 0) | alloc_fd() {
1363 0) 0.668 us | _spin_lock();
1364 0) 0.570 us | expand_files();
1365 0) 0.586 us | _spin_unlock();
1366
1367
1368There are several columns that can be dynamically
1369enabled/disabled. You can use every combination of options you
1370want, depending on your needs.
1371
1372- The cpu number on which the function executed is enabled by
1373  default. It is sometimes better to only trace one cpu (see
1374  the tracing_cpu_mask file), or you might see unordered
1375  function calls while the trace switches between cpus.
1376
1377 hide: echo nofuncgraph-cpu > /debug/tracing/trace_options
1378 show: echo funcgraph-cpu > /debug/tracing/trace_options
1379
1380- The duration (function's time of execution) is displayed on
1381  the closing bracket line of a function, or on the same line
1382  as the current function if it is a leaf function. It is
1383  enabled by default.
1384
1385 hide: echo nofuncgraph-duration > /debug/tracing/trace_options
1386 show: echo funcgraph-duration > /debug/tracing/trace_options
1387
1388- The overhead field precedes the duration field when the
1389  duration exceeds certain thresholds.
1390
1391 hide: echo nofuncgraph-overhead > /debug/tracing/trace_options
1392 show: echo funcgraph-overhead > /debug/tracing/trace_options
1393 depends on: funcgraph-duration
1394
1395 ie:
1396
1397 0) | up_write() {
1398 0) 0.646 us | _spin_lock_irqsave();
1399 0) 0.684 us | _spin_unlock_irqrestore();
1400 0) 3.123 us | }
1401 0) 0.548 us | fput();
1402 0) + 58.628 us | }
1403
1404 [...]
1405
1406 0) | putname() {
1407 0) | kmem_cache_free() {
1408 0) 0.518 us | __phys_addr();
1409 0) 1.757 us | }
1410 0) 2.861 us | }
1411 0) ! 115.305 us | }
1412 0) ! 116.402 us | }
1413
1414 + means that the function exceeded 10 usecs.
1415 ! means that the function exceeded 100 usecs.
1416
1417
1418- The task/pid field displays the cmdline and pid of the thread
1419  that executed the function. It is disabled by default.
1420
1421 hide: echo nofuncgraph-proc > /debug/tracing/trace_options
1422 show: echo funcgraph-proc > /debug/tracing/trace_options
1423
1424 ie:
1425
1426 # tracer: function_graph
1427 #
1428 # CPU TASK/PID DURATION FUNCTION CALLS
1429 # | | | | | | | | |
1430 0) sh-4802 | | d_free() {
1431 0) sh-4802 | | call_rcu() {
1432 0) sh-4802 | | __call_rcu() {
1433 0) sh-4802 | 0.616 us | rcu_process_gp_end();
1434 0) sh-4802 | 0.586 us | check_for_new_grace_period();
1435 0) sh-4802 | 2.899 us | }
1436 0) sh-4802 | 4.040 us | }
1437 0) sh-4802 | 5.151 us | }
1438 0) sh-4802 | + 49.370 us | }
1439
1440
1441- The absolute time field is an absolute timestamp given by the
1442  system clock since it started. A snapshot of this time is
1443  given on each entry/exit of functions.
1444
1445 hide: echo nofuncgraph-abstime > /debug/tracing/trace_options
1446 show: echo funcgraph-abstime > /debug/tracing/trace_options
1447
1448 ie:
1449
1450 #
1451 # TIME CPU DURATION FUNCTION CALLS
1452 # | | | | | | | |
1453 360.774522 | 1) 0.541 us | }
1454 360.774522 | 1) 4.663 us | }
1455 360.774523 | 1) 0.541 us | __wake_up_bit();
1456 360.774524 | 1) 6.796 us | }
1457 360.774524 | 1) 7.952 us | }
1458 360.774525 | 1) 9.063 us | }
1459 360.774525 | 1) 0.615 us | journal_mark_dirty();
1460 360.774527 | 1) 0.578 us | __brelse();
1461 360.774528 | 1) | reiserfs_prepare_for_journal() {
1462 360.774528 | 1) | unlock_buffer() {
1463 360.774529 | 1) | wake_up_bit() {
1464 360.774529 | 1) | bit_waitqueue() {
1465 360.774530 | 1) 0.594 us | __phys_addr();
1466
1467
1468You can put some comments on specific functions by using
1469trace_printk(). For example, if you want to put a comment inside
1470the __might_sleep() function, you just have to include
1471<linux/ftrace.h> and call trace_printk() inside __might_sleep():
1472
1473trace_printk("I'm a comment!\n")
1474
1475will produce:
1476
1477 1) | __might_sleep() {
1478 1) | /* I'm a comment! */
1479 1) 1.449 us | }
1480
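Put together, the change described above would look roughly like this (the
function body is elided, and the signature is only assumed to match kernels
of this era):

/* Hedged illustration of the trace_printk() annotation described above. */
#include <linux/ftrace.h>

void __might_sleep(char *file, int line)
{
        trace_printk("I'm a comment!\n");

        /* ... original __might_sleep() body unchanged ... */
}
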
1481
1482You might find other useful features for this tracer in the
1483following "dynamic ftrace" section such as tracing only specific
1484functions or tasks.
1485
1486dynamic ftrace
1487--------------
1488
1489If CONFIG_DYNAMIC_FTRACE is set, the system will run with
1490virtually no overhead when function tracing is disabled. The way
1491this works is the mcount function call (placed at the start of
1492every kernel function, produced by the -pg switch in gcc),
1493starts off pointing to a simple return. (Enabling FTRACE will
1494include the -pg switch in the compiling of the kernel.)
1495
1496At compile time every C file object is run through the
1497recordmcount.pl script (located in the scripts directory). This
1498script will process the C object using objdump to find all the
1499locations in the .text section that call mcount. (Note, only the
1500.text section is processed, since processing other sections like
1501.init.text may cause races due to those sections being freed).
1502
1503A new section called "__mcount_loc" is created that holds
1504references to all the mcount call sites in the .text section.
1505This section is compiled back into the original object. The
1506final linker will add all these references into a single table.
1507
1508On boot up, before SMP is initialized, the dynamic ftrace code
1509scans this table and updates all the locations into nops. It
1510also records the locations, which are added to the
1511available_filter_functions list. Modules are processed as they
1512are loaded and before they are executed. When a module is
1513unloaded, it also removes its functions from the ftrace function
1514list. This is automatic in the module unload code, and the
1515module author does not need to worry about it.
1516
1517When tracing is enabled, kstop_machine is called to prevent
1518races with the CPUs executing code being modified (which can
1519cause the CPU to do undesirable things), and the nops are
1520patched back to calls. But this time, they do not call mcount
1521(which is just a function stub). They now call into the ftrace
1522infrastructure.
1523
1524One special side-effect of recording the functions being
1525traced is that we can now selectively choose which functions we
1526wish to trace and which ones we want the mcount calls to remain
1527as nops.
1528
1529Two files are used, one for enabling and one for disabling the
1530tracing of specified functions. They are:
1531
1532 set_ftrace_filter
1533
@@ -1202,8 +1535,8 @@ and
1535
1536 set_ftrace_notrace
1537
1538A list of available functions that you can add to these files is
1539listed in:
1540
1541 available_filter_functions
1542
@@ -1240,8 +1573,8 @@ hrtimer_interrupt
1573sys_nanosleep
1574
1575
1576Perhaps this is not enough. The filters also allow simple wild
1577cards. Only the following are currently available:
1578
1579 <match>* - will match functions that begin with <match>
1580 *<match> - will match functions that end with <match>
@@ -1251,9 +1584,9 @@ These are the only wild cards which are supported.
1584
1585 <match>*<match> will not work.
1586
1587Note: It is better to use quotes to enclose the wild cards,
1588 otherwise the shell may expand the parameters into names
1589 of files in the local directory.
1590
1591 # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
1592
@@ -1299,7 +1632,8 @@ This is because the '>' and '>>' act just like they do in bash.
1632To rewrite the filters, use '>'
1633To append to the filters, use '>>'
1634
1635To clear out a filter so that all functions will be recorded
1636again:
1637
1638 # echo > /debug/tracing/set_ftrace_filter
1639 # cat /debug/tracing/set_ftrace_filter
@@ -1331,7 +1665,8 @@ hrtimer_get_res
1665hrtimer_init_sleeper
1666
1667
1668The set_ftrace_notrace prevents those functions from being
1669traced.
1670
1671 # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace
1672
@@ -1353,13 +1688,75 @@ Produces:
1688
1689We can see that there's no more lock or preempt tracing.
1690
1691
1692Dynamic ftrace with the function graph tracer
1693---------------------------------------------
1694
1695Although what has been explained above concerns both the
1696function tracer and the function graph tracer, there are some
1697special features only available in the function graph tracer.
1698
1699If you want to trace only one function and all of its children,
1700you just have to echo its name into set_graph_function:
1701
1702 echo __do_fault > set_graph_function
1703
1704will produce the following "expanded" trace of the __do_fault()
1705function:
1706
1707 0) | __do_fault() {
1708 0) | filemap_fault() {
1709 0) | find_lock_page() {
1710 0) 0.804 us | find_get_page();
1711 0) | __might_sleep() {
1712 0) 1.329 us | }
1713 0) 3.904 us | }
1714 0) 4.979 us | }
1715 0) 0.653 us | _spin_lock();
1716 0) 0.578 us | page_add_file_rmap();
1717 0) 0.525 us | native_set_pte_at();
1718 0) 0.585 us | _spin_unlock();
1719 0) | unlock_page() {
1720 0) 0.541 us | page_waitqueue();
1721 0) 0.639 us | __wake_up_bit();
1722 0) 2.786 us | }
1723 0) + 14.237 us | }
1724 0) | __do_fault() {
1725 0) | filemap_fault() {
1726 0) | find_lock_page() {
1727 0) 0.698 us | find_get_page();
1728 0) | __might_sleep() {
1729 0) 1.412 us | }
1730 0) 3.950 us | }
1731 0) 5.098 us | }
1732 0) 0.631 us | _spin_lock();
1733 0) 0.571 us | page_add_file_rmap();
1734 0) 0.526 us | native_set_pte_at();
1735 0) 0.586 us | _spin_unlock();
1736 0) | unlock_page() {
1737 0) 0.533 us | page_waitqueue();
1738 0) 0.638 us | __wake_up_bit();
1739 0) 2.793 us | }
1740 0) + 14.012 us | }
1741
1742You can also expand several functions at once:
1743
1744 echo sys_open > set_graph_function
1745 echo sys_close >> set_graph_function
1746
1747Now, if you want to go back to tracing all functions, you can
1748clear this special filter via:
1749
1750 echo > set_graph_function
1751
1752
1753trace_pipe
1754----------
1755
1756The trace_pipe outputs the same content as the trace file, but
1757the effect on the tracing is different. Every read from
1758trace_pipe is consumed. This means that subsequent reads will be
1759different. The trace is live.
1760
1761 # echo function > /debug/tracing/current_tracer
1762 # cat /debug/tracing/trace_pipe > /tmp/trace.out &
@@ -1387,38 +1784,45 @@ is live.
1784 bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up
1785
1786
1787Note, reading the trace_pipe file will block until more input is
1788added. By changing the tracer, trace_pipe will issue an EOF. We
1789needed to set the function tracer _before_ we "cat" the
1790trace_pipe file.
1791
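A small program can consume the live trace in the same way; a hedged sketch
(using the /debug mount point assumed throughout this document) is shown
below:

/*
 * Sketch: drain trace_pipe and copy it to stdout. Each read consumes what
 * it returns, and the read blocks until more trace data is available.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/debug/tracing/trace_pipe", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
}
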
1792
1793trace entries
1794-------------
1795
1796Having too much or not enough data can be troublesome in
1797diagnosing an issue in the kernel. The file buffer_size_kb is
1798used to modify the size of the internal trace buffers. The
1799number listed is the number of entries that can be recorded per
1800CPU. To know the full size, multiply the number of possible CPUs
1801by the number of entries.
1802
1803 # cat /debug/tracing/buffer_size_kb
18041408 (units kilobytes)
1805
1806Note, to modify this, you must have tracing completely disabled.
1807To do that, echo "nop" into the current_tracer. If the
1808current_tracer is not set to "nop", an EINVAL error will be
1809returned.
1810
1811 # echo nop > /debug/tracing/current_tracer
1812 # echo 10000 > /debug/tracing/buffer_size_kb
1813 # cat /debug/tracing/buffer_size_kb
181410000 (units kilobytes)
1815
1816The number of pages which will be allocated is limited to a
1817percentage of available memory. Allocating too much will produce
1818an error.
1819
1820 # echo 1000000000000 > /debug/tracing/buffer_size_kb
1821-bash: echo: write error: Cannot allocate memory
1822 # cat /debug/tracing/buffer_size_kb
182385
1824
1825-----------
1826
1827More details can be found in the source code, in the
1828kernel/trace/*.c files.
diff --git a/Documentation/hwmon/lm90 b/Documentation/hwmon/lm90
index 0e8411710238..93d8e3d55150 100644
--- a/Documentation/hwmon/lm90
+++ b/Documentation/hwmon/lm90
@@ -42,6 +42,11 @@ Supported chips:
42 Addresses scanned: I2C 0x4e 42 Addresses scanned: I2C 0x4e
43 Datasheet: Publicly available at the Maxim website 43 Datasheet: Publicly available at the Maxim website
44 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497 44 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497
45 * Maxim MAX6648
46 Prefix: 'max6646'
47 Addresses scanned: I2C 0x4c
48 Datasheet: Publicly available at the Maxim website
49 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
45 * Maxim MAX6649 50 * Maxim MAX6649
46 Prefix: 'max6646' 51 Prefix: 'max6646'
47 Addresses scanned: I2C 0x4c 52 Addresses scanned: I2C 0x4c
@@ -74,6 +79,11 @@ Supported chips:
74 0x4c, 0x4d and 0x4e 79 0x4c, 0x4d and 0x4e
75 Datasheet: Publicly available at the Maxim website 80 Datasheet: Publicly available at the Maxim website
76 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370 81 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
82 * Maxim MAX6692
83 Prefix: 'max6646'
84 Addresses scanned: I2C 0x4c
85 Datasheet: Publicly available at the Maxim website
86 http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
77 87
78 88
79Author: Jean Delvare <khali@linux-fr.org> 89Author: Jean Delvare <khali@linux-fr.org>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 28de395fa096..7643483bdd6a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
49 ISAPNP ISA PnP code is enabled. 49 ISAPNP ISA PnP code is enabled.
50 ISDN Appropriate ISDN support is enabled. 50 ISDN Appropriate ISDN support is enabled.
51 JOY Appropriate joystick support is enabled. 51 JOY Appropriate joystick support is enabled.
52 KMEMTRACE kmemtrace is enabled.
52 LIBATA Libata driver is enabled 53 LIBATA Libata driver is enabled
53 LP Printer support is enabled. 54 LP Printer support is enabled.
54 LOOP Loopback device support is enabled. 55 LOOP Loopback device support is enabled.
@@ -1047,6 +1048,15 @@ and is between 256 and 4096 characters. It is defined in the file
1047 use the HighMem zone if it exists, and the Normal 1048 use the HighMem zone if it exists, and the Normal
1048 zone if it does not. 1049 zone if it does not.
1049 1050
1051 kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
1052 Controls whether kmemtrace is enabled
1053 at boot-time.
1054
1055 kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
1056 subbufs kmemtrace's relay channel has. Set this
1057 higher than default (KMEMTRACE_N_SUBBUFS in code) if
1058 you experience buffer overruns.
1059
1050 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter 1060 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
1051 is similar to kernelcore except it specifies the 1061 is similar to kernelcore except it specifies the
1052 amount of memory used for migratable allocations. 1062 amount of memory used for migratable allocations.
@@ -2334,6 +2344,8 @@ and is between 256 and 4096 characters. It is defined in the file
2334 2344
2335 tp720= [HW,PS2] 2345 tp720= [HW,PS2]
2336 2346
2347 trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
2348
2337 trix= [HW,OSS] MediaTrix AudioTrix Pro 2349 trix= [HW,OSS] MediaTrix AudioTrix Pro
2338 Format: 2350 Format:
2339 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> 2351 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index 488773018152..938ea22f2cc0 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -27,33 +27,37 @@ lock-class.
27State 27State
28----- 28-----
29 29
30The validator tracks lock-class usage history into 5 separate state bits: 30The validator tracks lock-class usage history into 4n + 1 separate state bits:
31 31
32- 'ever held in hardirq context' [ == hardirq-safe ] 32- 'ever held in STATE context'
33- 'ever held in softirq context' [ == softirq-safe ] 33- 'ever held as readlock in STATE context'
34- 'ever held with hardirqs enabled' [ == hardirq-unsafe ] 34- 'ever held with STATE enabled'
35- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ] 35- 'ever held as readlock with STATE enabled'
36
37Where STATE can be either one of (kernel/lockdep_states.h)
38 - hardirq
39 - softirq
40 - reclaim_fs
36 41
37- 'ever used' [ == !unused ] 42- 'ever used' [ == !unused ]
38 43
39When locking rules are violated, these 4 state bits are presented in the 44When locking rules are violated, these state bits are presented in the
40locking error messages, inside curlies. A contrived example: 45locking error messages, inside curlies. A contrived example:
41 46
42 modprobe/2287 is trying to acquire lock: 47 modprobe/2287 is trying to acquire lock:
43 (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 48 (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
44 49
45 but task is already holding lock: 50 but task is already holding lock:
46 (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 51 (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
47 52
48 53
49The bit position indicates hardirq, softirq, hardirq-read, 54The bit position indicates STATE, STATE-read, for each of the states listed
50softirq-read respectively, and the character displayed in each 55above, and the character displayed in each indicates:
51indicates:
52 56
53 '.' acquired while irqs disabled 57 '.' acquired while irqs disabled
54 '+' acquired in irq context 58 '+' acquired in irq context
55 '-' acquired with irqs enabled 59 '-' acquired with irqs enabled
56 '?' read acquired in irq context with irqs enabled. 60 '?' acquired in irq context with irqs enabled.
57 61
58Unused mutexes cannot be part of the cause of an error. 62Unused mutexes cannot be part of the cause of an error.
59 63
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718afb..535aeb936dbc 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -113,6 +113,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
113 113
114'x' - Used by xmon interface on ppc/powerpc platforms. 114'x' - Used by xmon interface on ppc/powerpc platforms.
115 115
116'z' - Dump the ftrace buffer
117
116'0'-'9' - Sets the console log level, controlling which kernel messages 118'0'-'9' - Sets the console log level, controlling which kernel messages
117 will be printed to your console. ('0', for example would make 119 will be printed to your console. ('0', for example would make
118 it so that only emergency messages like PANICs or OOPSes would 120 it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 6f0a044f5b5e..4ff43c6de299 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -45,8 +45,8 @@ In include/trace/subsys.h :
45#include <linux/tracepoint.h> 45#include <linux/tracepoint.h>
46 46
47DECLARE_TRACE(subsys_eventname, 47DECLARE_TRACE(subsys_eventname,
48 TPPROTO(int firstarg, struct task_struct *p), 48 TP_PROTO(int firstarg, struct task_struct *p),
49 TPARGS(firstarg, p)); 49 TP_ARGS(firstarg, p));
50 50
51In subsys/file.c (where the tracing statement must be added) : 51In subsys/file.c (where the tracing statement must be added) :
52 52
@@ -66,10 +66,10 @@ Where :
66 - subsys is the name of your subsystem. 66 - subsys is the name of your subsystem.
67 - eventname is the name of the event to trace. 67 - eventname is the name of the event to trace.
68 68
69- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the 69- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the
70 function called by this tracepoint. 70 function called by this tracepoint.
71 71
72- TPARGS(firstarg, p) are the parameters names, same as found in the 72- TP_ARGS(firstarg, p) are the parameters names, same as found in the
73 prototype. 73 prototype.
74 74
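As a hedged sketch of the renamed macros in use (subsys_eventname, its
arguments and the caller below are illustrative placeholders, not part of
this patch):

#include <linux/tracepoint.h>
#include <linux/sched.h>

/* In include/trace/subsys.h, as described above: */
DECLARE_TRACE(subsys_eventname,
	TP_PROTO(int firstarg, struct task_struct *p),
	TP_ARGS(firstarg, p));

/* In subsys/file.c: */
DEFINE_TRACE(subsys_eventname);

void some_subsys_function(int firstarg, struct task_struct *p)
{
	/* the tracepoint call site */
	trace_subsys_eventname(firstarg, p);
}
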
75Connecting a function (probe) to a tracepoint is done by providing a 75Connecting a function (probe) to a tracepoint is done by providing a
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 000000000000..a956d9b7f943
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to the userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocations and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on one arch and analysing the
32data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting is done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected out of order
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
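
An illustrative kernel-code fragment of the inconsistency mentioned above
(foo_cachep and the surrounding function are made up for the example):

#include <linux/slab.h>

struct foo { int x; };
static struct kmem_cache *foo_cachep;

static void demo(void)
{
	struct foo *p = kmem_cache_alloc(foo_cachep, GFP_KERNEL);

	if (!p)
		return;
	/* ... use p ... */
	kfree(p);	/* should be kmem_cache_free(foo_cachep, p) */
}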
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
diff --git a/MAINTAINERS b/MAINTAINERS
index 1c2ca1dc66f2..c0a662d1c284 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1469,8 +1469,6 @@ L: linux-acpi@vger.kernel.org
1469S: Supported 1469S: Supported
1470 1470
1471DOCUMENTATION (/Documentation directory) 1471DOCUMENTATION (/Documentation directory)
1472P: Michael Kerrisk
1473M: mtk.manpages@gmail.com
1474P: Randy Dunlap 1472P: Randy Dunlap
1475M: rdunlap@xenotime.net 1473M: rdunlap@xenotime.net
1476L: linux-doc@vger.kernel.org 1474L: linux-doc@vger.kernel.org
@@ -2623,6 +2621,12 @@ M: jason.wessel@windriver.com
2623L: kgdb-bugreport@lists.sourceforge.net 2621L: kgdb-bugreport@lists.sourceforge.net
2624S: Maintained 2622S: Maintained
2625 2623
2624KMEMTRACE
2625P: Eduard - Gabriel Munteanu
2626M: eduard.munteanu@linux360.ro
2627L: linux-kernel@vger.kernel.org
2628S: Maintained
2629
2626KPROBES 2630KPROBES
2627P: Ananth N Mavinakayanahalli 2631P: Ananth N Mavinakayanahalli
2628M: ananth@in.ibm.com 2632M: ananth@in.ibm.com
@@ -2879,7 +2883,7 @@ P: Michael Kerrisk
2879M: mtk.manpages@gmail.com 2883M: mtk.manpages@gmail.com
2880W: http://www.kernel.org/doc/man-pages 2884W: http://www.kernel.org/doc/man-pages
2881L: linux-man@vger.kernel.org 2885L: linux-man@vger.kernel.org
2882S: Supported 2886S: Maintained
2883 2887
2884MARVELL LIBERTAS WIRELESS DRIVER 2888MARVELL LIBERTAS WIRELESS DRIVER
2885P: Dan Williams 2889P: Dan Williams
diff --git a/Makefile b/Makefile
index c40d83aedebe..69b8091bfed1 100644
--- a/Makefile
+++ b/Makefile
@@ -905,12 +905,18 @@ localver = $(subst $(space),, $(string) \
905# and if the SCM is know a tag from the SCM is appended. 905# and if the SCM is know a tag from the SCM is appended.
906# The appended tag is determined by the SCM used. 906# The appended tag is determined by the SCM used.
907# 907#
908# Currently, only git is supported. 908# .scmversion is used when generating rpm packages so we do not lose
909# Other SCMs can edit scripts/setlocalversion and add the appropriate 909# the version information from the SCM when we do the build of the kernel
910# checks as needed. 910# from the copied source
911ifdef CONFIG_LOCALVERSION_AUTO 911ifdef CONFIG_LOCALVERSION_AUTO
912 _localver-auto = $(shell $(CONFIG_SHELL) \ 912
913 $(srctree)/scripts/setlocalversion $(srctree)) 913ifeq ($(wildcard .scmversion),)
914 _localver-auto = $(shell $(CONFIG_SHELL) \
915 $(srctree)/scripts/setlocalversion $(srctree))
916else
917 _localver-auto = $(shell cat .scmversion 2> /dev/null)
918endif
919
914 localver-auto = $(LOCALVERSION)$(_localver-auto) 920 localver-auto = $(LOCALVERSION)$(_localver-auto)
915endif 921endif
916 922
@@ -1538,7 +1544,7 @@ quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
1538 cmd_depmod = \ 1544 cmd_depmod = \
1539 if [ -r System.map -a -x $(DEPMOD) ]; then \ 1545 if [ -r System.map -a -x $(DEPMOD) ]; then \
1540 $(DEPMOD) -ae -F System.map \ 1546 $(DEPMOD) -ae -F System.map \
1541 $(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) -r) \ 1547 $(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) ) \
1542 $(KERNELRELEASE); \ 1548 $(KERNELRELEASE); \
1543 fi 1549 fi
1544 1550
diff --git a/arch/Kconfig b/arch/Kconfig
index 550dab22daa1..a092dc77c24d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -6,6 +6,7 @@ config OPROFILE
6 tristate "OProfile system profiling (EXPERIMENTAL)" 6 tristate "OProfile system profiling (EXPERIMENTAL)"
7 depends on PROFILING 7 depends on PROFILING
8 depends on HAVE_OPROFILE 8 depends on HAVE_OPROFILE
9 depends on TRACING_SUPPORT
9 select TRACING 10 select TRACING
10 select RING_BUFFER 11 select RING_BUFFER
11 help 12 help
diff --git a/arch/alpha/include/asm/ftrace.h b/arch/alpha/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/alpha/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index d953e234daa8..88971460fa6c 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -14,17 +14,4 @@ typedef struct {
14 14
15void ack_bad_irq(unsigned int irq); 15void ack_bad_irq(unsigned int irq);
16 16
17#define HARDIRQ_BITS 12
18
19/*
20 * The hardirq mask has to be large enough to have
21 * space for potentially nestable IRQ sources in the system
22 * to nest on a single CPU. On Alpha, interrupts are masked at the CPU
23 * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
24 * so we really only have 8 nestable IRQs, but allow some overhead
25 */
26#if (1 << HARDIRQ_BITS) < 16
27#error HARDIRQ_BITS is too low!
28#endif
29
30#endif /* _ALPHA_HARDIRQ_H */ 17#endif /* _ALPHA_HARDIRQ_H */
diff --git a/arch/avr32/include/asm/ftrace.h b/arch/avr32/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/avr32/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h
index 267354356f60..015bc75ea798 100644
--- a/arch/avr32/include/asm/hardirq.h
+++ b/arch/avr32/include/asm/hardirq.h
@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq);
20 20
21#endif /* __ASSEMBLY__ */ 21#endif /* __ASSEMBLY__ */
22 22
23#define HARDIRQ_BITS 12
24
25/*
26 * The hardirq mask has to be large enough to have
27 * space for potentially all IRQ sources in the system
28 * nesting on a single CPU:
29 */
30#if (1 << HARDIRQ_BITS) < NR_IRQS
31# error HARDIRQ_BITS is too low!
32#endif
33
34#endif /* __ASM_AVR32_HARDIRQ_H */ 23#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/blackfin/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/cris/include/asm/ftrace.h b/arch/cris/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/cris/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/h8300/include/asm/ftrace.h b/arch/h8300/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/h8300/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 153e727a6e8e..294a3b13ecac 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -22,6 +22,9 @@ config IA64
22 select HAVE_OPROFILE 22 select HAVE_OPROFILE
23 select HAVE_KPROBES 23 select HAVE_KPROBES
24 select HAVE_KRETPROBES 24 select HAVE_KRETPROBES
25 select HAVE_FTRACE_MCOUNT_RECORD
26 select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
27 select HAVE_FUNCTION_TRACER
25 select HAVE_DMA_ATTRS 28 select HAVE_DMA_ATTRS
26 select HAVE_KVM 29 select HAVE_KVM
27 select HAVE_ARCH_TRACEHOOK 30 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 000000000000..d20db3c2a656
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,28 @@
1#ifndef _ASM_IA64_FTRACE_H
2#define _ASM_IA64_FTRACE_H
3
4#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */
6
7#ifndef __ASSEMBLY__
8extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
9#define mcount _mcount
10
11#include <asm/kprobes.h>
12/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
13#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
14#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
15
16static inline unsigned long ftrace_call_adjust(unsigned long addr)
17{
18 /* second bundle, insn 2 */
19 return addr - 0x12;
20}
21
22struct dyn_arch_ftrace {
23};
24#endif
25
26#endif /* CONFIG_FUNCTION_TRACER */
27
28#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index 140e495b8e0e..d514cd9edb49 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -20,16 +20,6 @@
20 20
21#define local_softirq_pending() (local_cpu_data->softirq_pending) 21#define local_softirq_pending() (local_cpu_data->softirq_pending)
22 22
23#define HARDIRQ_BITS 14
24
25/*
26 * The hardirq mask has to be large enough to have space for potentially all IRQ sources
27 * in the system nesting on a single CPU:
28 */
29#if (1 << HARDIRQ_BITS) < NR_IRQS
30# error HARDIRQ_BITS is too low!
31#endif
32
33extern void __iomem *ipi_base_addr; 23extern void __iomem *ipi_base_addr;
34 24
35void ack_bad_irq(unsigned int irq); 25void ack_bad_irq(unsigned int irq);
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea954892..ab6e7ec0bba3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,6 +2,10 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5ifdef CONFIG_DYNAMIC_FTRACE
6CFLAGS_REMOVE_ftrace.o = -pg
7endif
8
5extra-y := head.o init_task.o vmlinux.lds 9extra-y := head.o init_task.o vmlinux.lds
6 10
7obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ 11obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
28obj-$(CONFIG_CPU_FREQ) += cpufreq/ 32obj-$(CONFIG_CPU_FREQ) += cpufreq/
29obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o 33obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
30obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
35obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
31obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 36obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
32obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 37obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
33obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o 38obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c1175..7e3382b06d56 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
47#include <asm/processor.h> 47#include <asm/processor.h>
48#include <asm/thread_info.h> 48#include <asm/thread_info.h>
49#include <asm/unistd.h> 49#include <asm/unistd.h>
50#include <asm/ftrace.h>
50 51
51#include "minstate.h" 52#include "minstate.h"
52 53
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
1404 br.ret.sptk.many rp 1405 br.ret.sptk.many rp
1405END(unw_init_running) 1406END(unw_init_running)
1406 1407
1408#ifdef CONFIG_FUNCTION_TRACER
1409#ifdef CONFIG_DYNAMIC_FTRACE
1410GLOBAL_ENTRY(_mcount)
1411 br ftrace_stub
1412END(_mcount)
1413
1414.here:
1415 br.ret.sptk.many b0
1416
1417GLOBAL_ENTRY(ftrace_caller)
1418 alloc out0 = ar.pfs, 8, 0, 4, 0
1419 mov out3 = r0
1420 ;;
1421 mov out2 = b0
1422 add r3 = 0x20, r3
1423 mov out1 = r1;
1424 br.call.sptk.many b0 = ftrace_patch_gp
1425 //this might be called from module, so we must patch gp
1426ftrace_patch_gp:
1427 movl gp=__gp
1428 mov b0 = r3
1429 ;;
1430.global ftrace_call;
1431ftrace_call:
1432{
1433 .mlx
1434 nop.m 0x0
1435 movl r3 = .here;;
1436}
1437 alloc loc0 = ar.pfs, 4, 4, 2, 0
1438 ;;
1439 mov loc1 = b0
1440 mov out0 = b0
1441 mov loc2 = r8
1442 mov loc3 = r15
1443 ;;
1444 adds out0 = -MCOUNT_INSN_SIZE, out0
1445 mov out1 = in2
1446 mov b6 = r3
1447
1448 br.call.sptk.many b0 = b6
1449 ;;
1450 mov ar.pfs = loc0
1451 mov b0 = loc1
1452 mov r8 = loc2
1453 mov r15 = loc3
1454 br ftrace_stub
1455 ;;
1456END(ftrace_caller)
1457
1458#else
1459GLOBAL_ENTRY(_mcount)
1460 movl r2 = ftrace_stub
1461 movl r3 = ftrace_trace_function;;
1462 ld8 r3 = [r3];;
1463 ld8 r3 = [r3];;
1464 cmp.eq p7,p0 = r2, r3
1465(p7) br.sptk.many ftrace_stub
1466 ;;
1467
1468 alloc loc0 = ar.pfs, 4, 4, 2, 0
1469 ;;
1470 mov loc1 = b0
1471 mov out0 = b0
1472 mov loc2 = r8
1473 mov loc3 = r15
1474 ;;
1475 adds out0 = -MCOUNT_INSN_SIZE, out0
1476 mov out1 = in2
1477 mov b6 = r3
1478
1479 br.call.sptk.many b0 = b6
1480 ;;
1481 mov ar.pfs = loc0
1482 mov b0 = loc1
1483 mov r8 = loc2
1484 mov r15 = loc3
1485 br ftrace_stub
1486 ;;
1487END(_mcount)
1488#endif
1489
1490GLOBAL_ENTRY(ftrace_stub)
1491 mov r3 = b0
1492 movl r2 = _mcount_ret_helper
1493 ;;
1494 mov b6 = r2
1495 mov b7 = r3
1496 br.ret.sptk.many b6
1497
1498_mcount_ret_helper:
1499 mov b0 = r42
1500 mov r1 = r41
1501 mov ar.pfs = r40
1502 br b7
1503END(ftrace_stub)
1504
1505#endif /* CONFIG_FUNCTION_TRACER */
1506
1407 .rodata 1507 .rodata
1408 .align 8 1508 .align 8
1409 .globl sys_call_table 1509 .globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 000000000000..7fc8c961b1f7
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
1/*
2 * Dynamic function tracing support.
3 *
4 * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
5 *
6 * For licencing details, see COPYING.
7 *
8 * Defines low-level handling of mcount calls when the kernel
9 * is compiled with the -pg flag. When using dynamic ftrace, the
10 * mcount call-sites get patched lazily with NOP till they are
11 * enabled. All code mutation routines here take effect atomically.
12 */
13
14#include <linux/uaccess.h>
15#include <linux/ftrace.h>
16
17#include <asm/cacheflush.h>
18#include <asm/patch.h>
19
20/* In IA64, each function will be added below two bundles with -pg option */
21static unsigned char __attribute__((aligned(8)))
22ftrace_orig_code[MCOUNT_INSN_SIZE] = {
23 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
24 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
25 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
26 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
27 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
28 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
29};
30
31struct ftrace_orig_insn {
32 u64 dummy1, dummy2, dummy3;
33 u64 dummy4:64-41+13;
34 u64 imm20:20;
35 u64 dummy5:3;
36 u64 sign:1;
37 u64 dummy6:4;
38};
39
40/* mcount stub will be converted below for nop */
41static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
42 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
43 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
44 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
45 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
46 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
47 0x00, 0x00, 0x04, 0x00
48};
49
50static unsigned char *ftrace_nop_replace(void)
51{
52 return ftrace_nop_code;
53}
54
55/*
56 * mcount stub will be converted below for call
57 * Note: Just the last instruction is changed against nop
58 * */
59static unsigned char __attribute__((aligned(8)))
60ftrace_call_code[MCOUNT_INSN_SIZE] = {
61 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
62 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
63 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
64 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
65 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
66 0xf8, 0xff, 0xff, 0xc8
67};
68
69struct ftrace_call_insn {
70 u64 dummy1, dummy2;
71 u64 dummy3:48;
72 u64 imm39_l:16;
73 u64 imm39_h:23;
74 u64 dummy4:13;
75 u64 imm20:20;
76 u64 dummy5:3;
77 u64 i:1;
78 u64 dummy6:4;
79};
80
81static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
82{
83 struct ftrace_call_insn *code = (void *)ftrace_call_code;
84 unsigned long offset = addr - (ip + 0x10);
85
86 code->imm39_l = offset >> 24;
87 code->imm39_h = offset >> 40;
88 code->imm20 = offset >> 4;
89 code->i = offset >> 63;
90 return ftrace_call_code;
91}
92
93static int
94ftrace_modify_code(unsigned long ip, unsigned char *old_code,
95 unsigned char *new_code, int do_check)
96{
97 unsigned char replaced[MCOUNT_INSN_SIZE];
98
99 /*
100 * Note: Due to modules and __init, code can
101 * disappear and change, we need to protect against faulting
102 * as well as code changing. We do this by using the
103 * probe_kernel_* functions.
104 *
105 * No real locking needed, this code is run through
106 * kstop_machine, or before SMP starts.
107 */
108
109 if (!do_check)
110 goto skip_check;
111
112 /* read the text we want to modify */
113 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
114 return -EFAULT;
115
116 /* Make sure it is what we expect it to be */
117 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
118 return -EINVAL;
119
120skip_check:
121 /* replace the text with the new text */
122 if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
123 return -EPERM;
124 flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
125
126 return 0;
127}
128
129static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
130{
131 unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
132 unsigned long ip = rec->ip;
133
134 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
135 return -EFAULT;
136 if (rec->flags & FTRACE_FL_CONVERTED) {
137 struct ftrace_call_insn *call_insn, *tmp_call;
138
139 call_insn = (void *)ftrace_call_code;
140 tmp_call = (void *)replaced;
141 call_insn->imm39_l = tmp_call->imm39_l;
142 call_insn->imm39_h = tmp_call->imm39_h;
143 call_insn->imm20 = tmp_call->imm20;
144 call_insn->i = tmp_call->i;
145 if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
146 return -EINVAL;
147 return 0;
148 } else {
149 struct ftrace_orig_insn *call_insn, *tmp_call;
150
151 call_insn = (void *)ftrace_orig_code;
152 tmp_call = (void *)replaced;
153 call_insn->sign = tmp_call->sign;
154 call_insn->imm20 = tmp_call->imm20;
155 if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
156 return -EINVAL;
157 return 0;
158 }
159}
160
161int ftrace_make_nop(struct module *mod,
162 struct dyn_ftrace *rec, unsigned long addr)
163{
164 int ret;
165 char *new;
166
167 ret = ftrace_make_nop_check(rec, addr);
168 if (ret)
169 return ret;
170 new = ftrace_nop_replace();
171 return ftrace_modify_code(rec->ip, NULL, new, 0);
172}
173
174int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
175{
176 unsigned long ip = rec->ip;
177 unsigned char *old, *new;
178
179 old = ftrace_nop_replace();
180 new = ftrace_call_replace(ip, addr);
181 return ftrace_modify_code(ip, old, new, 1);
182}
183
184/* On IA64, _mcount cannot call ftrace_stub directly; it can only jump to it */
185int ftrace_update_ftrace_func(ftrace_func_t func)
186{
187 unsigned long ip;
188 unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
189
190 if (func == ftrace_stub)
191 return 0;
192 ip = ((struct fnptr *)func)->ip;
193
194 ia64_patch_imm64(addr + 2, ip);
195
196 flush_icache_range(addr, addr + 16);
197 return 0;
198}
199
200/* run from kstop_machine */
201int __init ftrace_dyn_arch_init(void *data)
202{
203 *(unsigned long *)data = 0;
204
205 return 0;
206}
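
For reference, ftrace_call_replace() above folds the branch displacement (addr - (ip + 0x10)) into the brl.call slot: bits 4..23 of the offset go into imm20, bits 24..62 into imm39 (stored as imm39_l/imm39_h only because the field straddles a 64-bit word of struct ftrace_call_insn), and bit 63 into i. The user-space sketch below mirrors that split and checks the round trip; the ip/addr values are made up for illustration and nothing here is part of the patch.

#include <stdio.h>
#include <stdint.h>

/* mirrors the field split done by ftrace_call_replace() */
struct brl_imm {
	uint64_t imm20;	/* offset bits  4..23 */
	uint64_t imm39;	/* offset bits 24..62 */
	uint64_t i;	/* offset bit  63     */
};

static struct brl_imm split_offset(uint64_t offset)
{
	struct brl_imm f;

	f.imm20 = (offset >> 4) & ((1ULL << 20) - 1);
	f.imm39 = (offset >> 24) & ((1ULL << 39) - 1);
	f.i     = offset >> 63;
	return f;
}

static uint64_t join_offset(struct brl_imm f)
{
	return (f.i << 63) | (f.imm39 << 24) | (f.imm20 << 4);
}

int main(void)
{
	/* hypothetical bundle-aligned addresses; real ones come from dyn_ftrace */
	uint64_t ip     = 0xa000000100040000ULL;	/* patched call site  */
	uint64_t addr   = 0xa0000001000a0010ULL;	/* e.g. ftrace_caller */
	uint64_t offset = addr - (ip + 0x10);		/* same bias as above */
	struct brl_imm f = split_offset(offset);

	printf("imm20=%#llx imm39=%#llx i=%llu round-trip=%d\n",
	       (unsigned long long)f.imm20, (unsigned long long)f.imm39,
	       (unsigned long long)f.i, join_offset(f) == offset);
	return 0;
}
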
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..2d311864e359 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
112#endif 112#endif
113extern char ia64_ivt[]; 113extern char ia64_ivt[];
114EXPORT_SYMBOL(ia64_ivt); 114EXPORT_SYMBOL(ia64_ivt);
115
116#include <asm/ftrace.h>
117#ifdef CONFIG_FUNCTION_TRACER
118/* mcount is defined in assembly */
119EXPORT_SYMBOL(_mcount);
120#endif
diff --git a/arch/m68k/include/asm/ftrace.h b/arch/m68k/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/m68k/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index f2baea3039bb..0208723adf28 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -512,7 +512,7 @@ CONFIG_MD_LINEAR=m
512CONFIG_MD_RAID0=y 512CONFIG_MD_RAID0=y
513CONFIG_MD_RAID1=y 513CONFIG_MD_RAID1=y
514CONFIG_MD_RAID10=m 514CONFIG_MD_RAID10=m
515CONFIG_MD_RAID456=m 515CONFIG_MD_RAID456=y
516CONFIG_MD_RAID5_RESHAPE=y 516CONFIG_MD_RAID5_RESHAPE=y
517CONFIG_MD_MULTIPATH=m 517CONFIG_MD_MULTIPATH=m
518CONFIG_MD_FAULTY=m 518CONFIG_MD_FAULTY=m
diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig
index 9d5bd2a0af3d..5380f1f582d9 100644
--- a/arch/mips/configs/jmr3927_defconfig
+++ b/arch/mips/configs/jmr3927_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.26-rc9 3# Linux kernel version: 2.6.29-rc7
4# Fri Jul 11 23:01:36 2008 4# Wed Mar 4 23:07:16 2009
5# 5#
6CONFIG_MIPS=y 6CONFIG_MIPS=y
7 7
@@ -18,8 +18,10 @@ CONFIG_MIPS=y
18# CONFIG_LEMOTE_FULONG is not set 18# CONFIG_LEMOTE_FULONG is not set
19# CONFIG_MIPS_MALTA is not set 19# CONFIG_MIPS_MALTA is not set
20# CONFIG_MIPS_SIM is not set 20# CONFIG_MIPS_SIM is not set
21# CONFIG_MARKEINS is not set 21# CONFIG_MACH_EMMA is not set
22# CONFIG_MACH_VR41XX is not set 22# CONFIG_MACH_VR41XX is not set
23# CONFIG_NXP_STB220 is not set
24# CONFIG_NXP_STB225 is not set
23# CONFIG_PNX8550_JBS is not set 25# CONFIG_PNX8550_JBS is not set
24# CONFIG_PNX8550_STB810 is not set 26# CONFIG_PNX8550_STB810 is not set
25# CONFIG_PMC_MSP is not set 27# CONFIG_PMC_MSP is not set
@@ -39,7 +41,11 @@ CONFIG_MIPS=y
39# CONFIG_SNI_RM is not set 41# CONFIG_SNI_RM is not set
40CONFIG_MACH_TX39XX=y 42CONFIG_MACH_TX39XX=y
41# CONFIG_MACH_TX49XX is not set 43# CONFIG_MACH_TX49XX is not set
44# CONFIG_MIKROTIK_RB532 is not set
42# CONFIG_WR_PPMC is not set 45# CONFIG_WR_PPMC is not set
46# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set
47# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set
48CONFIG_MACH_TXX9=y
43CONFIG_TOSHIBA_JMR3927=y 49CONFIG_TOSHIBA_JMR3927=y
44CONFIG_SOC_TX3927=y 50CONFIG_SOC_TX3927=y
45# CONFIG_TOSHIBA_FPCIB0 is not set 51# CONFIG_TOSHIBA_FPCIB0 is not set
@@ -54,12 +60,14 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
54CONFIG_GENERIC_CLOCKEVENTS=y 60CONFIG_GENERIC_CLOCKEVENTS=y
55CONFIG_GENERIC_TIME=y 61CONFIG_GENERIC_TIME=y
56CONFIG_GENERIC_CMOS_UPDATE=y 62CONFIG_GENERIC_CMOS_UPDATE=y
57CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y 63CONFIG_SCHED_OMIT_FRAME_POINTER=y
58CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y 64CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
59CONFIG_CEVT_TXX9=y 65CONFIG_CEVT_TXX9=y
60CONFIG_GPIO_TXX9=y 66CONFIG_GPIO_TXX9=y
61CONFIG_DMA_NONCOHERENT=y 67CONFIG_DMA_NONCOHERENT=y
62CONFIG_DMA_NEED_PCI_MAP_STATE=y 68CONFIG_DMA_NEED_PCI_MAP_STATE=y
69CONFIG_EARLY_PRINTK=y
70CONFIG_SYS_HAS_EARLY_PRINTK=y
63# CONFIG_HOTPLUG_CPU is not set 71# CONFIG_HOTPLUG_CPU is not set
64# CONFIG_NO_IOPORT is not set 72# CONFIG_NO_IOPORT is not set
65CONFIG_GENERIC_GPIO=y 73CONFIG_GENERIC_GPIO=y
@@ -87,6 +95,7 @@ CONFIG_CPU_TX39XX=y
87# CONFIG_CPU_TX49XX is not set 95# CONFIG_CPU_TX49XX is not set
88# CONFIG_CPU_R5000 is not set 96# CONFIG_CPU_R5000 is not set
89# CONFIG_CPU_R5432 is not set 97# CONFIG_CPU_R5432 is not set
98# CONFIG_CPU_R5500 is not set
90# CONFIG_CPU_R6000 is not set 99# CONFIG_CPU_R6000 is not set
91# CONFIG_CPU_NEVADA is not set 100# CONFIG_CPU_NEVADA is not set
92# CONFIG_CPU_R8000 is not set 101# CONFIG_CPU_R8000 is not set
@@ -94,6 +103,7 @@ CONFIG_CPU_TX39XX=y
94# CONFIG_CPU_RM7000 is not set 103# CONFIG_CPU_RM7000 is not set
95# CONFIG_CPU_RM9000 is not set 104# CONFIG_CPU_RM9000 is not set
96# CONFIG_CPU_SB1 is not set 105# CONFIG_CPU_SB1 is not set
106# CONFIG_CPU_CAVIUM_OCTEON is not set
97CONFIG_SYS_HAS_CPU_TX39XX=y 107CONFIG_SYS_HAS_CPU_TX39XX=y
98CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y 108CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
99CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y 109CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
@@ -117,14 +127,12 @@ CONFIG_ARCH_FLATMEM_ENABLE=y
117CONFIG_ARCH_POPULATES_NODE_MAP=y 127CONFIG_ARCH_POPULATES_NODE_MAP=y
118CONFIG_FLATMEM=y 128CONFIG_FLATMEM=y
119CONFIG_FLAT_NODE_MEM_MAP=y 129CONFIG_FLAT_NODE_MEM_MAP=y
120# CONFIG_SPARSEMEM_STATIC is not set
121# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
122CONFIG_PAGEFLAGS_EXTENDED=y 130CONFIG_PAGEFLAGS_EXTENDED=y
123CONFIG_SPLIT_PTLOCK_CPUS=4 131CONFIG_SPLIT_PTLOCK_CPUS=4
124# CONFIG_RESOURCES_64BIT is not set 132# CONFIG_PHYS_ADDR_T_64BIT is not set
125CONFIG_ZONE_DMA_FLAG=0 133CONFIG_ZONE_DMA_FLAG=0
126CONFIG_VIRT_TO_BUS=y 134CONFIG_VIRT_TO_BUS=y
127# CONFIG_TICK_ONESHOT is not set 135CONFIG_UNEVICTABLE_LRU=y
128# CONFIG_NO_HZ is not set 136# CONFIG_NO_HZ is not set
129# CONFIG_HIGH_RES_TIMERS is not set 137# CONFIG_HIGH_RES_TIMERS is not set
130CONFIG_GENERIC_CLOCKEVENTS_BUILD=y 138CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -159,6 +167,15 @@ CONFIG_SYSVIPC_SYSCTL=y
159# CONFIG_BSD_PROCESS_ACCT is not set 167# CONFIG_BSD_PROCESS_ACCT is not set
160# CONFIG_TASKSTATS is not set 168# CONFIG_TASKSTATS is not set
161# CONFIG_AUDIT is not set 169# CONFIG_AUDIT is not set
170
171#
172# RCU Subsystem
173#
174CONFIG_CLASSIC_RCU=y
175# CONFIG_TREE_RCU is not set
176# CONFIG_PREEMPT_RCU is not set
177# CONFIG_TREE_RCU_TRACE is not set
178# CONFIG_PREEMPT_RCU_TRACE is not set
162# CONFIG_IKCONFIG is not set 179# CONFIG_IKCONFIG is not set
163CONFIG_LOG_BUF_SHIFT=14 180CONFIG_LOG_BUF_SHIFT=14
164# CONFIG_CGROUPS is not set 181# CONFIG_CGROUPS is not set
@@ -171,7 +188,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
171CONFIG_SYSCTL=y 188CONFIG_SYSCTL=y
172CONFIG_EMBEDDED=y 189CONFIG_EMBEDDED=y
173CONFIG_SYSCTL_SYSCALL=y 190CONFIG_SYSCTL_SYSCALL=y
174CONFIG_SYSCTL_SYSCALL_CHECK=y
175CONFIG_KALLSYMS=y 191CONFIG_KALLSYMS=y
176# CONFIG_KALLSYMS_EXTRA_PASS is not set 192# CONFIG_KALLSYMS_EXTRA_PASS is not set
177# CONFIG_HOTPLUG is not set 193# CONFIG_HOTPLUG is not set
@@ -188,26 +204,23 @@ CONFIG_SIGNALFD=y
188CONFIG_TIMERFD=y 204CONFIG_TIMERFD=y
189CONFIG_EVENTFD=y 205CONFIG_EVENTFD=y
190CONFIG_SHMEM=y 206CONFIG_SHMEM=y
207CONFIG_AIO=y
191CONFIG_VM_EVENT_COUNTERS=y 208CONFIG_VM_EVENT_COUNTERS=y
209CONFIG_PCI_QUIRKS=y
192CONFIG_SLAB=y 210CONFIG_SLAB=y
193# CONFIG_SLUB is not set 211# CONFIG_SLUB is not set
194# CONFIG_SLOB is not set 212# CONFIG_SLOB is not set
195# CONFIG_PROFILING is not set 213# CONFIG_PROFILING is not set
196# CONFIG_MARKERS is not set
197CONFIG_HAVE_OPROFILE=y 214CONFIG_HAVE_OPROFILE=y
198# CONFIG_HAVE_KPROBES is not set 215# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
199# CONFIG_HAVE_KRETPROBES is not set
200# CONFIG_HAVE_DMA_ATTRS is not set
201CONFIG_PROC_PAGE_MONITOR=y
202CONFIG_SLABINFO=y 216CONFIG_SLABINFO=y
203CONFIG_RT_MUTEXES=y 217CONFIG_RT_MUTEXES=y
204# CONFIG_TINY_SHMEM is not set
205CONFIG_BASE_SMALL=0 218CONFIG_BASE_SMALL=0
206# CONFIG_MODULES is not set 219# CONFIG_MODULES is not set
207CONFIG_BLOCK=y 220CONFIG_BLOCK=y
208# CONFIG_LBD is not set 221# CONFIG_LBD is not set
209# CONFIG_BLK_DEV_IO_TRACE is not set 222# CONFIG_BLK_DEV_IO_TRACE is not set
210# CONFIG_LSF is not set 223# CONFIG_BLK_DEV_INTEGRITY is not set
211 224
212# 225#
213# IO Schedulers 226# IO Schedulers
@@ -221,7 +234,7 @@ CONFIG_IOSCHED_CFQ=y
221CONFIG_DEFAULT_CFQ=y 234CONFIG_DEFAULT_CFQ=y
222# CONFIG_DEFAULT_NOOP is not set 235# CONFIG_DEFAULT_NOOP is not set
223CONFIG_DEFAULT_IOSCHED="cfq" 236CONFIG_DEFAULT_IOSCHED="cfq"
224CONFIG_CLASSIC_RCU=y 237# CONFIG_FREEZER is not set
225 238
226# 239#
227# Bus options (PCI, PCMCIA, EISA, ISA, TC) 240# Bus options (PCI, PCMCIA, EISA, ISA, TC)
@@ -231,12 +244,15 @@ CONFIG_PCI=y
231CONFIG_PCI_DOMAINS=y 244CONFIG_PCI_DOMAINS=y
232# CONFIG_ARCH_SUPPORTS_MSI is not set 245# CONFIG_ARCH_SUPPORTS_MSI is not set
233CONFIG_PCI_LEGACY=y 246CONFIG_PCI_LEGACY=y
247# CONFIG_PCI_STUB is not set
234CONFIG_MMU=y 248CONFIG_MMU=y
235 249
236# 250#
237# Executable file formats 251# Executable file formats
238# 252#
239CONFIG_BINFMT_ELF=y 253CONFIG_BINFMT_ELF=y
254# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
255# CONFIG_HAVE_AOUT is not set
240# CONFIG_BINFMT_MISC is not set 256# CONFIG_BINFMT_MISC is not set
241CONFIG_TRAD_SIGNALS=y 257CONFIG_TRAD_SIGNALS=y
242 258
@@ -245,15 +261,12 @@ CONFIG_TRAD_SIGNALS=y
245# 261#
246CONFIG_ARCH_SUSPEND_POSSIBLE=y 262CONFIG_ARCH_SUSPEND_POSSIBLE=y
247# CONFIG_PM is not set 263# CONFIG_PM is not set
248
249#
250# Networking
251#
252CONFIG_NET=y 264CONFIG_NET=y
253 265
254# 266#
255# Networking options 267# Networking options
256# 268#
269CONFIG_COMPAT_NET_DEV_OPS=y
257CONFIG_PACKET=y 270CONFIG_PACKET=y
258# CONFIG_PACKET_MMAP is not set 271# CONFIG_PACKET_MMAP is not set
259CONFIG_UNIX=y 272CONFIG_UNIX=y
@@ -293,6 +306,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
293# CONFIG_IPX is not set 306# CONFIG_IPX is not set
294# CONFIG_ATALK is not set 307# CONFIG_ATALK is not set
295# CONFIG_NET_SCHED is not set 308# CONFIG_NET_SCHED is not set
309# CONFIG_DCB is not set
296 310
297# 311#
298# Network testing 312# Network testing
@@ -302,14 +316,9 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
302# CONFIG_CAN is not set 316# CONFIG_CAN is not set
303# CONFIG_IRDA is not set 317# CONFIG_IRDA is not set
304# CONFIG_BT is not set 318# CONFIG_BT is not set
305 319# CONFIG_PHONET is not set
306# 320# CONFIG_WIRELESS is not set
307# Wireless 321# CONFIG_WIMAX is not set
308#
309# CONFIG_CFG80211 is not set
310# CONFIG_WIRELESS_EXT is not set
311# CONFIG_MAC80211 is not set
312# CONFIG_IEEE80211 is not set
313# CONFIG_RFKILL is not set 322# CONFIG_RFKILL is not set
314 323
315# 324#
@@ -323,7 +332,89 @@ CONFIG_STANDALONE=y
323CONFIG_PREVENT_FIRMWARE_BUILD=y 332CONFIG_PREVENT_FIRMWARE_BUILD=y
324# CONFIG_SYS_HYPERVISOR is not set 333# CONFIG_SYS_HYPERVISOR is not set
325# CONFIG_CONNECTOR is not set 334# CONFIG_CONNECTOR is not set
326# CONFIG_MTD is not set 335CONFIG_MTD=y
336# CONFIG_MTD_DEBUG is not set
337# CONFIG_MTD_CONCAT is not set
338CONFIG_MTD_PARTITIONS=y
339# CONFIG_MTD_REDBOOT_PARTS is not set
340CONFIG_MTD_CMDLINE_PARTS=y
341# CONFIG_MTD_AR7_PARTS is not set
342
343#
344# User Modules And Translation Layers
345#
346CONFIG_MTD_CHAR=y
347# CONFIG_MTD_BLKDEVS is not set
348# CONFIG_MTD_BLOCK is not set
349# CONFIG_MTD_BLOCK_RO is not set
350# CONFIG_FTL is not set
351# CONFIG_NFTL is not set
352# CONFIG_INFTL is not set
353# CONFIG_RFD_FTL is not set
354# CONFIG_SSFDC is not set
355# CONFIG_MTD_OOPS is not set
356
357#
358# RAM/ROM/Flash chip drivers
359#
360CONFIG_MTD_CFI=y
361CONFIG_MTD_JEDECPROBE=y
362CONFIG_MTD_GEN_PROBE=y
363# CONFIG_MTD_CFI_ADV_OPTIONS is not set
364CONFIG_MTD_MAP_BANK_WIDTH_1=y
365CONFIG_MTD_MAP_BANK_WIDTH_2=y
366CONFIG_MTD_MAP_BANK_WIDTH_4=y
367# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
368# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
369# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
370CONFIG_MTD_CFI_I1=y
371CONFIG_MTD_CFI_I2=y
372# CONFIG_MTD_CFI_I4 is not set
373# CONFIG_MTD_CFI_I8 is not set
374# CONFIG_MTD_CFI_INTELEXT is not set
375CONFIG_MTD_CFI_AMDSTD=y
376# CONFIG_MTD_CFI_STAA is not set
377CONFIG_MTD_CFI_UTIL=y
378# CONFIG_MTD_RAM is not set
379# CONFIG_MTD_ROM is not set
380# CONFIG_MTD_ABSENT is not set
381
382#
383# Mapping drivers for chip access
384#
385# CONFIG_MTD_COMPLEX_MAPPINGS is not set
386CONFIG_MTD_PHYSMAP=y
387# CONFIG_MTD_PHYSMAP_COMPAT is not set
388# CONFIG_MTD_INTEL_VR_NOR is not set
389# CONFIG_MTD_PLATRAM is not set
390
391#
392# Self-contained MTD device drivers
393#
394# CONFIG_MTD_PMC551 is not set
395# CONFIG_MTD_SLRAM is not set
396# CONFIG_MTD_PHRAM is not set
397# CONFIG_MTD_MTDRAM is not set
398# CONFIG_MTD_BLOCK2MTD is not set
399
400#
401# Disk-On-Chip Device Drivers
402#
403# CONFIG_MTD_DOC2000 is not set
404# CONFIG_MTD_DOC2001 is not set
405# CONFIG_MTD_DOC2001PLUS is not set
406# CONFIG_MTD_NAND is not set
407# CONFIG_MTD_ONENAND is not set
408
409#
410# LPDDR flash memory drivers
411#
412# CONFIG_MTD_LPDDR is not set
413
414#
415# UBI - Unsorted block images
416#
417# CONFIG_MTD_UBI is not set
327# CONFIG_PARPORT is not set 418# CONFIG_PARPORT is not set
328CONFIG_BLK_DEV=y 419CONFIG_BLK_DEV=y
329# CONFIG_BLK_CPQ_DA is not set 420# CONFIG_BLK_CPQ_DA is not set
@@ -336,6 +427,7 @@ CONFIG_BLK_DEV=y
336# CONFIG_BLK_DEV_RAM is not set 427# CONFIG_BLK_DEV_RAM is not set
337# CONFIG_CDROM_PKTCDVD is not set 428# CONFIG_CDROM_PKTCDVD is not set
338# CONFIG_ATA_OVER_ETH is not set 429# CONFIG_ATA_OVER_ETH is not set
430# CONFIG_BLK_DEV_HD is not set
339# CONFIG_MISC_DEVICES is not set 431# CONFIG_MISC_DEVICES is not set
340CONFIG_HAVE_IDE=y 432CONFIG_HAVE_IDE=y
341# CONFIG_IDE is not set 433# CONFIG_IDE is not set
@@ -361,7 +453,6 @@ CONFIG_HAVE_IDE=y
361# CONFIG_IEEE1394 is not set 453# CONFIG_IEEE1394 is not set
362# CONFIG_I2O is not set 454# CONFIG_I2O is not set
363CONFIG_NETDEVICES=y 455CONFIG_NETDEVICES=y
364# CONFIG_NETDEVICES_MULTIQUEUE is not set
365# CONFIG_DUMMY is not set 456# CONFIG_DUMMY is not set
366# CONFIG_BONDING is not set 457# CONFIG_BONDING is not set
367# CONFIG_EQUALIZER is not set 458# CONFIG_EQUALIZER is not set
@@ -383,6 +474,9 @@ CONFIG_PHYLIB=y
383# CONFIG_BROADCOM_PHY is not set 474# CONFIG_BROADCOM_PHY is not set
384# CONFIG_ICPLUS_PHY is not set 475# CONFIG_ICPLUS_PHY is not set
385# CONFIG_REALTEK_PHY is not set 476# CONFIG_REALTEK_PHY is not set
477# CONFIG_NATIONAL_PHY is not set
478# CONFIG_STE10XP is not set
479# CONFIG_LSI_ET1011C_PHY is not set
386# CONFIG_FIXED_PHY is not set 480# CONFIG_FIXED_PHY is not set
387# CONFIG_MDIO_BITBANG is not set 481# CONFIG_MDIO_BITBANG is not set
388CONFIG_NET_ETHERNET=y 482CONFIG_NET_ETHERNET=y
@@ -392,6 +486,7 @@ CONFIG_NET_ETHERNET=y
392# CONFIG_SUNGEM is not set 486# CONFIG_SUNGEM is not set
393# CONFIG_CASSINI is not set 487# CONFIG_CASSINI is not set
394# CONFIG_NET_VENDOR_3COM is not set 488# CONFIG_NET_VENDOR_3COM is not set
489# CONFIG_SMC91X is not set
395# CONFIG_DM9000 is not set 490# CONFIG_DM9000 is not set
396# CONFIG_NET_TULIP is not set 491# CONFIG_NET_TULIP is not set
397# CONFIG_HP100 is not set 492# CONFIG_HP100 is not set
@@ -399,6 +494,9 @@ CONFIG_NET_ETHERNET=y
399# CONFIG_IBM_NEW_EMAC_RGMII is not set 494# CONFIG_IBM_NEW_EMAC_RGMII is not set
400# CONFIG_IBM_NEW_EMAC_TAH is not set 495# CONFIG_IBM_NEW_EMAC_TAH is not set
401# CONFIG_IBM_NEW_EMAC_EMAC4 is not set 496# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
497# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
498# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
499# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
402CONFIG_NET_PCI=y 500CONFIG_NET_PCI=y
403# CONFIG_PCNET32 is not set 501# CONFIG_PCNET32 is not set
404# CONFIG_AMD8111_ETH is not set 502# CONFIG_AMD8111_ETH is not set
@@ -406,7 +504,6 @@ CONFIG_NET_PCI=y
406# CONFIG_B44 is not set 504# CONFIG_B44 is not set
407# CONFIG_FORCEDETH is not set 505# CONFIG_FORCEDETH is not set
408CONFIG_TC35815=y 506CONFIG_TC35815=y
409# CONFIG_EEPRO100 is not set
410# CONFIG_E100 is not set 507# CONFIG_E100 is not set
411# CONFIG_FEALNX is not set 508# CONFIG_FEALNX is not set
412# CONFIG_NATSEMI is not set 509# CONFIG_NATSEMI is not set
@@ -415,9 +512,11 @@ CONFIG_TC35815=y
415# CONFIG_R6040 is not set 512# CONFIG_R6040 is not set
416# CONFIG_SIS900 is not set 513# CONFIG_SIS900 is not set
417# CONFIG_EPIC100 is not set 514# CONFIG_EPIC100 is not set
515# CONFIG_SMSC9420 is not set
418# CONFIG_SUNDANCE is not set 516# CONFIG_SUNDANCE is not set
419# CONFIG_TLAN is not set 517# CONFIG_TLAN is not set
420# CONFIG_VIA_RHINE is not set 518# CONFIG_VIA_RHINE is not set
519# CONFIG_ATL2 is not set
421# CONFIG_NETDEV_1000 is not set 520# CONFIG_NETDEV_1000 is not set
422# CONFIG_NETDEV_10000 is not set 521# CONFIG_NETDEV_10000 is not set
423# CONFIG_TR is not set 522# CONFIG_TR is not set
@@ -428,6 +527,10 @@ CONFIG_TC35815=y
428# CONFIG_WLAN_PRE80211 is not set 527# CONFIG_WLAN_PRE80211 is not set
429# CONFIG_WLAN_80211 is not set 528# CONFIG_WLAN_80211 is not set
430# CONFIG_IWLWIFI_LEDS is not set 529# CONFIG_IWLWIFI_LEDS is not set
530
531#
532# Enable WiMAX (Networking options) to see the WiMAX drivers
533#
431# CONFIG_WAN is not set 534# CONFIG_WAN is not set
432# CONFIG_FDDI is not set 535# CONFIG_FDDI is not set
433# CONFIG_PPP is not set 536# CONFIG_PPP is not set
@@ -440,27 +543,7 @@ CONFIG_TC35815=y
440# 543#
441# Input device support 544# Input device support
442# 545#
443CONFIG_INPUT=y 546# CONFIG_INPUT is not set
444# CONFIG_INPUT_FF_MEMLESS is not set
445# CONFIG_INPUT_POLLDEV is not set
446
447#
448# Userland interfaces
449#
450# CONFIG_INPUT_MOUSEDEV is not set
451# CONFIG_INPUT_JOYDEV is not set
452# CONFIG_INPUT_EVDEV is not set
453# CONFIG_INPUT_EVBUG is not set
454
455#
456# Input Device Drivers
457#
458# CONFIG_INPUT_KEYBOARD is not set
459# CONFIG_INPUT_MOUSE is not set
460# CONFIG_INPUT_JOYSTICK is not set
461# CONFIG_INPUT_TABLET is not set
462# CONFIG_INPUT_TOUCHSCREEN is not set
463# CONFIG_INPUT_MISC is not set
464 547
465# 548#
466# Hardware I/O ports 549# Hardware I/O ports
@@ -517,10 +600,11 @@ CONFIG_LEGACY_PTY_COUNT=256
517CONFIG_DEVPORT=y 600CONFIG_DEVPORT=y
518# CONFIG_I2C is not set 601# CONFIG_I2C is not set
519# CONFIG_SPI is not set 602# CONFIG_SPI is not set
520CONFIG_HAVE_GPIO_LIB=y 603CONFIG_ARCH_REQUIRE_GPIOLIB=y
604CONFIG_GPIOLIB=y
521 605
522# 606#
523# GPIO Support 607# Memory mapped GPIO expanders:
524# 608#
525 609
526# 610#
@@ -528,6 +612,11 @@ CONFIG_HAVE_GPIO_LIB=y
528# 612#
529 613
530# 614#
615# PCI GPIO expanders:
616#
617# CONFIG_GPIO_BT8XX is not set
618
619#
531# SPI GPIO expanders: 620# SPI GPIO expanders:
532# 621#
533# CONFIG_W1 is not set 622# CONFIG_W1 is not set
@@ -542,6 +631,7 @@ CONFIG_WATCHDOG=y
542# Watchdog Device Drivers 631# Watchdog Device Drivers
543# 632#
544# CONFIG_SOFT_WATCHDOG is not set 633# CONFIG_SOFT_WATCHDOG is not set
634# CONFIG_ALIM7101_WDT is not set
545CONFIG_TXX9_WDT=y 635CONFIG_TXX9_WDT=y
546 636
547# 637#
@@ -549,18 +639,21 @@ CONFIG_TXX9_WDT=y
549# 639#
550# CONFIG_PCIPCWATCHDOG is not set 640# CONFIG_PCIPCWATCHDOG is not set
551# CONFIG_WDTPCI is not set 641# CONFIG_WDTPCI is not set
642CONFIG_SSB_POSSIBLE=y
552 643
553# 644#
554# Sonics Silicon Backplane 645# Sonics Silicon Backplane
555# 646#
556CONFIG_SSB_POSSIBLE=y
557# CONFIG_SSB is not set 647# CONFIG_SSB is not set
558 648
559# 649#
560# Multifunction device drivers 650# Multifunction device drivers
561# 651#
652# CONFIG_MFD_CORE is not set
562# CONFIG_MFD_SM501 is not set 653# CONFIG_MFD_SM501 is not set
563# CONFIG_HTC_PASIC3 is not set 654# CONFIG_HTC_PASIC3 is not set
655# CONFIG_MFD_TMIO is not set
656# CONFIG_REGULATOR is not set
564 657
565# 658#
566# Multimedia devices 659# Multimedia devices
@@ -591,16 +684,26 @@ CONFIG_SSB_POSSIBLE=y
591# Display device support 684# Display device support
592# 685#
593# CONFIG_DISPLAY_SUPPORT is not set 686# CONFIG_DISPLAY_SUPPORT is not set
594
595#
596# Sound
597#
598# CONFIG_SOUND is not set 687# CONFIG_SOUND is not set
599# CONFIG_HID_SUPPORT is not set
600# CONFIG_USB_SUPPORT is not set 688# CONFIG_USB_SUPPORT is not set
601# CONFIG_MMC is not set 689# CONFIG_MMC is not set
602# CONFIG_MEMSTICK is not set 690# CONFIG_MEMSTICK is not set
603# CONFIG_NEW_LEDS is not set 691CONFIG_NEW_LEDS=y
692CONFIG_LEDS_CLASS=y
693
694#
695# LED drivers
696#
697CONFIG_LEDS_GPIO=y
698
699#
700# LED Triggers
701#
702CONFIG_LEDS_TRIGGERS=y
703# CONFIG_LEDS_TRIGGER_TIMER is not set
704CONFIG_LEDS_TRIGGER_HEARTBEAT=y
705# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
706# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
604# CONFIG_ACCESSIBILITY is not set 707# CONFIG_ACCESSIBILITY is not set
605# CONFIG_INFINIBAND is not set 708# CONFIG_INFINIBAND is not set
606CONFIG_RTC_LIB=y 709CONFIG_RTC_LIB=y
@@ -626,27 +729,34 @@ CONFIG_RTC_INTF_DEV=y
626# Platform RTC drivers 729# Platform RTC drivers
627# 730#
628# CONFIG_RTC_DRV_CMOS is not set 731# CONFIG_RTC_DRV_CMOS is not set
732# CONFIG_RTC_DRV_DS1286 is not set
629# CONFIG_RTC_DRV_DS1511 is not set 733# CONFIG_RTC_DRV_DS1511 is not set
630# CONFIG_RTC_DRV_DS1553 is not set 734# CONFIG_RTC_DRV_DS1553 is not set
631CONFIG_RTC_DRV_DS1742=y 735CONFIG_RTC_DRV_DS1742=y
632# CONFIG_RTC_DRV_STK17TA8 is not set 736# CONFIG_RTC_DRV_STK17TA8 is not set
633# CONFIG_RTC_DRV_M48T86 is not set 737# CONFIG_RTC_DRV_M48T86 is not set
738# CONFIG_RTC_DRV_M48T35 is not set
634# CONFIG_RTC_DRV_M48T59 is not set 739# CONFIG_RTC_DRV_M48T59 is not set
740# CONFIG_RTC_DRV_BQ4802 is not set
635# CONFIG_RTC_DRV_V3020 is not set 741# CONFIG_RTC_DRV_V3020 is not set
636 742
637# 743#
638# on-CPU RTC drivers 744# on-CPU RTC drivers
639# 745#
746# CONFIG_DMADEVICES is not set
640# CONFIG_UIO is not set 747# CONFIG_UIO is not set
748# CONFIG_STAGING is not set
641 749
642# 750#
643# File systems 751# File systems
644# 752#
645# CONFIG_EXT2_FS is not set 753# CONFIG_EXT2_FS is not set
646# CONFIG_EXT3_FS is not set 754# CONFIG_EXT3_FS is not set
755# CONFIG_EXT4_FS is not set
647# CONFIG_REISERFS_FS is not set 756# CONFIG_REISERFS_FS is not set
648# CONFIG_JFS_FS is not set 757# CONFIG_JFS_FS is not set
649# CONFIG_FS_POSIX_ACL is not set 758# CONFIG_FS_POSIX_ACL is not set
759CONFIG_FILE_LOCKING=y
650# CONFIG_XFS_FS is not set 760# CONFIG_XFS_FS is not set
651# CONFIG_OCFS2_FS is not set 761# CONFIG_OCFS2_FS is not set
652CONFIG_DNOTIFY=y 762CONFIG_DNOTIFY=y
@@ -676,28 +786,17 @@ CONFIG_INOTIFY_USER=y
676CONFIG_PROC_FS=y 786CONFIG_PROC_FS=y
677CONFIG_PROC_KCORE=y 787CONFIG_PROC_KCORE=y
678CONFIG_PROC_SYSCTL=y 788CONFIG_PROC_SYSCTL=y
789CONFIG_PROC_PAGE_MONITOR=y
679CONFIG_SYSFS=y 790CONFIG_SYSFS=y
680# CONFIG_TMPFS is not set 791# CONFIG_TMPFS is not set
681# CONFIG_HUGETLB_PAGE is not set 792# CONFIG_HUGETLB_PAGE is not set
682# CONFIG_CONFIGFS_FS is not set 793# CONFIG_CONFIGFS_FS is not set
683 794# CONFIG_MISC_FILESYSTEMS is not set
684#
685# Miscellaneous filesystems
686#
687# CONFIG_HFSPLUS_FS is not set
688# CONFIG_CRAMFS is not set
689# CONFIG_VXFS_FS is not set
690# CONFIG_MINIX_FS is not set
691# CONFIG_HPFS_FS is not set
692# CONFIG_QNX4FS_FS is not set
693# CONFIG_ROMFS_FS is not set
694# CONFIG_SYSV_FS is not set
695# CONFIG_UFS_FS is not set
696CONFIG_NETWORK_FILESYSTEMS=y 795CONFIG_NETWORK_FILESYSTEMS=y
697CONFIG_NFS_FS=y 796CONFIG_NFS_FS=y
698# CONFIG_NFS_V3 is not set 797# CONFIG_NFS_V3 is not set
699# CONFIG_NFSD is not set
700CONFIG_ROOT_NFS=y 798CONFIG_ROOT_NFS=y
799# CONFIG_NFSD is not set
701CONFIG_LOCKD=y 800CONFIG_LOCKD=y
702CONFIG_NFS_COMMON=y 801CONFIG_NFS_COMMON=y
703CONFIG_SUNRPC=y 802CONFIG_SUNRPC=y
@@ -726,7 +825,16 @@ CONFIG_FRAME_WARN=1024
726# CONFIG_DEBUG_FS is not set 825# CONFIG_DEBUG_FS is not set
727# CONFIG_HEADERS_CHECK is not set 826# CONFIG_HEADERS_CHECK is not set
728# CONFIG_DEBUG_KERNEL is not set 827# CONFIG_DEBUG_KERNEL is not set
828# CONFIG_DEBUG_MEMORY_INIT is not set
829# CONFIG_RCU_CPU_STALL_DETECTOR is not set
830CONFIG_SYSCTL_SYSCALL_CHECK=y
831
832#
833# Tracers
834#
835# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
729# CONFIG_SAMPLES is not set 836# CONFIG_SAMPLES is not set
837CONFIG_HAVE_ARCH_KGDB=y
730CONFIG_CMDLINE="" 838CONFIG_CMDLINE=""
731 839
732# 840#
@@ -734,15 +842,18 @@ CONFIG_CMDLINE=""
734# 842#
735# CONFIG_KEYS is not set 843# CONFIG_KEYS is not set
736# CONFIG_SECURITY is not set 844# CONFIG_SECURITY is not set
845# CONFIG_SECURITYFS is not set
846# CONFIG_SECURITY_FILE_CAPABILITIES is not set
737# CONFIG_CRYPTO is not set 847# CONFIG_CRYPTO is not set
738 848
739# 849#
740# Library routines 850# Library routines
741# 851#
742CONFIG_BITREVERSE=y 852CONFIG_BITREVERSE=y
743# CONFIG_GENERIC_FIND_FIRST_BIT is not set 853CONFIG_GENERIC_FIND_LAST_BIT=y
744# CONFIG_CRC_CCITT is not set 854# CONFIG_CRC_CCITT is not set
745# CONFIG_CRC16 is not set 855# CONFIG_CRC16 is not set
856# CONFIG_CRC_T10DIF is not set
746# CONFIG_CRC_ITU_T is not set 857# CONFIG_CRC_ITU_T is not set
747CONFIG_CRC32=y 858CONFIG_CRC32=y
748# CONFIG_CRC7 is not set 859# CONFIG_CRC7 is not set
diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig
index 83d5c58662c8..1efe977497dd 100644
--- a/arch/mips/configs/rbtx49xx_defconfig
+++ b/arch/mips/configs/rbtx49xx_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.26-rc9 3# Linux kernel version: 2.6.29-rc7
4# Fri Jul 11 23:03:21 2008 4# Wed Mar 4 23:08:06 2009
5# 5#
6CONFIG_MIPS=y 6CONFIG_MIPS=y
7 7
@@ -18,8 +18,10 @@ CONFIG_MIPS=y
18# CONFIG_LEMOTE_FULONG is not set 18# CONFIG_LEMOTE_FULONG is not set
19# CONFIG_MIPS_MALTA is not set 19# CONFIG_MIPS_MALTA is not set
20# CONFIG_MIPS_SIM is not set 20# CONFIG_MIPS_SIM is not set
21# CONFIG_MARKEINS is not set 21# CONFIG_MACH_EMMA is not set
22# CONFIG_MACH_VR41XX is not set 22# CONFIG_MACH_VR41XX is not set
23# CONFIG_NXP_STB220 is not set
24# CONFIG_NXP_STB225 is not set
23# CONFIG_PNX8550_JBS is not set 25# CONFIG_PNX8550_JBS is not set
24# CONFIG_PNX8550_STB810 is not set 26# CONFIG_PNX8550_STB810 is not set
25# CONFIG_PMC_MSP is not set 27# CONFIG_PMC_MSP is not set
@@ -39,20 +41,28 @@ CONFIG_MIPS=y
39# CONFIG_SNI_RM is not set 41# CONFIG_SNI_RM is not set
40# CONFIG_MACH_TX39XX is not set 42# CONFIG_MACH_TX39XX is not set
41CONFIG_MACH_TX49XX=y 43CONFIG_MACH_TX49XX=y
44# CONFIG_MIKROTIK_RB532 is not set
42# CONFIG_WR_PPMC is not set 45# CONFIG_WR_PPMC is not set
46# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set
47# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set
48CONFIG_MACH_TXX9=y
43CONFIG_TOSHIBA_RBTX4927=y 49CONFIG_TOSHIBA_RBTX4927=y
44CONFIG_TOSHIBA_RBTX4938=y 50CONFIG_TOSHIBA_RBTX4938=y
51CONFIG_TOSHIBA_RBTX4939=y
45CONFIG_SOC_TX4927=y 52CONFIG_SOC_TX4927=y
46CONFIG_SOC_TX4938=y 53CONFIG_SOC_TX4938=y
54CONFIG_SOC_TX4939=y
55CONFIG_TXX9_7SEGLED=y
47# CONFIG_TOSHIBA_FPCIB0 is not set 56# CONFIG_TOSHIBA_FPCIB0 is not set
48CONFIG_PICMG_PCI_BACKPLANE_DEFAULT=y 57CONFIG_PICMG_PCI_BACKPLANE_DEFAULT=y
49 58
50# 59#
51# Multiplex Pin Select 60# Multiplex Pin Select
52# 61#
53CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61=y 62# CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61 is not set
54# CONFIG_TOSHIBA_RBTX4938_MPLEX_NAND is not set 63# CONFIG_TOSHIBA_RBTX4938_MPLEX_NAND is not set
55# CONFIG_TOSHIBA_RBTX4938_MPLEX_ATA is not set 64# CONFIG_TOSHIBA_RBTX4938_MPLEX_ATA is not set
65CONFIG_TOSHIBA_RBTX4938_MPLEX_KEEP=y
56CONFIG_PCI_TX4927=y 66CONFIG_PCI_TX4927=y
57CONFIG_RWSEM_GENERIC_SPINLOCK=y 67CONFIG_RWSEM_GENERIC_SPINLOCK=y
58# CONFIG_ARCH_HAS_ILOG2_U32 is not set 68# CONFIG_ARCH_HAS_ILOG2_U32 is not set
@@ -64,14 +74,18 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
64CONFIG_GENERIC_CLOCKEVENTS=y 74CONFIG_GENERIC_CLOCKEVENTS=y
65CONFIG_GENERIC_TIME=y 75CONFIG_GENERIC_TIME=y
66CONFIG_GENERIC_CMOS_UPDATE=y 76CONFIG_GENERIC_CMOS_UPDATE=y
67CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y 77CONFIG_SCHED_OMIT_FRAME_POINTER=y
68CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y 78CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
79CONFIG_CEVT_R4K_LIB=y
69CONFIG_CEVT_R4K=y 80CONFIG_CEVT_R4K=y
70CONFIG_CEVT_TXX9=y 81CONFIG_CEVT_TXX9=y
82CONFIG_CSRC_R4K_LIB=y
71CONFIG_CSRC_R4K=y 83CONFIG_CSRC_R4K=y
72CONFIG_GPIO_TXX9=y 84CONFIG_GPIO_TXX9=y
73CONFIG_DMA_NONCOHERENT=y 85CONFIG_DMA_NONCOHERENT=y
74CONFIG_DMA_NEED_PCI_MAP_STATE=y 86CONFIG_DMA_NEED_PCI_MAP_STATE=y
87CONFIG_EARLY_PRINTK=y
88CONFIG_SYS_HAS_EARLY_PRINTK=y
75# CONFIG_HOTPLUG_CPU is not set 89# CONFIG_HOTPLUG_CPU is not set
76# CONFIG_NO_IOPORT is not set 90# CONFIG_NO_IOPORT is not set
77CONFIG_GENERIC_GPIO=y 91CONFIG_GENERIC_GPIO=y
@@ -100,6 +114,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
100CONFIG_CPU_TX49XX=y 114CONFIG_CPU_TX49XX=y
101# CONFIG_CPU_R5000 is not set 115# CONFIG_CPU_R5000 is not set
102# CONFIG_CPU_R5432 is not set 116# CONFIG_CPU_R5432 is not set
117# CONFIG_CPU_R5500 is not set
103# CONFIG_CPU_R6000 is not set 118# CONFIG_CPU_R6000 is not set
104# CONFIG_CPU_NEVADA is not set 119# CONFIG_CPU_NEVADA is not set
105# CONFIG_CPU_R8000 is not set 120# CONFIG_CPU_R8000 is not set
@@ -107,6 +122,7 @@ CONFIG_CPU_TX49XX=y
107# CONFIG_CPU_RM7000 is not set 122# CONFIG_CPU_RM7000 is not set
108# CONFIG_CPU_RM9000 is not set 123# CONFIG_CPU_RM9000 is not set
109# CONFIG_CPU_SB1 is not set 124# CONFIG_CPU_SB1 is not set
125# CONFIG_CPU_CAVIUM_OCTEON is not set
110CONFIG_SYS_HAS_CPU_TX49XX=y 126CONFIG_SYS_HAS_CPU_TX49XX=y
111CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y 127CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
112CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y 128CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y
@@ -134,13 +150,12 @@ CONFIG_ARCH_FLATMEM_ENABLE=y
134CONFIG_ARCH_POPULATES_NODE_MAP=y 150CONFIG_ARCH_POPULATES_NODE_MAP=y
135CONFIG_FLATMEM=y 151CONFIG_FLATMEM=y
136CONFIG_FLAT_NODE_MEM_MAP=y 152CONFIG_FLAT_NODE_MEM_MAP=y
137# CONFIG_SPARSEMEM_STATIC is not set
138# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
139CONFIG_PAGEFLAGS_EXTENDED=y 153CONFIG_PAGEFLAGS_EXTENDED=y
140CONFIG_SPLIT_PTLOCK_CPUS=4 154CONFIG_SPLIT_PTLOCK_CPUS=4
141# CONFIG_RESOURCES_64BIT is not set 155# CONFIG_PHYS_ADDR_T_64BIT is not set
142CONFIG_ZONE_DMA_FLAG=0 156CONFIG_ZONE_DMA_FLAG=0
143CONFIG_VIRT_TO_BUS=y 157CONFIG_VIRT_TO_BUS=y
158CONFIG_UNEVICTABLE_LRU=y
144CONFIG_TICK_ONESHOT=y 159CONFIG_TICK_ONESHOT=y
145CONFIG_NO_HZ=y 160CONFIG_NO_HZ=y
146CONFIG_HIGH_RES_TIMERS=y 161CONFIG_HIGH_RES_TIMERS=y
@@ -176,6 +191,15 @@ CONFIG_SYSVIPC_SYSCTL=y
176# CONFIG_BSD_PROCESS_ACCT is not set 191# CONFIG_BSD_PROCESS_ACCT is not set
177# CONFIG_TASKSTATS is not set 192# CONFIG_TASKSTATS is not set
178# CONFIG_AUDIT is not set 193# CONFIG_AUDIT is not set
194
195#
196# RCU Subsystem
197#
198CONFIG_CLASSIC_RCU=y
199# CONFIG_TREE_RCU is not set
200# CONFIG_PREEMPT_RCU is not set
201# CONFIG_TREE_RCU_TRACE is not set
202# CONFIG_PREEMPT_RCU_TRACE is not set
179CONFIG_IKCONFIG=y 203CONFIG_IKCONFIG=y
180CONFIG_IKCONFIG_PROC=y 204CONFIG_IKCONFIG_PROC=y
181CONFIG_LOG_BUF_SHIFT=14 205CONFIG_LOG_BUF_SHIFT=14
@@ -190,7 +214,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
190CONFIG_SYSCTL=y 214CONFIG_SYSCTL=y
191CONFIG_EMBEDDED=y 215CONFIG_EMBEDDED=y
192CONFIG_SYSCTL_SYSCALL=y 216CONFIG_SYSCTL_SYSCALL=y
193CONFIG_SYSCTL_SYSCALL_CHECK=y
194CONFIG_KALLSYMS=y 217CONFIG_KALLSYMS=y
195# CONFIG_KALLSYMS_EXTRA_PASS is not set 218# CONFIG_KALLSYMS_EXTRA_PASS is not set
196# CONFIG_HOTPLUG is not set 219# CONFIG_HOTPLUG is not set
@@ -207,30 +230,26 @@ CONFIG_SIGNALFD=y
207CONFIG_TIMERFD=y 230CONFIG_TIMERFD=y
208CONFIG_EVENTFD=y 231CONFIG_EVENTFD=y
209CONFIG_SHMEM=y 232CONFIG_SHMEM=y
233CONFIG_AIO=y
210CONFIG_VM_EVENT_COUNTERS=y 234CONFIG_VM_EVENT_COUNTERS=y
235CONFIG_PCI_QUIRKS=y
211CONFIG_SLAB=y 236CONFIG_SLAB=y
212# CONFIG_SLUB is not set 237# CONFIG_SLUB is not set
213# CONFIG_SLOB is not set 238# CONFIG_SLOB is not set
214# CONFIG_PROFILING is not set 239# CONFIG_PROFILING is not set
215# CONFIG_MARKERS is not set
216CONFIG_HAVE_OPROFILE=y 240CONFIG_HAVE_OPROFILE=y
217# CONFIG_HAVE_KPROBES is not set 241# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
218# CONFIG_HAVE_KRETPROBES is not set
219# CONFIG_HAVE_DMA_ATTRS is not set
220CONFIG_PROC_PAGE_MONITOR=y
221CONFIG_SLABINFO=y 242CONFIG_SLABINFO=y
222# CONFIG_TINY_SHMEM is not set
223CONFIG_BASE_SMALL=0 243CONFIG_BASE_SMALL=0
224CONFIG_MODULES=y 244CONFIG_MODULES=y
225# CONFIG_MODULE_FORCE_LOAD is not set 245# CONFIG_MODULE_FORCE_LOAD is not set
226# CONFIG_MODULE_UNLOAD is not set 246CONFIG_MODULE_UNLOAD=y
227# CONFIG_MODVERSIONS is not set 247# CONFIG_MODVERSIONS is not set
228# CONFIG_MODULE_SRCVERSION_ALL is not set 248# CONFIG_MODULE_SRCVERSION_ALL is not set
229CONFIG_KMOD=y
230CONFIG_BLOCK=y 249CONFIG_BLOCK=y
231# CONFIG_LBD is not set 250# CONFIG_LBD is not set
232# CONFIG_BLK_DEV_IO_TRACE is not set 251# CONFIG_BLK_DEV_IO_TRACE is not set
233# CONFIG_LSF is not set 252# CONFIG_BLK_DEV_INTEGRITY is not set
234 253
235# 254#
236# IO Schedulers 255# IO Schedulers
@@ -244,7 +263,8 @@ CONFIG_DEFAULT_AS=y
244# CONFIG_DEFAULT_CFQ is not set 263# CONFIG_DEFAULT_CFQ is not set
245# CONFIG_DEFAULT_NOOP is not set 264# CONFIG_DEFAULT_NOOP is not set
246CONFIG_DEFAULT_IOSCHED="anticipatory" 265CONFIG_DEFAULT_IOSCHED="anticipatory"
247CONFIG_CLASSIC_RCU=y 266# CONFIG_PROBE_INITRD_HEADER is not set
267# CONFIG_FREEZER is not set
248 268
249# 269#
250# Bus options (PCI, PCMCIA, EISA, ISA, TC) 270# Bus options (PCI, PCMCIA, EISA, ISA, TC)
@@ -254,12 +274,15 @@ CONFIG_PCI=y
254CONFIG_PCI_DOMAINS=y 274CONFIG_PCI_DOMAINS=y
255# CONFIG_ARCH_SUPPORTS_MSI is not set 275# CONFIG_ARCH_SUPPORTS_MSI is not set
256# CONFIG_PCI_LEGACY is not set 276# CONFIG_PCI_LEGACY is not set
277# CONFIG_PCI_STUB is not set
257CONFIG_MMU=y 278CONFIG_MMU=y
258 279
259# 280#
260# Executable file formats 281# Executable file formats
261# 282#
262CONFIG_BINFMT_ELF=y 283CONFIG_BINFMT_ELF=y
284# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
285# CONFIG_HAVE_AOUT is not set
263# CONFIG_BINFMT_MISC is not set 286# CONFIG_BINFMT_MISC is not set
264CONFIG_TRAD_SIGNALS=y 287CONFIG_TRAD_SIGNALS=y
265 288
@@ -268,15 +291,12 @@ CONFIG_TRAD_SIGNALS=y
268# 291#
269CONFIG_ARCH_SUSPEND_POSSIBLE=y 292CONFIG_ARCH_SUSPEND_POSSIBLE=y
270# CONFIG_PM is not set 293# CONFIG_PM is not set
271
272#
273# Networking
274#
275CONFIG_NET=y 294CONFIG_NET=y
276 295
277# 296#
278# Networking options 297# Networking options
279# 298#
299CONFIG_COMPAT_NET_DEV_OPS=y
280CONFIG_PACKET=y 300CONFIG_PACKET=y
281# CONFIG_PACKET_MMAP is not set 301# CONFIG_PACKET_MMAP is not set
282CONFIG_UNIX=y 302CONFIG_UNIX=y
@@ -318,6 +338,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
318# CONFIG_IPX is not set 338# CONFIG_IPX is not set
319# CONFIG_ATALK is not set 339# CONFIG_ATALK is not set
320# CONFIG_NET_SCHED is not set 340# CONFIG_NET_SCHED is not set
341# CONFIG_DCB is not set
321 342
322# 343#
323# Network testing 344# Network testing
@@ -327,14 +348,9 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
327# CONFIG_CAN is not set 348# CONFIG_CAN is not set
328# CONFIG_IRDA is not set 349# CONFIG_IRDA is not set
329# CONFIG_BT is not set 350# CONFIG_BT is not set
330 351# CONFIG_PHONET is not set
331# 352# CONFIG_WIRELESS is not set
332# Wireless 353# CONFIG_WIMAX is not set
333#
334# CONFIG_CFG80211 is not set
335# CONFIG_WIRELESS_EXT is not set
336# CONFIG_MAC80211 is not set
337# CONFIG_IEEE80211 is not set
338# CONFIG_RFKILL is not set 354# CONFIG_RFKILL is not set
339 355
340# 356#
@@ -348,7 +364,90 @@ CONFIG_STANDALONE=y
348CONFIG_PREVENT_FIRMWARE_BUILD=y 364CONFIG_PREVENT_FIRMWARE_BUILD=y
349# CONFIG_SYS_HYPERVISOR is not set 365# CONFIG_SYS_HYPERVISOR is not set
350# CONFIG_CONNECTOR is not set 366# CONFIG_CONNECTOR is not set
351# CONFIG_MTD is not set 367CONFIG_MTD=y
368# CONFIG_MTD_DEBUG is not set
369# CONFIG_MTD_CONCAT is not set
370CONFIG_MTD_PARTITIONS=y
371# CONFIG_MTD_TESTS is not set
372# CONFIG_MTD_REDBOOT_PARTS is not set
373CONFIG_MTD_CMDLINE_PARTS=y
374# CONFIG_MTD_AR7_PARTS is not set
375
376#
377# User Modules And Translation Layers
378#
379CONFIG_MTD_CHAR=y
380# CONFIG_MTD_BLKDEVS is not set
381# CONFIG_MTD_BLOCK is not set
382# CONFIG_MTD_BLOCK_RO is not set
383# CONFIG_FTL is not set
384# CONFIG_NFTL is not set
385# CONFIG_INFTL is not set
386# CONFIG_RFD_FTL is not set
387# CONFIG_SSFDC is not set
388# CONFIG_MTD_OOPS is not set
389
390#
391# RAM/ROM/Flash chip drivers
392#
393CONFIG_MTD_CFI=y
394CONFIG_MTD_JEDECPROBE=y
395CONFIG_MTD_GEN_PROBE=y
396# CONFIG_MTD_CFI_ADV_OPTIONS is not set
397CONFIG_MTD_MAP_BANK_WIDTH_1=y
398CONFIG_MTD_MAP_BANK_WIDTH_2=y
399CONFIG_MTD_MAP_BANK_WIDTH_4=y
400# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
401# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
402# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
403CONFIG_MTD_CFI_I1=y
404CONFIG_MTD_CFI_I2=y
405# CONFIG_MTD_CFI_I4 is not set
406# CONFIG_MTD_CFI_I8 is not set
407# CONFIG_MTD_CFI_INTELEXT is not set
408CONFIG_MTD_CFI_AMDSTD=y
409# CONFIG_MTD_CFI_STAA is not set
410CONFIG_MTD_CFI_UTIL=y
411# CONFIG_MTD_RAM is not set
412# CONFIG_MTD_ROM is not set
413# CONFIG_MTD_ABSENT is not set
414
415#
416# Mapping drivers for chip access
417#
418# CONFIG_MTD_COMPLEX_MAPPINGS is not set
419CONFIG_MTD_PHYSMAP=y
420# CONFIG_MTD_PHYSMAP_COMPAT is not set
421# CONFIG_MTD_INTEL_VR_NOR is not set
422# CONFIG_MTD_PLATRAM is not set
423
424#
425# Self-contained MTD device drivers
426#
427# CONFIG_MTD_PMC551 is not set
428# CONFIG_MTD_SLRAM is not set
429# CONFIG_MTD_PHRAM is not set
430# CONFIG_MTD_MTDRAM is not set
431# CONFIG_MTD_BLOCK2MTD is not set
432
433#
434# Disk-On-Chip Device Drivers
435#
436# CONFIG_MTD_DOC2000 is not set
437# CONFIG_MTD_DOC2001 is not set
438# CONFIG_MTD_DOC2001PLUS is not set
439# CONFIG_MTD_NAND is not set
440# CONFIG_MTD_ONENAND is not set
441
442#
443# LPDDR flash memory drivers
444#
445# CONFIG_MTD_LPDDR is not set
446
447#
448# UBI - Unsorted block images
449#
450# CONFIG_MTD_UBI is not set
352# CONFIG_PARPORT is not set 451# CONFIG_PARPORT is not set
353CONFIG_BLK_DEV=y 452CONFIG_BLK_DEV=y
354# CONFIG_BLK_CPQ_DA is not set 453# CONFIG_BLK_CPQ_DA is not set
@@ -365,9 +464,60 @@ CONFIG_BLK_DEV_RAM_SIZE=8192
365# CONFIG_BLK_DEV_XIP is not set 464# CONFIG_BLK_DEV_XIP is not set
366# CONFIG_CDROM_PKTCDVD is not set 465# CONFIG_CDROM_PKTCDVD is not set
367# CONFIG_ATA_OVER_ETH is not set 466# CONFIG_ATA_OVER_ETH is not set
467# CONFIG_BLK_DEV_HD is not set
368# CONFIG_MISC_DEVICES is not set 468# CONFIG_MISC_DEVICES is not set
369CONFIG_HAVE_IDE=y 469CONFIG_HAVE_IDE=y
370# CONFIG_IDE is not set 470CONFIG_IDE=y
471
472#
473# Please see Documentation/ide/ide.txt for help/info on IDE drives
474#
475CONFIG_IDE_TIMINGS=y
476# CONFIG_BLK_DEV_IDE_SATA is not set
477CONFIG_IDE_GD=y
478CONFIG_IDE_GD_ATA=y
479# CONFIG_IDE_GD_ATAPI is not set
480# CONFIG_BLK_DEV_IDECD is not set
481# CONFIG_BLK_DEV_IDETAPE is not set
482# CONFIG_IDE_TASK_IOCTL is not set
483CONFIG_IDE_PROC_FS=y
484
485#
486# IDE chipset support/bugfixes
487#
488# CONFIG_IDE_GENERIC is not set
489# CONFIG_BLK_DEV_PLATFORM is not set
490CONFIG_BLK_DEV_IDEDMA_SFF=y
491
492#
493# PCI IDE chipsets support
494#
495# CONFIG_BLK_DEV_GENERIC is not set
496# CONFIG_BLK_DEV_AEC62XX is not set
497# CONFIG_BLK_DEV_ALI15X3 is not set
498# CONFIG_BLK_DEV_AMD74XX is not set
499# CONFIG_BLK_DEV_CMD64X is not set
500# CONFIG_BLK_DEV_TRIFLEX is not set
501# CONFIG_BLK_DEV_CS5530 is not set
502# CONFIG_BLK_DEV_HPT366 is not set
503# CONFIG_BLK_DEV_JMICRON is not set
504# CONFIG_BLK_DEV_SC1200 is not set
505# CONFIG_BLK_DEV_PIIX is not set
506# CONFIG_BLK_DEV_IT8172 is not set
507# CONFIG_BLK_DEV_IT8213 is not set
508# CONFIG_BLK_DEV_IT821X is not set
509# CONFIG_BLK_DEV_NS87415 is not set
510# CONFIG_BLK_DEV_PDC202XX_OLD is not set
511# CONFIG_BLK_DEV_PDC202XX_NEW is not set
512# CONFIG_BLK_DEV_SVWKS is not set
513# CONFIG_BLK_DEV_SIIMAGE is not set
514# CONFIG_BLK_DEV_SLC90E66 is not set
515# CONFIG_BLK_DEV_TRM290 is not set
516# CONFIG_BLK_DEV_VIA82CXXX is not set
517# CONFIG_BLK_DEV_TC86C001 is not set
518CONFIG_BLK_DEV_IDE_TX4938=y
519CONFIG_BLK_DEV_IDE_TX4939=y
520CONFIG_BLK_DEV_IDEDMA=y
371 521
372# 522#
373# SCSI device support 523# SCSI device support
@@ -390,7 +540,6 @@ CONFIG_HAVE_IDE=y
390# CONFIG_IEEE1394 is not set 540# CONFIG_IEEE1394 is not set
391# CONFIG_I2O is not set 541# CONFIG_I2O is not set
392CONFIG_NETDEVICES=y 542CONFIG_NETDEVICES=y
393# CONFIG_NETDEVICES_MULTIQUEUE is not set
394# CONFIG_DUMMY is not set 543# CONFIG_DUMMY is not set
395# CONFIG_BONDING is not set 544# CONFIG_BONDING is not set
396# CONFIG_EQUALIZER is not set 545# CONFIG_EQUALIZER is not set
@@ -412,15 +561,19 @@ CONFIG_PHYLIB=y
412# CONFIG_BROADCOM_PHY is not set 561# CONFIG_BROADCOM_PHY is not set
413# CONFIG_ICPLUS_PHY is not set 562# CONFIG_ICPLUS_PHY is not set
414# CONFIG_REALTEK_PHY is not set 563# CONFIG_REALTEK_PHY is not set
564# CONFIG_NATIONAL_PHY is not set
565# CONFIG_STE10XP is not set
566# CONFIG_LSI_ET1011C_PHY is not set
415# CONFIG_FIXED_PHY is not set 567# CONFIG_FIXED_PHY is not set
416# CONFIG_MDIO_BITBANG is not set 568# CONFIG_MDIO_BITBANG is not set
417CONFIG_NET_ETHERNET=y 569CONFIG_NET_ETHERNET=y
418# CONFIG_MII is not set 570CONFIG_MII=y
419# CONFIG_AX88796 is not set 571# CONFIG_AX88796 is not set
420# CONFIG_HAPPYMEAL is not set 572# CONFIG_HAPPYMEAL is not set
421# CONFIG_SUNGEM is not set 573# CONFIG_SUNGEM is not set
422# CONFIG_CASSINI is not set 574# CONFIG_CASSINI is not set
423# CONFIG_NET_VENDOR_3COM is not set 575# CONFIG_NET_VENDOR_3COM is not set
576CONFIG_SMC91X=y
424# CONFIG_DM9000 is not set 577# CONFIG_DM9000 is not set
425# CONFIG_NET_TULIP is not set 578# CONFIG_NET_TULIP is not set
426# CONFIG_HP100 is not set 579# CONFIG_HP100 is not set
@@ -429,6 +582,9 @@ CONFIG_NE2000=y
429# CONFIG_IBM_NEW_EMAC_RGMII is not set 582# CONFIG_IBM_NEW_EMAC_RGMII is not set
430# CONFIG_IBM_NEW_EMAC_TAH is not set 583# CONFIG_IBM_NEW_EMAC_TAH is not set
431# CONFIG_IBM_NEW_EMAC_EMAC4 is not set 584# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
585# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
586# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
587# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
432CONFIG_NET_PCI=y 588CONFIG_NET_PCI=y
433# CONFIG_PCNET32 is not set 589# CONFIG_PCNET32 is not set
434# CONFIG_AMD8111_ETH is not set 590# CONFIG_AMD8111_ETH is not set
@@ -436,7 +592,6 @@ CONFIG_NET_PCI=y
436# CONFIG_B44 is not set 592# CONFIG_B44 is not set
437# CONFIG_FORCEDETH is not set 593# CONFIG_FORCEDETH is not set
438CONFIG_TC35815=y 594CONFIG_TC35815=y
439# CONFIG_EEPRO100 is not set
440# CONFIG_E100 is not set 595# CONFIG_E100 is not set
441# CONFIG_FEALNX is not set 596# CONFIG_FEALNX is not set
442# CONFIG_NATSEMI is not set 597# CONFIG_NATSEMI is not set
@@ -445,9 +600,11 @@ CONFIG_TC35815=y
445# CONFIG_R6040 is not set 600# CONFIG_R6040 is not set
446# CONFIG_SIS900 is not set 601# CONFIG_SIS900 is not set
447# CONFIG_EPIC100 is not set 602# CONFIG_EPIC100 is not set
603# CONFIG_SMSC9420 is not set
448# CONFIG_SUNDANCE is not set 604# CONFIG_SUNDANCE is not set
449# CONFIG_TLAN is not set 605# CONFIG_TLAN is not set
450# CONFIG_VIA_RHINE is not set 606# CONFIG_VIA_RHINE is not set
607# CONFIG_ATL2 is not set
451# CONFIG_NETDEV_1000 is not set 608# CONFIG_NETDEV_1000 is not set
452# CONFIG_NETDEV_10000 is not set 609# CONFIG_NETDEV_10000 is not set
453# CONFIG_TR is not set 610# CONFIG_TR is not set
@@ -458,6 +615,10 @@ CONFIG_TC35815=y
458# CONFIG_WLAN_PRE80211 is not set 615# CONFIG_WLAN_PRE80211 is not set
459# CONFIG_WLAN_80211 is not set 616# CONFIG_WLAN_80211 is not set
460# CONFIG_IWLWIFI_LEDS is not set 617# CONFIG_IWLWIFI_LEDS is not set
618
619#
620# Enable WiMAX (Networking options) to see the WiMAX drivers
621#
461# CONFIG_WAN is not set 622# CONFIG_WAN is not set
462# CONFIG_FDDI is not set 623# CONFIG_FDDI is not set
463# CONFIG_PPP is not set 624# CONFIG_PPP is not set
@@ -502,6 +663,7 @@ CONFIG_SERIAL_TXX9_CONSOLE=y
502CONFIG_SERIAL_TXX9_STDSERIAL=y 663CONFIG_SERIAL_TXX9_STDSERIAL=y
503# CONFIG_SERIAL_JSM is not set 664# CONFIG_SERIAL_JSM is not set
504CONFIG_UNIX98_PTYS=y 665CONFIG_UNIX98_PTYS=y
666# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
505CONFIG_LEGACY_PTYS=y 667CONFIG_LEGACY_PTYS=y
506CONFIG_LEGACY_PTY_COUNT=256 668CONFIG_LEGACY_PTY_COUNT=256
507# CONFIG_IPMI_HANDLER is not set 669# CONFIG_IPMI_HANDLER is not set
@@ -517,17 +679,19 @@ CONFIG_SPI_MASTER=y
517# 679#
518# SPI Master Controller Drivers 680# SPI Master Controller Drivers
519# 681#
682# CONFIG_SPI_BITBANG is not set
683# CONFIG_SPI_GPIO is not set
520CONFIG_SPI_TXX9=y 684CONFIG_SPI_TXX9=y
521 685
522# 686#
523# SPI Protocol Masters 687# SPI Protocol Masters
524# 688#
525CONFIG_EEPROM_AT25=y
526# CONFIG_SPI_TLE62X0 is not set 689# CONFIG_SPI_TLE62X0 is not set
527CONFIG_HAVE_GPIO_LIB=y 690CONFIG_ARCH_REQUIRE_GPIOLIB=y
691CONFIG_GPIOLIB=y
528 692
529# 693#
530# GPIO Support 694# Memory mapped GPIO expanders:
531# 695#
532 696
533# 697#
@@ -535,8 +699,14 @@ CONFIG_HAVE_GPIO_LIB=y
535# 699#
536 700
537# 701#
702# PCI GPIO expanders:
703#
704# CONFIG_GPIO_BT8XX is not set
705
706#
538# SPI GPIO expanders: 707# SPI GPIO expanders:
539# 708#
709# CONFIG_GPIO_MAX7301 is not set
540# CONFIG_GPIO_MCP23S08 is not set 710# CONFIG_GPIO_MCP23S08 is not set
541# CONFIG_W1 is not set 711# CONFIG_W1 is not set
542# CONFIG_POWER_SUPPLY is not set 712# CONFIG_POWER_SUPPLY is not set
@@ -550,6 +720,7 @@ CONFIG_WATCHDOG=y
550# Watchdog Device Drivers 720# Watchdog Device Drivers
551# 721#
552# CONFIG_SOFT_WATCHDOG is not set 722# CONFIG_SOFT_WATCHDOG is not set
723# CONFIG_ALIM7101_WDT is not set
553CONFIG_TXX9_WDT=m 724CONFIG_TXX9_WDT=m
554 725
555# 726#
@@ -557,18 +728,21 @@ CONFIG_TXX9_WDT=m
557# 728#
558# CONFIG_PCIPCWATCHDOG is not set 729# CONFIG_PCIPCWATCHDOG is not set
559# CONFIG_WDTPCI is not set 730# CONFIG_WDTPCI is not set
731CONFIG_SSB_POSSIBLE=y
560 732
561# 733#
562# Sonics Silicon Backplane 734# Sonics Silicon Backplane
563# 735#
564CONFIG_SSB_POSSIBLE=y
565# CONFIG_SSB is not set 736# CONFIG_SSB is not set
566 737
567# 738#
568# Multifunction device drivers 739# Multifunction device drivers
569# 740#
741# CONFIG_MFD_CORE is not set
570# CONFIG_MFD_SM501 is not set 742# CONFIG_MFD_SM501 is not set
571# CONFIG_HTC_PASIC3 is not set 743# CONFIG_HTC_PASIC3 is not set
744# CONFIG_MFD_TMIO is not set
745# CONFIG_REGULATOR is not set
572 746
573# 747#
574# Multimedia devices 748# Multimedia devices
@@ -599,15 +773,27 @@ CONFIG_SSB_POSSIBLE=y
599# Display device support 773# Display device support
600# 774#
601# CONFIG_DISPLAY_SUPPORT is not set 775# CONFIG_DISPLAY_SUPPORT is not set
602
603#
604# Sound
605#
606# CONFIG_SOUND is not set 776# CONFIG_SOUND is not set
607# CONFIG_USB_SUPPORT is not set 777# CONFIG_USB_SUPPORT is not set
608# CONFIG_MMC is not set 778# CONFIG_MMC is not set
609# CONFIG_MEMSTICK is not set 779# CONFIG_MEMSTICK is not set
610# CONFIG_NEW_LEDS is not set 780CONFIG_NEW_LEDS=y
781CONFIG_LEDS_CLASS=y
782
783#
784# LED drivers
785#
786CONFIG_LEDS_GPIO=y
787
788#
789# LED Triggers
790#
791CONFIG_LEDS_TRIGGERS=y
792# CONFIG_LEDS_TRIGGER_TIMER is not set
793CONFIG_LEDS_TRIGGER_IDE_DISK=y
794CONFIG_LEDS_TRIGGER_HEARTBEAT=y
795# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
796# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
611# CONFIG_ACCESSIBILITY is not set 797# CONFIG_ACCESSIBILITY is not set
612# CONFIG_INFINIBAND is not set 798# CONFIG_INFINIBAND is not set
613CONFIG_RTC_LIB=y 799CONFIG_RTC_LIB=y
@@ -628,35 +814,47 @@ CONFIG_RTC_INTF_DEV_UIE_EMUL=y
628# 814#
629# SPI RTC drivers 815# SPI RTC drivers
630# 816#
817# CONFIG_RTC_DRV_M41T94 is not set
818# CONFIG_RTC_DRV_DS1305 is not set
819# CONFIG_RTC_DRV_DS1390 is not set
631# CONFIG_RTC_DRV_MAX6902 is not set 820# CONFIG_RTC_DRV_MAX6902 is not set
632# CONFIG_RTC_DRV_R9701 is not set 821# CONFIG_RTC_DRV_R9701 is not set
633CONFIG_RTC_DRV_RS5C348=y 822CONFIG_RTC_DRV_RS5C348=y
823# CONFIG_RTC_DRV_DS3234 is not set
634 824
635# 825#
636# Platform RTC drivers 826# Platform RTC drivers
637# 827#
638# CONFIG_RTC_DRV_CMOS is not set 828# CONFIG_RTC_DRV_CMOS is not set
829# CONFIG_RTC_DRV_DS1286 is not set
639# CONFIG_RTC_DRV_DS1511 is not set 830# CONFIG_RTC_DRV_DS1511 is not set
640# CONFIG_RTC_DRV_DS1553 is not set 831# CONFIG_RTC_DRV_DS1553 is not set
641CONFIG_RTC_DRV_DS1742=y 832CONFIG_RTC_DRV_DS1742=y
642# CONFIG_RTC_DRV_STK17TA8 is not set 833# CONFIG_RTC_DRV_STK17TA8 is not set
643# CONFIG_RTC_DRV_M48T86 is not set 834# CONFIG_RTC_DRV_M48T86 is not set
835# CONFIG_RTC_DRV_M48T35 is not set
644# CONFIG_RTC_DRV_M48T59 is not set 836# CONFIG_RTC_DRV_M48T59 is not set
837# CONFIG_RTC_DRV_BQ4802 is not set
645# CONFIG_RTC_DRV_V3020 is not set 838# CONFIG_RTC_DRV_V3020 is not set
646 839
647# 840#
648# on-CPU RTC drivers 841# on-CPU RTC drivers
649# 842#
843CONFIG_RTC_DRV_TX4939=y
844# CONFIG_DMADEVICES is not set
650# CONFIG_UIO is not set 845# CONFIG_UIO is not set
846# CONFIG_STAGING is not set
651 847
652# 848#
653# File systems 849# File systems
654# 850#
655# CONFIG_EXT2_FS is not set 851# CONFIG_EXT2_FS is not set
656# CONFIG_EXT3_FS is not set 852# CONFIG_EXT3_FS is not set
853# CONFIG_EXT4_FS is not set
657# CONFIG_REISERFS_FS is not set 854# CONFIG_REISERFS_FS is not set
658# CONFIG_JFS_FS is not set 855# CONFIG_JFS_FS is not set
659CONFIG_FS_POSIX_ACL=y 856CONFIG_FS_POSIX_ACL=y
857CONFIG_FILE_LOCKING=y
660# CONFIG_XFS_FS is not set 858# CONFIG_XFS_FS is not set
661# CONFIG_OCFS2_FS is not set 859# CONFIG_OCFS2_FS is not set
662# CONFIG_DNOTIFY is not set 860# CONFIG_DNOTIFY is not set
@@ -687,30 +885,19 @@ CONFIG_GENERIC_ACL=y
687CONFIG_PROC_FS=y 885CONFIG_PROC_FS=y
688# CONFIG_PROC_KCORE is not set 886# CONFIG_PROC_KCORE is not set
689CONFIG_PROC_SYSCTL=y 887CONFIG_PROC_SYSCTL=y
888CONFIG_PROC_PAGE_MONITOR=y
690CONFIG_SYSFS=y 889CONFIG_SYSFS=y
691CONFIG_TMPFS=y 890CONFIG_TMPFS=y
692CONFIG_TMPFS_POSIX_ACL=y 891CONFIG_TMPFS_POSIX_ACL=y
693# CONFIG_HUGETLB_PAGE is not set 892# CONFIG_HUGETLB_PAGE is not set
694# CONFIG_CONFIGFS_FS is not set 893# CONFIG_CONFIGFS_FS is not set
695 894# CONFIG_MISC_FILESYSTEMS is not set
696#
697# Miscellaneous filesystems
698#
699# CONFIG_HFSPLUS_FS is not set
700# CONFIG_CRAMFS is not set
701# CONFIG_VXFS_FS is not set
702# CONFIG_MINIX_FS is not set
703# CONFIG_HPFS_FS is not set
704# CONFIG_QNX4FS_FS is not set
705# CONFIG_ROMFS_FS is not set
706# CONFIG_SYSV_FS is not set
707# CONFIG_UFS_FS is not set
708CONFIG_NETWORK_FILESYSTEMS=y 895CONFIG_NETWORK_FILESYSTEMS=y
709CONFIG_NFS_FS=y 896CONFIG_NFS_FS=y
710CONFIG_NFS_V3=y 897CONFIG_NFS_V3=y
711# CONFIG_NFS_V3_ACL is not set 898# CONFIG_NFS_V3_ACL is not set
712# CONFIG_NFSD is not set
713CONFIG_ROOT_NFS=y 899CONFIG_ROOT_NFS=y
900# CONFIG_NFSD is not set
714CONFIG_LOCKD=y 901CONFIG_LOCKD=y
715CONFIG_LOCKD_V4=y 902CONFIG_LOCKD_V4=y
716CONFIG_NFS_COMMON=y 903CONFIG_NFS_COMMON=y
@@ -740,7 +927,16 @@ CONFIG_FRAME_WARN=1024
740CONFIG_DEBUG_FS=y 927CONFIG_DEBUG_FS=y
741# CONFIG_HEADERS_CHECK is not set 928# CONFIG_HEADERS_CHECK is not set
742# CONFIG_DEBUG_KERNEL is not set 929# CONFIG_DEBUG_KERNEL is not set
930# CONFIG_DEBUG_MEMORY_INIT is not set
931# CONFIG_RCU_CPU_STALL_DETECTOR is not set
932CONFIG_SYSCTL_SYSCALL_CHECK=y
933
934#
935# Tracers
936#
937# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
743# CONFIG_SAMPLES is not set 938# CONFIG_SAMPLES is not set
939CONFIG_HAVE_ARCH_KGDB=y
744CONFIG_CMDLINE="" 940CONFIG_CMDLINE=""
745 941
746# 942#
@@ -748,15 +944,18 @@ CONFIG_CMDLINE=""
748# 944#
749# CONFIG_KEYS is not set 945# CONFIG_KEYS is not set
750# CONFIG_SECURITY is not set 946# CONFIG_SECURITY is not set
947# CONFIG_SECURITYFS is not set
948# CONFIG_SECURITY_FILE_CAPABILITIES is not set
751# CONFIG_CRYPTO is not set 949# CONFIG_CRYPTO is not set
752 950
753# 951#
754# Library routines 952# Library routines
755# 953#
756CONFIG_BITREVERSE=y 954CONFIG_BITREVERSE=y
757# CONFIG_GENERIC_FIND_FIRST_BIT is not set 955CONFIG_GENERIC_FIND_LAST_BIT=y
758# CONFIG_CRC_CCITT is not set 956# CONFIG_CRC_CCITT is not set
759# CONFIG_CRC16 is not set 957# CONFIG_CRC16 is not set
958# CONFIG_CRC_T10DIF is not set
760# CONFIG_CRC_ITU_T is not set 959# CONFIG_CRC_ITU_T is not set
761CONFIG_CRC32=y 960CONFIG_CRC32=y
762# CONFIG_CRC7 is not set 961# CONFIG_CRC7 is not set
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/mips/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index 43baed16a109..134e1fc8f4d6 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -138,7 +138,8 @@ do { \
138 __instruction_hazard(); \ 138 __instruction_hazard(); \
139} while (0) 139} while (0)
140 140
141#elif defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_CAVIUM_OCTEON) 141#elif defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
142 defined(CONFIG_CPU_R5500)
142 143
143/* 144/*
144 * R10000 rocks - all hazards handled in hardware, so this becomes a nobrainer. 145 * R10000 rocks - all hazards handled in hardware, so this becomes a nobrainer.
diff --git a/arch/mips/include/asm/prefetch.h b/arch/mips/include/asm/prefetch.h
index 17850834ccb0..a56594f360ee 100644
--- a/arch/mips/include/asm/prefetch.h
+++ b/arch/mips/include/asm/prefetch.h
@@ -26,7 +26,7 @@
26 * Pref_WriteBackInvalidate is a nop and Pref_PrepareForStore is broken in 26 * Pref_WriteBackInvalidate is a nop and Pref_PrepareForStore is broken in
27 * current versions due to erratum G105. 27 * current versions due to erratum G105.
28 * 28 *
29 * VR7701 only implements the Load prefetch. 29 * VR5500 (including VR5701 and VR7701) only implement load prefetch.
30 * 30 *
31 * Finally MIPS32 and MIPS64 implement all of the following hints. 31 * Finally MIPS32 and MIPS64 implement all of the following hints.
32 */ 32 */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index a7162a4484cf..1bdbcad3bb74 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -149,6 +149,7 @@ void __init check_wait(void)
149 case CPU_R4650: 149 case CPU_R4650:
150 case CPU_R4700: 150 case CPU_R4700:
151 case CPU_R5000: 151 case CPU_R5000:
152 case CPU_R5500:
152 case CPU_NEVADA: 153 case CPU_NEVADA:
153 case CPU_4KC: 154 case CPU_4KC:
154 case CPU_4KEC: 155 case CPU_4KEC:
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 2f8452b404c7..1a86f84fa947 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -235,7 +235,7 @@ SYSCALL_DEFINE6(32_ipc, u32, call, long, first, long, second, long, third,
235#else 235#else
236 236
237SYSCALL_DEFINE6(32_ipc, u32, call, int, first, int, second, int, third, 237SYSCALL_DEFINE6(32_ipc, u32, call, int, first, int, second, int, third,
238 u32, ptr, u32 fifth) 238 u32, ptr, u32, fifth)
239{ 239{
240 return -ENOSYS; 240 return -ENOSYS;
241} 241}
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 1417c6494858..48060c635acd 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -172,8 +172,9 @@ static void __cpuinit set_prefetch_parameters(void)
172 */ 172 */
173 cache_line_size = cpu_dcache_line_size(); 173 cache_line_size = cpu_dcache_line_size();
174 switch (current_cpu_type()) { 174 switch (current_cpu_type()) {
175 case CPU_R5500:
175 case CPU_TX49XX: 176 case CPU_TX49XX:
176 /* TX49 supports only Pref_Load */ 177 /* These processors only support the Pref_Load. */
177 pref_bias_copy_load = 256; 178 pref_bias_copy_load = 256;
178 break; 179 break;
179 180
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 42942038d0fd..f335cf6cdd78 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -318,6 +318,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
318 case CPU_BCM4710: 318 case CPU_BCM4710:
319 case CPU_LOONGSON2: 319 case CPU_LOONGSON2:
320 case CPU_CAVIUM_OCTEON: 320 case CPU_CAVIUM_OCTEON:
321 case CPU_R5500:
321 if (m4kc_tlbp_war()) 322 if (m4kc_tlbp_war())
322 uasm_i_nop(p); 323 uasm_i_nop(p);
323 tlbw(p); 324 tlbw(p);
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/parisc/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 3d2c6baae96b..233bd87a9637 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -324,17 +324,25 @@ static void sun4u_set_affinity(unsigned int virt_irq,
324 sun4u_irq_enable(virt_irq); 324 sun4u_irq_enable(virt_irq);
325} 325}
326 326
327/* Don't do anything. The desc->status check for IRQ_DISABLED in
328 * handler_irq() will skip the handler call and that will leave the
329 * interrupt in the sent state. The next ->enable() call will hit the
330 * ICLR register to reset the state machine.
331 *
332 * This scheme is necessary, instead of clearing the Valid bit in the
333 * IMAP register, to handle the case of IMAP registers being shared by
334 * multiple INOs (and thus ICLR registers). Since we use a different
335 * virtual IRQ for each shared IMAP instance, the generic code thinks
336 * there is only one user so it prematurely calls ->disable() on
337 * free_irq().
338 *
339 * We have to provide an explicit ->disable() method instead of using
340 * NULL to get the default. The reason is that if the generic code
341 * sees that, it also hooks up a default ->shutdown method which
342 * invokes ->mask() which we do not want. See irq_chip_set_defaults().
343 */
327static void sun4u_irq_disable(unsigned int virt_irq) 344static void sun4u_irq_disable(unsigned int virt_irq)
328{ 345{
329 struct irq_handler_data *data = get_irq_chip_data(virt_irq);
330
331 if (likely(data)) {
332 unsigned long imap = data->imap;
333 unsigned long tmp = upa_readq(imap);
334
335 tmp &= ~IMAP_VALID;
336 upa_writeq(tmp, imap);
337 }
338} 346}
339 347
340static void sun4u_irq_eoi(unsigned int virt_irq) 348static void sun4u_irq_eoi(unsigned int virt_irq)
@@ -747,7 +755,8 @@ void handler_irq(int irq, struct pt_regs *regs)
747 755
748 desc = irq_desc + virt_irq; 756 desc = irq_desc + virt_irq;
749 757
750 desc->handle_irq(virt_irq, desc); 758 if (!(desc->status & IRQ_DISABLED))
759 desc->handle_irq(virt_irq, desc);
751 760
752 bucket_pa = next_pa; 761 bucket_pa = next_pa;
753 } 762 }
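
Editor's note: the sun4u change above replaces an eager IMAP-Valid clear with a lazy scheme: ->disable() becomes a no-op, handler_irq() skips handlers whose descriptor is marked IRQ_DISABLED, and the next ->enable() resets the hardware state machine via the ICLR register. A minimal sketch of the dispatch-side check follows; the struct, flag and handler names are stand-ins for the real kernel types, not their exact definitions.

/* Sketch only: lazy-disable dispatch, modelled on the hunk above. */
struct fake_irq_desc {
	unsigned int status;
	void (*handle)(unsigned int irq, struct fake_irq_desc *desc);
};

#define FAKE_IRQ_DISABLED 0x1

static void dispatch(unsigned int irq, struct fake_irq_desc *desc)
{
	/* A disabled interrupt stays in the "sent" state; we simply skip the
	 * handler. The next enable of the line resets the state machine. */
	if (!(desc->status & FAKE_IRQ_DISABLED))
		desc->handle(irq, desc);
}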
diff --git a/arch/um/include/asm/ftrace.h b/arch/um/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/um/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 15e8b7c4de13..8e3d69e4fcb5 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -64,6 +64,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
64 ret = poke_user(child, addr, data); 64 ret = poke_user(child, addr, data);
65 break; 65 break;
66 66
67 case PTRACE_SYSEMU:
68 case PTRACE_SYSEMU_SINGLESTEP:
69 ret = -EIO;
70 break;
71
67 /* continue and stop at next (return from) syscall */ 72 /* continue and stop at next (return from) syscall */
68 case PTRACE_SYSCALL: 73 case PTRACE_SYSCALL:
69 /* restart after signal. */ 74 /* restart after signal. */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 74f49bb9b125..89b48a116a89 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -14,7 +14,6 @@
14#undef memset 14#undef memset
15 15
16extern size_t strlen(const char *); 16extern size_t strlen(const char *);
17extern void *memcpy(void *, const void *, size_t);
18extern void *memmove(void *, const void *, size_t); 17extern void *memmove(void *, const void *, size_t);
19extern void *memset(void *, int, size_t); 18extern void *memset(void *, int, size_t);
20extern int printf(const char *, ...); 19extern int printf(const char *, ...);
@@ -24,7 +23,11 @@ extern int printf(const char *, ...);
24EXPORT_SYMBOL(strstr); 23EXPORT_SYMBOL(strstr);
25#endif 24#endif
26 25
26#ifndef __x86_64__
27extern void *memcpy(void *, const void *, size_t);
27EXPORT_SYMBOL(memcpy); 28EXPORT_SYMBOL(memcpy);
29#endif
30
28EXPORT_SYMBOL(memmove); 31EXPORT_SYMBOL(memmove);
29EXPORT_SYMBOL(memset); 32EXPORT_SYMBOL(memset);
30EXPORT_SYMBOL(printf); 33EXPORT_SYMBOL(printf);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7fcf85182681..1a3150570785 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,8 @@ config X86
34 select HAVE_FUNCTION_TRACER 34 select HAVE_FUNCTION_TRACER
35 select HAVE_FUNCTION_GRAPH_TRACER 35 select HAVE_FUNCTION_GRAPH_TRACER
36 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 36 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
37 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
38 select HAVE_FTRACE_SYSCALLS
37 select HAVE_KVM 39 select HAVE_KVM
38 select HAVE_ARCH_KGDB 40 select HAVE_ARCH_KGDB
39 select HAVE_ARCH_TRACEHOOK 41 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 5b301b7ff5f4..eb2221d5add2 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -123,6 +123,11 @@ void clflush_cache_range(void *addr, unsigned int size);
123#ifdef CONFIG_DEBUG_RODATA 123#ifdef CONFIG_DEBUG_RODATA
124void mark_rodata_ro(void); 124void mark_rodata_ro(void);
125extern const int rodata_test_data; 125extern const int rodata_test_data;
126void set_kernel_text_rw(void);
127void set_kernel_text_ro(void);
128#else
129static inline void set_kernel_text_rw(void) { }
130static inline void set_kernel_text_ro(void) { }
126#endif 131#endif
127 132
128#ifdef CONFIG_DEBUG_RODATA_TEST 133#ifdef CONFIG_DEBUG_RODATA_TEST
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 63a79c77d220..81937a5dc77c 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -111,6 +111,8 @@ enum fixed_addresses {
111#ifdef CONFIG_PARAVIRT 111#ifdef CONFIG_PARAVIRT
112 FIX_PARAVIRT_BOOTMAP, 112 FIX_PARAVIRT_BOOTMAP,
113#endif 113#endif
114 FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */
115 FIX_TEXT_POKE1,
114 __end_of_permanent_fixed_addresses, 116 __end_of_permanent_fixed_addresses,
115#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 117#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
116 FIX_OHCI1394_BASE, 118 FIX_OHCI1394_BASE,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b55b4a7fbefd..bd2c6511c887 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,6 +28,13 @@
28 28
29#endif 29#endif
30 30
31/* FIXME: I don't want to stay hardcoded */
32#ifdef CONFIG_X86_64
33# define FTRACE_SYSCALL_MAX 296
34#else
35# define FTRACE_SYSCALL_MAX 333
36#endif
37
31#ifdef CONFIG_FUNCTION_TRACER 38#ifdef CONFIG_FUNCTION_TRACER
32#define MCOUNT_ADDR ((long)(mcount)) 39#define MCOUNT_ADDR ((long)(mcount))
33#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 40#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
@@ -55,29 +62,4 @@ struct dyn_arch_ftrace {
55#endif /* __ASSEMBLY__ */ 62#endif /* __ASSEMBLY__ */
56#endif /* CONFIG_FUNCTION_TRACER */ 63#endif /* CONFIG_FUNCTION_TRACER */
57 64
58#ifdef CONFIG_FUNCTION_GRAPH_TRACER
59
60#ifndef __ASSEMBLY__
61
62/*
63 * Stack of return addresses for functions
64 * of a thread.
65 * Used in struct thread_info
66 */
67struct ftrace_ret_stack {
68 unsigned long ret;
69 unsigned long func;
70 unsigned long long calltime;
71};
72
73/*
74 * Primary handler of a function return.
75 * It relays on ftrace_return_to_handler.
76 * Defined in entry_32/64.S
77 */
78extern void return_to_handler(void);
79
80#endif /* __ASSEMBLY__ */
81#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
82
83#endif /* _ASM_X86_FTRACE_H */ 65#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 8e0f8d199e05..86723035a515 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -80,8 +80,6 @@
80 80
81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ 81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
82 82
83#ifdef CONFIG_X86_PTRACE_BTS
84
85#ifndef __ASSEMBLY__ 83#ifndef __ASSEMBLY__
86#include <linux/types.h> 84#include <linux/types.h>
87 85
@@ -140,6 +138,5 @@ struct ptrace_bts_config {
140 BTS records are read from oldest to newest. 138 BTS records are read from oldest to newest.
141 Returns number of BTS records drained. 139 Returns number of BTS records drained.
142*/ 140*/
143#endif /* CONFIG_X86_PTRACE_BTS */
144 141
145#endif /* _ASM_X86_PTRACE_ABI_H */ 142#endif /* _ASM_X86_PTRACE_ABI_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index df9d5f78385e..8820a73ae090 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,6 +94,7 @@ struct thread_info {
94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
97#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */
97 98
98#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 99#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
99#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 100#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -115,15 +116,17 @@ struct thread_info {
115#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
116#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 117#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
117#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 118#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
119#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
118 120
119/* work to do in syscall_trace_enter() */ 121/* work to do in syscall_trace_enter() */
120#define _TIF_WORK_SYSCALL_ENTRY \ 122#define _TIF_WORK_SYSCALL_ENTRY \
121 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \ 123 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \
122 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) 124 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
123 125
124/* work to do in syscall_trace_leave() */ 126/* work to do in syscall_trace_leave() */
125#define _TIF_WORK_SYSCALL_EXIT \ 127#define _TIF_WORK_SYSCALL_EXIT \
126 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP) 128 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
129 _TIF_SYSCALL_FTRACE)
127 130
128/* work to do on interrupt/exception return */ 131/* work to do on interrupt/exception return */
129#define _TIF_WORK_MASK \ 132#define _TIF_WORK_MASK \
@@ -132,7 +135,7 @@ struct thread_info {
132 _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) 135 _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
133 136
134/* work to do on any return to user space */ 137/* work to do on any return to user space */
135#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) 138#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE)
136 139
137/* Only used for 64 bit */ 140/* Only used for 64 bit */
138#define _TIF_DO_NOTIFY_MASK \ 141#define _TIF_DO_NOTIFY_MASK \
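
Editor's note: bit 27 is claimed for TIF_SYSCALL_FTRACE and folded into the syscall entry/exit work masks, so the slow path only runs when at least one tracing/audit/seccomp bit is set. The masking idiom, reduced to its essentials; the bit values follow the header above, the surrounding test is schematic.

/* Schematic: one combined work mask gates the syscall slow path. */
#define TIF_SYSCALL_TRACE	0
#define TIF_SYSCALL_FTRACE	27

#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_FTRACE	(1 << TIF_SYSCALL_FTRACE)

#define _TIF_WORK_SYSCALL_ENTRY	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_FTRACE)

static inline int needs_syscall_slow_path(unsigned long thread_flags)
{
	/* One cheap test covers every per-thread syscall hook. */
	return (thread_flags & _TIF_WORK_SYSCALL_ENTRY) != 0;
}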
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 339ce35648e6..84000eb931ff 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,7 +66,8 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
66obj-y += apic/ 66obj-y += apic/
67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
70obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 71obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 72obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 73obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4c80f1557433..f57658702571 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -5,6 +5,7 @@
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/vmalloc.h> 7#include <linux/vmalloc.h>
8#include <linux/memory.h>
8#include <asm/alternative.h> 9#include <asm/alternative.h>
9#include <asm/sections.h> 10#include <asm/sections.h>
10#include <asm/pgtable.h> 11#include <asm/pgtable.h>
@@ -12,7 +13,9 @@
12#include <asm/nmi.h> 13#include <asm/nmi.h>
13#include <asm/vsyscall.h> 14#include <asm/vsyscall.h>
14#include <asm/cacheflush.h> 15#include <asm/cacheflush.h>
16#include <asm/tlbflush.h>
15#include <asm/io.h> 17#include <asm/io.h>
18#include <asm/fixmap.h>
16 19
17#define MAX_PATCH_LEN (255-1) 20#define MAX_PATCH_LEN (255-1)
18 21
@@ -226,6 +229,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
226{ 229{
227 u8 **ptr; 230 u8 **ptr;
228 231
232 mutex_lock(&text_mutex);
229 for (ptr = start; ptr < end; ptr++) { 233 for (ptr = start; ptr < end; ptr++) {
230 if (*ptr < text) 234 if (*ptr < text)
231 continue; 235 continue;
@@ -234,6 +238,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
234 /* turn DS segment override prefix into lock prefix */ 238 /* turn DS segment override prefix into lock prefix */
235 text_poke(*ptr, ((unsigned char []){0xf0}), 1); 239 text_poke(*ptr, ((unsigned char []){0xf0}), 1);
236 }; 240 };
241 mutex_unlock(&text_mutex);
237} 242}
238 243
239static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 244static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
@@ -243,6 +248,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
243 if (noreplace_smp) 248 if (noreplace_smp)
244 return; 249 return;
245 250
251 mutex_lock(&text_mutex);
246 for (ptr = start; ptr < end; ptr++) { 252 for (ptr = start; ptr < end; ptr++) {
247 if (*ptr < text) 253 if (*ptr < text)
248 continue; 254 continue;
@@ -251,6 +257,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
251 /* turn lock prefix into DS segment override prefix */ 257 /* turn lock prefix into DS segment override prefix */
252 text_poke(*ptr, ((unsigned char []){0x3E}), 1); 258 text_poke(*ptr, ((unsigned char []){0x3E}), 1);
253 }; 259 };
260 mutex_unlock(&text_mutex);
254} 261}
255 262
256struct smp_alt_module { 263struct smp_alt_module {
@@ -500,15 +507,16 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
500 * It means the size must be writable atomically and the address must be aligned 507 * It means the size must be writable atomically and the address must be aligned
501 * in a way that permits an atomic write. It also makes sure we fit on a single 508 * in a way that permits an atomic write. It also makes sure we fit on a single
502 * page. 509 * page.
510 *
511 * Note: Must be called under text_mutex.
503 */ 512 */
504void *__kprobes text_poke(void *addr, const void *opcode, size_t len) 513void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
505{ 514{
515 unsigned long flags;
506 char *vaddr; 516 char *vaddr;
507 int nr_pages = 2;
508 struct page *pages[2]; 517 struct page *pages[2];
509 int i; 518 int i;
510 519
511 might_sleep();
512 if (!core_kernel_text((unsigned long)addr)) { 520 if (!core_kernel_text((unsigned long)addr)) {
513 pages[0] = vmalloc_to_page(addr); 521 pages[0] = vmalloc_to_page(addr);
514 pages[1] = vmalloc_to_page(addr + PAGE_SIZE); 522 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -518,18 +526,21 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
518 pages[1] = virt_to_page(addr + PAGE_SIZE); 526 pages[1] = virt_to_page(addr + PAGE_SIZE);
519 } 527 }
520 BUG_ON(!pages[0]); 528 BUG_ON(!pages[0]);
521 if (!pages[1]) 529 local_irq_save(flags);
522 nr_pages = 1; 530 set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
523 vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 531 if (pages[1])
524 BUG_ON(!vaddr); 532 set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
525 local_irq_disable(); 533 vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
526 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); 534 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
527 local_irq_enable(); 535 clear_fixmap(FIX_TEXT_POKE0);
528 vunmap(vaddr); 536 if (pages[1])
537 clear_fixmap(FIX_TEXT_POKE1);
538 local_flush_tlb();
529 sync_core(); 539 sync_core();
530 /* Could also do a CLFLUSH here to speed up CPU recovery; but 540 /* Could also do a CLFLUSH here to speed up CPU recovery; but
531 that causes hangs on some VIA CPUs. */ 541 that causes hangs on some VIA CPUs. */
532 for (i = 0; i < len; i++) 542 for (i = 0; i < len; i++)
533 BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); 543 BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
544 local_irq_restore(flags);
534 return addr; 545 return addr;
535} 546}
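
Editor's note: the reworked text_poke() above drops the vmap()-based mapping in favour of two dedicated fixmap slots (FIX_TEXT_POKE0/1), so the target can be written through a temporary alias with interrupts off and without sleeping. A condensed sketch of that flow, using the same kernel calls the hunk shows; the second-page (cross-page) case and error handling are trimmed, and this is not a drop-in replacement for text_poke().

/* Sketch of the fixmap-based poke path; must run under text_mutex. */
static void poke_via_fixmap(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	char *vaddr;
	struct page *page = virt_to_page(addr);	/* core kernel text case */

	local_irq_save(flags);
	set_fixmap(FIX_TEXT_POKE0, page_to_phys(page));
	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);

	/* Write through the writable alias of the (read-only) text page. */
	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);

	clear_fixmap(FIX_TEXT_POKE0);
	local_flush_tlb();	/* drop the stale alias mapping */
	sync_core();		/* serialize before executing patched code */
	local_irq_restore(flags);
}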
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 22590cf688ae..5e40f54171e7 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h> 36#include <trace/power.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
70 70
71static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); 71static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
72 72
73DEFINE_TRACE(power_mark);
74
73/* acpi_perf_data is a pointer to percpu data. */ 75/* acpi_perf_data is a pointer to percpu data. */
74static struct acpi_processor_performance *acpi_perf_data; 76static struct acpi_processor_performance *acpi_perf_data;
75 77
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 191117f1ad51..c1c04bf0df77 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
4#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6#include <linux/smp.h> 6#include <linux/smp.h>
7#include <linux/sched.h>
7#include <linux/thread_info.h> 8#include <linux/thread_info.h>
8#include <linux/module.h> 9#include <linux/module.h>
9 10
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
56 57
57 /* 58 /*
58 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 59 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
59 * with P/T states and does not stop in deep C-states 60 * with P/T states and does not stop in deep C-states.
61 *
62 * It is also reliable across cores and sockets. (but not across
63 * cabinets - we turn it off in that case explicitly.)
60 */ 64 */
61 if (c->x86_power & (1 << 8)) { 65 if (c->x86_power & (1 << 8)) {
62 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 66 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 67 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
68 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
69 sched_clock_stable = 1;
64 } 70 }
65 71
66 /* 72 /*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 87d103ded1c3..95ea5fa7d444 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -10,10 +10,12 @@
10#include <linux/kdebug.h> 10#include <linux/kdebug.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/ftrace.h>
13#include <linux/kexec.h> 14#include <linux/kexec.h>
14#include <linux/bug.h> 15#include <linux/bug.h>
15#include <linux/nmi.h> 16#include <linux/nmi.h>
16#include <linux/sysfs.h> 17#include <linux/sysfs.h>
18#include <linux/ftrace.h>
17 19
18#include <asm/stacktrace.h> 20#include <asm/stacktrace.h>
19 21
@@ -195,6 +197,11 @@ unsigned __kprobes long oops_begin(void)
195 int cpu; 197 int cpu;
196 unsigned long flags; 198 unsigned long flags;
197 199
200 /* notify the hw-branch tracer so it may disable tracing and
201 add the last trace to the trace buffer -
202 the earlier this happens, the more useful the trace. */
203 trace_hw_branch_oops();
204
198 oops_enter(); 205 oops_enter();
199 206
200 /* racy, but better than risking deadlock. */ 207 /* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1c..1d0d7f42efe3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,7 @@
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/list.h> 19#include <linux/list.h>
20 20
21#include <asm/cacheflush.h>
21#include <asm/ftrace.h> 22#include <asm/ftrace.h>
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <asm/nops.h> 24#include <asm/nops.h>
@@ -26,6 +27,18 @@
26 27
27#ifdef CONFIG_DYNAMIC_FTRACE 28#ifdef CONFIG_DYNAMIC_FTRACE
28 29
30int ftrace_arch_code_modify_prepare(void)
31{
32 set_kernel_text_rw();
33 return 0;
34}
35
36int ftrace_arch_code_modify_post_process(void)
37{
38 set_kernel_text_ro();
39 return 0;
40}
41
29union ftrace_code_union { 42union ftrace_code_union {
30 char code[MCOUNT_INSN_SIZE]; 43 char code[MCOUNT_INSN_SIZE];
31 struct { 44 struct {
@@ -82,7 +95,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
82 * are the same as what exists. 95 * are the same as what exists.
83 */ 96 */
84 97
85static atomic_t in_nmi = ATOMIC_INIT(0); 98static atomic_t nmi_running = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */ 99static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */ 100static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */ 101static void *mod_code_ip; /* holds the IP to write to */
@@ -111,12 +124,16 @@ static void ftrace_mod_code(void)
111 */ 124 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, 125 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE); 126 MCOUNT_INSN_SIZE);
127
128 /* if we fail, then kill any new writers */
129 if (mod_code_status)
130 mod_code_write = 0;
114} 131}
115 132
116void ftrace_nmi_enter(void) 133void ftrace_nmi_enter(void)
117{ 134{
118 atomic_inc(&in_nmi); 135 atomic_inc(&nmi_running);
119 /* Must have in_nmi seen before reading write flag */ 136 /* Must have nmi_running seen before reading write flag */
120 smp_mb(); 137 smp_mb();
121 if (mod_code_write) { 138 if (mod_code_write) {
122 ftrace_mod_code(); 139 ftrace_mod_code();
@@ -126,22 +143,21 @@ void ftrace_nmi_enter(void)
126 143
127void ftrace_nmi_exit(void) 144void ftrace_nmi_exit(void)
128{ 145{
129 /* Finish all executions before clearing in_nmi */ 146 /* Finish all executions before clearing nmi_running */
130 smp_wmb(); 147 smp_wmb();
131 atomic_dec(&in_nmi); 148 atomic_dec(&nmi_running);
132} 149}
133 150
134static void wait_for_nmi(void) 151static void wait_for_nmi(void)
135{ 152{
136 int waited = 0; 153 if (!atomic_read(&nmi_running))
154 return;
137 155
138 while (atomic_read(&in_nmi)) { 156 do {
139 waited = 1;
140 cpu_relax(); 157 cpu_relax();
141 } 158 } while (atomic_read(&nmi_running));
142 159
143 if (waited) 160 nmi_wait_count++;
144 nmi_wait_count++;
145} 161}
146 162
147static int 163static int
@@ -368,100 +384,8 @@ int ftrace_disable_ftrace_graph_caller(void)
368 return ftrace_mod_jmp(ip, old_offset, new_offset); 384 return ftrace_mod_jmp(ip, old_offset, new_offset);
369} 385}
370 386
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used on
375 * this page for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */ 387#endif /* !CONFIG_DYNAMIC_FTRACE */
391 388
392/* Add a function return address to the trace stack on thread info.*/
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address to the trace stack on thread info.*/
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic, otherwise we have no where to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/* 389/*
466 * Hook the return address and push it in the stack of return addrs 390 * Hook the return address and push it in the stack of return addrs
467 * in current thread info. 391 * in current thread info.
@@ -476,7 +400,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
476 &return_to_handler; 400 &return_to_handler;
477 401
478 /* Nmi's are currently unsupported */ 402 /* Nmi's are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi))) 403 if (unlikely(in_nmi()))
480 return; 404 return;
481 405
482 if (unlikely(atomic_read(&current->tracing_graph_pause))) 406 if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -512,16 +436,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
512 return; 436 return;
513 } 437 }
514 438
515 if (unlikely(!__kernel_text_address(old))) { 439 calltime = trace_clock_local();
516 ftrace_graph_stop();
517 *parent = old;
518 WARN_ON(1);
519 return;
520 }
521
522 calltime = cpu_clock(raw_smp_processor_id());
523 440
524 if (push_return_trace(old, calltime, 441 if (ftrace_push_return_trace(old, calltime,
525 self_addr, &trace.depth) == -EBUSY) { 442 self_addr, &trace.depth) == -EBUSY) {
526 *parent = old; 443 *parent = old;
527 return; 444 return;
@@ -536,3 +453,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
536 } 453 }
537} 454}
538#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 455#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
456
457#ifdef CONFIG_FTRACE_SYSCALLS
458
459extern unsigned long __start_syscalls_metadata[];
460extern unsigned long __stop_syscalls_metadata[];
461extern unsigned long *sys_call_table;
462
463static struct syscall_metadata **syscalls_metadata;
464
465static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
466{
467 struct syscall_metadata *start;
468 struct syscall_metadata *stop;
469 char str[KSYM_SYMBOL_LEN];
470
471
472 start = (struct syscall_metadata *)__start_syscalls_metadata;
473 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
474 kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
475
476 for ( ; start < stop; start++) {
477 if (start->name && !strcmp(start->name, str))
478 return start;
479 }
480 return NULL;
481}
482
483struct syscall_metadata *syscall_nr_to_meta(int nr)
484{
485 if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
486 return NULL;
487
488 return syscalls_metadata[nr];
489}
490
491void arch_init_ftrace_syscalls(void)
492{
493 int i;
494 struct syscall_metadata *meta;
495 unsigned long **psys_syscall_table = &sys_call_table;
496 static atomic_t refs;
497
498 if (atomic_inc_return(&refs) != 1)
499 goto end;
500
501 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
502 FTRACE_SYSCALL_MAX, GFP_KERNEL);
503 if (!syscalls_metadata) {
504 WARN_ON(1);
505 return;
506 }
507
508 for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
509 meta = find_syscall_meta(psys_syscall_table[i]);
510 syscalls_metadata[i] = meta;
511 }
512 return;
513
514 /* Paranoid: avoid overflow */
515end:
516 atomic_dec(&refs);
517}
518#endif
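
Editor's note: arch_init_ftrace_syscalls() above builds a table mapping a syscall number to its metadata by resolving each sys_call_table entry to a symbol name and matching it against the __syscalls_metadata section. A tracer can then turn a raw syscall number into a printable name through syscall_nr_to_meta(); a rough usage sketch follows, assuming only the 'name' member that the matching loop above implies.

/* Sketch: consuming the table built by arch_init_ftrace_syscalls(). */
static const char *syscall_name_or_unknown(int nr)
{
	struct syscall_metadata *meta = syscall_nr_to_meta(nr);

	return (meta && meta->name) ? meta->name : "unknown";
}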
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6afa5232dbb7..8c037051b353 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/ftrace.h> 11#include <trace/power.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/apic.h> 13#include <asm/apic.h>
14#include <asm/idle.h> 14#include <asm/idle.h>
@@ -22,6 +22,9 @@ EXPORT_SYMBOL(idle_nomwait);
22 22
23struct kmem_cache *task_xstate_cachep; 23struct kmem_cache *task_xstate_cachep;
24 24
25DEFINE_TRACE(power_start);
26DEFINE_TRACE(power_end);
27
25int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 28int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
26{ 29{
27 *dst = *src; 30 *dst = *src;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3d9672e59c16..99749d6e87a8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/ftrace.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
@@ -1416,6 +1417,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
1416 tracehook_report_syscall_entry(regs)) 1417 tracehook_report_syscall_entry(regs))
1417 ret = -1L; 1418 ret = -1L;
1418 1419
1420 if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
1421 ftrace_syscall_enter(regs);
1422
1419 if (unlikely(current->audit_context)) { 1423 if (unlikely(current->audit_context)) {
1420 if (IS_IA32) 1424 if (IS_IA32)
1421 audit_syscall_entry(AUDIT_ARCH_I386, 1425 audit_syscall_entry(AUDIT_ARCH_I386,
@@ -1439,6 +1443,9 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
1439 if (unlikely(current->audit_context)) 1443 if (unlikely(current->audit_context))
1440 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1444 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1441 1445
1446 if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
1447 ftrace_syscall_exit(regs);
1448
1442 if (test_thread_flag(TIF_SYSCALL_TRACE)) 1449 if (test_thread_flag(TIF_SYSCALL_TRACE))
1443 tracehook_report_syscall_exit(regs, 0); 1450 tracehook_report_syscall_exit(regs, 0);
1444 1451
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..c7da3683f4c5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
55 55
56config KVM_TRACE 56config KVM_TRACE
57 bool "KVM trace support" 57 bool "KVM trace support"
58 depends on KVM && MARKERS && SYSFS 58 depends on KVM && SYSFS
59 select MARKERS
59 select RELAY 60 select RELAY
60 select DEBUG_FS 61 select DEBUG_FS
61 default n 62 default n
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index db81e9a8556b..749559ed80f5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1054,17 +1054,47 @@ static noinline int do_test_wp_bit(void)
1054const int rodata_test_data = 0xC3; 1054const int rodata_test_data = 0xC3;
1055EXPORT_SYMBOL_GPL(rodata_test_data); 1055EXPORT_SYMBOL_GPL(rodata_test_data);
1056 1056
1057static int kernel_set_to_readonly;
1058
1059void set_kernel_text_rw(void)
1060{
1061 unsigned long start = PFN_ALIGN(_text);
1062 unsigned long size = PFN_ALIGN(_etext) - start;
1063
1064 if (!kernel_set_to_readonly)
1065 return;
1066
1067 pr_debug("Set kernel text: %lx - %lx for read write\n",
1068 start, start+size);
1069
1070 set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
1071}
1072
1073void set_kernel_text_ro(void)
1074{
1075 unsigned long start = PFN_ALIGN(_text);
1076 unsigned long size = PFN_ALIGN(_etext) - start;
1077
1078 if (!kernel_set_to_readonly)
1079 return;
1080
1081 pr_debug("Set kernel text: %lx - %lx for read only\n",
1082 start, start+size);
1083
1084 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1085}
1086
1057void mark_rodata_ro(void) 1087void mark_rodata_ro(void)
1058{ 1088{
1059 unsigned long start = PFN_ALIGN(_text); 1089 unsigned long start = PFN_ALIGN(_text);
1060 unsigned long size = PFN_ALIGN(_etext) - start; 1090 unsigned long size = PFN_ALIGN(_etext) - start;
1061 1091
1062#ifndef CONFIG_DYNAMIC_FTRACE
1063 /* Dynamic tracing modifies the kernel text section */
1064 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1092 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1065 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 1093 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
1066 size >> 10); 1094 size >> 10);
1067 1095
1096 kernel_set_to_readonly = 1;
1097
1068#ifdef CONFIG_CPA_DEBUG 1098#ifdef CONFIG_CPA_DEBUG
1069 printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", 1099 printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
1070 start, start+size); 1100 start, start+size);
@@ -1073,7 +1103,6 @@ void mark_rodata_ro(void)
1073 printk(KERN_INFO "Testing CPA: write protecting again\n"); 1103 printk(KERN_INFO "Testing CPA: write protecting again\n");
1074 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 1104 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
1075#endif 1105#endif
1076#endif /* CONFIG_DYNAMIC_FTRACE */
1077 1106
1078 start += size; 1107 start += size;
1079 size = (unsigned long)__end_rodata - start; 1108 size = (unsigned long)__end_rodata - start;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 54efa57d1c03..1753e8020df6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -734,21 +734,48 @@ void __init mem_init(void)
734const int rodata_test_data = 0xC3; 734const int rodata_test_data = 0xC3;
735EXPORT_SYMBOL_GPL(rodata_test_data); 735EXPORT_SYMBOL_GPL(rodata_test_data);
736 736
737static int kernel_set_to_readonly;
738
739void set_kernel_text_rw(void)
740{
741 unsigned long start = PFN_ALIGN(_stext);
742 unsigned long end = PFN_ALIGN(__start_rodata);
743
744 if (!kernel_set_to_readonly)
745 return;
746
747 pr_debug("Set kernel text: %lx - %lx for read write\n",
748 start, end);
749
750 set_memory_rw(start, (end - start) >> PAGE_SHIFT);
751}
752
753void set_kernel_text_ro(void)
754{
755 unsigned long start = PFN_ALIGN(_stext);
756 unsigned long end = PFN_ALIGN(__start_rodata);
757
758 if (!kernel_set_to_readonly)
759 return;
760
761 pr_debug("Set kernel text: %lx - %lx for read only\n",
762 start, end);
763
764 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
765}
766
737void mark_rodata_ro(void) 767void mark_rodata_ro(void)
738{ 768{
739 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 769 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
740 unsigned long rodata_start = 770 unsigned long rodata_start =
741 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 771 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
742 772
743#ifdef CONFIG_DYNAMIC_FTRACE
744 /* Dynamic tracing modifies the kernel text section */
745 start = rodata_start;
746#endif
747
748 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 773 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
749 (end - start) >> 10); 774 (end - start) >> 10);
750 set_memory_ro(start, (end - start) >> PAGE_SHIFT); 775 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
751 776
777 kernel_set_to_readonly = 1;
778
752 /* 779 /*
753 * The rodata section (but not the kernel text!) should also be 780 * The rodata section (but not the kernel text!) should also be
754 * not-executable. 781 * not-executable.
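
Editor's note: these helpers pair with the ftrace_arch_code_modify_prepare()/post_process() hooks added in arch/x86/kernel/ftrace.c earlier in this patch: kernel text is flipped writable only for the duration of a code-modification pass and restored to read-only afterwards, instead of being left writable whenever DYNAMIC_FTRACE is enabled. A schematic of that bracketing, based on the two hunks in this diff; the wrapper itself is illustrative.

/* Schematic only: how a code-modification pass brackets its writes. */
static int modify_kernel_text(void (*do_writes)(void))
{
	set_kernel_text_rw();	/* no-op until mark_rodata_ro() has run */
	do_writes();		/* e.g. patch mcount call sites */
	set_kernel_text_ro();
	return 0;
}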
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/xtensa/include/asm/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/block/Kconfig b/block/Kconfig
index 0cbb3b88b59a..e7d12782bcfb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -44,22 +44,6 @@ config LBD
44 44
45 If unsure, say N. 45 If unsure, say N.
46 46
47config BLK_DEV_IO_TRACE
48 bool "Support for tracing block io actions"
49 depends on SYSFS
50 select RELAY
51 select DEBUG_FS
52 select TRACEPOINTS
53 help
54 Say Y here if you want to be able to trace the block layer actions
55 on a given queue. Tracing allows you to see any traffic happening
56 on a block device queue. For more information (and the userspace
57 support tools needed), fetch the blktrace tools from:
58
59 git://git.kernel.dk/blktrace.git
60
61 If unsure, say N.
62
63config BLK_DEV_BSG 47config BLK_DEV_BSG
64 bool "Block layer SG support v4 (EXPERIMENTAL)" 48 bool "Block layer SG support v4 (EXPERIMENTAL)"
65 depends on EXPERIMENTAL 49 depends on EXPERIMENTAL
diff --git a/block/Makefile b/block/Makefile
index bfe73049f939..e9fa4dd690f2 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -13,6 +13,5 @@ obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
13obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o 13obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
14obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 14obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
15 15
16obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
17obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
18obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o 17obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 33a9351c896d..30659ce9bcf4 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
283} 283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = { 284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump, 285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer", 286 .help_msg = "dump-ftrace-buffer(Z)",
287 .action_msg = "Dump ftrace buffer", 287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP, 288 .enable_mask = SYSRQ_ENABLE_DUMP,
289}; 289};
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index b84bf066879b..b4eea0292c1a 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -543,8 +543,8 @@ config SENSORS_LM90
543 help 543 help
544 If you say yes here you get support for National Semiconductor LM90, 544 If you say yes here you get support for National Semiconductor LM90,
545 LM86, LM89 and LM99, Analog Devices ADM1032 and ADT7461, and Maxim 545 LM86, LM89 and LM99, Analog Devices ADM1032 and ADT7461, and Maxim
546 MAX6646, MAX6647, MAX6649, MAX6657, MAX6658, MAX6659, MAX6680 and 546 MAX6646, MAX6647, MAX6648, MAX6649, MAX6657, MAX6658, MAX6659,
547 MAX6681 sensor chips. 547 MAX6680, MAX6681 and MAX6692 sensor chips.
548 548
549 This driver can also be built as a module. If so, the module 549 This driver can also be built as a module. If so, the module
550 will be called lm90. 550 will be called lm90.
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index e52b38806d03..ad2b3431b725 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -760,8 +760,11 @@ static int abituguru3_read_increment_offset(struct abituguru3_data *data,
760 760
761 for (i = 0; i < offset_count; i++) 761 for (i = 0; i < offset_count; i++)
762 if ((x = abituguru3_read(data, bank, offset + i, count, 762 if ((x = abituguru3_read(data, bank, offset + i, count,
763 buf + i * count)) != count) 763 buf + i * count)) != count) {
764 return i * count + (i && (x < 0)) ? 0 : x; 764 if (x < 0)
765 return x;
766 return i * count + x;
767 }
765 768
766 return i * count; 769 return i * count;
767} 770}
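
Editor's note: the abituguru3 hunk fixes an operator-precedence bug. In the old "return i * count + (i && (x < 0)) ? 0 : x;" the ternary binds last, so the whole sum becomes the condition and the function returns 0 (or x when the sum happens to be zero) instead of the partial byte count; the rewrite separates the error path from the short-read path. A tiny standalone illustration with made-up values:

#include <stdio.h>

int main(void)
{
	int i = 2, count = 32, x = 10;

	/* Parsed as: (i * count + (i && (x < 0))) ? 0 : x. The sum (64) is
	 * nonzero, so this yields 0 rather than the intended 64 + 10 = 74. */
	int buggy = i * count + (i && (x < 0)) ? 0 : x;

	/* The fixed logic: propagate a negative error, else add the
	 * remainder to the bytes already read. */
	int fixed = (x < 0) ? x : i * count + x;

	printf("buggy=%d fixed=%d\n", buggy, fixed);	/* buggy=0 fixed=74 */
	return 0;
}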
diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
index 1692de369969..18a1ba888165 100644
--- a/drivers/hwmon/f75375s.c
+++ b/drivers/hwmon/f75375s.c
@@ -617,7 +617,7 @@ static void f75375_init(struct i2c_client *client, struct f75375_data *data,
617static int f75375_probe(struct i2c_client *client, 617static int f75375_probe(struct i2c_client *client,
618 const struct i2c_device_id *id) 618 const struct i2c_device_id *id)
619{ 619{
620 struct f75375_data *data = i2c_get_clientdata(client); 620 struct f75375_data *data;
621 struct f75375s_platform_data *f75375s_pdata = client->dev.platform_data; 621 struct f75375s_platform_data *f75375s_pdata = client->dev.platform_data;
622 int err; 622 int err;
623 623
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 95a99c590da2..9157247fed8e 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -213,7 +213,7 @@ static inline u16 FAN16_TO_REG(long rpm)
213 213
214#define TEMP_TO_REG(val) (SENSORS_LIMIT(((val)<0?(((val)-500)/1000):\ 214#define TEMP_TO_REG(val) (SENSORS_LIMIT(((val)<0?(((val)-500)/1000):\
215 ((val)+500)/1000),-128,127)) 215 ((val)+500)/1000),-128,127))
216#define TEMP_FROM_REG(val) (((val)>0x80?(val)-0x100:(val))*1000) 216#define TEMP_FROM_REG(val) ((val) * 1000)
217 217
218#define PWM_TO_REG(val) ((val) >> 1) 218#define PWM_TO_REG(val) ((val) >> 1)
219#define PWM_FROM_REG(val) (((val)&0x7f) << 1) 219#define PWM_FROM_REG(val) (((val)&0x7f) << 1)
@@ -267,9 +267,9 @@ struct it87_data {
267 u8 has_fan; /* Bitfield, fans enabled */ 267 u8 has_fan; /* Bitfield, fans enabled */
268 u16 fan[5]; /* Register values, possibly combined */ 268 u16 fan[5]; /* Register values, possibly combined */
269 u16 fan_min[5]; /* Register values, possibly combined */ 269 u16 fan_min[5]; /* Register values, possibly combined */
270 u8 temp[3]; /* Register value */ 270 s8 temp[3]; /* Register value */
271 u8 temp_high[3]; /* Register value */ 271 s8 temp_high[3]; /* Register value */
272 u8 temp_low[3]; /* Register value */ 272 s8 temp_low[3]; /* Register value */
273 u8 sensor; /* Register value */ 273 u8 sensor; /* Register value */
274 u8 fan_div[3]; /* Register encoding, shifted right */ 274 u8 fan_div[3]; /* Register encoding, shifted right */
275 u8 vid; /* Register encoding, combined */ 275 u8 vid; /* Register encoding, combined */
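
Editor's note: the it87 change stores the raw temperature registers as s8 instead of u8, so the compiler's sign extension replaces the hand-rolled two's-complement fold in TEMP_FROM_REG. A small userspace demonstration of why the open-coded conversion becomes redundant (the register value is made up):

#include <stdio.h>

int main(void)
{
	unsigned char raw = 0xF6;	/* register value meaning -10 degC */

	/* Old macro on a u8 field: manual two's-complement conversion. */
	int old_val = (raw > 0x80 ? raw - 0x100 : raw) * 1000;

	/* New approach: keep the field signed and let sign extension work. */
	signed char sraw = (signed char)raw;
	int new_val = sraw * 1000;

	printf("%d %d\n", old_val, new_val);	/* both print -10000 */
	return 0;
}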
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 96a701866726..1aff7575799d 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -32,10 +32,10 @@
32 * supported by this driver. These chips lack the remote temperature 32 * supported by this driver. These chips lack the remote temperature
33 * offset feature. 33 * offset feature.
34 * 34 *
35 * This driver also supports the MAX6646, MAX6647 and MAX6649 chips 35 * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and
36 * made by Maxim. These are again similar to the LM86, but they use 36 * MAX6692 chips made by Maxim. These are again similar to the LM86,
37 * unsigned temperature values and can report temperatures from 0 to 37 * but they use unsigned temperature values and can report temperatures
38 * 145 degrees. 38 * from 0 to 145 degrees.
39 * 39 *
40 * This driver also supports the MAX6680 and MAX6681, two other sensor 40 * This driver also supports the MAX6680 and MAX6681, two other sensor
41 * chips made by Maxim. These are quite similar to the other Maxim 41 * chips made by Maxim. These are quite similar to the other Maxim
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index a01b4488208b..4a65b96db2c8 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2490,12 +2490,14 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
2490 int ret = 0; 2490 int ret = 0;
2491 struct nes_vnic *nesvnic; 2491 struct nes_vnic *nesvnic;
2492 struct nes_device *nesdev; 2492 struct nes_device *nesdev;
2493 struct nes_ib_device *nesibdev;
2493 2494
2494 nesvnic = to_nesvnic(nesqp->ibqp.device); 2495 nesvnic = to_nesvnic(nesqp->ibqp.device);
2495 if (!nesvnic) 2496 if (!nesvnic)
2496 return -EINVAL; 2497 return -EINVAL;
2497 2498
2498 nesdev = nesvnic->nesdev; 2499 nesdev = nesvnic->nesdev;
2500 nesibdev = nesvnic->nesibdev;
2499 2501
2500 nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", 2502 nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
2501 atomic_read(&nesvnic->netdev->refcnt)); 2503 atomic_read(&nesvnic->netdev->refcnt));
@@ -2507,6 +2509,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
2507 } else { 2509 } else {
2508 /* Need to free the Last Streaming Mode Message */ 2510 /* Need to free the Last Streaming Mode Message */
2509 if (nesqp->ietf_frame) { 2511 if (nesqp->ietf_frame) {
2512 if (nesqp->lsmm_mr)
2513 nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
2510 pci_free_consistent(nesdev->pcidev, 2514 pci_free_consistent(nesdev->pcidev,
2511 nesqp->private_data_len+sizeof(struct ietf_mpa_frame), 2515 nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
2512 nesqp->ietf_frame, nesqp->ietf_frame_pbase); 2516 nesqp->ietf_frame, nesqp->ietf_frame_pbase);
@@ -2543,6 +2547,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2543 u32 crc_value; 2547 u32 crc_value;
2544 int ret; 2548 int ret;
2545 int passive_state; 2549 int passive_state;
2550 struct nes_ib_device *nesibdev;
2551 struct ib_mr *ibmr = NULL;
2552 struct ib_phys_buf ibphysbuf;
2553 struct nes_pd *nespd;
2554
2555
2546 2556
2547 ibqp = nes_get_qp(cm_id->device, conn_param->qpn); 2557 ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
2548 if (!ibqp) 2558 if (!ibqp)
@@ -2601,6 +2611,26 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2601 if (cm_id->remote_addr.sin_addr.s_addr != 2611 if (cm_id->remote_addr.sin_addr.s_addr !=
2602 cm_id->local_addr.sin_addr.s_addr) { 2612 cm_id->local_addr.sin_addr.s_addr) {
2603 u64temp = (unsigned long)nesqp; 2613 u64temp = (unsigned long)nesqp;
2614 nesibdev = nesvnic->nesibdev;
2615 nespd = nesqp->nespd;
2616 ibphysbuf.addr = nesqp->ietf_frame_pbase;
2617 ibphysbuf.size = conn_param->private_data_len +
2618 sizeof(struct ietf_mpa_frame);
2619 ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
2620 &ibphysbuf, 1,
2621 IB_ACCESS_LOCAL_WRITE,
2622 (u64 *)&nesqp->ietf_frame);
2623 if (!ibmr) {
2624 nes_debug(NES_DBG_CM, "Unable to register memory region"
2625 "for lSMM for cm_node = %p \n",
2626 cm_node);
2627 return -ENOMEM;
2628 }
2629
2630 ibmr->pd = &nespd->ibpd;
2631 ibmr->device = nespd->ibpd.device;
2632 nesqp->lsmm_mr = ibmr;
2633
2604 u64temp |= NES_SW_CONTEXT_ALIGN>>1; 2634 u64temp |= NES_SW_CONTEXT_ALIGN>>1;
2605 set_wqe_64bit_value(wqe->wqe_words, 2635 set_wqe_64bit_value(wqe->wqe_words,
2606 NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, 2636 NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
@@ -2611,14 +2641,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2611 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 2641 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
2612 cpu_to_le32(conn_param->private_data_len + 2642 cpu_to_le32(conn_param->private_data_len +
2613 sizeof(struct ietf_mpa_frame)); 2643 sizeof(struct ietf_mpa_frame));
2614 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 2644 set_wqe_64bit_value(wqe->wqe_words,
2615 cpu_to_le32((u32)nesqp->ietf_frame_pbase); 2645 NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
2616 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 2646 (u64)nesqp->ietf_frame);
2617 cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32));
2618 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 2647 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
2619 cpu_to_le32(conn_param->private_data_len + 2648 cpu_to_le32(conn_param->private_data_len +
2620 sizeof(struct ietf_mpa_frame)); 2649 sizeof(struct ietf_mpa_frame));
2621 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; 2650 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
2622 2651
2623 nesqp->nesqp_context->ird_ord_sizes |= 2652 nesqp->nesqp_context->ird_ord_sizes |=
2624 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | 2653 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 4fdb72454f94..d93a6562817c 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1360,8 +1360,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1360 NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT); 1360 NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
1361 nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size << 1361 nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
1362 NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT); 1362 NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
1363 if (!udata) {
1363 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN); 1364 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
1364 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN); 1365 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
1366 }
1365 nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number + 1367 nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
1366 ((u32)nesqp->nesrcq->hw_cq.cq_number << 16)); 1368 ((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
1367 u64temp = (u64)nesqp->hwqp.sq_pbase; 1369 u64temp = (u64)nesqp->hwqp.sq_pbase;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 6c6b4da5184f..ae0ca9bc83bd 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -134,6 +134,7 @@ struct nes_qp {
134 struct ietf_mpa_frame *ietf_frame; 134 struct ietf_mpa_frame *ietf_frame;
135 dma_addr_t ietf_frame_pbase; 135 dma_addr_t ietf_frame_pbase;
136 wait_queue_head_t state_waitq; 136 wait_queue_head_t state_waitq;
137 struct ib_mr *lsmm_mr;
137 unsigned long socket; 138 unsigned long socket;
138 struct nes_hw_qp hwqp; 139 struct nes_hw_qp hwqp;
139 struct work_struct work; 140 struct work_struct work;
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 84d5ea1ec171..b457a05b28d9 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1383,6 +1383,11 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
1383 wm8350->power.rev_g_coeff = 1; 1383 wm8350->power.rev_g_coeff = 1;
1384 break; 1384 break;
1385 1385
1386 case 1:
1387 dev_info(wm8350->dev, "WM8351 Rev B\n");
1388 wm8350->power.rev_g_coeff = 1;
1389 break;
1390
1386 default: 1391 default:
1387 dev_err(wm8350->dev, "Unknown WM8351 CHIP_REV\n"); 1392 dev_err(wm8350->dev, "Unknown WM8351 CHIP_REV\n");
1388 ret = -ENODEV; 1393 ret = -ENODEV;
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f4a67c65d301..2db166b7096f 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -793,8 +793,7 @@ static void s3cmci_dma_setup(struct s3cmci_host *host,
793 host->mem->start + host->sdidata); 793 host->mem->start + host->sdidata);
794 794
795 if (!setup_ok) { 795 if (!setup_ok) {
796 s3c2410_dma_config(host->dma, 4, 796 s3c2410_dma_config(host->dma, 4, 0);
797 (S3C2410_DCON_HWTRIG | S3C2410_DCON_CH0_SDI));
798 s3c2410_dma_set_buffdone_fn(host->dma, 797 s3c2410_dma_set_buffdone_fn(host->dma,
799 s3cmci_dma_done_callback); 798 s3cmci_dma_done_callback);
800 s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART); 799 s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index d4fb4acdbebd..4e9bd380a5c2 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2649,8 +2649,6 @@ static int __devinit happy_meal_sbus_probe_one(struct of_device *op, int is_qfe)
2649 int err = -ENODEV; 2649 int err = -ENODEV;
2650 2650
2651 sbus_dp = to_of_device(op->dev.parent)->node; 2651 sbus_dp = to_of_device(op->dev.parent)->node;
2652 if (is_qfe)
2653 sbus_dp = to_of_device(op->dev.parent->parent)->node;
2654 2652
2655 /* We can match PCI devices too, do not accept those here. */ 2653 /* We can match PCI devices too, do not accept those here. */
2656 if (strcmp(sbus_dp->name, "sbus")) 2654 if (strcmp(sbus_dp->name, "sbus"))
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index e76d715e4342..f0e99d4c066b 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -161,7 +161,7 @@ struct op_sample
161{ 161{
162 entry->event = ring_buffer_lock_reserve 162 entry->event = ring_buffer_lock_reserve
163 (op_ring_buffer_write, sizeof(struct op_sample) + 163 (op_ring_buffer_write, sizeof(struct op_sample) +
164 size * sizeof(entry->sample->data[0]), &entry->irq_flags); 164 size * sizeof(entry->sample->data[0]));
165 if (entry->event) 165 if (entry->event)
166 entry->sample = ring_buffer_event_data(entry->event); 166 entry->sample = ring_buffer_event_data(entry->event);
167 else 167 else
@@ -178,8 +178,7 @@ struct op_sample
178 178
179int op_cpu_buffer_write_commit(struct op_entry *entry) 179int op_cpu_buffer_write_commit(struct op_entry *entry)
180{ 180{
181 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 181 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
182 entry->irq_flags);
183} 182}
184 183
185struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 184struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
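
Editor's note: the oprofile hunks track a tracing ring-buffer API change: ring_buffer_lock_reserve() no longer takes a caller-provided irq-flags argument and ring_buffer_unlock_commit() drops the matching flags parameter, the buffer now handling its own locking internally. The reserve/commit pairing after this change, condensed from the two call sites above; op_ring_buffer_write, struct op_entry and struct op_sample are the driver's own symbols, and error handling is reduced to the minimum.

/* Sketch of the post-change reserve/commit pairing. */
static int write_sample(struct op_entry *entry, unsigned long size)
{
	entry->event = ring_buffer_lock_reserve(op_ring_buffer_write,
			sizeof(struct op_sample) +
			size * sizeof(entry->sample->data[0]));
	if (!entry->event)
		return -ENOMEM;

	entry->sample = ring_buffer_event_data(entry->event);
	/* ... fill entry->sample ... */

	return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
}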
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index eacfb13998bb..9aa4fe100a0d 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -143,7 +143,7 @@ config HOTPLUG_PCI_SHPC
143 143
144config HOTPLUG_PCI_RPA 144config HOTPLUG_PCI_RPA
145 tristate "RPA PCI Hotplug driver" 145 tristate "RPA PCI Hotplug driver"
146 depends on PPC_PSERIES && PPC64 && !HOTPLUG_PCI_FAKE 146 depends on PPC_PSERIES && EEH && !HOTPLUG_PCI_FAKE
147 help 147 help
148 Say Y here if you have a RPA system that supports PCI Hotplug. 148 Say Y here if you have a RPA system that supports PCI Hotplug.
149 149
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index d0c973685868..382575007382 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -133,6 +133,9 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev,
133 bool enable) 133 bool enable)
134{ 134{
135 set_device_error_reporting(dev, &enable); 135 set_device_error_reporting(dev, &enable);
136
137 if (!dev->subordinate)
138 return;
136 pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); 139 pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
137} 140}
138 141
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 248b4db91552..5ea566e20b37 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -103,6 +103,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
103static void pcie_portdrv_remove (struct pci_dev *dev) 103static void pcie_portdrv_remove (struct pci_dev *dev)
104{ 104{
105 pcie_port_device_remove(dev); 105 pcie_port_device_remove(dev);
106 pci_disable_device(dev);
106 kfree(pci_get_drvdata(dev)); 107 kfree(pci_get_drvdata(dev));
107} 108}
108 109
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f20d55368edb..92b9efe9bcaf 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -23,6 +23,7 @@
23#include <linux/acpi.h> 23#include <linux/acpi.h>
24#include <linux/kallsyms.h> 24#include <linux/kallsyms.h>
25#include <linux/dmi.h> 25#include <linux/dmi.h>
26#include <linux/pci-aspm.h>
26#include "pci.h" 27#include "pci.h"
27 28
28int isa_dma_bridge_buggy; 29int isa_dma_bridge_buggy;
@@ -1749,6 +1750,30 @@ static void __devinit quirk_e100_interrupt(struct pci_dev *dev)
1749} 1750}
1750DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_e100_interrupt); 1751DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_e100_interrupt);
1751 1752
1753/*
1754 * The 82575 and 82598 may experience data corruption issues when transitioning
1755 * out of L0S. To prevent this we need to disable L0S on the pci-e link
1756 */
1757static void __devinit quirk_disable_aspm_l0s(struct pci_dev *dev)
1758{
1759 dev_info(&dev->dev, "Disabling L0s\n");
1760 pci_disable_link_state(dev, PCIE_LINK_STATE_L0S);
1761}
1762DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s);
1763DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s);
1764DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s);
1765DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s);
1766DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s);
1767DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s);
1768DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s);
1769DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s);
1770DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s);
1771DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s);
1772DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s);
1773DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s);
1774DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s);
1775DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s);
1776
1752static void __devinit fixup_rev1_53c810(struct pci_dev* dev) 1777static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
1753{ 1778{
1754 /* rev 1 ncr53c810 chips don't set the class at all which means 1779 /* rev 1 ncr53c810 chips don't set the class at all which means
@@ -2097,7 +2122,7 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
2097 2122
2098 if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, 2123 if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
2099 &flags) == 0) { 2124 &flags) == 0) {
2100 dev_info(&dev->dev, "Enabling HT MSI Mapping\n"); 2125 dev_info(&dev->dev, "Disabling HT MSI Mapping\n");
2101 2126
2102 pci_write_config_byte(dev, pos + HT_MSI_FLAGS, 2127 pci_write_config_byte(dev, pos + HT_MSI_FLAGS,
2103 flags & ~HT_MSI_FLAGS_ENABLE); 2128 flags & ~HT_MSI_FLAGS_ENABLE);
@@ -2141,6 +2166,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
2141 int pos; 2166 int pos;
2142 int found; 2167 int found;
2143 2168
2169 /* Enabling HT MSI mapping on this device breaks MCP51 */
2170 if (dev->device == 0x270)
2171 return;
2172
2144 /* check if there is HT MSI cap or enabled on this device */ 2173 /* check if there is HT MSI cap or enabled on this device */
2145 found = ht_check_msi_mapping(dev); 2174 found = ht_check_msi_mapping(dev);
2146 2175
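
Both quirks.c hunks have the same shape: a fixup keyed by vendor/device ID (the ASPM L0s list for the 82575/82598 parts registered with DECLARE_PCI_FIXUP_FINAL) or gated by an explicit device-ID check (skipping the MCP51). A minimal sketch of that ID-keyed fixup-table pattern; the IDs, the fixup table, and apply_fixups() are illustrative only and do not reproduce the kernel's fixup machinery.

#include <stdio.h>

struct fake_dev { unsigned short vendor, device; };

struct fixup {
        unsigned short vendor, device;    /* 0xffff means "any device" */
        void (*fn)(struct fake_dev *dev);
};

static void disable_l0s(struct fake_dev *dev)
{
        printf("disabling L0s on %04x:%04x\n", dev->vendor, dev->device);
}

/* Table of fixups; the kernel collects its equivalent from the
 * DECLARE_PCI_FIXUP_FINAL() entries above. */
static const struct fixup fixups[] = {
        { 0x8086, 0x10a7, disable_l0s },
        { 0x8086, 0x10c6, disable_l0s },
};

static void apply_fixups(struct fake_dev *dev)
{
        size_t i;

        for (i = 0; i < sizeof(fixups) / sizeof(fixups[0]); i++)
                if (fixups[i].vendor == dev->vendor &&
                    (fixups[i].device == 0xffff ||
                     fixups[i].device == dev->device))
                        fixups[i].fn(dev);
}

int main(void)
{
        struct fake_dev nic = { 0x8086, 0x10c6 };

        apply_fixups(&nic);
        return 0;
}
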
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 94c9f911824e..6bcca616a704 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -1297,7 +1297,7 @@ static int __init acer_wmi_init(void)
1297 1297
1298 set_quirks(); 1298 set_quirks();
1299 1299
1300 if (!acpi_video_backlight_support() && has_cap(ACER_CAP_BRIGHTNESS)) { 1300 if (acpi_video_backlight_support() && has_cap(ACER_CAP_BRIGHTNESS)) {
1301 interface->capability &= ~ACER_CAP_BRIGHTNESS; 1301 interface->capability &= ~ACER_CAP_BRIGHTNESS;
1302 printk(ACER_INFO "Brightness must be controlled by " 1302 printk(ACER_INFO "Brightness must be controlled by "
1303 "generic video driver\n"); 1303 "generic video driver\n");
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 1d768928e0bb..a52d4a11652d 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -180,10 +180,13 @@ static int ds2760_battery_read_status(struct ds2760_device_info *di)
180 di->empty_uAh = battery_interpolate(scale, di->temp_C / 10); 180 di->empty_uAh = battery_interpolate(scale, di->temp_C / 10);
181 di->empty_uAh *= 1000; /* convert to µAh */ 181 di->empty_uAh *= 1000; /* convert to µAh */
182 182
183 /* From Maxim Application Note 131: remaining capacity = 183 if (di->full_active_uAh == di->empty_uAh)
184 * ((ICA - Empty Value) / (Full Value - Empty Value)) x 100% */ 184 di->rem_capacity = 0;
185 di->rem_capacity = ((di->accum_current_uAh - di->empty_uAh) * 100L) / 185 else
186 (di->full_active_uAh - di->empty_uAh); 186 /* From Maxim Application Note 131: remaining capacity =
187 * ((ICA - Empty Value) / (Full Value - Empty Value)) x 100% */
188 di->rem_capacity = ((di->accum_current_uAh - di->empty_uAh) * 100L) /
189 (di->full_active_uAh - di->empty_uAh);
187 190
188 if (di->rem_capacity < 0) 191 if (di->rem_capacity < 0)
189 di->rem_capacity = 0; 192 di->rem_capacity = 0;
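
The guard added above matters because the Application Note 131 formula divides by (full - empty); with calibration data that makes the two equal, the old code divided by zero. A small standalone sketch of the same calculation with the guard, using illustrative capacity values:

#include <stdio.h>

/* remaining capacity = (ICA - empty) / (full - empty) * 100%, clamped to 0..100 */
static int rem_capacity(long accum_uAh, long empty_uAh, long full_uAh)
{
        long pct;

        if (full_uAh == empty_uAh)
                return 0;       /* avoid division by zero on bad calibration */

        pct = (accum_uAh - empty_uAh) * 100L / (full_uAh - empty_uAh);
        if (pct < 0)
                pct = 0;
        if (pct > 100)
                pct = 100;
        return (int)pct;
}

int main(void)
{
        printf("%d%%\n", rem_capacity(600000, 100000, 1100000));  /* 50% */
        printf("%d%%\n", rem_capacity(600000, 500000, 500000));   /* guarded: 0% */
        return 0;
}
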
diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c
index f08e169ba1b5..7e30e5f6e032 100644
--- a/drivers/sbus/char/bbc_i2c.c
+++ b/drivers/sbus/char/bbc_i2c.c
@@ -129,7 +129,7 @@ static int wait_for_pin(struct bbc_i2c_bus *bp, u8 *status)
129 bp->waiting = 1; 129 bp->waiting = 1;
130 add_wait_queue(&bp->wq, &wait); 130 add_wait_queue(&bp->wq, &wait);
131 while (limit-- > 0) { 131 while (limit-- > 0) {
132 unsigned long val; 132 long val;
133 133
134 val = wait_event_interruptible_timeout( 134 val = wait_event_interruptible_timeout(
135 bp->wq, 135 bp->wq,
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index a9a9893a5f95..e6d1fc8c54f1 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -38,9 +38,6 @@
38#include <linux/string.h> 38#include <linux/string.h>
39#include <linux/genhd.h> 39#include <linux/genhd.h>
40#include <linux/blkdev.h> 40#include <linux/blkdev.h>
41
42#define MAJOR_NR JSFD_MAJOR
43
44#include <asm/uaccess.h> 41#include <asm/uaccess.h>
45#include <asm/pgtable.h> 42#include <asm/pgtable.h>
46#include <asm/io.h> 43#include <asm/io.h>
diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c
index 9e1138a75e8b..a411702413d6 100644
--- a/drivers/w1/masters/w1-gpio.c
+++ b/drivers/w1/masters/w1-gpio.c
@@ -39,7 +39,7 @@ static u8 w1_gpio_read_bit(void *data)
39{ 39{
40 struct w1_gpio_platform_data *pdata = data; 40 struct w1_gpio_platform_data *pdata = data;
41 41
42 return gpio_get_value(pdata->pin); 42 return gpio_get_value(pdata->pin) ? 1 : 0;
43} 43}
44 44
45static int __init w1_gpio_probe(struct platform_device *pdev) 45static int __init w1_gpio_probe(struct platform_device *pdev)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e5eaa62fd17f..e3fe9918faaf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -274,6 +274,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
274 int ret; 274 int ret;
275 275
276 BUG_ON(inode->i_state & I_SYNC); 276 BUG_ON(inode->i_state & I_SYNC);
277 WARN_ON(inode->i_state & I_NEW);
277 278
278 /* Set I_SYNC, reset I_DIRTY */ 279 /* Set I_SYNC, reset I_DIRTY */
279 dirty = inode->i_state & I_DIRTY; 280 dirty = inode->i_state & I_DIRTY;
@@ -298,6 +299,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
298 } 299 }
299 300
300 spin_lock(&inode_lock); 301 spin_lock(&inode_lock);
302 WARN_ON(inode->i_state & I_NEW);
301 inode->i_state &= ~I_SYNC; 303 inode->i_state &= ~I_SYNC;
302 if (!(inode->i_state & I_FREEING)) { 304 if (!(inode->i_state & I_FREEING)) {
303 if (!(inode->i_state & I_DIRTY) && 305 if (!(inode->i_state & I_DIRTY) &&
@@ -470,6 +472,11 @@ void generic_sync_sb_inodes(struct super_block *sb,
470 break; 472 break;
471 } 473 }
472 474
475 if (inode->i_state & I_NEW) {
476 requeue_io(inode);
477 continue;
478 }
479
473 if (wbc->nonblocking && bdi_write_congested(bdi)) { 480 if (wbc->nonblocking && bdi_write_congested(bdi)) {
474 wbc->encountered_congestion = 1; 481 wbc->encountered_congestion = 1;
475 if (!sb_is_blkdev_sb(sb)) 482 if (!sb_is_blkdev_sb(sb))
@@ -531,7 +538,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
531 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 538 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
532 struct address_space *mapping; 539 struct address_space *mapping;
533 540
534 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 541 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
535 continue; 542 continue;
536 mapping = inode->i_mapping; 543 mapping = inode->i_mapping;
537 if (mapping->nrpages == 0) 544 if (mapping->nrpages == 0)
diff --git a/fs/inode.c b/fs/inode.c
index 913ab2d9a5d1..826fb0b9d1c3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -359,6 +359,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
359 invalidate_inode_buffers(inode); 359 invalidate_inode_buffers(inode);
360 if (!atomic_read(&inode->i_count)) { 360 if (!atomic_read(&inode->i_count)) {
361 list_move(&inode->i_list, dispose); 361 list_move(&inode->i_list, dispose);
362 WARN_ON(inode->i_state & I_NEW);
362 inode->i_state |= I_FREEING; 363 inode->i_state |= I_FREEING;
363 count++; 364 count++;
364 continue; 365 continue;
@@ -460,6 +461,7 @@ static void prune_icache(int nr_to_scan)
460 continue; 461 continue;
461 } 462 }
462 list_move(&inode->i_list, &freeable); 463 list_move(&inode->i_list, &freeable);
464 WARN_ON(inode->i_state & I_NEW);
463 inode->i_state |= I_FREEING; 465 inode->i_state |= I_FREEING;
464 nr_pruned++; 466 nr_pruned++;
465 } 467 }
@@ -656,6 +658,7 @@ void unlock_new_inode(struct inode *inode)
656 * just created it (so there can be no old holders 658 * just created it (so there can be no old holders
657 * that haven't tested I_LOCK). 659 * that haven't tested I_LOCK).
658 */ 660 */
661 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
659 inode->i_state &= ~(I_LOCK|I_NEW); 662 inode->i_state &= ~(I_LOCK|I_NEW);
660 wake_up_inode(inode); 663 wake_up_inode(inode);
661} 664}
@@ -1145,6 +1148,7 @@ void generic_delete_inode(struct inode *inode)
1145 1148
1146 list_del_init(&inode->i_list); 1149 list_del_init(&inode->i_list);
1147 list_del_init(&inode->i_sb_list); 1150 list_del_init(&inode->i_sb_list);
1151 WARN_ON(inode->i_state & I_NEW);
1148 inode->i_state |= I_FREEING; 1152 inode->i_state |= I_FREEING;
1149 inodes_stat.nr_inodes--; 1153 inodes_stat.nr_inodes--;
1150 spin_unlock(&inode_lock); 1154 spin_unlock(&inode_lock);
@@ -1186,16 +1190,19 @@ static void generic_forget_inode(struct inode *inode)
1186 spin_unlock(&inode_lock); 1190 spin_unlock(&inode_lock);
1187 return; 1191 return;
1188 } 1192 }
1193 WARN_ON(inode->i_state & I_NEW);
1189 inode->i_state |= I_WILL_FREE; 1194 inode->i_state |= I_WILL_FREE;
1190 spin_unlock(&inode_lock); 1195 spin_unlock(&inode_lock);
1191 write_inode_now(inode, 1); 1196 write_inode_now(inode, 1);
1192 spin_lock(&inode_lock); 1197 spin_lock(&inode_lock);
1198 WARN_ON(inode->i_state & I_NEW);
1193 inode->i_state &= ~I_WILL_FREE; 1199 inode->i_state &= ~I_WILL_FREE;
1194 inodes_stat.nr_unused--; 1200 inodes_stat.nr_unused--;
1195 hlist_del_init(&inode->i_hash); 1201 hlist_del_init(&inode->i_hash);
1196 } 1202 }
1197 list_del_init(&inode->i_list); 1203 list_del_init(&inode->i_list);
1198 list_del_init(&inode->i_sb_list); 1204 list_del_init(&inode->i_sb_list);
1205 WARN_ON(inode->i_state & I_NEW);
1199 inode->i_state |= I_FREEING; 1206 inode->i_state |= I_FREEING;
1200 inodes_stat.nr_inodes--; 1207 inodes_stat.nr_inodes--;
1201 spin_unlock(&inode_lock); 1208 spin_unlock(&inode_lock);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..8a17f7edcc74 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -19,6 +19,7 @@
19#include <linux/kmod.h> 19#include <linux/kmod.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/genhd.h> 21#include <linux/genhd.h>
22#include <linux/blktrace_api.h>
22 23
23#include "check.h" 24#include "check.h"
24 25
@@ -294,6 +295,9 @@ static struct attribute_group part_attr_group = {
294 295
295static struct attribute_group *part_attr_groups[] = { 296static struct attribute_group *part_attr_groups[] = {
296 &part_attr_group, 297 &part_attr_group,
298#ifdef CONFIG_BLK_DEV_IO_TRACE
299 &blk_trace_attr_group,
300#endif
297 NULL 301 NULL
298}; 302};
299 303
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a48ba5179d5..14f502b89cf5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -699,12 +699,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
699 int retval; 699 int retval;
700 700
701 mutex_lock(&inode->i_mutex); 701 mutex_lock(&inode->i_mutex);
702
703 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 702 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
704 703 if (retval >= 0) {
705 if (retval >= 0)
706 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 704 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
707 705 if (retval < 0) /* this can happen only if on == T */
706 fasync_helper(-1, filp, 0, &pipe->fasync_readers);
707 }
708 mutex_unlock(&inode->i_mutex); 708 mutex_unlock(&inode->i_mutex);
709 709
710 if (retval < 0) 710 if (retval < 0)
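
The fix above registers the reader side first and, if registering the writer side then fails (which can only happen when on is true), unregisters the reader again so the two lists stay consistent. A generic sketch of that acquire-both-or-neither pattern; attach()/detach() are hypothetical helpers standing in for fasync_helper():

#include <stdio.h>

static int attach(const char *what, int fail)
{
        if (fail) {
                printf("attach %s failed\n", what);
                return -1;
        }
        printf("attached %s\n", what);
        return 0;
}

static void detach(const char *what)
{
        printf("detached %s\n", what);
}

/* Attach both readers and writers, or neither. */
static int attach_both(int fail_writers)
{
        int ret = attach("readers", 0);

        if (ret < 0)
                return ret;
        ret = attach("writers", fail_writers);
        if (ret < 0)
                detach("readers");      /* roll back the first registration */
        return ret;
}

int main(void)
{
        attach_both(1);
        return 0;
}
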
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 321728f48f2d..2a7960310349 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -184,15 +184,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
184 offset = 0; 184 offset = 0;
185 } 185 }
186 186
187 if (msblk->stream.avail_out == 0) { 187 if (msblk->stream.avail_out == 0 && page < pages) {
188 if (page == pages) {
189 ERROR("zlib_inflate tried to "
190 "decompress too much data, "
191 "expected %d bytes. Zlib "
192 "data probably corrupt\n",
193 srclength);
194 goto release_mutex;
195 }
196 msblk->stream.next_out = buffer[page++]; 188 msblk->stream.next_out = buffer[page++];
197 msblk->stream.avail_out = PAGE_CACHE_SIZE; 189 msblk->stream.avail_out = PAGE_CACHE_SIZE;
198 } 190 }
@@ -209,25 +201,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
209 zlib_init = 1; 201 zlib_init = 1;
210 } 202 }
211 203
212 zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); 204 zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
213 205
214 if (msblk->stream.avail_in == 0 && k < b) 206 if (msblk->stream.avail_in == 0 && k < b)
215 put_bh(bh[k++]); 207 put_bh(bh[k++]);
216 } while (zlib_err == Z_OK); 208 } while (zlib_err == Z_OK);
217 209
218 if (zlib_err != Z_STREAM_END) { 210 if (zlib_err != Z_STREAM_END) {
219 ERROR("zlib_inflate returned unexpected result" 211 ERROR("zlib_inflate error, data probably corrupt\n");
220 " 0x%x, srclength %d, avail_in %d,"
221 " avail_out %d\n", zlib_err, srclength,
222 msblk->stream.avail_in,
223 msblk->stream.avail_out);
224 goto release_mutex; 212 goto release_mutex;
225 } 213 }
226 214
227 zlib_err = zlib_inflateEnd(&msblk->stream); 215 zlib_err = zlib_inflateEnd(&msblk->stream);
228 if (zlib_err != Z_OK) { 216 if (zlib_err != Z_OK) {
229 ERROR("zlib_inflateEnd returned unexpected result 0x%x," 217 ERROR("zlib_inflate error, data probably corrupt\n");
230 " srclength %d\n", zlib_err, srclength);
231 goto release_mutex; 218 goto release_mutex;
232 } 219 }
233 length = msblk->stream.total_out; 220 length = msblk->stream.total_out;
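
With Z_SYNC_FLUSH the loop above can hand zlib one page-sized output buffer at a time and simply stop when inflate() reports Z_STREAM_END, rather than trying to detect over-long output itself. A userspace sketch of the same multi-buffer inflate loop (link with -lz); the 4096-byte page size and the two-page output array are illustrative:

#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define PAGE_SZ 4096

static int inflate_to_pages(const unsigned char *src, size_t srclen,
                            unsigned char pages[][PAGE_SZ], int npages)
{
        z_stream s;
        int page = 0, err;

        memset(&s, 0, sizeof(s));
        if (inflateInit(&s) != Z_OK)
                return -1;

        s.next_in = (unsigned char *)src;
        s.avail_in = srclen;

        do {
                /* Feed the next page-sized output buffer when the last one fills. */
                if (s.avail_out == 0 && page < npages) {
                        s.next_out = pages[page++];
                        s.avail_out = PAGE_SZ;
                }
                err = inflate(&s, Z_SYNC_FLUSH);
        } while (err == Z_OK);

        inflateEnd(&s);
        if (err != Z_STREAM_END)
                return -1;              /* data probably corrupt */
        return (int)s.total_out;
}

int main(void)
{
        const char *msg = "squashfs squashfs squashfs squashfs";
        unsigned char comp[128];
        unsigned char pages[2][PAGE_SZ];
        uLongf clen = sizeof(comp);
        int out;

        if (compress2(comp, &clen, (const unsigned char *)msg,
                      strlen(msg) + 1, 9) != Z_OK)
                return 1;
        out = inflate_to_pages(comp, clen, pages, 2);
        printf("inflated %d bytes: %s\n", out, pages[0]);
        return 0;
}
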
diff --git a/fs/super.c b/fs/super.c
index 8349ed6b1412..6ce501447ada 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -371,8 +371,10 @@ retry:
371 continue; 371 continue;
372 if (!grab_super(old)) 372 if (!grab_super(old))
373 goto retry; 373 goto retry;
374 if (s) 374 if (s) {
375 up_write(&s->s_umount);
375 destroy_super(s); 376 destroy_super(s);
377 }
376 return old; 378 return old;
377 } 379 }
378 } 380 }
@@ -387,6 +389,7 @@ retry:
387 err = set(s, data); 389 err = set(s, data);
388 if (err) { 390 if (err) {
389 spin_unlock(&sb_lock); 391 spin_unlock(&sb_lock);
392 up_write(&s->s_umount);
390 destroy_super(s); 393 destroy_super(s);
391 return ERR_PTR(err); 394 return ERR_PTR(err);
392 } 395 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index cb329edc925b..aa1016bb9134 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,6 +34,12 @@
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36 36
37#include "xfs_sb.h"
38#include "xfs_inum.h"
39#include "xfs_ag.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42
37static kmem_zone_t *xfs_buf_zone; 43static kmem_zone_t *xfs_buf_zone;
38STATIC int xfsbufd(void *); 44STATIC int xfsbufd(void *);
39STATIC int xfsbufd_wakeup(int, gfp_t); 45STATIC int xfsbufd_wakeup(int, gfp_t);
@@ -1435,10 +1441,12 @@ xfs_unregister_buftarg(
1435 1441
1436void 1442void
1437xfs_free_buftarg( 1443xfs_free_buftarg(
1438 xfs_buftarg_t *btp) 1444 struct xfs_mount *mp,
1445 struct xfs_buftarg *btp)
1439{ 1446{
1440 xfs_flush_buftarg(btp, 1); 1447 xfs_flush_buftarg(btp, 1);
1441 xfs_blkdev_issue_flush(btp); 1448 if (mp->m_flags & XFS_MOUNT_BARRIER)
1449 xfs_blkdev_issue_flush(btp);
1442 xfs_free_bufhash(btp); 1450 xfs_free_bufhash(btp);
1443 iput(btp->bt_mapping->host); 1451 iput(btp->bt_mapping->host);
1444 1452
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 288ae7c4c800..9b4d666ad31f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
413 * Handling of buftargs. 413 * Handling of buftargs.
414 */ 414 */
415extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 415extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
416extern void xfs_free_buftarg(xfs_buftarg_t *); 416extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
417extern void xfs_wait_buftarg(xfs_buftarg_t *); 417extern void xfs_wait_buftarg(xfs_buftarg_t *);
418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
419extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 419extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71e226da7f5..32ae5028e96b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -734,15 +734,15 @@ xfs_close_devices(
734{ 734{
735 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 735 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
736 struct block_device *logdev = mp->m_logdev_targp->bt_bdev; 736 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
737 xfs_free_buftarg(mp->m_logdev_targp); 737 xfs_free_buftarg(mp, mp->m_logdev_targp);
738 xfs_blkdev_put(logdev); 738 xfs_blkdev_put(logdev);
739 } 739 }
740 if (mp->m_rtdev_targp) { 740 if (mp->m_rtdev_targp) {
741 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; 741 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
742 xfs_free_buftarg(mp->m_rtdev_targp); 742 xfs_free_buftarg(mp, mp->m_rtdev_targp);
743 xfs_blkdev_put(rtdev); 743 xfs_blkdev_put(rtdev);
744 } 744 }
745 xfs_free_buftarg(mp->m_ddev_targp); 745 xfs_free_buftarg(mp, mp->m_ddev_targp);
746} 746}
747 747
748/* 748/*
@@ -811,9 +811,9 @@ xfs_open_devices(
811 811
812 out_free_rtdev_targ: 812 out_free_rtdev_targ:
813 if (mp->m_rtdev_targp) 813 if (mp->m_rtdev_targp)
814 xfs_free_buftarg(mp->m_rtdev_targp); 814 xfs_free_buftarg(mp, mp->m_rtdev_targp);
815 out_free_ddev_targ: 815 out_free_ddev_targ:
816 xfs_free_buftarg(mp->m_ddev_targp); 816 xfs_free_buftarg(mp, mp->m_ddev_targp);
817 out_close_rtdev: 817 out_close_rtdev:
818 if (rtdev) 818 if (rtdev)
819 xfs_blkdev_put(rtdev); 819 xfs_blkdev_put(rtdev);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e2fb6210d4c5..478e587087fe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -246,9 +246,6 @@ xfs_iget_cache_miss(
246 goto out_destroy; 246 goto out_destroy;
247 } 247 }
248 248
249 if (lock_flags)
250 xfs_ilock(ip, lock_flags);
251
252 /* 249 /*
253 * Preload the radix tree so we can insert safely under the 250 * Preload the radix tree so we can insert safely under the
254 * write spinlock. Note that we cannot sleep inside the preload 251 * write spinlock. Note that we cannot sleep inside the preload
@@ -256,7 +253,16 @@ xfs_iget_cache_miss(
256 */ 253 */
257 if (radix_tree_preload(GFP_KERNEL)) { 254 if (radix_tree_preload(GFP_KERNEL)) {
258 error = EAGAIN; 255 error = EAGAIN;
259 goto out_unlock; 256 goto out_destroy;
257 }
258
259 /*
260 * Because the inode hasn't been added to the radix-tree yet it can't
261 * be found by another thread, so we can do the non-sleeping lock here.
262 */
263 if (lock_flags) {
264 if (!xfs_ilock_nowait(ip, lock_flags))
265 BUG();
260 } 266 }
261 267
262 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 268 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
@@ -284,7 +290,6 @@ xfs_iget_cache_miss(
284out_preload_end: 290out_preload_end:
285 write_unlock(&pag->pag_ici_lock); 291 write_unlock(&pag->pag_ici_lock);
286 radix_tree_preload_end(); 292 radix_tree_preload_end();
287out_unlock:
288 if (lock_flags) 293 if (lock_flags)
289 xfs_iunlock(ip, lock_flags); 294 xfs_iunlock(ip, lock_flags);
290out_destroy: 295out_destroy:
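
The comment added above spells out the reasoning: the inode is not yet in the radix tree, so no other thread can hold its lock, and a non-sleeping trylock must succeed (hence the BUG() if it does not). A generic pthread sketch of that trylock-on-an-unpublished-object pattern (compile with -lpthread; the item type is illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        pthread_mutex_t lock;
        int value;
};

int main(void)
{
        /* Freshly allocated and not yet inserted into any shared structure,
         * so no other thread can possibly hold item->lock. */
        struct item *item = calloc(1, sizeof(*item));

        if (!item)
                return 1;
        pthread_mutex_init(&item->lock, NULL);

        /* A trylock here cannot fail; treat failure as a programming error. */
        if (pthread_mutex_trylock(&item->lock) != 0)
                abort();

        item->value = 42;
        /* ... publish the item to the shared structure while holding the lock ... */
        pthread_mutex_unlock(&item->lock);

        printf("value %d published\n", item->value);
        pthread_mutex_destroy(&item->lock);
        free(item);
        return 0;
}
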
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b1047de2fffd..61af610d79b3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1455,10 +1455,19 @@ xlog_recover_add_to_trans(
1455 item = item->ri_prev; 1455 item = item->ri_prev;
1456 1456
1457 if (item->ri_total == 0) { /* first region to be added */ 1457 if (item->ri_total == 0) { /* first region to be added */
1458 item->ri_total = in_f->ilf_size; 1458 if (in_f->ilf_size == 0 ||
1459 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); 1459 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1460 item->ri_buf = kmem_zalloc((item->ri_total * 1460 xlog_warn(
1461 sizeof(xfs_log_iovec_t)), KM_SLEEP); 1461 "XFS: bad number of regions (%d) in inode log format",
1462 in_f->ilf_size);
1463 ASSERT(0);
1464 return XFS_ERROR(EIO);
1465 }
1466
1467 item->ri_total = in_f->ilf_size;
1468 item->ri_buf =
1469 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
1470 KM_SLEEP);
1462 } 1471 }
1463 ASSERT(item->ri_total > item->ri_cnt); 1472 ASSERT(item->ri_total > item->ri_cnt);
1464 /* Description region is ri_buf[0] */ 1473 /* Description region is ri_buf[0] */
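
The log-recovery hunk above stops trusting ilf_size read from disk: the region count is range-checked before it sizes an allocation. A small sketch of the same validate-then-allocate step; the MAX_REGIONS limit here is illustrative, not the XFS constant:

#include <stdio.h>
#include <stdlib.h>

#define MAX_REGIONS 256         /* illustrative upper bound */

struct region { void *addr; size_t len; };

/* Returns a zeroed array of 'count' regions, or NULL when the
 * (possibly corrupt) count read from disk is out of range. */
static struct region *alloc_regions(unsigned int count)
{
        if (count == 0 || count > MAX_REGIONS) {
                fprintf(stderr, "bad number of regions (%u)\n", count);
                return NULL;
        }
        return calloc(count, sizeof(struct region));
}

int main(void)
{
        struct region *ok = alloc_regions(8);
        struct region *bad = alloc_regions(70000);      /* rejected */

        printf("ok=%p bad=%p\n", (void *)ok, (void *)bad);
        free(ok);
        return 0;
}
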
diff --git a/include/asm-frv/ftrace.h b/include/asm-frv/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/include/asm-frv/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5406e70aba86..d3bc3c86df6a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -61,6 +61,30 @@
61#define BRANCH_PROFILE() 61#define BRANCH_PROFILE()
62#endif 62#endif
63 63
64#ifdef CONFIG_EVENT_TRACER
65#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \
66 *(_ftrace_events) \
67 VMLINUX_SYMBOL(__stop_ftrace_events) = .;
68#else
69#define FTRACE_EVENTS()
70#endif
71
72#ifdef CONFIG_TRACING
73#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \
74 *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \
75 VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
76#else
77#define TRACE_PRINTKS()
78#endif
79
80#ifdef CONFIG_FTRACE_SYSCALLS
81#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \
82 *(__syscalls_metadata) \
83 VMLINUX_SYMBOL(__stop_syscalls_metadata) = .;
84#else
85#define TRACE_SYSCALLS()
86#endif
87
64/* .data section */ 88/* .data section */
65#define DATA_DATA \ 89#define DATA_DATA \
66 *(.data) \ 90 *(.data) \
@@ -81,7 +105,10 @@
81 *(__tracepoints) \ 105 *(__tracepoints) \
82 VMLINUX_SYMBOL(__stop___tracepoints) = .; \ 106 VMLINUX_SYMBOL(__stop___tracepoints) = .; \
83 LIKELY_PROFILE() \ 107 LIKELY_PROFILE() \
84 BRANCH_PROFILE() 108 BRANCH_PROFILE() \
109 TRACE_PRINTKS() \
110 FTRACE_EVENTS() \
111 TRACE_SYSCALLS()
85 112
86#define RO_DATA(align) \ 113#define RO_DATA(align) \
87 . = ALIGN((align)); \ 114 . = ALIGN((align)); \
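
FTRACE_EVENTS(), TRACE_PRINTKS() and TRACE_SYSCALLS() all use the same trick: items emitted into a named section are bracketed by __start_*/__stop_* symbols so the kernel can walk them at runtime. The pattern also works in userspace with GNU ld on ELF targets, which creates __start_<section> and __stop_<section> automatically for any section whose name is a valid C identifier. A minimal sketch with an illustrative "my_events" section:

#include <stdio.h>

/* Each "event" is a string pointer placed into the my_events section. */
#define DEFINE_EVENT(name, str) \
        static const char *name __attribute__((section("my_events"), used)) = str

DEFINE_EVENT(ev_open,  "open");
DEFINE_EVENT(ev_close, "close");

/* Provided by GNU ld for sections with C-identifier names. */
extern const char *__start_my_events[];
extern const char *__stop_my_events[];

int main(void)
{
        const char **p;

        for (p = __start_my_events; p < __stop_my_events; p++)
                printf("event: %s\n", *p);
        return 0;
}
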
diff --git a/include/asm-m32r/ftrace.h b/include/asm-m32r/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/include/asm-m32r/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/include/asm-mn10300/ftrace.h b/include/asm-mn10300/ftrace.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/include/asm-mn10300/ftrace.h
@@ -0,0 +1 @@
/* empty */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 6e915878e88c..d960889e92ef 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -144,6 +144,9 @@ struct blk_user_trace_setup {
144 144
145#ifdef __KERNEL__ 145#ifdef __KERNEL__
146#if defined(CONFIG_BLK_DEV_IO_TRACE) 146#if defined(CONFIG_BLK_DEV_IO_TRACE)
147
148#include <linux/sysfs.h>
149
147struct blk_trace { 150struct blk_trace {
148 int trace_state; 151 int trace_state;
149 struct rchan *rchan; 152 struct rchan *rchan;
@@ -194,6 +197,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
194extern int blk_trace_startstop(struct request_queue *q, int start); 197extern int blk_trace_startstop(struct request_queue *q, int start);
195extern int blk_trace_remove(struct request_queue *q); 198extern int blk_trace_remove(struct request_queue *q);
196 199
200extern struct attribute_group blk_trace_attr_group;
201
197#else /* !CONFIG_BLK_DEV_IO_TRACE */ 202#else /* !CONFIG_BLK_DEV_IO_TRACE */
198#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 203#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
199#define blk_trace_shutdown(q) do { } while (0) 204#define blk_trace_shutdown(q) do { } while (0)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b9cb7e..6dc1c652447e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,15 +1,18 @@
1#ifndef _LINUX_FTRACE_H 1#ifndef _LINUX_FTRACE_H
2#define _LINUX_FTRACE_H 2#define _LINUX_FTRACE_H
3 3
4#include <linux/linkage.h> 4#include <linux/trace_clock.h>
5#include <linux/fs.h>
6#include <linux/ktime.h>
7#include <linux/init.h>
8#include <linux/types.h>
9#include <linux/module.h>
10#include <linux/kallsyms.h> 5#include <linux/kallsyms.h>
6#include <linux/linkage.h>
11#include <linux/bitops.h> 7#include <linux/bitops.h>
8#include <linux/module.h>
9#include <linux/ktime.h>
12#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/types.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14
15#include <asm/ftrace.h>
13 16
14#ifdef CONFIG_FUNCTION_TRACER 17#ifdef CONFIG_FUNCTION_TRACER
15 18
@@ -95,9 +98,41 @@ stack_trace_sysctl(struct ctl_table *table, int write,
95 loff_t *ppos); 98 loff_t *ppos);
96#endif 99#endif
97 100
101struct ftrace_func_command {
102 struct list_head list;
103 char *name;
104 int (*func)(char *func, char *cmd,
105 char *params, int enable);
106};
107
98#ifdef CONFIG_DYNAMIC_FTRACE 108#ifdef CONFIG_DYNAMIC_FTRACE
99/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ 109
100#include <asm/ftrace.h> 110int ftrace_arch_code_modify_prepare(void);
111int ftrace_arch_code_modify_post_process(void);
112
113struct seq_file;
114
115struct ftrace_probe_ops {
116 void (*func)(unsigned long ip,
117 unsigned long parent_ip,
118 void **data);
119 int (*callback)(unsigned long ip, void **data);
120 void (*free)(void **data);
121 int (*print)(struct seq_file *m,
122 unsigned long ip,
123 struct ftrace_probe_ops *ops,
124 void *data);
125};
126
127extern int
128register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
129 void *data);
130extern void
131unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
132 void *data);
133extern void
134unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
135extern void unregister_ftrace_function_probe_all(char *glob);
101 136
102enum { 137enum {
103 FTRACE_FL_FREE = (1 << 0), 138 FTRACE_FL_FREE = (1 << 0),
@@ -119,6 +154,9 @@ struct dyn_ftrace {
119int ftrace_force_update(void); 154int ftrace_force_update(void);
120void ftrace_set_filter(unsigned char *buf, int len, int reset); 155void ftrace_set_filter(unsigned char *buf, int len, int reset);
121 156
157int register_ftrace_command(struct ftrace_func_command *cmd);
158int unregister_ftrace_command(struct ftrace_func_command *cmd);
159
122/* defined in arch */ 160/* defined in arch */
123extern int ftrace_ip_converted(unsigned long ip); 161extern int ftrace_ip_converted(unsigned long ip);
124extern int ftrace_dyn_arch_init(void *data); 162extern int ftrace_dyn_arch_init(void *data);
@@ -126,6 +164,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
126extern void ftrace_caller(void); 164extern void ftrace_caller(void);
127extern void ftrace_call(void); 165extern void ftrace_call(void);
128extern void mcount_call(void); 166extern void mcount_call(void);
167
168#ifndef FTRACE_ADDR
169#define FTRACE_ADDR ((unsigned long)ftrace_caller)
170#endif
129#ifdef CONFIG_FUNCTION_GRAPH_TRACER 171#ifdef CONFIG_FUNCTION_GRAPH_TRACER
130extern void ftrace_graph_caller(void); 172extern void ftrace_graph_caller(void);
131extern int ftrace_enable_ftrace_graph_caller(void); 173extern int ftrace_enable_ftrace_graph_caller(void);
@@ -136,7 +178,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
136#endif 178#endif
137 179
138/** 180/**
139 * ftrace_make_nop - convert code into top 181 * ftrace_make_nop - convert code into nop
140 * @mod: module structure if called by module load initialization 182 * @mod: module structure if called by module load initialization
141 * @rec: the mcount call site record 183 * @rec: the mcount call site record
142 * @addr: the address that the call site should be calling 184 * @addr: the address that the call site should be calling
@@ -181,7 +223,6 @@ extern int ftrace_make_nop(struct module *mod,
181 */ 223 */
182extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); 224extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
183 225
184
185/* May be defined in arch */ 226/* May be defined in arch */
186extern int ftrace_arch_read_dyn_info(char *buf, int size); 227extern int ftrace_arch_read_dyn_info(char *buf, int size);
187 228
@@ -198,6 +239,14 @@ extern void ftrace_enable_daemon(void);
198# define ftrace_disable_daemon() do { } while (0) 239# define ftrace_disable_daemon() do { } while (0)
199# define ftrace_enable_daemon() do { } while (0) 240# define ftrace_enable_daemon() do { } while (0)
200static inline void ftrace_release(void *start, unsigned long size) { } 241static inline void ftrace_release(void *start, unsigned long size) { }
242static inline int register_ftrace_command(struct ftrace_func_command *cmd)
243{
244 return -EINVAL;
245}
246static inline int unregister_ftrace_command(char *cmd_name)
247{
248 return -EINVAL;
249}
201#endif /* CONFIG_DYNAMIC_FTRACE */ 250#endif /* CONFIG_DYNAMIC_FTRACE */
202 251
203/* totally disable ftrace - can not re-enable after this */ 252/* totally disable ftrace - can not re-enable after this */
@@ -233,24 +282,25 @@ static inline void __ftrace_enabled_restore(int enabled)
233#endif 282#endif
234} 283}
235 284
236#ifdef CONFIG_FRAME_POINTER 285#ifndef HAVE_ARCH_CALLER_ADDR
237/* TODO: need to fix this for ARM */ 286# ifdef CONFIG_FRAME_POINTER
238# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) 287# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
239# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) 288# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
240# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) 289# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
241# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) 290# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
242# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) 291# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
243# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) 292# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
244# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) 293# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
245#else 294# else
246# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) 295# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
247# define CALLER_ADDR1 0UL 296# define CALLER_ADDR1 0UL
248# define CALLER_ADDR2 0UL 297# define CALLER_ADDR2 0UL
249# define CALLER_ADDR3 0UL 298# define CALLER_ADDR3 0UL
250# define CALLER_ADDR4 0UL 299# define CALLER_ADDR4 0UL
251# define CALLER_ADDR5 0UL 300# define CALLER_ADDR5 0UL
252# define CALLER_ADDR6 0UL 301# define CALLER_ADDR6 0UL
253#endif 302# endif
303#endif /* ifndef HAVE_ARCH_CALLER_ADDR */
254 304
255#ifdef CONFIG_IRQSOFF_TRACER 305#ifdef CONFIG_IRQSOFF_TRACER
256 extern void time_hardirqs_on(unsigned long a0, unsigned long a1); 306 extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
@@ -268,54 +318,6 @@ static inline void __ftrace_enabled_restore(int enabled)
268# define trace_preempt_off(a0, a1) do { } while (0) 318# define trace_preempt_off(a0, a1) do { } while (0)
269#endif 319#endif
270 320
271#ifdef CONFIG_TRACING
272extern int ftrace_dump_on_oops;
273
274extern void tracing_start(void);
275extern void tracing_stop(void);
276extern void ftrace_off_permanent(void);
277
278extern void
279ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
280
281/**
282 * ftrace_printk - printf formatting in the ftrace buffer
283 * @fmt: the printf format for printing
284 *
285 * Note: __ftrace_printk is an internal function for ftrace_printk and
286 * the @ip is passed in via the ftrace_printk macro.
287 *
288 * This function allows a kernel developer to debug fast path sections
289 * that printk is not appropriate for. By scattering in various
290 * printk like tracing in the code, a developer can quickly see
291 * where problems are occurring.
292 *
293 * This is intended as a debugging tool for the developer only.
294 * Please refrain from leaving ftrace_printks scattered around in
295 * your code.
296 */
297# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
298extern int
299__ftrace_printk(unsigned long ip, const char *fmt, ...)
300 __attribute__ ((format (printf, 2, 3)));
301extern void ftrace_dump(void);
302#else
303static inline void
304ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
305static inline int
306ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
307
308static inline void tracing_start(void) { }
309static inline void tracing_stop(void) { }
310static inline void ftrace_off_permanent(void) { }
311static inline int
312ftrace_printk(const char *fmt, ...)
313{
314 return 0;
315}
316static inline void ftrace_dump(void) { }
317#endif
318
319#ifdef CONFIG_FTRACE_MCOUNT_RECORD 321#ifdef CONFIG_FTRACE_MCOUNT_RECORD
320extern void ftrace_init(void); 322extern void ftrace_init(void);
321extern void ftrace_init_module(struct module *mod, 323extern void ftrace_init_module(struct module *mod,
@@ -327,36 +329,6 @@ ftrace_init_module(struct module *mod,
327 unsigned long *start, unsigned long *end) { } 329 unsigned long *start, unsigned long *end) { }
328#endif 330#endif
329 331
330enum {
331 POWER_NONE = 0,
332 POWER_CSTATE = 1,
333 POWER_PSTATE = 2,
334};
335
336struct power_trace {
337#ifdef CONFIG_POWER_TRACER
338 ktime_t stamp;
339 ktime_t end;
340 int type;
341 int state;
342#endif
343};
344
345#ifdef CONFIG_POWER_TRACER
346extern void trace_power_start(struct power_trace *it, unsigned int type,
347 unsigned int state);
348extern void trace_power_mark(struct power_trace *it, unsigned int type,
349 unsigned int state);
350extern void trace_power_end(struct power_trace *it);
351#else
352static inline void trace_power_start(struct power_trace *it, unsigned int type,
353 unsigned int state) { }
354static inline void trace_power_mark(struct power_trace *it, unsigned int type,
355 unsigned int state) { }
356static inline void trace_power_end(struct power_trace *it) { }
357#endif
358
359
360/* 332/*
361 * Structure that defines an entry function trace. 333 * Structure that defines an entry function trace.
362 */ 334 */
@@ -380,6 +352,30 @@ struct ftrace_graph_ret {
380#ifdef CONFIG_FUNCTION_GRAPH_TRACER 352#ifdef CONFIG_FUNCTION_GRAPH_TRACER
381 353
382/* 354/*
355 * Stack of return addresses for functions
356 * of a thread.
357 * Used in struct thread_info
358 */
359struct ftrace_ret_stack {
360 unsigned long ret;
361 unsigned long func;
362 unsigned long long calltime;
363};
364
365/*
366 * Primary handler of a function return.
 367 * It relies on ftrace_return_to_handler.
368 * Defined in entry_32/64.S
369 */
370extern void return_to_handler(void);
371
372extern int
373ftrace_push_return_trace(unsigned long ret, unsigned long long time,
374 unsigned long func, int *depth);
375extern void
376ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
377
378/*
383 * Sometimes we don't want to trace a function with the function 379 * Sometimes we don't want to trace a function with the function
384 * graph tracer but we want them to keep traced by the usual function 380 * graph tracer but we want them to keep traced by the usual function
385 * tracer if the function graph tracer is not configured. 381 * tracer if the function graph tracer is not configured.
@@ -490,6 +486,50 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
490 return tsk->trace & TSK_TRACE_FL_GRAPH; 486 return tsk->trace & TSK_TRACE_FL_GRAPH;
491} 487}
492 488
489extern int ftrace_dump_on_oops;
490
493#endif /* CONFIG_TRACING */ 491#endif /* CONFIG_TRACING */
494 492
493
494#ifdef CONFIG_HW_BRANCH_TRACER
495
496void trace_hw_branch(u64 from, u64 to);
497void trace_hw_branch_oops(void);
498
499#else /* CONFIG_HW_BRANCH_TRACER */
500
501static inline void trace_hw_branch(u64 from, u64 to) {}
502static inline void trace_hw_branch_oops(void) {}
503
504#endif /* CONFIG_HW_BRANCH_TRACER */
505
506/*
507 * A syscall entry in the ftrace syscalls array.
508 *
509 * @name: name of the syscall
510 * @nb_args: number of parameters it takes
511 * @types: list of types as strings
512 * @args: list of args as strings (args[i] matches types[i])
513 */
514struct syscall_metadata {
515 const char *name;
516 int nb_args;
517 const char **types;
518 const char **args;
519};
520
521#ifdef CONFIG_FTRACE_SYSCALLS
522extern void arch_init_ftrace_syscalls(void);
523extern struct syscall_metadata *syscall_nr_to_meta(int nr);
524extern void start_ftrace_syscalls(void);
525extern void stop_ftrace_syscalls(void);
526extern void ftrace_syscall_enter(struct pt_regs *regs);
527extern void ftrace_syscall_exit(struct pt_regs *regs);
528#else
529static inline void start_ftrace_syscalls(void) { }
530static inline void stop_ftrace_syscalls(void) { }
531static inline void ftrace_syscall_enter(struct pt_regs *regs) { }
532static inline void ftrace_syscall_exit(struct pt_regs *regs) { }
533#endif
534
495#endif /* _LINUX_FTRACE_H */ 535#endif /* _LINUX_FTRACE_H */
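
The reworked CALLER_ADDR block above lets an architecture override the whole set before falling back to __builtin_return_address(); per the CONFIG_FRAME_POINTER split, levels above 0 are only meaningful when frame pointers are kept, while level 0 is always available. A tiny userspace sketch of the level-0 fallback:

#include <stdio.h>

#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))

static __attribute__((noinline)) void traced_function(void)
{
        /* The address inside the caller that this function will return to. */
        printf("called from %#lx\n", CALLER_ADDR0);
}

int main(void)
{
        traced_function();
        return 0;
}
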
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index 366a054d0b05..dca7bf8cffe2 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
2#define _LINUX_FTRACE_IRQ_H 2#define _LINUX_FTRACE_IRQ_H
3 3
4 4
5#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) 5#ifdef CONFIG_FTRACE_NMI_ENTER
6extern void ftrace_nmi_enter(void); 6extern void ftrace_nmi_enter(void);
7extern void ftrace_nmi_exit(void); 7extern void ftrace_nmi_exit(void);
8#else 8#else
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f83288347dda..faa1cf848bcd 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -15,55 +15,61 @@
15 * - bits 0-7 are the preemption count (max preemption depth: 256) 15 * - bits 0-7 are the preemption count (max preemption depth: 256)
16 * - bits 8-15 are the softirq count (max # of softirqs: 256) 16 * - bits 8-15 are the softirq count (max # of softirqs: 256)
17 * 17 *
18 * The hardirq count can be overridden per architecture, the default is: 18 * The hardirq count can in theory reach the same as NR_IRQS.
19 * In reality, the number of nested IRQS is limited to the stack
20 * size as well. For archs with over 1000 IRQS it is not practical
21 * to expect that they will all nest. We give a max of 10 bits for
22 * hardirq nesting. An arch may choose to give less than 10 bits.
23 * m68k expects it to be 8.
19 * 24 *
20 * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) 25 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
21 * - ( bit 28 is the PREEMPT_ACTIVE flag. ) 26 * - bit 26 is the NMI_MASK
27 * - bit 28 is the PREEMPT_ACTIVE flag
22 * 28 *
23 * PREEMPT_MASK: 0x000000ff 29 * PREEMPT_MASK: 0x000000ff
24 * SOFTIRQ_MASK: 0x0000ff00 30 * SOFTIRQ_MASK: 0x0000ff00
25 * HARDIRQ_MASK: 0x0fff0000 31 * HARDIRQ_MASK: 0x03ff0000
32 * NMI_MASK: 0x04000000
26 */ 33 */
27#define PREEMPT_BITS 8 34#define PREEMPT_BITS 8
28#define SOFTIRQ_BITS 8 35#define SOFTIRQ_BITS 8
36#define NMI_BITS 1
29 37
30#ifndef HARDIRQ_BITS 38#define MAX_HARDIRQ_BITS 10
31#define HARDIRQ_BITS 12
32 39
33#ifndef MAX_HARDIRQS_PER_CPU 40#ifndef HARDIRQ_BITS
34#define MAX_HARDIRQS_PER_CPU NR_IRQS 41# define HARDIRQ_BITS MAX_HARDIRQ_BITS
35#endif 42#endif
36 43
37/* 44#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
38 * The hardirq mask has to be large enough to have space for potentially 45#error HARDIRQ_BITS too high!
39 * all IRQ sources in the system nesting on a single CPU.
40 */
41#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
42# error HARDIRQ_BITS is too low!
43#endif
44#endif 46#endif
45 47
46#define PREEMPT_SHIFT 0 48#define PREEMPT_SHIFT 0
47#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) 49#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
48#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) 50#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
51#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
49 52
50#define __IRQ_MASK(x) ((1UL << (x))-1) 53#define __IRQ_MASK(x) ((1UL << (x))-1)
51 54
52#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) 55#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
53#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) 56#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
54#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) 57#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
58#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
55 59
56#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) 60#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
57#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) 61#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
58#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 62#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
63#define NMI_OFFSET (1UL << NMI_SHIFT)
59 64
60#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) 65#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
61#error PREEMPT_ACTIVE is too low! 66#error PREEMPT_ACTIVE is too low!
62#endif 67#endif
63 68
64#define hardirq_count() (preempt_count() & HARDIRQ_MASK) 69#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
65#define softirq_count() (preempt_count() & SOFTIRQ_MASK) 70#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
66#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) 71#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
72 | NMI_MASK))
67 73
68/* 74/*
69 * Are we doing bottom half or hardware interrupt processing? 75 * Are we doing bottom half or hardware interrupt processing?
@@ -73,6 +79,11 @@
73#define in_softirq() (softirq_count()) 79#define in_softirq() (softirq_count())
74#define in_interrupt() (irq_count()) 80#define in_interrupt() (irq_count())
75 81
82/*
83 * Are we in NMI context?
84 */
85#define in_nmi() (preempt_count() & NMI_MASK)
86
76#if defined(CONFIG_PREEMPT) 87#if defined(CONFIG_PREEMPT)
77# define PREEMPT_INATOMIC_BASE kernel_locked() 88# define PREEMPT_INATOMIC_BASE kernel_locked()
78# define PREEMPT_CHECK_OFFSET 1 89# define PREEMPT_CHECK_OFFSET 1
@@ -164,20 +175,24 @@ extern void irq_enter(void);
164 */ 175 */
165extern void irq_exit(void); 176extern void irq_exit(void);
166 177
167#define nmi_enter() \ 178#define nmi_enter() \
168 do { \ 179 do { \
169 ftrace_nmi_enter(); \ 180 ftrace_nmi_enter(); \
170 lockdep_off(); \ 181 BUG_ON(in_nmi()); \
171 rcu_nmi_enter(); \ 182 add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
172 __irq_enter(); \ 183 lockdep_off(); \
184 rcu_nmi_enter(); \
185 trace_hardirq_enter(); \
173 } while (0) 186 } while (0)
174 187
175#define nmi_exit() \ 188#define nmi_exit() \
176 do { \ 189 do { \
177 __irq_exit(); \ 190 trace_hardirq_exit(); \
178 rcu_nmi_exit(); \ 191 rcu_nmi_exit(); \
179 lockdep_on(); \ 192 lockdep_on(); \
180 ftrace_nmi_exit(); \ 193 BUG_ON(!in_nmi()); \
194 sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
195 ftrace_nmi_exit(); \
181 } while (0) 196 } while (0)
182 197
183#endif /* LINUX_HARDIRQ_H */ 198#endif /* LINUX_HARDIRQ_H */
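
The reshuffled layout above packs four counters into one preempt_count word: 8 preempt bits, 8 softirq bits, 10 hardirq bits and a single NMI bit. A quick userspace check that the shifts and masks come out to the values quoted in the comment (0x000000ff, 0x0000ff00, 0x03ff0000, 0x04000000):

#include <stdio.h>

#define PREEMPT_BITS  8
#define SOFTIRQ_BITS  8
#define HARDIRQ_BITS  10
#define NMI_BITS      1

#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT     (HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x) ((1UL << (x)) - 1)

int main(void)
{
        printf("PREEMPT_MASK: 0x%08lx\n", __IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT);
        printf("SOFTIRQ_MASK: 0x%08lx\n", __IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT);
        printf("HARDIRQ_MASK: 0x%08lx\n", __IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT);
        printf("NMI_MASK:     0x%08lx\n", __IRQ_MASK(NMI_BITS) << NMI_SHIFT);
        return 0;
}
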
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 74bde13224c9..b02a3f1d46a0 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -24,8 +24,8 @@
24# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) 24# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
25# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) 25# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
26# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) 26# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
27# define trace_softirq_enter() do { current->softirq_context++; } while (0) 27# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
28# define trace_softirq_exit() do { current->softirq_context--; } while (0) 28# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
29# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, 29# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
30#else 30#else
31# define trace_hardirqs_on() do { } while (0) 31# define trace_hardirqs_on() do { } while (0)
@@ -38,8 +38,8 @@
38# define trace_softirqs_enabled(p) 0 38# define trace_softirqs_enabled(p) 0
39# define trace_hardirq_enter() do { } while (0) 39# define trace_hardirq_enter() do { } while (0)
40# define trace_hardirq_exit() do { } while (0) 40# define trace_hardirq_exit() do { } while (0)
41# define trace_softirq_enter() do { } while (0) 41# define lockdep_softirq_enter() do { } while (0)
42# define trace_softirq_exit() do { } while (0) 42# define lockdep_softirq_exit() do { } while (0)
43# define INIT_TRACE_IRQFLAGS 43# define INIT_TRACE_IRQFLAGS
44#endif 44#endif
45 45
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7fa371898e3e..7742798c9208 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -242,6 +242,19 @@ extern struct ratelimit_state printk_ratelimit_state;
242extern int printk_ratelimit(void); 242extern int printk_ratelimit(void);
243extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, 243extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
244 unsigned int interval_msec); 244 unsigned int interval_msec);
245
246/*
247 * Print a one-time message (analogous to WARN_ONCE() et al):
248 */
249#define printk_once(x...) ({ \
250 static int __print_once = 1; \
251 \
252 if (__print_once) { \
253 __print_once = 0; \
254 printk(x); \
255 } \
256})
257
245#else 258#else
246static inline int vprintk(const char *s, va_list args) 259static inline int vprintk(const char *s, va_list args)
247 __attribute__ ((format (printf, 1, 0))); 260 __attribute__ ((format (printf, 1, 0)));
@@ -253,6 +266,10 @@ static inline int printk_ratelimit(void) { return 0; }
253static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ 266static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
254 unsigned int interval_msec) \ 267 unsigned int interval_msec) \
255 { return false; } 268 { return false; }
269
270/* No effect, but we still get type checking even in the !PRINTK case: */
271#define printk_once(x...) printk(x)
272
256#endif 273#endif
257 274
258extern int printk_needs_cpu(int cpu); 275extern int printk_needs_cpu(int cpu);
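
printk_once() above is simply a static flag wrapped around printk() so a message fires only the first time the call site is reached. A userspace sketch of the same do-once pattern (GCC statement-expression macro, mirroring the kernel form; print_once() is an illustrative name):

#include <stdio.h>

#define print_once(...) ({                      \
        static int __done;                      \
                                                \
        if (!__done) {                          \
                __done = 1;                     \
                printf(__VA_ARGS__);            \
        }                                       \
})

int main(void)
{
        int i;

        for (i = 0; i < 3; i++)
                print_once("initialized on iteration %d\n", i);
        return 0;
}
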
@@ -368,6 +385,125 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
368#endif 385#endif
369 386
370/* 387/*
388 * General tracing related utility functions - trace_printk(),
389 * tracing_on/tracing_off and tracing_start()/tracing_stop
390 *
391 * Use tracing_on/tracing_off when you want to quickly turn on or off
392 * tracing. It simply enables or disables the recording of the trace events.
393 * This also corresponds to the user space debugfs/tracing/tracing_on
394 * file, which gives a means for the kernel and userspace to interact.
395 * Place a tracing_off() in the kernel where you want tracing to end.
396 * From user space, examine the trace, and then echo 1 > tracing_on
397 * to continue tracing.
398 *
399 * tracing_stop/tracing_start has slightly more overhead. It is used
400 * by things like suspend to ram where disabling the recording of the
401 * trace is not enough, but tracing must actually stop because things
402 * like calling smp_processor_id() may crash the system.
403 *
404 * Most likely, you want to use tracing_on/tracing_off.
405 */
406#ifdef CONFIG_RING_BUFFER
407void tracing_on(void);
408void tracing_off(void);
 409/* tracing_off_permanent stops recording with no way to bring it back */
410void tracing_off_permanent(void);
411int tracing_is_on(void);
412#else
413static inline void tracing_on(void) { }
414static inline void tracing_off(void) { }
415static inline void tracing_off_permanent(void) { }
416static inline int tracing_is_on(void) { return 0; }
417#endif
418#ifdef CONFIG_TRACING
419extern void tracing_start(void);
420extern void tracing_stop(void);
421extern void ftrace_off_permanent(void);
422
423extern void
424ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
425
426static inline void __attribute__ ((format (printf, 1, 2)))
427____trace_printk_check_format(const char *fmt, ...)
428{
429}
430#define __trace_printk_check_format(fmt, args...) \
431do { \
432 if (0) \
433 ____trace_printk_check_format(fmt, ##args); \
434} while (0)
435
436/**
437 * trace_printk - printf formatting in the ftrace buffer
438 * @fmt: the printf format for printing
439 *
440 * Note: __trace_printk is an internal function for trace_printk and
441 * the @ip is passed in via the trace_printk macro.
442 *
443 * This function allows a kernel developer to debug fast path sections
444 * that printk is not appropriate for. By scattering in various
445 * printk like tracing in the code, a developer can quickly see
446 * where problems are occurring.
447 *
448 * This is intended as a debugging tool for the developer only.
449 * Please refrain from leaving trace_printks scattered around in
450 * your code.
451 */
452
453#define trace_printk(fmt, args...) \
454do { \
455 static const char *trace_printk_fmt \
456 __attribute__((section("__trace_printk_fmt"))); \
457 \
458 if (!trace_printk_fmt) \
459 trace_printk_fmt = fmt; \
460 \
461 __trace_printk_check_format(fmt, ##args); \
462 __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \
463} while (0)
464
465extern int
466__trace_printk(unsigned long ip, const char *fmt, ...)
467 __attribute__ ((format (printf, 2, 3)));
468
469#define ftrace_vprintk(fmt, vargs) \
470do { \
471 static const char *trace_printk_fmt \
472 __attribute__((section("__trace_printk_fmt"))); \
473 \
474 if (!trace_printk_fmt) \
475 trace_printk_fmt = fmt; \
476 \
477 __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \
478} while (0)
479
480extern int
481__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
482
483extern void ftrace_dump(void);
484#else
485static inline void
486ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
487static inline int
488trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
489
490static inline void tracing_start(void) { }
491static inline void tracing_stop(void) { }
492static inline void ftrace_off_permanent(void) { }
493static inline int
494trace_printk(const char *fmt, ...)
495{
496 return 0;
497}
498static inline int
499ftrace_vprintk(const char *fmt, va_list ap)
500{
501 return 0;
502}
503static inline void ftrace_dump(void) { }
504#endif /* CONFIG_TRACING */
505
506/*
371 * Display an IP address in readable format. 507 * Display an IP address in readable format.
372 */ 508 */
373 509
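
trace_printk() above gets printf-style format checking at no runtime cost through the `if (0)` call to ____trace_printk_check_format(): the never-executed dummy carries the format attribute, the compiler type-checks the arguments, and the dead call is discarded. That matters because the real sink is handed the saved trace_printk_fmt pointer rather than the literal, so it cannot be checked directly. A standalone sketch of the trick; my_trace(), check_format() and emit() are illustrative names:

#include <stdarg.h>
#include <stdio.h>

/* Never called; exists only so the compiler type-checks fmt against the args. */
static void __attribute__((format(printf, 1, 2)))
check_format(const char *fmt, ...)
{
        (void)fmt;
}

/* Stand-in for __trace_printk(): it receives a saved (non-literal) format
 * pointer, so it cannot be format-checked at the call site by itself. */
static void emit(const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        vprintf(fmt, ap);
        va_end(ap);
}

#define my_trace(fmt, ...)                              \
do {                                                    \
        static const char *saved_fmt;                   \
                                                        \
        if (!saved_fmt)                                 \
                saved_fmt = fmt;                        \
        if (0)                                          \
                check_format(fmt, ##__VA_ARGS__);       \
        emit(saved_fmt, ##__VA_ARGS__);                 \
} while (0)

int main(void)
{
        my_trace("%d packets from %s\n", 3, "eth0");
        return 0;
}
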
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 23bf02fb124f..5a58ea3e91e9 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -20,43 +20,10 @@ struct lockdep_map;
20#include <linux/stacktrace.h> 20#include <linux/stacktrace.h>
21 21
22/* 22/*
23 * Lock-class usage-state bits: 23 * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
24 * the total number of states... :-(
24 */ 25 */
25enum lock_usage_bit 26#define XXX_LOCK_USAGE_STATES (1+3*4)
26{
27 LOCK_USED = 0,
28 LOCK_USED_IN_HARDIRQ,
29 LOCK_USED_IN_SOFTIRQ,
30 LOCK_ENABLED_SOFTIRQS,
31 LOCK_ENABLED_HARDIRQS,
32 LOCK_USED_IN_HARDIRQ_READ,
33 LOCK_USED_IN_SOFTIRQ_READ,
34 LOCK_ENABLED_SOFTIRQS_READ,
35 LOCK_ENABLED_HARDIRQS_READ,
36 LOCK_USAGE_STATES
37};
38
39/*
40 * Usage-state bitmasks:
41 */
42#define LOCKF_USED (1 << LOCK_USED)
43#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ)
44#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ)
45#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS)
46#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS)
47
48#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
49#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
50
51#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ)
52#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ)
53#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ)
54#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ)
55
56#define LOCKF_ENABLED_IRQS_READ \
57 (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
58#define LOCKF_USED_IN_IRQ_READ \
59 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
60 27
61#define MAX_LOCKDEP_SUBCLASSES 8UL 28#define MAX_LOCKDEP_SUBCLASSES 8UL
62 29
@@ -97,7 +64,7 @@ struct lock_class {
97 * IRQ/softirq usage tracking bits: 64 * IRQ/softirq usage tracking bits:
98 */ 65 */
99 unsigned long usage_mask; 66 unsigned long usage_mask;
100 struct stack_trace usage_traces[LOCK_USAGE_STATES]; 67 struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES];
101 68
102 /* 69 /*
103 * These fields represent a directed graph of lock dependencies, 70 * These fields represent a directed graph of lock dependencies,
@@ -324,7 +291,11 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
324 lock_set_class(lock, lock->name, lock->key, subclass, ip); 291 lock_set_class(lock, lock->name, lock->key, subclass, ip);
325} 292}
326 293
327# define INIT_LOCKDEP .lockdep_recursion = 0, 294extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
295extern void lockdep_clear_current_reclaim_state(void);
296extern void lockdep_trace_alloc(gfp_t mask);
297
298# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
328 299
329#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) 300#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
330 301
@@ -342,6 +313,9 @@ static inline void lockdep_on(void)
342# define lock_release(l, n, i) do { } while (0) 313# define lock_release(l, n, i) do { } while (0)
343# define lock_set_class(l, n, k, s, i) do { } while (0) 314# define lock_set_class(l, n, k, s, i) do { } while (0)
344# define lock_set_subclass(l, s, i) do { } while (0) 315# define lock_set_subclass(l, s, i) do { } while (0)
316# define lockdep_set_current_reclaim_state(g) do { } while (0)
317# define lockdep_clear_current_reclaim_state() do { } while (0)
318# define lockdep_trace_alloc(g) do { } while (0)
345# define lockdep_init() do { } while (0) 319# define lockdep_init() do { } while (0)
346# define lockdep_info() do { } while (0) 320# define lockdep_info() do { } while (0)
347# define lockdep_init_map(lock, name, key, sub) \ 321# define lockdep_init_map(lock, name, key, sub) \
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 3fdc10806d31..86a6c0f0518d 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -99,4 +99,10 @@ enum mem_add_context { BOOT, HOTPLUG };
99#define hotplug_memory_notifier(fn, pri) do { } while (0) 99#define hotplug_memory_notifier(fn, pri) do { } while (0)
100#endif 100#endif
101 101
102/*
103 * Kernel text modification mutex, used for code patching. Users of this lock
104 * can sleep.
105 */
106extern struct mutex text_mutex;
107
102#endif /* _LINUX_MEMORY_H_ */ 108#endif /* _LINUX_MEMORY_H_ */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 92915e81443f..d84feb7bdbf0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -276,4 +276,7 @@ struct mm_struct {
276#endif 276#endif
277}; 277};
278 278
279/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
280#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
281
279#endif /* _LINUX_MM_TYPES_H */ 282#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index 145a75528cc1..22d9878e868c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -329,6 +329,11 @@ struct module
329 unsigned int num_tracepoints; 329 unsigned int num_tracepoints;
330#endif 330#endif
331 331
332#ifdef CONFIG_TRACING
333 const char **trace_bprintk_fmt_start;
334 unsigned int num_trace_bprintk_fmt;
335#endif
336
332#ifdef CONFIG_MODULE_UNLOAD 337#ifdef CONFIG_MODULE_UNLOAD
333 /* What modules depend on me? */ 338 /* What modules depend on me? */
334 struct list_head modules_which_use_me; 339 struct list_head modules_which_use_me;
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 7a0e5c4f8072..3069ec7e0ab8 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -50,8 +50,10 @@ struct mutex {
50 atomic_t count; 50 atomic_t count;
51 spinlock_t wait_lock; 51 spinlock_t wait_lock;
52 struct list_head wait_list; 52 struct list_head wait_list;
53#ifdef CONFIG_DEBUG_MUTEXES 53#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
54 struct thread_info *owner; 54 struct thread_info *owner;
55#endif
56#ifdef CONFIG_DEBUG_MUTEXES
55 const char *name; 57 const char *name;
56 void *magic; 58 void *magic;
57#endif 59#endif
@@ -68,7 +70,6 @@ struct mutex_waiter {
68 struct list_head list; 70 struct list_head list;
69 struct task_struct *task; 71 struct task_struct *task;
70#ifdef CONFIG_DEBUG_MUTEXES 72#ifdef CONFIG_DEBUG_MUTEXES
71 struct mutex *lock;
72 void *magic; 73 void *magic;
73#endif 74#endif
74}; 75};
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b3b359660082..b1a0068a5557 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -8,7 +8,7 @@ struct ring_buffer;
8struct ring_buffer_iter; 8struct ring_buffer_iter;
9 9
10/* 10/*
11 * Don't reference this struct directly, use functions below. 11 * Don't refer to this struct directly, use functions below.
12 */ 12 */
13struct ring_buffer_event { 13struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27; 14 u32 type:2, len:3, time_delta:27;
@@ -74,13 +74,10 @@ void ring_buffer_free(struct ring_buffer *buffer);
74 74
75int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 75int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
76 76
77struct ring_buffer_event * 77struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
78ring_buffer_lock_reserve(struct ring_buffer *buffer, 78 unsigned long length);
79 unsigned long length,
80 unsigned long *flags);
81int ring_buffer_unlock_commit(struct ring_buffer *buffer, 79int ring_buffer_unlock_commit(struct ring_buffer *buffer,
82 struct ring_buffer_event *event, 80 struct ring_buffer_event *event);
83 unsigned long flags);
84int ring_buffer_write(struct ring_buffer *buffer, 81int ring_buffer_write(struct ring_buffer *buffer,
85 unsigned long length, void *data); 82 unsigned long length, void *data);
86 83
@@ -124,14 +121,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
124u64 ring_buffer_time_stamp(int cpu); 121u64 ring_buffer_time_stamp(int cpu);
125void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); 122void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
126 123
127void tracing_on(void); 124size_t ring_buffer_page_len(void *page);
128void tracing_off(void); 125
129void tracing_off_permanent(void);
130 126
131void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); 127void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
132void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); 128void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
133int ring_buffer_read_page(struct ring_buffer *buffer, 129int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
134 void **data_page, int cpu, int full); 130 size_t len, int cpu, int full);
135 131
136enum ring_buffer_flags { 132enum ring_buffer_flags {
137 RB_FL_OVERWRITE = 1 << 0, 133 RB_FL_OVERWRITE = 1 << 0,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7c7698583bb..89cd308cc7a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_uninterruptible(void);
137extern unsigned long nr_active(void); 137extern unsigned long nr_active(void);
138extern unsigned long nr_iowait(void); 138extern unsigned long nr_iowait(void);
139 139
140extern unsigned long get_parent_ip(unsigned long addr);
141
140struct seq_file; 142struct seq_file;
141struct cfs_rq; 143struct cfs_rq;
142struct task_group; 144struct task_group;
@@ -331,7 +333,9 @@ extern signed long schedule_timeout(signed long timeout);
331extern signed long schedule_timeout_interruptible(signed long timeout); 333extern signed long schedule_timeout_interruptible(signed long timeout);
332extern signed long schedule_timeout_killable(signed long timeout); 334extern signed long schedule_timeout_killable(signed long timeout);
333extern signed long schedule_timeout_uninterruptible(signed long timeout); 335extern signed long schedule_timeout_uninterruptible(signed long timeout);
336asmlinkage void __schedule(void);
334asmlinkage void schedule(void); 337asmlinkage void schedule(void);
338extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
335 339
336struct nsproxy; 340struct nsproxy;
337struct user_namespace; 341struct user_namespace;
@@ -1327,6 +1331,7 @@ struct task_struct {
1327 int lockdep_depth; 1331 int lockdep_depth;
1328 unsigned int lockdep_recursion; 1332 unsigned int lockdep_recursion;
1329 struct held_lock held_locks[MAX_LOCK_DEPTH]; 1333 struct held_lock held_locks[MAX_LOCK_DEPTH];
1334 gfp_t lockdep_reclaim_gfp;
1330#endif 1335#endif
1331 1336
1332/* journalling filesystem info */ 1337/* journalling filesystem info */
@@ -1418,6 +1423,9 @@ struct task_struct {
1418#endif 1423#endif
1419}; 1424};
1420 1425
1426/* Future-safe accessor for struct task_struct's cpus_allowed. */
1427#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed)
1428
1421/* 1429/*
1422 * Priority of a process goes from 0..MAX_PRIO-1, valid RT 1430 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
1423 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH 1431 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -1669,6 +1677,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1669 return set_cpus_allowed_ptr(p, &new_mask); 1677 return set_cpus_allowed_ptr(p, &new_mask);
1670} 1678}
1671 1679
1680/*
1681 * Architectures can set this to 1 if they have specified
1682 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
1683 * but then during bootup it turns out that sched_clock()
1684 * is reliable after all:
1685 */
1686#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1687extern int sched_clock_stable;
1688#endif
1689
1672extern unsigned long long sched_clock(void); 1690extern unsigned long long sched_clock(void);
1673 1691
1674extern void sched_clock_init(void); 1692extern void sched_clock_init(void);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 6ca6a7b66d75..f4523651fa42 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <trace/kmemtrace.h>
17 18
18/* Size description struct for general caches. */ 19/* Size description struct for general caches. */
19struct cache_sizes { 20struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
28void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 29void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
29void *__kmalloc(size_t size, gfp_t flags); 30void *__kmalloc(size_t size, gfp_t flags);
30 31
31static inline void *kmalloc(size_t size, gfp_t flags) 32#ifdef CONFIG_KMEMTRACE
33extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
34extern size_t slab_buffer_size(struct kmem_cache *cachep);
35#else
36static __always_inline void *
37kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
32{ 38{
39 return kmem_cache_alloc(cachep, flags);
40}
41static inline size_t slab_buffer_size(struct kmem_cache *cachep)
42{
43 return 0;
44}
45#endif
46
47static __always_inline void *kmalloc(size_t size, gfp_t flags)
48{
49 struct kmem_cache *cachep;
50 void *ret;
51
33 if (__builtin_constant_p(size)) { 52 if (__builtin_constant_p(size)) {
34 int i = 0; 53 int i = 0;
35 54
@@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
47found: 66found:
48#ifdef CONFIG_ZONE_DMA 67#ifdef CONFIG_ZONE_DMA
49 if (flags & GFP_DMA) 68 if (flags & GFP_DMA)
50 return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, 69 cachep = malloc_sizes[i].cs_dmacachep;
51 flags); 70 else
52#endif 71#endif
53 return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); 72 cachep = malloc_sizes[i].cs_cachep;
73
74 ret = kmem_cache_alloc_notrace(cachep, flags);
75
76 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
77 size, slab_buffer_size(cachep), flags);
78
79 return ret;
54 } 80 }
55 return __kmalloc(size, flags); 81 return __kmalloc(size, flags);
56} 82}
@@ -59,8 +85,25 @@ found:
59extern void *__kmalloc_node(size_t size, gfp_t flags, int node); 85extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
60extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 86extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
61 87
62static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 88#ifdef CONFIG_KMEMTRACE
89extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
90 gfp_t flags,
91 int nodeid);
92#else
93static __always_inline void *
94kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
95 gfp_t flags,
96 int nodeid)
97{
98 return kmem_cache_alloc_node(cachep, flags, nodeid);
99}
100#endif
101
102static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
63{ 103{
104 struct kmem_cache *cachep;
105 void *ret;
106
64 if (__builtin_constant_p(size)) { 107 if (__builtin_constant_p(size)) {
65 int i = 0; 108 int i = 0;
66 109
@@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
78found: 121found:
79#ifdef CONFIG_ZONE_DMA 122#ifdef CONFIG_ZONE_DMA
80 if (flags & GFP_DMA) 123 if (flags & GFP_DMA)
81 return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, 124 cachep = malloc_sizes[i].cs_dmacachep;
82 flags, node); 125 else
83#endif 126#endif
84 return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, 127 cachep = malloc_sizes[i].cs_cachep;
85 flags, node); 128
129 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
130
131 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
132 ret, size, slab_buffer_size(cachep),
133 flags, node);
134
135 return ret;
86 } 136 }
87 return __kmalloc_node(size, flags, node); 137 return __kmalloc_node(size, flags, node);
88} 138}
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab9..0ec00b39d006 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
3 3
4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 4void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
5 5
6static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 6static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
7 gfp_t flags)
7{ 8{
8 return kmem_cache_alloc_node(cachep, flags, -1); 9 return kmem_cache_alloc_node(cachep, flags, -1);
9} 10}
10 11
11void *__kmalloc_node(size_t size, gfp_t flags, int node); 12void *__kmalloc_node(size_t size, gfp_t flags, int node);
12 13
13static inline void *kmalloc_node(size_t size, gfp_t flags, int node) 14static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
14{ 15{
15 return __kmalloc_node(size, flags, node); 16 return __kmalloc_node(size, flags, node);
16} 17}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
23 * kmalloc is the normal method of allocating memory 24 * kmalloc is the normal method of allocating memory
24 * in the kernel. 25 * in the kernel.
25 */ 26 */
26static inline void *kmalloc(size_t size, gfp_t flags) 27static __always_inline void *kmalloc(size_t size, gfp_t flags)
27{ 28{
28 return __kmalloc_node(size, flags, -1); 29 return __kmalloc_node(size, flags, -1);
29} 30}
30 31
31static inline void *__kmalloc(size_t size, gfp_t flags) 32static __always_inline void *__kmalloc(size_t size, gfp_t flags)
32{ 33{
33 return kmalloc(size, flags); 34 return kmalloc(size, flags);
34} 35}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aacd..9e3a575b2c30 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <trace/kmemtrace.h>
13 14
14enum stat_item { 15enum stat_item {
15 ALLOC_FASTPATH, /* Allocation from cpu slab */ 16 ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -121,10 +122,23 @@ struct kmem_cache {
121#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) 122#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
122 123
123/* 124/*
125 * Maximum kmalloc object size handled by SLUB. Larger object allocations
126 * are passed through to the page allocator. The page allocator "fastpath"
127 * is relatively slow so we need this value sufficiently high so that
128 * performance critical objects are allocated through the SLUB fastpath.
129 *
130 * This should be dropped to PAGE_SIZE / 2 once the page allocator
131 * "fastpath" becomes competitive with the slab allocator fastpaths.
132 */
133#define SLUB_MAX_SIZE (PAGE_SIZE)
134
135#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1)
136
137/*
124 * We keep the general caches in an array of slab caches that are used for 138 * We keep the general caches in an array of slab caches that are used for
125 * 2^x bytes of allocations. 139 * 2^x bytes of allocations.
126 */ 140 */
127extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1]; 141extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT];
128 142
129/* 143/*
130 * Sorry that the following has to be that ugly but some versions of GCC 144 * Sorry that the following has to be that ugly but some versions of GCC
@@ -204,15 +218,33 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
204void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 218void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
205void *__kmalloc(size_t size, gfp_t flags); 219void *__kmalloc(size_t size, gfp_t flags);
206 220
221#ifdef CONFIG_KMEMTRACE
222extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
223#else
224static __always_inline void *
225kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
226{
227 return kmem_cache_alloc(s, gfpflags);
228}
229#endif
230
207static __always_inline void *kmalloc_large(size_t size, gfp_t flags) 231static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
208{ 232{
209 return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); 233 unsigned int order = get_order(size);
234 void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
235
236 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
237 size, PAGE_SIZE << order, flags);
238
239 return ret;
210} 240}
211 241
212static __always_inline void *kmalloc(size_t size, gfp_t flags) 242static __always_inline void *kmalloc(size_t size, gfp_t flags)
213{ 243{
244 void *ret;
245
214 if (__builtin_constant_p(size)) { 246 if (__builtin_constant_p(size)) {
215 if (size > PAGE_SIZE) 247 if (size > SLUB_MAX_SIZE)
216 return kmalloc_large(size, flags); 248 return kmalloc_large(size, flags);
217 249
218 if (!(flags & SLUB_DMA)) { 250 if (!(flags & SLUB_DMA)) {
@@ -221,7 +253,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
221 if (!s) 253 if (!s)
222 return ZERO_SIZE_PTR; 254 return ZERO_SIZE_PTR;
223 255
224 return kmem_cache_alloc(s, flags); 256 ret = kmem_cache_alloc_notrace(s, flags);
257
258 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
259 _THIS_IP_, ret,
260 size, s->size, flags);
261
262 return ret;
225 } 263 }
226 } 264 }
227 return __kmalloc(size, flags); 265 return __kmalloc(size, flags);
@@ -231,16 +269,38 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
231void *__kmalloc_node(size_t size, gfp_t flags, int node); 269void *__kmalloc_node(size_t size, gfp_t flags, int node);
232void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); 270void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
233 271
272#ifdef CONFIG_KMEMTRACE
273extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
274 gfp_t gfpflags,
275 int node);
276#else
277static __always_inline void *
278kmem_cache_alloc_node_notrace(struct kmem_cache *s,
279 gfp_t gfpflags,
280 int node)
281{
282 return kmem_cache_alloc_node(s, gfpflags, node);
283}
284#endif
285
234static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) 286static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
235{ 287{
288 void *ret;
289
236 if (__builtin_constant_p(size) && 290 if (__builtin_constant_p(size) &&
237 size <= PAGE_SIZE && !(flags & SLUB_DMA)) { 291 size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
238 struct kmem_cache *s = kmalloc_slab(size); 292 struct kmem_cache *s = kmalloc_slab(size);
239 293
240 if (!s) 294 if (!s)
241 return ZERO_SIZE_PTR; 295 return ZERO_SIZE_PTR;
242 296
243 return kmem_cache_alloc_node(s, flags, node); 297 ret = kmem_cache_alloc_node_notrace(s, flags, node);
298
299 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
300 _THIS_IP_, ret,
301 size, s->size, flags, node);
302
303 return ret;
244 } 304 }
245 return __kmalloc_node(size, flags, node); 305 return __kmalloc_node(size, flags, node);
246} 306}
diff --git a/include/linux/string.h b/include/linux/string.h
index d18fc198aa2f..27ac31784ad2 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -10,6 +10,7 @@
10#include <linux/compiler.h> /* for inline */ 10#include <linux/compiler.h> /* for inline */
11#include <linux/types.h> /* for size_t */ 11#include <linux/types.h> /* for size_t */
12#include <linux/stddef.h> /* for NULL */ 12#include <linux/stddef.h> /* for NULL */
13#include <stdarg.h>
13 14
14extern char *strndup_user(const char __user *, long); 15extern char *strndup_user(const char __user *, long);
15 16
@@ -111,6 +112,12 @@ extern void argv_free(char **argv);
111 112
112extern bool sysfs_streq(const char *s1, const char *s2); 113extern bool sysfs_streq(const char *s1, const char *s2);
113 114
115#ifdef CONFIG_BINARY_PRINTF
116int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
117int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
118int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
119#endif
120
114extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 121extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
115 const void *from, size_t available); 122 const void *from, size_t available);
116 123
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f9f900cfd066..0cff9bb80b02 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,6 +65,7 @@ struct old_linux_dirent;
65#include <asm/signal.h> 65#include <asm/signal.h>
66#include <linux/quota.h> 66#include <linux/quota.h>
67#include <linux/key.h> 67#include <linux/key.h>
68#include <linux/ftrace.h>
68 69
69#define __SC_DECL1(t1, a1) t1 a1 70#define __SC_DECL1(t1, a1) t1 a1
70#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) 71#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
@@ -95,7 +96,46 @@ struct old_linux_dirent;
95#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) 96#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
96#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) 97#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
97 98
99#ifdef CONFIG_FTRACE_SYSCALLS
100#define __SC_STR_ADECL1(t, a) #a
101#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__)
102#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__)
103#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__)
104#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__)
105#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__)
106
107#define __SC_STR_TDECL1(t, a) #t
108#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__)
109#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__)
110#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__)
111#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__)
112#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__)
113
114#define SYSCALL_METADATA(sname, nb) \
115 static const struct syscall_metadata __used \
116 __attribute__((__aligned__(4))) \
117 __attribute__((section("__syscalls_metadata"))) \
118 __syscall_meta_##sname = { \
119 .name = "sys"#sname, \
120 .nb_args = nb, \
121 .types = types_##sname, \
122 .args = args_##sname, \
123 }
124
125#define SYSCALL_DEFINE0(sname) \
126 static const struct syscall_metadata __used \
127 __attribute__((__aligned__(4))) \
128 __attribute__((section("__syscalls_metadata"))) \
129 __syscall_meta_##sname = { \
130 .name = "sys_"#sname, \
131 .nb_args = 0, \
132 }; \
133 asmlinkage long sys_##sname(void)
134
135#else
98#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) 136#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void)
137#endif
138
99#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) 139#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
100#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) 140#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
101#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) 141#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
@@ -117,10 +157,26 @@ struct old_linux_dirent;
117#endif 157#endif
118#endif 158#endif
119 159
160#ifdef CONFIG_FTRACE_SYSCALLS
161#define SYSCALL_DEFINEx(x, sname, ...) \
162 static const char *types_##sname[] = { \
163 __SC_STR_TDECL##x(__VA_ARGS__) \
164 }; \
165 static const char *args_##sname[] = { \
166 __SC_STR_ADECL##x(__VA_ARGS__) \
167 }; \
168 SYSCALL_METADATA(sname, x); \
169 __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
170#else
171#define SYSCALL_DEFINEx(x, sname, ...) \
172 __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
173#endif
174
120#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 175#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
121 176
122#define SYSCALL_DEFINE(name) static inline long SYSC_##name 177#define SYSCALL_DEFINE(name) static inline long SYSC_##name
123#define SYSCALL_DEFINEx(x, name, ...) \ 178
179#define __SYSCALL_DEFINEx(x, name, ...) \
124 asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ 180 asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \
125 static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ 181 static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \
126 asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ 182 asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \
@@ -134,7 +190,7 @@ struct old_linux_dirent;
134#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ 190#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
135 191
136#define SYSCALL_DEFINE(name) asmlinkage long sys_##name 192#define SYSCALL_DEFINE(name) asmlinkage long sys_##name
137#define SYSCALL_DEFINEx(x, name, ...) \ 193#define __SYSCALL_DEFINEx(x, name, ...) \
138 asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) 194 asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
139 195
140#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ 196#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index daf9685b861c..51774eb87cc6 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -5,6 +5,7 @@
5#include <linux/ktime.h> 5#include <linux/ktime.h>
6#include <linux/stddef.h> 6#include <linux/stddef.h>
7#include <linux/debugobjects.h> 7#include <linux/debugobjects.h>
8#include <linux/stringify.h>
8 9
9struct tvec_base; 10struct tvec_base;
10 11
@@ -21,52 +22,126 @@ struct timer_list {
21 char start_comm[16]; 22 char start_comm[16];
22 int start_pid; 23 int start_pid;
23#endif 24#endif
25#ifdef CONFIG_LOCKDEP
26 struct lockdep_map lockdep_map;
27#endif
24}; 28};
25 29
26extern struct tvec_base boot_tvec_bases; 30extern struct tvec_base boot_tvec_bases;
27 31
32#ifdef CONFIG_LOCKDEP
33/*
34 * NB: because we have to copy the lockdep_map, setting the lockdep_map key
35 * (second argument) here is required, otherwise it could be initialised to
36 * the copy of the lockdep_map later! We use the pointer to and the string
37 * "<file>:<line>" as the key resp. the name of the lockdep_map.
38 */
39#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) \
40 .lockdep_map = STATIC_LOCKDEP_MAP_INIT(_kn, &_kn),
41#else
42#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
43#endif
44
28#define TIMER_INITIALIZER(_function, _expires, _data) { \ 45#define TIMER_INITIALIZER(_function, _expires, _data) { \
29 .entry = { .prev = TIMER_ENTRY_STATIC }, \ 46 .entry = { .prev = TIMER_ENTRY_STATIC }, \
30 .function = (_function), \ 47 .function = (_function), \
31 .expires = (_expires), \ 48 .expires = (_expires), \
32 .data = (_data), \ 49 .data = (_data), \
33 .base = &boot_tvec_bases, \ 50 .base = &boot_tvec_bases, \
51 __TIMER_LOCKDEP_MAP_INITIALIZER( \
52 __FILE__ ":" __stringify(__LINE__)) \
34 } 53 }
35 54
36#define DEFINE_TIMER(_name, _function, _expires, _data) \ 55#define DEFINE_TIMER(_name, _function, _expires, _data) \
37 struct timer_list _name = \ 56 struct timer_list _name = \
38 TIMER_INITIALIZER(_function, _expires, _data) 57 TIMER_INITIALIZER(_function, _expires, _data)
39 58
40void init_timer(struct timer_list *timer); 59void init_timer_key(struct timer_list *timer,
41void init_timer_deferrable(struct timer_list *timer); 60 const char *name,
61 struct lock_class_key *key);
62void init_timer_deferrable_key(struct timer_list *timer,
63 const char *name,
64 struct lock_class_key *key);
65
66#ifdef CONFIG_LOCKDEP
67#define init_timer(timer) \
68 do { \
69 static struct lock_class_key __key; \
70 init_timer_key((timer), #timer, &__key); \
71 } while (0)
72
73#define init_timer_deferrable(timer) \
74 do { \
75 static struct lock_class_key __key; \
76 init_timer_deferrable_key((timer), #timer, &__key); \
77 } while (0)
78
79#define init_timer_on_stack(timer) \
80 do { \
81 static struct lock_class_key __key; \
82 init_timer_on_stack_key((timer), #timer, &__key); \
83 } while (0)
84
85#define setup_timer(timer, fn, data) \
86 do { \
87 static struct lock_class_key __key; \
88 setup_timer_key((timer), #timer, &__key, (fn), (data));\
89 } while (0)
90
91#define setup_timer_on_stack(timer, fn, data) \
92 do { \
93 static struct lock_class_key __key; \
94 setup_timer_on_stack_key((timer), #timer, &__key, \
95 (fn), (data)); \
96 } while (0)
97#else
98#define init_timer(timer)\
99 init_timer_key((timer), NULL, NULL)
100#define init_timer_deferrable(timer)\
101 init_timer_deferrable_key((timer), NULL, NULL)
102#define init_timer_on_stack(timer)\
103 init_timer_on_stack_key((timer), NULL, NULL)
104#define setup_timer(timer, fn, data)\
105 setup_timer_key((timer), NULL, NULL, (fn), (data))
106#define setup_timer_on_stack(timer, fn, data)\
107 setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
108#endif
42 109
43#ifdef CONFIG_DEBUG_OBJECTS_TIMERS 110#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
44extern void init_timer_on_stack(struct timer_list *timer); 111extern void init_timer_on_stack_key(struct timer_list *timer,
112 const char *name,
113 struct lock_class_key *key);
45extern void destroy_timer_on_stack(struct timer_list *timer); 114extern void destroy_timer_on_stack(struct timer_list *timer);
46#else 115#else
47static inline void destroy_timer_on_stack(struct timer_list *timer) { } 116static inline void destroy_timer_on_stack(struct timer_list *timer) { }
48static inline void init_timer_on_stack(struct timer_list *timer) 117static inline void init_timer_on_stack_key(struct timer_list *timer,
118 const char *name,
119 struct lock_class_key *key)
49{ 120{
50 init_timer(timer); 121 init_timer_key(timer, name, key);
51} 122}
52#endif 123#endif
53 124
54static inline void setup_timer(struct timer_list * timer, 125static inline void setup_timer_key(struct timer_list * timer,
126 const char *name,
127 struct lock_class_key *key,
55 void (*function)(unsigned long), 128 void (*function)(unsigned long),
56 unsigned long data) 129 unsigned long data)
57{ 130{
58 timer->function = function; 131 timer->function = function;
59 timer->data = data; 132 timer->data = data;
60 init_timer(timer); 133 init_timer_key(timer, name, key);
61} 134}
62 135
63static inline void setup_timer_on_stack(struct timer_list *timer, 136static inline void setup_timer_on_stack_key(struct timer_list *timer,
137 const char *name,
138 struct lock_class_key *key,
64 void (*function)(unsigned long), 139 void (*function)(unsigned long),
65 unsigned long data) 140 unsigned long data)
66{ 141{
67 timer->function = function; 142 timer->function = function;
68 timer->data = data; 143 timer->data = data;
69 init_timer_on_stack(timer); 144 init_timer_on_stack_key(timer, name, key);
70} 145}
71 146
72/** 147/**
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644
index 000000000000..7a8130384087
--- /dev/null
+++ b/include/linux/trace_clock.h
@@ -0,0 +1,19 @@
1#ifndef _LINUX_TRACE_CLOCK_H
2#define _LINUX_TRACE_CLOCK_H
3
4/*
5 * 3 trace clock variants, with differing scalability/precision
6 * tradeoffs:
7 *
8 * - local: CPU-local trace clock
9 * - medium: scalable global clock with some jitter
10 * - global: globally monotonic, serialized clock
11 */
12#include <linux/compiler.h>
13#include <linux/types.h>
14
15extern u64 notrace trace_clock_local(void);
16extern u64 notrace trace_clock(void);
17extern u64 notrace trace_clock_global(void);
18
19#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 757005458366..d35a7ee7611f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,8 +31,8 @@ struct tracepoint {
31 * Keep in sync with vmlinux.lds.h. 31 * Keep in sync with vmlinux.lds.h.
32 */ 32 */
33 33
34#define TPPROTO(args...) args 34#define TP_PROTO(args...) args
35#define TPARGS(args...) args 35#define TP_ARGS(args...) args
36 36
37#ifdef CONFIG_TRACEPOINTS 37#ifdef CONFIG_TRACEPOINTS
38 38
@@ -65,7 +65,7 @@ struct tracepoint {
65 { \ 65 { \
66 if (unlikely(__tracepoint_##name.state)) \ 66 if (unlikely(__tracepoint_##name.state)) \
67 __DO_TRACE(&__tracepoint_##name, \ 67 __DO_TRACE(&__tracepoint_##name, \
68 TPPROTO(proto), TPARGS(args)); \ 68 TP_PROTO(proto), TP_ARGS(args)); \
69 } \ 69 } \
70 static inline int register_trace_##name(void (*probe)(proto)) \ 70 static inline int register_trace_##name(void (*probe)(proto)) \
71 { \ 71 { \
@@ -153,4 +153,114 @@ static inline void tracepoint_synchronize_unregister(void)
153 synchronize_sched(); 153 synchronize_sched();
154} 154}
155 155
156#define PARAMS(args...) args
157#define TRACE_FORMAT(name, proto, args, fmt) \
158 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
159
160
161/*
162 * For use with the TRACE_EVENT macro:
163 *
164 * We define a tracepoint, its arguments, its printk format
165 * and its 'fast binay record' layout.
166 *
167 * Firstly, name your tracepoint via TRACE_EVENT(name : the
168 * 'subsystem_event' notation is fine.
169 *
170 * Think about this whole construct as the
171 * 'trace_sched_switch() function' from now on.
172 *
173 *
174 * TRACE_EVENT(sched_switch,
175 *
176 * *
177 * * A function has a regular function arguments
178 * * prototype, declare it via TP_PROTO():
179 * *
180 *
181 * TP_PROTO(struct rq *rq, struct task_struct *prev,
182 * struct task_struct *next),
183 *
184 * *
185 * * Define the call signature of the 'function'.
186 * * (Design sidenote: we use this instead of a
187 * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
188 * *
189 *
190 * TP_ARGS(rq, prev, next),
191 *
192 * *
193 * * Fast binary tracing: define the trace record via
194 * * TP_STRUCT__entry(). You can think about it like a
195 * * regular C structure local variable definition.
196 * *
197 * * This is how the trace record is structured and will
198 * * be saved into the ring buffer. These are the fields
199 * * that will be exposed to user-space in
200 * * /debug/tracing/events/<*>/format.
201 * *
202 * * The declared 'local variable' is called '__entry'
203 * *
204 * * __field(pid_t, prev_prid) is equivalent to a standard declariton:
205 * *
206 * * pid_t prev_pid;
207 * *
208 * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
209 * *
210 * * char prev_comm[TASK_COMM_LEN];
211 * *
212 *
213 * TP_STRUCT__entry(
214 * __array( char, prev_comm, TASK_COMM_LEN )
215 * __field( pid_t, prev_pid )
216 * __field( int, prev_prio )
217 * __array( char, next_comm, TASK_COMM_LEN )
218 * __field( pid_t, next_pid )
219 * __field( int, next_prio )
220 * ),
221 *
222 * *
223 * * Assign the entry into the trace record, by embedding
224 * * a full C statement block into TP_fast_assign(). You
225 * * can refer to the trace record as '__entry' -
226 * * otherwise you can put arbitrary C code in here.
227 * *
228 * * Note: this C code will execute every time a trace event
229 * * happens, on an active tracepoint.
230 * *
231 *
232 * TP_fast_assign(
233 * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
234 * __entry->prev_pid = prev->pid;
235 * __entry->prev_prio = prev->prio;
236 * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
237 * __entry->next_pid = next->pid;
238 * __entry->next_prio = next->prio;
239 * )
240 *
241 * *
242 * * Formatted output of a trace record via TP_printk().
243 * * This is how the tracepoint will appear under ftrace
244 * * plugins that make use of this tracepoint.
245 * *
246 * * (raw-binary tracing wont actually perform this step.)
247 * *
248 *
249 * TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
250 * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
251 * __entry->next_comm, __entry->next_pid, __entry->next_prio),
252 *
253 * );
254 *
255 * This macro construct is thus used for the regular printk format
256 * tracing setup, it is used to construct a function pointer based
257 * tracepoint callback (this is used by programmatic plugins and
258 * can also by used by generic instrumentation like SystemTap), and
259 * it is also used to expose a structured trace record in
260 * /debug/tracing/events/.
261 */
262
263#define TRACE_EVENT(name, proto, args, struct, assign, print) \
264 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
265
156#endif 266#endif
diff --git a/include/trace/block.h b/include/trace/block.h
index 25c6a1fd5b77..25b7068b819e 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -5,72 +5,72 @@
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DECLARE_TRACE(block_rq_abort, 7DECLARE_TRACE(block_rq_abort,
8 TPPROTO(struct request_queue *q, struct request *rq), 8 TP_PROTO(struct request_queue *q, struct request *rq),
9 TPARGS(q, rq)); 9 TP_ARGS(q, rq));
10 10
11DECLARE_TRACE(block_rq_insert, 11DECLARE_TRACE(block_rq_insert,
12 TPPROTO(struct request_queue *q, struct request *rq), 12 TP_PROTO(struct request_queue *q, struct request *rq),
13 TPARGS(q, rq)); 13 TP_ARGS(q, rq));
14 14
15DECLARE_TRACE(block_rq_issue, 15DECLARE_TRACE(block_rq_issue,
16 TPPROTO(struct request_queue *q, struct request *rq), 16 TP_PROTO(struct request_queue *q, struct request *rq),
17 TPARGS(q, rq)); 17 TP_ARGS(q, rq));
18 18
19DECLARE_TRACE(block_rq_requeue, 19DECLARE_TRACE(block_rq_requeue,
20 TPPROTO(struct request_queue *q, struct request *rq), 20 TP_PROTO(struct request_queue *q, struct request *rq),
21 TPARGS(q, rq)); 21 TP_ARGS(q, rq));
22 22
23DECLARE_TRACE(block_rq_complete, 23DECLARE_TRACE(block_rq_complete,
24 TPPROTO(struct request_queue *q, struct request *rq), 24 TP_PROTO(struct request_queue *q, struct request *rq),
25 TPARGS(q, rq)); 25 TP_ARGS(q, rq));
26 26
27DECLARE_TRACE(block_bio_bounce, 27DECLARE_TRACE(block_bio_bounce,
28 TPPROTO(struct request_queue *q, struct bio *bio), 28 TP_PROTO(struct request_queue *q, struct bio *bio),
29 TPARGS(q, bio)); 29 TP_ARGS(q, bio));
30 30
31DECLARE_TRACE(block_bio_complete, 31DECLARE_TRACE(block_bio_complete,
32 TPPROTO(struct request_queue *q, struct bio *bio), 32 TP_PROTO(struct request_queue *q, struct bio *bio),
33 TPARGS(q, bio)); 33 TP_ARGS(q, bio));
34 34
35DECLARE_TRACE(block_bio_backmerge, 35DECLARE_TRACE(block_bio_backmerge,
36 TPPROTO(struct request_queue *q, struct bio *bio), 36 TP_PROTO(struct request_queue *q, struct bio *bio),
37 TPARGS(q, bio)); 37 TP_ARGS(q, bio));
38 38
39DECLARE_TRACE(block_bio_frontmerge, 39DECLARE_TRACE(block_bio_frontmerge,
40 TPPROTO(struct request_queue *q, struct bio *bio), 40 TP_PROTO(struct request_queue *q, struct bio *bio),
41 TPARGS(q, bio)); 41 TP_ARGS(q, bio));
42 42
43DECLARE_TRACE(block_bio_queue, 43DECLARE_TRACE(block_bio_queue,
44 TPPROTO(struct request_queue *q, struct bio *bio), 44 TP_PROTO(struct request_queue *q, struct bio *bio),
45 TPARGS(q, bio)); 45 TP_ARGS(q, bio));
46 46
47DECLARE_TRACE(block_getrq, 47DECLARE_TRACE(block_getrq,
48 TPPROTO(struct request_queue *q, struct bio *bio, int rw), 48 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
49 TPARGS(q, bio, rw)); 49 TP_ARGS(q, bio, rw));
50 50
51DECLARE_TRACE(block_sleeprq, 51DECLARE_TRACE(block_sleeprq,
52 TPPROTO(struct request_queue *q, struct bio *bio, int rw), 52 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
53 TPARGS(q, bio, rw)); 53 TP_ARGS(q, bio, rw));
54 54
55DECLARE_TRACE(block_plug, 55DECLARE_TRACE(block_plug,
56 TPPROTO(struct request_queue *q), 56 TP_PROTO(struct request_queue *q),
57 TPARGS(q)); 57 TP_ARGS(q));
58 58
59DECLARE_TRACE(block_unplug_timer, 59DECLARE_TRACE(block_unplug_timer,
60 TPPROTO(struct request_queue *q), 60 TP_PROTO(struct request_queue *q),
61 TPARGS(q)); 61 TP_ARGS(q));
62 62
63DECLARE_TRACE(block_unplug_io, 63DECLARE_TRACE(block_unplug_io,
64 TPPROTO(struct request_queue *q), 64 TP_PROTO(struct request_queue *q),
65 TPARGS(q)); 65 TP_ARGS(q));
66 66
67DECLARE_TRACE(block_split, 67DECLARE_TRACE(block_split,
68 TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), 68 TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
69 TPARGS(q, bio, pdu)); 69 TP_ARGS(q, bio, pdu));
70 70
71DECLARE_TRACE(block_remap, 71DECLARE_TRACE(block_remap,
72 TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, 72 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to), 73 sector_t from, sector_t to),
74 TPARGS(q, bio, dev, from, to)); 74 TP_ARGS(q, bio, dev, from, to));
75 75
76#endif 76#endif
diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 000000000000..ff5d4495dc37
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,9 @@
1#ifndef _TRACE_IRQ_H
2#define _TRACE_IRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/tracepoint.h>
6
7#include <trace/irq_event_types.h>
8
9#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
new file mode 100644
index 000000000000..214bb928fe9e
--- /dev/null
+++ b/include/trace/irq_event_types.h
@@ -0,0 +1,43 @@
1
2/* use <trace/irq.h> instead */
3#ifndef TRACE_FORMAT
4# error Do not include this file directly.
5# error Unless you know what you are doing.
6#endif
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM irq
10
11/*
12 * Tracepoint for entry of interrupt handler:
13 */
14TRACE_FORMAT(irq_handler_entry,
15 TP_PROTO(int irq, struct irqaction *action),
16 TP_ARGS(irq, action),
17 TP_FMT("irq=%d handler=%s", irq, action->name)
18 );
19
20/*
21 * Tracepoint for return of an interrupt handler:
22 */
23TRACE_EVENT(irq_handler_exit,
24
25 TP_PROTO(int irq, struct irqaction *action, int ret),
26
27 TP_ARGS(irq, action, ret),
28
29 TP_STRUCT__entry(
30 __field( int, irq )
31 __field( int, ret )
32 ),
33
34 TP_fast_assign(
35 __entry->irq = irq;
36 __entry->ret = ret;
37 ),
38
39 TP_printk("irq=%d return=%s",
40 __entry->irq, __entry->ret ? "handled" : "unhandled")
41);
42
43#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 000000000000..ad8b7857855a
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <linux/types.h>
13#include <linux/marker.h>
14
15enum kmemtrace_type_id {
16 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
17 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
18 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
19};
20
21#ifdef CONFIG_KMEMTRACE
22
23extern void kmemtrace_init(void);
24
25extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
26 unsigned long call_site,
27 const void *ptr,
28 size_t bytes_req,
29 size_t bytes_alloc,
30 gfp_t gfp_flags,
31 int node);
32
33extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
34 unsigned long call_site,
35 const void *ptr);
36
37#else /* CONFIG_KMEMTRACE */
38
39static inline void kmemtrace_init(void)
40{
41}
42
43static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
44 unsigned long call_site,
45 const void *ptr,
46 size_t bytes_req,
47 size_t bytes_alloc,
48 gfp_t gfp_flags,
49 int node)
50{
51}
52
53static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
54 unsigned long call_site,
55 const void *ptr)
56{
57}
58
59#endif /* CONFIG_KMEMTRACE */
60
61static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
62 unsigned long call_site,
63 const void *ptr,
64 size_t bytes_req,
65 size_t bytes_alloc,
66 gfp_t gfp_flags)
67{
68 kmemtrace_mark_alloc_node(type_id, call_site, ptr,
69 bytes_req, bytes_alloc, gfp_flags, -1);
70}
71
72#endif /* __KERNEL__ */
73
74#endif /* _LINUX_KMEMTRACE_H */
75
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
new file mode 100644
index 000000000000..5ca67df87f2a
--- /dev/null
+++ b/include/trace/lockdep.h
@@ -0,0 +1,9 @@
1#ifndef _TRACE_LOCKDEP_H
2#define _TRACE_LOCKDEP_H
3
4#include <linux/lockdep.h>
5#include <linux/tracepoint.h>
6
7#include <trace/lockdep_event_types.h>
8
9#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
new file mode 100644
index 000000000000..adccfcd2ec8f
--- /dev/null
+++ b/include/trace/lockdep_event_types.h
@@ -0,0 +1,44 @@
1
2#ifndef TRACE_FORMAT
3# error Do not include this file directly.
4# error Unless you know what you are doing.
5#endif
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM lock
9
10#ifdef CONFIG_LOCKDEP
11
12TRACE_FORMAT(lock_acquire,
13 TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
14 int trylock, int read, int check,
15 struct lockdep_map *next_lock, unsigned long ip),
16 TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
17 TP_FMT("%s%s%s", trylock ? "try " : "",
18 read ? "read " : "", lock->name)
19 );
20
21TRACE_FORMAT(lock_release,
22 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
23 TP_ARGS(lock, nested, ip),
24 TP_FMT("%s", lock->name)
25 );
26
27#ifdef CONFIG_LOCK_STAT
28
29TRACE_FORMAT(lock_contended,
30 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
31 TP_ARGS(lock, ip),
32 TP_FMT("%s", lock->name)
33 );
34
35TRACE_FORMAT(lock_acquired,
36 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
37 TP_ARGS(lock, ip),
38 TP_FMT("%s", lock->name)
39 );
40
41#endif
42#endif
43
44#undef TRACE_SYSTEM
diff --git a/include/trace/power.h b/include/trace/power.h
new file mode 100644
index 000000000000..ef204666e983
--- /dev/null
+++ b/include/trace/power.h
@@ -0,0 +1,32 @@
1#ifndef _TRACE_POWER_H
2#define _TRACE_POWER_H
3
4#include <linux/ktime.h>
5#include <linux/tracepoint.h>
6
7enum {
8 POWER_NONE = 0,
9 POWER_CSTATE = 1,
10 POWER_PSTATE = 2,
11};
12
13struct power_trace {
14 ktime_t stamp;
15 ktime_t end;
16 int type;
17 int state;
18};
19
20DECLARE_TRACE(power_start,
21 TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state),
22 TP_ARGS(it, type, state));
23
24DECLARE_TRACE(power_mark,
25 TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state),
26 TP_ARGS(it, type, state));
27
28DECLARE_TRACE(power_end,
29 TP_PROTO(struct power_trace *it),
30 TP_ARGS(it));
31
32#endif /* _TRACE_POWER_H */
diff --git a/include/trace/sched.h b/include/trace/sched.h
index 0d81098ee9fc..4e372a1a29bf 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -4,53 +4,6 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DECLARE_TRACE(sched_kthread_stop, 7#include <trace/sched_event_types.h>
8 TPPROTO(struct task_struct *t),
9 TPARGS(t));
10
11DECLARE_TRACE(sched_kthread_stop_ret,
12 TPPROTO(int ret),
13 TPARGS(ret));
14
15DECLARE_TRACE(sched_wait_task,
16 TPPROTO(struct rq *rq, struct task_struct *p),
17 TPARGS(rq, p));
18
19DECLARE_TRACE(sched_wakeup,
20 TPPROTO(struct rq *rq, struct task_struct *p, int success),
21 TPARGS(rq, p, success));
22
23DECLARE_TRACE(sched_wakeup_new,
24 TPPROTO(struct rq *rq, struct task_struct *p, int success),
25 TPARGS(rq, p, success));
26
27DECLARE_TRACE(sched_switch,
28 TPPROTO(struct rq *rq, struct task_struct *prev,
29 struct task_struct *next),
30 TPARGS(rq, prev, next));
31
32DECLARE_TRACE(sched_migrate_task,
33 TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
34 TPARGS(p, orig_cpu, dest_cpu));
35
36DECLARE_TRACE(sched_process_free,
37 TPPROTO(struct task_struct *p),
38 TPARGS(p));
39
40DECLARE_TRACE(sched_process_exit,
41 TPPROTO(struct task_struct *p),
42 TPARGS(p));
43
44DECLARE_TRACE(sched_process_wait,
45 TPPROTO(struct pid *pid),
46 TPARGS(pid));
47
48DECLARE_TRACE(sched_process_fork,
49 TPPROTO(struct task_struct *parent, struct task_struct *child),
50 TPARGS(parent, child));
51
52DECLARE_TRACE(sched_signal_send,
53 TPPROTO(int sig, struct task_struct *p),
54 TPARGS(sig, p));
55 8
56#endif 9#endif
diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h
new file mode 100644
index 000000000000..63547dc1125f
--- /dev/null
+++ b/include/trace/sched_event_types.h
@@ -0,0 +1,337 @@
1
2/* use <trace/sched.h> instead */
3#ifndef TRACE_EVENT
4# error Do not include this file directly.
5# error Unless you know what you are doing.
6#endif
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM sched
10
11/*
12 * Tracepoint for calling kthread_stop, performed to end a kthread:
13 */
14TRACE_EVENT(sched_kthread_stop,
15
16 TP_PROTO(struct task_struct *t),
17
18 TP_ARGS(t),
19
20 TP_STRUCT__entry(
21 __array( char, comm, TASK_COMM_LEN )
22 __field( pid_t, pid )
23 ),
24
25 TP_fast_assign(
26 memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
27 __entry->pid = t->pid;
28 ),
29
30 TP_printk("task %s:%d", __entry->comm, __entry->pid)
31);
32
33/*
34 * Tracepoint for the return value of the kthread stopping:
35 */
36TRACE_EVENT(sched_kthread_stop_ret,
37
38 TP_PROTO(int ret),
39
40 TP_ARGS(ret),
41
42 TP_STRUCT__entry(
43 __field( int, ret )
44 ),
45
46 TP_fast_assign(
47 __entry->ret = ret;
48 ),
49
50 TP_printk("ret %d", __entry->ret)
51);
52
53/*
54 * Tracepoint for waiting on task to unschedule:
55 *
56 * (NOTE: the 'rq' argument is not used by generic trace events,
57 * but used by the latency tracer plugin. )
58 */
59TRACE_EVENT(sched_wait_task,
60
61 TP_PROTO(struct rq *rq, struct task_struct *p),
62
63 TP_ARGS(rq, p),
64
65 TP_STRUCT__entry(
66 __array( char, comm, TASK_COMM_LEN )
67 __field( pid_t, pid )
68 __field( int, prio )
69 ),
70
71 TP_fast_assign(
72 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
73 __entry->pid = p->pid;
74 __entry->prio = p->prio;
75 ),
76
77 TP_printk("task %s:%d [%d]",
78 __entry->comm, __entry->pid, __entry->prio)
79);
80
81/*
82 * Tracepoint for waking up a task:
83 *
84 * (NOTE: the 'rq' argument is not used by generic trace events,
85 * but used by the latency tracer plugin. )
86 */
87TRACE_EVENT(sched_wakeup,
88
89 TP_PROTO(struct rq *rq, struct task_struct *p, int success),
90
91 TP_ARGS(rq, p, success),
92
93 TP_STRUCT__entry(
94 __array( char, comm, TASK_COMM_LEN )
95 __field( pid_t, pid )
96 __field( int, prio )
97 __field( int, success )
98 ),
99
100 TP_fast_assign(
101 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
102 __entry->pid = p->pid;
103 __entry->prio = p->prio;
104 __entry->success = success;
105 ),
106
107 TP_printk("task %s:%d [%d] success=%d",
108 __entry->comm, __entry->pid, __entry->prio,
109 __entry->success)
110);
111
112/*
113 * Tracepoint for waking up a new task:
114 *
115 * (NOTE: the 'rq' argument is not used by generic trace events,
116 * but used by the latency tracer plugin. )
117 */
118TRACE_EVENT(sched_wakeup_new,
119
120 TP_PROTO(struct rq *rq, struct task_struct *p, int success),
121
122 TP_ARGS(rq, p, success),
123
124 TP_STRUCT__entry(
125 __array( char, comm, TASK_COMM_LEN )
126 __field( pid_t, pid )
127 __field( int, prio )
128 __field( int, success )
129 ),
130
131 TP_fast_assign(
132 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
133 __entry->pid = p->pid;
134 __entry->prio = p->prio;
135 __entry->success = success;
136 ),
137
138 TP_printk("task %s:%d [%d] success=%d",
139 __entry->comm, __entry->pid, __entry->prio,
140 __entry->success)
141);
142
143/*
144 * Tracepoint for task switches, performed by the scheduler:
145 *
146 * (NOTE: the 'rq' argument is not used by generic trace events,
147 * but used by the latency tracer plugin. )
148 */
149TRACE_EVENT(sched_switch,
150
151 TP_PROTO(struct rq *rq, struct task_struct *prev,
152 struct task_struct *next),
153
154 TP_ARGS(rq, prev, next),
155
156 TP_STRUCT__entry(
157 __array( char, prev_comm, TASK_COMM_LEN )
158 __field( pid_t, prev_pid )
159 __field( int, prev_prio )
160 __array( char, next_comm, TASK_COMM_LEN )
161 __field( pid_t, next_pid )
162 __field( int, next_prio )
163 ),
164
165 TP_fast_assign(
166 memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
167 __entry->prev_pid = prev->pid;
168 __entry->prev_prio = prev->prio;
169 memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
170 __entry->next_pid = next->pid;
171 __entry->next_prio = next->prio;
172 ),
173
174 TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
175 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
176 __entry->next_comm, __entry->next_pid, __entry->next_prio)
177);
178
179/*
180 * Tracepoint for a task being migrated:
181 */
182TRACE_EVENT(sched_migrate_task,
183
184 TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
185
186 TP_ARGS(p, orig_cpu, dest_cpu),
187
188 TP_STRUCT__entry(
189 __array( char, comm, TASK_COMM_LEN )
190 __field( pid_t, pid )
191 __field( int, prio )
192 __field( int, orig_cpu )
193 __field( int, dest_cpu )
194 ),
195
196 TP_fast_assign(
197 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
198 __entry->pid = p->pid;
199 __entry->prio = p->prio;
200 __entry->orig_cpu = orig_cpu;
201 __entry->dest_cpu = dest_cpu;
202 ),
203
204 TP_printk("task %s:%d [%d] from: %d to: %d",
205 __entry->comm, __entry->pid, __entry->prio,
206 __entry->orig_cpu, __entry->dest_cpu)
207);
208
209/*
210 * Tracepoint for freeing a task:
211 */
212TRACE_EVENT(sched_process_free,
213
214 TP_PROTO(struct task_struct *p),
215
216 TP_ARGS(p),
217
218 TP_STRUCT__entry(
219 __array( char, comm, TASK_COMM_LEN )
220 __field( pid_t, pid )
221 __field( int, prio )
222 ),
223
224 TP_fast_assign(
225 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
226 __entry->pid = p->pid;
227 __entry->prio = p->prio;
228 ),
229
230 TP_printk("task %s:%d [%d]",
231 __entry->comm, __entry->pid, __entry->prio)
232);
233
234/*
235 * Tracepoint for a task exiting:
236 */
237TRACE_EVENT(sched_process_exit,
238
239 TP_PROTO(struct task_struct *p),
240
241 TP_ARGS(p),
242
243 TP_STRUCT__entry(
244 __array( char, comm, TASK_COMM_LEN )
245 __field( pid_t, pid )
246 __field( int, prio )
247 ),
248
249 TP_fast_assign(
250 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
251 __entry->pid = p->pid;
252 __entry->prio = p->prio;
253 ),
254
255 TP_printk("task %s:%d [%d]",
256 __entry->comm, __entry->pid, __entry->prio)
257);
258
259/*
260 * Tracepoint for a waiting task:
261 */
262TRACE_EVENT(sched_process_wait,
263
264 TP_PROTO(struct pid *pid),
265
266 TP_ARGS(pid),
267
268 TP_STRUCT__entry(
269 __array( char, comm, TASK_COMM_LEN )
270 __field( pid_t, pid )
271 __field( int, prio )
272 ),
273
274 TP_fast_assign(
275 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
276 __entry->pid = pid_nr(pid);
277 __entry->prio = current->prio;
278 ),
279
280 TP_printk("task %s:%d [%d]",
281 __entry->comm, __entry->pid, __entry->prio)
282);
283
284/*
285 * Tracepoint for do_fork:
286 */
287TRACE_EVENT(sched_process_fork,
288
289 TP_PROTO(struct task_struct *parent, struct task_struct *child),
290
291 TP_ARGS(parent, child),
292
293 TP_STRUCT__entry(
294 __array( char, parent_comm, TASK_COMM_LEN )
295 __field( pid_t, parent_pid )
296 __array( char, child_comm, TASK_COMM_LEN )
297 __field( pid_t, child_pid )
298 ),
299
300 TP_fast_assign(
301 memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
302 __entry->parent_pid = parent->pid;
303 memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
304 __entry->child_pid = child->pid;
305 ),
306
307 TP_printk("parent %s:%d child %s:%d",
308 __entry->parent_comm, __entry->parent_pid,
309 __entry->child_comm, __entry->child_pid)
310);
311
312/*
313 * Tracepoint for sending a signal:
314 */
315TRACE_EVENT(sched_signal_send,
316
317 TP_PROTO(int sig, struct task_struct *p),
318
319 TP_ARGS(sig, p),
320
321 TP_STRUCT__entry(
322 __field( int, sig )
323 __array( char, comm, TASK_COMM_LEN )
324 __field( pid_t, pid )
325 ),
326
327 TP_fast_assign(
328 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
329 __entry->pid = p->pid;
330 __entry->sig = sig;
331 ),
332
333 TP_printk("sig: %d task %s:%d",
334 __entry->sig, __entry->comm, __entry->pid)
335);
336
337#undef TRACE_SYSTEM
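Each TRACE_EVENT() above also acts as an ordinary tracepoint, so kernel code can attach a probe whose signature matches the event's TP_PROTO(). A minimal consumer for sched_migrate_task might look like the sketch below; the probe body and the commented registration calls are illustrative assumptions, not part of this patch.

    static void probe_migrate(struct task_struct *p, int orig_cpu, int dest_cpu)
    {
            printk(KERN_DEBUG "migrate %s:%d [%d] from %d to %d\n",
                   p->comm, p->pid, p->prio, orig_cpu, dest_cpu);
    }

    /* register_trace_sched_migrate_task(probe_migrate);   */
    /* unregister_trace_sched_migrate_task(probe_migrate); */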
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
new file mode 100644
index 000000000000..df56f5694be6
--- /dev/null
+++ b/include/trace/trace_event_types.h
@@ -0,0 +1,5 @@
1/* trace/<type>_event_types.h here */
2
3#include <trace/sched_event_types.h>
4#include <trace/irq_event_types.h>
5#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
new file mode 100644
index 000000000000..fd13750ca4ba
--- /dev/null
+++ b/include/trace/trace_events.h
@@ -0,0 +1,5 @@
1/* trace/<type>.h here */
2
3#include <trace/sched.h>
4#include <trace/irq.h>
5#include <trace/lockdep.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
new file mode 100644
index 000000000000..7626523deeba
--- /dev/null
+++ b/include/trace/workqueue.h
@@ -0,0 +1,25 @@
1#ifndef __TRACE_WORKQUEUE_H
2#define __TRACE_WORKQUEUE_H
3
4#include <linux/tracepoint.h>
5#include <linux/workqueue.h>
6#include <linux/sched.h>
7
8DECLARE_TRACE(workqueue_insertion,
9 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
10 TP_ARGS(wq_thread, work));
11
12DECLARE_TRACE(workqueue_execution,
13 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
14 TP_ARGS(wq_thread, work));
15
16/* Trace the creation of one workqueue thread on a cpu */
17DECLARE_TRACE(workqueue_creation,
18 TP_PROTO(struct task_struct *wq_thread, int cpu),
19 TP_ARGS(wq_thread, cpu));
20
21DECLARE_TRACE(workqueue_destruction,
22 TP_PROTO(struct task_struct *wq_thread),
23 TP_ARGS(wq_thread));
24
25#endif /* __TRACE_WORKQUEUE_H */
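DECLARE_TRACE() only declares these hooks; some compilation unit still has to provide the matching DEFINE_TRACE() and call trace_*() at the instrumentation site, just as the kernel/irq/handle.c hunk further down does for the irq events. A sketch of the emitting side for workqueue_insertion follows; the helper function is an assumption, only the DEFINE_TRACE()/trace_*() pairing is the point.

    DEFINE_TRACE(workqueue_insertion);

    static void queue_and_trace(struct task_struct *wq_thread,
                                struct work_struct *work,
                                struct list_head *head)
    {
            trace_workqueue_insertion(wq_thread, work);
            list_add_tail(&work->entry, head);
    }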
diff --git a/init/Kconfig b/init/Kconfig
index 38396ec7ee36..69d5190918e5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1005,7 +1005,7 @@ config TRACEPOINTS
1005 1005
1006config MARKERS 1006config MARKERS
1007 bool "Activate markers" 1007 bool "Activate markers"
1008 depends on TRACEPOINTS 1008 select TRACEPOINTS
1009 help 1009 help
1010 Place an empty function call at each marker site. Can be 1010 Place an empty function call at each marker site. Can be
1011 dynamically changed for a probe function. 1011 dynamically changed for a probe function.
diff --git a/init/main.c b/init/main.c
index 6bf83afd654d..20d784ab5ef8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -71,6 +71,7 @@
71#include <asm/setup.h> 71#include <asm/setup.h>
72#include <asm/sections.h> 72#include <asm/sections.h>
73#include <asm/cacheflush.h> 73#include <asm/cacheflush.h>
74#include <trace/kmemtrace.h>
74 75
75#ifdef CONFIG_X86_LOCAL_APIC 76#ifdef CONFIG_X86_LOCAL_APIC
76#include <asm/smp.h> 77#include <asm/smp.h>
@@ -649,6 +650,7 @@ asmlinkage void __init start_kernel(void)
649 enable_debug_pagealloc(); 650 enable_debug_pagealloc();
650 cpu_hotplug_init(); 651 cpu_hotplug_init();
651 kmem_cache_init(); 652 kmem_cache_init();
653 kmemtrace_init();
652 debug_objects_mem_init(); 654 debug_objects_mem_init();
653 idr_init_cache(); 655 idr_init_cache();
654 setup_per_cpu_pageset(); 656 setup_per_cpu_pageset();
diff --git a/kernel/extable.c b/kernel/extable.c
index e136ed8d82ba..0df6253730be 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
41 return e; 41 return e;
42} 42}
43 43
44__notrace_funcgraph int core_kernel_text(unsigned long addr) 44int core_kernel_text(unsigned long addr)
45{ 45{
46 if (addr >= (unsigned long)_stext && 46 if (addr >= (unsigned long)_stext &&
47 addr <= (unsigned long)_etext) 47 addr <= (unsigned long)_etext)
@@ -54,7 +54,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr)
54 return 0; 54 return 0;
55} 55}
56 56
57__notrace_funcgraph int __kernel_text_address(unsigned long addr) 57int __kernel_text_address(unsigned long addr)
58{ 58{
59 if (core_kernel_text(addr)) 59 if (core_kernel_text(addr))
60 return 1; 60 return 1;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f51eaee921b6..412370ab9a34 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,6 +17,7 @@
17#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18#include <linux/rculist.h> 18#include <linux/rculist.h>
19#include <linux/hash.h> 19#include <linux/hash.h>
20#include <trace/irq.h>
20#include <linux/bootmem.h> 21#include <linux/bootmem.h>
21 22
22#include "internals.h" 23#include "internals.h"
@@ -329,6 +330,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
329 return IRQ_NONE; 330 return IRQ_NONE;
330} 331}
331 332
333DEFINE_TRACE(irq_handler_entry);
334DEFINE_TRACE(irq_handler_exit);
335
332/** 336/**
333 * handle_IRQ_event - irq action chain handler 337 * handle_IRQ_event - irq action chain handler
334 * @irq: the interrupt number 338 * @irq: the interrupt number
@@ -345,7 +349,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
345 local_irq_enable_in_hardirq(); 349 local_irq_enable_in_hardirq();
346 350
347 do { 351 do {
352 trace_irq_handler_entry(irq, action);
348 ret = action->handler(irq, action->dev_id); 353 ret = action->handler(irq, action->dev_id);
354 trace_irq_handler_exit(irq, action, ret);
349 if (ret == IRQ_HANDLED) 355 if (ret == IRQ_HANDLED)
350 status |= action->flags; 356 status |= action->flags;
351 retval |= ret; 357 retval |= ret;
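The two new hooks bracket every handler invocation, so a consumer can pair them to measure per-IRQ handler latency. A minimal sketch, with probe signatures inferred from the trace_*() calls above; the bodies and the commented registration calls are assumptions.

    static void probe_irq_entry(int irq, struct irqaction *action)
    {
            /* e.g. save a per-cpu timestamp taken with ktime_get() */
    }

    static void probe_irq_exit(int irq, struct irqaction *action, int ret)
    {
            /* e.g. account ktime_get() minus the saved timestamp to 'irq' */
    }

    /* register_trace_irq_handler_entry(probe_irq_entry); */
    /* register_trace_irq_handler_exit(probe_irq_exit);   */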
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7ba8cd9845cb..479d4d5672f9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -43,6 +43,7 @@
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/debugfs.h> 44#include <linux/debugfs.h>
45#include <linux/kdebug.h> 45#include <linux/kdebug.h>
46#include <linux/memory.h>
46 47
47#include <asm-generic/sections.h> 48#include <asm-generic/sections.h>
48#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
@@ -699,9 +700,10 @@ int __kprobes register_kprobe(struct kprobe *p)
699 goto out; 700 goto out;
700 } 701 }
701 702
703 mutex_lock(&text_mutex);
702 ret = arch_prepare_kprobe(p); 704 ret = arch_prepare_kprobe(p);
703 if (ret) 705 if (ret)
704 goto out; 706 goto out_unlock_text;
705 707
706 INIT_HLIST_NODE(&p->hlist); 708 INIT_HLIST_NODE(&p->hlist);
707 hlist_add_head_rcu(&p->hlist, 709 hlist_add_head_rcu(&p->hlist,
@@ -710,6 +712,8 @@ int __kprobes register_kprobe(struct kprobe *p)
710 if (kprobe_enabled) 712 if (kprobe_enabled)
711 arch_arm_kprobe(p); 713 arch_arm_kprobe(p);
712 714
715out_unlock_text:
716 mutex_unlock(&text_mutex);
713out: 717out:
714 mutex_unlock(&kprobe_mutex); 718 mutex_unlock(&kprobe_mutex);
715 719
@@ -746,8 +750,11 @@ valid_p:
746 * enabled and not gone - otherwise, the breakpoint would 750 * enabled and not gone - otherwise, the breakpoint would
747 * already have been removed. We save on flushing icache. 751 * already have been removed. We save on flushing icache.
748 */ 752 */
749 if (kprobe_enabled && !kprobe_gone(old_p)) 753 if (kprobe_enabled && !kprobe_gone(old_p)) {
754 mutex_lock(&text_mutex);
750 arch_disarm_kprobe(p); 755 arch_disarm_kprobe(p);
756 mutex_unlock(&text_mutex);
757 }
751 hlist_del_rcu(&old_p->hlist); 758 hlist_del_rcu(&old_p->hlist);
752 } else { 759 } else {
753 if (p->break_handler && !kprobe_gone(p)) 760 if (p->break_handler && !kprobe_gone(p))
@@ -1280,12 +1287,14 @@ static void __kprobes enable_all_kprobes(void)
1280 if (kprobe_enabled) 1287 if (kprobe_enabled)
1281 goto already_enabled; 1288 goto already_enabled;
1282 1289
1290 mutex_lock(&text_mutex);
1283 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1291 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1284 head = &kprobe_table[i]; 1292 head = &kprobe_table[i];
1285 hlist_for_each_entry_rcu(p, node, head, hlist) 1293 hlist_for_each_entry_rcu(p, node, head, hlist)
1286 if (!kprobe_gone(p)) 1294 if (!kprobe_gone(p))
1287 arch_arm_kprobe(p); 1295 arch_arm_kprobe(p);
1288 } 1296 }
1297 mutex_unlock(&text_mutex);
1289 1298
1290 kprobe_enabled = true; 1299 kprobe_enabled = true;
1291 printk(KERN_INFO "Kprobes globally enabled\n"); 1300 printk(KERN_INFO "Kprobes globally enabled\n");
@@ -1310,6 +1319,7 @@ static void __kprobes disable_all_kprobes(void)
1310 1319
1311 kprobe_enabled = false; 1320 kprobe_enabled = false;
1312 printk(KERN_INFO "Kprobes globally disabled\n"); 1321 printk(KERN_INFO "Kprobes globally disabled\n");
1322 mutex_lock(&text_mutex);
1313 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1323 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1314 head = &kprobe_table[i]; 1324 head = &kprobe_table[i];
1315 hlist_for_each_entry_rcu(p, node, head, hlist) { 1325 hlist_for_each_entry_rcu(p, node, head, hlist) {
@@ -1318,6 +1328,7 @@ static void __kprobes disable_all_kprobes(void)
1318 } 1328 }
1319 } 1329 }
1320 1330
1331 mutex_unlock(&text_mutex);
1321 mutex_unlock(&kprobe_mutex); 1332 mutex_unlock(&kprobe_mutex);
1322 /* Allow all currently running kprobes to complete */ 1333 /* Allow all currently running kprobes to complete */
1323 synchronize_sched(); 1334 synchronize_sched();
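Taken together, the text_mutex additions in this file establish one consistent nesting order around all kernel-text patching done for kprobes. A summary sketch, derived from the hunks above rather than new code:

    /*
     * register_kprobe(), the unregister path and {en,dis}able_all_kprobes():
     *
     *     mutex_lock(&kprobe_mutex);
     *             mutex_lock(&text_mutex);      <- serializes text patching
     *             arch_prepare_kprobe() / arch_arm_kprobe() /
     *             arch_disarm_kprobe()
     *             mutex_unlock(&text_mutex);
     *     mutex_unlock(&kprobe_mutex);
     */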
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06b0c3568f0b..71b567f52813 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -41,6 +41,8 @@
41#include <linux/utsname.h> 41#include <linux/utsname.h>
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h>
45#include <trace/lockdep.h>
44 46
45#include <asm/sections.h> 47#include <asm/sections.h>
46 48
@@ -310,12 +312,14 @@ EXPORT_SYMBOL(lockdep_on);
310#if VERBOSE 312#if VERBOSE
311# define HARDIRQ_VERBOSE 1 313# define HARDIRQ_VERBOSE 1
312# define SOFTIRQ_VERBOSE 1 314# define SOFTIRQ_VERBOSE 1
315# define RECLAIM_VERBOSE 1
313#else 316#else
314# define HARDIRQ_VERBOSE 0 317# define HARDIRQ_VERBOSE 0
315# define SOFTIRQ_VERBOSE 0 318# define SOFTIRQ_VERBOSE 0
319# define RECLAIM_VERBOSE 0
316#endif 320#endif
317 321
318#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE 322#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
319/* 323/*
320 * Quick filtering for interesting events: 324 * Quick filtering for interesting events:
321 */ 325 */
@@ -430,30 +434,24 @@ atomic_t nr_find_usage_forwards_checks;
430atomic_t nr_find_usage_forwards_recursions; 434atomic_t nr_find_usage_forwards_recursions;
431atomic_t nr_find_usage_backwards_checks; 435atomic_t nr_find_usage_backwards_checks;
432atomic_t nr_find_usage_backwards_recursions; 436atomic_t nr_find_usage_backwards_recursions;
433# define debug_atomic_inc(ptr) atomic_inc(ptr)
434# define debug_atomic_dec(ptr) atomic_dec(ptr)
435# define debug_atomic_read(ptr) atomic_read(ptr)
436#else
437# define debug_atomic_inc(ptr) do { } while (0)
438# define debug_atomic_dec(ptr) do { } while (0)
439# define debug_atomic_read(ptr) 0
440#endif 437#endif
441 438
442/* 439/*
443 * Locking printouts: 440 * Locking printouts:
444 */ 441 */
445 442
443#define __USAGE(__STATE) \
444 [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \
445 [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \
446 [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\
447 [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R",
448
446static const char *usage_str[] = 449static const char *usage_str[] =
447{ 450{
448 [LOCK_USED] = "initial-use ", 451#define LOCKDEP_STATE(__STATE) __USAGE(__STATE)
449 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", 452#include "lockdep_states.h"
450 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", 453#undef LOCKDEP_STATE
451 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", 454 [LOCK_USED] = "INITIAL USE",
452 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
453 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
454 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
455 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
456 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
457}; 455};
458 456
459const char * __get_key_name(struct lockdep_subclass_key *key, char *str) 457const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
@@ -461,46 +459,45 @@ const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
461 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); 459 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
462} 460}
463 461
464void 462static inline unsigned long lock_flag(enum lock_usage_bit bit)
465get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
466{ 463{
467 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; 464 return 1UL << bit;
468 465}
469 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
470 *c1 = '+';
471 else
472 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
473 *c1 = '-';
474 466
475 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 467static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
476 *c2 = '+'; 468{
477 else 469 char c = '.';
478 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
479 *c2 = '-';
480 470
481 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 471 if (class->usage_mask & lock_flag(bit + 2))
482 *c3 = '-'; 472 c = '+';
483 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { 473 if (class->usage_mask & lock_flag(bit)) {
484 *c3 = '+'; 474 c = '-';
485 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 475 if (class->usage_mask & lock_flag(bit + 2))
486 *c3 = '?'; 476 c = '?';
487 } 477 }
488 478
489 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 479 return c;
490 *c4 = '-'; 480}
491 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { 481
492 *c4 = '+'; 482void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
493 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 483{
494 *c4 = '?'; 484 int i = 0;
495 } 485
486#define LOCKDEP_STATE(__STATE) \
487 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \
488 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ);
489#include "lockdep_states.h"
490#undef LOCKDEP_STATE
491
492 usage[i] = '\0';
496} 493}
497 494
498static void print_lock_name(struct lock_class *class) 495static void print_lock_name(struct lock_class *class)
499{ 496{
500 char str[KSYM_NAME_LEN], c1, c2, c3, c4; 497 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
501 const char *name; 498 const char *name;
502 499
503 get_usage_chars(class, &c1, &c2, &c3, &c4); 500 get_usage_chars(class, usage);
504 501
505 name = class->name; 502 name = class->name;
506 if (!name) { 503 if (!name) {
@@ -513,7 +510,7 @@ static void print_lock_name(struct lock_class *class)
513 if (class->subclass) 510 if (class->subclass)
514 printk("/%d", class->subclass); 511 printk("/%d", class->subclass);
515 } 512 }
516 printk("){%c%c%c%c}", c1, c2, c3, c4); 513 printk("){%s}", usage);
517} 514}
518 515
519static void print_lockdep_cache(struct lockdep_map *lock) 516static void print_lockdep_cache(struct lockdep_map *lock)
@@ -1263,9 +1260,49 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
1263 bit_backwards, bit_forwards, irqclass); 1260 bit_backwards, bit_forwards, irqclass);
1264} 1261}
1265 1262
1266static int 1263static const char *state_names[] = {
1267check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, 1264#define LOCKDEP_STATE(__STATE) \
1268 struct held_lock *next) 1265 __stringify(__STATE),
1266#include "lockdep_states.h"
1267#undef LOCKDEP_STATE
1268};
1269
1270static const char *state_rnames[] = {
1271#define LOCKDEP_STATE(__STATE) \
1272 __stringify(__STATE)"-READ",
1273#include "lockdep_states.h"
1274#undef LOCKDEP_STATE
1275};
1276
1277static inline const char *state_name(enum lock_usage_bit bit)
1278{
1279 return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2];
1280}
1281
1282static int exclusive_bit(int new_bit)
1283{
1284 /*
1285 * USED_IN
1286 * USED_IN_READ
1287 * ENABLED
1288 * ENABLED_READ
1289 *
1290 * bit 0 - write/read
1291 * bit 1 - used_in/enabled
1292 * bit 2+ state
1293 */
1294
1295 int state = new_bit & ~3;
1296 int dir = new_bit & 2;
1297
1298 /*
1299 * keep state, bit flip the direction and strip read.
1300 */
1301 return state | (dir ^ 2);
1302}
1303
1304static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
1305 struct held_lock *next, enum lock_usage_bit bit)
1269{ 1306{
1270 /* 1307 /*
1271 * Prove that the new dependency does not connect a hardirq-safe 1308 * Prove that the new dependency does not connect a hardirq-safe
@@ -1273,38 +1310,34 @@ check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1273 * the backwards-subgraph starting at <prev>, and the 1310 * the backwards-subgraph starting at <prev>, and the
1274 * forwards-subgraph starting at <next>: 1311 * forwards-subgraph starting at <next>:
1275 */ 1312 */
1276 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, 1313 if (!check_usage(curr, prev, next, bit,
1277 LOCK_ENABLED_HARDIRQS, "hard")) 1314 exclusive_bit(bit), state_name(bit)))
1278 return 0; 1315 return 0;
1279 1316
1317 bit++; /* _READ */
1318
1280 /* 1319 /*
1281 * Prove that the new dependency does not connect a hardirq-safe-read 1320 * Prove that the new dependency does not connect a hardirq-safe-read
1282 * lock with a hardirq-unsafe lock - to achieve this we search 1321 * lock with a hardirq-unsafe lock - to achieve this we search
1283 * the backwards-subgraph starting at <prev>, and the 1322 * the backwards-subgraph starting at <prev>, and the
1284 * forwards-subgraph starting at <next>: 1323 * forwards-subgraph starting at <next>:
1285 */ 1324 */
1286 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, 1325 if (!check_usage(curr, prev, next, bit,
1287 LOCK_ENABLED_HARDIRQS, "hard-read")) 1326 exclusive_bit(bit), state_name(bit)))
1288 return 0; 1327 return 0;
1289 1328
1290 /* 1329 return 1;
1291 * Prove that the new dependency does not connect a softirq-safe 1330}
1292 * lock with a softirq-unsafe lock - to achieve this we search 1331
1293 * the backwards-subgraph starting at <prev>, and the 1332static int
1294 * forwards-subgraph starting at <next>: 1333check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1295 */ 1334 struct held_lock *next)
1296 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, 1335{
1297 LOCK_ENABLED_SOFTIRQS, "soft")) 1336#define LOCKDEP_STATE(__STATE) \
1298 return 0; 1337 if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
1299 /*
1300 * Prove that the new dependency does not connect a softirq-safe-read
1301 * lock with a softirq-unsafe lock - to achieve this we search
1302 * the backwards-subgraph starting at <prev>, and the
1303 * forwards-subgraph starting at <next>:
1304 */
1305 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
1306 LOCK_ENABLED_SOFTIRQS, "soft"))
1307 return 0; 1338 return 0;
1339#include "lockdep_states.h"
1340#undef LOCKDEP_STATE
1308 1341
1309 return 1; 1342 return 1;
1310} 1343}
@@ -1861,9 +1894,9 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1861 curr->comm, task_pid_nr(curr)); 1894 curr->comm, task_pid_nr(curr));
1862 print_lock(this); 1895 print_lock(this);
1863 if (forwards) 1896 if (forwards)
1864 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); 1897 printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
1865 else 1898 else
1866 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); 1899 printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
1867 print_lock_name(other); 1900 print_lock_name(other);
1868 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); 1901 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1869 1902
@@ -1933,7 +1966,7 @@ void print_irqtrace_events(struct task_struct *curr)
1933 print_ip_sym(curr->softirq_disable_ip); 1966 print_ip_sym(curr->softirq_disable_ip);
1934} 1967}
1935 1968
1936static int hardirq_verbose(struct lock_class *class) 1969static int HARDIRQ_verbose(struct lock_class *class)
1937{ 1970{
1938#if HARDIRQ_VERBOSE 1971#if HARDIRQ_VERBOSE
1939 return class_filter(class); 1972 return class_filter(class);
@@ -1941,7 +1974,7 @@ static int hardirq_verbose(struct lock_class *class)
1941 return 0; 1974 return 0;
1942} 1975}
1943 1976
1944static int softirq_verbose(struct lock_class *class) 1977static int SOFTIRQ_verbose(struct lock_class *class)
1945{ 1978{
1946#if SOFTIRQ_VERBOSE 1979#if SOFTIRQ_VERBOSE
1947 return class_filter(class); 1980 return class_filter(class);
@@ -1949,185 +1982,95 @@ static int softirq_verbose(struct lock_class *class)
1949 return 0; 1982 return 0;
1950} 1983}
1951 1984
1985static int RECLAIM_FS_verbose(struct lock_class *class)
1986{
1987#if RECLAIM_VERBOSE
1988 return class_filter(class);
1989#endif
1990 return 0;
1991}
1992
1952#define STRICT_READ_CHECKS 1 1993#define STRICT_READ_CHECKS 1
1953 1994
1954static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, 1995static int (*state_verbose_f[])(struct lock_class *class) = {
1996#define LOCKDEP_STATE(__STATE) \
1997 __STATE##_verbose,
1998#include "lockdep_states.h"
1999#undef LOCKDEP_STATE
2000};
2001
2002static inline int state_verbose(enum lock_usage_bit bit,
2003 struct lock_class *class)
2004{
2005 return state_verbose_f[bit >> 2](class);
2006}
2007
2008typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
2009 enum lock_usage_bit bit, const char *name);
2010
2011static int
2012mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1955 enum lock_usage_bit new_bit) 2013 enum lock_usage_bit new_bit)
1956{ 2014{
1957 int ret = 1; 2015 int excl_bit = exclusive_bit(new_bit);
2016 int read = new_bit & 1;
2017 int dir = new_bit & 2;
1958 2018
1959 switch(new_bit) { 2019 /*
1960 case LOCK_USED_IN_HARDIRQ: 2020 * mark USED_IN has to look forwards -- to ensure no dependency
1961 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) 2021 * has ENABLED state, which would allow recursion deadlocks.
1962 return 0; 2022 *
1963 if (!valid_state(curr, this, new_bit, 2023 * mark ENABLED has to look backwards -- to ensure no dependee
1964 LOCK_ENABLED_HARDIRQS_READ)) 2024 * has USED_IN state, which, again, would allow recursion deadlocks.
1965 return 0; 2025 */
1966 /* 2026 check_usage_f usage = dir ?
1967 * just marked it hardirq-safe, check that this lock 2027 check_usage_backwards : check_usage_forwards;
1968 * took no hardirq-unsafe lock in the past: 2028
1969 */ 2029 /*
1970 if (!check_usage_forwards(curr, this, 2030 * Validate that this particular lock does not have conflicting
1971 LOCK_ENABLED_HARDIRQS, "hard")) 2031 * usage states.
1972 return 0; 2032 */
1973#if STRICT_READ_CHECKS 2033 if (!valid_state(curr, this, new_bit, excl_bit))
1974 /* 2034 return 0;
1975 * just marked it hardirq-safe, check that this lock 2035
1976 * took no hardirq-unsafe-read lock in the past: 2036 /*
1977 */ 2037 * Validate that the lock dependencies don't have conflicting usage
1978 if (!check_usage_forwards(curr, this, 2038 * states.
1979 LOCK_ENABLED_HARDIRQS_READ, "hard-read")) 2039 */
1980 return 0; 2040 if ((!read || !dir || STRICT_READ_CHECKS) &&
1981#endif 2041 !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
1982 if (hardirq_verbose(hlock_class(this))) 2042 return 0;
1983 ret = 2; 2043
1984 break; 2044 /*
1985 case LOCK_USED_IN_SOFTIRQ: 2045 * Check for read in write conflicts
1986 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) 2046 */
1987 return 0; 2047 if (!read) {
1988 if (!valid_state(curr, this, new_bit, 2048 if (!valid_state(curr, this, new_bit, excl_bit + 1))
1989 LOCK_ENABLED_SOFTIRQS_READ))
1990 return 0;
1991 /*
1992 * just marked it softirq-safe, check that this lock
1993 * took no softirq-unsafe lock in the past:
1994 */
1995 if (!check_usage_forwards(curr, this,
1996 LOCK_ENABLED_SOFTIRQS, "soft"))
1997 return 0;
1998#if STRICT_READ_CHECKS
1999 /*
2000 * just marked it softirq-safe, check that this lock
2001 * took no softirq-unsafe-read lock in the past:
2002 */
2003 if (!check_usage_forwards(curr, this,
2004 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
2005 return 0;
2006#endif
2007 if (softirq_verbose(hlock_class(this)))
2008 ret = 2;
2009 break;
2010 case LOCK_USED_IN_HARDIRQ_READ:
2011 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
2012 return 0;
2013 /*
2014 * just marked it hardirq-read-safe, check that this lock
2015 * took no hardirq-unsafe lock in the past:
2016 */
2017 if (!check_usage_forwards(curr, this,
2018 LOCK_ENABLED_HARDIRQS, "hard"))
2019 return 0;
2020 if (hardirq_verbose(hlock_class(this)))
2021 ret = 2;
2022 break;
2023 case LOCK_USED_IN_SOFTIRQ_READ:
2024 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
2025 return 0;
2026 /*
2027 * just marked it softirq-read-safe, check that this lock
2028 * took no softirq-unsafe lock in the past:
2029 */
2030 if (!check_usage_forwards(curr, this,
2031 LOCK_ENABLED_SOFTIRQS, "soft"))
2032 return 0;
2033 if (softirq_verbose(hlock_class(this)))
2034 ret = 2;
2035 break;
2036 case LOCK_ENABLED_HARDIRQS:
2037 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2038 return 0;
2039 if (!valid_state(curr, this, new_bit,
2040 LOCK_USED_IN_HARDIRQ_READ))
2041 return 0;
2042 /*
2043 * just marked it hardirq-unsafe, check that no hardirq-safe
2044 * lock in the system ever took it in the past:
2045 */
2046 if (!check_usage_backwards(curr, this,
2047 LOCK_USED_IN_HARDIRQ, "hard"))
2048 return 0;
2049#if STRICT_READ_CHECKS
2050 /*
2051 * just marked it hardirq-unsafe, check that no
2052 * hardirq-safe-read lock in the system ever took
2053 * it in the past:
2054 */
2055 if (!check_usage_backwards(curr, this,
2056 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
2057 return 0;
2058#endif
2059 if (hardirq_verbose(hlock_class(this)))
2060 ret = 2;
2061 break;
2062 case LOCK_ENABLED_SOFTIRQS:
2063 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2064 return 0;
2065 if (!valid_state(curr, this, new_bit,
2066 LOCK_USED_IN_SOFTIRQ_READ))
2067 return 0;
2068 /*
2069 * just marked it softirq-unsafe, check that no softirq-safe
2070 * lock in the system ever took it in the past:
2071 */
2072 if (!check_usage_backwards(curr, this,
2073 LOCK_USED_IN_SOFTIRQ, "soft"))
2074 return 0;
2075#if STRICT_READ_CHECKS
2076 /*
2077 * just marked it softirq-unsafe, check that no
2078 * softirq-safe-read lock in the system ever took
2079 * it in the past:
2080 */
2081 if (!check_usage_backwards(curr, this,
2082 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
2083 return 0;
2084#endif
2085 if (softirq_verbose(hlock_class(this)))
2086 ret = 2;
2087 break;
2088 case LOCK_ENABLED_HARDIRQS_READ:
2089 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2090 return 0;
2091#if STRICT_READ_CHECKS
2092 /*
2093 * just marked it hardirq-read-unsafe, check that no
2094 * hardirq-safe lock in the system ever took it in the past:
2095 */
2096 if (!check_usage_backwards(curr, this,
2097 LOCK_USED_IN_HARDIRQ, "hard"))
2098 return 0;
2099#endif
2100 if (hardirq_verbose(hlock_class(this)))
2101 ret = 2;
2102 break;
2103 case LOCK_ENABLED_SOFTIRQS_READ:
2104 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2105 return 0; 2049 return 0;
2106#if STRICT_READ_CHECKS 2050
2107 /* 2051 if (STRICT_READ_CHECKS &&
2108 * just marked it softirq-read-unsafe, check that no 2052 !usage(curr, this, excl_bit + 1,
2109 * softirq-safe lock in the system ever took it in the past: 2053 state_name(new_bit + 1)))
2110 */
2111 if (!check_usage_backwards(curr, this,
2112 LOCK_USED_IN_SOFTIRQ, "soft"))
2113 return 0; 2054 return 0;
2114#endif
2115 if (softirq_verbose(hlock_class(this)))
2116 ret = 2;
2117 break;
2118 default:
2119 WARN_ON(1);
2120 break;
2121 } 2055 }
2122 2056
2123 return ret; 2057 if (state_verbose(new_bit, hlock_class(this)))
2058 return 2;
2059
2060 return 1;
2124} 2061}
2125 2062
2063enum mark_type {
2064#define LOCKDEP_STATE(__STATE) __STATE,
2065#include "lockdep_states.h"
2066#undef LOCKDEP_STATE
2067};
2068
2126/* 2069/*
2127 * Mark all held locks with a usage bit: 2070 * Mark all held locks with a usage bit:
2128 */ 2071 */
2129static int 2072static int
2130mark_held_locks(struct task_struct *curr, int hardirq) 2073mark_held_locks(struct task_struct *curr, enum mark_type mark)
2131{ 2074{
2132 enum lock_usage_bit usage_bit; 2075 enum lock_usage_bit usage_bit;
2133 struct held_lock *hlock; 2076 struct held_lock *hlock;
@@ -2136,17 +2079,12 @@ mark_held_locks(struct task_struct *curr, int hardirq)
2136 for (i = 0; i < curr->lockdep_depth; i++) { 2079 for (i = 0; i < curr->lockdep_depth; i++) {
2137 hlock = curr->held_locks + i; 2080 hlock = curr->held_locks + i;
2138 2081
2139 if (hardirq) { 2082 usage_bit = 2 + (mark << 2); /* ENABLED */
2140 if (hlock->read) 2083 if (hlock->read)
2141 usage_bit = LOCK_ENABLED_HARDIRQS_READ; 2084 usage_bit += 1; /* READ */
2142 else 2085
2143 usage_bit = LOCK_ENABLED_HARDIRQS; 2086 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2144 } else { 2087
2145 if (hlock->read)
2146 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
2147 else
2148 usage_bit = LOCK_ENABLED_SOFTIRQS;
2149 }
2150 if (!mark_lock(curr, hlock, usage_bit)) 2088 if (!mark_lock(curr, hlock, usage_bit))
2151 return 0; 2089 return 0;
2152 } 2090 }
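The usage_bit arithmetic above relies on the same four-bits-per-state layout: ENABLED sits at offset 2 within each state's block of four, and READ adds 1. A quick expansion of the new mark_type values, as a sketch:

    /*
     *   mark = HARDIRQ    (0):  2 + (0 << 2) = 2   -> LOCK_ENABLED_HARDIRQ
     *   mark = SOFTIRQ    (1):  2 + (1 << 2) = 6   -> LOCK_ENABLED_SOFTIRQ
     *   mark = RECLAIM_FS (2):  2 + (2 << 2) = 10  -> LOCK_ENABLED_RECLAIM_FS
     *   (+1 in each case when hlock->read is set, giving the _READ variant)
     */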
@@ -2200,7 +2138,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2200 * We are going to turn hardirqs on, so set the 2138 * We are going to turn hardirqs on, so set the
2201 * usage bit for all held locks: 2139 * usage bit for all held locks:
2202 */ 2140 */
2203 if (!mark_held_locks(curr, 1)) 2141 if (!mark_held_locks(curr, HARDIRQ))
2204 return; 2142 return;
2205 /* 2143 /*
2206 * If we have softirqs enabled, then set the usage 2144 * If we have softirqs enabled, then set the usage
@@ -2208,7 +2146,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2208 * this bit from being set before) 2146 * this bit from being set before)
2209 */ 2147 */
2210 if (curr->softirqs_enabled) 2148 if (curr->softirqs_enabled)
2211 if (!mark_held_locks(curr, 0)) 2149 if (!mark_held_locks(curr, SOFTIRQ))
2212 return; 2150 return;
2213 2151
2214 curr->hardirq_enable_ip = ip; 2152 curr->hardirq_enable_ip = ip;
@@ -2288,7 +2226,7 @@ void trace_softirqs_on(unsigned long ip)
2288 * enabled too: 2226 * enabled too:
2289 */ 2227 */
2290 if (curr->hardirqs_enabled) 2228 if (curr->hardirqs_enabled)
2291 mark_held_locks(curr, 0); 2229 mark_held_locks(curr, SOFTIRQ);
2292} 2230}
2293 2231
2294/* 2232/*
@@ -2317,6 +2255,31 @@ void trace_softirqs_off(unsigned long ip)
2317 debug_atomic_inc(&redundant_softirqs_off); 2255 debug_atomic_inc(&redundant_softirqs_off);
2318} 2256}
2319 2257
2258void lockdep_trace_alloc(gfp_t gfp_mask)
2259{
2260 struct task_struct *curr = current;
2261
2262 if (unlikely(!debug_locks))
2263 return;
2264
2265 /* no reclaim without waiting on it */
2266 if (!(gfp_mask & __GFP_WAIT))
2267 return;
2268
2269 /* this guy won't enter reclaim */
2270 if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
2271 return;
2272
 2273 /* We're only interested in __GFP_FS allocations for now */
2274 if (!(gfp_mask & __GFP_FS))
2275 return;
2276
2277 if (DEBUG_LOCKS_WARN_ON(irqs_disabled()))
2278 return;
2279
2280 mark_held_locks(curr, RECLAIM_FS);
2281}
2282
2320static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) 2283static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2321{ 2284{
2322 /* 2285 /*
@@ -2345,19 +2308,35 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2345 if (!hlock->hardirqs_off) { 2308 if (!hlock->hardirqs_off) {
2346 if (hlock->read) { 2309 if (hlock->read) {
2347 if (!mark_lock(curr, hlock, 2310 if (!mark_lock(curr, hlock,
2348 LOCK_ENABLED_HARDIRQS_READ)) 2311 LOCK_ENABLED_HARDIRQ_READ))
2349 return 0; 2312 return 0;
2350 if (curr->softirqs_enabled) 2313 if (curr->softirqs_enabled)
2351 if (!mark_lock(curr, hlock, 2314 if (!mark_lock(curr, hlock,
2352 LOCK_ENABLED_SOFTIRQS_READ)) 2315 LOCK_ENABLED_SOFTIRQ_READ))
2353 return 0; 2316 return 0;
2354 } else { 2317 } else {
2355 if (!mark_lock(curr, hlock, 2318 if (!mark_lock(curr, hlock,
2356 LOCK_ENABLED_HARDIRQS)) 2319 LOCK_ENABLED_HARDIRQ))
2357 return 0; 2320 return 0;
2358 if (curr->softirqs_enabled) 2321 if (curr->softirqs_enabled)
2359 if (!mark_lock(curr, hlock, 2322 if (!mark_lock(curr, hlock,
2360 LOCK_ENABLED_SOFTIRQS)) 2323 LOCK_ENABLED_SOFTIRQ))
2324 return 0;
2325 }
2326 }
2327
2328 /*
2329 * We reuse the irq context infrastructure more broadly as a general
2330 * context checking code. This tests GFP_FS recursion (a lock taken
2331 * during reclaim for a GFP_FS allocation is held over a GFP_FS
2332 * allocation).
2333 */
2334 if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
2335 if (hlock->read) {
2336 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
2337 return 0;
2338 } else {
2339 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
2361 return 0; 2340 return 0;
2362 } 2341 }
2363 } 2342 }
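With lockdep_trace_alloc() above, only allocations that can both sleep and enter the filesystem are annotated: GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) qualifies, while GFP_ATOMIC (no __GFP_WAIT) and GFP_NOFS (no __GFP_FS) are skipped. The deadlock pattern the RECLAIM_FS bits then catch looks roughly like the sketch below; the lock name is hypothetical.

    /*
     *   mutex_lock(&some_fs_lock);
     *   buf = kmalloc(size, GFP_KERNEL);        <- __GFP_FS allocation
     *     -> direct reclaim -> writeback -> mutex_lock(&some_fs_lock)
     *
     * Marking held locks RECLAIM_FS-enabled at allocation time, and
     * RECLAIM_FS-used when taken under the reclaim annotation, lets lockdep
     * report the inversion without the deadlock ever having to happen.
     */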
@@ -2412,6 +2391,10 @@ static inline int separate_irq_context(struct task_struct *curr,
2412 return 0; 2391 return 0;
2413} 2392}
2414 2393
2394void lockdep_trace_alloc(gfp_t gfp_mask)
2395{
2396}
2397
2415#endif 2398#endif
2416 2399
2417/* 2400/*
@@ -2445,14 +2428,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2445 return 0; 2428 return 0;
2446 2429
2447 switch (new_bit) { 2430 switch (new_bit) {
2448 case LOCK_USED_IN_HARDIRQ: 2431#define LOCKDEP_STATE(__STATE) \
2449 case LOCK_USED_IN_SOFTIRQ: 2432 case LOCK_USED_IN_##__STATE: \
2450 case LOCK_USED_IN_HARDIRQ_READ: 2433 case LOCK_USED_IN_##__STATE##_READ: \
2451 case LOCK_USED_IN_SOFTIRQ_READ: 2434 case LOCK_ENABLED_##__STATE: \
2452 case LOCK_ENABLED_HARDIRQS: 2435 case LOCK_ENABLED_##__STATE##_READ:
2453 case LOCK_ENABLED_SOFTIRQS: 2436#include "lockdep_states.h"
2454 case LOCK_ENABLED_HARDIRQS_READ: 2437#undef LOCKDEP_STATE
2455 case LOCK_ENABLED_SOFTIRQS_READ:
2456 ret = mark_lock_irq(curr, this, new_bit); 2438 ret = mark_lock_irq(curr, this, new_bit);
2457 if (!ret) 2439 if (!ret)
2458 return 0; 2440 return 0;
@@ -2925,6 +2907,8 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
2925} 2907}
2926EXPORT_SYMBOL_GPL(lock_set_class); 2908EXPORT_SYMBOL_GPL(lock_set_class);
2927 2909
2910DEFINE_TRACE(lock_acquire);
2911
2928/* 2912/*
2929 * We are not always called with irqs disabled - do that here, 2913 * We are not always called with irqs disabled - do that here,
2930 * and also avoid lockdep recursion: 2914 * and also avoid lockdep recursion:
@@ -2935,6 +2919,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2935{ 2919{
2936 unsigned long flags; 2920 unsigned long flags;
2937 2921
2922 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
2923
2938 if (unlikely(current->lockdep_recursion)) 2924 if (unlikely(current->lockdep_recursion))
2939 return; 2925 return;
2940 2926
@@ -2949,11 +2935,15 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2949} 2935}
2950EXPORT_SYMBOL_GPL(lock_acquire); 2936EXPORT_SYMBOL_GPL(lock_acquire);
2951 2937
2938DEFINE_TRACE(lock_release);
2939
2952void lock_release(struct lockdep_map *lock, int nested, 2940void lock_release(struct lockdep_map *lock, int nested,
2953 unsigned long ip) 2941 unsigned long ip)
2954{ 2942{
2955 unsigned long flags; 2943 unsigned long flags;
2956 2944
2945 trace_lock_release(lock, nested, ip);
2946
2957 if (unlikely(current->lockdep_recursion)) 2947 if (unlikely(current->lockdep_recursion))
2958 return; 2948 return;
2959 2949
@@ -2966,6 +2956,16 @@ void lock_release(struct lockdep_map *lock, int nested,
2966} 2956}
2967EXPORT_SYMBOL_GPL(lock_release); 2957EXPORT_SYMBOL_GPL(lock_release);
2968 2958
2959void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
2960{
2961 current->lockdep_reclaim_gfp = gfp_mask;
2962}
2963
2964void lockdep_clear_current_reclaim_state(void)
2965{
2966 current->lockdep_reclaim_gfp = 0;
2967}
2968
2969#ifdef CONFIG_LOCK_STAT 2969#ifdef CONFIG_LOCK_STAT
2970static int 2970static int
2971print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, 2971print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
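The two helpers are meant to bracket direct reclaim so that every lock acquired inside the window is flagged as used in RECLAIM_FS context by mark_irqflags(). A hypothetical caller, shown only to illustrate the intended bracketing; the function name and call site are assumptions, not part of this patch.

    static void reclaim_with_annotation(gfp_t gfp_mask)
    {
            lockdep_set_current_reclaim_state(gfp_mask);
            /* ... run direct reclaim / shrinkers here; locks taken in this
             * window are marked LOCK_USED_IN_RECLAIM_FS ... */
            lockdep_clear_current_reclaim_state();
    }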
@@ -3092,10 +3092,14 @@ found_it:
3092 lock->ip = ip; 3092 lock->ip = ip;
3093} 3093}
3094 3094
3095DEFINE_TRACE(lock_contended);
3096
3095void lock_contended(struct lockdep_map *lock, unsigned long ip) 3097void lock_contended(struct lockdep_map *lock, unsigned long ip)
3096{ 3098{
3097 unsigned long flags; 3099 unsigned long flags;
3098 3100
3101 trace_lock_contended(lock, ip);
3102
3099 if (unlikely(!lock_stat)) 3103 if (unlikely(!lock_stat))
3100 return; 3104 return;
3101 3105
@@ -3111,10 +3115,14 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3111} 3115}
3112EXPORT_SYMBOL_GPL(lock_contended); 3116EXPORT_SYMBOL_GPL(lock_contended);
3113 3117
3118DEFINE_TRACE(lock_acquired);
3119
3114void lock_acquired(struct lockdep_map *lock, unsigned long ip) 3120void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3115{ 3121{
3116 unsigned long flags; 3122 unsigned long flags;
3117 3123
3124 trace_lock_acquired(lock, ip);
3125
3118 if (unlikely(!lock_stat)) 3126 if (unlikely(!lock_stat))
3119 return; 3127 return;
3120 3128
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index 56b196932c08..a2cc7e9a6e84 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -7,6 +7,45 @@
7 */ 7 */
8 8
9/* 9/*
10 * Lock-class usage-state bits:
11 */
12enum lock_usage_bit {
13#define LOCKDEP_STATE(__STATE) \
14 LOCK_USED_IN_##__STATE, \
15 LOCK_USED_IN_##__STATE##_READ, \
16 LOCK_ENABLED_##__STATE, \
17 LOCK_ENABLED_##__STATE##_READ,
18#include "lockdep_states.h"
19#undef LOCKDEP_STATE
20 LOCK_USED,
21 LOCK_USAGE_STATES
22};
23
24/*
25 * Usage-state bitmasks:
26 */
27#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
28
29enum {
30#define LOCKDEP_STATE(__STATE) \
31 __LOCKF(USED_IN_##__STATE) \
32 __LOCKF(USED_IN_##__STATE##_READ) \
33 __LOCKF(ENABLED_##__STATE) \
34 __LOCKF(ENABLED_##__STATE##_READ)
35#include "lockdep_states.h"
36#undef LOCKDEP_STATE
37 __LOCKF(USED)
38};
39
40#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
41#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
42
43#define LOCKF_ENABLED_IRQ_READ \
44 (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
45#define LOCKF_USED_IN_IRQ_READ \
46 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
47
48/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies 49 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track. 50 * we track.
12 * 51 *
@@ -31,8 +70,10 @@
31extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
32extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
33 72
34extern void 73#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); 74
75extern void get_usage_chars(struct lock_class *class,
76 char usage[LOCK_USAGE_CHARS]);
36 77
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); 78extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38 79
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 13716b813896..d7135aa2d2c4 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -84,7 +84,7 @@ static int l_show(struct seq_file *m, void *v)
84{ 84{
85 struct lock_class *class = v; 85 struct lock_class *class = v;
86 struct lock_list *entry; 86 struct lock_list *entry;
87 char c1, c2, c3, c4; 87 char usage[LOCK_USAGE_CHARS];
88 88
89 if (v == SEQ_START_TOKEN) { 89 if (v == SEQ_START_TOKEN) {
90 seq_printf(m, "all lock classes:\n"); 90 seq_printf(m, "all lock classes:\n");
@@ -100,8 +100,8 @@ static int l_show(struct seq_file *m, void *v)
100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); 100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
101#endif 101#endif
102 102
103 get_usage_chars(class, &c1, &c2, &c3, &c4); 103 get_usage_chars(class, usage);
104 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); 104 seq_printf(m, " %s", usage);
105 105
106 seq_printf(m, ": "); 106 seq_printf(m, ": ");
107 print_name(m, class); 107 print_name(m, class);
@@ -300,27 +300,27 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
300 nr_uncategorized++; 300 nr_uncategorized++;
301 if (class->usage_mask & LOCKF_USED_IN_IRQ) 301 if (class->usage_mask & LOCKF_USED_IN_IRQ)
302 nr_irq_safe++; 302 nr_irq_safe++;
303 if (class->usage_mask & LOCKF_ENABLED_IRQS) 303 if (class->usage_mask & LOCKF_ENABLED_IRQ)
304 nr_irq_unsafe++; 304 nr_irq_unsafe++;
305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
306 nr_softirq_safe++; 306 nr_softirq_safe++;
307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) 307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
308 nr_softirq_unsafe++; 308 nr_softirq_unsafe++;
309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) 309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
310 nr_hardirq_safe++; 310 nr_hardirq_safe++;
311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) 311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
312 nr_hardirq_unsafe++; 312 nr_hardirq_unsafe++;
313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) 313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
314 nr_irq_read_safe++; 314 nr_irq_read_safe++;
315 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) 315 if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
316 nr_irq_read_unsafe++; 316 nr_irq_read_unsafe++;
317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) 317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
318 nr_softirq_read_safe++; 318 nr_softirq_read_safe++;
319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
320 nr_softirq_read_unsafe++; 320 nr_softirq_read_unsafe++;
321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) 321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
322 nr_hardirq_read_safe++; 322 nr_hardirq_read_safe++;
323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
324 nr_hardirq_read_unsafe++; 324 nr_hardirq_read_unsafe++;
325 325
326#ifdef CONFIG_PROVE_LOCKING 326#ifdef CONFIG_PROVE_LOCKING
@@ -601,6 +601,10 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
601static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
602{ 602{
603 seq_printf(m, "lock_stat version 0.3\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
604
605 if (unlikely(!debug_locks))
606 seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
607
604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 608 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 609 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
606 "%14s %14s\n", 610 "%14s %14s\n",
diff --git a/kernel/lockdep_states.h b/kernel/lockdep_states.h
new file mode 100644
index 000000000000..995b0cc2b84c
--- /dev/null
+++ b/kernel/lockdep_states.h
@@ -0,0 +1,9 @@
1/*
2 * Lockdep states,
3 *
4 * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
5 * you add one, or come up with a nice dynamic solution.
6 */
7LOCKDEP_STATE(HARDIRQ)
8LOCKDEP_STATE(SOFTIRQ)
9LOCKDEP_STATE(RECLAIM_FS)
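For readers following the x-macro: every LOCKDEP_STATE() line above expands into four usage bits (and four LOCKF_ masks) in kernel/lockdep_internals.h, so with these three states the numbering works out as sketched below; the values are implied by the patch rather than spelled out in it.

    /*
     *   LOCK_USED_IN_HARDIRQ    = 0 .. LOCK_ENABLED_HARDIRQ_READ    = 3
     *   LOCK_USED_IN_SOFTIRQ    = 4 .. LOCK_ENABLED_SOFTIRQ_READ    = 7
     *   LOCK_USED_IN_RECLAIM_FS = 8 .. LOCK_ENABLED_RECLAIM_FS_READ = 11
     *   LOCK_USED = 12, LOCK_USAGE_STATES = 13
     *
     * hence LOCK_USAGE_CHARS = 1 + 13/2 = 7: six usage characters plus the
     * terminating NUL, printed inside the "{...}" braces by print_lock_name().
     */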
diff --git a/kernel/module.c b/kernel/module.c
index f0e04d6b67d8..8b742f2b3845 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2769,7 +2769,7 @@ int is_module_address(unsigned long addr)
2769 2769
2770 2770
2771/* Is this a valid kernel address? */ 2771/* Is this a valid kernel address? */
2772__notrace_funcgraph struct module *__module_text_address(unsigned long addr) 2772struct module *__module_text_address(unsigned long addr)
2773{ 2773{
2774 struct module *mod; 2774 struct module *mod;
2775 2775
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160eb532..50d022e5a560 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -26,11 +26,6 @@
26/* 26/*
27 * Must be called with lock->wait_lock held. 27 * Must be called with lock->wait_lock held.
28 */ 28 */
29void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
30{
31 lock->owner = new_owner;
32}
33
34void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) 29void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
35{ 30{
36 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 31 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
@@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
59 54
60 /* Mark the current thread as blocked on the lock: */ 55 /* Mark the current thread as blocked on the lock: */
61 ti->task->blocked_on = waiter; 56 ti->task->blocked_on = waiter;
62 waiter->lock = lock;
63} 57}
64 58
65void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 59void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
@@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lock)
82 DEBUG_LOCKS_WARN_ON(lock->magic != lock); 76 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 77 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 78 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
85 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 79 mutex_clear_owner(lock);
86} 80}
87 81
88void debug_mutex_init(struct mutex *lock, const char *name, 82void debug_mutex_init(struct mutex *lock, const char *name,
@@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock, const char *name,
95 debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 89 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
96 lockdep_init_map(&lock->dep_map, name, key, 0); 90 lockdep_init_map(&lock->dep_map, name, key, 0);
97#endif 91#endif
98 lock->owner = NULL;
99 lock->magic = lock; 92 lock->magic = lock;
100} 93}
101 94
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index babfbdfc534b..6b2d735846a5 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -13,14 +13,6 @@
13/* 13/*
14 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
15 */ 15 */
16extern void
17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
18
19static inline void debug_mutex_clear_owner(struct mutex *lock)
20{
21 lock->owner = NULL;
22}
23
24extern void debug_mutex_lock_common(struct mutex *lock, 16extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter); 17 struct mutex_waiter *waiter);
26extern void debug_mutex_wake_waiter(struct mutex *lock, 18extern void debug_mutex_wake_waiter(struct mutex *lock,
@@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mutex *lock);
35extern void debug_mutex_init(struct mutex *lock, const char *name, 27extern void debug_mutex_init(struct mutex *lock, const char *name,
36 struct lock_class_key *key); 28 struct lock_class_key *key);
37 29
30static inline void mutex_set_owner(struct mutex *lock)
31{
32 lock->owner = current_thread_info();
33}
34
35static inline void mutex_clear_owner(struct mutex *lock)
36{
37 lock->owner = NULL;
38}
39
38#define spin_lock_mutex(lock, flags) \ 40#define spin_lock_mutex(lock, flags) \
39 do { \ 41 do { \
40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 42 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 4f45d4b658ef..5d79781394a3 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -10,6 +10,11 @@
10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and 10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
11 * David Howells for suggestions and improvements. 11 * David Howells for suggestions and improvements.
12 * 12 *
13 * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
14 * from the -rt tree, where it was originally implemented for rtmutexes
15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
16 * and Sven Dietrich.
17 *
13 * Also see Documentation/mutex-design.txt. 18 * Also see Documentation/mutex-design.txt.
14 */ 19 */
15#include <linux/mutex.h> 20#include <linux/mutex.h>
@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
46 atomic_set(&lock->count, 1); 51 atomic_set(&lock->count, 1);
47 spin_lock_init(&lock->wait_lock); 52 spin_lock_init(&lock->wait_lock);
48 INIT_LIST_HEAD(&lock->wait_list); 53 INIT_LIST_HEAD(&lock->wait_list);
54 mutex_clear_owner(lock);
49 55
50 debug_mutex_init(lock, name, key); 56 debug_mutex_init(lock, name, key);
51} 57}
@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock)
91 * 'unlocked' into 'locked' state. 97 * 'unlocked' into 'locked' state.
92 */ 98 */
93 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); 99 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
100 mutex_set_owner(lock);
94} 101}
95 102
96EXPORT_SYMBOL(mutex_lock); 103EXPORT_SYMBOL(mutex_lock);
@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock)
115 * The unlocking fastpath is the 0->1 transition from 'locked' 122 * The unlocking fastpath is the 0->1 transition from 'locked'
116 * into 'unlocked' state: 123 * into 'unlocked' state:
117 */ 124 */
125#ifndef CONFIG_DEBUG_MUTEXES
126 /*
127 * When debugging is enabled we must not clear the owner before time,
128 * the slow path will always be taken, and that clears the owner field
129 * after verifying that it was indeed current.
130 */
131 mutex_clear_owner(lock);
132#endif
118 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); 133 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
119} 134}
120 135
@@ -129,21 +144,75 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
129{ 144{
130 struct task_struct *task = current; 145 struct task_struct *task = current;
131 struct mutex_waiter waiter; 146 struct mutex_waiter waiter;
132 unsigned int old_val;
133 unsigned long flags; 147 unsigned long flags;
134 148
149 preempt_disable();
150 mutex_acquire(&lock->dep_map, subclass, 0, ip);
151#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
152 /*
153 * Optimistic spinning.
154 *
155 * We try to spin for acquisition when we find that there are no
156 * pending waiters and the lock owner is currently running on a
157 * (different) CPU.
158 *
159 * The rationale is that if the lock owner is running, it is likely to
160 * release the lock soon.
161 *
162 * Since this needs the lock owner, and this mutex implementation
163 * doesn't track the owner atomically in the lock field, we need to
164 * track it non-atomically.
165 *
166 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
167 * to serialize everything.
168 */
169
170 for (;;) {
171 struct thread_info *owner;
172
173 /*
174 * If there's an owner, wait for it to either
175 * release the lock or go to sleep.
176 */
177 owner = ACCESS_ONCE(lock->owner);
178 if (owner && !mutex_spin_on_owner(lock, owner))
179 break;
180
181 if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
182 lock_acquired(&lock->dep_map, ip);
183 mutex_set_owner(lock);
184 preempt_enable();
185 return 0;
186 }
187
188 /*
189 * When there's no owner, we might have preempted between the
190 * owner acquiring the lock and setting the owner field. If
191 * we're an RT task that will live-lock because we won't let
192 * the owner complete.
193 */
194 if (!owner && (need_resched() || rt_task(task)))
195 break;
196
197 /*
198 * The cpu_relax() call is a compiler barrier which forces
199 * everything in this loop to be re-loaded. We don't need
200 * memory barriers as we'll eventually observe the right
201 * values at the cost of a few extra spins.
202 */
203 cpu_relax();
204 }
205#endif
135 spin_lock_mutex(&lock->wait_lock, flags); 206 spin_lock_mutex(&lock->wait_lock, flags);
136 207
137 debug_mutex_lock_common(lock, &waiter); 208 debug_mutex_lock_common(lock, &waiter);
138 mutex_acquire(&lock->dep_map, subclass, 0, ip);
139 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); 209 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
140 210
141 /* add waiting tasks to the end of the waitqueue (FIFO): */ 211 /* add waiting tasks to the end of the waitqueue (FIFO): */
142 list_add_tail(&waiter.list, &lock->wait_list); 212 list_add_tail(&waiter.list, &lock->wait_list);
143 waiter.task = task; 213 waiter.task = task;
144 214
145 old_val = atomic_xchg(&lock->count, -1); 215 if (atomic_xchg(&lock->count, -1) == 1)
146 if (old_val == 1)
147 goto done; 216 goto done;
148 217
149 lock_contended(&lock->dep_map, ip); 218 lock_contended(&lock->dep_map, ip);
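The comment block above describes the policy: spin while the lock owner is still running on another CPU (it is likely to release soon), otherwise fall back to the ordinary wait-list path. Below is a user-space analogy in C11 atomics, purely to illustrate that policy; it is not the kernel implementation, and mutex_spin_on_owner() itself is not shown in this excerpt.

    #include <stdatomic.h>
    #include <sched.h>

    struct toy_mutex {
            atomic_int locked;         /* 0 = unlocked, 1 = locked          */
            atomic_int owner_running;  /* stands in for "owner is on a CPU" */
    };

    static void toy_lock(struct toy_mutex *m)
    {
            /* optimistic spin: only worthwhile while the owner keeps running */
            while (atomic_load(&m->owner_running)) {
                    int expected = 0;
                    if (atomic_compare_exchange_weak(&m->locked, &expected, 1))
                            return;
            }
            /* fall back: the kernel queues a waiter and calls __schedule() here */
            while (atomic_exchange(&m->locked, 1))
                    sched_yield();
    }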
@@ -158,8 +227,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
158 * that when we release the lock, we properly wake up the 227 * that when we release the lock, we properly wake up the
159 * other waiters: 228 * other waiters:
160 */ 229 */
161 old_val = atomic_xchg(&lock->count, -1); 230 if (atomic_xchg(&lock->count, -1) == 1)
162 if (old_val == 1)
163 break; 231 break;
164 232
165 /* 233 /*
@@ -173,21 +241,22 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
173 spin_unlock_mutex(&lock->wait_lock, flags); 241 spin_unlock_mutex(&lock->wait_lock, flags);
174 242
175 debug_mutex_free_waiter(&waiter); 243 debug_mutex_free_waiter(&waiter);
244 preempt_enable();
176 return -EINTR; 245 return -EINTR;
177 } 246 }
178 __set_task_state(task, state); 247 __set_task_state(task, state);
179 248
 180 /* didn't get the lock, go to sleep: */ 249 /* didn't get the lock, go to sleep: */
181 spin_unlock_mutex(&lock->wait_lock, flags); 250 spin_unlock_mutex(&lock->wait_lock, flags);
182 schedule(); 251 __schedule();
183 spin_lock_mutex(&lock->wait_lock, flags); 252 spin_lock_mutex(&lock->wait_lock, flags);
184 } 253 }
185 254
186done: 255done:
187 lock_acquired(&lock->dep_map, ip); 256 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 257 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 258 mutex_remove_waiter(lock, &waiter, current_thread_info());
190 debug_mutex_set_owner(lock, task_thread_info(task)); 259 mutex_set_owner(lock);
191 260
192 /* set it to 0 if there are no waiters left: */ 261 /* set it to 0 if there are no waiters left: */
193 if (likely(list_empty(&lock->wait_list))) 262 if (likely(list_empty(&lock->wait_list)))
@@ -196,6 +265,7 @@ done:
196 spin_unlock_mutex(&lock->wait_lock, flags); 265 spin_unlock_mutex(&lock->wait_lock, flags);
197 266
198 debug_mutex_free_waiter(&waiter); 267 debug_mutex_free_waiter(&waiter);
268 preempt_enable();
199 269
200 return 0; 270 return 0;
201} 271}
@@ -222,7 +292,8 @@ int __sched
222mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) 292mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
223{ 293{
224 might_sleep(); 294 might_sleep();
225 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); 295 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
296 subclass, _RET_IP_);
226} 297}
227 298
228EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); 299EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -260,8 +331,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
260 wake_up_process(waiter->task); 331 wake_up_process(waiter->task);
261 } 332 }
262 333
263 debug_mutex_clear_owner(lock);
264
265 spin_unlock_mutex(&lock->wait_lock, flags); 334 spin_unlock_mutex(&lock->wait_lock, flags);
266} 335}
267 336
@@ -298,18 +367,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count);
298 */ 367 */
299int __sched mutex_lock_interruptible(struct mutex *lock) 368int __sched mutex_lock_interruptible(struct mutex *lock)
300{ 369{
370 int ret;
371
301 might_sleep(); 372 might_sleep();
302 return __mutex_fastpath_lock_retval 373 ret = __mutex_fastpath_lock_retval
303 (&lock->count, __mutex_lock_interruptible_slowpath); 374 (&lock->count, __mutex_lock_interruptible_slowpath);
375 if (!ret)
376 mutex_set_owner(lock);
377
378 return ret;
304} 379}
305 380
306EXPORT_SYMBOL(mutex_lock_interruptible); 381EXPORT_SYMBOL(mutex_lock_interruptible);
307 382
308int __sched mutex_lock_killable(struct mutex *lock) 383int __sched mutex_lock_killable(struct mutex *lock)
309{ 384{
385 int ret;
386
310 might_sleep(); 387 might_sleep();
311 return __mutex_fastpath_lock_retval 388 ret = __mutex_fastpath_lock_retval
312 (&lock->count, __mutex_lock_killable_slowpath); 389 (&lock->count, __mutex_lock_killable_slowpath);
390 if (!ret)
391 mutex_set_owner(lock);
392
393 return ret;
313} 394}
314EXPORT_SYMBOL(mutex_lock_killable); 395EXPORT_SYMBOL(mutex_lock_killable);
315 396
@@ -352,9 +433,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
352 433
353 prev = atomic_xchg(&lock->count, -1); 434 prev = atomic_xchg(&lock->count, -1);
354 if (likely(prev == 1)) { 435 if (likely(prev == 1)) {
355 debug_mutex_set_owner(lock, current_thread_info()); 436 mutex_set_owner(lock);
356 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); 437 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
357 } 438 }
439
358 /* Set it back to 0 if there are no waiters: */ 440 /* Set it back to 0 if there are no waiters: */
359 if (likely(list_empty(&lock->wait_list))) 441 if (likely(list_empty(&lock->wait_list)))
360 atomic_set(&lock->count, 0); 442 atomic_set(&lock->count, 0);
@@ -380,8 +462,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
380 */ 462 */
381int __sched mutex_trylock(struct mutex *lock) 463int __sched mutex_trylock(struct mutex *lock)
382{ 464{
383 return __mutex_fastpath_trylock(&lock->count, 465 int ret;
384 __mutex_trylock_slowpath); 466
467 ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
468 if (ret)
469 mutex_set_owner(lock);
470
471 return ret;
385} 472}
386 473
387EXPORT_SYMBOL(mutex_trylock); 474EXPORT_SYMBOL(mutex_trylock);
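
The mutex.c changes above all follow one shape: call the architecture fastpath, and record the owner only when the lock was actually taken. Below is a minimal userspace sketch of that try-then-record-owner pattern; the names and types are invented for the illustration and C11 atomics stand in for the kernel primitives.

/* Illustrative userspace model of "acquire, then record the owner on success".
 * Not the kernel implementation; all names here are invented for the sketch. */
#include <stdatomic.h>
#include <stdio.h>

struct demo_mutex {
        atomic_int count;       /* 1 = unlocked, 0 = locked, mirroring the kernel convention */
        int owner;              /* id of the holder, meaningful only while locked */
};

static int demo_trylock(struct demo_mutex *m, int self)
{
        int expected = 1;

        /* fastpath: the 1 -> 0 transition means we took the lock */
        if (atomic_compare_exchange_strong(&m->count, &expected, 0)) {
                m->owner = self;        /* record the owner only on success */
                return 1;
        }
        return 0;
}

static void demo_unlock(struct demo_mutex *m)
{
        m->owner = 0;
        atomic_store(&m->count, 1);
}

int main(void)
{
        struct demo_mutex m = { .count = 1 };

        printf("first trylock:  %d\n", demo_trylock(&m, 42));   /* 1: acquired, owner recorded */
        printf("second trylock: %d\n", demo_trylock(&m, 43));   /* 0: already held */
        demo_unlock(&m);
        return 0;
}
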
diff --git a/kernel/mutex.h b/kernel/mutex.h
index a075dafbb290..67578ca48f94 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,8 +16,26 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#ifdef CONFIG_SMP
20#define debug_mutex_clear_owner(lock) do { } while (0) 20static inline void mutex_set_owner(struct mutex *lock)
21{
22 lock->owner = current_thread_info();
23}
24
25static inline void mutex_clear_owner(struct mutex *lock)
26{
27 lock->owner = NULL;
28}
29#else
30static inline void mutex_set_owner(struct mutex *lock)
31{
32}
33
34static inline void mutex_clear_owner(struct mutex *lock)
35{
36}
37#endif
38
21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 39#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
22#define debug_mutex_free_waiter(waiter) do { } while (0) 40#define debug_mutex_free_waiter(waiter) do { } while (0)
23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) 41#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa6..edc0ba6d8160 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -677,9 +677,7 @@ int relay_late_setup_files(struct rchan *chan,
677 */ 677 */
678 for_each_online_cpu(i) { 678 for_each_online_cpu(i) {
679 if (unlikely(!chan->buf[i])) { 679 if (unlikely(!chan->buf[i])) {
680 printk(KERN_ERR "relay_late_setup_files: CPU %u " 680 WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
681 "has no buffer, it must have!\n", i);
682 BUG();
683 err = -EINVAL; 681 err = -EINVAL;
684 break; 682 break;
685 } 683 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 0a76d0b6f215..7299083e69e7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4404,10 +4404,7 @@ void scheduler_tick(void)
4404#endif 4404#endif
4405} 4405}
4406 4406
4407#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 4407unsigned long get_parent_ip(unsigned long addr)
4408 defined(CONFIG_PREEMPT_TRACER))
4409
4410static inline unsigned long get_parent_ip(unsigned long addr)
4411{ 4408{
4412 if (in_lock_functions(addr)) { 4409 if (in_lock_functions(addr)) {
4413 addr = CALLER_ADDR2; 4410 addr = CALLER_ADDR2;
@@ -4417,6 +4414,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
4417 return addr; 4414 return addr;
4418} 4415}
4419 4416
4417#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4418 defined(CONFIG_PREEMPT_TRACER))
4419
4420void __kprobes add_preempt_count(int val) 4420void __kprobes add_preempt_count(int val)
4421{ 4421{
4422#ifdef CONFIG_DEBUG_PREEMPT 4422#ifdef CONFIG_DEBUG_PREEMPT
@@ -4543,15 +4543,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
4543/* 4543/*
4544 * schedule() is the main scheduler function. 4544 * schedule() is the main scheduler function.
4545 */ 4545 */
4546asmlinkage void __sched schedule(void) 4546asmlinkage void __sched __schedule(void)
4547{ 4547{
4548 struct task_struct *prev, *next; 4548 struct task_struct *prev, *next;
4549 unsigned long *switch_count; 4549 unsigned long *switch_count;
4550 struct rq *rq; 4550 struct rq *rq;
4551 int cpu; 4551 int cpu;
4552 4552
4553need_resched:
4554 preempt_disable();
4555 cpu = smp_processor_id(); 4553 cpu = smp_processor_id();
4556 rq = cpu_rq(cpu); 4554 rq = cpu_rq(cpu);
4557 rcu_qsctr_inc(cpu); 4555 rcu_qsctr_inc(cpu);
@@ -4608,13 +4606,80 @@ need_resched_nonpreemptible:
4608 4606
4609 if (unlikely(reacquire_kernel_lock(current) < 0)) 4607 if (unlikely(reacquire_kernel_lock(current) < 0))
4610 goto need_resched_nonpreemptible; 4608 goto need_resched_nonpreemptible;
4609}
4611 4610
4611asmlinkage void __sched schedule(void)
4612{
4613need_resched:
4614 preempt_disable();
4615 __schedule();
4612 preempt_enable_no_resched(); 4616 preempt_enable_no_resched();
4613 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 4617 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
4614 goto need_resched; 4618 goto need_resched;
4615} 4619}
4616EXPORT_SYMBOL(schedule); 4620EXPORT_SYMBOL(schedule);
4617 4621
4622#ifdef CONFIG_SMP
4623/*
4624 * Look out! "owner" is an entirely speculative pointer
4625 * access and not reliable.
4626 */
4627int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4628{
4629 unsigned int cpu;
4630 struct rq *rq;
4631
4632 if (!sched_feat(OWNER_SPIN))
4633 return 0;
4634
4635#ifdef CONFIG_DEBUG_PAGEALLOC
4636 /*
4637 * Need to access the cpu field knowing that
4638 * DEBUG_PAGEALLOC could have unmapped it if
4639 * the mutex owner just released it and exited.
4640 */
4641 if (probe_kernel_address(&owner->cpu, cpu))
4642 goto out;
4643#else
4644 cpu = owner->cpu;
4645#endif
4646
4647 /*
4648 * Even if the access succeeded (likely case),
4649 * the cpu field may no longer be valid.
4650 */
4651 if (cpu >= nr_cpumask_bits)
4652 goto out;
4653
4654 /*
4655 * We need to validate that we can do a
4656 * get_cpu() and that we have the percpu area.
4657 */
4658 if (!cpu_online(cpu))
4659 goto out;
4660
4661 rq = cpu_rq(cpu);
4662
4663 for (;;) {
4664 /*
4665 * Owner changed, break to re-assess state.
4666 */
4667 if (lock->owner != owner)
4668 break;
4669
4670 /*
4671 * Is that owner really running on that cpu?
4672 */
4673 if (task_thread_info(rq->curr) != owner || need_resched())
4674 return 0;
4675
4676 cpu_relax();
4677 }
4678out:
4679 return 1;
4680}
4681#endif
4682
4618#ifdef CONFIG_PREEMPT 4683#ifdef CONFIG_PREEMPT
4619/* 4684/*
4620 * this is the entry point to schedule() from in-kernel preemption 4685 * this is the entry point to schedule() from in-kernel preemption
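
mutex_spin_on_owner() above keeps spinning only while the recorded owner is still on a CPU, and bails out as soon as the owner changes or stops running. The following is a rough userspace model of that decision, assuming invented types and C11 atomics rather than the kernel's rq/thread_info machinery.

/* Userspace sketch of adaptive spinning: spin only while the lock owner is
 * believed to be running, otherwise give up and (notionally) block.
 * Illustrative only; the kernel checks rq->curr and need_resched() instead. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct owner_info {
        atomic_bool running;                    /* is the owner currently on a CPU? */
};

struct spin_mutex {
        _Atomic(struct owner_info *) owner;     /* NULL when the lock is free */
};

/* Returns true if the waiter should retry the lock, false if it should block. */
static bool spin_on_owner(struct spin_mutex *m, struct owner_info *owner)
{
        for (;;) {
                /* owner changed: stop spinning on it and re-check the lock */
                if (atomic_load(&m->owner) != owner)
                        return true;
                /* owner was preempted: spinning would only waste cycles */
                if (!atomic_load(&owner->running))
                        return false;
                /* a cpu_relax() equivalent would go here */
        }
}

int main(void)
{
        struct owner_info holder = { .running = false };
        struct spin_mutex m;

        atomic_store(&m.owner, &holder);

        /* The owner is not running, so the waiter should block instead of spin. */
        printf("retry the lock? %s\n", spin_on_owner(&m, &holder) ? "yes" : "no");
        return 0;
}
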
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..7ec82c1c61c5 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,12 @@
24 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat 24 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
25 * consistent between cpus (never more than 2 jiffies difference). 25 * consistent between cpus (never more than 2 jiffies difference).
26 */ 26 */
27#include <linux/sched.h>
28#include <linux/percpu.h>
29#include <linux/spinlock.h> 27#include <linux/spinlock.h>
30#include <linux/ktime.h> 28#include <linux/hardirq.h>
31#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/percpu.h>
31#include <linux/ktime.h>
32#include <linux/sched.h>
32 33
33/* 34/*
34 * Scheduler clock - returns current time in nanosec units. 35 * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +44,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
43static __read_mostly int sched_clock_running; 44static __read_mostly int sched_clock_running;
44 45
45#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 46#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
47__read_mostly int sched_clock_stable;
48#else
49static const int sched_clock_stable = 1;
50#endif
46 51
47struct sched_clock_data { 52struct sched_clock_data {
48 /* 53 /*
@@ -87,7 +92,7 @@ void sched_clock_init(void)
87} 92}
88 93
89/* 94/*
90 * min,max except they take wrapping into account 95 * min, max except they take wrapping into account
91 */ 96 */
92 97
93static inline u64 wrap_min(u64 x, u64 y) 98static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +121,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
116 if (unlikely(delta < 0)) 121 if (unlikely(delta < 0))
117 delta = 0; 122 delta = 0;
118 123
124 if (unlikely(!sched_clock_running))
125 return 0ull;
126
119 /* 127 /*
120 * scd->clock = clamp(scd->tick_gtod + delta, 128 * scd->clock = clamp(scd->tick_gtod + delta,
121 * max(scd->tick_gtod, scd->clock), 129 * max(scd->tick_gtod, scd->clock),
122 * scd->tick_gtod + TICK_NSEC); 130 * scd->tick_gtod + TICK_NSEC);
123 */ 131 */
124 132
125 clock = scd->tick_gtod + delta; 133 clock = scd->tick_gtod + delta;
@@ -148,8 +156,20 @@ static void lock_double_clock(struct sched_clock_data *data1,
148 156
149u64 sched_clock_cpu(int cpu) 157u64 sched_clock_cpu(int cpu)
150{ 158{
151 struct sched_clock_data *scd = cpu_sdc(cpu);
152 u64 now, clock, this_clock, remote_clock; 159 u64 now, clock, this_clock, remote_clock;
160 struct sched_clock_data *scd;
161
162 if (sched_clock_stable)
163 return sched_clock();
164
165 scd = cpu_sdc(cpu);
166
167 /*
168 * Normally this is not called in NMI context - but if it is,
169 * trying to do any locking here is totally lethal.
170 */
171 if (unlikely(in_nmi()))
172 return scd->clock;
153 173
154 if (unlikely(!sched_clock_running)) 174 if (unlikely(!sched_clock_running))
155 return 0ull; 175 return 0ull;
@@ -193,6 +213,8 @@ u64 sched_clock_cpu(int cpu)
193 return clock; 213 return clock;
194} 214}
195 215
216#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
217
196void sched_clock_tick(void) 218void sched_clock_tick(void)
197{ 219{
198 struct sched_clock_data *scd = this_scd(); 220 struct sched_clock_data *scd = this_scd();
@@ -235,22 +257,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
235} 257}
236EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 258EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
237 259
238#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 260#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
239
240void sched_clock_init(void)
241{
242 sched_clock_running = 1;
243}
244
245u64 sched_clock_cpu(int cpu)
246{
247 if (unlikely(!sched_clock_running))
248 return 0;
249
250 return sched_clock();
251}
252
253#endif
254 261
255unsigned long long cpu_clock(int cpu) 262unsigned long long cpu_clock(int cpu)
256{ 263{
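
The sched_clock.c hunks keep the clamp described in the comment: the per-cpu clock may advance past tick_gtod by at most TICK_NSEC and may never go backwards. A standalone sketch of that clamp with made-up values, mirroring the wrap-aware min/max helpers:

/* Sketch of the per-cpu clock clamping described in the comment above:
 * clock = clamp(tick_gtod + delta, max(tick_gtod, clock), tick_gtod + TICK_NSEC).
 * The wrap-aware min/max mirror wrap_min()/wrap_max(); the values are invented. */
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL    /* pretend 1ms ticks for the example */

static uint64_t wrap_min(uint64_t x, uint64_t y)
{
        return (int64_t)(x - y) < 0 ? x : y;
}

static uint64_t wrap_max(uint64_t x, uint64_t y)
{
        return (int64_t)(x - y) > 0 ? x : y;
}

int main(void)
{
        uint64_t tick_gtod = 5000000;   /* GTOD timestamp taken at the last tick */
        uint64_t clock     = 5200000;   /* last value handed out */
        uint64_t delta     = 9000000;   /* raw sched_clock() progress (too large) */

        uint64_t min_clock = wrap_max(tick_gtod, clock);
        uint64_t max_clock = tick_gtod + TICK_NSEC;
        uint64_t new_clock = tick_gtod + delta;

        new_clock = wrap_max(new_clock, min_clock);     /* never go backwards */
        new_clock = wrap_min(new_clock, max_clock);     /* never run ahead of the tick */

        printf("clamped clock: %llu\n", (unsigned long long)new_clock);
        return 0;
}
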
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index da5d93b5d2c6..07bc02e99ab1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -13,3 +13,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1) 15SCHED_FEAT(LAST_BUDDY, 1)
16SCHED_FEAT(OWNER_SPIN, 1)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 57d3f67f6f38..7571bcb71be4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
21#include <linux/freezer.h> 21#include <linux/freezer.h>
22#include <linux/kthread.h> 22#include <linux/kthread.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/ftrace.h>
24#include <linux/smp.h> 25#include <linux/smp.h>
25#include <linux/tick.h> 26#include <linux/tick.h>
26 27
@@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip)
79 WARN_ON_ONCE(in_irq()); 80 WARN_ON_ONCE(in_irq());
80 81
81 raw_local_irq_save(flags); 82 raw_local_irq_save(flags);
82 add_preempt_count(SOFTIRQ_OFFSET); 83 /*
84 * The preempt tracer hooks into add_preempt_count and will break
85 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
86 * is set and before current->softirq_enabled is cleared.
87 * We must manually increment preempt_count here and manually
88 * call the trace_preempt_off later.
89 */
90 preempt_count() += SOFTIRQ_OFFSET;
83 /* 91 /*
84 * Were softirqs turned off above: 92 * Were softirqs turned off above:
85 */ 93 */
86 if (softirq_count() == SOFTIRQ_OFFSET) 94 if (softirq_count() == SOFTIRQ_OFFSET)
87 trace_softirqs_off(ip); 95 trace_softirqs_off(ip);
88 raw_local_irq_restore(flags); 96 raw_local_irq_restore(flags);
97
98 if (preempt_count() == SOFTIRQ_OFFSET)
99 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
89} 100}
90#else /* !CONFIG_TRACE_IRQFLAGS */ 101#else /* !CONFIG_TRACE_IRQFLAGS */
91static inline void __local_bh_disable(unsigned long ip) 102static inline void __local_bh_disable(unsigned long ip)
@@ -180,7 +191,7 @@ asmlinkage void __do_softirq(void)
180 account_system_vtime(current); 191 account_system_vtime(current);
181 192
182 __local_bh_disable((unsigned long)__builtin_return_address(0)); 193 __local_bh_disable((unsigned long)__builtin_return_address(0));
183 trace_softirq_enter(); 194 lockdep_softirq_enter();
184 195
185 cpu = smp_processor_id(); 196 cpu = smp_processor_id();
186restart: 197restart:
@@ -220,7 +231,7 @@ restart:
220 if (pending) 231 if (pending)
221 wakeup_softirqd(); 232 wakeup_softirqd();
222 233
223 trace_softirq_exit(); 234 lockdep_softirq_exit();
224 235
225 account_system_vtime(current); 236 account_system_vtime(current);
226 _local_bh_enable(); 237 _local_bh_enable();
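
The softirq change is about ordering: the raw preempt_count increment happens first, the dependent softirq state is updated, and only then is the tracing hook allowed to run. A small illustrative sketch of that "bring all state up to date before calling the instrumentation hook" ordering, with invented names:

/* Sketch of the ordering constraint described in the comment above: the
 * instrumentation hook runs only after every flag it may inspect is in its
 * final state. The counter, flag and hook below are invented for the example. */
#include <stdio.h>

#define SOFTIRQ_OFFSET 0x100

static unsigned int preempt_count;
static int softirqs_enabled = 1;

static void trace_hook(const char *what)
{
        /* In the kernel this callback may call into lockdep, so it must only
         * ever observe a consistent view of the flags it reads. */
        printf("hook: %s (preempt_count=%#x, softirqs_enabled=%d)\n",
               what, preempt_count, softirqs_enabled);
}

static void local_bh_disable_sketch(void)
{
        preempt_count += SOFTIRQ_OFFSET;        /* raw increment, no hook yet */
        softirqs_enabled = 0;                   /* update dependent state */
        trace_hook("softirqs off");             /* only now is instrumentation safe */
}

int main(void)
{
        local_bh_disable_sketch();
        return 0;
}
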
diff --git a/kernel/timer.c b/kernel/timer.c
index 13dd64fe143d..ef1c385bc572 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -491,14 +491,18 @@ static inline void debug_timer_free(struct timer_list *timer)
491 debug_object_free(timer, &timer_debug_descr); 491 debug_object_free(timer, &timer_debug_descr);
492} 492}
493 493
494static void __init_timer(struct timer_list *timer); 494static void __init_timer(struct timer_list *timer,
495 const char *name,
496 struct lock_class_key *key);
495 497
496void init_timer_on_stack(struct timer_list *timer) 498void init_timer_on_stack_key(struct timer_list *timer,
499 const char *name,
500 struct lock_class_key *key)
497{ 501{
498 debug_object_init_on_stack(timer, &timer_debug_descr); 502 debug_object_init_on_stack(timer, &timer_debug_descr);
499 __init_timer(timer); 503 __init_timer(timer, name, key);
500} 504}
501EXPORT_SYMBOL_GPL(init_timer_on_stack); 505EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
502 506
503void destroy_timer_on_stack(struct timer_list *timer) 507void destroy_timer_on_stack(struct timer_list *timer)
504{ 508{
@@ -512,7 +516,9 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
512static inline void debug_timer_deactivate(struct timer_list *timer) { } 516static inline void debug_timer_deactivate(struct timer_list *timer) { }
513#endif 517#endif
514 518
515static void __init_timer(struct timer_list *timer) 519static void __init_timer(struct timer_list *timer,
520 const char *name,
521 struct lock_class_key *key)
516{ 522{
517 timer->entry.next = NULL; 523 timer->entry.next = NULL;
518 timer->base = __raw_get_cpu_var(tvec_bases); 524 timer->base = __raw_get_cpu_var(tvec_bases);
@@ -521,6 +527,7 @@ static void __init_timer(struct timer_list *timer)
521 timer->start_pid = -1; 527 timer->start_pid = -1;
522 memset(timer->start_comm, 0, TASK_COMM_LEN); 528 memset(timer->start_comm, 0, TASK_COMM_LEN);
523#endif 529#endif
530 lockdep_init_map(&timer->lockdep_map, name, key, 0);
524} 531}
525 532
526/** 533/**
@@ -530,19 +537,23 @@ static void __init_timer(struct timer_list *timer)
530 * init_timer() must be done to a timer prior calling *any* of the 537 * init_timer() must be done to a timer prior calling *any* of the
531 * other timer functions. 538 * other timer functions.
532 */ 539 */
533void init_timer(struct timer_list *timer) 540void init_timer_key(struct timer_list *timer,
541 const char *name,
542 struct lock_class_key *key)
534{ 543{
535 debug_timer_init(timer); 544 debug_timer_init(timer);
536 __init_timer(timer); 545 __init_timer(timer, name, key);
537} 546}
538EXPORT_SYMBOL(init_timer); 547EXPORT_SYMBOL(init_timer_key);
539 548
540void init_timer_deferrable(struct timer_list *timer) 549void init_timer_deferrable_key(struct timer_list *timer,
550 const char *name,
551 struct lock_class_key *key)
541{ 552{
542 init_timer(timer); 553 init_timer_key(timer, name, key);
543 timer_set_deferrable(timer); 554 timer_set_deferrable(timer);
544} 555}
545EXPORT_SYMBOL(init_timer_deferrable); 556EXPORT_SYMBOL(init_timer_deferrable_key);
546 557
547static inline void detach_timer(struct timer_list *timer, 558static inline void detach_timer(struct timer_list *timer,
548 int clear_pending) 559 int clear_pending)
@@ -789,6 +800,15 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
789 */ 800 */
790int del_timer_sync(struct timer_list *timer) 801int del_timer_sync(struct timer_list *timer)
791{ 802{
803#ifdef CONFIG_LOCKDEP
804 unsigned long flags;
805
806 local_irq_save(flags);
807 lock_map_acquire(&timer->lockdep_map);
808 lock_map_release(&timer->lockdep_map);
809 local_irq_restore(flags);
810#endif
811
792 for (;;) { 812 for (;;) {
793 int ret = try_to_del_timer_sync(timer); 813 int ret = try_to_del_timer_sync(timer);
794 if (ret >= 0) 814 if (ret >= 0)
@@ -861,10 +881,36 @@ static inline void __run_timers(struct tvec_base *base)
861 881
862 set_running_timer(base, timer); 882 set_running_timer(base, timer);
863 detach_timer(timer, 1); 883 detach_timer(timer, 1);
884
864 spin_unlock_irq(&base->lock); 885 spin_unlock_irq(&base->lock);
865 { 886 {
866 int preempt_count = preempt_count(); 887 int preempt_count = preempt_count();
888
889#ifdef CONFIG_LOCKDEP
890 /*
891 * It is permissible to free the timer from
892 * inside the function that is called from
893 * it, this we need to take into account for
894 * lockdep too. To avoid bogus "held lock
895 * freed" warnings as well as problems when
896 * looking into timer->lockdep_map, make a
897 * copy and use that here.
898 */
899 struct lockdep_map lockdep_map =
900 timer->lockdep_map;
901#endif
902 /*
903 * Couple the lock chain with the lock chain at
904 * del_timer_sync() by acquiring the lock_map
905 * around the fn() call here and in
906 * del_timer_sync().
907 */
908 lock_map_acquire(&lockdep_map);
909
867 fn(data); 910 fn(data);
911
912 lock_map_release(&lockdep_map);
913
868 if (preempt_count != preempt_count()) { 914 if (preempt_count != preempt_count()) {
869 printk(KERN_ERR "huh, entered %p " 915 printk(KERN_ERR "huh, entered %p "
870 "with preempt_count %08x, exited" 916 "with preempt_count %08x, exited"
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 34e707e5ab87..95a0ad191f19 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
9config NOP_TRACER 9config NOP_TRACER
10 bool 10 bool
11 11
12config HAVE_FTRACE_NMI_ENTER
13 bool
14
12config HAVE_FUNCTION_TRACER 15config HAVE_FUNCTION_TRACER
13 bool 16 bool
14 17
@@ -31,12 +34,20 @@ config HAVE_FTRACE_MCOUNT_RECORD
31config HAVE_HW_BRANCH_TRACER 34config HAVE_HW_BRANCH_TRACER
32 bool 35 bool
33 36
37config HAVE_FTRACE_SYSCALLS
38 bool
39
34config TRACER_MAX_TRACE 40config TRACER_MAX_TRACE
35 bool 41 bool
36 42
37config RING_BUFFER 43config RING_BUFFER
38 bool 44 bool
39 45
46config FTRACE_NMI_ENTER
47 bool
48 depends on HAVE_FTRACE_NMI_ENTER
49 default y
50
40config TRACING 51config TRACING
41 bool 52 bool
42 select DEBUG_FS 53 select DEBUG_FS
@@ -44,13 +55,25 @@ config TRACING
44 select STACKTRACE if STACKTRACE_SUPPORT 55 select STACKTRACE if STACKTRACE_SUPPORT
45 select TRACEPOINTS 56 select TRACEPOINTS
46 select NOP_TRACER 57 select NOP_TRACER
58 select BINARY_PRINTF
59
60#
61# Minimum requirements an architecture has to meet for us to
62# be able to offer generic tracing facilities:
63#
64config TRACING_SUPPORT
65 bool
66 depends on TRACE_IRQFLAGS_SUPPORT
67 depends on STACKTRACE_SUPPORT
68 default y
69
70if TRACING_SUPPORT
47 71
48menu "Tracers" 72menu "Tracers"
49 73
50config FUNCTION_TRACER 74config FUNCTION_TRACER
51 bool "Kernel Function Tracer" 75 bool "Kernel Function Tracer"
52 depends on HAVE_FUNCTION_TRACER 76 depends on HAVE_FUNCTION_TRACER
53 depends on DEBUG_KERNEL
54 select FRAME_POINTER 77 select FRAME_POINTER
55 select KALLSYMS 78 select KALLSYMS
56 select TRACING 79 select TRACING
@@ -83,7 +106,6 @@ config IRQSOFF_TRACER
83 default n 106 default n
84 depends on TRACE_IRQFLAGS_SUPPORT 107 depends on TRACE_IRQFLAGS_SUPPORT
85 depends on GENERIC_TIME 108 depends on GENERIC_TIME
86 depends on DEBUG_KERNEL
87 select TRACE_IRQFLAGS 109 select TRACE_IRQFLAGS
88 select TRACING 110 select TRACING
89 select TRACER_MAX_TRACE 111 select TRACER_MAX_TRACE
@@ -106,7 +128,6 @@ config PREEMPT_TRACER
106 default n 128 default n
107 depends on GENERIC_TIME 129 depends on GENERIC_TIME
108 depends on PREEMPT 130 depends on PREEMPT
109 depends on DEBUG_KERNEL
110 select TRACING 131 select TRACING
111 select TRACER_MAX_TRACE 132 select TRACER_MAX_TRACE
112 help 133 help
@@ -127,13 +148,13 @@ config SYSPROF_TRACER
127 bool "Sysprof Tracer" 148 bool "Sysprof Tracer"
128 depends on X86 149 depends on X86
129 select TRACING 150 select TRACING
151 select CONTEXT_SWITCH_TRACER
130 help 152 help
131 This tracer provides the trace needed by the 'Sysprof' userspace 153 This tracer provides the trace needed by the 'Sysprof' userspace
132 tool. 154 tool.
133 155
134config SCHED_TRACER 156config SCHED_TRACER
135 bool "Scheduling Latency Tracer" 157 bool "Scheduling Latency Tracer"
136 depends on DEBUG_KERNEL
137 select TRACING 158 select TRACING
138 select CONTEXT_SWITCH_TRACER 159 select CONTEXT_SWITCH_TRACER
139 select TRACER_MAX_TRACE 160 select TRACER_MAX_TRACE
@@ -143,16 +164,29 @@ config SCHED_TRACER
143 164
144config CONTEXT_SWITCH_TRACER 165config CONTEXT_SWITCH_TRACER
145 bool "Trace process context switches" 166 bool "Trace process context switches"
146 depends on DEBUG_KERNEL
147 select TRACING 167 select TRACING
148 select MARKERS 168 select MARKERS
149 help 169 help
150 This tracer gets called from the context switch and records 170 This tracer gets called from the context switch and records
151 all switching of tasks. 171 all switching of tasks.
152 172
173config EVENT_TRACER
174 bool "Trace various events in the kernel"
175 select TRACING
176 help
177 This tracer hooks into various trace points in the kernel,
178 allowing the user to pick and choose which trace points they
179 want to trace.
180
181config FTRACE_SYSCALLS
182 bool "Trace syscalls"
183 depends on HAVE_FTRACE_SYSCALLS
184 select TRACING
185 help
186 Basic tracer to catch the syscall entry and exit events.
187
153config BOOT_TRACER 188config BOOT_TRACER
154 bool "Trace boot initcalls" 189 bool "Trace boot initcalls"
155 depends on DEBUG_KERNEL
156 select TRACING 190 select TRACING
157 select CONTEXT_SWITCH_TRACER 191 select CONTEXT_SWITCH_TRACER
158 help 192 help
@@ -165,13 +199,11 @@ config BOOT_TRACER
165 representation of the delays during initcalls - but the raw 199 representation of the delays during initcalls - but the raw
166 /debug/tracing/trace text output is readable too. 200 /debug/tracing/trace text output is readable too.
167 201
168 ( Note that tracing self tests can't be enabled if this tracer is 202 You must pass in ftrace=initcall to the kernel command line
169 selected, because the self-tests are an initcall as well and that 203 to enable this on bootup.
170 would invalidate the boot trace. )
171 204
172config TRACE_BRANCH_PROFILING 205config TRACE_BRANCH_PROFILING
173 bool "Trace likely/unlikely profiler" 206 bool "Trace likely/unlikely profiler"
174 depends on DEBUG_KERNEL
175 select TRACING 207 select TRACING
176 help 208 help
177 This tracer profiles all the likely and unlikely macros 209
@@ -224,7 +256,6 @@ config BRANCH_TRACER
224 256
225config POWER_TRACER 257config POWER_TRACER
226 bool "Trace power consumption behavior" 258 bool "Trace power consumption behavior"
227 depends on DEBUG_KERNEL
228 depends on X86 259 depends on X86
229 select TRACING 260 select TRACING
230 help 261 help
@@ -236,7 +267,6 @@ config POWER_TRACER
236config STACK_TRACER 267config STACK_TRACER
237 bool "Trace max stack" 268 bool "Trace max stack"
238 depends on HAVE_FUNCTION_TRACER 269 depends on HAVE_FUNCTION_TRACER
239 depends on DEBUG_KERNEL
240 select FUNCTION_TRACER 270 select FUNCTION_TRACER
241 select STACKTRACE 271 select STACKTRACE
242 select KALLSYMS 272 select KALLSYMS
@@ -266,11 +296,66 @@ config HW_BRANCH_TRACER
266 This tracer records all branches on the system in a circular 296 This tracer records all branches on the system in a circular
267 buffer giving access to the last N branches for each cpu. 297 buffer giving access to the last N branches for each cpu.
268 298
299config KMEMTRACE
300 bool "Trace SLAB allocations"
301 select TRACING
302 help
303 kmemtrace provides tracing for slab allocator functions, such as
304 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
305 data is then fed to the userspace application in order to analyse
306 allocation hotspots, internal fragmentation and so on, making it
307 possible to see how well an allocator performs, as well as debug
308 and profile kernel code.
309
310 This requires a userspace application to use. See
311 Documentation/vm/kmemtrace.txt for more information.
312
313 Saying Y will make the kernel somewhat larger and slower. However,
314 if you disable kmemtrace at run-time or boot-time, the performance
315 impact is minimal (depending on the arch the kernel is built for).
316
317 If unsure, say N.
318
319config WORKQUEUE_TRACER
320 bool "Trace workqueues"
321 select TRACING
322 help
323 The workqueue tracer provides some statistical information
324 about each cpu workqueue thread, such as the number of work
325 items inserted and executed since their creation. It can help
326 to evaluate the amount of work each of them has to perform.
327 For example, it can help a developer decide whether to
328 choose a per-cpu workqueue instead of a singlethreaded one.
329
330config BLK_DEV_IO_TRACE
331 bool "Support for tracing block io actions"
332 depends on SYSFS
333 depends on BLOCK
334 select RELAY
335 select DEBUG_FS
336 select TRACEPOINTS
337 select TRACING
338 select STACKTRACE
339 help
340 Say Y here if you want to be able to trace the block layer actions
341 on a given queue. Tracing allows you to see any traffic happening
342 on a block device queue. For more information (and the userspace
343 support tools needed), fetch the blktrace tools from:
344
345 git://git.kernel.dk/blktrace.git
346
347 Tracing also is possible using the ftrace interface, e.g.:
348
349 echo 1 > /sys/block/sda/sda1/trace/enable
350 echo blk > /sys/kernel/debug/tracing/current_tracer
351 cat /sys/kernel/debug/tracing/trace_pipe
352
353 If unsure, say N.
354
269config DYNAMIC_FTRACE 355config DYNAMIC_FTRACE
270 bool "enable/disable ftrace tracepoints dynamically" 356 bool "enable/disable ftrace tracepoints dynamically"
271 depends on FUNCTION_TRACER 357 depends on FUNCTION_TRACER
272 depends on HAVE_DYNAMIC_FTRACE 358 depends on HAVE_DYNAMIC_FTRACE
273 depends on DEBUG_KERNEL
274 default y 359 default y
275 help 360 help
276 This option will modify all the calls to ftrace dynamically 361 This option will modify all the calls to ftrace dynamically
@@ -296,7 +381,7 @@ config FTRACE_SELFTEST
296 381
297config FTRACE_STARTUP_TEST 382config FTRACE_STARTUP_TEST
298 bool "Perform a startup test on ftrace" 383 bool "Perform a startup test on ftrace"
299 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER 384 depends on TRACING
300 select FTRACE_SELFTEST 385 select FTRACE_SELFTEST
301 help 386 help
302 This option performs a series of startup tests on ftrace. On bootup 387 This option performs a series of startup tests on ftrace. On bootup
@@ -306,7 +391,7 @@ config FTRACE_STARTUP_TEST
306 391
307config MMIOTRACE 392config MMIOTRACE
308 bool "Memory mapped IO tracing" 393 bool "Memory mapped IO tracing"
309 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI 394 depends on HAVE_MMIOTRACE_SUPPORT && PCI
310 select TRACING 395 select TRACING
311 help 396 help
312 Mmiotrace traces Memory Mapped I/O access and is meant for 397 Mmiotrace traces Memory Mapped I/O access and is meant for
@@ -328,3 +413,6 @@ config MMIOTRACE_TEST
328 Say N, unless you absolutely know what you are doing. 413 Say N, unless you absolutely know what you are doing.
329 414
330endmenu 415endmenu
416
417endif # TRACING_SUPPORT
418
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653f..c3feea01c3e0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20 20
21obj-$(CONFIG_TRACING) += trace.o 21obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_clock.o
23obj-$(CONFIG_TRACING) += trace_output.o
24obj-$(CONFIG_TRACING) += trace_stat.o
25obj-$(CONFIG_TRACING) += trace_printk.o
22obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 26obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
23obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 27obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
24obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 28obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +37,12 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 37obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 38obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o 39obj-$(CONFIG_POWER_TRACER) += trace_power.o
40obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
41obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
42obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
43obj-$(CONFIG_EVENT_TRACER) += trace_events.o
44obj-$(CONFIG_EVENT_TRACER) += events.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
36 47
37libftrace-y := ftrace.o 48libftrace-y := ftrace.o
diff --git a/block/blktrace.c b/kernel/trace/blktrace.c
index 028120a0965a..1f32e4edf490 100644
--- a/block/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -24,10 +24,28 @@
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h> 26#include <trace/block.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include "trace_output.h"
28 29
29static unsigned int blktrace_seq __read_mostly = 1; 30static unsigned int blktrace_seq __read_mostly = 1;
30 31
32static struct trace_array *blk_tr;
33static int __read_mostly blk_tracer_enabled;
34
35/* Select an alternative, minimalistic output rather than the original one */
36#define TRACE_BLK_OPT_CLASSIC 0x1
37
38static struct tracer_opt blk_tracer_opts[] = {
39 /* Disable the minimalistic output by default */
40 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
41 { }
42};
43
44static struct tracer_flags blk_tracer_flags = {
45 .val = 0,
46 .opts = blk_tracer_opts,
47};
48
31/* Global reference count of probes */ 49/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex); 50static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0); 51static atomic_t blk_probes_ref = ATOMIC_INIT(0);
@@ -43,6 +61,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
43{ 61{
44 struct blk_io_trace *t; 62 struct blk_io_trace *t;
45 63
64 if (!bt->rchan)
65 return;
66
46 t = relay_reserve(bt->rchan, sizeof(*t) + len); 67 t = relay_reserve(bt->rchan, sizeof(*t) + len);
47 if (t) { 68 if (t) {
48 const int cpu = smp_processor_id(); 69 const int cpu = smp_processor_id();
@@ -90,6 +111,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
90 unsigned long flags; 111 unsigned long flags;
91 char *buf; 112 char *buf;
92 113
114 if (blk_tr) {
115 va_start(args, fmt);
116 ftrace_vprintk(fmt, args);
117 va_end(args);
118 return;
119 }
120
121 if (!bt->msg_data)
122 return;
123
93 local_irq_save(flags); 124 local_irq_save(flags);
94 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 125 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
95 va_start(args, fmt); 126 va_start(args, fmt);
@@ -117,11 +148,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
117/* 148/*
118 * Data direction bit lookup 149 * Data direction bit lookup
119 */ 150 */
120static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; 151static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
152 BLK_TC_ACT(BLK_TC_WRITE) };
121 153
122/* The ilog2() calls fall out because they're constant */ 154/* The ilog2() calls fall out because they're constant */
123#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ 155#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
124 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) 156 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
125 157
126/* 158/*
127 * The worker for the various blk_add_trace*() types. Fills out a 159 * The worker for the various blk_add_trace*() types. Fills out a
@@ -131,13 +163,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
131 int rw, u32 what, int error, int pdu_len, void *pdu_data) 163 int rw, u32 what, int error, int pdu_len, void *pdu_data)
132{ 164{
133 struct task_struct *tsk = current; 165 struct task_struct *tsk = current;
166 struct ring_buffer_event *event = NULL;
134 struct blk_io_trace *t; 167 struct blk_io_trace *t;
135 unsigned long flags; 168 unsigned long flags = 0;
136 unsigned long *sequence; 169 unsigned long *sequence;
137 pid_t pid; 170 pid_t pid;
138 int cpu; 171 int cpu, pc = 0;
139 172
140 if (unlikely(bt->trace_state != Blktrace_running)) 173 if (unlikely(bt->trace_state != Blktrace_running ||
174 !blk_tracer_enabled))
141 return; 175 return;
142 176
143 what |= ddir_act[rw & WRITE]; 177 what |= ddir_act[rw & WRITE];
@@ -150,6 +184,20 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
150 pid = tsk->pid; 184 pid = tsk->pid;
151 if (unlikely(act_log_check(bt, what, sector, pid))) 185 if (unlikely(act_log_check(bt, what, sector, pid)))
152 return; 186 return;
187 cpu = raw_smp_processor_id();
188
189 if (blk_tr) {
190 tracing_record_cmdline(current);
191
192 pc = preempt_count();
193 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
194 sizeof(*t) + pdu_len,
195 0, pc);
196 if (!event)
197 return;
198 t = ring_buffer_event_data(event);
199 goto record_it;
200 }
153 201
154 /* 202 /*
155 * A word about the locking here - we disable interrupts to reserve 203 * A word about the locking here - we disable interrupts to reserve
@@ -163,23 +211,35 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
163 211
164 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); 212 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
165 if (t) { 213 if (t) {
166 cpu = smp_processor_id();
167 sequence = per_cpu_ptr(bt->sequence, cpu); 214 sequence = per_cpu_ptr(bt->sequence, cpu);
168 215
169 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 216 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
170 t->sequence = ++(*sequence); 217 t->sequence = ++(*sequence);
171 t->time = ktime_to_ns(ktime_get()); 218 t->time = ktime_to_ns(ktime_get());
219record_it:
220 /*
221 * These two are not needed in ftrace as they are in the
222 * generic trace_entry, filled by tracing_generic_entry_update,
223 * but for the trace_event->bin() synthesizer benefit we do it
224 * here too.
225 */
226 t->cpu = cpu;
227 t->pid = pid;
228
172 t->sector = sector; 229 t->sector = sector;
173 t->bytes = bytes; 230 t->bytes = bytes;
174 t->action = what; 231 t->action = what;
175 t->pid = pid;
176 t->device = bt->dev; 232 t->device = bt->dev;
177 t->cpu = cpu;
178 t->error = error; 233 t->error = error;
179 t->pdu_len = pdu_len; 234 t->pdu_len = pdu_len;
180 235
181 if (pdu_len) 236 if (pdu_len)
182 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 237 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
238
239 if (blk_tr) {
240 trace_buffer_unlock_commit(blk_tr, event, 0, pc);
241 return;
242 }
183 } 243 }
184 244
185 local_irq_restore(flags); 245 local_irq_restore(flags);
@@ -385,7 +445,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
385 atomic_set(&bt->dropped, 0); 445 atomic_set(&bt->dropped, 0);
386 446
387 ret = -EIO; 447 ret = -EIO;
388 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); 448 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
449 &blk_dropped_fops);
389 if (!bt->dropped_file) 450 if (!bt->dropped_file)
390 goto err; 451 goto err;
391 452
@@ -467,10 +528,10 @@ EXPORT_SYMBOL_GPL(blk_trace_setup);
467 528
468int blk_trace_startstop(struct request_queue *q, int start) 529int blk_trace_startstop(struct request_queue *q, int start)
469{ 530{
470 struct blk_trace *bt;
471 int ret; 531 int ret;
532 struct blk_trace *bt = q->blk_trace;
472 533
473 if ((bt = q->blk_trace) == NULL) 534 if (bt == NULL)
474 return -EINVAL; 535 return -EINVAL;
475 536
476 /* 537 /*
@@ -503,7 +564,7 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop);
503/** 564/**
504 * blk_trace_ioctl: - handle the ioctls associated with tracing 565 * blk_trace_ioctl: - handle the ioctls associated with tracing
505 * @bdev: the block device 566 * @bdev: the block device
506 * @cmd: the ioctl cmd 567 * @cmd: the ioctl cmd
507 * @arg: the argument data, if any 568 * @arg: the argument data, if any
508 * 569 *
509 **/ 570 **/
@@ -606,12 +667,14 @@ static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
606 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 667 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
607} 668}
608 669
609static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) 670static void blk_add_trace_rq_requeue(struct request_queue *q,
671 struct request *rq)
610{ 672{
611 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 673 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
612} 674}
613 675
614static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) 676static void blk_add_trace_rq_complete(struct request_queue *q,
677 struct request *rq)
615{ 678{
616 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 679 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
617} 680}
@@ -648,12 +711,14 @@ static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
648 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 711 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
649} 712}
650 713
651static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) 714static void blk_add_trace_bio_backmerge(struct request_queue *q,
715 struct bio *bio)
652{ 716{
653 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 717 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
654} 718}
655 719
656static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) 720static void blk_add_trace_bio_frontmerge(struct request_queue *q,
721 struct bio *bio)
657{ 722{
658 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 723 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
659} 724}
@@ -663,7 +728,8 @@ static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
663 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 728 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
664} 729}
665 730
666static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) 731static void blk_add_trace_getrq(struct request_queue *q,
732 struct bio *bio, int rw)
667{ 733{
668 if (bio) 734 if (bio)
669 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 735 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
@@ -676,7 +742,8 @@ static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw
676} 742}
677 743
678 744
679static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) 745static void blk_add_trace_sleeprq(struct request_queue *q,
746 struct bio *bio, int rw)
680{ 747{
681 if (bio) 748 if (bio)
682 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 749 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
@@ -684,7 +751,8 @@ static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int
684 struct blk_trace *bt = q->blk_trace; 751 struct blk_trace *bt = q->blk_trace;
685 752
686 if (bt) 753 if (bt)
687 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); 754 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
755 0, 0, NULL);
688 } 756 }
689} 757}
690 758
@@ -858,3 +926,612 @@ static void blk_unregister_tracepoints(void)
858 926
859 tracepoint_synchronize_unregister(); 927 tracepoint_synchronize_unregister();
860} 928}
929
930/*
931 * struct blk_io_tracer formatting routines
932 */
933
934static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
935{
936 int i = 0;
937
938 if (t->action & BLK_TC_DISCARD)
939 rwbs[i++] = 'D';
940 else if (t->action & BLK_TC_WRITE)
941 rwbs[i++] = 'W';
942 else if (t->bytes)
943 rwbs[i++] = 'R';
944 else
945 rwbs[i++] = 'N';
946
947 if (t->action & BLK_TC_AHEAD)
948 rwbs[i++] = 'A';
949 if (t->action & BLK_TC_BARRIER)
950 rwbs[i++] = 'B';
951 if (t->action & BLK_TC_SYNC)
952 rwbs[i++] = 'S';
953 if (t->action & BLK_TC_META)
954 rwbs[i++] = 'M';
955
956 rwbs[i] = '\0';
957}
958
959static inline
960const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
961{
962 return (const struct blk_io_trace *)ent;
963}
964
965static inline const void *pdu_start(const struct trace_entry *ent)
966{
967 return te_blk_io_trace(ent) + 1;
968}
969
970static inline u32 t_sec(const struct trace_entry *ent)
971{
972 return te_blk_io_trace(ent)->bytes >> 9;
973}
974
975static inline unsigned long long t_sector(const struct trace_entry *ent)
976{
977 return te_blk_io_trace(ent)->sector;
978}
979
980static inline __u16 t_error(const struct trace_entry *ent)
981{
982 return te_blk_io_trace(ent)->error;
983}
984
985static __u64 get_pdu_int(const struct trace_entry *ent)
986{
987 const __u64 *val = pdu_start(ent);
988 return be64_to_cpu(*val);
989}
990
991static void get_pdu_remap(const struct trace_entry *ent,
992 struct blk_io_trace_remap *r)
993{
994 const struct blk_io_trace_remap *__r = pdu_start(ent);
995 __u64 sector = __r->sector;
996
997 r->device = be32_to_cpu(__r->device);
998 r->device_from = be32_to_cpu(__r->device_from);
999 r->sector = be64_to_cpu(sector);
1000}
1001
1002static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
1003{
1004 char rwbs[6];
1005 unsigned long long ts = ns2usecs(iter->ts);
1006 unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
1007 unsigned secs = (unsigned long)ts;
1008 const struct trace_entry *ent = iter->ent;
1009 const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
1010
1011 fill_rwbs(rwbs, t);
1012
1013 return trace_seq_printf(&iter->seq,
1014 "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
1015 MAJOR(t->device), MINOR(t->device), iter->cpu,
1016 secs, usec_rem, ent->pid, act, rwbs);
1017}
1018
1019static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
1020 const char *act)
1021{
1022 char rwbs[6];
1023 fill_rwbs(rwbs, t);
1024 return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
1025 MAJOR(t->device), MINOR(t->device), act, rwbs);
1026}
1027
1028static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1029{
1030 const char *cmd = trace_find_cmdline(ent->pid);
1031
1032 if (t_sec(ent))
1033 return trace_seq_printf(s, "%llu + %u [%s]\n",
1034 t_sector(ent), t_sec(ent), cmd);
1035 return trace_seq_printf(s, "[%s]\n", cmd);
1036}
1037
1038static int blk_log_with_error(struct trace_seq *s,
1039 const struct trace_entry *ent)
1040{
1041 if (t_sec(ent))
1042 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
1043 t_sec(ent), t_error(ent));
1044 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
1045}
1046
1047static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1048{
1049 struct blk_io_trace_remap r = { .device = 0, };
1050
1051 get_pdu_remap(ent, &r);
1052 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1053 t_sector(ent),
1054 t_sec(ent), MAJOR(r.device), MINOR(r.device),
1055 (unsigned long long)r.sector);
1056}
1057
1058static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1059{
1060 return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
1061}
1062
1063static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1064{
1065 return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
1066 get_pdu_int(ent));
1067}
1068
1069static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1070{
1071 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1072 get_pdu_int(ent), trace_find_cmdline(ent->pid));
1073}
1074
1075/*
1076 * struct tracer operations
1077 */
1078
1079static void blk_tracer_print_header(struct seq_file *m)
1080{
1081 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1082 return;
1083 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1084 "# | | | | | |\n");
1085}
1086
1087static void blk_tracer_start(struct trace_array *tr)
1088{
1089 mutex_lock(&blk_probe_mutex);
1090 if (atomic_add_return(1, &blk_probes_ref) == 1)
1091 if (blk_register_tracepoints())
1092 atomic_dec(&blk_probes_ref);
1093 mutex_unlock(&blk_probe_mutex);
1094 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1095}
1096
1097static int blk_tracer_init(struct trace_array *tr)
1098{
1099 blk_tr = tr;
1100 blk_tracer_start(tr);
1101 mutex_lock(&blk_probe_mutex);
1102 blk_tracer_enabled++;
1103 mutex_unlock(&blk_probe_mutex);
1104 return 0;
1105}
1106
1107static void blk_tracer_stop(struct trace_array *tr)
1108{
1109 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1110 mutex_lock(&blk_probe_mutex);
1111 if (atomic_dec_and_test(&blk_probes_ref))
1112 blk_unregister_tracepoints();
1113 mutex_unlock(&blk_probe_mutex);
1114}
1115
1116static void blk_tracer_reset(struct trace_array *tr)
1117{
1118 if (!atomic_read(&blk_probes_ref))
1119 return;
1120
1121 mutex_lock(&blk_probe_mutex);
1122 blk_tracer_enabled--;
1123 WARN_ON(blk_tracer_enabled < 0);
1124 mutex_unlock(&blk_probe_mutex);
1125
1126 blk_tracer_stop(tr);
1127}
1128
1129static struct {
1130 const char *act[2];
1131 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1132} what2act[] __read_mostly = {
1133 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1134 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1135 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1136 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1137 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1138 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1139 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1140 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1141 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1142 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1143 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1144 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1145 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1146 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1147 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1148};
1149
1150static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1151 int flags)
1152{
1153 struct trace_seq *s = &iter->seq;
1154 const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1155 const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1156 int ret;
1157
1158 if (!trace_print_context(iter))
1159 return TRACE_TYPE_PARTIAL_LINE;
1160
1161 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1162 ret = trace_seq_printf(s, "Bad pc action %x\n", what);
1163 else {
1164 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1165 ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
1166 if (ret)
1167 ret = what2act[what].print(s, iter->ent);
1168 }
1169
1170 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1171}
1172
1173static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1174{
1175 struct trace_seq *s = &iter->seq;
1176 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1177 const int offset = offsetof(struct blk_io_trace, sector);
1178 struct blk_io_trace old = {
1179 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1180 .time = ns2usecs(iter->ts),
1181 };
1182
1183 if (!trace_seq_putmem(s, &old, offset))
1184 return 0;
1185 return trace_seq_putmem(s, &t->sector,
1186 sizeof(old) - offset + t->pdu_len);
1187}
1188
1189static enum print_line_t
1190blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
1191{
1192 return blk_trace_synthesize_old_trace(iter) ?
1193 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1194}
1195
1196static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1197{
1198 const struct blk_io_trace *t;
1199 u16 what;
1200 int ret;
1201
1202 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1203 return TRACE_TYPE_UNHANDLED;
1204
1205 t = (const struct blk_io_trace *)iter->ent;
1206 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1207
1208 if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
1209 ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
1210 else {
1211 const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1212 ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
1213 if (ret)
1214 ret = what2act[what].print(&iter->seq, iter->ent);
1215 }
1216
1217 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1218}
1219
1220static struct tracer blk_tracer __read_mostly = {
1221 .name = "blk",
1222 .init = blk_tracer_init,
1223 .reset = blk_tracer_reset,
1224 .start = blk_tracer_start,
1225 .stop = blk_tracer_stop,
1226 .print_header = blk_tracer_print_header,
1227 .print_line = blk_tracer_print_line,
1228 .flags = &blk_tracer_flags,
1229};
1230
1231static struct trace_event trace_blk_event = {
1232 .type = TRACE_BLK,
1233 .trace = blk_trace_event_print,
1234 .binary = blk_trace_event_print_binary,
1235};
1236
1237static int __init init_blk_tracer(void)
1238{
1239 if (!register_ftrace_event(&trace_blk_event)) {
1240 pr_warning("Warning: could not register block events\n");
1241 return 1;
1242 }
1243
1244 if (register_tracer(&blk_tracer) != 0) {
1245 pr_warning("Warning: could not register the block tracer\n");
1246 unregister_ftrace_event(&trace_blk_event);
1247 return 1;
1248 }
1249
1250 return 0;
1251}
1252
1253device_initcall(init_blk_tracer);
1254
1255static int blk_trace_remove_queue(struct request_queue *q)
1256{
1257 struct blk_trace *bt;
1258
1259 bt = xchg(&q->blk_trace, NULL);
1260 if (bt == NULL)
1261 return -EINVAL;
1262
1263 kfree(bt);
1264 return 0;
1265}
1266
1267/*
1268 * Setup everything required to start tracing
1269 */
1270static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1271{
1272 struct blk_trace *old_bt, *bt = NULL;
1273 int ret;
1274
1275 ret = -ENOMEM;
1276 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1277 if (!bt)
1278 goto err;
1279
1280 bt->dev = dev;
1281 bt->act_mask = (u16)-1;
1282 bt->end_lba = -1ULL;
1283 bt->trace_state = Blktrace_running;
1284
1285 old_bt = xchg(&q->blk_trace, bt);
1286 if (old_bt != NULL) {
1287 (void)xchg(&q->blk_trace, old_bt);
1288 kfree(bt);
1289 ret = -EBUSY;
1290 }
1291 return 0;
1292err:
1293 return ret;
1294}
1295
1296/*
1297 * sysfs interface to enable and configure tracing
1298 */
1299
1300static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
1301 struct device_attribute *attr,
1302 char *buf)
1303{
1304 struct hd_struct *p = dev_to_part(dev);
1305 struct block_device *bdev;
1306 ssize_t ret = -ENXIO;
1307
1308 lock_kernel();
1309 bdev = bdget(part_devt(p));
1310 if (bdev != NULL) {
1311 struct request_queue *q = bdev_get_queue(bdev);
1312
1313 if (q != NULL) {
1314 mutex_lock(&bdev->bd_mutex);
1315 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1316 mutex_unlock(&bdev->bd_mutex);
1317 }
1318
1319 bdput(bdev);
1320 }
1321
1322 unlock_kernel();
1323 return ret;
1324}
1325
1326static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
1327 struct device_attribute *attr,
1328 const char *buf, size_t count)
1329{
1330 struct block_device *bdev;
1331 struct request_queue *q;
1332 struct hd_struct *p;
1333 int value;
1334 ssize_t ret = -ENXIO;
1335
1336 if (count == 0 || sscanf(buf, "%d", &value) != 1)
1337 goto out;
1338
1339 lock_kernel();
1340 p = dev_to_part(dev);
1341 bdev = bdget(part_devt(p));
1342 if (bdev == NULL)
1343 goto out_unlock_kernel;
1344
1345 q = bdev_get_queue(bdev);
1346 if (q == NULL)
1347 goto out_bdput;
1348
1349 mutex_lock(&bdev->bd_mutex);
1350 if (value)
1351 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1352 else
1353 ret = blk_trace_remove_queue(q);
1354 mutex_unlock(&bdev->bd_mutex);
1355
1356 if (ret == 0)
1357 ret = count;
1358out_bdput:
1359 bdput(bdev);
1360out_unlock_kernel:
1361 unlock_kernel();
1362out:
1363 return ret;
1364}
1365
1366static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1367 struct device_attribute *attr,
1368 char *buf);
1369static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1370 struct device_attribute *attr,
1371 const char *buf, size_t count);
1372#define BLK_TRACE_DEVICE_ATTR(_name) \
1373 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1374 sysfs_blk_trace_attr_show, \
1375 sysfs_blk_trace_attr_store)
1376
1377static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
1378 sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
1379static BLK_TRACE_DEVICE_ATTR(act_mask);
1380static BLK_TRACE_DEVICE_ATTR(pid);
1381static BLK_TRACE_DEVICE_ATTR(start_lba);
1382static BLK_TRACE_DEVICE_ATTR(end_lba);
1383
1384static struct attribute *blk_trace_attrs[] = {
1385 &dev_attr_enable.attr,
1386 &dev_attr_act_mask.attr,
1387 &dev_attr_pid.attr,
1388 &dev_attr_start_lba.attr,
1389 &dev_attr_end_lba.attr,
1390 NULL
1391};
1392
1393struct attribute_group blk_trace_attr_group = {
1394 .name = "trace",
1395 .attrs = blk_trace_attrs,
1396};
1397
1398static int blk_str2act_mask(const char *str)
1399{
1400 int mask = 0;
1401 char *copy = kstrdup(str, GFP_KERNEL), *s;
1402
1403 if (copy == NULL)
1404 return -ENOMEM;
1405
1406 s = strstrip(copy);
1407
1408 while (1) {
1409 char *sep = strchr(s, ',');
1410
1411 if (sep != NULL)
1412 *sep = '\0';
1413
1414 if (strcasecmp(s, "barrier") == 0)
1415 mask |= BLK_TC_BARRIER;
1416 else if (strcasecmp(s, "complete") == 0)
1417 mask |= BLK_TC_COMPLETE;
1418 else if (strcasecmp(s, "fs") == 0)
1419 mask |= BLK_TC_FS;
1420 else if (strcasecmp(s, "issue") == 0)
1421 mask |= BLK_TC_ISSUE;
1422 else if (strcasecmp(s, "pc") == 0)
1423 mask |= BLK_TC_PC;
1424 else if (strcasecmp(s, "queue") == 0)
1425 mask |= BLK_TC_QUEUE;
1426 else if (strcasecmp(s, "read") == 0)
1427 mask |= BLK_TC_READ;
1428 else if (strcasecmp(s, "requeue") == 0)
1429 mask |= BLK_TC_REQUEUE;
1430 else if (strcasecmp(s, "sync") == 0)
1431 mask |= BLK_TC_SYNC;
1432 else if (strcasecmp(s, "write") == 0)
1433 mask |= BLK_TC_WRITE;
1434
1435 if (sep == NULL)
1436 break;
1437
1438 s = sep + 1;
1439 }
1440 kfree(copy);
1441
1442 return mask;
1443}
1444
1445static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1446 struct device_attribute *attr,
1447 char *buf)
1448{
1449 struct hd_struct *p = dev_to_part(dev);
1450 struct request_queue *q;
1451 struct block_device *bdev;
1452 ssize_t ret = -ENXIO;
1453
1454 lock_kernel();
1455 bdev = bdget(part_devt(p));
1456 if (bdev == NULL)
1457 goto out_unlock_kernel;
1458
1459 q = bdev_get_queue(bdev);
1460 if (q == NULL)
1461 goto out_bdput;
1462 mutex_lock(&bdev->bd_mutex);
1463 if (q->blk_trace == NULL)
1464 ret = sprintf(buf, "disabled\n");
1465 else if (attr == &dev_attr_act_mask)
1466 ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
1467 else if (attr == &dev_attr_pid)
1468 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1469 else if (attr == &dev_attr_start_lba)
1470 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1471 else if (attr == &dev_attr_end_lba)
1472 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1473 mutex_unlock(&bdev->bd_mutex);
1474out_bdput:
1475 bdput(bdev);
1476out_unlock_kernel:
1477 unlock_kernel();
1478 return ret;
1479}
1480
1481static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1482 struct device_attribute *attr,
1483 const char *buf, size_t count)
1484{
1485 struct block_device *bdev;
1486 struct request_queue *q;
1487 struct hd_struct *p;
1488 u64 value;
1489 ssize_t ret = -ENXIO;
1490
1491 if (count == 0)
1492 goto out;
1493
1494 if (attr == &dev_attr_act_mask) {
1495 if (sscanf(buf, "%llx", &value) != 1) {
1496 /* Assume it is a list of trace category names */
1497 value = blk_str2act_mask(buf);
1498			if ((long long)value < 0)
1499 goto out;
1500 }
1501 } else if (sscanf(buf, "%llu", &value) != 1)
1502 goto out;
1503
1504 lock_kernel();
1505 p = dev_to_part(dev);
1506 bdev = bdget(part_devt(p));
1507 if (bdev == NULL)
1508 goto out_unlock_kernel;
1509
1510 q = bdev_get_queue(bdev);
1511 if (q == NULL)
1512 goto out_bdput;
1513
1514 mutex_lock(&bdev->bd_mutex);
1515 ret = 0;
1516 if (q->blk_trace == NULL)
1517 ret = blk_trace_setup_queue(q, bdev->bd_dev);
1518
1519 if (ret == 0) {
1520 if (attr == &dev_attr_act_mask)
1521 q->blk_trace->act_mask = value;
1522 else if (attr == &dev_attr_pid)
1523 q->blk_trace->pid = value;
1524 else if (attr == &dev_attr_start_lba)
1525 q->blk_trace->start_lba = value;
1526 else if (attr == &dev_attr_end_lba)
1527 q->blk_trace->end_lba = value;
1528 ret = count;
1529 }
1530 mutex_unlock(&bdev->bd_mutex);
1531out_bdput:
1532 bdput(bdev);
1533out_unlock_kernel:
1534 unlock_kernel();
1535out:
1536 return ret;
1537}
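The store handlers above accept either a hex mask or a comma-separated list of category names for act_mask, and a plain "1"/"0" for enable. A minimal userspace sketch of driving the per-partition "trace" attribute group (the sysfs path, the device name and the write_attr() helper are illustrative, not taken from this patch):

#include <stdio.h>

/* Write one value to a blktrace sysfs attribute; returns 0 on success. */
static int write_attr(const char *dir, const char *attr, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", dir, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* hypothetical partition; the group name "trace" comes from the patch */
	const char *dir = "/sys/block/sda/sda1/trace";

	write_attr(dir, "act_mask", "read,write,issue,complete");
	write_attr(dir, "start_lba", "0");
	write_attr(dir, "enable", "1");
	return 0;
}

Writing category names to act_mask exercises blk_str2act_mask() above; a hex value such as "0x243" takes the sscanf() path instead.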
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
new file mode 100644
index 000000000000..9fc918da404f
--- /dev/null
+++ b/kernel/trace/events.c
@@ -0,0 +1,15 @@
1/*
2 * This is the place to register all trace points as events.
3 */
4
5#include <linux/stringify.h>
6
7#include <trace/trace_events.h>
8
9#include "trace_output.h"
10
11#include "trace_events_stage_1.h"
12#include "trace_events_stage_2.h"
13#include "trace_events_stage_3.h"
14
15#include <trace/trace_event_types.h>
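The stage headers included above expand the same event descriptions several times, each pass turning one definition into a different piece of code. A generic, self-contained sketch of that multi-pass (X-macro) technique; the EVENT()/EVENT_LIST names below are illustrative only, not the actual trace_events_stage_*.h macros:

#include <stdio.h>

/* One list of events, expanded twice with different EVENT() definitions. */
#define EVENT_LIST \
	EVENT(sample_alloc, int, bytes) \
	EVENT(sample_free, int, slot)

/* Pass 1: emit a struct for each event. */
#define EVENT(name, type, field) struct name##_entry { type field; };
EVENT_LIST
#undef EVENT

/* Pass 2: emit a print helper for each event. */
#define EVENT(name, type, field) \
	static void name##_print(struct name##_entry *e) \
	{ printf(#name ": " #field "=%d\n", (int)e->field); }
EVENT_LIST
#undef EVENT

int main(void)
{
	struct sample_alloc_entry a = { .bytes = 64 };
	struct sample_free_entry f = { .slot = 3 };

	sample_alloc_print(&a);
	sample_free_print(&f);
	return 0;
}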
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e8..d33d306bdcf4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
27#include <linux/sysctl.h> 27#include <linux/sysctl.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h>
30 31
31#include <asm/ftrace.h> 32#include <asm/ftrace.h>
32 33
@@ -44,14 +45,14 @@
44 ftrace_kill(); \ 45 ftrace_kill(); \
45 } while (0) 46 } while (0)
46 47
48/* hash bits for specific function selection */
49#define FTRACE_HASH_BITS 7
50#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
51
47/* ftrace_enabled is a method to turn ftrace on or off */ 52/* ftrace_enabled is a method to turn ftrace on or off */
48int ftrace_enabled __read_mostly; 53int ftrace_enabled __read_mostly;
49static int last_ftrace_enabled; 54static int last_ftrace_enabled;
50 55
51/* set when tracing only a pid */
52struct pid *ftrace_pid_trace;
53static struct pid * const ftrace_swapper_pid = &init_struct_pid;
54
55/* Quick disabling of function tracer. */ 56/* Quick disabling of function tracer. */
56int function_trace_stop; 57int function_trace_stop;
57 58
@@ -61,9 +62,7 @@ int function_trace_stop;
61 */ 62 */
62static int ftrace_disabled __read_mostly; 63static int ftrace_disabled __read_mostly;
63 64
64static DEFINE_SPINLOCK(ftrace_lock); 65static DEFINE_MUTEX(ftrace_lock);
65static DEFINE_MUTEX(ftrace_sysctl_lock);
66static DEFINE_MUTEX(ftrace_start_lock);
67 66
68static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
69{ 68{
@@ -134,9 +133,6 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
134 133
135static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
136{ 135{
137 /* should not be called from interrupt context */
138 spin_lock(&ftrace_lock);
139
140 ops->next = ftrace_list; 136 ops->next = ftrace_list;
141 /* 137 /*
142 * We are entering ops into the ftrace_list but another 138 * We are entering ops into the ftrace_list but another
@@ -172,18 +168,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
172#endif 168#endif
173 } 169 }
174 170
175 spin_unlock(&ftrace_lock);
176
177 return 0; 171 return 0;
178} 172}
179 173
180static int __unregister_ftrace_function(struct ftrace_ops *ops) 174static int __unregister_ftrace_function(struct ftrace_ops *ops)
181{ 175{
182 struct ftrace_ops **p; 176 struct ftrace_ops **p;
183 int ret = 0;
184
185 /* should not be called from interrupt context */
186 spin_lock(&ftrace_lock);
187 177
188 /* 178 /*
189 * If we are removing the last function, then simply point 179 * If we are removing the last function, then simply point
@@ -192,17 +182,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
192 if (ftrace_list == ops && ops->next == &ftrace_list_end) { 182 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
193 ftrace_trace_function = ftrace_stub; 183 ftrace_trace_function = ftrace_stub;
194 ftrace_list = &ftrace_list_end; 184 ftrace_list = &ftrace_list_end;
195 goto out; 185 return 0;
196 } 186 }
197 187
198 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) 188 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
199 if (*p == ops) 189 if (*p == ops)
200 break; 190 break;
201 191
202 if (*p != ops) { 192 if (*p != ops)
203 ret = -1; 193 return -1;
204 goto out;
205 }
206 194
207 *p = (*p)->next; 195 *p = (*p)->next;
208 196
@@ -223,21 +211,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
223 } 211 }
224 } 212 }
225 213
226 out: 214 return 0;
227 spin_unlock(&ftrace_lock);
228
229 return ret;
230} 215}
231 216
232static void ftrace_update_pid_func(void) 217static void ftrace_update_pid_func(void)
233{ 218{
234 ftrace_func_t func; 219 ftrace_func_t func;
235 220
236 /* should not be called from interrupt context */
237 spin_lock(&ftrace_lock);
238
239 if (ftrace_trace_function == ftrace_stub) 221 if (ftrace_trace_function == ftrace_stub)
240 goto out; 222 return;
241 223
242 func = ftrace_trace_function; 224 func = ftrace_trace_function;
243 225
@@ -254,23 +236,29 @@ static void ftrace_update_pid_func(void)
254#else 236#else
255 __ftrace_trace_function = func; 237 __ftrace_trace_function = func;
256#endif 238#endif
257
258 out:
259 spin_unlock(&ftrace_lock);
260} 239}
261 240
241/* set when tracing only a pid */
242struct pid *ftrace_pid_trace;
243static struct pid * const ftrace_swapper_pid = &init_struct_pid;
244
262#ifdef CONFIG_DYNAMIC_FTRACE 245#ifdef CONFIG_DYNAMIC_FTRACE
246
263#ifndef CONFIG_FTRACE_MCOUNT_RECORD 247#ifndef CONFIG_FTRACE_MCOUNT_RECORD
264# error Dynamic ftrace depends on MCOUNT_RECORD 248# error Dynamic ftrace depends on MCOUNT_RECORD
265#endif 249#endif
266 250
267/* 251static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
268 * Since MCOUNT_ADDR may point to mcount itself, we do not want 252
269 * to get it confused by reading a reference in the code as we 253struct ftrace_func_probe {
270 * are parsing on objcopy output of text. Use a variable for 254 struct hlist_node node;
271 * it instead. 255 struct ftrace_probe_ops *ops;
272 */ 256 unsigned long flags;
273static unsigned long mcount_addr = MCOUNT_ADDR; 257 unsigned long ip;
258 void *data;
259 struct rcu_head rcu;
260};
261
274 262
275enum { 263enum {
276 FTRACE_ENABLE_CALLS = (1 << 0), 264 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -290,7 +278,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
290 278
291struct ftrace_page { 279struct ftrace_page {
292 struct ftrace_page *next; 280 struct ftrace_page *next;
293 unsigned long index; 281 int index;
294 struct dyn_ftrace records[]; 282 struct dyn_ftrace records[];
295}; 283};
296 284
@@ -305,6 +293,19 @@ static struct ftrace_page *ftrace_pages;
305 293
306static struct dyn_ftrace *ftrace_free_records; 294static struct dyn_ftrace *ftrace_free_records;
307 295
296/*
297 * This is a double for loop. Do not use 'break' to break out of the loop,
298 * you must use a goto.
299 */
300#define do_for_each_ftrace_rec(pg, rec) \
301 for (pg = ftrace_pages_start; pg; pg = pg->next) { \
302 int _____i; \
303 for (_____i = 0; _____i < pg->index; _____i++) { \
304 rec = &pg->records[_____i];
305
306#define while_for_each_ftrace_rec() \
307 } \
308 }
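For reference, a minimal sketch of how this iterator pair is meant to be used in this file, exiting early with a goto rather than a break and with ftrace_lock held (the helper name is hypothetical; ftrace_release() below is a real user):

static struct dyn_ftrace *ftrace_find_rec_by_ip(unsigned long ip)
{
	struct ftrace_page *pg;
	struct dyn_ftrace *rec;
	struct dyn_ftrace *found = NULL;

	mutex_lock(&ftrace_lock);
	do_for_each_ftrace_rec(pg, rec) {
		if (rec->ip == ip) {
			found = rec;
			goto out;	/* never 'break' out of this loop */
		}
	} while_for_each_ftrace_rec();
 out:
	mutex_unlock(&ftrace_lock);
	return found;
}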
308 309
309#ifdef CONFIG_KPROBES 310#ifdef CONFIG_KPROBES
310 311
@@ -349,23 +350,16 @@ void ftrace_release(void *start, unsigned long size)
349 struct ftrace_page *pg; 350 struct ftrace_page *pg;
350 unsigned long s = (unsigned long)start; 351 unsigned long s = (unsigned long)start;
351 unsigned long e = s + size; 352 unsigned long e = s + size;
352 int i;
353 353
354 if (ftrace_disabled || !start) 354 if (ftrace_disabled || !start)
355 return; 355 return;
356 356
357 /* should not be called from interrupt context */ 357 mutex_lock(&ftrace_lock);
358 spin_lock(&ftrace_lock); 358 do_for_each_ftrace_rec(pg, rec) {
359 359 if ((rec->ip >= s) && (rec->ip < e))
360 for (pg = ftrace_pages_start; pg; pg = pg->next) { 360 ftrace_free_rec(rec);
361 for (i = 0; i < pg->index; i++) { 361 } while_for_each_ftrace_rec();
362 rec = &pg->records[i]; 362 mutex_unlock(&ftrace_lock);
363
364 if ((rec->ip >= s) && (rec->ip < e))
365 ftrace_free_rec(rec);
366 }
367 }
368 spin_unlock(&ftrace_lock);
369} 363}
370 364
371static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 365static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -461,10 +455,10 @@ static void ftrace_bug(int failed, unsigned long ip)
461static int 455static int
462__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 456__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
463{ 457{
464 unsigned long ip, fl;
465 unsigned long ftrace_addr; 458 unsigned long ftrace_addr;
459 unsigned long ip, fl;
466 460
467 ftrace_addr = (unsigned long)ftrace_caller; 461 ftrace_addr = (unsigned long)FTRACE_ADDR;
468 462
469 ip = rec->ip; 463 ip = rec->ip;
470 464
@@ -473,7 +467,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
473 * it is not enabled then do nothing. 467 * it is not enabled then do nothing.
474 * 468 *
475 * If this record is not to be traced and 469 * If this record is not to be traced and
476 * it is enabled then disabled it. 470 * it is enabled then disable it.
477 * 471 *
478 */ 472 */
479 if (rec->flags & FTRACE_FL_NOTRACE) { 473 if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -493,7 +487,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
493 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) 487 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
494 return 0; 488 return 0;
495 489
496 /* Record is not filtered and is not enabled do nothing */ 490 /* Record is not filtered or enabled, do nothing */
497 if (!fl) 491 if (!fl)
498 return 0; 492 return 0;
499 493
@@ -515,7 +509,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
515 509
516 } else { 510 } else {
517 511
518 /* if record is not enabled do nothing */ 512 /* if record is not enabled, do nothing */
519 if (!(rec->flags & FTRACE_FL_ENABLED)) 513 if (!(rec->flags & FTRACE_FL_ENABLED))
520 return 0; 514 return 0;
521 515
@@ -531,41 +525,40 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
531 525
532static void ftrace_replace_code(int enable) 526static void ftrace_replace_code(int enable)
533{ 527{
534 int i, failed;
535 struct dyn_ftrace *rec; 528 struct dyn_ftrace *rec;
536 struct ftrace_page *pg; 529 struct ftrace_page *pg;
530 int failed;
537 531
538 for (pg = ftrace_pages_start; pg; pg = pg->next) { 532 do_for_each_ftrace_rec(pg, rec) {
539 for (i = 0; i < pg->index; i++) { 533 /*
540 rec = &pg->records[i]; 534 * Skip over free records and records that have
541 535 * failed.
542 /* 536 */
543 * Skip over free records and records that have 537 if (rec->flags & FTRACE_FL_FREE ||
544 * failed. 538 rec->flags & FTRACE_FL_FAILED)
545 */ 539 continue;
546 if (rec->flags & FTRACE_FL_FREE ||
547 rec->flags & FTRACE_FL_FAILED)
548 continue;
549 540
550 /* ignore updates to this record's mcount site */ 541 /* ignore updates to this record's mcount site */
551 if (get_kprobe((void *)rec->ip)) { 542 if (get_kprobe((void *)rec->ip)) {
552 freeze_record(rec); 543 freeze_record(rec);
553 continue; 544 continue;
554 } else { 545 } else {
555 unfreeze_record(rec); 546 unfreeze_record(rec);
556 } 547 }
557 548
558 failed = __ftrace_replace_code(rec, enable); 549 failed = __ftrace_replace_code(rec, enable);
559 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 550 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
560 rec->flags |= FTRACE_FL_FAILED; 551 rec->flags |= FTRACE_FL_FAILED;
561 if ((system_state == SYSTEM_BOOTING) || 552 if ((system_state == SYSTEM_BOOTING) ||
562 !core_kernel_text(rec->ip)) { 553 !core_kernel_text(rec->ip)) {
563 ftrace_free_rec(rec); 554 ftrace_free_rec(rec);
564 } else 555 } else {
565 ftrace_bug(failed, rec->ip); 556 ftrace_bug(failed, rec->ip);
566 } 557 /* Stop processing */
558 return;
559 }
567 } 560 }
568 } 561 } while_for_each_ftrace_rec();
569} 562}
570 563
571static int 564static int
@@ -576,7 +569,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
576 569
577 ip = rec->ip; 570 ip = rec->ip;
578 571
579 ret = ftrace_make_nop(mod, rec, mcount_addr); 572 ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
580 if (ret) { 573 if (ret) {
581 ftrace_bug(ret, ip); 574 ftrace_bug(ret, ip);
582 rec->flags |= FTRACE_FL_FAILED; 575 rec->flags |= FTRACE_FL_FAILED;
@@ -585,6 +578,24 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
585 return 1; 578 return 1;
586} 579}
587 580
581/*
582 * archs can override this function if they must do something
583 * before the modifying code is performed.
584 */
585int __weak ftrace_arch_code_modify_prepare(void)
586{
587 return 0;
588}
589
590/*
591 * archs can override this function if they must do something
592 * after the modifying code is performed.
593 */
594int __weak ftrace_arch_code_modify_post_process(void)
595{
596 return 0;
597}
598
588static int __ftrace_modify_code(void *data) 599static int __ftrace_modify_code(void *data)
589{ 600{
590 int *command = data; 601 int *command = data;
@@ -607,7 +618,17 @@ static int __ftrace_modify_code(void *data)
607 618
608static void ftrace_run_update_code(int command) 619static void ftrace_run_update_code(int command)
609{ 620{
621 int ret;
622
623 ret = ftrace_arch_code_modify_prepare();
624 FTRACE_WARN_ON(ret);
625 if (ret)
626 return;
627
610 stop_machine(__ftrace_modify_code, &command, NULL); 628 stop_machine(__ftrace_modify_code, &command, NULL);
629
630 ret = ftrace_arch_code_modify_post_process();
631 FTRACE_WARN_ON(ret);
611} 632}
612 633
613static ftrace_func_t saved_ftrace_func; 634static ftrace_func_t saved_ftrace_func;
@@ -631,13 +652,10 @@ static void ftrace_startup(int command)
631 if (unlikely(ftrace_disabled)) 652 if (unlikely(ftrace_disabled))
632 return; 653 return;
633 654
634 mutex_lock(&ftrace_start_lock);
635 ftrace_start_up++; 655 ftrace_start_up++;
636 command |= FTRACE_ENABLE_CALLS; 656 command |= FTRACE_ENABLE_CALLS;
637 657
638 ftrace_startup_enable(command); 658 ftrace_startup_enable(command);
639
640 mutex_unlock(&ftrace_start_lock);
641} 659}
642 660
643static void ftrace_shutdown(int command) 661static void ftrace_shutdown(int command)
@@ -645,7 +663,6 @@ static void ftrace_shutdown(int command)
645 if (unlikely(ftrace_disabled)) 663 if (unlikely(ftrace_disabled))
646 return; 664 return;
647 665
648 mutex_lock(&ftrace_start_lock);
649 ftrace_start_up--; 666 ftrace_start_up--;
650 if (!ftrace_start_up) 667 if (!ftrace_start_up)
651 command |= FTRACE_DISABLE_CALLS; 668 command |= FTRACE_DISABLE_CALLS;
@@ -656,11 +673,9 @@ static void ftrace_shutdown(int command)
656 } 673 }
657 674
658 if (!command || !ftrace_enabled) 675 if (!command || !ftrace_enabled)
659 goto out; 676 return;
660 677
661 ftrace_run_update_code(command); 678 ftrace_run_update_code(command);
662 out:
663 mutex_unlock(&ftrace_start_lock);
664} 679}
665 680
666static void ftrace_startup_sysctl(void) 681static void ftrace_startup_sysctl(void)
@@ -670,7 +685,6 @@ static void ftrace_startup_sysctl(void)
670 if (unlikely(ftrace_disabled)) 685 if (unlikely(ftrace_disabled))
671 return; 686 return;
672 687
673 mutex_lock(&ftrace_start_lock);
674 /* Force update next time */ 688 /* Force update next time */
675 saved_ftrace_func = NULL; 689 saved_ftrace_func = NULL;
676 /* ftrace_start_up is true if we want ftrace running */ 690 /* ftrace_start_up is true if we want ftrace running */
@@ -678,7 +692,6 @@ static void ftrace_startup_sysctl(void)
678 command |= FTRACE_ENABLE_CALLS; 692 command |= FTRACE_ENABLE_CALLS;
679 693
680 ftrace_run_update_code(command); 694 ftrace_run_update_code(command);
681 mutex_unlock(&ftrace_start_lock);
682} 695}
683 696
684static void ftrace_shutdown_sysctl(void) 697static void ftrace_shutdown_sysctl(void)
@@ -688,13 +701,11 @@ static void ftrace_shutdown_sysctl(void)
688 if (unlikely(ftrace_disabled)) 701 if (unlikely(ftrace_disabled))
689 return; 702 return;
690 703
691 mutex_lock(&ftrace_start_lock);
692 /* ftrace_start_up is true if ftrace is running */ 704 /* ftrace_start_up is true if ftrace is running */
693 if (ftrace_start_up) 705 if (ftrace_start_up)
694 command |= FTRACE_DISABLE_CALLS; 706 command |= FTRACE_DISABLE_CALLS;
695 707
696 ftrace_run_update_code(command); 708 ftrace_run_update_code(command);
697 mutex_unlock(&ftrace_start_lock);
698} 709}
699 710
700static cycle_t ftrace_update_time; 711static cycle_t ftrace_update_time;
@@ -781,13 +792,16 @@ enum {
781 FTRACE_ITER_CONT = (1 << 1), 792 FTRACE_ITER_CONT = (1 << 1),
782 FTRACE_ITER_NOTRACE = (1 << 2), 793 FTRACE_ITER_NOTRACE = (1 << 2),
783 FTRACE_ITER_FAILURES = (1 << 3), 794 FTRACE_ITER_FAILURES = (1 << 3),
795 FTRACE_ITER_PRINTALL = (1 << 4),
796 FTRACE_ITER_HASH = (1 << 5),
784}; 797};
785 798
786#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 799#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
787 800
788struct ftrace_iterator { 801struct ftrace_iterator {
789 struct ftrace_page *pg; 802 struct ftrace_page *pg;
790 unsigned idx; 803 int hidx;
804 int idx;
791 unsigned flags; 805 unsigned flags;
792 unsigned char buffer[FTRACE_BUFF_MAX+1]; 806 unsigned char buffer[FTRACE_BUFF_MAX+1];
793 unsigned buffer_idx; 807 unsigned buffer_idx;
@@ -795,15 +809,89 @@ struct ftrace_iterator {
795}; 809};
796 810
797static void * 811static void *
812t_hash_next(struct seq_file *m, void *v, loff_t *pos)
813{
814 struct ftrace_iterator *iter = m->private;
815 struct hlist_node *hnd = v;
816 struct hlist_head *hhd;
817
818 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
819
820 (*pos)++;
821
822 retry:
823 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
824 return NULL;
825
826 hhd = &ftrace_func_hash[iter->hidx];
827
828 if (hlist_empty(hhd)) {
829 iter->hidx++;
830 hnd = NULL;
831 goto retry;
832 }
833
834 if (!hnd)
835 hnd = hhd->first;
836 else {
837 hnd = hnd->next;
838 if (!hnd) {
839 iter->hidx++;
840 goto retry;
841 }
842 }
843
844 return hnd;
845}
846
847static void *t_hash_start(struct seq_file *m, loff_t *pos)
848{
849 struct ftrace_iterator *iter = m->private;
850 void *p = NULL;
851
852 iter->flags |= FTRACE_ITER_HASH;
853
854 return t_hash_next(m, p, pos);
855}
856
857static int t_hash_show(struct seq_file *m, void *v)
858{
859 struct ftrace_func_probe *rec;
860 struct hlist_node *hnd = v;
861 char str[KSYM_SYMBOL_LEN];
862
863 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
864
865 if (rec->ops->print)
866 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
867
868 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
869 seq_printf(m, "%s:", str);
870
871 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
872 seq_printf(m, "%s", str);
873
874 if (rec->data)
875 seq_printf(m, ":%p", rec->data);
876 seq_putc(m, '\n');
877
878 return 0;
879}
880
881static void *
798t_next(struct seq_file *m, void *v, loff_t *pos) 882t_next(struct seq_file *m, void *v, loff_t *pos)
799{ 883{
800 struct ftrace_iterator *iter = m->private; 884 struct ftrace_iterator *iter = m->private;
801 struct dyn_ftrace *rec = NULL; 885 struct dyn_ftrace *rec = NULL;
802 886
887 if (iter->flags & FTRACE_ITER_HASH)
888 return t_hash_next(m, v, pos);
889
803 (*pos)++; 890 (*pos)++;
804 891
805 /* should not be called from interrupt context */ 892 if (iter->flags & FTRACE_ITER_PRINTALL)
806 spin_lock(&ftrace_lock); 893 return NULL;
894
807 retry: 895 retry:
808 if (iter->idx >= iter->pg->index) { 896 if (iter->idx >= iter->pg->index) {
809 if (iter->pg->next) { 897 if (iter->pg->next) {
@@ -832,7 +920,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
832 goto retry; 920 goto retry;
833 } 921 }
834 } 922 }
835 spin_unlock(&ftrace_lock);
836 923
837 return rec; 924 return rec;
838} 925}
@@ -842,6 +929,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
842 struct ftrace_iterator *iter = m->private; 929 struct ftrace_iterator *iter = m->private;
843 void *p = NULL; 930 void *p = NULL;
844 931
932 mutex_lock(&ftrace_lock);
933 /*
934 * For set_ftrace_filter reading, if we have the filter
935 * off, we can short cut and just print out that all
936 * functions are enabled.
937 */
938 if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
939 if (*pos > 0)
940 return t_hash_start(m, pos);
941 iter->flags |= FTRACE_ITER_PRINTALL;
942 (*pos)++;
943 return iter;
944 }
945
946 if (iter->flags & FTRACE_ITER_HASH)
947 return t_hash_start(m, pos);
948
845 if (*pos > 0) { 949 if (*pos > 0) {
846 if (iter->idx < 0) 950 if (iter->idx < 0)
847 return p; 951 return p;
@@ -851,18 +955,31 @@ static void *t_start(struct seq_file *m, loff_t *pos)
851 955
852 p = t_next(m, p, pos); 956 p = t_next(m, p, pos);
853 957
958 if (!p)
959 return t_hash_start(m, pos);
960
854 return p; 961 return p;
855} 962}
856 963
857static void t_stop(struct seq_file *m, void *p) 964static void t_stop(struct seq_file *m, void *p)
858{ 965{
966 mutex_unlock(&ftrace_lock);
859} 967}
860 968
861static int t_show(struct seq_file *m, void *v) 969static int t_show(struct seq_file *m, void *v)
862{ 970{
971 struct ftrace_iterator *iter = m->private;
863 struct dyn_ftrace *rec = v; 972 struct dyn_ftrace *rec = v;
864 char str[KSYM_SYMBOL_LEN]; 973 char str[KSYM_SYMBOL_LEN];
865 974
975 if (iter->flags & FTRACE_ITER_HASH)
976 return t_hash_show(m, v);
977
978 if (iter->flags & FTRACE_ITER_PRINTALL) {
979 seq_printf(m, "#### all functions enabled ####\n");
980 return 0;
981 }
982
866 if (!rec) 983 if (!rec)
867 return 0; 984 return 0;
868 985
@@ -941,23 +1058,16 @@ static void ftrace_filter_reset(int enable)
941 struct ftrace_page *pg; 1058 struct ftrace_page *pg;
942 struct dyn_ftrace *rec; 1059 struct dyn_ftrace *rec;
943 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1060 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
944 unsigned i;
945 1061
946 /* should not be called from interrupt context */ 1062 mutex_lock(&ftrace_lock);
947 spin_lock(&ftrace_lock);
948 if (enable) 1063 if (enable)
949 ftrace_filtered = 0; 1064 ftrace_filtered = 0;
950 pg = ftrace_pages_start; 1065 do_for_each_ftrace_rec(pg, rec) {
951 while (pg) { 1066 if (rec->flags & FTRACE_FL_FAILED)
952 for (i = 0; i < pg->index; i++) { 1067 continue;
953 rec = &pg->records[i]; 1068 rec->flags &= ~type;
954 if (rec->flags & FTRACE_FL_FAILED) 1069 } while_for_each_ftrace_rec();
955 continue; 1070 mutex_unlock(&ftrace_lock);
956 rec->flags &= ~type;
957 }
958 pg = pg->next;
959 }
960 spin_unlock(&ftrace_lock);
961} 1071}
962 1072
963static int 1073static int
@@ -1038,86 +1148,536 @@ enum {
1038 MATCH_END_ONLY, 1148 MATCH_END_ONLY,
1039}; 1149};
1040 1150
1041static void 1151/*
1042ftrace_match(unsigned char *buff, int len, int enable) 1152 * (static function - no need for kernel doc)
1153 *
1154 * Pass in a buffer containing a glob and this function will
1155 * set search to point to the search part of the buffer and
1156 * return the type of search it is (see enum above).
1157 * This does modify buff.
1158 *
1159 * Returns enum type.
1160 * search returns the pointer to use for comparison.
1161 * not returns 1 if buff started with a '!'
1162 * 0 otherwise.
1163 */
1164static int
1165ftrace_setup_glob(char *buff, int len, char **search, int *not)
1043{ 1166{
1044 char str[KSYM_SYMBOL_LEN];
1045 char *search = NULL;
1046 struct ftrace_page *pg;
1047 struct dyn_ftrace *rec;
1048 int type = MATCH_FULL; 1167 int type = MATCH_FULL;
1049 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1168 int i;
1050 unsigned i, match = 0, search_len = 0;
1051 int not = 0;
1052 1169
1053 if (buff[0] == '!') { 1170 if (buff[0] == '!') {
1054 not = 1; 1171 *not = 1;
1055 buff++; 1172 buff++;
1056 len--; 1173 len--;
1057 } 1174 } else
1175 *not = 0;
1176
1177 *search = buff;
1058 1178
1059 for (i = 0; i < len; i++) { 1179 for (i = 0; i < len; i++) {
1060 if (buff[i] == '*') { 1180 if (buff[i] == '*') {
1061 if (!i) { 1181 if (!i) {
1062 search = buff + i + 1; 1182 *search = buff + 1;
1063 type = MATCH_END_ONLY; 1183 type = MATCH_END_ONLY;
1064 search_len = len - (i + 1);
1065 } else { 1184 } else {
1066 if (type == MATCH_END_ONLY) { 1185 if (type == MATCH_END_ONLY)
1067 type = MATCH_MIDDLE_ONLY; 1186 type = MATCH_MIDDLE_ONLY;
1068 } else { 1187 else
1069 match = i;
1070 type = MATCH_FRONT_ONLY; 1188 type = MATCH_FRONT_ONLY;
1071 }
1072 buff[i] = 0; 1189 buff[i] = 0;
1073 break; 1190 break;
1074 } 1191 }
1075 } 1192 }
1076 } 1193 }
1077 1194
1078 /* should not be called from interrupt context */ 1195 return type;
1079 spin_lock(&ftrace_lock); 1196}
1080 if (enable) 1197
1081 ftrace_filtered = 1; 1198static int ftrace_match(char *str, char *regex, int len, int type)
1082 pg = ftrace_pages_start; 1199{
1083 while (pg) { 1200 int matched = 0;
1084 for (i = 0; i < pg->index; i++) { 1201 char *ptr;
1085 int matched = 0; 1202
1086 char *ptr; 1203 switch (type) {
1087 1204 case MATCH_FULL:
1088 rec = &pg->records[i]; 1205 if (strcmp(str, regex) == 0)
1089 if (rec->flags & FTRACE_FL_FAILED) 1206 matched = 1;
1207 break;
1208 case MATCH_FRONT_ONLY:
1209 if (strncmp(str, regex, len) == 0)
1210 matched = 1;
1211 break;
1212 case MATCH_MIDDLE_ONLY:
1213 if (strstr(str, regex))
1214 matched = 1;
1215 break;
1216 case MATCH_END_ONLY:
1217 ptr = strstr(str, regex);
1218 if (ptr && (ptr[len] == 0))
1219 matched = 1;
1220 break;
1221 }
1222
1223 return matched;
1224}
1225
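For reference, a worked sketch of how a few example globs decode through ftrace_setup_glob() above and are then applied by ftrace_match(); the function names in the last column are only illustrative:

/*
 *   glob          type               search      matches e.g.
 *   "schedule"    MATCH_FULL         "schedule"  exactly "schedule"
 *   "sched_*"     MATCH_FRONT_ONLY   "sched_"    "sched_fork", "sched_exec"
 *   "*_fair"      MATCH_END_ONLY     "_fair"     "pick_next_task_fair"
 *   "*idle*"      MATCH_MIDDLE_ONLY  "idle"      "cpu_idle", "default_idle"
 *   "!sched_*"    MATCH_FRONT_ONLY   "sched_"    same match, but *not is set,
 *                                                so callers clear the flag
 */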
1226static int
1227ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
1228{
1229 char str[KSYM_SYMBOL_LEN];
1230
1231 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1232 return ftrace_match(str, regex, len, type);
1233}
1234
1235static void ftrace_match_records(char *buff, int len, int enable)
1236{
1237 unsigned int search_len;
1238 struct ftrace_page *pg;
1239 struct dyn_ftrace *rec;
1240 unsigned long flag;
1241 char *search;
1242 int type;
1243 int not;
1244
1245 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1246 type = ftrace_setup_glob(buff, len, &search, &not);
1247
1248 search_len = strlen(search);
1249
1250 mutex_lock(&ftrace_lock);
1251 do_for_each_ftrace_rec(pg, rec) {
1252
1253 if (rec->flags & FTRACE_FL_FAILED)
1254 continue;
1255
1256 if (ftrace_match_record(rec, search, search_len, type)) {
1257 if (not)
1258 rec->flags &= ~flag;
1259 else
1260 rec->flags |= flag;
1261 }
1262 /*
1263 * Only enable filtering if we have a function that
1264 * is filtered on.
1265 */
1266 if (enable && (rec->flags & FTRACE_FL_FILTER))
1267 ftrace_filtered = 1;
1268 } while_for_each_ftrace_rec();
1269 mutex_unlock(&ftrace_lock);
1270}
1271
1272static int
1273ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
1274 char *regex, int len, int type)
1275{
1276 char str[KSYM_SYMBOL_LEN];
1277 char *modname;
1278
1279 kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
1280
1281 if (!modname || strcmp(modname, mod))
1282 return 0;
1283
1284 /* blank search means to match all funcs in the mod */
1285 if (len)
1286 return ftrace_match(str, regex, len, type);
1287 else
1288 return 1;
1289}
1290
1291static void ftrace_match_module_records(char *buff, char *mod, int enable)
1292{
1293 unsigned search_len = 0;
1294 struct ftrace_page *pg;
1295 struct dyn_ftrace *rec;
1296 int type = MATCH_FULL;
1297 char *search = buff;
1298 unsigned long flag;
1299 int not = 0;
1300
1301 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1302
1303 /* blank or '*' mean the same */
1304 if (strcmp(buff, "*") == 0)
1305 buff[0] = 0;
1306
1307 /* handle the case of 'dont filter this module' */
1308 if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) {
1309 buff[0] = 0;
1310 not = 1;
1311 }
1312
1313 if (strlen(buff)) {
1314 type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
1315 search_len = strlen(search);
1316 }
1317
1318 mutex_lock(&ftrace_lock);
1319 do_for_each_ftrace_rec(pg, rec) {
1320
1321 if (rec->flags & FTRACE_FL_FAILED)
1322 continue;
1323
1324 if (ftrace_match_module_record(rec, mod,
1325 search, search_len, type)) {
1326 if (not)
1327 rec->flags &= ~flag;
1328 else
1329 rec->flags |= flag;
1330 }
1331 if (enable && (rec->flags & FTRACE_FL_FILTER))
1332 ftrace_filtered = 1;
1333
1334 } while_for_each_ftrace_rec();
1335 mutex_unlock(&ftrace_lock);
1336}
1337
1338/*
1339 * We register the module command as a template to show others how
1340 * to register a command as well.
1341 */
1342
1343static int
1344ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
1345{
1346 char *mod;
1347
1348 /*
1349 * cmd == 'mod' because we only registered this func
1350 * for the 'mod' ftrace_func_command.
1351 * But if you register one func with multiple commands,
1352 * you can tell which command was used by the cmd
1353 * parameter.
1354 */
1355
1356 /* we must have a module name */
1357 if (!param)
1358 return -EINVAL;
1359
1360 mod = strsep(&param, ":");
1361 if (!strlen(mod))
1362 return -EINVAL;
1363
1364 ftrace_match_module_records(func, mod, enable);
1365 return 0;
1366}
1367
1368static struct ftrace_func_command ftrace_mod_cmd = {
1369 .name = "mod",
1370 .func = ftrace_mod_callback,
1371};
1372
1373static int __init ftrace_mod_cmd_init(void)
1374{
1375 return register_ftrace_command(&ftrace_mod_cmd);
1376}
1377device_initcall(ftrace_mod_cmd_init);
1378
1379static void
1380function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1381{
1382 struct ftrace_func_probe *entry;
1383 struct hlist_head *hhd;
1384 struct hlist_node *n;
1385 unsigned long key;
1386 int resched;
1387
1388 key = hash_long(ip, FTRACE_HASH_BITS);
1389
1390 hhd = &ftrace_func_hash[key];
1391
1392 if (hlist_empty(hhd))
1393 return;
1394
1395 /*
1396 * Disable preemption for these calls to prevent a RCU grace
1397 * period. This syncs the hash iteration and freeing of items
1398 * on the hash. rcu_read_lock is too dangerous here.
1399 */
1400 resched = ftrace_preempt_disable();
1401 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1402 if (entry->ip == ip)
1403 entry->ops->func(ip, parent_ip, &entry->data);
1404 }
1405 ftrace_preempt_enable(resched);
1406}
1407
1408static struct ftrace_ops trace_probe_ops __read_mostly =
1409{
1410 .func = function_trace_probe_call,
1411};
1412
1413static int ftrace_probe_registered;
1414
1415static void __enable_ftrace_function_probe(void)
1416{
1417 int i;
1418
1419 if (ftrace_probe_registered)
1420 return;
1421
1422 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1423 struct hlist_head *hhd = &ftrace_func_hash[i];
1424 if (hhd->first)
1425 break;
1426 }
1427 /* Nothing registered? */
1428 if (i == FTRACE_FUNC_HASHSIZE)
1429 return;
1430
1431 __register_ftrace_function(&trace_probe_ops);
1432 ftrace_startup(0);
1433 ftrace_probe_registered = 1;
1434}
1435
1436static void __disable_ftrace_function_probe(void)
1437{
1438 int i;
1439
1440 if (!ftrace_probe_registered)
1441 return;
1442
1443 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1444 struct hlist_head *hhd = &ftrace_func_hash[i];
1445 if (hhd->first)
1446 return;
1447 }
1448
1449 /* no more funcs left */
1450 __unregister_ftrace_function(&trace_probe_ops);
1451 ftrace_shutdown(0);
1452 ftrace_probe_registered = 0;
1453}
1454
1455
1456static void ftrace_free_entry_rcu(struct rcu_head *rhp)
1457{
1458 struct ftrace_func_probe *entry =
1459 container_of(rhp, struct ftrace_func_probe, rcu);
1460
1461 if (entry->ops->free)
1462 entry->ops->free(&entry->data);
1463 kfree(entry);
1464}
1465
1466
1467int
1468register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1469 void *data)
1470{
1471 struct ftrace_func_probe *entry;
1472 struct ftrace_page *pg;
1473 struct dyn_ftrace *rec;
1474 int type, len, not;
1475 unsigned long key;
1476 int count = 0;
1477 char *search;
1478
1479 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1480 len = strlen(search);
1481
1482 /* we do not support '!' for function probes */
1483 if (WARN_ON(not))
1484 return -EINVAL;
1485
1486 mutex_lock(&ftrace_lock);
1487 do_for_each_ftrace_rec(pg, rec) {
1488
1489 if (rec->flags & FTRACE_FL_FAILED)
1490 continue;
1491
1492 if (!ftrace_match_record(rec, search, len, type))
1493 continue;
1494
1495 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1496 if (!entry) {
1497 /* If we did not process any, then return error */
1498 if (!count)
1499 count = -ENOMEM;
1500 goto out_unlock;
1501 }
1502
1503 count++;
1504
1505 entry->data = data;
1506
1507 /*
1508 * The caller might want to do something special
1509 * for each function we find. We call the callback
1510 * to give the caller an opportunity to do so.
1511 */
1512 if (ops->callback) {
1513 if (ops->callback(rec->ip, &entry->data) < 0) {
1514 /* caller does not like this func */
1515 kfree(entry);
1090 continue; 1516 continue;
1091 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1092 switch (type) {
1093 case MATCH_FULL:
1094 if (strcmp(str, buff) == 0)
1095 matched = 1;
1096 break;
1097 case MATCH_FRONT_ONLY:
1098 if (memcmp(str, buff, match) == 0)
1099 matched = 1;
1100 break;
1101 case MATCH_MIDDLE_ONLY:
1102 if (strstr(str, search))
1103 matched = 1;
1104 break;
1105 case MATCH_END_ONLY:
1106 ptr = strstr(str, search);
1107 if (ptr && (ptr[search_len] == 0))
1108 matched = 1;
1109 break;
1110 } 1517 }
1111 if (matched) { 1518 }
1112 if (not) 1519
1113 rec->flags &= ~flag; 1520 entry->ops = ops;
1114 else 1521 entry->ip = rec->ip;
1115 rec->flags |= flag; 1522
1523 key = hash_long(entry->ip, FTRACE_HASH_BITS);
1524 hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
1525
1526 } while_for_each_ftrace_rec();
1527 __enable_ftrace_function_probe();
1528
1529 out_unlock:
1530 mutex_unlock(&ftrace_lock);
1531
1532 return count;
1533}
1534
1535enum {
1536 PROBE_TEST_FUNC = 1,
1537 PROBE_TEST_DATA = 2
1538};
1539
1540static void
1541__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1542 void *data, int flags)
1543{
1544 struct ftrace_func_probe *entry;
1545 struct hlist_node *n, *tmp;
1546 char str[KSYM_SYMBOL_LEN];
1547 int type = MATCH_FULL;
1548 int i, len = 0;
1549 char *search;
1550
1551 if (glob && (strcmp(glob, "*") || !strlen(glob)))
1552 glob = NULL;
1553 else {
1554 int not;
1555
1556 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
1557 len = strlen(search);
1558
1559 /* we do not support '!' for function probes */
1560 if (WARN_ON(not))
1561 return;
1562 }
1563
1564 mutex_lock(&ftrace_lock);
1565 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
1566 struct hlist_head *hhd = &ftrace_func_hash[i];
1567
1568 hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
1569
1570 /* break up if statements for readability */
1571 if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
1572 continue;
1573
1574 if ((flags & PROBE_TEST_DATA) && entry->data != data)
1575 continue;
1576
1577 /* do this last, since it is the most expensive */
1578 if (glob) {
1579 kallsyms_lookup(entry->ip, NULL, NULL,
1580 NULL, str);
1581 if (!ftrace_match(str, glob, len, type))
1582 continue;
1116 } 1583 }
1584
1585 hlist_del(&entry->node);
1586 call_rcu(&entry->rcu, ftrace_free_entry_rcu);
1587 }
1588 }
1589 __disable_ftrace_function_probe();
1590 mutex_unlock(&ftrace_lock);
1591}
1592
1593void
1594unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1595 void *data)
1596{
1597 __unregister_ftrace_function_probe(glob, ops, data,
1598 PROBE_TEST_FUNC | PROBE_TEST_DATA);
1599}
1600
1601void
1602unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
1603{
1604 __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC);
1605}
1606
1607void unregister_ftrace_function_probe_all(char *glob)
1608{
1609 __unregister_ftrace_function_probe(glob, NULL, NULL, 0);
1610}
1611
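A sketch of how a caller might use the probe interface added above: attach a callback to every function matching a glob, then detach it. The my_probe_* names are hypothetical, the ftrace_probe_ops field prototypes are inferred from the call sites in this hunk, and the declarations are assumed to be made visible through <linux/ftrace.h>:

#include <linux/ftrace.h>
#include <linux/kernel.h>

static void my_probe_func(unsigned long ip, unsigned long parent_ip,
			  void **data)
{
	/* runs from the traced function's mcount call site; keep it cheap */
	pr_info("probe hit at %pS\n", (void *)ip);
}

static struct ftrace_probe_ops my_probe_ops = {
	.func	= my_probe_func,
};

static int my_probe_attach(void)
{
	/* attaches to every function matching the glob, returns the count */
	return register_ftrace_function_probe("sched_*", &my_probe_ops, NULL);
}

static void my_probe_detach(void)
{
	unregister_ftrace_function_probe_func("sched_*", &my_probe_ops);
}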
1612static LIST_HEAD(ftrace_commands);
1613static DEFINE_MUTEX(ftrace_cmd_mutex);
1614
1615int register_ftrace_command(struct ftrace_func_command *cmd)
1616{
1617 struct ftrace_func_command *p;
1618 int ret = 0;
1619
1620 mutex_lock(&ftrace_cmd_mutex);
1621 list_for_each_entry(p, &ftrace_commands, list) {
1622 if (strcmp(cmd->name, p->name) == 0) {
1623 ret = -EBUSY;
1624 goto out_unlock;
1625 }
1626 }
1627 list_add(&cmd->list, &ftrace_commands);
1628 out_unlock:
1629 mutex_unlock(&ftrace_cmd_mutex);
1630
1631 return ret;
1632}
1633
1634int unregister_ftrace_command(struct ftrace_func_command *cmd)
1635{
1636 struct ftrace_func_command *p, *n;
1637 int ret = -ENODEV;
1638
1639 mutex_lock(&ftrace_cmd_mutex);
1640 list_for_each_entry_safe(p, n, &ftrace_commands, list) {
1641 if (strcmp(cmd->name, p->name) == 0) {
1642 ret = 0;
1643 list_del_init(&p->list);
1644 goto out_unlock;
1645 }
1646 }
1647 out_unlock:
1648 mutex_unlock(&ftrace_cmd_mutex);
1649
1650 return ret;
1651}
1652
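Mirroring the 'mod' template above, a hypothetical command can be plugged into the same parsing; the "ping" name and its callback are made up for illustration:

/* invoked for filter lines of the form "<funcs>:ping[:<param>]" */
static int ftrace_ping_callback(char *func, char *cmd, char *param, int enable)
{
	printk(KERN_INFO "ping: funcs='%s' param='%s' enable=%d\n",
	       func, param ? param : "", enable);
	return 0;
}

static struct ftrace_func_command ftrace_ping_cmd = {
	.name	= "ping",
	.func	= ftrace_ping_callback,
};

static int __init ftrace_ping_cmd_init(void)
{
	/* fails with -EBUSY if another command already claimed the name */
	return register_ftrace_command(&ftrace_ping_cmd);
}
device_initcall(ftrace_ping_cmd_init);

Once registered, writing something like "sched_*:ping:hello" to set_ftrace_filter is routed through ftrace_process_regex() below, which splits the line on ':' and invokes the matching command callback.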
1653static int ftrace_process_regex(char *buff, int len, int enable)
1654{
1655 char *func, *command, *next = buff;
1656 struct ftrace_func_command *p;
1657 int ret = -EINVAL;
1658
1659 func = strsep(&next, ":");
1660
1661 if (!next) {
1662 ftrace_match_records(func, len, enable);
1663 return 0;
1664 }
1665
1666 /* command found */
1667
1668 command = strsep(&next, ":");
1669
1670 mutex_lock(&ftrace_cmd_mutex);
1671 list_for_each_entry(p, &ftrace_commands, list) {
1672 if (strcmp(p->name, command) == 0) {
1673 ret = p->func(func, command, next, enable);
1674 goto out_unlock;
1117 } 1675 }
1118 pg = pg->next;
1119 } 1676 }
1120 spin_unlock(&ftrace_lock); 1677 out_unlock:
1678 mutex_unlock(&ftrace_cmd_mutex);
1679
1680 return ret;
1121} 1681}
1122 1682
1123static ssize_t 1683static ssize_t
@@ -1187,7 +1747,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
1187 if (isspace(ch)) { 1747 if (isspace(ch)) {
1188 iter->filtered++; 1748 iter->filtered++;
1189 iter->buffer[iter->buffer_idx] = 0; 1749 iter->buffer[iter->buffer_idx] = 0;
1190 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1750 ret = ftrace_process_regex(iter->buffer,
1751 iter->buffer_idx, enable);
1752 if (ret)
1753 goto out;
1191 iter->buffer_idx = 0; 1754 iter->buffer_idx = 0;
1192 } else 1755 } else
1193 iter->flags |= FTRACE_ITER_CONT; 1756 iter->flags |= FTRACE_ITER_CONT;
@@ -1226,7 +1789,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
1226 if (reset) 1789 if (reset)
1227 ftrace_filter_reset(enable); 1790 ftrace_filter_reset(enable);
1228 if (buf) 1791 if (buf)
1229 ftrace_match(buf, len, enable); 1792 ftrace_match_records(buf, len, enable);
1230 mutex_unlock(&ftrace_regex_lock); 1793 mutex_unlock(&ftrace_regex_lock);
1231} 1794}
1232 1795
@@ -1276,15 +1839,13 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1276 if (iter->buffer_idx) { 1839 if (iter->buffer_idx) {
1277 iter->filtered++; 1840 iter->filtered++;
1278 iter->buffer[iter->buffer_idx] = 0; 1841 iter->buffer[iter->buffer_idx] = 0;
1279 ftrace_match(iter->buffer, iter->buffer_idx, enable); 1842 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
1280 } 1843 }
1281 1844
1282 mutex_lock(&ftrace_sysctl_lock); 1845 mutex_lock(&ftrace_lock);
1283 mutex_lock(&ftrace_start_lock);
1284 if (ftrace_start_up && ftrace_enabled) 1846 if (ftrace_start_up && ftrace_enabled)
1285 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1847 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1286 mutex_unlock(&ftrace_start_lock); 1848 mutex_unlock(&ftrace_lock);
1287 mutex_unlock(&ftrace_sysctl_lock);
1288 1849
1289 kfree(iter); 1850 kfree(iter);
1290 mutex_unlock(&ftrace_regex_lock); 1851 mutex_unlock(&ftrace_regex_lock);
@@ -1303,21 +1864,21 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1303 return ftrace_regex_release(inode, file, 0); 1864 return ftrace_regex_release(inode, file, 0);
1304} 1865}
1305 1866
1306static struct file_operations ftrace_avail_fops = { 1867static const struct file_operations ftrace_avail_fops = {
1307 .open = ftrace_avail_open, 1868 .open = ftrace_avail_open,
1308 .read = seq_read, 1869 .read = seq_read,
1309 .llseek = seq_lseek, 1870 .llseek = seq_lseek,
1310 .release = ftrace_avail_release, 1871 .release = ftrace_avail_release,
1311}; 1872};
1312 1873
1313static struct file_operations ftrace_failures_fops = { 1874static const struct file_operations ftrace_failures_fops = {
1314 .open = ftrace_failures_open, 1875 .open = ftrace_failures_open,
1315 .read = seq_read, 1876 .read = seq_read,
1316 .llseek = seq_lseek, 1877 .llseek = seq_lseek,
1317 .release = ftrace_avail_release, 1878 .release = ftrace_avail_release,
1318}; 1879};
1319 1880
1320static struct file_operations ftrace_filter_fops = { 1881static const struct file_operations ftrace_filter_fops = {
1321 .open = ftrace_filter_open, 1882 .open = ftrace_filter_open,
1322 .read = ftrace_regex_read, 1883 .read = ftrace_regex_read,
1323 .write = ftrace_filter_write, 1884 .write = ftrace_filter_write,
@@ -1325,7 +1886,7 @@ static struct file_operations ftrace_filter_fops = {
1325 .release = ftrace_filter_release, 1886 .release = ftrace_filter_release,
1326}; 1887};
1327 1888
1328static struct file_operations ftrace_notrace_fops = { 1889static const struct file_operations ftrace_notrace_fops = {
1329 .open = ftrace_notrace_open, 1890 .open = ftrace_notrace_open,
1330 .read = ftrace_regex_read, 1891 .read = ftrace_regex_read,
1331 .write = ftrace_notrace_write, 1892 .write = ftrace_notrace_write,
@@ -1360,6 +1921,10 @@ static void *g_start(struct seq_file *m, loff_t *pos)
1360 1921
1361 mutex_lock(&graph_lock); 1922 mutex_lock(&graph_lock);
1362 1923
1924	/* Nothing set; tell g_show to print that all functions are enabled */
1925 if (!ftrace_graph_count && !*pos)
1926 return (void *)1;
1927
1363 p = g_next(m, p, pos); 1928 p = g_next(m, p, pos);
1364 1929
1365 return p; 1930 return p;
@@ -1378,6 +1943,11 @@ static int g_show(struct seq_file *m, void *v)
1378 if (!ptr) 1943 if (!ptr)
1379 return 0; 1944 return 0;
1380 1945
1946 if (ptr == (unsigned long *)1) {
1947 seq_printf(m, "#### all functions enabled ####\n");
1948 return 0;
1949 }
1950
1381 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 1951 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1382 1952
1383 seq_printf(m, "%s\n", str); 1953 seq_printf(m, "%s\n", str);
@@ -1431,42 +2001,52 @@ ftrace_graph_read(struct file *file, char __user *ubuf,
1431} 2001}
1432 2002
1433static int 2003static int
1434ftrace_set_func(unsigned long *array, int idx, char *buffer) 2004ftrace_set_func(unsigned long *array, int *idx, char *buffer)
1435{ 2005{
1436 char str[KSYM_SYMBOL_LEN];
1437 struct dyn_ftrace *rec; 2006 struct dyn_ftrace *rec;
1438 struct ftrace_page *pg; 2007 struct ftrace_page *pg;
2008 int search_len;
1439 int found = 0; 2009 int found = 0;
1440 int i, j; 2010 int type, not;
2011 char *search;
2012 bool exists;
2013 int i;
1441 2014
1442 if (ftrace_disabled) 2015 if (ftrace_disabled)
1443 return -ENODEV; 2016 return -ENODEV;
1444 2017
1445 /* should not be called from interrupt context */ 2018 /* decode regex */
1446 spin_lock(&ftrace_lock); 2019 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
2020 if (not)
2021 return -EINVAL;
1447 2022
1448 for (pg = ftrace_pages_start; pg; pg = pg->next) { 2023 search_len = strlen(search);
1449 for (i = 0; i < pg->index; i++) {
1450 rec = &pg->records[i];
1451 2024
1452 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2025 mutex_lock(&ftrace_lock);
1453 continue; 2026 do_for_each_ftrace_rec(pg, rec) {
2027
2028 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2029 break;
2030
2031 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2032 continue;
1454 2033
1455 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 2034 if (ftrace_match_record(rec, search, search_len, type)) {
1456 if (strcmp(str, buffer) == 0) { 2035 /* ensure it is not already in the array */
2036 exists = false;
2037 for (i = 0; i < *idx; i++)
2038 if (array[i] == rec->ip) {
2039 exists = true;
2040 break;
2041 }
2042 if (!exists) {
2043 array[(*idx)++] = rec->ip;
1457 found = 1; 2044 found = 1;
1458 for (j = 0; j < idx; j++)
1459 if (array[j] == rec->ip) {
1460 found = 0;
1461 break;
1462 }
1463 if (found)
1464 array[idx] = rec->ip;
1465 break;
1466 } 2045 }
1467 } 2046 }
1468 } 2047 } while_for_each_ftrace_rec();
1469 spin_unlock(&ftrace_lock); 2048
2049 mutex_unlock(&ftrace_lock);
1470 2050
1471 return found ? 0 : -EINVAL; 2051 return found ? 0 : -EINVAL;
1472} 2052}
@@ -1534,13 +2114,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
1534 } 2114 }
1535 buffer[index] = 0; 2115 buffer[index] = 0;
1536 2116
1537 /* we allow only one at a time */ 2117 /* we allow only one expression at a time */
1538 ret = ftrace_set_func(array, ftrace_graph_count, buffer); 2118 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
1539 if (ret) 2119 if (ret)
1540 goto out; 2120 goto out;
1541 2121
1542 ftrace_graph_count++;
1543
1544 file->f_pos += read; 2122 file->f_pos += read;
1545 2123
1546 ret = read; 2124 ret = read;
@@ -1604,7 +2182,7 @@ static int ftrace_convert_nops(struct module *mod,
1604 unsigned long addr; 2182 unsigned long addr;
1605 unsigned long flags; 2183 unsigned long flags;
1606 2184
1607 mutex_lock(&ftrace_start_lock); 2185 mutex_lock(&ftrace_lock);
1608 p = start; 2186 p = start;
1609 while (p < end) { 2187 while (p < end) {
1610 addr = ftrace_call_adjust(*p++); 2188 addr = ftrace_call_adjust(*p++);
@@ -1623,7 +2201,7 @@ static int ftrace_convert_nops(struct module *mod,
1623 local_irq_save(flags); 2201 local_irq_save(flags);
1624 ftrace_update_code(mod); 2202 ftrace_update_code(mod);
1625 local_irq_restore(flags); 2203 local_irq_restore(flags);
1626 mutex_unlock(&ftrace_start_lock); 2204 mutex_unlock(&ftrace_lock);
1627 2205
1628 return 0; 2206 return 0;
1629} 2207}
@@ -1796,7 +2374,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1796 if (ret < 0) 2374 if (ret < 0)
1797 return ret; 2375 return ret;
1798 2376
1799 mutex_lock(&ftrace_start_lock); 2377 mutex_lock(&ftrace_lock);
1800 if (val < 0) { 2378 if (val < 0) {
1801 /* disable pid tracing */ 2379 /* disable pid tracing */
1802 if (!ftrace_pid_trace) 2380 if (!ftrace_pid_trace)
@@ -1835,12 +2413,12 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
1835 ftrace_startup_enable(0); 2413 ftrace_startup_enable(0);
1836 2414
1837 out: 2415 out:
1838 mutex_unlock(&ftrace_start_lock); 2416 mutex_unlock(&ftrace_lock);
1839 2417
1840 return cnt; 2418 return cnt;
1841} 2419}
1842 2420
1843static struct file_operations ftrace_pid_fops = { 2421static const struct file_operations ftrace_pid_fops = {
1844 .read = ftrace_pid_read, 2422 .read = ftrace_pid_read,
1845 .write = ftrace_pid_write, 2423 .write = ftrace_pid_write,
1846}; 2424};
@@ -1863,7 +2441,6 @@ static __init int ftrace_init_debugfs(void)
1863 "'set_ftrace_pid' entry\n"); 2441 "'set_ftrace_pid' entry\n");
1864 return 0; 2442 return 0;
1865} 2443}
1866
1867fs_initcall(ftrace_init_debugfs); 2444fs_initcall(ftrace_init_debugfs);
1868 2445
1869/** 2446/**
@@ -1898,17 +2475,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1898 if (unlikely(ftrace_disabled)) 2475 if (unlikely(ftrace_disabled))
1899 return -1; 2476 return -1;
1900 2477
1901 mutex_lock(&ftrace_sysctl_lock); 2478 mutex_lock(&ftrace_lock);
1902 2479
1903 ret = __register_ftrace_function(ops); 2480 ret = __register_ftrace_function(ops);
1904 ftrace_startup(0); 2481 ftrace_startup(0);
1905 2482
1906 mutex_unlock(&ftrace_sysctl_lock); 2483 mutex_unlock(&ftrace_lock);
1907 return ret; 2484 return ret;
1908} 2485}
1909 2486
1910/** 2487/**
1911 * unregister_ftrace_function - unresgister a function for profiling. 2488 * unregister_ftrace_function - unregister a function for profiling.
1912 * @ops - ops structure that holds the function to unregister 2489 * @ops - ops structure that holds the function to unregister
1913 * 2490 *
1914 * Unregister a function that was added to be called by ftrace profiling. 2491 * Unregister a function that was added to be called by ftrace profiling.
@@ -1917,10 +2494,10 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1917{ 2494{
1918 int ret; 2495 int ret;
1919 2496
1920 mutex_lock(&ftrace_sysctl_lock); 2497 mutex_lock(&ftrace_lock);
1921 ret = __unregister_ftrace_function(ops); 2498 ret = __unregister_ftrace_function(ops);
1922 ftrace_shutdown(0); 2499 ftrace_shutdown(0);
1923 mutex_unlock(&ftrace_sysctl_lock); 2500 mutex_unlock(&ftrace_lock);
1924 2501
1925 return ret; 2502 return ret;
1926} 2503}
@@ -1935,7 +2512,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1935 if (unlikely(ftrace_disabled)) 2512 if (unlikely(ftrace_disabled))
1936 return -ENODEV; 2513 return -ENODEV;
1937 2514
1938 mutex_lock(&ftrace_sysctl_lock); 2515 mutex_lock(&ftrace_lock);
1939 2516
1940 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 2517 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1941 2518
@@ -1964,7 +2541,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1964 } 2541 }
1965 2542
1966 out: 2543 out:
1967 mutex_unlock(&ftrace_sysctl_lock); 2544 mutex_unlock(&ftrace_lock);
1968 return ret; 2545 return ret;
1969} 2546}
1970 2547
@@ -2080,7 +2657,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2080{ 2657{
2081 int ret = 0; 2658 int ret = 0;
2082 2659
2083 mutex_lock(&ftrace_sysctl_lock); 2660 mutex_lock(&ftrace_lock);
2084 2661
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 2662 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier); 2663 register_pm_notifier(&ftrace_suspend_notifier);
@@ -2098,13 +2675,13 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2098 ftrace_startup(FTRACE_START_FUNC_RET); 2675 ftrace_startup(FTRACE_START_FUNC_RET);
2099 2676
2100out: 2677out:
2101 mutex_unlock(&ftrace_sysctl_lock); 2678 mutex_unlock(&ftrace_lock);
2102 return ret; 2679 return ret;
2103} 2680}
2104 2681
2105void unregister_ftrace_graph(void) 2682void unregister_ftrace_graph(void)
2106{ 2683{
2107 mutex_lock(&ftrace_sysctl_lock); 2684 mutex_lock(&ftrace_lock);
2108 2685
2109 atomic_dec(&ftrace_graph_active); 2686 atomic_dec(&ftrace_graph_active);
2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2687 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -2112,7 +2689,7 @@ void unregister_ftrace_graph(void)
2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2689 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier); 2690 unregister_pm_notifier(&ftrace_suspend_notifier);
2114 2691
2115 mutex_unlock(&ftrace_sysctl_lock); 2692 mutex_unlock(&ftrace_lock);
2116} 2693}
2117 2694
2118/* Allocate a return stack for newly created task */ 2695/* Allocate a return stack for newly created task */
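With the locking consolidated onto ftrace_lock, register_ftrace_function() and unregister_ftrace_function() remain the entry points for installing a trace callback. A sketch of a hypothetical in-kernel caller; the my_trace_* names are made up, the two-argument callback prototype and the .func field match the ops used in this file, and the declarations are assumed to come from <linux/ftrace.h>:

#include <linux/ftrace.h>
#include <linux/init.h>

static void my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* called for every traced function while registered; keep it cheap */
}

static struct ftrace_ops my_trace_ops __read_mostly = {
	.func	= my_trace_func,
};

static int __init my_tracer_init(void)
{
	/* pair with unregister_ftrace_function(&my_trace_ops) on teardown */
	return register_ftrace_function(&my_trace_ops);
}
device_initcall(my_tracer_init);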
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 000000000000..ae201b3eda89
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,339 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/dcache.h>
10#include <linux/debugfs.h>
11#include <linux/fs.h>
12#include <linux/seq_file.h>
13#include <trace/kmemtrace.h>
14
15#include "trace.h"
16#include "trace_output.h"
17
18/* Select an alternative, minimalistic output rather than the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1
20
21static struct tracer_opt kmem_opts[] = {
22	/* The minimalistic output is disabled by default */
23 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
24 { }
25};
26
27static struct tracer_flags kmem_tracer_flags = {
28 .val = 0,
29 .opts = kmem_opts
30};
31
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array;
35
36static int kmem_trace_init(struct trace_array *tr)
37{
38 int cpu;
39 kmemtrace_array = tr;
40
41 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu);
43
44 kmem_tracing_enabled = true;
45
46 return 0;
47}
48
49static void kmem_trace_reset(struct trace_array *tr)
50{
51 kmem_tracing_enabled = false;
52}
53
54static void kmemtrace_headers(struct seq_file *s)
55{
56 /* Don't need headers for the original kmemtrace output */
57 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
58 return;
59
60 seq_printf(s, "#\n");
61 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
62 " POINTER NODE CALLER\n");
63 seq_printf(s, "# FREE | | | | "
64 " | | | |\n");
65 seq_printf(s, "# |\n\n");
66}
67
68/*
69 * The two following functions give the original output from kmemtrace,
70 * or something close to it; they may still be missing a few details
71 */
72static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry)
75{
76 struct trace_seq *s = &iter->seq;
77 int ret;
78
79 /* Taken from the old linux/kmemtrace.h */
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr,
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
84 (unsigned long) entry->gfp_flags, entry->node);
85
86 if (!ret)
87 return TRACE_TYPE_PARTIAL_LINE;
88
89 return TRACE_TYPE_HANDLED;
90}
91
92static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry)
95{
96 struct trace_seq *s = &iter->seq;
97 int ret;
98
99 /* Taken from the old linux/kmemtrace.h */
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE;
105
106 return TRACE_TYPE_HANDLED;
107}
108
109
110/* The following two provide a more minimalistic output */
111static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter,
113 struct kmemtrace_alloc_entry *entry)
114{
115 struct trace_seq *s = &iter->seq;
116 int ret;
117
118 /* Alloc entry */
119 ret = trace_seq_printf(s, " + ");
120 if (!ret)
121 return TRACE_TYPE_PARTIAL_LINE;
122
123 /* Type */
124 switch (entry->type_id) {
125 case KMEMTRACE_TYPE_KMALLOC:
126 ret = trace_seq_printf(s, "K ");
127 break;
128 case KMEMTRACE_TYPE_CACHE:
129 ret = trace_seq_printf(s, "C ");
130 break;
131 case KMEMTRACE_TYPE_PAGES:
132 ret = trace_seq_printf(s, "P ");
133 break;
134 default:
135 ret = trace_seq_printf(s, "? ");
136 }
137
138 if (!ret)
139 return TRACE_TYPE_PARTIAL_LINE;
140
141 /* Requested */
142 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
145
146 /* Allocated */
147 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
148 if (!ret)
149 return TRACE_TYPE_PARTIAL_LINE;
150
151 /* Flags
152	 * TODO: would be better to print the names of the GFP flags
153 */
154 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
157
158 /* Pointer to allocated */
159 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
162
163 /* Node */
164 ret = trace_seq_printf(s, "%4d ", entry->node);
165 if (!ret)
166 return TRACE_TYPE_PARTIAL_LINE;
167
168 /* Call site */
169 ret = seq_print_ip_sym(s, entry->call_site, 0);
170 if (!ret)
171 return TRACE_TYPE_PARTIAL_LINE;
172
173 if (!trace_seq_printf(s, "\n"))
174 return TRACE_TYPE_PARTIAL_LINE;
175
176 return TRACE_TYPE_HANDLED;
177}
178
179static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry)
182{
183 struct trace_seq *s = &iter->seq;
184 int ret;
185
186 /* Free entry */
187 ret = trace_seq_printf(s, " - ");
188 if (!ret)
189 return TRACE_TYPE_PARTIAL_LINE;
190
191 /* Type */
192 switch (entry->type_id) {
193 case KMEMTRACE_TYPE_KMALLOC:
194 ret = trace_seq_printf(s, "K ");
195 break;
196 case KMEMTRACE_TYPE_CACHE:
197 ret = trace_seq_printf(s, "C ");
198 break;
199 case KMEMTRACE_TYPE_PAGES:
200 ret = trace_seq_printf(s, "P ");
201 break;
202 default:
203 ret = trace_seq_printf(s, "? ");
204 }
205
206 if (!ret)
207 return TRACE_TYPE_PARTIAL_LINE;
208
209 /* Skip requested/allocated/flags */
210 ret = trace_seq_printf(s, " ");
211 if (!ret)
212 return TRACE_TYPE_PARTIAL_LINE;
213
214 /* Pointer to allocated */
215 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
216 if (!ret)
217 return TRACE_TYPE_PARTIAL_LINE;
218
219 /* Skip node */
220 ret = trace_seq_printf(s, " ");
221 if (!ret)
222 return TRACE_TYPE_PARTIAL_LINE;
223
224 /* Call site */
225 ret = seq_print_ip_sym(s, entry->call_site, 0);
226 if (!ret)
227 return TRACE_TYPE_PARTIAL_LINE;
228
229 if (!trace_seq_printf(s, "\n"))
230 return TRACE_TYPE_PARTIAL_LINE;
231
232 return TRACE_TYPE_HANDLED;
233}
234
235static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
236{
237 struct trace_entry *entry = iter->ent;
238
239 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field;
242 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field);
245 else
246 return kmemtrace_print_alloc_original(iter, field);
247 }
248
249 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field;
251 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field);
254 else
255 return kmemtrace_print_free_original(iter, field);
256 }
257
258 default:
259 return TRACE_TYPE_UNHANDLED;
260 }
261}
262
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace",
322 .init = kmem_trace_init,
323 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags
327};
328
329void kmemtrace_init(void)
330{
331 /* earliest opportunity to start kmem tracing */
332}
333
334static int __init init_kmem_tracer(void)
335{
336 return register_tracer(&kmem_tracer);
337}
338
339device_initcall(init_kmem_tracer);
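For context, a hedged sketch of how a caller might feed the two exported markers above; example_cache_alloc()/example_cache_free() and the sizes passed are illustrative placeholders, not the real SLAB/SLUB hooks:

/*
 * Illustrative only: reporting an allocation and its matching free to
 * kmemtrace.  The wrapper names and the request/alloc sizes are made up
 * for the example; real allocators call the markers from their own
 * hot paths.
 */
static void *example_cache_alloc(size_t bytes_req, size_t bytes_alloc,
				 gfp_t gfp_flags, int node)
{
	void *obj = kmalloc_node(bytes_alloc, gfp_flags, node);

	kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, obj,
				  bytes_req, bytes_alloc, gfp_flags, node);
	return obj;
}

static void example_cache_free(void *obj)
{
	kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, obj);
	kfree(obj);
}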
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..178858492a89 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,13 +4,15 @@
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
7#include <linux/spinlock.h> 9#include <linux/spinlock.h>
8#include <linux/debugfs.h> 10#include <linux/debugfs.h>
9#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/hardirq.h>
10#include <linux/module.h> 13#include <linux/module.h>
11#include <linux/percpu.h> 14#include <linux/percpu.h>
12#include <linux/mutex.h> 15#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h> 16#include <linux/init.h>
15#include <linux/hash.h> 17#include <linux/hash.h>
16#include <linux/list.h> 18#include <linux/list.h>
@@ -57,7 +59,9 @@ enum {
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, 59 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58}; 60};
59 61
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
63
64#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
61 65
62/** 66/**
63 * tracing_on - enable all tracing buffers 67 * tracing_on - enable all tracing buffers
@@ -89,26 +93,34 @@ EXPORT_SYMBOL_GPL(tracing_off);
89 * tracing_off_permanent - permanently disable ring buffers 93 * tracing_off_permanent - permanently disable ring buffers
90 * 94 *
91 * This function, once called, will disable all ring buffers 95 * This function, once called, will disable all ring buffers
92 * permanenty. 96 * permanently.
93 */ 97 */
94void tracing_off_permanent(void) 98void tracing_off_permanent(void)
95{ 99{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); 100 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97} 101}
98 102
103/**
104 * tracing_is_on - show state of ring buffers enabled
105 */
106int tracing_is_on(void)
107{
108 return ring_buffer_flags == RB_BUFFERS_ON;
109}
110EXPORT_SYMBOL_GPL(tracing_is_on);
111
99#include "trace.h" 112#include "trace.h"
100 113
101/* Up this if you want to test the TIME_EXTENTS and normalization */ 114/* Up this if you want to test the TIME_EXTENTS and normalization */
102#define DEBUG_SHIFT 0 115#define DEBUG_SHIFT 0
103 116
104/* FIXME!!! */
105u64 ring_buffer_time_stamp(int cpu) 117u64 ring_buffer_time_stamp(int cpu)
106{ 118{
107 u64 time; 119 u64 time;
108 120
109 preempt_disable_notrace(); 121 preempt_disable_notrace();
110 /* shift to debug/test normalization and TIME_EXTENTS */ 122 /* shift to debug/test normalization and TIME_EXTENTS */
111 time = sched_clock() << DEBUG_SHIFT; 123 time = trace_clock_local() << DEBUG_SHIFT;
112 preempt_enable_no_resched_notrace(); 124 preempt_enable_no_resched_notrace();
113 125
114 return time; 126 return time;
@@ -122,9 +134,8 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
122} 134}
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 135EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124 136
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 137#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
126#define RB_ALIGNMENT_SHIFT 2 138#define RB_ALIGNMENT 4U
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28 139#define RB_MAX_SMALL_DATA 28
129 140
130enum { 141enum {
@@ -133,7 +144,7 @@ enum {
133}; 144};
134 145
135/* inline for ring buffer fast paths */ 146/* inline for ring buffer fast paths */
136static inline unsigned 147static unsigned
137rb_event_length(struct ring_buffer_event *event) 148rb_event_length(struct ring_buffer_event *event)
138{ 149{
139 unsigned length; 150 unsigned length;
@@ -151,7 +162,7 @@ rb_event_length(struct ring_buffer_event *event)
151 162
152 case RINGBUF_TYPE_DATA: 163 case RINGBUF_TYPE_DATA:
153 if (event->len) 164 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT; 165 length = event->len * RB_ALIGNMENT;
155 else 166 else
156 length = event->array[0]; 167 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE; 168 return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +190,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
179EXPORT_SYMBOL_GPL(ring_buffer_event_length); 190EXPORT_SYMBOL_GPL(ring_buffer_event_length);
180 191
181/* inline for ring buffer fast paths */ 192/* inline for ring buffer fast paths */
182static inline void * 193static void *
183rb_event_data(struct ring_buffer_event *event) 194rb_event_data(struct ring_buffer_event *event)
184{ 195{
185 BUG_ON(event->type != RINGBUF_TYPE_DATA); 196 BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +220,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
209 220
210struct buffer_data_page { 221struct buffer_data_page {
211 u64 time_stamp; /* page time stamp */ 222 u64 time_stamp; /* page time stamp */
212 local_t commit; /* write commited index */ 223 local_t commit; /* write committed index */
213 unsigned char data[]; /* data of buffer page */ 224 unsigned char data[]; /* data of buffer page */
214}; 225};
215 226
@@ -225,14 +236,25 @@ static void rb_init_page(struct buffer_data_page *bpage)
225 local_set(&bpage->commit, 0); 236 local_set(&bpage->commit, 0);
226} 237}
227 238
239/**
240 * ring_buffer_page_len - the size of data on the page.
241 * @page: The page to read
242 *
243 * Returns the amount of data on the page, including buffer page header.
244 */
245size_t ring_buffer_page_len(void *page)
246{
247 return local_read(&((struct buffer_data_page *)page)->commit)
248 + BUF_PAGE_HDR_SIZE;
249}
250
228/* 251/*
229 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 252 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
230 * this issue out. 253 * this issue out.
231 */ 254 */
232static inline void free_buffer_page(struct buffer_page *bpage) 255static void free_buffer_page(struct buffer_page *bpage)
233{ 256{
234 if (bpage->page) 257 free_page((unsigned long)bpage->page);
235 free_page((unsigned long)bpage->page);
236 kfree(bpage); 258 kfree(bpage);
237} 259}
238 260
@@ -246,7 +268,7 @@ static inline int test_time_stamp(u64 delta)
246 return 0; 268 return 0;
247} 269}
248 270
249#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) 271#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
250 272
251/* 273/*
252 * head_page == tail_page && head == tail then buffer is empty. 274 * head_page == tail_page && head == tail then buffer is empty.
@@ -260,7 +282,7 @@ struct ring_buffer_per_cpu {
260 struct list_head pages; 282 struct list_head pages;
261 struct buffer_page *head_page; /* read from head */ 283 struct buffer_page *head_page; /* read from head */
262 struct buffer_page *tail_page; /* write to tail */ 284 struct buffer_page *tail_page; /* write to tail */
263 struct buffer_page *commit_page; /* commited pages */ 285 struct buffer_page *commit_page; /* committed pages */
264 struct buffer_page *reader_page; 286 struct buffer_page *reader_page;
265 unsigned long overrun; 287 unsigned long overrun;
266 unsigned long entries; 288 unsigned long entries;
@@ -273,8 +295,8 @@ struct ring_buffer {
273 unsigned pages; 295 unsigned pages;
274 unsigned flags; 296 unsigned flags;
275 int cpus; 297 int cpus;
276 cpumask_var_t cpumask;
277 atomic_t record_disabled; 298 atomic_t record_disabled;
299 cpumask_var_t cpumask;
278 300
279 struct mutex mutex; 301 struct mutex mutex;
280 302
@@ -303,7 +325,7 @@ struct ring_buffer_iter {
303 * check_pages - integrity check of buffer pages 325 * check_pages - integrity check of buffer pages
304 * @cpu_buffer: CPU buffer with pages to test 326 * @cpu_buffer: CPU buffer with pages to test
305 * 327 *
306 * As a safty measure we check to make sure the data pages have not 328 * As a safety measure we check to make sure the data pages have not
307 * been corrupted. 329 * been corrupted.
308 */ 330 */
309static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 331static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -811,7 +833,7 @@ rb_event_index(struct ring_buffer_event *event)
811 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); 833 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
812} 834}
813 835
814static inline int 836static int
815rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, 837rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event) 838 struct ring_buffer_event *event)
817{ 839{
@@ -825,7 +847,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
825 rb_commit_index(cpu_buffer) == index; 847 rb_commit_index(cpu_buffer) == index;
826} 848}
827 849
828static inline void 850static void
829rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, 851rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
830 struct ring_buffer_event *event) 852 struct ring_buffer_event *event)
831{ 853{
@@ -850,7 +872,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
850 local_set(&cpu_buffer->commit_page->page->commit, index); 872 local_set(&cpu_buffer->commit_page->page->commit, index);
851} 873}
852 874
853static inline void 875static void
854rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 876rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
855{ 877{
856 /* 878 /*
@@ -896,7 +918,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
896 cpu_buffer->reader_page->read = 0; 918 cpu_buffer->reader_page->read = 0;
897} 919}
898 920
899static inline void rb_inc_iter(struct ring_buffer_iter *iter) 921static void rb_inc_iter(struct ring_buffer_iter *iter)
900{ 922{
901 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 923 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
902 924
@@ -926,7 +948,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
926 * and with this, we can determine what to place into the 948 * and with this, we can determine what to place into the
927 * data field. 949 * data field.
928 */ 950 */
929static inline void 951static void
930rb_update_event(struct ring_buffer_event *event, 952rb_update_event(struct ring_buffer_event *event,
931 unsigned type, unsigned length) 953 unsigned type, unsigned length)
932{ 954{
@@ -938,15 +960,11 @@ rb_update_event(struct ring_buffer_event *event,
938 break; 960 break;
939 961
940 case RINGBUF_TYPE_TIME_EXTEND: 962 case RINGBUF_TYPE_TIME_EXTEND:
941 event->len = 963 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
942 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
943 >> RB_ALIGNMENT_SHIFT;
944 break; 964 break;
945 965
946 case RINGBUF_TYPE_TIME_STAMP: 966 case RINGBUF_TYPE_TIME_STAMP:
947 event->len = 967 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
948 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
949 >> RB_ALIGNMENT_SHIFT;
950 break; 968 break;
951 969
952 case RINGBUF_TYPE_DATA: 970 case RINGBUF_TYPE_DATA:
@@ -955,16 +973,14 @@ rb_update_event(struct ring_buffer_event *event,
955 event->len = 0; 973 event->len = 0;
956 event->array[0] = length; 974 event->array[0] = length;
957 } else 975 } else
958 event->len = 976 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
959 (length + (RB_ALIGNMENT-1))
960 >> RB_ALIGNMENT_SHIFT;
961 break; 977 break;
962 default: 978 default:
963 BUG(); 979 BUG();
964 } 980 }
965} 981}
966 982
967static inline unsigned rb_calculate_event_length(unsigned length) 983static unsigned rb_calculate_event_length(unsigned length)
968{ 984{
969 struct ring_buffer_event event; /* Used only for sizeof array */ 985 struct ring_buffer_event event; /* Used only for sizeof array */
970 986
@@ -990,6 +1006,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
990 struct ring_buffer *buffer = cpu_buffer->buffer; 1006 struct ring_buffer *buffer = cpu_buffer->buffer;
991 struct ring_buffer_event *event; 1007 struct ring_buffer_event *event;
992 unsigned long flags; 1008 unsigned long flags;
1009 bool lock_taken = false;
993 1010
994 commit_page = cpu_buffer->commit_page; 1011 commit_page = cpu_buffer->commit_page;
995 /* we just need to protect against interrupts */ 1012 /* we just need to protect against interrupts */
@@ -1003,7 +1020,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1003 struct buffer_page *next_page = tail_page; 1020 struct buffer_page *next_page = tail_page;
1004 1021
1005 local_irq_save(flags); 1022 local_irq_save(flags);
1006 __raw_spin_lock(&cpu_buffer->lock); 1023 /*
1024 * Since the write to the buffer is still not
1025 * fully lockless, we must be careful with NMIs.
1026 * The locks in the writers are taken when a write
1027 * crosses to a new page. The locks protect against
1028 * races with the readers (this will soon be fixed
1029 * with a lockless solution).
1030 *
1031 * Because we can not protect against NMIs, and we
1032 * want to keep traces reentrant, we need to manage
1033 * what happens when we are in an NMI.
1034 *
1035 * NMIs can happen after we take the lock.
1036 * If we are in an NMI, only take the lock
1037 * if it is not already taken. Otherwise
1038 * simply fail.
1039 */
1040 if (unlikely(in_nmi())) {
1041 if (!__raw_spin_trylock(&cpu_buffer->lock))
1042 goto out_reset;
1043 } else
1044 __raw_spin_lock(&cpu_buffer->lock);
1045
1046 lock_taken = true;
1007 1047
1008 rb_inc_page(cpu_buffer, &next_page); 1048 rb_inc_page(cpu_buffer, &next_page);
1009 1049
@@ -1012,7 +1052,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1012 1052
1013 /* we grabbed the lock before incrementing */ 1053 /* we grabbed the lock before incrementing */
1014 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1054 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1015 goto out_unlock; 1055 goto out_reset;
1016 1056
1017 /* 1057 /*
1018 * If for some reason, we had an interrupt storm that made 1058 * If for some reason, we had an interrupt storm that made
@@ -1021,12 +1061,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1021 */ 1061 */
1022 if (unlikely(next_page == commit_page)) { 1062 if (unlikely(next_page == commit_page)) {
1023 WARN_ON_ONCE(1); 1063 WARN_ON_ONCE(1);
1024 goto out_unlock; 1064 goto out_reset;
1025 } 1065 }
1026 1066
1027 if (next_page == head_page) { 1067 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) 1068 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 goto out_unlock; 1069 goto out_reset;
1030 1070
1031 /* tail_page has not moved yet? */ 1071 /* tail_page has not moved yet? */
1032 if (tail_page == cpu_buffer->tail_page) { 1072 if (tail_page == cpu_buffer->tail_page) {
@@ -1100,12 +1140,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1100 1140
1101 return event; 1141 return event;
1102 1142
1103 out_unlock: 1143 out_reset:
1104 /* reset write */ 1144 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE) 1145 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail); 1146 local_set(&tail_page->write, tail);
1107 1147
1108 __raw_spin_unlock(&cpu_buffer->lock); 1148 if (likely(lock_taken))
1149 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1150 local_irq_restore(flags);
1110 return NULL; 1151 return NULL;
1111} 1152}
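The NMI handling introduced in __rb_reserve_next() reduces to a small pattern: only trylock when running in NMI context, remember whether the lock was actually taken, and unlock on the reset path only if it was. A stand-alone sketch of that pattern follows; the function and lock names are placeholders, not ring-buffer code.

/*
 * Sketch of the NMI-safe locking pattern added above.  example_lock and
 * the elided body are placeholders; only the control flow (trylock in
 * NMI, lock_taken bookkeeping, conditional unlock on the reset path)
 * mirrors the patch.
 */
static int example_cross_page(raw_spinlock_t *example_lock)
{
	unsigned long flags;
	bool lock_taken = false;

	local_irq_save(flags);

	if (unlikely(in_nmi())) {
		/* Never spin in NMI context: the interrupted code may hold the lock. */
		if (!__raw_spin_trylock(example_lock))
			goto out_reset;
	} else
		__raw_spin_lock(example_lock);

	lock_taken = true;

	/* ... move to the next page, check for overwrite, etc. ... */

	__raw_spin_unlock(example_lock);
	local_irq_restore(flags);
	return 0;

 out_reset:
	/* Failure paths taken after the lock is held also land here. */
	if (lock_taken)
		__raw_spin_unlock(example_lock);
	local_irq_restore(flags);
	return -EBUSY;
}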
@@ -1265,7 +1306,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1265 * ring_buffer_lock_reserve - reserve a part of the buffer 1306 * ring_buffer_lock_reserve - reserve a part of the buffer
1266 * @buffer: the ring buffer to reserve from 1307 * @buffer: the ring buffer to reserve from
1267 * @length: the length of the data to reserve (excluding event header) 1308 * @length: the length of the data to reserve (excluding event header)
1268 * @flags: a pointer to save the interrupt flags
1269 * 1309 *
1270 * Returns a reserved event on the ring buffer to copy directly to. 1310 * Returns a reserved event on the ring buffer to copy directly to.
1271 * The user of this interface will need to get the body to write into 1311 * The user of this interface will need to get the body to write into
@@ -1278,9 +1318,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
1278 * If NULL is returned, then nothing has been allocated or locked. 1318 * If NULL is returned, then nothing has been allocated or locked.
1279 */ 1319 */
1280struct ring_buffer_event * 1320struct ring_buffer_event *
1281ring_buffer_lock_reserve(struct ring_buffer *buffer, 1321ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1282 unsigned long length,
1283 unsigned long *flags)
1284{ 1322{
1285 struct ring_buffer_per_cpu *cpu_buffer; 1323 struct ring_buffer_per_cpu *cpu_buffer;
1286 struct ring_buffer_event *event; 1324 struct ring_buffer_event *event;
@@ -1347,15 +1385,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1347 * ring_buffer_unlock_commit - commit a reserved 1385 * ring_buffer_unlock_commit - commit a reserved
1348 * @buffer: The buffer to commit to 1386 * @buffer: The buffer to commit to
1349 * @event: The event pointer to commit. 1387 * @event: The event pointer to commit.
1350 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1351 * 1388 *
1352 * This commits the data to the ring buffer, and releases any locks held. 1389 * This commits the data to the ring buffer, and releases any locks held.
1353 * 1390 *
1354 * Must be paired with ring_buffer_lock_reserve. 1391 * Must be paired with ring_buffer_lock_reserve.
1355 */ 1392 */
1356int ring_buffer_unlock_commit(struct ring_buffer *buffer, 1393int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1357 struct ring_buffer_event *event, 1394 struct ring_buffer_event *event)
1358 unsigned long flags)
1359{ 1395{
1360 struct ring_buffer_per_cpu *cpu_buffer; 1396 struct ring_buffer_per_cpu *cpu_buffer;
1361 int cpu = raw_smp_processor_id(); 1397 int cpu = raw_smp_processor_id();
@@ -1438,7 +1474,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1438} 1474}
1439EXPORT_SYMBOL_GPL(ring_buffer_write); 1475EXPORT_SYMBOL_GPL(ring_buffer_write);
1440 1476
1441static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1477static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1442{ 1478{
1443 struct buffer_page *reader = cpu_buffer->reader_page; 1479 struct buffer_page *reader = cpu_buffer->reader_page;
1444 struct buffer_page *head = cpu_buffer->head_page; 1480 struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2313,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2277 if (buffer_a->pages != buffer_b->pages) 2313 if (buffer_a->pages != buffer_b->pages)
2278 return -EINVAL; 2314 return -EINVAL;
2279 2315
2316 if (ring_buffer_flags != RB_BUFFERS_ON)
2317 return -EAGAIN;
2318
2319 if (atomic_read(&buffer_a->record_disabled))
2320 return -EAGAIN;
2321
2322 if (atomic_read(&buffer_b->record_disabled))
2323 return -EAGAIN;
2324
2280 cpu_buffer_a = buffer_a->buffers[cpu]; 2325 cpu_buffer_a = buffer_a->buffers[cpu];
2281 cpu_buffer_b = buffer_b->buffers[cpu]; 2326 cpu_buffer_b = buffer_b->buffers[cpu];
2282 2327
2328 if (atomic_read(&cpu_buffer_a->record_disabled))
2329 return -EAGAIN;
2330
2331 if (atomic_read(&cpu_buffer_b->record_disabled))
2332 return -EAGAIN;
2333
2283 /* 2334 /*
2284 * We can't do a synchronize_sched here because this 2335 * We can't do a synchronize_sched here because this
2285 * function can be called in atomic context. 2336 * function can be called in atomic context.
@@ -2303,13 +2354,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2303EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2354EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2304 2355
2305static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, 2356static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2306 struct buffer_data_page *bpage) 2357 struct buffer_data_page *bpage,
2358 unsigned int offset)
2307{ 2359{
2308 struct ring_buffer_event *event; 2360 struct ring_buffer_event *event;
2309 unsigned long head; 2361 unsigned long head;
2310 2362
2311 __raw_spin_lock(&cpu_buffer->lock); 2363 __raw_spin_lock(&cpu_buffer->lock);
2312 for (head = 0; head < local_read(&bpage->commit); 2364 for (head = offset; head < local_read(&bpage->commit);
2313 head += rb_event_length(event)) { 2365 head += rb_event_length(event)) {
2314 2366
2315 event = __rb_data_page_index(bpage, head); 2367 event = __rb_data_page_index(bpage, head);
@@ -2340,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2340 */ 2392 */
2341void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 2393void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2342{ 2394{
2343 unsigned long addr;
2344 struct buffer_data_page *bpage; 2395 struct buffer_data_page *bpage;
2396 unsigned long addr;
2345 2397
2346 addr = __get_free_page(GFP_KERNEL); 2398 addr = __get_free_page(GFP_KERNEL);
2347 if (!addr) 2399 if (!addr)
@@ -2349,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2349 2401
2350 bpage = (void *)addr; 2402 bpage = (void *)addr;
2351 2403
2404 rb_init_page(bpage);
2405
2352 return bpage; 2406 return bpage;
2353} 2407}
2354 2408
@@ -2368,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2368 * ring_buffer_read_page - extract a page from the ring buffer 2422 * ring_buffer_read_page - extract a page from the ring buffer
2369 * @buffer: buffer to extract from 2423 * @buffer: buffer to extract from
2370 * @data_page: the page to use allocated from ring_buffer_alloc_read_page 2424 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2425 * @len: amount to extract
2371 * @cpu: the cpu of the buffer to extract 2426 * @cpu: the cpu of the buffer to extract
2372 * @full: should the extraction only happen when the page is full. 2427 * @full: should the extraction only happen when the page is full.
2373 * 2428 *
@@ -2377,12 +2432,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2377 * to swap with a page in the ring buffer. 2432 * to swap with a page in the ring buffer.
2378 * 2433 *
2379 * for example: 2434 * for example:
2380 * rpage = ring_buffer_alloc_page(buffer); 2435 * rpage = ring_buffer_alloc_read_page(buffer);
2381 * if (!rpage) 2436 * if (!rpage)
2382 * return error; 2437 * return error;
2383 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2438 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
2384 * if (ret) 2439 * if (ret >= 0)
2385 * process_page(rpage); 2440 * process_page(rpage, ret);
2386 * 2441 *
2387 * When @full is set, the function will not return true unless 2442 * When @full is set, the function will not return true unless
2388 * the writer is off the reader page. 2443 * the writer is off the reader page.
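Expanding the docstring's example into a slightly fuller caller: a hedged sketch in which process_page() and the error handling are assumptions for illustration, not an interface defined by this patch.

/*
 * Illustrative consumer of the updated read_page interface.  A negative
 * return now means "no data"; a non-negative one is the offset of the
 * consumed data within the page.
 */
static int example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	void *rpage = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!rpage)
		return -ENOMEM;

	ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		process_page(rpage, ret);	/* hypothetical consumer */

	ring_buffer_free_read_page(buffer, rpage);
	return ret;
}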
@@ -2393,69 +2448,111 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2393 * responsible for that. 2448 * responsible for that.
2394 * 2449 *
2395 * Returns: 2450 * Returns:
2396 * 1 if data has been transferred 2451 * >=0 if data has been transferred, returns the offset of consumed data.
2397 * 0 if no data has been transferred. 2452 * <0 if no data has been transferred.
2398 */ 2453 */
2399int ring_buffer_read_page(struct ring_buffer *buffer, 2454int ring_buffer_read_page(struct ring_buffer *buffer,
2400 void **data_page, int cpu, int full) 2455 void **data_page, size_t len, int cpu, int full)
2401{ 2456{
2402 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2457 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2403 struct ring_buffer_event *event; 2458 struct ring_buffer_event *event;
2404 struct buffer_data_page *bpage; 2459 struct buffer_data_page *bpage;
2460 struct buffer_page *reader;
2405 unsigned long flags; 2461 unsigned long flags;
2406 int ret = 0; 2462 unsigned int commit;
2463 unsigned int read;
2464 u64 save_timestamp;
2465 int ret = -1;
2466
2467 /*
2468 * If len is not big enough to hold the page header, then
2469 * we can not copy anything.
2470 */
2471 if (len <= BUF_PAGE_HDR_SIZE)
2472 return -1;
2473
2474 len -= BUF_PAGE_HDR_SIZE;
2407 2475
2408 if (!data_page) 2476 if (!data_page)
2409 return 0; 2477 return -1;
2410 2478
2411 bpage = *data_page; 2479 bpage = *data_page;
2412 if (!bpage) 2480 if (!bpage)
2413 return 0; 2481 return -1;
2414 2482
2415 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2483 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2416 2484
2417 /* 2485 reader = rb_get_reader_page(cpu_buffer);
2418 * rb_buffer_peek will get the next ring buffer if 2486 if (!reader)
2419 * the current reader page is empty.
2420 */
2421 event = rb_buffer_peek(buffer, cpu, NULL);
2422 if (!event)
2423 goto out; 2487 goto out;
2424 2488
2425 /* check for data */ 2489 event = rb_reader_event(cpu_buffer);
2426 if (!local_read(&cpu_buffer->reader_page->page->commit)) 2490
2427 goto out; 2491 read = reader->read;
2492 commit = rb_page_commit(reader);
2493
2428 /* 2494 /*
2429 * If the writer is already off of the read page, then simply 2495 * If this page has been partially read or
2430 * switch the read page with the given page. Otherwise 2496 * if len is not big enough to read the rest of the page or
2431 * we need to copy the data from the reader to the writer. 2497 * a writer is still on the page, then
2498 * we must copy the data from the page to the buffer.
2499 * Otherwise, we can simply swap the page with the one passed in.
2432 */ 2500 */
2433 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2501 if (read || (len < (commit - read)) ||
2434 unsigned int read = cpu_buffer->reader_page->read; 2502 cpu_buffer->reader_page == cpu_buffer->commit_page) {
2503 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2504 unsigned int rpos = read;
2505 unsigned int pos = 0;
2506 unsigned int size;
2435 2507
2436 if (full) 2508 if (full)
2437 goto out; 2509 goto out;
2438 /* The writer is still on the reader page, we must copy */
2439 bpage = cpu_buffer->reader_page->page;
2440 memcpy(bpage->data,
2441 cpu_buffer->reader_page->page->data + read,
2442 local_read(&bpage->commit) - read);
2443 2510
2444 /* consume what was read */ 2511 if (len > (commit - read))
2445 cpu_buffer->reader_page += read; 2512 len = (commit - read);
2446 2513
2514 size = rb_event_length(event);
2515
2516 if (len < size)
2517 goto out;
2518
2519 /* save the current timestamp, since the user will need it */
2520 save_timestamp = cpu_buffer->read_stamp;
2521
2522 /* Need to copy one event at a time */
2523 do {
2524 memcpy(bpage->data + pos, rpage->data + rpos, size);
2525
2526 len -= size;
2527
2528 rb_advance_reader(cpu_buffer);
2529 rpos = reader->read;
2530 pos += size;
2531
2532 event = rb_reader_event(cpu_buffer);
2533 size = rb_event_length(event);
2534 } while (len > size);
2535
2536 /* update bpage */
2537 local_set(&bpage->commit, pos);
2538 bpage->time_stamp = save_timestamp;
2539
2540 /* we copied everything to the beginning */
2541 read = 0;
2447 } else { 2542 } else {
2448 /* swap the pages */ 2543 /* swap the pages */
2449 rb_init_page(bpage); 2544 rb_init_page(bpage);
2450 bpage = cpu_buffer->reader_page->page; 2545 bpage = reader->page;
2451 cpu_buffer->reader_page->page = *data_page; 2546 reader->page = *data_page;
2452 cpu_buffer->reader_page->read = 0; 2547 local_set(&reader->write, 0);
2548 reader->read = 0;
2453 *data_page = bpage; 2549 *data_page = bpage;
2550
2551 /* update the entry counter */
2552 rb_remove_entries(cpu_buffer, bpage, read);
2454 } 2553 }
2455 ret = 1; 2554 ret = read;
2456 2555
2457 /* update the entry counter */
2458 rb_remove_entries(cpu_buffer, bpage);
2459 out: 2556 out:
2460 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2557 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2461 2558
@@ -2466,7 +2563,7 @@ static ssize_t
2466rb_simple_read(struct file *filp, char __user *ubuf, 2563rb_simple_read(struct file *filp, char __user *ubuf,
2467 size_t cnt, loff_t *ppos) 2564 size_t cnt, loff_t *ppos)
2468{ 2565{
2469 long *p = filp->private_data; 2566 unsigned long *p = filp->private_data;
2470 char buf[64]; 2567 char buf[64];
2471 int r; 2568 int r;
2472 2569
@@ -2482,9 +2579,9 @@ static ssize_t
2482rb_simple_write(struct file *filp, const char __user *ubuf, 2579rb_simple_write(struct file *filp, const char __user *ubuf,
2483 size_t cnt, loff_t *ppos) 2580 size_t cnt, loff_t *ppos)
2484{ 2581{
2485 long *p = filp->private_data; 2582 unsigned long *p = filp->private_data;
2486 char buf[64]; 2583 char buf[64];
2487 long val; 2584 unsigned long val;
2488 int ret; 2585 int ret;
2489 2586
2490 if (cnt >= sizeof(buf)) 2587 if (cnt >= sizeof(buf))
@@ -2509,7 +2606,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2509 return cnt; 2606 return cnt;
2510} 2607}
2511 2608
2512static struct file_operations rb_simple_fops = { 2609static const struct file_operations rb_simple_fops = {
2513 .open = tracing_open_generic, 2610 .open = tracing_open_generic,
2514 .read = rb_simple_read, 2611 .read = rb_simple_read,
2515 .write = rb_simple_write, 2612 .write = rb_simple_write,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac2..5c9c6d907054 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -11,32 +11,33 @@
11 * Copyright (C) 2004-2006 Ingo Molnar 11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h>
14#include <linux/utsrelease.h> 15#include <linux/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
15#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 19#include <linux/seq_file.h>
17#include <linux/notifier.h> 20#include <linux/notifier.h>
21#include <linux/irqflags.h>
18#include <linux/debugfs.h> 22#include <linux/debugfs.h>
19#include <linux/pagemap.h> 23#include <linux/pagemap.h>
20#include <linux/hardirq.h> 24#include <linux/hardirq.h>
21#include <linux/linkage.h> 25#include <linux/linkage.h>
22#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kprobes.h>
23#include <linux/ftrace.h> 28#include <linux/ftrace.h>
24#include <linux/module.h> 29#include <linux/module.h>
25#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h>
26#include <linux/kdebug.h> 32#include <linux/kdebug.h>
27#include <linux/ctype.h> 33#include <linux/ctype.h>
28#include <linux/init.h> 34#include <linux/init.h>
29#include <linux/poll.h> 35#include <linux/poll.h>
30#include <linux/gfp.h> 36#include <linux/gfp.h>
31#include <linux/fs.h> 37#include <linux/fs.h>
32#include <linux/kprobes.h>
33#include <linux/writeback.h>
34
35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
38 38
39#include "trace.h" 39#include "trace.h"
40#include "trace_output.h"
40 41
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 42#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 43
@@ -47,11 +48,16 @@ unsigned long __read_mostly tracing_thresh;
47 * We need to change this state when a selftest is running. 48 * We need to change this state when a selftest is running.
48 * A selftest will lurk into the ring-buffer to count the 49 * A selftest will lurk into the ring-buffer to count the
49 * entries inserted during the selftest although some concurrent 50 * entries inserted during the selftest although some concurrent
50 * insertions into the ring-buffer such as ftrace_printk could occurred 51 * insertions into the ring-buffer such as trace_printk could occurred
51 * at the same time, giving false positive or negative results. 52 * at the same time, giving false positive or negative results.
52 */ 53 */
53static bool __read_mostly tracing_selftest_running; 54static bool __read_mostly tracing_selftest_running;
54 55
56/*
57 * If a tracer is running, we do not want to run SELFTEST.
58 */
59static bool __read_mostly tracing_selftest_disabled;
60
55/* For tracers that don't implement custom flags */ 61/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = { 62static struct tracer_opt dummy_tracer_opt[] = {
57 { } 63 { }
@@ -73,7 +79,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
73 * of the tracer is successful. But that is the only place that sets 79 * of the tracer is successful. But that is the only place that sets
74 * this back to zero. 80 * this back to zero.
75 */ 81 */
76int tracing_disabled = 1; 82static int tracing_disabled = 1;
77 83
78static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 84static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
79 85
@@ -91,6 +97,9 @@ static inline void ftrace_enable_cpu(void)
91 97
92static cpumask_var_t __read_mostly tracing_buffer_mask; 98static cpumask_var_t __read_mostly tracing_buffer_mask;
93 99
100/* Define which cpu buffers are currently read in trace_pipe */
101static cpumask_var_t tracing_reader_cpumask;
102
94#define for_each_tracing_cpu(cpu) \ 103#define for_each_tracing_cpu(cpu) \
95 for_each_cpu(cpu, tracing_buffer_mask) 104 for_each_cpu(cpu, tracing_buffer_mask)
96 105
@@ -109,14 +118,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
109 */ 118 */
110int ftrace_dump_on_oops; 119int ftrace_dump_on_oops;
111 120
112static int tracing_set_tracer(char *buf); 121static int tracing_set_tracer(const char *buf);
122
123#define BOOTUP_TRACER_SIZE 100
124static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
125static char *default_bootup_tracer;
113 126
114static int __init set_ftrace(char *str) 127static int __init set_ftrace(char *str)
115{ 128{
116 tracing_set_tracer(str); 129 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
130 default_bootup_tracer = bootup_tracer_buf;
117 return 1; 131 return 1;
118} 132}
119__setup("ftrace", set_ftrace); 133__setup("ftrace=", set_ftrace);
120 134
121static int __init set_ftrace_dump_on_oops(char *str) 135static int __init set_ftrace_dump_on_oops(char *str)
122{ 136{
@@ -186,9 +200,6 @@ int tracing_is_enabled(void)
186 return tracer_enabled; 200 return tracer_enabled;
187} 201}
188 202
189/* function tracing enabled */
190int ftrace_function_enabled;
191
192/* 203/*
193 * trace_buf_size is the size in bytes that is allocated 204 * trace_buf_size is the size in bytes that is allocated
194 * for a buffer. Note, the number of bytes is always rounded 205 * for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +240,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
229 240
230/* trace_flags holds trace_options default values */ 241/* trace_flags holds trace_options default values */
231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 242unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
232 TRACE_ITER_ANNOTATE; 243 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
233 244
234/** 245/**
235 * trace_wake_up - wake up tasks waiting for trace input 246 * trace_wake_up - wake up tasks waiting for trace input
@@ -280,13 +291,15 @@ static const char *trace_options[] = {
280 "block", 291 "block",
281 "stacktrace", 292 "stacktrace",
282 "sched-tree", 293 "sched-tree",
283 "ftrace_printk", 294 "trace_printk",
284 "ftrace_preempt", 295 "ftrace_preempt",
285 "branch", 296 "branch",
286 "annotate", 297 "annotate",
287 "userstacktrace", 298 "userstacktrace",
288 "sym-userobj", 299 "sym-userobj",
289 "printk-msg-only", 300 "printk-msg-only",
301 "context-info",
302 "latency-format",
290 NULL 303 NULL
291}; 304};
292 305
@@ -326,146 +339,37 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
326 data->rt_priority = tsk->rt_priority; 339 data->rt_priority = tsk->rt_priority;
327 340
328 /* record this tasks comm */ 341 /* record this tasks comm */
329 tracing_record_cmdline(current); 342 tracing_record_cmdline(tsk);
330} 343}
331 344
332/** 345ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
333 * trace_seq_printf - sequence printing of trace information
334 * @s: trace sequence descriptor
335 * @fmt: printf format string
336 *
337 * The tracer may use either sequence operations or its own
338 * copy to user routines. To simplify formating of a trace
339 * trace_seq_printf is used to store strings into a special
340 * buffer (@s). Then the output may be either used by
341 * the sequencer or pulled into another buffer.
342 */
343int
344trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
345{ 346{
346 int len = (PAGE_SIZE - 1) - s->len; 347 int len;
347 va_list ap;
348 int ret; 348 int ret;
349 349
350 if (!len) 350 if (!cnt)
351 return 0;
352
353 va_start(ap, fmt);
354 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
355 va_end(ap);
356
357 /* If we can't write it all, don't bother writing anything */
358 if (ret >= len)
359 return 0;
360
361 s->len += ret;
362
363 return len;
364}
365
366/**
367 * trace_seq_puts - trace sequence printing of simple string
368 * @s: trace sequence descriptor
369 * @str: simple string to record
370 *
371 * The tracer may use either the sequence operations or its own
372 * copy to user routines. This function records a simple string
373 * into a special buffer (@s) for later retrieval by a sequencer
374 * or other mechanism.
375 */
376static int
377trace_seq_puts(struct trace_seq *s, const char *str)
378{
379 int len = strlen(str);
380
381 if (len > ((PAGE_SIZE - 1) - s->len))
382 return 0;
383
384 memcpy(s->buffer + s->len, str, len);
385 s->len += len;
386
387 return len;
388}
389
390static int
391trace_seq_putc(struct trace_seq *s, unsigned char c)
392{
393 if (s->len >= (PAGE_SIZE - 1))
394 return 0;
395
396 s->buffer[s->len++] = c;
397
398 return 1;
399}
400
401static int
402trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
403{
404 if (len > ((PAGE_SIZE - 1) - s->len))
405 return 0; 351 return 0;
406 352
407 memcpy(s->buffer + s->len, mem, len); 353 if (s->len <= s->readpos)
408 s->len += len; 354 return -EBUSY;
409
410 return len;
411}
412
413#define MAX_MEMHEX_BYTES 8
414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
415
416static int
417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
418{
419 unsigned char hex[HEX_CHARS];
420 unsigned char *data = mem;
421 int i, j;
422
423#ifdef __BIG_ENDIAN
424 for (i = 0, j = 0; i < len; i++) {
425#else
426 for (i = len-1, j = 0; i >= 0; i--) {
427#endif
428 hex[j++] = hex_asc_hi(data[i]);
429 hex[j++] = hex_asc_lo(data[i]);
430 }
431 hex[j++] = ' ';
432
433 return trace_seq_putmem(s, hex, j);
434}
435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440 355
441 if (s->len >= (PAGE_SIZE - 1)) 356 len = s->len - s->readpos;
442 return 0; 357 if (cnt > len)
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 358 cnt = len;
444 if (!IS_ERR(p)) { 359 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
445 p = mangle_path(s->buffer + s->len, p, "\n"); 360 if (ret == cnt)
446 if (p) { 361 return -EFAULT;
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454 362
455 return 0; 363 cnt -= ret;
456}
457 364
458static void 365 s->readpos += cnt;
459trace_seq_reset(struct trace_seq *s) 366 return cnt;
460{
461 s->len = 0;
462 s->readpos = 0;
463} 367}
464 368
465ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 369ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
466{ 370{
467 int len; 371 int len;
468 int ret; 372 void *ret;
469 373
470 if (s->len <= s->readpos) 374 if (s->len <= s->readpos)
471 return -EBUSY; 375 return -EBUSY;
@@ -473,11 +377,11 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
473 len = s->len - s->readpos; 377 len = s->len - s->readpos;
474 if (cnt > len) 378 if (cnt > len)
475 cnt = len; 379 cnt = len;
476 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); 380 ret = memcpy(buf, s->buffer + s->readpos, cnt);
477 if (ret) 381 if (!ret)
478 return -EFAULT; 382 return -EFAULT;
479 383
480 s->readpos += len; 384 s->readpos += cnt;
481 return cnt; 385 return cnt;
482} 386}
483 387
@@ -489,7 +393,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
489 s->buffer[len] = 0; 393 s->buffer[len] = 0;
490 seq_puts(m, s->buffer); 394 seq_puts(m, s->buffer);
491 395
492 trace_seq_reset(s); 396 trace_seq_init(s);
493} 397}
494 398
495/** 399/**
@@ -543,7 +447,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
543 447
544 ftrace_enable_cpu(); 448 ftrace_enable_cpu();
545 449
546 WARN_ON_ONCE(ret); 450 WARN_ON_ONCE(ret && ret != -EAGAIN);
547 451
548 __update_max_tr(tr, tsk, cpu); 452 __update_max_tr(tr, tsk, cpu);
549 __raw_spin_unlock(&ftrace_max_lock); 453 __raw_spin_unlock(&ftrace_max_lock);
@@ -556,6 +460,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
556 * Register a new plugin tracer. 460 * Register a new plugin tracer.
557 */ 461 */
558int register_tracer(struct tracer *type) 462int register_tracer(struct tracer *type)
463__releases(kernel_lock)
464__acquires(kernel_lock)
559{ 465{
560 struct tracer *t; 466 struct tracer *t;
561 int len; 467 int len;
@@ -594,9 +500,12 @@ int register_tracer(struct tracer *type)
594 else 500 else
595 if (!type->flags->opts) 501 if (!type->flags->opts)
596 type->flags->opts = dummy_tracer_opt; 502 type->flags->opts = dummy_tracer_opt;
503 if (!type->wait_pipe)
504 type->wait_pipe = default_wait_pipe;
505
597 506
598#ifdef CONFIG_FTRACE_STARTUP_TEST 507#ifdef CONFIG_FTRACE_STARTUP_TEST
599 if (type->selftest) { 508 if (type->selftest && !tracing_selftest_disabled) {
600 struct tracer *saved_tracer = current_trace; 509 struct tracer *saved_tracer = current_trace;
601 struct trace_array *tr = &global_trace; 510 struct trace_array *tr = &global_trace;
602 int i; 511 int i;
@@ -638,8 +547,26 @@ int register_tracer(struct tracer *type)
638 out: 547 out:
639 tracing_selftest_running = false; 548 tracing_selftest_running = false;
640 mutex_unlock(&trace_types_lock); 549 mutex_unlock(&trace_types_lock);
641 lock_kernel();
642 550
551 if (ret || !default_bootup_tracer)
552 goto out_unlock;
553
554 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
555 goto out_unlock;
556
557 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
558 /* Do we want this tracer to start on bootup? */
559 tracing_set_tracer(type->name);
560 default_bootup_tracer = NULL;
561 /* disable other selftests, since this will break it. */
562 tracing_selftest_disabled = 1;
563#ifdef CONFIG_FTRACE_STARTUP_TEST
564 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
565 type->name);
566#endif
567
568 out_unlock:
569 lock_kernel();
643 return ret; 570 return ret;
644} 571}
645 572
@@ -658,6 +585,15 @@ void unregister_tracer(struct tracer *type)
658 585
659 found: 586 found:
660 *t = (*t)->next; 587 *t = (*t)->next;
588
589 if (type == current_trace && tracer_enabled) {
590 tracer_enabled = 0;
591 tracing_stop();
592 if (current_trace->stop)
593 current_trace->stop(&global_trace);
594 current_trace = &nop_trace;
595 }
596
661 if (strlen(type->name) != max_tracer_type_len) 597 if (strlen(type->name) != max_tracer_type_len)
662 goto out; 598 goto out;
663 599
@@ -693,10 +629,10 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
693static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 629static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
694static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 630static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
695static int cmdline_idx; 631static int cmdline_idx;
696static DEFINE_SPINLOCK(trace_cmdline_lock); 632static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
697 633
698/* temporary disable recording */ 634/* temporary disable recording */
699atomic_t trace_record_cmdline_disabled __read_mostly; 635static atomic_t trace_record_cmdline_disabled __read_mostly;
700 636
701static void trace_init_cmdlines(void) 637static void trace_init_cmdlines(void)
702{ 638{
@@ -738,13 +674,12 @@ void tracing_start(void)
738 return; 674 return;
739 675
740 spin_lock_irqsave(&tracing_start_lock, flags); 676 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count) 677 if (--trace_stop_count) {
742 goto out; 678 if (trace_stop_count < 0) {
743 679 /* Someone screwed up their debugging */
744 if (trace_stop_count < 0) { 680 WARN_ON_ONCE(1);
745 /* Someone screwed up their debugging */ 681 trace_stop_count = 0;
746 WARN_ON_ONCE(1); 682 }
747 trace_stop_count = 0;
748 goto out; 683 goto out;
749 } 684 }
750 685
@@ -806,7 +741,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
806 * nor do we want to disable interrupts, 741 * nor do we want to disable interrupts,
807 * so if we miss here, then better luck next time. 742 * so if we miss here, then better luck next time.
808 */ 743 */
809 if (!spin_trylock(&trace_cmdline_lock)) 744 if (!__raw_spin_trylock(&trace_cmdline_lock))
810 return; 745 return;
811 746
812 idx = map_pid_to_cmdline[tsk->pid]; 747 idx = map_pid_to_cmdline[tsk->pid];
@@ -824,7 +759,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
824 759
825 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 760 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
826 761
827 spin_unlock(&trace_cmdline_lock); 762 __raw_spin_unlock(&trace_cmdline_lock);
828} 763}
829 764
830char *trace_find_cmdline(int pid) 765char *trace_find_cmdline(int pid)
@@ -864,7 +799,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
864 799
865 entry->preempt_count = pc & 0xff; 800 entry->preempt_count = pc & 0xff;
866 entry->pid = (tsk) ? tsk->pid : 0; 801 entry->pid = (tsk) ? tsk->pid : 0;
867 entry->tgid = (tsk) ? tsk->tgid : 0; 802 entry->tgid = (tsk) ? tsk->tgid : 0;
868 entry->flags = 803 entry->flags =
869#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 804#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
870 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 805 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -876,78 +811,114 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
876 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 811 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
877} 812}
878 813
814struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
815 unsigned char type,
816 unsigned long len,
817 unsigned long flags, int pc)
818{
819 struct ring_buffer_event *event;
820
821 event = ring_buffer_lock_reserve(tr->buffer, len);
822 if (event != NULL) {
823 struct trace_entry *ent = ring_buffer_event_data(event);
824
825 tracing_generic_entry_update(ent, flags, pc);
826 ent->type = type;
827 }
828
829 return event;
830}
831static void ftrace_trace_stack(struct trace_array *tr,
832 unsigned long flags, int skip, int pc);
833static void ftrace_trace_userstack(struct trace_array *tr,
834 unsigned long flags, int pc);
835
836void trace_buffer_unlock_commit(struct trace_array *tr,
837 struct ring_buffer_event *event,
838 unsigned long flags, int pc)
839{
840 ring_buffer_unlock_commit(tr->buffer, event);
841
842 ftrace_trace_stack(tr, flags, 6, pc);
843 ftrace_trace_userstack(tr, flags, pc);
844 trace_wake_up();
845}
846
847struct ring_buffer_event *
848trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
849 unsigned long flags, int pc)
850{
851 return trace_buffer_lock_reserve(&global_trace,
852 type, len, flags, pc);
853}
854
855void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
856 unsigned long flags, int pc)
857{
858 return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
859}
860
879void 861void
880trace_function(struct trace_array *tr, struct trace_array_cpu *data, 862trace_function(struct trace_array *tr,
881 unsigned long ip, unsigned long parent_ip, unsigned long flags, 863 unsigned long ip, unsigned long parent_ip, unsigned long flags,
882 int pc) 864 int pc)
883{ 865{
884 struct ring_buffer_event *event; 866 struct ring_buffer_event *event;
885 struct ftrace_entry *entry; 867 struct ftrace_entry *entry;
886 unsigned long irq_flags;
887 868
888 /* If we are reading the ring buffer, don't trace */ 869 /* If we are reading the ring buffer, don't trace */
889 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 870 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
890 return; 871 return;
891 872
892 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 873 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
893 &irq_flags); 874 flags, pc);
894 if (!event) 875 if (!event)
895 return; 876 return;
896 entry = ring_buffer_event_data(event); 877 entry = ring_buffer_event_data(event);
897 tracing_generic_entry_update(&entry->ent, flags, pc);
898 entry->ent.type = TRACE_FN;
899 entry->ip = ip; 878 entry->ip = ip;
900 entry->parent_ip = parent_ip; 879 entry->parent_ip = parent_ip;
901 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 880 ring_buffer_unlock_commit(tr->buffer, event);
902} 881}
903 882
904#ifdef CONFIG_FUNCTION_GRAPH_TRACER 883#ifdef CONFIG_FUNCTION_GRAPH_TRACER
905static void __trace_graph_entry(struct trace_array *tr, 884static void __trace_graph_entry(struct trace_array *tr,
906 struct trace_array_cpu *data,
907 struct ftrace_graph_ent *trace, 885 struct ftrace_graph_ent *trace,
908 unsigned long flags, 886 unsigned long flags,
909 int pc) 887 int pc)
910{ 888{
911 struct ring_buffer_event *event; 889 struct ring_buffer_event *event;
912 struct ftrace_graph_ent_entry *entry; 890 struct ftrace_graph_ent_entry *entry;
913 unsigned long irq_flags;
914 891
915 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 892 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
916 return; 893 return;
917 894
918 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 895 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
919 &irq_flags); 896 sizeof(*entry), flags, pc);
920 if (!event) 897 if (!event)
921 return; 898 return;
922 entry = ring_buffer_event_data(event); 899 entry = ring_buffer_event_data(event);
923 tracing_generic_entry_update(&entry->ent, flags, pc);
924 entry->ent.type = TRACE_GRAPH_ENT;
925 entry->graph_ent = *trace; 900 entry->graph_ent = *trace;
926 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 901 ring_buffer_unlock_commit(global_trace.buffer, event);
927} 902}
928 903
929static void __trace_graph_return(struct trace_array *tr, 904static void __trace_graph_return(struct trace_array *tr,
930 struct trace_array_cpu *data,
931 struct ftrace_graph_ret *trace, 905 struct ftrace_graph_ret *trace,
932 unsigned long flags, 906 unsigned long flags,
933 int pc) 907 int pc)
934{ 908{
935 struct ring_buffer_event *event; 909 struct ring_buffer_event *event;
936 struct ftrace_graph_ret_entry *entry; 910 struct ftrace_graph_ret_entry *entry;
937 unsigned long irq_flags;
938 911
939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 912 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
940 return; 913 return;
941 914
942 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), 915 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
943 &irq_flags); 916 sizeof(*entry), flags, pc);
944 if (!event) 917 if (!event)
945 return; 918 return;
946 entry = ring_buffer_event_data(event); 919 entry = ring_buffer_event_data(event);
947 tracing_generic_entry_update(&entry->ent, flags, pc);
948 entry->ent.type = TRACE_GRAPH_RET;
949 entry->ret = *trace; 920 entry->ret = *trace;
950 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); 921 ring_buffer_unlock_commit(global_trace.buffer, event);
951} 922}
952#endif 923#endif
953 924
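The helpers introduced above (trace_buffer_lock_reserve()/trace_buffer_unlock_commit() and their current-buffer variants) replace the old reserve/commit-with-irq-flags sequence. A hedged sketch of how an in-kernel tracer might emit one event through them; TRACE_EXAMPLE and struct example_entry are made-up names for illustration only.

/*
 * Illustrative only: emitting one event through the new helpers.
 * TRACE_EXAMPLE and struct example_entry do not exist in the tree;
 * a real tracer uses its own entry type and TRACE_* id.
 */
struct example_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void example_trace_value(unsigned long value)
{
	struct ring_buffer_event *event;
	struct example_entry *entry;
	unsigned long flags;
	int pc = preempt_count();

	local_save_flags(flags);
	event = trace_current_buffer_lock_reserve(TRACE_EXAMPLE,
						  sizeof(*entry), flags, pc);
	if (!event)
		return;
	/* The reserve helper already filled in the generic trace_entry fields. */
	entry = ring_buffer_event_data(event);
	entry->value = value;
	trace_current_buffer_unlock_commit(event, flags, pc);
}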
@@ -957,31 +928,23 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
957 int pc) 928 int pc)
958{ 929{
959 if (likely(!atomic_read(&data->disabled))) 930 if (likely(!atomic_read(&data->disabled)))
960 trace_function(tr, data, ip, parent_ip, flags, pc); 931 trace_function(tr, ip, parent_ip, flags, pc);
961} 932}
962 933
963static void ftrace_trace_stack(struct trace_array *tr, 934static void __ftrace_trace_stack(struct trace_array *tr,
964 struct trace_array_cpu *data, 935 unsigned long flags,
965 unsigned long flags, 936 int skip, int pc)
966 int skip, int pc)
967{ 937{
968#ifdef CONFIG_STACKTRACE 938#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event; 939 struct ring_buffer_event *event;
970 struct stack_entry *entry; 940 struct stack_entry *entry;
971 struct stack_trace trace; 941 struct stack_trace trace;
972 unsigned long irq_flags;
973 942
974 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 943 event = trace_buffer_lock_reserve(tr, TRACE_STACK,
975 return; 944 sizeof(*entry), flags, pc);
976
977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
978 &irq_flags);
979 if (!event) 945 if (!event)
980 return; 946 return;
981 entry = ring_buffer_event_data(event); 947 entry = ring_buffer_event_data(event);
982 tracing_generic_entry_update(&entry->ent, flags, pc);
983 entry->ent.type = TRACE_STACK;
984
985 memset(&entry->caller, 0, sizeof(entry->caller)); 948 memset(&entry->caller, 0, sizeof(entry->caller));
986 949
987 trace.nr_entries = 0; 950 trace.nr_entries = 0;
@@ -990,38 +953,43 @@ static void ftrace_trace_stack(struct trace_array *tr,
990 trace.entries = entry->caller; 953 trace.entries = entry->caller;
991 954
992 save_stack_trace(&trace); 955 save_stack_trace(&trace);
993 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 956 ring_buffer_unlock_commit(tr->buffer, event);
994#endif 957#endif
995} 958}
996 959
960static void ftrace_trace_stack(struct trace_array *tr,
961 unsigned long flags,
962 int skip, int pc)
963{
964 if (!(trace_flags & TRACE_ITER_STACKTRACE))
965 return;
966
967 __ftrace_trace_stack(tr, flags, skip, pc);
968}
969
997void __trace_stack(struct trace_array *tr, 970void __trace_stack(struct trace_array *tr,
998 struct trace_array_cpu *data,
999 unsigned long flags, 971 unsigned long flags,
1000 int skip) 972 int skip, int pc)
1001{ 973{
1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 974 __ftrace_trace_stack(tr, flags, skip, pc);
1003} 975}
1004 976
1005static void ftrace_trace_userstack(struct trace_array *tr, 977static void ftrace_trace_userstack(struct trace_array *tr,
1006 struct trace_array_cpu *data, 978 unsigned long flags, int pc)
1007 unsigned long flags, int pc)
1008{ 979{
1009#ifdef CONFIG_STACKTRACE 980#ifdef CONFIG_STACKTRACE
1010 struct ring_buffer_event *event; 981 struct ring_buffer_event *event;
1011 struct userstack_entry *entry; 982 struct userstack_entry *entry;
1012 struct stack_trace trace; 983 struct stack_trace trace;
1013 unsigned long irq_flags;
1014 984
1015 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 985 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1016 return; 986 return;
1017 987
1018 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 988 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
1019 &irq_flags); 989 sizeof(*entry), flags, pc);
1020 if (!event) 990 if (!event)
1021 return; 991 return;
1022 entry = ring_buffer_event_data(event); 992 entry = ring_buffer_event_data(event);
1023 tracing_generic_entry_update(&entry->ent, flags, pc);
1024 entry->ent.type = TRACE_USER_STACK;
1025 993
1026 memset(&entry->caller, 0, sizeof(entry->caller)); 994 memset(&entry->caller, 0, sizeof(entry->caller));
1027 995
@@ -1031,70 +999,58 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1031 trace.entries = entry->caller; 999 trace.entries = entry->caller;
1032 1000
1033 save_stack_trace_user(&trace); 1001 save_stack_trace_user(&trace);
1034 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1002 ring_buffer_unlock_commit(tr->buffer, event);
1035#endif 1003#endif
1036} 1004}
1037 1005
1038void __trace_userstack(struct trace_array *tr, 1006#ifdef UNUSED
1039 struct trace_array_cpu *data, 1007static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1040 unsigned long flags)
1041{ 1008{
1042 ftrace_trace_userstack(tr, data, flags, preempt_count()); 1009 ftrace_trace_userstack(tr, flags, preempt_count());
1043} 1010}
1011#endif /* UNUSED */
1044 1012
1045static void 1013static void
1046ftrace_trace_special(void *__tr, void *__data, 1014ftrace_trace_special(void *__tr,
1047 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1015 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1048 int pc) 1016 int pc)
1049{ 1017{
1050 struct ring_buffer_event *event; 1018 struct ring_buffer_event *event;
1051 struct trace_array_cpu *data = __data;
1052 struct trace_array *tr = __tr; 1019 struct trace_array *tr = __tr;
1053 struct special_entry *entry; 1020 struct special_entry *entry;
1054 unsigned long irq_flags;
1055 1021
1056 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1022 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1057 &irq_flags); 1023 sizeof(*entry), 0, pc);
1058 if (!event) 1024 if (!event)
1059 return; 1025 return;
1060 entry = ring_buffer_event_data(event); 1026 entry = ring_buffer_event_data(event);
1061 tracing_generic_entry_update(&entry->ent, 0, pc);
1062 entry->ent.type = TRACE_SPECIAL;
1063 entry->arg1 = arg1; 1027 entry->arg1 = arg1;
1064 entry->arg2 = arg2; 1028 entry->arg2 = arg2;
1065 entry->arg3 = arg3; 1029 entry->arg3 = arg3;
1066 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1030 trace_buffer_unlock_commit(tr, event, 0, pc);
1067 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1068 ftrace_trace_userstack(tr, data, irq_flags, pc);
1069
1070 trace_wake_up();
1071} 1031}
1072 1032
1073void 1033void
1074__trace_special(void *__tr, void *__data, 1034__trace_special(void *__tr, void *__data,
1075 unsigned long arg1, unsigned long arg2, unsigned long arg3) 1035 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1076{ 1036{
1077 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); 1037 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1078} 1038}
1079 1039
1080void 1040void
1081tracing_sched_switch_trace(struct trace_array *tr, 1041tracing_sched_switch_trace(struct trace_array *tr,
1082 struct trace_array_cpu *data,
1083 struct task_struct *prev, 1042 struct task_struct *prev,
1084 struct task_struct *next, 1043 struct task_struct *next,
1085 unsigned long flags, int pc) 1044 unsigned long flags, int pc)
1086{ 1045{
1087 struct ring_buffer_event *event; 1046 struct ring_buffer_event *event;
1088 struct ctx_switch_entry *entry; 1047 struct ctx_switch_entry *entry;
1089 unsigned long irq_flags;
1090 1048
1091 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1049 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1092 &irq_flags); 1050 sizeof(*entry), flags, pc);
1093 if (!event) 1051 if (!event)
1094 return; 1052 return;
1095 entry = ring_buffer_event_data(event); 1053 entry = ring_buffer_event_data(event);
1096 tracing_generic_entry_update(&entry->ent, flags, pc);
1097 entry->ent.type = TRACE_CTX;
1098 entry->prev_pid = prev->pid; 1054 entry->prev_pid = prev->pid;
1099 entry->prev_prio = prev->prio; 1055 entry->prev_prio = prev->prio;
1100 entry->prev_state = prev->state; 1056 entry->prev_state = prev->state;
@@ -1102,29 +1058,23 @@ tracing_sched_switch_trace(struct trace_array *tr,
1102 entry->next_prio = next->prio; 1058 entry->next_prio = next->prio;
1103 entry->next_state = next->state; 1059 entry->next_state = next->state;
1104 entry->next_cpu = task_cpu(next); 1060 entry->next_cpu = task_cpu(next);
1105 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1061 trace_buffer_unlock_commit(tr, event, flags, pc);
1106 ftrace_trace_stack(tr, data, flags, 5, pc);
1107 ftrace_trace_userstack(tr, data, flags, pc);
1108} 1062}
1109 1063
1110void 1064void
1111tracing_sched_wakeup_trace(struct trace_array *tr, 1065tracing_sched_wakeup_trace(struct trace_array *tr,
1112 struct trace_array_cpu *data,
1113 struct task_struct *wakee, 1066 struct task_struct *wakee,
1114 struct task_struct *curr, 1067 struct task_struct *curr,
1115 unsigned long flags, int pc) 1068 unsigned long flags, int pc)
1116{ 1069{
1117 struct ring_buffer_event *event; 1070 struct ring_buffer_event *event;
1118 struct ctx_switch_entry *entry; 1071 struct ctx_switch_entry *entry;
1119 unsigned long irq_flags;
1120 1072
1121 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 1073 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1122 &irq_flags); 1074 sizeof(*entry), flags, pc);
1123 if (!event) 1075 if (!event)
1124 return; 1076 return;
1125 entry = ring_buffer_event_data(event); 1077 entry = ring_buffer_event_data(event);
1126 tracing_generic_entry_update(&entry->ent, flags, pc);
1127 entry->ent.type = TRACE_WAKE;
1128 entry->prev_pid = curr->pid; 1078 entry->prev_pid = curr->pid;
1129 entry->prev_prio = curr->prio; 1079 entry->prev_prio = curr->prio;
1130 entry->prev_state = curr->state; 1080 entry->prev_state = curr->state;
@@ -1132,11 +1082,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1132 entry->next_prio = wakee->prio; 1082 entry->next_prio = wakee->prio;
1133 entry->next_state = wakee->state; 1083 entry->next_state = wakee->state;
1134 entry->next_cpu = task_cpu(wakee); 1084 entry->next_cpu = task_cpu(wakee);
1135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1136 ftrace_trace_stack(tr, data, flags, 6, pc);
1137 ftrace_trace_userstack(tr, data, flags, pc);
1138 1085
1139 trace_wake_up(); 1086 ring_buffer_unlock_commit(tr->buffer, event);
1087 ftrace_trace_stack(tr, flags, 6, pc);
1088 ftrace_trace_userstack(tr, flags, pc);
1140} 1089}
1141 1090
1142void 1091void
@@ -1157,66 +1106,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1157 data = tr->data[cpu]; 1106 data = tr->data[cpu];
1158 1107
1159 if (likely(atomic_inc_return(&data->disabled) == 1)) 1108 if (likely(atomic_inc_return(&data->disabled) == 1))
1160 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1109 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1161
1162 atomic_dec(&data->disabled);
1163 local_irq_restore(flags);
1164}
1165
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1185 disabled = atomic_inc_return(&data->disabled);
1186
1187 if (likely(disabled == 1))
1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1189
1190 atomic_dec(&data->disabled);
1191 ftrace_preempt_enable(resched);
1192}
1193
1194static void
1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1196{
1197 struct trace_array *tr = &global_trace;
1198 struct trace_array_cpu *data;
1199 unsigned long flags;
1200 long disabled;
1201 int cpu;
1202 int pc;
1203
1204 if (unlikely(!ftrace_function_enabled))
1205 return;
1206
1207 /*
1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220 1110
1221 atomic_dec(&data->disabled); 1111 atomic_dec(&data->disabled);
1222 local_irq_restore(flags); 1112 local_irq_restore(flags);
@@ -1244,7 +1134,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
1244 disabled = atomic_inc_return(&data->disabled); 1134 disabled = atomic_inc_return(&data->disabled);
1245 if (likely(disabled == 1)) { 1135 if (likely(disabled == 1)) {
1246 pc = preempt_count(); 1136 pc = preempt_count();
1247 __trace_graph_entry(tr, data, trace, flags, pc); 1137 __trace_graph_entry(tr, trace, flags, pc);
1248 } 1138 }
1249 /* Only do the atomic if it is not already set */ 1139 /* Only do the atomic if it is not already set */
1250 if (!test_tsk_trace_graph(current)) 1140 if (!test_tsk_trace_graph(current))
@@ -1270,7 +1160,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1270 disabled = atomic_inc_return(&data->disabled); 1160 disabled = atomic_inc_return(&data->disabled);
1271 if (likely(disabled == 1)) { 1161 if (likely(disabled == 1)) {
1272 pc = preempt_count(); 1162 pc = preempt_count();
1273 __trace_graph_return(tr, data, trace, flags, pc); 1163 __trace_graph_return(tr, trace, flags, pc);
1274 } 1164 }
1275 if (!trace->depth) 1165 if (!trace->depth)
1276 clear_tsk_trace_graph(current); 1166 clear_tsk_trace_graph(current);
@@ -1279,30 +1169,70 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
1279} 1169}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 1170#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1281 1171
1282static struct ftrace_ops trace_ops __read_mostly =
1283{
1284 .func = function_trace_call,
1285};
1286 1172
1287void tracing_start_function_trace(void) 1173/**
1174 * trace_vprintk - write binary msg to tracing buffer
1175 *
1176 */
1177int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
1288{ 1178{
1289 ftrace_function_enabled = 0; 1179 static raw_spinlock_t trace_buf_lock =
1180 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1181 static u32 trace_buf[TRACE_BUF_SIZE];
1290 1182
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY) 1183 struct ring_buffer_event *event;
1292 trace_ops.func = function_trace_call_preempt_only; 1184 struct trace_array *tr = &global_trace;
1293 else 1185 struct trace_array_cpu *data;
1294 trace_ops.func = function_trace_call; 1186 struct print_entry *entry;
1187 unsigned long flags;
1188 int resched;
1189 int cpu, len = 0, size, pc;
1295 1190
1296 register_ftrace_function(&trace_ops); 1191 if (unlikely(tracing_selftest_running || tracing_disabled))
1297 ftrace_function_enabled = 1; 1192 return 0;
1298}
1299 1193
1300void tracing_stop_function_trace(void) 1194 /* Don't pollute graph traces with trace_vprintk internals */
1301{ 1195 pause_graph_tracing();
1302 ftrace_function_enabled = 0; 1196
1303 unregister_ftrace_function(&trace_ops); 1197 pc = preempt_count();
1198 resched = ftrace_preempt_disable();
1199 cpu = raw_smp_processor_id();
1200 data = tr->data[cpu];
1201
1202 if (unlikely(atomic_read(&data->disabled)))
1203 goto out;
1204
1205 /* Lockdep uses trace_printk for lock tracing */
1206 local_irq_save(flags);
1207 __raw_spin_lock(&trace_buf_lock);
1208 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1209
1210 if (len > TRACE_BUF_SIZE || len < 0)
1211 goto out_unlock;
1212
1213 size = sizeof(*entry) + sizeof(u32) * len;
1214 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc);
1215 if (!event)
1216 goto out_unlock;
1217 entry = ring_buffer_event_data(event);
1218 entry->ip = ip;
1219 entry->depth = depth;
1220 entry->fmt = fmt;
1221
1222 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1223 ring_buffer_unlock_commit(tr->buffer, event);
1224
1225out_unlock:
1226 __raw_spin_unlock(&trace_buf_lock);
1227 local_irq_restore(flags);
1228
1229out:
1230 ftrace_preempt_enable(resched);
1231 unpause_graph_tracing();
1232
1233 return len;
1304} 1234}
1305#endif 1235EXPORT_SYMBOL_GPL(trace_vprintk);
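/*
 * Sketch of the binary trace_printk record that trace_vprintk() reserves
 * above. The layout is inferred from the size calculation
 * (sizeof(*entry) + sizeof(u32) * len) and from the later use of
 * trace_seq_bprintf(s, field->fmt, field->buf) when printing; the real
 * definition lives in the trace headers and is assumed here.
 */
struct print_entry {
	struct trace_entry	ent;
	unsigned long		ip;	/* call site, for symbol resolution */
	int			depth;	/* graph-tracer nesting depth */
	const char		*fmt;	/* format string, expanded at read time */
	u32			buf[];	/* vbin_printf() words, 'len' of them */
};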
1306 1236
1307enum trace_file_type { 1237enum trace_file_type {
1308 TRACE_FILE_LAT_FMT = 1, 1238 TRACE_FILE_LAT_FMT = 1,
@@ -1345,10 +1275,25 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1345{ 1275{
1346 struct ring_buffer *buffer = iter->tr->buffer; 1276 struct ring_buffer *buffer = iter->tr->buffer;
1347 struct trace_entry *ent, *next = NULL; 1277 struct trace_entry *ent, *next = NULL;
1278 int cpu_file = iter->cpu_file;
1348 u64 next_ts = 0, ts; 1279 u64 next_ts = 0, ts;
1349 int next_cpu = -1; 1280 int next_cpu = -1;
1350 int cpu; 1281 int cpu;
1351 1282
1283 /*
 1284	 * If we are in a per_cpu trace file, don't bother iterating over
 1285	 * all cpus; just peek directly.
1286 */
1287 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1288 if (ring_buffer_empty_cpu(buffer, cpu_file))
1289 return NULL;
1290 ent = peek_next_entry(iter, cpu_file, ent_ts);
1291 if (ent_cpu)
1292 *ent_cpu = cpu_file;
1293
1294 return ent;
1295 }
1296
1352 for_each_tracing_cpu(cpu) { 1297 for_each_tracing_cpu(cpu) {
1353 1298
1354 if (ring_buffer_empty_cpu(buffer, cpu)) 1299 if (ring_buffer_empty_cpu(buffer, cpu))
@@ -1376,8 +1321,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1376} 1321}
1377 1322
1378/* Find the next real entry, without updating the iterator itself */ 1323/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry * 1324struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1325 int *ent_cpu, u64 *ent_ts)
1381{ 1326{
1382 return __find_next_entry(iter, ent_cpu, ent_ts); 1327 return __find_next_entry(iter, ent_cpu, ent_ts);
1383} 1328}
@@ -1426,19 +1371,32 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1426 return ent; 1371 return ent;
1427} 1372}
1428 1373
1374/*
 1375	 * No locking is necessary here. The worst thing that can
 1376	 * happen is losing events consumed at the same time
 1377	 * by a trace_pipe reader.
 1378	 * Other than that, we don't risk crashing the ring buffer
1379 * because it serializes the readers.
1380 *
 1381	 * The current tracer is copied to avoid global locking
1382 * all around.
1383 */
1429static void *s_start(struct seq_file *m, loff_t *pos) 1384static void *s_start(struct seq_file *m, loff_t *pos)
1430{ 1385{
1431 struct trace_iterator *iter = m->private; 1386 struct trace_iterator *iter = m->private;
1387 static struct tracer *old_tracer;
1388 int cpu_file = iter->cpu_file;
1432 void *p = NULL; 1389 void *p = NULL;
1433 loff_t l = 0; 1390 loff_t l = 0;
1434 int cpu; 1391 int cpu;
1435 1392
1393 /* copy the tracer to avoid using a global lock all around */
1436 mutex_lock(&trace_types_lock); 1394 mutex_lock(&trace_types_lock);
1437 1395 if (unlikely(old_tracer != current_trace && current_trace)) {
1438 if (!current_trace || current_trace != iter->trace) { 1396 old_tracer = current_trace;
1439 mutex_unlock(&trace_types_lock); 1397 *iter->trace = *current_trace;
1440 return NULL;
1441 } 1398 }
1399 mutex_unlock(&trace_types_lock);
1442 1400
1443 atomic_inc(&trace_record_cmdline_disabled); 1401 atomic_inc(&trace_record_cmdline_disabled);
1444 1402
@@ -1449,9 +1407,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1449 1407
1450 ftrace_disable_cpu(); 1408 ftrace_disable_cpu();
1451 1409
1452 for_each_tracing_cpu(cpu) { 1410 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1453 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1411 for_each_tracing_cpu(cpu)
1454 } 1412 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1413 } else
1414 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
1415
1455 1416
1456 ftrace_enable_cpu(); 1417 ftrace_enable_cpu();
1457 1418
@@ -1469,155 +1430,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1469static void s_stop(struct seq_file *m, void *p) 1430static void s_stop(struct seq_file *m, void *p)
1470{ 1431{
1471 atomic_dec(&trace_record_cmdline_disabled); 1432 atomic_dec(&trace_record_cmdline_disabled);
1472 mutex_unlock(&trace_types_lock);
1473}
1474
1475#ifdef CONFIG_KRETPROBES
1476static inline const char *kretprobed(const char *name)
1477{
1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1484}
1485#else
1486static inline const char *kretprobed(const char *name)
1487{
1488 return name;
1489}
1490#endif /* CONFIG_KRETPROBES */
1491
1492static int
1493seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1494{
1495#ifdef CONFIG_KALLSYMS
1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1498
1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1500
1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1504#endif
1505 return 1;
1506}
1507
1508static int
1509seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1510 unsigned long address)
1511{
1512#ifdef CONFIG_KALLSYMS
1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1515
1516 sprint_symbol(str, address);
1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1520#endif
1521 return 1;
1522}
1523
1524#ifndef CONFIG_64BIT
1525# define IP_FMT "%08lx"
1526#else
1527# define IP_FMT "%016lx"
1528#endif
1529
1530int
1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1532{
1533 int ret;
1534
1535 if (!ip)
1536 return trace_seq_printf(s, "0");
1537
1538 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1539 ret = seq_print_sym_offset(s, "%s", ip);
1540 else
1541 ret = seq_print_sym_short(s, "%s", ip);
1542
1543 if (!ret)
1544 return 0;
1545
1546 if (sym_flags & TRACE_ITER_SYM_ADDR)
1547 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1548 return ret;
1549}
1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621} 1433}
1622 1434
1623static void print_lat_help_header(struct seq_file *m) 1435static void print_lat_help_header(struct seq_file *m)
@@ -1658,11 +1470,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1658 total = entries + 1470 total = entries +
1659 ring_buffer_overruns(iter->tr->buffer); 1471 ring_buffer_overruns(iter->tr->buffer);
1660 1472
1661 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1473 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1662 name, UTS_RELEASE); 1474 name, UTS_RELEASE);
1663 seq_puts(m, "-----------------------------------" 1475 seq_puts(m, "# -----------------------------------"
1664 "---------------------------------\n"); 1476 "---------------------------------\n");
1665 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" 1477 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
1666 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 1478 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1667 nsecs_to_usecs(data->saved_latency), 1479 nsecs_to_usecs(data->saved_latency),
1668 entries, 1480 entries,
@@ -1684,121 +1496,24 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1684#else 1496#else
1685 seq_puts(m, ")\n"); 1497 seq_puts(m, ")\n");
1686#endif 1498#endif
1687 seq_puts(m, " -----------------\n"); 1499 seq_puts(m, "# -----------------\n");
1688 seq_printf(m, " | task: %.16s-%d " 1500 seq_printf(m, "# | task: %.16s-%d "
1689 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 1501 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1690 data->comm, data->pid, data->uid, data->nice, 1502 data->comm, data->pid, data->uid, data->nice,
1691 data->policy, data->rt_priority); 1503 data->policy, data->rt_priority);
1692 seq_puts(m, " -----------------\n"); 1504 seq_puts(m, "# -----------------\n");
1693 1505
1694 if (data->critical_start) { 1506 if (data->critical_start) {
1695 seq_puts(m, " => started at: "); 1507 seq_puts(m, "# => started at: ");
1696 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 1508 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1697 trace_print_seq(m, &iter->seq); 1509 trace_print_seq(m, &iter->seq);
1698 seq_puts(m, "\n => ended at: "); 1510 seq_puts(m, "\n# => ended at: ");
1699 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1511 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1700 trace_print_seq(m, &iter->seq); 1512 trace_print_seq(m, &iter->seq);
1701 seq_puts(m, "\n"); 1513 seq_puts(m, "#\n");
1702 }
1703
1704 seq_puts(m, "\n");
1705}
1706
1707static void
1708lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1709{
1710 int hardirq, softirq;
1711 char *comm;
1712
1713 comm = trace_find_cmdline(entry->pid);
1714
1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1716 trace_seq_printf(s, "%3d", cpu);
1717 trace_seq_printf(s, "%c%c",
1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1721
1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1723 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1724 if (hardirq && softirq) {
1725 trace_seq_putc(s, 'H');
1726 } else {
1727 if (hardirq) {
1728 trace_seq_putc(s, 'h');
1729 } else {
1730 if (softirq)
1731 trace_seq_putc(s, 's');
1732 else
1733 trace_seq_putc(s, '.');
1734 }
1735 } 1514 }
1736 1515
1737 if (entry->preempt_count) 1516 seq_puts(m, "#\n");
1738 trace_seq_printf(s, "%x", entry->preempt_count);
1739 else
1740 trace_seq_puts(s, ".");
1741}
1742
1743unsigned long preempt_mark_thresh = 100;
1744
1745static void
1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1747 unsigned long rel_usecs)
1748{
1749 trace_seq_printf(s, " %4lldus", abs_usecs);
1750 if (rel_usecs > preempt_mark_thresh)
1751 trace_seq_puts(s, "!: ");
1752 else if (rel_usecs > 1)
1753 trace_seq_puts(s, "+: ");
1754 else
1755 trace_seq_puts(s, " : ");
1756}
1757
1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1759
1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802} 1517}
1803 1518
1804static void test_cpu_buff_start(struct trace_iterator *iter) 1519static void test_cpu_buff_start(struct trace_iterator *iter)
@@ -1818,452 +1533,88 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1533 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819} 1534}
1820 1535
1821static enum print_line_t
1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1823{
1824 struct trace_seq *s = &iter->seq;
1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1826 struct trace_entry *next_entry;
1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1828 struct trace_entry *entry = iter->ent;
1829 unsigned long abs_usecs;
1830 unsigned long rel_usecs;
1831 u64 next_ts;
1832 char *comm;
1833 int S, T;
1834 int i;
1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838
1839 test_cpu_buff_start(iter);
1840
1841 next_entry = find_next_entry(iter, NULL, &next_ts);
1842 if (!next_entry)
1843 next_ts = iter->ts;
1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1846
1847 if (verbose) {
1848 comm = trace_find_cmdline(entry->pid);
1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1850 " %ld.%03ldms (+%ld.%03ldms): ",
1851 comm,
1852 entry->pid, cpu, entry->flags,
1853 entry->preempt_count, trace_idx,
1854 ns2usecs(iter->ts),
1855 abs_usecs/1000,
1856 abs_usecs % 1000, rel_usecs/1000,
1857 rel_usecs % 1000);
1858 } else {
1859 lat_print_generic(s, entry, cpu);
1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1861 }
1862 switch (entry->type) {
1863 case TRACE_FN: {
1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1869 trace_seq_puts(s, " (");
1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1871 trace_seq_puts(s, ")\n");
1872 break;
1873 }
1874 case TRACE_CTX:
1875 case TRACE_WAKE: {
1876 struct ctx_switch_entry *field;
1877
1878 trace_assign_type(field, entry);
1879
1880 T = task_state_char(field->next_state);
1881 S = task_state_char(field->prev_state);
1882 comm = trace_find_cmdline(field->next_pid);
1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1884 field->prev_pid,
1885 field->prev_prio,
1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1887 field->next_cpu,
1888 field->next_pid,
1889 field->next_prio,
1890 T, comm);
1891 break;
1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1899 field->arg1,
1900 field->arg2,
1901 field->arg3);
1902 break;
1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908
1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1910 if (i)
1911 trace_seq_puts(s, " <= ");
1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1913 }
1914 trace_seq_puts(s, "\n");
1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921
1922 seq_print_ip_sym(s, field->ip, sym_flags);
1923 trace_seq_printf(s, ": %s", field->buf);
1924 if (entry->flags & TRACE_FLAG_CONT)
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930
1931 trace_assign_type(field, entry);
1932
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1934 field->correct ? " ok " : " MISS ",
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942
1943 trace_assign_type(field, entry);
1944
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1949 default:
1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1951 }
1952 return TRACE_TYPE_HANDLED;
1953}
1954
1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1536static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1956{ 1537{
1957 struct trace_seq *s = &iter->seq; 1538 struct trace_seq *s = &iter->seq;
1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1539 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1959 struct trace_entry *entry; 1540 struct trace_entry *entry;
1960 unsigned long usec_rem; 1541 struct trace_event *event;
1961 unsigned long long t;
1962 unsigned long secs;
1963 char *comm;
1964 int ret;
1965 int S, T;
1966 int i;
1967 1542
1968 entry = iter->ent; 1543 entry = iter->ent;
1969 1544
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter); 1545 test_cpu_buff_start(iter);
1974 1546
1975 comm = trace_find_cmdline(iter->ent->pid); 1547 event = ftrace_find_event(entry->type);
1976
1977 t = ns2usecs(iter->ts);
1978 usec_rem = do_div(t, 1000000ULL);
1979 secs = (unsigned long)t;
1980
1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1982 if (!ret)
1983 return TRACE_TYPE_PARTIAL_LINE;
1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1985 if (!ret)
1986 return TRACE_TYPE_PARTIAL_LINE;
1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1988 if (!ret)
1989 return TRACE_TYPE_PARTIAL_LINE;
1990
1991 switch (entry->type) {
1992 case TRACE_FN: {
1993 struct ftrace_entry *field;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1998 if (!ret)
1999 return TRACE_TYPE_PARTIAL_LINE;
2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
2001 field->parent_ip) {
2002 ret = trace_seq_printf(s, " <-");
2003 if (!ret)
2004 return TRACE_TYPE_PARTIAL_LINE;
2005 ret = seq_print_ip_sym(s,
2006 field->parent_ip,
2007 sym_flags);
2008 if (!ret)
2009 return TRACE_TYPE_PARTIAL_LINE;
2010 }
2011 ret = trace_seq_printf(s, "\n");
2012 if (!ret)
2013 return TRACE_TYPE_PARTIAL_LINE;
2014 break;
2015 }
2016 case TRACE_CTX:
2017 case TRACE_WAKE: {
2018 struct ctx_switch_entry *field;
2019
2020 trace_assign_type(field, entry);
2021
2022 T = task_state_char(field->next_state);
2023 S = task_state_char(field->prev_state);
2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
2027 S,
2028 entry->type == TRACE_CTX ? "==>" : " +",
2029 field->next_cpu,
2030 field->next_pid,
2031 field->next_prio,
2032 T);
2033 if (!ret)
2034 return TRACE_TYPE_PARTIAL_LINE;
2035 break;
2036 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039
2040 trace_assign_type(field, entry);
2041
2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
2043 field->arg1,
2044 field->arg2,
2045 field->arg3);
2046 if (!ret)
2047 return TRACE_TYPE_PARTIAL_LINE;
2048 break;
2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054 1548
2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1549 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2056 if (i) { 1550 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2057 ret = trace_seq_puts(s, " <= "); 1551 if (!trace_print_lat_context(iter))
2058 if (!ret) 1552 goto partial;
2059 return TRACE_TYPE_PARTIAL_LINE; 1553 } else {
2060 } 1554 if (!trace_print_context(iter))
2061 ret = seq_print_ip_sym(s, field->caller[i], 1555 goto partial;
2062 sym_flags);
2063 if (!ret)
2064 return TRACE_TYPE_PARTIAL_LINE;
2065 } 1556 }
2066 ret = trace_seq_puts(s, "\n");
2067 if (!ret)
2068 return TRACE_TYPE_PARTIAL_LINE;
2069 break;
2070 } 1557 }
2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073 1558
2074 trace_assign_type(field, entry); 1559 if (event)
1560 return event->trace(iter, sym_flags);
2075 1561
2076 seq_print_ip_sym(s, field->ip, sym_flags); 1562 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2077 trace_seq_printf(s, ": %s", field->buf); 1563 goto partial;
2078 if (entry->flags & TRACE_FLAG_CONT)
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090 1564
2091 trace_assign_type(field, entry);
2092
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2094 field->correct ? " ok " : " MISS ",
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102
2103 trace_assign_type(field, entry);
2104
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED; 1565 return TRACE_TYPE_HANDLED;
1566partial:
1567 return TRACE_TYPE_PARTIAL_LINE;
2115} 1568}
2116 1569
2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 1570static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2118{ 1571{
2119 struct trace_seq *s = &iter->seq; 1572 struct trace_seq *s = &iter->seq;
2120 struct trace_entry *entry; 1573 struct trace_entry *entry;
2121 int ret; 1574 struct trace_event *event;
2122 int S, T;
2123 1575
2124 entry = iter->ent; 1576 entry = iter->ent;
2125 1577
2126 if (entry->type == TRACE_CONT) 1578 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2127 return TRACE_TYPE_HANDLED; 1579 if (!trace_seq_printf(s, "%d %d %llu ",
2128 1580 entry->pid, iter->cpu, iter->ts))
2129 ret = trace_seq_printf(s, "%d %d %llu ", 1581 goto partial;
2130 entry->pid, iter->cpu, iter->ts);
2131 if (!ret)
2132 return TRACE_TYPE_PARTIAL_LINE;
2133
2134 switch (entry->type) {
2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
2140 ret = trace_seq_printf(s, "%x %x\n",
2141 field->ip,
2142 field->parent_ip);
2143 if (!ret)
2144 return TRACE_TYPE_PARTIAL_LINE;
2145 break;
2146 }
2147 case TRACE_CTX:
2148 case TRACE_WAKE: {
2149 struct ctx_switch_entry *field;
2150
2151 trace_assign_type(field, entry);
2152
2153 T = task_state_char(field->next_state);
2154 S = entry->type == TRACE_WAKE ? '+' :
2155 task_state_char(field->prev_state);
2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
2157 field->prev_pid,
2158 field->prev_prio,
2159 S,
2160 field->next_cpu,
2161 field->next_pid,
2162 field->next_prio,
2163 T);
2164 if (!ret)
2165 return TRACE_TYPE_PARTIAL_LINE;
2166 break;
2167 } 1582 }
2168 case TRACE_SPECIAL:
2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172 1583
2173 trace_assign_type(field, entry); 1584 event = ftrace_find_event(entry->type);
1585 if (event)
1586 return event->raw(iter, 0);
2174 1587
2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1588 if (!trace_seq_printf(s, "%d ?\n", entry->type))
2176 field->arg1, 1589 goto partial;
2177 field->arg2,
2178 field->arg3);
2179 if (!ret)
2180 return TRACE_TYPE_PARTIAL_LINE;
2181 break;
2182 }
2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185
2186 trace_assign_type(field, entry);
2187 1590
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED; 1591 return TRACE_TYPE_HANDLED;
1592partial:
1593 return TRACE_TYPE_PARTIAL_LINE;
2195} 1594}
2196 1595
2197#define SEQ_PUT_FIELD_RET(s, x) \
2198do { \
2199 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
2200 return 0; \
2201} while (0)
2202
2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
2207 return 0; \
2208} while (0)
2209
2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 1596static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2211{ 1597{
2212 struct trace_seq *s = &iter->seq; 1598 struct trace_seq *s = &iter->seq;
2213 unsigned char newline = '\n'; 1599 unsigned char newline = '\n';
2214 struct trace_entry *entry; 1600 struct trace_entry *entry;
2215 int S, T; 1601 struct trace_event *event;
2216 1602
2217 entry = iter->ent; 1603 entry = iter->ent;
2218 1604
2219 if (entry->type == TRACE_CONT) 1605 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2220 return TRACE_TYPE_HANDLED; 1606 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2221 1607 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1608 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2225
2226 switch (entry->type) {
2227 case TRACE_FN: {
2228 struct ftrace_entry *field;
2229
2230 trace_assign_type(field, entry);
2231
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
2234 break;
2235 } 1609 }
2236 case TRACE_CTX:
2237 case TRACE_WAKE: {
2238 struct ctx_switch_entry *field;
2239
2240 trace_assign_type(field, entry);
2241
2242 T = task_state_char(field->next_state);
2243 S = entry->type == TRACE_WAKE ? '+' :
2244 task_state_char(field->prev_state);
2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
2247 SEQ_PUT_HEX_FIELD_RET(s, S);
2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
2251 SEQ_PUT_HEX_FIELD_RET(s, T);
2252 break;
2253 }
2254 case TRACE_SPECIAL:
2255 case TRACE_USER_STACK:
2256 case TRACE_STACK: {
2257 struct special_entry *field;
2258
2259 trace_assign_type(field, entry);
2260 1610
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1); 1611 event = ftrace_find_event(entry->type);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2); 1612 if (event) {
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3); 1613 enum print_line_t ret = event->hex(iter, 0);
2264 break; 1614 if (ret != TRACE_TYPE_HANDLED)
2265 } 1615 return ret;
2266 } 1616 }
1617
2267 SEQ_PUT_FIELD_RET(s, newline); 1618 SEQ_PUT_FIELD_RET(s, newline);
2268 1619
2269 return TRACE_TYPE_HANDLED; 1620 return TRACE_TYPE_HANDLED;
@@ -2278,13 +1629,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2278 1629
2279 trace_assign_type(field, entry); 1630 trace_assign_type(field, entry);
2280 1631
2281 ret = trace_seq_printf(s, field->buf); 1632 ret = trace_seq_bprintf(s, field->fmt, field->buf);
2282 if (!ret) 1633 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE; 1634 return TRACE_TYPE_PARTIAL_LINE;
2284 1635
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED; 1636 return TRACE_TYPE_HANDLED;
2289} 1637}
2290 1638
@@ -2292,53 +1640,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2292{ 1640{
2293 struct trace_seq *s = &iter->seq; 1641 struct trace_seq *s = &iter->seq;
2294 struct trace_entry *entry; 1642 struct trace_entry *entry;
1643 struct trace_event *event;
2295 1644
2296 entry = iter->ent; 1645 entry = iter->ent;
2297 1646
2298 if (entry->type == TRACE_CONT) 1647 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2299 return TRACE_TYPE_HANDLED; 1648 SEQ_PUT_FIELD_RET(s, entry->pid);
2300 1649 SEQ_PUT_FIELD_RET(s, iter->cpu);
2301 SEQ_PUT_FIELD_RET(s, entry->pid); 1650 SEQ_PUT_FIELD_RET(s, iter->ts);
2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
2303 SEQ_PUT_FIELD_RET(s, iter->ts);
2304
2305 switch (entry->type) {
2306 case TRACE_FN: {
2307 struct ftrace_entry *field;
2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
2313 break;
2314 } 1651 }
2315 case TRACE_CTX: {
2316 struct ctx_switch_entry *field;
2317 1652
2318 trace_assign_type(field, entry); 1653 event = ftrace_find_event(entry->type);
2319 1654 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
2326 break;
2327 }
2328 case TRACE_SPECIAL:
2329 case TRACE_USER_STACK:
2330 case TRACE_STACK: {
2331 struct special_entry *field;
2332
2333 trace_assign_type(field, entry);
2334
2335 SEQ_PUT_FIELD_RET(s, field->arg1);
2336 SEQ_PUT_FIELD_RET(s, field->arg2);
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
2338 break;
2339 }
2340 }
2341 return 1;
2342} 1655}
2343 1656
2344static int trace_empty(struct trace_iterator *iter) 1657static int trace_empty(struct trace_iterator *iter)
@@ -2382,9 +1695,6 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2382 if (trace_flags & TRACE_ITER_RAW) 1695 if (trace_flags & TRACE_ITER_RAW)
2383 return print_raw_fmt(iter); 1696 return print_raw_fmt(iter);
2384 1697
2385 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2386 return print_lat_fmt(iter, iter->idx, iter->cpu);
2387
2388 return print_trace_fmt(iter); 1698 return print_trace_fmt(iter);
2389} 1699}
2390 1700
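/*
 * The print_*_fmt() routines above now dispatch through a per-type
 * struct trace_event looked up with ftrace_find_event(entry->type).
 * A sketch of that structure, inferred from the event->trace/raw/hex/binary
 * calls in these hunks; the real definition belongs to the trace output
 * code introduced by this series and is assumed here.
 */
typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
					      int flags);

struct trace_event {
	struct hlist_node	node;	/* hashed by event type */
	int			type;
	trace_print_func	trace;	/* human-readable output */
	trace_print_func	raw;	/* raw numeric output */
	trace_print_func	hex;	/* hex dump output */
	trace_print_func	binary;	/* binary output */
};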
@@ -2426,30 +1736,40 @@ static struct seq_operations tracer_seq_ops = {
2426}; 1736};
2427 1737
2428static struct trace_iterator * 1738static struct trace_iterator *
2429__tracing_open(struct inode *inode, struct file *file, int *ret) 1739__tracing_open(struct inode *inode, struct file *file)
2430{ 1740{
1741 long cpu_file = (long) inode->i_private;
1742 void *fail_ret = ERR_PTR(-ENOMEM);
2431 struct trace_iterator *iter; 1743 struct trace_iterator *iter;
2432 struct seq_file *m; 1744 struct seq_file *m;
2433 int cpu; 1745 int cpu, ret;
2434 1746
2435 if (tracing_disabled) { 1747 if (tracing_disabled)
2436 *ret = -ENODEV; 1748 return ERR_PTR(-ENODEV);
2437 return NULL;
2438 }
2439 1749
2440 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 1750 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2441 if (!iter) { 1751 if (!iter)
2442 *ret = -ENOMEM; 1752 return ERR_PTR(-ENOMEM);
2443 goto out;
2444 }
2445 1753
1754 /*
1755 * We make a copy of the current tracer to avoid concurrent
 1756	 * changes to it while we are reading.
1757 */
2446 mutex_lock(&trace_types_lock); 1758 mutex_lock(&trace_types_lock);
1759 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1760 if (!iter->trace)
1761 goto fail;
1762
1763 if (current_trace)
1764 *iter->trace = *current_trace;
1765
2447 if (current_trace && current_trace->print_max) 1766 if (current_trace && current_trace->print_max)
2448 iter->tr = &max_tr; 1767 iter->tr = &max_tr;
2449 else 1768 else
2450 iter->tr = inode->i_private; 1769 iter->tr = &global_trace;
2451 iter->trace = current_trace;
2452 iter->pos = -1; 1770 iter->pos = -1;
1771 mutex_init(&iter->mutex);
1772 iter->cpu_file = cpu_file;
2453 1773
2454 /* Notify the tracer early; before we stop tracing. */ 1774 /* Notify the tracer early; before we stop tracing. */
2455 if (iter->trace && iter->trace->open) 1775 if (iter->trace && iter->trace->open)
@@ -2459,20 +1779,30 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2459 if (ring_buffer_overruns(iter->tr->buffer)) 1779 if (ring_buffer_overruns(iter->tr->buffer))
2460 iter->iter_flags |= TRACE_FILE_ANNOTATE; 1780 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2461 1781
1782 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1783 for_each_tracing_cpu(cpu) {
2462 1784
2463 for_each_tracing_cpu(cpu) { 1785 iter->buffer_iter[cpu] =
1786 ring_buffer_read_start(iter->tr->buffer, cpu);
2464 1787
1788 if (!iter->buffer_iter[cpu])
1789 goto fail_buffer;
1790 }
1791 } else {
1792 cpu = iter->cpu_file;
2465 iter->buffer_iter[cpu] = 1793 iter->buffer_iter[cpu] =
2466 ring_buffer_read_start(iter->tr->buffer, cpu); 1794 ring_buffer_read_start(iter->tr->buffer, cpu);
2467 1795
2468 if (!iter->buffer_iter[cpu]) 1796 if (!iter->buffer_iter[cpu])
2469 goto fail_buffer; 1797 goto fail;
2470 } 1798 }
2471 1799
2472 /* TODO stop tracer */ 1800 /* TODO stop tracer */
2473 *ret = seq_open(file, &tracer_seq_ops); 1801 ret = seq_open(file, &tracer_seq_ops);
2474 if (*ret) 1802 if (ret < 0) {
1803 fail_ret = ERR_PTR(ret);
2475 goto fail_buffer; 1804 goto fail_buffer;
1805 }
2476 1806
2477 m = file->private_data; 1807 m = file->private_data;
2478 m->private = iter; 1808 m->private = iter;
@@ -2482,7 +1812,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2482 1812
2483 mutex_unlock(&trace_types_lock); 1813 mutex_unlock(&trace_types_lock);
2484 1814
2485 out:
2486 return iter; 1815 return iter;
2487 1816
2488 fail_buffer: 1817 fail_buffer:
@@ -2490,10 +1819,12 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2490 if (iter->buffer_iter[cpu]) 1819 if (iter->buffer_iter[cpu])
2491 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1820 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2492 } 1821 }
1822 fail:
2493 mutex_unlock(&trace_types_lock); 1823 mutex_unlock(&trace_types_lock);
1824 kfree(iter->trace);
2494 kfree(iter); 1825 kfree(iter);
2495 1826
2496 return ERR_PTR(-ENOMEM); 1827 return fail_ret;
2497} 1828}
2498 1829
2499int tracing_open_generic(struct inode *inode, struct file *filp) 1830int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2505,7 +1836,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2505 return 0; 1836 return 0;
2506} 1837}
2507 1838
2508int tracing_release(struct inode *inode, struct file *file) 1839static int tracing_release(struct inode *inode, struct file *file)
2509{ 1840{
2510 struct seq_file *m = (struct seq_file *)file->private_data; 1841 struct seq_file *m = (struct seq_file *)file->private_data;
2511 struct trace_iterator *iter = m->private; 1842 struct trace_iterator *iter = m->private;
@@ -2525,33 +1856,26 @@ int tracing_release(struct inode *inode, struct file *file)
2525 mutex_unlock(&trace_types_lock); 1856 mutex_unlock(&trace_types_lock);
2526 1857
2527 seq_release(inode, file); 1858 seq_release(inode, file);
1859 mutex_destroy(&iter->mutex);
1860 kfree(iter->trace);
2528 kfree(iter); 1861 kfree(iter);
2529 return 0; 1862 return 0;
2530} 1863}
2531 1864
2532static int tracing_open(struct inode *inode, struct file *file) 1865static int tracing_open(struct inode *inode, struct file *file)
2533{ 1866{
2534 int ret;
2535
2536 __tracing_open(inode, file, &ret);
2537
2538 return ret;
2539}
2540
2541static int tracing_lt_open(struct inode *inode, struct file *file)
2542{
2543 struct trace_iterator *iter; 1867 struct trace_iterator *iter;
2544 int ret; 1868 int ret = 0;
2545
2546 iter = __tracing_open(inode, file, &ret);
2547 1869
2548 if (!ret) 1870 iter = __tracing_open(inode, file);
1871 if (IS_ERR(iter))
1872 ret = PTR_ERR(iter);
1873 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2549 iter->iter_flags |= TRACE_FILE_LAT_FMT; 1874 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2550 1875
2551 return ret; 1876 return ret;
2552} 1877}
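/*
 * __tracing_open() now reports failure through the returned pointer and
 * tracing_open() unwraps it. A minimal sketch of that standard
 * ERR_PTR()/IS_ERR()/PTR_ERR() idiom from <linux/err.h>; the names
 * foo and foo_open() below are purely illustrative.
 */
static struct foo *foo_open(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return ERR_PTR(-ENOMEM);	/* encode -errno in the pointer */
	return f;
}

static int foo_user(void)
{
	struct foo *f = foo_open();

	if (IS_ERR(f))
		return PTR_ERR(f);		/* recover the -errno value */
	/* ... use f, then kfree(f) ... */
	return 0;
}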
2553 1878
2554
2555static void * 1879static void *
2556t_next(struct seq_file *m, void *v, loff_t *pos) 1880t_next(struct seq_file *m, void *v, loff_t *pos)
2557{ 1881{
@@ -2623,21 +1947,14 @@ static int show_traces_open(struct inode *inode, struct file *file)
2623 return ret; 1947 return ret;
2624} 1948}
2625 1949
2626static struct file_operations tracing_fops = { 1950static const struct file_operations tracing_fops = {
2627 .open = tracing_open, 1951 .open = tracing_open,
2628 .read = seq_read, 1952 .read = seq_read,
2629 .llseek = seq_lseek, 1953 .llseek = seq_lseek,
2630 .release = tracing_release, 1954 .release = tracing_release,
2631}; 1955};
2632 1956
2633static struct file_operations tracing_lt_fops = { 1957static const struct file_operations show_traces_fops = {
2634 .open = tracing_lt_open,
2635 .read = seq_read,
2636 .llseek = seq_lseek,
2637 .release = tracing_release,
2638};
2639
2640static struct file_operations show_traces_fops = {
2641 .open = show_traces_open, 1958 .open = show_traces_open,
2642 .read = seq_read, 1959 .read = seq_read,
2643 .release = seq_release, 1960 .release = seq_release,
@@ -2730,7 +2047,7 @@ err_unlock:
2730 return err; 2047 return err;
2731} 2048}
2732 2049
2733static struct file_operations tracing_cpumask_fops = { 2050static const struct file_operations tracing_cpumask_fops = {
2734 .open = tracing_open_generic, 2051 .open = tracing_open_generic,
2735 .read = tracing_cpumask_read, 2052 .read = tracing_cpumask_read,
2736 .write = tracing_cpumask_write, 2053 .write = tracing_cpumask_write,
@@ -2740,57 +2057,62 @@ static ssize_t
2740tracing_trace_options_read(struct file *filp, char __user *ubuf, 2057tracing_trace_options_read(struct file *filp, char __user *ubuf,
2741 size_t cnt, loff_t *ppos) 2058 size_t cnt, loff_t *ppos)
2742{ 2059{
2743 int i; 2060 struct tracer_opt *trace_opts;
2061 u32 tracer_flags;
2062 int len = 0;
2744 char *buf; 2063 char *buf;
2745 int r = 0; 2064 int r = 0;
2746 int len = 0; 2065 int i;
2747 u32 tracer_flags = current_trace->flags->val;
2748 struct tracer_opt *trace_opts = current_trace->flags->opts;
2749 2066
2750 2067
2751 /* calulate max size */ 2068 /* calculate max size */
2752 for (i = 0; trace_options[i]; i++) { 2069 for (i = 0; trace_options[i]; i++) {
2753 len += strlen(trace_options[i]); 2070 len += strlen(trace_options[i]);
2754 len += 3; /* "no" and space */ 2071 len += 3; /* "no" and newline */
2755 } 2072 }
2756 2073
2074 mutex_lock(&trace_types_lock);
2075 tracer_flags = current_trace->flags->val;
2076 trace_opts = current_trace->flags->opts;
2077
2757 /* 2078 /*
2758 * Increase the size with names of options specific 2079 * Increase the size with names of options specific
 2759	 * to the current tracer.			 2080	 * to the current tracer.
2760 */ 2081 */
2761 for (i = 0; trace_opts[i].name; i++) { 2082 for (i = 0; trace_opts[i].name; i++) {
2762 len += strlen(trace_opts[i].name); 2083 len += strlen(trace_opts[i].name);
2763 len += 3; /* "no" and space */ 2084 len += 3; /* "no" and newline */
2764 } 2085 }
2765 2086
2766 /* +2 for \n and \0 */ 2087 /* +2 for \n and \0 */
2767 buf = kmalloc(len + 2, GFP_KERNEL); 2088 buf = kmalloc(len + 2, GFP_KERNEL);
2768 if (!buf) 2089 if (!buf) {
2090 mutex_unlock(&trace_types_lock);
2769 return -ENOMEM; 2091 return -ENOMEM;
2092 }
2770 2093
2771 for (i = 0; trace_options[i]; i++) { 2094 for (i = 0; trace_options[i]; i++) {
2772 if (trace_flags & (1 << i)) 2095 if (trace_flags & (1 << i))
2773 r += sprintf(buf + r, "%s ", trace_options[i]); 2096 r += sprintf(buf + r, "%s\n", trace_options[i]);
2774 else 2097 else
2775 r += sprintf(buf + r, "no%s ", trace_options[i]); 2098 r += sprintf(buf + r, "no%s\n", trace_options[i]);
2776 } 2099 }
2777 2100
2778 for (i = 0; trace_opts[i].name; i++) { 2101 for (i = 0; trace_opts[i].name; i++) {
2779 if (tracer_flags & trace_opts[i].bit) 2102 if (tracer_flags & trace_opts[i].bit)
2780 r += sprintf(buf + r, "%s ", 2103 r += sprintf(buf + r, "%s\n",
2781 trace_opts[i].name); 2104 trace_opts[i].name);
2782 else 2105 else
2783 r += sprintf(buf + r, "no%s ", 2106 r += sprintf(buf + r, "no%s\n",
2784 trace_opts[i].name); 2107 trace_opts[i].name);
2785 } 2108 }
2109 mutex_unlock(&trace_types_lock);
2786 2110
2787 r += sprintf(buf + r, "\n");
2788 WARN_ON(r >= len + 2); 2111 WARN_ON(r >= len + 2);
2789 2112
2790 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2113 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2791 2114
2792 kfree(buf); 2115 kfree(buf);
2793
2794 return r; 2116 return r;
2795} 2117}
2796 2118
@@ -2865,7 +2187,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2865 2187
2866 /* If no option could be set, test the specific tracer options */ 2188 /* If no option could be set, test the specific tracer options */
2867 if (!trace_options[i]) { 2189 if (!trace_options[i]) {
2190 mutex_lock(&trace_types_lock);
2868 ret = set_tracer_option(current_trace, cmp, neg); 2191 ret = set_tracer_option(current_trace, cmp, neg);
2192 mutex_unlock(&trace_types_lock);
2869 if (ret) 2193 if (ret)
2870 return ret; 2194 return ret;
2871 } 2195 }
@@ -2875,7 +2199,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2875 return cnt; 2199 return cnt;
2876} 2200}
2877 2201
2878static struct file_operations tracing_iter_fops = { 2202static const struct file_operations tracing_iter_fops = {
2879 .open = tracing_open_generic, 2203 .open = tracing_open_generic,
2880 .read = tracing_trace_options_read, 2204 .read = tracing_trace_options_read,
2881 .write = tracing_trace_options_write, 2205 .write = tracing_trace_options_write,
@@ -2908,7 +2232,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
2908 readme_msg, strlen(readme_msg)); 2232 readme_msg, strlen(readme_msg));
2909} 2233}
2910 2234
2911static struct file_operations tracing_readme_fops = { 2235static const struct file_operations tracing_readme_fops = {
2912 .open = tracing_open_generic, 2236 .open = tracing_open_generic,
2913 .read = tracing_readme_read, 2237 .read = tracing_readme_read,
2914}; 2238};
@@ -2930,7 +2254,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2930{ 2254{
2931 struct trace_array *tr = filp->private_data; 2255 struct trace_array *tr = filp->private_data;
2932 char buf[64]; 2256 char buf[64];
2933 long val; 2257 unsigned long val;
2934 int ret; 2258 int ret;
2935 2259
2936 if (cnt >= sizeof(buf)) 2260 if (cnt >= sizeof(buf))
@@ -2985,8 +2309,23 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2309 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2986} 2310}
2987 2311
2988static int tracing_set_tracer(char *buf) 2312int tracer_init(struct tracer *t, struct trace_array *tr)
2989{ 2313{
2314 tracing_reset_online_cpus(tr);
2315 return t->init(tr);
2316}
2317
2318struct trace_option_dentry;
2319
2320static struct trace_option_dentry *
2321create_trace_option_files(struct tracer *tracer);
2322
2323static void
2324destroy_trace_option_files(struct trace_option_dentry *topts);
2325
2326static int tracing_set_tracer(const char *buf)
2327{
2328 static struct trace_option_dentry *topts;
2990 struct trace_array *tr = &global_trace; 2329 struct trace_array *tr = &global_trace;
2991 struct tracer *t; 2330 struct tracer *t;
2992 int ret = 0; 2331 int ret = 0;
@@ -3007,9 +2346,14 @@ static int tracing_set_tracer(char *buf)
3007 if (current_trace && current_trace->reset) 2346 if (current_trace && current_trace->reset)
3008 current_trace->reset(tr); 2347 current_trace->reset(tr);
3009 2348
2349 destroy_trace_option_files(topts);
2350
3010 current_trace = t; 2351 current_trace = t;
2352
2353 topts = create_trace_option_files(current_trace);
2354
3011 if (t->init) { 2355 if (t->init) {
3012 ret = t->init(tr); 2356 ret = tracer_init(t, tr);
3013 if (ret) 2357 if (ret)
3014 goto out; 2358 goto out;
3015 } 2359 }
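Editor's note: the new tracer_init() helper resets the online-CPU buffers before calling a tracer's init callback, and tracing_set_tracer() now tears down and rebuilds the per-tracer option files around the switch. A minimal tracer sketch under the struct tracer layout this patch introduces; the "demo" name and the register_tracer() call site are illustrative only, not part of the patch.

static int demo_init(struct trace_array *tr)
{
	/* tracer_init() has already reset the online-CPU buffers here */
	return 0;
}

static void demo_reset(struct trace_array *tr)
{
	/* undo whatever demo_init() set up */
}

static struct tracer demo_tracer __read_mostly = {
	.name		= "demo",
	.init		= demo_init,
	.reset		= demo_reset,
	.wait_pipe	= poll_wait_pipe,	/* polled wakeup callback added by this patch */
};

/* register_tracer(&demo_tracer) from an __init function makes it selectable
 * via "echo demo > current_tracer". */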
@@ -3072,9 +2416,9 @@ static ssize_t
3072tracing_max_lat_write(struct file *filp, const char __user *ubuf, 2416tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3073 size_t cnt, loff_t *ppos) 2417 size_t cnt, loff_t *ppos)
3074{ 2418{
3075 long *ptr = filp->private_data; 2419 unsigned long *ptr = filp->private_data;
3076 char buf[64]; 2420 char buf[64];
3077 long val; 2421 unsigned long val;
3078 int ret; 2422 int ret;
3079 2423
3080 if (cnt >= sizeof(buf)) 2424 if (cnt >= sizeof(buf))
@@ -3094,54 +2438,96 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3094 return cnt; 2438 return cnt;
3095} 2439}
3096 2440
3097static atomic_t tracing_reader;
3098
3099static int tracing_open_pipe(struct inode *inode, struct file *filp) 2441static int tracing_open_pipe(struct inode *inode, struct file *filp)
3100{ 2442{
2443 long cpu_file = (long) inode->i_private;
3101 struct trace_iterator *iter; 2444 struct trace_iterator *iter;
2445 int ret = 0;
3102 2446
3103 if (tracing_disabled) 2447 if (tracing_disabled)
3104 return -ENODEV; 2448 return -ENODEV;
3105 2449
3106 /* We only allow for reader of the pipe */ 2450 mutex_lock(&trace_types_lock);
3107 if (atomic_inc_return(&tracing_reader) != 1) { 2451
3108 atomic_dec(&tracing_reader); 2452 /* We only allow one reader per cpu */
3109 return -EBUSY; 2453 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2454 if (!cpumask_empty(tracing_reader_cpumask)) {
2455 ret = -EBUSY;
2456 goto out;
2457 }
2458 cpumask_setall(tracing_reader_cpumask);
2459 } else {
2460 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2461 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2462 else {
2463 ret = -EBUSY;
2464 goto out;
2465 }
3110 } 2466 }
3111 2467
3112 /* create a buffer to store the information to pass to userspace */ 2468 /* create a buffer to store the information to pass to userspace */
3113 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2469 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3114 if (!iter) 2470 if (!iter) {
3115 return -ENOMEM; 2471 ret = -ENOMEM;
2472 goto out;
2473 }
3116 2474
3117 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 2475 /*
3118 kfree(iter); 2476 * We make a copy of the current tracer to avoid concurrent
3119 return -ENOMEM; 2477 * changes on it while we are reading.
2478 */
2479 iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2480 if (!iter->trace) {
2481 ret = -ENOMEM;
2482 goto fail;
3120 } 2483 }
2484 if (current_trace)
2485 *iter->trace = *current_trace;
3121 2486
3122 mutex_lock(&trace_types_lock); 2487 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2488 ret = -ENOMEM;
2489 goto fail;
2490 }
3123 2491
3124 /* trace pipe does not show start of buffer */ 2492 /* trace pipe does not show start of buffer */
3125 cpumask_setall(iter->started); 2493 cpumask_setall(iter->started);
3126 2494
2495 iter->cpu_file = cpu_file;
3127 iter->tr = &global_trace; 2496 iter->tr = &global_trace;
3128 iter->trace = current_trace; 2497 mutex_init(&iter->mutex);
3129 filp->private_data = iter; 2498 filp->private_data = iter;
3130 2499
3131 if (iter->trace->pipe_open) 2500 if (iter->trace->pipe_open)
3132 iter->trace->pipe_open(iter); 2501 iter->trace->pipe_open(iter);
2502
2503out:
3133 mutex_unlock(&trace_types_lock); 2504 mutex_unlock(&trace_types_lock);
2505 return ret;
3134 2506
3135 return 0; 2507fail:
2508 kfree(iter->trace);
2509 kfree(iter);
2510 mutex_unlock(&trace_types_lock);
2511 return ret;
3136} 2512}
3137 2513
3138static int tracing_release_pipe(struct inode *inode, struct file *file) 2514static int tracing_release_pipe(struct inode *inode, struct file *file)
3139{ 2515{
3140 struct trace_iterator *iter = file->private_data; 2516 struct trace_iterator *iter = file->private_data;
3141 2517
2518 mutex_lock(&trace_types_lock);
2519
2520 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2521 cpumask_clear(tracing_reader_cpumask);
2522 else
2523 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2524
2525 mutex_unlock(&trace_types_lock);
2526
3142 free_cpumask_var(iter->started); 2527 free_cpumask_var(iter->started);
2528 mutex_destroy(&iter->mutex);
2529 kfree(iter->trace);
3143 kfree(iter); 2530 kfree(iter);
3144 atomic_dec(&tracing_reader);
3145 2531
3146 return 0; 2532 return 0;
3147} 2533}
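Editor's note: trace_pipe readers are now accounted per CPU in tracing_reader_cpumask instead of a single global atomic, so one reader may attach to each per-CPU pipe, or one to the all-CPU pipe, at a time. A small userspace sketch, assuming debugfs is mounted at the usual place, showing the second opener being turned away:

/* Sketch only: the debugfs path is an assumption about the target system. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/trace_pipe";
	int first = open(path, O_RDONLY);
	int second = open(path, O_RDONLY);

	if (first >= 0 && second < 0 && errno == EBUSY)
		printf("second reader rejected with EBUSY, as expected\n");

	if (first >= 0)
		close(first);
	return 0;
}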
@@ -3167,67 +2553,57 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3167 } 2553 }
3168} 2554}
3169 2555
3170/* 2556
3171 * Consumer reader. 2557void default_wait_pipe(struct trace_iterator *iter)
3172 */
3173static ssize_t
3174tracing_read_pipe(struct file *filp, char __user *ubuf,
3175 size_t cnt, loff_t *ppos)
3176{ 2558{
3177 struct trace_iterator *iter = filp->private_data; 2559 DEFINE_WAIT(wait);
3178 ssize_t sret;
3179 2560
3180 /* return any leftover data */ 2561 prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
3181 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3182 if (sret != -EBUSY)
3183 return sret;
3184 2562
3185 trace_seq_reset(&iter->seq); 2563 if (trace_empty(iter))
2564 schedule();
3186 2565
3187 mutex_lock(&trace_types_lock); 2566 finish_wait(&trace_wait, &wait);
3188 if (iter->trace->read) { 2567}
3189 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 2568
3190 if (sret) 2569/*
3191 goto out; 2570 * This is a make-shift waitqueue.
3192 } 2571 * A tracer might use this callback in some rare cases:
2572 *
2573 * 1) the current tracer might hold the runqueue lock when it wakes up
2574 * a reader, hence a deadlock (sched, function, and function graph tracers)
 2575 * 2) the function tracers trace all functions; we don't want
2576 * the overhead of calling wake_up and friends
2577 * (and tracing them too)
2578 *
2579 * Anyway, this is really very primitive wakeup.
2580 */
2581void poll_wait_pipe(struct trace_iterator *iter)
2582{
2583 set_current_state(TASK_INTERRUPTIBLE);
2584 /* sleep for 100 msecs, and try again. */
2585 schedule_timeout(HZ / 10);
2586}
2587
2588/* Must be called with trace_types_lock mutex held. */
2589static int tracing_wait_pipe(struct file *filp)
2590{
2591 struct trace_iterator *iter = filp->private_data;
3193 2592
3194waitagain:
3195 sret = 0;
3196 while (trace_empty(iter)) { 2593 while (trace_empty(iter)) {
3197 2594
3198 if ((filp->f_flags & O_NONBLOCK)) { 2595 if ((filp->f_flags & O_NONBLOCK)) {
3199 sret = -EAGAIN; 2596 return -EAGAIN;
3200 goto out;
3201 } 2597 }
3202 2598
3203 /* 2599 mutex_unlock(&iter->mutex);
3204 * This is a make-shift waitqueue. The reason we don't use
3205 * an actual wait queue is because:
3206 * 1) we only ever have one waiter
3207 * 2) the tracing, traces all functions, we don't want
3208 * the overhead of calling wake_up and friends
3209 * (and tracing them too)
3210 * Anyway, this is really very primitive wakeup.
3211 */
3212 set_current_state(TASK_INTERRUPTIBLE);
3213 iter->tr->waiter = current;
3214 2600
3215 mutex_unlock(&trace_types_lock); 2601 iter->trace->wait_pipe(iter);
3216 2602
3217 /* sleep for 100 msecs, and try again. */ 2603 mutex_lock(&iter->mutex);
3218 schedule_timeout(HZ/10);
3219 2604
3220 mutex_lock(&trace_types_lock); 2605 if (signal_pending(current))
3221 2606 return -EINTR;
3222 iter->tr->waiter = NULL;
3223
3224 if (signal_pending(current)) {
3225 sret = -EINTR;
3226 goto out;
3227 }
3228
3229 if (iter->trace != current_trace)
3230 goto out;
3231 2607
3232 /* 2608 /*
3233 * We block until we read something and tracing is disabled. 2609 * We block until we read something and tracing is disabled.
@@ -3240,13 +2616,59 @@ waitagain:
3240 */ 2616 */
3241 if (!tracer_enabled && iter->pos) 2617 if (!tracer_enabled && iter->pos)
3242 break; 2618 break;
2619 }
2620
2621 return 1;
2622}
2623
2624/*
2625 * Consumer reader.
2626 */
2627static ssize_t
2628tracing_read_pipe(struct file *filp, char __user *ubuf,
2629 size_t cnt, loff_t *ppos)
2630{
2631 struct trace_iterator *iter = filp->private_data;
2632 static struct tracer *old_tracer;
2633 ssize_t sret;
2634
2635 /* return any leftover data */
2636 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2637 if (sret != -EBUSY)
2638 return sret;
3243 2639
3244 continue; 2640 trace_seq_init(&iter->seq);
2641
2642 /* copy the tracer to avoid using a global lock all around */
2643 mutex_lock(&trace_types_lock);
2644 if (unlikely(old_tracer != current_trace && current_trace)) {
2645 old_tracer = current_trace;
2646 *iter->trace = *current_trace;
3245 } 2647 }
2648 mutex_unlock(&trace_types_lock);
2649
2650 /*
2651 * Avoid more than one consumer on a single file descriptor
2652 * This is just a matter of traces coherency, the ring buffer itself
2653 * is protected.
2654 */
2655 mutex_lock(&iter->mutex);
2656 if (iter->trace->read) {
2657 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2658 if (sret)
2659 goto out;
2660 }
2661
2662waitagain:
2663 sret = tracing_wait_pipe(filp);
2664 if (sret <= 0)
2665 goto out;
3246 2666
3247 /* stop when tracing is finished */ 2667 /* stop when tracing is finished */
3248 if (trace_empty(iter)) 2668 if (trace_empty(iter)) {
2669 sret = 0;
3249 goto out; 2670 goto out;
2671 }
3250 2672
3251 if (cnt >= PAGE_SIZE) 2673 if (cnt >= PAGE_SIZE)
3252 cnt = PAGE_SIZE - 1; 2674 cnt = PAGE_SIZE - 1;
@@ -3267,8 +2689,8 @@ waitagain:
3267 iter->seq.len = len; 2689 iter->seq.len = len;
3268 break; 2690 break;
3269 } 2691 }
3270 2692 if (ret != TRACE_TYPE_NO_CONSUME)
3271 trace_consume(iter); 2693 trace_consume(iter);
3272 2694
3273 if (iter->seq.len >= cnt) 2695 if (iter->seq.len >= cnt)
3274 break; 2696 break;
@@ -3277,7 +2699,7 @@ waitagain:
3277 /* Now copy what we have to the user */ 2699 /* Now copy what we have to the user */
3278 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2700 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3279 if (iter->seq.readpos >= iter->seq.len) 2701 if (iter->seq.readpos >= iter->seq.len)
3280 trace_seq_reset(&iter->seq); 2702 trace_seq_init(&iter->seq);
3281 2703
3282 /* 2704 /*
3283 * If there was nothing to send to user, in spite of consuming trace 2705 * If there was nothing to send to user, in spite of consuming trace
@@ -3287,11 +2709,148 @@ waitagain:
3287 goto waitagain; 2709 goto waitagain;
3288 2710
3289out: 2711out:
3290 mutex_unlock(&trace_types_lock); 2712 mutex_unlock(&iter->mutex);
3291 2713
3292 return sret; 2714 return sret;
3293} 2715}
3294 2716
2717static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
2718 struct pipe_buffer *buf)
2719{
2720 __free_page(buf->page);
2721}
2722
2723static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
2724 unsigned int idx)
2725{
2726 __free_page(spd->pages[idx]);
2727}
2728
2729static struct pipe_buf_operations tracing_pipe_buf_ops = {
2730 .can_merge = 0,
2731 .map = generic_pipe_buf_map,
2732 .unmap = generic_pipe_buf_unmap,
2733 .confirm = generic_pipe_buf_confirm,
2734 .release = tracing_pipe_buf_release,
2735 .steal = generic_pipe_buf_steal,
2736 .get = generic_pipe_buf_get,
2737};
2738
2739static size_t
2740tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
2741{
2742 size_t count;
2743 int ret;
2744
2745 /* Seq buffer is page-sized, exactly what we need. */
2746 for (;;) {
2747 count = iter->seq.len;
2748 ret = print_trace_line(iter);
2749 count = iter->seq.len - count;
2750 if (rem < count) {
2751 rem = 0;
2752 iter->seq.len -= count;
2753 break;
2754 }
2755 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2756 iter->seq.len -= count;
2757 break;
2758 }
2759
2760 trace_consume(iter);
2761 rem -= count;
2762 if (!find_next_entry_inc(iter)) {
2763 rem = 0;
2764 iter->ent = NULL;
2765 break;
2766 }
2767 }
2768
2769 return rem;
2770}
2771
2772static ssize_t tracing_splice_read_pipe(struct file *filp,
2773 loff_t *ppos,
2774 struct pipe_inode_info *pipe,
2775 size_t len,
2776 unsigned int flags)
2777{
2778 struct page *pages[PIPE_BUFFERS];
2779 struct partial_page partial[PIPE_BUFFERS];
2780 struct trace_iterator *iter = filp->private_data;
2781 struct splice_pipe_desc spd = {
2782 .pages = pages,
2783 .partial = partial,
2784 .nr_pages = 0, /* This gets updated below. */
2785 .flags = flags,
2786 .ops = &tracing_pipe_buf_ops,
2787 .spd_release = tracing_spd_release_pipe,
2788 };
2789 static struct tracer *old_tracer;
2790 ssize_t ret;
2791 size_t rem;
2792 unsigned int i;
2793
2794 /* copy the tracer to avoid using a global lock all around */
2795 mutex_lock(&trace_types_lock);
2796 if (unlikely(old_tracer != current_trace && current_trace)) {
2797 old_tracer = current_trace;
2798 *iter->trace = *current_trace;
2799 }
2800 mutex_unlock(&trace_types_lock);
2801
2802 mutex_lock(&iter->mutex);
2803
2804 if (iter->trace->splice_read) {
2805 ret = iter->trace->splice_read(iter, filp,
2806 ppos, pipe, len, flags);
2807 if (ret)
2808 goto out_err;
2809 }
2810
2811 ret = tracing_wait_pipe(filp);
2812 if (ret <= 0)
2813 goto out_err;
2814
2815 if (!iter->ent && !find_next_entry_inc(iter)) {
2816 ret = -EFAULT;
2817 goto out_err;
2818 }
2819
2820 /* Fill as many pages as possible. */
2821 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
2822 pages[i] = alloc_page(GFP_KERNEL);
2823 if (!pages[i])
2824 break;
2825
2826 rem = tracing_fill_pipe_page(rem, iter);
2827
2828 /* Copy the data into the page, so we can start over. */
2829 ret = trace_seq_to_buffer(&iter->seq,
2830 page_address(pages[i]),
2831 iter->seq.len);
2832 if (ret < 0) {
2833 __free_page(pages[i]);
2834 break;
2835 }
2836 partial[i].offset = 0;
2837 partial[i].len = iter->seq.len;
2838
2839 trace_seq_init(&iter->seq);
2840 }
2841
2842 mutex_unlock(&iter->mutex);
2843
2844 spd.nr_pages = i;
2845
2846 return splice_to_pipe(pipe, &spd);
2847
2848out_err:
2849 mutex_unlock(&iter->mutex);
2850
2851 return ret;
2852}
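Editor's note: tracing_splice_read_pipe() moves rendered trace text into a pipe without bouncing through a user buffer: it fills up to PIPE_BUFFERS freshly allocated pages from the seq buffer and hands them to splice_to_pipe(). A hedged userspace sketch, with the path and the 4096-byte chunk size as assumptions, that drains trace_pipe through splice(2) into a file:

/* Sketch only: path and chunk size are assumptions. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int trace = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	int out = open("trace.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int p[2];
	ssize_t n;

	if (trace < 0 || out < 0 || pipe(p) < 0)
		return 1;

	/* trace_pipe -> pipe uses the new splice_read; then pipe -> file */
	while ((n = splice(trace, NULL, p[1], NULL, 4096, SPLICE_F_MOVE)) > 0)
		splice(p[0], NULL, out, NULL, n, SPLICE_F_MOVE);

	close(trace);
	close(out);
	return 0;
}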
2853
3295static ssize_t 2854static ssize_t
3296tracing_entries_read(struct file *filp, char __user *ubuf, 2855tracing_entries_read(struct file *filp, char __user *ubuf,
3297 size_t cnt, loff_t *ppos) 2856 size_t cnt, loff_t *ppos)
@@ -3433,42 +2992,288 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3433 return cnt; 2992 return cnt;
3434} 2993}
3435 2994
3436static struct file_operations tracing_max_lat_fops = { 2995static const struct file_operations tracing_max_lat_fops = {
3437 .open = tracing_open_generic, 2996 .open = tracing_open_generic,
3438 .read = tracing_max_lat_read, 2997 .read = tracing_max_lat_read,
3439 .write = tracing_max_lat_write, 2998 .write = tracing_max_lat_write,
3440}; 2999};
3441 3000
3442static struct file_operations tracing_ctrl_fops = { 3001static const struct file_operations tracing_ctrl_fops = {
3443 .open = tracing_open_generic, 3002 .open = tracing_open_generic,
3444 .read = tracing_ctrl_read, 3003 .read = tracing_ctrl_read,
3445 .write = tracing_ctrl_write, 3004 .write = tracing_ctrl_write,
3446}; 3005};
3447 3006
3448static struct file_operations set_tracer_fops = { 3007static const struct file_operations set_tracer_fops = {
3449 .open = tracing_open_generic, 3008 .open = tracing_open_generic,
3450 .read = tracing_set_trace_read, 3009 .read = tracing_set_trace_read,
3451 .write = tracing_set_trace_write, 3010 .write = tracing_set_trace_write,
3452}; 3011};
3453 3012
3454static struct file_operations tracing_pipe_fops = { 3013static const struct file_operations tracing_pipe_fops = {
3455 .open = tracing_open_pipe, 3014 .open = tracing_open_pipe,
3456 .poll = tracing_poll_pipe, 3015 .poll = tracing_poll_pipe,
3457 .read = tracing_read_pipe, 3016 .read = tracing_read_pipe,
3017 .splice_read = tracing_splice_read_pipe,
3458 .release = tracing_release_pipe, 3018 .release = tracing_release_pipe,
3459}; 3019};
3460 3020
3461static struct file_operations tracing_entries_fops = { 3021static const struct file_operations tracing_entries_fops = {
3462 .open = tracing_open_generic, 3022 .open = tracing_open_generic,
3463 .read = tracing_entries_read, 3023 .read = tracing_entries_read,
3464 .write = tracing_entries_write, 3024 .write = tracing_entries_write,
3465}; 3025};
3466 3026
3467static struct file_operations tracing_mark_fops = { 3027static const struct file_operations tracing_mark_fops = {
3468 .open = tracing_open_generic, 3028 .open = tracing_open_generic,
3469 .write = tracing_mark_write, 3029 .write = tracing_mark_write,
3470}; 3030};
3471 3031
3032struct ftrace_buffer_info {
3033 struct trace_array *tr;
3034 void *spare;
3035 int cpu;
3036 unsigned int read;
3037};
3038
3039static int tracing_buffers_open(struct inode *inode, struct file *filp)
3040{
3041 int cpu = (int)(long)inode->i_private;
3042 struct ftrace_buffer_info *info;
3043
3044 if (tracing_disabled)
3045 return -ENODEV;
3046
3047 info = kzalloc(sizeof(*info), GFP_KERNEL);
3048 if (!info)
3049 return -ENOMEM;
3050
3051 info->tr = &global_trace;
3052 info->cpu = cpu;
3053 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3054 /* Force reading ring buffer for first read */
3055 info->read = (unsigned int)-1;
3056 if (!info->spare)
3057 goto out;
3058
3059 filp->private_data = info;
3060
3061 return 0;
3062
3063 out:
3064 kfree(info);
3065 return -ENOMEM;
3066}
3067
3068static ssize_t
3069tracing_buffers_read(struct file *filp, char __user *ubuf,
3070 size_t count, loff_t *ppos)
3071{
3072 struct ftrace_buffer_info *info = filp->private_data;
3073 unsigned int pos;
3074 ssize_t ret;
3075 size_t size;
3076
3077 if (!count)
3078 return 0;
3079
3080 /* Do we have previous read data to read? */
3081 if (info->read < PAGE_SIZE)
3082 goto read;
3083
3084 info->read = 0;
3085
3086 ret = ring_buffer_read_page(info->tr->buffer,
3087 &info->spare,
3088 count,
3089 info->cpu, 0);
3090 if (ret < 0)
3091 return 0;
3092
3093 pos = ring_buffer_page_len(info->spare);
3094
3095 if (pos < PAGE_SIZE)
3096 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3097
3098read:
3099 size = PAGE_SIZE - info->read;
3100 if (size > count)
3101 size = count;
3102
3103 ret = copy_to_user(ubuf, info->spare + info->read, size);
3104 if (ret == size)
3105 return -EFAULT;
3106 size -= ret;
3107
3108 *ppos += size;
3109 info->read += size;
3110
3111 return size;
3112}
3113
3114static int tracing_buffers_release(struct inode *inode, struct file *file)
3115{
3116 struct ftrace_buffer_info *info = file->private_data;
3117
3118 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3119 kfree(info);
3120
3121 return 0;
3122}
3123
3124struct buffer_ref {
3125 struct ring_buffer *buffer;
3126 void *page;
3127 int ref;
3128};
3129
3130static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3131 struct pipe_buffer *buf)
3132{
3133 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3134
3135 if (--ref->ref)
3136 return;
3137
3138 ring_buffer_free_read_page(ref->buffer, ref->page);
3139 kfree(ref);
3140 buf->private = 0;
3141}
3142
3143static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3144 struct pipe_buffer *buf)
3145{
3146 return 1;
3147}
3148
3149static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3150 struct pipe_buffer *buf)
3151{
3152 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3153
3154 ref->ref++;
3155}
3156
3157/* Pipe buffer operations for a buffer. */
3158static struct pipe_buf_operations buffer_pipe_buf_ops = {
3159 .can_merge = 0,
3160 .map = generic_pipe_buf_map,
3161 .unmap = generic_pipe_buf_unmap,
3162 .confirm = generic_pipe_buf_confirm,
3163 .release = buffer_pipe_buf_release,
3164 .steal = buffer_pipe_buf_steal,
3165 .get = buffer_pipe_buf_get,
3166};
3167
3168/*
3169 * Callback from splice_to_pipe(), if we need to release some pages
3170 * at the end of the spd in case we error'ed out in filling the pipe.
3171 */
3172static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3173{
3174 struct buffer_ref *ref =
3175 (struct buffer_ref *)spd->partial[i].private;
3176
3177 if (--ref->ref)
3178 return;
3179
3180 ring_buffer_free_read_page(ref->buffer, ref->page);
3181 kfree(ref);
3182 spd->partial[i].private = 0;
3183}
3184
3185static ssize_t
3186tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3187 struct pipe_inode_info *pipe, size_t len,
3188 unsigned int flags)
3189{
3190 struct ftrace_buffer_info *info = file->private_data;
3191 struct partial_page partial[PIPE_BUFFERS];
3192 struct page *pages[PIPE_BUFFERS];
3193 struct splice_pipe_desc spd = {
3194 .pages = pages,
3195 .partial = partial,
3196 .flags = flags,
3197 .ops = &buffer_pipe_buf_ops,
3198 .spd_release = buffer_spd_release,
3199 };
3200 struct buffer_ref *ref;
3201 int size, i;
3202 size_t ret;
3203
3204 /*
3205 * We can't seek on a buffer input
3206 */
3207 if (unlikely(*ppos))
3208 return -ESPIPE;
3209
3210
3211 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
3212 struct page *page;
3213 int r;
3214
3215 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3216 if (!ref)
3217 break;
3218
3219 ref->buffer = info->tr->buffer;
3220 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3221 if (!ref->page) {
3222 kfree(ref);
3223 break;
3224 }
3225
3226 r = ring_buffer_read_page(ref->buffer, &ref->page,
3227 len, info->cpu, 0);
3228 if (r < 0) {
3229 ring_buffer_free_read_page(ref->buffer,
3230 ref->page);
3231 kfree(ref);
3232 break;
3233 }
3234
3235 /*
3236 * zero out any left over data, this is going to
3237 * user land.
3238 */
3239 size = ring_buffer_page_len(ref->page);
3240 if (size < PAGE_SIZE)
3241 memset(ref->page + size, 0, PAGE_SIZE - size);
3242
3243 page = virt_to_page(ref->page);
3244
3245 spd.pages[i] = page;
3246 spd.partial[i].len = PAGE_SIZE;
3247 spd.partial[i].offset = 0;
3248 spd.partial[i].private = (unsigned long)ref;
3249 spd.nr_pages++;
3250 }
3251
3252 spd.nr_pages = i;
3253
3254 /* did we read anything? */
3255 if (!spd.nr_pages) {
3256 if (flags & SPLICE_F_NONBLOCK)
3257 ret = -EAGAIN;
3258 else
3259 ret = 0;
3260 /* TODO: block */
3261 return ret;
3262 }
3263
3264 ret = splice_to_pipe(pipe, &spd);
3265
3266 return ret;
3267}
3268
3269static const struct file_operations tracing_buffers_fops = {
3270 .open = tracing_buffers_open,
3271 .read = tracing_buffers_read,
3272 .release = tracing_buffers_release,
3273 .splice_read = tracing_buffers_splice_read,
3274 .llseek = no_llseek,
3275};
3276
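Editor's note: the tracing_buffers_* interface added above exposes raw, page-sized ring-buffer reads per CPU (zero-padded to PAGE_SIZE) plus a zero-copy splice path built on reference-counted buffer pages. A short read-loop sketch; the binary_buffers/0 path and the 4096-byte page size are assumptions about the target system:

/* Sketch only: each read() returns one raw ring-buffer page, zero-padded. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char page[4096];
	int fd = open("/sys/kernel/debug/tracing/binary_buffers/0", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return 1;

	while ((n = read(fd, page, sizeof(page))) > 0)
		write(STDOUT_FILENO, page, n);	/* raw binary events for CPU 0 */

	close(fd);
	return 0;
}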
3472#ifdef CONFIG_DYNAMIC_FTRACE 3277#ifdef CONFIG_DYNAMIC_FTRACE
3473 3278
3474int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3279int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3500,7 +3305,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3500 return r; 3305 return r;
3501} 3306}
3502 3307
3503static struct file_operations tracing_dyn_info_fops = { 3308static const struct file_operations tracing_dyn_info_fops = {
3504 .open = tracing_open_generic, 3309 .open = tracing_open_generic,
3505 .read = tracing_read_dyn_info, 3310 .read = tracing_read_dyn_info,
3506}; 3311};
@@ -3526,15 +3331,346 @@ struct dentry *tracing_init_dentry(void)
3526 return d_tracer; 3331 return d_tracer;
3527} 3332}
3528 3333
3334static struct dentry *d_percpu;
3335
3336struct dentry *tracing_dentry_percpu(void)
3337{
3338 static int once;
3339 struct dentry *d_tracer;
3340
3341 if (d_percpu)
3342 return d_percpu;
3343
3344 d_tracer = tracing_init_dentry();
3345
3346 if (!d_tracer)
3347 return NULL;
3348
3349 d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3350
3351 if (!d_percpu && !once) {
3352 once = 1;
3353 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3354 return NULL;
3355 }
3356
3357 return d_percpu;
3358}
3359
3360static void tracing_init_debugfs_percpu(long cpu)
3361{
3362 struct dentry *d_percpu = tracing_dentry_percpu();
3363 struct dentry *entry, *d_cpu;
3364 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
3365 char cpu_dir[7];
3366
3367 if (cpu > 999 || cpu < 0)
3368 return;
3369
3370 sprintf(cpu_dir, "cpu%ld", cpu);
3371 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3372 if (!d_cpu) {
3373 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3374 return;
3375 }
3376
3377 /* per cpu trace_pipe */
3378 entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
3379 (void *) cpu, &tracing_pipe_fops);
3380 if (!entry)
3381 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3382
3383 /* per cpu trace */
3384 entry = debugfs_create_file("trace", 0444, d_cpu,
3385 (void *) cpu, &tracing_fops);
3386 if (!entry)
3387 pr_warning("Could not create debugfs 'trace' entry\n");
3388}
3389
3529#ifdef CONFIG_FTRACE_SELFTEST 3390#ifdef CONFIG_FTRACE_SELFTEST
3530/* Let selftest have access to static functions in this file */ 3391/* Let selftest have access to static functions in this file */
3531#include "trace_selftest.c" 3392#include "trace_selftest.c"
3532#endif 3393#endif
3533 3394
3395struct trace_option_dentry {
3396 struct tracer_opt *opt;
3397 struct tracer_flags *flags;
3398 struct dentry *entry;
3399};
3400
3401static ssize_t
3402trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3403 loff_t *ppos)
3404{
3405 struct trace_option_dentry *topt = filp->private_data;
3406 char *buf;
3407
3408 if (topt->flags->val & topt->opt->bit)
3409 buf = "1\n";
3410 else
3411 buf = "0\n";
3412
3413 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3414}
3415
3416static ssize_t
3417trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3418 loff_t *ppos)
3419{
3420 struct trace_option_dentry *topt = filp->private_data;
3421 unsigned long val;
3422 char buf[64];
3423 int ret;
3424
3425 if (cnt >= sizeof(buf))
3426 return -EINVAL;
3427
3428 if (copy_from_user(&buf, ubuf, cnt))
3429 return -EFAULT;
3430
3431 buf[cnt] = 0;
3432
3433 ret = strict_strtoul(buf, 10, &val);
3434 if (ret < 0)
3435 return ret;
3436
3437 ret = 0;
3438 switch (val) {
3439 case 0:
3440 /* do nothing if already cleared */
3441 if (!(topt->flags->val & topt->opt->bit))
3442 break;
3443
3444 mutex_lock(&trace_types_lock);
3445 if (current_trace->set_flag)
3446 ret = current_trace->set_flag(topt->flags->val,
3447 topt->opt->bit, 0);
3448 mutex_unlock(&trace_types_lock);
3449 if (ret)
3450 return ret;
3451 topt->flags->val &= ~topt->opt->bit;
3452 break;
3453 case 1:
3454 /* do nothing if already set */
3455 if (topt->flags->val & topt->opt->bit)
3456 break;
3457
3458 mutex_lock(&trace_types_lock);
3459 if (current_trace->set_flag)
3460 ret = current_trace->set_flag(topt->flags->val,
3461 topt->opt->bit, 1);
3462 mutex_unlock(&trace_types_lock);
3463 if (ret)
3464 return ret;
3465 topt->flags->val |= topt->opt->bit;
3466 break;
3467
3468 default:
3469 return -EINVAL;
3470 }
3471
3472 *ppos += cnt;
3473
3474 return cnt;
3475}
3476
3477
3478static const struct file_operations trace_options_fops = {
3479 .open = tracing_open_generic,
3480 .read = trace_options_read,
3481 .write = trace_options_write,
3482};
3483
3484static ssize_t
3485trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3486 loff_t *ppos)
3487{
3488 long index = (long)filp->private_data;
3489 char *buf;
3490
3491 if (trace_flags & (1 << index))
3492 buf = "1\n";
3493 else
3494 buf = "0\n";
3495
3496 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3497}
3498
3499static ssize_t
3500trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3501 loff_t *ppos)
3502{
3503 long index = (long)filp->private_data;
3504 char buf[64];
3505 unsigned long val;
3506 int ret;
3507
3508 if (cnt >= sizeof(buf))
3509 return -EINVAL;
3510
3511 if (copy_from_user(&buf, ubuf, cnt))
3512 return -EFAULT;
3513
3514 buf[cnt] = 0;
3515
3516 ret = strict_strtoul(buf, 10, &val);
3517 if (ret < 0)
3518 return ret;
3519
3520 switch (val) {
3521 case 0:
3522 trace_flags &= ~(1 << index);
3523 break;
3524 case 1:
3525 trace_flags |= 1 << index;
3526 break;
3527
3528 default:
3529 return -EINVAL;
3530 }
3531
3532 *ppos += cnt;
3533
3534 return cnt;
3535}
3536
3537static const struct file_operations trace_options_core_fops = {
3538 .open = tracing_open_generic,
3539 .read = trace_options_core_read,
3540 .write = trace_options_core_write,
3541};
3542
3543static struct dentry *trace_options_init_dentry(void)
3544{
3545 struct dentry *d_tracer;
3546 static struct dentry *t_options;
3547
3548 if (t_options)
3549 return t_options;
3550
3551 d_tracer = tracing_init_dentry();
3552 if (!d_tracer)
3553 return NULL;
3554
3555 t_options = debugfs_create_dir("options", d_tracer);
3556 if (!t_options) {
3557 pr_warning("Could not create debugfs directory 'options'\n");
3558 return NULL;
3559 }
3560
3561 return t_options;
3562}
3563
3564static void
3565create_trace_option_file(struct trace_option_dentry *topt,
3566 struct tracer_flags *flags,
3567 struct tracer_opt *opt)
3568{
3569 struct dentry *t_options;
3570 struct dentry *entry;
3571
3572 t_options = trace_options_init_dentry();
3573 if (!t_options)
3574 return;
3575
3576 topt->flags = flags;
3577 topt->opt = opt;
3578
3579 entry = debugfs_create_file(opt->name, 0644, t_options, topt,
3580 &trace_options_fops);
3581
3582 topt->entry = entry;
3583
3584}
3585
3586static struct trace_option_dentry *
3587create_trace_option_files(struct tracer *tracer)
3588{
3589 struct trace_option_dentry *topts;
3590 struct tracer_flags *flags;
3591 struct tracer_opt *opts;
3592 int cnt;
3593
3594 if (!tracer)
3595 return NULL;
3596
3597 flags = tracer->flags;
3598
3599 if (!flags || !flags->opts)
3600 return NULL;
3601
3602 opts = flags->opts;
3603
3604 for (cnt = 0; opts[cnt].name; cnt++)
3605 ;
3606
3607 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
3608 if (!topts)
3609 return NULL;
3610
3611 for (cnt = 0; opts[cnt].name; cnt++)
3612 create_trace_option_file(&topts[cnt], flags,
3613 &opts[cnt]);
3614
3615 return topts;
3616}
3617
3618static void
3619destroy_trace_option_files(struct trace_option_dentry *topts)
3620{
3621 int cnt;
3622
3623 if (!topts)
3624 return;
3625
3626 for (cnt = 0; topts[cnt].opt; cnt++) {
3627 if (topts[cnt].entry)
3628 debugfs_remove(topts[cnt].entry);
3629 }
3630
3631 kfree(topts);
3632}
3633
3634static struct dentry *
3635create_trace_option_core_file(const char *option, long index)
3636{
3637 struct dentry *t_options;
3638 struct dentry *entry;
3639
3640 t_options = trace_options_init_dentry();
3641 if (!t_options)
3642 return NULL;
3643
3644 entry = debugfs_create_file(option, 0644, t_options, (void *)index,
3645 &trace_options_core_fops);
3646
3647 return entry;
3648}
3649
3650static __init void create_trace_options_dir(void)
3651{
3652 struct dentry *t_options;
3653 struct dentry *entry;
3654 int i;
3655
3656 t_options = trace_options_init_dentry();
3657 if (!t_options)
3658 return;
3659
3660 for (i = 0; trace_options[i]; i++) {
3661 entry = create_trace_option_core_file(trace_options[i], i);
3662 if (!entry)
3663 pr_warning("Could not create debugfs %s entry\n",
3664 trace_options[i]);
3665 }
3666}
3667
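Editor's note: create_trace_options_dir() mirrors every core trace_options flag as its own 0/1 file under options/, and create_trace_option_files() does the same for the current tracer's private flags whenever a tracer is selected. A sketch that clears one core flag through the new per-option file; "print-parent" is only an example flag name:

/* Sketch only: equivalent to "echo noprint-parent > trace_options". */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/options/print-parent", O_WRONLY);

	if (fd < 0)
		return 1;

	write(fd, "0", 1);	/* trace_options_core_write() clears the bit */
	close(fd);
	return 0;
}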
3534static __init int tracer_init_debugfs(void) 3668static __init int tracer_init_debugfs(void)
3535{ 3669{
3536 struct dentry *d_tracer; 3670 struct dentry *d_tracer;
3671 struct dentry *buffers;
3537 struct dentry *entry; 3672 struct dentry *entry;
3673 int cpu;
3538 3674
3539 d_tracer = tracing_init_dentry(); 3675 d_tracer = tracing_init_dentry();
3540 3676
@@ -3548,18 +3684,15 @@ static __init int tracer_init_debugfs(void)
3548 if (!entry) 3684 if (!entry)
3549 pr_warning("Could not create debugfs 'trace_options' entry\n"); 3685 pr_warning("Could not create debugfs 'trace_options' entry\n");
3550 3686
3687 create_trace_options_dir();
3688
3551 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3689 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3552 NULL, &tracing_cpumask_fops); 3690 NULL, &tracing_cpumask_fops);
3553 if (!entry) 3691 if (!entry)
3554 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 3692 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3555 3693
3556 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
3557 &global_trace, &tracing_lt_fops);
3558 if (!entry)
3559 pr_warning("Could not create debugfs 'latency_trace' entry\n");
3560
3561 entry = debugfs_create_file("trace", 0444, d_tracer, 3694 entry = debugfs_create_file("trace", 0444, d_tracer,
3562 &global_trace, &tracing_fops); 3695 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3563 if (!entry) 3696 if (!entry)
3564 pr_warning("Could not create debugfs 'trace' entry\n"); 3697 pr_warning("Could not create debugfs 'trace' entry\n");
3565 3698
@@ -3590,8 +3723,8 @@ static __init int tracer_init_debugfs(void)
3590 if (!entry) 3723 if (!entry)
3591 pr_warning("Could not create debugfs 'README' entry\n"); 3724 pr_warning("Could not create debugfs 'README' entry\n");
3592 3725
3593 entry = debugfs_create_file("trace_pipe", 0644, d_tracer, 3726 entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3594 NULL, &tracing_pipe_fops); 3727 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3595 if (!entry) 3728 if (!entry)
3596 pr_warning("Could not create debugfs " 3729 pr_warning("Could not create debugfs "
3597 "'trace_pipe' entry\n"); 3730 "'trace_pipe' entry\n");
@@ -3608,6 +3741,26 @@ static __init int tracer_init_debugfs(void)
3608 pr_warning("Could not create debugfs " 3741 pr_warning("Could not create debugfs "
3609 "'trace_marker' entry\n"); 3742 "'trace_marker' entry\n");
3610 3743
3744 buffers = debugfs_create_dir("binary_buffers", d_tracer);
3745
3746 if (!buffers)
3747 pr_warning("Could not create buffers directory\n");
3748 else {
3749 int cpu;
3750 char buf[64];
3751
3752 for_each_tracing_cpu(cpu) {
3753 sprintf(buf, "%d", cpu);
3754
3755 entry = debugfs_create_file(buf, 0444, buffers,
3756 (void *)(long)cpu,
3757 &tracing_buffers_fops);
3758 if (!entry)
3759 pr_warning("Could not create debugfs buffers "
3760 "'%s' entry\n", buf);
3761 }
3762 }
3763
3611#ifdef CONFIG_DYNAMIC_FTRACE 3764#ifdef CONFIG_DYNAMIC_FTRACE
3612 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3765 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3613 &ftrace_update_tot_cnt, 3766 &ftrace_update_tot_cnt,
@@ -3619,77 +3772,12 @@ static __init int tracer_init_debugfs(void)
3619#ifdef CONFIG_SYSPROF_TRACER 3772#ifdef CONFIG_SYSPROF_TRACER
3620 init_tracer_sysprof_debugfs(d_tracer); 3773 init_tracer_sysprof_debugfs(d_tracer);
3621#endif 3774#endif
3622 return 0;
3623}
3624
3625int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3626{
3627 static DEFINE_SPINLOCK(trace_buf_lock);
3628 static char trace_buf[TRACE_BUF_SIZE];
3629
3630 struct ring_buffer_event *event;
3631 struct trace_array *tr = &global_trace;
3632 struct trace_array_cpu *data;
3633 int cpu, len = 0, size, pc;
3634 struct print_entry *entry;
3635 unsigned long irq_flags;
3636 3775
3637 if (tracing_disabled || tracing_selftest_running) 3776 for_each_tracing_cpu(cpu)
3638 return 0; 3777 tracing_init_debugfs_percpu(cpu);
3639 3778
3640 pc = preempt_count(); 3779 return 0;
3641 preempt_disable_notrace();
3642 cpu = raw_smp_processor_id();
3643 data = tr->data[cpu];
3644
3645 if (unlikely(atomic_read(&data->disabled)))
3646 goto out;
3647
3648 pause_graph_tracing();
3649 spin_lock_irqsave(&trace_buf_lock, irq_flags);
3650 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3651
3652 len = min(len, TRACE_BUF_SIZE-1);
3653 trace_buf[len] = 0;
3654
3655 size = sizeof(*entry) + len + 1;
3656 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3657 if (!event)
3658 goto out_unlock;
3659 entry = ring_buffer_event_data(event);
3660 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3661 entry->ent.type = TRACE_PRINT;
3662 entry->ip = ip;
3663 entry->depth = depth;
3664
3665 memcpy(&entry->buf, trace_buf, len);
3666 entry->buf[len] = 0;
3667 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3668
3669 out_unlock:
3670 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3671 unpause_graph_tracing();
3672 out:
3673 preempt_enable_notrace();
3674
3675 return len;
3676}
3677EXPORT_SYMBOL_GPL(trace_vprintk);
3678
3679int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3680{
3681 int ret;
3682 va_list ap;
3683
3684 if (!(trace_flags & TRACE_ITER_PRINTK))
3685 return 0;
3686
3687 va_start(ap, fmt);
3688 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3689 va_end(ap);
3690 return ret;
3691} 3780}
3692EXPORT_SYMBOL_GPL(__ftrace_printk);
3693 3781
3694static int trace_panic_handler(struct notifier_block *this, 3782static int trace_panic_handler(struct notifier_block *this,
3695 unsigned long event, void *unused) 3783 unsigned long event, void *unused)
@@ -3750,7 +3838,7 @@ trace_printk_seq(struct trace_seq *s)
3750 3838
3751 printk(KERN_TRACE "%s", s->buffer); 3839 printk(KERN_TRACE "%s", s->buffer);
3752 3840
3753 trace_seq_reset(s); 3841 trace_seq_init(s);
3754} 3842}
3755 3843
3756void ftrace_dump(void) 3844void ftrace_dump(void)
@@ -3782,8 +3870,10 @@ void ftrace_dump(void)
3782 3870
3783 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3871 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3784 3872
3873 /* Simulate the iterator */
3785 iter.tr = &global_trace; 3874 iter.tr = &global_trace;
3786 iter.trace = current_trace; 3875 iter.trace = current_trace;
3876 iter.cpu_file = TRACE_PIPE_ALL_CPU;
3787 3877
3788 /* 3878 /*
3789 * We need to stop all tracing on all CPUS to read the 3879 * We need to stop all tracing on all CPUS to read the
@@ -3835,8 +3925,12 @@ __init static int tracer_alloc_buffers(void)
3835 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 3925 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3836 goto out_free_buffer_mask; 3926 goto out_free_buffer_mask;
3837 3927
3928 if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
3929 goto out_free_tracing_cpumask;
3930
3838 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 3931 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3839 cpumask_copy(tracing_cpumask, cpu_all_mask); 3932 cpumask_copy(tracing_cpumask, cpu_all_mask);
3933 cpumask_clear(tracing_reader_cpumask);
3840 3934
3841 /* TODO: make the number of buffers hot pluggable with CPUS */ 3935 /* TODO: make the number of buffers hot pluggable with CPUS */
3842 global_trace.buffer = ring_buffer_alloc(trace_buf_size, 3936 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
@@ -3871,14 +3965,10 @@ __init static int tracer_alloc_buffers(void)
3871 trace_init_cmdlines(); 3965 trace_init_cmdlines();
3872 3966
3873 register_tracer(&nop_trace); 3967 register_tracer(&nop_trace);
3968 current_trace = &nop_trace;
3874#ifdef CONFIG_BOOT_TRACER 3969#ifdef CONFIG_BOOT_TRACER
3875 register_tracer(&boot_tracer); 3970 register_tracer(&boot_tracer);
3876 current_trace = &boot_tracer;
3877 current_trace->init(&global_trace);
3878#else
3879 current_trace = &nop_trace;
3880#endif 3971#endif
3881
3882 /* All seems OK, enable tracing */ 3972 /* All seems OK, enable tracing */
3883 tracing_disabled = 0; 3973 tracing_disabled = 0;
3884 3974
@@ -3889,11 +3979,34 @@ __init static int tracer_alloc_buffers(void)
3889 ret = 0; 3979 ret = 0;
3890 3980
3891out_free_cpumask: 3981out_free_cpumask:
3982 free_cpumask_var(tracing_reader_cpumask);
3983out_free_tracing_cpumask:
3892 free_cpumask_var(tracing_cpumask); 3984 free_cpumask_var(tracing_cpumask);
3893out_free_buffer_mask: 3985out_free_buffer_mask:
3894 free_cpumask_var(tracing_buffer_mask); 3986 free_cpumask_var(tracing_buffer_mask);
3895out: 3987out:
3896 return ret; 3988 return ret;
3897} 3989}
3990
3991__init static int clear_boot_tracer(void)
3992{
3993 /*
3994 * The default tracer at boot buffer is an init section.
3995 * This function is called in lateinit. If we did not
3996 * find the boot tracer, then clear it out, to prevent
3997 * later registration from accessing the buffer that is
3998 * about to be freed.
3999 */
4000 if (!default_bootup_tracer)
4001 return 0;
4002
4003 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4004 default_bootup_tracer);
4005 default_bootup_tracer = NULL;
4006
4007 return 0;
4008}
4009
3898early_initcall(tracer_alloc_buffers); 4010early_initcall(tracer_alloc_buffers);
3899fs_initcall(tracer_init_debugfs); 4011fs_initcall(tracer_init_debugfs);
4012late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd95..d80ca0d464d9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,8 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h>
13#include <trace/power.h>
12 14
13enum trace_type { 15enum trace_type {
14 __TRACE_FIRST_TYPE = 0, 16 __TRACE_FIRST_TYPE = 0,
@@ -16,7 +18,6 @@ enum trace_type {
16 TRACE_FN, 18 TRACE_FN,
17 TRACE_CTX, 19 TRACE_CTX,
18 TRACE_WAKE, 20 TRACE_WAKE,
19 TRACE_CONT,
20 TRACE_STACK, 21 TRACE_STACK,
21 TRACE_PRINT, 22 TRACE_PRINT,
22 TRACE_SPECIAL, 23 TRACE_SPECIAL,
@@ -29,9 +30,14 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 30 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 31 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES, 32 TRACE_HW_BRANCHES,
33 TRACE_SYSCALL_ENTER,
34 TRACE_SYSCALL_EXIT,
35 TRACE_KMEM_ALLOC,
36 TRACE_KMEM_FREE,
32 TRACE_POWER, 37 TRACE_POWER,
38 TRACE_BLK,
33 39
34 __TRACE_LAST_TYPE 40 __TRACE_LAST_TYPE,
35}; 41};
36 42
37/* 43/*
@@ -42,7 +48,6 @@ enum trace_type {
42 */ 48 */
43struct trace_entry { 49struct trace_entry {
44 unsigned char type; 50 unsigned char type;
45 unsigned char cpu;
46 unsigned char flags; 51 unsigned char flags;
47 unsigned char preempt_count; 52 unsigned char preempt_count;
48 int pid; 53 int pid;
@@ -60,13 +65,13 @@ struct ftrace_entry {
60 65
61/* Function call entry */ 66/* Function call entry */
62struct ftrace_graph_ent_entry { 67struct ftrace_graph_ent_entry {
63 struct trace_entry ent; 68 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent; 69 struct ftrace_graph_ent graph_ent;
65}; 70};
66 71
67/* Function return entry */ 72/* Function return entry */
68struct ftrace_graph_ret_entry { 73struct ftrace_graph_ret_entry {
69 struct trace_entry ent; 74 struct trace_entry ent;
70 struct ftrace_graph_ret ret; 75 struct ftrace_graph_ret ret;
71}; 76};
72extern struct tracer boot_tracer; 77extern struct tracer boot_tracer;
@@ -112,13 +117,14 @@ struct userstack_entry {
112}; 117};
113 118
114/* 119/*
115 * ftrace_printk entry: 120 * trace_printk entry:
116 */ 121 */
117struct print_entry { 122struct print_entry {
118 struct trace_entry ent; 123 struct trace_entry ent;
119 unsigned long ip; 124 unsigned long ip;
120 int depth; 125 int depth;
121 char buf[]; 126 const char *fmt;
127 u32 buf[];
122}; 128};
123 129
124#define TRACE_OLD_SIZE 88 130#define TRACE_OLD_SIZE 88
@@ -170,15 +176,45 @@ struct trace_power {
170 struct power_trace state_data; 176 struct power_trace state_data;
171}; 177};
172 178
179struct kmemtrace_alloc_entry {
180 struct trace_entry ent;
181 enum kmemtrace_type_id type_id;
182 unsigned long call_site;
183 const void *ptr;
184 size_t bytes_req;
185 size_t bytes_alloc;
186 gfp_t gfp_flags;
187 int node;
188};
189
190struct kmemtrace_free_entry {
191 struct trace_entry ent;
192 enum kmemtrace_type_id type_id;
193 unsigned long call_site;
194 const void *ptr;
195};
196
197struct syscall_trace_enter {
198 struct trace_entry ent;
199 int nr;
200 unsigned long args[];
201};
202
203struct syscall_trace_exit {
204 struct trace_entry ent;
205 int nr;
206 unsigned long ret;
207};
208
209
173/* 210/*
174 * trace_flag_type is an enumeration that holds different 211 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are: 212 * states when a trace occurs. These are:
176 * IRQS_OFF - interrupts were disabled 213 * IRQS_OFF - interrupts were disabled
177 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags 214 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
178 * NEED_RESCED - reschedule is requested 215 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler 216 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler 217 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */ 218 */
183enum trace_flag_type { 219enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01, 220 TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +222,6 @@ enum trace_flag_type {
186 TRACE_FLAG_NEED_RESCHED = 0x04, 222 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08, 223 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10, 224 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190}; 225};
191 226
192#define TRACE_BUF_SIZE 1024 227#define TRACE_BUF_SIZE 1024
@@ -198,6 +233,7 @@ enum trace_flag_type {
198 */ 233 */
199struct trace_array_cpu { 234struct trace_array_cpu {
200 atomic_t disabled; 235 atomic_t disabled;
236 void *buffer_page; /* ring buffer spare */
201 237
202 /* these fields get copied into max-trace: */ 238 /* these fields get copied into max-trace: */
203 unsigned long trace_idx; 239 unsigned long trace_idx;
@@ -262,7 +298,6 @@ extern void __ftrace_bad_type(void);
262 do { \ 298 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ 299 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 300 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 301 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 302 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 303 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
@@ -279,7 +314,15 @@ extern void __ftrace_bad_type(void);
279 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 314 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
280 TRACE_GRAPH_RET); \ 315 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 316 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ 317 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
318 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
319 TRACE_KMEM_ALLOC); \
320 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
321 TRACE_KMEM_FREE); \
322 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
323 TRACE_SYSCALL_ENTER); \
324 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
325 TRACE_SYSCALL_EXIT); \
283 __ftrace_bad_type(); \ 326 __ftrace_bad_type(); \
284 } while (0) 327 } while (0)
285 328
@@ -287,7 +330,8 @@ extern void __ftrace_bad_type(void);
287enum print_line_t { 330enum print_line_t {
288 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 331 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
289 TRACE_TYPE_HANDLED = 1, 332 TRACE_TYPE_HANDLED = 1,
290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 333 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
334 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
291}; 335};
292 336
293 337
@@ -297,8 +341,8 @@ enum print_line_t {
297 * flags value in struct tracer_flags. 341 * flags value in struct tracer_flags.
298 */ 342 */
299struct tracer_opt { 343struct tracer_opt {
300 const char *name; /* Will appear on the trace_options file */ 344 const char *name; /* Will appear on the trace_options file */
301 u32 bit; /* Mask assigned in val field in tracer_flags */ 345 u32 bit; /* Mask assigned in val field in tracer_flags */
302}; 346};
303 347
304/* 348/*
@@ -307,28 +351,51 @@ struct tracer_opt {
307 */ 351 */
308struct tracer_flags { 352struct tracer_flags {
309 u32 val; 353 u32 val;
310 struct tracer_opt *opts; 354 struct tracer_opt *opts;
311}; 355};
312 356
313/* Makes more easy to define a tracer opt */ 357/* Makes more easy to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b 358#define TRACER_OPT(s, b) .name = #s, .bit = b
315 359
316/* 360
317 * A specific tracer, represented by methods that operate on a trace array: 361/**
362 * struct tracer - a specific tracer and its callbacks to interact with debugfs
363 * @name: the name chosen to select it on the available_tracers file
364 * @init: called when one switches to this tracer (echo name > current_tracer)
365 * @reset: called when one switches to another tracer
366 * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
367 * @stop: called when tracing is paused (echo 0 > tracing_enabled)
368 * @open: called when the trace file is opened
369 * @pipe_open: called when the trace_pipe file is opened
370 * @wait_pipe: override how the user waits for traces on trace_pipe
371 * @close: called when the trace file is released
372 * @read: override the default read callback on trace_pipe
373 * @splice_read: override the default splice_read callback on trace_pipe
374 * @selftest: selftest to run on boot (see trace_selftest.c)
375 * @print_headers: override the first lines that describe your columns
376 * @print_line: callback that prints a trace
377 * @set_flag: signals one of your private flags changed (trace_options file)
378 * @flags: your private flags
318 */ 379 */
319struct tracer { 380struct tracer {
320 const char *name; 381 const char *name;
321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr); 382 int (*init)(struct trace_array *tr);
323 void (*reset)(struct trace_array *tr); 383 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr); 384 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr); 385 void (*stop)(struct trace_array *tr);
326 void (*open)(struct trace_iterator *iter); 386 void (*open)(struct trace_iterator *iter);
327 void (*pipe_open)(struct trace_iterator *iter); 387 void (*pipe_open)(struct trace_iterator *iter);
388 void (*wait_pipe)(struct trace_iterator *iter);
328 void (*close)(struct trace_iterator *iter); 389 void (*close)(struct trace_iterator *iter);
329 ssize_t (*read)(struct trace_iterator *iter, 390 ssize_t (*read)(struct trace_iterator *iter,
330 struct file *filp, char __user *ubuf, 391 struct file *filp, char __user *ubuf,
331 size_t cnt, loff_t *ppos); 392 size_t cnt, loff_t *ppos);
393 ssize_t (*splice_read)(struct trace_iterator *iter,
394 struct file *filp,
395 loff_t *ppos,
396 struct pipe_inode_info *pipe,
397 size_t len,
398 unsigned int flags);
332#ifdef CONFIG_FTRACE_STARTUP_TEST 399#ifdef CONFIG_FTRACE_STARTUP_TEST
333 int (*selftest)(struct tracer *trace, 400 int (*selftest)(struct tracer *trace,
334 struct trace_array *tr); 401 struct trace_array *tr);
@@ -339,7 +406,8 @@ struct tracer {
339 int (*set_flag)(u32 old_flags, u32 bit, int set); 406 int (*set_flag)(u32 old_flags, u32 bit, int set);
340 struct tracer *next; 407 struct tracer *next;
341 int print_max; 408 int print_max;
342 struct tracer_flags *flags; 409 struct tracer_flags *flags;
410 struct tracer_stat *stats;
343}; 411};
344 412
345struct trace_seq { 413struct trace_seq {
@@ -348,6 +416,16 @@ struct trace_seq {
348 unsigned int readpos; 416 unsigned int readpos;
349}; 417};
350 418
419static inline void
420trace_seq_init(struct trace_seq *s)
421{
422 s->len = 0;
423 s->readpos = 0;
424}
425
426
427#define TRACE_PIPE_ALL_CPU -1
428
351/* 429/*
352 * Trace iterator - used by printout routines who present trace 430 * Trace iterator - used by printout routines who present trace
353 * results to users and which routines might sleep, etc: 431 * results to users and which routines might sleep, etc:
@@ -356,6 +434,8 @@ struct trace_iterator {
356 struct trace_array *tr; 434 struct trace_array *tr;
357 struct tracer *trace; 435 struct tracer *trace;
358 void *private; 436 void *private;
437 int cpu_file;
438 struct mutex mutex;
359 struct ring_buffer_iter *buffer_iter[NR_CPUS]; 439 struct ring_buffer_iter *buffer_iter[NR_CPUS];
360 440
361 /* The below is zeroed out in pipe_read */ 441 /* The below is zeroed out in pipe_read */
@@ -371,6 +451,7 @@ struct trace_iterator {
371 cpumask_var_t started; 451 cpumask_var_t started;
372}; 452};
373 453
454int tracer_init(struct tracer *t, struct trace_array *tr);
374int tracing_is_enabled(void); 455int tracing_is_enabled(void);
375void trace_wake_up(void); 456void trace_wake_up(void);
376void tracing_reset(struct trace_array *tr, int cpu); 457void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,26 +460,48 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
379struct dentry *tracing_init_dentry(void); 460struct dentry *tracing_init_dentry(void);
380void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 461void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
381 462
463struct ring_buffer_event;
464
465struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
466 unsigned char type,
467 unsigned long len,
468 unsigned long flags,
469 int pc);
470void trace_buffer_unlock_commit(struct trace_array *tr,
471 struct ring_buffer_event *event,
472 unsigned long flags, int pc);
473
474struct ring_buffer_event *
475trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
476 unsigned long flags, int pc);
477void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
478 unsigned long flags, int pc);
479
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 480struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data); 481 struct trace_array_cpu *data);
482
483struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
484 int *ent_cpu, u64 *ent_ts);
485
384void tracing_generic_entry_update(struct trace_entry *entry, 486void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags, 487 unsigned long flags,
386 int pc); 488 int pc);
387 489
490void default_wait_pipe(struct trace_iterator *iter);
491void poll_wait_pipe(struct trace_iterator *iter);
492
388void ftrace(struct trace_array *tr, 493void ftrace(struct trace_array *tr,
389 struct trace_array_cpu *data, 494 struct trace_array_cpu *data,
390 unsigned long ip, 495 unsigned long ip,
391 unsigned long parent_ip, 496 unsigned long parent_ip,
392 unsigned long flags, int pc); 497 unsigned long flags, int pc);
393void tracing_sched_switch_trace(struct trace_array *tr, 498void tracing_sched_switch_trace(struct trace_array *tr,
394 struct trace_array_cpu *data,
395 struct task_struct *prev, 499 struct task_struct *prev,
396 struct task_struct *next, 500 struct task_struct *next,
397 unsigned long flags, int pc); 501 unsigned long flags, int pc);
398void tracing_record_cmdline(struct task_struct *tsk); 502void tracing_record_cmdline(struct task_struct *tsk);
399 503
400void tracing_sched_wakeup_trace(struct trace_array *tr, 504void tracing_sched_wakeup_trace(struct trace_array *tr,
401 struct trace_array_cpu *data,
402 struct task_struct *wakee, 505 struct task_struct *wakee,
403 struct task_struct *cur, 506 struct task_struct *cur,
404 unsigned long flags, int pc); 507 unsigned long flags, int pc);
@@ -408,14 +511,12 @@ void trace_special(struct trace_array *tr,
408 unsigned long arg2, 511 unsigned long arg2,
409 unsigned long arg3, int pc); 512 unsigned long arg3, int pc);
410void trace_function(struct trace_array *tr, 513void trace_function(struct trace_array *tr,
411 struct trace_array_cpu *data,
412 unsigned long ip, 514 unsigned long ip,
413 unsigned long parent_ip, 515 unsigned long parent_ip,
414 unsigned long flags, int pc); 516 unsigned long flags, int pc);
415 517
416void trace_graph_return(struct ftrace_graph_ret *trace); 518void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace); 519int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
419 520
420void tracing_start_cmdline_record(void); 521void tracing_start_cmdline_record(void);
421void tracing_stop_cmdline_record(void); 522void tracing_stop_cmdline_record(void);
@@ -434,15 +535,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
434void update_max_tr_single(struct trace_array *tr, 535void update_max_tr_single(struct trace_array *tr,
435 struct task_struct *tsk, int cpu); 536 struct task_struct *tsk, int cpu);
436 537
437extern cycle_t ftrace_now(int cpu); 538void __trace_stack(struct trace_array *tr,
539 unsigned long flags,
540 int skip, int pc);
438 541
439#ifdef CONFIG_FUNCTION_TRACER 542extern cycle_t ftrace_now(int cpu);
440void tracing_start_function_trace(void);
441void tracing_stop_function_trace(void);
442#else
443# define tracing_start_function_trace() do { } while (0)
444# define tracing_stop_function_trace() do { } while (0)
445#endif
446 543
447#ifdef CONFIG_CONTEXT_SWITCH_TRACER 544#ifdef CONFIG_CONTEXT_SWITCH_TRACER
448typedef void 545typedef void
@@ -456,10 +553,10 @@ struct tracer_switch_ops {
456 void *private; 553 void *private;
457 struct tracer_switch_ops *next; 554 struct tracer_switch_ops *next;
458}; 555};
459
460char *trace_find_cmdline(int pid);
461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 556#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
462 557
558extern char *trace_find_cmdline(int pid);
559
463#ifdef CONFIG_DYNAMIC_FTRACE 560#ifdef CONFIG_DYNAMIC_FTRACE
464extern unsigned long ftrace_update_tot_cnt; 561extern unsigned long ftrace_update_tot_cnt;
465#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 562#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +566,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
469#ifdef CONFIG_FTRACE_STARTUP_TEST 566#ifdef CONFIG_FTRACE_STARTUP_TEST
470extern int trace_selftest_startup_function(struct tracer *trace, 567extern int trace_selftest_startup_function(struct tracer *trace,
471 struct trace_array *tr); 568 struct trace_array *tr);
569extern int trace_selftest_startup_function_graph(struct tracer *trace,
570 struct trace_array *tr);
472extern int trace_selftest_startup_irqsoff(struct tracer *trace, 571extern int trace_selftest_startup_irqsoff(struct tracer *trace,
473 struct trace_array *tr); 572 struct trace_array *tr);
474extern int trace_selftest_startup_preemptoff(struct tracer *trace, 573extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -488,15 +587,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
488#endif /* CONFIG_FTRACE_STARTUP_TEST */ 587#endif /* CONFIG_FTRACE_STARTUP_TEST */
489 588
490extern void *head_page(struct trace_array_cpu *data); 589extern void *head_page(struct trace_array_cpu *data);
491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
499 size_t cnt);
500extern long ns2usecs(cycle_t nsec); 590extern long ns2usecs(cycle_t nsec);
501extern int 591extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); 592trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
@@ -580,7 +670,9 @@ enum trace_iterator_flags {
580 TRACE_ITER_ANNOTATE = 0x2000, 670 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000, 671 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000, 672 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000 673 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
674 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
675 TRACE_ITER_LATENCY_FMT = 0x40000,
584}; 676};
585 677
586/* 678/*
@@ -601,12 +693,12 @@ extern struct tracer nop_trace;
601 * preempt_enable (after a disable), a schedule might take place 693 * preempt_enable (after a disable), a schedule might take place
602 * causing an infinite recursion. 694 * causing an infinite recursion.
603 * 695 *
604 * To prevent this, we read the need_recshed flag before 696 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we 697 * disabling preemption. When we want to enable preemption we
606 * check the flag, if it is set, then we call preempt_enable_no_resched. 698 * check the flag, if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable. 699 * Otherwise, we call preempt_enable.
608 * 700 *
609 * The rational for doing the above is that if need resched is set 701 * The rational for doing the above is that if need_resched is set
610 * and we have yet to reschedule, we are either in an atomic location 702 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside 703 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched. 704 * the scheduler and do not want to resched.
@@ -627,7 +719,7 @@ static inline int ftrace_preempt_disable(void)
627 * 719 *
628 * This is a scheduler safe way to enable preemption and not miss 720 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disabled saved the state of preemption. 721 * any preemption checks. The disabled saved the state of preemption.
630 * If resched is set, then we were either inside an atomic or 722 * If resched is set, then we are either inside an atomic or
631 * are inside the scheduler (we would have already scheduled 723 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal 724 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead. 725 * preempt_enable, but preempt_enable_no_resched instead.
@@ -664,4 +756,26 @@ static inline void trace_branch_disable(void)
664} 756}
665#endif /* CONFIG_BRANCH_TRACER */ 757#endif /* CONFIG_BRANCH_TRACER */
666 758
759/* trace event type bit fields, not numeric */
760enum {
761 TRACE_EVENT_TYPE_PRINTF = 1,
762 TRACE_EVENT_TYPE_RAW = 2,
763};
764
765struct ftrace_event_call {
766 char *name;
767 char *system;
768 struct dentry *dir;
769 int enabled;
770 int (*regfunc)(void);
771 void (*unregfunc)(void);
772 int id;
773 int (*raw_init)(void);
774 int (*show_format)(struct trace_seq *s);
775};
776
777void event_trace_printk(unsigned long ip, const char *fmt, ...);
778extern struct ftrace_event_call __start_ftrace_events[];
779extern struct ftrace_event_call __stop_ftrace_events[];
780
667#endif /* _LINUX_KERNEL_TRACE_H */ 781#endif /* _LINUX_KERNEL_TRACE_H */
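
The ftrace_preempt_disable()/ftrace_preempt_enable() helpers whose rationale is documented in the hunk above are what the individual tracers wrap around their ring-buffer writes. A minimal usage sketch follows; the surrounding tracer code is illustrative and not part of this patch:

        int resched;

        resched = ftrace_preempt_disable();
        /* reserve, fill and commit a ring-buffer event here */
        ftrace_preempt_enable(resched);

The saved return value tells the enable side whether need_resched was already set, so it can call preempt_enable_no_resched() instead of preempt_enable() and avoid recursing into the scheduler from inside a trace point.
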
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e13..7a30fc4c3642 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12 12
13#include "trace.h" 13#include "trace.h"
14#include "trace_output.h"
14 15
15static struct trace_array *boot_trace; 16static struct trace_array *boot_trace;
16static bool pre_initcalls_finished; 17static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
27 28
28void enable_boot_trace(void) 29void enable_boot_trace(void)
29{ 30{
30 if (pre_initcalls_finished) 31 if (boot_trace && pre_initcalls_finished)
31 tracing_start_sched_switch_record(); 32 tracing_start_sched_switch_record();
32} 33}
33 34
34void disable_boot_trace(void) 35void disable_boot_trace(void)
35{ 36{
36 if (pre_initcalls_finished) 37 if (boot_trace && pre_initcalls_finished)
37 tracing_stop_sched_switch_record(); 38 tracing_stop_sched_switch_record();
38} 39}
39 40
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
42 int cpu; 43 int cpu;
43 boot_trace = tr; 44 boot_trace = tr;
44 45
46 if (!tr)
47 return 0;
48
45 for_each_cpu(cpu, cpu_possible_mask) 49 for_each_cpu(cpu, cpu_possible_mask)
46 tracing_reset(tr, cpu); 50 tracing_reset(tr, cpu);
47 51
@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
128{ 132{
129 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
130 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
131 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
133 136
134 if (!pre_initcalls_finished) 137 if (!tr || !pre_initcalls_finished)
135 return; 138 return;
136 139
137 /* Get its name now since this function could 140 /* Get its name now since this function could
@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
140 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable(); 144 preempt_disable();
142 145
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 146 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
144 &irq_flags); 147 sizeof(*entry), 0, 0);
145 if (!event) 148 if (!event)
146 goto out; 149 goto out;
147 entry = ring_buffer_event_data(event); 150 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt; 151 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 152 trace_buffer_unlock_commit(tr, event, 0, 0);
152
153 trace_wake_up();
154
155 out: 153 out:
156 preempt_enable(); 154 preempt_enable();
157} 155}
@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{ 158{
161 struct ring_buffer_event *event; 159 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry; 160 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace; 161 struct trace_array *tr = boot_trace;
165 162
166 if (!pre_initcalls_finished) 163 if (!tr || !pre_initcalls_finished)
167 return; 164 return;
168 165
169 sprint_symbol(bt->func, (unsigned long)fn); 166 sprint_symbol(bt->func, (unsigned long)fn);
170 preempt_disable(); 167 preempt_disable();
171 168
172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 169 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
173 &irq_flags); 170 sizeof(*entry), 0, 0);
174 if (!event) 171 if (!event)
175 goto out; 172 goto out;
176 entry = ring_buffer_event_data(event); 173 entry = ring_buffer_event_data(event);
177 tracing_generic_entry_update(&entry->ent, 0, 0);
178 entry->ent.type = TRACE_BOOT_RET;
179 entry->boot_ret = *bt; 174 entry->boot_ret = *bt;
180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 175 trace_buffer_unlock_commit(tr, event, 0, 0);
181
182 trace_wake_up();
183
184 out: 176 out:
185 preempt_enable(); 177 preempt_enable();
186} 178}
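
The trace_boot.c conversion above replaces the open-coded reserve/update/commit/wake-up sequence with the two helpers newly declared in trace.h. A sketch of what those helpers plausibly do, inferred from the code they replace (the real bodies live in trace.c, which is not part of this excerpt):

        struct ring_buffer_event *
        trace_buffer_lock_reserve(struct trace_array *tr, unsigned char type,
                                  unsigned long len, unsigned long flags, int pc)
        {
                struct ring_buffer_event *event;

                event = ring_buffer_lock_reserve(tr->buffer, len);
                if (event) {
                        struct trace_entry *ent = ring_buffer_event_data(event);

                        /* fill in pid/flags/preempt count and the entry type */
                        tracing_generic_entry_update(ent, flags, pc);
                        ent->type = type;
                }
                return event;
        }

        void trace_buffer_unlock_commit(struct trace_array *tr,
                                        struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
        {
                ring_buffer_unlock_commit(tr->buffer, event);
                trace_wake_up();
        }
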
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac7..ad8c22efff41 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <asm/local.h> 16#include <asm/local.h>
17
17#include "trace.h" 18#include "trace.h"
19#include "trace_stat.h"
20#include "trace_output.h"
18 21
19#ifdef CONFIG_BRANCH_TRACER 22#ifdef CONFIG_BRANCH_TRACER
20 23
24static struct tracer branch_trace;
21static int branch_tracing_enabled __read_mostly; 25static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex); 26static DEFINE_MUTEX(branch_tracing_mutex);
27
23static struct trace_array *branch_tracer; 28static struct trace_array *branch_tracer;
24 29
25static void 30static void
@@ -28,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
28 struct trace_array *tr = branch_tracer; 33 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event; 34 struct ring_buffer_event *event;
30 struct trace_branch *entry; 35 struct trace_branch *entry;
31 unsigned long flags, irq_flags; 36 unsigned long flags;
32 int cpu, pc; 37 int cpu, pc;
33 const char *p; 38 const char *p;
34 39
@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 52 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out; 53 goto out;
49 54
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 55 pc = preempt_count();
51 &irq_flags); 56 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
57 sizeof(*entry), flags, pc);
52 if (!event) 58 if (!event)
53 goto out; 59 goto out;
54 60
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event); 61 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59 62
60 /* Strip off the path, only save the file */ 63 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file); 64 p = f->file + strlen(f->file);
@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
70 entry->line = f->line; 73 entry->line = f->line;
71 entry->correct = val == expect; 74 entry->correct = val == expect;
72 75
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 76 ring_buffer_unlock_commit(tr->buffer, event);
74 77
75 out: 78 out:
76 atomic_dec(&tr->data[cpu]->disabled); 79 atomic_dec(&tr->data[cpu]->disabled);
@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
88 91
89int enable_branch_tracing(struct trace_array *tr) 92int enable_branch_tracing(struct trace_array *tr)
90{ 93{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex); 94 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr; 95 branch_tracer = tr;
95 /* 96 /*
@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_array *tr)
100 branch_tracing_enabled++; 101 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex); 102 mutex_unlock(&branch_tracing_mutex);
102 103
103 return ret; 104 return 0;
104} 105}
105 106
106void disable_branch_tracing(void) 107void disable_branch_tracing(void)
@@ -128,11 +129,6 @@ static void stop_branch_trace(struct trace_array *tr)
128 129
129static int branch_trace_init(struct trace_array *tr) 130static int branch_trace_init(struct trace_array *tr)
130{ 131{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr); 132 start_branch_trace(tr);
137 return 0; 133 return 0;
138} 134}
@@ -142,22 +138,53 @@ static void branch_trace_reset(struct trace_array *tr)
142 stop_branch_trace(tr); 138 stop_branch_trace(tr);
143} 139}
144 140
145struct tracer branch_trace __read_mostly = 141static enum print_line_t trace_branch_print(struct trace_iterator *iter,
142 int flags)
143{
144 struct trace_branch *field;
145
146 trace_assign_type(field, iter->ent);
147
148 if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
149 field->correct ? " ok " : " MISS ",
150 field->func,
151 field->file,
152 field->line))
153 return TRACE_TYPE_PARTIAL_LINE;
154
155 return TRACE_TYPE_HANDLED;
156}
157
158
159static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH,
161 .trace = trace_branch_print,
162};
163
164static struct tracer branch_trace __read_mostly =
146{ 165{
147 .name = "branch", 166 .name = "branch",
148 .init = branch_trace_init, 167 .init = branch_trace_init,
149 .reset = branch_trace_reset, 168 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST 169#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch, 170 .selftest = trace_selftest_startup_branch,
152#endif 171#endif /* CONFIG_FTRACE_SELFTEST */
153}; 172};
154 173
155__init static int init_branch_trace(void) 174__init static int init_branch_tracer(void)
156{ 175{
176 int ret;
177
178 ret = register_ftrace_event(&trace_branch_event);
179 if (!ret) {
180 printk(KERN_WARNING "Warning: could not register "
181 "branch events\n");
182 return 1;
183 }
157 return register_tracer(&branch_trace); 184 return register_tracer(&branch_trace);
158} 185}
186device_initcall(init_branch_tracer);
159 187
160device_initcall(init_branch_trace);
161#else 188#else
162static inline 189static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) 190void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +210,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
183} 210}
184EXPORT_SYMBOL(ftrace_likely_update); 211EXPORT_SYMBOL(ftrace_likely_update);
185 212
186struct ftrace_pointer { 213extern unsigned long __start_annotated_branch_profile[];
187 void *start; 214extern unsigned long __stop_annotated_branch_profile[];
188 void *stop;
189 int hit;
190};
191 215
192static void * 216static int annotated_branch_stat_headers(struct seq_file *m)
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{ 217{
195 const struct ftrace_pointer *f = m->private; 218 seq_printf(m, " correct incorrect %% ");
196 struct ftrace_branch_data *p = v; 219 seq_printf(m, " Function "
197 220 " File Line\n"
198 (*pos)++; 221 " ------- --------- - "
199 222 " -------- "
200 if (v == (void *)1) 223 " ---- ----\n");
201 return f->start; 224 return 0;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209} 225}
210 226
211static void *t_start(struct seq_file *m, loff_t *pos) 227static inline long get_incorrect_percent(struct ftrace_branch_data *p)
212{ 228{
213 void *t = (void *)1; 229 long percent;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218 230
219 return t; 231 if (p->correct) {
220} 232 percent = p->incorrect * 100;
233 percent /= p->correct + p->incorrect;
234 } else
235 percent = p->incorrect ? 100 : -1;
221 236
222static void t_stop(struct seq_file *m, void *p) 237 return percent;
223{
224} 238}
225 239
226static int t_show(struct seq_file *m, void *v) 240static int branch_stat_show(struct seq_file *m, void *v)
227{ 241{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v; 242 struct ftrace_branch_data *p = v;
230 const char *f; 243 const char *f;
231 long percent; 244 long percent;
232 245
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */ 246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file); 247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/') 248 while (f >= p->file && *f != '/')
@@ -252,11 +252,7 @@ static int t_show(struct seq_file *m, void *v)
252 /* 252 /*
253 * The miss is overlayed on correct, and hit on incorrect. 253 * The miss is overlayed on correct, and hit on incorrect.
254 */ 254 */
255 if (p->correct) { 255 percent = get_incorrect_percent(p);
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260 256
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); 257 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0) 258 if (percent < 0)
@@ -267,76 +263,118 @@ static int t_show(struct seq_file *m, void *v)
267 return 0; 263 return 0;
268} 264}
269 265
270static struct seq_operations tracing_likely_seq_ops = { 266static void *annotated_branch_stat_start(void)
271 .start = t_start, 267{
272 .next = t_next, 268 return __start_annotated_branch_profile;
273 .stop = t_stop, 269}
274 .show = t_show, 270
271static void *
272annotated_branch_stat_next(void *v, int idx)
273{
274 struct ftrace_branch_data *p = v;
275
276 ++p;
277
278 if ((void *)p >= (void *)__stop_annotated_branch_profile)
279 return NULL;
280
281 return p;
282}
283
284static int annotated_branch_stat_cmp(void *p1, void *p2)
285{
286 struct ftrace_branch_data *a = p1;
287 struct ftrace_branch_data *b = p2;
288
289 long percent_a, percent_b;
290
291 percent_a = get_incorrect_percent(a);
292 percent_b = get_incorrect_percent(b);
293
294 if (percent_a < percent_b)
295 return -1;
296 if (percent_a > percent_b)
297 return 1;
298 else
299 return 0;
300}
301
302static struct tracer_stat annotated_branch_stats = {
303 .name = "branch_annotated",
304 .stat_start = annotated_branch_stat_start,
305 .stat_next = annotated_branch_stat_next,
306 .stat_cmp = annotated_branch_stat_cmp,
307 .stat_headers = annotated_branch_stat_headers,
308 .stat_show = branch_stat_show
275}; 309};
276 310
277static int tracing_branch_open(struct inode *inode, struct file *file) 311__init static int init_annotated_branch_stats(void)
278{ 312{
279 int ret; 313 int ret;
280 314
281 ret = seq_open(file, &tracing_likely_seq_ops); 315 ret = register_stat_tracer(&annotated_branch_stats);
282 if (!ret) { 316 if (!ret) {
283 struct seq_file *m = file->private_data; 317 printk(KERN_WARNING "Warning: could not register "
284 m->private = (void *)inode->i_private; 318 "annotated branches stats\n");
319 return 1;
285 } 320 }
286 321 return 0;
287 return ret;
288} 322}
289 323fs_initcall(init_annotated_branch_stats);
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295 324
296#ifdef CONFIG_PROFILE_ALL_BRANCHES 325#ifdef CONFIG_PROFILE_ALL_BRANCHES
326
297extern unsigned long __start_branch_profile[]; 327extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[]; 328extern unsigned long __stop_branch_profile[];
299 329
300static const struct ftrace_pointer ftrace_branch_pos = { 330static int all_branch_stat_headers(struct seq_file *m)
301 .start = __start_branch_profile, 331{
302 .stop = __stop_branch_profile, 332 seq_printf(m, " miss hit %% ");
303 .hit = 1, 333 seq_printf(m, " Function "
304}; 334 " File Line\n"
335 " ------- --------- - "
336 " -------- "
337 " ---- ----\n");
338 return 0;
339}
305 340
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */ 341static void *all_branch_stat_start(void)
342{
343 return __start_branch_profile;
344}
307 345
308extern unsigned long __start_annotated_branch_profile[]; 346static void *
309extern unsigned long __stop_annotated_branch_profile[]; 347all_branch_stat_next(void *v, int idx)
348{
349 struct ftrace_branch_data *p = v;
310 350
311static const struct ftrace_pointer ftrace_annotated_branch_pos = { 351 ++p;
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315 352
316static __init int ftrace_branch_init(void) 353 if ((void *)p >= (void *)__stop_branch_profile)
317{ 354 return NULL;
318 struct dentry *d_tracer;
319 struct dentry *entry;
320 355
321 d_tracer = tracing_init_dentry(); 356 return p;
357}
322 358
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, 359static struct tracer_stat all_branch_stats = {
324 (void *)&ftrace_annotated_branch_pos, 360 .name = "branch_all",
325 &tracing_branch_fops); 361 .stat_start = all_branch_stat_start,
326 if (!entry) 362 .stat_next = all_branch_stat_next,
327 pr_warning("Could not create debugfs " 363 .stat_headers = all_branch_stat_headers,
328 "'profile_annotatet_branch' entry\n"); 364 .stat_show = branch_stat_show
365};
329 366
330#ifdef CONFIG_PROFILE_ALL_BRANCHES 367__init static int all_annotated_branch_stats(void)
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer, 368{
332 (void *)&ftrace_branch_pos, 369 int ret;
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338 370
371 ret = register_stat_tracer(&all_branch_stats);
372 if (!ret) {
373 printk(KERN_WARNING "Warning: could not register "
374 "all branches stats\n");
375 return 1;
376 }
339 return 0; 377 return 0;
340} 378}
341 379fs_initcall(all_annotated_branch_stats);
342device_initcall(ftrace_branch_init); 380#endif /* CONFIG_PROFILE_ALL_BRANCHES */
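
With the change above, the branch profiler reports through the generic stat tracer instead of its own seq_file code, and get_incorrect_percent() feeds both the sort callback and the per-line output. Worked values (the numbers are illustrative, not from a real trace):

        /*
         *   correct = 300, incorrect = 100  ->  (100 * 100) / 400 = 25
         *   correct = 0,   incorrect = 7    ->  100   (branch always missed)
         *   correct = 0,   incorrect = 0    ->  -1    (branch never executed)
         */
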
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..05b176abfd30
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,108 @@
1/*
2 * tracing clocks
3 *
4 * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 *
6 * Implements 3 trace clock variants, with differing scalability/precision
7 * tradeoffs:
8 *
9 * - local: CPU-local trace clock
10 * - medium: scalable global clock with some jitter
11 * - global: globally monotonic, serialized clock
12 *
 13 * Tracer plugins will choose a default from these clocks.
14 */
15#include <linux/spinlock.h>
16#include <linux/hardirq.h>
17#include <linux/module.h>
18#include <linux/percpu.h>
19#include <linux/sched.h>
20#include <linux/ktime.h>
21
22/*
23 * trace_clock_local(): the simplest and least coherent tracing clock.
24 *
25 * Useful for tracing that does not cross to other CPUs nor
26 * does it go through idle events.
27 */
28u64 notrace trace_clock_local(void)
29{
30 unsigned long flags;
31 u64 clock;
32
33 /*
34 * sched_clock() is an architecture implemented, fast, scalable,
35 * lockless clock. It is not guaranteed to be coherent across
36 * CPUs, nor across CPU idle events.
37 */
38 raw_local_irq_save(flags);
39 clock = sched_clock();
40 raw_local_irq_restore(flags);
41
42 return clock;
43}
44
45/*
46 * trace_clock(): 'inbetween' trace clock. Not completely serialized,
47 * but not completely incorrect when crossing CPUs either.
48 *
49 * This is based on cpu_clock(), which will allow at most ~1 jiffy of
50 * jitter between CPUs. So it's a pretty scalable clock, but there
51 * can be offsets in the trace data.
52 */
53u64 notrace trace_clock(void)
54{
55 return cpu_clock(raw_smp_processor_id());
56}
57
58
59/*
60 * trace_clock_global(): special globally coherent trace clock
61 *
62 * It has higher overhead than the other trace clocks but is still
63 * an order of magnitude faster than GTOD derived hardware clocks.
64 *
65 * Used by plugins that need globally coherent timestamps.
66 */
67
68static u64 prev_trace_clock_time;
69
70static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
71 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
72
73u64 notrace trace_clock_global(void)
74{
75 unsigned long flags;
76 int this_cpu;
77 u64 now;
78
79 raw_local_irq_save(flags);
80
81 this_cpu = raw_smp_processor_id();
82 now = cpu_clock(this_cpu);
83 /*
 84 * If in an NMI context then don't risk lockups and return the
85 * cpu_clock() time:
86 */
87 if (unlikely(in_nmi()))
88 goto out;
89
90 __raw_spin_lock(&trace_clock_lock);
91
92 /*
93 * TODO: if this happens often then maybe we should reset
94 * my_scd->clock to prev_trace_clock_time+1, to make sure
95 * we start ticking with the local clock from now on?
96 */
97 if ((s64)(now - prev_trace_clock_time) < 0)
98 now = prev_trace_clock_time + 1;
99
100 prev_trace_clock_time = now;
101
102 __raw_spin_unlock(&trace_clock_lock);
103
104 out:
105 raw_local_irq_restore(flags);
106
107 return now;
108}
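
trace_clock_global() above never lets the returned timestamp go backwards: if cpu_clock() on the current CPU reports a value behind the last timestamp handed out under the lock, the result is clamped to one tick past it. A worked illustration (the nanosecond values are made up):

        /*
         * prev_trace_clock_time = 1000
         *
         * CPU0: cpu_clock() = 1005  ->  1005 returned, prev becomes 1005
         * CPU1: cpu_clock() = 1002  ->  (s64)(1002 - 1005) < 0, so
         *                               1006 returned, prev becomes 1006
         */
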
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
new file mode 100644
index 000000000000..5cca4c978bde
--- /dev/null
+++ b/kernel/trace/trace_event_types.h
@@ -0,0 +1,166 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ftrace
3
4/*
5 * We cheat and use the proto type field as the ID
6 * and args as the entry type (minus 'struct')
7 */
8TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore,
9 TRACE_STRUCT(
10 TRACE_FIELD(unsigned long, ip, ip)
11 TRACE_FIELD(unsigned long, parent_ip, parent_ip)
12 ),
13 TP_RAW_FMT(" %lx <-- %lx")
14);
15
16TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT,
17 ftrace_graph_ent_entry, ignore,
18 TRACE_STRUCT(
19 TRACE_FIELD(unsigned long, graph_ent.func, func)
20 TRACE_FIELD(int, graph_ent.depth, depth)
21 ),
22 TP_RAW_FMT("--> %lx (%d)")
23);
24
25TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(int, ret.depth, depth)
30 ),
31 TP_RAW_FMT("<-- %lx (%d)")
32);
33
34TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore,
35 TRACE_STRUCT(
36 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
37 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
38 TRACE_FIELD(unsigned char, prev_state, prev_state)
39 TRACE_FIELD(unsigned int, next_pid, next_pid)
40 TRACE_FIELD(unsigned char, next_prio, next_prio)
41 TRACE_FIELD(unsigned char, next_state, next_state)
42 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
43 ),
44 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
45);
46
47TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
48 TRACE_STRUCT(
49 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
50 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
51 TRACE_FIELD(unsigned char, prev_state, prev_state)
52 TRACE_FIELD(unsigned int, next_pid, next_pid)
53 TRACE_FIELD(unsigned char, next_prio, next_prio)
54 TRACE_FIELD(unsigned char, next_state, next_state)
55 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
56 ),
57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
58);
59
60TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
61 TRACE_STRUCT(
62 TRACE_FIELD(unsigned long, arg1, arg1)
63 TRACE_FIELD(unsigned long, arg2, arg2)
64 TRACE_FIELD(unsigned long, arg3, arg3)
65 ),
66 TP_RAW_FMT("(%08lx) (%08lx) (%08lx)")
67);
68
69/*
70 * Stack-trace entry:
71 */
72
73/* #define FTRACE_STACK_ENTRIES 8 */
74
75TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore,
76 TRACE_STRUCT(
77 TRACE_FIELD(unsigned long, caller[0], stack0)
78 TRACE_FIELD(unsigned long, caller[1], stack1)
79 TRACE_FIELD(unsigned long, caller[2], stack2)
80 TRACE_FIELD(unsigned long, caller[3], stack3)
81 TRACE_FIELD(unsigned long, caller[4], stack4)
82 TRACE_FIELD(unsigned long, caller[5], stack5)
83 TRACE_FIELD(unsigned long, caller[6], stack6)
84 TRACE_FIELD(unsigned long, caller[7], stack7)
85 ),
86 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
87 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
88);
89
90TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
91 TRACE_STRUCT(
92 TRACE_FIELD(unsigned long, caller[0], stack0)
93 TRACE_FIELD(unsigned long, caller[1], stack1)
94 TRACE_FIELD(unsigned long, caller[2], stack2)
95 TRACE_FIELD(unsigned long, caller[3], stack3)
96 TRACE_FIELD(unsigned long, caller[4], stack4)
97 TRACE_FIELD(unsigned long, caller[5], stack5)
98 TRACE_FIELD(unsigned long, caller[6], stack6)
99 TRACE_FIELD(unsigned long, caller[7], stack7)
100 ),
101 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
102 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
103);
104
105TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
106 TRACE_STRUCT(
107 TRACE_FIELD(unsigned long, ip, ip)
108 TRACE_FIELD(unsigned int, depth, depth)
109 TRACE_FIELD(char *, fmt, fmt)
110 TRACE_FIELD_ZERO_CHAR(buf)
111 ),
112 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
113);
114
115TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
116 TRACE_STRUCT(
117 TRACE_FIELD(unsigned int, line, line)
118 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
119 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
120 TRACE_FIELD(char, correct, correct)
121 ),
122 TP_RAW_FMT("%u:%s:%s (%u)")
123);
124
125TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
126 TRACE_STRUCT(
127 TRACE_FIELD(u64, from, from)
128 TRACE_FIELD(u64, to, to)
129 ),
130 TP_RAW_FMT("from: %llx to: %llx")
131);
132
133TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
134 TRACE_STRUCT(
135 TRACE_FIELD(ktime_t, state_data.stamp, stamp)
136 TRACE_FIELD(ktime_t, state_data.end, end)
137 TRACE_FIELD(int, state_data.type, type)
138 TRACE_FIELD(int, state_data.state, state)
139 ),
140 TP_RAW_FMT("%llx->%llx type:%u state:%u")
141);
142
143TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore,
144 TRACE_STRUCT(
145 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
146 TRACE_FIELD(unsigned long, call_site, call_site)
147 TRACE_FIELD(const void *, ptr, ptr)
148 TRACE_FIELD(size_t, bytes_req, bytes_req)
149 TRACE_FIELD(size_t, bytes_alloc, bytes_alloc)
150 TRACE_FIELD(gfp_t, gfp_flags, gfp_flags)
151 TRACE_FIELD(int, node, node)
152 ),
153 TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
154 " flags:%x node:%d")
155);
156
157TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore,
158 TRACE_STRUCT(
159 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
160 TRACE_FIELD(unsigned long, call_site, call_site)
161 TRACE_FIELD(const void *, ptr, ptr)
162 ),
163 TP_RAW_FMT("type:%u call_site:%lx ptr:%p")
164);
165
166#undef TRACE_SYSTEM
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
new file mode 100644
index 000000000000..769dfd00fc85
--- /dev/null
+++ b/kernel/trace/trace_events.c
@@ -0,0 +1,606 @@
1/*
2 * event tracer
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 * - Added format output of fields of the trace point.
7 * This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8 *
9 */
10
11#include <linux/debugfs.h>
12#include <linux/uaccess.h>
13#include <linux/module.h>
14#include <linux/ctype.h>
15
16#include "trace_output.h"
17
18#define TRACE_SYSTEM "TRACE_SYSTEM"
19
20static DEFINE_MUTEX(event_mutex);
21
22#define events_for_each(event) \
23 for (event = __start_ftrace_events; \
24 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
25 event++)
26
27void event_trace_printk(unsigned long ip, const char *fmt, ...)
28{
29 va_list ap;
30
31 va_start(ap, fmt);
32 tracing_record_cmdline(current);
33 trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
34 va_end(ap);
35}
36
37static void ftrace_clear_events(void)
38{
39 struct ftrace_event_call *call = (void *)__start_ftrace_events;
40
41
42 while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
43
44 if (call->enabled) {
45 call->enabled = 0;
46 call->unregfunc();
47 }
48 call++;
49 }
50}
51
52static void ftrace_event_enable_disable(struct ftrace_event_call *call,
53 int enable)
54{
55
56 switch (enable) {
57 case 0:
58 if (call->enabled) {
59 call->enabled = 0;
60 call->unregfunc();
61 }
62 break;
63 case 1:
64 if (!call->enabled) {
65 call->enabled = 1;
66 call->regfunc();
67 }
68 break;
69 }
70}
71
72static int ftrace_set_clr_event(char *buf, int set)
73{
74 struct ftrace_event_call *call = __start_ftrace_events;
75 char *event = NULL, *sub = NULL, *match;
76 int ret = -EINVAL;
77
78 /*
79 * The buf format can be <subsystem>:<event-name>
80 * *:<event-name> means any event by that name.
81 * :<event-name> is the same.
82 *
83 * <subsystem>:* means all events in that subsystem
84 * <subsystem>: means the same.
85 *
86 * <name> (no ':') means all events in a subsystem with
87 * the name <name> or any event that matches <name>
88 */
89
90 match = strsep(&buf, ":");
91 if (buf) {
92 sub = match;
93 event = buf;
94 match = NULL;
95
96 if (!strlen(sub) || strcmp(sub, "*") == 0)
97 sub = NULL;
98 if (!strlen(event) || strcmp(event, "*") == 0)
99 event = NULL;
100 }
101
102 mutex_lock(&event_mutex);
103 events_for_each(call) {
104
105 if (!call->name || !call->regfunc)
106 continue;
107
108 if (match &&
109 strcmp(match, call->name) != 0 &&
110 strcmp(match, call->system) != 0)
111 continue;
112
113 if (sub && strcmp(sub, call->system) != 0)
114 continue;
115
116 if (event && strcmp(event, call->name) != 0)
117 continue;
118
119 ftrace_event_enable_disable(call, set);
120
121 ret = 0;
122 }
123 mutex_unlock(&event_mutex);
124
125 return ret;
126}
127
128/* 128 should be much more than enough */
129#define EVENT_BUF_SIZE 127
130
131static ssize_t
132ftrace_event_write(struct file *file, const char __user *ubuf,
133 size_t cnt, loff_t *ppos)
134{
135 size_t read = 0;
136 int i, set = 1;
137 ssize_t ret;
138 char *buf;
139 char ch;
140
141 if (!cnt || cnt < 0)
142 return 0;
143
144 ret = get_user(ch, ubuf++);
145 if (ret)
146 return ret;
147 read++;
148 cnt--;
149
150 /* skip white space */
151 while (cnt && isspace(ch)) {
152 ret = get_user(ch, ubuf++);
153 if (ret)
154 return ret;
155 read++;
156 cnt--;
157 }
158
159 /* Only white space found? */
160 if (isspace(ch)) {
161 file->f_pos += read;
162 ret = read;
163 return ret;
164 }
165
166 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
167 if (!buf)
168 return -ENOMEM;
169
170 if (cnt > EVENT_BUF_SIZE)
171 cnt = EVENT_BUF_SIZE;
172
173 i = 0;
174 while (cnt && !isspace(ch)) {
175 if (!i && ch == '!')
176 set = 0;
177 else
178 buf[i++] = ch;
179
180 ret = get_user(ch, ubuf++);
181 if (ret)
182 goto out_free;
183 read++;
184 cnt--;
185 }
186 buf[i] = 0;
187
188 file->f_pos += read;
189
190 ret = ftrace_set_clr_event(buf, set);
191 if (ret)
192 goto out_free;
193
194 ret = read;
195
196 out_free:
197 kfree(buf);
198
199 return ret;
200}
201
202static void *
203t_next(struct seq_file *m, void *v, loff_t *pos)
204{
205 struct ftrace_event_call *call = m->private;
206 struct ftrace_event_call *next = call;
207
208 (*pos)++;
209
210 for (;;) {
211 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
212 return NULL;
213
214 /*
215 * The ftrace subsystem is for showing formats only.
216 * They can not be enabled or disabled via the event files.
217 */
218 if (call->regfunc)
219 break;
220
221 call++;
222 next = call;
223 }
224
225 m->private = ++next;
226
227 return call;
228}
229
230static void *t_start(struct seq_file *m, loff_t *pos)
231{
232 return t_next(m, NULL, pos);
233}
234
235static void *
236s_next(struct seq_file *m, void *v, loff_t *pos)
237{
238 struct ftrace_event_call *call = m->private;
239 struct ftrace_event_call *next;
240
241 (*pos)++;
242
243 retry:
244 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
245 return NULL;
246
247 if (!call->enabled) {
248 call++;
249 goto retry;
250 }
251
252 next = call;
253 m->private = ++next;
254
255 return call;
256}
257
258static void *s_start(struct seq_file *m, loff_t *pos)
259{
260 return s_next(m, NULL, pos);
261}
262
263static int t_show(struct seq_file *m, void *v)
264{
265 struct ftrace_event_call *call = v;
266
267 if (strcmp(call->system, TRACE_SYSTEM) != 0)
268 seq_printf(m, "%s:", call->system);
269 seq_printf(m, "%s\n", call->name);
270
271 return 0;
272}
273
274static void t_stop(struct seq_file *m, void *p)
275{
276}
277
278static int
279ftrace_event_seq_open(struct inode *inode, struct file *file)
280{
281 int ret;
282 const struct seq_operations *seq_ops;
283
284 if ((file->f_mode & FMODE_WRITE) &&
285 !(file->f_flags & O_APPEND))
286 ftrace_clear_events();
287
288 seq_ops = inode->i_private;
289 ret = seq_open(file, seq_ops);
290 if (!ret) {
291 struct seq_file *m = file->private_data;
292
293 m->private = __start_ftrace_events;
294 }
295 return ret;
296}
297
298static ssize_t
299event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
300 loff_t *ppos)
301{
302 struct ftrace_event_call *call = filp->private_data;
303 char *buf;
304
305 if (call->enabled)
306 buf = "1\n";
307 else
308 buf = "0\n";
309
310 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
311}
312
313static ssize_t
314event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
315 loff_t *ppos)
316{
317 struct ftrace_event_call *call = filp->private_data;
318 char buf[64];
319 unsigned long val;
320 int ret;
321
322 if (cnt >= sizeof(buf))
323 return -EINVAL;
324
325 if (copy_from_user(&buf, ubuf, cnt))
326 return -EFAULT;
327
328 buf[cnt] = 0;
329
330 ret = strict_strtoul(buf, 10, &val);
331 if (ret < 0)
332 return ret;
333
334 switch (val) {
335 case 0:
336 case 1:
337 mutex_lock(&event_mutex);
338 ftrace_event_enable_disable(call, val);
339 mutex_unlock(&event_mutex);
340 break;
341
342 default:
343 return -EINVAL;
344 }
345
346 *ppos += cnt;
347
348 return cnt;
349}
350
351#undef FIELD
352#define FIELD(type, name) \
353 #type, #name, offsetof(typeof(field), name), sizeof(field.name)
354
355static int trace_write_header(struct trace_seq *s)
356{
357 struct trace_entry field;
358
359 /* struct trace_entry */
360 return trace_seq_printf(s,
361 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
362 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
363 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
364 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
365 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
366 "\n",
367 FIELD(unsigned char, type),
368 FIELD(unsigned char, flags),
369 FIELD(unsigned char, preempt_count),
370 FIELD(int, pid),
371 FIELD(int, tgid));
372}
373
374static ssize_t
375event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
376 loff_t *ppos)
377{
378 struct ftrace_event_call *call = filp->private_data;
379 struct trace_seq *s;
380 char *buf;
381 int r;
382
383 s = kmalloc(sizeof(*s), GFP_KERNEL);
384 if (!s)
385 return -ENOMEM;
386
387 trace_seq_init(s);
388
389 if (*ppos)
390 return 0;
391
392 /* If any of the first writes fail, so will the show_format. */
393
394 trace_seq_printf(s, "name: %s\n", call->name);
395 trace_seq_printf(s, "ID: %d\n", call->id);
396 trace_seq_printf(s, "format:\n");
397 trace_write_header(s);
398
399 r = call->show_format(s);
400 if (!r) {
401 /*
402 * ug! The format output is bigger than a PAGE!!
403 */
404 buf = "FORMAT TOO BIG\n";
405 r = simple_read_from_buffer(ubuf, cnt, ppos,
406 buf, strlen(buf));
407 goto out;
408 }
409
410 r = simple_read_from_buffer(ubuf, cnt, ppos,
411 s->buffer, s->len);
412 out:
413 kfree(s);
414 return r;
415}
416
417static const struct seq_operations show_event_seq_ops = {
418 .start = t_start,
419 .next = t_next,
420 .show = t_show,
421 .stop = t_stop,
422};
423
424static const struct seq_operations show_set_event_seq_ops = {
425 .start = s_start,
426 .next = s_next,
427 .show = t_show,
428 .stop = t_stop,
429};
430
431static const struct file_operations ftrace_avail_fops = {
432 .open = ftrace_event_seq_open,
433 .read = seq_read,
434 .llseek = seq_lseek,
435 .release = seq_release,
436};
437
438static const struct file_operations ftrace_set_event_fops = {
439 .open = ftrace_event_seq_open,
440 .read = seq_read,
441 .write = ftrace_event_write,
442 .llseek = seq_lseek,
443 .release = seq_release,
444};
445
446static const struct file_operations ftrace_enable_fops = {
447 .open = tracing_open_generic,
448 .read = event_enable_read,
449 .write = event_enable_write,
450};
451
452static const struct file_operations ftrace_event_format_fops = {
453 .open = tracing_open_generic,
454 .read = event_format_read,
455};
456
457static struct dentry *event_trace_events_dir(void)
458{
459 static struct dentry *d_tracer;
460 static struct dentry *d_events;
461
462 if (d_events)
463 return d_events;
464
465 d_tracer = tracing_init_dentry();
466 if (!d_tracer)
467 return NULL;
468
469 d_events = debugfs_create_dir("events", d_tracer);
470 if (!d_events)
471 pr_warning("Could not create debugfs "
472 "'events' directory\n");
473
474 return d_events;
475}
476
477struct event_subsystem {
478 struct list_head list;
479 const char *name;
480 struct dentry *entry;
481};
482
483static LIST_HEAD(event_subsystems);
484
485static struct dentry *
486event_subsystem_dir(const char *name, struct dentry *d_events)
487{
488 struct event_subsystem *system;
489
490 /* First see if we did not already create this dir */
491 list_for_each_entry(system, &event_subsystems, list) {
492 if (strcmp(system->name, name) == 0)
493 return system->entry;
494 }
495
496 /* need to create new entry */
497 system = kmalloc(sizeof(*system), GFP_KERNEL);
498 if (!system) {
499 pr_warning("No memory to create event subsystem %s\n",
500 name);
501 return d_events;
502 }
503
504 system->entry = debugfs_create_dir(name, d_events);
505 if (!system->entry) {
506 pr_warning("Could not create event subsystem %s\n",
507 name);
508 kfree(system);
509 return d_events;
510 }
511
512 system->name = name;
513 list_add(&system->list, &event_subsystems);
514
515 return system->entry;
516}
517
518static int
519event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
520{
521 struct dentry *entry;
522 int ret;
523
524 /*
525 * If the trace point header did not define TRACE_SYSTEM
526 * then the system would be called "TRACE_SYSTEM".
527 */
528 if (strcmp(call->system, "TRACE_SYSTEM") != 0)
529 d_events = event_subsystem_dir(call->system, d_events);
530
531 if (call->raw_init) {
532 ret = call->raw_init();
533 if (ret < 0) {
534 pr_warning("Could not initialize trace point"
535 " events/%s\n", call->name);
536 return ret;
537 }
538 }
539
540 call->dir = debugfs_create_dir(call->name, d_events);
541 if (!call->dir) {
542 pr_warning("Could not create debugfs "
543 "'%s' directory\n", call->name);
544 return -1;
545 }
546
547 if (call->regfunc) {
548 entry = debugfs_create_file("enable", 0644, call->dir, call,
549 &ftrace_enable_fops);
550 if (!entry)
551 pr_warning("Could not create debugfs "
552 "'%s/enable' entry\n", call->name);
553 }
554
555 /* A trace may not want to export its format */
556 if (!call->show_format)
557 return 0;
558
559 entry = debugfs_create_file("format", 0444, call->dir, call,
560 &ftrace_event_format_fops);
561 if (!entry)
562 pr_warning("Could not create debugfs "
563 "'%s/format' entry\n", call->name);
564
565 return 0;
566}
567
568static __init int event_trace_init(void)
569{
570 struct ftrace_event_call *call = __start_ftrace_events;
571 struct dentry *d_tracer;
572 struct dentry *entry;
573 struct dentry *d_events;
574
575 d_tracer = tracing_init_dentry();
576 if (!d_tracer)
577 return 0;
578
579 entry = debugfs_create_file("available_events", 0444, d_tracer,
580 (void *)&show_event_seq_ops,
581 &ftrace_avail_fops);
582 if (!entry)
583 pr_warning("Could not create debugfs "
584 "'available_events' entry\n");
585
586 entry = debugfs_create_file("set_event", 0644, d_tracer,
587 (void *)&show_set_event_seq_ops,
588 &ftrace_set_event_fops);
589 if (!entry)
590 pr_warning("Could not create debugfs "
591 "'set_event' entry\n");
592
593 d_events = event_trace_events_dir();
594 if (!d_events)
595 return 0;
596
597 events_for_each(call) {
598 /* The linker may leave blanks */
599 if (!call->name)
600 continue;
601 event_create_dir(call, d_events);
602 }
603
604 return 0;
605}
606fs_initcall(event_trace_init);
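
Once event_trace_init() has populated debugfs, events are switched on and off by writing the "<subsystem>:<event>" strings parsed by ftrace_set_clr_event() above. A minimal user-space sketch; the debugfs mount point and the "mysub:myevent" name are assumptions for illustration, not names defined by this patch:

        #include <fcntl.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                /* path assumes the usual debugfs mount point */
                int fd = open("/sys/kernel/debug/tracing/set_event", O_WRONLY);
                if (fd < 0)
                        return 1;

                /*
                 * "<subsystem>:<event>" enables one event; a leading '!'
                 * disables it, and "<subsystem>:" (or a bare name with no
                 * colon) matches every event in that subsystem.
                 */
                const char *cmd = "mysub:myevent\n";
                if (write(fd, cmd, strlen(cmd)) < 0) {
                        close(fd);
                        return 1;
                }
                close(fd);
                return 0;
        }
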
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
new file mode 100644
index 000000000000..38985f9b379c
--- /dev/null
+++ b/kernel/trace/trace_events_stage_1.h
@@ -0,0 +1,39 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
19#undef TRACE_FORMAT
20#define TRACE_FORMAT(call, proto, args, fmt)
21
22#undef __array
23#define __array(type, item, len) type item[len];
24
25#undef __field
26#define __field(type, item) type item;
27
28#undef TP_STRUCT__entry
29#define TP_STRUCT__entry(args...) args
30
31#undef TRACE_EVENT
32#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
33 struct ftrace_raw_##name { \
34 struct trace_entry ent; \
35 tstruct \
36 }; \
37 static struct ftrace_event_call event_##name
38
39#include <trace/trace_event_types.h>
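
For a hypothetical tracepoint declared as TRACE_EVENT(foo_bar, ...) whose TP_STRUCT__entry() contains __field(int, pid) and __array(char, comm, 16), the stage-1 pass above expands to roughly the following (the event name and fields are made up for illustration):

        struct ftrace_raw_foo_bar {
                struct trace_entry      ent;
                int                     pid;
                char                    comm[16];
        };
        static struct ftrace_event_call event_foo_bar;
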
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
new file mode 100644
index 000000000000..ca347afd6aa0
--- /dev/null
+++ b/kernel/trace/trace_events_stage_2.h
@@ -0,0 +1,131 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TP_printk> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event to the trace
31 * output format. Note, this is not needed if the data is read
32 * in binary.
33 */
34
35#undef __entry
36#define __entry field
37
38#undef TP_printk
39#define TP_printk(fmt, args...) fmt "\n", args
40
41#undef TRACE_EVENT
42#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
43enum print_line_t \
44ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
45{ \
46 struct trace_seq *s = &iter->seq; \
47 struct ftrace_raw_##call *field; \
48 struct trace_entry *entry; \
49 int ret; \
50 \
51 entry = iter->ent; \
52 \
53 if (entry->type != event_##call.id) { \
54 WARN_ON_ONCE(1); \
55 return TRACE_TYPE_UNHANDLED; \
56 } \
57 \
58 field = (typeof(field))entry; \
59 \
60 ret = trace_seq_printf(s, print); \
61 if (!ret) \
62 return TRACE_TYPE_PARTIAL_LINE; \
63 \
64 return TRACE_TYPE_HANDLED; \
65}
66
67#include <trace/trace_event_types.h>
68
69/*
70 * Setup the showing format of trace point.
71 *
72 * int
73 * ftrace_format_##call(struct trace_seq *s)
74 * {
75 * struct ftrace_raw_##call field;
76 * int ret;
77 *
78 * ret = trace_seq_printf(s, #type " " #item ";"
79 * " offset:%u; size:%u;\n",
80 * offsetof(struct ftrace_raw_##call, item),
81 * sizeof(field.type));
82 *
83 * }
84 */
85
86#undef TP_STRUCT__entry
87#define TP_STRUCT__entry(args...) args
88
89#undef __field
90#define __field(type, item) \
91 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
92 "offset:%u;\tsize:%u;\n", \
93 (unsigned int)offsetof(typeof(field), item), \
94 (unsigned int)sizeof(field.item)); \
95 if (!ret) \
96 return 0;
97
98#undef __array
99#define __array(type, item, len) \
100 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
101 "offset:%u;\tsize:%u;\n", \
102 (unsigned int)offsetof(typeof(field), item), \
103 (unsigned int)sizeof(field.item)); \
104 if (!ret) \
105 return 0;
106
107#undef __entry
108#define __entry "REC"
109
110#undef TP_printk
111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args
112
113#undef TP_fast_assign
114#define TP_fast_assign(args...) args
115
116#undef TRACE_EVENT
117#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
118static int \
119ftrace_format_##call(struct trace_seq *s) \
120{ \
121 struct ftrace_raw_##call field; \
122 int ret; \
123 \
124 tstruct; \
125 \
126 trace_seq_printf(s, "\nprint fmt: " print); \
127 \
128 return ret; \
129}
130
131#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
new file mode 100644
index 000000000000..ae2e323df0c7
--- /dev/null
+++ b/kernel/trace/trace_events_stage_3.h
@@ -0,0 +1,217 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (!ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <assign>; <-- Here we assign the entries by the __field and
60 * __array macros.
61 *
62 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
63 * }
64 *
65 * static int ftrace_raw_reg_event_<call>(void)
66 * {
67 * int ret;
68 *
69 * ret = register_trace_<call>(ftrace_raw_event_<call>);
70 * if (!ret)
71 * pr_info("event trace: Could not activate trace point "
72 * "probe to <call>");
73 * return ret;
74 * }
75 *
76 * static void ftrace_unreg_event_<call>(void)
77 * {
78 * unregister_trace_<call>(ftrace_raw_event_<call>);
79 * }
80 *
81 * static struct trace_event ftrace_event_type_<call> = {
82 * .trace = ftrace_raw_output_<call>, <-- stage 2
83 * };
84 *
85 * static int ftrace_raw_init_event_<call>(void)
86 * {
87 * int id;
88 *
89 * id = register_ftrace_event(&ftrace_event_type_<call>);
90 * if (!id)
91 * return -ENODEV;
92 * event_<call>.id = id;
93 * return 0;
94 * }
95 *
96 * static struct ftrace_event_call __used
97 * __attribute__((__aligned__(4)))
98 * __attribute__((section("_ftrace_events"))) event_<call> = {
99 * .name = "<call>",
100 * .system = "<system>",
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .regfunc = ftrace_reg_event_<call>,
103 * .unregfunc = ftrace_unreg_event_<call>,
104 * .show_format = ftrace_format_<call>,
105 * }
106 *
107 */
108
109#undef TP_FMT
110#define TP_FMT(fmt, args...) fmt "\n", ##args
111
112#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \
114{ \
115 event_trace_printk(_RET_IP_, #call ": " fmt); \
116} \
117 \
118static int ftrace_reg_event_##call(void) \
119{ \
120 int ret; \
121 \
122 ret = register_trace_##call(ftrace_event_##call); \
123 if (ret) \
124 pr_info("event trace: Could not activate trace point " \
125 "probe to " #call "\n"); \
126 return ret; \
127} \
128 \
129static void ftrace_unreg_event_##call(void) \
130{ \
131 unregister_trace_##call(ftrace_event_##call); \
132} \
133
134
135#undef TRACE_FORMAT
136#define TRACE_FORMAT(call, proto, args, fmt) \
137_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
138static struct ftrace_event_call __used \
139__attribute__((__aligned__(4))) \
140__attribute__((section("_ftrace_events"))) event_##call = { \
141 .name = #call, \
142 .system = __stringify(TRACE_SYSTEM), \
143 .regfunc = ftrace_reg_event_##call, \
144 .unregfunc = ftrace_unreg_event_##call, \
145}
146
147#undef __entry
148#define __entry entry
149
150#undef TRACE_EVENT
151#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
152 \
153static struct ftrace_event_call event_##call; \
154 \
155static void ftrace_raw_event_##call(proto) \
156{ \
157 struct ring_buffer_event *event; \
158 struct ftrace_raw_##call *entry; \
159 unsigned long irq_flags; \
160 int pc; \
161 \
162 local_save_flags(irq_flags); \
163 pc = preempt_count(); \
164 \
165 event = trace_current_buffer_lock_reserve(event_##call.id, \
166 sizeof(struct ftrace_raw_##call), \
167 irq_flags, pc); \
168 if (!event) \
169 return; \
170 entry = ring_buffer_event_data(event); \
171 \
172 assign; \
173 \
174 trace_current_buffer_unlock_commit(event, irq_flags, pc); \
175} \
176 \
177static int ftrace_raw_reg_event_##call(void) \
178{ \
179 int ret; \
180 \
181 ret = register_trace_##call(ftrace_raw_event_##call); \
182 if (ret) \
183 pr_info("event trace: Could not activate trace point " \
184 "probe to " #call "\n"); \
185 return ret; \
186} \
187 \
188static void ftrace_raw_unreg_event_##call(void) \
189{ \
190 unregister_trace_##call(ftrace_raw_event_##call); \
191} \
192 \
193static struct trace_event ftrace_event_type_##call = { \
194 .trace = ftrace_raw_output_##call, \
195}; \
196 \
197static int ftrace_raw_init_event_##call(void) \
198{ \
199 int id; \
200 \
201 id = register_ftrace_event(&ftrace_event_type_##call); \
202 if (!id) \
203 return -ENODEV; \
204 event_##call.id = id; \
205 return 0; \
206} \
207 \
208static struct ftrace_event_call __used \
209__attribute__((__aligned__(4))) \
210__attribute__((section("_ftrace_events"))) event_##call = { \
211 .name = #call, \
212 .system = __stringify(TRACE_SYSTEM), \
213 .raw_init = ftrace_raw_init_event_##call, \
214 .regfunc = ftrace_raw_reg_event_##call, \
215 .unregfunc = ftrace_raw_unreg_event_##call, \
216 .show_format = ftrace_format_##call, \
217}
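(Illustration, not part of the patch: for a hypothetical tracepoint declared as
TRACE_EVENT(foo_bar, TP_PROTO(int baz), ...), the macro above would expand to
roughly the following; the names foo_bar, baz and ftrace_raw_foo_bar are made
up, only the shape of the generated code matters.)

	static struct ftrace_event_call event_foo_bar;

	static void ftrace_raw_event_foo_bar(int baz)
	{
		struct ring_buffer_event *event;
		struct ftrace_raw_foo_bar *entry;
		unsigned long irq_flags;
		int pc;

		local_save_flags(irq_flags);
		pc = preempt_count();

		event = trace_current_buffer_lock_reserve(event_foo_bar.id,
					sizeof(struct ftrace_raw_foo_bar),
					irq_flags, pc);
		if (!event)
			return;
		entry = ring_buffer_event_data(event);

		entry->baz = baz;	/* the TP_fast_assign() body */

		trace_current_buffer_unlock_commit(event, irq_flags, pc);
	}

	/* ...plus ftrace_raw_reg/unreg/init_event_foo_bar() and the
	 * struct ftrace_event_call event_foo_bar placed in "_ftrace_events",
	 * exactly as spelled out in the comment block further above. */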
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
new file mode 100644
index 000000000000..4d9952d3df50
--- /dev/null
+++ b/kernel/trace/trace_export.c
@@ -0,0 +1,102 @@
1/*
2 * trace_export.c - export basic ftrace utilities to user space
3 *
4 * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/stringify.h>
7#include <linux/kallsyms.h>
8#include <linux/seq_file.h>
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15
16#include "trace_output.h"
17
18
19#undef TRACE_STRUCT
20#define TRACE_STRUCT(args...) args
21
22#undef TRACE_FIELD
23#define TRACE_FIELD(type, item, assign) \
24 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
25 "offset:%u;\tsize:%u;\n", \
26 (unsigned int)offsetof(typeof(field), item), \
27 (unsigned int)sizeof(field.item)); \
28 if (!ret) \
29 return 0;
30
31
32#undef TRACE_FIELD_SPECIAL
33#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
34 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
35 "offset:%u;\tsize:%u;\n", \
36 (unsigned int)offsetof(typeof(field), item), \
37 (unsigned int)sizeof(field.item)); \
38 if (!ret) \
39 return 0;
40
41#undef TRACE_FIELD_ZERO_CHAR
42#define TRACE_FIELD_ZERO_CHAR(item) \
43 ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \
44 "offset:%u;\tsize:0;\n", \
45 (unsigned int)offsetof(typeof(field), item)); \
46 if (!ret) \
47 return 0;
48
49
50#undef TP_RAW_FMT
51#define TP_RAW_FMT(args...) args
52
53#undef TRACE_EVENT_FORMAT
54#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
55static int \
56ftrace_format_##call(struct trace_seq *s) \
57{ \
58 struct args field; \
59 int ret; \
60 \
61 tstruct; \
62 \
63 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
64 \
65 return ret; \
66}
67
68#include "trace_event_types.h"
69
70#undef TRACE_ZERO_CHAR
71#define TRACE_ZERO_CHAR(arg)
72
73#undef TRACE_FIELD
74#define TRACE_FIELD(type, item, assign)\
75 entry->item = assign;
76
77#undef TRACE_FIELD
78#define TRACE_FIELD(type, item, assign)\
79 entry->item = assign;
80
81#undef TP_CMD
82#define TP_CMD(cmd...) cmd
83
84#undef TRACE_ENTRY
85#define TRACE_ENTRY entry
86
87#undef TRACE_FIELD_SPECIAL
88#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
89 cmd;
90
91#undef TRACE_EVENT_FORMAT
92#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
93 \
94static struct ftrace_event_call __used \
95__attribute__((__aligned__(4))) \
96__attribute__((section("_ftrace_events"))) event_##call = { \
97 .name = #call, \
98 .id = proto, \
99 .system = __stringify(TRACE_SYSTEM), \
100 .show_format = ftrace_format_##call, \
101}
102#include "trace_event_types.h"
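(Illustration, not part of the new file.)  trace_export.c includes
trace_event_types.h twice on purpose: the first pass, with the macros defined
above, generates one ftrace_format_<call>() helper per event; the second pass,
after the macros are redefined, turns the very same entries into
struct ftrace_event_call objects in the "_ftrace_events" section, with
.show_format pointing at the helper from the first pass.  For an event with two
fields the helper would emit text of roughly this shape (field names and
offsets invented here):

	field:unsigned long ip;	offset:0;	size:8;
	field:unsigned long parent_ip;	offset:8;	size:8;

	print fmt: "%lx <-- %lx"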
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a16..c9a0b7df44ff 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -9,6 +9,7 @@
9 * Copyright (C) 2004-2006 Ingo Molnar 9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III 10 * Copyright (C) 2004 William Lee Irwin III
11 */ 11 */
12#include <linux/ring_buffer.h>
12#include <linux/debugfs.h> 13#include <linux/debugfs.h>
13#include <linux/uaccess.h> 14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
@@ -16,52 +17,388 @@
16 17
17#include "trace.h" 18#include "trace.h"
18 19
19static void start_function_trace(struct trace_array *tr) 20/* function tracing enabled */
21static int ftrace_function_enabled;
22
23static struct trace_array *func_trace;
24
25static void tracing_start_function_trace(void);
26static void tracing_stop_function_trace(void);
27
28static int function_trace_init(struct trace_array *tr)
20{ 29{
30 func_trace = tr;
21 tr->cpu = get_cpu(); 31 tr->cpu = get_cpu();
22 tracing_reset_online_cpus(tr);
23 put_cpu(); 32 put_cpu();
24 33
25 tracing_start_cmdline_record(); 34 tracing_start_cmdline_record();
26 tracing_start_function_trace(); 35 tracing_start_function_trace();
36 return 0;
27} 37}
28 38
29static void stop_function_trace(struct trace_array *tr) 39static void function_trace_reset(struct trace_array *tr)
30{ 40{
31 tracing_stop_function_trace(); 41 tracing_stop_function_trace();
32 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
33} 43}
34 44
35static int function_trace_init(struct trace_array *tr) 45static void function_trace_start(struct trace_array *tr)
36{ 46{
37 start_function_trace(tr); 47 tracing_reset_online_cpus(tr);
38 return 0;
39} 48}
40 49
41static void function_trace_reset(struct trace_array *tr) 50static void
51function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
52{
53 struct trace_array *tr = func_trace;
54 struct trace_array_cpu *data;
55 unsigned long flags;
56 long disabled;
57 int cpu, resched;
58 int pc;
59
60 if (unlikely(!ftrace_function_enabled))
61 return;
62
63 pc = preempt_count();
64 resched = ftrace_preempt_disable();
65 local_save_flags(flags);
66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu];
68 disabled = atomic_inc_return(&data->disabled);
69
70 if (likely(disabled == 1))
71 trace_function(tr, ip, parent_ip, flags, pc);
72
73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched);
75}
76
77static void
78function_trace_call(unsigned long ip, unsigned long parent_ip)
42{ 79{
43 stop_function_trace(tr); 80 struct trace_array *tr = func_trace;
81 struct trace_array_cpu *data;
82 unsigned long flags;
83 long disabled;
84 int cpu;
85 int pc;
86
87 if (unlikely(!ftrace_function_enabled))
88 return;
89
90 /*
91 * Need to use raw, since this must be called before the
92 * recursive protection is performed.
93 */
94 local_irq_save(flags);
95 cpu = raw_smp_processor_id();
96 data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled);
98
99 if (likely(disabled == 1)) {
100 pc = preempt_count();
101 trace_function(tr, ip, parent_ip, flags, pc);
102 }
103
104 atomic_dec(&data->disabled);
105 local_irq_restore(flags);
44} 106}
45 107
46static void function_trace_start(struct trace_array *tr) 108static void
109function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
47{ 110{
48 tracing_reset_online_cpus(tr); 111 struct trace_array *tr = func_trace;
112 struct trace_array_cpu *data;
113 unsigned long flags;
114 long disabled;
115 int cpu;
116 int pc;
117
118 if (unlikely(!ftrace_function_enabled))
119 return;
120
121 /*
122 * Need to use raw, since this must be called before the
123 * recursive protection is performed.
124 */
125 local_irq_save(flags);
126 cpu = raw_smp_processor_id();
127 data = tr->data[cpu];
128 disabled = atomic_inc_return(&data->disabled);
129
130 if (likely(disabled == 1)) {
131 pc = preempt_count();
132 trace_function(tr, ip, parent_ip, flags, pc);
133 /*
134 * skip over 5 funcs:
135 * __ftrace_trace_stack,
136 * __trace_stack,
137 * function_stack_trace_call
138 * ftrace_list_func
139 * ftrace_call
140 */
141 __trace_stack(tr, flags, 5, pc);
142 }
143
144 atomic_dec(&data->disabled);
145 local_irq_restore(flags);
146}
147
148
149static struct ftrace_ops trace_ops __read_mostly =
150{
151 .func = function_trace_call,
152};
153
154static struct ftrace_ops trace_stack_ops __read_mostly =
155{
156 .func = function_stack_trace_call,
157};
158
159/* Our two options */
160enum {
161 TRACE_FUNC_OPT_STACK = 0x1,
162};
163
164static struct tracer_opt func_opts[] = {
165#ifdef CONFIG_STACKTRACE
166 { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
167#endif
168 { } /* Always set a last empty entry */
169};
170
171static struct tracer_flags func_flags = {
172 .val = 0, /* By default: all flags disabled */
173 .opts = func_opts
174};
175
176static void tracing_start_function_trace(void)
177{
178 ftrace_function_enabled = 0;
179
180 if (trace_flags & TRACE_ITER_PREEMPTONLY)
181 trace_ops.func = function_trace_call_preempt_only;
182 else
183 trace_ops.func = function_trace_call;
184
185 if (func_flags.val & TRACE_FUNC_OPT_STACK)
186 register_ftrace_function(&trace_stack_ops);
187 else
188 register_ftrace_function(&trace_ops);
189
190 ftrace_function_enabled = 1;
191}
192
193static void tracing_stop_function_trace(void)
194{
195 ftrace_function_enabled = 0;
196 /* OK if they are not registered */
197 unregister_ftrace_function(&trace_stack_ops);
198 unregister_ftrace_function(&trace_ops);
199}
200
201static int func_set_flag(u32 old_flags, u32 bit, int set)
202{
203 if (bit == TRACE_FUNC_OPT_STACK) {
204 /* do nothing if already set */
205 if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
206 return 0;
207
208 if (set) {
209 unregister_ftrace_function(&trace_ops);
210 register_ftrace_function(&trace_stack_ops);
211 } else {
212 unregister_ftrace_function(&trace_stack_ops);
213 register_ftrace_function(&trace_ops);
214 }
215
216 return 0;
217 }
218
219 return -EINVAL;
49} 220}
50 221
51static struct tracer function_trace __read_mostly = 222static struct tracer function_trace __read_mostly =
52{ 223{
53 .name = "function", 224 .name = "function",
54 .init = function_trace_init, 225 .init = function_trace_init,
55 .reset = function_trace_reset, 226 .reset = function_trace_reset,
56 .start = function_trace_start, 227 .start = function_trace_start,
228 .wait_pipe = poll_wait_pipe,
229 .flags = &func_flags,
230 .set_flag = func_set_flag,
57#ifdef CONFIG_FTRACE_SELFTEST 231#ifdef CONFIG_FTRACE_SELFTEST
58 .selftest = trace_selftest_startup_function, 232 .selftest = trace_selftest_startup_function,
59#endif 233#endif
60}; 234};
61 235
236#ifdef CONFIG_DYNAMIC_FTRACE
237static void
238ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
239{
240 long *count = (long *)data;
241
242 if (tracing_is_on())
243 return;
244
245 if (!*count)
246 return;
247
248 if (*count != -1)
249 (*count)--;
250
251 tracing_on();
252}
253
254static void
255ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
256{
257 long *count = (long *)data;
258
259 if (!tracing_is_on())
260 return;
261
262 if (!*count)
263 return;
264
265 if (*count != -1)
266 (*count)--;
267
268 tracing_off();
269}
270
271static int
272ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
273 struct ftrace_probe_ops *ops, void *data);
274
275static struct ftrace_probe_ops traceon_probe_ops = {
276 .func = ftrace_traceon,
277 .print = ftrace_trace_onoff_print,
278};
279
280static struct ftrace_probe_ops traceoff_probe_ops = {
281 .func = ftrace_traceoff,
282 .print = ftrace_trace_onoff_print,
283};
284
285static int
286ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
287 struct ftrace_probe_ops *ops, void *data)
288{
289 char str[KSYM_SYMBOL_LEN];
290 long count = (long)data;
291
292 kallsyms_lookup(ip, NULL, NULL, NULL, str);
293 seq_printf(m, "%s:", str);
294
295 if (ops == &traceon_probe_ops)
296 seq_printf(m, "traceon");
297 else
298 seq_printf(m, "traceoff");
299
300 if (count == -1)
301 seq_printf(m, ":unlimited\n");
302 else
303 seq_printf(m, ":count=%ld", count);
304 seq_putc(m, '\n');
305
306 return 0;
307}
308
309static int
310ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
311{
312 struct ftrace_probe_ops *ops;
313
314 /* we register both traceon and traceoff to this callback */
315 if (strcmp(cmd, "traceon") == 0)
316 ops = &traceon_probe_ops;
317 else
318 ops = &traceoff_probe_ops;
319
320 unregister_ftrace_function_probe_func(glob, ops);
321
322 return 0;
323}
324
325static int
326ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
327{
328 struct ftrace_probe_ops *ops;
329 void *count = (void *)-1;
330 char *number;
331 int ret;
332
333 /* hash funcs only work with set_ftrace_filter */
334 if (!enable)
335 return -EINVAL;
336
337 if (glob[0] == '!')
338 return ftrace_trace_onoff_unreg(glob+1, cmd, param);
339
340 /* we register both traceon and traceoff to this callback */
341 if (strcmp(cmd, "traceon") == 0)
342 ops = &traceon_probe_ops;
343 else
344 ops = &traceoff_probe_ops;
345
346 if (!param)
347 goto out_reg;
348
349 number = strsep(&param, ":");
350
351 if (!strlen(number))
352 goto out_reg;
353
354 /*
355 * We use the callback data field (which is a pointer)
356 * as our counter.
357 */
358 ret = strict_strtoul(number, 0, (unsigned long *)&count);
359 if (ret)
360 return ret;
361
362 out_reg:
363 ret = register_ftrace_function_probe(glob, ops, count);
364
365 return ret;
366}
367
368static struct ftrace_func_command ftrace_traceon_cmd = {
369 .name = "traceon",
370 .func = ftrace_trace_onoff_callback,
371};
372
373static struct ftrace_func_command ftrace_traceoff_cmd = {
374 .name = "traceoff",
375 .func = ftrace_trace_onoff_callback,
376};
377
378static int __init init_func_cmd_traceon(void)
379{
380 int ret;
381
382 ret = register_ftrace_command(&ftrace_traceoff_cmd);
383 if (ret)
384 return ret;
385
386 ret = register_ftrace_command(&ftrace_traceon_cmd);
387 if (ret)
388 unregister_ftrace_command(&ftrace_traceoff_cmd);
389 return ret;
390}
391#else
392static inline int init_func_cmd_traceon(void)
393{
394 return 0;
395}
396#endif /* CONFIG_DYNAMIC_FTRACE */
397
62static __init int init_function_trace(void) 398static __init int init_function_trace(void)
63{ 399{
400 init_func_cmd_traceon();
64 return register_tracer(&function_trace); 401 return register_tracer(&function_trace);
65} 402}
66
67device_initcall(init_function_trace); 403device_initcall(init_function_trace);
404
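(Usage note, not part of the patch.)  The traceon/traceoff commands registered
above are driven through set_ftrace_filter.  Assuming debugfs is mounted at
/sys/kernel/debug, usage looks roughly like:

	echo 'schedule:traceoff:5' > /sys/kernel/debug/tracing/set_ftrace_filter
	echo '!schedule:traceoff'  > /sys/kernel/debug/tracing/set_ftrace_filter

The first command arms a probe that switches tracing off when schedule() is
hit, at most five times (the optional ':<count>' is parsed via strsep() in
ftrace_trace_onoff_callback() and kept in the probe's data pointer); the '!'
prefix removes the probe again through ftrace_trace_onoff_unreg().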
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38e..8566c14b3e9a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * 2 *
3 * Function graph tracer. 3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com> 4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which 5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com> 6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 * 7 *
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16#define TRACE_GRAPH_INDENT 2 17#define TRACE_GRAPH_INDENT 2
17 18
@@ -20,9 +21,11 @@
20#define TRACE_GRAPH_PRINT_CPU 0x2 21#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 22#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8 23#define TRACE_GRAPH_PRINT_PROC 0x8
24#define TRACE_GRAPH_PRINT_DURATION 0x10
25#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
23 26
24static struct tracer_opt trace_opts[] = { 27static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */ 28 /* Display overruns? (for self-debug purpose) */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, 29 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */ 30 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, 31 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,26 +33,101 @@ static struct tracer_opt trace_opts[] = {
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, 33 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */ 34 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, 35 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
36 /* Display duration of execution */
37 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
38 /* Display absolute time of an entry */
39 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
33 { } /* Empty entry */ 40 { } /* Empty entry */
34}; 41};
35 42
36static struct tracer_flags tracer_flags = { 43static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */ 44 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, 45 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
46 TRACE_GRAPH_PRINT_DURATION,
39 .opts = trace_opts 47 .opts = trace_opts
40}; 48};
41 49
42/* pid on the last trace processed */ 50/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44 51
45static int graph_trace_init(struct trace_array *tr) 52
53/* Add a function return address to the trace stack on thread info.*/
54int
55ftrace_push_return_trace(unsigned long ret, unsigned long long time,
56 unsigned long func, int *depth)
57{
58 int index;
59
60 if (!current->ret_stack)
61 return -EBUSY;
62
63 /* The return trace stack is full */
64 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
65 atomic_inc(&current->trace_overrun);
66 return -EBUSY;
67 }
68
69 index = ++current->curr_ret_stack;
70 barrier();
71 current->ret_stack[index].ret = ret;
72 current->ret_stack[index].func = func;
73 current->ret_stack[index].calltime = time;
74 *depth = index;
75
76 return 0;
77}
78
 79/* Retrieve a function return address from the trace stack on thread info.*/
80void
81ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
82{
83 int index;
84
85 index = current->curr_ret_stack;
86
87 if (unlikely(index < 0)) {
88 ftrace_graph_stop();
89 WARN_ON(1);
 90		/* Might as well panic, otherwise we have nowhere to go */
91 *ret = (unsigned long)panic;
92 return;
93 }
94
95 *ret = current->ret_stack[index].ret;
96 trace->func = current->ret_stack[index].func;
97 trace->calltime = current->ret_stack[index].calltime;
98 trace->overrun = atomic_read(&current->trace_overrun);
99 trace->depth = index;
100 barrier();
101 current->curr_ret_stack--;
102
103}
104
105/*
106 * Send the trace to the ring-buffer.
107 * @return the original return address.
108 */
109unsigned long ftrace_return_to_handler(void)
46{ 110{
47 int cpu, ret; 111 struct ftrace_graph_ret trace;
112 unsigned long ret;
113
114 ftrace_pop_return_trace(&trace, &ret);
115 trace.rettime = trace_clock_local();
116 ftrace_graph_return(&trace);
117
118 if (unlikely(!ret)) {
119 ftrace_graph_stop();
120 WARN_ON(1);
121 /* Might as well panic. What else to do? */
122 ret = (unsigned long)panic;
123 }
48 124
49 for_each_online_cpu(cpu) 125 return ret;
50 tracing_reset(tr, cpu); 126}
51 127
52 ret = register_ftrace_graph(&trace_graph_return, 128static int graph_trace_init(struct trace_array *tr)
129{
130 int ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry); 131 &trace_graph_entry);
54 if (ret) 132 if (ret)
55 return ret; 133 return ret;
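(Illustration, not part of the patch.)  The two helpers above maintain a
per-task shadow return stack: ftrace_push_return_trace() records the original
return address, the traced function and its entry timestamp at
++curr_ret_stack, and ftrace_return_to_handler() later pops that entry, stamps
rettime and hands the completed ftrace_graph_ret to the graph callback.  A
minimal user-space sketch of the same bookkeeping (everything below is
illustrative, not kernel API):

	#include <stdio.h>

	#define DEPTH 64

	struct ret_entry {
		unsigned long ret;		/* saved return address */
		unsigned long func;		/* traced function */
		unsigned long long calltime;	/* entry timestamp */
	};

	static struct ret_entry stack[DEPTH];
	static int curr = -1;

	static int push(unsigned long ret, unsigned long func,
			unsigned long long now)
	{
		if (curr == DEPTH - 1)
			return -1;		/* overrun (-EBUSY in the kernel) */
		curr++;
		stack[curr].ret = ret;
		stack[curr].func = func;
		stack[curr].calltime = now;
		return 0;
	}

	static unsigned long pop(unsigned long long now)
	{
		struct ret_entry *e = &stack[curr--];

		printf("depth %d: func %#lx ran for %llu ns\n",
		       curr + 1, e->func, now - e->calltime);
		return e->ret;			/* where to jump back to */
	}

	int main(void)
	{
		push(0x1000, 0x2000, 100);	/* fake entry at t=100ns */
		pop(5100);			/* fake return at t=5100ns */
		return 0;
	}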
@@ -153,17 +231,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
153 231
154/* If the pid changed since the last trace, output this event */ 232/* If the pid changed since the last trace, output this event */
155static enum print_line_t 233static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu) 234verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
157{ 235{
158 pid_t prev_pid; 236 pid_t prev_pid;
237 pid_t *last_pid;
159 int ret; 238 int ret;
160 239
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid) 240 if (!last_pids_cpu)
241 return TRACE_TYPE_HANDLED;
242
243 last_pid = per_cpu_ptr(last_pids_cpu, cpu);
244
245 if (*last_pid == pid)
162 return TRACE_TYPE_HANDLED; 246 return TRACE_TYPE_HANDLED;
163 247
164 prev_pid = last_pid[cpu]; 248 prev_pid = *last_pid;
165 last_pid[cpu] = pid; 249 *last_pid = pid;
166 250
251 if (prev_pid == -1)
252 return TRACE_TYPE_HANDLED;
167/* 253/*
168 * Context-switch trace line: 254 * Context-switch trace line:
169 255
@@ -175,34 +261,34 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu)
175 ret = trace_seq_printf(s, 261 ret = trace_seq_printf(s,
176 " ------------------------------------------\n"); 262 " ------------------------------------------\n");
177 if (!ret) 263 if (!ret)
178 TRACE_TYPE_PARTIAL_LINE; 264 return TRACE_TYPE_PARTIAL_LINE;
179 265
180 ret = print_graph_cpu(s, cpu); 266 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE) 267 if (ret == TRACE_TYPE_PARTIAL_LINE)
182 TRACE_TYPE_PARTIAL_LINE; 268 return TRACE_TYPE_PARTIAL_LINE;
183 269
184 ret = print_graph_proc(s, prev_pid); 270 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE) 271 if (ret == TRACE_TYPE_PARTIAL_LINE)
186 TRACE_TYPE_PARTIAL_LINE; 272 return TRACE_TYPE_PARTIAL_LINE;
187 273
188 ret = trace_seq_printf(s, " => "); 274 ret = trace_seq_printf(s, " => ");
189 if (!ret) 275 if (!ret)
190 TRACE_TYPE_PARTIAL_LINE; 276 return TRACE_TYPE_PARTIAL_LINE;
191 277
192 ret = print_graph_proc(s, pid); 278 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE) 279 if (ret == TRACE_TYPE_PARTIAL_LINE)
194 TRACE_TYPE_PARTIAL_LINE; 280 return TRACE_TYPE_PARTIAL_LINE;
195 281
196 ret = trace_seq_printf(s, 282 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n"); 283 "\n ------------------------------------------\n\n");
198 if (!ret) 284 if (!ret)
199 TRACE_TYPE_PARTIAL_LINE; 285 return TRACE_TYPE_PARTIAL_LINE;
200 286
201 return ret; 287 return TRACE_TYPE_HANDLED;
202} 288}
203 289
204static bool 290static struct ftrace_graph_ret_entry *
205trace_branch_is_leaf(struct trace_iterator *iter, 291get_return_for_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr) 292 struct ftrace_graph_ent_entry *curr)
207{ 293{
208 struct ring_buffer_iter *ring_iter; 294 struct ring_buffer_iter *ring_iter;
@@ -211,65 +297,123 @@ trace_branch_is_leaf(struct trace_iterator *iter,
211 297
212 ring_iter = iter->buffer_iter[iter->cpu]; 298 ring_iter = iter->buffer_iter[iter->cpu];
213 299
214 if (!ring_iter) 300 /* First peek to compare current entry and the next one */
215 return false; 301 if (ring_iter)
216 302 event = ring_buffer_iter_peek(ring_iter, NULL);
217 event = ring_buffer_iter_peek(ring_iter, NULL); 303 else {
304 /* We need to consume the current entry to see the next one */
305 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
306 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
307 NULL);
308 }
218 309
219 if (!event) 310 if (!event)
220 return false; 311 return NULL;
221 312
222 next = ring_buffer_event_data(event); 313 next = ring_buffer_event_data(event);
223 314
224 if (next->ent.type != TRACE_GRAPH_RET) 315 if (next->ent.type != TRACE_GRAPH_RET)
225 return false; 316 return NULL;
226 317
227 if (curr->ent.pid != next->ent.pid || 318 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func) 319 curr->graph_ent.func != next->ret.func)
229 return false; 320 return NULL;
321
322 /* this is a leaf, now advance the iterator */
323 if (ring_iter)
324 ring_buffer_read(ring_iter, NULL);
230 325
231 return true; 326 return next;
327}
328
 329/* Signal an overhead of execution time to the output */
330static int
331print_graph_overhead(unsigned long long duration, struct trace_seq *s)
332{
 333	/* If the duration column is disabled, we don't need anything */
334 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
335 return 1;
336
337 /* Non nested entry or return */
338 if (duration == -1)
339 return trace_seq_printf(s, " ");
340
341 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
342 /* Duration exceeded 100 msecs */
343 if (duration > 100000ULL)
344 return trace_seq_printf(s, "! ");
345
346 /* Duration exceeded 10 msecs */
347 if (duration > 10000ULL)
348 return trace_seq_printf(s, "+ ");
349 }
350
351 return trace_seq_printf(s, " ");
352}
353
354static int print_graph_abs_time(u64 t, struct trace_seq *s)
355{
356 unsigned long usecs_rem;
357
358 usecs_rem = do_div(t, NSEC_PER_SEC);
359 usecs_rem /= 1000;
360
361 return trace_seq_printf(s, "%5lu.%06lu | ",
362 (unsigned long)t, usecs_rem);
232} 363}
233 364
234static enum print_line_t 365static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr, 366print_graph_irq(struct trace_iterator *iter, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid) 367 enum trace_type type, int cpu, pid_t pid)
237{ 368{
238 int ret; 369 int ret;
370 struct trace_seq *s = &iter->seq;
239 371
240 if (addr < (unsigned long)__irqentry_text_start || 372 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end) 373 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED; 374 return TRACE_TYPE_UNHANDLED;
243 375
244 if (type == TRACE_GRAPH_ENT) { 376 /* Absolute time */
245 ret = trace_seq_printf(s, "==========> | "); 377 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
246 } else { 378 ret = print_graph_abs_time(iter->ts, s);
247 /* Cpu */ 379 if (!ret)
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 380 return TRACE_TYPE_PARTIAL_LINE;
249 ret = print_graph_cpu(s, cpu); 381 }
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258 382
259 ret = trace_seq_printf(s, " | "); 383 /* Cpu */
260 if (!ret) 384 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
261 return TRACE_TYPE_PARTIAL_LINE; 385 ret = print_graph_cpu(s, cpu);
262 } 386 if (ret == TRACE_TYPE_PARTIAL_LINE)
387 return TRACE_TYPE_PARTIAL_LINE;
388 }
389 /* Proc */
390 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
391 ret = print_graph_proc(s, pid);
392 if (ret == TRACE_TYPE_PARTIAL_LINE)
393 return TRACE_TYPE_PARTIAL_LINE;
394 ret = trace_seq_printf(s, " | ");
395 if (!ret)
396 return TRACE_TYPE_PARTIAL_LINE;
397 }
263 398
264 /* No overhead */ 399 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 400 ret = print_graph_overhead(-1, s);
266 ret = trace_seq_printf(s, " "); 401 if (!ret)
267 if (!ret) 402 return TRACE_TYPE_PARTIAL_LINE;
268 return TRACE_TYPE_PARTIAL_LINE; 403
269 } 404 if (type == TRACE_GRAPH_ENT)
405 ret = trace_seq_printf(s, "==========>");
406 else
407 ret = trace_seq_printf(s, "<==========");
408
409 if (!ret)
410 return TRACE_TYPE_PARTIAL_LINE;
411
 412	/* Don't close the duration column if we don't have one */
413 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
414 trace_seq_printf(s, " |");
415 ret = trace_seq_printf(s, "\n");
270 416
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret) 417 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE; 418 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED; 419 return TRACE_TYPE_HANDLED;
@@ -288,7 +432,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
288 sprintf(msecs_str, "%lu", (unsigned long) duration); 432 sprintf(msecs_str, "%lu", (unsigned long) duration);
289 433
290 /* Print msecs */ 434 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str); 435 ret = trace_seq_printf(s, "%s", msecs_str);
292 if (!ret) 436 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE; 437 return TRACE_TYPE_PARTIAL_LINE;
294 438
@@ -321,51 +465,33 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
321 465
322} 466}
323 467
324/* Signal a overhead of time execution to the output */
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{
328 /* Duration exceeded 100 msecs */
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331
332 /* Duration exceeded 10 msecs */
333 if (duration > 10000ULL)
334 return trace_seq_printf(s, "+ ");
335
336 return trace_seq_printf(s, " ");
337}
338
339/* Case of a leaf function on its call entry */ 468/* Case of a leaf function on its call entry */
340static enum print_line_t 469static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter, 470print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s) 471 struct ftrace_graph_ent_entry *entry,
472 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
343{ 473{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret; 474 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call; 475 struct ftrace_graph_ent *call;
348 unsigned long long duration; 476 unsigned long long duration;
349 int ret; 477 int ret;
350 int i; 478 int i;
351 479
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret; 480 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent; 481 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime; 482 duration = graph_ret->rettime - graph_ret->calltime;
357 483
358 /* Overhead */ 484 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 485 ret = print_graph_overhead(duration, s);
360 ret = print_graph_overhead(duration, s); 486 if (!ret)
361 if (!ret) 487 return TRACE_TYPE_PARTIAL_LINE;
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364 488
365 /* Duration */ 489 /* Duration */
366 ret = print_graph_duration(duration, s); 490 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
367 if (ret == TRACE_TYPE_PARTIAL_LINE) 491 ret = print_graph_duration(duration, s);
368 return TRACE_TYPE_PARTIAL_LINE; 492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494 }
369 495
370 /* Function */ 496 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 497 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +520,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
394 struct ftrace_graph_ent *call = &entry->graph_ent; 520 struct ftrace_graph_ent *call = &entry->graph_ent;
395 521
396 /* No overhead */ 522 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 523 ret = print_graph_overhead(-1, s);
398 ret = trace_seq_printf(s, " "); 524 if (!ret)
399 if (!ret) 525 return TRACE_TYPE_PARTIAL_LINE;
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402 526
403 /* Interrupt */ 527 /* No time */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); 528 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | "); 529 ret = trace_seq_printf(s, " | ");
408 if (!ret) 530 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE; 531 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 } 532 }
414 533
415
416 /* Function */ 534 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 535 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " "); 536 ret = trace_seq_printf(s, " ");
@@ -428,20 +546,40 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
428 if (!ret) 546 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE; 547 return TRACE_TYPE_PARTIAL_LINE;
430 548
431 return TRACE_TYPE_HANDLED; 549 /*
550 * we already consumed the current entry to check the next one
551 * and see if this is a leaf.
552 */
553 return TRACE_TYPE_NO_CONSUME;
432} 554}
433 555
434static enum print_line_t 556static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 557print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu) 558 struct trace_iterator *iter)
437{ 559{
438 int ret; 560 int ret;
561 int cpu = iter->cpu;
562 pid_t *last_entry = iter->private;
439 struct trace_entry *ent = iter->ent; 563 struct trace_entry *ent = iter->ent;
564 struct ftrace_graph_ent *call = &field->graph_ent;
565 struct ftrace_graph_ret_entry *leaf_ret;
440 566
441 /* Pid */ 567 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 568 if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
569 return TRACE_TYPE_PARTIAL_LINE;
570
571 /* Interrupt */
572 ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
573 if (ret == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE; 574 return TRACE_TYPE_PARTIAL_LINE;
444 575
576 /* Absolute time */
577 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
578 ret = print_graph_abs_time(iter->ts, s);
579 if (!ret)
580 return TRACE_TYPE_PARTIAL_LINE;
581 }
582
445 /* Cpu */ 583 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 584 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu); 585 ret = print_graph_cpu(s, cpu);
@@ -460,8 +598,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
460 return TRACE_TYPE_PARTIAL_LINE; 598 return TRACE_TYPE_PARTIAL_LINE;
461 } 599 }
462 600
463 if (trace_branch_is_leaf(iter, field)) 601 leaf_ret = get_return_for_leaf(iter, field);
464 return print_graph_entry_leaf(iter, field, s); 602 if (leaf_ret)
603 return print_graph_entry_leaf(iter, field, leaf_ret, s);
465 else 604 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu); 605 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467 606
@@ -469,16 +608,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
469 608
470static enum print_line_t 609static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 610print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu) 611 struct trace_entry *ent, struct trace_iterator *iter)
473{ 612{
474 int i; 613 int i;
475 int ret; 614 int ret;
615 int cpu = iter->cpu;
616 pid_t *last_pid = iter->private, pid = ent->pid;
476 unsigned long long duration = trace->rettime - trace->calltime; 617 unsigned long long duration = trace->rettime - trace->calltime;
477 618
478 /* Pid */ 619 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) 620 if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE; 621 return TRACE_TYPE_PARTIAL_LINE;
481 622
623 /* Absolute time */
624 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
625 ret = print_graph_abs_time(iter->ts, s);
626 if (!ret)
627 return TRACE_TYPE_PARTIAL_LINE;
628 }
629
482 /* Cpu */ 630 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 631 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu); 632 ret = print_graph_cpu(s, cpu);
@@ -498,16 +646,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
498 } 646 }
499 647
500 /* Overhead */ 648 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 649 ret = print_graph_overhead(duration, s);
502 ret = print_graph_overhead(duration, s); 650 if (!ret)
503 if (!ret) 651 return TRACE_TYPE_PARTIAL_LINE;
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506 652
507 /* Duration */ 653 /* Duration */
508 ret = print_graph_duration(duration, s); 654 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
509 if (ret == TRACE_TYPE_PARTIAL_LINE) 655 ret = print_graph_duration(duration, s);
510 return TRACE_TYPE_PARTIAL_LINE; 656 if (ret == TRACE_TYPE_PARTIAL_LINE)
657 return TRACE_TYPE_PARTIAL_LINE;
658 }
511 659
512 /* Closing brace */ 660 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 661 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -528,7 +676,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
528 return TRACE_TYPE_PARTIAL_LINE; 676 return TRACE_TYPE_PARTIAL_LINE;
529 } 677 }
530 678
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid); 679 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE) 680 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE; 681 return TRACE_TYPE_PARTIAL_LINE;
534 682
@@ -541,14 +689,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
541{ 689{
542 int i; 690 int i;
543 int ret; 691 int ret;
692 int cpu = iter->cpu;
693 pid_t *last_pid = iter->private;
544 694
545 /* Pid */ 695 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) 696 if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE; 697 return TRACE_TYPE_PARTIAL_LINE;
548 698
699 /* Absolute time */
700 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
701 ret = print_graph_abs_time(iter->ts, s);
702 if (!ret)
703 return TRACE_TYPE_PARTIAL_LINE;
704 }
705
549 /* Cpu */ 706 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 707 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu); 708 ret = print_graph_cpu(s, cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE) 709 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE; 710 return TRACE_TYPE_PARTIAL_LINE;
554 } 711 }
@@ -565,17 +722,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
565 } 722 }
566 723
567 /* No overhead */ 724 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 725 ret = print_graph_overhead(-1, s);
569 ret = trace_seq_printf(s, " "); 726 if (!ret)
727 return TRACE_TYPE_PARTIAL_LINE;
728
729 /* No time */
730 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
731 ret = trace_seq_printf(s, " | ");
570 if (!ret) 732 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE; 733 return TRACE_TYPE_PARTIAL_LINE;
572 } 734 }
573 735
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */ 736 /* Indentation */
580 if (trace->depth > 0) 737 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { 738 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -585,12 +742,19 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
585 } 742 }
586 743
587 /* The comment */ 744 /* The comment */
588 ret = trace_seq_printf(s, "/* %s", trace->buf); 745 ret = trace_seq_printf(s, "/* ");
746 if (!ret)
747 return TRACE_TYPE_PARTIAL_LINE;
748
749 ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
589 if (!ret) 750 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE; 751 return TRACE_TYPE_PARTIAL_LINE;
591 752
592 if (ent->flags & TRACE_FLAG_CONT) 753 /* Strip ending newline */
593 trace_seq_print_cont(s, iter); 754 if (s->buffer[s->len - 1] == '\n') {
755 s->buffer[s->len - 1] = '\0';
756 s->len--;
757 }
594 758
595 ret = trace_seq_printf(s, " */\n"); 759 ret = trace_seq_printf(s, " */\n");
596 if (!ret) 760 if (!ret)
@@ -610,13 +774,12 @@ print_graph_function(struct trace_iterator *iter)
610 case TRACE_GRAPH_ENT: { 774 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field; 775 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry); 776 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter, 777 return print_graph_entry(field, s, iter);
614 iter->cpu);
615 } 778 }
616 case TRACE_GRAPH_RET: { 779 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field; 780 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry); 781 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu); 782 return print_graph_return(&field->ret, s, entry, iter);
620 } 783 }
621 case TRACE_PRINT: { 784 case TRACE_PRINT: {
622 struct print_entry *field; 785 struct print_entry *field;
@@ -632,33 +795,64 @@ static void print_graph_headers(struct seq_file *s)
632{ 795{
633 /* 1st line */ 796 /* 1st line */
634 seq_printf(s, "# "); 797 seq_printf(s, "# ");
798 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
799 seq_printf(s, " TIME ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 800 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU "); 801 seq_printf(s, "CPU");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 802 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID "); 803 seq_printf(s, " TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) 804 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
640 seq_printf(s, "OVERHEAD/"); 805 seq_printf(s, " DURATION ");
641 seq_printf(s, "DURATION FUNCTION CALLS\n"); 806 seq_printf(s, " FUNCTION CALLS\n");
642 807
643 /* 2nd line */ 808 /* 2nd line */
644 seq_printf(s, "# "); 809 seq_printf(s, "# ");
810 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
811 seq_printf(s, " | ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 812 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| "); 813 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 814 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | "); 815 seq_printf(s, " | | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 816 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
650 seq_printf(s, "| "); 817 seq_printf(s, " | | ");
651 seq_printf(s, "| | | | |\n"); 818 seq_printf(s, " | | | |\n");
652 } else
653 seq_printf(s, " | | | | |\n");
654} 819}
820
821static void graph_trace_open(struct trace_iterator *iter)
822{
823 /* pid on the last trace processed */
824 pid_t *last_pid = alloc_percpu(pid_t);
825 int cpu;
826
827 if (!last_pid)
828 pr_warning("function graph tracer: not enough memory\n");
829 else
830 for_each_possible_cpu(cpu) {
831 pid_t *pid = per_cpu_ptr(last_pid, cpu);
832 *pid = -1;
833 }
834
835 iter->private = last_pid;
836}
837
838static void graph_trace_close(struct trace_iterator *iter)
839{
840 free_percpu(iter->private);
841}
842
655static struct tracer graph_trace __read_mostly = { 843static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph", 844 .name = "function_graph",
657 .init = graph_trace_init, 845 .open = graph_trace_open,
658 .reset = graph_trace_reset, 846 .close = graph_trace_close,
847 .wait_pipe = poll_wait_pipe,
848 .init = graph_trace_init,
849 .reset = graph_trace_reset,
659 .print_line = print_graph_function, 850 .print_line = print_graph_function,
660 .print_header = print_graph_headers, 851 .print_header = print_graph_headers,
661 .flags = &tracer_flags, 852 .flags = &tracer_flags,
853#ifdef CONFIG_FTRACE_SELFTEST
854 .selftest = trace_selftest_startup_function_graph,
855#endif
662}; 856};
663 857
664static __init int init_graph_trace(void) 858static __init int init_graph_trace(void)
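(Illustration, not part of the patch.)  With the changes above, a call entry
whose matching return is the very next event in the ring buffer is detected by
get_return_for_leaf() and folded into a single 'func();' line carrying its
duration, while a nested call prints 'func() {' and gets its duration on the
closing brace; '+' and '!' are the markers emitted by print_graph_overhead().
With the default options the output looks roughly like this (functions and
times invented):

	 0)   2.345 us    |        kfree();
	 0)               |        schedule() {
	 0) + 13.579 us   |        }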
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435f..7bfdf4c2347f 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,30 +1,53 @@
1/* 1/*
2 * h/w branch tracer for x86 based on bts 2 * h/w branch tracer for x86 based on bts
3 * 3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> 4 * Copyright (C) 2008-2009 Intel Corporation.
5 * 5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */ 6 */
7 7#include <linux/spinlock.h>
8#include <linux/module.h> 8#include <linux/kallsyms.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
12#include <linux/kallsyms.h> 11#include <linux/module.h>
12#include <linux/cpu.h>
13#include <linux/smp.h>
14#include <linux/fs.h>
13 15
14#include <asm/ds.h> 16#include <asm/ds.h>
15 17
16#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
17 20
18 21
19#define SIZEOF_BTS (1 << 13) 22#define SIZEOF_BTS (1 << 13)
20 23
24/*
25 * The tracer lock protects the below per-cpu tracer array.
26 * It needs to be held to:
27 * - start tracing on all cpus
28 * - stop tracing on all cpus
29 * - start tracing on a single hotplug cpu
30 * - stop tracing on a single hotplug cpu
31 * - read the trace from all cpus
32 * - read the trace from a single cpu
33 */
34static DEFINE_SPINLOCK(bts_tracer_lock);
21static DEFINE_PER_CPU(struct bts_tracer *, tracer); 35static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); 36static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23 37
24#define this_tracer per_cpu(tracer, smp_processor_id()) 38#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id()) 39#define this_buffer per_cpu(buffer, smp_processor_id())
26 40
41static int __read_mostly trace_hw_branches_enabled;
42static struct trace_array *hw_branch_trace __read_mostly;
43
27 44
45/*
46 * Start tracing on the current cpu.
47 * The argument is ignored.
48 *
49 * pre: bts_tracer_lock must be locked.
50 */
28static void bts_trace_start_cpu(void *arg) 51static void bts_trace_start_cpu(void *arg)
29{ 52{
30 if (this_tracer) 53 if (this_tracer)
@@ -42,14 +65,20 @@ static void bts_trace_start_cpu(void *arg)
42 65
43static void bts_trace_start(struct trace_array *tr) 66static void bts_trace_start(struct trace_array *tr)
44{ 67{
45 int cpu; 68 spin_lock(&bts_tracer_lock);
46 69
47 tracing_reset_online_cpus(tr); 70 on_each_cpu(bts_trace_start_cpu, NULL, 1);
71 trace_hw_branches_enabled = 1;
48 72
49 for_each_cpu(cpu, cpu_possible_mask) 73 spin_unlock(&bts_tracer_lock);
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51} 74}
52 75
76/*
77 * Stop tracing on the current cpu.
78 * The argument is ignored.
79 *
80 * pre: bts_tracer_lock must be locked.
81 */
53static void bts_trace_stop_cpu(void *arg) 82static void bts_trace_stop_cpu(void *arg)
54{ 83{
55 if (this_tracer) { 84 if (this_tracer) {
@@ -60,26 +89,60 @@ static void bts_trace_stop_cpu(void *arg)
60 89
61static void bts_trace_stop(struct trace_array *tr) 90static void bts_trace_stop(struct trace_array *tr)
62{ 91{
63 int cpu; 92 spin_lock(&bts_tracer_lock);
93
94 trace_hw_branches_enabled = 0;
95 on_each_cpu(bts_trace_stop_cpu, NULL, 1);
96
97 spin_unlock(&bts_tracer_lock);
98}
99
100static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
101 unsigned long action, void *hcpu)
102{
103 unsigned int cpu = (unsigned long)hcpu;
64 104
65 for_each_cpu(cpu, cpu_possible_mask) 105 spin_lock(&bts_tracer_lock);
106
107 if (!trace_hw_branches_enabled)
108 goto out;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
114 break;
115 case CPU_DOWN_PREPARE:
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); 116 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
117 break;
118 }
119
120 out:
121 spin_unlock(&bts_tracer_lock);
122 return NOTIFY_DONE;
67} 123}
68 124
125static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
126 .notifier_call = bts_hotcpu_handler
127};
128
69static int bts_trace_init(struct trace_array *tr) 129static int bts_trace_init(struct trace_array *tr)
70{ 130{
71 tracing_reset_online_cpus(tr); 131 hw_branch_trace = tr;
132
72 bts_trace_start(tr); 133 bts_trace_start(tr);
73 134
74 return 0; 135 return 0;
75} 136}
76 137
138static void bts_trace_reset(struct trace_array *tr)
139{
140 bts_trace_stop(tr);
141}
142
77static void bts_trace_print_header(struct seq_file *m) 143static void bts_trace_print_header(struct seq_file *m)
78{ 144{
79 seq_puts(m, 145 seq_puts(m, "# CPU# TO <- FROM\n");
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83} 146}
84 147
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) 148static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +150,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
87 struct trace_entry *entry = iter->ent; 150 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq; 151 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it; 152 struct hw_branch_entry *it;
153 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
90 154
91 trace_assign_type(it, entry); 155 trace_assign_type(it, entry);
92 156
93 if (entry->type == TRACE_HW_BRANCHES) { 157 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) && 158 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", 159 seq_print_ip_sym(seq, it->to, symflags) &&
96 it->from, it->to) && 160 trace_seq_printf(seq, "\t <- ") &&
97 (!it->from || 161 seq_print_ip_sym(seq, it->from, symflags) &&
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n")) 162 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED; 163 return TRACE_TYPE_HANDLED;
101 return TRACE_TYPE_PARTIAL_LINE;; 164 return TRACE_TYPE_PARTIAL_LINE;;
@@ -103,26 +166,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
103 return TRACE_TYPE_UNHANDLED; 166 return TRACE_TYPE_UNHANDLED;
104} 167}
105 168
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) 169void trace_hw_branch(u64 from, u64 to)
107{ 170{
171 struct trace_array *tr = hw_branch_trace;
108 struct ring_buffer_event *event; 172 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry; 173 struct hw_branch_entry *entry;
110 unsigned long irq; 174 unsigned long irq1;
175 int cpu;
111 176
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); 177 if (unlikely(!tr))
113 if (!event)
114 return; 178 return;
179
180 if (unlikely(!trace_hw_branches_enabled))
181 return;
182
183 local_irq_save(irq1);
184 cpu = raw_smp_processor_id();
185 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
186 goto out;
187
188 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
189 sizeof(*entry), 0, 0);
190 if (!event)
191 goto out;
115 entry = ring_buffer_event_data(event); 192 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from); 193 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES; 194 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from; 195 entry->from = from;
120 entry->to = to; 196 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq); 197 trace_buffer_unlock_commit(tr, event, 0, 0);
198
199 out:
200 atomic_dec(&tr->data[cpu]->disabled);
201 local_irq_restore(irq1);
122} 202}
123 203
124static void trace_bts_at(struct trace_array *tr, 204static void trace_bts_at(const struct bts_trace *trace, void *at)
125 const struct bts_trace *trace, void *at)
126{ 205{
127 struct bts_struct bts; 206 struct bts_struct bts;
128 int err = 0; 207 int err = 0;
@@ -137,18 +216,29 @@ static void trace_bts_at(struct trace_array *tr,
137 216
138 switch (bts.qualifier) { 217 switch (bts.qualifier) {
139 case BTS_BRANCH: 218 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); 219 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
141 break; 220 break;
142 } 221 }
143} 222}
144 223
224/*
225 * Collect the trace on the current cpu and write it into the ftrace buffer.
226 *
227 * pre: bts_tracer_lock must be locked
228 */
145static void trace_bts_cpu(void *arg) 229static void trace_bts_cpu(void *arg)
146{ 230{
147 struct trace_array *tr = (struct trace_array *) arg; 231 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace; 232 const struct bts_trace *trace;
149 unsigned char *at; 233 unsigned char *at;
150 234
151 if (!this_tracer) 235 if (unlikely(!tr))
236 return;
237
238 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
239 return;
240
241 if (unlikely(!this_tracer))
152 return; 242 return;
153 243
154 ds_suspend_bts(this_tracer); 244 ds_suspend_bts(this_tracer);
@@ -158,11 +248,11 @@ static void trace_bts_cpu(void *arg)
158 248
159 for (at = trace->ds.top; (void *)at < trace->ds.end; 249 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size) 250 at += trace->ds.size)
161 trace_bts_at(tr, trace, at); 251 trace_bts_at(trace, at);
162 252
163 for (at = trace->ds.begin; (void *)at < trace->ds.top; 253 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size) 254 at += trace->ds.size)
165 trace_bts_at(tr, trace, at); 255 trace_bts_at(trace, at);
166 256
167out: 257out:
168 ds_resume_bts(this_tracer); 258 ds_resume_bts(this_tracer);
@@ -170,26 +260,43 @@ out:
170 260
171static void trace_bts_prepare(struct trace_iterator *iter) 261static void trace_bts_prepare(struct trace_iterator *iter)
172{ 262{
173 int cpu; 263 spin_lock(&bts_tracer_lock);
264
265 on_each_cpu(trace_bts_cpu, iter->tr, 1);
266
267 spin_unlock(&bts_tracer_lock);
268}
269
270static void trace_bts_close(struct trace_iterator *iter)
271{
272 tracing_reset_online_cpus(iter->tr);
273}
274
275void trace_hw_branch_oops(void)
276{
277 spin_lock(&bts_tracer_lock);
278
279 trace_bts_cpu(hw_branch_trace);
174 280
175 for_each_cpu(cpu, cpu_possible_mask) 281 spin_unlock(&bts_tracer_lock);
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177} 282}
178 283
179struct tracer bts_tracer __read_mostly = 284struct tracer bts_tracer __read_mostly =
180{ 285{
181 .name = "hw-branch-tracer", 286 .name = "hw-branch-tracer",
182 .init = bts_trace_init, 287 .init = bts_trace_init,
183 .reset = bts_trace_stop, 288 .reset = bts_trace_reset,
184 .print_header = bts_trace_print_header, 289 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line, 290 .print_line = bts_trace_print_line,
186 .start = bts_trace_start, 291 .start = bts_trace_start,
187 .stop = bts_trace_stop, 292 .stop = bts_trace_stop,
188 .open = trace_bts_prepare 293 .open = trace_bts_prepare,
294 .close = trace_bts_close
189}; 295};
190 296
191__init static int init_bts_trace(void) 297__init static int init_bts_trace(void)
192{ 298{
299 register_hotcpu_notifier(&bts_hotcpu_notifier);
193 return register_tracer(&bts_tracer); 300 return register_tracer(&bts_tracer);
194} 301}
195device_initcall(init_bts_trace); 302device_initcall(init_bts_trace);
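(Illustration, not part of the patch.)  The reworked hw-branch tracer keeps one
BTS tracer per cpu, serialises start/stop/read under bts_tracer_lock, and uses
the hotcpu notifier so cpus that come online while tracing is enabled start a
tracer of their own.  With the new print routine each entry is rendered as a
'TO <- FROM' pair resolved through seq_print_ip_sym(), roughly like this
(symbols invented):

	# CPU# TO <- FROM
	   0  __switch_to+0x0/0x2f0	 <- schedule+0x1a2/0x7e0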
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d943534..b923d13e2fad 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace irqs off criticall timings 2 * trace irqs off critical timings
3 * 3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> 5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
@@ -32,6 +32,8 @@ enum {
32 32
33static int trace_type __read_mostly; 33static int trace_type __read_mostly;
34 34
35static int save_lat_flag;
36
35#ifdef CONFIG_PREEMPT_TRACER 37#ifdef CONFIG_PREEMPT_TRACER
36static inline int 38static inline int
37preempt_trace(void) 39preempt_trace(void)
@@ -95,7 +97,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 97 disabled = atomic_inc_return(&data->disabled);
96 98
97 if (likely(disabled == 1)) 99 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 100 trace_function(tr, ip, parent_ip, flags, preempt_count());
99 101
100 atomic_dec(&data->disabled); 102 atomic_dec(&data->disabled);
101} 103}
@@ -153,7 +155,7 @@ check_critical_timing(struct trace_array *tr,
153 if (!report_latency(delta)) 155 if (!report_latency(delta))
154 goto out_unlock; 156 goto out_unlock;
155 157
156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 158 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
157 159
158 latency = nsecs_to_usecs(delta); 160 latency = nsecs_to_usecs(delta);
159 161
@@ -177,7 +179,7 @@ out:
177 data->critical_sequence = max_sequence; 179 data->critical_sequence = max_sequence;
178 data->preempt_timestamp = ftrace_now(cpu); 180 data->preempt_timestamp = ftrace_now(cpu);
179 tracing_reset(tr, cpu); 181 tracing_reset(tr, cpu);
180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); 182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
181} 183}
182 184
183static inline void 185static inline void
@@ -210,7 +212,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
210 212
211 local_save_flags(flags); 213 local_save_flags(flags);
212 214
213 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 215 trace_function(tr, ip, parent_ip, flags, preempt_count());
214 216
215 per_cpu(tracing_cpu, cpu) = 1; 217 per_cpu(tracing_cpu, cpu) = 1;
216 218
@@ -244,7 +246,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
244 atomic_inc(&data->disabled); 246 atomic_inc(&data->disabled);
245 247
246 local_save_flags(flags); 248 local_save_flags(flags);
247 trace_function(tr, data, ip, parent_ip, flags, preempt_count()); 249 trace_function(tr, ip, parent_ip, flags, preempt_count());
248 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 250 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
249 data->critical_start = 0; 251 data->critical_start = 0;
250 atomic_dec(&data->disabled); 252 atomic_dec(&data->disabled);
@@ -353,33 +355,26 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 355}
354#endif /* CONFIG_PREEMPT_TRACER */ 356#endif /* CONFIG_PREEMPT_TRACER */
355 357
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
362static void start_irqsoff_tracer(struct trace_array *tr) 358static void start_irqsoff_tracer(struct trace_array *tr)
363{ 359{
364 register_ftrace_function(&trace_ops); 360 register_ftrace_function(&trace_ops);
365 if (tracing_is_enabled()) { 361 if (tracing_is_enabled())
366 tracer_enabled = 1; 362 tracer_enabled = 1;
367 save_tracer_enabled = 1; 363 else
368 } else {
369 tracer_enabled = 0; 364 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
372} 365}
373 366
374static void stop_irqsoff_tracer(struct trace_array *tr) 367static void stop_irqsoff_tracer(struct trace_array *tr)
375{ 368{
376 tracer_enabled = 0; 369 tracer_enabled = 0;
377 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 370 unregister_ftrace_function(&trace_ops);
379} 371}
380 372
381static void __irqsoff_tracer_init(struct trace_array *tr) 373static void __irqsoff_tracer_init(struct trace_array *tr)
382{ 374{
375 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
376 trace_flags |= TRACE_ITER_LATENCY_FMT;
377
383 tracing_max_latency = 0; 378 tracing_max_latency = 0;
384 irqsoff_trace = tr; 379 irqsoff_trace = tr;
385 /* make sure that the tracer is visible */ 380 /* make sure that the tracer is visible */
@@ -390,30 +385,19 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
390static void irqsoff_tracer_reset(struct trace_array *tr) 385static void irqsoff_tracer_reset(struct trace_array *tr)
391{ 386{
392 stop_irqsoff_tracer(tr); 387 stop_irqsoff_tracer(tr);
388
389 if (!save_lat_flag)
390 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
393} 391}
394 392
395static void irqsoff_tracer_start(struct trace_array *tr) 393static void irqsoff_tracer_start(struct trace_array *tr)
396{ 394{
397 tracer_enabled = 1; 395 tracer_enabled = 1;
398 save_tracer_enabled = 1;
399} 396}
400 397
401static void irqsoff_tracer_stop(struct trace_array *tr) 398static void irqsoff_tracer_stop(struct trace_array *tr)
402{ 399{
403 tracer_enabled = 0; 400 tracer_enabled = 0;
404 save_tracer_enabled = 0;
405}
406
407static void irqsoff_tracer_open(struct trace_iterator *iter)
408{
409 /* stop the trace while dumping */
410 tracer_enabled = 0;
411}
412
413static void irqsoff_tracer_close(struct trace_iterator *iter)
414{
415 /* restart tracing */
416 tracer_enabled = save_tracer_enabled;
417} 401}
418 402
419#ifdef CONFIG_IRQSOFF_TRACER 403#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +415,6 @@ static struct tracer irqsoff_tracer __read_mostly =
431 .reset = irqsoff_tracer_reset, 415 .reset = irqsoff_tracer_reset,
432 .start = irqsoff_tracer_start, 416 .start = irqsoff_tracer_start,
433 .stop = irqsoff_tracer_stop, 417 .stop = irqsoff_tracer_stop,
434 .open = irqsoff_tracer_open,
435 .close = irqsoff_tracer_close,
436 .print_max = 1, 418 .print_max = 1,
437#ifdef CONFIG_FTRACE_SELFTEST 419#ifdef CONFIG_FTRACE_SELFTEST
438 .selftest = trace_selftest_startup_irqsoff, 420 .selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +441,6 @@ static struct tracer preemptoff_tracer __read_mostly =
459 .reset = irqsoff_tracer_reset, 441 .reset = irqsoff_tracer_reset,
460 .start = irqsoff_tracer_start, 442 .start = irqsoff_tracer_start,
461 .stop = irqsoff_tracer_stop, 443 .stop = irqsoff_tracer_stop,
462 .open = irqsoff_tracer_open,
463 .close = irqsoff_tracer_close,
464 .print_max = 1, 444 .print_max = 1,
465#ifdef CONFIG_FTRACE_SELFTEST 445#ifdef CONFIG_FTRACE_SELFTEST
466 .selftest = trace_selftest_startup_preemptoff, 446 .selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +469,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
489 .reset = irqsoff_tracer_reset, 469 .reset = irqsoff_tracer_reset,
490 .start = irqsoff_tracer_start, 470 .start = irqsoff_tracer_start,
491 .stop = irqsoff_tracer_stop, 471 .stop = irqsoff_tracer_stop,
492 .open = irqsoff_tracer_open,
493 .close = irqsoff_tracer_close,
494 .print_max = 1, 472 .print_max = 1,
495#ifdef CONFIG_FTRACE_SELFTEST 473#ifdef CONFIG_FTRACE_SELFTEST
496 .selftest = trace_selftest_startup_preemptirqsoff, 474 .selftest = trace_selftest_startup_preemptirqsoff,
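
The save_lat_flag handling added above follows a simple save/restore idiom: on init, remember whether the user already had TRACE_ITER_LATENCY_FMT set, then force it on while the latency tracer runs; on reset, clear the bit only if the tracer was the one that set it, so a user-chosen setting survives. A self-contained sketch of that idiom (the flag bit and globals below are placeholders, not the kernel's definitions):

#define TRACE_ITER_LATENCY_FMT	(1 << 0)	/* placeholder bit value */

static unsigned long trace_flags;		/* stand-in for the global option mask */
static int save_lat_flag;

static void latency_tracer_init_flags(void)
{
	/* remember whether the user already had the latency format on */
	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
	trace_flags |= TRACE_ITER_LATENCY_FMT;
}

static void latency_tracer_reset_flags(void)
{
	/* only clear the bit if the tracer was the one that set it */
	if (!save_lat_flag)
		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
}

The same pair of hunks appears again below in trace_sched_wakeup.c, which adopts the identical pattern.
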
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 80e503ef6136..23e346a734ca 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,6 +12,7 @@
12#include <asm/atomic.h> 12#include <asm/atomic.h>
13 13
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h"
15 16
16struct header_iter { 17struct header_iter {
17 struct pci_dev *dev; 18 struct pci_dev *dev;
@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
183 switch (rw->opcode) { 184 switch (rw->opcode) {
184 case MMIO_READ: 185 case MMIO_READ:
185 ret = trace_seq_printf(s, 186 ret = trace_seq_printf(s,
186 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 187 "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
187 rw->width, secs, usec_rem, rw->map_id, 188 rw->width, secs, usec_rem, rw->map_id,
188 (unsigned long long)rw->phys, 189 (unsigned long long)rw->phys,
189 rw->value, rw->pc, 0); 190 rw->value, rw->pc, 0);
190 break; 191 break;
191 case MMIO_WRITE: 192 case MMIO_WRITE:
192 ret = trace_seq_printf(s, 193 ret = trace_seq_printf(s,
193 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 194 "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
194 rw->width, secs, usec_rem, rw->map_id, 195 rw->width, secs, usec_rem, rw->map_id,
195 (unsigned long long)rw->phys, 196 (unsigned long long)rw->phys,
196 rw->value, rw->pc, 0); 197 rw->value, rw->pc, 0);
197 break; 198 break;
198 case MMIO_UNKNOWN_OP: 199 case MMIO_UNKNOWN_OP:
199 ret = trace_seq_printf(s, 200 ret = trace_seq_printf(s,
200 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", 201 "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
202 "%02lx 0x%lx %d\n",
201 secs, usec_rem, rw->map_id, 203 secs, usec_rem, rw->map_id,
202 (unsigned long long)rw->phys, 204 (unsigned long long)rw->phys,
203 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, 205 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
229 switch (m->opcode) { 231 switch (m->opcode) {
230 case MMIO_PROBE: 232 case MMIO_PROBE:
231 ret = trace_seq_printf(s, 233 ret = trace_seq_printf(s,
232 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 234 "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
233 secs, usec_rem, m->map_id, 235 secs, usec_rem, m->map_id,
234 (unsigned long long)m->phys, m->virt, m->len, 236 (unsigned long long)m->phys, m->virt, m->len,
235 0UL, 0); 237 0UL, 0);
236 break; 238 break;
237 case MMIO_UNPROBE: 239 case MMIO_UNPROBE:
238 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
239 "UNMAP %lu.%06lu %d 0x%lx %d\n", 241 "UNMAP %u.%06lu %d 0x%lx %d\n",
240 secs, usec_rem, m->map_id, 0UL, 0); 242 secs, usec_rem, m->map_id, 0UL, 0);
241 break; 243 break;
242 default: 244 default:
@@ -252,20 +254,20 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
252{ 254{
253 struct trace_entry *entry = iter->ent; 255 struct trace_entry *entry = iter->ent;
254 struct print_entry *print = (struct print_entry *)entry; 256 struct print_entry *print = (struct print_entry *)entry;
255 const char *msg = print->buf;
256 struct trace_seq *s = &iter->seq; 257 struct trace_seq *s = &iter->seq;
257 unsigned long long t = ns2usecs(iter->ts); 258 unsigned long long t = ns2usecs(iter->ts);
258 unsigned long usec_rem = do_div(t, 1000000ULL); 259 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
259 unsigned secs = (unsigned long)t; 260 unsigned secs = (unsigned long)t;
260 int ret; 261 int ret;
261 262
262 /* The trailing newline must be in the message. */ 263 /* The trailing newline must be in the message. */
263 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); 264 ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem);
264 if (!ret) 265 if (!ret)
265 return TRACE_TYPE_PARTIAL_LINE; 266 return TRACE_TYPE_PARTIAL_LINE;
266 267
267 if (entry->flags & TRACE_FLAG_CONT) 268 ret = trace_seq_bprintf(s, print->fmt, print->buf);
268 trace_seq_print_cont(s, iter); 269 if (!ret)
270 return TRACE_TYPE_PARTIAL_LINE;
269 271
270 return TRACE_TYPE_HANDLED; 272 return TRACE_TYPE_HANDLED;
271} 273}
@@ -308,21 +310,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
308{ 310{
309 struct ring_buffer_event *event; 311 struct ring_buffer_event *event;
310 struct trace_mmiotrace_rw *entry; 312 struct trace_mmiotrace_rw *entry;
311 unsigned long irq_flags; 313 int pc = preempt_count();
312 314
313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 315 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
314 &irq_flags); 316 sizeof(*entry), 0, pc);
315 if (!event) { 317 if (!event) {
316 atomic_inc(&dropped_count); 318 atomic_inc(&dropped_count);
317 return; 319 return;
318 } 320 }
319 entry = ring_buffer_event_data(event); 321 entry = ring_buffer_event_data(event);
320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
321 entry->ent.type = TRACE_MMIO_RW;
322 entry->rw = *rw; 322 entry->rw = *rw;
323 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 323 trace_buffer_unlock_commit(tr, event, 0, pc);
324
325 trace_wake_up();
326} 324}
327 325
328void mmio_trace_rw(struct mmiotrace_rw *rw) 326void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -338,21 +336,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338{ 336{
339 struct ring_buffer_event *event; 337 struct ring_buffer_event *event;
340 struct trace_mmiotrace_map *entry; 338 struct trace_mmiotrace_map *entry;
341 unsigned long irq_flags; 339 int pc = preempt_count();
342 340
343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 341 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
344 &irq_flags); 342 sizeof(*entry), 0, pc);
345 if (!event) { 343 if (!event) {
346 atomic_inc(&dropped_count); 344 atomic_inc(&dropped_count);
347 return; 345 return;
348 } 346 }
349 entry = ring_buffer_event_data(event); 347 entry = ring_buffer_event_data(event);
350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
351 entry->ent.type = TRACE_MMIO_MAP;
352 entry->map = *map; 348 entry->map = *map;
353 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 349 trace_buffer_unlock_commit(tr, event, 0, pc);
354
355 trace_wake_up();
356} 350}
357 351
358void mmio_trace_mapping(struct mmiotrace_map *map) 352void mmio_trace_mapping(struct mmiotrace_map *map)
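
With the conversion above, recording a typed mmiotrace entry shrinks to reserve, fill, commit: trace_buffer_lock_reserve() is handed the trace_array, the entry type and the preempt count, and trace_buffer_unlock_commit() replaces the explicit unlock plus trace_wake_up(). A hypothetical caller for some new entry type, modeled on __trace_mmiotrace_rw() above (TRACE_MYEVENT and struct my_entry are illustrative and not part of this patch):

struct my_entry {
	struct trace_entry	ent;	/* common header, filled in by the helper */
	u64			payload;
};

static void trace_my_event(struct trace_array *tr, u64 payload)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;
	int pc = preempt_count();

	event = trace_buffer_lock_reserve(tr, TRACE_MYEVENT,
					  sizeof(*entry), 0, pc);
	if (!event)
		return;			/* buffer full: the event is dropped */
	entry = ring_buffer_event_data(event);
	entry->payload = payload;
	trace_buffer_unlock_commit(tr, event, 0, pc);
}
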
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30ac..9aa84bde23cd 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_array *tr)
47 47
48static int nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
49{ 49{
50 int cpu;
51 ctx_trace = tr; 50 ctx_trace = tr;
52
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr); 51 start_nop_trace(tr);
57 return 0; 52 return 0;
58} 53}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 000000000000..491832af9ba1
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,916 @@
1/*
2 * trace_output.c
3 *
4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/mutex.h>
10#include <linux/ftrace.h>
11
12#include "trace_output.h"
13
14/* must be a power of 2 */
15#define EVENT_HASHSIZE 128
16
17static DEFINE_MUTEX(trace_event_mutex);
18static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19
20static int next_event_type = __TRACE_LAST_TYPE + 1;
21
22/**
23 * trace_seq_printf - sequence printing of trace information
24 * @s: trace sequence descriptor
25 * @fmt: printf format string
26 *
27 * The tracer may use either sequence operations or its own
 28 * copy to user routines. To simplify formatting of a trace,
29 * trace_seq_printf is used to store strings into a special
30 * buffer (@s). Then the output may be either used by
31 * the sequencer or pulled into another buffer.
32 */
33int
34trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
35{
36 int len = (PAGE_SIZE - 1) - s->len;
37 va_list ap;
38 int ret;
39
40 if (!len)
41 return 0;
42
43 va_start(ap, fmt);
44 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
45 va_end(ap);
46
47 /* If we can't write it all, don't bother writing anything */
48 if (ret >= len)
49 return 0;
50
51 s->len += ret;
52
53 return len;
54}
55
56int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
57{
58 int len = (PAGE_SIZE - 1) - s->len;
59 int ret;
60
61 if (!len)
62 return 0;
63
64 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
65
66 /* If we can't write it all, don't bother writing anything */
67 if (ret >= len)
68 return 0;
69
70 s->len += ret;
71
72 return len;
73}
74
75/**
76 * trace_seq_puts - trace sequence printing of simple string
77 * @s: trace sequence descriptor
78 * @str: simple string to record
79 *
80 * The tracer may use either the sequence operations or its own
81 * copy to user routines. This function records a simple string
82 * into a special buffer (@s) for later retrieval by a sequencer
83 * or other mechanism.
84 */
85int trace_seq_puts(struct trace_seq *s, const char *str)
86{
87 int len = strlen(str);
88
89 if (len > ((PAGE_SIZE - 1) - s->len))
90 return 0;
91
92 memcpy(s->buffer + s->len, str, len);
93 s->len += len;
94
95 return len;
96}
97
98int trace_seq_putc(struct trace_seq *s, unsigned char c)
99{
100 if (s->len >= (PAGE_SIZE - 1))
101 return 0;
102
103 s->buffer[s->len++] = c;
104
105 return 1;
106}
107
108int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
109{
110 if (len > ((PAGE_SIZE - 1) - s->len))
111 return 0;
112
113 memcpy(s->buffer + s->len, mem, len);
114 s->len += len;
115
116 return len;
117}
118
119int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
120{
121 unsigned char hex[HEX_CHARS];
122 unsigned char *data = mem;
123 int i, j;
124
125#ifdef __BIG_ENDIAN
126 for (i = 0, j = 0; i < len; i++) {
127#else
128 for (i = len-1, j = 0; i >= 0; i--) {
129#endif
130 hex[j++] = hex_asc_hi(data[i]);
131 hex[j++] = hex_asc_lo(data[i]);
132 }
133 hex[j++] = ' ';
134
135 return trace_seq_putmem(s, hex, j);
136}
137
138int trace_seq_path(struct trace_seq *s, struct path *path)
139{
140 unsigned char *p;
141
142 if (s->len >= (PAGE_SIZE - 1))
143 return 0;
144 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
145 if (!IS_ERR(p)) {
146 p = mangle_path(s->buffer + s->len, p, "\n");
147 if (p) {
148 s->len = p - s->buffer;
149 return 1;
150 }
151 } else {
152 s->buffer[s->len++] = '?';
153 return 1;
154 }
155
156 return 0;
157}
158
159#ifdef CONFIG_KRETPROBES
160static inline const char *kretprobed(const char *name)
161{
162 static const char tramp_name[] = "kretprobe_trampoline";
163 int size = sizeof(tramp_name);
164
165 if (strncmp(tramp_name, name, size) == 0)
166 return "[unknown/kretprobe'd]";
167 return name;
168}
169#else
170static inline const char *kretprobed(const char *name)
171{
172 return name;
173}
174#endif /* CONFIG_KRETPROBES */
175
176static int
177seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
178{
179#ifdef CONFIG_KALLSYMS
180 char str[KSYM_SYMBOL_LEN];
181 const char *name;
182
183 kallsyms_lookup(address, NULL, NULL, NULL, str);
184
185 name = kretprobed(str);
186
187 return trace_seq_printf(s, fmt, name);
188#endif
189 return 1;
190}
191
192static int
193seq_print_sym_offset(struct trace_seq *s, const char *fmt,
194 unsigned long address)
195{
196#ifdef CONFIG_KALLSYMS
197 char str[KSYM_SYMBOL_LEN];
198 const char *name;
199
200 sprint_symbol(str, address);
201 name = kretprobed(str);
202
203 return trace_seq_printf(s, fmt, name);
204#endif
205 return 1;
206}
207
208#ifndef CONFIG_64BIT
209# define IP_FMT "%08lx"
210#else
211# define IP_FMT "%016lx"
212#endif
213
214int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
215 unsigned long ip, unsigned long sym_flags)
216{
217 struct file *file = NULL;
218 unsigned long vmstart = 0;
219 int ret = 1;
220
221 if (mm) {
222 const struct vm_area_struct *vma;
223
224 down_read(&mm->mmap_sem);
225 vma = find_vma(mm, ip);
226 if (vma) {
227 file = vma->vm_file;
228 vmstart = vma->vm_start;
229 }
230 if (file) {
231 ret = trace_seq_path(s, &file->f_path);
232 if (ret)
233 ret = trace_seq_printf(s, "[+0x%lx]",
234 ip - vmstart);
235 }
236 up_read(&mm->mmap_sem);
237 }
238 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
239 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
240 return ret;
241}
242
243int
244seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
245 unsigned long sym_flags)
246{
247 struct mm_struct *mm = NULL;
248 int ret = 1;
249 unsigned int i;
250
251 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
252 struct task_struct *task;
253 /*
254 * we do the lookup on the thread group leader,
255 * since individual threads might have already quit!
256 */
257 rcu_read_lock();
258 task = find_task_by_vpid(entry->ent.tgid);
259 if (task)
260 mm = get_task_mm(task);
261 rcu_read_unlock();
262 }
263
264 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
265 unsigned long ip = entry->caller[i];
266
267 if (ip == ULONG_MAX || !ret)
268 break;
269 if (i && ret)
270 ret = trace_seq_puts(s, " <- ");
271 if (!ip) {
272 if (ret)
273 ret = trace_seq_puts(s, "??");
274 continue;
275 }
276 if (!ret)
277 break;
278 if (ret)
279 ret = seq_print_user_ip(s, mm, ip, sym_flags);
280 }
281
282 if (mm)
283 mmput(mm);
284 return ret;
285}
286
287int
288seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
289{
290 int ret;
291
292 if (!ip)
293 return trace_seq_printf(s, "0");
294
295 if (sym_flags & TRACE_ITER_SYM_OFFSET)
296 ret = seq_print_sym_offset(s, "%s", ip);
297 else
298 ret = seq_print_sym_short(s, "%s", ip);
299
300 if (!ret)
301 return 0;
302
303 if (sym_flags & TRACE_ITER_SYM_ADDR)
304 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
305 return ret;
306}
307
308static int
309lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
310{
311 int hardirq, softirq;
312 char *comm;
313
314 comm = trace_find_cmdline(entry->pid);
315 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
316 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
317
318 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
319 comm, entry->pid, cpu,
320 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
321 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
322 'X' : '.',
323 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
324 'N' : '.',
325 (hardirq && softirq) ? 'H' :
326 hardirq ? 'h' : softirq ? 's' : '.'))
327 return 0;
328
329 if (entry->preempt_count)
330 return trace_seq_printf(s, "%x", entry->preempt_count);
331 return trace_seq_puts(s, ".");
332}
333
334static unsigned long preempt_mark_thresh = 100;
335
336static int
337lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
338 unsigned long rel_usecs)
339{
340 return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
341 rel_usecs > preempt_mark_thresh ? '!' :
342 rel_usecs > 1 ? '+' : ' ');
343}
344
345int trace_print_context(struct trace_iterator *iter)
346{
347 struct trace_seq *s = &iter->seq;
348 struct trace_entry *entry = iter->ent;
349 char *comm = trace_find_cmdline(entry->pid);
350 unsigned long long t = ns2usecs(iter->ts);
351 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
352 unsigned long secs = (unsigned long)t;
353
354 return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
355 comm, entry->pid, iter->cpu, secs, usec_rem);
356}
357
358int trace_print_lat_context(struct trace_iterator *iter)
359{
360 u64 next_ts;
361 int ret;
362 struct trace_seq *s = &iter->seq;
363 struct trace_entry *entry = iter->ent,
364 *next_entry = trace_find_next_entry(iter, NULL,
365 &next_ts);
366 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
367 unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
368 unsigned long rel_usecs;
369
370 if (!next_entry)
371 next_ts = iter->ts;
372 rel_usecs = ns2usecs(next_ts - iter->ts);
373
374 if (verbose) {
375 char *comm = trace_find_cmdline(entry->pid);
376 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
377 " %ld.%03ldms (+%ld.%03ldms): ", comm,
378 entry->pid, iter->cpu, entry->flags,
379 entry->preempt_count, iter->idx,
380 ns2usecs(iter->ts),
381 abs_usecs / USEC_PER_MSEC,
382 abs_usecs % USEC_PER_MSEC,
383 rel_usecs / USEC_PER_MSEC,
384 rel_usecs % USEC_PER_MSEC);
385 } else {
386 ret = lat_print_generic(s, entry, iter->cpu);
387 if (ret)
388 ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
389 }
390
391 return ret;
392}
393
394static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
395
396static int task_state_char(unsigned long state)
397{
398 int bit = state ? __ffs(state) + 1 : 0;
399
400 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
401}
402
403/**
404 * ftrace_find_event - find a registered event
405 * @type: the type of event to look for
406 *
407 * Returns an event of type @type otherwise NULL
408 */
409struct trace_event *ftrace_find_event(int type)
410{
411 struct trace_event *event;
412 struct hlist_node *n;
413 unsigned key;
414
415 key = type & (EVENT_HASHSIZE - 1);
416
417 hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
418 if (event->type == type)
419 return event;
420 }
421
422 return NULL;
423}
424
425/**
426 * register_ftrace_event - register output for an event type
427 * @event: the event type to register
428 *
429 * Event types are stored in a hash and this hash is used to
430 * find a way to print an event. If the @event->type is set
431 * then it will use that type, otherwise it will assign a
432 * type to use.
433 *
434 * If you assign your own type, please make sure it is added
435 * to the trace_type enum in trace.h, to avoid collisions
436 * with the dynamic types.
437 *
438 * Returns the event type number or zero on error.
439 */
440int register_ftrace_event(struct trace_event *event)
441{
442 unsigned key;
443 int ret = 0;
444
445 mutex_lock(&trace_event_mutex);
446
447 if (!event->type)
448 event->type = next_event_type++;
449 else if (event->type > __TRACE_LAST_TYPE) {
450 printk(KERN_WARNING "Need to add type to trace.h\n");
451 WARN_ON(1);
452 }
453
454 if (ftrace_find_event(event->type))
455 goto out;
456
457 if (event->trace == NULL)
458 event->trace = trace_nop_print;
459 if (event->raw == NULL)
460 event->raw = trace_nop_print;
461 if (event->hex == NULL)
462 event->hex = trace_nop_print;
463 if (event->binary == NULL)
464 event->binary = trace_nop_print;
465
466 key = event->type & (EVENT_HASHSIZE - 1);
467
468 hlist_add_head_rcu(&event->node, &event_hash[key]);
469
470 ret = event->type;
471 out:
472 mutex_unlock(&trace_event_mutex);
473
474 return ret;
475}
476
477/**
478 * unregister_ftrace_event - remove a no longer used event
479 * @event: the event to remove
480 */
481int unregister_ftrace_event(struct trace_event *event)
482{
483 mutex_lock(&trace_event_mutex);
484 hlist_del(&event->node);
485 mutex_unlock(&trace_event_mutex);
486
487 return 0;
488}
489
490/*
491 * Standard events
492 */
493
494enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
495{
496 return TRACE_TYPE_HANDLED;
497}
498
499/* TRACE_FN */
500static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
501{
502 struct ftrace_entry *field;
503 struct trace_seq *s = &iter->seq;
504
505 trace_assign_type(field, iter->ent);
506
507 if (!seq_print_ip_sym(s, field->ip, flags))
508 goto partial;
509
510 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
511 if (!trace_seq_printf(s, " <-"))
512 goto partial;
513 if (!seq_print_ip_sym(s,
514 field->parent_ip,
515 flags))
516 goto partial;
517 }
518 if (!trace_seq_printf(s, "\n"))
519 goto partial;
520
521 return TRACE_TYPE_HANDLED;
522
523 partial:
524 return TRACE_TYPE_PARTIAL_LINE;
525}
526
527static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
528{
529 struct ftrace_entry *field;
530
531 trace_assign_type(field, iter->ent);
532
533 if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
534 field->ip,
535 field->parent_ip))
536 return TRACE_TYPE_PARTIAL_LINE;
537
538 return TRACE_TYPE_HANDLED;
539}
540
541static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
542{
543 struct ftrace_entry *field;
544 struct trace_seq *s = &iter->seq;
545
546 trace_assign_type(field, iter->ent);
547
548 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
549 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
550
551 return TRACE_TYPE_HANDLED;
552}
553
554static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
555{
556 struct ftrace_entry *field;
557 struct trace_seq *s = &iter->seq;
558
559 trace_assign_type(field, iter->ent);
560
561 SEQ_PUT_FIELD_RET(s, field->ip);
562 SEQ_PUT_FIELD_RET(s, field->parent_ip);
563
564 return TRACE_TYPE_HANDLED;
565}
566
567static struct trace_event trace_fn_event = {
568 .type = TRACE_FN,
569 .trace = trace_fn_trace,
570 .raw = trace_fn_raw,
571 .hex = trace_fn_hex,
572 .binary = trace_fn_bin,
573};
574
575/* TRACE_CTX and TRACE_WAKE */
576static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
577 char *delim)
578{
579 struct ctx_switch_entry *field;
580 char *comm;
581 int S, T;
582
583 trace_assign_type(field, iter->ent);
584
585 T = task_state_char(field->next_state);
586 S = task_state_char(field->prev_state);
587 comm = trace_find_cmdline(field->next_pid);
588 if (!trace_seq_printf(&iter->seq,
589 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
590 field->prev_pid,
591 field->prev_prio,
592 S, delim,
593 field->next_cpu,
594 field->next_pid,
595 field->next_prio,
596 T, comm))
597 return TRACE_TYPE_PARTIAL_LINE;
598
599 return TRACE_TYPE_HANDLED;
600}
601
602static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
603{
604 return trace_ctxwake_print(iter, "==>");
605}
606
607static enum print_line_t trace_wake_print(struct trace_iterator *iter,
608 int flags)
609{
610 return trace_ctxwake_print(iter, " +");
611}
612
613static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
614{
615 struct ctx_switch_entry *field;
616 int T;
617
618 trace_assign_type(field, iter->ent);
619
620 if (!S)
621 S = task_state_char(field->prev_state);
622 T = task_state_char(field->next_state);
623 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
624 field->prev_pid,
625 field->prev_prio,
626 S,
627 field->next_cpu,
628 field->next_pid,
629 field->next_prio,
630 T))
631 return TRACE_TYPE_PARTIAL_LINE;
632
633 return TRACE_TYPE_HANDLED;
634}
635
636static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
637{
638 return trace_ctxwake_raw(iter, 0);
639}
640
641static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
642{
643 return trace_ctxwake_raw(iter, '+');
644}
645
646
647static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
648{
649 struct ctx_switch_entry *field;
650 struct trace_seq *s = &iter->seq;
651 int T;
652
653 trace_assign_type(field, iter->ent);
654
655 if (!S)
656 S = task_state_char(field->prev_state);
657 T = task_state_char(field->next_state);
658
659 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
660 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
661 SEQ_PUT_HEX_FIELD_RET(s, S);
662 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
663 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
664 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
665 SEQ_PUT_HEX_FIELD_RET(s, T);
666
667 return TRACE_TYPE_HANDLED;
668}
669
670static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
671{
672 return trace_ctxwake_hex(iter, 0);
673}
674
675static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
676{
677 return trace_ctxwake_hex(iter, '+');
678}
679
680static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
681 int flags)
682{
683 struct ctx_switch_entry *field;
684 struct trace_seq *s = &iter->seq;
685
686 trace_assign_type(field, iter->ent);
687
688 SEQ_PUT_FIELD_RET(s, field->prev_pid);
689 SEQ_PUT_FIELD_RET(s, field->prev_prio);
690 SEQ_PUT_FIELD_RET(s, field->prev_state);
691 SEQ_PUT_FIELD_RET(s, field->next_pid);
692 SEQ_PUT_FIELD_RET(s, field->next_prio);
693 SEQ_PUT_FIELD_RET(s, field->next_state);
694
695 return TRACE_TYPE_HANDLED;
696}
697
698static struct trace_event trace_ctx_event = {
699 .type = TRACE_CTX,
700 .trace = trace_ctx_print,
701 .raw = trace_ctx_raw,
702 .hex = trace_ctx_hex,
703 .binary = trace_ctxwake_bin,
704};
705
706static struct trace_event trace_wake_event = {
707 .type = TRACE_WAKE,
708 .trace = trace_wake_print,
709 .raw = trace_wake_raw,
710 .hex = trace_wake_hex,
711 .binary = trace_ctxwake_bin,
712};
713
714/* TRACE_SPECIAL */
715static enum print_line_t trace_special_print(struct trace_iterator *iter,
716 int flags)
717{
718 struct special_entry *field;
719
720 trace_assign_type(field, iter->ent);
721
722 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
723 field->arg1,
724 field->arg2,
725 field->arg3))
726 return TRACE_TYPE_PARTIAL_LINE;
727
728 return TRACE_TYPE_HANDLED;
729}
730
731static enum print_line_t trace_special_hex(struct trace_iterator *iter,
732 int flags)
733{
734 struct special_entry *field;
735 struct trace_seq *s = &iter->seq;
736
737 trace_assign_type(field, iter->ent);
738
739 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
740 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
741 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
742
743 return TRACE_TYPE_HANDLED;
744}
745
746static enum print_line_t trace_special_bin(struct trace_iterator *iter,
747 int flags)
748{
749 struct special_entry *field;
750 struct trace_seq *s = &iter->seq;
751
752 trace_assign_type(field, iter->ent);
753
754 SEQ_PUT_FIELD_RET(s, field->arg1);
755 SEQ_PUT_FIELD_RET(s, field->arg2);
756 SEQ_PUT_FIELD_RET(s, field->arg3);
757
758 return TRACE_TYPE_HANDLED;
759}
760
761static struct trace_event trace_special_event = {
762 .type = TRACE_SPECIAL,
763 .trace = trace_special_print,
764 .raw = trace_special_print,
765 .hex = trace_special_hex,
766 .binary = trace_special_bin,
767};
768
769/* TRACE_STACK */
770
771static enum print_line_t trace_stack_print(struct trace_iterator *iter,
772 int flags)
773{
774 struct stack_entry *field;
775 struct trace_seq *s = &iter->seq;
776 int i;
777
778 trace_assign_type(field, iter->ent);
779
780 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
781 if (i && !trace_seq_puts(s, " <= "))
782 goto partial;
783
784 if (!seq_print_ip_sym(s, field->caller[i], flags))
785 goto partial;
786 }
787
788 if (!trace_seq_puts(s, "\n"))
789 goto partial;
790
791
792 return TRACE_TYPE_HANDLED;
793
794 partial:
795 return TRACE_TYPE_PARTIAL_LINE;
796}
797
798static struct trace_event trace_stack_event = {
799 .type = TRACE_STACK,
800 .trace = trace_stack_print,
801 .raw = trace_special_print,
802 .hex = trace_special_hex,
803 .binary = trace_special_bin,
804};
805
806/* TRACE_USER_STACK */
807static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
808 int flags)
809{
810 struct userstack_entry *field;
811 struct trace_seq *s = &iter->seq;
812
813 trace_assign_type(field, iter->ent);
814
815 if (!seq_print_userip_objs(field, s, flags))
816 goto partial;
817
818 if (!trace_seq_putc(s, '\n'))
819 goto partial;
820
821 return TRACE_TYPE_HANDLED;
822
823 partial:
824 return TRACE_TYPE_PARTIAL_LINE;
825}
826
827static struct trace_event trace_user_stack_event = {
828 .type = TRACE_USER_STACK,
829 .trace = trace_user_stack_print,
830 .raw = trace_special_print,
831 .hex = trace_special_hex,
832 .binary = trace_special_bin,
833};
834
835/* TRACE_PRINT */
836static enum print_line_t
837trace_print_print(struct trace_iterator *iter, int flags)
838{
839 struct trace_entry *entry = iter->ent;
840 struct trace_seq *s = &iter->seq;
841 struct print_entry *field;
842
843 trace_assign_type(field, entry);
844
845 if (!seq_print_ip_sym(s, field->ip, flags))
846 goto partial;
847
848 if (!trace_seq_puts(s, ": "))
849 goto partial;
850
851 if (!trace_seq_bprintf(s, field->fmt, field->buf))
852 goto partial;
853
854 return TRACE_TYPE_HANDLED;
855
856 partial:
857 return TRACE_TYPE_PARTIAL_LINE;
858}
859
860
861static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
862{
863 struct print_entry *field;
864 struct trace_seq *s = &iter->seq;
865
866 trace_assign_type(field, iter->ent);
867
868 if (!trace_seq_printf(s, ": %lx : ", field->ip))
869 goto partial;
870
871 if (!trace_seq_bprintf(s, field->fmt, field->buf))
872 goto partial;
873
874 return TRACE_TYPE_HANDLED;
875
876 partial:
877 return TRACE_TYPE_PARTIAL_LINE;
878}
879
880
881static struct trace_event trace_print_event = {
882 .type = TRACE_PRINT,
883 .trace = trace_print_print,
884 .raw = trace_print_raw,
885};
886
887static struct trace_event *events[] __initdata = {
888 &trace_fn_event,
889 &trace_ctx_event,
890 &trace_wake_event,
891 &trace_special_event,
892 &trace_stack_event,
893 &trace_user_stack_event,
894 &trace_print_event,
895 NULL
896};
897
898__init static int init_events(void)
899{
900 struct trace_event *event;
901 int i, ret;
902
903 for (i = 0; events[i]; i++) {
904 event = events[i];
905
906 ret = register_ftrace_event(event);
907 if (!ret) {
908 printk(KERN_WARNING "event %d failed to register\n",
909 event->type);
910 WARN_ON_ONCE(1);
911 }
912 }
913
914 return 0;
915}
916device_initcall(init_events);
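
trace_output.c turns event formatting into a small registry: ftrace_find_event() hashes on the event type, and register_ftrace_event() either hands out the next dynamic type or accepts a preassigned one (warning if it is not part of the static trace_type enum), filling any missing print handlers with trace_nop_print. A hypothetical out-of-tree user of that API, modeled on the trace_fn_event definition above (the "my_event" names and the message text are illustrative only):

#include "trace_output.h"

static enum print_line_t my_event_trace(struct trace_iterator *iter, int flags)
{
	if (!trace_seq_printf(&iter->seq, "my event on cpu %d\n", iter->cpu))
		return TRACE_TYPE_PARTIAL_LINE;
	return TRACE_TYPE_HANDLED;
}

static struct trace_event my_event = {
	.type	= 0,		/* 0: let register_ftrace_event() assign a type */
	.trace	= my_event_trace,
	/* .raw/.hex/.binary left NULL: they default to trace_nop_print */
};

__init static int my_event_init(void)
{
	if (!register_ftrace_event(&my_event))
		printk(KERN_WARNING "my_event failed to register\n");
	return 0;
}
device_initcall(my_event_init);
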
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 000000000000..3b90e6ade1aa
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,63 @@
1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H
3
4#include "trace.h"
5
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func raw;
14 trace_print_func hex;
15 trace_print_func binary;
16};
17
18extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3)));
20extern int
21trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
22extern int
23seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
24 unsigned long sym_flags);
25extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
26 size_t cnt);
27int trace_seq_puts(struct trace_seq *s, const char *str);
28int trace_seq_putc(struct trace_seq *s, unsigned char c);
29int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
30int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
31int trace_seq_path(struct trace_seq *s, struct path *path);
32int seq_print_userip_objs(const struct userstack_entry *entry,
33 struct trace_seq *s, unsigned long sym_flags);
34int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
35 unsigned long ip, unsigned long sym_flags);
36
37int trace_print_context(struct trace_iterator *iter);
38int trace_print_lat_context(struct trace_iterator *iter);
39
40struct trace_event *ftrace_find_event(int type);
41int register_ftrace_event(struct trace_event *event);
42int unregister_ftrace_event(struct trace_event *event);
43
44enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
45
46#define MAX_MEMHEX_BYTES 8
47#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
48
49#define SEQ_PUT_FIELD_RET(s, x) \
50do { \
51 if (!trace_seq_putmem(s, &(x), sizeof(x))) \
52 return TRACE_TYPE_PARTIAL_LINE; \
53} while (0)
54
55#define SEQ_PUT_HEX_FIELD_RET(s, x) \
56do { \
57 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
58 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
59 return TRACE_TYPE_PARTIAL_LINE; \
60} while (0)
61
62#endif
63
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf55..91ce672fb037 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -11,24 +11,126 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/ftrace.h> 14#include <trace/power.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include "trace.h" 18#include "trace.h"
19#include "trace_output.h"
19 20
20static struct trace_array *power_trace; 21static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled; 22static int __read_mostly trace_power_enabled;
22 23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ring_buffer_event *event;
40 struct trace_power *entry;
41 struct trace_array_cpu *data;
42 struct trace_array *tr = power_trace;
43
44 if (!trace_power_enabled)
45 return;
46
47 preempt_disable();
48 it->end = ktime_get();
49 data = tr->data[smp_processor_id()];
50
51 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
52 sizeof(*entry), 0, 0);
53 if (!event)
54 goto out;
55 entry = ring_buffer_event_data(event);
56 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out:
59 preempt_enable();
60}
61
62static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level)
64{
65 struct ring_buffer_event *event;
66 struct trace_power *entry;
67 struct trace_array_cpu *data;
68 struct trace_array *tr = power_trace;
69
70 if (!trace_power_enabled)
71 return;
72
73 memset(it, 0, sizeof(struct power_trace));
74 it->state = level;
75 it->type = type;
76 it->stamp = ktime_get();
77 preempt_disable();
78 it->end = it->stamp;
79 data = tr->data[smp_processor_id()];
80
81 event = trace_buffer_lock_reserve(tr, TRACE_POWER,
82 sizeof(*entry), 0, 0);
83 if (!event)
84 goto out;
85 entry = ring_buffer_event_data(event);
86 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out:
89 preempt_enable();
90}
91
92static int tracing_power_register(void)
93{
94 int ret;
95
96 ret = register_trace_power_start(probe_power_start);
97 if (ret) {
98 pr_info("power trace: Couldn't activate tracepoint"
99 " probe to trace_power_start\n");
100 return ret;
101 }
102 ret = register_trace_power_end(probe_power_end);
103 if (ret) {
104 pr_info("power trace: Couldn't activate tracepoint"
105 " probe to trace_power_end\n");
106 goto fail_start;
107 }
108 ret = register_trace_power_mark(probe_power_mark);
109 if (ret) {
110 pr_info("power trace: Couldn't activate tracepoint"
111 " probe to trace_power_mark\n");
112 goto fail_end;
113 }
114 return ret;
115fail_end:
116 unregister_trace_power_end(probe_power_end);
117fail_start:
118 unregister_trace_power_start(probe_power_start);
119 return ret;
120}
23 121
24static void start_power_trace(struct trace_array *tr) 122static void start_power_trace(struct trace_array *tr)
25{ 123{
26 trace_power_enabled = 1; 124 trace_power_enabled = 1;
125 tracing_power_register();
27} 126}
28 127
29static void stop_power_trace(struct trace_array *tr) 128static void stop_power_trace(struct trace_array *tr)
30{ 129{
31 trace_power_enabled = 0; 130 trace_power_enabled = 0;
131 unregister_trace_power_start(probe_power_start);
132 unregister_trace_power_end(probe_power_end);
133 unregister_trace_power_mark(probe_power_mark);
32} 134}
33 135
34 136
@@ -38,6 +140,7 @@ static int power_trace_init(struct trace_array *tr)
38 power_trace = tr; 140 power_trace = tr;
39 141
40 trace_power_enabled = 1; 142 trace_power_enabled = 1;
143 tracing_power_register();
41 144
42 for_each_cpu(cpu, cpu_possible_mask) 145 for_each_cpu(cpu, cpu_possible_mask)
43 tracing_reset(tr, cpu); 146 tracing_reset(tr, cpu);
@@ -94,86 +197,3 @@ static int init_power_trace(void)
94 return register_tracer(&power_tracer); 197 return register_tracer(&power_tracer);
95} 198}
96device_initcall(init_power_trace); 199device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
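
After this change the power tracer no longer exports trace_power_start()/trace_power_end()/trace_power_mark() as plain functions; instead it registers probes on the tracepoints declared in trace/power.h only while the tracer is enabled, so call sites cost nothing when tracing is off. A hedged sketch of what a call site is assumed to look like (the POWER_CSTATE constant and the idle-entry function are assumptions, not shown in this patch; only the struct power_trace cookie and the probe signatures are):

#include <trace/power.h>

static void enter_idle_state(int state)
{
	struct power_trace it;

	/* fires probe_power_start() only while the power tracer is active */
	trace_power_start(&it, POWER_CSTATE, state);

	/* ... platform-specific idle entry would go here ... */

	trace_power_end(&it);
}
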
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
new file mode 100644
index 000000000000..a50aea22e929
--- /dev/null
+++ b/kernel/trace/trace_printk.c
@@ -0,0 +1,138 @@
1/*
2 * trace binary printk
3 *
4 * Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
5 *
6 */
7#include <linux/kernel.h>
8#include <linux/ftrace.h>
9#include <linux/string.h>
10#include <linux/ctype.h>
11#include <linux/list.h>
12#include <linux/mutex.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <linux/seq_file.h>
16#include <linux/fs.h>
17#include <linux/marker.h>
18#include <linux/uaccess.h>
19
20#include "trace.h"
21
22#ifdef CONFIG_MODULES
23
24/*
 24 * a module's trace_printk() formats are saved automatically in struct trace_bprintk_fmt
 25 * entries, which are queued on trace_bprintk_fmt_list.
27 */
28static LIST_HEAD(trace_bprintk_fmt_list);
29
30/* serialize accesses to trace_bprintk_fmt_list */
31static DEFINE_MUTEX(btrace_mutex);
32
33struct trace_bprintk_fmt {
34 struct list_head list;
35 char fmt[0];
36};
37
38static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
39{
40 struct trace_bprintk_fmt *pos;
41 list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
42 if (!strcmp(pos->fmt, fmt))
43 return pos;
44 }
45 return NULL;
46}
47
48static
49void hold_module_trace_bprintk_format(const char **start, const char **end)
50{
51 const char **iter;
52
53 mutex_lock(&btrace_mutex);
54 for (iter = start; iter < end; iter++) {
55 struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
56 if (tb_fmt) {
57 *iter = tb_fmt->fmt;
58 continue;
59 }
60
61 tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
62 + strlen(*iter) + 1, GFP_KERNEL);
63 if (tb_fmt) {
64 list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
65 strcpy(tb_fmt->fmt, *iter);
66 *iter = tb_fmt->fmt;
67 } else
68 *iter = NULL;
69 }
70 mutex_unlock(&btrace_mutex);
71}
72
73static int module_trace_bprintk_format_notify(struct notifier_block *self,
74 unsigned long val, void *data)
75{
76 struct module *mod = data;
77 if (mod->num_trace_bprintk_fmt) {
78 const char **start = mod->trace_bprintk_fmt_start;
79 const char **end = start + mod->num_trace_bprintk_fmt;
80
81 if (val == MODULE_STATE_COMING)
82 hold_module_trace_bprintk_format(start, end);
83 }
84 return 0;
85}
86
87#else /* !CONFIG_MODULES */
88__init static int
89module_trace_bprintk_format_notify(struct notifier_block *self,
90 unsigned long val, void *data)
91{
92 return 0;
93}
94#endif /* CONFIG_MODULES */
95
96
97__initdata_or_module static
98struct notifier_block module_trace_bprintk_format_nb = {
99 .notifier_call = module_trace_bprintk_format_notify,
100};
101
102int __trace_printk(unsigned long ip, const char *fmt, ...)
103 {
104 int ret;
105 va_list ap;
106
107 if (unlikely(!fmt))
108 return 0;
109
110 if (!(trace_flags & TRACE_ITER_PRINTK))
111 return 0;
112
113 va_start(ap, fmt);
114 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
115 va_end(ap);
116 return ret;
117}
118EXPORT_SYMBOL_GPL(__trace_printk);
119
120int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
121 {
122 if (unlikely(!fmt))
123 return 0;
124
125 if (!(trace_flags & TRACE_ITER_PRINTK))
126 return 0;
127
128 return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
129}
130EXPORT_SYMBOL_GPL(__ftrace_vprintk);
131
132
133static __init int init_trace_printk(void)
134{
135 return register_module_notifier(&module_trace_bprintk_format_nb);
136}
137
138early_initcall(init_trace_printk);
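
From a caller's point of view, the machinery above exists so that a module's trace_printk() format strings stay valid in kernel memory even after the module goes away, while the ring buffer may still reference them. The call site itself is assumed to look like an ordinary printk that lands in the trace buffer instead of the console (the trace_printk() wrapper around __trace_printk() is assumed to come from linux/ftrace.h; the handler below is purely illustrative):

#include <linux/ftrace.h>

static void my_driver_irq_handler(unsigned int irq)
{
	/* formatted like printk(), but recorded in the ftrace ring buffer */
	trace_printk("handled irq %u\n", irq);
}
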
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index df175cb4564f..77132c2cf3d9 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
43 data = ctx_trace->data[cpu]; 43 data = ctx_trace->data[cpu];
44 44
45 if (likely(!atomic_read(&data->disabled))) 45 if (likely(!atomic_read(&data->disabled)))
46 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); 46 tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
47 47
48 local_irq_restore(flags); 48 local_irq_restore(flags);
49} 49}
@@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
66 data = ctx_trace->data[cpu]; 66 data = ctx_trace->data[cpu];
67 67
68 if (likely(!atomic_read(&data->disabled))) 68 if (likely(!atomic_read(&data->disabled)))
69 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, 69 tracing_sched_wakeup_trace(ctx_trace, wakee, current,
70 flags, pc); 70 flags, pc);
71 71
72 local_irq_restore(flags); 72 local_irq_restore(flags);
@@ -93,7 +93,7 @@ static int tracing_sched_register(void)
93 ret = register_trace_sched_switch(probe_sched_switch); 93 ret = register_trace_sched_switch(probe_sched_switch);
94 if (ret) { 94 if (ret) {
95 pr_info("sched trace: Couldn't activate tracepoint" 95 pr_info("sched trace: Couldn't activate tracepoint"
96 " probe to kernel_sched_schedule\n"); 96 " probe to kernel_sched_switch\n");
97 goto fail_deprobe_wake_new; 97 goto fail_deprobe_wake_new;
98 } 98 }
99 99
@@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
185 ctx_trace = tr; 185 ctx_trace = tr;
186} 186}
187 187
188static void start_sched_trace(struct trace_array *tr)
189{
190 tracing_reset_online_cpus(tr);
191 tracing_start_sched_switch_record();
192}
193
194static void stop_sched_trace(struct trace_array *tr) 188static void stop_sched_trace(struct trace_array *tr)
195{ 189{
196 tracing_stop_sched_switch_record(); 190 tracing_stop_sched_switch_record();
@@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr)
199static int sched_switch_trace_init(struct trace_array *tr) 193static int sched_switch_trace_init(struct trace_array *tr)
200{ 194{
201 ctx_trace = tr; 195 ctx_trace = tr;
202 start_sched_trace(tr); 196 tracing_start_sched_switch_record();
203 return 0; 197 return 0;
204} 198}
205 199
@@ -227,6 +221,7 @@ static struct tracer sched_switch_trace __read_mostly =
227 .reset = sched_switch_trace_reset, 221 .reset = sched_switch_trace_reset,
228 .start = sched_switch_trace_start, 222 .start = sched_switch_trace_start,
229 .stop = sched_switch_trace_stop, 223 .stop = sched_switch_trace_stop,
224 .wait_pipe = poll_wait_pipe,
230#ifdef CONFIG_FTRACE_SELFTEST 225#ifdef CONFIG_FTRACE_SELFTEST
231 .selftest = trace_selftest_startup_sched_switch, 226 .selftest = trace_selftest_startup_sched_switch,
232#endif 227#endif
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b3..3c5ad6b2ec84 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,12 +25,15 @@ static int __read_mostly tracer_enabled;
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28static int wakeup_rt;
28 29
29static raw_spinlock_t wakeup_lock = 30static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31 32
32static void __wakeup_reset(struct trace_array *tr); 33static void __wakeup_reset(struct trace_array *tr);
33 34
35static int save_lat_flag;
36
34#ifdef CONFIG_FUNCTION_TRACER 37#ifdef CONFIG_FUNCTION_TRACER
35/* 38/*
36 * irqsoff uses its own tracer function to keep the overhead down: 39 * irqsoff uses its own tracer function to keep the overhead down:
@@ -71,7 +74,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
71 if (task_cpu(wakeup_task) != cpu) 74 if (task_cpu(wakeup_task) != cpu)
72 goto unlock; 75 goto unlock;
73 76
74 trace_function(tr, data, ip, parent_ip, flags, pc); 77 trace_function(tr, ip, parent_ip, flags, pc);
75 78
76 unlock: 79 unlock:
77 __raw_spin_unlock(&wakeup_lock); 80 __raw_spin_unlock(&wakeup_lock);
@@ -151,7 +154,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 154 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock; 155 goto out_unlock;
153 156
154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
155 159
156 /* 160 /*
157 * usecs conversion is slow so we try to delay the conversion 161 * usecs conversion is slow so we try to delay the conversion
@@ -182,13 +186,10 @@ out:
182 186
183static void __wakeup_reset(struct trace_array *tr) 187static void __wakeup_reset(struct trace_array *tr)
184{ 188{
185 struct trace_array_cpu *data;
186 int cpu; 189 int cpu;
187 190
188 for_each_possible_cpu(cpu) { 191 for_each_possible_cpu(cpu)
189 data = tr->data[cpu];
190 tracing_reset(tr, cpu); 192 tracing_reset(tr, cpu);
191 }
192 193
193 wakeup_cpu = -1; 194 wakeup_cpu = -1;
194 wakeup_prio = -1; 195 wakeup_prio = -1;
@@ -213,6 +214,7 @@ static void wakeup_reset(struct trace_array *tr)
213static void 214static void
214probe_wakeup(struct rq *rq, struct task_struct *p, int success) 215probe_wakeup(struct rq *rq, struct task_struct *p, int success)
215{ 216{
217 struct trace_array_cpu *data;
216 int cpu = smp_processor_id(); 218 int cpu = smp_processor_id();
217 unsigned long flags; 219 unsigned long flags;
218 long disabled; 220 long disabled;
@@ -224,7 +226,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
224 tracing_record_cmdline(p); 226 tracing_record_cmdline(p);
225 tracing_record_cmdline(current); 227 tracing_record_cmdline(current);
226 228
227 if (likely(!rt_task(p)) || 229 if ((wakeup_rt && !rt_task(p)) ||
228 p->prio >= wakeup_prio || 230 p->prio >= wakeup_prio ||
229 p->prio >= current->prio) 231 p->prio >= current->prio)
230 return; 232 return;
@@ -252,9 +254,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 254
253 local_save_flags(flags); 255 local_save_flags(flags);
254 256
255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 257 data = wakeup_trace->data[wakeup_cpu];
256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], 258 data->preempt_timestamp = ftrace_now(cpu);
257 CALLER_ADDR1, CALLER_ADDR2, flags, pc); 259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
260 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
258 261
259out_locked: 262out_locked:
260 __raw_spin_unlock(&wakeup_lock); 263 __raw_spin_unlock(&wakeup_lock);
@@ -262,12 +265,6 @@ out:
262 atomic_dec(&wakeup_trace->data[cpu]->disabled); 265 atomic_dec(&wakeup_trace->data[cpu]->disabled);
263} 266}
264 267
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
271static void start_wakeup_tracer(struct trace_array *tr) 268static void start_wakeup_tracer(struct trace_array *tr)
272{ 269{
273 int ret; 270 int ret;
@@ -289,7 +286,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
289 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 286 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
290 if (ret) { 287 if (ret) {
291 pr_info("sched trace: Couldn't activate tracepoint" 288 pr_info("sched trace: Couldn't activate tracepoint"
292 " probe to kernel_sched_schedule\n"); 289 " probe to kernel_sched_switch\n");
293 goto fail_deprobe_wake_new; 290 goto fail_deprobe_wake_new;
294 } 291 }
295 292
@@ -306,13 +303,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
306 303
307 register_ftrace_function(&trace_ops); 304 register_ftrace_function(&trace_ops);
308 305
309 if (tracing_is_enabled()) { 306 if (tracing_is_enabled())
310 tracer_enabled = 1; 307 tracer_enabled = 1;
311 save_tracer_enabled = 1; 308 else
312 } else {
313 tracer_enabled = 0; 309 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
316 310
317 return; 311 return;
318fail_deprobe_wake_new: 312fail_deprobe_wake_new:
@@ -324,54 +318,54 @@ fail_deprobe:
324static void stop_wakeup_tracer(struct trace_array *tr) 318static void stop_wakeup_tracer(struct trace_array *tr)
325{ 319{
326 tracer_enabled = 0; 320 tracer_enabled = 0;
327 save_tracer_enabled = 0;
328 unregister_ftrace_function(&trace_ops); 321 unregister_ftrace_function(&trace_ops);
329 unregister_trace_sched_switch(probe_wakeup_sched_switch); 322 unregister_trace_sched_switch(probe_wakeup_sched_switch);
330 unregister_trace_sched_wakeup_new(probe_wakeup); 323 unregister_trace_sched_wakeup_new(probe_wakeup);
331 unregister_trace_sched_wakeup(probe_wakeup); 324 unregister_trace_sched_wakeup(probe_wakeup);
332} 325}
333 326
334static int wakeup_tracer_init(struct trace_array *tr) 327static int __wakeup_tracer_init(struct trace_array *tr)
335{ 328{
329 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
330 trace_flags |= TRACE_ITER_LATENCY_FMT;
331
336 tracing_max_latency = 0; 332 tracing_max_latency = 0;
337 wakeup_trace = tr; 333 wakeup_trace = tr;
338 start_wakeup_tracer(tr); 334 start_wakeup_tracer(tr);
339 return 0; 335 return 0;
340} 336}
341 337
338static int wakeup_tracer_init(struct trace_array *tr)
339{
340 wakeup_rt = 0;
341 return __wakeup_tracer_init(tr);
342}
343
344static int wakeup_rt_tracer_init(struct trace_array *tr)
345{
346 wakeup_rt = 1;
347 return __wakeup_tracer_init(tr);
348}
349
342static void wakeup_tracer_reset(struct trace_array *tr) 350static void wakeup_tracer_reset(struct trace_array *tr)
343{ 351{
344 stop_wakeup_tracer(tr); 352 stop_wakeup_tracer(tr);
345 /* make sure we put back any tasks we are tracing */ 353 /* make sure we put back any tasks we are tracing */
346 wakeup_reset(tr); 354 wakeup_reset(tr);
355
356 if (!save_lat_flag)
357 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
347} 358}
348 359
349static void wakeup_tracer_start(struct trace_array *tr) 360static void wakeup_tracer_start(struct trace_array *tr)
350{ 361{
351 wakeup_reset(tr); 362 wakeup_reset(tr);
352 tracer_enabled = 1; 363 tracer_enabled = 1;
353 save_tracer_enabled = 1;
354} 364}
355 365
356static void wakeup_tracer_stop(struct trace_array *tr) 366static void wakeup_tracer_stop(struct trace_array *tr)
357{ 367{
358 tracer_enabled = 0; 368 tracer_enabled = 0;
359 save_tracer_enabled = 0;
360}
361
362static void wakeup_tracer_open(struct trace_iterator *iter)
363{
364 /* stop the trace while dumping */
365 tracer_enabled = 0;
366}
367
368static void wakeup_tracer_close(struct trace_iterator *iter)
369{
370 /* forget about any processes we were recording */
371 if (save_tracer_enabled) {
372 wakeup_reset(iter->tr);
373 tracer_enabled = 1;
374 }
375} 369}
376 370
377static struct tracer wakeup_tracer __read_mostly = 371static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +375,20 @@ static struct tracer wakeup_tracer __read_mostly =
381 .reset = wakeup_tracer_reset, 375 .reset = wakeup_tracer_reset,
382 .start = wakeup_tracer_start, 376 .start = wakeup_tracer_start,
383 .stop = wakeup_tracer_stop, 377 .stop = wakeup_tracer_stop,
384 .open = wakeup_tracer_open, 378 .print_max = 1,
385 .close = wakeup_tracer_close, 379#ifdef CONFIG_FTRACE_SELFTEST
380 .selftest = trace_selftest_startup_wakeup,
381#endif
382};
383
384static struct tracer wakeup_rt_tracer __read_mostly =
385{
386 .name = "wakeup_rt",
387 .init = wakeup_rt_tracer_init,
388 .reset = wakeup_tracer_reset,
389 .start = wakeup_tracer_start,
390 .stop = wakeup_tracer_stop,
391 .wait_pipe = poll_wait_pipe,
386 .print_max = 1, 392 .print_max = 1,
387#ifdef CONFIG_FTRACE_SELFTEST 393#ifdef CONFIG_FTRACE_SELFTEST
388 .selftest = trace_selftest_startup_wakeup, 394 .selftest = trace_selftest_startup_wakeup,
@@ -397,6 +403,10 @@ __init static int init_wakeup_tracer(void)
397 if (ret) 403 if (ret)
398 return ret; 404 return ret;
399 405
406 ret = register_tracer(&wakeup_rt_tracer);
407 if (ret)
408 return ret;
409
400 return 0; 410 return 0;
401} 411}
402device_initcall(init_wakeup_tracer); 412device_initcall(init_wakeup_tracer);
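The only difference between the two tracers registered here is the wakeup_rt flag set by their init paths; the flag is consumed in probe_wakeup(), which is outside this hunk. A sketch of the filter there, with the exact condition assumed rather than taken from this excerpt:

	/* in probe_wakeup(): ignore non-RT tasks when running as wakeup_rt */
	if ((wakeup_rt && !rt_task(p)) ||
	    p->prio >= wakeup_prio ||
	    p->prio >= current->prio)
		return;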
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index bc8e80a86bca..f907a2b29028 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1,5 +1,6 @@
1/* Include in trace.c */ 1/* Include in trace.c */
2 2
3#include <linux/stringify.h>
3#include <linux/kthread.h> 4#include <linux/kthread.h>
4#include <linux/delay.h> 5#include <linux/delay.h>
5 6
@@ -9,11 +10,12 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 10 case TRACE_FN:
10 case TRACE_CTX: 11 case TRACE_CTX:
11 case TRACE_WAKE: 12 case TRACE_WAKE:
12 case TRACE_CONT:
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 16 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET:
17 return 1; 19 return 1;
18 } 20 }
19 return 0; 21 return 0;
@@ -99,9 +101,6 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
99 101
100#ifdef CONFIG_DYNAMIC_FTRACE 102#ifdef CONFIG_DYNAMIC_FTRACE
101 103
102#define __STR(x) #x
103#define STR(x) __STR(x)
104
105/* Test dynamic code modification and ftrace filters */ 104/* Test dynamic code modification and ftrace filters */
106int trace_selftest_startup_dynamic_tracing(struct tracer *trace, 105int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
107 struct trace_array *tr, 106 struct trace_array *tr,
@@ -125,17 +124,17 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
125 func(); 124 func();
126 125
127 /* 126 /*
128 * Some archs *cough*PowerPC*cough* add charachters to the 127 * Some archs *cough*PowerPC*cough* add characters to the
129 * start of the function names. We simply put a '*' to 128 * start of the function names. We simply put a '*' to
130 * accomodate them. 129 * accommodate them.
131 */ 130 */
132 func_name = "*" STR(DYN_FTRACE_TEST_NAME); 131 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
133 132
134 /* filter only on our function */ 133 /* filter only on our function */
135 ftrace_set_filter(func_name, strlen(func_name), 1); 134 ftrace_set_filter(func_name, strlen(func_name), 1);
136 135
137 /* enable tracing */ 136 /* enable tracing */
138 ret = trace->init(tr); 137 ret = tracer_init(trace, tr);
139 if (ret) { 138 if (ret) {
140 warn_failed_init_tracer(trace, ret); 139 warn_failed_init_tracer(trace, ret);
141 goto out; 140 goto out;
@@ -209,7 +208,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
209 ftrace_enabled = 1; 208 ftrace_enabled = 1;
210 tracer_enabled = 1; 209 tracer_enabled = 1;
211 210
212 ret = trace->init(tr); 211 ret = tracer_init(trace, tr);
213 if (ret) { 212 if (ret) {
214 warn_failed_init_tracer(trace, ret); 213 warn_failed_init_tracer(trace, ret);
215 goto out; 214 goto out;
@@ -247,6 +246,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
247} 246}
248#endif /* CONFIG_FUNCTION_TRACER */ 247#endif /* CONFIG_FUNCTION_TRACER */
249 248
249
250#ifdef CONFIG_FUNCTION_GRAPH_TRACER
251/*
252 * Pretty much the same than for the function tracer from which the selftest
253 * has been borrowed.
254 */
255int
256trace_selftest_startup_function_graph(struct tracer *trace,
257 struct trace_array *tr)
258{
259 int ret;
260 unsigned long count;
261
262 ret = tracer_init(trace, tr);
263 if (ret) {
264 warn_failed_init_tracer(trace, ret);
265 goto out;
266 }
267
268 /* Sleep for a 1/10 of a second */
269 msleep(100);
270
271 tracing_stop();
272
273 /* check the trace buffer */
274 ret = trace_test_buffer(tr, &count);
275
276 trace->reset(tr);
277 tracing_start();
278
279 if (!ret && !count) {
280 printk(KERN_CONT ".. no entries found ..");
281 ret = -1;
282 goto out;
283 }
284
285 /* Don't test dynamic tracing, the function tracer already did */
286
287out:
288 /* Stop it if we failed */
289 if (ret)
290 ftrace_graph_stop();
291
292 return ret;
293}
294#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
295
296
250#ifdef CONFIG_IRQSOFF_TRACER 297#ifdef CONFIG_IRQSOFF_TRACER
251int 298int
252trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) 299trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
@@ -256,7 +303,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
256 int ret; 303 int ret;
257 304
258 /* start the tracing */ 305 /* start the tracing */
259 ret = trace->init(tr); 306 ret = tracer_init(trace, tr);
260 if (ret) { 307 if (ret) {
261 warn_failed_init_tracer(trace, ret); 308 warn_failed_init_tracer(trace, ret);
262 return ret; 309 return ret;
@@ -310,7 +357,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
310 } 357 }
311 358
312 /* start the tracing */ 359 /* start the tracing */
313 ret = trace->init(tr); 360 ret = tracer_init(trace, tr);
314 if (ret) { 361 if (ret) {
315 warn_failed_init_tracer(trace, ret); 362 warn_failed_init_tracer(trace, ret);
316 return ret; 363 return ret;
@@ -364,7 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
364 } 411 }
365 412
366 /* start the tracing */ 413 /* start the tracing */
367 ret = trace->init(tr); 414 ret = tracer_init(trace, tr);
368 if (ret) { 415 if (ret) {
369 warn_failed_init_tracer(trace, ret); 416 warn_failed_init_tracer(trace, ret);
370 goto out; 417 goto out;
@@ -496,7 +543,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
496 wait_for_completion(&isrt); 543 wait_for_completion(&isrt);
497 544
498 /* start the tracing */ 545 /* start the tracing */
499 ret = trace->init(tr); 546 ret = tracer_init(trace, tr);
500 if (ret) { 547 if (ret) {
501 warn_failed_init_tracer(trace, ret); 548 warn_failed_init_tracer(trace, ret);
502 return ret; 549 return ret;
@@ -557,7 +604,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
557 int ret; 604 int ret;
558 605
559 /* start the tracing */ 606 /* start the tracing */
560 ret = trace->init(tr); 607 ret = tracer_init(trace, tr);
561 if (ret) { 608 if (ret) {
562 warn_failed_init_tracer(trace, ret); 609 warn_failed_init_tracer(trace, ret);
563 return ret; 610 return ret;
@@ -589,10 +636,10 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
589 int ret; 636 int ret;
590 637
591 /* start the tracing */ 638 /* start the tracing */
592 ret = trace->init(tr); 639 ret = tracer_init(trace, tr);
593 if (ret) { 640 if (ret) {
594 warn_failed_init_tracer(trace, ret); 641 warn_failed_init_tracer(trace, ret);
595 return 0; 642 return ret;
596 } 643 }
597 644
598 /* Sleep for a 1/10 of a second */ 645 /* Sleep for a 1/10 of a second */
@@ -604,6 +651,11 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
604 trace->reset(tr); 651 trace->reset(tr);
605 tracing_start(); 652 tracing_start();
606 653
654 if (!ret && !count) {
655 printk(KERN_CONT ".. no entries found ..");
656 ret = -1;
657 }
658
607 return ret; 659 return ret;
608} 660}
609#endif /* CONFIG_SYSPROF_TRACER */ 661#endif /* CONFIG_SYSPROF_TRACER */
@@ -616,7 +668,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
616 int ret; 668 int ret;
617 669
618 /* start the tracing */ 670 /* start the tracing */
619 ret = trace->init(tr); 671 ret = tracer_init(trace, tr);
620 if (ret) { 672 if (ret) {
621 warn_failed_init_tracer(trace, ret); 673 warn_failed_init_tracer(trace, ret);
622 return ret; 674 return ret;
@@ -631,6 +683,11 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
631 trace->reset(tr); 683 trace->reset(tr);
632 tracing_start(); 684 tracing_start();
633 685
686 if (!ret && !count) {
687 printk(KERN_CONT ".. no entries found ..");
688 ret = -1;
689 }
690
634 return ret; 691 return ret;
635} 692}
636#endif /* CONFIG_BRANCH_TRACER */ 693#endif /* CONFIG_BRANCH_TRACER */
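The selftests above now call tracer_init() instead of invoking trace->init() directly. That helper lives in kernel/trace/trace.c and is not part of this hunk; presumably it just resets the per-cpu buffers before handing over to the tracer, along these lines:

	int tracer_init(struct tracer *t, struct trace_array *tr)
	{
		/* start each selftest from an empty ring buffer */
		tracing_reset_online_cpus(tr);
		return t->init(tr);
	}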
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 000000000000..39310e3434ee
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
1/*
2 * Infrastructure for statistic tracing (histogram output).
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 * Based on the code from trace_branch.c which is
7 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
8 *
9 */
10
11
12#include <linux/list.h>
13#include <linux/debugfs.h>
14#include "trace_stat.h"
15#include "trace.h"
16
17
18/* List of stat entries from a tracer */
19struct trace_stat_list {
20 struct list_head list;
21 void *stat;
22};
23
24/* A stat session is the stats output in one file */
25struct tracer_stat_session {
26 struct list_head session_list;
27 struct tracer_stat *ts;
28 struct list_head stat_list;
29 struct mutex stat_mutex;
30 struct dentry *file;
31};
32
 33/* All of the sessions currently in use. Each stat file embeds one session */
34static LIST_HEAD(all_stat_sessions);
35static DEFINE_MUTEX(all_stat_sessions_mutex);
36
37/* The root directory for all stat files */
38static struct dentry *stat_dir;
39
40
41static void reset_stat_session(struct tracer_stat_session *session)
42{
43 struct trace_stat_list *node, *next;
44
45 list_for_each_entry_safe(node, next, &session->stat_list, list)
46 kfree(node);
47
48 INIT_LIST_HEAD(&session->stat_list);
49}
50
51static void destroy_session(struct tracer_stat_session *session)
52{
53 debugfs_remove(session->file);
54 reset_stat_session(session);
55 mutex_destroy(&session->stat_mutex);
56 kfree(session);
57}
58
59/*
60 * For tracers that don't provide a stat_cmp callback.
61 * This one will force an immediate insertion on tail of
62 * the list.
63 */
64static int dummy_cmp(void *p1, void *p2)
65{
66 return 1;
67}
68
69/*
70 * Initialize the stat list at each trace_stat file opening.
71 * All of these copies and sorting are required on all opening
72 * since the stats could have changed between two file sessions.
73 */
74static int stat_seq_init(struct tracer_stat_session *session)
75{
76 struct trace_stat_list *iter_entry, *new_entry;
77 struct tracer_stat *ts = session->ts;
78 void *prev_stat;
79 int ret = 0;
80 int i;
81
82 mutex_lock(&session->stat_mutex);
83 reset_stat_session(session);
84
85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp;
87
88 /*
89 * The first entry. Actually this is the second, but the first
90 * one (the stat_list head) is pointless.
91 */
92 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
93 if (!new_entry) {
94 ret = -ENOMEM;
95 goto exit;
96 }
97
98 INIT_LIST_HEAD(&new_entry->list);
99
100 list_add(&new_entry->list, &session->stat_list);
101
102 new_entry->stat = ts->stat_start();
103 prev_stat = new_entry->stat;
104
105 /*
106 * Iterate over the tracer stat entries and store them in a sorted
107 * list.
108 */
109 for (i = 1; ; i++) {
110 new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
111 if (!new_entry) {
112 ret = -ENOMEM;
113 goto exit_free_list;
114 }
115
116 INIT_LIST_HEAD(&new_entry->list);
117 new_entry->stat = ts->stat_next(prev_stat, i);
118
119 /* End of insertion */
120 if (!new_entry->stat)
121 break;
122
123 list_for_each_entry(iter_entry, &session->stat_list, list) {
124
 125 /* Insertion with a descending sort */
126 if (ts->stat_cmp(new_entry->stat,
127 iter_entry->stat) > 0) {
128
129 list_add_tail(&new_entry->list,
130 &iter_entry->list);
131 break;
132
 133 /* Smallest value so far: append at the tail of the list */
134 } else if (list_is_last(&iter_entry->list,
135 &session->stat_list)) {
136 list_add(&new_entry->list, &iter_entry->list);
137 break;
138 }
139 }
140
141 prev_stat = new_entry->stat;
142 }
143exit:
144 mutex_unlock(&session->stat_mutex);
145 return ret;
146
147exit_free_list:
148 reset_stat_session(session);
149 mutex_unlock(&session->stat_mutex);
150 return ret;
151}
152
153
154static void *stat_seq_start(struct seq_file *s, loff_t *pos)
155{
156 struct tracer_stat_session *session = s->private;
157
 158 /* Protect against a tracer switch or stat_list modification */
159 mutex_lock(&session->stat_mutex);
160
 161 /* If we are at the beginning of the file, print the headers */
162 if (!*pos && session->ts->stat_headers)
163 session->ts->stat_headers(s);
164
165 return seq_list_start(&session->stat_list, *pos);
166}
167
168static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
169{
170 struct tracer_stat_session *session = s->private;
171
172 return seq_list_next(p, &session->stat_list, pos);
173}
174
175static void stat_seq_stop(struct seq_file *s, void *p)
176{
177 struct tracer_stat_session *session = s->private;
178 mutex_unlock(&session->stat_mutex);
179}
180
181static int stat_seq_show(struct seq_file *s, void *v)
182{
183 struct tracer_stat_session *session = s->private;
184 struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
185
186 return session->ts->stat_show(s, l->stat);
187}
188
189static const struct seq_operations trace_stat_seq_ops = {
190 .start = stat_seq_start,
191 .next = stat_seq_next,
192 .stop = stat_seq_stop,
193 .show = stat_seq_show
194};
195
196/* The session stat is refilled and resorted at each stat file opening */
197static int tracing_stat_open(struct inode *inode, struct file *file)
198{
199 int ret;
200
201 struct tracer_stat_session *session = inode->i_private;
202
203 ret = seq_open(file, &trace_stat_seq_ops);
204 if (!ret) {
205 struct seq_file *m = file->private_data;
206 m->private = session;
207 ret = stat_seq_init(session);
208 }
209
210 return ret;
211}
212
213/*
214 * Avoid consuming memory with our now useless list.
215 */
216static int tracing_stat_release(struct inode *i, struct file *f)
217{
218 struct tracer_stat_session *session = i->i_private;
219
220 mutex_lock(&session->stat_mutex);
221 reset_stat_session(session);
222 mutex_unlock(&session->stat_mutex);
223
224 return 0;
225}
226
227static const struct file_operations tracing_stat_fops = {
228 .open = tracing_stat_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = tracing_stat_release
232};
233
234static int tracing_stat_init(void)
235{
236 struct dentry *d_tracing;
237
238 d_tracing = tracing_init_dentry();
239
240 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
241 if (!stat_dir)
242 pr_warning("Could not create debugfs "
243 "'trace_stat' entry\n");
244 return 0;
245}
246
247static int init_stat_file(struct tracer_stat_session *session)
248{
249 if (!stat_dir && tracing_stat_init())
250 return -ENODEV;
251
252 session->file = debugfs_create_file(session->ts->name, 0644,
253 stat_dir,
254 session, &tracing_stat_fops);
255 if (!session->file)
256 return -ENOMEM;
257 return 0;
258}
259
260int register_stat_tracer(struct tracer_stat *trace)
261{
262 struct tracer_stat_session *session, *node, *tmp;
263 int ret;
264
265 if (!trace)
266 return -EINVAL;
267
268 if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
269 return -EINVAL;
270
271 /* Already registered? */
272 mutex_lock(&all_stat_sessions_mutex);
273 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
274 if (node->ts == trace) {
275 mutex_unlock(&all_stat_sessions_mutex);
276 return -EINVAL;
277 }
278 }
279 mutex_unlock(&all_stat_sessions_mutex);
280
281 /* Init the session */
282 session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
283 if (!session)
284 return -ENOMEM;
285
286 session->ts = trace;
287 INIT_LIST_HEAD(&session->session_list);
288 INIT_LIST_HEAD(&session->stat_list);
289 mutex_init(&session->stat_mutex);
290 session->file = NULL;
291
292 ret = init_stat_file(session);
293 if (ret) {
294 destroy_session(session);
295 return ret;
296 }
297
298 /* Register */
299 mutex_lock(&all_stat_sessions_mutex);
300 list_add_tail(&session->session_list, &all_stat_sessions);
301 mutex_unlock(&all_stat_sessions_mutex);
302
303 return 0;
304}
305
306void unregister_stat_tracer(struct tracer_stat *trace)
307{
308 struct tracer_stat_session *node, *tmp;
309
310 mutex_lock(&all_stat_sessions_mutex);
311 list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
312 if (node->ts == trace) {
313 list_del(&node->session_list);
314 destroy_session(node);
315 break;
316 }
317 }
318 mutex_unlock(&all_stat_sessions_mutex);
319}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 000000000000..202274cf7f3d
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
1#ifndef __TRACE_STAT_H
2#define __TRACE_STAT_H
3
4#include <linux/seq_file.h>
5
6/*
7 * If you want to provide a stat file (one-shot statistics), fill
 8 * an iterator with stat_start/stat_next and a stat_show callback.
 9 * The other callbacks are optional.
10 */
11struct tracer_stat {
12 /* The name of your stat file */
13 const char *name;
14 /* Iteration over statistic entries */
15 void *(*stat_start)(void);
16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s);
23};
24
25/*
26 * Destroy or create a stat file
27 */
28extern int register_stat_tracer(struct tracer_stat *trace);
29extern void unregister_stat_tracer(struct tracer_stat *trace);
30
31#endif /* __TRACE_STAT_H */
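As the comment at the top of this header says, a minimal user only needs stat_start/stat_next/stat_show. A hypothetical registration sketch (the foo_* names and the fixed-size array are illustrative; the workqueue tracer added later in this series is a real in-tree user):

	/* hypothetical per-item counters exposed through trace_stat/foo */
	static unsigned long foo_hits[16];

	static void *foo_stat_start(void)
	{
		return &foo_hits[0];
	}

	static void *foo_stat_next(void *prev, int idx)
	{
		/* idx starts at 1 for the second entry; NULL ends the iteration */
		return idx < 16 ? &foo_hits[idx] : NULL;
	}

	static int foo_stat_show(struct seq_file *s, void *p)
	{
		seq_printf(s, "%lu\n", *(unsigned long *)p);
		return 0;
	}

	static struct tracer_stat foo_stats = {
		.name		= "foo",	/* appears as trace_stat/foo */
		.stat_start	= foo_stat_start,
		.stat_next	= foo_stat_next,
		.stat_show	= foo_stat_show,
	};

	static int __init foo_stat_init(void)
	{
		return register_stat_tracer(&foo_stats);
	}
	fs_initcall(foo_stat_init);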
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
new file mode 100644
index 000000000000..c72e599230ff
--- /dev/null
+++ b/kernel/trace/trace_syscalls.c
@@ -0,0 +1,243 @@
1#include <linux/kernel.h>
2#include <linux/ftrace.h>
3#include <asm/syscall.h>
4
5#include "trace_output.h"
6#include "trace.h"
7
8static atomic_t refcount;
9
 10/* Our tracer options */
11enum {
12 TRACE_SYSCALLS_OPT_TYPES = 0x1,
13};
14
15static struct tracer_opt syscalls_opts[] = {
16 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
17 { }
18};
19
20static struct tracer_flags syscalls_flags = {
21 .val = 0, /* By default: no args types */
22 .opts = syscalls_opts
23};
24
25enum print_line_t
26print_syscall_enter(struct trace_iterator *iter, int flags)
27{
28 struct trace_seq *s = &iter->seq;
29 struct trace_entry *ent = iter->ent;
30 struct syscall_trace_enter *trace;
31 struct syscall_metadata *entry;
32 int i, ret, syscall;
33
34 trace_assign_type(trace, ent);
35
36 syscall = trace->nr;
37
38 entry = syscall_nr_to_meta(syscall);
39 if (!entry)
40 goto end;
41
42 ret = trace_seq_printf(s, "%s(", entry->name);
43 if (!ret)
44 return TRACE_TYPE_PARTIAL_LINE;
45
46 for (i = 0; i < entry->nb_args; i++) {
47 /* parameter types */
48 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
49 ret = trace_seq_printf(s, "%s ", entry->types[i]);
50 if (!ret)
51 return TRACE_TYPE_PARTIAL_LINE;
52 }
53 /* parameter values */
54 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
55 trace->args[i],
56 i == entry->nb_args - 1 ? ")" : ",");
57 if (!ret)
58 return TRACE_TYPE_PARTIAL_LINE;
59 }
60
61end:
62 trace_seq_printf(s, "\n");
63 return TRACE_TYPE_HANDLED;
64}
65
66enum print_line_t
67print_syscall_exit(struct trace_iterator *iter, int flags)
68{
69 struct trace_seq *s = &iter->seq;
70 struct trace_entry *ent = iter->ent;
71 struct syscall_trace_exit *trace;
72 int syscall;
73 struct syscall_metadata *entry;
74 int ret;
75
76 trace_assign_type(trace, ent);
77
78 syscall = trace->nr;
79
80 entry = syscall_nr_to_meta(syscall);
81 if (!entry) {
82 trace_seq_printf(s, "\n");
83 return TRACE_TYPE_HANDLED;
84 }
85
86 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
87 trace->ret);
88 if (!ret)
89 return TRACE_TYPE_PARTIAL_LINE;
90
91 return TRACE_TYPE_HANDLED;
92}
93
94void start_ftrace_syscalls(void)
95{
96 unsigned long flags;
97 struct task_struct *g, *t;
98
99 if (atomic_inc_return(&refcount) != 1)
100 goto out;
101
102 arch_init_ftrace_syscalls();
103 read_lock_irqsave(&tasklist_lock, flags);
104
105 do_each_thread(g, t) {
106 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
107 } while_each_thread(g, t);
108
109 read_unlock_irqrestore(&tasklist_lock, flags);
110out:
111 atomic_dec(&refcount);
112}
113
114void stop_ftrace_syscalls(void)
115{
116 unsigned long flags;
117 struct task_struct *g, *t;
118
119 if (atomic_dec_return(&refcount))
120 goto out;
121
122 read_lock_irqsave(&tasklist_lock, flags);
123
124 do_each_thread(g, t) {
125 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
126 } while_each_thread(g, t);
127
128 read_unlock_irqrestore(&tasklist_lock, flags);
129out:
130 atomic_inc(&refcount);
131}
132
133void ftrace_syscall_enter(struct pt_regs *regs)
134{
135 struct syscall_trace_enter *entry;
136 struct syscall_metadata *sys_data;
137 struct ring_buffer_event *event;
138 int size;
139 int syscall_nr;
140 int cpu;
141
142 syscall_nr = syscall_get_nr(current, regs);
143
144 cpu = raw_smp_processor_id();
145
146 sys_data = syscall_nr_to_meta(syscall_nr);
147 if (!sys_data)
148 return;
149
150 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
151
152 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
153 0, 0);
154 if (!event)
155 return;
156
157 entry = ring_buffer_event_data(event);
158 entry->nr = syscall_nr;
159 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
160
161 trace_current_buffer_unlock_commit(event, 0, 0);
162 trace_wake_up();
163}
164
165void ftrace_syscall_exit(struct pt_regs *regs)
166{
167 struct syscall_trace_exit *entry;
168 struct syscall_metadata *sys_data;
169 struct ring_buffer_event *event;
170 int syscall_nr;
171 int cpu;
172
173 syscall_nr = syscall_get_nr(current, regs);
174
175 cpu = raw_smp_processor_id();
176
177 sys_data = syscall_nr_to_meta(syscall_nr);
178 if (!sys_data)
179 return;
180
181 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
182 sizeof(*entry), 0, 0);
183 if (!event)
184 return;
185
186 entry = ring_buffer_event_data(event);
187 entry->nr = syscall_nr;
188 entry->ret = syscall_get_return_value(current, regs);
189
190 trace_current_buffer_unlock_commit(event, 0, 0);
191 trace_wake_up();
192}
193
194static int init_syscall_tracer(struct trace_array *tr)
195{
196 start_ftrace_syscalls();
197
198 return 0;
199}
200
201static void reset_syscall_tracer(struct trace_array *tr)
202{
203 stop_ftrace_syscalls();
204}
205
206static struct trace_event syscall_enter_event = {
207 .type = TRACE_SYSCALL_ENTER,
208 .trace = print_syscall_enter,
209};
210
211static struct trace_event syscall_exit_event = {
212 .type = TRACE_SYSCALL_EXIT,
213 .trace = print_syscall_exit,
214};
215
216static struct tracer syscall_tracer __read_mostly = {
217 .name = "syscall",
218 .init = init_syscall_tracer,
219 .reset = reset_syscall_tracer,
220 .flags = &syscalls_flags,
221};
222
223__init int register_ftrace_syscalls(void)
224{
225 int ret;
226
227 ret = register_ftrace_event(&syscall_enter_event);
228 if (!ret) {
229 printk(KERN_WARNING "event %d failed to register\n",
230 syscall_enter_event.type);
231 WARN_ON_ONCE(1);
232 }
233
234 ret = register_ftrace_event(&syscall_exit_event);
235 if (!ret) {
236 printk(KERN_WARNING "event %d failed to register\n",
237 syscall_exit_event.type);
238 WARN_ON_ONCE(1);
239 }
240
241 return register_tracer(&syscall_tracer);
242}
243device_initcall(register_ftrace_syscalls);
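With the print callbacks above, a traced read(2) shows up in the trace output roughly as follows (values are illustrative; with the syscall_arg_type option set, each argument is also prefixed with its C type):

	sys_read(fd: 3, buf: 7fff5ca0, count: 400)
	sys_read -> 0x400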
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index eaca5ad803ff..91fd19c2149f 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -88,7 +88,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
88 } 88 }
89} 89}
90 90
91const static struct stacktrace_ops backtrace_ops = { 91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning, 92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol, 93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack, 94 .stack = backtrace_stack,
@@ -226,15 +226,6 @@ static void stop_stack_timers(void)
226 stop_stack_timer(cpu); 226 stop_stack_timer(cpu);
227} 227}
228 228
229static void start_stack_trace(struct trace_array *tr)
230{
231 mutex_lock(&sample_timer_lock);
232 tracing_reset_online_cpus(tr);
233 start_stack_timers();
234 tracer_enabled = 1;
235 mutex_unlock(&sample_timer_lock);
236}
237
238static void stop_stack_trace(struct trace_array *tr) 229static void stop_stack_trace(struct trace_array *tr)
239{ 230{
240 mutex_lock(&sample_timer_lock); 231 mutex_lock(&sample_timer_lock);
@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace_array *tr)
247{ 238{
248 sysprof_trace = tr; 239 sysprof_trace = tr;
249 240
250 start_stack_trace(tr); 241 tracing_start_cmdline_record();
242
243 mutex_lock(&sample_timer_lock);
244 start_stack_timers();
245 tracer_enabled = 1;
246 mutex_unlock(&sample_timer_lock);
251 return 0; 247 return 0;
252} 248}
253 249
254static void stack_trace_reset(struct trace_array *tr) 250static void stack_trace_reset(struct trace_array *tr)
255{ 251{
252 tracing_stop_cmdline_record();
256 stop_stack_trace(tr); 253 stop_stack_trace(tr);
257} 254}
258 255
@@ -317,7 +314,7 @@ sysprof_sample_write(struct file *filp, const char __user *ubuf,
317 return cnt; 314 return cnt;
318} 315}
319 316
320static struct file_operations sysprof_sample_fops = { 317static const struct file_operations sysprof_sample_fops = {
321 .read = sysprof_sample_read, 318 .read = sysprof_sample_read,
322 .write = sysprof_sample_write, 319 .write = sysprof_sample_write,
323}; 320};
@@ -330,5 +327,5 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
330 d_tracer, NULL, &sysprof_sample_fops); 327 d_tracer, NULL, &sysprof_sample_fops);
331 if (entry) 328 if (entry)
332 return; 329 return;
333 pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); 330 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
334} 331}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..fb5ccac8bbc0
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,280 @@
1/*
2 * Workqueue statistical tracer.
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8
9#include <trace/workqueue.h>
10#include <linux/list.h>
11#include <linux/percpu.h>
12#include "trace_stat.h"
13#include "trace.h"
14
15
16/* A cpu workqueue thread */
17struct cpu_workqueue_stats {
18 struct list_head list;
19/* Useful to know if we print the cpu headers */
20 bool first_entry;
21 int cpu;
22 pid_t pid;
 23/* Can be inserted from interrupt or user context, needs to be atomic */
24 atomic_t inserted;
25/*
26 * Don't need to be atomic, works are serialized in a single workqueue thread
27 * on a single CPU.
28 */
29 unsigned int executed;
30};
31
32/* List of workqueue threads on one cpu */
33struct workqueue_global_stats {
34 struct list_head list;
35 spinlock_t lock;
36};
37
38/* Don't need a global lock because allocated before the workqueues, and
39 * never freed.
40 */
41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
43
44/* Insertion of a work */
45static void
46probe_workqueue_insertion(struct task_struct *wq_thread,
47 struct work_struct *work)
48{
49 int cpu = cpumask_first(&wq_thread->cpus_allowed);
50 struct cpu_workqueue_stats *node, *next;
51 unsigned long flags;
52
53 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
54 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
55 list) {
56 if (node->pid == wq_thread->pid) {
57 atomic_inc(&node->inserted);
58 goto found;
59 }
60 }
61 pr_debug("trace_workqueue: entry not found\n");
62found:
63 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
64}
65
66/* Execution of a work */
67static void
68probe_workqueue_execution(struct task_struct *wq_thread,
69 struct work_struct *work)
70{
71 int cpu = cpumask_first(&wq_thread->cpus_allowed);
72 struct cpu_workqueue_stats *node, *next;
73 unsigned long flags;
74
75 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
76 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
77 list) {
78 if (node->pid == wq_thread->pid) {
79 node->executed++;
80 goto found;
81 }
82 }
83 pr_debug("trace_workqueue: entry not found\n");
84found:
85 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
86}
87
88/* Creation of a cpu workqueue thread */
89static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
90{
91 struct cpu_workqueue_stats *cws;
92 unsigned long flags;
93
94 WARN_ON(cpu < 0);
95
96 /* Workqueues are sometimes created in atomic context */
97 cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
98 if (!cws) {
99 pr_warning("trace_workqueue: not enough memory\n");
100 return;
101 }
102 INIT_LIST_HEAD(&cws->list);
103 cws->cpu = cpu;
104
105 cws->pid = wq_thread->pid;
106
107 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
108 if (list_empty(&workqueue_cpu_stat(cpu)->list))
109 cws->first_entry = true;
110 list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
111 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
112}
113
114/* Destruction of a cpu workqueue thread */
115static void probe_workqueue_destruction(struct task_struct *wq_thread)
116{
 117 /* A workqueue thread only executes on one cpu */
118 int cpu = cpumask_first(&wq_thread->cpus_allowed);
119 struct cpu_workqueue_stats *node, *next;
120 unsigned long flags;
121
122 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
123 list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
124 list) {
125 if (node->pid == wq_thread->pid) {
126 list_del(&node->list);
127 kfree(node);
128 goto found;
129 }
130 }
131
 132 pr_debug("trace_workqueue: couldn't find workqueue to destroy\n");
133found:
134 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
135
136}
137
138static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
139{
140 unsigned long flags;
141 struct cpu_workqueue_stats *ret = NULL;
142
143
144 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
145
146 if (!list_empty(&workqueue_cpu_stat(cpu)->list))
147 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
148 struct cpu_workqueue_stats, list);
149
150 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
151
152 return ret;
153}
154
155static void *workqueue_stat_start(void)
156{
157 int cpu;
158 void *ret = NULL;
159
160 for_each_possible_cpu(cpu) {
161 ret = workqueue_stat_start_cpu(cpu);
162 if (ret)
163 return ret;
164 }
165 return NULL;
166}
167
168static void *workqueue_stat_next(void *prev, int idx)
169{
170 struct cpu_workqueue_stats *prev_cws = prev;
171 int cpu = prev_cws->cpu;
172 unsigned long flags;
173 void *ret = NULL;
174
175 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
176 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
177 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
178 do {
179 cpu = cpumask_next(cpu, cpu_possible_mask);
180 if (cpu >= nr_cpu_ids)
181 return NULL;
182 } while (!(ret = workqueue_stat_start_cpu(cpu)));
183 return ret;
184 }
185 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
186
187 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
188 list);
189}
190
191static int workqueue_stat_show(struct seq_file *s, void *p)
192{
193 struct cpu_workqueue_stats *cws = p;
194 unsigned long flags;
195 int cpu = cws->cpu;
196 struct task_struct *tsk = find_task_by_vpid(cws->pid);
197
198 seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
199 atomic_read(&cws->inserted),
200 cws->executed,
201 tsk ? tsk->comm : "<...>");
202
203 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
204 if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
205 seq_printf(s, "\n");
206 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
207
208 return 0;
209}
210
211static int workqueue_stat_headers(struct seq_file *s)
212{
213 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
214 seq_printf(s, "# | | | |\n\n");
215 return 0;
216}
217
218struct tracer_stat workqueue_stats __read_mostly = {
219 .name = "workqueues",
220 .stat_start = workqueue_stat_start,
221 .stat_next = workqueue_stat_next,
222 .stat_show = workqueue_stat_show,
223 .stat_headers = workqueue_stat_headers
224};
225
226
227int __init stat_workqueue_init(void)
228{
229 if (register_stat_tracer(&workqueue_stats)) {
230 pr_warning("Unable to register workqueue stat tracer\n");
231 return 1;
232 }
233
234 return 0;
235}
236fs_initcall(stat_workqueue_init);
237
238/*
239 * Workqueues are created very early, just after pre-smp initcalls.
240 * So we must register our tracepoints at this stage.
241 */
242int __init trace_workqueue_early_init(void)
243{
244 int ret, cpu;
245
246 ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
247 if (ret)
248 goto out;
249
250 ret = register_trace_workqueue_execution(probe_workqueue_execution);
251 if (ret)
252 goto no_insertion;
253
254 ret = register_trace_workqueue_creation(probe_workqueue_creation);
255 if (ret)
256 goto no_execution;
257
258 ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
259 if (ret)
260 goto no_creation;
261
262 for_each_possible_cpu(cpu) {
263 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
264 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
265 }
266
267 return 0;
268
269no_creation:
270 unregister_trace_workqueue_creation(probe_workqueue_creation);
271no_execution:
272 unregister_trace_workqueue_execution(probe_workqueue_execution);
273no_insertion:
274 unregister_trace_workqueue_insertion(probe_workqueue_insertion);
275out:
276 pr_warning("trace_workqueue: unable to trace workqueues\n");
277
278 return 1;
279}
280early_initcall(trace_workqueue_early_init);
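Given workqueue_stat_headers() and workqueue_stat_show() above, reading trace_stat/workqueues under the tracing debugfs directory yields output shaped like this (counts are illustrative):

	# CPU  INSERTED  EXECUTED   NAME
	#  |      |         |          |

	  0       387       387      events/0
	  0        12        12      khelper
	  1       401       401      events/1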
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1f0c509b40d3..e53ee18ef431 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h> 35#include <linux/lockdep.h>
36#include <trace/workqueue.h>
36 37
37/* 38/*
38 * The per-CPU workqueue (if single thread, we always use the first 39 * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
125 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); 126 return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
126} 127}
127 128
129DEFINE_TRACE(workqueue_insertion);
130
128static void insert_work(struct cpu_workqueue_struct *cwq, 131static void insert_work(struct cpu_workqueue_struct *cwq,
129 struct work_struct *work, struct list_head *head) 132 struct work_struct *work, struct list_head *head)
130{ 133{
134 trace_workqueue_insertion(cwq->thread, work);
135
131 set_wq_data(work, cwq); 136 set_wq_data(work, cwq);
132 /* 137 /*
133 * Ensure that we get the right work->data if we see the 138 * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
259} 264}
260EXPORT_SYMBOL_GPL(queue_delayed_work_on); 265EXPORT_SYMBOL_GPL(queue_delayed_work_on);
261 266
267DEFINE_TRACE(workqueue_execution);
268
262static void run_workqueue(struct cpu_workqueue_struct *cwq) 269static void run_workqueue(struct cpu_workqueue_struct *cwq)
263{ 270{
264 spin_lock_irq(&cwq->lock); 271 spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
284 */ 291 */
285 struct lockdep_map lockdep_map = work->lockdep_map; 292 struct lockdep_map lockdep_map = work->lockdep_map;
286#endif 293#endif
287 294 trace_workqueue_execution(cwq->thread, work);
288 cwq->current_work = work; 295 cwq->current_work = work;
289 list_del_init(cwq->worklist.next); 296 list_del_init(cwq->worklist.next);
290 spin_unlock_irq(&cwq->lock); 297 spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
765 return cwq; 772 return cwq;
766} 773}
767 774
775DEFINE_TRACE(workqueue_creation);
776
768static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 777static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
769{ 778{
770 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 779 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
787 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 796 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
788 cwq->thread = p; 797 cwq->thread = p;
789 798
799 trace_workqueue_creation(cwq->thread, cpu);
800
790 return 0; 801 return 0;
791} 802}
792 803
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
868} 879}
869EXPORT_SYMBOL_GPL(__create_workqueue_key); 880EXPORT_SYMBOL_GPL(__create_workqueue_key);
870 881
882DEFINE_TRACE(workqueue_destruction);
883
871static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 884static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
872{ 885{
873 /* 886 /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
891 * checks list_empty(), and a "normal" queue_work() can't use 904 * checks list_empty(), and a "normal" queue_work() can't use
892 * a dead CPU. 905 * a dead CPU.
893 */ 906 */
907 trace_workqueue_destruction(cwq->thread);
894 kthread_stop(cwq->thread); 908 kthread_stop(cwq->thread);
895 cwq->thread = NULL; 909 cwq->thread = NULL;
896} 910}
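The DEFINE_TRACE()/trace_workqueue_*() calls added above pair with declarations in the new <trace/workqueue.h> header, which does not appear in this excerpt. A sketch of one such declaration, assuming the TPPROTO/TPARGS tracepoint macros of this tree (the other three tracepoints are analogous):

	DECLARE_TRACE(workqueue_insertion,
		TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
		TPARGS(wq_thread, work));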
diff --git a/lib/Kconfig b/lib/Kconfig
index daa481824d9c..206f36a9efb4 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -2,6 +2,9 @@
2# Library configuration 2# Library configuration
3# 3#
4 4
5config BINARY_PRINTF
6 def_bool n
7
5menu "Library routines" 8menu "Library routines"
6 9
7config BITREVERSE 10config BITREVERSE
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1bcf9cd4baa0..a0879b2e8b6b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -402,7 +402,7 @@ config LOCKDEP
402 bool 402 bool
403 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 403 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
404 select STACKTRACE 404 select STACKTRACE
405 select FRAME_POINTER if !X86 && !MIPS && !PPC 405 select FRAME_POINTER if !MIPS && !PPC
406 select KALLSYMS 406 select KALLSYMS
407 select KALLSYMS_ALL 407 select KALLSYMS_ALL
408 408
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 280332c1827c..619313ed6c46 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -157,11 +157,11 @@ static void init_shared_classes(void)
157#define SOFTIRQ_ENTER() \ 157#define SOFTIRQ_ENTER() \
158 local_bh_disable(); \ 158 local_bh_disable(); \
159 local_irq_disable(); \ 159 local_irq_disable(); \
160 trace_softirq_enter(); \ 160 lockdep_softirq_enter(); \
161 WARN_ON(!in_softirq()); 161 WARN_ON(!in_softirq());
162 162
163#define SOFTIRQ_EXIT() \ 163#define SOFTIRQ_EXIT() \
164 trace_softirq_exit(); \ 164 lockdep_softirq_exit(); \
165 local_irq_enable(); \ 165 local_irq_enable(); \
166 local_bh_enable(); 166 local_bh_enable();
167 167
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0fbd0121d91d..dc1674377009 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -396,7 +396,38 @@ static noinline char* put_dec(char *buf, unsigned long long num)
396#define SMALL 32 /* Must be 32 == 0x20 */ 396#define SMALL 32 /* Must be 32 == 0x20 */
397#define SPECIAL 64 /* 0x */ 397#define SPECIAL 64 /* 0x */
398 398
399static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type) 399enum format_type {
400 FORMAT_TYPE_NONE, /* Just a string part */
401 FORMAT_TYPE_WITDH,
402 FORMAT_TYPE_PRECISION,
403 FORMAT_TYPE_CHAR,
404 FORMAT_TYPE_STR,
405 FORMAT_TYPE_PTR,
406 FORMAT_TYPE_PERCENT_CHAR,
407 FORMAT_TYPE_INVALID,
408 FORMAT_TYPE_LONG_LONG,
409 FORMAT_TYPE_ULONG,
410 FORMAT_TYPE_LONG,
411 FORMAT_TYPE_USHORT,
412 FORMAT_TYPE_SHORT,
413 FORMAT_TYPE_UINT,
414 FORMAT_TYPE_INT,
415 FORMAT_TYPE_NRCHARS,
416 FORMAT_TYPE_SIZE_T,
417 FORMAT_TYPE_PTRDIFF
418};
419
420struct printf_spec {
421 enum format_type type;
422 int flags; /* flags to number() */
423 int field_width; /* width of output field */
424 int base;
425 int precision; /* # of digits/chars */
426 int qualifier;
427};
428
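As an illustration (not part of the patch), after format_decode() below has parsed a "%#08lx" conversion, the spec handed to number() would hold roughly:

	struct printf_spec spec = {
		.type		= FORMAT_TYPE_ULONG,		/* 'l' qualifier, unsigned 'x' */
		.flags		= SPECIAL | ZEROPAD | SMALL,	/* '#', '0', lowercase hex */
		.field_width	= 8,
		.base		= 16,
		.precision	= -1,				/* no precision given */
		.qualifier	= 'l',
	};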
429static char *number(char *buf, char *end, unsigned long long num,
430 struct printf_spec spec)
400{ 431{
401 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ 432 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
402 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 433 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -404,32 +435,32 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
404 char tmp[66]; 435 char tmp[66];
405 char sign; 436 char sign;
406 char locase; 437 char locase;
407 int need_pfx = ((type & SPECIAL) && base != 10); 438 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
408 int i; 439 int i;
409 440
410 /* locase = 0 or 0x20. ORing digits or letters with 'locase' 441 /* locase = 0 or 0x20. ORing digits or letters with 'locase'
411 * produces same digits or (maybe lowercased) letters */ 442 * produces same digits or (maybe lowercased) letters */
412 locase = (type & SMALL); 443 locase = (spec.flags & SMALL);
413 if (type & LEFT) 444 if (spec.flags & LEFT)
414 type &= ~ZEROPAD; 445 spec.flags &= ~ZEROPAD;
415 sign = 0; 446 sign = 0;
416 if (type & SIGN) { 447 if (spec.flags & SIGN) {
417 if ((signed long long) num < 0) { 448 if ((signed long long) num < 0) {
418 sign = '-'; 449 sign = '-';
419 num = - (signed long long) num; 450 num = - (signed long long) num;
420 size--; 451 spec.field_width--;
421 } else if (type & PLUS) { 452 } else if (spec.flags & PLUS) {
422 sign = '+'; 453 sign = '+';
423 size--; 454 spec.field_width--;
424 } else if (type & SPACE) { 455 } else if (spec.flags & SPACE) {
425 sign = ' '; 456 sign = ' ';
426 size--; 457 spec.field_width--;
427 } 458 }
428 } 459 }
429 if (need_pfx) { 460 if (need_pfx) {
430 size--; 461 spec.field_width--;
431 if (base == 16) 462 if (spec.base == 16)
432 size--; 463 spec.field_width--;
433 } 464 }
434 465
435 /* generate full string in tmp[], in reverse order */ 466 /* generate full string in tmp[], in reverse order */
@@ -441,10 +472,10 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
441 tmp[i++] = (digits[do_div(num,base)] | locase); 472 tmp[i++] = (digits[do_div(num,base)] | locase);
442 } while (num != 0); 473 } while (num != 0);
443 */ 474 */
444 else if (base != 10) { /* 8 or 16 */ 475 else if (spec.base != 10) { /* 8 or 16 */
445 int mask = base - 1; 476 int mask = spec.base - 1;
446 int shift = 3; 477 int shift = 3;
447 if (base == 16) shift = 4; 478 if (spec.base == 16) shift = 4;
448 do { 479 do {
449 tmp[i++] = (digits[((unsigned char)num) & mask] | locase); 480 tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
450 num >>= shift; 481 num >>= shift;
@@ -454,12 +485,12 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
454 } 485 }
455 486
456 /* printing 100 using %2d gives "100", not "00" */ 487 /* printing 100 using %2d gives "100", not "00" */
457 if (i > precision) 488 if (i > spec.precision)
458 precision = i; 489 spec.precision = i;
459 /* leading space padding */ 490 /* leading space padding */
460 size -= precision; 491 spec.field_width -= spec.precision;
461 if (!(type & (ZEROPAD+LEFT))) { 492 if (!(spec.flags & (ZEROPAD+LEFT))) {
462 while(--size >= 0) { 493 while(--spec.field_width >= 0) {
463 if (buf < end) 494 if (buf < end)
464 *buf = ' '; 495 *buf = ' ';
465 ++buf; 496 ++buf;
@@ -476,23 +507,23 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
476 if (buf < end) 507 if (buf < end)
477 *buf = '0'; 508 *buf = '0';
478 ++buf; 509 ++buf;
479 if (base == 16) { 510 if (spec.base == 16) {
480 if (buf < end) 511 if (buf < end)
481 *buf = ('X' | locase); 512 *buf = ('X' | locase);
482 ++buf; 513 ++buf;
483 } 514 }
484 } 515 }
485 /* zero or space padding */ 516 /* zero or space padding */
486 if (!(type & LEFT)) { 517 if (!(spec.flags & LEFT)) {
487 char c = (type & ZEROPAD) ? '0' : ' '; 518 char c = (spec.flags & ZEROPAD) ? '0' : ' ';
488 while (--size >= 0) { 519 while (--spec.field_width >= 0) {
489 if (buf < end) 520 if (buf < end)
490 *buf = c; 521 *buf = c;
491 ++buf; 522 ++buf;
492 } 523 }
493 } 524 }
494 /* hmm even more zero padding? */ 525 /* hmm even more zero padding? */
495 while (i <= --precision) { 526 while (i <= --spec.precision) {
496 if (buf < end) 527 if (buf < end)
497 *buf = '0'; 528 *buf = '0';
498 ++buf; 529 ++buf;
@@ -504,7 +535,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
504 ++buf; 535 ++buf;
505 } 536 }
506 /* trailing space padding */ 537 /* trailing space padding */
507 while (--size >= 0) { 538 while (--spec.field_width >= 0) {
508 if (buf < end) 539 if (buf < end)
509 *buf = ' '; 540 *buf = ' ';
510 ++buf; 541 ++buf;
@@ -512,17 +543,17 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
512 return buf; 543 return buf;
513} 544}
514 545
515static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags) 546static char *string(char *buf, char *end, char *s, struct printf_spec spec)
516{ 547{
517 int len, i; 548 int len, i;
518 549
519 if ((unsigned long)s < PAGE_SIZE) 550 if ((unsigned long)s < PAGE_SIZE)
520 s = "<NULL>"; 551 s = "<NULL>";
521 552
522 len = strnlen(s, precision); 553 len = strnlen(s, spec.precision);
523 554
524 if (!(flags & LEFT)) { 555 if (!(spec.flags & LEFT)) {
525 while (len < field_width--) { 556 while (len < spec.field_width--) {
526 if (buf < end) 557 if (buf < end)
527 *buf = ' '; 558 *buf = ' ';
528 ++buf; 559 ++buf;
@@ -533,7 +564,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
533 *buf = *s; 564 *buf = *s;
534 ++buf; ++s; 565 ++buf; ++s;
535 } 566 }
536 while (len < field_width--) { 567 while (len < spec.field_width--) {
537 if (buf < end) 568 if (buf < end)
538 *buf = ' '; 569 *buf = ' ';
539 ++buf; 570 ++buf;
@@ -541,21 +572,24 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
541 return buf; 572 return buf;
542} 573}
543 574
544static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags) 575static char *symbol_string(char *buf, char *end, void *ptr,
576 struct printf_spec spec)
545{ 577{
546 unsigned long value = (unsigned long) ptr; 578 unsigned long value = (unsigned long) ptr;
547#ifdef CONFIG_KALLSYMS 579#ifdef CONFIG_KALLSYMS
548 char sym[KSYM_SYMBOL_LEN]; 580 char sym[KSYM_SYMBOL_LEN];
549 sprint_symbol(sym, value); 581 sprint_symbol(sym, value);
550 return string(buf, end, sym, field_width, precision, flags); 582 return string(buf, end, sym, spec);
551#else 583#else
552 field_width = 2*sizeof(void *); 584 spec.field_width = 2*sizeof(void *);
553 flags |= SPECIAL | SMALL | ZEROPAD; 585 spec.flags |= SPECIAL | SMALL | ZEROPAD;
554 return number(buf, end, value, 16, field_width, precision, flags); 586 spec.base = 16;
587 return number(buf, end, value, spec);
555#endif 588#endif
556} 589}
557 590
558static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags) 591static char *resource_string(char *buf, char *end, struct resource *res,
592 struct printf_spec spec)
559{ 593{
560#ifndef IO_RSRC_PRINTK_SIZE 594#ifndef IO_RSRC_PRINTK_SIZE
561#define IO_RSRC_PRINTK_SIZE 4 595#define IO_RSRC_PRINTK_SIZE 4
@@ -564,7 +598,11 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
564#ifndef MEM_RSRC_PRINTK_SIZE 598#ifndef MEM_RSRC_PRINTK_SIZE
565#define MEM_RSRC_PRINTK_SIZE 8 599#define MEM_RSRC_PRINTK_SIZE 8
566#endif 600#endif
567 601 struct printf_spec num_spec = {
602 .base = 16,
603 .precision = -1,
604 .flags = SPECIAL | SMALL | ZEROPAD,
605 };
568 /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ 606 /* room for the actual numbers, the two "0x", -, [, ] and the final zero */
569 char sym[4*sizeof(resource_size_t) + 8]; 607 char sym[4*sizeof(resource_size_t) + 8];
570 char *p = sym, *pend = sym + sizeof(sym); 608 char *p = sym, *pend = sym + sizeof(sym);
@@ -576,17 +614,18 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
576 size = MEM_RSRC_PRINTK_SIZE; 614 size = MEM_RSRC_PRINTK_SIZE;
577 615
578 *p++ = '['; 616 *p++ = '[';
579 p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD); 617 num_spec.field_width = size;
618 p = number(p, pend, res->start, num_spec);
580 *p++ = '-'; 619 *p++ = '-';
581 p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD); 620 p = number(p, pend, res->end, num_spec);
582 *p++ = ']'; 621 *p++ = ']';
583 *p = 0; 622 *p = 0;
584 623
585 return string(buf, end, sym, field_width, precision, flags); 624 return string(buf, end, sym, spec);
586} 625}
587 626
588static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width, 627static char *mac_address_string(char *buf, char *end, u8 *addr,
589 int precision, int flags) 628 struct printf_spec spec)
590{ 629{
591 char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ 630 char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */
592 char *p = mac_addr; 631 char *p = mac_addr;
@@ -594,16 +633,17 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width,
594 633
595 for (i = 0; i < 6; i++) { 634 for (i = 0; i < 6; i++) {
596 p = pack_hex_byte(p, addr[i]); 635 p = pack_hex_byte(p, addr[i]);
597 if (!(flags & SPECIAL) && i != 5) 636 if (!(spec.flags & SPECIAL) && i != 5)
598 *p++ = ':'; 637 *p++ = ':';
599 } 638 }
600 *p = '\0'; 639 *p = '\0';
640 spec.flags &= ~SPECIAL;
601 641
602 return string(buf, end, mac_addr, field_width, precision, flags & ~SPECIAL); 642 return string(buf, end, mac_addr, spec);
603} 643}
604 644
605static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width, 645static char *ip6_addr_string(char *buf, char *end, u8 *addr,
606 int precision, int flags) 646 struct printf_spec spec)
607{ 647{
608 char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */ 648 char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */
609 char *p = ip6_addr; 649 char *p = ip6_addr;
@@ -612,16 +652,17 @@ static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width,
612 for (i = 0; i < 8; i++) { 652 for (i = 0; i < 8; i++) {
613 p = pack_hex_byte(p, addr[2 * i]); 653 p = pack_hex_byte(p, addr[2 * i]);
614 p = pack_hex_byte(p, addr[2 * i + 1]); 654 p = pack_hex_byte(p, addr[2 * i + 1]);
615 if (!(flags & SPECIAL) && i != 7) 655 if (!(spec.flags & SPECIAL) && i != 7)
616 *p++ = ':'; 656 *p++ = ':';
617 } 657 }
618 *p = '\0'; 658 *p = '\0';
659 spec.flags &= ~SPECIAL;
619 660
620 return string(buf, end, ip6_addr, field_width, precision, flags & ~SPECIAL); 661 return string(buf, end, ip6_addr, spec);
621} 662}
622 663
623static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, 664static char *ip4_addr_string(char *buf, char *end, u8 *addr,
624 int precision, int flags) 665 struct printf_spec spec)
625{ 666{
626 char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ 667 char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */
627 char temp[3]; /* hold each IP quad in reverse order */ 668 char temp[3]; /* hold each IP quad in reverse order */
@@ -637,8 +678,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
637 *p++ = '.'; 678 *p++ = '.';
638 } 679 }
639 *p = '\0'; 680 *p = '\0';
681 spec.flags &= ~SPECIAL;
640 682
641 return string(buf, end, ip4_addr, field_width, precision, flags & ~SPECIAL); 683 return string(buf, end, ip4_addr, spec);
642} 684}
643 685
644/* 686/*
@@ -663,41 +705,233 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
663 * function pointers are really function descriptors, which contain a 705 * function pointers are really function descriptors, which contain a
664 * pointer to the real address. 706 * pointer to the real address.
665 */ 707 */
666static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) 708static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
709 struct printf_spec spec)
667{ 710{
668 if (!ptr) 711 if (!ptr)
669 return string(buf, end, "(null)", field_width, precision, flags); 712 return string(buf, end, "(null)", spec);
670 713
671 switch (*fmt) { 714 switch (*fmt) {
672 case 'F': 715 case 'F':
673 ptr = dereference_function_descriptor(ptr); 716 ptr = dereference_function_descriptor(ptr);
674 /* Fallthrough */ 717 /* Fallthrough */
675 case 'S': 718 case 'S':
676 return symbol_string(buf, end, ptr, field_width, precision, flags); 719 return symbol_string(buf, end, ptr, spec);
677 case 'R': 720 case 'R':
678 return resource_string(buf, end, ptr, field_width, precision, flags); 721 return resource_string(buf, end, ptr, spec);
679 case 'm': 722 case 'm':
680 flags |= SPECIAL; 723 spec.flags |= SPECIAL;
681 /* Fallthrough */ 724 /* Fallthrough */
682 case 'M': 725 case 'M':
683 return mac_address_string(buf, end, ptr, field_width, precision, flags); 726 return mac_address_string(buf, end, ptr, spec);
684 case 'i': 727 case 'i':
685 flags |= SPECIAL; 728 spec.flags |= SPECIAL;
686 /* Fallthrough */ 729 /* Fallthrough */
687 case 'I': 730 case 'I':
688 if (fmt[1] == '6') 731 if (fmt[1] == '6')
689 return ip6_addr_string(buf, end, ptr, field_width, precision, flags); 732 return ip6_addr_string(buf, end, ptr, spec);
690 if (fmt[1] == '4') 733 if (fmt[1] == '4')
691 return ip4_addr_string(buf, end, ptr, field_width, precision, flags); 734 return ip4_addr_string(buf, end, ptr, spec);
692 flags &= ~SPECIAL; 735 spec.flags &= ~SPECIAL;
736 break;
737 }
738 spec.flags |= SMALL;
739 if (spec.field_width == -1) {
740 spec.field_width = 2*sizeof(void *);
741 spec.flags |= ZEROPAD;
742 }
743 spec.base = 16;
744
745 return number(buf, end, (unsigned long) ptr, spec);
746}
747
748/*
 749 * Helper function to decode a printf-style format string.
 750 * Each call decodes one token from the format and returns the
 751 * number of characters read (that is, how far the caller should
 752 * advance the format pointer before the next call).
753 * The decoded token is returned through the parameters
754 *
755 * 'h', 'l', or 'L' for integer fields
756 * 'z' support added 23/7/1999 S.H.
757 * 'z' changed to 'Z' --davidm 1/25/99
758 * 't' added for ptrdiff_t
759 *
760 * @fmt: the format string
761 * @type of the token returned
762 * @flags: various flags such as +, -, # tokens..
763 * @field_width: overwritten width
764 * @base: base of the number (octal, hex, ...)
765 * @precision: precision of a number
766 * @qualifier: qualifier of a number (long, size_t, ...)
767 */
768static int format_decode(const char *fmt, struct printf_spec *spec)
769{
770 const char *start = fmt;
771
772 /* we finished early by reading the field width */
773 if (spec->type == FORMAT_TYPE_WITDH) {
774 if (spec->field_width < 0) {
775 spec->field_width = -spec->field_width;
776 spec->flags |= LEFT;
777 }
778 spec->type = FORMAT_TYPE_NONE;
779 goto precision;
780 }
781
782 /* we finished early by reading the precision */
783 if (spec->type == FORMAT_TYPE_PRECISION) {
784 if (spec->precision < 0)
785 spec->precision = 0;
786
787 spec->type = FORMAT_TYPE_NONE;
788 goto qualifier;
789 }
790
791 /* By default */
792 spec->type = FORMAT_TYPE_NONE;
793
794 for (; *fmt ; ++fmt) {
795 if (*fmt == '%')
796 break;
797 }
798
799 /* Return the current non-format string */
800 if (fmt != start || !*fmt)
801 return fmt - start;
802
803 /* Process flags */
804 spec->flags = 0;
805
806 while (1) { /* this also skips first '%' */
807 bool found = true;
808
809 ++fmt;
810
811 switch (*fmt) {
812 case '-': spec->flags |= LEFT; break;
813 case '+': spec->flags |= PLUS; break;
814 case ' ': spec->flags |= SPACE; break;
815 case '#': spec->flags |= SPECIAL; break;
816 case '0': spec->flags |= ZEROPAD; break;
817 default: found = false;
818 }
819
820 if (!found)
821 break;
822 }
823
824 /* get field width */
825 spec->field_width = -1;
826
827 if (isdigit(*fmt))
828 spec->field_width = skip_atoi(&fmt);
829 else if (*fmt == '*') {
830 /* it's the next argument */
831 spec->type = FORMAT_TYPE_WITDH;
832 return ++fmt - start;
833 }
834
835precision:
836 /* get the precision */
837 spec->precision = -1;
838 if (*fmt == '.') {
839 ++fmt;
840 if (isdigit(*fmt)) {
841 spec->precision = skip_atoi(&fmt);
842 if (spec->precision < 0)
843 spec->precision = 0;
844 } else if (*fmt == '*') {
845 /* it's the next argument */
846 spec->type = FORMAT_TYPE_WITDH;
847 return ++fmt - start;
848 }
849 }
850
851qualifier:
852 /* get the conversion qualifier */
853 spec->qualifier = -1;
854 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
855 *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
856 spec->qualifier = *fmt;
857 ++fmt;
858 if (spec->qualifier == 'l' && *fmt == 'l') {
859 spec->qualifier = 'L';
860 ++fmt;
861 }
862 }
863
864 /* default base */
865 spec->base = 10;
866 switch (*fmt) {
867 case 'c':
868 spec->type = FORMAT_TYPE_CHAR;
869 return ++fmt - start;
870
871 case 's':
872 spec->type = FORMAT_TYPE_STR;
873 return ++fmt - start;
874
875 case 'p':
876 spec->type = FORMAT_TYPE_PTR;
877 return fmt - start;
878 /* skip alnum */
879
880 case 'n':
881 spec->type = FORMAT_TYPE_NRCHARS;
882 return ++fmt - start;
883
884 case '%':
885 spec->type = FORMAT_TYPE_PERCENT_CHAR;
886 return ++fmt - start;
887
888 /* integer number formats - set up the flags and "break" */
889 case 'o':
890 spec->base = 8;
693 break; 891 break;
892
893 case 'x':
894 spec->flags |= SMALL;
895
896 case 'X':
897 spec->base = 16;
898 break;
899
900 case 'd':
901 case 'i':
902 spec->flags |= SIGN;
903 case 'u':
904 break;
905
906 default:
907 spec->type = FORMAT_TYPE_INVALID;
908 return fmt - start;
694 } 909 }
695 flags |= SMALL; 910
696 if (field_width == -1) { 911 if (spec->qualifier == 'L')
697 field_width = 2*sizeof(void *); 912 spec->type = FORMAT_TYPE_LONG_LONG;
698 flags |= ZEROPAD; 913 else if (spec->qualifier == 'l') {
914 if (spec->flags & SIGN)
915 spec->type = FORMAT_TYPE_LONG;
916 else
917 spec->type = FORMAT_TYPE_ULONG;
918 } else if (spec->qualifier == 'Z' || spec->qualifier == 'z') {
919 spec->type = FORMAT_TYPE_SIZE_T;
920 } else if (spec->qualifier == 't') {
921 spec->type = FORMAT_TYPE_PTRDIFF;
922 } else if (spec->qualifier == 'h') {
923 if (spec->flags & SIGN)
924 spec->type = FORMAT_TYPE_SHORT;
925 else
926 spec->type = FORMAT_TYPE_USHORT;
927 } else {
928 if (spec->flags & SIGN)
929 spec->type = FORMAT_TYPE_INT;
930 else
931 spec->type = FORMAT_TYPE_UINT;
699 } 932 }
700 return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags); 933
934 return ++fmt - start;
701} 935}
702 936
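To make the new calling convention concrete, format_decode() is meant to be called in a loop: each call consumes either a run of literal text or a single conversion, and a '*' width or precision makes it return early with FORMAT_TYPE_WITDH or FORMAT_TYPE_PRECISION so the caller can fetch the value from the va_list and call again. A hedged sketch of such a loop (mirroring what vsnprintf() does below; the pr_debug() is purely illustrative):

	static void walk_format(const char *fmt)
	{
		struct printf_spec spec = {0};

		while (*fmt) {
			int read = format_decode(fmt, &spec);

			fmt += read;
			/*
			 * For "%-8.3lx" a single call reports: flags LEFT|SMALL,
			 * field_width 8, precision 3, base 16 and
			 * type FORMAT_TYPE_ULONG.  A real caller must feed a
			 * '*' width/precision back through spec before looping,
			 * exactly as vsnprintf() does.
			 */
			pr_debug("consumed %d chars, type %d\n", read, spec.type);
		}
	}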
703/** 937/**
@@ -726,18 +960,9 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
726int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) 960int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
727{ 961{
728 unsigned long long num; 962 unsigned long long num;
729 int base;
730 char *str, *end, c; 963 char *str, *end, c;
731 964 int read;
732 int flags; /* flags to number() */ 965 struct printf_spec spec = {0};
733
734 int field_width; /* width of output field */
735 int precision; /* min. # of digits for integers; max
736 number of chars for from string */
737 int qualifier; /* 'h', 'l', or 'L' for integer fields */
738 /* 'z' support added 23/7/1999 S.H. */
739 /* 'z' changed to 'Z' --davidm 1/25/99 */
740 /* 't' added for ptrdiff_t */
741 966
742 /* Reject out-of-range values early. Large positive sizes are 967 /* Reject out-of-range values early. Large positive sizes are
743 used for unknown buffer sizes. */ 968 used for unknown buffer sizes. */
@@ -758,184 +983,144 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
758 size = end - buf; 983 size = end - buf;
759 } 984 }
760 985
761 for (; *fmt ; ++fmt) { 986 while (*fmt) {
762 if (*fmt != '%') { 987 const char *old_fmt = fmt;
763 if (str < end)
764 *str = *fmt;
765 ++str;
766 continue;
767 }
768 988
769 /* process flags */ 989 read = format_decode(fmt, &spec);
770 flags = 0;
771 repeat:
772 ++fmt; /* this also skips first '%' */
773 switch (*fmt) {
774 case '-': flags |= LEFT; goto repeat;
775 case '+': flags |= PLUS; goto repeat;
776 case ' ': flags |= SPACE; goto repeat;
777 case '#': flags |= SPECIAL; goto repeat;
778 case '0': flags |= ZEROPAD; goto repeat;
779 }
780 990
781 /* get field width */ 991 fmt += read;
782 field_width = -1;
783 if (isdigit(*fmt))
784 field_width = skip_atoi(&fmt);
785 else if (*fmt == '*') {
786 ++fmt;
787 /* it's the next argument */
788 field_width = va_arg(args, int);
789 if (field_width < 0) {
790 field_width = -field_width;
791 flags |= LEFT;
792 }
793 }
794 992
795 /* get the precision */ 993 switch (spec.type) {
796 precision = -1; 994 case FORMAT_TYPE_NONE: {
797 if (*fmt == '.') { 995 int copy = read;
798 ++fmt; 996 if (str < end) {
799 if (isdigit(*fmt)) 997 if (copy > end - str)
800 precision = skip_atoi(&fmt); 998 copy = end - str;
801 else if (*fmt == '*') { 999 memcpy(str, old_fmt, copy);
802 ++fmt;
803 /* it's the next argument */
804 precision = va_arg(args, int);
805 } 1000 }
806 if (precision < 0) 1001 str += read;
807 precision = 0; 1002 break;
808 } 1003 }
809 1004
810 /* get the conversion qualifier */ 1005 case FORMAT_TYPE_WITDH:
811 qualifier = -1; 1006 spec.field_width = va_arg(args, int);
812 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || 1007 break;
813 *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
814 qualifier = *fmt;
815 ++fmt;
816 if (qualifier == 'l' && *fmt == 'l') {
817 qualifier = 'L';
818 ++fmt;
819 }
820 }
821 1008
822 /* default base */ 1009 case FORMAT_TYPE_PRECISION:
823 base = 10; 1010 spec.precision = va_arg(args, int);
1011 break;
824 1012
825 switch (*fmt) { 1013 case FORMAT_TYPE_CHAR:
826 case 'c': 1014 if (!(spec.flags & LEFT)) {
827 if (!(flags & LEFT)) { 1015 while (--spec.field_width > 0) {
828 while (--field_width > 0) {
829 if (str < end)
830 *str = ' ';
831 ++str;
832 }
833 }
834 c = (unsigned char) va_arg(args, int);
835 if (str < end)
836 *str = c;
837 ++str;
838 while (--field_width > 0) {
839 if (str < end) 1016 if (str < end)
840 *str = ' '; 1017 *str = ' ';
841 ++str; 1018 ++str;
842 }
843 continue;
844
845 case 's':
846 str = string(str, end, va_arg(args, char *), field_width, precision, flags);
847 continue;
848
849 case 'p':
850 str = pointer(fmt+1, str, end,
851 va_arg(args, void *),
852 field_width, precision, flags);
853 /* Skip all alphanumeric pointer suffixes */
854 while (isalnum(fmt[1]))
855 fmt++;
856 continue;
857
858 case 'n':
859 /* FIXME:
860 * What does C99 say about the overflow case here? */
861 if (qualifier == 'l') {
862 long * ip = va_arg(args, long *);
863 *ip = (str - buf);
864 } else if (qualifier == 'Z' || qualifier == 'z') {
865 size_t * ip = va_arg(args, size_t *);
866 *ip = (str - buf);
867 } else {
868 int * ip = va_arg(args, int *);
869 *ip = (str - buf);
870 }
871 continue;
872 1019
873 case '%': 1020 }
1021 }
1022 c = (unsigned char) va_arg(args, int);
1023 if (str < end)
1024 *str = c;
1025 ++str;
1026 while (--spec.field_width > 0) {
874 if (str < end) 1027 if (str < end)
875 *str = '%'; 1028 *str = ' ';
876 ++str; 1029 ++str;
877 continue; 1030 }
1031 break;
878 1032
879 /* integer number formats - set up the flags and "break" */ 1033 case FORMAT_TYPE_STR:
880 case 'o': 1034 str = string(str, end, va_arg(args, char *), spec);
881 base = 8; 1035 break;
882 break;
883 1036
884 case 'x': 1037 case FORMAT_TYPE_PTR:
885 flags |= SMALL; 1038 str = pointer(fmt+1, str, end, va_arg(args, void *),
886 case 'X': 1039 spec);
887 base = 16; 1040 while (isalnum(*fmt))
888 break; 1041 fmt++;
1042 break;
889 1043
890 case 'd': 1044 case FORMAT_TYPE_PERCENT_CHAR:
891 case 'i': 1045 if (str < end)
892 flags |= SIGN; 1046 *str = '%';
893 case 'u': 1047 ++str;
894 break; 1048 break;
895 1049
896 default: 1050 case FORMAT_TYPE_INVALID:
1051 if (str < end)
1052 *str = '%';
1053 ++str;
1054 if (*fmt) {
897 if (str < end) 1055 if (str < end)
898 *str = '%'; 1056 *str = *fmt;
899 ++str; 1057 ++str;
900 if (*fmt) { 1058 } else {
901 if (str < end) 1059 --fmt;
902 *str = *fmt; 1060 }
903 ++str; 1061 break;
904 } else { 1062
905 --fmt; 1063 case FORMAT_TYPE_NRCHARS: {
906 } 1064 int qualifier = spec.qualifier;
907 continue; 1065
1066 if (qualifier == 'l') {
1067 long *ip = va_arg(args, long *);
1068 *ip = (str - buf);
1069 } else if (qualifier == 'Z' ||
1070 qualifier == 'z') {
1071 size_t *ip = va_arg(args, size_t *);
1072 *ip = (str - buf);
1073 } else {
1074 int *ip = va_arg(args, int *);
1075 *ip = (str - buf);
1076 }
1077 break;
908 } 1078 }
909 if (qualifier == 'L') 1079
910 num = va_arg(args, long long); 1080 default:
911 else if (qualifier == 'l') { 1081 switch (spec.type) {
912 num = va_arg(args, unsigned long); 1082 case FORMAT_TYPE_LONG_LONG:
913 if (flags & SIGN) 1083 num = va_arg(args, long long);
914 num = (signed long) num; 1084 break;
915 } else if (qualifier == 'Z' || qualifier == 'z') { 1085 case FORMAT_TYPE_ULONG:
916 num = va_arg(args, size_t); 1086 num = va_arg(args, unsigned long);
917 } else if (qualifier == 't') { 1087 break;
918 num = va_arg(args, ptrdiff_t); 1088 case FORMAT_TYPE_LONG:
919 } else if (qualifier == 'h') { 1089 num = va_arg(args, long);
920 num = (unsigned short) va_arg(args, int); 1090 break;
921 if (flags & SIGN) 1091 case FORMAT_TYPE_SIZE_T:
922 num = (signed short) num; 1092 num = va_arg(args, size_t);
923 } else { 1093 break;
924 num = va_arg(args, unsigned int); 1094 case FORMAT_TYPE_PTRDIFF:
925 if (flags & SIGN) 1095 num = va_arg(args, ptrdiff_t);
926 num = (signed int) num; 1096 break;
1097 case FORMAT_TYPE_USHORT:
1098 num = (unsigned short) va_arg(args, int);
1099 break;
1100 case FORMAT_TYPE_SHORT:
1101 num = (short) va_arg(args, int);
1102 break;
1103 case FORMAT_TYPE_INT:
1104 num = (int) va_arg(args, int);
1105 break;
1106 default:
1107 num = va_arg(args, unsigned int);
1108 }
1109
1110 str = number(str, end, num, spec);
927 } 1111 }
928 str = number(str, end, num, base,
929 field_width, precision, flags);
930 } 1112 }
1113
931 if (size > 0) { 1114 if (size > 0) {
932 if (str < end) 1115 if (str < end)
933 *str = '\0'; 1116 *str = '\0';
934 else 1117 else
935 end[-1] = '\0'; 1118 end[-1] = '\0';
936 } 1119 }
1120
937 /* the trailing null byte doesn't count towards the total */ 1121 /* the trailing null byte doesn't count towards the total */
938 return str-buf; 1122 return str-buf;
1123
939} 1124}
940EXPORT_SYMBOL(vsnprintf); 1125EXPORT_SYMBOL(vsnprintf);
941 1126
@@ -1058,6 +1243,372 @@ int sprintf(char * buf, const char *fmt, ...)
1058} 1243}
1059EXPORT_SYMBOL(sprintf); 1244EXPORT_SYMBOL(sprintf);
1060 1245
1246#ifdef CONFIG_BINARY_PRINTF
1247/*
1248 * bprintf service:
1249 * vbin_printf() - VA arguments to binary data
1250 * bstr_printf() - Binary data to text string
1251 */
1252
1253/**
1254 * vbin_printf - Parse a format string and place args' binary value in a buffer
1255 * @bin_buf: The buffer to place args' binary value
1256 * @size: The size of the buffer (in 32-bit words, not characters)
1257 * @fmt: The format string to use
1258 * @args: Arguments for the format string
1259 *
1260 * The format follows C99 vsnprintf, except %n is ignored, and its argument
1261 * is skipped.
1262 *
1263 * The return value is the number of words (32 bits each) that would be generated for
1264 * the given input.
1265 *
1266 * NOTE:
1267 * If the return value is greater than @size, the resulting bin_buf is NOT
1268 * valid for bstr_printf().
1269 */
1270int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
1271{
1272 struct printf_spec spec = {0};
1273 char *str, *end;
1274 int read;
1275
1276 str = (char *)bin_buf;
1277 end = (char *)(bin_buf + size);
1278
1279#define save_arg(type) \
1280do { \
1281 if (sizeof(type) == 8) { \
1282 unsigned long long value; \
1283 str = PTR_ALIGN(str, sizeof(u32)); \
1284 value = va_arg(args, unsigned long long); \
1285 if (str + sizeof(type) <= end) { \
1286 *(u32 *)str = *(u32 *)&value; \
1287 *(u32 *)(str + 4) = *((u32 *)&value + 1); \
1288 } \
1289 } else { \
1290 unsigned long value; \
1291 str = PTR_ALIGN(str, sizeof(type)); \
1292 value = va_arg(args, int); \
1293 if (str + sizeof(type) <= end) \
1294 *(typeof(type) *)str = (type)value; \
1295 } \
1296 str += sizeof(type); \
1297} while (0)
1298
1299
1300 while (*fmt) {
1301 read = format_decode(fmt, &spec);
1302
1303 fmt += read;
1304
1305 switch (spec.type) {
1306 case FORMAT_TYPE_NONE:
1307 break;
1308
1309 case FORMAT_TYPE_WITDH:
1310 case FORMAT_TYPE_PRECISION:
1311 save_arg(int);
1312 break;
1313
1314 case FORMAT_TYPE_CHAR:
1315 save_arg(char);
1316 break;
1317
1318 case FORMAT_TYPE_STR: {
1319 const char *save_str = va_arg(args, char *);
1320 size_t len;
1321 if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
1322 || (unsigned long)save_str < PAGE_SIZE)
1323 save_str = "<NULL>";
1324 len = strlen(save_str);
1325 if (str + len + 1 < end)
1326 memcpy(str, save_str, len + 1);
1327 str += len + 1;
1328 break;
1329 }
1330
1331 case FORMAT_TYPE_PTR:
1332 save_arg(void *);
1333 /* skip all alphanumeric pointer suffixes */
1334 while (isalnum(*fmt))
1335 fmt++;
1336 break;
1337
1338 case FORMAT_TYPE_PERCENT_CHAR:
1339 break;
1340
1341 case FORMAT_TYPE_INVALID:
1342 if (!*fmt)
1343 --fmt;
1344 break;
1345
1346 case FORMAT_TYPE_NRCHARS: {
1347 /* skip %n's argument */
1348 int qualifier = spec.qualifier;
1349 void *skip_arg;
1350 if (qualifier == 'l')
1351 skip_arg = va_arg(args, long *);
1352 else if (qualifier == 'Z' || qualifier == 'z')
1353 skip_arg = va_arg(args, size_t *);
1354 else
1355 skip_arg = va_arg(args, int *);
1356 break;
1357 }
1358
1359 default:
1360 switch (spec.type) {
1361
1362 case FORMAT_TYPE_LONG_LONG:
1363 save_arg(long long);
1364 break;
1365 case FORMAT_TYPE_ULONG:
1366 case FORMAT_TYPE_LONG:
1367 save_arg(unsigned long);
1368 break;
1369 case FORMAT_TYPE_SIZE_T:
1370 save_arg(size_t);
1371 break;
1372 case FORMAT_TYPE_PTRDIFF:
1373 save_arg(ptrdiff_t);
1374 break;
1375 case FORMAT_TYPE_USHORT:
1376 case FORMAT_TYPE_SHORT:
1377 save_arg(short);
1378 break;
1379 default:
1380 save_arg(int);
1381 }
1382 }
1383 }
1384 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
1385
1386#undef save_arg
1387}
1388EXPORT_SYMBOL_GPL(vbin_printf);
1389
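The layout produced by save_arg() is not spelled out in the patch, but it can be read off the macro above: 32-bit arguments go into the next naturally aligned slot, 64-bit arguments into two consecutive u32 words aligned to 4 bytes, and %s arguments are copied inline as NUL-terminated strings. A worked, hypothetical example:

	/*
	 * Assumed contents of bin_buf after
	 *	vbin_printf(buf, 8, "pid %d comm %s", 1234, "swapper");
	 *
	 *	word 0     : 1234          (the %d argument)
	 *	words 1..2 : "swapper\0"   (the %s argument, copied inline)
	 *
	 * The return value would be 3, and bstr_printf() replays the same
	 * format string against exactly this buffer.
	 */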
1390/**
1391 * bstr_printf - Format a string from binary arguments and place it in a buffer
1392 * @buf: The buffer to place the result into
1393 * @size: The size of the buffer, including the trailing null space
1394 * @fmt: The format string to use
1395 * @bin_buf: Binary arguments for the format string
1396 *
1397 * This function is like C99 vsnprintf, except that vsnprintf takes its
1398 * arguments from the stack while bstr_printf takes them from @bin_buf,
1399 * a binary buffer previously generated by vbin_printf.
1400 *
1401 * The format follows C99 vsnprintf, but has some extensions:
1402 * %pS output the name of a text symbol
1403 * %pF output the name of a function pointer
1404 * %pR output the address range in a struct resource
1405 * %n is ignored
1406 *
1407 * The return value is the number of characters which would
1408 * be generated for the given input, excluding the trailing
1409 * '\0', as per ISO C99. If you want to have the exact
1410 * number of characters written into @buf as return value
1411 * (not including the trailing '\0'), use vscnprintf(). If the
1412 * return is greater than or equal to @size, the resulting
1413 * string is truncated.
1414 */
1415int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1416{
1417 unsigned long long num;
1418 char *str, *end, c;
1419 const char *args = (const char *)bin_buf;
1420
1421 struct printf_spec spec = {0};
1422
1423 if (unlikely((int) size < 0)) {
1424 /* There can be only one.. */
1425 static char warn = 1;
1426 WARN_ON(warn);
1427 warn = 0;
1428 return 0;
1429 }
1430
1431 str = buf;
1432 end = buf + size;
1433
1434#define get_arg(type) \
1435({ \
1436 typeof(type) value; \
1437 if (sizeof(type) == 8) { \
1438 args = PTR_ALIGN(args, sizeof(u32)); \
1439 *(u32 *)&value = *(u32 *)args; \
1440 *((u32 *)&value + 1) = *(u32 *)(args + 4); \
1441 } else { \
1442 args = PTR_ALIGN(args, sizeof(type)); \
1443 value = *(typeof(type) *)args; \
1444 } \
1445 args += sizeof(type); \
1446 value; \
1447})
1448
1449 /* Make sure end is always >= buf */
1450 if (end < buf) {
1451 end = ((void *)-1);
1452 size = end - buf;
1453 }
1454
1455 while (*fmt) {
1456 int read;
1457 const char *old_fmt = fmt;
1458
1459 read = format_decode(fmt, &spec);
1460
1461 fmt += read;
1462
1463 switch (spec.type) {
1464 case FORMAT_TYPE_NONE: {
1465 int copy = read;
1466 if (str < end) {
1467 if (copy > end - str)
1468 copy = end - str;
1469 memcpy(str, old_fmt, copy);
1470 }
1471 str += read;
1472 break;
1473 }
1474
1475 case FORMAT_TYPE_WITDH:
1476 spec.field_width = get_arg(int);
1477 break;
1478
1479 case FORMAT_TYPE_PRECISION:
1480 spec.precision = get_arg(int);
1481 break;
1482
1483 case FORMAT_TYPE_CHAR:
1484 if (!(spec.flags & LEFT)) {
1485 while (--spec.field_width > 0) {
1486 if (str < end)
1487 *str = ' ';
1488 ++str;
1489 }
1490 }
1491 c = (unsigned char) get_arg(char);
1492 if (str < end)
1493 *str = c;
1494 ++str;
1495 while (--spec.field_width > 0) {
1496 if (str < end)
1497 *str = ' ';
1498 ++str;
1499 }
1500 break;
1501
1502 case FORMAT_TYPE_STR: {
1503 const char *str_arg = args;
1504 size_t len = strlen(str_arg);
1505 args += len + 1;
1506 str = string(str, end, (char *)str_arg, spec);
1507 break;
1508 }
1509
1510 case FORMAT_TYPE_PTR:
1511 str = pointer(fmt+1, str, end, get_arg(void *), spec);
1512 while (isalnum(*fmt))
1513 fmt++;
1514 break;
1515
1516 case FORMAT_TYPE_PERCENT_CHAR:
1517 if (str < end)
1518 *str = '%';
1519 ++str;
1520 break;
1521
1522 case FORMAT_TYPE_INVALID:
1523 if (str < end)
1524 *str = '%';
1525 ++str;
1526 if (*fmt) {
1527 if (str < end)
1528 *str = *fmt;
1529 ++str;
1530 } else {
1531 --fmt;
1532 }
1533 break;
1534
1535 case FORMAT_TYPE_NRCHARS:
1536 /* skip */
1537 break;
1538
1539 default:
1540 switch (spec.type) {
1541
1542 case FORMAT_TYPE_LONG_LONG:
1543 num = get_arg(long long);
1544 break;
1545 case FORMAT_TYPE_ULONG:
1546 num = get_arg(unsigned long);
1547 break;
1548 case FORMAT_TYPE_LONG:
1549 num = get_arg(unsigned long);
1550 break;
1551 case FORMAT_TYPE_SIZE_T:
1552 num = get_arg(size_t);
1553 break;
1554 case FORMAT_TYPE_PTRDIFF:
1555 num = get_arg(ptrdiff_t);
1556 break;
1557 case FORMAT_TYPE_USHORT:
1558 num = get_arg(unsigned short);
1559 break;
1560 case FORMAT_TYPE_SHORT:
1561 num = get_arg(short);
1562 break;
1563 case FORMAT_TYPE_UINT:
1564 num = get_arg(unsigned int);
1565 break;
1566 default:
1567 num = get_arg(int);
1568 }
1569
1570 str = number(str, end, num, spec);
1571 }
1572 }
1573
1574 if (size > 0) {
1575 if (str < end)
1576 *str = '\0';
1577 else
1578 end[-1] = '\0';
1579 }
1580
1581#undef get_arg
1582
1583 /* the trailing null byte doesn't count towards the total */
1584 return str - buf;
1585}
1586EXPORT_SYMBOL_GPL(bstr_printf);
1587
1588/**
1589 * bprintf - Parse a format string and place args' binary value in a buffer
1590 * @bin_buf: The buffer to place args' binary value
1591 * @size: The size of the buffer (in 32-bit words, not characters)
1592 * @fmt: The format string to use
1593 * @...: Arguments for the format string
1594 *
1595 * The function returns the number of words (u32) written
1596 * into @bin_buf.
1597 */
1598int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
1599{
1600 va_list args;
1601 int ret;
1602
1603 va_start(args, fmt);
1604 ret = vbin_printf(bin_buf, size, fmt, args);
1605 va_end(args);
1606 return ret;
1607}
1608EXPORT_SYMBOL_GPL(bprintf);
1609
1610#endif /* CONFIG_BINARY_PRINTF */
1611
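Putting the two halves together, a minimal round-trip sketch (buffer sizes and format are arbitrary; the intended in-tree consumer is presumably the tracing code, which is not shown here):

	#include <linux/kernel.h>

	static void binary_printf_demo(void)
	{
		u32 bin[32];
		char out[128];
		int words, len;

		/* pass 1: record only the raw argument values */
		words = bprintf(bin, ARRAY_SIZE(bin), "cpu=%d comm=%s", 3, "swapper");
		if (words > (int)ARRAY_SIZE(bin))
			return;	/* bin would have overflowed and is not usable */

		/* pass 2, possibly much later: render text from the saved values */
		len = bstr_printf(out, sizeof(out), "cpu=%d comm=%s", bin);
		pr_info("%s (%d chars from %d words)\n", out, len, words);
	}

Note that the same format string must be passed to both calls; the binary buffer stores only argument values, not the conversions.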
1061/** 1612/**
1062 * vsscanf - Unformat a buffer into a list of arguments 1613 * vsscanf - Unformat a buffer into a list of arguments
1063 * @buf: input buffer 1614 * @buf: input buffer
diff --git a/mm/memory.c b/mm/memory.c
index baa999e87cd2..05fab3bc5b4b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -48,6 +48,8 @@
48#include <linux/rmap.h> 48#include <linux/rmap.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/delayacct.h> 50#include <linux/delayacct.h>
51#include <linux/kprobes.h>
52#include <linux/mutex.h>
51#include <linux/init.h> 53#include <linux/init.h>
52#include <linux/writeback.h> 54#include <linux/writeback.h>
53#include <linux/memcontrol.h> 55#include <linux/memcontrol.h>
@@ -99,6 +101,14 @@ int randomize_va_space __read_mostly =
99 2; 101 2;
100#endif 102#endif
101 103
104/*
105 * Mutex protecting text section modification (dynamic code patching).
106 * Some users need to sleep (e.g. to allocate memory) while holding this lock.
107 *
108 * NOT exported to modules - patching kernel text is a really delicate matter.
109 */
110DEFINE_MUTEX(text_mutex);
111
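A hedged sketch of how the new mutex is intended to be taken around code patching; this hunk only defines it, the extern declaration is assumed to live in include/linux/memory.h, and text_poke() is merely a plausible x86 callee:

	#include <linux/mutex.h>
	#include <linux/memory.h>	/* extern struct mutex text_mutex; (assumption) */
	#include <asm/alternative.h>	/* text_poke() on x86, used here for illustration */

	static void patch_kernel_text(void *addr, const void *opcode, size_t len)
	{
		mutex_lock(&text_mutex);	/* sleeping is allowed while held */
		text_poke(addr, opcode, len);	/* rewrite the instruction bytes */
		mutex_unlock(&text_mutex);
	}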
102static int __init disable_randmaps(char *s) 112static int __init disable_randmaps(char *s)
103{ 113{
104 randomize_va_space = 0; 114 randomize_va_space = 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c44ed49ca93..a3803ea8c27d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1479,6 +1479,8 @@ __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
1479 unsigned long did_some_progress; 1479 unsigned long did_some_progress;
1480 unsigned long pages_reclaimed = 0; 1480 unsigned long pages_reclaimed = 0;
1481 1481
1482 lockdep_trace_alloc(gfp_mask);
1483
1482 might_sleep_if(wait); 1484 might_sleep_if(wait);
1483 1485
1484 if (should_fail_alloc_page(gfp_mask, order)) 1486 if (should_fail_alloc_page(gfp_mask, order))
@@ -1578,12 +1580,15 @@ nofail_alloc:
1578 */ 1580 */
1579 cpuset_update_task_memory_state(); 1581 cpuset_update_task_memory_state();
1580 p->flags |= PF_MEMALLOC; 1582 p->flags |= PF_MEMALLOC;
1583
1584 lockdep_set_current_reclaim_state(gfp_mask);
1581 reclaim_state.reclaimed_slab = 0; 1585 reclaim_state.reclaimed_slab = 0;
1582 p->reclaim_state = &reclaim_state; 1586 p->reclaim_state = &reclaim_state;
1583 1587
1584 did_some_progress = try_to_free_pages(zonelist, order, gfp_mask); 1588 did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
1585 1589
1586 p->reclaim_state = NULL; 1590 p->reclaim_state = NULL;
1591 lockdep_clear_current_reclaim_state();
1587 p->flags &= ~PF_MEMALLOC; 1592 p->flags &= ~PF_MEMALLOC;
1588 1593
1589 cond_resched(); 1594 cond_resched();
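The annotation pattern added here (and mirrored in kswapd in mm/vmscan.c further down) is: mark the task as being in the reclaim context for the duration of direct reclaim, so that lockdep_trace_alloc() -- which this series adds at the slab and page allocator entry points -- can correlate locks taken during reclaim with later allocations. A sketch of the pattern, with the semantics stated as an assumption rather than quoted from documentation:

	static void direct_reclaim_sketch(gfp_t gfp_mask)
	{
		/* entering reclaim: locks taken from here are reclaim-tainted */
		lockdep_set_current_reclaim_state(gfp_mask);

		/* ... shrink slabs and page lists, possibly taking fs/io locks ... */

		lockdep_clear_current_reclaim_state();
	}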
diff --git a/mm/slab.c b/mm/slab.c
index 4d00855629c4..9ec66c3e6ee0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <trace/kmemtrace.h>
105#include <linux/rcupdate.h> 106#include <linux/rcupdate.h>
106#include <linux/string.h> 107#include <linux/string.h>
107#include <linux/uaccess.h> 108#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
568 569
569#endif 570#endif
570 571
572#ifdef CONFIG_KMEMTRACE
573size_t slab_buffer_size(struct kmem_cache *cachep)
574{
575 return cachep->buffer_size;
576}
577EXPORT_SYMBOL(slab_buffer_size);
578#endif
579
571/* 580/*
572 * Do not go above this order unless 0 objects fit into the slab. 581 * Do not go above this order unless 0 objects fit into the slab.
573 */ 582 */
@@ -3318,6 +3327,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3318 unsigned long save_flags; 3327 unsigned long save_flags;
3319 void *ptr; 3328 void *ptr;
3320 3329
3330 lockdep_trace_alloc(flags);
3331
3321 if (slab_should_failslab(cachep, flags)) 3332 if (slab_should_failslab(cachep, flags))
3322 return NULL; 3333 return NULL;
3323 3334
@@ -3394,6 +3405,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3394 unsigned long save_flags; 3405 unsigned long save_flags;
3395 void *objp; 3406 void *objp;
3396 3407
3408 lockdep_trace_alloc(flags);
3409
3397 if (slab_should_failslab(cachep, flags)) 3410 if (slab_should_failslab(cachep, flags))
3398 return NULL; 3411 return NULL;
3399 3412
@@ -3550,10 +3563,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3550 */ 3563 */
3551void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3564void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3552{ 3565{
3553 return __cache_alloc(cachep, flags, __builtin_return_address(0)); 3566 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3567
3568 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3569 obj_size(cachep), cachep->buffer_size, flags);
3570
3571 return ret;
3554} 3572}
3555EXPORT_SYMBOL(kmem_cache_alloc); 3573EXPORT_SYMBOL(kmem_cache_alloc);
3556 3574
3575#ifdef CONFIG_KMEMTRACE
3576void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3577{
3578 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3579}
3580EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3581#endif
3582
3557/** 3583/**
3558 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. 3584 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
3559 * @cachep: the cache we're checking against 3585 * @cachep: the cache we're checking against
@@ -3598,23 +3624,47 @@ out:
3598#ifdef CONFIG_NUMA 3624#ifdef CONFIG_NUMA
3599void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3625void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3600{ 3626{
3601 return __cache_alloc_node(cachep, flags, nodeid, 3627 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3602 __builtin_return_address(0)); 3628 __builtin_return_address(0));
3629
3630 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
3631 obj_size(cachep), cachep->buffer_size,
3632 flags, nodeid);
3633
3634 return ret;
3603} 3635}
3604EXPORT_SYMBOL(kmem_cache_alloc_node); 3636EXPORT_SYMBOL(kmem_cache_alloc_node);
3605 3637
3638#ifdef CONFIG_KMEMTRACE
3639void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
3640 gfp_t flags,
3641 int nodeid)
3642{
3643 return __cache_alloc_node(cachep, flags, nodeid,
3644 __builtin_return_address(0));
3645}
3646EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
3647#endif
3648
3606static __always_inline void * 3649static __always_inline void *
3607__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) 3650__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3608{ 3651{
3609 struct kmem_cache *cachep; 3652 struct kmem_cache *cachep;
3653 void *ret;
3610 3654
3611 cachep = kmem_find_general_cachep(size, flags); 3655 cachep = kmem_find_general_cachep(size, flags);
3612 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3656 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3613 return cachep; 3657 return cachep;
3614 return kmem_cache_alloc_node(cachep, flags, node); 3658 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
3659
3660 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
3661 (unsigned long) caller, ret,
3662 size, cachep->buffer_size, flags, node);
3663
3664 return ret;
3615} 3665}
3616 3666
3617#ifdef CONFIG_DEBUG_SLAB 3667#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3618void *__kmalloc_node(size_t size, gfp_t flags, int node) 3668void *__kmalloc_node(size_t size, gfp_t flags, int node)
3619{ 3669{
3620 return __do_kmalloc_node(size, flags, node, 3670 return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3697,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3647 void *caller) 3697 void *caller)
3648{ 3698{
3649 struct kmem_cache *cachep; 3699 struct kmem_cache *cachep;
3700 void *ret;
3650 3701
3651 /* If you want to save a few bytes .text space: replace 3702 /* If you want to save a few bytes .text space: replace
3652 * __ with kmem_. 3703 * __ with kmem_.
@@ -3656,11 +3707,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3656 cachep = __find_general_cachep(size, flags); 3707 cachep = __find_general_cachep(size, flags);
3657 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3708 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3658 return cachep; 3709 return cachep;
3659 return __cache_alloc(cachep, flags, caller); 3710 ret = __cache_alloc(cachep, flags, caller);
3711
3712 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
3713 (unsigned long) caller, ret,
3714 size, cachep->buffer_size, flags);
3715
3716 return ret;
3660} 3717}
3661 3718
3662 3719
3663#ifdef CONFIG_DEBUG_SLAB 3720#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3664void *__kmalloc(size_t size, gfp_t flags) 3721void *__kmalloc(size_t size, gfp_t flags)
3665{ 3722{
3666 return __do_kmalloc(size, flags, __builtin_return_address(0)); 3723 return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3756,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3699 debug_check_no_obj_freed(objp, obj_size(cachep)); 3756 debug_check_no_obj_freed(objp, obj_size(cachep));
3700 __cache_free(cachep, objp); 3757 __cache_free(cachep, objp);
3701 local_irq_restore(flags); 3758 local_irq_restore(flags);
3759
3760 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
3702} 3761}
3703EXPORT_SYMBOL(kmem_cache_free); 3762EXPORT_SYMBOL(kmem_cache_free);
3704 3763
@@ -3725,6 +3784,8 @@ void kfree(const void *objp)
3725 debug_check_no_obj_freed(objp, obj_size(c)); 3784 debug_check_no_obj_freed(objp, obj_size(c));
3726 __cache_free(c, (void *)objp); 3785 __cache_free(c, (void *)objp);
3727 local_irq_restore(flags); 3786 local_irq_restore(flags);
3787
3788 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
3728} 3789}
3729EXPORT_SYMBOL(kfree); 3790EXPORT_SYMBOL(kfree);
3730 3791
diff --git a/mm/slob.c b/mm/slob.c
index 52bc8a2bd9ef..596152926a8d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
65#include <linux/module.h> 65#include <linux/module.h>
66#include <linux/rcupdate.h> 66#include <linux/rcupdate.h>
67#include <linux/list.h> 67#include <linux/list.h>
68#include <trace/kmemtrace.h>
68#include <asm/atomic.h> 69#include <asm/atomic.h>
69 70
70/* 71/*
@@ -463,27 +464,40 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
463{ 464{
464 unsigned int *m; 465 unsigned int *m;
465 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 466 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
467 void *ret;
468
469 lockdep_trace_alloc(gfp);
466 470
467 if (size < PAGE_SIZE - align) { 471 if (size < PAGE_SIZE - align) {
468 if (!size) 472 if (!size)
469 return ZERO_SIZE_PTR; 473 return ZERO_SIZE_PTR;
470 474
471 m = slob_alloc(size + align, gfp, align, node); 475 m = slob_alloc(size + align, gfp, align, node);
476
472 if (!m) 477 if (!m)
473 return NULL; 478 return NULL;
474 *m = size; 479 *m = size;
475 return (void *)m + align; 480 ret = (void *)m + align;
481
482 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
483 _RET_IP_, ret,
484 size, size + align, gfp, node);
476 } else { 485 } else {
477 void *ret; 486 unsigned int order = get_order(size);
478 487
479 ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); 488 ret = slob_new_page(gfp | __GFP_COMP, order, node);
480 if (ret) { 489 if (ret) {
481 struct page *page; 490 struct page *page;
482 page = virt_to_page(ret); 491 page = virt_to_page(ret);
483 page->private = size; 492 page->private = size;
484 } 493 }
485 return ret; 494
495 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
496 _RET_IP_, ret,
497 size, PAGE_SIZE << order, gfp, node);
486 } 498 }
499
500 return ret;
487} 501}
488EXPORT_SYMBOL(__kmalloc_node); 502EXPORT_SYMBOL(__kmalloc_node);
489 503
@@ -501,6 +515,8 @@ void kfree(const void *block)
501 slob_free(m, *m + align); 515 slob_free(m, *m + align);
502 } else 516 } else
503 put_page(&sp->page); 517 put_page(&sp->page);
518
519 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
504} 520}
505EXPORT_SYMBOL(kfree); 521EXPORT_SYMBOL(kfree);
506 522
@@ -570,10 +586,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
570{ 586{
571 void *b; 587 void *b;
572 588
573 if (c->size < PAGE_SIZE) 589 if (c->size < PAGE_SIZE) {
574 b = slob_alloc(c->size, flags, c->align, node); 590 b = slob_alloc(c->size, flags, c->align, node);
575 else 591 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
592 _RET_IP_, b, c->size,
593 SLOB_UNITS(c->size) * SLOB_UNIT,
594 flags, node);
595 } else {
576 b = slob_new_page(flags, get_order(c->size), node); 596 b = slob_new_page(flags, get_order(c->size), node);
597 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
598 _RET_IP_, b, c->size,
599 PAGE_SIZE << get_order(c->size),
600 flags, node);
601 }
577 602
578 if (c->ctor) 603 if (c->ctor)
579 c->ctor(b); 604 c->ctor(b);
@@ -609,6 +634,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
609 } else { 634 } else {
610 __kmem_cache_free(b, c->size); 635 __kmem_cache_free(b, c->size);
611 } 636 }
637
638 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
612} 639}
613EXPORT_SYMBOL(kmem_cache_free); 640EXPORT_SYMBOL(kmem_cache_free);
614 641
diff --git a/mm/slub.c b/mm/slub.c
index 0280eee6cf37..816734ed8aa3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <trace/kmemtrace.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/cpuset.h> 21#include <linux/cpuset.h>
21#include <linux/mempolicy.h> 22#include <linux/mempolicy.h>
@@ -1596,6 +1597,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1596 unsigned long flags; 1597 unsigned long flags;
1597 unsigned int objsize; 1598 unsigned int objsize;
1598 1599
1600 lockdep_trace_alloc(gfpflags);
1599 might_sleep_if(gfpflags & __GFP_WAIT); 1601 might_sleep_if(gfpflags & __GFP_WAIT);
1600 1602
1601 if (should_failslab(s->objsize, gfpflags)) 1603 if (should_failslab(s->objsize, gfpflags))
@@ -1623,18 +1625,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1623 1625
1624void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1626void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1625{ 1627{
1626 return slab_alloc(s, gfpflags, -1, _RET_IP_); 1628 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
1629
1630 kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1631 s->objsize, s->size, gfpflags);
1632
1633 return ret;
1627} 1634}
1628EXPORT_SYMBOL(kmem_cache_alloc); 1635EXPORT_SYMBOL(kmem_cache_alloc);
1629 1636
1637#ifdef CONFIG_KMEMTRACE
1638void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1639{
1640 return slab_alloc(s, gfpflags, -1, _RET_IP_);
1641}
1642EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1643#endif
1644
1630#ifdef CONFIG_NUMA 1645#ifdef CONFIG_NUMA
1631void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1646void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1632{ 1647{
1633 return slab_alloc(s, gfpflags, node, _RET_IP_); 1648 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
1649
1650 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
1651 s->objsize, s->size, gfpflags, node);
1652
1653 return ret;
1634} 1654}
1635EXPORT_SYMBOL(kmem_cache_alloc_node); 1655EXPORT_SYMBOL(kmem_cache_alloc_node);
1636#endif 1656#endif
1637 1657
1658#ifdef CONFIG_KMEMTRACE
1659void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
1660 gfp_t gfpflags,
1661 int node)
1662{
1663 return slab_alloc(s, gfpflags, node, _RET_IP_);
1664}
1665EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
1666#endif
1667
1638/* 1668/*
1639 * Slow patch handling. This may still be called frequently since objects 1669 * Slow patch handling. This may still be called frequently since objects
1640 * have a longer lifetime than the cpu slabs in most processing loads. 1670 * have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1772,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
1742 page = virt_to_head_page(x); 1772 page = virt_to_head_page(x);
1743 1773
1744 slab_free(s, page, x, _RET_IP_); 1774 slab_free(s, page, x, _RET_IP_);
1775
1776 kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
1745} 1777}
1746EXPORT_SYMBOL(kmem_cache_free); 1778EXPORT_SYMBOL(kmem_cache_free);
1747 1779
@@ -2475,7 +2507,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
2475 * Kmalloc subsystem 2507 * Kmalloc subsystem
2476 *******************************************************************/ 2508 *******************************************************************/
2477 2509
2478struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; 2510struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
2479EXPORT_SYMBOL(kmalloc_caches); 2511EXPORT_SYMBOL(kmalloc_caches);
2480 2512
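SLUB_PAGE_SHIFT and SLUB_MAX_SIZE come from include/linux/slub_def.h, which is changed elsewhere in this series; the definitions below are given from memory as an assumption, to show why the kmalloc_caches[] bound and the size checks further down change together:

	/* Presumed definitions (assumption, not part of this hunk): */
	#define SLUB_MAX_SIZE	(2 * PAGE_SIZE)	  /* largest kmalloc served from slab caches */
	#define SLUB_PAGE_SHIFT	(PAGE_SHIFT + 2)  /* number of kmalloc_caches[] entries */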
2481static int __init setup_slub_min_order(char *str) 2513static int __init setup_slub_min_order(char *str)
@@ -2537,7 +2569,7 @@ panic:
2537} 2569}
2538 2570
2539#ifdef CONFIG_ZONE_DMA 2571#ifdef CONFIG_ZONE_DMA
2540static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; 2572static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
2541 2573
2542static void sysfs_add_func(struct work_struct *w) 2574static void sysfs_add_func(struct work_struct *w)
2543{ 2575{
@@ -2657,8 +2689,9 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2657void *__kmalloc(size_t size, gfp_t flags) 2689void *__kmalloc(size_t size, gfp_t flags)
2658{ 2690{
2659 struct kmem_cache *s; 2691 struct kmem_cache *s;
2692 void *ret;
2660 2693
2661 if (unlikely(size > PAGE_SIZE)) 2694 if (unlikely(size > SLUB_MAX_SIZE))
2662 return kmalloc_large(size, flags); 2695 return kmalloc_large(size, flags);
2663 2696
2664 s = get_slab(size, flags); 2697 s = get_slab(size, flags);
@@ -2666,7 +2699,12 @@ void *__kmalloc(size_t size, gfp_t flags)
2666 if (unlikely(ZERO_OR_NULL_PTR(s))) 2699 if (unlikely(ZERO_OR_NULL_PTR(s)))
2667 return s; 2700 return s;
2668 2701
2669 return slab_alloc(s, flags, -1, _RET_IP_); 2702 ret = slab_alloc(s, flags, -1, _RET_IP_);
2703
2704 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2705 size, s->size, flags);
2706
2707 return ret;
2670} 2708}
2671EXPORT_SYMBOL(__kmalloc); 2709EXPORT_SYMBOL(__kmalloc);
2672 2710
@@ -2685,16 +2723,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2685void *__kmalloc_node(size_t size, gfp_t flags, int node) 2723void *__kmalloc_node(size_t size, gfp_t flags, int node)
2686{ 2724{
2687 struct kmem_cache *s; 2725 struct kmem_cache *s;
2726 void *ret;
2727
2728 if (unlikely(size > SLUB_MAX_SIZE)) {
2729 ret = kmalloc_large_node(size, flags, node);
2688 2730
2689 if (unlikely(size > PAGE_SIZE)) 2731 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
2690 return kmalloc_large_node(size, flags, node); 2732 _RET_IP_, ret,
2733 size, PAGE_SIZE << get_order(size),
2734 flags, node);
2735
2736 return ret;
2737 }
2691 2738
2692 s = get_slab(size, flags); 2739 s = get_slab(size, flags);
2693 2740
2694 if (unlikely(ZERO_OR_NULL_PTR(s))) 2741 if (unlikely(ZERO_OR_NULL_PTR(s)))
2695 return s; 2742 return s;
2696 2743
2697 return slab_alloc(s, flags, node, _RET_IP_); 2744 ret = slab_alloc(s, flags, node, _RET_IP_);
2745
2746 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
2747 size, s->size, flags, node);
2748
2749 return ret;
2698} 2750}
2699EXPORT_SYMBOL(__kmalloc_node); 2751EXPORT_SYMBOL(__kmalloc_node);
2700#endif 2752#endif
@@ -2753,6 +2805,8 @@ void kfree(const void *x)
2753 return; 2805 return;
2754 } 2806 }
2755 slab_free(page->slab, page, object, _RET_IP_); 2807 slab_free(page->slab, page, object, _RET_IP_);
2808
2809 kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
2756} 2810}
2757EXPORT_SYMBOL(kfree); 2811EXPORT_SYMBOL(kfree);
2758 2812
@@ -2986,7 +3040,7 @@ void __init kmem_cache_init(void)
2986 caches++; 3040 caches++;
2987 } 3041 }
2988 3042
2989 for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { 3043 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
2990 create_kmalloc_cache(&kmalloc_caches[i], 3044 create_kmalloc_cache(&kmalloc_caches[i],
2991 "kmalloc", 1 << i, GFP_KERNEL); 3045 "kmalloc", 1 << i, GFP_KERNEL);
2992 caches++; 3046 caches++;
@@ -3023,7 +3077,7 @@ void __init kmem_cache_init(void)
3023 slab_state = UP; 3077 slab_state = UP;
3024 3078
3025 /* Provide the correct kmalloc names now that the caches are up */ 3079 /* Provide the correct kmalloc names now that the caches are up */
3026 for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) 3080 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
3027 kmalloc_caches[i]. name = 3081 kmalloc_caches[i]. name =
3028 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); 3082 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
3029 3083
@@ -3222,8 +3276,9 @@ static struct notifier_block __cpuinitdata slab_notifier = {
3222void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 3276void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3223{ 3277{
3224 struct kmem_cache *s; 3278 struct kmem_cache *s;
3279 void *ret;
3225 3280
3226 if (unlikely(size > PAGE_SIZE)) 3281 if (unlikely(size > SLUB_MAX_SIZE))
3227 return kmalloc_large(size, gfpflags); 3282 return kmalloc_large(size, gfpflags);
3228 3283
3229 s = get_slab(size, gfpflags); 3284 s = get_slab(size, gfpflags);
@@ -3231,15 +3286,22 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3231 if (unlikely(ZERO_OR_NULL_PTR(s))) 3286 if (unlikely(ZERO_OR_NULL_PTR(s)))
3232 return s; 3287 return s;
3233 3288
3234 return slab_alloc(s, gfpflags, -1, caller); 3289 ret = slab_alloc(s, gfpflags, -1, caller);
3290
3291 /* Honor the call site pointer we received. */
3292 kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
3293 s->size, gfpflags);
3294
3295 return ret;
3235} 3296}
3236 3297
3237void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3298void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3238 int node, unsigned long caller) 3299 int node, unsigned long caller)
3239{ 3300{
3240 struct kmem_cache *s; 3301 struct kmem_cache *s;
3302 void *ret;
3241 3303
3242 if (unlikely(size > PAGE_SIZE)) 3304 if (unlikely(size > SLUB_MAX_SIZE))
3243 return kmalloc_large_node(size, gfpflags, node); 3305 return kmalloc_large_node(size, gfpflags, node);
3244 3306
3245 s = get_slab(size, gfpflags); 3307 s = get_slab(size, gfpflags);
@@ -3247,7 +3309,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3247 if (unlikely(ZERO_OR_NULL_PTR(s))) 3309 if (unlikely(ZERO_OR_NULL_PTR(s)))
3248 return s; 3310 return s;
3249 3311
3250 return slab_alloc(s, gfpflags, node, caller); 3312 ret = slab_alloc(s, gfpflags, node, caller);
3313
3314 /* Honor the call site pointer we received. */
3315 kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
3316 size, s->size, gfpflags, node);
3317
3318 return ret;
3251} 3319}
3252 3320
3253#ifdef CONFIG_SLUB_DEBUG 3321#ifdef CONFIG_SLUB_DEBUG
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6177e3bcd66b..79adbd7e21ae 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1469,7 +1469,7 @@ static void shrink_zone(int priority, struct zone *zone,
1469 int file = is_file_lru(l); 1469 int file = is_file_lru(l);
1470 int scan; 1470 int scan;
1471 1471
1472 scan = zone_page_state(zone, NR_LRU_BASE + l); 1472 scan = zone_nr_pages(zone, sc, l);
1473 if (priority) { 1473 if (priority) {
1474 scan >>= priority; 1474 scan >>= priority;
1475 scan = (scan * percent[file]) / 100; 1475 scan = (scan * percent[file]) / 100;
@@ -1965,6 +1965,8 @@ static int kswapd(void *p)
1965 }; 1965 };
1966 node_to_cpumask_ptr(cpumask, pgdat->node_id); 1966 node_to_cpumask_ptr(cpumask, pgdat->node_id);
1967 1967
1968 lockdep_set_current_reclaim_state(GFP_KERNEL);
1969
1968 if (!cpumask_empty(cpumask)) 1970 if (!cpumask_empty(cpumask))
1969 set_cpus_allowed_ptr(tsk, cpumask); 1971 set_cpus_allowed_ptr(tsk, cpumask);
1970 current->reclaim_state = &reclaim_state; 1972 current->reclaim_state = &reclaim_state;
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
index 01724e04c556..dffdc49878af 100644
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -5,9 +5,9 @@
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6 6
7DECLARE_TRACE(subsys_event, 7DECLARE_TRACE(subsys_event,
8 TPPROTO(struct inode *inode, struct file *file), 8 TP_PROTO(struct inode *inode, struct file *file),
9 TPARGS(inode, file)); 9 TP_ARGS(inode, file));
10DECLARE_TRACE(subsys_eventb, 10DECLARE_TRACE(subsys_eventb,
11 TPPROTO(void), 11 TP_PROTO(void),
12 TPARGS()); 12 TP_ARGS());
13#endif 13#endif
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index c7de8b39fcf1..39a9642927d3 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -112,13 +112,13 @@ endif
112# --------------------------------------------------------------------------- 112# ---------------------------------------------------------------------------
113 113
114# Default is built-in, unless we know otherwise 114# Default is built-in, unless we know otherwise
115modkern_cflags := $(CFLAGS_KERNEL) 115modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL))
116quiet_modtag := $(empty) $(empty) 116quiet_modtag := $(empty) $(empty)
117 117
118$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) 118$(real-objs-m) : part-of-module := y
119$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) 119$(real-objs-m:.o=.i) : part-of-module := y
120$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) 120$(real-objs-m:.o=.s) : part-of-module := y
121$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) 121$(real-objs-m:.o=.lst): part-of-module := y
122 122
123$(real-objs-m) : quiet_modtag := [M] 123$(real-objs-m) : quiet_modtag := [M]
124$(real-objs-m:.o=.i) : quiet_modtag := [M] 124$(real-objs-m:.o=.i) : quiet_modtag := [M]
@@ -205,7 +205,8 @@ endif
205ifdef CONFIG_FTRACE_MCOUNT_RECORD 205ifdef CONFIG_FTRACE_MCOUNT_RECORD
206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ 206cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
207 "$(if $(CONFIG_64BIT),64,32)" \ 207 "$(if $(CONFIG_64BIT),64,32)" \
208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; 208 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
209 "$(if $(part-of-module),1,0)" "$(@)";
209endif 210endif
210 211
211define rule_cc_o_c 212define rule_cc_o_c
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 8c6b7b09606a..fa4a0a17b7e0 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -35,9 +35,10 @@ $(objtree)/kernel.spec: $(MKSPEC) $(srctree)/Makefile
35rpm-pkg rpm: $(objtree)/kernel.spec FORCE 35rpm-pkg rpm: $(objtree)/kernel.spec FORCE
36 $(MAKE) clean 36 $(MAKE) clean
37 $(PREV) ln -sf $(srctree) $(KERNELPATH) 37 $(PREV) ln -sf $(srctree) $(KERNELPATH)
38 $(CONFIG_SHELL) $(srctree)/scripts/setlocalversion > $(objtree)/.scmversion
38 $(PREV) tar -cz $(RCS_TAR_IGNORE) -f $(KERNELPATH).tar.gz $(KERNELPATH)/. 39 $(PREV) tar -cz $(RCS_TAR_IGNORE) -f $(KERNELPATH).tar.gz $(KERNELPATH)/.
39 $(PREV) rm $(KERNELPATH) 40 $(PREV) rm $(KERNELPATH)
40 41 rm -f $(objtree)/.scmversion
41 set -e; \ 42 set -e; \
42 $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version 43 $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
43 set -e; \ 44 set -e; \
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index ee448cdc6a2b..3d93f8c81252 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -96,7 +96,7 @@ echo "%endif"
96 96
97echo "" 97echo ""
98echo "%clean" 98echo "%clean"
99echo '#echo -rf $RPM_BUILD_ROOT' 99echo 'rm -rf $RPM_BUILD_ROOT'
100echo "" 100echo ""
101echo "%files" 101echo "%files"
102echo '%defattr (-, root, root)' 102echo '%defattr (-, root, root)'
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe831412bea9..409596eca124 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -100,14 +100,19 @@ $P =~ s@.*/@@g;
100 100
101my $V = '0.1'; 101my $V = '0.1';
102 102
103if ($#ARGV < 6) { 103if ($#ARGV < 7) {
104 print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n"; 104 print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
105 print "version: $V\n"; 105 print "version: $V\n";
106 exit(1); 106 exit(1);
107} 107}
108 108
109my ($arch, $bits, $objdump, $objcopy, $cc, 109my ($arch, $bits, $objdump, $objcopy, $cc,
110 $ld, $nm, $rm, $mv, $inputfile) = @ARGV; 110 $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
111
112# This file refers to mcount and shouldn't be ftraced, so let's ignore it
113if ($inputfile eq "kernel/trace/ftrace.o") {
114 exit(0);
115}
111 116
112# Acceptable sections to record. 117# Acceptable sections to record.
113my %text_sections = ( 118my %text_sections = (
@@ -201,6 +206,13 @@ if ($arch eq "x86_64") {
201 $alignment = 2; 206 $alignment = 2;
202 $section_type = '%progbits'; 207 $section_type = '%progbits';
203 208
209} elsif ($arch eq "ia64") {
210 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
211 $type = "data8";
212
213 if ($is_module eq "0") {
214 $cc .= " -mconstant-gp";
215 }
204} else { 216} else {
205 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 217 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
206} 218}
@@ -263,7 +275,6 @@ if (!$found_version) {
263 "\tDisabling local function references.\n"; 275 "\tDisabling local function references.\n";
264} 276}
265 277
266
267# 278#
268# Step 1: find all the local (static functions) and weak symbols. 279# Step 1: find all the local (static functions) and weak symbols.
269# 't' is local, 'w/W' is weak (we never use a weak function) 280# 't' is local, 'w/W' is weak (we never use a weak function)
@@ -331,13 +342,16 @@ sub update_funcs
331# 342#
332# Step 2: find the sections and mcount call sites 343# Step 2: find the sections and mcount call sites
333# 344#
334open(IN, "$objdump -dr $inputfile|") || die "error running $objdump"; 345open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump";
335 346
336my $text; 347my $text;
337 348
349my $read_headers = 1;
350
338while (<IN>) { 351while (<IN>) {
339 # is it a section? 352 # is it a section?
340 if (/$section_regex/) { 353 if (/$section_regex/) {
354 $read_headers = 0;
341 355
342 # Only record text sections that we know are safe 356 # Only record text sections that we know are safe
343 if (defined($text_sections{$1})) { 357 if (defined($text_sections{$1})) {
@@ -371,6 +385,19 @@ while (<IN>) {
371 $ref_func = $text; 385 $ref_func = $text;
372 } 386 }
373 } 387 }
388 } elsif ($read_headers && /$mcount_section/) {
389 #
390 # Somehow the make process can execute this script on an
391 # object twice. If it does, the mcount section would be duplicated
392 # and the function tracer self test would fail. Check whether the
393 # mcount section already exists, and if it does,
394 # warn and exit.
395 #
396 print STDERR "ERROR: $mcount_section already in $inputfile\n" .
397 "\tThis may be an indication that your build is corrupted.\n" .
398 "\tDelete $inputfile and try again. If the same object file\n" .
399 "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n";
400 exit(-1);
374 } 401 }
375 402
376 # is this a call site to mcount? If so, record it to print later 403 # is this a call site to mcount? If so, record it to print later
diff --git a/scripts/unifdef.c b/scripts/unifdef.c
index 552025e72acb..05a31a6c7e1b 100644
--- a/scripts/unifdef.c
+++ b/scripts/unifdef.c
@@ -206,7 +206,7 @@ static void done(void);
206static void error(const char *); 206static void error(const char *);
207static int findsym(const char *); 207static int findsym(const char *);
208static void flushline(bool); 208static void flushline(bool);
209static Linetype getline(void); 209static Linetype get_line(void);
210static Linetype ifeval(const char **); 210static Linetype ifeval(const char **);
211static void ignoreoff(void); 211static void ignoreoff(void);
212static void ignoreon(void); 212static void ignoreon(void);
@@ -512,7 +512,7 @@ process(void)
512 512
513 for (;;) { 513 for (;;) {
514 linenum++; 514 linenum++;
515 lineval = getline(); 515 lineval = get_line();
516 trans_table[ifstate[depth]][lineval](); 516 trans_table[ifstate[depth]][lineval]();
517 debug("process %s -> %s depth %d", 517 debug("process %s -> %s depth %d",
518 linetype_name[lineval], 518 linetype_name[lineval],
@@ -526,7 +526,7 @@ process(void)
526 * help from skipcomment(). 526 * help from skipcomment().
527 */ 527 */
528static Linetype 528static Linetype
529getline(void) 529get_line(void)
530{ 530{
531 const char *cp; 531 const char *cp;
532 int cursym; 532 int cursym;