aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-bus-mei7
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb6
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu12
-rw-r--r--Documentation/DocBook/device-drivers.tmpl2
-rw-r--r--Documentation/RCU/checklist.txt26
-rw-r--r--Documentation/RCU/lockdep.txt5
-rw-r--r--Documentation/RCU/rcubarrier.txt15
-rw-r--r--Documentation/RCU/stallwarn.txt33
-rw-r--r--Documentation/RCU/whatisRCU.txt4
-rw-r--r--Documentation/SubmittingPatches9
-rw-r--r--Documentation/arm/sunxi/clocks.txt56
-rw-r--r--Documentation/backlight/lp855x-driver.txt7
-rw-r--r--Documentation/cgroups/cgroups.txt3
-rw-r--r--Documentation/cgroups/devices.txt70
-rw-r--r--Documentation/cgroups/memory.txt70
-rw-r--r--Documentation/clk.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-adc.txt13
-rw-r--r--Documentation/devicetree/bindings/arm/msm/ssbi.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/axi-clkgen.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-factor-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5351.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/sunxi.txt151
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt6
-rw-r--r--Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt29
-rw-r--r--Documentation/devicetree/bindings/iio/iio-bindings.txt97
-rw-r--r--Documentation/devicetree/bindings/media/coda.txt30
-rw-r--r--Documentation/devicetree/bindings/mfd/ab8500.txt6
-rw-r--r--Documentation/devicetree/bindings/mfd/mc13xxx.txt36
-rw-r--r--Documentation/devicetree/bindings/misc/sram.txt16
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt109
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt3
-rw-r--r--Documentation/devicetree/bindings/regulator/max8952.txt52
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-spi.txt3
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/spi-samsung.txt8
-rw-r--r--Documentation/devicetree/bindings/staging/dwc2.txt15
-rw-r--r--Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt2
-rw-r--r--Documentation/devicetree/bindings/tty/serial/of-serial.txt7
-rw-r--r--Documentation/devicetree/bindings/usb/ci13xxx-imx.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-omap.txt32
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-omap3.txt15
-rw-r--r--Documentation/devicetree/bindings/usb/omap-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/usb/samsung-usbphy.txt76
-rw-r--r--Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt34
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt2
-rw-r--r--Documentation/devicetree/bindings/video/backlight/lp855x.txt41
-rw-r--r--Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt27
-rw-r--r--Documentation/devicetree/bindings/video/via,vt8500-fb.txt48
-rw-r--r--Documentation/devicetree/bindings/video/wm,wm8505-fb.txt32
-rw-r--r--Documentation/filesystems/vfat.txt26
-rw-r--r--Documentation/hwmon/adt741047
-rw-r--r--Documentation/hwmon/lm2506634
-rw-r--r--Documentation/hwmon/lm752
-rw-r--r--Documentation/hwmon/lm9523436
-rw-r--r--Documentation/hwmon/ltc2978143
-rw-r--r--Documentation/hwmon/nct6775188
-rw-r--r--Documentation/hwmon/sht152
-rw-r--r--Documentation/hwmon/tmp40125
-rw-r--r--Documentation/hwmon/zl61002
-rw-r--r--Documentation/i2c/busses/i2c-diolan-u2c2
-rw-r--r--Documentation/ia64/err_inject.txt2
-rw-r--r--Documentation/input/alps.txt67
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kdump/kdump.txt1
-rw-r--r--Documentation/kernel-parameters.txt88
-rw-r--r--Documentation/misc-devices/mei/mei-client-bus.txt138
-rw-r--r--Documentation/networking/ipvs-sysctl.txt7
-rw-r--r--Documentation/networking/tuntap.txt77
-rw-r--r--Documentation/pinctrl.txt112
-rw-r--r--Documentation/s390/s390dbf.txt3
-rw-r--r--Documentation/scsi/LICENSE.qla2xxx2
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt7
-rw-r--r--Documentation/sound/alsa/seq_oss.html2
-rw-r--r--Documentation/sysctl/vm.txt50
-rw-r--r--Documentation/this_cpu_ops.txt205
-rw-r--r--Documentation/trace/ftrace.txt2099
-rw-r--r--Documentation/trace/uprobetracer.txt114
-rw-r--r--Documentation/usb/power-management.txt10
-rw-r--r--Documentation/vm/overcommit-accounting8
-rw-r--r--Documentation/x86/x86_64/mm.txt4
83 files changed, 4156 insertions, 921 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei
new file mode 100644
index 000000000000..2066f0bbd453
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-mei
@@ -0,0 +1,7 @@
1What: /sys/bus/mei/devices/.../modalias
2Date: March 2013
3KernelVersion: 3.10
4Contact: Samuel Ortiz <sameo@linux.intel.com>
5 linux-mei@linux.intel.com
6Description: Stores the same MODALIAS value emitted by uevent
7 Format: mei:<mei device name>
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index c8baaf53594a..f093e59cbe5f 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -32,7 +32,7 @@ Date: January 2008
32KernelVersion: 2.6.25 32KernelVersion: 2.6.25
33Contact: Sarah Sharp <sarah.a.sharp@intel.com> 33Contact: Sarah Sharp <sarah.a.sharp@intel.com>
34Description: 34Description:
35 If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file 35 If CONFIG_PM_RUNTIME is enabled then this file
36 is present. When read, it returns the total time (in msec) 36 is present. When read, it returns the total time (in msec)
37 that the USB device has been connected to the machine. This 37 that the USB device has been connected to the machine. This
38 file is read-only. 38 file is read-only.
@@ -45,7 +45,7 @@ Date: January 2008
45KernelVersion: 2.6.25 45KernelVersion: 2.6.25
46Contact: Sarah Sharp <sarah.a.sharp@intel.com> 46Contact: Sarah Sharp <sarah.a.sharp@intel.com>
47Description: 47Description:
48 If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file 48 If CONFIG_PM_RUNTIME is enabled then this file
49 is present. When read, it returns the total time (in msec) 49 is present. When read, it returns the total time (in msec)
50 that the USB device has been active, i.e. not in a suspended 50 that the USB device has been active, i.e. not in a suspended
51 state. This file is read-only. 51 state. This file is read-only.
@@ -187,7 +187,7 @@ What: /sys/bus/usb/devices/.../power/usb2_hardware_lpm
187Date: September 2011 187Date: September 2011
188Contact: Andiry Xu <andiry.xu@amd.com> 188Contact: Andiry Xu <andiry.xu@amd.com>
189Description: 189Description:
190 If CONFIG_USB_SUSPEND is set and a USB 2.0 lpm-capable device 190 If CONFIG_PM_RUNTIME is set and a USB 2.0 lpm-capable device
191 is plugged in to an xHCI host which supports link PM, it will 191 is plugged in to an xHCI host which supports link PM, it will
192 perform a LPM test; if the test is passed and host supports 192 perform a LPM test; if the test is passed and host supports
193 USB2 hardware LPM (xHCI 1.0 feature), USB2 hardware LPM will 193 USB2 hardware LPM (xHCI 1.0 feature), USB2 hardware LPM will
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 9c978dcae07d..2447698aed41 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -173,3 +173,15 @@ Description: Processor frequency boosting control
173 Boosting allows the CPU and the firmware to run at a frequency 173 Boosting allows the CPU and the firmware to run at a frequency
174 beyond its nominal limit. 174 beyond its nominal limit.
175 More details can be found in Documentation/cpu-freq/boost.txt 175 More details can be found in Documentation/cpu-freq/boost.txt
176
177
178What: /sys/devices/system/cpu/cpu#/crash_notes
179 /sys/devices/system/cpu/cpu#/crash_notes_size
180Date: April 2013
181Contact: kexec@lists.infradead.org
182Description: address and size of the percpu note.
183
184 crash_notes: the physical address of the memory that holds the
185 note of cpu#.
186
187 crash_notes_size: size of the note of cpu#.
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index 7514dbf0a679..c36892c072da 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -227,7 +227,7 @@ X!Isound/sound_firmware.c
227 <chapter id="uart16x50"> 227 <chapter id="uart16x50">
228 <title>16x50 UART Driver</title> 228 <title>16x50 UART Driver</title>
229!Edrivers/tty/serial/serial_core.c 229!Edrivers/tty/serial/serial_core.c
230!Edrivers/tty/serial/8250/8250.c 230!Edrivers/tty/serial/8250/8250_core.c
231 </chapter> 231 </chapter>
232 232
233 <chapter id="fbdev"> 233 <chapter id="fbdev">
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 31ef8fe07f82..79e789b8b8ea 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -217,9 +217,14 @@ over a rather long period of time, but improvements are always welcome!
217 whether the increased speed is worth it. 217 whether the increased speed is worth it.
218 218
2198. Although synchronize_rcu() is slower than is call_rcu(), it 2198. Although synchronize_rcu() is slower than is call_rcu(), it
220 usually results in simpler code. So, unless update performance 220 usually results in simpler code. So, unless update performance is
221 is critically important or the updaters cannot block, 221 critically important, the updaters cannot block, or the latency of
222 synchronize_rcu() should be used in preference to call_rcu(). 222 synchronize_rcu() is visible from userspace, synchronize_rcu()
223 should be used in preference to call_rcu(). Furthermore,
224 kfree_rcu() usually results in even simpler code than does
225 synchronize_rcu() without synchronize_rcu()'s multi-millisecond
226 latency. So please take advantage of kfree_rcu()'s "fire and
227 forget" memory-freeing capabilities where it applies.
223 228
224 An especially important property of the synchronize_rcu() 229 An especially important property of the synchronize_rcu()
225 primitive is that it automatically self-limits: if grace periods 230 primitive is that it automatically self-limits: if grace periods
@@ -268,7 +273,8 @@ over a rather long period of time, but improvements are always welcome!
268 e. Periodically invoke synchronize_rcu(), permitting a limited 273 e. Periodically invoke synchronize_rcu(), permitting a limited
269 number of updates per grace period. 274 number of updates per grace period.
270 275
271 The same cautions apply to call_rcu_bh() and call_rcu_sched(). 276 The same cautions apply to call_rcu_bh(), call_rcu_sched(),
277 call_srcu(), and kfree_rcu().
272 278
2739. All RCU list-traversal primitives, which include 2799. All RCU list-traversal primitives, which include
274 rcu_dereference(), list_for_each_entry_rcu(), and 280 rcu_dereference(), list_for_each_entry_rcu(), and
@@ -296,9 +302,9 @@ over a rather long period of time, but improvements are always welcome!
296 all currently executing rcu_read_lock()-protected RCU read-side 302 all currently executing rcu_read_lock()-protected RCU read-side
297 critical sections complete. It does -not- necessarily guarantee 303 critical sections complete. It does -not- necessarily guarantee
298 that all currently running interrupts, NMIs, preempt_disable() 304 that all currently running interrupts, NMIs, preempt_disable()
299 code, or idle loops will complete. Therefore, if you do not have 305 code, or idle loops will complete. Therefore, if your
300 rcu_read_lock()-protected read-side critical sections, do -not- 306 read-side critical sections are protected by something other
301 use synchronize_rcu(). 307 than rcu_read_lock(), do -not- use synchronize_rcu().
302 308
303 Similarly, disabling preemption is not an acceptable substitute 309 Similarly, disabling preemption is not an acceptable substitute
304 for rcu_read_lock(). Code that attempts to use preemption 310 for rcu_read_lock(). Code that attempts to use preemption
@@ -401,9 +407,9 @@ over a rather long period of time, but improvements are always welcome!
401 read-side critical sections. It is the responsibility of the 407 read-side critical sections. It is the responsibility of the
402 RCU update-side primitives to deal with this. 408 RCU update-side primitives to deal with this.
403 409
40417. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and 41017. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
405 the __rcu sparse checks to validate your RCU code. These 411 __rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
406 can help find problems as follows: 412 validate your RCU code. These can help find problems as follows:
407 413
408 CONFIG_PROVE_RCU: check that accesses to RCU-protected data 414 CONFIG_PROVE_RCU: check that accesses to RCU-protected data
409 structures are carried out under the proper RCU 415 structures are carried out under the proper RCU
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index a102d4b3724b..cd83d2348fef 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -64,6 +64,11 @@ checking of rcu_dereference() primitives:
64 but retain the compiler constraints that prevent duplicating 64 but retain the compiler constraints that prevent duplicating
65 or coalescing. This is useful when testing the 65 or coalescing. This is useful when testing the
66 value of the pointer itself, for example, against NULL. 66 value of the pointer itself, for example, against NULL.
67 rcu_access_index(idx):
68 Return the value of the index and omit all barriers, but
69 retain the compiler constraints that prevent duplicating
70 or coalescing. This is useful when testing the
71 value of the index itself, for example, against -1.
67 72
68The rcu_dereference_check() check expression can be any boolean 73The rcu_dereference_check() check expression can be any boolean
69expression, but would normally include a lockdep expression. However, 74expression, but would normally include a lockdep expression. However,
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index 38428c125135..2e319d1b9ef2 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,7 +79,20 @@ complete. Pseudo-code using rcu_barrier() is as follows:
79 2. Execute rcu_barrier(). 79 2. Execute rcu_barrier().
80 3. Allow the module to be unloaded. 80 3. Allow the module to be unloaded.
81 81
82The rcutorture module makes use of rcu_barrier in its exit function 82There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier()
83functions for the other flavors of RCU, and you of course must match
84the flavor of rcu_barrier() with that of call_rcu(). If your module
85uses multiple flavors of call_rcu(), then it must also use multiple
86flavors of rcu_barrier() when unloading that module. For example, if
87it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on
88srcu_struct_2, then the following three lines of code will be required
89when unloading:
90
91 1 rcu_barrier_bh();
92 2 srcu_barrier(&srcu_struct_1);
93 3 srcu_barrier(&srcu_struct_2);
94
95The rcutorture module makes use of rcu_barrier() in its exit function
83as follows: 96as follows:
84 97
85 1 static void 98 1 static void
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 1927151b386b..e38b8df3d727 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -92,14 +92,14 @@ If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set,
92more information is printed with the stall-warning message, for example: 92more information is printed with the stall-warning message, for example:
93 93
94 INFO: rcu_preempt detected stall on CPU 94 INFO: rcu_preempt detected stall on CPU
95 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 95 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543
96 (t=65000 jiffies) 96 (t=65000 jiffies)
97 97
98In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is 98In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is
99printed: 99printed:
100 100
101 INFO: rcu_preempt detected stall on CPU 101 INFO: rcu_preempt detected stall on CPU
102 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending 102 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D
103 (t=65000 jiffies) 103 (t=65000 jiffies)
104 104
105The "(64628 ticks this GP)" indicates that this CPU has taken more 105The "(64628 ticks this GP)" indicates that this CPU has taken more
@@ -116,13 +116,28 @@ number between the two "/"s is the value of the nesting, which will
116be a small positive number if in the idle loop and a very large positive 116be a small positive number if in the idle loop and a very large positive
117number (as shown above) otherwise. 117number (as shown above) otherwise.
118 118
119For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is 119The "softirq=" portion of the message tracks the number of RCU softirq
120not in the process of trying to force itself into dyntick-idle state, the 120handlers that the stalled CPU has executed. The number before the "/"
121"." indicates that the CPU has not given up forcing RCU into dyntick-idle 121is the number that had executed since boot at the time that this CPU
122mode (it would be "H" otherwise), and the "timer not pending" indicates 122last noted the beginning of a grace period, which might be the current
123that the CPU has not recently forced RCU into dyntick-idle mode (it 123(stalled) grace period, or it might be some earlier grace period (for
124would otherwise indicate the number of microseconds remaining in this 124example, if the CPU might have been in dyntick-idle mode for an extended
125forced state). 125time period). The number after the "/" is the number that have executed
126since boot until the current time. If this latter number stays constant
127across repeated stall-warning messages, it is possible that RCU's softirq
128handlers are no longer able to execute on this CPU. This can happen if
129the stalled CPU is spinning with interrupts disabled, or, in -rt
130kernels, if a high-priority process is starving RCU's softirq handler.
131
132For CONFIG_RCU_FAST_NO_HZ kernels, the "last_accelerate:" prints the
133low-order 16 bits (in hex) of the jiffies counter when this CPU last
134invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked
135rcu_accelerate_cbs() from rcu_prepare_for_idle(). The "nonlazy_posted:"
136prints the number of non-lazy callbacks posted since the last call to
137rcu_needs_cpu(). Finally, an "L" indicates that there are currently
138no non-lazy callbacks ("." is printed otherwise, as shown above) and
139"D" indicates that dyntick-idle processing is enabled ("." is printed
140otherwise, for example, if disabled via the "nohz=" kernel boot parameter).
126 141
127 142
128Multiple Warnings From One Stall 143Multiple Warnings From One Stall
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 0cc7820967f4..10df0b82f459 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -265,9 +265,9 @@ rcu_dereference()
265 rcu_read_lock(); 265 rcu_read_lock();
266 p = rcu_dereference(head.next); 266 p = rcu_dereference(head.next);
267 rcu_read_unlock(); 267 rcu_read_unlock();
268 x = p->address; 268 x = p->address; /* BUG!!! */
269 rcu_read_lock(); 269 rcu_read_lock();
270 y = p->data; 270 y = p->data; /* BUG!!! */
271 rcu_read_unlock(); 271 rcu_read_unlock();
272 272
273 Holding a reference from one RCU read-side critical section 273 Holding a reference from one RCU read-side critical section
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index aa0c1e63f050..6e97e73d87b5 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -420,7 +420,7 @@ person it names. This tag documents that potentially interested parties
420have been included in the discussion 420have been included in the discussion
421 421
422 422
42314) Using Reported-by:, Tested-by: and Reviewed-by: 42314) Using Reported-by:, Tested-by:, Reviewed-by: and Suggested-by:
424 424
425If this patch fixes a problem reported by somebody else, consider adding a 425If this patch fixes a problem reported by somebody else, consider adding a
426Reported-by: tag to credit the reporter for their contribution. Please 426Reported-by: tag to credit the reporter for their contribution. Please
@@ -468,6 +468,13 @@ done on the patch. Reviewed-by: tags, when supplied by reviewers known to
468understand the subject area and to perform thorough reviews, will normally 468understand the subject area and to perform thorough reviews, will normally
469increase the likelihood of your patch getting into the kernel. 469increase the likelihood of your patch getting into the kernel.
470 470
471A Suggested-by: tag indicates that the patch idea is suggested by the person
472named and ensures credit to the person for the idea. Please note that this
473tag should not be added without the reporter's permission, especially if the
474idea was not posted in a public forum. That said, if we diligently credit our
475idea reporters, they will, hopefully, be inspired to help us again in the
476future.
477
471 478
47215) The canonical patch format 47915) The canonical patch format
473 480
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.txt
new file mode 100644
index 000000000000..e09a88aa3136
--- /dev/null
+++ b/Documentation/arm/sunxi/clocks.txt
@@ -0,0 +1,56 @@
1Frequently asked questions about the sunxi clock system
2=======================================================
3
4This document contains useful bits of information that people tend to ask
5about the sunxi clock system, as well as accompanying ASCII art when adequate.
6
7Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the
8 system?
9
10A: The 24MHz oscillator allows gating to save power. Indeed, if gated
11 carelessly the system would stop functioning, but with the right
12 steps, one can gate it and keep the system running. Consider this
13 simplified suspend example:
14
15 While the system is operational, you would see something like
16
17 24MHz 32kHz
18 |
19 PLL1
20 \
21 \_ CPU Mux
22 |
23 [CPU]
24
25 When you are about to suspend, you switch the CPU Mux to the 32kHz
26 oscillator:
27
28 24MHz 32kHz
29 | |
30 PLL1 |
31 /
32 CPU Mux _/
33 |
34 [CPU]
35
36 Finally you can gate the main oscillator
37
38 32kHz
39 |
40 |
41 /
42 CPU Mux _/
43 |
44 [CPU]
45
46Q: Where can I learn more about the sunxi clocks?
47
48A: The linux-sunxi wiki contains a page documenting the clock registers,
49 you can find it at
50
51 http://linux-sunxi.org/A10/CCM
52
53 The authoritative source for information at this time is the ccmu driver
54 released by Allwinner, you can find it at
55
56 https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index 18b06ca038ea..1c732f0c6758 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -32,14 +32,10 @@ Platform data for lp855x
32For supporting platform specific data, the lp855x platform data can be used. 32For supporting platform specific data, the lp855x platform data can be used.
33 33
34* name : Backlight driver name. If it is not defined, default name is set. 34* name : Backlight driver name. If it is not defined, default name is set.
35* mode : Brightness control mode. PWM or register based.
36* device_control : Value of DEVICE CONTROL register. 35* device_control : Value of DEVICE CONTROL register.
37* initial_brightness : Initial value of backlight brightness. 36* initial_brightness : Initial value of backlight brightness.
38* period_ns : Platform specific PWM period value. unit is nano. 37* period_ns : Platform specific PWM period value. unit is nano.
39 Only valid when brightness is pwm input mode. 38 Only valid when brightness is pwm input mode.
40* load_new_rom_data :
41 0 : use default configuration data
42 1 : update values of eeprom or eprom registers on loading driver
43* size_program : Total size of lp855x_rom_data. 39* size_program : Total size of lp855x_rom_data.
44* rom_data : List of new eeprom/eprom registers. 40* rom_data : List of new eeprom/eprom registers.
45 41
@@ -54,10 +50,8 @@ static struct lp855x_rom_data lp8552_eeprom_arr[] = {
54 50
55static struct lp855x_platform_data lp8552_pdata = { 51static struct lp855x_platform_data lp8552_pdata = {
56 .name = "lcd-bl", 52 .name = "lcd-bl",
57 .mode = REGISTER_BASED,
58 .device_control = I2C_CONFIG(LP8552), 53 .device_control = I2C_CONFIG(LP8552),
59 .initial_brightness = INITIAL_BRT, 54 .initial_brightness = INITIAL_BRT,
60 .load_new_rom_data = 1,
61 .size_program = ARRAY_SIZE(lp8552_eeprom_arr), 55 .size_program = ARRAY_SIZE(lp8552_eeprom_arr),
62 .rom_data = lp8552_eeprom_arr, 56 .rom_data = lp8552_eeprom_arr,
63}; 57};
@@ -65,7 +59,6 @@ static struct lp855x_platform_data lp8552_pdata = {
65example 2) lp8556 platform data : pwm input mode with default rom data 59example 2) lp8556 platform data : pwm input mode with default rom data
66 60
67static struct lp855x_platform_data lp8556_pdata = { 61static struct lp855x_platform_data lp8556_pdata = {
68 .mode = PWM_BASED,
69 .device_control = PWM_CONFIG(LP8556), 62 .device_control = PWM_CONFIG(LP8556),
70 .initial_brightness = INITIAL_BRT, 63 .initial_brightness = INITIAL_BRT,
71 .period_ns = 1000000, 64 .period_ns = 1000000,
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index bcf1a00b06a1..638bf17ff869 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -442,7 +442,7 @@ You can attach the current shell task by echoing 0:
442You can use the cgroup.procs file instead of the tasks file to move all 442You can use the cgroup.procs file instead of the tasks file to move all
443threads in a threadgroup at once. Echoing the PID of any task in a 443threads in a threadgroup at once. Echoing the PID of any task in a
444threadgroup to cgroup.procs causes all tasks in that threadgroup to be 444threadgroup to cgroup.procs causes all tasks in that threadgroup to be
445be attached to the cgroup. Writing 0 to cgroup.procs moves all tasks 445attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
446in the writing task's threadgroup. 446in the writing task's threadgroup.
447 447
448Note: Since every task is always a member of exactly one cgroup in each 448Note: Since every task is always a member of exactly one cgroup in each
@@ -580,6 +580,7 @@ propagation along the hierarchy. See the comment on
580cgroup_for_each_descendant_pre() for details. 580cgroup_for_each_descendant_pre() for details.
581 581
582void css_offline(struct cgroup *cgrp); 582void css_offline(struct cgroup *cgrp);
583(cgroup_mutex held by caller)
583 584
584This is the counterpart of css_online() and called iff css_online() 585This is the counterpart of css_online() and called iff css_online()
585has succeeded on @cgrp. This signifies the beginning of the end of 586has succeeded on @cgrp. This signifies the beginning of the end of
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
index 16624a7f8222..3c1095ca02ea 100644
--- a/Documentation/cgroups/devices.txt
+++ b/Documentation/cgroups/devices.txt
@@ -13,9 +13,7 @@ either an integer or * for all. Access is a composition of r
13The root device cgroup starts with rwm to 'all'. A child device 13The root device cgroup starts with rwm to 'all'. A child device
14cgroup gets a copy of the parent. Administrators can then remove 14cgroup gets a copy of the parent. Administrators can then remove
15devices from the whitelist or add new entries. A child cgroup can 15devices from the whitelist or add new entries. A child cgroup can
16never receive a device access which is denied by its parent. However 16never receive a device access which is denied by its parent.
17when a device access is removed from a parent it will not also be
18removed from the child(ren).
19 17
202. User Interface 182. User Interface
21 19
@@ -50,3 +48,69 @@ task to a new cgroup. (Again we'll probably want to change that).
50 48
51A cgroup may not be granted more permissions than the cgroup's 49A cgroup may not be granted more permissions than the cgroup's
52parent has. 50parent has.
51
524. Hierarchy
53
54device cgroups maintain hierarchy by making sure a cgroup never has more
55access permissions than its parent. Every time an entry is written to
56a cgroup's devices.deny file, all its children will have that entry removed
57from their whitelist and all the locally set whitelist entries will be
58re-evaluated. In case one of the locally set whitelist entries would provide
59more access than the cgroup's parent, it'll be removed from the whitelist.
60
61Example:
62 A
63 / \
64 B
65
66 group behavior exceptions
67 A allow "b 8:* rwm", "c 116:1 rw"
68 B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
69
70If a device is denied in group A:
71 # echo "c 116:* r" > A/devices.deny
72it'll propagate down and after revalidating B's entries, the whitelist entry
73"c 116:2 rwm" will be removed:
74
75 group whitelist entries denied devices
76 A all "b 8:* rwm", "c 116:* rw"
77 B "c 1:3 rwm", "b 3:* rwm" all the rest
78
79In case parent's exceptions change and local exceptions are not allowed
80anymore, they'll be deleted.
81
82Notice that new whitelist entries will not be propagated:
83 A
84 / \
85 B
86
87 group whitelist entries denied devices
88 A "c 1:3 rwm", "c 1:5 r" all the rest
89 B "c 1:3 rwm", "c 1:5 r" all the rest
90
91when adding "c *:3 rwm":
92 # echo "c *:3 rwm" >A/devices.allow
93
94the result:
95 group whitelist entries denied devices
96 A "c *:3 rwm", "c 1:5 r" all the rest
97 B "c 1:3 rwm", "c 1:5 r" all the rest
98
99but now it'll be possible to add new entries to B:
100 # echo "c 2:3 rwm" >B/devices.allow
101 # echo "c 50:3 r" >B/devices.allow
102or even
103 # echo "c *:3 rwm" >B/devices.allow
104
105Allowing or denying all by writing 'a' to devices.allow or devices.deny will
106not be possible once the device cgroup has children.
107
1084.1 Hierarchy (internal implementation)
109
110device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
111list of exceptions. The internal state is controlled using the same user
112interface to preserve compatibility with the previous whitelist-only
113implementation. Removal or addition of exceptions that will reduce the access
114to devices will be propagated down the hierarchy.
115For every propagated exception, the effective rules will be re-evaluated based
116on current parent's access rules.
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 8b8c28b9864c..f336ede58e62 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -40,6 +40,7 @@ Features:
40 - soft limit 40 - soft limit
41 - moving (recharging) account at moving a task is selectable. 41 - moving (recharging) account at moving a task is selectable.
42 - usage threshold notifier 42 - usage threshold notifier
43 - memory pressure notifier
43 - oom-killer disable knob and oom-notifier 44 - oom-killer disable knob and oom-notifier
44 - Root cgroup has no limit controls. 45 - Root cgroup has no limit controls.
45 46
@@ -65,6 +66,7 @@ Brief summary of control files.
65 memory.stat # show various statistics 66 memory.stat # show various statistics
66 memory.use_hierarchy # set/show hierarchical account enabled 67 memory.use_hierarchy # set/show hierarchical account enabled
67 memory.force_empty # trigger forced move charge to parent 68 memory.force_empty # trigger forced move charge to parent
69 memory.pressure_level # set memory pressure notifications
68 memory.swappiness # set/show swappiness parameter of vmscan 70 memory.swappiness # set/show swappiness parameter of vmscan
69 (See sysctl's vm.swappiness) 71 (See sysctl's vm.swappiness)
70 memory.move_charge_at_immigrate # set/show controls of moving charges 72 memory.move_charge_at_immigrate # set/show controls of moving charges
@@ -762,7 +764,73 @@ At reading, current status of OOM is shown.
762 under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may 764 under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may
763 be stopped.) 765 be stopped.)
764 766
76511. TODO 76711. Memory Pressure
768
769The pressure level notifications can be used to monitor the memory
770allocation cost; based on the pressure, applications can implement
771different strategies of managing their memory resources. The pressure
772levels are defined as follows:
773
774The "low" level means that the system is reclaiming memory for new
775allocations. Monitoring this reclaiming activity might be useful for
776maintaining cache level. Upon notification, the program (typically
777"Activity Manager") might analyze vmstat and act in advance (e.g.
778prematurely shut down unimportant services).
779
780The "medium" level means that the system is experiencing medium memory
781pressure, the system might be making swap, paging out active file caches,
782etc. Upon this event applications may decide to further analyze
783vmstat/zoneinfo/memcg or internal memory usage statistics and free any
784resources that can be easily reconstructed or re-read from a disk.
785
786The "critical" level means that the system is actively thrashing, it is
787about to run out of memory (OOM) or even the in-kernel OOM killer is on its
788way to trigger. Applications should do whatever they can to help the
789system. It might be too late to consult with vmstat or any other
790statistics, so it's advisable to take an immediate action.
791
792The events are propagated upward until the event is handled, i.e. the
793events are not pass-through. Here is what this means: for example you have
794three cgroups: A->B->C. Now you set up an event listener on cgroups A, B
795and C, and suppose group C experiences some pressure. In this situation,
796only group C will receive the notification, i.e. groups A and B will not
797receive it. This is done to avoid excessive "broadcasting" of messages,
798which disturbs the system and which is especially bad if we are low on
799memory or thrashing. So, organize the cgroups wisely, or propagate the
800events manually (or, ask us to implement the pass-through events,
801explaining why you would need them.)
802
803The file memory.pressure_level is only used to setup an eventfd. To
804register a notification, an application must:
805
806- create an eventfd using eventfd(2);
807- open memory.pressure_level;
808- write string like "<event_fd> <fd of memory.pressure_level> <level>"
809 to cgroup.event_control.
810
811Application will be notified through eventfd when memory pressure is at
812the specific level (or higher). Read/write operations to
813memory.pressure_level are not implemented.
814
815Test:
816
817 Here is a small script example that makes a new cgroup, sets up a
818 memory limit, sets up a notification in the cgroup and then makes child
819 cgroup experience a critical pressure:
820
821 # cd /sys/fs/cgroup/memory/
822 # mkdir foo
823 # cd foo
824 # cgroup_event_listener memory.pressure_level low &
825 # echo 8000000 > memory.limit_in_bytes
826 # echo 8000000 > memory.memsw.limit_in_bytes
827 # echo $$ > tasks
828 # dd if=/dev/zero | read x
829
830 (Expect a bunch of notifications, and eventually, the oom-killer will
831 trigger.)
832
83312. TODO
766 834
7671. Add support for accounting huge pages (as a separate controller) 8351. Add support for accounting huge pages (as a separate controller)
7682. Make per-cgroup scanner reclaim not-shared pages first 8362. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index 1943fae014fd..b9911c27f496 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -174,9 +174,9 @@ int clk_foo_enable(struct clk_hw *hw)
174}; 174};
175 175
176Below is a matrix detailing which clk_ops are mandatory based upon the 176Below is a matrix detailing which clk_ops are mandatory based upon the
177hardware capbilities of that clock. A cell marked as "y" means 177hardware capabilities of that clock. A cell marked as "y" means
178mandatory, a cell marked as "n" implies that either including that 178mandatory, a cell marked as "n" implies that either including that
179callback is invalid or otherwise uneccesary. Empty cells are either 179callback is invalid or otherwise unnecessary. Empty cells are either
180optional or must be evaluated on a case-by-case basis. 180optional or must be evaluated on a case-by-case basis.
181 181
182 clock hardware characteristics 182 clock hardware characteristics
@@ -231,3 +231,14 @@ To better enforce this policy, always follow this simple rule: any
231statically initialized clock data MUST be defined in a separate file 231statically initialized clock data MUST be defined in a separate file
232from the logic that implements its ops. Basically separate the logic 232from the logic that implements its ops. Basically separate the logic
233from the data and all is well. 233from the data and all is well.
234
235 Part 6 - Disabling clock gating of unused clocks
236
237Sometimes during development it can be useful to be able to bypass the
238default disabling of unused clocks. For example, if drivers aren't enabling
239clocks properly but rely on them being on from the bootloader, bypassing
240the disabling means that the driver will remain functional while the issues
241are sorted out.
242
243To bypass this disabling, include "clk_ignore_unused" in the bootargs to the
244kernel.
diff --git a/Documentation/devicetree/bindings/arm/atmel-adc.txt b/Documentation/devicetree/bindings/arm/atmel-adc.txt
index c63097d6afeb..16769d9cedd6 100644
--- a/Documentation/devicetree/bindings/arm/atmel-adc.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-adc.txt
@@ -14,9 +14,19 @@ Required properties:
14 - atmel,adc-status-register: Offset of the Interrupt Status Register 14 - atmel,adc-status-register: Offset of the Interrupt Status Register
15 - atmel,adc-trigger-register: Offset of the Trigger Register 15 - atmel,adc-trigger-register: Offset of the Trigger Register
16 - atmel,adc-vref: Reference voltage in millivolts for the conversions 16 - atmel,adc-vref: Reference voltage in millivolts for the conversions
17 - atmel,adc-res: List of resolution in bits supported by the ADC. List size
18 must be two at least.
19 - atmel,adc-res-names: Contains one identifier string for each resolution
20 in atmel,adc-res property. "lowres" and "highres"
21 identifiers are required.
17 22
18Optional properties: 23Optional properties:
19 - atmel,adc-use-external: Boolean to enable of external triggers 24 - atmel,adc-use-external: Boolean to enable of external triggers
25 - atmel,adc-use-res: String corresponding to an identifier from
26 atmel,adc-res-names property. If not specified, the highest
27 resolution will be used.
28 - atmel,adc-sleep-mode: Boolean to enable sleep mode when no conversion is in progress
29 - atmel,adc-sample-hold-time: Sample and Hold Time in microseconds
20 30
21Optional trigger Nodes: 31Optional trigger Nodes:
22 - Required properties: 32 - Required properties:
@@ -40,6 +50,9 @@ adc0: adc@fffb0000 {
40 atmel,adc-trigger-register = <0x08>; 50 atmel,adc-trigger-register = <0x08>;
41 atmel,adc-use-external; 51 atmel,adc-use-external;
42 atmel,adc-vref = <3300>; 52 atmel,adc-vref = <3300>;
53 atmel,adc-res = <8 10>;
54 atmel,adc-res-names = "lowres", "highres";
55 atmel,adc-use-res = "lowres";
43 56
44 trigger@0 { 57 trigger@0 {
45 trigger-name = "external-rising"; 58 trigger-name = "external-rising";
diff --git a/Documentation/devicetree/bindings/arm/msm/ssbi.txt b/Documentation/devicetree/bindings/arm/msm/ssbi.txt
new file mode 100644
index 000000000000..54fd5ced3401
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/ssbi.txt
@@ -0,0 +1,18 @@
1* Qualcomm SSBI
2
3Some Qualcomm MSM devices contain a point-to-point serial bus used to
4communicate with a limited range of devices (mostly power management
5chips).
6
7These require the following properties:
8
9- compatible: "qcom,ssbi"
10
11- qcom,controller-type
12 indicates the SSBI bus variant the controller should use to talk
13 with the slave device. This should be one of "ssbi", "ssbi2", or
14 "pmic-arbiter". The type chosen is determined by the attached
15 slave.
16
17The slave device should be the single child node of the ssbi device
18with a compatible field.
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt
new file mode 100644
index 000000000000..47ada1dff216
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt
@@ -0,0 +1,60 @@
1Samsung Exynos Analog to Digital Converter bindings
2
3The devicetree bindings are for the new ADC driver written for
4Exynos4 and upward SoCs from Samsung.
5
6New driver handles the following
71. Supports ADC IF found on EXYNOS4412/EXYNOS5250
8 and future SoCs from Samsung
92. Add ADC driver under iio/adc framework
103. Also adds the Documentation for device tree bindings
11
12Required properties:
13- compatible: Must be "samsung,exynos-adc-v1"
14 for exynos4412/5250 controllers.
15 Must be "samsung,exynos-adc-v2" for
16 future controllers.
17- reg: Contains ADC register address range (base address and
18 length) and the address of the phy enable register.
19- interrupts: Contains the interrupt information for the ADC. The
20 format depends on which interrupt controller
21 the Samsung device uses.
22- #io-channel-cells = <1>; As ADC has multiple outputs
23- clocks From common clock binding: handle to adc clock.
24- clock-names From common clock binding: Shall be "adc".
25- vdd-supply VDD input supply.
26
27Note: child nodes can be added for auto probing from device tree.
28
29Example: adding device info in dtsi file
30
31adc: adc@12D10000 {
32 compatible = "samsung,exynos-adc-v1";
33 reg = <0x12D10000 0x100>, <0x10040718 0x4>;
34 interrupts = <0 106 0>;
35 #io-channel-cells = <1>;
36 io-channel-ranges;
37
38 clocks = <&clock 303>;
39 clock-names = "adc";
40
41 vdd-supply = <&buck5_reg>;
42};
43
44
45Example: Adding child nodes in dts file
46
47adc@12D10000 {
48
49 /* NTC thermistor is a hwmon device */
50 ncp15wb473@0 {
51 compatible = "ntc,ncp15wb473";
52 pullup-uv = <1800000>;
53 pullup-ohm = <47000>;
54 pulldown-ohm = <0>;
55 io-channels = <&adc 4>;
56 };
57};
58
59Note: Does not apply to ADC driver under arch/arm/plat-samsung/
60Note: The child node can be added under the adc node or separately.
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
new file mode 100644
index 000000000000..028b493e97ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
@@ -0,0 +1,22 @@
1Binding for the axi-clkgen clock generator
2
3This binding uses the common clock binding[1].
4
5[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
6
7Required properties:
8- compatible : shall be "adi,axi-clkgen".
9- #clock-cells : from common clock binding; Should always be set to 0.
10- reg : Address and length of the axi-clkgen register set.
11- clocks : Phandle and clock specifier for the parent clock.
12
13Optional properties:
14- clock-output-names : From common clock binding.
15
16Example:
17 clock@0xff000000 {
18 compatible = "adi,axi-clkgen";
19 #clock-cells = <0>;
20 reg = <0xff000000 0x1000>;
21 clocks = <&osc 1>;
22 };
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
new file mode 100644
index 000000000000..5757f9abfc26
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
@@ -0,0 +1,24 @@
1Binding for simple fixed factor rate clock sources.
2
3This binding uses the common clock binding[1].
4
5[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
6
7Required properties:
8- compatible : shall be "fixed-factor-clock".
9- #clock-cells : from common clock binding; shall be set to 0.
10- clock-div: fixed divider.
11- clock-mult: fixed multiplier.
12- clocks: parent clock.
13
14Optional properties:
15- clock-output-names : From common clock binding.
16
17Example:
18 clock {
19 compatible = "fixed-factor-clock";
20 clocks = <&parentclk>;
21 #clock-cells = <0>;
22 clock-div = <2>;
23 clock-mult = <1>;
24 };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
new file mode 100644
index 000000000000..cc374651662c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
@@ -0,0 +1,114 @@
1Binding for Silicon Labs Si5351a/b/c programmable i2c clock generator.
2
3Reference
4[1] Si5351A/B/C Data Sheet
5 http://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
6
7The Si5351a/b/c are programmable i2c clock generators with up to 8 output
8clocks. Si5351a also has a reduced pin-count package (MSOP10) where only
93 output clocks are accessible. The internal structure of the clock
10generators can be found in [1].
11
12==I2C device node==
13
14Required properties:
15- compatible: shall be one of "silabs,si5351{a,a-msop,b,c}".
16- reg: i2c device address, shall be 0x60 or 0x61.
17- #clock-cells: from common clock binding; shall be set to 1.
18- clocks: from common clock binding; list of parent clock
19 handles, shall be xtal reference clock or xtal and clkin for
20 si5351c only.
21- #address-cells: shall be set to 1.
22- #size-cells: shall be set to 0.
23
24Optional properties:
25- silabs,pll-source: pair of (number, source) for each pll. Allows
26 to overwrite clock source of pll A (number=0) or B (number=1).
27
28==Child nodes==
29
30Each of the clock outputs can be overwritten individually by
31using a child node to the I2C device node. If a child node for a clock
32output is not set, the eeprom configuration is not overwritten.
33
34Required child node properties:
35- reg: number of clock output.
36
37Optional child node properties:
38- silabs,clock-source: source clock of the output divider stage N, shall be
39 0 = multisynth N
40 1 = multisynth 0 for output clocks 0-3, else multisynth4
41 2 = xtal
42 3 = clkin (si5351c only)
43- silabs,drive-strength: output drive strength in mA, shall be one of {2,4,6,8}.
44- silabs,multisynth-source: source pll A(0) or B(1) of corresponding multisynth
45 divider.
46- silabs,pll-master: boolean, multisynth can change pll frequency.
47
48==Example==
49
50/* 25MHz reference crystal */
51ref25: ref25M {
52 compatible = "fixed-clock";
53 #clock-cells = <0>;
54 clock-frequency = <25000000>;
55};
56
57i2c-master-node {
58
59 /* Si5351a msop10 i2c clock generator */
60 si5351a: clock-generator@60 {
61 compatible = "silabs,si5351a-msop";
62 reg = <0x60>;
63 #address-cells = <1>;
64 #size-cells = <0>;
65 #clock-cells = <1>;
66
67 /* connect xtal input to 25MHz reference */
68 clocks = <&ref25>;
69
70 /* connect xtal input as source of pll0 and pll1 */
71 silabs,pll-source = <0 0>, <1 0>;
72
73 /*
74 * overwrite clkout0 configuration with:
75 * - 8mA output drive strength
76 * - pll0 as clock source of multisynth0
77 * - multisynth0 as clock source of output divider
78 * - multisynth0 can change pll0
79 * - set initial clock frequency of 74.25MHz
80 */
81 clkout0 {
82 reg = <0>;
83 silabs,drive-strength = <8>;
84 silabs,multisynth-source = <0>;
85 silabs,clock-source = <0>;
86 silabs,pll-master;
87 clock-frequency = <74250000>;
88 };
89
90 /*
91 * overwrite clkout1 configuration with:
92 * - 4mA output drive strength
93 * - pll1 as clock source of multisynth1
94 * - multisynth1 as clock source of output divider
95 * - multisynth1 can change pll1
96 */
97 clkout1 {
98 reg = <1>;
99 silabs,drive-strength = <4>;
100 silabs,multisynth-source = <1>;
101 silabs,clock-source = <0>;
102 silabs,pll-master;
103 };
104
105 /*
106 * overwrite clkout2 configuration with:
107 * - xtal as clock source of output divider
108 */
109 clkout2 {
110 reg = <2>;
111 silabs,clock-source = <2>;
112 };
113 };
114};
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
new file mode 100644
index 000000000000..729f52426fe1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt
@@ -0,0 +1,151 @@
1Device Tree Clock bindings for arch-sunxi
2
3This binding uses the common clock binding[1].
4
5[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
6
7Required properties:
8- compatible : shall be one of the following:
9 "allwinner,sun4i-osc-clk" - for a gatable oscillator
10 "allwinner,sun4i-pll1-clk" - for the main PLL clock
11 "allwinner,sun4i-cpu-clk" - for the CPU multiplexer clock
12 "allwinner,sun4i-axi-clk" - for the AXI clock
13 "allwinner,sun4i-axi-gates-clk" - for the AXI gates
14 "allwinner,sun4i-ahb-clk" - for the AHB clock
15 "allwinner,sun4i-ahb-gates-clk" - for the AHB gates
16 "allwinner,sun4i-apb0-clk" - for the APB0 clock
17 "allwinner,sun4i-apb0-gates-clk" - for the APB0 gates
18 "allwinner,sun4i-apb1-clk" - for the APB1 clock
19 "allwinner,sun4i-apb1-mux-clk" - for the APB1 clock muxing
20 "allwinner,sun4i-apb1-gates-clk" - for the APB1 gates
21
22Required properties for all clocks:
23- reg : shall be the control register address for the clock.
24- clocks : shall be the input parent clock(s) phandle for the clock
25- #clock-cells : from common clock binding; shall be set to 0 except for
26 "allwinner,sun4i-*-gates-clk" where it shall be set to 1
27
28Additionally, "allwinner,sun4i-*-gates-clk" clocks require:
29- clock-output-names : the corresponding gate names that the clock controls
30
31For example:
32
33osc24M: osc24M@01c20050 {
34 #clock-cells = <0>;
35 compatible = "allwinner,sun4i-osc-clk";
36 reg = <0x01c20050 0x4>;
37 clocks = <&osc24M_fixed>;
38};
39
40pll1: pll1@01c20000 {
41 #clock-cells = <0>;
42 compatible = "allwinner,sun4i-pll1-clk";
43 reg = <0x01c20000 0x4>;
44 clocks = <&osc24M>;
45};
46
47cpu: cpu@01c20054 {
48 #clock-cells = <0>;
49 compatible = "allwinner,sun4i-cpu-clk";
50 reg = <0x01c20054 0x4>;
51 clocks = <&osc32k>, <&osc24M>, <&pll1>;
52};
53
54
55
56Gate clock outputs
57
58The "allwinner,sun4i-*-gates-clk" clocks provide several gatable outputs;
59their corresponding offsets as present on sun4i are listed below. Note that
60some of these gates are not present on sun5i.
61
62 * AXI gates ("allwinner,sun4i-axi-gates-clk")
63
64 DRAM 0
65
66 * AHB gates ("allwinner,sun4i-ahb-gates-clk")
67
68 USB0 0
69 EHCI0 1
70 OHCI0 2*
71 EHCI1 3
72 OHCI1 4*
73 SS 5
74 DMA 6
75 BIST 7
76 MMC0 8
77 MMC1 9
78 MMC2 10
79 MMC3 11
80 MS 12**
81 NAND 13
82 SDRAM 14
83
84 ACE 16
85 EMAC 17
86 TS 18
87
88 SPI0 20
89 SPI1 21
90 SPI2 22
91 SPI3 23
92 PATA 24
93 SATA 25**
94 GPS 26*
95
96 VE 32
97 TVD 33
98 TVE0 34
99 TVE1 35
100 LCD0 36
101 LCD1 37
102
103 CSI0 40
104 CSI1 41
105
106 HDMI 43
107 DE_BE0 44
108 DE_BE1 45
109 DE_FE0 46
110 DE_FE1 47
111
112 MP 50
113
114 MALI400 52
115
116 * APB0 gates ("allwinner,sun4i-apb0-gates-clk")
117
118 CODEC 0
119 SPDIF 1*
120 AC97 2
121 IIS 3
122
123 PIO 5
124 IR0 6
125 IR1 7
126
127 KEYPAD 10
128
129 * APB1 gates ("allwinner,sun4i-apb1-gates-clk")
130
131 I2C0 0
132 I2C1 1
133 I2C2 2
134
135 CAN 4
136 SCR 5
137 PS20 6
138 PS21 7
139
140 UART0 16
141 UART1 17
142 UART2 18
143 UART3 19
144 UART4 20
145 UART5 21
146 UART6 22
147 UART7 23
148
149Notation:
150 [*]: The datasheet didn't mention these, but they are present on AW code
151 [**]: The datasheet had this marked as "NC" but they are used on AW code
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index a33628759d36..d933af370697 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -98,7 +98,7 @@ announce the pinrange to the pin ctrl subsystem. For example,
98 compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; 98 compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
99 reg = <0x1460 0x18>; 99 reg = <0x1460 0x18>;
100 gpio-controller; 100 gpio-controller;
101 gpio-ranges = <&pinctrl1 20 10>, <&pinctrl2 50 20>; 101 gpio-ranges = <&pinctrl1 0 20 10>, <&pinctrl2 10 50 20>;
102 102
103 } 103 }
104 104
@@ -107,8 +107,8 @@ where,
107 107
108 Next values specify the base pin and number of pins for the range 108 Next values specify the base pin and number of pins for the range
109 handled by 'qe_pio_e' gpio. In the given example from base pin 20 to 109 handled by 'qe_pio_e' gpio. In the given example from base pin 20 to
110 pin 29 under pinctrl1 and pin 50 to pin 69 under pinctrl2 is handled 110 pin 29 under pinctrl1 with gpio offset 0 and pin 50 to pin 69 under
111 by this gpio controller. 111 pinctrl2 with gpio offset 10 is handled by this gpio controller.
112 112
113The pinctrl node must have "#gpio-range-cells" property to show number of 113The pinctrl node must have "#gpio-range-cells" property to show number of
114arguments to pass with phandle from gpio controllers node. 114arguments to pass with phandle from gpio controllers node.
diff --git a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
new file mode 100644
index 000000000000..c6f66674f19c
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
@@ -0,0 +1,29 @@
1NTC Thermistor hwmon sensors
2-------------------------------
3
4Required node properties:
5- "compatible" value : one of
6 "ntc,ncp15wb473"
7 "ntc,ncp18wb473"
8 "ntc,ncp21wb473"
9 "ntc,ncp03wb473"
10 "ntc,ncp15wl333"
11- "pullup-uv" Pull up voltage in micro volts
12- "pullup-ohm" Pull up resistor value in ohms
13- "pulldown-ohm" Pull down resistor value in ohms
14- "connected-positive" Always ON, If not specified.
15 Status change is possible.
16- "io-channels" Channel node of ADC to be used for
17 conversion.
18
19Read more about iio bindings at
20 Documentation/devicetree/bindings/iio/iio-bindings.txt
21
22Example:
23 ncp15wb473@0 {
24 compatible = "ntc,ncp15wb473";
25 pullup-uv = <1800000>;
26 pullup-ohm = <47000>;
27 pulldown-ohm = <0>;
28 io-channels = <&adc 3>;
29 };
diff --git a/Documentation/devicetree/bindings/iio/iio-bindings.txt b/Documentation/devicetree/bindings/iio/iio-bindings.txt
new file mode 100644
index 000000000000..0b447d9ad196
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/iio-bindings.txt
@@ -0,0 +1,97 @@
1This binding is derived from clock bindings, and based on suggestions
2from Lars-Peter Clausen [1].
3
4Sources of IIO channels can be represented by any node in the device
5tree. Those nodes are designated as IIO providers. IIO consumer
6nodes use a phandle and IIO specifier pair to connect IIO provider
7outputs to IIO inputs. Similar to the gpio specifiers, an IIO
8specifier is an array of one or more cells identifying the IIO
9output on a device. The length of an IIO specifier is defined by the
10value of a #io-channel-cells property in the IIO provider node.
11
12[1] http://marc.info/?l=linux-iio&m=135902119507483&w=2
13
14==IIO providers==
15
16Required properties:
17#io-channel-cells: Number of cells in an IIO specifier; Typically 0 for nodes
18 with a single IIO output and 1 for nodes with multiple
19 IIO outputs.
20
21Example for a simple configuration with no trigger:
22
23 adc: voltage-sensor@35 {
24 compatible = "maxim,max1139";
25 reg = <0x35>;
26 #io-channel-cells = <1>;
27 };
28
29Example for a configuration with trigger:
30
31 adc@35 {
32 compatible = "some-vendor,some-adc";
33 reg = <0x35>;
34
35 adc1: iio-device@0 {
36 #io-channel-cells = <1>;
37 /* other properties */
38 };
39 adc2: iio-device@1 {
40 #io-channel-cells = <1>;
41 /* other properties */
42 };
43 };
44
45==IIO consumers==
46
47Required properties:
48io-channels: List of phandle and IIO specifier pairs, one pair
49 for each IIO input to the device. Note: if the
50 IIO provider specifies '0' for #io-channel-cells,
51 then only the phandle portion of the pair will appear.
52
53Optional properties:
54io-channel-names:
55 List of IIO input name strings sorted in the same
56 order as the io-channels property. Consumers drivers
57 will use io-channel-names to match IIO input names
58 with IIO specifiers.
59io-channel-ranges:
60 Empty property indicating that child nodes can inherit named
61 IIO channels from this node. Useful for bus nodes to provide
62 an IIO channel to their children.
63
64For example:
65
66 device {
67 io-channels = <&adc 1>, <&ref 0>;
68 io-channel-names = "vcc", "vdd";
69 };
70
71This represents a device with two IIO inputs, named "vcc" and "vdd".
72The vcc channel is connected to output 1 of the &adc device, and the
73vdd channel is connected to output 0 of the &ref device.
74
75==Example==
76
77 adc: max1139@35 {
78 compatible = "maxim,max1139";
79 reg = <0x35>;
80 #io-channel-cells = <1>;
81 };
82
83 ...
84
85 iio_hwmon {
86 compatible = "iio-hwmon";
87 io-channels = <&adc 0>, <&adc 1>, <&adc 2>,
88 <&adc 3>, <&adc 4>, <&adc 5>,
89 <&adc 6>, <&adc 7>, <&adc 8>,
90 <&adc 9>;
91 };
92
93 some_consumer {
94 compatible = "some-consumer";
95 io-channels = <&adc 10>, <&adc 11>;
96 io-channel-names = "adc1", "adc2";
97 };
diff --git a/Documentation/devicetree/bindings/media/coda.txt b/Documentation/devicetree/bindings/media/coda.txt
new file mode 100644
index 000000000000..2865d04e4030
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/coda.txt
@@ -0,0 +1,30 @@
1Chips&Media Coda multi-standard codec IP
2========================================
3
4Coda codec IPs are present in i.MX SoCs in various versions,
5called VPU (Video Processing Unit).
6
7Required properties:
8- compatible : should be "fsl,<chip>-vpu" for i.MX SoCs:
9 (a) "fsl,imx27-vpu" for CodaDx6 present in i.MX27
10 (b) "fsl,imx53-vpu" for CODA7541 present in i.MX53
11 (c) "fsl,imx6q-vpu" for CODA960 present in i.MX6q
12- reg: should be register base and length as documented in the
13 SoC reference manual
14- interrupts : Should contain the VPU interrupt. For CODA960,
15 a second interrupt is needed for the MJPEG unit.
16- clocks : Should contain the ahb and per clocks, in the order
17 determined by the clock-names property.
18- clock-names : Should be "ahb", "per"
19- iram : phandle pointing to the SRAM device node
20
21Example:
22
23vpu: vpu@63ff4000 {
24 compatible = "fsl,imx53-vpu";
25 reg = <0x63ff4000 0x1000>;
26 interrupts = <9>;
27 clocks = <&clks 63>, <&clks 63>;
28 clock-names = "ahb", "per";
29 iram = <&ocram>;
30};
diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt
index 13b707b7355c..c3a14e0ad0ad 100644
--- a/Documentation/devicetree/bindings/mfd/ab8500.txt
+++ b/Documentation/devicetree/bindings/mfd/ab8500.txt
@@ -13,9 +13,6 @@ Required parent device properties:
13 4 = active high level-sensitive 13 4 = active high level-sensitive
14 8 = active low level-sensitive 14 8 = active low level-sensitive
15 15
16Optional parent device properties:
17- reg : contains the PRCMU mailbox address for the AB8500 i2c port
18
19The AB8500 consists of a large and varied group of sub-devices: 16The AB8500 consists of a large and varied group of sub-devices:
20 17
21Device IRQ Names Supply Names Description 18Device IRQ Names Supply Names Description
@@ -86,9 +83,8 @@ Non-standard child device properties:
86 - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic 83 - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic
87 - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580) 84 - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580)
88 85
89ab8500@5 { 86ab8500 {
90 compatible = "stericsson,ab8500"; 87 compatible = "stericsson,ab8500";
91 reg = <5>; /* mailbox 5 is i2c */
92 interrupts = <0 40 0x4>; 88 interrupts = <0 40 0x4>;
93 interrupt-controller; 89 interrupt-controller;
94 #interrupt-cells = <2>; 90 #interrupt-cells = <2>;
diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
index baf07987ae68..abd9e3cb2db7 100644
--- a/Documentation/devicetree/bindings/mfd/mc13xxx.txt
+++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
@@ -10,10 +10,40 @@ Optional properties:
10- fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used 10- fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used
11 11
12Sub-nodes: 12Sub-nodes:
13- regulators : Contain the regulator nodes. The MC13892 regulators are 13- regulators : Contain the regulator nodes. The regulators are bound using
14 bound using their names as listed below with their registers and bits 14 their names as listed below with their registers and bits for enabling.
15 for enabling.
16 15
16MC13783 regulators:
17 sw1a : regulator SW1A (register 24, bit 0)
18 sw1b : regulator SW1B (register 25, bit 0)
19 sw2a : regulator SW2A (register 26, bit 0)
20 sw2b : regulator SW2B (register 27, bit 0)
21 sw3 : regulator SW3 (register 29, bit 20)
22 vaudio : regulator VAUDIO (register 32, bit 0)
23 viohi : regulator VIOHI (register 32, bit 3)
24 violo : regulator VIOLO (register 32, bit 6)
25 vdig : regulator VDIG (register 32, bit 9)
26 vgen : regulator VGEN (register 32, bit 12)
27 vrfdig : regulator VRFDIG (register 32, bit 15)
28 vrfref : regulator VRFREF (register 32, bit 18)
29 vrfcp : regulator VRFCP (register 32, bit 21)
30 vsim : regulator VSIM (register 33, bit 0)
31 vesim : regulator VESIM (register 33, bit 3)
32 vcam : regulator VCAM (register 33, bit 6)
33 vrfbg : regulator VRFBG (register 33, bit 9)
34 vvib : regulator VVIB (register 33, bit 11)
35 vrf1 : regulator VRF1 (register 33, bit 12)
36 vrf2 : regulator VRF2 (register 33, bit 15)
37 vmmc1 : regulator VMMC1 (register 33, bit 18)
38 vmmc2 : regulator VMMC2 (register 33, bit 21)
39 gpo1 : regulator GPO1 (register 34, bit 6)
40 gpo2 : regulator GPO2 (register 34, bit 8)
41 gpo3 : regulator GPO3 (register 34, bit 10)
42 gpo4 : regulator GPO4 (register 34, bit 12)
43 pwgt1spi : regulator PWGT1SPI (register 34, bit 15)
44 pwgt2spi : regulator PWGT2SPI (register 34, bit 16)
45
46MC13892 regulators:
17 vcoincell : regulator VCOINCELL (register 13, bit 23) 47 vcoincell : regulator VCOINCELL (register 13, bit 23)
18 sw1 : regulator SW1 (register 24, bit 0) 48 sw1 : regulator SW1 (register 24, bit 0)
19 sw2 : regulator SW2 (register 25, bit 0) 49 sw2 : regulator SW2 (register 25, bit 0)
diff --git a/Documentation/devicetree/bindings/misc/sram.txt b/Documentation/devicetree/bindings/misc/sram.txt
new file mode 100644
index 000000000000..4d0a00e453a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/sram.txt
@@ -0,0 +1,16 @@
1Generic on-chip SRAM
2
3Simple IO memory regions to be managed by the genalloc API.
4
5Required properties:
6
7- compatible : mmio-sram
8
9- reg : SRAM iomem address range
10
11Example:
12
13sram: sram@5c000000 {
14 compatible = "mmio-sram";
15 reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */
16};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
index 2c81e45f1374..08f0c3d01575 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
@@ -1,7 +1,9 @@
1One-register-per-pin type device tree based pinctrl driver 1One-register-per-pin type device tree based pinctrl driver
2 2
3Required properties: 3Required properties:
4- compatible : "pinctrl-single" 4- compatible : "pinctrl-single" or "pinconf-single".
5 "pinctrl-single" means that pinconf isn't supported.
6 "pinconf-single" means that generic pinconf is supported.
5 7
6- reg : offset and length of the register set for the mux registers 8- reg : offset and length of the register set for the mux registers
7 9
@@ -14,9 +16,61 @@ Optional properties:
14- pinctrl-single,function-off : function off mode for disabled state if 16- pinctrl-single,function-off : function off mode for disabled state if
15 available and same for all registers; if not specified, disabling of 17 available and same for all registers; if not specified, disabling of
16 pin functions is ignored 18 pin functions is ignored
19
17- pinctrl-single,bit-per-mux : boolean to indicate that one register controls 20- pinctrl-single,bit-per-mux : boolean to indicate that one register controls
18 more than one pin 21 more than one pin
19 22
23- pinctrl-single,drive-strength : array of values that are used to configure
24 drive strength in the pinmux register. The values are the drive strength
25 current and the drive strength mask.
26
27 /* drive strength current, mask */
28 pinctrl-single,drive-strength = <0x30 0xf0>;
29
30- pinctrl-single,bias-pullup : array of values that are used to configure the
31 input bias pullup in the pinmux register.
32
33 /* input, enabled pullup bits, disabled pullup bits, mask */
34 pinctrl-single,bias-pullup = <0 1 0 1>;
35
36- pinctrl-single,bias-pulldown : array of values that are used to configure the
37 input bias pulldown in the pinmux register.
38
39 /* input, enabled pulldown bits, disabled pulldown bits, mask */
40 pinctrl-single,bias-pulldown = <2 2 0 2>;
41
42 * Two bits to control input bias pullup and pulldown: User should use
43 pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. One bit means
44 pullup, and the other one bit means pulldown.
45 * Three bits to control input bias enable, pullup and pulldown. User should
46 use pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. Input bias
47 enable bit should be included in pullup or pulldown bits.
48 * Although the driver can set PIN_CONFIG_BIAS_DISABLE, there is no
49 pinctrl-single,bias-disable property, because the pinctrl-single driver
50 implements it by disabling both pulldown and pullup.
51
52- pinctrl-single,input-schmitt : array of values that are used to configure
53 input schmitt in the pinmux register. On some silicon there are two input
54 schmitt values (rising-edge & falling-edge) in the pinmux register.
55
56 /* input schmitt value, mask */
57 pinctrl-single,input-schmitt = <0x30 0x70>;
58
59- pinctrl-single,input-schmitt-enable : array of values that are used to
60 configure input schmitt enable or disable in the pinmux register.
61
62 /* input, enable bits, disable bits, mask */
63 pinctrl-single,input-schmitt-enable = <0x30 0x40 0 0x70>;
64
65- pinctrl-single,gpio-range : list of values that are used to configure a GPIO
66 range. The values are the subnode phandle, the pin base in the pinctrl
67 device, the number of pins in this range, and the GPIO function value of
68 this GPIO range. The number of parameters depends on the
69 #pinctrl-single,gpio-range-cells property.
70
71 /* pin base, nr pins & gpio function */
72 pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1>;
73
20This driver assumes that there is only one register for each pin (unless the 74This driver assumes that there is only one register for each pin (unless the
21pinctrl-single,bit-per-mux is set), and uses the common pinctrl bindings as 75pinctrl-single,bit-per-mux is set), and uses the common pinctrl bindings as
22specified in the pinctrl-bindings.txt document in this directory. 76specified in the pinctrl-bindings.txt document in this directory.
@@ -42,6 +96,20 @@ Where 0xdc is the offset from the pinctrl register base address for the
42device pinctrl register, 0x18 is the desired value, and 0xff is the sub mask to 96device pinctrl register, 0x18 is the desired value, and 0xff is the sub mask to
43be used when applying this change to the register. 97be used when applying this change to the register.
44 98
99
100Optional sub-node: In case some pins could be configured as GPIO in the pinmux
101register, those pins could be defined as a GPIO range. This sub-node is required
102by pinctrl-single,gpio-range property.
103
104Required properties in sub-node:
105- #pinctrl-single,gpio-range-cells : the number of parameters after phandle in
106 pinctrl-single,gpio-range property.
107
108 range: gpio-range {
109 #pinctrl-single,gpio-range-cells = <3>;
110 };
111
112
45Example: 113Example:
46 114
47/* SoC common file */ 115/* SoC common file */
@@ -58,7 +126,7 @@ pmx_core: pinmux@4a100040 {
58 126
59/* second controller instance for pins in wkup domain */ 127/* second controller instance for pins in wkup domain */
60pmx_wkup: pinmux@4a31e040 { 128pmx_wkup: pinmux@4a31e040 {
61 compatible = "pinctrl-single; 129 compatible = "pinctrl-single";
62 reg = <0x4a31e040 0x0038>; 130 reg = <0x4a31e040 0x0038>;
63 #address-cells = <1>; 131 #address-cells = <1>;
64 #size-cells = <0>; 132 #size-cells = <0>;
@@ -76,6 +144,29 @@ control_devconf0: pinmux@48002274 {
76 pinctrl-single,function-mask = <0x5F>; 144 pinctrl-single,function-mask = <0x5F>;
77}; 145};
78 146
147/* third controller instance for pins in gpio domain */
148pmx_gpio: pinmux@d401e000 {
149 compatible = "pinconf-single";
150 reg = <0xd401e000 0x0330>;
151 #address-cells = <1>;
152 #size-cells = <1>;
153 ranges;
154
155 pinctrl-single,register-width = <32>;
156 pinctrl-single,function-mask = <7>;
157
158 /* sparse GPIO range could be supported */
159 pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1
160 &range 12 1 0 &range 13 29 1
161 &range 43 1 0 &range 44 49 1
162 &range 94 1 1 &range 96 2 1>;
163
164 range: gpio-range {
165 #pinctrl-single,gpio-range-cells = <3>;
166 };
167};
168
169
79/* board specific .dts file */ 170/* board specific .dts file */
80 171
81&pmx_core { 172&pmx_core {
@@ -96,6 +187,15 @@ control_devconf0: pinmux@48002274 {
96 >; 187 >;
97 }; 188 };
98 189
190 uart0_pins: pinmux_uart0_pins {
191 pinctrl-single,pins = <
192 0x208 0 /* UART0_RXD (IOCFG138) */
193 0x20c 0 /* UART0_TXD (IOCFG139) */
194 >;
195 pinctrl-single,bias-pulldown = <0 2 0 2>;
196 pinctrl-single,bias-pullup = <0 1 0 1>;
197 };
198
99 /* map uart2 pins */ 199 /* map uart2 pins */
100 uart2_pins: pinmux_uart2_pins { 200 uart2_pins: pinmux_uart2_pins {
101 pinctrl-single,pins = < 201 pinctrl-single,pins = <
@@ -122,6 +222,11 @@ control_devconf0: pinmux@48002274 {
122 222
123}; 223};
124 224
225&uart0 {
226 pinctrl-names = "default";
227 pinctrl-0 = <&uart0_pins>;
228};
229
125&uart2 { 230&uart2 {
126 pinctrl-names = "default"; 231 pinctrl-names = "default";
127 pinctrl-0 = <&uart2_pins>; 232 pinctrl-0 = <&uart2_pins>;
diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
index 4598a47aa0cd..c70fca146e91 100644
--- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
@@ -7,6 +7,7 @@ on-chip controllers onto these pads.
7 7
8Required Properties: 8Required Properties:
9- compatible: should be one of the following. 9- compatible: should be one of the following.
10 - "samsung,s3c64xx-pinctrl": for S3C64xx-compatible pin-controller,
10 - "samsung,exynos4210-pinctrl": for Exynos4210 compatible pin-controller. 11 - "samsung,exynos4210-pinctrl": for Exynos4210 compatible pin-controller.
11 - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller. 12 - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller.
12 - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller. 13 - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller.
@@ -105,6 +106,8 @@ B. External Wakeup Interrupts: For supporting external wakeup interrupts, a
105 106
106 - compatible: identifies the type of the external wakeup interrupt controller 107 - compatible: identifies the type of the external wakeup interrupt controller
107 The possible values are: 108 The possible values are:
109 - samsung,s3c64xx-wakeup-eint: represents wakeup interrupt controller
110 found on Samsung S3C64xx SoCs,
108 - samsung,exynos4210-wakeup-eint: represents wakeup interrupt controller 111 - samsung,exynos4210-wakeup-eint: represents wakeup interrupt controller
109 found on Samsung Exynos4210 SoC. 112 found on Samsung Exynos4210 SoC.
110 - interrupt-parent: phandle of the interrupt parent to which the external 113 - interrupt-parent: phandle of the interrupt parent to which the external
diff --git a/Documentation/devicetree/bindings/regulator/max8952.txt b/Documentation/devicetree/bindings/regulator/max8952.txt
new file mode 100644
index 000000000000..866fcdd0f4eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8952.txt
@@ -0,0 +1,52 @@
1Maxim MAX8952 voltage regulator
2
3Required properties:
4- compatible: must be equal to "maxim,max8952"
5- reg: I2C slave address, usually 0x60
6- max8952,dvs-mode-microvolt: array of 4 integer values defining DVS voltages
7 in microvolts. All values must be from range <770000, 1400000>
8- any required generic properties defined in regulator.txt
9
10Optional properties:
11- max8952,vid-gpios: array of two GPIO pins used for DVS voltage selection
12- max8952,en-gpio: GPIO used to control enable status of regulator
13- max8952,default-mode: index of default DVS voltage, from <0, 3> range
14- max8952,sync-freq: sync frequency, must be one of following values:
15 - 0: 26 MHz
16 - 1: 13 MHz
17 - 2: 19.2 MHz
18 Defaults to 26 MHz if not specified.
19- max8952,ramp-speed: voltage ramp speed, must be one of following values:
20 - 0: 32mV/us
21 - 1: 16mV/us
22 - 2: 8mV/us
23 - 3: 4mV/us
24 - 4: 2mV/us
25 - 5: 1mV/us
26 - 6: 0.5mV/us
27 - 7: 0.25mV/us
28 Defaults to 32mV/us if not specified.
29- any available generic properties defined in regulator.txt
30
31Example:
32
33 vdd_arm_reg: pmic@60 {
34 compatible = "maxim,max8952";
35 reg = <0x60>;
36
37 /* max8952-specific properties */
38 max8952,vid-gpios = <&gpx0 3 0>, <&gpx0 4 0>;
39 max8952,en-gpio = <&gpx0 1 0>;
40 max8952,default-mode = <0>;
41 max8952,dvs-mode-microvolt = <1250000>, <1200000>,
42 <1050000>, <950000>;
43 max8952,sync-freq = <0>;
44 max8952,ramp-speed = <0>;
45
46 /* generic regulator properties */
47 regulator-name = "vdd_arm";
48 regulator-min-microvolt = <770000>;
49 regulator-max-microvolt = <1400000>;
50 regulator-always-on;
51 regulator-boot-on;
52 };
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt
new file mode 100644
index 000000000000..2a3feabd3b22
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt
@@ -0,0 +1,15 @@
1Atmel AT91RM9200 Real Time Clock
2
3Required properties:
4- compatible: should be: "atmel,at91rm9200-rtc"
5- reg: physical base address of the controller and length of memory mapped
6 region.
7- interrupts: rtc alarm/event interrupt
8
9Example:
10
11rtc@fffffe00 {
12 compatible = "atmel,at91rm9200-rtc";
13 reg = <0xfffffe00 0x100>;
14 interrupts = <1 4 7>;
15};
diff --git a/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt
new file mode 100644
index 000000000000..8bf89c643640
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt
@@ -0,0 +1,22 @@
1Broadcom BCM2835 SPI0 controller
2
3The BCM2835 contains two forms of SPI master controller, one known simply as
4SPI0, and the other known as the "Universal SPI Master"; part of the
5auxiliary block. This binding applies to the SPI0 controller.
6
7Required properties:
8- compatible: Should be "brcm,bcm2835-spi".
9- reg: Should contain register location and length.
10- interrupts: Should contain interrupt.
11- clocks: The clock feeding the SPI controller.
12
13Example:
14
15spi@20204000 {
16 compatible = "brcm,bcm2835-spi";
17 reg = <0x7e204000 0x1000>;
18 interrupts = <2 22>;
19 clocks = <&clk_spi>;
20 #address-cells = <1>;
21 #size-cells = <0>;
22};
diff --git a/Documentation/devicetree/bindings/spi/fsl-spi.txt b/Documentation/devicetree/bindings/spi/fsl-spi.txt
index 777abd7399d5..b032dd76e9d2 100644
--- a/Documentation/devicetree/bindings/spi/fsl-spi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-spi.txt
@@ -4,7 +4,7 @@ Required properties:
4- cell-index : QE SPI subblock index. 4- cell-index : QE SPI subblock index.
5 0: QE subblock SPI1 5 0: QE subblock SPI1
6 1: QE subblock SPI2 6 1: QE subblock SPI2
7- compatible : should be "fsl,spi". 7- compatible : should be "fsl,spi" or "aeroflexgaisler,spictrl".
8- mode : the SPI operation mode, it can be "cpu" or "cpu-qe". 8- mode : the SPI operation mode, it can be "cpu" or "cpu-qe".
9- reg : Offset and length of the register set for the device 9- reg : Offset and length of the register set for the device
10- interrupts : <a b> where a is the interrupt number and b is a 10- interrupts : <a b> where a is the interrupt number and b is a
@@ -14,6 +14,7 @@ Required properties:
14 controller you have. 14 controller you have.
15- interrupt-parent : the phandle for the interrupt controller that 15- interrupt-parent : the phandle for the interrupt controller that
16 services interrupts for this device. 16 services interrupts for this device.
17- clock-frequency : input clock frequency to non FSL_SOC cores
17 18
18Optional properties: 19Optional properties:
19- gpios : specifies the gpio pins to be used for chipselects. 20- gpios : specifies the gpio pins to be used for chipselects.
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
new file mode 100644
index 000000000000..91ff771c7e77
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
@@ -0,0 +1,26 @@
1NVIDIA Tegra114 SPI controller.
2
3Required properties:
4- compatible : should be "nvidia,tegra114-spi".
5- reg: Should contain SPI registers location and length.
6- interrupts: Should contain SPI interrupts.
7- nvidia,dma-request-selector : The Tegra DMA controller's phandle and
8 request selector for this SPI controller.
9- This also requires a clock named "spi" as per the binding document
10 Documentation/devicetree/bindings/clock/clock-bindings.txt
11
12Recommended properties:
13- spi-max-frequency: Definition as per
14 Documentation/devicetree/bindings/spi/spi-bus.txt
15Example:
16
17spi@7000d600 {
18 compatible = "nvidia,tegra114-spi";
19 reg = <0x7000d600 0x200>;
20 interrupts = <0 82 0x04>;
21 nvidia,dma-request-selector = <&apbdma 16>;
22 spi-max-frequency = <25000000>;
23 #address-cells = <1>;
24 #size-cells = <0>;
25 status = "disabled";
26};
diff --git a/Documentation/devicetree/bindings/spi/spi-samsung.txt b/Documentation/devicetree/bindings/spi/spi-samsung.txt
index a15ffeddfba4..86aa061f069f 100644
--- a/Documentation/devicetree/bindings/spi/spi-samsung.txt
+++ b/Documentation/devicetree/bindings/spi/spi-samsung.txt
@@ -31,9 +31,6 @@ Required Board Specific Properties:
31 31
32- #address-cells: should be 1. 32- #address-cells: should be 1.
33- #size-cells: should be 0. 33- #size-cells: should be 0.
34- gpios: The gpio specifier for clock, mosi and miso interface lines (in the
35 order specified). The format of the gpio specifier depends on the gpio
36 controller.
37 34
38Optional Board Specific Properties: 35Optional Board Specific Properties:
39 36
@@ -86,9 +83,8 @@ Example:
86 spi_0: spi@12d20000 { 83 spi_0: spi@12d20000 {
87 #address-cells = <1>; 84 #address-cells = <1>;
88 #size-cells = <0>; 85 #size-cells = <0>;
89 gpios = <&gpa2 4 2 3 0>, 86 pinctrl-names = "default";
90 <&gpa2 6 2 3 0>, 87 pinctrl-0 = <&spi0_bus>;
91 <&gpa2 7 2 3 0>;
92 88
93 w25q80bw@0 { 89 w25q80bw@0 {
94 #address-cells = <1>; 90 #address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/staging/dwc2.txt b/Documentation/devicetree/bindings/staging/dwc2.txt
new file mode 100644
index 000000000000..1a1b7cfa4845
--- /dev/null
+++ b/Documentation/devicetree/bindings/staging/dwc2.txt
@@ -0,0 +1,15 @@
1Platform DesignWare HS OTG USB 2.0 controller
2-----------------------------------------------------
3
4Required properties:
5- compatible : "snps,dwc2"
6- reg : Should contain 1 register range (address and length)
7- interrupts : Should contain 1 interrupt
8
9Example:
10
11 usb@101c0000 {
12 compatible = "ralink,rt3050-usb", "snps,dwc2";
13 reg = <0x101c0000 40000>;
14 interrupts = <18>;
15 };
diff --git a/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt b/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt
index 07654f0338b6..8071ac20d4b3 100644
--- a/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt
+++ b/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt
@@ -26,7 +26,7 @@ Required properties:
26- crtc: the crtc this display is connected to, see below 26- crtc: the crtc this display is connected to, see below
27Optional properties: 27Optional properties:
28- interface_pix_fmt: How this display is connected to the 28- interface_pix_fmt: How this display is connected to the
29 crtc. Currently supported types: "rgb24", "rgb565" 29 crtc. Currently supported types: "rgb24", "rgb565", "bgr666"
30- edid: verbatim EDID data block describing attached display. 30- edid: verbatim EDID data block describing attached display.
31- ddc: phandle describing the i2c bus handling the display data 31- ddc: phandle describing the i2c bus handling the display data
32 channel 32 channel
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
index 1e1145ca4f3c..1928a3e83cd0 100644
--- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt
+++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
@@ -11,6 +11,9 @@ Required properties:
11 - "nvidia,tegra20-uart" 11 - "nvidia,tegra20-uart"
12 - "nxp,lpc3220-uart" 12 - "nxp,lpc3220-uart"
13 - "ibm,qpace-nwp-serial" 13 - "ibm,qpace-nwp-serial"
14 - "altr,16550-FIFO32"
15 - "altr,16550-FIFO64"
16 - "altr,16550-FIFO128"
14 - "serial" if the port type is unknown. 17 - "serial" if the port type is unknown.
15- reg : offset and length of the register set for the device. 18- reg : offset and length of the register set for the device.
16- interrupts : should contain uart interrupt. 19- interrupts : should contain uart interrupt.
@@ -30,6 +33,10 @@ Optional properties:
30 RTAS and should not be registered. 33 RTAS and should not be registered.
31- no-loopback-test: set to indicate that the port does not implements loopback 34- no-loopback-test: set to indicate that the port does not implements loopback
32 test mode 35 test mode
36- fifo-size: the fifo size of the UART.
37- auto-flow-control: one way to enable automatic flow control support. The
38 driver is allowed to detect support for the capability even without this
39 property.
33 40
34Example: 41Example:
35 42
diff --git a/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt b/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt
index 5778b9c83bd8..1c04a4c9515f 100644
--- a/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt
+++ b/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt
@@ -11,6 +11,7 @@ Optional properties:
11 that indicate usb controller index 11 that indicate usb controller index
12- vbus-supply: regulator for vbus 12- vbus-supply: regulator for vbus
13- disable-over-current: disable over current detect 13- disable-over-current: disable over current detect
14- external-vbus-divider: enables off-chip resistor divider for Vbus
14 15
15Examples: 16Examples:
16usb@02184000 { /* USB OTG */ 17usb@02184000 { /* USB OTG */
@@ -20,4 +21,5 @@ usb@02184000 { /* USB OTG */
20 fsl,usbphy = <&usbphy1>; 21 fsl,usbphy = <&usbphy1>;
21 fsl,usbmisc = <&usbmisc 0>; 22 fsl,usbmisc = <&usbmisc 0>;
22 disable-over-current; 23 disable-over-current;
24 external-vbus-divider;
23}; 25};
diff --git a/Documentation/devicetree/bindings/usb/ehci-omap.txt b/Documentation/devicetree/bindings/usb/ehci-omap.txt
new file mode 100644
index 000000000000..485a9a1efa7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ehci-omap.txt
@@ -0,0 +1,32 @@
1OMAP HS USB EHCI controller
2
3This device is usually the child of the omap-usb-host
4Documentation/devicetree/bindings/mfd/omap-usb-host.txt
5
6Required properties:
7
8- compatible: should be "ti,ehci-omap"
9- reg: should contain one register range i.e. start and length
10- interrupts: description of the interrupt line
11
12Optional properties:
13
14- phys: list of phandles to PHY nodes.
15 This property is required if at least one of the ports is in
16 PHY mode i.e. OMAP_EHCI_PORT_MODE_PHY
17
18To specify the port mode, see
19Documentation/devicetree/bindings/mfd/omap-usb-host.txt
20
21Example for OMAP4:
22
23usbhsehci: ehci@4a064c00 {
24 compatible = "ti,ehci-omap", "usb-ehci";
25 reg = <0x4a064c00 0x400>;
26 interrupts = <0 77 0x4>;
27};
28
29&usbhsehci {
30 phys = <&hsusb1_phy 0 &hsusb3_phy>;
31};
32
diff --git a/Documentation/devicetree/bindings/usb/ohci-omap3.txt b/Documentation/devicetree/bindings/usb/ohci-omap3.txt
new file mode 100644
index 000000000000..14ab42812a8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ohci-omap3.txt
@@ -0,0 +1,15 @@
1OMAP HS USB OHCI controller (OMAP3 and later)
2
3Required properties:
4
5- compatible: should be "ti,ohci-omap3"
6- reg: should contain one register range i.e. start and length
7- interrupts: description of the interrupt line
8
9Example for OMAP4:
10
11usbhsohci: ohci@4a064800 {
12 compatible = "ti,ohci-omap3", "usb-ohci";
13 reg = <0x4a064800 0x400>;
14 interrupts = <0 76 0x4>;
15};
diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
index 1ef0ce71f8fa..662f0f1d2315 100644
--- a/Documentation/devicetree/bindings/usb/omap-usb.txt
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -8,10 +8,10 @@ OMAP MUSB GLUE
8 and disconnect. 8 and disconnect.
9 - multipoint : Should be "1" indicating the musb controller supports 9 - multipoint : Should be "1" indicating the musb controller supports
10 multipoint. This is a MUSB configuration-specific setting. 10 multipoint. This is a MUSB configuration-specific setting.
11 - num_eps : Specifies the number of endpoints. This is also a 11 - num-eps : Specifies the number of endpoints. This is also a
12 MUSB configuration-specific setting. Should be set to "16" 12 MUSB configuration-specific setting. Should be set to "16"
13 - ram_bits : Specifies the ram address size. Should be set to "12" 13 - ram-bits : Specifies the ram address size. Should be set to "12"
14 - interface_type : This is a board specific setting to describe the type of 14 - interface-type : This is a board specific setting to describe the type of
15 interface between the controller and the phy. It should be "0" or "1" 15 interface between the controller and the phy. It should be "0" or "1"
16 specifying ULPI and UTMI respectively. 16 specifying ULPI and UTMI respectively.
17 - mode : Should be "3" to represent OTG. "1" signifies HOST and "2" 17 - mode : Should be "3" to represent OTG. "1" signifies HOST and "2"
@@ -29,18 +29,46 @@ usb_otg_hs: usb_otg_hs@4a0ab000 {
29 ti,hwmods = "usb_otg_hs"; 29 ti,hwmods = "usb_otg_hs";
30 ti,has-mailbox; 30 ti,has-mailbox;
31 multipoint = <1>; 31 multipoint = <1>;
32 num_eps = <16>; 32 num-eps = <16>;
33 ram_bits = <12>; 33 ram-bits = <12>;
34 ctrl-module = <&omap_control_usb>; 34 ctrl-module = <&omap_control_usb>;
35}; 35};
36 36
37Board specific device node entry 37Board specific device node entry
38&usb_otg_hs { 38&usb_otg_hs {
39 interface_type = <1>; 39 interface-type = <1>;
40 mode = <3>; 40 mode = <3>;
41 power = <50>; 41 power = <50>;
42}; 42};
43 43
44OMAP DWC3 GLUE
45 - compatible : Should be "ti,dwc3"
46 - ti,hwmods : Should be "usb_otg_ss"
47 - reg : Address and length of the register set for the device.
48 - interrupts : The irq number of this device that is used to interrupt the
49 MPU
50 - #address-cells, #size-cells : Must be present if the device has sub-nodes
51 - utmi-mode : controls the source of UTMI/PIPE status for VBUS and OTG ID.
52 It should be set to "1" for HW mode and "2" for SW mode.
53 - ranges: the child address space is mapped 1:1 onto the parent address space
54
55Sub-nodes:
56The dwc3 core should be added as subnode to omap dwc3 glue.
57- dwc3 :
58 The binding details of dwc3 can be found in:
59 Documentation/devicetree/bindings/usb/dwc3.txt
60
61omap_dwc3 {
62 compatible = "ti,dwc3";
63 ti,hwmods = "usb_otg_ss";
64 reg = <0x4a020000 0x1ff>;
65 interrupts = <0 93 4>;
66 #address-cells = <1>;
67 #size-cells = <1>;
68 utmi-mode = <2>;
69 ranges;
70};
71
44OMAP CONTROL USB 72OMAP CONTROL USB
45 73
46Required properties: 74Required properties:
diff --git a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
index 033194934f64..f575302e5173 100644
--- a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
+++ b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
@@ -1,20 +1,25 @@
1* Samsung's usb phy transceiver 1SAMSUNG USB-PHY controllers
2 2
3The Samsung's phy transceiver is used for controlling usb phy for 3** Samsung's usb 2.0 phy transceiver
4s3c-hsotg as well as ehci-s5p and ohci-exynos usb controllers 4
5across Samsung SOCs. 5The Samsung's usb 2.0 phy transceiver is used for controlling
6usb 2.0 phy for s3c-hsotg as well as ehci-s5p and ohci-exynos
7usb controllers across Samsung SOCs.
6TODO: Adding the PHY binding with controller(s) according to the under 8TODO: Adding the PHY binding with controller(s) according to the under
7developement generic PHY driver. 9developement generic PHY driver.
8 10
9Required properties: 11Required properties:
10 12
11Exynos4210: 13Exynos4210:
12- compatible : should be "samsung,exynos4210-usbphy" 14- compatible : should be "samsung,exynos4210-usb2phy"
13- reg : base physical address of the phy registers and length of memory mapped 15- reg : base physical address of the phy registers and length of memory mapped
14 region. 16 region.
17- clocks: Clock IDs array as required by the controller.
18- clock-names: names of clocks corresponding to IDs in the clock property
19 as requested by the controller driver.
15 20
16Exynos5250: 21Exynos5250:
17- compatible : should be "samsung,exynos5250-usbphy" 22- compatible : should be "samsung,exynos5250-usb2phy"
18- reg : base physical address of the phy registers and length of memory mapped 23- reg : base physical address of the phy registers and length of memory mapped
19 region. 24 region.
20 25
@@ -44,12 +49,69 @@ Example:
44 usbphy@125B0000 { 49 usbphy@125B0000 {
45 #address-cells = <1>; 50 #address-cells = <1>;
46 #size-cells = <1>; 51 #size-cells = <1>;
47 compatible = "samsung,exynos4210-usbphy"; 52 compatible = "samsung,exynos4210-usb2phy";
48 reg = <0x125B0000 0x100>; 53 reg = <0x125B0000 0x100>;
49 ranges; 54 ranges;
50 55
56 clocks = <&clock 2>, <&clock 305>;
57 clock-names = "xusbxti", "otg";
58
51 usbphy-sys { 59 usbphy-sys {
52 /* USB device and host PHY_CONTROL registers */ 60 /* USB device and host PHY_CONTROL registers */
53 reg = <0x10020704 0x8>; 61 reg = <0x10020704 0x8>;
54 }; 62 };
55 }; 63 };
64
65
66** Samsung's usb 3.0 phy transceiver
67
68Starting with Exynos5250, Samsung's SoCs have a usb 3.0 phy transceiver
69which is used for controlling usb 3.0 phy for dwc3-exynos usb 3.0
70controllers across Samsung SOCs.
71
72Required properties:
73
74Exynos5250:
75- compatible : should be "samsung,exynos5250-usb3phy"
76- reg : base physical address of the phy registers and length of memory mapped
77 region.
78- clocks: Clock IDs array as required by the controller.
79- clock-names: names of clocks corresponding to IDs in the clock property
80 as requested by the controller driver.
81
82Optional properties:
83- #address-cells: should be '1' when usbphy node has a child node with 'reg'
84 property.
85- #size-cells: should be '1' when usbphy node has a child node with 'reg'
86 property.
87- ranges: allows valid translation between child's address space and parent's
88 address space.
89
90- The child node 'usbphy-sys' to the node 'usbphy' is for the system controller
91 interface for usb-phy. It should provide the following information required by
92 usb-phy controller to control phy.
93 - reg : base physical address of PHY_CONTROL registers.
94 The size of this register is the total sum of size of all PHY_CONTROL
95 registers that the SoC has. For example, the size will be
96 '0x4' in case we have only one PHY_CONTROL register (e.g.
97 OTHERS register in S3C64XX or USB_PHY_CONTROL register in S5PV210)
98 and, '0x8' in case we have two PHY_CONTROL registers (e.g.
99 USBDEVICE_PHY_CONTROL and USBHOST_PHY_CONTROL registers in exynos4x).
100 and so on.
101
102Example:
103 usbphy@12100000 {
104 compatible = "samsung,exynos5250-usb3phy";
105 reg = <0x12100000 0x100>;
106 #address-cells = <1>;
107 #size-cells = <1>;
108 ranges;
109
110 clocks = <&clock 1>, <&clock 286>;
111 clock-names = "ext_xtal", "usbdrd30";
112
113 usbphy-sys {
114 /* USB device and host PHY_CONTROL registers */
115 reg = <0x10040704 0x8>;
116 };
117 };
diff --git a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt
new file mode 100644
index 000000000000..d7e272671c7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt
@@ -0,0 +1,34 @@
1USB NOP PHY
2
3Required properties:
4- compatible: should be usb-nop-xceiv
5
6Optional properties:
7- clocks: phandle to the PHY clock. Use as per Documentation/devicetree
8 /bindings/clock/clock-bindings.txt
9 This property is required if clock-frequency is specified.
10
11- clock-names: Should be "main_clk"
12
13- clock-frequency: the clock frequency (in Hz) that the PHY clock must
14 be configured to.
15
16- vcc-supply: phandle to the regulator that provides power to the PHY.
17
18- reset-supply: phandle to the regulator that provides RESET to the PHY.
19
20Example:
21
22 hsusb1_phy {
23 compatible = "usb-nop-xceiv";
24 clock-frequency = <19200000>;
25 clocks = <&osc 0>;
26 clock-names = "main_clk";
27 vcc-supply = <&hsusb1_vcc_regulator>;
28 reset-supply = <&hsusb1_reset_regulator>;
29 };
30
31hsusb1_phy is a NOP USB PHY device that gets its clock from an oscillator
32and expects that clock to be configured to 19.2MHz by the NOP PHY driver.
33hsusb1_vcc_regulator provides power to the PHY and hsusb1_reset_regulator
34controls RESET.
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 19e1ef73ab0d..4d1919bf2332 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -5,6 +5,7 @@ using them to avoid name-space collisions.
5 5
6ad Avionic Design GmbH 6ad Avionic Design GmbH
7adi Analog Devices, Inc. 7adi Analog Devices, Inc.
8aeroflexgaisler Aeroflex Gaisler AB
8ak Asahi Kasei Corp. 9ak Asahi Kasei Corp.
9amcc Applied Micro Circuits Corporation (APM, formally AMCC) 10amcc Applied Micro Circuits Corporation (APM, formally AMCC)
10apm Applied Micro Circuits Corporation (APM) 11apm Applied Micro Circuits Corporation (APM)
@@ -48,6 +49,7 @@ samsung Samsung Semiconductor
48sbs Smart Battery System 49sbs Smart Battery System
49schindler Schindler 50schindler Schindler
50sil Silicon Image 51sil Silicon Image
52silabs Silicon Laboratories
51simtek 53simtek
52sirf SiRF Technology, Inc. 54sirf SiRF Technology, Inc.
53snps Synopsys, Inc. 55snps Synopsys, Inc.
diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
new file mode 100644
index 000000000000..1482103d288f
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
@@ -0,0 +1,41 @@
1lp855x bindings
2
3Required properties:
4 - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553",
5 "ti,lp8556", "ti,lp8557"
6 - reg: I2C slave address (u8)
7 - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device.
8
9Optional properties:
10 - bl-name: Backlight device name (string)
11 - init-brt: Initial value of backlight brightness (u8)
12 - pwm-period: PWM period value. Set only when PWM input mode is used (u32)
13 - rom-addr: Register address of ROM area to be updated (u8)
14 - rom-val: Register value to be updated (u8)
15
16Example:
17
18 /* LP8556 */
19 backlight@2c {
20 compatible = "ti,lp8556";
21 reg = <0x2c>;
22
23 bl-name = "lcd-bl";
24 dev-ctrl = /bits/ 8 <0x85>;
25 init-brt = /bits/ 8 <0x10>;
26 };
27
28 /* LP8557 */
29 backlight@2c {
30 compatible = "ti,lp8557";
31 reg = <0x2c>;
32
33 dev-ctrl = /bits/ 8 <0x41>;
34 init-brt = /bits/ 8 <0x0a>;
35
36 /* 4V OV, 4 output LED string enabled */
37 rom_14h {
38 rom-addr = /bits/ 8 <0x14>;
39 rom-val = /bits/ 8 <0xcf>;
40 };
41 };
diff --git a/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt
new file mode 100644
index 000000000000..5fb9279ac287
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt
@@ -0,0 +1,27 @@
1TPS65217 family of regulators
2
3The TPS65217 chip contains a boost converter and current sinks which can be
4used to drive LEDs for use as backlights.
5
6Required properties:
7- compatible: "ti,tps65217"
8- reg: I2C slave address
9- backlight: node for specifying WLED1 and WLED2 lines in TPS65217
10- isel: selection bit, valid values: 1 for ISET1 (low-level) and 2 for ISET2 (high-level)
11- fdim: PWM dimming frequency, valid values: 100, 200, 500, 1000
12- default-brightness: valid values: 0-100
13
14Each regulator is defined using the standard binding for regulators.
15
16Example:
17
18 tps: tps@24 {
19 reg = <0x24>;
20 compatible = "ti,tps65217";
21 backlight {
22 isel = <1>; /* 1 - ISET1, 2 ISET2 */
23 fdim = <100>; /* TPS65217_BL_FDIM_100HZ */
24 default-brightness = <50>;
25 };
26 };
27
diff --git a/Documentation/devicetree/bindings/video/via,vt8500-fb.txt b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt
index c870b6478ec8..2871e218a0fb 100644
--- a/Documentation/devicetree/bindings/video/via,vt8500-fb.txt
+++ b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt
@@ -5,58 +5,32 @@ Required properties:
5- compatible : "via,vt8500-fb" 5- compatible : "via,vt8500-fb"
6- reg : Should contain 1 register ranges(address and length) 6- reg : Should contain 1 register ranges(address and length)
7- interrupts : framebuffer controller interrupt 7- interrupts : framebuffer controller interrupt
8- display: a phandle pointing to the display node 8- bits-per-pixel : bit depth of framebuffer (16 or 32)
9 9
10Required nodes: 10Required subnodes:
11- display: a display node is required to initialize the lcd panel 11- display-timings: see display-timing.txt for information
12 This should be in the board dts.
13- default-mode: a videomode within the display with timing parameters
14 as specified below.
15 12
16Example: 13Example:
17 14
18 fb@d800e400 { 15 fb@d8050800 {
19 compatible = "via,vt8500-fb"; 16 compatible = "via,vt8500-fb";
20 reg = <0xd800e400 0x400>; 17 reg = <0xd800e400 0x400>;
21 interrupts = <12>; 18 interrupts = <12>;
22 display = <&display>; 19 bits-per-pixel = <16>;
23 default-mode = <&mode0>;
24 };
25
26VIA VT8500 Display
27-----------------------------------------------------
28Required properties (as per of_videomode_helper):
29
30 - hactive, vactive: Display resolution
31 - hfront-porch, hback-porch, hsync-len: Horizontal Display timing parameters
32 in pixels
33 vfront-porch, vback-porch, vsync-len: Vertical display timing parameters in
34 lines
35 - clock: displayclock in Hz
36 - bpp: lcd panel bit-depth.
37 <16> for RGB565, <32> for RGB888
38
39Optional properties (as per of_videomode_helper):
40 - width-mm, height-mm: Display dimensions in mm
41 - hsync-active-high (bool): Hsync pulse is active high
42 - vsync-active-high (bool): Vsync pulse is active high
43 - interlaced (bool): This is an interlaced mode
44 - doublescan (bool): This is a doublescan mode
45 20
46Example: 21 display-timings {
47 display: display@0 { 22 native-mode = <&timing0>;
48 modes { 23 timing0: 800x480 {
49 mode0: mode@0 { 24 clock-frequency = <0>; /* unused but required */
50 hactive = <800>; 25 hactive = <800>;
51 vactive = <480>; 26 vactive = <480>;
52 hback-porch = <88>;
53 hfront-porch = <40>; 27 hfront-porch = <40>;
28 hback-porch = <88>;
54 hsync-len = <0>; 29 hsync-len = <0>;
55 vback-porch = <32>; 30 vback-porch = <32>;
56 vfront-porch = <11>; 31 vfront-porch = <11>;
57 vsync-len = <1>; 32 vsync-len = <1>;
58 clock = <0>; /* unused but required */
59 bpp = <16>; /* non-standard but required */
60 }; 33 };
61 }; 34 };
62 }; 35 };
36
diff --git a/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt
index 3d325e1d11ee..0bcadb2840a5 100644
--- a/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt
+++ b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt
@@ -4,20 +4,30 @@ Wondermedia WM8505 Framebuffer
4Required properties: 4Required properties:
5- compatible : "wm,wm8505-fb" 5- compatible : "wm,wm8505-fb"
6- reg : Should contain 1 register ranges(address and length) 6- reg : Should contain 1 register ranges(address and length)
7- via,display: a phandle pointing to the display node 7- bits-per-pixel : bit depth of framebuffer (16 or 32)
8 8
9Required nodes: 9Required subnodes:
10- display: a display node is required to initialize the lcd panel 10- display-timings: see display-timing.txt for information
11 This should be in the board dts. See definition in
12 Documentation/devicetree/bindings/video/via,vt8500-fb.txt
13- default-mode: a videomode node as specified in
14 Documentation/devicetree/bindings/video/via,vt8500-fb.txt
15 11
16Example: 12Example:
17 13
18 fb@d8050800 { 14 fb@d8051700 {
19 compatible = "wm,wm8505-fb"; 15 compatible = "wm,wm8505-fb";
20 reg = <0xd8050800 0x200>; 16 reg = <0xd8051700 0x200>;
21 display = <&display>; 17 bits-per-pixel = <16>;
22 default-mode = <&mode0>; 18
19 display-timings {
20 native-mode = <&timing0>;
21 timing0: 800x480 {
22 clock-frequency = <0>; /* unused but required */
23 hactive = <800>;
24 vactive = <480>;
25 hfront-porch = <40>;
26 hback-porch = <88>;
27 hsync-len = <0>;
28 vback-porch = <32>;
29 vfront-porch = <11>;
30 vsync-len = <1>;
31 };
32 };
23 }; 33 };
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index d230dd9c99b0..4a93e98b290a 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -150,12 +150,28 @@ discard -- If set, issues discard/TRIM commands to the block
150 device when blocks are freed. This is useful for SSD devices 150 device when blocks are freed. This is useful for SSD devices
151 and sparse/thinly-provisioned LUNs. 151 and sparse/thinly-provisioned LUNs.
152 152
153nfs -- This option maintains an index (cache) of directory 153nfs=stale_rw|nostale_ro
154 inodes by i_logstart which is used by the nfs-related code to 154 Enable this only if you want to export the FAT filesystem
155 improve look-ups. 155 over NFS.
156
157 stale_rw: This option maintains an index (cache) of directory
158 inodes by i_logstart which is used by the nfs-related code to
159 improve look-ups. Full file operations (read/write) over NFS are
160 supported but with cache eviction at NFS server, this could
161 result in ESTALE issues.
162
163 nostale_ro: This option bases the inode number and filehandle
164 on the on-disk location of a file in the MS-DOS directory entry.
165 This ensures that ESTALE will not be returned after a file is
166 evicted from the inode cache. However, it means that operations
167 such as rename, create and unlink could cause filehandles that
168 previously pointed at one file to point at a different file,
169 potentially causing data corruption. For this reason, this
170 option also mounts the filesystem readonly.
171
172 To maintain backward compatibility, '-o nfs' is also accepted,
173 defaulting to stale_rw
156 174
157 Enable this only if you want to export the FAT filesystem
158 over NFS
159 175
160<bool>: 0,1,yes,no,true,false 176<bool>: 0,1,yes,no,true,false
161 177
diff --git a/Documentation/hwmon/adt7410 b/Documentation/hwmon/adt7410
index 58150c480e56..9817941e5f19 100644
--- a/Documentation/hwmon/adt7410
+++ b/Documentation/hwmon/adt7410
@@ -12,29 +12,42 @@ Supported chips:
12 Addresses scanned: None 12 Addresses scanned: None
13 Datasheet: Publicly available at the Analog Devices website 13 Datasheet: Publicly available at the Analog Devices website
14 http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf 14 http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf
15 * Analog Devices ADT7310
16 Prefix: 'adt7310'
17 Addresses scanned: None
18 Datasheet: Publicly available at the Analog Devices website
19 http://www.analog.com/static/imported-files/data_sheets/ADT7310.pdf
20 * Analog Devices ADT7320
21 Prefix: 'adt7320'
22 Addresses scanned: None
23 Datasheet: Publicly available at the Analog Devices website
24 http://www.analog.com/static/imported-files/data_sheets/ADT7320.pdf
15 25
16Author: Hartmut Knaack <knaack.h@gmx.de> 26Author: Hartmut Knaack <knaack.h@gmx.de>
17 27
18Description 28Description
19----------- 29-----------
20 30
21The ADT7410 is a temperature sensor with rated temperature range of -55°C to 31The ADT7310/ADT7410 is a temperature sensor with rated temperature range of
22+150°C. It has a high accuracy of +/-0.5°C and can be operated at a resolution 32-55°C to +150°C. It has a high accuracy of +/-0.5°C and can be operated at a
23of 13 bits (0.0625°C) or 16 bits (0.0078°C). The sensor provides an INT pin to 33resolution of 13 bits (0.0625°C) or 16 bits (0.0078°C). The sensor provides an
24indicate that a minimum or maximum temperature set point has been exceeded, as 34INT pin to indicate that a minimum or maximum temperature set point has been
25well as a critical temperature (CT) pin to indicate that the critical 35exceeded, as well as a critical temperature (CT) pin to indicate that the
26temperature set point has been exceeded. Both pins can be set up with a common 36critical temperature set point has been exceeded. Both pins can be set up with a
27hysteresis of 0°C - 15°C and a fault queue, ranging from 1 to 4 events. Both 37common hysteresis of 0°C - 15°C and a fault queue, ranging from 1 to 4 events.
28pins can individually set to be active-low or active-high, while the whole 38Both pins can individually be set to be active-low or active-high, while the whole
29device can either run in comparator mode or interrupt mode. The ADT7410 39device can either run in comparator mode or interrupt mode. The ADT7410 supports
30supports continous temperature sampling, as well as sampling one temperature 40continuous temperature sampling, as well as sampling one temperature value per
31value per second or even justget one sample on demand for power saving. 41second or even just get one sample on demand for power saving. Besides, it can
32Besides, it can completely power down its ADC, if power management is 42completely power down its ADC, if power management is required.
33required. 43
34 44The ADT7320/ADT7420 is register compatible, the only differences being the
35The ADT7420 is register compatible, the only differences being the package, 45package, a slightly narrower operating temperature range (-40°C to +150°C), and
36a slightly narrower operating temperature range (-40°C to +150°C), and a 46a better accuracy (0.25°C instead of 0.50°C.)
37better accuracy (0.25°C instead of 0.50°C.) 47
48The difference between the ADT7310/ADT7320 and ADT7410/ADT7420 is the control
49interface, the ADT7310 and ADT7320 use SPI while the ADT7410 and ADT7420 use
50I2C.
38 51
39Configuration Notes 52Configuration Notes
40------------------- 53-------------------
diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066
index 26025e419d35..c1b57d72efc3 100644
--- a/Documentation/hwmon/lm25066
+++ b/Documentation/hwmon/lm25066
@@ -1,7 +1,13 @@
1Kernel driver max8688 1Kernel driver lm25066
2===================== 2=====================
3 3
4Supported chips: 4Supported chips:
5 * TI LM25056
6 Prefix: 'lm25056'
7 Addresses scanned: -
8 Datasheets:
9 http://www.ti.com/lit/gpn/lm25056
10 http://www.ti.com/lit/gpn/lm25056a
5 * National Semiconductor LM25066 11 * National Semiconductor LM25066
6 Prefix: 'lm25066' 12 Prefix: 'lm25066'
7 Addresses scanned: - 13 Addresses scanned: -
@@ -25,8 +31,9 @@ Author: Guenter Roeck <linux@roeck-us.net>
25Description 31Description
26----------- 32-----------
27 33
28This driver supports hardware montoring for National Semiconductor LM25066, 34This driver supports hardware monitoring for National Semiconductor / TI LM25056,
29LM5064, and LM5064 Power Management, Monitoring, Control, and Protection ICs. 35LM25066, LM5064, and LM5066 Power Management, Monitoring, Control, and
36Protection ICs.
30 37
31The driver is a client driver to the core PMBus driver. Please see 38The driver is a client driver to the core PMBus driver. Please see
32Documentation/hwmon/pmbus for details on PMBus client drivers. 39Documentation/hwmon/pmbus for details on PMBus client drivers.
@@ -60,14 +67,19 @@ in1_max Maximum input voltage.
60in1_min_alarm Input voltage low alarm. 67in1_min_alarm Input voltage low alarm.
61in1_max_alarm Input voltage high alarm. 68in1_max_alarm Input voltage high alarm.
62 69
63in2_label "vout1" 70in2_label "vmon"
64in2_input Measured output voltage. 71in2_input Measured voltage on VAUX pin
65in2_average Average measured output voltage. 72in2_min Minimum VAUX voltage (LM25056 only).
66in2_min Minimum output voltage. 73in2_max Maximum VAUX voltage (LM25056 only).
67in2_min_alarm Output voltage low alarm. 74in2_min_alarm VAUX voltage low alarm (LM25056 only).
68 75in2_max_alarm VAUX voltage high alarm (LM25056 only).
69in3_label "vout2" 76
70in3_input Measured voltage on vaux pin 77in3_label "vout1"
78 Not supported on LM25056.
79in3_input Measured output voltage.
80in3_average Average measured output voltage.
81in3_min Minimum output voltage.
82in3_min_alarm Output voltage low alarm.
71 83
72curr1_label "iin" 84curr1_label "iin"
73curr1_input Measured input current. 85curr1_input Measured input current.
diff --git a/Documentation/hwmon/lm75 b/Documentation/hwmon/lm75
index c91a1d15fa28..69af1c7db6b7 100644
--- a/Documentation/hwmon/lm75
+++ b/Documentation/hwmon/lm75
@@ -23,7 +23,7 @@ Supported chips:
23 Datasheet: Publicly available at the Maxim website 23 Datasheet: Publicly available at the Maxim website
24 http://www.maxim-ic.com/ 24 http://www.maxim-ic.com/
25 * Microchip (TelCom) TCN75 25 * Microchip (TelCom) TCN75
26 Prefix: 'lm75' 26 Prefix: 'tcn75'
27 Addresses scanned: none 27 Addresses scanned: none
28 Datasheet: Publicly available at the Microchip website 28 Datasheet: Publicly available at the Microchip website
29 http://www.microchip.com/ 29 http://www.microchip.com/
diff --git a/Documentation/hwmon/lm95234 b/Documentation/hwmon/lm95234
new file mode 100644
index 000000000000..a0e95ddfd372
--- /dev/null
+++ b/Documentation/hwmon/lm95234
@@ -0,0 +1,36 @@
1Kernel driver lm95234
2=====================
3
4Supported chips:
5 * National Semiconductor / Texas Instruments LM95234
6 Addresses scanned: I2C 0x18, 0x4d, 0x4e
7 Datasheet: Publicly available at the Texas Instruments website
8 http://www.ti.com/product/lm95234
9
10
11Author: Guenter Roeck <linux@roeck-us.net>
12
13Description
14-----------
15
16LM95234 is an 11-bit digital temperature sensor with a 2-wire System Management
17Bus (SMBus) interface and TruTherm technology that can very accurately monitor
18the temperature of four remote diodes as well as its own temperature.
19The four remote diodes can be external devices such as microprocessors,
20graphics processors or diode-connected 2N3904s. The LM95234's TruTherm
21beta compensation technology allows sensing of 90 nm or 65 nm process
22thermal diodes accurately.
23
24All temperature values are given in millidegrees Celsius. Temperature
25is provided within a range of -127 to +255 degrees (+127.875 degrees for
26the internal sensor). Resolution depends on temperature input and range.
27
28Each sensor has its own maximum limit, but the hysteresis is common to all
29channels. The hysteresis is configurable with the temp1_max_hyst attribute and
30affects the hysteresis on all channels. The first two external sensors also
31have a critical limit.
32
33The lm95234 driver can change its update interval to a fixed set of values.
34It will round up to the next selectable interval. See the datasheet for exact
35values. Reading sensor values more often will do no harm, but will return
36'old' values.
diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978
index e4d75c606c97..dc0d08c61305 100644
--- a/Documentation/hwmon/ltc2978
+++ b/Documentation/hwmon/ltc2978
@@ -2,6 +2,10 @@ Kernel driver ltc2978
2===================== 2=====================
3 3
4Supported chips: 4Supported chips:
5 * Linear Technology LTC2974
6 Prefix: 'ltc2974'
7 Addresses scanned: -
8 Datasheet: http://www.linear.com/product/ltc2974
5 * Linear Technology LTC2978 9 * Linear Technology LTC2978
6 Prefix: 'ltc2978' 10 Prefix: 'ltc2978'
7 Addresses scanned: - 11 Addresses scanned: -
@@ -10,6 +14,10 @@ Supported chips:
10 Prefix: 'ltc3880' 14 Prefix: 'ltc3880'
11 Addresses scanned: - 15 Addresses scanned: -
12 Datasheet: http://www.linear.com/product/ltc3880 16 Datasheet: http://www.linear.com/product/ltc3880
17 * Linear Technology LTC3883
18 Prefix: 'ltc3883'
19 Addresses scanned: -
20 Datasheet: http://www.linear.com/product/ltc3883
13 21
14Author: Guenter Roeck <linux@roeck-us.net> 22Author: Guenter Roeck <linux@roeck-us.net>
15 23
@@ -17,9 +25,9 @@ Author: Guenter Roeck <linux@roeck-us.net>
17Description 25Description
18----------- 26-----------
19 27
20The LTC2978 is an octal power supply monitor, supervisor, sequencer and 28LTC2974 is a quad digital power supply manager. LTC2978 is an octal power supply
21margin controller. The LTC3880 is a dual, PolyPhase DC/DC synchronous 29monitor. LTC3880 is a dual output poly-phase step-down DC/DC controller. LTC3883
22step-down switching regulator controller. 30is a single phase step-down DC/DC controller.
23 31
24 32
25Usage Notes 33Usage Notes
@@ -41,63 +49,90 @@ Sysfs attributes
41in1_label "vin" 49in1_label "vin"
42in1_input Measured input voltage. 50in1_input Measured input voltage.
43in1_min Minimum input voltage. 51in1_min Minimum input voltage.
44in1_max Maximum input voltage. 52in1_max Maximum input voltage. LTC2974 and LTC2978 only.
45in1_lcrit Critical minimum input voltage. 53in1_lcrit Critical minimum input voltage. LTC2974 and LTC2978
54 only.
46in1_crit Critical maximum input voltage. 55in1_crit Critical maximum input voltage.
47in1_min_alarm Input voltage low alarm. 56in1_min_alarm Input voltage low alarm.
48in1_max_alarm Input voltage high alarm. 57in1_max_alarm Input voltage high alarm. LTC2974 and LTC2978 only.
49in1_lcrit_alarm Input voltage critical low alarm. 58in1_lcrit_alarm Input voltage critical low alarm. LTC2974 and LTC2978
59 only.
50in1_crit_alarm Input voltage critical high alarm. 60in1_crit_alarm Input voltage critical high alarm.
51in1_lowest Lowest input voltage. LTC2978 only. 61in1_lowest Lowest input voltage. LTC2974 and LTC2978 only.
52in1_highest Highest input voltage. 62in1_highest Highest input voltage.
53in1_reset_history Reset history. Writing into this attribute will reset 63in1_reset_history Reset input voltage history.
54 history for all attributes. 64
55 65in[N]_label "vout[1-8]".
56in[2-9]_label "vout[1-8]". Channels 3 to 9 on LTC2978 only. 66 LTC2974: N=2-5
57in[2-9]_input Measured output voltage. 67 LTC2978: N=2-9
58in[2-9]_min Minimum output voltage. 68 LTC3880: N=2-3
59in[2-9]_max Maximum output voltage. 69 LTC3883: N=2
60in[2-9]_lcrit Critical minimum output voltage. 70in[N]_input Measured output voltage.
61in[2-9]_crit Critical maximum output voltage. 71in[N]_min Minimum output voltage.
62in[2-9]_min_alarm Output voltage low alarm. 72in[N]_max Maximum output voltage.
63in[2-9]_max_alarm Output voltage high alarm. 73in[N]_lcrit Critical minimum output voltage.
64in[2-9]_lcrit_alarm Output voltage critical low alarm. 74in[N]_crit Critical maximum output voltage.
65in[2-9]_crit_alarm Output voltage critical high alarm. 75in[N]_min_alarm Output voltage low alarm.
66in[2-9]_lowest Lowest output voltage. LTC2978 only. 76in[N]_max_alarm Output voltage high alarm.
67in[2-9]_highest Lowest output voltage. 77in[N]_lcrit_alarm Output voltage critical low alarm.
68in[2-9]_reset_history Reset history. Writing into this attribute will reset 78in[N]_crit_alarm Output voltage critical high alarm.
69 history for all attributes. 79in[N]_lowest Lowest output voltage. LTC2974 and LTC2978 only.
70 80in[N]_highest Highest output voltage.
71temp[1-3]_input Measured temperature. 81in[N]_reset_history Reset output voltage history.
82
83temp[N]_input Measured temperature.
84 On LTC2974, temp[1-4] report external temperatures,
85 and temp5 reports the chip temperature.
72 On LTC2978, only one temperature measurement is 86 On LTC2978, only one temperature measurement is
73 supported and reflects the internal temperature. 87 supported and reports the chip temperature.
74 On LTC3880, temp1 and temp2 report external 88 On LTC3880, temp1 and temp2 report external
75 temperatures, and temp3 reports the internal 89 temperatures, and temp3 reports the chip temperature.
76 temperature. 90 On LTC3883, temp1 reports an external temperature,
77temp[1-3]_min Mimimum temperature. 91 and temp2 reports the chip temperature.
78temp[1-3]_max Maximum temperature. 92temp[N]_min Minimum temperature. LTC2974 and LTC2978 only.
79temp[1-3]_lcrit Critical low temperature. 93temp[N]_max Maximum temperature.
80temp[1-3]_crit Critical high temperature. 94temp[N]_lcrit Critical low temperature.
81temp[1-3]_min_alarm Chip temperature low alarm. 95temp[N]_crit Critical high temperature.
82temp[1-3]_max_alarm Chip temperature high alarm. 96temp[N]_min_alarm Temperature low alarm. LTC2974 and LTC2978 only.
83temp[1-3]_lcrit_alarm Chip temperature critical low alarm. 97temp[N]_max_alarm Temperature high alarm.
84temp[1-3]_crit_alarm Chip temperature critical high alarm. 98temp[N]_lcrit_alarm Temperature critical low alarm.
85temp[1-3]_lowest Lowest measured temperature. LTC2978 only. 99temp[N]_crit_alarm Temperature critical high alarm.
86temp[1-3]_highest Highest measured temperature. 100temp[N]_lowest Lowest measured temperature. LTC2974 and LTC2978 only.
87temp[1-3]_reset_history Reset history. Writing into this attribute will reset 101 Not supported for chip temperature sensor on LTC2974.
88 history for all attributes. 102temp[N]_highest Highest measured temperature. Not supported for chip
89 103 temperature sensor on LTC2974.
90power[1-2]_label "pout[1-2]". LTC3880 only. 104temp[N]_reset_history Reset temperature history. Not supported for chip
91power[1-2]_input Measured power. 105 temperature sensor on LTC2974.
92 106
93curr1_label "iin". LTC3880 only. 107power1_label "pin". LTC3883 only.
108power1_input Measured input power.
109
110power[N]_label "pout[1-4]".
111 LTC2974: N=1-4
112 LTC2978: Not supported
113 LTC3880: N=1-2
114 LTC3883: N=2
115power[N]_input Measured output power.
116
117curr1_label "iin". LTC3880 and LTC3883 only.
94curr1_input Measured input current. 118curr1_input Measured input current.
95curr1_max Maximum input current. 119curr1_max Maximum input current.
96curr1_max_alarm Input current high alarm. 120curr1_max_alarm Input current high alarm.
97 121curr1_highest Highest input current. LTC3883 only.
98curr[2-3]_label "iout[1-2]". LTC3880 only. 122curr1_reset_history Reset input current history. LTC3883 only.
99curr[2-3]_input Measured input current. 123
100curr[2-3]_max Maximum input current. 124curr[N]_label "iout[1-4]".
101curr[2-3]_crit Critical input current. 125 LTC2974: N=1-4
102curr[2-3]_max_alarm Input current high alarm. 126 LTC2978: not supported
103curr[2-3]_crit_alarm Input current critical high alarm. 127 LTC3880: N=2-3
128 LTC3883: N=2
129curr[N]_input Measured output current.
130curr[N]_max Maximum output current.
131curr[N]_crit Critical high output current.
132curr[N]_lcrit Critical low output current. LTC2974 only.
133curr[N]_max_alarm Output current high alarm.
134curr[N]_crit_alarm Output current critical high alarm.
135curr[N]_lcrit_alarm Output current critical low alarm. LTC2974 only.
136curr[N]_lowest Lowest output current. LTC2974 only.
137curr[N]_highest Highest output current.
138curr[N]_reset_history Reset output current history.
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775
new file mode 100644
index 000000000000..4e9ef60e8c6c
--- /dev/null
+++ b/Documentation/hwmon/nct6775
@@ -0,0 +1,188 @@
1Note
2====
3
4This driver supersedes the NCT6775F and NCT6776F support in the W83627EHF
5driver.
6
7Kernel driver NCT6775
8=====================
9
10Supported chips:
11 * Nuvoton NCT5572D/NCT6771F/NCT6772F/NCT6775F/W83677HG-I
12 Prefix: 'nct6775'
13 Addresses scanned: ISA address retrieved from Super I/O registers
14 Datasheet: Available from Nuvoton upon request
15 * Nuvoton NCT5577D/NCT6776D/NCT6776F
16 Prefix: 'nct6776'
17 Addresses scanned: ISA address retrieved from Super I/O registers
18 Datasheet: Available from Nuvoton upon request
19 * Nuvoton NCT5532D/NCT6779D
20 Prefix: 'nct6779'
21 Addresses scanned: ISA address retrieved from Super I/O registers
22 Datasheet: Available from Nuvoton upon request
23
24Authors:
25 Guenter Roeck <linux@roeck-us.net>
26
27Description
28-----------
29
30This driver implements support for the Nuvoton NCT6775F, NCT6776F, and NCT6779D
31and compatible super I/O chips.
32
33The chips support up to 25 temperature monitoring sources. Up to 6 of those are
34direct temperature sensor inputs, the others are special sources such as PECI,
35PCH, and SMBUS. Depending on the chip type, 2 to 6 of the temperature sources
36can be monitored and compared against minimum, maximum, and critical
37temperatures. The driver reports up to 10 of the temperatures to the user.
38There are 4 to 5 fan rotation speed sensors, 8 to 15 analog voltage sensors,
39one VID, alarms with beep warnings (control unimplemented), and some automatic
40fan regulation strategies (plus manual fan control mode).
41
42The temperature sensor sources on all chips are configurable. The configured
43source for each of the temperature sensors is provided in tempX_label.
44
45Temperatures are measured in degrees Celsius and measurement resolution is
46either 1 degC or 0.5 degC, depending on the temperature source and
47configuration. An alarm is triggered when the temperature gets higher than
48the high limit; it stays on until the temperature falls below the hysteresis
49value. Alarms are only supported for temp1 to temp6, depending on the chip type.
50
51Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
52triggered if the rotation speed has dropped below a programmable limit. On
53NCT6775F, fan readings can be divided by a programmable divider (1, 2, 4, 8,
5416, 32, 64 or 128) to give the readings more range or accuracy; the other chips
55do not have a fan speed divider. The driver sets the most suitable fan divisor
56itself; specifically, it increases the divider value each time a fan speed
57reading returns an invalid value, and it reduces it if the fan speed reading
58is lower than optimal. Some fans might not be present because they share pins
59with other functions.
60
61Voltage sensors (also known as IN sensors) report their values in millivolts.
62An alarm is triggered if the voltage has crossed a programmable minimum
63or maximum limit.
64
65The driver supports automatic fan control mode known as Thermal Cruise.
66In this mode, the chip attempts to keep the measured temperature in a
67predefined temperature range. If the temperature goes out of range, fan
68is driven slower/faster to reach the predefined range again.
69
70The mode works for fan1-fan5.
71
72sysfs attributes
73----------------
74
75pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range:
76 0 (lowest speed) to 255 (full)
77
78pwm[1-5]_enable - this file controls mode of fan/temperature control:
79 * 0 Fan control disabled (fans set to maximum speed)
80 * 1 Manual mode, write to pwm[1-5] any value 0-255
81 * 2 "Thermal Cruise" mode
82 * 3 "Fan Speed Cruise" mode
83 * 4 "Smart Fan III" mode (NCT6775F only)
84 * 5 "Smart Fan IV" mode
85
86pwm[1-5]_mode - controls if output is PWM or DC level
87 * 0 DC output
88 * 1 PWM output
89
90Common fan control attributes
91-----------------------------
92
93pwm[1-5]_temp_sel Temperature source. Value is temperature sensor index.
94 For example, select '1' for temp1_input.
95pwm[1-5]_weight_temp_sel
96 Secondary temperature source. Value is temperature
97 sensor index. For example, select '1' for temp1_input.
98 Set to 0 to disable secondary temperature control.
99
100If secondary temperature functionality is enabled, it is controlled with the
101following attributes.
102
103pwm[1-5]_weight_duty_step
104 Duty step size.
105pwm[1-5]_weight_temp_step
106 Temperature step size. With each step over
107 temp_step_base, the value of weight_duty_step is added
108 to the current pwm value.
109pwm[1-5]_weight_temp_step_base
110 Temperature at which secondary temperature control kicks
111 in.
112pwm[1-5]_weight_temp_step_tol
113 Temperature step tolerance.
114
115Thermal Cruise mode (2)
116-----------------------
117
118If the temperature is in the range defined by:
119
120pwm[1-5]_target_temp Target temperature, unit millidegree Celsius
121 (range 0 - 127000)
122pwm[1-5]_temp_tolerance
123 Target temperature tolerance, unit millidegree Celsius
124
125there are no changes to fan speed. Once the temperature leaves the interval, fan
126speed increases (if temperature is higher than desired) or decreases (if
127temperature is lower than desired), using the following limits and time
128intervals.
129
130pwm[1-5]_start fan pwm start value (range 1 - 255), to start fan
131 when the temperature is above defined range.
132pwm[1-5]_floor lowest fan pwm (range 0 - 255) if temperature is below
133 the defined range. If set to 0, the fan is expected to
134 stop if the temperature is below the defined range.
135pwm[1-5]_step_up_time milliseconds before fan speed is increased
136pwm[1-5]_step_down_time milliseconds before fan speed is decreased
137pwm[1-5]_stop_time how many milliseconds must elapse to switch
138 corresponding fan off (when the temperature was below
139 defined range).
140
141Speed Cruise mode (3)
142---------------------
143
144This mode tries to keep the fan speed constant.
145
146fan[1-5]_target Target fan speed
147fan[1-5]_tolerance
148 Target speed tolerance
149
150
151Untested; use at your own risk.
152
153Smart Fan IV mode (5)
154---------------------
155
156This mode offers multiple slopes to control the fan speed. The slopes can be
157controlled by setting the pwm and temperature attributes. When the temperature
158rises, the chip will calculate the DC/PWM output based on the current slope.
159There are up to seven data points depending on the chip type. Subsequent data
160points should be set to higher temperatures and higher pwm values to achieve
161higher fan speeds with increasing temperature. The last data point reflects
162critical temperature mode, in which the fans should run at full speed.
163
164pwm[1-5]_auto_point[1-7]_pwm
165 pwm value to be set if temperature reaches matching
166 temperature range.
167pwm[1-5]_auto_point[1-7]_temp
168 Temperature over which the matching pwm is enabled.
169pwm[1-5]_temp_tolerance
170 Temperature tolerance, unit millidegree Celsius
171pwm[1-5]_crit_temp_tolerance
172 Temperature tolerance for critical temperature,
173 unit millidegree Celsius
174
175pwm[1-5]_step_up_time milliseconds before fan speed is increased
176pwm[1-5]_step_down_time milliseconds before fan speed is decreased
177
178Usage Notes
179-----------
180
181On various ASUS boards with NCT6776F, it appears that CPUTIN is not really
182connected to anything and floats, or that it is connected to some non-standard
183temperature measurement device. As a result, the temperature reported on CPUTIN
184will not reflect a usable value. It often reports unreasonably high
185temperatures, and in some cases the reported temperature declines if the actual
186temperature increases (similar to the raw PECI temperature value - see PECI
187specification for details). CPUTIN should therefore be ignored on ASUS
188boards. The CPU temperature on ASUS boards is reported from PECI 0.
diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15
index 02850bdfac18..778987d1856f 100644
--- a/Documentation/hwmon/sht15
+++ b/Documentation/hwmon/sht15
@@ -40,7 +40,7 @@ bits for humidity, or 12 bits for temperature and 8 bits for humidity.
40The humidity calibration coefficients are programmed into an OTP memory on the 40The humidity calibration coefficients are programmed into an OTP memory on the
41chip. These coefficients are used to internally calibrate the signals from the 41chip. These coefficients are used to internally calibrate the signals from the
42sensors. Disabling the reload of those coefficients allows saving 10ms for each 42sensors. Disabling the reload of those coefficients allows saving 10ms for each
43measurement and decrease power consumption, while loosing on precision. 43measurement and decrease power consumption, while losing on precision.
44 44
45Some options may be set directly in the sht15_platform_data structure 45Some options may be set directly in the sht15_platform_data structure
46or via sysfs attributes. 46or via sysfs attributes.
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401
index 9fc447249212..f91e3fa7e5ec 100644
--- a/Documentation/hwmon/tmp401
+++ b/Documentation/hwmon/tmp401
@@ -8,8 +8,16 @@ Supported chips:
8 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html 8 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html
9 * Texas Instruments TMP411 9 * Texas Instruments TMP411
10 Prefix: 'tmp411' 10 Prefix: 'tmp411'
11 Addresses scanned: I2C 0x4c 11 Addresses scanned: I2C 0x4c, 0x4d, 0x4e
12 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html 12 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html
13 * Texas Instruments TMP431
14 Prefix: 'tmp431'
15 Addresses scanned: I2C 0x4c, 0x4d
16 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp431.html
17 * Texas Instruments TMP432
18 Prefix: 'tmp432'
19 Addresses scanned: I2C 0x4c, 0x4d
20 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html
13 21
14Authors: 22Authors:
15 Hans de Goede <hdegoede@redhat.com> 23 Hans de Goede <hdegoede@redhat.com>
@@ -18,19 +26,19 @@ Authors:
18Description 26Description
19----------- 27-----------
20 28
21This driver implements support for Texas Instruments TMP401 and 29This driver implements support for Texas Instruments TMP401, TMP411,
22TMP411 chips. These chips implements one remote and one local 30TMP431, and TMP432 chips. These chips implement one or two remote and
23temperature sensor. Temperature is measured in degrees 31one local temperature sensors. Temperature is measured in degrees
24Celsius. Resolution of the remote sensor is 0.0625 degree. Local 32Celsius. Resolution of the remote sensor is 0.0625 degree. Local
25sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not 33sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not
26supported by the driver so far, so using the default resolution of 0.5 34supported by the driver so far, so using the default resolution of 0.5
27degree). 35degree).
28 36
29The driver provides the common sysfs-interface for temperatures (see 37The driver provides the common sysfs-interface for temperatures (see
30/Documentation/hwmon/sysfs-interface under Temperatures). 38Documentation/hwmon/sysfs-interface under Temperatures).
31 39
32The TMP411 chip is compatible with TMP401. It provides some additional 40The TMP411 and TMP431 chips are compatible with TMP401. TMP411 provides
33features. 41some additional features.
34 42
35* Minimum and Maximum temperature measured since power-on, chip-reset 43* Minimum and Maximum temperature measured since power-on, chip-reset
36 44
@@ -40,3 +48,6 @@ features.
40 48
41 Exported via sysfs attribute temp_reset_history. Writing 1 to this 49 Exported via sysfs attribute temp_reset_history. Writing 1 to this
42 file triggers a reset. 50 file triggers a reset.
51
52TMP432 is compatible with TMP401 and TMP431. It supports two external
53temperature sensors.
diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100
index 756b57c6b73e..33908a4d68ff 100644
--- a/Documentation/hwmon/zl6100
+++ b/Documentation/hwmon/zl6100
@@ -125,7 +125,7 @@ in2_label "vmon"
125in2_input Measured voltage on VMON (ZL2004) or VDRV (ZL9101M, 125in2_input Measured voltage on VMON (ZL2004) or VDRV (ZL9101M,
126 ZL9117M) pin. Reported voltage is 16x the voltage on the 126 ZL9117M) pin. Reported voltage is 16x the voltage on the
127 pin (adjusted internally by the chip). 127 pin (adjusted internally by the chip).
128in2_lcrit Critical minumum VMON/VDRV Voltage. 128in2_lcrit Critical minimum VMON/VDRV Voltage.
129in2_crit Critical maximum VMON/VDRV voltage. 129in2_crit Critical maximum VMON/VDRV voltage.
130in2_lcrit_alarm VMON/VDRV voltage critical low alarm. 130in2_lcrit_alarm VMON/VDRV voltage critical low alarm.
131in2_crit_alarm VMON/VDRV voltage critical high alarm. 131in2_crit_alarm VMON/VDRV voltage critical high alarm.
diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c
index 30fe4bb9a069..0d6018c316c7 100644
--- a/Documentation/i2c/busses/i2c-diolan-u2c
+++ b/Documentation/i2c/busses/i2c-diolan-u2c
@@ -5,7 +5,7 @@ Supported adapters:
5 Documentation: 5 Documentation:
6 http://www.diolan.com/i2c/u2c12.html 6 http://www.diolan.com/i2c/u2c12.html
7 7
8Author: Guenter Roeck <guenter.roeck@ericsson.com> 8Author: Guenter Roeck <linux@roeck-us.net>
9 9
10Description 10Description
11----------- 11-----------
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
index 223e4f0582d0..9f651c181429 100644
--- a/Documentation/ia64/err_inject.txt
+++ b/Documentation/ia64/err_inject.txt
@@ -882,7 +882,7 @@ int err_inj()
882 cpu=parameters[i].cpu; 882 cpu=parameters[i].cpu;
883 k = cpu%64; 883 k = cpu%64;
884 j = cpu/64; 884 j = cpu/64;
885 mask[j]=1<<k; 885 mask[j] = 1UL << k;
886 886
887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) { 887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888 perror("Error sched_setaffinity:"); 888 perror("Error sched_setaffinity:");
diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt
index 3262b6e4d686..e544c7ff8cfa 100644
--- a/Documentation/input/alps.txt
+++ b/Documentation/input/alps.txt
@@ -3,10 +3,26 @@ ALPS Touchpad Protocol
3 3
4Introduction 4Introduction
5------------ 5------------
6 6Currently the ALPS touchpad driver supports five protocol versions in use by
7Currently the ALPS touchpad driver supports four protocol versions in use by 7ALPS touchpads, called versions 1, 2, 3, 4 and 5.
8ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various 8
9protocol versions is contained in the following sections. 9Since roughly mid-2010 several new ALPS touchpads have been released and
10integrated into a variety of laptops and netbooks. These new touchpads
11have enough behavior differences that the alps_model_data definition
12table, describing the properties of the different versions, is no longer
13adequate. The design choices were to re-define the alps_model_data
14table, with the risk of regression testing existing devices, or isolate
15the new devices outside of the alps_model_data table. The latter design
16choice was made. The new touchpad signatures are named: "Rushmore",
17"Pinnacle", and "Dolphin", which you will see in the alps.c code.
18For the purposes of this document, this group of ALPS touchpads will
19generically be called "new ALPS touchpads".
20
21We experimented with probing the ACPI interface _HID (Hardware ID)/_CID
22(Compatibility ID) definition as a way to uniquely identify the
23different ALPS variants but there did not appear to be a 1:1 mapping.
24In fact, it appeared to be an m:n mapping between the _HID and actual
25hardware type.
10 26
11Detection 27Detection
12--------- 28---------
@@ -20,9 +36,13 @@ If the E6 report is successful, the touchpad model is identified using the "E7
20report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is 36report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is
21matched against known models in the alps_model_data_array. 37matched against known models in the alps_model_data_array.
22 38
23With protocol versions 3 and 4, the E7 report model signature is always 39For older touchpads supporting protocol versions 3 and 4, the E7 report
2473-02-64. To differentiate between these versions, the response from the 40model signature is always 73-02-64. To differentiate between these
25"Enter Command Mode" sequence must be inspected as described below. 41versions, the response from the "Enter Command Mode" sequence must be
42inspected as described below.
43
44The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but
45seem to be better differentiated by the EC Command Mode response.
26 46
27Command Mode 47Command Mode
28------------ 48------------
@@ -47,6 +67,14 @@ address of the register being read, and the third contains the value of the
47register. Registers are written by writing the value one nibble at a time 67register. Registers are written by writing the value one nibble at a time
48using the same encoding used for addresses. 68using the same encoding used for addresses.
49 69
70For the new ALPS touchpads, the EC command is used to enter command
71mode. The response in the new ALPS touchpads is significantly different,
72and more important in determining the behavior. This code has been
73separated from the original alps_model_data table and put in the
74alps_identify function. For example, there seem to be two hardware init
75sequences for the "Dolphin" touchpads as determined by the second byte
76of the EC response.
77
50Packet Format 78Packet Format
51------------- 79-------------
52 80
@@ -187,3 +215,28 @@ There are several things worth noting here.
187 well. 215 well.
188 216
189So far no v4 devices with tracksticks have been encountered. 217So far no v4 devices with tracksticks have been encountered.
218
219ALPS Absolute Mode - Protocol Version 5
220---------------------------------------
221This is basically Protocol Version 3 but with different logic for packet
222decode. It uses the same alps_process_touchpad_packet_v3 call with a
223specialized decode_fields function pointer to correctly interpret the
224packets. This appears to only be used by the Dolphin devices.
225
226For single-touch, the 6-byte packet format is:
227
228 byte 0: 1 1 0 0 1 0 0 0
229 byte 1: 0 x6 x5 x4 x3 x2 x1 x0
230 byte 2: 0 y6 y5 y4 y3 y2 y1 y0
231 byte 3: 0 M R L 1 m r l
232 byte 4: y10 y9 y8 y7 x10 x9 x8 x7
233 byte 5: 0 z6 z5 z4 z3 z2 z1 z0
234
235For mt, the format is:
236
237 byte 0: 1 1 1 n3 1 n2 n1 x24
238 byte 1: 1 y7 y6 y5 y4 y3 y2 y1
239 byte 2: ? x2 x1 y12 y11 y10 y9 y8
240 byte 3: 0 x23 x22 x21 x20 x19 x18 x17
241 byte 4: 0 x9 x8 x7 x6 x5 x4 x3
242 byte 5: 0 x16 x15 x14 x13 x12 x11 x10
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 3210540f8bd3..237acab169dd 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -131,6 +131,7 @@ Code Seq#(hex) Include File Comments
131'H' 40-4F sound/hdspm.h conflict! 131'H' 40-4F sound/hdspm.h conflict!
132'H' 40-4F sound/hdsp.h conflict! 132'H' 40-4F sound/hdsp.h conflict!
133'H' 90 sound/usb/usx2y/usb_stream.h 133'H' 90 sound/usb/usx2y/usb_stream.h
134'H' A0 uapi/linux/usb/cdc-wdm.h
134'H' C0-F0 net/bluetooth/hci.h conflict! 135'H' C0-F0 net/bluetooth/hci.h conflict!
135'H' C0-DF net/bluetooth/hidp/hidp.h conflict! 136'H' C0-DF net/bluetooth/hidp/hidp.h conflict!
136'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict! 137'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict!
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 13f1aa09b938..9c7fd988e299 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -297,6 +297,7 @@ Boot into System Kernel
297 On ia64, 256M@256M is a generous value that typically works. 297 On ia64, 256M@256M is a generous value that typically works.
298 The region may be automatically placed on ia64, see the 298 The region may be automatically placed on ia64, see the
299 dump-capture kernel config option notes above. 299 dump-capture kernel config option notes above.
300 If sparse memory is used, the size should be rounded to GRANULE boundaries.
300 301
301 On s390x, typically use "crashkernel=xxM". The value of xx is dependent 302 On s390x, typically use "crashkernel=xxM". The value of xx is dependent
302 on the memory consumption of the kdump system. In general this is not 303 on the memory consumption of the kdump system. In general this is not
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b5cfd047becb..b801a332fb6f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -44,6 +44,7 @@ parameter is applicable:
44 AVR32 AVR32 architecture is enabled. 44 AVR32 AVR32 architecture is enabled.
45 AX25 Appropriate AX.25 support is enabled. 45 AX25 Appropriate AX.25 support is enabled.
46 BLACKFIN Blackfin architecture is enabled. 46 BLACKFIN Blackfin architecture is enabled.
47 CLK Common clock infrastructure is enabled.
47 DRM Direct Rendering Management support is enabled. 48 DRM Direct Rendering Management support is enabled.
48 DYNAMIC_DEBUG Build in debug messages and enable them at runtime 49 DYNAMIC_DEBUG Build in debug messages and enable them at runtime
49 EDD BIOS Enhanced Disk Drive Services (EDD) is enabled 50 EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
@@ -320,6 +321,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
320 on: enable for both 32- and 64-bit processes 321 on: enable for both 32- and 64-bit processes
321 off: disable for both 32- and 64-bit processes 322 off: disable for both 32- and 64-bit processes
322 323
324 alloc_snapshot [FTRACE]
325 Allocate the ftrace snapshot buffer on boot up when the
326 main buffer is allocated. This is handy if debugging
327 and you need to use tracing_snapshot() on boot up, and
328 do not want to use tracing_snapshot_alloc() as it needs
329 to be done where GFP_KERNEL allocations are allowed.
330
323 amd_iommu= [HW,X86-64] 331 amd_iommu= [HW,X86-64]
324 Pass parameters to the AMD IOMMU driver in the system. 332 Pass parameters to the AMD IOMMU driver in the system.
325 Possible values are: 333 Possible values are:
@@ -465,6 +473,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
465 473
466 cio_ignore= [S390] 474 cio_ignore= [S390]
467 See Documentation/s390/CommonIO for details. 475 See Documentation/s390/CommonIO for details.
476 clk_ignore_unused
477 [CLK]
478 Keep all clocks already enabled by bootloader on,
479 even if no driver has claimed them. This is useful
480 for debug and development, but should not be
481 needed on a platform with proper driver support.
482 For more information, see Documentation/clk.txt.
468 483
469 clock= [BUGS=X86-32, HW] gettimeofday clocksource override. 484 clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
470 [Deprecated] 485 [Deprecated]
@@ -596,9 +611,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
596 is selected automatically. Check 611 is selected automatically. Check
597 Documentation/kdump/kdump.txt for further details. 612 Documentation/kdump/kdump.txt for further details.
598 613
599 crashkernel_low=size[KMG]
600 [KNL, x86] parts under 4G.
601
602 crashkernel=range1:size1[,range2:size2,...][@offset] 614 crashkernel=range1:size1[,range2:size2,...][@offset]
603 [KNL] Same as above, but depends on the memory 615 [KNL] Same as above, but depends on the memory
604 in the running system. The syntax of range is 616 in the running system. The syntax of range is
@@ -606,6 +618,26 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
606 a memory unit (amount[KMG]). See also 618 a memory unit (amount[KMG]). See also
607 Documentation/kdump/kdump.txt for an example. 619 Documentation/kdump/kdump.txt for an example.
608 620
621 crashkernel=size[KMG],high
622 [KNL, x86_64] range could be above 4G. Allow kernel
623 to allocate physical memory region from top, so it could
624 be above 4G if the system has more than 4G of RAM installed.
625 Otherwise the memory region will be allocated below 4G, if
626 available.
627 It will be ignored if crashkernel=X is specified.
628 crashkernel=size[KMG],low
629 [KNL, x86_64] range under 4G. When crashkernel=X,high
630 is passed, the kernel could allocate a physical memory region
631 above 4G, which causes the second kernel to crash on systems
632 that require some amount of low memory, e.g. swiotlb
633 requires at least 64M+32K low memory. The kernel would
634 try to allocate 72M below 4G automatically.
635 This option lets the user specify their own low range under 4G
636 for the second kernel instead.
637 0: to disable low allocation.
638 It will be ignored when crashkernel=X,high is not used
639 or memory reserved is below 4G.
640
609 cs89x0_dma= [HW,NET] 641 cs89x0_dma= [HW,NET]
610 Format: <dma> 642 Format: <dma>
611 643
@@ -800,6 +832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
800 edd= [EDD] 832 edd= [EDD]
801 Format: {"off" | "on" | "skip[mbr]"} 833 Format: {"off" | "on" | "skip[mbr]"}
802 834
835 efi_no_storage_paranoia [EFI; X86]
836 Using this parameter you can use more than 50% of
837 your efi variable storage. Use this parameter only if
838 you are really sure that your UEFI does sane gc and
839 fulfills the spec; otherwise your board may brick.
840
803 eisa_irq_edge= [PARISC,HW] 841 eisa_irq_edge= [PARISC,HW]
804 See header of drivers/parisc/eisa.c. 842 See header of drivers/parisc/eisa.c.
805 843
@@ -2473,9 +2511,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2473 In kernels built with CONFIG_RCU_NOCB_CPU=y, set 2511 In kernels built with CONFIG_RCU_NOCB_CPU=y, set
2474 the specified list of CPUs to be no-callback CPUs. 2512 the specified list of CPUs to be no-callback CPUs.
2475 Invocation of these CPUs' RCU callbacks will 2513 Invocation of these CPUs' RCU callbacks will
2476 be offloaded to "rcuoN" kthreads created for 2514 be offloaded to "rcuox/N" kthreads created for
2477 that purpose. This reduces OS jitter on the 2515 that purpose, where "x" is "b" for RCU-bh, "p"
2516 for RCU-preempt, and "s" for RCU-sched, and "N"
2517 is the CPU number. This reduces OS jitter on the
2478 offloaded CPUs, which can be useful for HPC and 2518 offloaded CPUs, which can be useful for HPC and
2519
2479 real-time workloads. It can also improve energy 2520 real-time workloads. It can also improve energy
2480 efficiency for asymmetric multiprocessors. 2521 efficiency for asymmetric multiprocessors.
2481 2522
@@ -2499,6 +2540,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2499 leaf rcu_node structure. Useful for very large 2540 leaf rcu_node structure. Useful for very large
2500 systems. 2541 systems.
2501 2542
2543 rcutree.jiffies_till_first_fqs= [KNL,BOOT]
2544 Set delay from grace-period initialization to
2545 first attempt to force quiescent states.
2546 Units are jiffies, minimum value is zero,
2547 and maximum value is HZ.
2548
2549 rcutree.jiffies_till_next_fqs= [KNL,BOOT]
2550 Set delay between subsequent attempts to force
2551 quiescent states. Units are jiffies, minimum
2552 value is one, and maximum value is HZ.
2553
2502 rcutree.qhimark= [KNL,BOOT] 2554 rcutree.qhimark= [KNL,BOOT]
2503 Set threshold of queued 2555 Set threshold of queued
2504 RCU callbacks over which batch limiting is disabled. 2556 RCU callbacks over which batch limiting is disabled.
@@ -2513,16 +2565,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2513 rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] 2565 rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
2514 Set timeout for RCU CPU stall warning messages. 2566 Set timeout for RCU CPU stall warning messages.
2515 2567
2516 rcutree.jiffies_till_first_fqs= [KNL,BOOT] 2568 rcutree.rcu_idle_gp_delay= [KNL,BOOT]
2517 Set delay from grace-period initialization to 2569 Set wakeup interval for idle CPUs that have
2518 first attempt to force quiescent states. 2570 RCU callbacks (RCU_FAST_NO_HZ=y).
2519 Units are jiffies, minimum value is zero,
2520 and maximum value is HZ.
2521 2571
2522 rcutree.jiffies_till_next_fqs= [KNL,BOOT] 2572 rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
2523 Set delay between subsequent attempts to force 2573 Set wakeup interval for idle CPUs that have
2524 quiescent states. Units are jiffies, minimum 2574 only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
2525 value is one, and maximum value is HZ. 2575 Lazy RCU callbacks are those which RCU can
2576 prove do nothing more than free memory.
2526 2577
2527 rcutorture.fqs_duration= [KNL,BOOT] 2578 rcutorture.fqs_duration= [KNL,BOOT]
2528 Set duration of force_quiescent_state bursts. 2579 Set duration of force_quiescent_state bursts.
@@ -3234,6 +3285,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
3234 or other driver-specific files in the 3285 or other driver-specific files in the
3235 Documentation/watchdog/ directory. 3286 Documentation/watchdog/ directory.
3236 3287
3288 workqueue.disable_numa
3289 By default, all work items queued to unbound
3290 workqueues are affine to the NUMA nodes they're
3291 issued on, which results in better behavior in
3292 general. If NUMA affinity needs to be disabled for
3293 whatever reason, this option can be used. Note
3294 that this also can be controlled per-workqueue for
3295 workqueues visible under /sys/bus/workqueue/.
3296
3237 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of 3297 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
3238 default x2apic cluster mode on platforms 3298 default x2apic cluster mode on platforms
3239 supporting x2apic. 3299 supporting x2apic.
diff --git a/Documentation/misc-devices/mei/mei-client-bus.txt b/Documentation/misc-devices/mei/mei-client-bus.txt
new file mode 100644
index 000000000000..f83910a8ce76
--- /dev/null
+++ b/Documentation/misc-devices/mei/mei-client-bus.txt
@@ -0,0 +1,138 @@
1Intel(R) Management Engine (ME) Client bus API
2===============================================
3
4
5Rationale
6=========
7MEI misc character device is useful for dedicated applications to send and receive
8data to the many FW appliances found in Intel's ME from the user space.
9However, for some of the ME functionalities it makes sense to leverage existing software
10stack and expose them through existing kernel subsystems.
11
12In order to plug seamlessly into the kernel device driver model we add kernel virtual
13bus abstraction on top of the MEI driver. This allows implementing linux kernel drivers
14for the various MEI features as standalone entities found in their respective subsystems.
15Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to
16the existing code.
17
18
19MEI CL bus API
20==============
21A driver implementation for an MEI Client is very similar to existing bus
22based device drivers. The driver registers itself as an MEI CL bus driver through
23the mei_cl_driver structure:
24
25struct mei_cl_driver {
26 struct device_driver driver;
27 const char *name;
28
29 const struct mei_cl_device_id *id_table;
30
31 int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id);
32 int (*remove)(struct mei_cl_device *dev);
33};
34
35struct mei_cl_id {
36 char name[MEI_NAME_SIZE];
37 kernel_ulong_t driver_info;
38};
39
40The mei_cl_id structure allows the driver to bind itself against a device name.
41
42To actually register a driver on the ME Client bus one must call the mei_cl_driver_register()
43API. This is typically called at module init time.
44
45Once registered on the ME Client bus, a driver will typically try to do some I/O on
46this bus and this should be done through the mei_cl_send() and mei_cl_recv()
47routines. The latter is synchronous (blocks and sleeps until data shows up).
48In order for drivers to be notified of pending events waiting for them (e.g.
49an Rx event) they can register an event handler through the
50mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event
51will trigger an event handler call and the driver implementation is supposed
52to call mei_cl_recv() from the event handler in order to fetch the pending
53received buffers.
54
55
56Example
57=======
58As a theoretical example let's pretend the ME comes with a "contact" NFC IP.
59The driver init and exit routines for this device would look like:
60
61#define CONTACT_DRIVER_NAME "contact"
62
63static struct mei_cl_device_id contact_mei_cl_tbl[] = {
64 { CONTACT_DRIVER_NAME, },
65
66 /* required last entry */
67 { }
68};
69MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
70
71static struct mei_cl_driver contact_driver = {
72 .id_table = contact_mei_tbl,
73 .name = CONTACT_DRIVER_NAME,
74
75 .probe = contact_probe,
76 .remove = contact_remove,
77};
78
79static int contact_init(void)
80{
81 int r;
82
83 r = mei_cl_driver_register(&contact_driver);
84 if (r) {
85 pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
86 return r;
87 }
88
89 return 0;
90}
91
92static void __exit contact_exit(void)
93{
94 mei_cl_driver_unregister(&contact_driver);
95}
96
97module_init(contact_init);
98module_exit(contact_exit);
99
100And the driver's simplified probe routine would look like that:
101
102int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
103{
104 struct contact_driver *contact;
105
106 [...]
107 mei_cl_enable_device(dev);
108
109 mei_cl_register_event_cb(dev, contact_event_cb, contact);
110
111 return 0;
112 }
113
114In the probe routine the driver first enables the MEI device and then registers
115an ME bus event handler which is as close as it can get to registering a
116threaded IRQ handler.
117The handler implementation will typically call some I/O routine depending on
118the pending events:
119
120#define MAX_NFC_PAYLOAD 128
121
122static void contact_event_cb(struct mei_cl_device *dev, u32 events,
123 void *context)
124{
125 struct contact_driver *contact = context;
126
127 if (events & BIT(MEI_EVENT_RX)) {
128 u8 payload[MAX_NFC_PAYLOAD];
129 int payload_size;
130
131 payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD);
132 if (payload_size <= 0)
133 return;
134
135 /* Hook to the NFC subsystem */
136 nfc_hci_recv_frame(contact->hdev, payload, payload_size);
137 }
138}
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
index f2a2488f1bf3..9573d0c48c6e 100644
--- a/Documentation/networking/ipvs-sysctl.txt
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -15,6 +15,13 @@ amemthresh - INTEGER
15 enabled and the variable is automatically set to 2, otherwise 15 enabled and the variable is automatically set to 2, otherwise
16 the strategy is disabled and the variable is set to 1. 16 the strategy is disabled and the variable is set to 1.
17 17
18backup_only - BOOLEAN
19 0 - disabled (default)
20 not 0 - enabled
21
22 If set, disable the director function while the server is
23 in backup mode to avoid packet loops for DR/TUN methods.
24
18conntrack - BOOLEAN 25conntrack - BOOLEAN
19 0 - disabled (default) 26 0 - disabled (default)
20 not 0 - enabled 27 not 0 - enabled
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index c0aab985bad9..949d5dcdd9a3 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -105,6 +105,83 @@ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
105 Proto [2 bytes] 105 Proto [2 bytes]
106 Raw protocol(IP, IPv6, etc) frame. 106 Raw protocol(IP, IPv6, etc) frame.
107 107
108 3.3 Multiqueue tuntap interface:
109
110 From version 3.8, Linux supports multiqueue tuntap which can use multiple
111 file descriptors (queues) to parallelize packet sending or receiving. The
112 device allocation is the same as before, and if user wants to create multiple
113 queues, TUNSETIFF with the same device name must be called many times with
114 IFF_MULTI_QUEUE flag.
115
116 char *dev should be the name of the device, queues is the number of queues to
117 be created, fds is used to store and return the file descriptors (queues)
118 created to the caller. Each file descriptor serves as the interface of a
119 queue which can be accessed by userspace.
120
121 #include <linux/if.h>
122 #include <linux/if_tun.h>
123
124 int tun_alloc_mq(char *dev, int queues, int *fds)
125 {
126 struct ifreq ifr;
127 int fd, err, i;
128
129 if (!dev)
130 return -1;
131
132 memset(&ifr, 0, sizeof(ifr));
133 /* Flags: IFF_TUN - TUN device (no Ethernet headers)
134 * IFF_TAP - TAP device
135 *
136 * IFF_NO_PI - Do not provide packet information
137 * IFF_MULTI_QUEUE - Create a queue of multiqueue device
138 */
139 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
140 strcpy(ifr.ifr_name, dev);
141
142 for (i = 0; i < queues; i++) {
143 if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
144 goto err;
145 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
146 if (err) {
147 close(fd);
148 goto err;
149 }
150 fds[i] = fd;
151 }
152
153 return 0;
154 err:
155 for (--i; i >= 0; i--)
156 close(fds[i]);
157 return err;
158 }
159
160 A new ioctl(TUNSETQUEUE) was introduced to enable or disable a queue. When
161 calling it with IFF_DETACH_QUEUE flag, the queue is disabled. And when
162 calling it with IFF_ATTACH_QUEUE flag, the queue is enabled. The queue is
163 enabled by default after it is created through TUNSETIFF.
164
165 fd is the file descriptor (queue) that we want to enable or disable, when
166 enable is true we enable it, otherwise we disable it.
167
168 #include <linux/if.h>
169 #include <linux/if_tun.h>
170
171 int tun_set_queue(int fd, int enable)
172 {
173 struct ifreq ifr;
174
175 memset(&ifr, 0, sizeof(ifr));
176
177 if (enable)
178 ifr.ifr_flags = IFF_ATTACH_QUEUE;
179 else
180 ifr.ifr_flags = IFF_DETACH_QUEUE;
181
182 return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
183 }
184
108Universal TUN/TAP device driver Frequently Asked Question. 185Universal TUN/TAP device driver Frequently Asked Question.
109 186
1101. What platforms are supported by TUN/TAP driver ? 1871. What platforms are supported by TUN/TAP driver ?
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index a2b57e0a1db0..447fd4cd54ec 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -736,6 +736,13 @@ All the above functions are mandatory to implement for a pinmux driver.
736Pin control interaction with the GPIO subsystem 736Pin control interaction with the GPIO subsystem
737=============================================== 737===============================================
738 738
739Note that the following implies that the use case is to use a certain pin
740from the Linux kernel using the API in <linux/gpio.h> with gpio_request()
741and similar functions. There are cases where you may be using something
742that your datasheet calls "GPIO mode" but actually is just an electrical
743configuration for a certain device. See the section below named
744"GPIO mode pitfalls" for more details on this scenario.
745
739The public pinmux API contains two functions named pinctrl_request_gpio() 746The public pinmux API contains two functions named pinctrl_request_gpio()
740and pinctrl_free_gpio(). These two functions shall *ONLY* be called from 747and pinctrl_free_gpio(). These two functions shall *ONLY* be called from
741gpiolib-based drivers as part of their gpio_request() and 748gpiolib-based drivers as part of their gpio_request() and
@@ -774,6 +781,111 @@ obtain the function "gpioN" where "N" is the global GPIO pin number if no
774special GPIO-handler is registered. 781special GPIO-handler is registered.
775 782
776 783
784GPIO mode pitfalls
785==================
786
787Sometimes the developer may be confused by a datasheet talking about a pin
788being possible to set into "GPIO mode". It appears that what hardware
789engineers mean with "GPIO mode" is not necessarily the use case that is
790implied in the kernel interface <linux/gpio.h>: a pin that you grab from
791kernel code and then either listen for input or drive high/low to
792assert/deassert some external line.
793
794Rather hardware engineers think that "GPIO mode" means that you can
795software-control a few electrical properties of the pin that you would
796not be able to control if the pin was in some other mode, such as muxed in
797for a device.
798
799Example: a pin is usually muxed in to be used as a UART TX line. But during
800system sleep, we need to put this pin into "GPIO mode" and ground it.
801
802If you make a 1-to-1 map to the GPIO subsystem for this pin, you may start
803to think that you need to come up with something real complex, that the
804pin shall be used for UART TX and GPIO at the same time, that you will grab
805a pin control handle and set it to a certain state to enable UART TX to be
806muxed in, then twist it over to GPIO mode and use gpio_direction_output()
807to drive it low during sleep, then mux it over to UART TX again when you
808wake up and maybe even gpio_request/gpio_free as part of this cycle. This
809all gets very complicated.
810
811The solution is to not think that what the datasheet calls "GPIO mode"
812has to be handled by the <linux/gpio.h> interface. Instead view this as
813a certain pin config setting. Look in e.g. <linux/pinctrl/pinconf-generic.h>
814and you find this in the documentation:
815
816 PIN_CONFIG_OUTPUT: this will configure the pin in output, use argument
817 1 to indicate high level, argument 0 to indicate low level.
818
819So it is perfectly possible to push a pin into "GPIO mode" and drive the
820line low as part of the usual pin control map. So for example your UART
821driver may look like this:
822
823#include <linux/pinctrl/consumer.h>
824
825struct pinctrl *pinctrl;
826struct pinctrl_state *pins_default;
827struct pinctrl_state *pins_sleep;
828
829pins_default = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_DEFAULT);
830pins_sleep = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_SLEEP);
831
832/* Normal mode */
833retval = pinctrl_select_state(pinctrl, pins_default);
834/* Sleep mode */
835retval = pinctrl_select_state(pinctrl, pins_sleep);
836
837And your machine configuration may look like this:
838--------------------------------------------------
839
840static unsigned long uart_default_mode[] = {
841 PIN_CONF_PACKED(PIN_CONFIG_DRIVE_PUSH_PULL, 0),
842};
843
844static unsigned long uart_sleep_mode[] = {
845 PIN_CONF_PACKED(PIN_CONFIG_OUTPUT, 0),
846};
847
848static struct pinctrl_map __initdata pinmap[] = {
849 PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo",
850 "u0_group", "u0"),
851 PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo",
852 "UART_TX_PIN", uart_default_mode),
853 PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo",
854 "u0_group", "gpio-mode"),
855 PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo",
856 "UART_TX_PIN", uart_sleep_mode),
857};
858
859foo_init(void) {
860 pinctrl_register_mappings(pinmap, ARRAY_SIZE(pinmap));
861}
862
863Here the pins we want to control are in the "u0_group" and there is some
864function called "u0" that can be enabled on this group of pins, and then
865everything is UART business as usual. But there is also some function
866named "gpio-mode" that can be mapped onto the same pins to move them into
867GPIO mode.
868
869This will give the desired effect without any bogus interaction with the
870GPIO subsystem. It is just an electrical configuration used by that device
871when going to sleep, it might imply that the pin is set into something the
872datasheet calls "GPIO mode" but that is not the point: it is still used
873by that UART device to control the pins that pertain to that very UART
874driver, putting them into modes needed by the UART. GPIO in the Linux
875kernel sense is just some 1-bit line, and is a different use case.
876
877How the registers are poked to attain the push/pull and output low
878configuration and the muxing of the "u0" or "gpio-mode" group onto these
879pins is a question for the driver.
880
881Some datasheets will be more helpful and refer to the "GPIO mode" as
882"low power mode" rather than anything to do with GPIO. This often means
883the same thing electrically speaking, but in this latter case the
884software engineers will usually quickly identify that this is some
885specific muxing/configuration rather than anything related to the GPIO
886API.
887
888
777Board/machine configuration 889Board/machine configuration
778================================== 890==================================
779 891
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index ae66f9b90a25..fcaf0b4efba2 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -143,7 +143,8 @@ Parameter: id: handle for debug log
143 143
144Return Value: none 144Return Value: none
145 145
146Description: frees memory for a debug log 146Description: frees memory for a debug log and removes all registered debug
147 views.
147 Must not be called within an interrupt handler 148 Must not be called within an interrupt handler
148 149
149--------------------------------------------------------------------------- 150---------------------------------------------------------------------------
diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx
index 27a91cf43d6d..5020b7b5a244 100644
--- a/Documentation/scsi/LICENSE.qla2xxx
+++ b/Documentation/scsi/LICENSE.qla2xxx
@@ -1,4 +1,4 @@
1Copyright (c) 2003-2012 QLogic Corporation 1Copyright (c) 2003-2013 QLogic Corporation
2QLogic Linux FC-FCoE Driver 2QLogic Linux FC-FCoE Driver
3 3
4This program includes a device driver for Linux 3.x. 4This program includes a device driver for Linux 3.x.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index ce6581c8ca26..95731a08f257 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -890,9 +890,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
890 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) 890 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
891 power_save - Automatic power-saving timeout (in second, 0 = 891 power_save - Automatic power-saving timeout (in second, 0 =
892 disable) 892 disable)
893 power_save_controller - Support runtime D3 of HD-audio controller 893 power_save_controller - Reset HD-audio controller in power-saving mode
894 (-1 = on for supported chip (default), false = off, 894 (default = on)
895 true = force to on even for unsupported hardware)
896 align_buffer_size - Force rounding of buffer/period sizes to multiples 895 align_buffer_size - Force rounding of buffer/period sizes to multiples
897 of 128 bytes. This is more efficient in terms of memory 896 of 128 bytes. This is more efficient in terms of memory
898 access but isn't required by the HDA spec and prevents 897 access but isn't required by the HDA spec and prevents
@@ -912,7 +911,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
912 models depending on the codec chip. The list of available models 911 models depending on the codec chip. The list of available models
913 is found in HD-Audio-Models.txt 912 is found in HD-Audio-Models.txt
914 913
915 The model name "genric" is treated as a special case. When this 914 The model name "generic" is treated as a special case. When this
916 model is given, the driver uses the generic codec parser without 915 model is given, the driver uses the generic codec parser without
917 "codec-patch". It's sometimes good for testing and debugging. 916 "codec-patch". It's sometimes good for testing and debugging.
918 917
diff --git a/Documentation/sound/alsa/seq_oss.html b/Documentation/sound/alsa/seq_oss.html
index d9776cf60c07..9663b45f6fde 100644
--- a/Documentation/sound/alsa/seq_oss.html
+++ b/Documentation/sound/alsa/seq_oss.html
@@ -285,7 +285,7 @@ sample data.
285<H4> 285<H4>
2867.2.4 Close Callback</H4> 2867.2.4 Close Callback</H4>
287The <TT>close</TT> callback is called when this device is closed by the 287The <TT>close</TT> callback is called when this device is closed by the
288applicaion. If any private data was allocated in open callback, it must 288application. If any private data was allocated in open callback, it must
289be released in the close callback. The deletion of ALSA port should be 289be released in the close callback. The deletion of ALSA port should be
290done here, too. This callback must not be NULL. 290done here, too. This callback must not be NULL.
291<H4> 291<H4>
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 078701fdbd4d..dcc75a9ed919 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -18,6 +18,7 @@ files can be found in mm/swap.c.
18 18
19Currently, these files are in /proc/sys/vm: 19Currently, these files are in /proc/sys/vm:
20 20
21- admin_reserve_kbytes
21- block_dump 22- block_dump
22- compact_memory 23- compact_memory
23- dirty_background_bytes 24- dirty_background_bytes
@@ -53,11 +54,41 @@ Currently, these files are in /proc/sys/vm:
53- percpu_pagelist_fraction 54- percpu_pagelist_fraction
54- stat_interval 55- stat_interval
55- swappiness 56- swappiness
57- user_reserve_kbytes
56- vfs_cache_pressure 58- vfs_cache_pressure
57- zone_reclaim_mode 59- zone_reclaim_mode
58 60
59============================================================== 61==============================================================
60 62
63admin_reserve_kbytes
64
65The amount of free memory in the system that should be reserved for users
66with the capability cap_sys_admin.
67
68admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
69
70That should provide enough for the admin to log in and kill a process,
71if necessary, under the default overcommit 'guess' mode.
72
73Systems running under overcommit 'never' should increase this to account
74for the full Virtual Memory Size of programs used to recover. Otherwise,
75root may not be able to log in to recover the system.
76
77How do you calculate a minimum useful reserve?
78
79sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
80
81For overcommit 'guess', we can sum resident set sizes (RSS).
82On x86_64 this is about 8MB.
83
84For overcommit 'never', we can take the max of their virtual sizes (VSZ)
85and add the sum of their RSS.
86On x86_64 this is about 128MB.
87
88Changing this takes effect whenever an application requests memory.
89
90==============================================================
91
61block_dump 92block_dump
62 93
63block_dump enables block I/O debugging when set to a nonzero value. More 94block_dump enables block I/O debugging when set to a nonzero value. More
@@ -542,6 +573,7 @@ memory until it actually runs out.
542 573
543When this flag is 2, the kernel uses a "never overcommit" 574When this flag is 2, the kernel uses a "never overcommit"
544policy that attempts to prevent any overcommit of memory. 575policy that attempts to prevent any overcommit of memory.
576Note that user_reserve_kbytes affects this policy.
545 577
546This feature can be very useful because there are a lot of 578This feature can be very useful because there are a lot of
547programs that malloc() huge amounts of memory "just-in-case" 579programs that malloc() huge amounts of memory "just-in-case"
@@ -645,6 +677,24 @@ The default value is 60.
645 677
646============================================================== 678==============================================================
647 679
680- user_reserve_kbytes
681
682When overcommit_memory is set to 2, "never overcommit" mode, reserve
683min(3% of current process size, user_reserve_kbytes) of free memory.
684This is intended to prevent a user from starting a single memory hogging
685process, such that they cannot recover (kill the hog).
686
687user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
688
689If this is reduced to zero, then the user will be allowed to allocate
690all free memory with a single process, minus admin_reserve_kbytes.
691Any subsequent attempts to execute a command will result in
692"fork: Cannot allocate memory".
693
694Changing this takes effect whenever an application requests memory.
695
696==============================================================
697
648vfs_cache_pressure 698vfs_cache_pressure
649------------------ 699------------------
650 700
diff --git a/Documentation/this_cpu_ops.txt b/Documentation/this_cpu_ops.txt
new file mode 100644
index 000000000000..1a4ce7e3e05f
--- /dev/null
+++ b/Documentation/this_cpu_ops.txt
@@ -0,0 +1,205 @@
1this_cpu operations
2-------------------
3
4this_cpu operations are a way of optimizing access to per cpu
5variables associated with the *currently* executing processor through
6the use of segment registers (or a dedicated register where the cpu
7permanently stored the beginning of the per cpu area for a specific
8processor).
9
10The this_cpu operations add a per cpu variable offset to the processor
11specific percpu base and encode that operation in the instruction
12operating on the per cpu variable.
13
14This means there are no atomicity issues between the calculation of
15the offset and the operation on the data. Therefore it is not
16necessary to disable preempt or interrupts to ensure that the
17processor is not changed between the calculation of the address and
18the operation on the data.
19
20Read-modify-write operations are of particular interest. Frequently
21processors have special lower latency instructions that can operate
22without the typical synchronization overhead but still provide some
23sort of relaxed atomicity guarantee. The x86 for example can execute
24RMW (Read Modify Write) instructions like inc/dec/cmpxchg without the
25lock prefix and the associated latency penalty.
26
27Access to the variable without the lock prefix is not synchronized but
28synchronization is not necessary since we are dealing with per cpu
29data specific to the currently executing processor. Only the current
30processor should be accessing that variable and therefore there are no
31concurrency issues with other processors in the system.
32
33On x86 the fs: or the gs: segment registers contain the base of the
34per cpu area. It is then possible to simply use the segment override
35to relocate a per cpu relative address to the proper per cpu area for
36the processor. So the relocation to the per cpu base is encoded in the
37instruction via a segment register prefix.
38
39For example:
40
41 DEFINE_PER_CPU(int, x);
42 int z;
43
44 z = this_cpu_read(x);
45
46results in a single instruction
47
48 mov ax, gs:[x]
49
50instead of a sequence of calculation of the address and then a fetch
51from that address which occurs with the percpu operations. Before
52this_cpu_ops such sequence also required preempt disable/enable to
53prevent the kernel from moving the thread to a different processor
54while the calculation is performed.
55
56The main use of the this_cpu operations has been to optimize counter
57operations.
58
59 this_cpu_inc(x)
60
61results in the following single instruction (no lock prefix!)
62
63 inc gs:[x]
64
65instead of the following operations required if there is no segment
66register.
67
68 int *y;
69 int cpu;
70
71 cpu = get_cpu();
72 y = per_cpu_ptr(&x, cpu);
73 (*y)++;
74 put_cpu();
75
76Note that these operations can only be used on percpu data that is
77reserved for a specific processor. Without disabling preemption in the
78surrounding code this_cpu_inc() will only guarantee that one of the
79percpu counters is correctly incremented. However, there is no
80guarantee that the OS will not move the process directly before or
81after the this_cpu instruction is executed. In general this means that
82the value of the individual counters for each processor are
83meaningless. The sum of all the per cpu counters is the only value
84that is of interest.
85
86Per cpu variables are used for performance reasons. Bouncing cache
87lines can be avoided if multiple processors concurrently go through
88the same code paths. Since each processor has its own per cpu
89variables no concurrent cacheline updates take place. The price that
90has to be paid for this optimization is the need to add up the per cpu
91counters when the value of the counter is needed.
92
93
94Special operations:
95-------------------
96
97 y = this_cpu_ptr(&x)
98
99Takes the offset of a per cpu variable (&x !) and returns the address
100of the per cpu variable that belongs to the currently executing
101processor. this_cpu_ptr avoids multiple steps that the common
102get_cpu/put_cpu sequence requires. No processor number is
103available. Instead the offset of the local per cpu area is simply
104added to the percpu offset.
105
106
107
108Per cpu variables and offsets
109-----------------------------
110
111Per cpu variables have *offsets* to the beginning of the percpu
112area. They do not have addresses although they look like that in the
113code. Offsets cannot be directly dereferenced. The offset must be
114added to a base pointer of a percpu area of a processor in order to
115form a valid address.
116
117Therefore the use of x or &x outside of the context of per cpu
118operations is invalid and will generally be treated like a NULL
119pointer dereference.
120
121In the context of per cpu operations
122
123 x is a per cpu variable. Most this_cpu operations take a cpu
124 variable.
125
126 &x is the *offset* of a per cpu variable. this_cpu_ptr() takes
127 the offset of a per cpu variable which makes this look a bit
128 strange.
129
130
131
132Operations on a field of a per cpu structure
133--------------------------------------------
134
135Let's say we have a percpu structure
136
137 struct s {
138 int n,m;
139 };
140
141 DEFINE_PER_CPU(struct s, p);
142
143
144Operations on these fields are straightforward
145
146 this_cpu_inc(p.m)
147
148 z = this_cpu_cmpxchg(p.m, 0, 1);
149
150
151If we have an offset to struct s:
152
153 struct s __percpu *ps = &p;
154
155 z = this_cpu_dec(ps->m);
156
157 z = this_cpu_inc_return(ps->n);
158
159
160The calculation of the pointer may require the use of this_cpu_ptr()
161if we do not make use of this_cpu ops later to manipulate fields:
162
163 struct s *pp;
164
165 pp = this_cpu_ptr(&p);
166
167 pp->m--;
168
169 z = pp->n++;
170
171
172Variants of this_cpu ops
173-------------------------
174
175this_cpu ops are interrupt safe. Some architectures do not support
176these per cpu local operations. In that case the operation must be
177replaced by code that disables interrupts, then does the operations
178that are guaranteed to be atomic and then reenable interrupts. Doing
179so is expensive. If there are other reasons why the scheduler cannot
180change the processor we are executing on then there is no reason to
181disable interrupts. For that purpose the __this_cpu operations are
182provided. For example.
183
184 __this_cpu_inc(x);
185
186Will increment x and will not fallback to code that disables
187interrupts on platforms that cannot accomplish atomicity through
188address relocation and a Read-Modify-Write operation in the same
189instruction.
190
191
192
193&this_cpu_ptr(pp)->n vs this_cpu_ptr(&pp->n)
194--------------------------------------------
195
196The first operation takes the offset and forms an address and then
197adds the offset of the n field.
198
199The second one first adds the two offsets and then does the
200relocation. IMHO the second form looks cleaner and has an easier time
201with (). The second form also is consistent with the way
202this_cpu_read() and friends are used.
203
204
205Christoph Lameter, April 3rd, 2013
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 53d6a3c51d87..bfe8c29b1f1d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -8,6 +8,7 @@ Copyright 2008 Red Hat Inc.
8Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, 8Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton,
9 John Kacur, and David Teigland. 9 John Kacur, and David Teigland.
10Written for: 2.6.28-rc2 10Written for: 2.6.28-rc2
11Updated for: 3.10
11 12
12Introduction 13Introduction
13------------ 14------------
@@ -17,13 +18,16 @@ designers of systems to find what is going on inside the kernel.
17It can be used for debugging or analyzing latencies and 18It can be used for debugging or analyzing latencies and
18performance issues that take place outside of user-space. 19performance issues that take place outside of user-space.
19 20
20Although ftrace is the function tracer, it also includes an 21Although ftrace is typically considered the function tracer, it
21infrastructure that allows for other types of tracing. Some of 22is really a framework of several assorted tracing utilities.
22the tracers that are currently in ftrace include a tracer to 23There's latency tracing to examine what occurs between interrupts
23trace context switches, the time it takes for a high priority 24disabled and enabled, as well as for preemption and from the time
24task to run after it was woken up, the time interrupts are 25a task is woken to the time the task is actually scheduled in.
25disabled, and more (ftrace allows for tracer plugins, which 26
26means that the list of tracers can always grow). 27One of the most common uses of ftrace is the event tracing.
28Throughout the kernel are hundreds of static event points that
29can be enabled via the debugfs file system to see what is
30going on in certain parts of the kernel.
27 31
28 32
29Implementation Details 33Implementation Details
@@ -61,7 +65,7 @@ the extended "/sys/kernel/debug/tracing" path name.
61 65
62That's it! (assuming that you have ftrace configured into your kernel) 66That's it! (assuming that you have ftrace configured into your kernel)
63 67
64After mounting the debugfs, you can see a directory called 68After mounting debugfs, you can see a directory called
65"tracing". This directory contains the control and output files 69"tracing". This directory contains the control and output files
66of ftrace. Here is a list of some of the key files: 70of ftrace. Here is a list of some of the key files:
67 71
@@ -84,7 +88,9 @@ of ftrace. Here is a list of some of the key files:
84 88
85 This sets or displays whether writing to the trace 89 This sets or displays whether writing to the trace
86 ring buffer is enabled. Echo 0 into this file to disable 90 ring buffer is enabled. Echo 0 into this file to disable
87 the tracer or 1 to enable it. 91 the tracer or 1 to enable it. Note, this only disables
92 writing to the ring buffer, the tracing overhead may
93 still be occurring.
88 94
89 trace: 95 trace:
90 96
@@ -109,7 +115,15 @@ of ftrace. Here is a list of some of the key files:
109 115
110 This file lets the user control the amount of data 116 This file lets the user control the amount of data
111 that is displayed in one of the above output 117 that is displayed in one of the above output
112 files. 118 files. Options also exist to modify how a tracer
119 or events work (stack traces, timestamps, etc).
120
121 options:
122
123 This is a directory that has a file for every available
124 trace option (also in trace_options). Options may also be set
125 or cleared by writing a "1" or "0" respectively into the
126 corresponding file with the option name.
113 127
114 tracing_max_latency: 128 tracing_max_latency:
115 129
@@ -121,10 +135,17 @@ of ftrace. Here is a list of some of the key files:
121 latency is greater than the value in this 135 latency is greater than the value in this
122 file. (in microseconds) 136 file. (in microseconds)
123 137
138 tracing_thresh:
139
140 Some latency tracers will record a trace whenever the
141 latency is greater than the number in this file.
142 Only active when the file contains a number greater than 0.
143 (in microseconds)
144
124 buffer_size_kb: 145 buffer_size_kb:
125 146
126 This sets or displays the number of kilobytes each CPU 147 This sets or displays the number of kilobytes each CPU
127 buffer can hold. The tracer buffers are the same size 148 buffer holds. By default, the trace buffers are the same size
128 for each CPU. The displayed number is the size of the 149 for each CPU. The displayed number is the size of the
129 CPU buffer and not total size of all buffers. The 150 CPU buffer and not total size of all buffers. The
130 trace buffers are allocated in pages (blocks of memory 151 trace buffers are allocated in pages (blocks of memory
@@ -133,16 +154,30 @@ of ftrace. Here is a list of some of the key files:
133 than requested, the rest of the page will be used, 154 than requested, the rest of the page will be used,
134 making the actual allocation bigger than requested. 155 making the actual allocation bigger than requested.
135 ( Note, the size may not be a multiple of the page size 156 ( Note, the size may not be a multiple of the page size
136 due to buffer management overhead. ) 157 due to buffer management meta-data. )
137 158
138 This can only be updated when the current_tracer 159 buffer_total_size_kb:
139 is set to "nop". 160
161 This displays the total combined size of all the trace buffers.
162
163 free_buffer:
164
165 If a process is performing the tracing, and the ring buffer
166 should be shrunk "freed" when the process is finished, even
167 if it were to be killed by a signal, this file can be used
168 for that purpose. On close of this file, the ring buffer will
169 be resized to its minimum size. Having a process that is tracing
170 also open this file, when the process exits its file descriptor
171 for this file will be closed, and in doing so, the ring buffer
172 will be "freed".
173
174 It may also stop tracing if disable_on_free option is set.
140 175
141 tracing_cpumask: 176 tracing_cpumask:
142 177
143 This is a mask that lets the user only trace 178 This is a mask that lets the user only trace
144 on specified CPUS. The format is a hex string 179 on specified CPUs. The format is a hex string
145 representing the CPUS. 180 representing the CPUs.
146 181
147 set_ftrace_filter: 182 set_ftrace_filter:
148 183
@@ -183,6 +218,261 @@ of ftrace. Here is a list of some of the key files:
183 "set_ftrace_notrace". (See the section "dynamic ftrace" 218 "set_ftrace_notrace". (See the section "dynamic ftrace"
184 below for more details.) 219 below for more details.)
185 220
221 enabled_functions:
222
223 This file is more for debugging ftrace, but can also be useful
224 in seeing if any function has a callback attached to it.
225 Not only does the trace infrastructure use ftrace function
226 trace utility, but other subsystems might too. This file
227 displays all functions that have a callback attached to them
228 as well as the number of callbacks that have been attached.
229 Note, a callback may also call multiple functions which will
230 not be listed in this count.
231
232 If the callback registered to be traced by a function with
233 the "save regs" attribute (thus even more overhead), a 'R'
234 will be displayed on the same line as the function that
235 is returning registers.
236
237 function_profile_enabled:
238
239 When set it will enable all functions with either the function
240 tracer, or if enabled, the function graph tracer. It will
241 keep a histogram of the number of functions that were called
242 and if run with the function graph tracer, it will also keep
243 track of the time spent in those functions. The histogram
244 content can be displayed in the files:
245
246 trace_stats/function<cpu> ( function0, function1, etc).
247
248 trace_stats:
249
250 A directory that holds different tracing stats.
251
252 kprobe_events:
253
254 Enable dynamic trace points. See kprobetrace.txt.
255
256 kprobe_profile:
257
258 Dynamic trace points stats. See kprobetrace.txt.
259
260 max_graph_depth:
261
262 Used with the function graph tracer. This is the max depth
263 it will trace into a function. Setting this to a value of
264 one will show only the first kernel function that is called
265 from user space.
266
267 printk_formats:
268
269 This is for tools that read the raw format files. If an event in
270 the ring buffer references a string (currently only trace_printk()
271 does this), only a pointer to the string is recorded into the buffer
272 and not the string itself. This prevents tools from knowing what
273 that string was. This file displays the string and address for
274 the string allowing tools to map the pointers to what the
275 strings were.
276
277 saved_cmdlines:
278
279 Only the pid of the task is recorded in a trace event unless
280 the event specifically saves the task comm as well. Ftrace
281 makes a cache of pid mappings to comms to try to display
282 comms for events. If a pid for a comm is not listed, then
283 "<...>" is displayed in the output.
284
285 snapshot:
286
287 This displays the "snapshot" buffer and also lets the user
288 take a snapshot of the current running trace.
289 See the "Snapshot" section below for more details.
290
291 stack_max_size:
292
293 When the stack tracer is activated, this will display the
294 maximum stack size it has encountered.
295 See the "Stack Trace" section below.
296
297 stack_trace:
298
299 This displays the stack back trace of the largest stack
300 that was encountered when the stack tracer is activated.
301 See the "Stack Trace" section below.
302
303 stack_trace_filter:
304
305 This is similar to "set_ftrace_filter" but it limits what
306 functions the stack tracer will check.
307
308 trace_clock:
309
310 Whenever an event is recorded into the ring buffer, a
311 "timestamp" is added. This stamp comes from a specified
312 clock. By default, ftrace uses the "local" clock. This
313 clock is very fast and strictly per cpu, but on some
314 systems it may not be monotonic with respect to other
315 CPUs. In other words, the local clocks may not be in sync
316 with local clocks on other CPUs.
317
318 Usual clocks for tracing:
319
320 # cat trace_clock
321 [local] global counter x86-tsc
322
323 local: Default clock, but may not be in sync across CPUs
324
325 global: This clock is in sync with all CPUs but may
326 be a bit slower than the local clock.
327
328 counter: This is not a clock at all, but literally an atomic
329 counter. It counts up one by one, but is in sync
330 with all CPUs. This is useful when you need to
331 know exactly the order events occurred with respect to
332 each other on different CPUs.
333
334 uptime: This uses the jiffies counter and the time stamp
335 is relative to the time since boot up.
336
337 perf: This makes ftrace use the same clock that perf uses.
338 Eventually perf will be able to read ftrace buffers
339 and this will help out in interleaving the data.
340
341 x86-tsc: Architectures may define their own clocks. For
342 example, x86 uses its own TSC cycle clock here.
343
344 To set a clock, simply echo the clock name into this file.
345
346 echo global > trace_clock
347
348 trace_marker:
349
350 This is a very useful file for synchronizing user space
351 with events happening in the kernel. Writing strings into
352 this file will be written into the ftrace buffer.
353
354 It is useful in applications to open this file at the start
355 of the application and just reference the file descriptor
356 for the file.
357
358 void trace_write(const char *fmt, ...)
359 {
360 va_list ap;
361 char buf[256];
362 int n;
363
364 if (trace_fd < 0)
365 return;
366
367 va_start(ap, fmt);
368 n = vsnprintf(buf, 256, fmt, ap);
369 va_end(ap);
370
371 write(trace_fd, buf, n);
372 }
373
374 start:
375
376 trace_fd = open("trace_marker", O_WRONLY);
377
378 uprobe_events:
379
380 Add dynamic tracepoints in programs.
381 See uprobetracer.txt
382
383 uprobe_profile:
384
385 Uprobe statistics. See uprobetracer.txt
386
387 instances:
388
389 This is a way to make multiple trace buffers where different
390 events can be recorded in different buffers.
391 See "Instances" section below.
392
393 events:
394
395 This is the trace event directory. It holds event tracepoints
396 (also known as static tracepoints) that have been compiled
397 into the kernel. It shows what event tracepoints exist
398 and how they are grouped by system. There are "enable"
399 files at various levels that can enable the tracepoints
400 when a "1" is written to them.
401
402 See events.txt for more information.
403
404 per_cpu:
405
406 This is a directory that contains the trace per_cpu information.
407
408 per_cpu/cpu0/buffer_size_kb:
409
410 The ftrace buffer is defined per_cpu. That is, there's a separate
411 buffer for each CPU to allow writes to be done atomically,
412 and free from cache bouncing. These buffers may have different
413 size buffers. This file is similar to the buffer_size_kb
414 file, but it only displays or sets the buffer size for the
415 specific CPU. (here cpu0).
416
417 per_cpu/cpu0/trace:
418
419 This is similar to the "trace" file, but it will only display
420 the data specific for the CPU. If written to, it only clears
421 the specific CPU buffer.
422
423 per_cpu/cpu0/trace_pipe:
424
425 This is similar to the "trace_pipe" file, and is a consuming
426 read, but it will only display (and consume) the data specific
427 for the CPU.
428
429 per_cpu/cpu0/trace_pipe_raw:
430
431 For tools that can parse the ftrace ring buffer binary format,
432 the trace_pipe_raw file can be used to extract the data
433 from the ring buffer directly. With the use of the splice()
434 system call, the buffer data can be quickly transferred to
435 a file or to the network where a server is collecting the
436 data.
437
438 Like trace_pipe, this is a consuming reader, where multiple
439 reads will always produce different data.
440
441 per_cpu/cpu0/snapshot:
442
443 This is similar to the main "snapshot" file, but will only
444 snapshot the current CPU (if supported). It only displays
445 the content of the snapshot for a given CPU, and if
446 written to, only clears this CPU buffer.
447
448 per_cpu/cpu0/snapshot_raw:
449
450 Similar to the trace_pipe_raw, but will read the binary format
451 from the snapshot buffer for the given CPU.
452
453 per_cpu/cpu0/stats:
454
455 This displays certain stats about the ring buffer:
456
457 entries: The number of events that are still in the buffer.
458
459 overrun: The number of lost events due to overwriting when
460 the buffer was full.
461
462 commit overrun: Should always be zero.
463 This gets set if so many events happened within a nested
464 event (ring buffer is re-entrant), that it fills the
465 buffer and starts dropping events.
466
467 bytes: Bytes actually read (not overwritten).
468
469 oldest event ts: The oldest timestamp in the buffer
470
471 now ts: The current timestamp
472
473 dropped events: Events lost due to overwrite option being off.
474
475 read events: The number of events read.
186 476
187The Tracers 477The Tracers
188----------- 478-----------
@@ -234,11 +524,6 @@ Here is the list of current tracers that may be configured.
234 RT tasks (as the current "wakeup" does). This is useful 524 RT tasks (as the current "wakeup" does). This is useful
235 for those interested in wake up timings of RT tasks. 525 for those interested in wake up timings of RT tasks.
236 526
237 "hw-branch-tracer"
238
239 Uses the BTS CPU feature on x86 CPUs to traces all
240 branches executed.
241
242 "nop" 527 "nop"
243 528
244 This is the "trace nothing" tracer. To remove all 529 This is the "trace nothing" tracer. To remove all
@@ -261,70 +546,100 @@ Here is an example of the output format of the file "trace"
261 -------- 546 --------
262# tracer: function 547# tracer: function
263# 548#
264# TASK-PID CPU# TIMESTAMP FUNCTION 549# entries-in-buffer/entries-written: 140080/250280 #P:4
265# | | | | | 550#
266 bash-4251 [01] 10152.583854: path_put <-path_walk 551# _-----=> irqs-off
267 bash-4251 [01] 10152.583855: dput <-path_put 552# / _----=> need-resched
268 bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput 553# | / _---=> hardirq/softirq
554# || / _--=> preempt-depth
555# ||| / delay
556# TASK-PID CPU# |||| TIMESTAMP FUNCTION
557# | | | |||| | |
558 bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath
559 bash-1977 [000] .... 17284.993653: __close_fd <-sys_close
560 bash-1977 [000] .... 17284.993653: _raw_spin_lock <-__close_fd
561 sshd-1974 [003] .... 17284.993653: __srcu_read_unlock <-fsnotify
562 bash-1977 [000] .... 17284.993654: add_preempt_count <-_raw_spin_lock
563 bash-1977 [000] ...1 17284.993655: _raw_spin_unlock <-__close_fd
564 bash-1977 [000] ...1 17284.993656: sub_preempt_count <-_raw_spin_unlock
565 bash-1977 [000] .... 17284.993657: filp_close <-__close_fd
566 bash-1977 [000] .... 17284.993657: dnotify_flush <-filp_close
567 sshd-1974 [003] .... 17284.993658: sys_select <-system_call_fastpath
269 -------- 568 --------
270 569
271A header is printed with the tracer name that is represented by 570A header is printed with the tracer name that is represented by
272the trace. In this case the tracer is "function". Then a header 571the trace. In this case the tracer is "function". Then it shows the
273showing the format. Task name "bash", the task PID "4251", the 572number of events in the buffer as well as the total number of entries
274CPU that it was running on "01", the timestamp in <secs>.<usecs> 573that were written. The difference is the number of entries that were
275format, the function name that was traced "path_put" and the 574lost due to the buffer filling up (250280 - 140080 = 110200 events
276parent function that called this function "path_walk". The 575lost).
277timestamp is the time at which the function was entered. 576
577The header explains the content of the events. Task name "bash", the task
578PID "1977", the CPU that it was running on "000", the latency format
579(explained below), the timestamp in <secs>.<usecs> format, the
580function name that was traced "sys_close" and the parent function that
581called this function "system_call_fastpath". The timestamp is the time
582at which the function was entered.
278 583
279Latency trace format 584Latency trace format
280-------------------- 585--------------------
281 586
282When the latency-format option is enabled, the trace file gives 587When the latency-format option is enabled or when one of the latency
283somewhat more information to see why a latency happened. 588tracers is set, the trace file gives somewhat more information to see
284Here is a typical trace. 589why a latency happened. Here is a typical trace.
285 590
286# tracer: irqsoff 591# tracer: irqsoff
287# 592#
288irqsoff latency trace v1.1.5 on 2.6.26-rc8 593# irqsoff latency trace v1.1.5 on 3.8.0-test+
289-------------------------------------------------------------------- 594# --------------------------------------------------------------------
290 latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 595# latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
291 ----------------- 596# -----------------
292 | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) 597# | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
293 ----------------- 598# -----------------
294 => started at: apic_timer_interrupt 599# => started at: __lock_task_sighand
295 => ended at: do_softirq 600# => ended at: _raw_spin_unlock_irqrestore
296 601#
297# _------=> CPU# 602#
298# / _-----=> irqs-off 603# _------=> CPU#
299# | / _----=> need-resched 604# / _-----=> irqs-off
300# || / _---=> hardirq/softirq 605# | / _----=> need-resched
301# ||| / _--=> preempt-depth 606# || / _---=> hardirq/softirq
302# |||| / 607# ||| / _--=> preempt-depth
303# ||||| delay 608# |||| / delay
304# cmd pid ||||| time | caller 609# cmd pid ||||| time | caller
305# \ / ||||| \ | / 610# \ / ||||| \ | /
306 <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) 611 ps-6143 2d... 0us!: trace_hardirqs_off <-__lock_task_sighand
307 <idle>-0 0d.s. 97us : __do_softirq (do_softirq) 612 ps-6143 2d..1 259us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore
308 <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) 613 ps-6143 2d..1 263us+: time_hardirqs_on <-_raw_spin_unlock_irqrestore
614 ps-6143 2d..1 306us : <stack trace>
615 => trace_hardirqs_on_caller
616 => trace_hardirqs_on
617 => _raw_spin_unlock_irqrestore
618 => do_task_stat
619 => proc_tgid_stat
620 => proc_single_show
621 => seq_read
622 => vfs_read
623 => sys_read
624 => system_call_fastpath
309 625
310 626
311This shows that the current tracer is "irqsoff" tracing the time 627This shows that the current tracer is "irqsoff" tracing the time
312for which interrupts were disabled. It gives the trace version 628for which interrupts were disabled. It gives the trace version (which
313and the version of the kernel upon which this was executed on 629never changes) and the version of the kernel on which this was executed
314(2.6.26-rc8). Then it displays the max latency in microsecs (97 630(3.8.0-test+). Then it displays the max latency in microseconds (259 us). The number
315us). The number of trace entries displayed and the total number 631of trace entries displayed and the total number recorded (both are four: #4/4).
316recorded (both are three: #3/3). The type of preemption that was 632VP, KP, SP, and HP are always zero and are reserved for later use.
317used (PREEMPT). VP, KP, SP, and HP are always zero and are 633#P is the number of online CPUs (#P:4).
318reserved for later use. #P is the number of online CPUS (#P:2).
319 634
320The task is the process that was running when the latency 635The task is the process that was running when the latency
321occurred. (swapper pid: 0). 636occurred. (ps pid: 6143).
322 637
323The start and stop (the functions in which the interrupts were 638The start and stop (the functions in which the interrupts were
324disabled and enabled respectively) that caused the latencies: 639disabled and enabled respectively) that caused the latencies:
325 640
326 apic_timer_interrupt is where the interrupts were disabled. 641 __lock_task_sighand is where the interrupts were disabled.
327 do_softirq is where they were enabled again. 642 _raw_spin_unlock_irqrestore is where they were enabled again.
328 643
329The next lines after the header are the trace itself. The header 644The next lines after the header are the trace itself. The header
330explains which is which. 645explains which is which.
@@ -367,16 +682,43 @@ The above is mostly meaningful for kernel developers.
367 682
368 The rest is the same as the 'trace' file. 683 The rest is the same as the 'trace' file.
369 684
685 Note, the latency tracers will usually end with a back trace
686 to easily find where the latency occurred.
370 687
371trace_options 688trace_options
372------------- 689-------------
373 690
374The trace_options file is used to control what gets printed in 691The trace_options file (or the options directory) is used to control
375the trace output. To see what is available, simply cat the file: 692what gets printed in the trace output, or manipulate the tracers.
693To see what is available, simply cat the file:
376 694
377 cat trace_options 695 cat trace_options
378 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ 696print-parent
379 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj 697nosym-offset
698nosym-addr
699noverbose
700noraw
701nohex
702nobin
703noblock
704nostacktrace
705trace_printk
706noftrace_preempt
707nobranch
708annotate
709nouserstacktrace
710nosym-userobj
711noprintk-msg-only
712context-info
713latency-format
714sleep-time
715graph-time
716record-cmd
717overwrite
718nodisable_on_free
719irq-info
720markers
721function-trace
380 722
381To disable one of the options, echo in the option prepended with 723To disable one of the options, echo in the option prepended with
382"no". 724"no".
@@ -428,13 +770,34 @@ Here are the available options:
428 770
429 bin - This will print out the formats in raw binary. 771 bin - This will print out the formats in raw binary.
430 772
431 block - TBD (needs update) 773 block - When set, reading trace_pipe will not block when polled.
432 774
433 stacktrace - This is one of the options that changes the trace 775 stacktrace - This is one of the options that changes the trace
434 itself. When a trace is recorded, so is the stack 776 itself. When a trace is recorded, so is the stack
435 of functions. This allows for back traces of 777 of functions. This allows for back traces of
436 trace sites. 778 trace sites.
437 779
780 trace_printk - Can disable trace_printk() from writing into the buffer.
781
782 branch - Enable branch tracing with the tracer.
783
784 annotate - It is sometimes confusing when the CPU buffers are full
785 and one CPU buffer had a lot of events recently, thus
786 a shorter time frame, where another CPU may have only had
787 a few events, which lets it have older events. When
788 the trace is reported, it shows the oldest events first,
789 and it may look like only one CPU ran (the one with the
790 oldest events). When the annotate option is set, it will
791 display when a new CPU buffer started:
792
793 <idle>-0 [001] dNs4 21169.031481: wake_up_idle_cpu <-add_timer_on
794 <idle>-0 [001] dNs4 21169.031482: _raw_spin_unlock_irqrestore <-add_timer_on
795 <idle>-0 [001] .Ns4 21169.031484: sub_preempt_count <-_raw_spin_unlock_irqrestore
796##### CPU 2 buffer started ####
797 <idle>-0 [002] .N.1 21169.031484: rcu_idle_exit <-cpu_idle
798 <idle>-0 [001] .Ns3 21169.031484: _raw_spin_unlock <-clocksource_watchdog
799 <idle>-0 [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock
800
438 userstacktrace - This option changes the trace. It records a 801 userstacktrace - This option changes the trace. It records a
439 stacktrace of the current userspace thread. 802 stacktrace of the current userspace thread.
440 803
@@ -451,9 +814,13 @@ Here are the available options:
451 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 814 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
452x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] 815x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
453 816
454 sched-tree - trace all tasks that are on the runqueue, at 817
455 every scheduling event. Will add overhead if 818 printk-msg-only - When set, trace_printk()s will only show the format
456 there's a lot of tasks running at once. 819 and not their parameters (if trace_bprintk() or
820 trace_bputs() was used to save the trace_printk()).
821
822 context-info - Show the comm, PID, timestamp, CPU, and other useful
823 data with each event. When disabled, only the event data is shown.
457 824
458 latency-format - This option changes the trace. When 825 latency-format - This option changes the trace. When
459 it is enabled, the trace displays 826 it is enabled, the trace displays
@@ -461,31 +828,61 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
461 latencies, as described in "Latency 828 latencies, as described in "Latency
462 trace format". 829 trace format".
463 830
831 sleep-time - When running the function graph tracer, include
832 the time a task is scheduled out in its function.
833 When enabled, the time the task has been scheduled out
834 is accounted as part of the function call.
835
836 graph-time - When running the function graph tracer, include the
837 time spent in nested function calls. When this is not set,
838 the time reported for the function will only include
839 the time the function itself executed for, not the time
840 for functions that it called.
841
842 record-cmd - When any event or tracer is enabled, a hook is enabled
843 in the sched_switch trace point to fill comm cache
844 with mapped pids and comms. But this may cause some
845 overhead, and if you only care about pids, and not the
846 name of the task, disabling this option can lower the
847 impact of tracing.
848
464 overwrite - This controls what happens when the trace buffer is 849 overwrite - This controls what happens when the trace buffer is
465 full. If "1" (default), the oldest events are 850 full. If "1" (default), the oldest events are
466 discarded and overwritten. If "0", then the newest 851 discarded and overwritten. If "0", then the newest
467 events are discarded. 852 events are discarded.
853 (see per_cpu/cpu0/stats for overrun and dropped)
468 854
469ftrace_enabled 855 disable_on_free - When the free_buffer is closed, tracing will
470-------------- 856 stop (tracing_on set to 0).
471 857
472The following tracers (listed below) give different output 858 irq-info - Shows the interrupt, preempt count, need resched data.
473depending on whether or not the sysctl ftrace_enabled is set. To 859 When disabled, the trace looks like:
474set ftrace_enabled, one can either use the sysctl function or
475set it via the proc file system interface.
476 860
477 sysctl kernel.ftrace_enabled=1 861# tracer: function
862#
863# entries-in-buffer/entries-written: 144405/9452052 #P:4
864#
865# TASK-PID CPU# TIMESTAMP FUNCTION
866# | | | | |
867 <idle>-0 [002] 23636.756054: ttwu_do_activate.constprop.89 <-try_to_wake_up
868 <idle>-0 [002] 23636.756054: activate_task <-ttwu_do_activate.constprop.89
869 <idle>-0 [002] 23636.756055: enqueue_task <-activate_task
478 870
479 or
480 871
481 echo 1 > /proc/sys/kernel/ftrace_enabled 872 markers - When set, the trace_marker is writable (only by root).
873 When disabled, the trace_marker will error with EINVAL
874 on write.
875
876
877 function-trace - The latency tracers will enable function tracing
878 if this option is enabled (which it is by default). When
879 it is disabled, the latency tracers do not trace
880 functions. This keeps the overhead of the tracer down
881 when performing latency tests.
482 882
483To disable ftrace_enabled simply replace the '1' with '0' in the 883 Note: Some tracers have their own options. They only appear
484above commands. 884 when the tracer is active.
485 885
486When ftrace_enabled is set the tracers will also record the
487functions that are within the trace. The descriptions of the
488tracers will also show an example with ftrace enabled.
489 886
490 887
491irqsoff 888irqsoff
@@ -506,95 +903,133 @@ new trace is saved.
506To reset the maximum, echo 0 into tracing_max_latency. Here is 903To reset the maximum, echo 0 into tracing_max_latency. Here is
507an example: 904an example:
508 905
906 # echo 0 > options/function-trace
509 # echo irqsoff > current_tracer 907 # echo irqsoff > current_tracer
510 # echo latency-format > trace_options
511 # echo 0 > tracing_max_latency
512 # echo 1 > tracing_on 908 # echo 1 > tracing_on
909 # echo 0 > tracing_max_latency
513 # ls -ltr 910 # ls -ltr
514 [...] 911 [...]
515 # echo 0 > tracing_on 912 # echo 0 > tracing_on
516 # cat trace 913 # cat trace
517# tracer: irqsoff 914# tracer: irqsoff
518# 915#
519irqsoff latency trace v1.1.5 on 2.6.26 916# irqsoff latency trace v1.1.5 on 3.8.0-test+
520-------------------------------------------------------------------- 917# --------------------------------------------------------------------
521 latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 918# latency: 16 us, #4/4, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
522 ----------------- 919# -----------------
523 | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) 920# | task: swapper/0-0 (uid:0 nice:0 policy:0 rt_prio:0)
524 ----------------- 921# -----------------
525 => started at: sys_setpgid 922# => started at: run_timer_softirq
526 => ended at: sys_setpgid 923# => ended at: run_timer_softirq
527 924#
528# _------=> CPU# 925#
529# / _-----=> irqs-off 926# _------=> CPU#
530# | / _----=> need-resched 927# / _-----=> irqs-off
531# || / _---=> hardirq/softirq 928# | / _----=> need-resched
532# ||| / _--=> preempt-depth 929# || / _---=> hardirq/softirq
533# |||| / 930# ||| / _--=> preempt-depth
534# ||||| delay 931# |||| / delay
535# cmd pid ||||| time | caller 932# cmd pid ||||| time | caller
536# \ / ||||| \ | / 933# \ / ||||| \ | /
537 bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) 934 <idle>-0 0d.s2 0us+: _raw_spin_lock_irq <-run_timer_softirq
538 bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) 935 <idle>-0 0dNs3 17us : _raw_spin_unlock_irq <-run_timer_softirq
539 bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) 936 <idle>-0 0dNs3 17us+: trace_hardirqs_on <-run_timer_softirq
540 937 <idle>-0 0dNs3 25us : <stack trace>
541 938 => _raw_spin_unlock_irq
542Here we see that that we had a latency of 12 microsecs (which is 939 => run_timer_softirq
543very good). The _write_lock_irq in sys_setpgid disabled 940 => __do_softirq
544interrupts. The difference between the 12 and the displayed 941 => call_softirq
545timestamp 14us occurred because the clock was incremented 942 => do_softirq
943 => irq_exit
944 => smp_apic_timer_interrupt
945 => apic_timer_interrupt
946 => rcu_idle_exit
947 => cpu_idle
948 => rest_init
949 => start_kernel
950 => x86_64_start_reservations
951 => x86_64_start_kernel
952
953Here we see that we had a latency of 16 microseconds (which is
954very good). The _raw_spin_lock_irq in run_timer_softirq disabled
955interrupts. The difference between the 16 and the displayed
956timestamp 25us occurred because the clock was incremented
546between the time of recording the max latency and the time of 957between the time of recording the max latency and the time of
547recording the function that had that latency. 958recording the function that had that latency.
548 959
549Note the above example had ftrace_enabled not set. If we set the 960Note the above example had function-trace not set. If we set
550ftrace_enabled, we get a much larger output: 961function-trace, we get a much larger output:
962
963 with echo 1 > options/function-trace
551 964
552# tracer: irqsoff 965# tracer: irqsoff
553# 966#
554irqsoff latency trace v1.1.5 on 2.6.26-rc8 967# irqsoff latency trace v1.1.5 on 3.8.0-test+
555-------------------------------------------------------------------- 968# --------------------------------------------------------------------
556 latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 969# latency: 71 us, #168/168, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
557 ----------------- 970# -----------------
558 | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) 971# | task: bash-2042 (uid:0 nice:0 policy:0 rt_prio:0)
559 ----------------- 972# -----------------
560 => started at: __alloc_pages_internal 973# => started at: ata_scsi_queuecmd
561 => ended at: __alloc_pages_internal 974# => ended at: ata_scsi_queuecmd
562 975#
563# _------=> CPU# 976#
564# / _-----=> irqs-off 977# _------=> CPU#
565# | / _----=> need-resched 978# / _-----=> irqs-off
566# || / _---=> hardirq/softirq 979# | / _----=> need-resched
567# ||| / _--=> preempt-depth 980# || / _---=> hardirq/softirq
568# |||| / 981# ||| / _--=> preempt-depth
569# ||||| delay 982# |||| / delay
570# cmd pid ||||| time | caller 983# cmd pid ||||| time | caller
571# \ / ||||| \ | / 984# \ / ||||| \ | /
572 ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) 985 bash-2042 3d... 0us : _raw_spin_lock_irqsave <-ata_scsi_queuecmd
573 ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) 986 bash-2042 3d... 0us : add_preempt_count <-_raw_spin_lock_irqsave
574 ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) 987 bash-2042 3d..1 1us : ata_scsi_find_dev <-ata_scsi_queuecmd
575 ls-4339 0d..1 4us : add_preempt_count (_spin_lock) 988 bash-2042 3d..1 1us : __ata_scsi_find_dev <-ata_scsi_find_dev
576 ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) 989 bash-2042 3d..1 2us : ata_find_dev.part.14 <-__ata_scsi_find_dev
577 ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) 990 bash-2042 3d..1 2us : ata_qc_new_init <-__ata_scsi_queuecmd
578 ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) 991 bash-2042 3d..1 3us : ata_sg_init <-__ata_scsi_queuecmd
579 ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) 992 bash-2042 3d..1 4us : ata_scsi_rw_xlat <-__ata_scsi_queuecmd
580 ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) 993 bash-2042 3d..1 4us : ata_build_rw_tf <-ata_scsi_rw_xlat
581 ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest)
582 ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk)
583 ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue)
584[...] 994[...]
585 ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) 995 bash-2042 3d..1 67us : delay_tsc <-__delay
586 ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) 996 bash-2042 3d..1 67us : add_preempt_count <-delay_tsc
587 ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) 997 bash-2042 3d..2 67us : sub_preempt_count <-delay_tsc
588 ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) 998 bash-2042 3d..1 67us : add_preempt_count <-delay_tsc
589 ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) 999 bash-2042 3d..2 68us : sub_preempt_count <-delay_tsc
590 ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) 1000 bash-2042 3d..1 68us+: ata_bmdma_start <-ata_bmdma_qc_issue
591 ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) 1001 bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
592 ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) 1002 bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
593 ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) 1003 bash-2042 3d..1 72us+: trace_hardirqs_on <-ata_scsi_queuecmd
594 1004 bash-2042 3d..1 120us : <stack trace>
595 1005 => _raw_spin_unlock_irqrestore
596 1006 => ata_scsi_queuecmd
597Here we traced a 50 microsecond latency. But we also see all the 1007 => scsi_dispatch_cmd
1008 => scsi_request_fn
1009 => __blk_run_queue_uncond
1010 => __blk_run_queue
1011 => blk_queue_bio
1012 => generic_make_request
1013 => submit_bio
1014 => submit_bh
1015 => __ext3_get_inode_loc
1016 => ext3_iget
1017 => ext3_lookup
1018 => lookup_real
1019 => __lookup_hash
1020 => walk_component
1021 => lookup_last
1022 => path_lookupat
1023 => filename_lookup
1024 => user_path_at_empty
1025 => user_path_at
1026 => vfs_fstatat
1027 => vfs_stat
1028 => sys_newstat
1029 => system_call_fastpath
1030
1031
1032Here we traced a 71 microsecond latency. But we also see all the
598functions that were called during that time. Note that by 1033functions that were called during that time. Note that by
599enabling function tracing, we incur an added overhead. This 1034enabling function tracing, we incur an added overhead. This
600overhead may extend the latency times. But nevertheless, this 1035overhead may extend the latency times. But nevertheless, this
@@ -614,120 +1049,122 @@ Like the irqsoff tracer, it records the maximum latency for
614which preemption was disabled. The control of preemptoff tracer 1049which preemption was disabled. The control of preemptoff tracer
615is much like the irqsoff tracer. 1050is much like the irqsoff tracer.
616 1051
1052 # echo 0 > options/function-trace
617 # echo preemptoff > current_tracer 1053 # echo preemptoff > current_tracer
618 # echo latency-format > trace_options
619 # echo 0 > tracing_max_latency
620 # echo 1 > tracing_on 1054 # echo 1 > tracing_on
1055 # echo 0 > tracing_max_latency
621 # ls -ltr 1056 # ls -ltr
622 [...] 1057 [...]
623 # echo 0 > tracing_on 1058 # echo 0 > tracing_on
624 # cat trace 1059 # cat trace
625# tracer: preemptoff 1060# tracer: preemptoff
626# 1061#
627preemptoff latency trace v1.1.5 on 2.6.26-rc8 1062# preemptoff latency trace v1.1.5 on 3.8.0-test+
628-------------------------------------------------------------------- 1063# --------------------------------------------------------------------
629 latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1064# latency: 46 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
630 ----------------- 1065# -----------------
631 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) 1066# | task: sshd-1991 (uid:0 nice:0 policy:0 rt_prio:0)
632 ----------------- 1067# -----------------
633 => started at: do_IRQ 1068# => started at: do_IRQ
634 => ended at: __do_softirq 1069# => ended at: do_IRQ
635 1070#
636# _------=> CPU# 1071#
637# / _-----=> irqs-off 1072# _------=> CPU#
638# | / _----=> need-resched 1073# / _-----=> irqs-off
639# || / _---=> hardirq/softirq 1074# | / _----=> need-resched
640# ||| / _--=> preempt-depth 1075# || / _---=> hardirq/softirq
641# |||| / 1076# ||| / _--=> preempt-depth
642# ||||| delay 1077# |||| / delay
643# cmd pid ||||| time | caller 1078# cmd pid ||||| time | caller
644# \ / ||||| \ | / 1079# \ / ||||| \ | /
645 sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) 1080 sshd-1991 1d.h. 0us+: irq_enter <-do_IRQ
646 sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) 1081 sshd-1991 1d..1 46us : irq_exit <-do_IRQ
647 sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) 1082 sshd-1991 1d..1 47us+: trace_preempt_on <-do_IRQ
1083 sshd-1991 1d..1 52us : <stack trace>
1084 => sub_preempt_count
1085 => irq_exit
1086 => do_IRQ
1087 => ret_from_intr
648 1088
649 1089
650This has some more changes. Preemption was disabled when an 1090This has some more changes. Preemption was disabled when an
651interrupt came in (notice the 'h'), and was enabled while doing 1091interrupt came in (notice the 'h'), and was enabled on exit.
652a softirq. (notice the 's'). But we also see that interrupts 1092But we also see that interrupts have been disabled when entering
653have been disabled when entering the preempt off section and 1093the preempt off section and leaving it (the 'd'). We do not know if
654leaving it (the 'd'). We do not know if interrupts were enabled 1094interrupts were enabled in the mean time or shortly after this
655in the mean time. 1095was over.
656 1096
657# tracer: preemptoff 1097# tracer: preemptoff
658# 1098#
659preemptoff latency trace v1.1.5 on 2.6.26-rc8 1099# preemptoff latency trace v1.1.5 on 3.8.0-test+
660-------------------------------------------------------------------- 1100# --------------------------------------------------------------------
661 latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1101# latency: 83 us, #241/241, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
662 ----------------- 1102# -----------------
663 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) 1103# | task: bash-1994 (uid:0 nice:0 policy:0 rt_prio:0)
664 ----------------- 1104# -----------------
665 => started at: remove_wait_queue 1105# => started at: wake_up_new_task
666 => ended at: __do_softirq 1106# => ended at: task_rq_unlock
667 1107#
668# _------=> CPU# 1108#
669# / _-----=> irqs-off 1109# _------=> CPU#
670# | / _----=> need-resched 1110# / _-----=> irqs-off
671# || / _---=> hardirq/softirq 1111# | / _----=> need-resched
672# ||| / _--=> preempt-depth 1112# || / _---=> hardirq/softirq
673# |||| / 1113# ||| / _--=> preempt-depth
674# ||||| delay 1114# |||| / delay
675# cmd pid ||||| time | caller 1115# cmd pid ||||| time | caller
676# \ / ||||| \ | / 1116# \ / ||||| \ | /
677 sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) 1117 bash-1994 1d..1 0us : _raw_spin_lock_irqsave <-wake_up_new_task
678 sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) 1118 bash-1994 1d..1 0us : select_task_rq_fair <-select_task_rq
679 sshd-4261 0d..1 2us : do_IRQ (common_interrupt) 1119 bash-1994 1d..1 1us : __rcu_read_lock <-select_task_rq_fair
680 sshd-4261 0d..1 2us : irq_enter (do_IRQ) 1120 bash-1994 1d..1 1us : source_load <-select_task_rq_fair
681 sshd-4261 0d..1 2us : idle_cpu (irq_enter) 1121 bash-1994 1d..1 1us : source_load <-select_task_rq_fair
682 sshd-4261 0d..1 3us : add_preempt_count (irq_enter)
683 sshd-4261 0d.h1 3us : idle_cpu (irq_enter)
684 sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ)
685[...] 1122[...]
686 sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) 1123 bash-1994 1d..1 12us : irq_enter <-smp_apic_timer_interrupt
687 sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) 1124 bash-1994 1d..1 12us : rcu_irq_enter <-irq_enter
688 sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) 1125 bash-1994 1d..1 13us : add_preempt_count <-irq_enter
689 sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) 1126 bash-1994 1d.h1 13us : exit_idle <-smp_apic_timer_interrupt
690 sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) 1127 bash-1994 1d.h1 13us : hrtimer_interrupt <-smp_apic_timer_interrupt
691 sshd-4261 0d.h1 14us : irq_exit (do_IRQ) 1128 bash-1994 1d.h1 13us : _raw_spin_lock <-hrtimer_interrupt
692 sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) 1129 bash-1994 1d.h1 14us : add_preempt_count <-_raw_spin_lock
693 sshd-4261 0d..2 15us : do_softirq (irq_exit) 1130 bash-1994 1d.h2 14us : ktime_get_update_offsets <-hrtimer_interrupt
694 sshd-4261 0d... 15us : __do_softirq (do_softirq)
695 sshd-4261 0d... 16us : __local_bh_disable (__do_softirq)
696 sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable)
697 sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable)
698 sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable)
699 sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable)
700[...] 1131[...]
701 sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) 1132 bash-1994 1d.h1 35us : lapic_next_event <-clockevents_program_event
702 sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) 1133 bash-1994 1d.h1 35us : irq_exit <-smp_apic_timer_interrupt
703 sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) 1134 bash-1994 1d.h1 36us : sub_preempt_count <-irq_exit
704 sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) 1135 bash-1994 1d..2 36us : do_softirq <-irq_exit
705 sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) 1136 bash-1994 1d..2 36us : __do_softirq <-call_softirq
706 sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) 1137 bash-1994 1d..2 36us : __local_bh_disable <-__do_softirq
707 sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) 1138 bash-1994 1d.s2 37us : add_preempt_count <-_raw_spin_lock_irq
708 sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) 1139 bash-1994 1d.s3 38us : _raw_spin_unlock <-run_timer_softirq
1140 bash-1994 1d.s3 39us : sub_preempt_count <-_raw_spin_unlock
1141 bash-1994 1d.s2 39us : call_timer_fn <-run_timer_softirq
709[...] 1142[...]
710 sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) 1143 bash-1994 1dNs2 81us : cpu_needs_another_gp <-rcu_process_callbacks
711 sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) 1144 bash-1994 1dNs2 82us : __local_bh_enable <-__do_softirq
1145 bash-1994 1dNs2 82us : sub_preempt_count <-__local_bh_enable
1146 bash-1994 1dN.2 82us : idle_cpu <-irq_exit
1147 bash-1994 1dN.2 83us : rcu_irq_exit <-irq_exit
1148 bash-1994 1dN.2 83us : sub_preempt_count <-irq_exit
1149 bash-1994 1.N.1 84us : _raw_spin_unlock_irqrestore <-task_rq_unlock
1150 bash-1994 1.N.1 84us+: trace_preempt_on <-task_rq_unlock
1151 bash-1994 1.N.1 104us : <stack trace>
1152 => sub_preempt_count
1153 => _raw_spin_unlock_irqrestore
1154 => task_rq_unlock
1155 => wake_up_new_task
1156 => do_fork
1157 => sys_clone
1158 => stub_clone
712 1159
713 1160
714The above is an example of the preemptoff trace with 1161The above is an example of the preemptoff trace with
715ftrace_enabled set. Here we see that interrupts were disabled 1162function-trace set. Here we see that interrupts were not disabled
716the entire time. The irq_enter code lets us know that we entered 1163the entire time. The irq_enter code lets us know that we entered
717an interrupt 'h'. Before that, the functions being traced still 1164an interrupt 'h'. Before that, the functions being traced still
718show that it is not in an interrupt, but we can see from the 1165show that it is not in an interrupt, but we can see from the
719functions themselves that this is not the case. 1166functions themselves that this is not the case.
720 1167
721Notice that __do_softirq when called does not have a
722preempt_count. It may seem that we missed a preempt enabling.
723What really happened is that the preempt count is held on the
724thread's stack and we switched to the softirq stack (4K stacks
725in effect). The code does not copy the preempt count, but
726because interrupts are disabled, we do not need to worry about
727it. Having a tracer like this is good for letting people know
728what really happens inside the kernel.
729
730
731preemptirqsoff 1168preemptirqsoff
732-------------- 1169--------------
733 1170
@@ -762,38 +1199,57 @@ tracer.
762Again, using this trace is much like the irqsoff and preemptoff 1199Again, using this trace is much like the irqsoff and preemptoff
763tracers. 1200tracers.
764 1201
1202 # echo 0 > options/function-trace
765 # echo preemptirqsoff > current_tracer 1203 # echo preemptirqsoff > current_tracer
766 # echo latency-format > trace_options
767 # echo 0 > tracing_max_latency
768 # echo 1 > tracing_on 1204 # echo 1 > tracing_on
1205 # echo 0 > tracing_max_latency
769 # ls -ltr 1206 # ls -ltr
770 [...] 1207 [...]
771 # echo 0 > tracing_on 1208 # echo 0 > tracing_on
772 # cat trace 1209 # cat trace
773# tracer: preemptirqsoff 1210# tracer: preemptirqsoff
774# 1211#
775preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 1212# preemptirqsoff latency trace v1.1.5 on 3.8.0-test+
776-------------------------------------------------------------------- 1213# --------------------------------------------------------------------
777 latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1214# latency: 100 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
778 ----------------- 1215# -----------------
779 | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) 1216# | task: ls-2230 (uid:0 nice:0 policy:0 rt_prio:0)
780 ----------------- 1217# -----------------
781 => started at: apic_timer_interrupt 1218# => started at: ata_scsi_queuecmd
782 => ended at: __do_softirq 1219# => ended at: ata_scsi_queuecmd
783 1220#
784# _------=> CPU# 1221#
785# / _-----=> irqs-off 1222# _------=> CPU#
786# | / _----=> need-resched 1223# / _-----=> irqs-off
787# || / _---=> hardirq/softirq 1224# | / _----=> need-resched
788# ||| / _--=> preempt-depth 1225# || / _---=> hardirq/softirq
789# |||| / 1226# ||| / _--=> preempt-depth
790# ||||| delay 1227# |||| / delay
791# cmd pid ||||| time | caller 1228# cmd pid ||||| time | caller
792# \ / ||||| \ | / 1229# \ / ||||| \ | /
793 ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) 1230 ls-2230 3d... 0us+: _raw_spin_lock_irqsave <-ata_scsi_queuecmd
794 ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) 1231 ls-2230 3...1 100us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
795 ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) 1232 ls-2230 3...1 101us+: trace_preempt_on <-ata_scsi_queuecmd
796 1233 ls-2230 3...1 111us : <stack trace>
1234 => sub_preempt_count
1235 => _raw_spin_unlock_irqrestore
1236 => ata_scsi_queuecmd
1237 => scsi_dispatch_cmd
1238 => scsi_request_fn
1239 => __blk_run_queue_uncond
1240 => __blk_run_queue
1241 => blk_queue_bio
1242 => generic_make_request
1243 => submit_bio
1244 => submit_bh
1245 => ext3_bread
1246 => ext3_dir_bread
1247 => htree_dirblock_to_tree
1248 => ext3_htree_fill_tree
1249 => ext3_readdir
1250 => vfs_readdir
1251 => sys_getdents
1252 => system_call_fastpath
797 1253
798 1254
799The trace_hardirqs_off_thunk is called from assembly on x86 when 1255The trace_hardirqs_off_thunk is called from assembly on x86 when
@@ -802,105 +1258,158 @@ function tracing, we do not know if interrupts were enabled
802within the preemption points. We do see that it started with 1258within the preemption points. We do see that it started with
803preemption enabled. 1259preemption enabled.
804 1260
805Here is a trace with ftrace_enabled set: 1261Here is a trace with function-trace set:
806
807 1262
808# tracer: preemptirqsoff 1263# tracer: preemptirqsoff
809# 1264#
810preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 1265# preemptirqsoff latency trace v1.1.5 on 3.8.0-test+
811-------------------------------------------------------------------- 1266# --------------------------------------------------------------------
812 latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1267# latency: 161 us, #339/339, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
813 ----------------- 1268# -----------------
814 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) 1269# | task: ls-2269 (uid:0 nice:0 policy:0 rt_prio:0)
815 ----------------- 1270# -----------------
816 => started at: write_chan 1271# => started at: schedule
817 => ended at: __do_softirq 1272# => ended at: mutex_unlock
818 1273#
819# _------=> CPU# 1274#
820# / _-----=> irqs-off 1275# _------=> CPU#
821# | / _----=> need-resched 1276# / _-----=> irqs-off
822# || / _---=> hardirq/softirq 1277# | / _----=> need-resched
823# ||| / _--=> preempt-depth 1278# || / _---=> hardirq/softirq
824# |||| / 1279# ||| / _--=> preempt-depth
825# ||||| delay 1280# |||| / delay
826# cmd pid ||||| time | caller 1281# cmd pid ||||| time | caller
827# \ / ||||| \ | / 1282# \ / ||||| \ | /
828 ls-4473 0.N.. 0us : preempt_schedule (write_chan) 1283kworker/-59 3...1 0us : __schedule <-schedule
829 ls-4473 0dN.1 1us : _spin_lock (schedule) 1284kworker/-59 3d..1 0us : rcu_preempt_qs <-rcu_note_context_switch
830 ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) 1285kworker/-59 3d..1 1us : add_preempt_count <-_raw_spin_lock_irq
831 ls-4473 0d..2 2us : put_prev_task_fair (schedule) 1286kworker/-59 3d..2 1us : deactivate_task <-__schedule
832[...] 1287kworker/-59 3d..2 1us : dequeue_task <-deactivate_task
833 ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) 1288kworker/-59 3d..2 2us : update_rq_clock <-dequeue_task
834 ls-4473 0d..2 13us : __switch_to (schedule) 1289kworker/-59 3d..2 2us : dequeue_task_fair <-dequeue_task
835 sshd-4261 0d..2 14us : finish_task_switch (schedule) 1290kworker/-59 3d..2 2us : update_curr <-dequeue_task_fair
836 sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) 1291kworker/-59 3d..2 2us : update_min_vruntime <-update_curr
837 sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) 1292kworker/-59 3d..2 3us : cpuacct_charge <-update_curr
838 sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) 1293kworker/-59 3d..2 3us : __rcu_read_lock <-cpuacct_charge
839 sshd-4261 0d..2 16us : do_IRQ (common_interrupt) 1294kworker/-59 3d..2 3us : __rcu_read_unlock <-cpuacct_charge
840 sshd-4261 0d..2 17us : irq_enter (do_IRQ) 1295kworker/-59 3d..2 3us : update_cfs_rq_blocked_load <-dequeue_task_fair
841 sshd-4261 0d..2 17us : idle_cpu (irq_enter) 1296kworker/-59 3d..2 4us : clear_buddies <-dequeue_task_fair
842 sshd-4261 0d..2 18us : add_preempt_count (irq_enter) 1297kworker/-59 3d..2 4us : account_entity_dequeue <-dequeue_task_fair
843 sshd-4261 0d.h2 18us : idle_cpu (irq_enter) 1298kworker/-59 3d..2 4us : update_min_vruntime <-dequeue_task_fair
844 sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) 1299kworker/-59 3d..2 4us : update_cfs_shares <-dequeue_task_fair
845 sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) 1300kworker/-59 3d..2 5us : hrtick_update <-dequeue_task_fair
846 sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) 1301kworker/-59 3d..2 5us : wq_worker_sleeping <-__schedule
847 sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) 1302kworker/-59 3d..2 5us : kthread_data <-wq_worker_sleeping
848 sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) 1303kworker/-59 3d..2 5us : put_prev_task_fair <-__schedule
849[...] 1304kworker/-59 3d..2 6us : pick_next_task_fair <-pick_next_task
850 sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) 1305kworker/-59 3d..2 6us : clear_buddies <-pick_next_task_fair
851 sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) 1306kworker/-59 3d..2 6us : set_next_entity <-pick_next_task_fair
852 sshd-4261 0d.h2 29us : irq_exit (do_IRQ) 1307kworker/-59 3d..2 6us : update_stats_wait_end <-set_next_entity
853 sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) 1308 ls-2269 3d..2 7us : finish_task_switch <-__schedule
854 sshd-4261 0d..3 30us : do_softirq (irq_exit) 1309 ls-2269 3d..2 7us : _raw_spin_unlock_irq <-finish_task_switch
855 sshd-4261 0d... 30us : __do_softirq (do_softirq) 1310 ls-2269 3d..2 8us : do_IRQ <-ret_from_intr
856 sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) 1311 ls-2269 3d..2 8us : irq_enter <-do_IRQ
857 sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) 1312 ls-2269 3d..2 8us : rcu_irq_enter <-irq_enter
858 sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) 1313 ls-2269 3d..2 9us : add_preempt_count <-irq_enter
1314 ls-2269 3d.h2 9us : exit_idle <-do_IRQ
859[...] 1315[...]
860 sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) 1316 ls-2269 3d.h3 20us : sub_preempt_count <-_raw_spin_unlock
861 sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) 1317 ls-2269 3d.h2 20us : irq_exit <-do_IRQ
862 sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) 1318 ls-2269 3d.h2 21us : sub_preempt_count <-irq_exit
863 sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) 1319 ls-2269 3d..3 21us : do_softirq <-irq_exit
864 sshd-4261 0d.s3 45us : idle_cpu (irq_enter) 1320 ls-2269 3d..3 21us : __do_softirq <-call_softirq
865 sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) 1321 ls-2269 3d..3 21us+: __local_bh_disable <-__do_softirq
866 sshd-4261 0d.H3 46us : idle_cpu (irq_enter) 1322 ls-2269 3d.s4 29us : sub_preempt_count <-_local_bh_enable_ip
867 sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) 1323 ls-2269 3d.s5 29us : sub_preempt_count <-_local_bh_enable_ip
868 sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) 1324 ls-2269 3d.s5 31us : do_IRQ <-ret_from_intr
1325 ls-2269 3d.s5 31us : irq_enter <-do_IRQ
1326 ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter
869[...] 1327[...]
870 sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) 1328 ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter
871 sshd-4261 0d.H3 82us : ktime_get (tick_program_event) 1329 ls-2269 3d.s5 32us : add_preempt_count <-irq_enter
872 sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) 1330 ls-2269 3d.H5 32us : exit_idle <-do_IRQ
873 sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) 1331 ls-2269 3d.H5 32us : handle_irq <-do_IRQ
874 sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) 1332 ls-2269 3d.H5 32us : irq_to_desc <-handle_irq
875 sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) 1333 ls-2269 3d.H5 33us : handle_fasteoi_irq <-handle_irq
876 sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event)
877 sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt)
878 sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit)
879 sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit)
880 sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable)
881[...] 1334[...]
882 sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) 1335 ls-2269 3d.s5 158us : _raw_spin_unlock_irqrestore <-rtl8139_poll
883 sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) 1336 ls-2269 3d.s3 158us : net_rps_action_and_irq_enable.isra.65 <-net_rx_action
884 sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) 1337 ls-2269 3d.s3 159us : __local_bh_enable <-__do_softirq
885 sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) 1338 ls-2269 3d.s3 159us : sub_preempt_count <-__local_bh_enable
886 sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) 1339 ls-2269 3d..3 159us : idle_cpu <-irq_exit
887 sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) 1340 ls-2269 3d..3 159us : rcu_irq_exit <-irq_exit
888 sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) 1341 ls-2269 3d..3 160us : sub_preempt_count <-irq_exit
889 1342 ls-2269 3d... 161us : __mutex_unlock_slowpath <-mutex_unlock
890 1343 ls-2269 3d... 162us+: trace_hardirqs_on <-mutex_unlock
891This is a very interesting trace. It started with the preemption 1344 ls-2269 3d... 186us : <stack trace>
892of the ls task. We see that the task had the "need_resched" bit 1345 => __mutex_unlock_slowpath
893set via the 'N' in the trace. Interrupts were disabled before 1346 => mutex_unlock
894the spin_lock at the beginning of the trace. We see that a 1347 => process_output
895schedule took place to run sshd. When the interrupts were 1348 => n_tty_write
896enabled, we took an interrupt. On return from the interrupt 1349 => tty_write
897handler, the softirq ran. We took another interrupt while 1350 => vfs_write
898running the softirq as we see from the capital 'H'. 1351 => sys_write
1352 => system_call_fastpath
1353
1354This is an interesting trace. It started with kworker running and
1355scheduling out and ls taking over. But as soon as ls released the
1356rq lock and enabled interrupts (but not preemption) an interrupt
1357triggered. When the interrupt finished, it started running softirqs.
1358But while the softirq was running, another interrupt triggered.
1359When an interrupt is running inside a softirq, the annotation is 'H'.
899 1360
900 1361
901wakeup 1362wakeup
902------ 1363------
903 1364
1365One common case that people are interested in tracing is the
1366time it takes for a task that is woken to actually wake up.
1367Now for non Real-Time tasks, this can be arbitrary. But tracing
1368it nonetheless can be interesting.
1369
1370Without function tracing:
1371
1372 # echo 0 > options/function-trace
1373 # echo wakeup > current_tracer
1374 # echo 1 > tracing_on
1375 # echo 0 > tracing_max_latency
1376 # chrt -f 5 sleep 1
1377 # echo 0 > tracing_on
1378 # cat trace
1379# tracer: wakeup
1380#
1381# wakeup latency trace v1.1.5 on 3.8.0-test+
1382# --------------------------------------------------------------------
1383# latency: 15 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
1384# -----------------
1385# | task: kworker/3:1H-312 (uid:0 nice:-20 policy:0 rt_prio:0)
1386# -----------------
1387#
1388# _------=> CPU#
1389# / _-----=> irqs-off
1390# | / _----=> need-resched
1391# || / _---=> hardirq/softirq
1392# ||| / _--=> preempt-depth
1393# |||| / delay
1394# cmd pid ||||| time | caller
1395# \ / ||||| \ | /
1396 <idle>-0 3dNs7 0us : 0:120:R + [003] 312:100:R kworker/3:1H
1397 <idle>-0 3dNs7 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up
1398 <idle>-0 3d..3 15us : __schedule <-schedule
1399 <idle>-0 3d..3 15us : 0:120:R ==> [003] 312:100:R kworker/3:1H
1400
1401The tracer only traces the highest priority task in the system
1402to avoid tracing the normal circumstances. Here we see that
1403the kworker with a nice priority of -20 (not very nice), took
1404just 15 microseconds from the time it woke up, to the time it
1405ran.
1406
1407Non Real-Time tasks are not that interesting. A more interesting
1408trace is to concentrate only on Real-Time tasks.
1409
1410wakeup_rt
1411---------
1412
904In a Real-Time environment it is very important to know the 1413In a Real-Time environment it is very important to know the
905wakeup time it takes for the highest priority task that is woken 1414wakeup time it takes for the highest priority task that is woken
906up to the time that it executes. This is also known as "schedule 1415up to the time that it executes. This is also known as "schedule
@@ -914,124 +1423,229 @@ Real-Time environments are interested in the worst case latency.
914That is the longest latency it takes for something to happen, 1423That is the longest latency it takes for something to happen,
915and not the average. We can have a very fast scheduler that may 1424and not the average. We can have a very fast scheduler that may
916only have a large latency once in a while, but that would not 1425only have a large latency once in a while, but that would not
917work well with Real-Time tasks. The wakeup tracer was designed 1426work well with Real-Time tasks. The wakeup_rt tracer was designed
918to record the worst case wakeups of RT tasks. Non-RT tasks are 1427to record the worst case wakeups of RT tasks. Non-RT tasks are
919not recorded because the tracer only records one worst case and 1428not recorded because the tracer only records one worst case and
920tracing non-RT tasks that are unpredictable will overwrite the 1429tracing non-RT tasks that are unpredictable will overwrite the
921worst case latency of RT tasks. 1430worst case latency of RT tasks (just run the normal wakeup
1431tracer for a while to see that effect).
922 1432
923Since this tracer only deals with RT tasks, we will run this 1433Since this tracer only deals with RT tasks, we will run this
924slightly differently than we did with the previous tracers. 1434slightly differently than we did with the previous tracers.
925Instead of performing an 'ls', we will run 'sleep 1' under 1435Instead of performing an 'ls', we will run 'sleep 1' under
926'chrt' which changes the priority of the task. 1436'chrt' which changes the priority of the task.
927 1437
928 # echo wakeup > current_tracer 1438 # echo 0 > options/function-trace
929 # echo latency-format > trace_options 1439 # echo wakeup_rt > current_tracer
930 # echo 0 > tracing_max_latency
931 # echo 1 > tracing_on 1440 # echo 1 > tracing_on
1441 # echo 0 > tracing_max_latency
932 # chrt -f 5 sleep 1 1442 # chrt -f 5 sleep 1
933 # echo 0 > tracing_on 1443 # echo 0 > tracing_on
934 # cat trace 1444 # cat trace
935# tracer: wakeup 1445# tracer: wakeup
936# 1446#
937wakeup latency trace v1.1.5 on 2.6.26-rc8 1447# tracer: wakeup_rt
938-------------------------------------------------------------------- 1448#
939 latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1449# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
940 ----------------- 1450# --------------------------------------------------------------------
941 | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) 1451# latency: 5 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
942 ----------------- 1452# -----------------
943 1453# | task: sleep-2389 (uid:0 nice:0 policy:1 rt_prio:5)
944# _------=> CPU# 1454# -----------------
945# / _-----=> irqs-off 1455#
946# | / _----=> need-resched 1456# _------=> CPU#
947# || / _---=> hardirq/softirq 1457# / _-----=> irqs-off
948# ||| / _--=> preempt-depth 1458# | / _----=> need-resched
949# |||| / 1459# || / _---=> hardirq/softirq
950# ||||| delay 1460# ||| / _--=> preempt-depth
951# cmd pid ||||| time | caller 1461# |||| / delay
952# \ / ||||| \ | / 1462# cmd pid ||||| time | caller
953 <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) 1463# \ / ||||| \ | /
954 <idle>-0 1d..4 4us : schedule (cpu_idle) 1464 <idle>-0 3d.h4 0us : 0:120:R + [003] 2389: 94:R sleep
955 1465 <idle>-0 3d.h4 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up
956 1466 <idle>-0 3d..3 5us : __schedule <-schedule
957Running this on an idle system, we see that it only took 4 1467 <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep
958microseconds to perform the task switch. Note, since the trace 1468
959marker in the schedule is before the actual "switch", we stop 1469
960the tracing when the recorded task is about to schedule in. This 1470Running this on an idle system, we see that it only took 5 microseconds
961may change if we add a new marker at the end of the scheduler. 1471to perform the task switch. Note, since the trace point in the schedule
962 1472is before the actual "switch", we stop the tracing when the recorded task
963Notice that the recorded task is 'sleep' with the PID of 4901 1473is about to schedule in. This may change if we add a new marker at the
1474end of the scheduler.
1475
1476Notice that the recorded task is 'sleep' with the PID of 2389
964and it has an rt_prio of 5. This priority is user-space priority 1477and it has an rt_prio of 5. This priority is user-space priority
965and not the internal kernel priority. The policy is 1 for 1478and not the internal kernel priority. The policy is 1 for
966SCHED_FIFO and 2 for SCHED_RR. 1479SCHED_FIFO and 2 for SCHED_RR.
967 1480
968Doing the same with chrt -r 5 and ftrace_enabled set. 1481Note, that the trace data shows the internal priority (99 - rtprio).
969 1482
970# tracer: wakeup 1483 <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep
1484
1485The 0:120:R means idle was running with a nice priority of 0 (120 - 120)
1486and in the running state 'R'. The sleep task was scheduled in with
14872389: 94:R. That is, the priority is the kernel rtprio (99 - 5 = 94)
1488and it too is in the running state.
1489
1490Doing the same with chrt -r 5 and function-trace set.
1491
1492 # echo 1 > options/function-trace
1493
1494# tracer: wakeup_rt
971# 1495#
972wakeup latency trace v1.1.5 on 2.6.26-rc8 1496# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
973-------------------------------------------------------------------- 1497# --------------------------------------------------------------------
974 latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) 1498# latency: 29 us, #85/85, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
975 ----------------- 1499# -----------------
976 | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) 1500# | task: sleep-2448 (uid:0 nice:0 policy:1 rt_prio:5)
977 ----------------- 1501# -----------------
978 1502#
979# _------=> CPU# 1503# _------=> CPU#
980# / _-----=> irqs-off 1504# / _-----=> irqs-off
981# | / _----=> need-resched 1505# | / _----=> need-resched
982# || / _---=> hardirq/softirq 1506# || / _---=> hardirq/softirq
983# ||| / _--=> preempt-depth 1507# ||| / _--=> preempt-depth
984# |||| / 1508# |||| / delay
985# ||||| delay 1509# cmd pid ||||| time | caller
986# cmd pid ||||| time | caller 1510# \ / ||||| \ | /
987# \ / ||||| \ | / 1511 <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep
988ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) 1512 <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up
989ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) 1513 <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup
990ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) 1514 <idle>-0 3d.h3 3us : resched_task <-check_preempt_curr
991ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) 1515 <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup
992ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) 1516 <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up
993ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) 1517 <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock
994ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) 1518 <idle>-0 3dNh2 5us : ttwu_stat <-try_to_wake_up
995ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) 1519 <idle>-0 3dNh2 5us : _raw_spin_unlock_irqrestore <-try_to_wake_up
996[...] 1520 <idle>-0 3dNh2 6us : sub_preempt_count <-_raw_spin_unlock_irqrestore
997ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) 1521 <idle>-0 3dNh1 6us : _raw_spin_lock <-__run_hrtimer
998ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) 1522 <idle>-0 3dNh1 6us : add_preempt_count <-_raw_spin_lock
999ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) 1523 <idle>-0 3dNh2 7us : _raw_spin_unlock <-hrtimer_interrupt
1000ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) 1524 <idle>-0 3dNh2 7us : sub_preempt_count <-_raw_spin_unlock
1001[...] 1525 <idle>-0 3dNh1 7us : tick_program_event <-hrtimer_interrupt
1002ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) 1526 <idle>-0 3dNh1 7us : clockevents_program_event <-tick_program_event
1003ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) 1527 <idle>-0 3dNh1 8us : ktime_get <-clockevents_program_event
1004ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) 1528 <idle>-0 3dNh1 8us : lapic_next_event <-clockevents_program_event
1005ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) 1529 <idle>-0 3dNh1 8us : irq_exit <-smp_apic_timer_interrupt
1006ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) 1530 <idle>-0 3dNh1 9us : sub_preempt_count <-irq_exit
1007ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) 1531 <idle>-0 3dN.2 9us : idle_cpu <-irq_exit
1008ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) 1532 <idle>-0 3dN.2 9us : rcu_irq_exit <-irq_exit
1009ksoftirq-7 1.N.2 33us : schedule (__cond_resched) 1533 <idle>-0 3dN.2 10us : rcu_eqs_enter_common.isra.45 <-rcu_irq_exit
1010ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) 1534 <idle>-0 3dN.2 10us : sub_preempt_count <-irq_exit
1011ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) 1535 <idle>-0 3.N.1 11us : rcu_idle_exit <-cpu_idle
1012ksoftirq-7 1dN.3 35us : _spin_lock (schedule) 1536 <idle>-0 3dN.1 11us : rcu_eqs_exit_common.isra.43 <-rcu_idle_exit
1013ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) 1537 <idle>-0 3.N.1 11us : tick_nohz_idle_exit <-cpu_idle
1014ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) 1538 <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
1015ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) 1539 <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
1016[...] 1540 <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
1017ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) 1541 <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit
1018ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) 1542 <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz
1019ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) 1543 <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
1020ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) 1544 <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz
1021ksoftirq-7 1d..4 50us : schedule (__cond_resched) 1545 <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load
1022 1546 <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz
1023The interrupt went off while running ksoftirqd. This task runs 1547 <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
1024at SCHED_OTHER. Why did not we see the 'N' set early? This may 1548 <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
1025be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K 1549 <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
1026stacks configured, the interrupt and softirq run with their own 1550 <idle>-0 3dN.1 15us : hrtimer_cancel <-tick_nohz_idle_exit
1027stack. Some information is held on the top of the task's stack 1551 <idle>-0 3dN.1 15us : hrtimer_try_to_cancel <-hrtimer_cancel
1028(need_resched and preempt_count are both stored there). The 1552 <idle>-0 3dN.1 16us : lock_hrtimer_base.isra.18 <-hrtimer_try_to_cancel
1029setting of the NEED_RESCHED bit is done directly to the task's 1553 <idle>-0 3dN.1 16us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18
1030stack, but the reading of the NEED_RESCHED is done by looking at 1554 <idle>-0 3dN.1 16us : add_preempt_count <-_raw_spin_lock_irqsave
1031the current stack, which in this case is the stack for the hard 1555 <idle>-0 3dN.2 17us : __remove_hrtimer <-remove_hrtimer.part.16
1032interrupt. This hides the fact that NEED_RESCHED has been set. 1556 <idle>-0 3dN.2 17us : hrtimer_force_reprogram <-__remove_hrtimer
1033We do not see the 'N' until we switch back to the task's 1557 <idle>-0 3dN.2 17us : tick_program_event <-hrtimer_force_reprogram
1034assigned stack. 1558 <idle>-0 3dN.2 18us : clockevents_program_event <-tick_program_event
1559 <idle>-0 3dN.2 18us : ktime_get <-clockevents_program_event
1560 <idle>-0 3dN.2 18us : lapic_next_event <-clockevents_program_event
1561 <idle>-0 3dN.2 19us : _raw_spin_unlock_irqrestore <-hrtimer_try_to_cancel
1562 <idle>-0 3dN.2 19us : sub_preempt_count <-_raw_spin_unlock_irqrestore
1563 <idle>-0 3dN.1 19us : hrtimer_forward <-tick_nohz_idle_exit
1564 <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward
1565 <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward
1566 <idle>-0 3dN.1 20us : hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11
1567 <idle>-0 3dN.1 20us : __hrtimer_start_range_ns <-hrtimer_start_range_ns
1568 <idle>-0 3dN.1 21us : lock_hrtimer_base.isra.18 <-__hrtimer_start_range_ns
1569 <idle>-0 3dN.1 21us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18
1570 <idle>-0 3dN.1 21us : add_preempt_count <-_raw_spin_lock_irqsave
1571 <idle>-0 3dN.2 22us : ktime_add_safe <-__hrtimer_start_range_ns
1572 <idle>-0 3dN.2 22us : enqueue_hrtimer <-__hrtimer_start_range_ns
1573 <idle>-0 3dN.2 22us : tick_program_event <-__hrtimer_start_range_ns
1574 <idle>-0 3dN.2 23us : clockevents_program_event <-tick_program_event
1575 <idle>-0 3dN.2 23us : ktime_get <-clockevents_program_event
1576 <idle>-0 3dN.2 23us : lapic_next_event <-clockevents_program_event
1577 <idle>-0 3dN.2 24us : _raw_spin_unlock_irqrestore <-__hrtimer_start_range_ns
1578 <idle>-0 3dN.2 24us : sub_preempt_count <-_raw_spin_unlock_irqrestore
1579 <idle>-0 3dN.1 24us : account_idle_ticks <-tick_nohz_idle_exit
1580 <idle>-0 3dN.1 24us : account_idle_time <-account_idle_ticks
1581 <idle>-0 3.N.1 25us : sub_preempt_count <-cpu_idle
1582 <idle>-0 3.N.. 25us : schedule <-cpu_idle
1583 <idle>-0 3.N.. 25us : __schedule <-preempt_schedule
1584 <idle>-0 3.N.. 26us : add_preempt_count <-__schedule
1585 <idle>-0 3.N.1 26us : rcu_note_context_switch <-__schedule
1586 <idle>-0 3.N.1 26us : rcu_sched_qs <-rcu_note_context_switch
1587 <idle>-0 3dN.1 27us : rcu_preempt_qs <-rcu_note_context_switch
1588 <idle>-0 3.N.1 27us : _raw_spin_lock_irq <-__schedule
1589 <idle>-0 3dN.1 27us : add_preempt_count <-_raw_spin_lock_irq
1590 <idle>-0 3dN.2 28us : put_prev_task_idle <-__schedule
1591 <idle>-0 3dN.2 28us : pick_next_task_stop <-pick_next_task
1592 <idle>-0 3dN.2 28us : pick_next_task_rt <-pick_next_task
1593 <idle>-0 3dN.2 29us : dequeue_pushable_task <-pick_next_task_rt
1594 <idle>-0 3d..3 29us : __schedule <-preempt_schedule
1595 <idle>-0 3d..3 30us : 0:120:R ==> [003] 2448: 94:R sleep
1596
1597This isn't that big of a trace, even with function tracing enabled,
1598so I included the entire trace.
1599
1600The interrupt went off while the system was idle. Somewhere
1601before task_woken_rt() was called, the NEED_RESCHED flag was set,
1602this is indicated by the first occurrence of the 'N' flag.
1603
1604Latency tracing and events
1605--------------------------
1606Function tracing can induce a much larger latency, but without
1607seeing what happens within the latency it is hard to know what
1608caused it. There is a middle ground, and that is with enabling
1609events.
1610
1611 # echo 0 > options/function-trace
1612 # echo wakeup_rt > current_tracer
1613 # echo 1 > events/enable
1614 # echo 1 > tracing_on
1615 # echo 0 > tracing_max_latency
1616 # chrt -f 5 sleep 1
1617 # echo 0 > tracing_on
1618 # cat trace
1619# tracer: wakeup_rt
1620#
1621# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
1622# --------------------------------------------------------------------
1623# latency: 6 us, #12/12, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
1624# -----------------
1625# | task: sleep-5882 (uid:0 nice:0 policy:1 rt_prio:5)
1626# -----------------
1627#
1628# _------=> CPU#
1629# / _-----=> irqs-off
1630# | / _----=> need-resched
1631# || / _---=> hardirq/softirq
1632# ||| / _--=> preempt-depth
1633# |||| / delay
1634# cmd pid ||||| time | caller
1635# \ / ||||| \ | /
1636 <idle>-0 2d.h4 0us : 0:120:R + [002] 5882: 94:R sleep
1637 <idle>-0 2d.h4 0us : ttwu_do_activate.constprop.87 <-try_to_wake_up
1638 <idle>-0 2d.h4 1us : sched_wakeup: comm=sleep pid=5882 prio=94 success=1 target_cpu=002
1639 <idle>-0 2dNh2 1us : hrtimer_expire_exit: hrtimer=ffff88007796feb8
1640 <idle>-0 2.N.2 2us : power_end: cpu_id=2
1641 <idle>-0 2.N.2 3us : cpu_idle: state=4294967295 cpu_id=2
1642 <idle>-0 2dN.3 4us : hrtimer_cancel: hrtimer=ffff88007d50d5e0
1643 <idle>-0 2dN.3 4us : hrtimer_start: hrtimer=ffff88007d50d5e0 function=tick_sched_timer expires=34311211000000 softexpires=34311211000000
1644 <idle>-0 2.N.2 5us : rcu_utilization: Start context switch
1645 <idle>-0 2.N.2 5us : rcu_utilization: End context switch
1646 <idle>-0 2d..3 6us : __schedule <-schedule
1647 <idle>-0 2d..3 6us : 0:120:R ==> [002] 5882: 94:R sleep
1648
1035 1649
1036function 1650function
1037-------- 1651--------
@@ -1039,6 +1653,7 @@ function
1039This tracer is the function tracer. Enabling the function tracer 1653This tracer is the function tracer. Enabling the function tracer
1040can be done from the debug file system. Make sure the 1654can be done from the debug file system. Make sure the
1041ftrace_enabled is set; otherwise this tracer is a nop. 1655ftrace_enabled is set; otherwise this tracer is a nop.
1656See the "ftrace_enabled" section below.
1042 1657
1043 # sysctl kernel.ftrace_enabled=1 1658 # sysctl kernel.ftrace_enabled=1
1044 # echo function > current_tracer 1659 # echo function > current_tracer
@@ -1048,23 +1663,23 @@ ftrace_enabled is set; otherwise this tracer is a nop.
1048 # cat trace 1663 # cat trace
1049# tracer: function 1664# tracer: function
1050# 1665#
1051# TASK-PID CPU# TIMESTAMP FUNCTION 1666# entries-in-buffer/entries-written: 24799/24799 #P:4
1052# | | | | | 1667#
1053 bash-4003 [00] 123.638713: finish_task_switch <-schedule 1668# _-----=> irqs-off
1054 bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch 1669# / _----=> need-resched
1055 bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq 1670# | / _---=> hardirq/softirq
1056 bash-4003 [00] 123.638715: hrtick_set <-schedule 1671# || / _--=> preempt-depth
1057 bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set 1672# ||| / delay
1058 bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave 1673# TASK-PID CPU# |||| TIMESTAMP FUNCTION
1059 bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set 1674# | | | |||| | |
1060 bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore 1675 bash-1994 [002] .... 3082.063030: mutex_unlock <-rb_simple_write
1061 bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set 1676 bash-1994 [002] .... 3082.063031: __mutex_unlock_slowpath <-mutex_unlock
1062 bash-4003 [00] 123.638718: sub_preempt_count <-schedule 1677 bash-1994 [002] .... 3082.063031: __fsnotify_parent <-fsnotify_modify
1063 bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule 1678 bash-1994 [002] .... 3082.063032: fsnotify <-fsnotify_modify
1064 bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run 1679 bash-1994 [002] .... 3082.063032: __srcu_read_lock <-fsnotify
1065 bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion 1680 bash-1994 [002] .... 3082.063032: add_preempt_count <-__srcu_read_lock
1066 bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common 1681 bash-1994 [002] ...1 3082.063032: sub_preempt_count <-__srcu_read_lock
1067 bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq 1682 bash-1994 [002] .... 3082.063033: __srcu_read_unlock <-fsnotify
1068[...] 1683[...]
1069 1684
1070 1685
@@ -1214,79 +1829,19 @@ int main (int argc, char **argv)
1214 return 0; 1829 return 0;
1215} 1830}
1216 1831
1832Or this simple script!
1217 1833
1218hw-branch-tracer (x86 only) 1834------
1219--------------------------- 1835#!/bin/bash
1220 1836
1221This tracer uses the x86 last branch tracing hardware feature to 1837debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts`
1222collect a branch trace on all cpus with relatively low overhead. 1838echo nop > $debugfs/tracing/current_tracer
1223 1839echo 0 > $debugfs/tracing/tracing_on
1224The tracer uses a fixed-size circular buffer per cpu and only 1840echo $$ > $debugfs/tracing/set_ftrace_pid
1225traces ring 0 branches. The trace file dumps that buffer in the 1841echo function > $debugfs/tracing/current_tracer
1226following format: 1842echo 1 > $debugfs/tracing/tracing_on
1227 1843exec "$@"
1228# tracer: hw-branch-tracer 1844------
1229#
1230# CPU# TO <- FROM
1231 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
1232 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
1233 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
1234 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
1235 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
1236 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
1237
1238
1239The tracer may be used to dump the trace for the oops'ing cpu on
1240a kernel oops into the system log. To enable this,
1241ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
1242can either use the sysctl function or set it via the proc system
1243interface.
1244
1245 sysctl kernel.ftrace_dump_on_oops=n
1246
1247or
1248
1249 echo n > /proc/sys/kernel/ftrace_dump_on_oops
1250
1251If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will
1252only dump the buffer of the CPU that triggered the oops.
1253
1254Here's an example of such a dump after a null pointer
1255dereference in a kernel module:
1256
1257[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
1258[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
1259[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
1260[57848.106019] Oops: 0002 [#1] SMP
1261[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
1262[57848.106019] Dumping ftrace buffer:
1263[57848.106019] ---------------------------------
1264[...]
1265[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
1266[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
1267[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
1268[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
1269[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
1270[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
1271[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
1272[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
1273[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
1274[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
1275[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
1276[...]
1277[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
1278[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
1279[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
1280[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
1281[...]
1282[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
1283[57848.106019] ---------------------------------
1284[57848.106019] CPU 0
1285[57848.106019] Modules linked in: oops
1286[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
1287[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
1288[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
1289[...]
1290 1845
1291 1846
1292function graph tracer 1847function graph tracer
@@ -1473,16 +2028,18 @@ starts of pointing to a simple return. (Enabling FTRACE will
1473include the -pg switch in the compiling of the kernel.) 2028include the -pg switch in the compiling of the kernel.)
1474 2029
1475At compile time every C file object is run through the 2030At compile time every C file object is run through the
1476recordmcount.pl script (located in the scripts directory). This 2031recordmcount program (located in the scripts directory). This
1477script will process the C object using objdump to find all the 2032program will parse the ELF headers in the C object to find all
1478locations in the .text section that call mcount. (Note, only the 2033the locations in the .text section that call mcount. (Note, only
1479.text section is processed, since processing other sections like 2034white listed .text sections are processed, since processing other
1480.init.text may cause races due to those sections being freed). 2035sections like .init.text may cause races due to those sections
2036being freed unexpectedly).
1481 2037
1482A new section called "__mcount_loc" is created that holds 2038A new section called "__mcount_loc" is created that holds
1483references to all the mcount call sites in the .text section. 2039references to all the mcount call sites in the .text section.
1484This section is compiled back into the original object. The 2040The recordmcount program re-links this section back into the
1485final linker will add all these references into a single table. 2041original object. The final linking stage of the kernel will add all these
2042references into a single table.
1486 2043
1487On boot up, before SMP is initialized, the dynamic ftrace code 2044On boot up, before SMP is initialized, the dynamic ftrace code
1488scans this table and updates all the locations into nops. It 2045scans this table and updates all the locations into nops. It
@@ -1493,13 +2050,25 @@ unloaded, it also removes its functions from the ftrace function
1493list. This is automatic in the module unload code, and the 2050list. This is automatic in the module unload code, and the
1494module author does not need to worry about it. 2051module author does not need to worry about it.
1495 2052
1496When tracing is enabled, kstop_machine is called to prevent 2053When tracing is enabled, the process of modifying the function
1497races with the CPUS executing code being modified (which can 2054tracepoints is dependent on architecture. The old method is to use
1498cause the CPU to do undesirable things), and the nops are 2055kstop_machine to prevent races with the CPUs executing code being
2056modified (which can cause the CPU to do undesirable things, especially
2057if the modified code crosses cache (or page) boundaries), and the nops are
1499patched back to calls. But this time, they do not call mcount 2058patched back to calls. But this time, they do not call mcount
1500(which is just a function stub). They now call into the ftrace 2059(which is just a function stub). They now call into the ftrace
1501infrastructure. 2060infrastructure.
1502 2061
2062The new method of modifying the function tracepoints is to place
2063a breakpoint at the location to be modified, sync all CPUs, modify
2064the rest of the instruction not covered by the breakpoint, sync
2065all CPUs again, and then replace the breakpoint with the finished
2066version of the ftrace call site.
2067
2068Some archs do not even need to monkey around with the synchronization,
2069and can just slap the new code on top of the old without any
2070problems with other CPUs executing it at the same time.
2071
1503One special side-effect to the recording of the functions being 2072One special side-effect to the recording of the functions being
1504traced is that we can now selectively choose which functions we 2073traced is that we can now selectively choose which functions we
1505wish to trace and which ones we want the mcount calls to remain 2074wish to trace and which ones we want the mcount calls to remain
@@ -1530,20 +2099,28 @@ mutex_lock
1530 2099
1531If I am only interested in sys_nanosleep and hrtimer_interrupt: 2100If I am only interested in sys_nanosleep and hrtimer_interrupt:
1532 2101
1533 # echo sys_nanosleep hrtimer_interrupt \ 2102 # echo sys_nanosleep hrtimer_interrupt > set_ftrace_filter
1534 > set_ftrace_filter
1535 # echo function > current_tracer 2103 # echo function > current_tracer
1536 # echo 1 > tracing_on 2104 # echo 1 > tracing_on
1537 # usleep 1 2105 # usleep 1
1538 # echo 0 > tracing_on 2106 # echo 0 > tracing_on
1539 # cat trace 2107 # cat trace
1540# tracer: ftrace 2108# tracer: function
2109#
2110# entries-in-buffer/entries-written: 5/5 #P:4
1541# 2111#
1542# TASK-PID CPU# TIMESTAMP FUNCTION 2112# _-----=> irqs-off
1543# | | | | | 2113# / _----=> need-resched
1544 usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt 2114# | / _---=> hardirq/softirq
1545 usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call 2115# || / _--=> preempt-depth
1546 <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt 2116# ||| / delay
2117# TASK-PID CPU# |||| TIMESTAMP FUNCTION
2118# | | | |||| | |
2119 usleep-2665 [001] .... 4186.475355: sys_nanosleep <-system_call_fastpath
2120 <idle>-0 [001] d.h1 4186.475409: hrtimer_interrupt <-smp_apic_timer_interrupt
2121 usleep-2665 [001] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt
2122 <idle>-0 [003] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt
2123 <idle>-0 [002] d.h1 4186.475427: hrtimer_interrupt <-smp_apic_timer_interrupt
1547 2124
1548To see which functions are being traced, you can cat the file: 2125To see which functions are being traced, you can cat the file:
1549 2126
@@ -1571,20 +2148,25 @@ Note: It is better to use quotes to enclose the wild cards,
1571 2148
1572Produces: 2149Produces:
1573 2150
1574# tracer: ftrace 2151# tracer: function
1575# 2152#
1576# TASK-PID CPU# TIMESTAMP FUNCTION 2153# entries-in-buffer/entries-written: 897/897 #P:4
1577# | | | | | 2154#
1578 bash-4003 [00] 1480.611794: hrtimer_init <-copy_process 2155# _-----=> irqs-off
1579 bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set 2156# / _----=> need-resched
1580 bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear 2157# | / _---=> hardirq/softirq
1581 bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel 2158# || / _--=> preempt-depth
1582 <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt 2159# ||| / delay
1583 <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt 2160# TASK-PID CPU# |||| TIMESTAMP FUNCTION
1584 <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt 2161# | | | |||| | |
1585 <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt 2162 <idle>-0 [003] dN.1 4228.547803: hrtimer_cancel <-tick_nohz_idle_exit
1586 <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt 2163 <idle>-0 [003] dN.1 4228.547804: hrtimer_try_to_cancel <-hrtimer_cancel
1587 2164 <idle>-0 [003] dN.2 4228.547805: hrtimer_force_reprogram <-__remove_hrtimer
2165 <idle>-0 [003] dN.1 4228.547805: hrtimer_forward <-tick_nohz_idle_exit
2166 <idle>-0 [003] dN.1 4228.547805: hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11
2167 <idle>-0 [003] d..1 4228.547858: hrtimer_get_next_event <-get_next_timer_interrupt
2168 <idle>-0 [003] d..1 4228.547859: hrtimer_start <-__tick_nohz_idle_enter
2169 <idle>-0 [003] d..2 4228.547860: hrtimer_force_reprogram <-__rem
1588 2170
1589Notice that we lost the sys_nanosleep. 2171Notice that we lost the sys_nanosleep.
1590 2172
@@ -1651,19 +2233,29 @@ traced.
1651 2233
1652Produces: 2234Produces:
1653 2235
1654# tracer: ftrace 2236# tracer: function
2237#
2238# entries-in-buffer/entries-written: 39608/39608 #P:4
1655# 2239#
1656# TASK-PID CPU# TIMESTAMP FUNCTION 2240# _-----=> irqs-off
1657# | | | | | 2241# / _----=> need-resched
1658 bash-4043 [01] 115.281644: finish_task_switch <-schedule 2242# | / _---=> hardirq/softirq
1659 bash-4043 [01] 115.281645: hrtick_set <-schedule 2243# || / _--=> preempt-depth
1660 bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set 2244# ||| / delay
1661 bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run 2245# TASK-PID CPU# |||| TIMESTAMP FUNCTION
1662 bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion 2246# | | | |||| | |
1663 bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run 2247 bash-1994 [000] .... 4342.324896: file_ra_state_init <-do_dentry_open
1664 bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop 2248 bash-1994 [000] .... 4342.324897: open_check_o_direct <-do_last
1665 bash-4043 [01] 115.281648: wake_up_process <-kthread_stop 2249 bash-1994 [000] .... 4342.324897: ima_file_check <-do_last
1666 bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process 2250 bash-1994 [000] .... 4342.324898: process_measurement <-ima_file_check
2251 bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
2252 bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
2253 bash-1994 [000] .... 4342.324899: do_truncate <-do_last
2254 bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
2255 bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
2256 bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
2257 bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
2258 bash-1994 [000] .... 4342.324900: timespec_trunc <-current_fs_time
1667 2259
1668We can see that there's no more lock or preempt tracing. 2260We can see that there's no more lock or preempt tracing.
1669 2261
@@ -1729,6 +2321,28 @@ this special filter via:
1729 echo > set_graph_function 2321 echo > set_graph_function
1730 2322
1731 2323
2324ftrace_enabled
2325--------------
2326
2327Note, the proc sysctl ftrace_enabled is a big on/off switch for the
2328function tracer. By default it is enabled (when function tracing is
2329enabled in the kernel). If it is disabled, all function tracing is
2330disabled. This includes not only the function tracers for ftrace, but
2331also for any other uses (perf, kprobes, stack tracing, profiling, etc).
2332
2333Please disable this with care.
2334
2335This can be disabled (and enabled) with:
2336
2337 sysctl kernel.ftrace_enabled=0
2338 sysctl kernel.ftrace_enabled=1
2339
2340 or
2341
2342 echo 0 > /proc/sys/kernel/ftrace_enabled
2343 echo 1 > /proc/sys/kernel/ftrace_enabled
2344
2345
1732Filter commands 2346Filter commands
1733--------------- 2347---------------
1734 2348
@@ -1763,12 +2377,58 @@ The following commands are supported:
1763 2377
1764 echo '__schedule_bug:traceoff:5' > set_ftrace_filter 2378 echo '__schedule_bug:traceoff:5' > set_ftrace_filter
1765 2379
2380 To always disable tracing when __schedule_bug is hit:
2381
2382 echo '__schedule_bug:traceoff' > set_ftrace_filter
2383
1766 These commands are cumulative whether or not they are appended 2384 These commands are cumulative whether or not they are appended
1767 to set_ftrace_filter. To remove a command, prepend it by '!' 2385 to set_ftrace_filter. To remove a command, prepend it by '!'
1768 and drop the parameter: 2386 and drop the parameter:
1769 2387
2388 echo '!__schedule_bug:traceoff:0' > set_ftrace_filter
2389
2390 The above removes the traceoff command for __schedule_bug
2391 that has a counter. To remove commands without counters:
2392
1770 echo '!__schedule_bug:traceoff' > set_ftrace_filter 2393 echo '!__schedule_bug:traceoff' > set_ftrace_filter
1771 2394
2395- snapshot
2396 Will cause a snapshot to be triggered when the function is hit.
2397
2398 echo 'native_flush_tlb_others:snapshot' > set_ftrace_filter
2399
2400 To only snapshot once:
2401
2402 echo 'native_flush_tlb_others:snapshot:1' > set_ftrace_filter
2403
2404 To remove the above commands:
2405
2406 echo '!native_flush_tlb_others:snapshot' > set_ftrace_filter
2407 echo '!native_flush_tlb_others:snapshot:0' > set_ftrace_filter
2408
2409- enable_event/disable_event
2410 These commands can enable or disable a trace event. Note, because
2411 function tracing callbacks are very sensitive, when these commands
2412 are registered, the trace point is activated, but disabled in
2413 a "soft" mode. That is, the tracepoint will be called, but
2414 just will not be traced. The event tracepoint stays in this mode
2415 as long as there's a command that triggers it.
2416
2417 echo 'try_to_wake_up:enable_event:sched:sched_switch:2' > \
2418 set_ftrace_filter
2419
2420 The format is:
2421
2422 <function>:enable_event:<system>:<event>[:count]
2423 <function>:disable_event:<system>:<event>[:count]
2424
2425 To remove the events commands:
2426
2427
2428 echo '!try_to_wake_up:enable_event:sched:sched_switch:0' > \
2429 set_ftrace_filter
2430 echo '!schedule:disable_event:sched:sched_switch' > \
2431 set_ftrace_filter
1772 2432
1773trace_pipe 2433trace_pipe
1774---------- 2434----------
@@ -1787,28 +2447,31 @@ different. The trace is live.
1787 # cat trace 2447 # cat trace
1788# tracer: function 2448# tracer: function
1789# 2449#
1790# TASK-PID CPU# TIMESTAMP FUNCTION 2450# entries-in-buffer/entries-written: 0/0 #P:4
1791# | | | | | 2451#
2452# _-----=> irqs-off
2453# / _----=> need-resched
2454# | / _---=> hardirq/softirq
2455# || / _--=> preempt-depth
2456# ||| / delay
2457# TASK-PID CPU# |||| TIMESTAMP FUNCTION
2458# | | | |||| | |
1792 2459
1793 # 2460 #
1794 # cat /tmp/trace.out 2461 # cat /tmp/trace.out
1795 bash-4043 [00] 41.267106: finish_task_switch <-schedule 2462 bash-1994 [000] .... 5281.568961: mutex_unlock <-rb_simple_write
1796 bash-4043 [00] 41.267106: hrtick_set <-schedule 2463 bash-1994 [000] .... 5281.568963: __mutex_unlock_slowpath <-mutex_unlock
1797 bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set 2464 bash-1994 [000] .... 5281.568963: __fsnotify_parent <-fsnotify_modify
1798 bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run 2465 bash-1994 [000] .... 5281.568964: fsnotify <-fsnotify_modify
1799 bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion 2466 bash-1994 [000] .... 5281.568964: __srcu_read_lock <-fsnotify
1800 bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run 2467 bash-1994 [000] .... 5281.568964: add_preempt_count <-__srcu_read_lock
1801 bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop 2468 bash-1994 [000] ...1 5281.568965: sub_preempt_count <-__srcu_read_lock
1802 bash-4043 [00] 41.267110: wake_up_process <-kthread_stop 2469 bash-1994 [000] .... 5281.568965: __srcu_read_unlock <-fsnotify
1803 bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process 2470 bash-1994 [000] .... 5281.568967: sys_dup2 <-system_call_fastpath
1804 bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up
1805 2471
1806 2472
1807Note, reading the trace_pipe file will block until more input is 2473Note, reading the trace_pipe file will block until more input is
1808added. By changing the tracer, trace_pipe will issue an EOF. We 2474added.
1809needed to set the function tracer _before_ we "cat" the
1810trace_pipe file.
1811
1812 2475
1813trace entries 2476trace entries
1814------------- 2477-------------
@@ -1817,31 +2480,50 @@ Having too much or not enough data can be troublesome in
1817diagnosing an issue in the kernel. The file buffer_size_kb is 2480diagnosing an issue in the kernel. The file buffer_size_kb is
1818used to modify the size of the internal trace buffers. The 2481used to modify the size of the internal trace buffers. The
1819number listed is the number of entries that can be recorded per 2482number listed is the number of entries that can be recorded per
1820CPU. To know the full size, multiply the number of possible CPUS 2483CPU. To know the full size, multiply the number of possible CPUs
1821with the number of entries. 2484with the number of entries.
1822 2485
1823 # cat buffer_size_kb 2486 # cat buffer_size_kb
18241408 (units kilobytes) 24871408 (units kilobytes)
1825 2488
1826Note, to modify this, you must have tracing completely disabled. 2489Or simply read buffer_total_size_kb
1827To do that, echo "nop" into the current_tracer. If the 2490
1828current_tracer is not set to "nop", an EINVAL error will be 2491 # cat buffer_total_size_kb
1829returned. 24925632
2493
2494To modify the buffer, simply echo in a number (in 1024 byte segments).
1830 2495
1831 # echo nop > current_tracer
1832 # echo 10000 > buffer_size_kb 2496 # echo 10000 > buffer_size_kb
1833 # cat buffer_size_kb 2497 # cat buffer_size_kb
183410000 (units kilobytes) 249810000 (units kilobytes)
1835 2499
1836The number of pages which will be allocated is limited to a 2500It will try to allocate as much as possible. If you allocate too
1837percentage of available memory. Allocating too much will produce 2501much, it can cause Out-Of-Memory to trigger.
1838an error.
1839 2502
1840 # echo 1000000000000 > buffer_size_kb 2503 # echo 1000000000000 > buffer_size_kb
1841-bash: echo: write error: Cannot allocate memory 2504-bash: echo: write error: Cannot allocate memory
1842 # cat buffer_size_kb 2505 # cat buffer_size_kb
184385 250685
1844 2507
2508The per_cpu buffers can be changed individually as well:
2509
2510 # echo 10000 > per_cpu/cpu0/buffer_size_kb
2511 # echo 100 > per_cpu/cpu1/buffer_size_kb
2512
2513When the per_cpu buffers are not the same, the buffer_size_kb
2514at the top level will just show an X
2515
2516 # cat buffer_size_kb
2517X
2518
2519This is where the buffer_total_size_kb is useful:
2520
2521 # cat buffer_total_size_kb
252212916
2523
2524Writing to the top level buffer_size_kb will reset all the buffers
2525to be the same again.
2526
1845Snapshot 2527Snapshot
1846-------- 2528--------
1847CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature 2529CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
@@ -1873,7 +2555,7 @@ feature:
1873 2555
1874 status\input | 0 | 1 | else | 2556 status\input | 0 | 1 | else |
1875 --------------+------------+------------+------------+ 2557 --------------+------------+------------+------------+
1876 not allocated |(do nothing)| alloc+swap | EINVAL | 2558 not allocated |(do nothing)| alloc+swap |(do nothing)|
1877 --------------+------------+------------+------------+ 2559 --------------+------------+------------+------------+
1878 allocated | free | swap | clear | 2560 allocated | free | swap | clear |
1879 --------------+------------+------------+------------+ 2561 --------------+------------+------------+------------+
@@ -1925,7 +2607,188 @@ bash: echo: write error: Device or resource busy
1925 # cat snapshot 2607 # cat snapshot
1926cat: snapshot: Device or resource busy 2608cat: snapshot: Device or resource busy
1927 2609
2610
2611Instances
2612---------
2613In the debugfs tracing directory is a directory called "instances".
2614This directory can have new directories created inside of it using
2615mkdir, and removing directories with rmdir. The directory created
2616with mkdir in this directory will already contain files and other
2617directories after it is created.
2618
2619 # mkdir instances/foo
2620 # ls instances/foo
2621buffer_size_kb buffer_total_size_kb events free_buffer per_cpu
2622set_event snapshot trace trace_clock trace_marker trace_options
2623trace_pipe tracing_on
2624
2625As you can see, the new directory looks similar to the tracing directory
2626itself. In fact, it is very similar, except that the buffer and
2627events are agnostic from the main directory, or from any other
2628instances that are created.
2629
2630The files in the new directory work just like the files with the
2631same name in the tracing directory except the buffer that is used
2632is a separate and new buffer. The files affect that buffer but do not
2633affect the main buffer with the exception of trace_options. Currently,
2634the trace_options affect all instances and the top level buffer
2635the same, but this may change in future releases. That is, options
2636may become specific to the instance they reside in.
2637
2638Notice that none of the function tracer files are there, nor are
2639current_tracer and available_tracers. This is because the buffers
2640can currently only have events enabled for them.
2641
2642 # mkdir instances/foo
2643 # mkdir instances/bar
2644 # mkdir instances/zoot
2645 # echo 100000 > buffer_size_kb
2646 # echo 1000 > instances/foo/buffer_size_kb
2647 # echo 5000 > instances/bar/per_cpu/cpu1/buffer_size_kb
2648 # echo function > current_tracer
2649 # echo 1 > instances/foo/events/sched/sched_wakeup/enable
2650 # echo 1 > instances/foo/events/sched/sched_wakeup_new/enable
2651 # echo 1 > instances/foo/events/sched/sched_switch/enable
2652 # echo 1 > instances/bar/events/irq/enable
2653 # echo 1 > instances/zoot/events/syscalls/enable
2654 # cat trace_pipe
2655CPU:2 [LOST 11745 EVENTS]
2656 bash-2044 [002] .... 10594.481032: _raw_spin_lock_irqsave <-get_page_from_freelist
2657 bash-2044 [002] d... 10594.481032: add_preempt_count <-_raw_spin_lock_irqsave
2658 bash-2044 [002] d..1 10594.481032: __rmqueue <-get_page_from_freelist
2659 bash-2044 [002] d..1 10594.481033: _raw_spin_unlock <-get_page_from_freelist
2660 bash-2044 [002] d..1 10594.481033: sub_preempt_count <-_raw_spin_unlock
2661 bash-2044 [002] d... 10594.481033: get_pageblock_flags_group <-get_pageblock_migratetype
2662 bash-2044 [002] d... 10594.481034: __mod_zone_page_state <-get_page_from_freelist
2663 bash-2044 [002] d... 10594.481034: zone_statistics <-get_page_from_freelist
2664 bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics
2665 bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics
2666 bash-2044 [002] .... 10594.481035: arch_dup_task_struct <-copy_process
2667[...]
2668
2669 # cat instances/foo/trace_pipe
2670 bash-1998 [000] d..4 136.676759: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000
2671 bash-1998 [000] dN.4 136.676760: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000
2672 <idle>-0 [003] d.h3 136.676906: sched_wakeup: comm=rcu_preempt pid=9 prio=120 success=1 target_cpu=003
2673 <idle>-0 [003] d..3 136.676909: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=rcu_preempt next_pid=9 next_prio=120
2674 rcu_preempt-9 [003] d..3 136.676916: sched_switch: prev_comm=rcu_preempt prev_pid=9 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120
2675 bash-1998 [000] d..4 136.677014: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000
2676 bash-1998 [000] dN.4 136.677016: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000
2677 bash-1998 [000] d..3 136.677018: sched_switch: prev_comm=bash prev_pid=1998 prev_prio=120 prev_state=R+ ==> next_comm=kworker/0:1 next_pid=59 next_prio=120
2678 kworker/0:1-59 [000] d..4 136.677022: sched_wakeup: comm=sshd pid=1995 prio=120 success=1 target_cpu=001
2679 kworker/0:1-59 [000] d..3 136.677025: sched_switch: prev_comm=kworker/0:1 prev_pid=59 prev_prio=120 prev_state=S ==> next_comm=bash next_pid=1998 next_prio=120
2680[...]
2681
2682 # cat instances/bar/trace_pipe
2683 migration/1-14 [001] d.h3 138.732674: softirq_raise: vec=3 [action=NET_RX]
2684 <idle>-0 [001] dNh3 138.732725: softirq_raise: vec=3 [action=NET_RX]
2685 bash-1998 [000] d.h1 138.733101: softirq_raise: vec=1 [action=TIMER]
2686 bash-1998 [000] d.h1 138.733102: softirq_raise: vec=9 [action=RCU]
2687 bash-1998 [000] ..s2 138.733105: softirq_entry: vec=1 [action=TIMER]
2688 bash-1998 [000] ..s2 138.733106: softirq_exit: vec=1 [action=TIMER]
2689 bash-1998 [000] ..s2 138.733106: softirq_entry: vec=9 [action=RCU]
2690 bash-1998 [000] ..s2 138.733109: softirq_exit: vec=9 [action=RCU]
2691 sshd-1995 [001] d.h1 138.733278: irq_handler_entry: irq=21 name=uhci_hcd:usb4
2692 sshd-1995 [001] d.h1 138.733280: irq_handler_exit: irq=21 ret=unhandled
2693 sshd-1995 [001] d.h1 138.733281: irq_handler_entry: irq=21 name=eth0
2694 sshd-1995 [001] d.h1 138.733283: irq_handler_exit: irq=21 ret=handled
2695[...]
2696
2697 # cat instances/zoot/trace
2698# tracer: nop
2699#
2700# entries-in-buffer/entries-written: 18996/18996 #P:4
2701#
2702# _-----=> irqs-off
2703# / _----=> need-resched
2704# | / _---=> hardirq/softirq
2705# || / _--=> preempt-depth
2706# ||| / delay
2707# TASK-PID CPU# |||| TIMESTAMP FUNCTION
2708# | | | |||| | |
2709 bash-1998 [000] d... 140.733501: sys_write -> 0x2
2710 bash-1998 [000] d... 140.733504: sys_dup2(oldfd: a, newfd: 1)
2711 bash-1998 [000] d... 140.733506: sys_dup2 -> 0x1
2712 bash-1998 [000] d... 140.733508: sys_fcntl(fd: a, cmd: 1, arg: 0)
2713 bash-1998 [000] d... 140.733509: sys_fcntl -> 0x1
2714 bash-1998 [000] d... 140.733510: sys_close(fd: a)
2715 bash-1998 [000] d... 140.733510: sys_close -> 0x0
2716 bash-1998 [000] d... 140.733514: sys_rt_sigprocmask(how: 0, nset: 0, oset: 6e2768, sigsetsize: 8)
2717 bash-1998 [000] d... 140.733515: sys_rt_sigprocmask -> 0x0
2718 bash-1998 [000] d... 140.733516: sys_rt_sigaction(sig: 2, act: 7fff718846f0, oact: 7fff71884650, sigsetsize: 8)
2719 bash-1998 [000] d... 140.733516: sys_rt_sigaction -> 0x0
2720
2721You can see that the trace of the top most trace buffer shows only
2722the function tracing. The foo instance displays wakeups and task
2723switches.
2724
2725To remove the instances, simply delete their directories:
2726
2727 # rmdir instances/foo
2728 # rmdir instances/bar
2729 # rmdir instances/zoot
2730
2731Note, if a process has a trace file open in one of the instance
2732directories, the rmdir will fail with EBUSY.
2733
2734
2735Stack trace
1928----------- 2736-----------
2737Since the kernel has a fixed sized stack, it is important not to
2738waste it in functions. A kernel developer must be conscious of
2739what they allocate on the stack. If they add too much, the system
2740can be in danger of a stack overflow, and corruption will occur,
2741usually leading to a system panic.
2742
2743There are some tools that check this, usually with interrupts
2744periodically checking usage. But if you can perform a check
2745at every function call that will become very useful. As ftrace provides
2746a function tracer, it makes it convenient to check the stack size
2747at every function call. This is enabled via the stack tracer.
2748
2749CONFIG_STACK_TRACER enables the ftrace stack tracing functionality.
2750To enable it, write a '1' into /proc/sys/kernel/stack_tracer_enabled.
2751
2752 # echo 1 > /proc/sys/kernel/stack_tracer_enabled
2753
2754You can also enable it from the kernel command line to trace
2755the stack size of the kernel during boot up, by adding "stacktrace"
2756to the kernel command line parameter.
2757
2758After running it for a few minutes, the output looks like:
2759
2760 # cat stack_max_size
27612928
2762
2763 # cat stack_trace
2764 Depth Size Location (18 entries)
2765 ----- ---- --------
2766 0) 2928 224 update_sd_lb_stats+0xbc/0x4ac
2767 1) 2704 160 find_busiest_group+0x31/0x1f1
2768 2) 2544 256 load_balance+0xd9/0x662
2769 3) 2288 80 idle_balance+0xbb/0x130
2770 4) 2208 128 __schedule+0x26e/0x5b9
2771 5) 2080 16 schedule+0x64/0x66
2772 6) 2064 128 schedule_timeout+0x34/0xe0
2773 7) 1936 112 wait_for_common+0x97/0xf1
2774 8) 1824 16 wait_for_completion+0x1d/0x1f
2775 9) 1808 128 flush_work+0xfe/0x119
2776 10) 1680 16 tty_flush_to_ldisc+0x1e/0x20
2777 11) 1664 48 input_available_p+0x1d/0x5c
2778 12) 1616 48 n_tty_poll+0x6d/0x134
2779 13) 1568 64 tty_poll+0x64/0x7f
2780 14) 1504 880 do_select+0x31e/0x511
2781 15) 624 400 core_sys_select+0x177/0x216
2782 16) 224 96 sys_select+0x91/0xb9
2783 17) 128 128 system_call_fastpath+0x16/0x1b
2784
2785Note, if -mfentry is being used by gcc, functions get traced before
2786they set up the stack frame. This means that leaf level functions
2787are not tested by the stack tracer when -mfentry is used.
2788
2789Currently, -mfentry is used by gcc 4.6.0 and above on x86 only.
2790
2791---------
1929 2792
1930More details can be found in the source code, in the 2793More details can be found in the source code, in the
1931kernel/trace/*.c files. 2794kernel/trace/*.c files.
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index 24ce6823a09e..d9c3e682312c 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -1,6 +1,8 @@
1 Uprobe-tracer: Uprobe-based Event Tracing 1 Uprobe-tracer: Uprobe-based Event Tracing
2 ========================================= 2 =========================================
3 Documentation written by Srikar Dronamraju 3
4 Documentation written by Srikar Dronamraju
5
4 6
5Overview 7Overview
6-------- 8--------
@@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via
13/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled. 15/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
14 16
15However, unlike the kprobe-event tracer, the uprobe event interface expects the 17However, unlike the kprobe-event tracer, the uprobe event interface expects the
16user to calculate the offset of the probepoint in the object 18user to calculate the offset of the probepoint in the object.
17 19
18Synopsis of uprobe_tracer 20Synopsis of uprobe_tracer
19------------------------- 21-------------------------
20 p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe 22 p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe
23 r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe)
24 -:[GRP/]EVENT : Clear uprobe or uretprobe event
21 25
22 GRP : Group name. If omitted, use "uprobes" for it. 26 GRP : Group name. If omitted, "uprobes" is the default value.
23 EVENT : Event name. If omitted, the event name is generated 27 EVENT : Event name. If omitted, the event name is generated based
24 based on SYMBOL+offs. 28 on SYMBOL+offs.
25 PATH : path to an executable or a library. 29 PATH : Path to an executable or a library.
26 SYMBOL[+offs] : Symbol+offset where the probe is inserted. 30 SYMBOL[+offs] : Symbol+offset where the probe is inserted.
27 31
28 FETCHARGS : Arguments. Each probe can have up to 128 args. 32 FETCHARGS : Arguments. Each probe can have up to 128 args.
29 %REG : Fetch register REG 33 %REG : Fetch register REG
30 34
31Event Profiling 35Event Profiling
32--------------- 36---------------
33 You can check the total number of probe hits and probe miss-hits via 37You can check the total number of probe hits and probe miss-hits via
34/sys/kernel/debug/tracing/uprobe_profile. 38/sys/kernel/debug/tracing/uprobe_profile.
35 The first column is event name, the second is the number of probe hits, 39The first column is event name, the second is the number of probe hits,
36the third is the number of probe miss-hits. 40the third is the number of probe miss-hits.
37 41
38Usage examples 42Usage examples
39-------------- 43--------------
40To add a probe as a new event, write a new definition to uprobe_events 44 * To add a probe as a new uprobe event, write a new definition to uprobe_events
41as below. 45as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash)
46
47 echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
48
49 * Add a probe as a new uretprobe event:
50
51 echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
52
53 * Unset registered event:
42 54
43 echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events 55 echo '-:p_bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
44 56
45 This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash 57 * Print out the events that are registered:
46 58
47 echo > /sys/kernel/debug/tracing/uprobe_events 59 cat /sys/kernel/debug/tracing/uprobe_events
48 60
49 This clears all probe points. 61 * Clear all events:
50 62
51The following example shows how to dump the instruction pointer and %ax 63 echo > /sys/kernel/debug/tracing/uprobe_events
52a register at the probed text address. Here we are trying to probe 64
53function zfree in /bin/zsh 65The following example shows how to dump the instruction pointer and %ax register
66at the probed text address. Probe zfree function in /bin/zsh:
54 67
55 # cd /sys/kernel/debug/tracing/ 68 # cd /sys/kernel/debug/tracing/
56 # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp 69 # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
57 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh 70 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
58 # objdump -T /bin/zsh | grep -w zfree 71 # objdump -T /bin/zsh | grep -w zfree
59 0000000000446420 g DF .text 0000000000000012 Base zfree 72 0000000000446420 g DF .text 0000000000000012 Base zfree
60 73
610x46420 is the offset of zfree in object /bin/zsh that is loaded at 74 0x46420 is the offset of zfree in object /bin/zsh that is loaded at
620x00400000. Hence the command to probe would be : 75 0x00400000. Hence the command to uprobe would be:
76
77 # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
78
79 And the same for the uretprobe would be:
63 80
64 # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events 81 # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
65 82
66Please note: User has to explicitly calculate the offset of the probepoint 83Please note: User has to explicitly calculate the offset of the probe-point
67in the object. We can see the events that are registered by looking at the 84in the object. We can see the events that are registered by looking at the
68uprobe_events file. 85uprobe_events file.
69 86
70 # cat uprobe_events 87 # cat uprobe_events
71 p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax 88 p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax
89 r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax
72 90
73The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format 91The format of events can be seen by viewing the file events/uprobes/zfree_entry/format
74 92
75 # cat events/uprobes/p_zsh_0x46420/format 93 # cat events/uprobes/zfree_entry/format
76 name: p_zsh_0x46420 94 name: zfree_entry
77 ID: 922 95 ID: 922
78 format: 96 format:
79 field:unsigned short common_type; offset:0; size:2; signed:0; 97 field:unsigned short common_type; offset:0; size:2; signed:0;
80 field:unsigned char common_flags; offset:2; size:1; signed:0; 98 field:unsigned char common_flags; offset:2; size:1; signed:0;
81 field:unsigned char common_preempt_count; offset:3; size:1; signed:0; 99 field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
82 field:int common_pid; offset:4; size:4; signed:1; 100 field:int common_pid; offset:4; size:4; signed:1;
83 field:int common_padding; offset:8; size:4; signed:1; 101 field:int common_padding; offset:8; size:4; signed:1;
84 102
85 field:unsigned long __probe_ip; offset:12; size:4; signed:0; 103 field:unsigned long __probe_ip; offset:12; size:4; signed:0;
86 field:u32 arg1; offset:16; size:4; signed:0; 104 field:u32 arg1; offset:16; size:4; signed:0;
87 field:u32 arg2; offset:20; size:4; signed:0; 105 field:u32 arg2; offset:20; size:4; signed:0;
88 106
89 print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2 107 print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
90 108
@@ -94,6 +112,7 @@ events, you need to enable it by:
94 # echo 1 > events/uprobes/enable 112 # echo 1 > events/uprobes/enable
95 113
96Let's disable the event after sleeping for some time. 114Let's disable the event after sleeping for some time.
115
97 # sleep 20 116 # sleep 20
98 # echo 0 > events/uprobes/enable 117 # echo 0 > events/uprobes/enable
99 118
@@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace.
104 # 123 #
105 # TASK-PID CPU# TIMESTAMP FUNCTION 124 # TASK-PID CPU# TIMESTAMP FUNCTION
106 # | | | | | 125 # | | | | |
107 zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 126 zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79
108 zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 127 zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
109 zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 128 zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79
110 zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 129 zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
111 130
112Each line shows us probes were triggered for a pid 24842 with ip being 131The output shows that the uprobe was triggered for pid 24842 with ip being 0x446420
1130x446421 and contents of ax register being 79. 132and the contents of the ax register being 79. The uretprobe was triggered with ip at
1330x446540 with counterpart function entry at 0x446420.
diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index 4204eb01fd38..1392b61d6ebe 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -33,6 +33,10 @@ built with CONFIG_USB_SUSPEND enabled (which depends on
33CONFIG_PM_RUNTIME). System PM support is present only if the kernel 33CONFIG_PM_RUNTIME). System PM support is present only if the kernel
34was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled. 34was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled.
35 35
36(Starting with the 3.10 kernel release, dynamic PM support for USB is
37present whenever the kernel was built with CONFIG_PM_RUNTIME enabled.
38The CONFIG_USB_SUSPEND option has been eliminated.)
39
36 40
37 What is Remote Wakeup? 41 What is Remote Wakeup?
38 ---------------------- 42 ----------------------
@@ -206,10 +210,8 @@ initialized to 5. (The idle-delay values for already existing devices
206will not be affected.) 210will not be affected.)
207 211
208Setting the initial default idle-delay to -1 will prevent any 212Setting the initial default idle-delay to -1 will prevent any
209autosuspend of any USB device. This is a simple alternative to 213autosuspend of any USB device. This has the benefit of allowing you
210disabling CONFIG_USB_SUSPEND and rebuilding the kernel, and it has the 214then to enable autosuspend for selected devices.
211added benefit of allowing you to enable autosuspend for selected
212devices.
213 215
214 216
215 Warnings 217 Warnings
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting
index 706d7ed9d8d2..8eaa2fc4b8fa 100644
--- a/Documentation/vm/overcommit-accounting
+++ b/Documentation/vm/overcommit-accounting
@@ -8,7 +8,9 @@ The Linux kernel supports the following overcommit handling modes
8 default. 8 default.
9 9
101 - Always overcommit. Appropriate for some scientific 101 - Always overcommit. Appropriate for some scientific
11 applications. 11 applications. Classic example is code using sparse arrays
12 and just relying on the virtual memory consisting almost
13 entirely of zero pages.
12 14
132 - Don't overcommit. The total address space commit 152 - Don't overcommit. The total address space commit
14 for the system is not permitted to exceed swap + a 16 for the system is not permitted to exceed swap + a
@@ -18,6 +20,10 @@ The Linux kernel supports the following overcommit handling modes
18 pages but will receive errors on memory allocation as 20 pages but will receive errors on memory allocation as
19 appropriate. 21 appropriate.
20 22
23 Useful for applications that want to guarantee their
24 memory allocations will be available in the future
25 without having to initialize every page.
26
21The overcommit policy is set via the sysctl `vm.overcommit_memory'. 27The overcommit policy is set via the sysctl `vm.overcommit_memory'.
22 28
23The overcommit percentage is set via `vm.overcommit_ratio'. 29The overcommit percentage is set via `vm.overcommit_ratio'.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index d6498e3cd713..881582f75c9c 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -13,7 +13,9 @@ ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) 13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
14... unused hole ... 14... unused hole ...
15ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 15ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
16ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space 16ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
17ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
18ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
17 19
18The direct mapping covers all memory in the system up to the highest 20The direct mapping covers all memory in the system up to the highest
19memory address (this means in some cases it can also include PCI memory 21memory address (this means in some cases it can also include PCI memory