diff options
Diffstat (limited to 'Documentation')
83 files changed, 4156 insertions, 921 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei new file mode 100644 index 000000000000..2066f0bbd453 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-mei | |||
@@ -0,0 +1,7 @@ | |||
1 | What: /sys/bus/mei/devices/.../modalias | ||
2 | Date: March 2013 | ||
3 | KernelVersion: 3.10 | ||
4 | Contact: Samuel Ortiz <sameo@linux.intel.com> | ||
5 | linux-mei@linux.intel.com | ||
6 | Description: Stores the same MODALIAS value emitted by uevent | ||
7 | Format: mei:<mei device name> | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index c8baaf53594a..f093e59cbe5f 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb | |||
@@ -32,7 +32,7 @@ Date: January 2008 | |||
32 | KernelVersion: 2.6.25 | 32 | KernelVersion: 2.6.25 |
33 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> | 33 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> |
34 | Description: | 34 | Description: |
35 | If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file | 35 | If CONFIG_PM_RUNTIME is enabled then this file |
36 | is present. When read, it returns the total time (in msec) | 36 | is present. When read, it returns the total time (in msec) |
37 | that the USB device has been connected to the machine. This | 37 | that the USB device has been connected to the machine. This |
38 | file is read-only. | 38 | file is read-only. |
@@ -45,7 +45,7 @@ Date: January 2008 | |||
45 | KernelVersion: 2.6.25 | 45 | KernelVersion: 2.6.25 |
46 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> | 46 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> |
47 | Description: | 47 | Description: |
48 | If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file | 48 | If CONFIG_PM_RUNTIME is enabled then this file |
49 | is present. When read, it returns the total time (in msec) | 49 | is present. When read, it returns the total time (in msec) |
50 | that the USB device has been active, i.e. not in a suspended | 50 | that the USB device has been active, i.e. not in a suspended |
51 | state. This file is read-only. | 51 | state. This file is read-only. |
@@ -187,7 +187,7 @@ What: /sys/bus/usb/devices/.../power/usb2_hardware_lpm | |||
187 | Date: September 2011 | 187 | Date: September 2011 |
188 | Contact: Andiry Xu <andiry.xu@amd.com> | 188 | Contact: Andiry Xu <andiry.xu@amd.com> |
189 | Description: | 189 | Description: |
190 | If CONFIG_USB_SUSPEND is set and a USB 2.0 lpm-capable device | 190 | If CONFIG_PM_RUNTIME is set and a USB 2.0 lpm-capable device |
191 | is plugged in to a xHCI host which support link PM, it will | 191 | is plugged in to a xHCI host which support link PM, it will |
192 | perform a LPM test; if the test is passed and host supports | 192 | perform a LPM test; if the test is passed and host supports |
193 | USB2 hardware LPM (xHCI 1.0 feature), USB2 hardware LPM will | 193 | USB2 hardware LPM (xHCI 1.0 feature), USB2 hardware LPM will |
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9c978dcae07d..2447698aed41 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | |||
@@ -173,3 +173,15 @@ Description: Processor frequency boosting control | |||
173 | Boosting allows the CPU and the firmware to run at a frequency | 173 | Boosting allows the CPU and the firmware to run at a frequency |
174 | beyound it's nominal limit. | 174 | beyound it's nominal limit. |
175 | More details can be found in Documentation/cpu-freq/boost.txt | 175 | More details can be found in Documentation/cpu-freq/boost.txt |
176 | |||
177 | |||
178 | What: /sys/devices/system/cpu/cpu#/crash_notes | ||
179 | /sys/devices/system/cpu/cpu#/crash_notes_size | ||
180 | Date: April 2013 | ||
181 | Contact: kexec@lists.infradead.org | ||
182 | Description: address and size of the percpu note. | ||
183 | |||
184 | crash_notes: the physical address of the memory that holds the | ||
185 | note of cpu#. | ||
186 | |||
187 | crash_notes_size: size of the note of cpu#. | ||
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 7514dbf0a679..c36892c072da 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl | |||
@@ -227,7 +227,7 @@ X!Isound/sound_firmware.c | |||
227 | <chapter id="uart16x50"> | 227 | <chapter id="uart16x50"> |
228 | <title>16x50 UART Driver</title> | 228 | <title>16x50 UART Driver</title> |
229 | !Edrivers/tty/serial/serial_core.c | 229 | !Edrivers/tty/serial/serial_core.c |
230 | !Edrivers/tty/serial/8250/8250.c | 230 | !Edrivers/tty/serial/8250/8250_core.c |
231 | </chapter> | 231 | </chapter> |
232 | 232 | ||
233 | <chapter id="fbdev"> | 233 | <chapter id="fbdev"> |
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 31ef8fe07f82..79e789b8b8ea 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -217,9 +217,14 @@ over a rather long period of time, but improvements are always welcome! | |||
217 | whether the increased speed is worth it. | 217 | whether the increased speed is worth it. |
218 | 218 | ||
219 | 8. Although synchronize_rcu() is slower than is call_rcu(), it | 219 | 8. Although synchronize_rcu() is slower than is call_rcu(), it |
220 | usually results in simpler code. So, unless update performance | 220 | usually results in simpler code. So, unless update performance is |
221 | is critically important or the updaters cannot block, | 221 | critically important, the updaters cannot block, or the latency of |
222 | synchronize_rcu() should be used in preference to call_rcu(). | 222 | synchronize_rcu() is visible from userspace, synchronize_rcu() |
223 | should be used in preference to call_rcu(). Furthermore, | ||
224 | kfree_rcu() usually results in even simpler code than does | ||
225 | synchronize_rcu() without synchronize_rcu()'s multi-millisecond | ||
226 | latency. So please take advantage of kfree_rcu()'s "fire and | ||
227 | forget" memory-freeing capabilities where it applies. | ||
223 | 228 | ||
224 | An especially important property of the synchronize_rcu() | 229 | An especially important property of the synchronize_rcu() |
225 | primitive is that it automatically self-limits: if grace periods | 230 | primitive is that it automatically self-limits: if grace periods |
@@ -268,7 +273,8 @@ over a rather long period of time, but improvements are always welcome! | |||
268 | e. Periodically invoke synchronize_rcu(), permitting a limited | 273 | e. Periodically invoke synchronize_rcu(), permitting a limited |
269 | number of updates per grace period. | 274 | number of updates per grace period. |
270 | 275 | ||
271 | The same cautions apply to call_rcu_bh() and call_rcu_sched(). | 276 | The same cautions apply to call_rcu_bh(), call_rcu_sched(), |
277 | call_srcu(), and kfree_rcu(). | ||
272 | 278 | ||
273 | 9. All RCU list-traversal primitives, which include | 279 | 9. All RCU list-traversal primitives, which include |
274 | rcu_dereference(), list_for_each_entry_rcu(), and | 280 | rcu_dereference(), list_for_each_entry_rcu(), and |
@@ -296,9 +302,9 @@ over a rather long period of time, but improvements are always welcome! | |||
296 | all currently executing rcu_read_lock()-protected RCU read-side | 302 | all currently executing rcu_read_lock()-protected RCU read-side |
297 | critical sections complete. It does -not- necessarily guarantee | 303 | critical sections complete. It does -not- necessarily guarantee |
298 | that all currently running interrupts, NMIs, preempt_disable() | 304 | that all currently running interrupts, NMIs, preempt_disable() |
299 | code, or idle loops will complete. Therefore, if you do not have | 305 | code, or idle loops will complete. Therefore, if your |
300 | rcu_read_lock()-protected read-side critical sections, do -not- | 306 | read-side critical sections are protected by something other |
301 | use synchronize_rcu(). | 307 | than rcu_read_lock(), do -not- use synchronize_rcu(). |
302 | 308 | ||
303 | Similarly, disabling preemption is not an acceptable substitute | 309 | Similarly, disabling preemption is not an acceptable substitute |
304 | for rcu_read_lock(). Code that attempts to use preemption | 310 | for rcu_read_lock(). Code that attempts to use preemption |
@@ -401,9 +407,9 @@ over a rather long period of time, but improvements are always welcome! | |||
401 | read-side critical sections. It is the responsibility of the | 407 | read-side critical sections. It is the responsibility of the |
402 | RCU update-side primitives to deal with this. | 408 | RCU update-side primitives to deal with this. |
403 | 409 | ||
404 | 17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and | 410 | 17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the |
405 | the __rcu sparse checks to validate your RCU code. These | 411 | __rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to |
406 | can help find problems as follows: | 412 | validate your RCU code. These can help find problems as follows: |
407 | 413 | ||
408 | CONFIG_PROVE_RCU: check that accesses to RCU-protected data | 414 | CONFIG_PROVE_RCU: check that accesses to RCU-protected data |
409 | structures are carried out under the proper RCU | 415 | structures are carried out under the proper RCU |
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt index a102d4b3724b..cd83d2348fef 100644 --- a/Documentation/RCU/lockdep.txt +++ b/Documentation/RCU/lockdep.txt | |||
@@ -64,6 +64,11 @@ checking of rcu_dereference() primitives: | |||
64 | but retain the compiler constraints that prevent duplicating | 64 | but retain the compiler constraints that prevent duplicating |
65 | or coalescsing. This is useful when when testing the | 65 | or coalescsing. This is useful when when testing the |
66 | value of the pointer itself, for example, against NULL. | 66 | value of the pointer itself, for example, against NULL. |
67 | rcu_access_index(idx): | ||
68 | Return the value of the index and omit all barriers, but | ||
69 | retain the compiler constraints that prevent duplicating | ||
70 | or coalescing. This is useful when testing the | |
71 | value of the index itself, for example, against -1. | ||
67 | 72 | ||
68 | The rcu_dereference_check() check expression can be any boolean | 73 | The rcu_dereference_check() check expression can be any boolean |
69 | expression, but would normally include a lockdep expression. However, | 74 | expression, but would normally include a lockdep expression. However, |
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index 38428c125135..2e319d1b9ef2 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt | |||
@@ -79,7 +79,20 @@ complete. Pseudo-code using rcu_barrier() is as follows: | |||
79 | 2. Execute rcu_barrier(). | 79 | 2. Execute rcu_barrier(). |
80 | 3. Allow the module to be unloaded. | 80 | 3. Allow the module to be unloaded. |
81 | 81 | ||
82 | The rcutorture module makes use of rcu_barrier in its exit function | 82 | There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier() |
83 | functions for the other flavors of RCU, and you of course must match | ||
84 | the flavor of rcu_barrier() with that of call_rcu(). If your module | ||
85 | uses multiple flavors of call_rcu(), then it must also use multiple | ||
86 | flavors of rcu_barrier() when unloading that module. For example, if | ||
87 | it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on | ||
88 | srcu_struct_2, then the following three lines of code will be required | |
89 | when unloading: | ||
90 | |||
91 | 1 rcu_barrier_bh(); | ||
92 | 2 srcu_barrier(&srcu_struct_1); | ||
93 | 3 srcu_barrier(&srcu_struct_2); | ||
94 | |||
95 | The rcutorture module makes use of rcu_barrier() in its exit function | ||
83 | as follows: | 96 | as follows: |
84 | 97 | ||
85 | 1 static void | 98 | 1 static void |
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386b..e38b8df3d727 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt | |||
@@ -92,14 +92,14 @@ If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set, | |||
92 | more information is printed with the stall-warning message, for example: | 92 | more information is printed with the stall-warning message, for example: |
93 | 93 | ||
94 | INFO: rcu_preempt detected stall on CPU | 94 | INFO: rcu_preempt detected stall on CPU |
95 | 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 | 95 | 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543 |
96 | (t=65000 jiffies) | 96 | (t=65000 jiffies) |
97 | 97 | ||
98 | In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is | 98 | In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is |
99 | printed: | 99 | printed: |
100 | 100 | ||
101 | INFO: rcu_preempt detected stall on CPU | 101 | INFO: rcu_preempt detected stall on CPU |
102 | 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending | 102 | 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D |
103 | (t=65000 jiffies) | 103 | (t=65000 jiffies) |
104 | 104 | ||
105 | The "(64628 ticks this GP)" indicates that this CPU has taken more | 105 | The "(64628 ticks this GP)" indicates that this CPU has taken more |
@@ -116,13 +116,28 @@ number between the two "/"s is the value of the nesting, which will | |||
116 | be a small positive number if in the idle loop and a very large positive | 116 | be a small positive number if in the idle loop and a very large positive |
117 | number (as shown above) otherwise. | 117 | number (as shown above) otherwise. |
118 | 118 | ||
119 | For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is | 119 | The "softirq=" portion of the message tracks the number of RCU softirq |
120 | not in the process of trying to force itself into dyntick-idle state, the | 120 | handlers that the stalled CPU has executed. The number before the "/" |
121 | "." indicates that the CPU has not given up forcing RCU into dyntick-idle | 121 | is the number that had executed since boot at the time that this CPU |
122 | mode (it would be "H" otherwise), and the "timer not pending" indicates | 122 | last noted the beginning of a grace period, which might be the current |
123 | that the CPU has not recently forced RCU into dyntick-idle mode (it | 123 | (stalled) grace period, or it might be some earlier grace period (for |
124 | would otherwise indicate the number of microseconds remaining in this | 124 | example, if the CPU might have been in dyntick-idle mode for an extended |
125 | forced state). | 125 | time period). The number after the "/" is the number that have executed |
126 | since boot until the current time. If this latter number stays constant | ||
127 | across repeated stall-warning messages, it is possible that RCU's softirq | ||
128 | handlers are no longer able to execute on this CPU. This can happen if | ||
129 | the stalled CPU is spinning with interrupts disabled, or, in -rt | |
130 | kernels, if a high-priority process is starving RCU's softirq handler. | ||
131 | |||
132 | For CONFIG_RCU_FAST_NO_HZ kernels, the "last_accelerate:" prints the | ||
133 | low-order 16 bits (in hex) of the jiffies counter when this CPU last | ||
134 | invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked | ||
135 | rcu_accelerate_cbs() from rcu_prepare_for_idle(). The "nonlazy_posted:" | ||
136 | prints the number of non-lazy callbacks posted since the last call to | ||
137 | rcu_needs_cpu(). Finally, an "L" indicates that there are currently | ||
138 | no non-lazy callbacks ("." is printed otherwise, as shown above) and | ||
139 | "D" indicates that dyntick-idle processing is enabled ("." is printed | ||
140 | otherwise, for example, if disabled via the "nohz=" kernel boot parameter). | ||
126 | 141 | ||
127 | 142 | ||
128 | Multiple Warnings From One Stall | 143 | Multiple Warnings From One Stall |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 0cc7820967f4..10df0b82f459 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -265,9 +265,9 @@ rcu_dereference() | |||
265 | rcu_read_lock(); | 265 | rcu_read_lock(); |
266 | p = rcu_dereference(head.next); | 266 | p = rcu_dereference(head.next); |
267 | rcu_read_unlock(); | 267 | rcu_read_unlock(); |
268 | x = p->address; | 268 | x = p->address; /* BUG!!! */ |
269 | rcu_read_lock(); | 269 | rcu_read_lock(); |
270 | y = p->data; | 270 | y = p->data; /* BUG!!! */ |
271 | rcu_read_unlock(); | 271 | rcu_read_unlock(); |
272 | 272 | ||
273 | Holding a reference from one RCU read-side critical section | 273 | Holding a reference from one RCU read-side critical section |
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index aa0c1e63f050..6e97e73d87b5 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches | |||
@@ -420,7 +420,7 @@ person it names. This tag documents that potentially interested parties | |||
420 | have been included in the discussion | 420 | have been included in the discussion |
421 | 421 | ||
422 | 422 | ||
423 | 14) Using Reported-by:, Tested-by: and Reviewed-by: | 423 | 14) Using Reported-by:, Tested-by:, Reviewed-by: and Suggested-by: |
424 | 424 | ||
425 | If this patch fixes a problem reported by somebody else, consider adding a | 425 | If this patch fixes a problem reported by somebody else, consider adding a |
426 | Reported-by: tag to credit the reporter for their contribution. Please | 426 | Reported-by: tag to credit the reporter for their contribution. Please |
@@ -468,6 +468,13 @@ done on the patch. Reviewed-by: tags, when supplied by reviewers known to | |||
468 | understand the subject area and to perform thorough reviews, will normally | 468 | understand the subject area and to perform thorough reviews, will normally |
469 | increase the likelihood of your patch getting into the kernel. | 469 | increase the likelihood of your patch getting into the kernel. |
470 | 470 | ||
471 | A Suggested-by: tag indicates that the patch idea is suggested by the person | ||
472 | named and ensures credit to the person for the idea. Please note that this | ||
473 | tag should not be added without the reporter's permission, especially if the | ||
474 | idea was not posted in a public forum. That said, if we diligently credit our | ||
475 | idea reporters, they will, hopefully, be inspired to help us again in the | ||
476 | future. | ||
477 | |||
471 | 478 | ||
472 | 15) The canonical patch format | 479 | 15) The canonical patch format |
473 | 480 | ||
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.txt new file mode 100644 index 000000000000..e09a88aa3136 --- /dev/null +++ b/Documentation/arm/sunxi/clocks.txt | |||
@@ -0,0 +1,56 @@ | |||
1 | Frequently asked questions about the sunxi clock system | ||
2 | ======================================================= | ||
3 | |||
4 | This document contains useful bits of information that people tend to ask | ||
5 | about the sunxi clock system, as well as accompanying ASCII art when adequate. | ||
6 | |||
7 | Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the | ||
8 | system? | ||
9 | |||
10 | A: The 24MHz oscillator allows gating to save power. Indeed, if gated | ||
11 | carelessly the system would stop functioning, but with the right | ||
12 | steps, one can gate it and keep the system running. Consider this | ||
13 | simplified suspend example: | ||
14 | |||
15 | While the system is operational, you would see something like | ||
16 | |||
17 | 24MHz 32kHz | ||
18 | | | ||
19 | PLL1 | ||
20 | \ | ||
21 | \_ CPU Mux | ||
22 | | | ||
23 | [CPU] | ||
24 | |||
25 | When you are about to suspend, you switch the CPU Mux to the 32kHz | ||
26 | oscillator: | ||
27 | |||
28 | 24MHz 32kHz | |
29 | | | | ||
30 | PLL1 | | ||
31 | / | ||
32 | CPU Mux _/ | ||
33 | | | ||
34 | [CPU] | ||
35 | |||
36 | Finally you can gate the main oscillator | ||
37 | |||
38 | 32kHz | ||
39 | | | ||
40 | | | ||
41 | / | ||
42 | CPU Mux _/ | ||
43 | | | ||
44 | [CPU] | ||
45 | |||
46 | Q: Where can I learn more about the sunxi clocks? | |
47 | |||
48 | A: The linux-sunxi wiki contains a page documenting the clock registers, | ||
49 | you can find it at | ||
50 | |||
51 | http://linux-sunxi.org/A10/CCM | ||
52 | |||
53 | The authoritative source for information at this time is the ccmu driver | ||
54 | released by Allwinner, you can find it at | ||
55 | |||
56 | https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu | ||
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt index 18b06ca038ea..1c732f0c6758 100644 --- a/Documentation/backlight/lp855x-driver.txt +++ b/Documentation/backlight/lp855x-driver.txt | |||
@@ -32,14 +32,10 @@ Platform data for lp855x | |||
32 | For supporting platform specific data, the lp855x platform data can be used. | 32 | For supporting platform specific data, the lp855x platform data can be used. |
33 | 33 | ||
34 | * name : Backlight driver name. If it is not defined, default name is set. | 34 | * name : Backlight driver name. If it is not defined, default name is set. |
35 | * mode : Brightness control mode. PWM or register based. | ||
36 | * device_control : Value of DEVICE CONTROL register. | 35 | * device_control : Value of DEVICE CONTROL register. |
37 | * initial_brightness : Initial value of backlight brightness. | 36 | * initial_brightness : Initial value of backlight brightness. |
38 | * period_ns : Platform specific PWM period value. unit is nano. | 37 | * period_ns : Platform specific PWM period value. unit is nano. |
39 | Only valid when brightness is pwm input mode. | 38 | Only valid when brightness is pwm input mode. |
40 | * load_new_rom_data : | ||
41 | 0 : use default configuration data | ||
42 | 1 : update values of eeprom or eprom registers on loading driver | ||
43 | * size_program : Total size of lp855x_rom_data. | 39 | * size_program : Total size of lp855x_rom_data. |
44 | * rom_data : List of new eeprom/eprom registers. | 40 | * rom_data : List of new eeprom/eprom registers. |
45 | 41 | ||
@@ -54,10 +50,8 @@ static struct lp855x_rom_data lp8552_eeprom_arr[] = { | |||
54 | 50 | ||
55 | static struct lp855x_platform_data lp8552_pdata = { | 51 | static struct lp855x_platform_data lp8552_pdata = { |
56 | .name = "lcd-bl", | 52 | .name = "lcd-bl", |
57 | .mode = REGISTER_BASED, | ||
58 | .device_control = I2C_CONFIG(LP8552), | 53 | .device_control = I2C_CONFIG(LP8552), |
59 | .initial_brightness = INITIAL_BRT, | 54 | .initial_brightness = INITIAL_BRT, |
60 | .load_new_rom_data = 1, | ||
61 | .size_program = ARRAY_SIZE(lp8552_eeprom_arr), | 55 | .size_program = ARRAY_SIZE(lp8552_eeprom_arr), |
62 | .rom_data = lp8552_eeprom_arr, | 56 | .rom_data = lp8552_eeprom_arr, |
63 | }; | 57 | }; |
@@ -65,7 +59,6 @@ static struct lp855x_platform_data lp8552_pdata = { | |||
65 | example 2) lp8556 platform data : pwm input mode with default rom data | 59 | example 2) lp8556 platform data : pwm input mode with default rom data |
66 | 60 | ||
67 | static struct lp855x_platform_data lp8556_pdata = { | 61 | static struct lp855x_platform_data lp8556_pdata = { |
68 | .mode = PWM_BASED, | ||
69 | .device_control = PWM_CONFIG(LP8556), | 62 | .device_control = PWM_CONFIG(LP8556), |
70 | .initial_brightness = INITIAL_BRT, | 63 | .initial_brightness = INITIAL_BRT, |
71 | .period_ns = 1000000, | 64 | .period_ns = 1000000, |
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index bcf1a00b06a1..638bf17ff869 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -442,7 +442,7 @@ You can attach the current shell task by echoing 0: | |||
442 | You can use the cgroup.procs file instead of the tasks file to move all | 442 | You can use the cgroup.procs file instead of the tasks file to move all |
443 | threads in a threadgroup at once. Echoing the PID of any task in a | 443 | threads in a threadgroup at once. Echoing the PID of any task in a |
444 | threadgroup to cgroup.procs causes all tasks in that threadgroup to be | 444 | threadgroup to cgroup.procs causes all tasks in that threadgroup to be |
445 | be attached to the cgroup. Writing 0 to cgroup.procs moves all tasks | 445 | attached to the cgroup. Writing 0 to cgroup.procs moves all tasks |
446 | in the writing task's threadgroup. | 446 | in the writing task's threadgroup. |
447 | 447 | ||
448 | Note: Since every task is always a member of exactly one cgroup in each | 448 | Note: Since every task is always a member of exactly one cgroup in each |
@@ -580,6 +580,7 @@ propagation along the hierarchy. See the comment on | |||
580 | cgroup_for_each_descendant_pre() for details. | 580 | cgroup_for_each_descendant_pre() for details. |
581 | 581 | ||
582 | void css_offline(struct cgroup *cgrp); | 582 | void css_offline(struct cgroup *cgrp); |
583 | (cgroup_mutex held by caller) | ||
583 | 584 | ||
584 | This is the counterpart of css_online() and called iff css_online() | 585 | This is the counterpart of css_online() and called iff css_online() |
585 | has succeeded on @cgrp. This signifies the beginning of the end of | 586 | has succeeded on @cgrp. This signifies the beginning of the end of |
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 16624a7f8222..3c1095ca02ea 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt | |||
@@ -13,9 +13,7 @@ either an integer or * for all. Access is a composition of r | |||
13 | The root device cgroup starts with rwm to 'all'. A child device | 13 | The root device cgroup starts with rwm to 'all'. A child device |
14 | cgroup gets a copy of the parent. Administrators can then remove | 14 | cgroup gets a copy of the parent. Administrators can then remove |
15 | devices from the whitelist or add new entries. A child cgroup can | 15 | devices from the whitelist or add new entries. A child cgroup can |
16 | never receive a device access which is denied by its parent. However | 16 | never receive a device access which is denied by its parent. |
17 | when a device access is removed from a parent it will not also be | ||
18 | removed from the child(ren). | ||
19 | 17 | ||
20 | 2. User Interface | 18 | 2. User Interface |
21 | 19 | ||
@@ -50,3 +48,69 @@ task to a new cgroup. (Again we'll probably want to change that). | |||
50 | 48 | ||
51 | A cgroup may not be granted more permissions than the cgroup's | 49 | A cgroup may not be granted more permissions than the cgroup's |
52 | parent has. | 50 | parent has. |
51 | |||
52 | 4. Hierarchy | ||
53 | |||
54 | device cgroups maintain hierarchy by making sure a cgroup never has more | ||
55 | access permissions than its parent. Every time an entry is written to | ||
56 | a cgroup's devices.deny file, all its children will have that entry removed | ||
57 | from their whitelist and all the locally set whitelist entries will be | ||
58 | re-evaluated. In case one of the locally set whitelist entries would provide | ||
59 | more access than the cgroup's parent, it'll be removed from the whitelist. | ||
60 | |||
61 | Example: | ||
62 | A | ||
63 | / \ | ||
64 | B | ||
65 | |||
66 | group behavior exceptions | ||
67 | A allow "b 8:* rwm", "c 116:1 rw" | ||
68 | B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" | ||
69 | |||
70 | If a device is denied in group A: | ||
71 | # echo "c 116:* r" > A/devices.deny | ||
72 | it'll propagate down and after revalidating B's entries, the whitelist entry | ||
73 | "c 116:2 rwm" will be removed: | ||
74 | |||
75 | group whitelist entries denied devices | ||
76 | A all "b 8:* rwm", "c 116:* rw" | ||
77 | B "c 1:3 rwm", "b 3:* rwm" all the rest | ||
78 | |||
79 | In case parent's exceptions change and local exceptions are not allowed | ||
80 | anymore, they'll be deleted. | ||
81 | |||
82 | Notice that new whitelist entries will not be propagated: | ||
83 | A | ||
84 | / \ | ||
85 | B | ||
86 | |||
87 | group whitelist entries denied devices | ||
88 | A "c 1:3 rwm", "c 1:5 r" all the rest | ||
89 | B "c 1:3 rwm", "c 1:5 r" all the rest | ||
90 | |||
91 | when adding "c *:3 rwm": | ||
92 | # echo "c *:3 rwm" >A/devices.allow | ||
93 | |||
94 | the result: | ||
95 | group whitelist entries denied devices | ||
96 | A "c *:3 rwm", "c 1:5 r" all the rest | ||
97 | B "c 1:3 rwm", "c 1:5 r" all the rest | ||
98 | |||
99 | but now it'll be possible to add new entries to B: | ||
100 | # echo "c 2:3 rwm" >B/devices.allow | ||
101 | # echo "c 50:3 r" >B/devices.allow | ||
102 | or even | ||
103 | # echo "c *:3 rwm" >B/devices.allow | ||
104 | |||
105 | Allowing or denying all by writing 'a' to devices.allow or devices.deny will | ||
106 | not be possible once the device cgroup has children. | |
107 | |||
108 | 4.1 Hierarchy (internal implementation) | ||
109 | |||
110 | device cgroups is implemented internally using a behavior (ALLOW, DENY) and a | ||
111 | list of exceptions. The internal state is controlled using the same user | ||
112 | interface to preserve compatibility with the previous whitelist-only | ||
113 | implementation. Removal or addition of exceptions that will reduce the access | ||
114 | to devices will be propagated down the hierarchy. | ||
115 | For every propagated exception, the effective rules will be re-evaluated based | ||
116 | on current parent's access rules. | ||
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 8b8c28b9864c..f336ede58e62 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -40,6 +40,7 @@ Features: | |||
40 | - soft limit | 40 | - soft limit |
41 | - moving (recharging) account at moving a task is selectable. | 41 | - moving (recharging) account at moving a task is selectable. |
42 | - usage threshold notifier | 42 | - usage threshold notifier |
43 | - memory pressure notifier | ||
43 | - oom-killer disable knob and oom-notifier | 44 | - oom-killer disable knob and oom-notifier |
44 | - Root cgroup has no limit controls. | 45 | - Root cgroup has no limit controls. |
45 | 46 | ||
@@ -65,6 +66,7 @@ Brief summary of control files. | |||
65 | memory.stat # show various statistics | 66 | memory.stat # show various statistics |
66 | memory.use_hierarchy # set/show hierarchical account enabled | 67 | memory.use_hierarchy # set/show hierarchical account enabled |
67 | memory.force_empty # trigger forced move charge to parent | 68 | memory.force_empty # trigger forced move charge to parent |
69 | memory.pressure_level # set memory pressure notifications | ||
68 | memory.swappiness # set/show swappiness parameter of vmscan | 70 | memory.swappiness # set/show swappiness parameter of vmscan |
69 | (See sysctl's vm.swappiness) | 71 | (See sysctl's vm.swappiness) |
70 | memory.move_charge_at_immigrate # set/show controls of moving charges | 72 | memory.move_charge_at_immigrate # set/show controls of moving charges |
@@ -762,7 +764,73 @@ At reading, current status of OOM is shown. | |||
762 | under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may | 764 | under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may |
763 | be stopped.) | 765 | be stopped.) |
764 | 766 | ||
765 | 11. TODO | 767 | 11. Memory Pressure |
768 | |||
769 | The pressure level notifications can be used to monitor the memory | ||
770 | allocation cost; based on the pressure, applications can implement | ||
771 | different strategies of managing their memory resources. The pressure | ||
772 | levels are defined as follows: | ||
773 | |||
774 | The "low" level means that the system is reclaiming memory for new | ||
775 | allocations. Monitoring this reclaiming activity might be useful for | ||
776 | maintaining cache level. Upon notification, the program (typically | ||
777 | "Activity Manager") might analyze vmstat and act in advance (i.e. | ||
778 | prematurely shutdown unimportant services). | ||
779 | |||
780 | The "medium" level means that the system is experiencing medium memory | ||
781 | pressure, the system might be swapping, paging out active file caches, | ||
782 | etc. Upon this event applications may decide to further analyze | ||
783 | vmstat/zoneinfo/memcg or internal memory usage statistics and free any | ||
784 | resources that can be easily reconstructed or re-read from a disk. | ||
785 | |||
786 | The "critical" level means that the system is actively thrashing, it is | ||
787 | about to run out of memory (OOM) or even the in-kernel OOM killer is on its | ||
788 | way to trigger. Applications should do whatever they can to help the | ||
789 | system. It might be too late to consult with vmstat or any other | ||
790 | statistics, so it's advisable to take an immediate action. | ||
791 | |||
792 | The events are propagated upward until the event is handled, i.e. the | ||
793 | events are not pass-through. Here is what this means: for example you have | ||
794 | three cgroups: A->B->C. Now you set up an event listener on cgroups A, B | ||
795 | and C, and suppose group C experiences some pressure. In this situation, | ||
796 | only group C will receive the notification, i.e. groups A and B will not | ||
797 | receive it. This is done to avoid excessive "broadcasting" of messages, | ||
798 | which disturbs the system and which is especially bad if we are low on | ||
799 | memory or thrashing. So, organize the cgroups wisely, or propagate the | ||
800 | events manually (or, ask us to implement the pass-through events, | ||
801 | explaining why you would need them.) | ||
802 | |||
803 | The file memory.pressure_level is only used to setup an eventfd. To | ||
804 | register a notification, an application must: | ||
805 | |||
806 | - create an eventfd using eventfd(2); | ||
807 | - open memory.pressure_level; | ||
808 | - write string like "<event_fd> <fd of memory.pressure_level> <level>" | ||
809 | to cgroup.event_control. | ||
810 | |||
811 | Application will be notified through eventfd when memory pressure is at | ||
812 | the specific level (or higher). Read/write operations to | ||
813 | memory.pressure_level are not implemented. | ||
814 | |||
815 | Test: | ||
816 | |||
817 | Here is a small script example that makes a new cgroup, sets up a | ||
818 | memory limit, sets up a notification in the cgroup and then makes child | ||
819 | cgroup experience a critical pressure: | ||
820 | |||
821 | # cd /sys/fs/cgroup/memory/ | ||
822 | # mkdir foo | ||
823 | # cd foo | ||
824 | # cgroup_event_listener memory.pressure_level low & | ||
825 | # echo 8000000 > memory.limit_in_bytes | ||
826 | # echo 8000000 > memory.memsw.limit_in_bytes | ||
827 | # echo $$ > tasks | ||
828 | # dd if=/dev/zero | read x | ||
829 | |||
830 | (Expect a bunch of notifications, and eventually, the oom-killer will | ||
831 | trigger.) | ||
832 | |||
833 | 12. TODO | ||
766 | 834 | ||
767 | 1. Add support for accounting huge pages (as a separate controller) | 835 | 1. Add support for accounting huge pages (as a separate controller) |
768 | 2. Make per-cgroup scanner reclaim not-shared pages first | 836 | 2. Make per-cgroup scanner reclaim not-shared pages first |
diff --git a/Documentation/clk.txt b/Documentation/clk.txt index 1943fae014fd..b9911c27f496 100644 --- a/Documentation/clk.txt +++ b/Documentation/clk.txt | |||
@@ -174,9 +174,9 @@ int clk_foo_enable(struct clk_hw *hw) | |||
174 | }; | 174 | }; |
175 | 175 | ||
176 | Below is a matrix detailing which clk_ops are mandatory based upon the | 176 | Below is a matrix detailing which clk_ops are mandatory based upon the |
177 | hardware capbilities of that clock. A cell marked as "y" means | 177 | hardware capabilities of that clock. A cell marked as "y" means |
178 | mandatory, a cell marked as "n" implies that either including that | 178 | mandatory, a cell marked as "n" implies that either including that |
179 | callback is invalid or otherwise uneccesary. Empty cells are either | 179 | callback is invalid or otherwise unnecessary. Empty cells are either |
180 | optional or must be evaluated on a case-by-case basis. | 180 | optional or must be evaluated on a case-by-case basis. |
181 | 181 | ||
182 | clock hardware characteristics | 182 | clock hardware characteristics |
@@ -231,3 +231,14 @@ To better enforce this policy, always follow this simple rule: any | |||
231 | statically initialized clock data MUST be defined in a separate file | 231 | statically initialized clock data MUST be defined in a separate file |
232 | from the logic that implements its ops. Basically separate the logic | 232 | from the logic that implements its ops. Basically separate the logic |
233 | from the data and all is well. | 233 | from the data and all is well. |
234 | |||
235 | Part 6 - Disabling clock gating of unused clocks | ||
236 | |||
237 | Sometimes during development it can be useful to be able to bypass the | ||
238 | default disabling of unused clocks. For example, if drivers aren't enabling | ||
239 | clocks properly but rely on them being on from the bootloader, bypassing | ||
240 | the disabling means that the driver will remain functional while the issues | ||
241 | are sorted out. | ||
242 | |||
243 | To bypass this disabling, include "clk_ignore_unused" in the bootargs to the | ||
244 | kernel. | ||
diff --git a/Documentation/devicetree/bindings/arm/atmel-adc.txt b/Documentation/devicetree/bindings/arm/atmel-adc.txt index c63097d6afeb..16769d9cedd6 100644 --- a/Documentation/devicetree/bindings/arm/atmel-adc.txt +++ b/Documentation/devicetree/bindings/arm/atmel-adc.txt | |||
@@ -14,9 +14,19 @@ Required properties: | |||
14 | - atmel,adc-status-register: Offset of the Interrupt Status Register | 14 | - atmel,adc-status-register: Offset of the Interrupt Status Register |
15 | - atmel,adc-trigger-register: Offset of the Trigger Register | 15 | - atmel,adc-trigger-register: Offset of the Trigger Register |
16 | - atmel,adc-vref: Reference voltage in millivolts for the conversions | 16 | - atmel,adc-vref: Reference voltage in millivolts for the conversions |
17 | - atmel,adc-res: List of resolution in bits supported by the ADC. List size | ||
18 | must be two at least. | ||
19 | - atmel,adc-res-names: Contains one identifier string for each resolution | ||
20 | in atmel,adc-res property. "lowres" and "highres" | ||
21 | identifiers are required. | ||
17 | 22 | ||
18 | Optional properties: | 23 | Optional properties: |
19 | - atmel,adc-use-external: Boolean to enable of external triggers | 24 | - atmel,adc-use-external: Boolean to enable of external triggers |
25 | - atmel,adc-use-res: String corresponding to an identifier from | ||
26 | atmel,adc-res-names property. If not specified, the highest | ||
27 | resolution will be used. | ||
28 | - atmel,adc-sleep-mode: Boolean to enable sleep mode when no conversion | ||
29 | - atmel,adc-sample-hold-time: Sample and Hold Time in microseconds | ||
20 | 30 | ||
21 | Optional trigger Nodes: | 31 | Optional trigger Nodes: |
22 | - Required properties: | 32 | - Required properties: |
@@ -40,6 +50,9 @@ adc0: adc@fffb0000 { | |||
40 | atmel,adc-trigger-register = <0x08>; | 50 | atmel,adc-trigger-register = <0x08>; |
41 | atmel,adc-use-external; | 51 | atmel,adc-use-external; |
42 | atmel,adc-vref = <3300>; | 52 | atmel,adc-vref = <3300>; |
53 | atmel,adc-res = <8 10>; | ||
54 | atmel,adc-res-names = "lowres", "highres"; | ||
55 | atmel,adc-use-res = "lowres"; | ||
43 | 56 | ||
44 | trigger@0 { | 57 | trigger@0 { |
45 | trigger-name = "external-rising"; | 58 | trigger-name = "external-rising"; |
diff --git a/Documentation/devicetree/bindings/arm/msm/ssbi.txt b/Documentation/devicetree/bindings/arm/msm/ssbi.txt new file mode 100644 index 000000000000..54fd5ced3401 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/msm/ssbi.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | * Qualcomm SSBI | ||
2 | |||
3 | Some Qualcomm MSM devices contain a point-to-point serial bus used to | ||
4 | communicate with a limited range of devices (mostly power management | ||
5 | chips). | ||
6 | |||
7 | These require the following properties: | ||
8 | |||
9 | - compatible: "qcom,ssbi" | ||
10 | |||
11 | - qcom,controller-type | ||
12 | indicates the SSBI bus variant the controller should use to talk | ||
13 | with the slave device. This should be one of "ssbi", "ssbi2", or | ||
14 | "pmic-arbiter". The type chosen is determined by the attached | ||
15 | slave. | ||
16 | |||
17 | The slave device should be the single child node of the ssbi device | ||
18 | with a compatible field. | ||
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt new file mode 100644 index 000000000000..47ada1dff216 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt | |||
@@ -0,0 +1,60 @@ | |||
1 | Samsung Exynos Analog to Digital Converter bindings | ||
2 | |||
3 | The devicetree bindings are for the new ADC driver written for | ||
4 | Exynos4 and upward SoCs from Samsung. | ||
5 | |||
6 | New driver handles the following | ||
7 | 1. Supports ADC IF found on EXYNOS4412/EXYNOS5250 | ||
8 | and future SoCs from Samsung | ||
9 | 2. Add ADC driver under iio/adc framework | ||
10 | 3. Also adds the Documentation for device tree bindings | ||
11 | |||
12 | Required properties: | ||
13 | - compatible: Must be "samsung,exynos-adc-v1" | ||
14 | for exynos4412/5250 controllers. | ||
15 | Must be "samsung,exynos-adc-v2" for | ||
16 | future controllers. | ||
17 | - reg: Contains ADC register address range (base address and | ||
18 | length) and the address of the phy enable register. | ||
19 | - interrupts: Contains the interrupt information for the ADC. The | ||
20 | format depends on which interrupt controller | ||
21 | the Samsung device uses. | ||
22 | - #io-channel-cells = <1>; As ADC has multiple outputs | ||
23 | - clocks From common clock binding: handle to adc clock. | ||
24 | - clock-names From common clock binding: Shall be "adc". | ||
25 | - vdd-supply VDD input supply. | ||
26 | |||
27 | Note: child nodes can be added for auto probing from device tree. | ||
28 | |||
29 | Example: adding device info in dtsi file | ||
30 | |||
31 | adc: adc@12D10000 { | ||
32 | compatible = "samsung,exynos-adc-v1"; | ||
33 | reg = <0x12D10000 0x100>, <0x10040718 0x4>; | ||
34 | interrupts = <0 106 0>; | ||
35 | #io-channel-cells = <1>; | ||
36 | io-channel-ranges; | ||
37 | |||
38 | clocks = <&clock 303>; | ||
39 | clock-names = "adc"; | ||
40 | |||
41 | vdd-supply = <&buck5_reg>; | ||
42 | }; | ||
43 | |||
44 | |||
45 | Example: Adding child nodes in dts file | ||
46 | |||
47 | adc@12D10000 { | ||
48 | |||
49 | /* NTC thermistor is a hwmon device */ | ||
50 | ncp15wb473@0 { | ||
51 | compatible = "ntc,ncp15wb473"; | ||
52 | pullup-uv = <1800000>; | ||
53 | pullup-ohm = <47000>; | ||
54 | pulldown-ohm = <0>; | ||
55 | io-channels = <&adc 4>; | ||
56 | }; | ||
57 | }; | ||
58 | |||
59 | Note: Does not apply to ADC driver under arch/arm/plat-samsung/ | ||
60 | Note: The child node can be added under the adc node or separately. | ||
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt new file mode 100644 index 000000000000..028b493e97ff --- /dev/null +++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | Binding for the axi-clkgen clock generator | ||
2 | |||
3 | This binding uses the common clock binding[1]. | ||
4 | |||
5 | [1] Documentation/devicetree/bindings/clock/clock-bindings.txt | ||
6 | |||
7 | Required properties: | ||
8 | - compatible : shall be "adi,axi-clkgen". | ||
9 | - #clock-cells : from common clock binding; Should always be set to 0. | ||
10 | - reg : Address and length of the axi-clkgen register set. | ||
11 | - clocks : Phandle and clock specifier for the parent clock. | ||
12 | |||
13 | Optional properties: | ||
14 | - clock-output-names : From common clock binding. | ||
15 | |||
16 | Example: | ||
17 | clock@0xff000000 { | ||
18 | compatible = "adi,axi-clkgen"; | ||
19 | #clock-cells = <0>; | ||
20 | reg = <0xff000000 0x1000>; | ||
21 | clocks = <&osc 1>; | ||
22 | }; | ||
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt new file mode 100644 index 000000000000..5757f9abfc26 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | Binding for simple fixed factor rate clock sources. | ||
2 | |||
3 | This binding uses the common clock binding[1]. | ||
4 | |||
5 | [1] Documentation/devicetree/bindings/clock/clock-bindings.txt | ||
6 | |||
7 | Required properties: | ||
8 | - compatible : shall be "fixed-factor-clock". | ||
9 | - #clock-cells : from common clock binding; shall be set to 0. | ||
10 | - clock-div: fixed divider. | ||
11 | - clock-mult: fixed multiplier. | ||
12 | - clocks: parent clock. | ||
13 | |||
14 | Optional properties: | ||
15 | - clock-output-names : From common clock binding. | ||
16 | |||
17 | Example: | ||
18 | clock { | ||
19 | compatible = "fixed-factor-clock"; | ||
20 | clocks = <&parentclk>; | ||
21 | #clock-cells = <0>; | ||
22 | clock-div = <2>; | ||
23 | clock-mult = <1>; | ||
24 | }; | ||
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt new file mode 100644 index 000000000000..cc374651662c --- /dev/null +++ b/Documentation/devicetree/bindings/clock/silabs,si5351.txt | |||
@@ -0,0 +1,114 @@ | |||
1 | Binding for Silicon Labs Si5351a/b/c programmable i2c clock generator. | ||
2 | |||
3 | Reference | ||
4 | [1] Si5351A/B/C Data Sheet | ||
5 | http://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf | ||
6 | |||
7 | The Si5351a/b/c are programmable i2c clock generators with up to 8 output | ||
8 | clocks. Si5351a also has a reduced pin-count package (MSOP10) where only | ||
9 | 3 output clocks are accessible. The internal structure of the clock | ||
10 | generators can be found in [1]. | ||
11 | |||
12 | ==I2C device node== | ||
13 | |||
14 | Required properties: | ||
15 | - compatible: shall be one of "silabs,si5351{a,a-msop,b,c}". | ||
16 | - reg: i2c device address, shall be 0x60 or 0x61. | ||
17 | - #clock-cells: from common clock binding; shall be set to 1. | ||
18 | - clocks: from common clock binding; list of parent clock | ||
19 | handles, shall be xtal reference clock or xtal and clkin for | ||
20 | si5351c only. | ||
21 | - #address-cells: shall be set to 1. | ||
22 | - #size-cells: shall be set to 0. | ||
23 | |||
24 | Optional properties: | ||
25 | - silabs,pll-source: pair of (number, source) for each pll. Allows | ||
26 | to overwrite clock source of pll A (number=0) or B (number=1). | ||
27 | |||
28 | ==Child nodes== | ||
29 | |||
30 | Each of the clock outputs can be overwritten individually by | ||
31 | using a child node to the I2C device node. If a child node for a clock | ||
32 | output is not set, the eeprom configuration is not overwritten. | ||
33 | |||
34 | Required child node properties: | ||
35 | - reg: number of clock output. | ||
36 | |||
37 | Optional child node properties: | ||
38 | - silabs,clock-source: source clock of the output divider stage N, shall be | ||
39 | 0 = multisynth N | ||
40 | 1 = multisynth 0 for output clocks 0-3, else multisynth4 | ||
41 | 2 = xtal | ||
42 | 3 = clkin (si5351c only) | ||
43 | - silabs,drive-strength: output drive strength in mA, shall be one of {2,4,6,8}. | ||
44 | - silabs,multisynth-source: source pll A(0) or B(1) of corresponding multisynth | ||
45 | divider. | ||
46 | - silabs,pll-master: boolean, multisynth can change pll frequency. | ||
47 | |||
48 | ==Example== | ||
49 | |||
50 | /* 25MHz reference crystal */ | ||
51 | ref25: ref25M { | ||
52 | compatible = "fixed-clock"; | ||
53 | #clock-cells = <0>; | ||
54 | clock-frequency = <25000000>; | ||
55 | }; | ||
56 | |||
57 | i2c-master-node { | ||
58 | |||
59 | /* Si5351a msop10 i2c clock generator */ | ||
60 | si5351a: clock-generator@60 { | ||
61 | compatible = "silabs,si5351a-msop"; | ||
62 | reg = <0x60>; | ||
63 | #address-cells = <1>; | ||
64 | #size-cells = <0>; | ||
65 | #clock-cells = <1>; | ||
66 | |||
67 | /* connect xtal input to 25MHz reference */ | ||
68 | clocks = <&ref25>; | ||
69 | |||
70 | /* connect xtal input as source of pll0 and pll1 */ | ||
71 | silabs,pll-source = <0 0>, <1 0>; | ||
72 | |||
73 | /* | ||
74 | * overwrite clkout0 configuration with: | ||
75 | * - 8mA output drive strength | ||
76 | * - pll0 as clock source of multisynth0 | ||
77 | * - multisynth0 as clock source of output divider | ||
78 | * - multisynth0 can change pll0 | ||
79 | * - set initial clock frequency of 74.25MHz | ||
80 | */ | ||
81 | clkout0 { | ||
82 | reg = <0>; | ||
83 | silabs,drive-strength = <8>; | ||
84 | silabs,multisynth-source = <0>; | ||
85 | silabs,clock-source = <0>; | ||
86 | silabs,pll-master; | ||
87 | clock-frequency = <74250000>; | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * overwrite clkout1 configuration with: | ||
92 | * - 4mA output drive strength | ||
93 | * - pll1 as clock source of multisynth1 | ||
94 | * - multisynth1 as clock source of output divider | ||
95 | * - multisynth1 can change pll1 | ||
96 | */ | ||
97 | clkout1 { | ||
98 | reg = <1>; | ||
99 | silabs,drive-strength = <4>; | ||
100 | silabs,multisynth-source = <1>; | ||
101 | silabs,clock-source = <0>; | ||
102 | silabs,pll-master; | ||
103 | }; | ||
104 | |||
105 | /* | ||
106 | * overwrite clkout2 configuration with: | ||
107 | * - xtal as clock source of output divider | ||
108 | */ | ||
109 | clkout2 { | ||
110 | reg = <2>; | ||
111 | silabs,clock-source = <2>; | ||
112 | }; | ||
113 | }; | ||
114 | }; | ||
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt new file mode 100644 index 000000000000..729f52426fe1 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sunxi.txt | |||
@@ -0,0 +1,151 @@ | |||
1 | Device Tree Clock bindings for arch-sunxi | ||
2 | |||
3 | This binding uses the common clock binding[1]. | ||
4 | |||
5 | [1] Documentation/devicetree/bindings/clock/clock-bindings.txt | ||
6 | |||
7 | Required properties: | ||
8 | - compatible : shall be one of the following: | ||
9 | "allwinner,sun4i-osc-clk" - for a gatable oscillator | ||
10 | "allwinner,sun4i-pll1-clk" - for the main PLL clock | ||
11 | "allwinner,sun4i-cpu-clk" - for the CPU multiplexer clock | ||
12 | "allwinner,sun4i-axi-clk" - for the AXI clock | ||
13 | "allwinner,sun4i-axi-gates-clk" - for the AXI gates | ||
14 | "allwinner,sun4i-ahb-clk" - for the AHB clock | ||
15 | "allwinner,sun4i-ahb-gates-clk" - for the AHB gates | ||
16 | "allwinner,sun4i-apb0-clk" - for the APB0 clock | ||
17 | "allwinner,sun4i-apb0-gates-clk" - for the APB0 gates | ||
18 | "allwinner,sun4i-apb1-clk" - for the APB1 clock | ||
19 | "allwinner,sun4i-apb1-mux-clk" - for the APB1 clock muxing | ||
20 | "allwinner,sun4i-apb1-gates-clk" - for the APB1 gates | ||
21 | |||
22 | Required properties for all clocks: | ||
23 | - reg : shall be the control register address for the clock. | ||
24 | - clocks : shall be the input parent clock(s) phandle for the clock | ||
25 | - #clock-cells : from common clock binding; shall be set to 0 except for | ||
26 | "allwinner,sun4i-*-gates-clk" where it shall be set to 1 | ||
27 | |||
28 | Additionally, "allwinner,sun4i-*-gates-clk" clocks require: | ||
29 | - clock-output-names : the corresponding gate names that the clock controls | ||
30 | |||
31 | For example: | ||
32 | |||
33 | osc24M: osc24M@01c20050 { | ||
34 | #clock-cells = <0>; | ||
35 | compatible = "allwinner,sun4i-osc-clk"; | ||
36 | reg = <0x01c20050 0x4>; | ||
37 | clocks = <&osc24M_fixed>; | ||
38 | }; | ||
39 | |||
40 | pll1: pll1@01c20000 { | ||
41 | #clock-cells = <0>; | ||
42 | compatible = "allwinner,sun4i-pll1-clk"; | ||
43 | reg = <0x01c20000 0x4>; | ||
44 | clocks = <&osc24M>; | ||
45 | }; | ||
46 | |||
47 | cpu: cpu@01c20054 { | ||
48 | #clock-cells = <0>; | ||
49 | compatible = "allwinner,sun4i-cpu-clk"; | ||
50 | reg = <0x01c20054 0x4>; | ||
51 | clocks = <&osc32k>, <&osc24M>, <&pll1>; | ||
52 | }; | ||
53 | |||
54 | |||
55 | |||
56 | Gate clock outputs | ||
57 | |||
58 | The "allwinner,sun4i-*-gates-clk" clocks provide several gatable outputs; | ||
59 | their corresponding offsets as present on sun4i are listed below. Note that | ||
60 | some of these gates are not present on sun5i. | ||
61 | |||
62 | * AXI gates ("allwinner,sun4i-axi-gates-clk") | ||
63 | |||
64 | DRAM 0 | ||
65 | |||
66 | * AHB gates ("allwinner,sun4i-ahb-gates-clk") | ||
67 | |||
68 | USB0 0 | ||
69 | EHCI0 1 | ||
70 | OHCI0 2* | ||
71 | EHCI1 3 | ||
72 | OHCI1 4* | ||
73 | SS 5 | ||
74 | DMA 6 | ||
75 | BIST 7 | ||
76 | MMC0 8 | ||
77 | MMC1 9 | ||
78 | MMC2 10 | ||
79 | MMC3 11 | ||
80 | MS 12** | ||
81 | NAND 13 | ||
82 | SDRAM 14 | ||
83 | |||
84 | ACE 16 | ||
85 | EMAC 17 | ||
86 | TS 18 | ||
87 | |||
88 | SPI0 20 | ||
89 | SPI1 21 | ||
90 | SPI2 22 | ||
91 | SPI3 23 | ||
92 | PATA 24 | ||
93 | SATA 25** | ||
94 | GPS 26* | ||
95 | |||
96 | VE 32 | ||
97 | TVD 33 | ||
98 | TVE0 34 | ||
99 | TVE1 35 | ||
100 | LCD0 36 | ||
101 | LCD1 37 | ||
102 | |||
103 | CSI0 40 | ||
104 | CSI1 41 | ||
105 | |||
106 | HDMI 43 | ||
107 | DE_BE0 44 | ||
108 | DE_BE1 45 | ||
109 | DE_FE0 46 | ||
110 | DE_FE1 47 | ||
111 | |||
112 | MP 50 | ||
113 | |||
114 | MALI400 52 | ||
115 | |||
116 | * APB0 gates ("allwinner,sun4i-apb0-gates-clk") | ||
117 | |||
118 | CODEC 0 | ||
119 | SPDIF 1* | ||
120 | AC97 2 | ||
121 | IIS 3 | ||
122 | |||
123 | PIO 5 | ||
124 | IR0 6 | ||
125 | IR1 7 | ||
126 | |||
127 | KEYPAD 10 | ||
128 | |||
129 | * APB1 gates ("allwinner,sun4i-apb1-gates-clk") | ||
130 | |||
131 | I2C0 0 | ||
132 | I2C1 1 | ||
133 | I2C2 2 | ||
134 | |||
135 | CAN 4 | ||
136 | SCR 5 | ||
137 | PS20 6 | ||
138 | PS21 7 | ||
139 | |||
140 | UART0 16 | ||
141 | UART1 17 | ||
142 | UART2 18 | ||
143 | UART3 19 | ||
144 | UART4 20 | ||
145 | UART5 21 | ||
146 | UART6 22 | ||
147 | UART7 23 | ||
148 | |||
149 | Notation: | ||
150 | [*]: The datasheet didn't mention these, but they are present on AW code | ||
151 | [**]: The datasheet had this marked as "NC" but they are used on AW code | ||
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt index a33628759d36..d933af370697 100644 --- a/Documentation/devicetree/bindings/gpio/gpio.txt +++ b/Documentation/devicetree/bindings/gpio/gpio.txt | |||
@@ -98,7 +98,7 @@ announce the pinrange to the pin ctrl subsystem. For example, | |||
98 | compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; | 98 | compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; |
99 | reg = <0x1460 0x18>; | 99 | reg = <0x1460 0x18>; |
100 | gpio-controller; | 100 | gpio-controller; |
101 | gpio-ranges = <&pinctrl1 20 10>, <&pinctrl2 50 20>; | 101 | gpio-ranges = <&pinctrl1 0 20 10>, <&pinctrl2 10 50 20>; |
102 | 102 | ||
103 | } | 103 | } |
104 | 104 | ||
@@ -107,8 +107,8 @@ where, | |||
107 | 107 | ||
108 | Next values specify the base pin and number of pins for the range | 108 | Next values specify the base pin and number of pins for the range |
109 | handled by 'qe_pio_e' gpio. In the given example from base pin 20 to | 109 | handled by 'qe_pio_e' gpio. In the given example from base pin 20 to |
110 | pin 29 under pinctrl1 and pin 50 to pin 69 under pinctrl2 is handled | 110 | pin 29 under pinctrl1 with gpio offset 0 and pin 50 to pin 69 under |
111 | by this gpio controller. | 111 | pinctrl2 with gpio offset 10 is handled by this gpio controller. |
112 | 112 | ||
113 | The pinctrl node must have "#gpio-range-cells" property to show number of | 113 | The pinctrl node must have "#gpio-range-cells" property to show number of |
114 | arguments to pass with phandle from gpio controllers node. | 114 | arguments to pass with phandle from gpio controllers node. |
diff --git a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt new file mode 100644 index 000000000000..c6f66674f19c --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | NTC Thermistor hwmon sensors | ||
2 | ------------------------------- | ||
3 | |||
4 | Required node properties: | ||
5 | - "compatible" value : one of | ||
6 | "ntc,ncp15wb473" | ||
7 | "ntc,ncp18wb473" | ||
8 | "ntc,ncp21wb473" | ||
9 | "ntc,ncp03wb473" | ||
10 | "ntc,ncp15wl333" | ||
11 | - "pullup-uv" Pull up voltage in micro volts | ||
12 | - "pullup-ohm" Pull up resistor value in ohms | ||
13 | - "pulldown-ohm" Pull down resistor value in ohms | ||
14 | - "connected-positive" Always ON, If not specified. | ||
15 | Status change is possible. | ||
16 | - "io-channels" Channel node of ADC to be used for | ||
17 | conversion. | ||
18 | |||
19 | Read more about iio bindings at | ||
20 | Documentation/devicetree/bindings/iio/iio-bindings.txt | ||
21 | |||
22 | Example: | ||
23 | ncp15wb473@0 { | ||
24 | compatible = "ntc,ncp15wb473"; | ||
25 | pullup-uv = <1800000>; | ||
26 | pullup-ohm = <47000>; | ||
27 | pulldown-ohm = <0>; | ||
28 | io-channels = <&adc 3>; | ||
29 | }; | ||
diff --git a/Documentation/devicetree/bindings/iio/iio-bindings.txt b/Documentation/devicetree/bindings/iio/iio-bindings.txt new file mode 100644 index 000000000000..0b447d9ad196 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/iio-bindings.txt | |||
@@ -0,0 +1,97 @@ | |||
1 | This binding is derived from clock bindings, and based on suggestions | ||
2 | from Lars-Peter Clausen [1]. | ||
3 | |||
4 | Sources of IIO channels can be represented by any node in the device | ||
5 | tree. Those nodes are designated as IIO providers. IIO consumer | ||
6 | nodes use a phandle and IIO specifier pair to connect IIO provider | ||
7 | outputs to IIO inputs. Similar to the gpio specifiers, an IIO | ||
8 | specifier is an array of one or more cells identifying the IIO | ||
9 | output on a device. The length of an IIO specifier is defined by the | ||
10 | value of a #io-channel-cells property in the IIO provider node. | ||
11 | |||
12 | [1] http://marc.info/?l=linux-iio&m=135902119507483&w=2 | ||
13 | |||
14 | ==IIO providers== | ||
15 | |||
16 | Required properties: | ||
17 | #io-channel-cells: Number of cells in an IIO specifier; Typically 0 for nodes | ||
18 | with a single IIO output and 1 for nodes with multiple | ||
19 | IIO outputs. | ||
20 | |||
21 | Example for a simple configuration with no trigger: | ||
22 | |||
23 | adc: voltage-sensor@35 { | ||
24 | compatible = "maxim,max1139"; | ||
25 | reg = <0x35>; | ||
26 | #io-channel-cells = <1>; | ||
27 | }; | ||
28 | |||
29 | Example for a configuration with trigger: | ||
30 | |||
31 | adc@35 { | ||
32 | compatible = "some-vendor,some-adc"; | ||
33 | reg = <0x35>; | ||
34 | |||
35 | adc1: iio-device@0 { | ||
36 | #io-channel-cells = <1>; | ||
37 | /* other properties */ | ||
38 | }; | ||
39 | adc2: iio-device@1 { | ||
40 | #io-channel-cells = <1>; | ||
41 | /* other properties */ | ||
42 | }; | ||
43 | }; | ||
44 | |||
45 | ==IIO consumers== | ||
46 | |||
47 | Required properties: | ||
48 | io-channels: List of phandle and IIO specifier pairs, one pair | ||
49 | for each IIO input to the device. Note: if the | ||
50 | IIO provider specifies '0' for #io-channel-cells, | ||
51 | then only the phandle portion of the pair will appear. | ||
52 | |||
53 | Optional properties: | ||
54 | io-channel-names: | ||
55 | List of IIO input name strings sorted in the same | ||
56 | order as the io-channels property. Consumer drivers | ||
57 | will use io-channel-names to match IIO input names | ||
58 | with IIO specifiers. | ||
59 | io-channel-ranges: | ||
60 | Empty property indicating that child nodes can inherit named | ||
61 | IIO channels from this node. Useful for bus nodes to provide | ||
62 | an IIO channel to their children. | ||
63 | |||
64 | For example: | ||
65 | |||
66 | device { | ||
67 | io-channels = <&adc 1>, <&ref 0>; | ||
68 | io-channel-names = "vcc", "vdd"; | ||
69 | }; | ||
70 | |||
71 | This represents a device with two IIO inputs, named "vcc" and "vdd". | ||
72 | The vcc channel is connected to output 1 of the &adc device, and the | ||
73 | vdd channel is connected to output 0 of the &ref device. | ||
74 | |||
75 | ==Example== | ||
76 | |||
77 | adc: max1139@35 { | ||
78 | compatible = "maxim,max1139"; | ||
79 | reg = <0x35>; | ||
80 | #io-channel-cells = <1>; | ||
81 | }; | ||
82 | |||
83 | ... | ||
84 | |||
85 | iio_hwmon { | ||
86 | compatible = "iio-hwmon"; | ||
87 | io-channels = <&adc 0>, <&adc 1>, <&adc 2>, | ||
88 | <&adc 3>, <&adc 4>, <&adc 5>, | ||
89 | <&adc 6>, <&adc 7>, <&adc 8>, | ||
90 | <&adc 9>; | ||
91 | }; | ||
92 | |||
93 | some_consumer { | ||
94 | compatible = "some-consumer"; | ||
95 | io-channels = <&adc 10>, <&adc 11>; | ||
96 | io-channel-names = "adc1", "adc2"; | ||
97 | }; | ||
diff --git a/Documentation/devicetree/bindings/media/coda.txt b/Documentation/devicetree/bindings/media/coda.txt new file mode 100644 index 000000000000..2865d04e4030 --- /dev/null +++ b/Documentation/devicetree/bindings/media/coda.txt | |||
@@ -0,0 +1,30 @@ | |||
1 | Chips&Media Coda multi-standard codec IP | ||
2 | ======================================== | ||
3 | |||
4 | Coda codec IPs are present in i.MX SoCs in various versions, | ||
5 | called VPU (Video Processing Unit). | ||
6 | |||
7 | Required properties: | ||
8 | - compatible : should be "fsl,<chip>-vpu" for i.MX SoCs: | ||
9 | (a) "fsl,imx27-vpu" for CodaDx6 present in i.MX27 | ||
10 | (b) "fsl,imx53-vpu" for CODA7541 present in i.MX53 | ||
11 | (c) "fsl,imx6q-vpu" for CODA960 present in i.MX6q | ||
12 | - reg: should be register base and length as documented in the | ||
13 | SoC reference manual | ||
14 | - interrupts : Should contain the VPU interrupt. For CODA960, | ||
15 | a second interrupt is needed for the MJPEG unit. | ||
16 | - clocks : Should contain the ahb and per clocks, in the order | ||
17 | determined by the clock-names property. | ||
18 | - clock-names : Should be "ahb", "per" | ||
19 | - iram : phandle pointing to the SRAM device node | ||
20 | |||
21 | Example: | ||
22 | |||
23 | vpu: vpu@63ff4000 { | ||
24 | compatible = "fsl,imx53-vpu"; | ||
25 | reg = <0x63ff4000 0x1000>; | ||
26 | interrupts = <9>; | ||
27 | clocks = <&clks 63>, <&clks 63>; | ||
28 | clock-names = "ahb", "per"; | ||
29 | iram = <&ocram>; | ||
30 | }; | ||
diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt index 13b707b7355c..c3a14e0ad0ad 100644 --- a/Documentation/devicetree/bindings/mfd/ab8500.txt +++ b/Documentation/devicetree/bindings/mfd/ab8500.txt | |||
@@ -13,9 +13,6 @@ Required parent device properties: | |||
13 | 4 = active high level-sensitive | 13 | 4 = active high level-sensitive |
14 | 8 = active low level-sensitive | 14 | 8 = active low level-sensitive |
15 | 15 | ||
16 | Optional parent device properties: | ||
17 | - reg : contains the PRCMU mailbox address for the AB8500 i2c port | ||
18 | |||
19 | The AB8500 consists of a large and varied group of sub-devices: | 16 | The AB8500 consists of a large and varied group of sub-devices: |
20 | 17 | ||
21 | Device IRQ Names Supply Names Description | 18 | Device IRQ Names Supply Names Description |
@@ -86,9 +83,8 @@ Non-standard child device properties: | |||
86 | - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic | 83 | - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic |
87 | - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580) | 84 | - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580) |
88 | 85 | ||
89 | ab8500@5 { | 86 | ab8500 { |
90 | compatible = "stericsson,ab8500"; | 87 | compatible = "stericsson,ab8500"; |
91 | reg = <5>; /* mailbox 5 is i2c */ | ||
92 | interrupts = <0 40 0x4>; | 88 | interrupts = <0 40 0x4>; |
93 | interrupt-controller; | 89 | interrupt-controller; |
94 | #interrupt-cells = <2>; | 90 | #interrupt-cells = <2>; |
diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt index baf07987ae68..abd9e3cb2db7 100644 --- a/Documentation/devicetree/bindings/mfd/mc13xxx.txt +++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt | |||
@@ -10,10 +10,40 @@ Optional properties: | |||
10 | - fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used | 10 | - fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used |
11 | 11 | ||
12 | Sub-nodes: | 12 | Sub-nodes: |
13 | - regulators : Contain the regulator nodes. The MC13892 regulators are | 13 | - regulators : Contain the regulator nodes. The regulators are bound using |
14 | bound using their names as listed below with their registers and bits | 14 | their names as listed below with their registers and bits for enabling. |
15 | for enabling. | ||
16 | 15 | ||
16 | MC13783 regulators: | ||
17 | sw1a : regulator SW1A (register 24, bit 0) | ||
18 | sw1b : regulator SW1B (register 25, bit 0) | ||
19 | sw2a : regulator SW2A (register 26, bit 0) | ||
20 | sw2b : regulator SW2B (register 27, bit 0) | ||
21 | sw3 : regulator SW3 (register 29, bit 20) | ||
22 | vaudio : regulator VAUDIO (register 32, bit 0) | ||
23 | viohi : regulator VIOHI (register 32, bit 3) | ||
24 | violo : regulator VIOLO (register 32, bit 6) | ||
25 | vdig : regulator VDIG (register 32, bit 9) | ||
26 | vgen : regulator VGEN (register 32, bit 12) | ||
27 | vrfdig : regulator VRFDIG (register 32, bit 15) | ||
28 | vrfref : regulator VRFREF (register 32, bit 18) | ||
29 | vrfcp : regulator VRFCP (register 32, bit 21) | ||
30 | vsim : regulator VSIM (register 33, bit 0) | ||
31 | vesim : regulator VESIM (register 33, bit 3) | ||
32 | vcam : regulator VCAM (register 33, bit 6) | ||
33 | vrfbg : regulator VRFBG (register 33, bit 9) | ||
34 | vvib : regulator VVIB (register 33, bit 11) | ||
35 | vrf1 : regulator VRF1 (register 33, bit 12) | ||
36 | vrf2 : regulator VRF2 (register 33, bit 15) | ||
37 | vmmc1 : regulator VMMC1 (register 33, bit 18) | ||
38 | vmmc2 : regulator VMMC2 (register 33, bit 21) | ||
39 | gpo1 : regulator GPO1 (register 34, bit 6) | ||
40 | gpo2 : regulator GPO2 (register 34, bit 8) | ||
41 | gpo3 : regulator GPO3 (register 34, bit 10) | ||
42 | gpo4 : regulator GPO4 (register 34, bit 12) | ||
43 | pwgt1spi : regulator PWGT1SPI (register 34, bit 15) | ||
44 | pwgt2spi : regulator PWGT2SPI (register 34, bit 16) | ||
45 | |||
46 | MC13892 regulators: | ||
17 | vcoincell : regulator VCOINCELL (register 13, bit 23) | 47 | vcoincell : regulator VCOINCELL (register 13, bit 23) |
18 | sw1 : regulator SW1 (register 24, bit 0) | 48 | sw1 : regulator SW1 (register 24, bit 0) |
19 | sw2 : regulator SW2 (register 25, bit 0) | 49 | sw2 : regulator SW2 (register 25, bit 0) |
diff --git a/Documentation/devicetree/bindings/misc/sram.txt b/Documentation/devicetree/bindings/misc/sram.txt new file mode 100644 index 000000000000..4d0a00e453a8 --- /dev/null +++ b/Documentation/devicetree/bindings/misc/sram.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | Generic on-chip SRAM | ||
2 | |||
3 | Simple IO memory regions to be managed by the genalloc API. | ||
4 | |||
5 | Required properties: | ||
6 | |||
7 | - compatible : mmio-sram | ||
8 | |||
9 | - reg : SRAM iomem address range | ||
10 | |||
11 | Example: | ||
12 | |||
13 | sram: sram@5c000000 { | ||
14 | compatible = "mmio-sram"; | ||
15 | reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */ | ||
16 | }; | ||
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt index 2c81e45f1374..08f0c3d01575 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt | |||
@@ -1,7 +1,9 @@ | |||
1 | One-register-per-pin type device tree based pinctrl driver | 1 | One-register-per-pin type device tree based pinctrl driver |
2 | 2 | ||
3 | Required properties: | 3 | Required properties: |
4 | - compatible : "pinctrl-single" | 4 | - compatible : "pinctrl-single" or "pinconf-single". |
5 | "pinctrl-single" means that pinconf isn't supported. | ||
6 | "pinconf-single" means that generic pinconf is supported. | ||
5 | 7 | ||
6 | - reg : offset and length of the register set for the mux registers | 8 | - reg : offset and length of the register set for the mux registers |
7 | 9 | ||
@@ -14,9 +16,61 @@ Optional properties: | |||
14 | - pinctrl-single,function-off : function off mode for disabled state if | 16 | - pinctrl-single,function-off : function off mode for disabled state if |
15 | available and same for all registers; if not specified, disabling of | 17 | available and same for all registers; if not specified, disabling of |
16 | pin functions is ignored | 18 | pin functions is ignored |
19 | |||
17 | - pinctrl-single,bit-per-mux : boolean to indicate that one register controls | 20 | - pinctrl-single,bit-per-mux : boolean to indicate that one register controls |
18 | more than one pin | 21 | more than one pin |
19 | 22 | ||
23 | - pinctrl-single,drive-strength : array of values that are used to configure | ||
24 | drive strength in the pinmux register. They are the drive strength | ||
25 | current value and the drive strength mask. | ||
26 | |||
27 | /* drive strength current, mask */ | ||
28 | pinctrl-single,drive-strength = <0x30 0xf0>; | ||
29 | |||
30 | - pinctrl-single,bias-pullup : array of values that are used to configure the | ||
31 | input bias pullup in the pinmux register. | ||
32 | |||
33 | /* input, enabled pullup bits, disabled pullup bits, mask */ | ||
34 | pinctrl-single,bias-pullup = <0 1 0 1>; | ||
35 | |||
36 | - pinctrl-single,bias-pulldown : array of values that are used to configure the | ||
37 | input bias pulldown in the pinmux register. | ||
38 | |||
39 | /* input, enabled pulldown bits, disabled pulldown bits, mask */ | ||
40 | pinctrl-single,bias-pulldown = <2 2 0 2>; | ||
41 | |||
42 | * Two bits to control input bias pullup and pulldown: User should use | ||
43 | pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. One bit means | ||
44 | pullup, and the other one bit means pulldown. | ||
45 | * Three bits to control input bias enable, pullup and pulldown. User should | ||
46 | use pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. Input bias | ||
47 | enable bit should be included in pullup or pulldown bits. | ||
48 | * Although driver could set PIN_CONFIG_BIAS_DISABLE, there's no property as | ||
49 | pinctrl-single,bias-disable. Because pinctrl single driver could implement | ||
50 | it by calling pulldown, pullup disabled. | ||
51 | |||
52 | - pinctrl-single,input-schmitt : array of values that are used to configure | ||
53 | input schmitt in the pinmux register. In some silicons, there are two input | ||
54 | schmitt values (rising-edge & falling-edge) in the pinmux register. | ||
55 | |||
56 | /* input schmitt value, mask */ | ||
57 | pinctrl-single,input-schmitt = <0x30 0x70>; | ||
58 | |||
59 | - pinctrl-single,input-schmitt-enable : array of values that are used to | ||
60 | configure input schmitt enable or disable in the pinmux register. | ||
61 | |||
62 | /* input, enable bits, disable bits, mask */ | ||
63 | pinctrl-single,input-schmitt-enable = <0x30 0x40 0 0x70>; | ||
64 | |||
65 | - pinctrl-single,gpio-range : list of values that are used to configure a GPIO | ||
66 | range. They are the subnode phandle, pin base in pinctrl device, number of | ||
67 | pins in this range, and GPIO function value of this GPIO range. | ||
68 | The number of parameters depends on the #pinctrl-single,gpio-range-cells | ||
69 | property. | ||
70 | |||
71 | /* pin base, nr pins & gpio function */ | ||
72 | pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1>; | ||
73 | |||
20 | This driver assumes that there is only one register for each pin (unless the | 74 | This driver assumes that there is only one register for each pin (unless the |
21 | pinctrl-single,bit-per-mux is set), and uses the common pinctrl bindings as | 75 | pinctrl-single,bit-per-mux is set), and uses the common pinctrl bindings as |
22 | specified in the pinctrl-bindings.txt document in this directory. | 76 | specified in the pinctrl-bindings.txt document in this directory. |
@@ -42,6 +96,20 @@ Where 0xdc is the offset from the pinctrl register base address for the | |||
42 | device pinctrl register, 0x18 is the desired value, and 0xff is the sub mask to | 96 | device pinctrl register, 0x18 is the desired value, and 0xff is the sub mask to |
43 | be used when applying this change to the register. | 97 | be used when applying this change to the register. |
44 | 98 | ||
99 | |||
100 | Optional sub-node: In case some pins could be configured as GPIO in the pinmux | ||
101 | register, those pins could be defined as a GPIO range. This sub-node is required | ||
102 | by pinctrl-single,gpio-range property. | ||
103 | |||
104 | Required properties in sub-node: | ||
105 | - #pinctrl-single,gpio-range-cells : the number of parameters after phandle in | ||
106 | pinctrl-single,gpio-range property. | ||
107 | |||
108 | range: gpio-range { | ||
109 | #pinctrl-single,gpio-range-cells = <3>; | ||
110 | }; | ||
111 | |||
112 | |||
45 | Example: | 113 | Example: |
46 | 114 | ||
47 | /* SoC common file */ | 115 | /* SoC common file */ |
@@ -58,7 +126,7 @@ pmx_core: pinmux@4a100040 { | |||
58 | 126 | ||
59 | /* second controller instance for pins in wkup domain */ | 127 | /* second controller instance for pins in wkup domain */ |
60 | pmx_wkup: pinmux@4a31e040 { | 128 | pmx_wkup: pinmux@4a31e040 { |
61 | compatible = "pinctrl-single; | 129 | compatible = "pinctrl-single"; |
62 | reg = <0x4a31e040 0x0038>; | 130 | reg = <0x4a31e040 0x0038>; |
63 | #address-cells = <1>; | 131 | #address-cells = <1>; |
64 | #size-cells = <0>; | 132 | #size-cells = <0>; |
@@ -76,6 +144,29 @@ control_devconf0: pinmux@48002274 { | |||
76 | pinctrl-single,function-mask = <0x5F>; | 144 | pinctrl-single,function-mask = <0x5F>; |
77 | }; | 145 | }; |
78 | 146 | ||
147 | /* third controller instance for pins in gpio domain */ | ||
148 | pmx_gpio: pinmux@d401e000 { | ||
149 | compatible = "pinconf-single"; | ||
150 | reg = <0xd401e000 0x0330>; | ||
151 | #address-cells = <1>; | ||
152 | #size-cells = <1>; | ||
153 | ranges; | ||
154 | |||
155 | pinctrl-single,register-width = <32>; | ||
156 | pinctrl-single,function-mask = <7>; | ||
157 | |||
158 | /* sparse GPIO range could be supported */ | ||
159 | pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1 | ||
160 | &range 12 1 0 &range 13 29 1 | ||
161 | &range 43 1 0 &range 44 49 1 | ||
162 | &range 94 1 1 &range 96 2 1>; | ||
163 | |||
164 | range: gpio-range { | ||
165 | #pinctrl-single,gpio-range-cells = <3>; | ||
166 | }; | ||
167 | }; | ||
168 | |||
169 | |||
79 | /* board specific .dts file */ | 170 | /* board specific .dts file */ |
80 | 171 | ||
81 | &pmx_core { | 172 | &pmx_core { |
@@ -96,6 +187,15 @@ control_devconf0: pinmux@48002274 { | |||
96 | >; | 187 | >; |
97 | }; | 188 | }; |
98 | 189 | ||
190 | uart0_pins: pinmux_uart0_pins { | ||
191 | pinctrl-single,pins = < | ||
192 | 0x208 0 /* UART0_RXD (IOCFG138) */ | ||
193 | 0x20c 0 /* UART0_TXD (IOCFG139) */ | ||
194 | >; | ||
195 | pinctrl-single,bias-pulldown = <0 2 2>; | ||
196 | pinctrl-single,bias-pullup = <0 1 1>; | ||
197 | }; | ||
198 | |||
99 | /* map uart2 pins */ | 199 | /* map uart2 pins */ |
100 | uart2_pins: pinmux_uart2_pins { | 200 | uart2_pins: pinmux_uart2_pins { |
101 | pinctrl-single,pins = < | 201 | pinctrl-single,pins = < |
@@ -122,6 +222,11 @@ control_devconf0: pinmux@48002274 { | |||
122 | 222 | ||
123 | }; | 223 | }; |
124 | 224 | ||
225 | &uart1 { | ||
226 | pinctrl-names = "default"; | ||
227 | pinctrl-0 = <&uart0_pins>; | ||
228 | }; | ||
229 | |||
125 | &uart2 { | 230 | &uart2 { |
126 | pinctrl-names = "default"; | 231 | pinctrl-names = "default"; |
127 | pinctrl-0 = <&uart2_pins>; | 232 | pinctrl-0 = <&uart2_pins>; |
diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt index 4598a47aa0cd..c70fca146e91 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt | |||
@@ -7,6 +7,7 @@ on-chip controllers onto these pads. | |||
7 | 7 | ||
8 | Required Properties: | 8 | Required Properties: |
9 | - compatible: should be one of the following. | 9 | - compatible: should be one of the following. |
10 | - "samsung,s3c64xx-pinctrl": for S3C64xx-compatible pin-controller, | ||
10 | - "samsung,exynos4210-pinctrl": for Exynos4210 compatible pin-controller. | 11 | - "samsung,exynos4210-pinctrl": for Exynos4210 compatible pin-controller. |
11 | - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller. | 12 | - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller. |
12 | - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller. | 13 | - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller. |
@@ -105,6 +106,8 @@ B. External Wakeup Interrupts: For supporting external wakeup interrupts, a | |||
105 | 106 | ||
106 | - compatible: identifies the type of the external wakeup interrupt controller | 107 | - compatible: identifies the type of the external wakeup interrupt controller |
107 | The possible values are: | 108 | The possible values are: |
109 | - samsung,s3c64xx-wakeup-eint: represents wakeup interrupt controller | ||
110 | found on Samsung S3C64xx SoCs, | ||
108 | - samsung,exynos4210-wakeup-eint: represents wakeup interrupt controller | 111 | - samsung,exynos4210-wakeup-eint: represents wakeup interrupt controller |
109 | found on Samsung Exynos4210 SoC. | 112 | found on Samsung Exynos4210 SoC. |
110 | - interrupt-parent: phandle of the interrupt parent to which the external | 113 | - interrupt-parent: phandle of the interrupt parent to which the external |
diff --git a/Documentation/devicetree/bindings/regulator/max8952.txt b/Documentation/devicetree/bindings/regulator/max8952.txt new file mode 100644 index 000000000000..866fcdd0f4eb --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/max8952.txt | |||
@@ -0,0 +1,52 @@ | |||
1 | Maxim MAX8952 voltage regulator | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: must be equal to "maxim,max8952" | ||
5 | - reg: I2C slave address, usually 0x60 | ||
6 | - max8952,dvs-mode-microvolt: array of 4 integer values defining DVS voltages | ||
7 | in microvolts. All values must be from range <770000, 1400000> | ||
8 | - any required generic properties defined in regulator.txt | ||
9 | |||
10 | Optional properties: | ||
11 | - max8952,vid-gpios: array of two GPIO pins used for DVS voltage selection | ||
12 | - max8952,en-gpio: GPIO used to control enable status of regulator | ||
13 | - max8952,default-mode: index of default DVS voltage, from <0, 3> range | ||
14 | - max8952,sync-freq: sync frequency, must be one of following values: | ||
15 | - 0: 26 MHz | ||
16 | - 1: 13 MHz | ||
17 | - 2: 19.2 MHz | ||
18 | Defaults to 26 MHz if not specified. | ||
19 | - max8952,ramp-speed: voltage ramp speed, must be one of following values: | ||
20 | - 0: 32mV/us | ||
21 | - 1: 16mV/us | ||
22 | - 2: 8mV/us | ||
23 | - 3: 4mV/us | ||
24 | - 4: 2mV/us | ||
25 | - 5: 1mV/us | ||
26 | - 6: 0.5mV/us | ||
27 | - 7: 0.25mV/us | ||
28 | Defaults to 32mV/us if not specified. | ||
29 | - any available generic properties defined in regulator.txt | ||
30 | |||
31 | Example: | ||
32 | |||
33 | vdd_arm_reg: pmic@60 { | ||
34 | compatible = "maxim,max8952"; | ||
35 | reg = <0x60>; | ||
36 | |||
37 | /* max8952-specific properties */ | ||
38 | max8952,vid-gpios = <&gpx0 3 0>, <&gpx0 4 0>; | ||
39 | max8952,en-gpio = <&gpx0 1 0>; | ||
40 | max8952,default-mode = <0>; | ||
41 | max8952,dvs-mode-microvolt = <1250000>, <1200000>, | ||
42 | <1050000>, <950000>; | ||
43 | max8952,sync-freq = <0>; | ||
44 | max8952,ramp-speed = <0>; | ||
45 | |||
46 | /* generic regulator properties */ | ||
47 | regulator-name = "vdd_arm"; | ||
48 | regulator-min-microvolt = <770000>; | ||
49 | regulator-max-microvolt = <1400000>; | ||
50 | regulator-always-on; | ||
51 | regulator-boot-on; | ||
52 | }; | ||
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt new file mode 100644 index 000000000000..2a3feabd3b22 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | Atmel AT91RM9200 Real Time Clock | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: should be: "atmel,at91rm9200-rtc" | ||
5 | - reg: physical base address of the controller and length of memory mapped | ||
6 | region. | ||
7 | - interrupts: rtc alarm/event interrupt | ||
8 | |||
9 | Example: | ||
10 | |||
11 | rtc@fffffe00 { | ||
12 | compatible = "atmel,at91rm9200-rtc"; | ||
13 | reg = <0xfffffe00 0x100>; | ||
14 | interrupts = <1 4 7>; | ||
15 | }; | ||
diff --git a/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt new file mode 100644 index 000000000000..8bf89c643640 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | Broadcom BCM2835 SPI0 controller | ||
2 | |||
3 | The BCM2835 contains two forms of SPI master controller, one known simply as | ||
4 | SPI0, and the other known as the "Universal SPI Master"; part of the | ||
5 | auxiliary block. This binding applies to the SPI0 controller. | ||
6 | |||
7 | Required properties: | ||
8 | - compatible: Should be "brcm,bcm2835-spi". | ||
9 | - reg: Should contain register location and length. | ||
10 | - interrupts: Should contain interrupt. | ||
11 | - clocks: The clock feeding the SPI controller. | ||
12 | |||
13 | Example: | ||
14 | |||
15 | spi@20204000 { | ||
16 | compatible = "brcm,bcm2835-spi"; | ||
17 | reg = <0x7e204000 0x1000>; | ||
18 | interrupts = <2 22>; | ||
19 | clocks = <&clk_spi>; | ||
20 | #address-cells = <1>; | ||
21 | #size-cells = <0>; | ||
22 | }; | ||
diff --git a/Documentation/devicetree/bindings/spi/fsl-spi.txt b/Documentation/devicetree/bindings/spi/fsl-spi.txt index 777abd7399d5..b032dd76e9d2 100644 --- a/Documentation/devicetree/bindings/spi/fsl-spi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-spi.txt | |||
@@ -4,7 +4,7 @@ Required properties: | |||
4 | - cell-index : QE SPI subblock index. | 4 | - cell-index : QE SPI subblock index. |
5 | 0: QE subblock SPI1 | 5 | 0: QE subblock SPI1 |
6 | 1: QE subblock SPI2 | 6 | 1: QE subblock SPI2 |
7 | - compatible : should be "fsl,spi". | 7 | - compatible : should be "fsl,spi" or "aeroflexgaisler,spictrl". |
8 | - mode : the SPI operation mode, it can be "cpu" or "cpu-qe". | 8 | - mode : the SPI operation mode, it can be "cpu" or "cpu-qe". |
9 | - reg : Offset and length of the register set for the device | 9 | - reg : Offset and length of the register set for the device |
10 | - interrupts : <a b> where a is the interrupt number and b is a | 10 | - interrupts : <a b> where a is the interrupt number and b is a |
@@ -14,6 +14,7 @@ Required properties: | |||
14 | controller you have. | 14 | controller you have. |
15 | - interrupt-parent : the phandle for the interrupt controller that | 15 | - interrupt-parent : the phandle for the interrupt controller that |
16 | services interrupts for this device. | 16 | services interrupts for this device. |
17 | - clock-frequency : input clock frequency to non FSL_SOC cores | ||
17 | 18 | ||
18 | Optional properties: | 19 | Optional properties: |
19 | - gpios : specifies the gpio pins to be used for chipselects. | 20 | - gpios : specifies the gpio pins to be used for chipselects. |
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt new file mode 100644 index 000000000000..91ff771c7e77 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | NVIDIA Tegra114 SPI controller. | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be "nvidia,tegra114-spi". | ||
5 | - reg: Should contain SPI registers location and length. | ||
6 | - interrupts: Should contain SPI interrupts. | ||
7 | - nvidia,dma-request-selector : The Tegra DMA controller's phandle and | ||
8 | request selector for this SPI controller. | ||
9 | - This also requires a clock named "spi" as per binding document | ||
10 | Documentation/devicetree/bindings/clock/clock-bindings.txt | ||
11 | |||
12 | Recommended properties: | ||
13 | - spi-max-frequency: Definition as per | ||
14 | Documentation/devicetree/bindings/spi/spi-bus.txt | ||
15 | Example: | ||
16 | |||
17 | spi@7000d600 { | ||
18 | compatible = "nvidia,tegra114-spi"; | ||
19 | reg = <0x7000d600 0x200>; | ||
20 | interrupts = <0 82 0x04>; | ||
21 | nvidia,dma-request-selector = <&apbdma 16>; | ||
22 | spi-max-frequency = <25000000>; | ||
23 | #address-cells = <1>; | ||
24 | #size-cells = <0>; | ||
25 | status = "disabled"; | ||
26 | }; | ||
diff --git a/Documentation/devicetree/bindings/spi/spi-samsung.txt b/Documentation/devicetree/bindings/spi/spi-samsung.txt index a15ffeddfba4..86aa061f069f 100644 --- a/Documentation/devicetree/bindings/spi/spi-samsung.txt +++ b/Documentation/devicetree/bindings/spi/spi-samsung.txt | |||
@@ -31,9 +31,6 @@ Required Board Specific Properties: | |||
31 | 31 | ||
32 | - #address-cells: should be 1. | 32 | - #address-cells: should be 1. |
33 | - #size-cells: should be 0. | 33 | - #size-cells: should be 0. |
34 | - gpios: The gpio specifier for clock, mosi and miso interface lines (in the | ||
35 | order specified). The format of the gpio specifier depends on the gpio | ||
36 | controller. | ||
37 | 34 | ||
38 | Optional Board Specific Properties: | 35 | Optional Board Specific Properties: |
39 | 36 | ||
@@ -86,9 +83,8 @@ Example: | |||
86 | spi_0: spi@12d20000 { | 83 | spi_0: spi@12d20000 { |
87 | #address-cells = <1>; | 84 | #address-cells = <1>; |
88 | #size-cells = <0>; | 85 | #size-cells = <0>; |
89 | gpios = <&gpa2 4 2 3 0>, | 86 | pinctrl-names = "default"; |
90 | <&gpa2 6 2 3 0>, | 87 | pinctrl-0 = <&spi0_bus>; |
91 | <&gpa2 7 2 3 0>; | ||
92 | 88 | ||
93 | w25q80bw@0 { | 89 | w25q80bw@0 { |
94 | #address-cells = <1>; | 90 | #address-cells = <1>; |
diff --git a/Documentation/devicetree/bindings/staging/dwc2.txt b/Documentation/devicetree/bindings/staging/dwc2.txt new file mode 100644 index 000000000000..1a1b7cfa4845 --- /dev/null +++ b/Documentation/devicetree/bindings/staging/dwc2.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | Platform DesignWare HS OTG USB 2.0 controller | ||
2 | ----------------------------------------------------- | ||
3 | |||
4 | Required properties: | ||
5 | - compatible : "snps,dwc2" | ||
6 | - reg : Should contain 1 register range (address and length) | ||
7 | - interrupts : Should contain 1 interrupt | ||
8 | |||
9 | Example: | ||
10 | |||
11 | usb@101c0000 { | ||
12 | compatible = "ralink,rt3050-usb", "snps,dwc2"; | ||
13 | reg = <0x101c0000 40000>; | ||
14 | interrupts = <18>; | ||
15 | }; | ||
diff --git a/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt b/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt index 07654f0338b6..8071ac20d4b3 100644 --- a/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt +++ b/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt | |||
@@ -26,7 +26,7 @@ Required properties: | |||
26 | - crtc: the crtc this display is connected to, see below | 26 | - crtc: the crtc this display is connected to, see below |
27 | Optional properties: | 27 | Optional properties: |
28 | - interface_pix_fmt: How this display is connected to the | 28 | - interface_pix_fmt: How this display is connected to the |
29 | crtc. Currently supported types: "rgb24", "rgb565" | 29 | crtc. Currently supported types: "rgb24", "rgb565", "bgr666" |
30 | - edid: verbatim EDID data block describing attached display. | 30 | - edid: verbatim EDID data block describing attached display. |
31 | - ddc: phandle describing the i2c bus handling the display data | 31 | - ddc: phandle describing the i2c bus handling the display data |
32 | channel | 32 | channel |
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt index 1e1145ca4f3c..1928a3e83cd0 100644 --- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt | |||
@@ -11,6 +11,9 @@ Required properties: | |||
11 | - "nvidia,tegra20-uart" | 11 | - "nvidia,tegra20-uart" |
12 | - "nxp,lpc3220-uart" | 12 | - "nxp,lpc3220-uart" |
13 | - "ibm,qpace-nwp-serial" | 13 | - "ibm,qpace-nwp-serial" |
14 | - "altr,16550-FIFO32" | ||
15 | - "altr,16550-FIFO64" | ||
16 | - "altr,16550-FIFO128" | ||
14 | - "serial" if the port type is unknown. | 17 | - "serial" if the port type is unknown. |
15 | - reg : offset and length of the register set for the device. | 18 | - reg : offset and length of the register set for the device. |
16 | - interrupts : should contain uart interrupt. | 19 | - interrupts : should contain uart interrupt. |
@@ -30,6 +33,10 @@ Optional properties: | |||
30 | RTAS and should not be registered. | 33 | RTAS and should not be registered. |
31 | - no-loopback-test: set to indicate that the port does not implement loopback | 34 | - no-loopback-test: set to indicate that the port does not implement loopback |
32 | test mode | 35 | test mode |
36 | - fifo-size: the fifo size of the UART. | ||
37 | - auto-flow-control: one way to enable automatic flow control support. The | ||
38 | driver is allowed to detect support for the capability even without this | ||
39 | property. | ||
33 | 40 | ||
34 | Example: | 41 | Example: |
35 | 42 | ||
diff --git a/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt b/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt index 5778b9c83bd8..1c04a4c9515f 100644 --- a/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt +++ b/Documentation/devicetree/bindings/usb/ci13xxx-imx.txt | |||
@@ -11,6 +11,7 @@ Optional properties: | |||
11 | that indicate usb controller index | 11 | that indicate usb controller index |
12 | - vbus-supply: regulator for vbus | 12 | - vbus-supply: regulator for vbus |
13 | - disable-over-current: disable over current detect | 13 | - disable-over-current: disable over current detect |
14 | - external-vbus-divider: enables off-chip resistor divider for Vbus | ||
14 | 15 | ||
15 | Examples: | 16 | Examples: |
16 | usb@02184000 { /* USB OTG */ | 17 | usb@02184000 { /* USB OTG */ |
@@ -20,4 +21,5 @@ usb@02184000 { /* USB OTG */ | |||
20 | fsl,usbphy = <&usbphy1>; | 21 | fsl,usbphy = <&usbphy1>; |
21 | fsl,usbmisc = <&usbmisc 0>; | 22 | fsl,usbmisc = <&usbmisc 0>; |
22 | disable-over-current; | 23 | disable-over-current; |
24 | external-vbus-divider; | ||
23 | }; | 25 | }; |
diff --git a/Documentation/devicetree/bindings/usb/ehci-omap.txt b/Documentation/devicetree/bindings/usb/ehci-omap.txt new file mode 100644 index 000000000000..485a9a1efa7a --- /dev/null +++ b/Documentation/devicetree/bindings/usb/ehci-omap.txt | |||
@@ -0,0 +1,32 @@ | |||
1 | OMAP HS USB EHCI controller | ||
2 | |||
3 | This device is usually the child of the omap-usb-host | ||
4 | Documentation/devicetree/bindings/mfd/omap-usb-host.txt | ||
5 | |||
6 | Required properties: | ||
7 | |||
8 | - compatible: should be "ti,ehci-omap" | ||
9 | - reg: should contain one register range i.e. start and length | ||
10 | - interrupts: description of the interrupt line | ||
11 | |||
12 | Optional properties: | ||
13 | |||
14 | - phys: list of phandles to PHY nodes. | ||
15 | This property is required if at least one of the ports is in | ||
16 | PHY mode i.e. OMAP_EHCI_PORT_MODE_PHY | ||
17 | |||
18 | To specify the port mode, see | ||
19 | Documentation/devicetree/bindings/mfd/omap-usb-host.txt | ||
20 | |||
21 | Example for OMAP4: | ||
22 | |||
23 | usbhsehci: ehci@4a064c00 { | ||
24 | compatible = "ti,ehci-omap", "usb-ehci"; | ||
25 | reg = <0x4a064c00 0x400>; | ||
26 | interrupts = <0 77 0x4>; | ||
27 | }; | ||
28 | |||
29 | &usbhsehci { | ||
30 | phys = <&hsusb1_phy 0 &hsusb3_phy>; | ||
31 | }; | ||
32 | |||
diff --git a/Documentation/devicetree/bindings/usb/ohci-omap3.txt b/Documentation/devicetree/bindings/usb/ohci-omap3.txt new file mode 100644 index 000000000000..14ab42812a8e --- /dev/null +++ b/Documentation/devicetree/bindings/usb/ohci-omap3.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | OMAP HS USB OHCI controller (OMAP3 and later) | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - compatible: should be "ti,ohci-omap3" | ||
6 | - reg: should contain one register range i.e. start and length | ||
7 | - interrupts: description of the interrupt line | ||
8 | |||
9 | Example for OMAP4: | ||
10 | |||
11 | usbhsohci: ohci@4a064800 { | ||
12 | compatible = "ti,ohci-omap3", "usb-ohci"; | ||
13 | reg = <0x4a064800 0x400>; | ||
14 | interrupts = <0 76 0x4>; | ||
15 | }; | ||
diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt index 1ef0ce71f8fa..662f0f1d2315 100644 --- a/Documentation/devicetree/bindings/usb/omap-usb.txt +++ b/Documentation/devicetree/bindings/usb/omap-usb.txt | |||
@@ -8,10 +8,10 @@ OMAP MUSB GLUE | |||
8 | and disconnect. | 8 | and disconnect. |
9 | - multipoint : Should be "1" indicating the musb controller supports | 9 | - multipoint : Should be "1" indicating the musb controller supports |
10 | multipoint. This is a MUSB configuration-specific setting. | 10 | multipoint. This is a MUSB configuration-specific setting. |
11 | - num_eps : Specifies the number of endpoints. This is also a | 11 | - num-eps : Specifies the number of endpoints. This is also a |
12 | MUSB configuration-specific setting. Should be set to "16" | 12 | MUSB configuration-specific setting. Should be set to "16" |
13 | - ram_bits : Specifies the ram address size. Should be set to "12" | 13 | - ram-bits : Specifies the ram address size. Should be set to "12" |
14 | - interface_type : This is a board specific setting to describe the type of | 14 | - interface-type : This is a board specific setting to describe the type of |
15 | interface between the controller and the phy. It should be "0" or "1" | 15 | interface between the controller and the phy. It should be "0" or "1" |
16 | specifying ULPI and UTMI respectively. | 16 | specifying ULPI and UTMI respectively. |
17 | - mode : Should be "3" to represent OTG. "1" signifies HOST and "2" | 17 | - mode : Should be "3" to represent OTG. "1" signifies HOST and "2" |
@@ -29,18 +29,46 @@ usb_otg_hs: usb_otg_hs@4a0ab000 { | |||
29 | ti,hwmods = "usb_otg_hs"; | 29 | ti,hwmods = "usb_otg_hs"; |
30 | ti,has-mailbox; | 30 | ti,has-mailbox; |
31 | multipoint = <1>; | 31 | multipoint = <1>; |
32 | num_eps = <16>; | 32 | num-eps = <16>; |
33 | ram_bits = <12>; | 33 | ram-bits = <12>; |
34 | ctrl-module = <&omap_control_usb>; | 34 | ctrl-module = <&omap_control_usb>; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | Board specific device node entry | 37 | Board specific device node entry |
38 | &usb_otg_hs { | 38 | &usb_otg_hs { |
39 | interface_type = <1>; | 39 | interface-type = <1>; |
40 | mode = <3>; | 40 | mode = <3>; |
41 | power = <50>; | 41 | power = <50>; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | OMAP DWC3 GLUE | ||
45 | - compatible : Should be "ti,dwc3" | ||
46 | - ti,hwmods : Should be "usb_otg_ss" | ||
47 | - reg : Address and length of the register set for the device. | ||
48 | - interrupts : The irq number of this device that is used to interrupt the | ||
49 | MPU | ||
50 | - #address-cells, #size-cells : Must be present if the device has sub-nodes | ||
51 | - utmi-mode : controls the source of UTMI/PIPE status for VBUS and OTG ID. | ||
52 | It should be set to "1" for HW mode and "2" for SW mode. | ||
53 | - ranges: the child address space is mapped 1:1 onto the parent address space | ||
54 | |||
55 | Sub-nodes: | ||
56 | The dwc3 core should be added as subnode to omap dwc3 glue. | ||
57 | - dwc3 : | ||
58 | The binding details of dwc3 can be found in: | ||
59 | Documentation/devicetree/bindings/usb/dwc3.txt | ||
60 | |||
61 | omap_dwc3 { | ||
62 | compatible = "ti,dwc3"; | ||
63 | ti,hwmods = "usb_otg_ss"; | ||
64 | reg = <0x4a020000 0x1ff>; | ||
65 | interrupts = <0 93 4>; | ||
66 | #address-cells = <1>; | ||
67 | #size-cells = <1>; | ||
68 | utmi-mode = <2>; | ||
69 | ranges; | ||
70 | }; | ||
71 | |||
44 | OMAP CONTROL USB | 72 | OMAP CONTROL USB |
45 | 73 | ||
46 | Required properties: | 74 | Required properties: |
diff --git a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt index 033194934f64..f575302e5173 100644 --- a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt +++ b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt | |||
@@ -1,20 +1,25 @@ | |||
1 | * Samsung's usb phy transceiver | 1 | SAMSUNG USB-PHY controllers |
2 | 2 | ||
3 | The Samsung's phy transceiver is used for controlling usb phy for | 3 | ** Samsung's usb 2.0 phy transceiver |
4 | s3c-hsotg as well as ehci-s5p and ohci-exynos usb controllers | 4 | |
5 | across Samsung SOCs. | 5 | The Samsung's usb 2.0 phy transceiver is used for controlling |
6 | usb 2.0 phy for s3c-hsotg as well as ehci-s5p and ohci-exynos | ||
7 | usb controllers across Samsung SOCs. | ||
6 | TODO: Adding the PHY binding with controller(s) according to the under | 8 | TODO: Adding the PHY binding with controller(s) according to the under |
7 | development generic PHY driver. | 9 | development generic PHY driver. |
8 | 10 | ||
9 | Required properties: | 11 | Required properties: |
10 | 12 | ||
11 | Exynos4210: | 13 | Exynos4210: |
12 | - compatible : should be "samsung,exynos4210-usbphy" | 14 | - compatible : should be "samsung,exynos4210-usb2phy" |
13 | - reg : base physical address of the phy registers and length of memory mapped | 15 | - reg : base physical address of the phy registers and length of memory mapped |
14 | region. | 16 | region. |
17 | - clocks: Clock IDs array as required by the controller. | ||
18 | - clock-names: names of clocks corresponding to IDs in the clock property as requested | ||
19 | by the controller driver. | ||
15 | 20 | ||
16 | Exynos5250: | 21 | Exynos5250: |
17 | - compatible : should be "samsung,exynos5250-usbphy" | 22 | - compatible : should be "samsung,exynos5250-usb2phy" |
18 | - reg : base physical address of the phy registers and length of memory mapped | 23 | - reg : base physical address of the phy registers and length of memory mapped |
19 | region. | 24 | region. |
20 | 25 | ||
@@ -44,12 +49,69 @@ Example: | |||
44 | usbphy@125B0000 { | 49 | usbphy@125B0000 { |
45 | #address-cells = <1>; | 50 | #address-cells = <1>; |
46 | #size-cells = <1>; | 51 | #size-cells = <1>; |
47 | compatible = "samsung,exynos4210-usbphy"; | 52 | compatible = "samsung,exynos4210-usb2phy"; |
48 | reg = <0x125B0000 0x100>; | 53 | reg = <0x125B0000 0x100>; |
49 | ranges; | 54 | ranges; |
50 | 55 | ||
56 | clocks = <&clock 2>, <&clock 305>; | ||
57 | clock-names = "xusbxti", "otg"; | ||
58 | |||
51 | usbphy-sys { | 59 | usbphy-sys { |
52 | /* USB device and host PHY_CONTROL registers */ | 60 | /* USB device and host PHY_CONTROL registers */ |
53 | reg = <0x10020704 0x8>; | 61 | reg = <0x10020704 0x8>; |
54 | }; | 62 | }; |
55 | }; | 63 | }; |
64 | |||
65 | |||
66 | ** Samsung's usb 3.0 phy transceiver | ||
67 | |||
68 | Starting with exynos5250, Samsung's SoCs have a usb 3.0 phy transceiver | ||
69 | which is used for controlling usb 3.0 phy for dwc3-exynos usb 3.0 | ||
70 | controllers across Samsung SOCs. | ||
71 | |||
72 | Required properties: | ||
73 | |||
74 | Exynos5250: | ||
75 | - compatible : should be "samsung,exynos5250-usb3phy" | ||
76 | - reg : base physical address of the phy registers and length of memory mapped | ||
77 | region. | ||
78 | - clocks: Clock IDs array as required by the controller. | ||
79 | - clock-names: names of clocks corresponding to IDs in the clock property | ||
80 | as requested by the controller driver. | ||
81 | |||
82 | Optional properties: | ||
83 | - #address-cells: should be '1' when usbphy node has a child node with 'reg' | ||
84 | property. | ||
85 | - #size-cells: should be '1' when usbphy node has a child node with 'reg' | ||
86 | property. | ||
87 | - ranges: allows valid translation between child's address space and parent's | ||
88 | address space. | ||
89 | |||
90 | - The child node 'usbphy-sys' to the node 'usbphy' is for the system controller | ||
91 | interface for usb-phy. It should provide the following information required by | ||
92 | usb-phy controller to control phy. | ||
93 | - reg : base physical address of PHY_CONTROL registers. | ||
94 | The size of this register is the total sum of size of all PHY_CONTROL | ||
95 | registers that the SoC has. For example, the size will be | ||
96 | '0x4' in case we have only one PHY_CONTROL register (e.g. | ||
97 | OTHERS register in S3C64XX or USB_PHY_CONTROL register in S5PV210) | ||
98 | and, '0x8' in case we have two PHY_CONTROL registers (e.g. | ||
99 | USBDEVICE_PHY_CONTROL and USBHOST_PHY_CONTROL registers in exynos4x). | ||
100 | and so on. | ||
101 | |||
102 | Example: | ||
103 | usbphy@12100000 { | ||
104 | compatible = "samsung,exynos5250-usb3phy"; | ||
105 | reg = <0x12100000 0x100>; | ||
106 | #address-cells = <1>; | ||
107 | #size-cells = <1>; | ||
108 | ranges; | ||
109 | |||
110 | clocks = <&clock 1>, <&clock 286>; | ||
111 | clock-names = "ext_xtal", "usbdrd30"; | ||
112 | |||
113 | usbphy-sys { | ||
114 | /* USB device and host PHY_CONTROL registers */ | ||
115 | reg = <0x10040704 0x8>; | ||
116 | }; | ||
117 | }; | ||
diff --git a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt new file mode 100644 index 000000000000..d7e272671c7e --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt | |||
@@ -0,0 +1,34 @@ | |||
1 | USB NOP PHY | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: should be usb-nop-xceiv | ||
5 | |||
6 | Optional properties: | ||
7 | - clocks: phandle to the PHY clock. Use as per Documentation/devicetree | ||
8 | /bindings/clock/clock-bindings.txt | ||
9 | This property is required if clock-frequency is specified. | ||
10 | |||
11 | - clock-names: Should be "main_clk" | ||
12 | |||
13 | - clock-frequency: the clock frequency (in Hz) that the PHY clock must | ||
14 | be configured to. | ||
15 | |||
16 | - vcc-supply: phandle to the regulator that provides power to the PHY. | ||
17 | |||
18 | - reset-supply: phandle to the regulator that provides RESET to the PHY. | ||
19 | |||
20 | Example: | ||
21 | |||
22 | hsusb1_phy { | ||
23 | compatible = "usb-nop-xceiv"; | ||
24 | clock-frequency = <19200000>; | ||
25 | clocks = <&osc 0>; | ||
26 | clock-names = "main_clk"; | ||
27 | vcc-supply = <&hsusb1_vcc_regulator>; | ||
28 | reset-supply = <&hsusb1_reset_regulator>; | ||
29 | }; | ||
30 | |||
31 | hsusb1_phy is a NOP USB PHY device that gets its clock from an oscillator | ||
32 | and expects that clock to be configured to 19.2MHz by the NOP PHY driver. | ||
33 | hsusb1_vcc_regulator provides power to the PHY and hsusb1_reset_regulator | ||
34 | controls RESET. | ||
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 19e1ef73ab0d..4d1919bf2332 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt | |||
@@ -5,6 +5,7 @@ using them to avoid name-space collisions. | |||
5 | 5 | ||
6 | ad Avionic Design GmbH | 6 | ad Avionic Design GmbH |
7 | adi Analog Devices, Inc. | 7 | adi Analog Devices, Inc. |
8 | aeroflexgaisler Aeroflex Gaisler AB | ||
8 | ak Asahi Kasei Corp. | 9 | ak Asahi Kasei Corp. |
9 | amcc Applied Micro Circuits Corporation (APM, formerly AMCC) | 10 | amcc Applied Micro Circuits Corporation (APM, formerly AMCC) |
10 | apm Applied Micro Circuits Corporation (APM) | 11 | apm Applied Micro Circuits Corporation (APM) |
@@ -48,6 +49,7 @@ samsung Samsung Semiconductor | |||
48 | sbs Smart Battery System | 49 | sbs Smart Battery System |
49 | schindler Schindler | 50 | schindler Schindler |
50 | sil Silicon Image | 51 | sil Silicon Image |
52 | silabs Silicon Laboratories | ||
51 | simtek | 53 | simtek |
52 | sirf SiRF Technology, Inc. | 54 | sirf SiRF Technology, Inc. |
53 | snps Synopsys, Inc. | 55 | snps Synopsys, Inc. |
diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt new file mode 100644 index 000000000000..1482103d288f --- /dev/null +++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt | |||
@@ -0,0 +1,41 @@ | |||
1 | lp855x bindings | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553", | ||
5 | "ti,lp8556", "ti,lp8557" | ||
6 | - reg: I2C slave address (u8) | ||
7 | - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device. | ||
8 | |||
9 | Optional properties: | ||
10 | - bl-name: Backlight device name (string) | ||
11 | - init-brt: Initial value of backlight brightness (u8) | ||
12 | - pwm-period: PWM period value. Set only if PWM input mode is used (u32) | ||
13 | - rom-addr: Register address of ROM area to be updated (u8) | ||
14 | - rom-val: Register value to be updated (u8) | ||
15 | |||
16 | Example: | ||
17 | |||
18 | /* LP8556 */ | ||
19 | backlight@2c { | ||
20 | compatible = "ti,lp8556"; | ||
21 | reg = <0x2c>; | ||
22 | |||
23 | bl-name = "lcd-bl"; | ||
24 | dev-ctrl = /bits/ 8 <0x85>; | ||
25 | init-brt = /bits/ 8 <0x10>; | ||
26 | }; | ||
27 | |||
28 | /* LP8557 */ | ||
29 | backlight@2c { | ||
30 | compatible = "ti,lp8557"; | ||
31 | reg = <0x2c>; | ||
32 | |||
33 | dev-ctrl = /bits/ 8 <0x41>; | ||
34 | init-brt = /bits/ 8 <0x0a>; | ||
35 | |||
36 | /* 4V OV, 4 output LED string enabled */ | ||
37 | rom_14h { | ||
38 | rom-addr = /bits/ 8 <0x14>; | ||
39 | rom-val = /bits/ 8 <0xcf>; | ||
40 | }; | ||
41 | }; | ||
diff --git a/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt new file mode 100644 index 000000000000..5fb9279ac287 --- /dev/null +++ b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | TPS65217 family of regulators | ||
2 | |||
3 | The TPS65217 chip contains a boost converter and current sinks which can be | ||
4 | used to drive LEDs for use as backlights. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible: "ti,tps65217" | ||
8 | - reg: I2C slave address | ||
9 | - backlight: node for specifying WLED1 and WLED2 lines in TPS65217 | ||
10 | - isel: selection bit, valid values: 1 for ISEL1 (low-level) and 2 for ISEL2 (high-level) | ||
11 | - fdim: PWM dimming frequency, valid values: 100, 200, 500, 1000 | ||
12 | - default-brightness: valid values: 0-100 | ||
13 | |||
14 | Each regulator is defined using the standard binding for regulators. | ||
15 | |||
16 | Example: | ||
17 | |||
18 | tps: tps@24 { | ||
19 | reg = <0x24>; | ||
20 | compatible = "ti,tps65217"; | ||
21 | backlight { | ||
22 | isel = <1>; /* 1 - ISET1, 2 ISET2 */ | ||
23 | fdim = <100>; /* TPS65217_BL_FDIM_100HZ */ | ||
24 | default-brightness = <50>; | ||
25 | }; | ||
26 | }; | ||
27 | |||
diff --git a/Documentation/devicetree/bindings/video/via,vt8500-fb.txt b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt index c870b6478ec8..2871e218a0fb 100644 --- a/Documentation/devicetree/bindings/video/via,vt8500-fb.txt +++ b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt | |||
@@ -5,58 +5,32 @@ Required properties: | |||
5 | - compatible : "via,vt8500-fb" | 5 | - compatible : "via,vt8500-fb" |
6 | - reg : Should contain 1 register range (address and length) | 6 | - reg : Should contain 1 register range (address and length) |
7 | - interrupts : framebuffer controller interrupt | 7 | - interrupts : framebuffer controller interrupt |
8 | - display: a phandle pointing to the display node | 8 | - bits-per-pixel : bit depth of framebuffer (16 or 32) |
9 | 9 | ||
10 | Required nodes: | 10 | Required subnodes: |
11 | - display: a display node is required to initialize the lcd panel | 11 | - display-timings: see display-timing.txt for information |
12 | This should be in the board dts. | ||
13 | - default-mode: a videomode within the display with timing parameters | ||
14 | as specified below. | ||
15 | 12 | ||
16 | Example: | 13 | Example: |
17 | 14 | ||
18 | fb@d800e400 { | 15 | fb@d800e400 { |
19 | compatible = "via,vt8500-fb"; | 16 | compatible = "via,vt8500-fb"; |
20 | reg = <0xd800e400 0x400>; | 17 | reg = <0xd800e400 0x400>; |
21 | interrupts = <12>; | 18 | interrupts = <12>; |
22 | display = <&display>; | 19 | bits-per-pixel = <16>; |
23 | default-mode = <&mode0>; | ||
24 | }; | ||
25 | |||
26 | VIA VT8500 Display | ||
27 | ----------------------------------------------------- | ||
28 | Required properties (as per of_videomode_helper): | ||
29 | |||
30 | - hactive, vactive: Display resolution | ||
31 | - hfront-porch, hback-porch, hsync-len: Horizontal Display timing parameters | ||
32 | in pixels | ||
33 | vfront-porch, vback-porch, vsync-len: Vertical display timing parameters in | ||
34 | lines | ||
35 | - clock: displayclock in Hz | ||
36 | - bpp: lcd panel bit-depth. | ||
37 | <16> for RGB565, <32> for RGB888 | ||
38 | |||
39 | Optional properties (as per of_videomode_helper): | ||
40 | - width-mm, height-mm: Display dimensions in mm | ||
41 | - hsync-active-high (bool): Hsync pulse is active high | ||
42 | - vsync-active-high (bool): Vsync pulse is active high | ||
43 | - interlaced (bool): This is an interlaced mode | ||
44 | - doublescan (bool): This is a doublescan mode | ||
45 | 20 | ||
46 | Example: | 21 | display-timings { |
47 | display: display@0 { | 22 | native-mode = <&timing0>; |
48 | modes { | 23 | timing0: 800x480 { |
49 | mode0: mode@0 { | 24 | clock-frequency = <0>; /* unused but required */ |
50 | hactive = <800>; | 25 | hactive = <800>; |
51 | vactive = <480>; | 26 | vactive = <480>; |
52 | hback-porch = <88>; | ||
53 | hfront-porch = <40>; | 27 | hfront-porch = <40>; |
28 | hback-porch = <88>; | ||
54 | hsync-len = <0>; | 29 | hsync-len = <0>; |
55 | vback-porch = <32>; | 30 | vback-porch = <32>; |
56 | vfront-porch = <11>; | 31 | vfront-porch = <11>; |
57 | vsync-len = <1>; | 32 | vsync-len = <1>; |
58 | clock = <0>; /* unused but required */ | ||
59 | bpp = <16>; /* non-standard but required */ | ||
60 | }; | 33 | }; |
61 | }; | 34 | }; |
62 | }; | 35 | }; |
36 | |||
diff --git a/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt index 3d325e1d11ee..0bcadb2840a5 100644 --- a/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt +++ b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt | |||
@@ -4,20 +4,30 @@ Wondermedia WM8505 Framebuffer | |||
4 | Required properties: | 4 | Required properties: |
5 | - compatible : "wm,wm8505-fb" | 5 | - compatible : "wm,wm8505-fb" |
6 | - reg : Should contain 1 register range (address and length) | 6 | - reg : Should contain 1 register range (address and length) |
7 | - via,display: a phandle pointing to the display node | 7 | - bits-per-pixel : bit depth of framebuffer (16 or 32) |
8 | 8 | ||
9 | Required nodes: | 9 | Required subnodes: |
10 | - display: a display node is required to initialize the lcd panel | 10 | - display-timings: see display-timing.txt for information |
11 | This should be in the board dts. See definition in | ||
12 | Documentation/devicetree/bindings/video/via,vt8500-fb.txt | ||
13 | - default-mode: a videomode node as specified in | ||
14 | Documentation/devicetree/bindings/video/via,vt8500-fb.txt | ||
15 | 11 | ||
16 | Example: | 12 | Example: |
17 | 13 | ||
18 | fb@d8050800 { | 14 | fb@d8051700 { |
19 | compatible = "wm,wm8505-fb"; | 15 | compatible = "wm,wm8505-fb"; |
20 | reg = <0xd8050800 0x200>; | 16 | reg = <0xd8051700 0x200>; |
21 | display = <&display>; | 17 | bits-per-pixel = <16>; |
22 | default-mode = <&mode0>; | 18 | |
19 | display-timings { | ||
20 | native-mode = <&timing0>; | ||
21 | timing0: 800x480 { | ||
22 | clock-frequency = <0>; /* unused but required */ | ||
23 | hactive = <800>; | ||
24 | vactive = <480>; | ||
25 | hfront-porch = <40>; | ||
26 | hback-porch = <88>; | ||
27 | hsync-len = <0>; | ||
28 | vback-porch = <32>; | ||
29 | vfront-porch = <11>; | ||
30 | vsync-len = <1>; | ||
31 | }; | ||
32 | }; | ||
23 | }; | 33 | }; |
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index d230dd9c99b0..4a93e98b290a 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -150,12 +150,28 @@ discard -- If set, issues discard/TRIM commands to the block | |||
150 | device when blocks are freed. This is useful for SSD devices | 150 | device when blocks are freed. This is useful for SSD devices |
151 | and sparse/thinly-provisioned LUNs. | 151 | and sparse/thinly-provisioned LUNs. |
152 | 152 | ||
153 | nfs -- This option maintains an index (cache) of directory | 153 | nfs=stale_rw|nostale_ro |
154 | inodes by i_logstart which is used by the nfs-related code to | 154 | Enable this only if you want to export the FAT filesystem |
155 | improve look-ups. | 155 | over NFS. |
156 | |||
157 | stale_rw: This option maintains an index (cache) of directory | ||
158 | inodes by i_logstart which is used by the nfs-related code to | ||
159 | improve look-ups. Full file operations (read/write) over NFS are | ||
160 | supported but with cache eviction at NFS server, this could | ||
161 | result in ESTALE issues. | ||
162 | |||
163 | nostale_ro: This option bases the inode number and filehandle | ||
164 | on the on-disk location of a file in the MS-DOS directory entry. | ||
165 | This ensures that ESTALE will not be returned after a file is | ||
166 | evicted from the inode cache. However, it means that operations | ||
167 | such as rename, create and unlink could cause filehandles that | ||
168 | previously pointed at one file to point at a different file, | ||
169 | potentially causing data corruption. For this reason, this | ||
170 | option also mounts the filesystem readonly. | ||
171 | |||
172 | To maintain backward compatibility, '-o nfs' is also accepted, | ||
173 | defaulting to stale_rw | ||
156 | 174 | ||
157 | Enable this only if you want to export the FAT filesystem | ||
158 | over NFS | ||
159 | 175 | ||
160 | <bool>: 0,1,yes,no,true,false | 176 | <bool>: 0,1,yes,no,true,false |
161 | 177 | ||
diff --git a/Documentation/hwmon/adt7410 b/Documentation/hwmon/adt7410 index 58150c480e56..9817941e5f19 100644 --- a/Documentation/hwmon/adt7410 +++ b/Documentation/hwmon/adt7410 | |||
@@ -12,29 +12,42 @@ Supported chips: | |||
12 | Addresses scanned: None | 12 | Addresses scanned: None |
13 | Datasheet: Publicly available at the Analog Devices website | 13 | Datasheet: Publicly available at the Analog Devices website |
14 | http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf | 14 | http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf |
15 | * Analog Devices ADT7310 | ||
16 | Prefix: 'adt7310' | ||
17 | Addresses scanned: None | ||
18 | Datasheet: Publicly available at the Analog Devices website | ||
19 | http://www.analog.com/static/imported-files/data_sheets/ADT7310.pdf | ||
20 | * Analog Devices ADT7320 | ||
21 | Prefix: 'adt7320' | ||
22 | Addresses scanned: None | ||
23 | Datasheet: Publicly available at the Analog Devices website | ||
24 | http://www.analog.com/static/imported-files/data_sheets/ADT7320.pdf | ||
15 | 25 | ||
16 | Author: Hartmut Knaack <knaack.h@gmx.de> | 26 | Author: Hartmut Knaack <knaack.h@gmx.de> |
17 | 27 | ||
18 | Description | 28 | Description |
19 | ----------- | 29 | ----------- |
20 | 30 | ||
21 | The ADT7410 is a temperature sensor with rated temperature range of -55°C to | 31 | The ADT7310/ADT7410 is a temperature sensor with rated temperature range of |
22 | +150°C. It has a high accuracy of +/-0.5°C and can be operated at a resolution | 32 | -55°C to +150°C. It has a high accuracy of +/-0.5°C and can be operated at a |
23 | of 13 bits (0.0625°C) or 16 bits (0.0078°C). The sensor provides an INT pin to | 33 | resolution of 13 bits (0.0625°C) or 16 bits (0.0078°C). The sensor provides an |
24 | indicate that a minimum or maximum temperature set point has been exceeded, as | 34 | INT pin to indicate that a minimum or maximum temperature set point has been |
25 | well as a critical temperature (CT) pin to indicate that the critical | 35 | exceeded, as well as a critical temperature (CT) pin to indicate that the |
26 | temperature set point has been exceeded. Both pins can be set up with a common | 36 | critical temperature set point has been exceeded. Both pins can be set up with a |
27 | hysteresis of 0°C - 15°C and a fault queue, ranging from 1 to 4 events. Both | 37 | common hysteresis of 0°C - 15°C and a fault queue, ranging from 1 to 4 events. |
28 | pins can individually set to be active-low or active-high, while the whole | 38 | Both pins can individually be set to be active-low or active-high, while the whole |
29 | device can either run in comparator mode or interrupt mode. The ADT7410 | 39 | device can either run in comparator mode or interrupt mode. The ADT7410 supports |
30 | supports continous temperature sampling, as well as sampling one temperature | 40 | continuous temperature sampling, as well as sampling one temperature value per |
31 | value per second or even justget one sample on demand for power saving. | 41 | second or even just get one sample on demand for power saving. Besides, it can |
32 | Besides, it can completely power down its ADC, if power management is | 42 | completely power down its ADC, if power management is required. |
33 | required. | 43 | |
34 | 44 | The ADT7320/ADT7420 is register compatible, the only differences being the | |
35 | The ADT7420 is register compatible, the only differences being the package, | 45 | package, a slightly narrower operating temperature range (-40°C to +150°C), and |
36 | a slightly narrower operating temperature range (-40°C to +150°C), and a | 46 | a better accuracy (0.25°C instead of 0.50°C.) |
37 | better accuracy (0.25°C instead of 0.50°C.) | 47 | |
48 | The difference between the ADT7310/ADT7320 and ADT7410/ADT7420 is the control | ||
49 | interface, the ADT7310 and ADT7320 use SPI while the ADT7410 and ADT7420 use | ||
50 | I2C. | ||
38 | 51 | ||
39 | Configuration Notes | 52 | Configuration Notes |
40 | ------------------- | 53 | ------------------- |
diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066 index 26025e419d35..c1b57d72efc3 100644 --- a/Documentation/hwmon/lm25066 +++ b/Documentation/hwmon/lm25066 | |||
@@ -1,7 +1,13 @@ | |||
1 | Kernel driver max8688 | 1 | Kernel driver lm25066 |
2 | ===================== | 2 | ===================== |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * TI LM25056 | ||
6 | Prefix: 'lm25056' | ||
7 | Addresses scanned: - | ||
8 | Datasheets: | ||
9 | http://www.ti.com/lit/gpn/lm25056 | ||
10 | http://www.ti.com/lit/gpn/lm25056a | ||
5 | * National Semiconductor LM25066 | 11 | * National Semiconductor LM25066 |
6 | Prefix: 'lm25066' | 12 | Prefix: 'lm25066' |
7 | Addresses scanned: - | 13 | Addresses scanned: - |
@@ -25,8 +31,9 @@ Author: Guenter Roeck <linux@roeck-us.net> | |||
25 | Description | 31 | Description |
26 | ----------- | 32 | ----------- |
27 | 33 | ||
28 | This driver supports hardware montoring for National Semiconductor LM25066, | 34 | This driver supports hardware monitoring for National Semiconductor / TI LM25056, |
29 | LM5064, and LM5064 Power Management, Monitoring, Control, and Protection ICs. | 35 | LM25066, LM5064, and LM5066 Power Management, Monitoring, Control, and |
36 | Protection ICs. | ||
30 | 37 | ||
31 | The driver is a client driver to the core PMBus driver. Please see | 38 | The driver is a client driver to the core PMBus driver. Please see |
32 | Documentation/hwmon/pmbus for details on PMBus client drivers. | 39 | Documentation/hwmon/pmbus for details on PMBus client drivers. |
@@ -60,14 +67,19 @@ in1_max Maximum input voltage. | |||
60 | in1_min_alarm Input voltage low alarm. | 67 | in1_min_alarm Input voltage low alarm. |
61 | in1_max_alarm Input voltage high alarm. | 68 | in1_max_alarm Input voltage high alarm. |
62 | 69 | ||
63 | in2_label "vout1" | 70 | in2_label "vmon" |
64 | in2_input Measured output voltage. | 71 | in2_input Measured voltage on VAUX pin |
65 | in2_average Average measured output voltage. | 72 | in2_min Minimum VAUX voltage (LM25056 only). |
66 | in2_min Minimum output voltage. | 73 | in2_max Maximum VAUX voltage (LM25056 only). |
67 | in2_min_alarm Output voltage low alarm. | 74 | in2_min_alarm VAUX voltage low alarm (LM25056 only). |
68 | 75 | in2_max_alarm VAUX voltage high alarm (LM25056 only). | |
69 | in3_label "vout2" | 76 | |
70 | in3_input Measured voltage on vaux pin | 77 | in3_label "vout1" |
78 | Not supported on LM25056. | ||
79 | in3_input Measured output voltage. | ||
80 | in3_average Average measured output voltage. | ||
81 | in3_min Minimum output voltage. | ||
82 | in3_min_alarm Output voltage low alarm. | ||
71 | 83 | ||
72 | curr1_label "iin" | 84 | curr1_label "iin" |
73 | curr1_input Measured input current. | 85 | curr1_input Measured input current. |
diff --git a/Documentation/hwmon/lm75 b/Documentation/hwmon/lm75 index c91a1d15fa28..69af1c7db6b7 100644 --- a/Documentation/hwmon/lm75 +++ b/Documentation/hwmon/lm75 | |||
@@ -23,7 +23,7 @@ Supported chips: | |||
23 | Datasheet: Publicly available at the Maxim website | 23 | Datasheet: Publicly available at the Maxim website |
24 | http://www.maxim-ic.com/ | 24 | http://www.maxim-ic.com/ |
25 | * Microchip (TelCom) TCN75 | 25 | * Microchip (TelCom) TCN75 |
26 | Prefix: 'lm75' | 26 | Prefix: 'tcn75' |
27 | Addresses scanned: none | 27 | Addresses scanned: none |
28 | Datasheet: Publicly available at the Microchip website | 28 | Datasheet: Publicly available at the Microchip website |
29 | http://www.microchip.com/ | 29 | http://www.microchip.com/ |
diff --git a/Documentation/hwmon/lm95234 b/Documentation/hwmon/lm95234 new file mode 100644 index 000000000000..a0e95ddfd372 --- /dev/null +++ b/Documentation/hwmon/lm95234 | |||
@@ -0,0 +1,36 @@ | |||
1 | Kernel driver lm95234 | ||
2 | ===================== | ||
3 | |||
4 | Supported chips: | ||
5 | * National Semiconductor / Texas Instruments LM95234 | ||
6 | Addresses scanned: I2C 0x18, 0x4d, 0x4e | ||
7 | Datasheet: Publicly available at the Texas Instruments website | ||
8 | http://www.ti.com/product/lm95234 | ||
9 | |||
10 | |||
11 | Author: Guenter Roeck <linux@roeck-us.net> | ||
12 | |||
13 | Description | ||
14 | ----------- | ||
15 | |||
16 | LM95234 is an 11-bit digital temperature sensor with a 2-wire System Management | ||
17 | Bus (SMBus) interface and TruTherm technology that can very accurately monitor | ||
18 | the temperature of four remote diodes as well as its own temperature. | ||
19 | The four remote diodes can be external devices such as microprocessors, | ||
20 | graphics processors or diode-connected 2N3904s. The LM95234's TruTherm | ||
21 | beta compensation technology allows sensing of 90 nm or 65 nm process | ||
22 | thermal diodes accurately. | ||
23 | |||
24 | All temperature values are given in millidegrees Celsius. Temperature | ||
25 | is provided within a range of -127 to +255 degrees (+127.875 degrees for | ||
26 | the internal sensor). Resolution depends on temperature input and range. | ||
27 | |||
28 | Each sensor has its own maximum limit, but the hysteresis is common to all | ||
29 | channels. The hysteresis is configurable with the temp1_max_hyst attribute and | ||
30 | affects the hysteresis on all channels. The first two external sensors also | ||
31 | have a critical limit. | ||
32 | |||
33 | The lm95234 driver can change its update interval to a fixed set of values. | ||
34 | It will round up to the next selectable interval. See the datasheet for exact | ||
35 | values. Reading sensor values more often will do no harm, but will return | ||
36 | 'old' values. | ||
diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978 index e4d75c606c97..dc0d08c61305 100644 --- a/Documentation/hwmon/ltc2978 +++ b/Documentation/hwmon/ltc2978 | |||
@@ -2,6 +2,10 @@ Kernel driver ltc2978 | |||
2 | ===================== | 2 | ===================== |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * Linear Technology LTC2974 | ||
6 | Prefix: 'ltc2974' | ||
7 | Addresses scanned: - | ||
8 | Datasheet: http://www.linear.com/product/ltc2974 | ||
5 | * Linear Technology LTC2978 | 9 | * Linear Technology LTC2978 |
6 | Prefix: 'ltc2978' | 10 | Prefix: 'ltc2978' |
7 | Addresses scanned: - | 11 | Addresses scanned: - |
@@ -10,6 +14,10 @@ Supported chips: | |||
10 | Prefix: 'ltc3880' | 14 | Prefix: 'ltc3880' |
11 | Addresses scanned: - | 15 | Addresses scanned: - |
12 | Datasheet: http://www.linear.com/product/ltc3880 | 16 | Datasheet: http://www.linear.com/product/ltc3880 |
17 | * Linear Technology LTC3883 | ||
18 | Prefix: 'ltc3883' | ||
19 | Addresses scanned: - | ||
20 | Datasheet: http://www.linear.com/product/ltc3883 | ||
13 | 21 | ||
14 | Author: Guenter Roeck <linux@roeck-us.net> | 22 | Author: Guenter Roeck <linux@roeck-us.net> |
15 | 23 | ||
@@ -17,9 +25,9 @@ Author: Guenter Roeck <linux@roeck-us.net> | |||
17 | Description | 25 | Description |
18 | ----------- | 26 | ----------- |
19 | 27 | ||
20 | The LTC2978 is an octal power supply monitor, supervisor, sequencer and | 28 | LTC2974 is a quad digital power supply manager. LTC2978 is an octal power supply |
21 | margin controller. The LTC3880 is a dual, PolyPhase DC/DC synchronous | 29 | monitor. LTC3880 is a dual output poly-phase step-down DC/DC controller. LTC3883 |
22 | step-down switching regulator controller. | 30 | is a single phase step-down DC/DC controller. |
23 | 31 | ||
24 | 32 | ||
25 | Usage Notes | 33 | Usage Notes |
@@ -41,63 +49,90 @@ Sysfs attributes | |||
41 | in1_label "vin" | 49 | in1_label "vin" |
42 | in1_input Measured input voltage. | 50 | in1_input Measured input voltage. |
43 | in1_min Minimum input voltage. | 51 | in1_min Minimum input voltage. |
44 | in1_max Maximum input voltage. | 52 | in1_max Maximum input voltage. LTC2974 and LTC2978 only. |
45 | in1_lcrit Critical minimum input voltage. | 53 | in1_lcrit Critical minimum input voltage. LTC2974 and LTC2978 |
54 | only. | ||
46 | in1_crit Critical maximum input voltage. | 55 | in1_crit Critical maximum input voltage. |
47 | in1_min_alarm Input voltage low alarm. | 56 | in1_min_alarm Input voltage low alarm. |
48 | in1_max_alarm Input voltage high alarm. | 57 | in1_max_alarm Input voltage high alarm. LTC2974 and LTC2978 only. |
49 | in1_lcrit_alarm Input voltage critical low alarm. | 58 | in1_lcrit_alarm Input voltage critical low alarm. LTC2974 and LTC2978 |
59 | only. | ||
50 | in1_crit_alarm Input voltage critical high alarm. | 60 | in1_crit_alarm Input voltage critical high alarm. |
51 | in1_lowest Lowest input voltage. LTC2978 only. | 61 | in1_lowest Lowest input voltage. LTC2974 and LTC2978 only. |
52 | in1_highest Highest input voltage. | 62 | in1_highest Highest input voltage. |
53 | in1_reset_history Reset history. Writing into this attribute will reset | 63 | in1_reset_history Reset input voltage history. |
54 | history for all attributes. | 64 | |
55 | 65 | in[N]_label "vout[1-8]". | |
56 | in[2-9]_label "vout[1-8]". Channels 3 to 9 on LTC2978 only. | 66 | LTC2974: N=2-5 |
57 | in[2-9]_input Measured output voltage. | 67 | LTC2978: N=2-9 |
58 | in[2-9]_min Minimum output voltage. | 68 | LTC3880: N=2-3 |
59 | in[2-9]_max Maximum output voltage. | 69 | LTC3883: N=2 |
60 | in[2-9]_lcrit Critical minimum output voltage. | 70 | in[N]_input Measured output voltage. |
61 | in[2-9]_crit Critical maximum output voltage. | 71 | in[N]_min Minimum output voltage. |
62 | in[2-9]_min_alarm Output voltage low alarm. | 72 | in[N]_max Maximum output voltage. |
63 | in[2-9]_max_alarm Output voltage high alarm. | 73 | in[N]_lcrit Critical minimum output voltage. |
64 | in[2-9]_lcrit_alarm Output voltage critical low alarm. | 74 | in[N]_crit Critical maximum output voltage. |
65 | in[2-9]_crit_alarm Output voltage critical high alarm. | 75 | in[N]_min_alarm Output voltage low alarm. |
66 | in[2-9]_lowest Lowest output voltage. LTC2978 only. | 76 | in[N]_max_alarm Output voltage high alarm. |
67 | in[2-9]_highest Lowest output voltage. | 77 | in[N]_lcrit_alarm Output voltage critical low alarm. |
68 | in[2-9]_reset_history Reset history. Writing into this attribute will reset | 78 | in[N]_crit_alarm Output voltage critical high alarm. |
69 | history for all attributes. | 79 | in[N]_lowest Lowest output voltage. LTC2974 and LTC2978 only. |
70 | 80 | in[N]_highest Highest output voltage. | |
71 | temp[1-3]_input Measured temperature. | 81 | in[N]_reset_history Reset output voltage history. |
82 | |||
83 | temp[N]_input Measured temperature. | ||
84 | On LTC2974, temp[1-4] report external temperatures, | ||
85 | and temp5 reports the chip temperature. | ||
72 | On LTC2978, only one temperature measurement is | 86 | On LTC2978, only one temperature measurement is |
73 | supported and reflects the internal temperature. | 87 | supported and reports the chip temperature. |
74 | On LTC3880, temp1 and temp2 report external | 88 | On LTC3880, temp1 and temp2 report external |
75 | temperatures, and temp3 reports the internal | 89 | temperatures, and temp3 reports the chip temperature. |
76 | temperature. | 90 | On LTC3883, temp1 reports an external temperature, |
77 | temp[1-3]_min Mimimum temperature. | 91 | and temp2 reports the chip temperature. |
78 | temp[1-3]_max Maximum temperature. | 92 | temp[N]_min Minimum temperature. LTC2974 and LTC2978 only. |
79 | temp[1-3]_lcrit Critical low temperature. | 93 | temp[N]_max Maximum temperature. |
80 | temp[1-3]_crit Critical high temperature. | 94 | temp[N]_lcrit Critical low temperature. |
81 | temp[1-3]_min_alarm Chip temperature low alarm. | 95 | temp[N]_crit Critical high temperature. |
82 | temp[1-3]_max_alarm Chip temperature high alarm. | 96 | temp[N]_min_alarm Temperature low alarm. LTC2974 and LTC2978 only. |
83 | temp[1-3]_lcrit_alarm Chip temperature critical low alarm. | 97 | temp[N]_max_alarm Temperature high alarm. |
84 | temp[1-3]_crit_alarm Chip temperature critical high alarm. | 98 | temp[N]_lcrit_alarm Temperature critical low alarm. |
85 | temp[1-3]_lowest Lowest measured temperature. LTC2978 only. | 99 | temp[N]_crit_alarm Temperature critical high alarm. |
86 | temp[1-3]_highest Highest measured temperature. | 100 | temp[N]_lowest Lowest measured temperature. LTC2974 and LTC2978 only. |
87 | temp[1-3]_reset_history Reset history. Writing into this attribute will reset | 101 | Not supported for chip temperature sensor on LTC2974. |
88 | history for all attributes. | 102 | temp[N]_highest Highest measured temperature. Not supported for chip |
89 | 103 | temperature sensor on LTC2974. | |
90 | power[1-2]_label "pout[1-2]". LTC3880 only. | 104 | temp[N]_reset_history Reset temperature history. Not supported for chip |
91 | power[1-2]_input Measured power. | 105 | temperature sensor on LTC2974. |
92 | 106 | ||
93 | curr1_label "iin". LTC3880 only. | 107 | power1_label "pin". LTC3883 only. |
108 | power1_input Measured input power. | ||
109 | |||
110 | power[N]_label "pout[1-4]". | ||
111 | LTC2974: N=1-4 | ||
112 | LTC2978: Not supported | ||
113 | LTC3880: N=1-2 | ||
114 | LTC3883: N=2 | ||
115 | power[N]_input Measured output power. | ||
116 | |||
117 | curr1_label "iin". LTC3880 and LTC3883 only. | ||
94 | curr1_input Measured input current. | 118 | curr1_input Measured input current. |
95 | curr1_max Maximum input current. | 119 | curr1_max Maximum input current. |
96 | curr1_max_alarm Input current high alarm. | 120 | curr1_max_alarm Input current high alarm. |
97 | 121 | curr1_highest Highest input current. LTC3883 only. | |
98 | curr[2-3]_label "iout[1-2]". LTC3880 only. | 122 | curr1_reset_history Reset input current history. LTC3883 only. |
99 | curr[2-3]_input Measured input current. | 123 | |
100 | curr[2-3]_max Maximum input current. | 124 | curr[N]_label "iout[1-4]". |
101 | curr[2-3]_crit Critical input current. | 125 | LTC2974: N=1-4 |
102 | curr[2-3]_max_alarm Input current high alarm. | 126 | LTC2978: not supported |
103 | curr[2-3]_crit_alarm Input current critical high alarm. | 127 | LTC3880: N=2-3 |
128 | LTC3883: N=2 | ||
129 | curr[N]_input Measured output current. | ||
130 | curr[N]_max Maximum output current. | ||
131 | curr[N]_crit Critical high output current. | ||
132 | curr[N]_lcrit Critical low output current. LTC2974 only. | ||
133 | curr[N]_max_alarm Output current high alarm. | ||
134 | curr[N]_crit_alarm Output current critical high alarm. | ||
135 | curr[N]_lcrit_alarm Output current critical low alarm. LTC2974 only. | ||
136 | curr[N]_lowest Lowest output current. LTC2974 only. | ||
137 | curr[N]_highest Highest output current. | ||
138 | curr[N]_reset_history Reset output current history. | ||
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775 new file mode 100644 index 000000000000..4e9ef60e8c6c --- /dev/null +++ b/Documentation/hwmon/nct6775 | |||
@@ -0,0 +1,188 @@ | |||
1 | Note | ||
2 | ==== | ||
3 | |||
4 | This driver supersedes the NCT6775F and NCT6776F support in the W83627EHF | ||
5 | driver. | ||
6 | |||
7 | Kernel driver NCT6775 | ||
8 | ===================== | ||
9 | |||
10 | Supported chips: | ||
11 | * Nuvoton NCT5572D/NCT6771F/NCT6772F/NCT6775F/W83677HG-I | ||
12 | Prefix: 'nct6775' | ||
13 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
14 | Datasheet: Available from Nuvoton upon request | ||
15 | * Nuvoton NCT5577D/NCT6776D/NCT6776F | ||
16 | Prefix: 'nct6776' | ||
17 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
18 | Datasheet: Available from Nuvoton upon request | ||
19 | * Nuvoton NCT5532D/NCT6779D | ||
20 | Prefix: 'nct6779' | ||
21 | Addresses scanned: ISA address retrieved from Super I/O registers | ||
22 | Datasheet: Available from Nuvoton upon request | ||
23 | |||
24 | Authors: | ||
25 | Guenter Roeck <linux@roeck-us.net> | ||
26 | |||
27 | Description | ||
28 | ----------- | ||
29 | |||
30 | This driver implements support for the Nuvoton NCT6775F, NCT6776F, and NCT6779D | ||
31 | and compatible super I/O chips. | ||
32 | |||
33 | The chips support up to 25 temperature monitoring sources. Up to 6 of those are | ||
34 | direct temperature sensor inputs, the others are special sources such as PECI, | ||
35 | PCH, and SMBUS. Depending on the chip type, 2 to 6 of the temperature sources | ||
36 | can be monitored and compared against minimum, maximum, and critical | ||
37 | temperatures. The driver reports up to 10 of the temperatures to the user. | ||
38 | There are 4 to 5 fan rotation speed sensors, 8 to 15 analog voltage sensors, | ||
39 | one VID, alarms with beep warnings (control unimplemented), and some automatic | ||
40 | fan regulation strategies (plus manual fan control mode). | ||
41 | |||
42 | The temperature sensor sources on all chips are configurable. The configured | ||
43 | source for each of the temperature sensors is provided in tempX_label. | ||
44 | |||
45 | Temperatures are measured in degrees Celsius and measurement resolution is | ||
46 | either 1 degC or 0.5 degC, depending on the temperature source and | ||
47 | configuration. An alarm is triggered when the temperature gets higher than | ||
48 | the high limit; it stays on until the temperature falls below the hysteresis | ||
49 | value. Alarms are only supported for temp1 to temp6, depending on the chip type. | ||
50 | |||
51 | Fan rotation speeds are reported in RPM (rotations per minute). An alarm is | ||
52 | triggered if the rotation speed has dropped below a programmable limit. On | ||
53 | NCT6775F, fan readings can be divided by a programmable divider (1, 2, 4, 8, | ||
54 | 16, 32, 64 or 128) to give the readings more range or accuracy; the other chips | ||
55 | do not have a fan speed divider. The driver sets the most suitable fan divisor | ||
56 | itself; specifically, it increases the divider value each time a fan speed | ||
57 | reading returns an invalid value, and it reduces it if the fan speed reading | ||
58 | is lower than optimal. Some fans might not be present because they share pins | ||
59 | with other functions. | ||
60 | |||
61 | Voltage sensors (also known as IN sensors) report their values in millivolts. | ||
62 | An alarm is triggered if the voltage has crossed a programmable minimum | ||
63 | or maximum limit. | ||
64 | |||
65 | The driver supports automatic fan control mode known as Thermal Cruise. | ||
66 | In this mode, the chip attempts to keep the measured temperature in a | ||
67 | predefined temperature range. If the temperature goes out of range, fan | ||
68 | is driven slower/faster to reach the predefined range again. | ||
69 | |||
70 | The mode works for fan1-fan5. | ||
71 | |||
72 | sysfs attributes | ||
73 | ---------------- | ||
74 | |||
75 | pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range: | ||
76 | 0 (lowest speed) to 255 (full) | ||
77 | |||
78 | pwm[1-5]_enable - this file controls mode of fan/temperature control: | ||
79 | * 0 Fan control disabled (fans set to maximum speed) | ||
80 | * 1 Manual mode, write to pwm[1-5] any value 0-255 | ||
81 | * 2 "Thermal Cruise" mode | ||
82 | * 3 "Fan Speed Cruise" mode | ||
83 | * 4 "Smart Fan III" mode (NCT6775F only) | ||
84 | * 5 "Smart Fan IV" mode | ||
85 | |||
86 | pwm[1-5]_mode - controls if output is PWM or DC level | ||
87 | * 0 DC output | ||
88 | * 1 PWM output | ||
89 | |||
90 | Common fan control attributes | ||
91 | ----------------------------- | ||
92 | |||
93 | pwm[1-5]_temp_sel Temperature source. Value is temperature sensor index. | ||
94 | For example, select '1' for temp1_input. | ||
95 | pwm[1-5]_weight_temp_sel | ||
96 | Secondary temperature source. Value is temperature | ||
97 | sensor index. For example, select '1' for temp1_input. | ||
98 | Set to 0 to disable secondary temperature control. | ||
99 | |||
100 | If secondary temperature functionality is enabled, it is controlled with the | ||
101 | following attributes. | ||
102 | |||
103 | pwm[1-5]_weight_duty_step | ||
104 | Duty step size. | ||
105 | pwm[1-5]_weight_temp_step | ||
106 | Temperature step size. With each step over | ||
107 | temp_step_base, the value of weight_duty_step is added | ||
108 | to the current pwm value. | ||
109 | pwm[1-5]_weight_temp_step_base | ||
110 | Temperature at which secondary temperature control kicks | ||
111 | in. | ||
112 | pwm[1-5]_weight_temp_step_tol | ||
113 | Temperature step tolerance. | ||
114 | |||
115 | Thermal Cruise mode (2) | ||
116 | ----------------------- | ||
117 | |||
118 | If the temperature is in the range defined by: | ||
119 | |||
120 | pwm[1-5]_target_temp Target temperature, unit millidegree Celsius | ||
121 | (range 0 - 127000) | ||
122 | pwm[1-5]_temp_tolerance | ||
123 | Target temperature tolerance, unit millidegree Celsius | ||
124 | |||
125 | there are no changes to fan speed. Once the temperature leaves the interval, fan | ||
126 | speed increases (if temperature is higher than desired) or decreases (if | ||
127 | temperature is lower than desired), using the following limits and time | ||
128 | intervals. | ||
129 | |||
130 | pwm[1-5]_start fan pwm start value (range 1 - 255), to start fan | ||
131 | when the temperature is above defined range. | ||
132 | pwm[1-5]_floor lowest fan pwm (range 0 - 255) if temperature is below | ||
133 | the defined range. If set to 0, the fan is expected to | ||
134 | stop if the temperature is below the defined range. | ||
135 | pwm[1-5]_step_up_time milliseconds before fan speed is increased | ||
136 | pwm[1-5]_step_down_time milliseconds before fan speed is decreased | ||
137 | pwm[1-5]_stop_time how many milliseconds must elapse to switch | ||
138 | corresponding fan off (when the temperature was below | ||
139 | defined range). | ||
140 | |||
141 | Speed Cruise mode (3) | ||
142 | --------------------- | ||
143 | |||
144 | This mode tries to keep the fan speed constant. | ||
145 | |||
146 | fan[1-5]_target Target fan speed | ||
147 | fan[1-5]_tolerance | ||
148 | Target speed tolerance | ||
149 | |||
150 | |||
151 | Untested; use at your own risk. | ||
152 | |||
153 | Smart Fan IV mode (5) | ||
154 | --------------------- | ||
155 | |||
156 | This mode offers multiple slopes to control the fan speed. The slopes can be | ||
157 | controlled by setting the pwm and temperature attributes. When the temperature | ||
158 | rises, the chip will calculate the DC/PWM output based on the current slope. | ||
159 | There are up to seven data points depending on the chip type. Subsequent data | ||
160 | points should be set to higher temperatures and higher pwm values to achieve | ||
161 | higher fan speeds with increasing temperature. The last data point reflects | ||
162 | critical temperature mode, in which the fans should run at full speed. | ||
163 | |||
164 | pwm[1-5]_auto_point[1-7]_pwm | ||
165 | pwm value to be set if temperature reaches matching | ||
166 | temperature range. | ||
167 | pwm[1-5]_auto_point[1-7]_temp | ||
168 | Temperature over which the matching pwm is enabled. | ||
169 | pwm[1-5]_temp_tolerance | ||
170 | Temperature tolerance, unit millidegree Celsius | ||
171 | pwm[1-5]_crit_temp_tolerance | ||
172 | Temperature tolerance for critical temperature, | ||
173 | unit millidegree Celsius | ||
174 | |||
175 | pwm[1-5]_step_up_time milliseconds before fan speed is increased | ||
176 | pwm[1-5]_step_down_time milliseconds before fan speed is decreased | ||
177 | |||
178 | Usage Notes | ||
179 | ----------- | ||
180 | |||
181 | On various ASUS boards with NCT6776F, it appears that CPUTIN is not really | ||
182 | connected to anything and floats, or that it is connected to some non-standard | ||
183 | temperature measurement device. As a result, the temperature reported on CPUTIN | ||
184 | will not reflect a usable value. It often reports unreasonably high | ||
185 | temperatures, and in some cases the reported temperature declines if the actual | ||
186 | temperature increases (similar to the raw PECI temperature value - see PECI | ||
187 | specification for details). CPUTIN should therefore be ignored on ASUS | ||
188 | boards. The CPU temperature on ASUS boards is reported from PECI 0. | ||
diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 index 02850bdfac18..778987d1856f 100644 --- a/Documentation/hwmon/sht15 +++ b/Documentation/hwmon/sht15 | |||
@@ -40,7 +40,7 @@ bits for humidity, or 12 bits for temperature and 8 bits for humidity. | |||
40 | The humidity calibration coefficients are programmed into an OTP memory on the | 40 | The humidity calibration coefficients are programmed into an OTP memory on the |
41 | chip. These coefficients are used to internally calibrate the signals from the | 41 | chip. These coefficients are used to internally calibrate the signals from the |
42 | sensors. Disabling the reload of those coefficients allows saving 10ms for each | 42 | sensors. Disabling the reload of those coefficients allows saving 10ms for each |
43 | measurement and decrease power consumption, while loosing on precision. | 43 | measurement and decreases power consumption, while losing on precision. |
44 | 44 | ||
45 | Some options may be set directly in the sht15_platform_data structure | 45 | Some options may be set directly in the sht15_platform_data structure |
46 | or via sysfs attributes. | 46 | or via sysfs attributes. |
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401 index 9fc447249212..f91e3fa7e5ec 100644 --- a/Documentation/hwmon/tmp401 +++ b/Documentation/hwmon/tmp401 | |||
@@ -8,8 +8,16 @@ Supported chips: | |||
8 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html | 8 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html |
9 | * Texas Instruments TMP411 | 9 | * Texas Instruments TMP411 |
10 | Prefix: 'tmp411' | 10 | Prefix: 'tmp411' |
11 | Addresses scanned: I2C 0x4c | 11 | Addresses scanned: I2C 0x4c, 0x4d, 0x4e |
12 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html | 12 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html |
13 | * Texas Instruments TMP431 | ||
14 | Prefix: 'tmp431' | ||
15 | Addresses scanned: I2C 0x4c, 0x4d | ||
16 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp431.html | ||
17 | * Texas Instruments TMP432 | ||
18 | Prefix: 'tmp432' | ||
19 | Addresses scanned: I2C 0x4c, 0x4d | ||
20 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html | ||
13 | 21 | ||
14 | Authors: | 22 | Authors: |
15 | Hans de Goede <hdegoede@redhat.com> | 23 | Hans de Goede <hdegoede@redhat.com> |
@@ -18,19 +26,19 @@ Authors: | |||
18 | Description | 26 | Description |
19 | ----------- | 27 | ----------- |
20 | 28 | ||
21 | This driver implements support for Texas Instruments TMP401 and | 29 | This driver implements support for Texas Instruments TMP401, TMP411, |
22 | TMP411 chips. These chips implements one remote and one local | 30 | TMP431, and TMP432 chips. These chips implement one or two remote and |
23 | temperature sensor. Temperature is measured in degrees | 31 | one local temperature sensors. Temperature is measured in degrees |
24 | Celsius. Resolution of the remote sensor is 0.0625 degree. Local | 32 | Celsius. Resolution of the remote sensor is 0.0625 degree. Local |
25 | sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not | 33 | sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not |
26 | supported by the driver so far, so using the default resolution of 0.5 | 34 | supported by the driver so far, so using the default resolution of 0.5 |
27 | degree). | 35 | degree). |
28 | 36 | ||
29 | The driver provides the common sysfs-interface for temperatures (see | 37 | The driver provides the common sysfs-interface for temperatures (see |
30 | /Documentation/hwmon/sysfs-interface under Temperatures). | 38 | Documentation/hwmon/sysfs-interface under Temperatures). |
31 | 39 | ||
32 | The TMP411 chip is compatible with TMP401. It provides some additional | 40 | The TMP411 and TMP431 chips are compatible with TMP401. TMP411 provides |
33 | features. | 41 | some additional features. |
34 | 42 | ||
35 | * Minimum and Maximum temperature measured since power-on, chip-reset | 43 | * Minimum and Maximum temperature measured since power-on, chip-reset |
36 | 44 | ||
@@ -40,3 +48,6 @@ features. | |||
40 | 48 | ||
41 | Exported via sysfs attribute temp_reset_history. Writing 1 to this | 49 | Exported via sysfs attribute temp_reset_history. Writing 1 to this |
42 | file triggers a reset. | 50 | file triggers a reset. |
51 | |||
52 | TMP432 is compatible with TMP401 and TMP431. It supports two external | ||
53 | temperature sensors. | ||
diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100 index 756b57c6b73e..33908a4d68ff 100644 --- a/Documentation/hwmon/zl6100 +++ b/Documentation/hwmon/zl6100 | |||
@@ -125,7 +125,7 @@ in2_label "vmon" | |||
125 | in2_input Measured voltage on VMON (ZL2004) or VDRV (ZL9101M, | 125 | in2_input Measured voltage on VMON (ZL2004) or VDRV (ZL9101M, |
126 | ZL9117M) pin. Reported voltage is 16x the voltage on the | 126 | ZL9117M) pin. Reported voltage is 16x the voltage on the |
127 | pin (adjusted internally by the chip). | 127 | pin (adjusted internally by the chip). |
128 | in2_lcrit Critical minumum VMON/VDRV Voltage. | 128 | in2_lcrit Critical minimum VMON/VDRV Voltage. |
129 | in2_crit Critical maximum VMON/VDRV voltage. | 129 | in2_crit Critical maximum VMON/VDRV voltage. |
130 | in2_lcrit_alarm VMON/VDRV voltage critical low alarm. | 130 | in2_lcrit_alarm VMON/VDRV voltage critical low alarm. |
131 | in2_crit_alarm VMON/VDRV voltage critical high alarm. | 131 | in2_crit_alarm VMON/VDRV voltage critical high alarm. |
diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c index 30fe4bb9a069..0d6018c316c7 100644 --- a/Documentation/i2c/busses/i2c-diolan-u2c +++ b/Documentation/i2c/busses/i2c-diolan-u2c | |||
@@ -5,7 +5,7 @@ Supported adapters: | |||
5 | Documentation: | 5 | Documentation: |
6 | http://www.diolan.com/i2c/u2c12.html | 6 | http://www.diolan.com/i2c/u2c12.html |
7 | 7 | ||
8 | Author: Guenter Roeck <guenter.roeck@ericsson.com> | 8 | Author: Guenter Roeck <linux@roeck-us.net> |
9 | 9 | ||
10 | Description | 10 | Description |
11 | ----------- | 11 | ----------- |
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt index 223e4f0582d0..9f651c181429 100644 --- a/Documentation/ia64/err_inject.txt +++ b/Documentation/ia64/err_inject.txt | |||
@@ -882,7 +882,7 @@ int err_inj() | |||
882 | cpu=parameters[i].cpu; | 882 | cpu=parameters[i].cpu; |
883 | k = cpu%64; | 883 | k = cpu%64; |
884 | j = cpu/64; | 884 | j = cpu/64; |
885 | mask[j]=1<<k; | 885 | mask[j] = 1UL << k; |
886 | 886 | ||
887 | if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) { | 887 | if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) { |
888 | perror("Error sched_setaffinity:"); | 888 | perror("Error sched_setaffinity:"); |
diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index 3262b6e4d686..e544c7ff8cfa 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt | |||
@@ -3,10 +3,26 @@ ALPS Touchpad Protocol | |||
3 | 3 | ||
4 | Introduction | 4 | Introduction |
5 | ------------ | 5 | ------------ |
6 | 6 | Currently the ALPS touchpad driver supports five protocol versions in use by | |
7 | Currently the ALPS touchpad driver supports four protocol versions in use by | 7 | ALPS touchpads, called versions 1, 2, 3, 4 and 5. |
8 | ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various | 8 | |
9 | protocol versions is contained in the following sections. | 9 | Since roughly mid-2010 several new ALPS touchpads have been released and |
10 | integrated into a variety of laptops and netbooks. These new touchpads | ||
11 | have enough behavior differences that the alps_model_data definition | ||
12 | table, describing the properties of the different versions, is no longer | ||
13 | adequate. The design choices were to re-define the alps_model_data | ||
14 | table, with the risk of regression testing existing devices, or isolate | ||
15 | the new devices outside of the alps_model_data table. The latter design | ||
16 | choice was made. The new touchpad signatures are named: "Rushmore", | ||
17 | "Pinnacle", and "Dolphin", which you will see in the alps.c code. | ||
18 | For the purposes of this document, this group of ALPS touchpads will | ||
19 | generically be called "new ALPS touchpads". | ||
20 | |||
21 | We experimented with probing the ACPI interface _HID (Hardware ID)/_CID | ||
22 | (Compatibility ID) definition as a way to uniquely identify the | ||
23 | different ALPS variants but there did not appear to be a 1:1 mapping. | ||
24 | In fact, it appeared to be an m:n mapping between the _HID and actual | ||
25 | hardware type. | ||
10 | 26 | ||
11 | Detection | 27 | Detection |
12 | --------- | 28 | --------- |
@@ -20,9 +36,13 @@ If the E6 report is successful, the touchpad model is identified using the "E7 | |||
20 | report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is | 36 | report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is |
21 | matched against known models in the alps_model_data_array. | 37 | matched against known models in the alps_model_data_array. |
22 | 38 | ||
23 | With protocol versions 3 and 4, the E7 report model signature is always | 39 | For older touchpads supporting protocol versions 3 and 4, the E7 report |
24 | 73-02-64. To differentiate between these versions, the response from the | 40 | model signature is always 73-02-64. To differentiate between these |
25 | "Enter Command Mode" sequence must be inspected as described below. | 41 | versions, the response from the "Enter Command Mode" sequence must be |
42 | inspected as described below. | ||
43 | |||
44 | The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but | ||
45 | seem to be better differentiated by the EC Command Mode response. | ||
26 | 46 | ||
27 | Command Mode | 47 | Command Mode |
28 | ------------ | 48 | ------------ |
@@ -47,6 +67,14 @@ address of the register being read, and the third contains the value of the | |||
47 | register. Registers are written by writing the value one nibble at a time | 67 | register. Registers are written by writing the value one nibble at a time |
48 | using the same encoding used for addresses. | 68 | using the same encoding used for addresses. |
49 | 69 | ||
70 | For the new ALPS touchpads, the EC command is used to enter command | ||
71 | mode. The response in the new ALPS touchpads is significantly different, | ||
72 | and more important in determining the behavior. This code has been | ||
73 | separated from the original alps_model_data table and put in the | ||
74 | alps_identify function. For example, there seem to be two hardware init | ||
75 | sequences for the "Dolphin" touchpads as determined by the second byte | ||
76 | of the EC response. | ||
77 | |||
50 | Packet Format | 78 | Packet Format |
51 | ------------- | 79 | ------------- |
52 | 80 | ||
@@ -187,3 +215,28 @@ There are several things worth noting here. | |||
187 | well. | 215 | well. |
188 | 216 | ||
189 | So far no v4 devices with tracksticks have been encountered. | 217 | So far no v4 devices with tracksticks have been encountered. |
218 | |||
219 | ALPS Absolute Mode - Protocol Version 5 | ||
220 | --------------------------------------- | ||
221 | This is basically Protocol Version 3 but with different logic for packet | ||
222 | decode. It uses the same alps_process_touchpad_packet_v3 call with a | ||
223 | specialized decode_fields function pointer to correctly interpret the | ||
224 | packets. This appears to only be used by the Dolphin devices. | ||
225 | |||
226 | For single-touch, the 6-byte packet format is: | ||
227 | |||
228 | byte 0: 1 1 0 0 1 0 0 0 | ||
229 | byte 1: 0 x6 x5 x4 x3 x2 x1 x0 | ||
230 | byte 2: 0 y6 y5 y4 y3 y2 y1 y0 | ||
231 | byte 3: 0 M R L 1 m r l | ||
232 | byte 4: y10 y9 y8 y7 x10 x9 x8 x7 | ||
233 | byte 5: 0 z6 z5 z4 z3 z2 z1 z0 | ||
234 | |||
235 | For multi-touch (mt), the format is: | ||
236 | |||
237 | byte 0: 1 1 1 n3 1 n2 n1 x24 | ||
238 | byte 1: 1 y7 y6 y5 y4 y3 y2 y1 | ||
239 | byte 2: ? x2 x1 y12 y11 y10 y9 y8 | ||
240 | byte 3: 0 x23 x22 x21 x20 x19 x18 x17 | ||
241 | byte 4: 0 x9 x8 x7 x6 x5 x4 x3 | ||
242 | byte 5: 0 x16 x15 x14 x13 x12 x11 x10 | ||
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 3210540f8bd3..237acab169dd 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt | |||
@@ -131,6 +131,7 @@ Code Seq#(hex) Include File Comments | |||
131 | 'H' 40-4F sound/hdspm.h conflict! | 131 | 'H' 40-4F sound/hdspm.h conflict! |
132 | 'H' 40-4F sound/hdsp.h conflict! | 132 | 'H' 40-4F sound/hdsp.h conflict! |
133 | 'H' 90 sound/usb/usx2y/usb_stream.h | 133 | 'H' 90 sound/usb/usx2y/usb_stream.h |
134 | 'H' A0 uapi/linux/usb/cdc-wdm.h | ||
134 | 'H' C0-F0 net/bluetooth/hci.h conflict! | 135 | 'H' C0-F0 net/bluetooth/hci.h conflict! |
135 | 'H' C0-DF net/bluetooth/hidp/hidp.h conflict! | 136 | 'H' C0-DF net/bluetooth/hidp/hidp.h conflict! |
136 | 'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict! | 137 | 'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict! |
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 13f1aa09b938..9c7fd988e299 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt | |||
@@ -297,6 +297,7 @@ Boot into System Kernel | |||
297 | On ia64, 256M@256M is a generous value that typically works. | 297 | On ia64, 256M@256M is a generous value that typically works. |
298 | The region may be automatically placed on ia64, see the | 298 | The region may be automatically placed on ia64, see the |
299 | dump-capture kernel config option notes above. | 299 | dump-capture kernel config option notes above. |
300 | If sparse memory is used, the size should be rounded to GRANULE boundaries. | ||
300 | 301 | ||
301 | On s390x, typically use "crashkernel=xxM". The value of xx is dependent | 302 | On s390x, typically use "crashkernel=xxM". The value of xx is dependent |
302 | on the memory consumption of the kdump system. In general this is not | 303 | on the memory consumption of the kdump system. In general this is not |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b5cfd047becb..b801a332fb6f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -44,6 +44,7 @@ parameter is applicable: | |||
44 | AVR32 AVR32 architecture is enabled. | 44 | AVR32 AVR32 architecture is enabled. |
45 | AX25 Appropriate AX.25 support is enabled. | 45 | AX25 Appropriate AX.25 support is enabled. |
46 | BLACKFIN Blackfin architecture is enabled. | 46 | BLACKFIN Blackfin architecture is enabled. |
47 | CLK Common clock infrastructure is enabled. | ||
47 | DRM Direct Rendering Management support is enabled. | 48 | DRM Direct Rendering Management support is enabled. |
48 | DYNAMIC_DEBUG Build in debug messages and enable them at runtime | 49 | DYNAMIC_DEBUG Build in debug messages and enable them at runtime |
49 | EDD BIOS Enhanced Disk Drive Services (EDD) is enabled | 50 | EDD BIOS Enhanced Disk Drive Services (EDD) is enabled |
@@ -320,6 +321,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
320 | on: enable for both 32- and 64-bit processes | 321 | on: enable for both 32- and 64-bit processes |
321 | off: disable for both 32- and 64-bit processes | 322 | off: disable for both 32- and 64-bit processes |
322 | 323 | ||
324 | alloc_snapshot [FTRACE] | ||
325 | Allocate the ftrace snapshot buffer on boot up when the | ||
326 | main buffer is allocated. This is handy if debugging | ||
327 | and you need to use tracing_snapshot() on boot up, and | ||
328 | do not want to use tracing_snapshot_alloc() as it needs | ||
329 | to be done where GFP_KERNEL allocations are allowed. | ||
330 | |||
323 | amd_iommu= [HW,X86-64] | 331 | amd_iommu= [HW,X86-64] |
324 | Pass parameters to the AMD IOMMU driver in the system. | 332 | Pass parameters to the AMD IOMMU driver in the system. |
325 | Possible values are: | 333 | Possible values are: |
@@ -465,6 +473,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
465 | 473 | ||
466 | cio_ignore= [S390] | 474 | cio_ignore= [S390] |
467 | See Documentation/s390/CommonIO for details. | 475 | See Documentation/s390/CommonIO for details. |
476 | clk_ignore_unused | ||
477 | [CLK] | ||
478 | Keep all clocks already enabled by bootloader on, | ||
479 | even if no driver has claimed them. This is useful | ||
480 | for debug and development, but should not be | ||
481 | needed on a platform with proper driver support. | ||
482 | For more information, see Documentation/clk.txt. | ||
468 | 483 | ||
469 | clock= [BUGS=X86-32, HW] gettimeofday clocksource override. | 484 | clock= [BUGS=X86-32, HW] gettimeofday clocksource override. |
470 | [Deprecated] | 485 | [Deprecated] |
@@ -596,9 +611,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
596 | is selected automatically. Check | 611 | is selected automatically. Check |
597 | Documentation/kdump/kdump.txt for further details. | 612 | Documentation/kdump/kdump.txt for further details. |
598 | 613 | ||
599 | crashkernel_low=size[KMG] | ||
600 | [KNL, x86] parts under 4G. | ||
601 | |||
602 | crashkernel=range1:size1[,range2:size2,...][@offset] | 614 | crashkernel=range1:size1[,range2:size2,...][@offset] |
603 | [KNL] Same as above, but depends on the memory | 615 | [KNL] Same as above, but depends on the memory |
604 | in the running system. The syntax of range is | 616 | in the running system. The syntax of range is |
@@ -606,6 +618,26 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
606 | a memory unit (amount[KMG]). See also | 618 | a memory unit (amount[KMG]). See also |
607 | Documentation/kdump/kdump.txt for an example. | 619 | Documentation/kdump/kdump.txt for an example. |
608 | 620 | ||
621 | crashkernel=size[KMG],high | ||
622 | [KNL, x86_64] range could be above 4G. Allow the kernel | ||
623 | to allocate the physical memory region from the top, so it | ||
624 | could be above 4G if the system has more than 4G of RAM | ||
625 | installed. Otherwise the memory region will be allocated | ||
626 | below 4G, if available. | ||
627 | It will be ignored if crashkernel=X is specified. | ||
628 | crashkernel=size[KMG],low | ||
629 | [KNL, x86_64] range under 4G. When crashkernel=X,high | ||
630 | is passed, the kernel could allocate the physical memory | ||
631 | region above 4G, which causes the second kernel to crash | ||
632 | on systems that require some amount of low memory, e.g. | ||
633 | swiotlb requires at least 64M+32K of low memory. The | ||
634 | kernel would try to allocate 72M below 4G automatically. | ||
635 | This option lets the user specify their own low range | ||
636 | under 4G for the second kernel instead. | ||
637 | 0: to disable low allocation. | ||
638 | It will be ignored when crashkernel=X,high is not used | ||
639 | or memory reserved is below 4G. | ||
640 | |||
609 | cs89x0_dma= [HW,NET] | 641 | cs89x0_dma= [HW,NET] |
610 | Format: <dma> | 642 | Format: <dma> |
611 | 643 | ||
@@ -800,6 +832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
800 | edd= [EDD] | 832 | edd= [EDD] |
801 | Format: {"off" | "on" | "skip[mbr]"} | 833 | Format: {"off" | "on" | "skip[mbr]"} |
802 | 834 | ||
835 | efi_no_storage_paranoia [EFI; X86] | ||
836 | Using this parameter you can use more than 50% of | ||
837 | your efi variable storage. Use this parameter only if | ||
838 | you are really sure that your UEFI does sane gc and | ||
839 | fulfills the spec; otherwise your board may brick. | ||
840 | |||
803 | eisa_irq_edge= [PARISC,HW] | 841 | eisa_irq_edge= [PARISC,HW] |
804 | See header of drivers/parisc/eisa.c. | 842 | See header of drivers/parisc/eisa.c. |
805 | 843 | ||
@@ -2473,9 +2511,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2473 | In kernels built with CONFIG_RCU_NOCB_CPU=y, set | 2511 | In kernels built with CONFIG_RCU_NOCB_CPU=y, set |
2474 | the specified list of CPUs to be no-callback CPUs. | 2512 | the specified list of CPUs to be no-callback CPUs. |
2475 | Invocation of these CPUs' RCU callbacks will | 2513 | Invocation of these CPUs' RCU callbacks will |
2476 | be offloaded to "rcuoN" kthreads created for | 2514 | be offloaded to "rcuox/N" kthreads created for |
2477 | that purpose. This reduces OS jitter on the | 2515 | that purpose, where "x" is "b" for RCU-bh, "p" |
2516 | for RCU-preempt, and "s" for RCU-sched, and "N" | ||
2517 | is the CPU number. This reduces OS jitter on the | ||
2478 | offloaded CPUs, which can be useful for HPC and | 2518 | offloaded CPUs, which can be useful for HPC and |
2519 | |||
2479 | real-time workloads. It can also improve energy | 2520 | real-time workloads. It can also improve energy |
2480 | efficiency for asymmetric multiprocessors. | 2521 | efficiency for asymmetric multiprocessors. |
2481 | 2522 | ||
@@ -2499,6 +2540,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2499 | leaf rcu_node structure. Useful for very large | 2540 | leaf rcu_node structure. Useful for very large |
2500 | systems. | 2541 | systems. |
2501 | 2542 | ||
2543 | rcutree.jiffies_till_first_fqs= [KNL,BOOT] | ||
2544 | Set delay from grace-period initialization to | ||
2545 | first attempt to force quiescent states. | ||
2546 | Units are jiffies, minimum value is zero, | ||
2547 | and maximum value is HZ. | ||
2548 | |||
2549 | rcutree.jiffies_till_next_fqs= [KNL,BOOT] | ||
2550 | Set delay between subsequent attempts to force | ||
2551 | quiescent states. Units are jiffies, minimum | ||
2552 | value is one, and maximum value is HZ. | ||
2553 | |||
2502 | rcutree.qhimark= [KNL,BOOT] | 2554 | rcutree.qhimark= [KNL,BOOT] |
2503 | Set threshold of queued | 2555 | Set threshold of queued |
2504 | RCU callbacks over which batch limiting is disabled. | 2556 | RCU callbacks over which batch limiting is disabled. |
@@ -2513,16 +2565,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2513 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] | 2565 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] |
2514 | Set timeout for RCU CPU stall warning messages. | 2566 | Set timeout for RCU CPU stall warning messages. |
2515 | 2567 | ||
2516 | rcutree.jiffies_till_first_fqs= [KNL,BOOT] | 2568 | rcutree.rcu_idle_gp_delay= [KNL,BOOT] |
2517 | Set delay from grace-period initialization to | 2569 | Set wakeup interval for idle CPUs that have |
2518 | first attempt to force quiescent states. | 2570 | RCU callbacks (RCU_FAST_NO_HZ=y). |
2519 | Units are jiffies, minimum value is zero, | ||
2520 | and maximum value is HZ. | ||
2521 | 2571 | ||
2522 | rcutree.jiffies_till_next_fqs= [KNL,BOOT] | 2572 | rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] |
2523 | Set delay between subsequent attempts to force | 2573 | Set wakeup interval for idle CPUs that have |
2524 | quiescent states. Units are jiffies, minimum | 2574 | only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). |
2525 | value is one, and maximum value is HZ. | 2575 | Lazy RCU callbacks are those which RCU can |
2576 | prove do nothing more than free memory. | ||
2526 | 2577 | ||
2527 | rcutorture.fqs_duration= [KNL,BOOT] | 2578 | rcutorture.fqs_duration= [KNL,BOOT] |
2528 | Set duration of force_quiescent_state bursts. | 2579 | Set duration of force_quiescent_state bursts. |
@@ -3234,6 +3285,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
3234 | or other driver-specific files in the | 3285 | or other driver-specific files in the |
3235 | Documentation/watchdog/ directory. | 3286 | Documentation/watchdog/ directory. |
3236 | 3287 | ||
3288 | workqueue.disable_numa | ||
3289 | By default, all work items queued to unbound | ||
3290 | workqueues are affine to the NUMA nodes they're | ||
3291 | issued on, which results in better behavior in | ||
3292 | general. If NUMA affinity needs to be disabled for | ||
3293 | whatever reason, this option can be used. Note | ||
3294 | that this also can be controlled per-workqueue for | ||
3295 | workqueues visible under /sys/bus/workqueue/. | ||
3296 | |||
3237 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of | 3297 | x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of |
3238 | default x2apic cluster mode on platforms | 3298 | default x2apic cluster mode on platforms |
3239 | supporting x2apic. | 3299 | supporting x2apic. |
diff --git a/Documentation/misc-devices/mei/mei-client-bus.txt b/Documentation/misc-devices/mei/mei-client-bus.txt new file mode 100644 index 000000000000..f83910a8ce76 --- /dev/null +++ b/Documentation/misc-devices/mei/mei-client-bus.txt | |||
@@ -0,0 +1,138 @@ | |||
1 | Intel(R) Management Engine (ME) Client bus API | ||
2 | =============================================== | ||
3 | |||
4 | |||
5 | Rationale | ||
6 | ========= | ||
7 | The MEI misc character device is useful for dedicated applications to send and receive | ||
8 | data to the many FW appliances found in Intel's ME from user space. | ||
9 | However, for some of the ME functionalities it makes sense to leverage the existing software | ||
10 | stack and expose them through existing kernel subsystems. | ||
11 | |||
12 | In order to plug seamlessly into the kernel device driver model we add a kernel virtual | ||
13 | bus abstraction on top of the MEI driver. This allows implementing Linux kernel drivers | ||
14 | for the various MEI features as stand-alone entities found in their respective subsystems. | ||
15 | Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to | ||
16 | the existing code. | ||
17 | |||
18 | |||
19 | MEI CL bus API | ||
20 | =========== | ||
21 | A driver implementation for an MEI Client is very similar to existing bus | ||
22 | based device drivers. The driver registers itself as an MEI CL bus driver through | ||
23 | the mei_cl_driver structure: | ||
24 | |||
25 | struct mei_cl_driver { | ||
26 | struct device_driver driver; | ||
27 | const char *name; | ||
28 | |||
29 | const struct mei_cl_device_id *id_table; | ||
30 | |||
31 | int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id); | ||
32 | int (*remove)(struct mei_cl_device *dev); | ||
33 | }; | ||
34 | |||
35 | struct mei_cl_id { | ||
36 | char name[MEI_NAME_SIZE]; | ||
37 | kernel_ulong_t driver_info; | ||
38 | }; | ||
39 | |||
40 | The mei_cl_id structure allows the driver to bind itself against a device name. | ||
41 | |||
42 | To actually register a driver on the ME Client bus one must call the mei_cl_driver_register() | ||
43 | API. This is typically called at module init time. | ||
44 | |||
45 | Once registered on the ME Client bus, a driver will typically try to do some I/O on | ||
46 | this bus and this should be done through the mei_cl_send() and mei_cl_recv() | ||
47 | routines. The latter is synchronous (blocks and sleeps until data shows up). | ||
48 | In order for drivers to be notified of pending events waiting for them (e.g. | ||
49 | an Rx event) they can register an event handler through the | ||
50 | mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event | ||
51 | will trigger an event handler call and the driver implementation is supposed | ||
52 | to call mei_cl_recv() from the event handler in order to fetch the pending | ||
53 | received buffers. | ||
54 | |||
55 | |||
56 | Example | ||
57 | ======= | ||
58 | As a theoretical example let's pretend the ME comes with a "contact" NFC IP. | ||
59 | The driver init and exit routines for this device would look like: | ||
60 | |||
61 | #define CONTACT_DRIVER_NAME "contact" | ||
62 | |||
63 | static struct mei_cl_device_id contact_mei_cl_tbl[] = { | ||
64 | { CONTACT_DRIVER_NAME, }, | ||
65 | |||
66 | /* required last entry */ | ||
67 | { } | ||
68 | }; | ||
69 | MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl); | ||
70 | |||
71 | static struct mei_cl_driver contact_driver = { | ||
72 | .id_table = contact_mei_tbl, | ||
73 | .name = CONTACT_DRIVER_NAME, | ||
74 | |||
75 | .probe = contact_probe, | ||
76 | .remove = contact_remove, | ||
77 | }; | ||
78 | |||
79 | static int contact_init(void) | ||
80 | { | ||
81 | int r; | ||
82 | |||
83 | r = mei_cl_driver_register(&contact_driver); | ||
84 | if (r) { | ||
85 | pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n"); | ||
86 | return r; | ||
87 | } | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static void __exit contact_exit(void) | ||
93 | { | ||
94 | mei_cl_driver_unregister(&contact_driver); | ||
95 | } | ||
96 | |||
97 | module_init(contact_init); | ||
98 | module_exit(contact_exit); | ||
99 | |||
100 | And the driver's simplified probe routine would look like that: | ||
101 | |||
102 | int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id) | ||
103 | { | ||
104 | struct contact_driver *contact; | ||
105 | |||
106 | [...] | ||
107 | mei_cl_enable_device(dev); | ||
108 | |||
109 | mei_cl_register_event_cb(dev, contact_event_cb, contact); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | In the probe routine the driver first enables the MEI device and then registers | ||
115 | an ME bus event handler which is as close as it can get to registering a | ||
116 | threaded IRQ handler. | ||
117 | The handler implementation will typically call some I/O routine depending on | ||
118 | the pending events: | ||
119 | |||
120 | #define MAX_NFC_PAYLOAD 128 | ||
121 | |||
122 | static void contact_event_cb(struct mei_cl_device *dev, u32 events, | ||
123 | void *context) | ||
124 | { | ||
125 | struct contact_driver *contact = context; | ||
126 | |||
127 | if (events & BIT(MEI_EVENT_RX)) { | ||
128 | u8 payload[MAX_NFC_PAYLOAD]; | ||
129 | int payload_size; | ||
130 | |||
131 | payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD); | ||
132 | if (payload_size <= 0) | ||
133 | return; | ||
134 | |||
135 | /* Hook to the NFC subsystem */ | ||
136 | nfc_hci_recv_frame(contact->hdev, payload, payload_size); | ||
137 | } | ||
138 | } | ||
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index f2a2488f1bf3..9573d0c48c6e 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt | |||
@@ -15,6 +15,13 @@ amemthresh - INTEGER | |||
15 | enabled and the variable is automatically set to 2, otherwise | 15 | enabled and the variable is automatically set to 2, otherwise |
16 | the strategy is disabled and the variable is set to 1. | 16 | the strategy is disabled and the variable is set to 1. |
17 | 17 | ||
18 | backup_only - BOOLEAN | ||
19 | 0 - disabled (default) | ||
20 | not 0 - enabled | ||
21 | |||
22 | If set, disable the director function while the server is | ||
23 | in backup mode to avoid packet loops for DR/TUN methods. | ||
24 | |||
18 | conntrack - BOOLEAN | 25 | conntrack - BOOLEAN |
19 | 0 - disabled (default) | 26 | 0 - disabled (default) |
20 | not 0 - enabled | 27 | not 0 - enabled |
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt index c0aab985bad9..949d5dcdd9a3 100644 --- a/Documentation/networking/tuntap.txt +++ b/Documentation/networking/tuntap.txt | |||
@@ -105,6 +105,83 @@ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com> | |||
105 | Proto [2 bytes] | 105 | Proto [2 bytes] |
106 | Raw protocol(IP, IPv6, etc) frame. | 106 | Raw protocol(IP, IPv6, etc) frame. |
107 | 107 | ||
108 | 3.3 Multiqueue tuntap interface: | ||
109 | |||
110 | From version 3.8, Linux supports multiqueue tuntap which can use multiple | ||
111 | file descriptors (queues) to parallelize packet sending or receiving. The | ||
112 | device allocation is the same as before, and if the user wants to create multiple | ||
113 | queues, TUNSETIFF with the same device name must be called many times with | ||
114 | the IFF_MULTI_QUEUE flag. | ||
115 | |||
116 | char *dev should be the name of the device, queues is the number of queues to | ||
117 | be created, fds is used to store and return the file descriptors (queues) | ||
118 | created to the caller. Each file descriptor serves as the interface of a | ||
119 | queue which can be accessed by userspace. | ||
120 | |||
121 | #include <linux/if.h> | ||
122 | #include <linux/if_tun.h> | ||
123 | |||
124 | int tun_alloc_mq(char *dev, int queues, int *fds) | ||
125 | { | ||
126 | struct ifreq ifr; | ||
127 | int fd, err, i; | ||
128 | |||
129 | if (!dev) | ||
130 | return -1; | ||
131 | |||
132 | memset(&ifr, 0, sizeof(ifr)); | ||
133 | /* Flags: IFF_TUN - TUN device (no Ethernet headers) | ||
134 | * IFF_TAP - TAP device | ||
135 | * | ||
136 | * IFF_NO_PI - Do not provide packet information | ||
137 | * IFF_MULTI_QUEUE - Create a queue of multiqueue device | ||
138 | */ | ||
139 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE; | ||
140 | strcpy(ifr.ifr_name, dev); | ||
141 | |||
142 | for (i = 0; i < queues; i++) { | ||
143 | if ((fd = open("/dev/net/tun", O_RDWR)) < 0) | ||
144 | goto err; | ||
145 | err = ioctl(fd, TUNSETIFF, (void *)&ifr); | ||
146 | if (err) { | ||
147 | close(fd); | ||
148 | goto err; | ||
149 | } | ||
150 | fds[i] = fd; | ||
151 | } | ||
152 | |||
153 | return 0; | ||
154 | err: | ||
155 | for (--i; i >= 0; i--) | ||
156 | close(fds[i]); | ||
157 | return err; | ||
158 | } | ||
159 | |||
160 | A new ioctl(TUNSETQUEUE) was introduced to enable or disable a queue. When | ||
161 | calling it with the IFF_DETACH_QUEUE flag, the queue is disabled. And when | ||
162 | calling it with the IFF_ATTACH_QUEUE flag, the queue is enabled. The queue is | ||
163 | enabled by default after it is created through TUNSETIFF. | ||
164 | |||
165 | fd is the file descriptor (queue) that we want to enable or disable; when | ||
166 | enable is true we enable it, otherwise we disable it. | ||
167 | |||
168 | #include <linux/if.h> | ||
169 | #include <linux/if_tun.h> | ||
170 | |||
171 | int tun_set_queue(int fd, int enable) | ||
172 | { | ||
173 | struct ifreq ifr; | ||
174 | |||
175 | memset(&ifr, 0, sizeof(ifr)); | ||
176 | |||
177 | if (enable) | ||
178 | ifr.ifr_flags = IFF_ATTACH_QUEUE; | ||
179 | else | ||
180 | ifr.ifr_flags = IFF_DETACH_QUEUE; | ||
181 | |||
182 | return ioctl(fd, TUNSETQUEUE, (void *)&ifr); | ||
183 | } | ||
184 | |||
108 | Universal TUN/TAP device driver Frequently Asked Question. | 185 | Universal TUN/TAP device driver Frequently Asked Question. |
109 | 186 | ||
110 | 1. What platforms are supported by TUN/TAP driver ? | 187 | 1. What platforms are supported by TUN/TAP driver ? |
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index a2b57e0a1db0..447fd4cd54ec 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt | |||
@@ -736,6 +736,13 @@ All the above functions are mandatory to implement for a pinmux driver. | |||
736 | Pin control interaction with the GPIO subsystem | 736 | Pin control interaction with the GPIO subsystem |
737 | =============================================== | 737 | =============================================== |
738 | 738 | ||
739 | Note that the following implies that the use case is to use a certain pin | ||
740 | from the Linux kernel using the API in <linux/gpio.h> with gpio_request() | ||
741 | and similar functions. There are cases where you may be using something | ||
742 | that your datasheet calls "GPIO mode" but actually is just an electrical | ||
743 | configuration for a certain device. See the section below named | ||
744 | "GPIO mode pitfalls" for more details on this scenario. | ||
745 | |||
739 | The public pinmux API contains two functions named pinctrl_request_gpio() | 746 | The public pinmux API contains two functions named pinctrl_request_gpio() |
740 | and pinctrl_free_gpio(). These two functions shall *ONLY* be called from | 747 | and pinctrl_free_gpio(). These two functions shall *ONLY* be called from |
741 | gpiolib-based drivers as part of their gpio_request() and | 748 | gpiolib-based drivers as part of their gpio_request() and |
@@ -774,6 +781,111 @@ obtain the function "gpioN" where "N" is the global GPIO pin number if no | |||
774 | special GPIO-handler is registered. | 781 | special GPIO-handler is registered. |
775 | 782 | ||
776 | 783 | ||
784 | GPIO mode pitfalls | ||
785 | ================== | ||
786 | |||
787 | Sometimes the developer may be confused by a datasheet talking about a pin | ||
788 | being possible to set into "GPIO mode". It appears that what hardware | ||
789 | engineers mean with "GPIO mode" is not necessarily the use case that is | ||
790 | implied in the kernel interface <linux/gpio.h>: a pin that you grab from | ||
791 | kernel code and then either listen for input or drive high/low to | ||
792 | assert/deassert some external line. | ||
793 | |||
794 | Rather hardware engineers think that "GPIO mode" means that you can | ||
795 | software-control a few electrical properties of the pin that you would | ||
796 | not be able to control if the pin was in some other mode, such as muxed in | ||
797 | for a device. | ||
798 | |||
799 | Example: a pin is usually muxed in to be used as a UART TX line. But during | ||
800 | system sleep, we need to put this pin into "GPIO mode" and ground it. | ||
801 | |||
802 | If you make a 1-to-1 map to the GPIO subsystem for this pin, you may start | ||
803 | to think that you need to come up with something real complex, that the | ||
804 | pin shall be used for UART TX and GPIO at the same time, that you will grab | ||
805 | a pin control handle and set it to a certain state to enable UART TX to be | ||
806 | muxed in, then twist it over to GPIO mode and use gpio_direction_output() | ||
807 | to drive it low during sleep, then mux it over to UART TX again when you | ||
808 | wake up and maybe even gpio_request/gpio_free as part of this cycle. This | ||
809 | all gets very complicated. | ||
810 | |||
811 | The solution is to not think that what the datasheet calls "GPIO mode" | ||
812 | has to be handled by the <linux/gpio.h> interface. Instead view this as | ||
813 | a certain pin config setting. Look in e.g. <linux/pinctrl/pinconf-generic.h> | ||
814 | and you find this in the documentation: | ||
815 | |||
816 | PIN_CONFIG_OUTPUT: this will configure the pin in output, use argument | ||
817 | 1 to indicate high level, argument 0 to indicate low level. | ||
818 | |||
819 | So it is perfectly possible to push a pin into "GPIO mode" and drive the | ||
820 | line low as part of the usual pin control map. So for example your UART | ||
821 | driver may look like this: | ||
822 | |||
823 | #include <linux/pinctrl/consumer.h> | ||
824 | |||
825 | struct pinctrl *pinctrl; | ||
826 | struct pinctrl_state *pins_default; | ||
827 | struct pinctrl_state *pins_sleep; | ||
828 | |||
829 | pins_default = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_DEFAULT); | ||
830 | pins_sleep = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_SLEEP); | ||
831 | |||
832 | /* Normal mode */ | ||
833 | retval = pinctrl_select_state(pinctrl, pins_default); | ||
834 | /* Sleep mode */ | ||
835 | retval = pinctrl_select_state(pinctrl, pins_sleep); | ||
836 | |||
837 | And your machine configuration may look like this: | ||
838 | -------------------------------------------------- | ||
839 | |||
840 | static unsigned long uart_default_mode[] = { | ||
841 | PIN_CONF_PACKED(PIN_CONFIG_DRIVE_PUSH_PULL, 0), | ||
842 | }; | ||
843 | |||
844 | static unsigned long uart_sleep_mode[] = { | ||
845 | PIN_CONF_PACKED(PIN_CONFIG_OUTPUT, 0), | ||
846 | }; | ||
847 | |||
848 | static struct pinctrl_map __initdata pinmap[] = { | ||
849 | PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo", | ||
850 | "u0_group", "u0"), | ||
851 | PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo", | ||
852 | "UART_TX_PIN", uart_default_mode), | ||
853 | PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo", | ||
854 | "u0_group", "gpio-mode"), | ||
855 | PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo", | ||
856 | "UART_TX_PIN", uart_sleep_mode), | ||
857 | }; | ||
858 | |||
859 | foo_init(void) { | ||
860 | pinctrl_register_mappings(pinmap, ARRAY_SIZE(pinmap)); | ||
861 | } | ||
862 | |||
863 | Here the pins we want to control are in the "u0_group" and there is some | ||
864 | function called "u0" that can be enabled on this group of pins, and then | ||
865 | everything is UART business as usual. But there is also some function | ||
866 | named "gpio-mode" that can be mapped onto the same pins to move them into | ||
867 | GPIO mode. | ||
868 | |||
869 | This will give the desired effect without any bogus interaction with the | ||
870 | GPIO subsystem. It is just an electrical configuration used by that device | ||
871 | when going to sleep, it might imply that the pin is set into something the | ||
872 | datasheet calls "GPIO mode" but that is not the point: it is still used | ||
873 | by that UART device to control the pins that pertain to that very UART | ||
874 | driver, putting them into modes needed by the UART. GPIO in the Linux | ||
875 | kernel sense is just some 1-bit line, and is a different use case. | ||
876 | |||
877 | How the registers are poked to attain the push/pull and output low | ||
878 | configuration and the muxing of the "u0" or "gpio-mode" group onto these | ||
879 | pins is a question for the driver. | ||
880 | |||
881 | Some datasheets will be more helpful and refer to the "GPIO mode" as | ||
882 | "low power mode" rather than anything to do with GPIO. This often means | ||
883 | the same thing electrically speaking, but in this latter case the | ||
884 | software engineers will usually quickly identify that this is some | ||
885 | specific muxing/configuration rather than anything related to the GPIO | ||
886 | API. | ||
887 | |||
888 | |||
777 | Board/machine configuration | 889 | Board/machine configuration |
778 | ================================== | 890 | ================================== |
779 | 891 | ||
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index ae66f9b90a25..fcaf0b4efba2 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt | |||
@@ -143,7 +143,8 @@ Parameter: id: handle for debug log | |||
143 | 143 | ||
144 | Return Value: none | 144 | Return Value: none |
145 | 145 | ||
146 | Description: frees memory for a debug log | 146 | Description: frees memory for a debug log and removes all registered debug |
147 | views. | ||
147 | Must not be called within an interrupt handler | 148 | Must not be called within an interrupt handler |
148 | 149 | ||
149 | --------------------------------------------------------------------------- | 150 | --------------------------------------------------------------------------- |
diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx index 27a91cf43d6d..5020b7b5a244 100644 --- a/Documentation/scsi/LICENSE.qla2xxx +++ b/Documentation/scsi/LICENSE.qla2xxx | |||
@@ -1,4 +1,4 @@ | |||
1 | Copyright (c) 2003-2012 QLogic Corporation | 1 | Copyright (c) 2003-2013 QLogic Corporation |
2 | QLogic Linux FC-FCoE Driver | 2 | QLogic Linux FC-FCoE Driver |
3 | 3 | ||
4 | This program includes a device driver for Linux 3.x. | 4 | This program includes a device driver for Linux 3.x. |
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index ce6581c8ca26..95731a08f257 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -890,9 +890,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
890 | enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) | 890 | enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) |
891 | power_save - Automatic power-saving timeout (in second, 0 = | 891 | power_save - Automatic power-saving timeout (in second, 0 = |
892 | disable) | 892 | disable) |
893 | power_save_controller - Support runtime D3 of HD-audio controller | 893 | power_save_controller - Reset HD-audio controller in power-saving mode |
894 | (-1 = on for supported chip (default), false = off, | 894 | (default = on) |
895 | true = force to on even for unsupported hardware) | ||
896 | align_buffer_size - Force rounding of buffer/period sizes to multiples | 895 | align_buffer_size - Force rounding of buffer/period sizes to multiples |
897 | of 128 bytes. This is more efficient in terms of memory | 896 | of 128 bytes. This is more efficient in terms of memory |
898 | access but isn't required by the HDA spec and prevents | 897 | access but isn't required by the HDA spec and prevents |
@@ -912,7 +911,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
912 | models depending on the codec chip. The list of available models | 911 | models depending on the codec chip. The list of available models |
913 | is found in HD-Audio-Models.txt | 912 | is found in HD-Audio-Models.txt |
914 | 913 | ||
915 | The model name "genric" is treated as a special case. When this | 914 | The model name "generic" is treated as a special case. When this |
916 | model is given, the driver uses the generic codec parser without | 915 | model is given, the driver uses the generic codec parser without |
917 | "codec-patch". It's sometimes good for testing and debugging. | 916 | "codec-patch". It's sometimes good for testing and debugging. |
918 | 917 | ||
diff --git a/Documentation/sound/alsa/seq_oss.html b/Documentation/sound/alsa/seq_oss.html index d9776cf60c07..9663b45f6fde 100644 --- a/Documentation/sound/alsa/seq_oss.html +++ b/Documentation/sound/alsa/seq_oss.html | |||
@@ -285,7 +285,7 @@ sample data. | |||
285 | <H4> | 285 | <H4> |
286 | 7.2.4 Close Callback</H4> | 286 | 7.2.4 Close Callback</H4> |
287 | The <TT>close</TT> callback is called when this device is closed by the | 287 | The <TT>close</TT> callback is called when this device is closed by the |
288 | applicaion. If any private data was allocated in open callback, it must | 288 | application. If any private data was allocated in open callback, it must |
289 | be released in the close callback. The deletion of ALSA port should be | 289 | be released in the close callback. The deletion of ALSA port should be |
290 | done here, too. This callback must not be NULL. | 290 | done here, too. This callback must not be NULL. |
291 | <H4> | 291 | <H4> |
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 078701fdbd4d..dcc75a9ed919 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -18,6 +18,7 @@ files can be found in mm/swap.c. | |||
18 | 18 | ||
19 | Currently, these files are in /proc/sys/vm: | 19 | Currently, these files are in /proc/sys/vm: |
20 | 20 | ||
21 | - admin_reserve_kbytes | ||
21 | - block_dump | 22 | - block_dump |
22 | - compact_memory | 23 | - compact_memory |
23 | - dirty_background_bytes | 24 | - dirty_background_bytes |
@@ -53,11 +54,41 @@ Currently, these files are in /proc/sys/vm: | |||
53 | - percpu_pagelist_fraction | 54 | - percpu_pagelist_fraction |
54 | - stat_interval | 55 | - stat_interval |
55 | - swappiness | 56 | - swappiness |
57 | - user_reserve_kbytes | ||
56 | - vfs_cache_pressure | 58 | - vfs_cache_pressure |
57 | - zone_reclaim_mode | 59 | - zone_reclaim_mode |
58 | 60 | ||
59 | ============================================================== | 61 | ============================================================== |
60 | 62 | ||
63 | admin_reserve_kbytes | ||
64 | |||
65 | The amount of free memory in the system that should be reserved for users | ||
66 | with the capability cap_sys_admin. | ||
67 | |||
68 | admin_reserve_kbytes defaults to min(3% of free pages, 8MB) | ||
69 | |||
70 | That should provide enough for the admin to log in and kill a process, | ||
71 | if necessary, under the default overcommit 'guess' mode. | ||
72 | |||
73 | Systems running under overcommit 'never' should increase this to account | ||
74 | for the full Virtual Memory Size of programs used to recover. Otherwise, | ||
75 | root may not be able to log in to recover the system. | ||
76 | |||
77 | How do you calculate a minimum useful reserve? | ||
78 | |||
79 | sshd or login + bash (or some other shell) + top (or ps, kill, etc.) | ||
80 | |||
81 | For overcommit 'guess', we can sum resident set sizes (RSS). | ||
82 | On x86_64 this is about 8MB. | ||
83 | |||
84 | For overcommit 'never', we can take the max of their virtual sizes (VSZ) | ||
85 | and add the sum of their RSS. | ||
86 | On x86_64 this is about 128MB. | ||
87 | |||
88 | Changing this takes effect whenever an application requests memory. | ||
89 | |||
90 | ============================================================== | ||
91 | |||
61 | block_dump | 92 | block_dump |
62 | 93 | ||
63 | block_dump enables block I/O debugging when set to a nonzero value. More | 94 | block_dump enables block I/O debugging when set to a nonzero value. More |
@@ -542,6 +573,7 @@ memory until it actually runs out. | |||
542 | 573 | ||
543 | When this flag is 2, the kernel uses a "never overcommit" | 574 | When this flag is 2, the kernel uses a "never overcommit" |
544 | policy that attempts to prevent any overcommit of memory. | 575 | policy that attempts to prevent any overcommit of memory. |
576 | Note that user_reserve_kbytes affects this policy. | ||
545 | 577 | ||
546 | This feature can be very useful because there are a lot of | 578 | This feature can be very useful because there are a lot of |
547 | programs that malloc() huge amounts of memory "just-in-case" | 579 | programs that malloc() huge amounts of memory "just-in-case" |
@@ -645,6 +677,24 @@ The default value is 60. | |||
645 | 677 | ||
646 | ============================================================== | 678 | ============================================================== |
647 | 679 | ||
680 | - user_reserve_kbytes | ||
681 | |||
682 | When overcommit_memory is set to 2, "never overcommit" mode, reserve | ||
683 | min(3% of current process size, user_reserve_kbytes) of free memory. | ||
684 | This is intended to prevent a user from starting a single memory hogging | ||
685 | process, such that they cannot recover (kill the hog). | ||
686 | |||
687 | user_reserve_kbytes defaults to min(3% of the current process size, 128MB). | ||
688 | |||
689 | If this is reduced to zero, then the user will be allowed to allocate | ||
690 | all free memory with a single process, minus admin_reserve_kbytes. | ||
691 | Any subsequent attempts to execute a command will result in | ||
692 | "fork: Cannot allocate memory". | ||
693 | |||
694 | Changing this takes effect whenever an application requests memory. | ||
695 | |||
696 | ============================================================== | ||
697 | |||
648 | vfs_cache_pressure | 698 | vfs_cache_pressure |
649 | ------------------ | 699 | ------------------ |
650 | 700 | ||
diff --git a/Documentation/this_cpu_ops.txt b/Documentation/this_cpu_ops.txt new file mode 100644 index 000000000000..1a4ce7e3e05f --- /dev/null +++ b/Documentation/this_cpu_ops.txt | |||
@@ -0,0 +1,205 @@ | |||
1 | this_cpu operations | ||
2 | ------------------- | ||
3 | |||
4 | this_cpu operations are a way of optimizing access to per cpu | ||
5 | variables associated with the *currently* executing processor through | ||
6 | the use of segment registers (or a dedicated register where the cpu | ||
7 | permanently stored the beginning of the per cpu area for a specific | ||
8 | processor). | ||
9 | |||
10 | The this_cpu operations add a per cpu variable offset to the processor | ||
11 | specific percpu base and encode that operation in the instruction | ||
12 | operating on the per cpu variable. | ||
13 | |||
14 | This means there are no atomicity issues between the calculation of | ||
15 | the offset and the operation on the data. Therefore it is not | ||
16 | necessary to disable preempt or interrupts to ensure that the | ||
17 | processor is not changed between the calculation of the address and | ||
18 | the operation on the data. | ||
19 | |||
20 | Read-modify-write operations are of particular interest. Frequently | ||
21 | processors have special lower latency instructions that can operate | ||
22 | without the typical synchronization overhead but still provide some | ||
23 | sort of relaxed atomicity guarantee. The x86 for example can execute | ||
24 | RMW (Read Modify Write) instructions like inc/dec/cmpxchg without the | ||
25 | lock prefix and the associated latency penalty. | ||
26 | |||
27 | Access to the variable without the lock prefix is not synchronized but | ||
28 | synchronization is not necessary since we are dealing with per cpu | ||
29 | data specific to the currently executing processor. Only the current | ||
30 | processor should be accessing that variable and therefore there are no | ||
31 | concurrency issues with other processors in the system. | ||
32 | |||
33 | On x86 the fs: or the gs: segment registers contain the base of the | ||
34 | per cpu area. It is then possible to simply use the segment override | ||
35 | to relocate a per cpu relative address to the proper per cpu area for | ||
36 | the processor. So the relocation to the per cpu base is encoded in the | ||
37 | instruction via a segment register prefix. | ||
38 | |||
39 | For example: | ||
40 | |||
41 | DEFINE_PER_CPU(int, x); | ||
42 | int z; | ||
43 | |||
44 | z = this_cpu_read(x); | ||
45 | |||
46 | results in a single instruction | ||
47 | |||
48 | mov ax, gs:[x] | ||
49 | |||
50 | instead of a sequence of calculation of the address and then a fetch | ||
51 | from that address which occurs with the percpu operations. Before | ||
52 | this_cpu_ops such sequence also required preempt disable/enable to | ||
53 | prevent the kernel from moving the thread to a different processor | ||
54 | while the calculation is performed. | ||
55 | |||
56 | The main use of the this_cpu operations has been to optimize counter | ||
57 | operations. | ||
58 | |||
59 | this_cpu_inc(x) | ||
60 | |||
61 | results in the following single instruction (no lock prefix!) | ||
62 | |||
63 | inc gs:[x] | ||
64 | |||
65 | instead of the following operations required if there is no segment | ||
66 | register. | ||
67 | |||
68 | int *y; | ||
69 | int cpu; | ||
70 | |||
71 | cpu = get_cpu(); | ||
72 | y = per_cpu_ptr(&x, cpu); | ||
73 | (*y)++; | ||
74 | put_cpu(); | ||
75 | |||
76 | Note that these operations can only be used on percpu data that is | ||
77 | reserved for a specific processor. Without disabling preemption in the | ||
78 | surrounding code this_cpu_inc() will only guarantee that one of the | ||
79 | percpu counters is correctly incremented. However, there is no | ||
80 | guarantee that the OS will not move the process directly before or | ||
81 | after the this_cpu instruction is executed. In general this means that | ||
82 | the value of the individual counters for each processor are | ||
83 | meaningless. The sum of all the per cpu counters is the only value | ||
84 | that is of interest. | ||
85 | |||
86 | Per cpu variables are used for performance reasons. Bouncing cache | ||
87 | lines can be avoided if multiple processors concurrently go through | ||
88 | the same code paths. Since each processor has its own per cpu | ||
89 | variables no concurrent cacheline updates take place. The price that | ||
90 | has to be paid for this optimization is the need to add up the per cpu | ||
91 | counters when the value of the counter is needed. | ||
92 | |||
93 | |||
94 | Special operations: | ||
95 | ------------------- | ||
96 | |||
97 | y = this_cpu_ptr(&x) | ||
98 | |||
99 | Takes the offset of a per cpu variable (&x !) and returns the address | ||
100 | of the per cpu variable that belongs to the currently executing | ||
101 | processor. this_cpu_ptr avoids multiple steps that the common | ||
102 | get_cpu/put_cpu sequence requires. No processor number is | ||
103 | available. Instead the offset of the local per cpu area is simply | ||
104 | added to the percpu offset. | ||
105 | |||
106 | |||
107 | |||
108 | Per cpu variables and offsets | ||
109 | ----------------------------- | ||
110 | |||
111 | Per cpu variables have *offsets* to the beginning of the percpu | ||
112 | area. They do not have addresses although they look like that in the | ||
113 | code. Offsets cannot be directly dereferenced. The offset must be | ||
114 | added to a base pointer of a percpu area of a processor in order to | ||
115 | form a valid address. | ||
116 | |||
117 | Therefore the use of x or &x outside of the context of per cpu | ||
118 | operations is invalid and will generally be treated like a NULL | ||
119 | pointer dereference. | ||
120 | |||
121 | In the context of per cpu operations | ||
122 | |||
123 | x is a per cpu variable. Most this_cpu operations take a cpu | ||
124 | variable. | ||
125 | |||
126 | &x is the *offset* of a per cpu variable. this_cpu_ptr() takes | ||
127 | the offset of a per cpu variable which makes this look a bit | ||
128 | strange. | ||
129 | |||
130 | |||
131 | |||
132 | Operations on a field of a per cpu structure | ||
133 | -------------------------------------------- | ||
134 | |||
135 | Let's say we have a percpu structure | ||
136 | |||
137 | struct s { | ||
138 | int n,m; | ||
139 | }; | ||
140 | |||
141 | DEFINE_PER_CPU(struct s, p); | ||
142 | |||
143 | |||
144 | Operations on these fields are straightforward | ||
145 | |||
146 | this_cpu_inc(p.m) | ||
147 | |||
148 | z = this_cpu_cmpxchg(p.m, 0, 1); | ||
149 | |||
150 | |||
151 | If we have an offset to struct s: | ||
152 | |||
153 | struct s __percpu *ps = &p; | ||
154 | |||
155 | z = this_cpu_dec(ps->m); | ||
156 | |||
157 | z = this_cpu_inc_return(ps->n); | ||
158 | |||
159 | |||
160 | The calculation of the pointer may require the use of this_cpu_ptr() | ||
161 | if we do not make use of this_cpu ops later to manipulate fields: | ||
162 | |||
163 | struct s *pp; | ||
164 | |||
165 | pp = this_cpu_ptr(&p); | ||
166 | |||
167 | pp->m--; | ||
168 | |||
169 | z = pp->n++; | ||
170 | |||
171 | |||
172 | Variants of this_cpu ops | ||
173 | ------------------------- | ||
174 | |||
175 | this_cpu ops are interrupt safe. Some architectures do not support | ||
176 | these per cpu local operations. In that case the operation must be | ||
177 | replaced by code that disables interrupts, then does the operations | ||
178 | that are guaranteed to be atomic and then reenable interrupts. Doing | ||
179 | so is expensive. If there are other reasons why the scheduler cannot | ||
180 | change the processor we are executing on then there is no reason to | ||
181 | disable interrupts. For that purpose the __this_cpu operations are | ||
182 | provided. For example: | ||
183 | |||
184 | __this_cpu_inc(x); | ||
185 | |||
186 | Will increment x and will not fall back to code that disables | ||
187 | interrupts on platforms that cannot accomplish atomicity through | ||
188 | address relocation and a Read-Modify-Write operation in the same | ||
189 | instruction. | ||
190 | |||
191 | |||
192 | |||
193 | &this_cpu_ptr(pp)->n vs this_cpu_ptr(&pp->n) | ||
194 | -------------------------------------------- | ||
195 | |||
196 | The first operation takes the offset and forms an address and then | ||
197 | adds the offset of the n field. | ||
198 | |||
199 | The second one first adds the two offsets and then does the | ||
200 | relocation. IMHO the second form looks cleaner and has an easier time | ||
201 | with (). The second form also is consistent with the way | ||
202 | this_cpu_read() and friends are used. | ||
203 | |||
204 | |||
205 | Christoph Lameter, April 3rd, 2013 | ||
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 53d6a3c51d87..bfe8c29b1f1d 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt | |||
@@ -8,6 +8,7 @@ Copyright 2008 Red Hat Inc. | |||
8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | 8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, |
9 | John Kacur, and David Teigland. | 9 | John Kacur, and David Teigland. |
10 | Written for: 2.6.28-rc2 | 10 | Written for: 2.6.28-rc2 |
11 | Updated for: 3.10 | ||
11 | 12 | ||
12 | Introduction | 13 | Introduction |
13 | ------------ | 14 | ------------ |
@@ -17,13 +18,16 @@ designers of systems to find what is going on inside the kernel. | |||
17 | It can be used for debugging or analyzing latencies and | 18 | It can be used for debugging or analyzing latencies and |
18 | performance issues that take place outside of user-space. | 19 | performance issues that take place outside of user-space. |
19 | 20 | ||
20 | Although ftrace is the function tracer, it also includes an | 21 | Although ftrace is typically considered the function tracer, it |
21 | infrastructure that allows for other types of tracing. Some of | 22 | is really a framework of several assorted tracing utilities. |
22 | the tracers that are currently in ftrace include a tracer to | 23 | There's latency tracing to examine what occurs between interrupts |
23 | trace context switches, the time it takes for a high priority | 24 | disabled and enabled, as well as for preemption and from the time |
24 | task to run after it was woken up, the time interrupts are | 25 | a task is woken to the time the task is actually scheduled in. |
25 | disabled, and more (ftrace allows for tracer plugins, which | 26 | |
26 | means that the list of tracers can always grow). | 27 | One of the most common uses of ftrace is the event tracing. |
28 | Throughout the kernel are hundreds of static event points that | ||
29 | can be enabled via the debugfs file system to see what is | ||
30 | going on in certain parts of the kernel. | ||
27 | 31 | ||
28 | 32 | ||
29 | Implementation Details | 33 | Implementation Details |
@@ -61,7 +65,7 @@ the extended "/sys/kernel/debug/tracing" path name. | |||
61 | 65 | ||
62 | That's it! (assuming that you have ftrace configured into your kernel) | 66 | That's it! (assuming that you have ftrace configured into your kernel) |
63 | 67 | ||
64 | After mounting the debugfs, you can see a directory called | 68 | After mounting debugfs, you can see a directory called |
65 | "tracing". This directory contains the control and output files | 69 | "tracing". This directory contains the control and output files |
66 | of ftrace. Here is a list of some of the key files: | 70 | of ftrace. Here is a list of some of the key files: |
67 | 71 | ||
@@ -84,7 +88,9 @@ of ftrace. Here is a list of some of the key files: | |||
84 | 88 | ||
85 | This sets or displays whether writing to the trace | 89 | This sets or displays whether writing to the trace |
86 | ring buffer is enabled. Echo 0 into this file to disable | 90 | ring buffer is enabled. Echo 0 into this file to disable |
87 | the tracer or 1 to enable it. | 91 | the tracer or 1 to enable it. Note, this only disables |
92 | writing to the ring buffer, the tracing overhead may | ||
93 | still be occurring. | ||
88 | 94 | ||
89 | trace: | 95 | trace: |
90 | 96 | ||
@@ -109,7 +115,15 @@ of ftrace. Here is a list of some of the key files: | |||
109 | 115 | ||
110 | This file lets the user control the amount of data | 116 | This file lets the user control the amount of data |
111 | that is displayed in one of the above output | 117 | that is displayed in one of the above output |
112 | files. | 118 | files. Options also exist to modify how a tracer |
119 | or events work (stack traces, timestamps, etc). | ||
120 | |||
121 | options: | ||
122 | |||
123 | This is a directory that has a file for every available | ||
124 | trace option (also in trace_options). Options may also be set | ||
125 | or cleared by writing a "1" or "0" respectively into the | ||
126 | corresponding file with the option name. | ||
113 | 127 | ||
114 | tracing_max_latency: | 128 | tracing_max_latency: |
115 | 129 | ||
@@ -121,10 +135,17 @@ of ftrace. Here is a list of some of the key files: | |||
121 | latency is greater than the value in this | 135 | latency is greater than the value in this |
122 | file. (in microseconds) | 136 | file. (in microseconds) |
123 | 137 | ||
138 | tracing_thresh: | ||
139 | |||
140 | Some latency tracers will record a trace whenever the | ||
141 | latency is greater than the number in this file. | ||
142 | Only active when the file contains a number greater than 0. | ||
143 | (in microseconds) | ||
144 | |||
124 | buffer_size_kb: | 145 | buffer_size_kb: |
125 | 146 | ||
126 | This sets or displays the number of kilobytes each CPU | 147 | This sets or displays the number of kilobytes each CPU |
127 | buffer can hold. The tracer buffers are the same size | 148 | buffer holds. By default, the trace buffers are the same size |
128 | for each CPU. The displayed number is the size of the | 149 | for each CPU. The displayed number is the size of the |
129 | CPU buffer and not total size of all buffers. The | 150 | CPU buffer and not total size of all buffers. The |
130 | trace buffers are allocated in pages (blocks of memory | 151 | trace buffers are allocated in pages (blocks of memory |
@@ -133,16 +154,30 @@ of ftrace. Here is a list of some of the key files: | |||
133 | than requested, the rest of the page will be used, | 154 | than requested, the rest of the page will be used, |
134 | making the actual allocation bigger than requested. | 155 | making the actual allocation bigger than requested. |
135 | ( Note, the size may not be a multiple of the page size | 156 | ( Note, the size may not be a multiple of the page size |
136 | due to buffer management overhead. ) | 157 | due to buffer management meta-data. ) |
137 | 158 | ||
138 | This can only be updated when the current_tracer | 159 | buffer_total_size_kb: |
139 | is set to "nop". | 160 | |
161 | This displays the total combined size of all the trace buffers. | ||
162 | |||
163 | free_buffer: | ||
164 | |||
165 | If a process is performing the tracing, and the ring buffer | ||
166 | should be shrunk "freed" when the process is finished, even | ||
167 | if it were to be killed by a signal, this file can be used | ||
168 | for that purpose. On close of this file, the ring buffer will | ||
169 | be resized to its minimum size. Having a process that is tracing | ||
170 | also open this file, when the process exits its file descriptor | ||
171 | for this file will be closed, and in doing so, the ring buffer | ||
172 | will be "freed". | ||
173 | |||
174 | It may also stop tracing if disable_on_free option is set. | ||
140 | 175 | ||
141 | tracing_cpumask: | 176 | tracing_cpumask: |
142 | 177 | ||
143 | This is a mask that lets the user only trace | 178 | This is a mask that lets the user only trace |
144 | on specified CPUS. The format is a hex string | 179 | on specified CPUs. The format is a hex string |
145 | representing the CPUS. | 180 | representing the CPUs. |
146 | 181 | ||
147 | set_ftrace_filter: | 182 | set_ftrace_filter: |
148 | 183 | ||
@@ -183,6 +218,261 @@ of ftrace. Here is a list of some of the key files: | |||
183 | "set_ftrace_notrace". (See the section "dynamic ftrace" | 218 | "set_ftrace_notrace". (See the section "dynamic ftrace" |
184 | below for more details.) | 219 | below for more details.) |
185 | 220 | ||
221 | enabled_functions: | ||
222 | |||
223 | This file is more for debugging ftrace, but can also be useful | ||
224 | in seeing if any function has a callback attached to it. | ||
225 | Not only does the trace infrastructure use ftrace function | ||
226 | trace utility, but other subsystems might too. This file | ||
227 | displays all functions that have a callback attached to them | ||
228 | as well as the number of callbacks that have been attached. | ||
229 | Note, a callback may also call multiple functions which will | ||
230 | not be listed in this count. | ||
231 | |||
232 | If the callback registered to be traced by a function with | ||
233 | the "save regs" attribute (thus even more overhead), a 'R' | ||
234 | will be displayed on the same line as the function that | ||
235 | is returning registers. | ||
236 | |||
237 | function_profile_enabled: | ||
238 | |||
239 | When set it will enable all functions with either the function | ||
240 | tracer, or if enabled, the function graph tracer. It will | ||
241 | keep a histogram of the number of functions that were called | ||
242 | and if run with the function graph tracer, it will also keep | ||
243 | track of the time spent in those functions. The histogram | ||
244 | content can be displayed in the files: | ||
245 | |||
246 | trace_stats/function<cpu> ( function0, function1, etc). | ||
247 | |||
248 | trace_stats: | ||
249 | |||
250 | A directory that holds different tracing stats. | ||
251 | |||
252 | kprobe_events: | ||
253 | |||
254 | Enable dynamic trace points. See kprobetrace.txt. | ||
255 | |||
256 | kprobe_profile: | ||
257 | |||
258 | Dynamic trace points stats. See kprobetrace.txt. | ||
259 | |||
260 | max_graph_depth: | ||
261 | |||
262 | Used with the function graph tracer. This is the max depth | ||
263 | it will trace into a function. Setting this to a value of | ||
264 | one will show only the first kernel function that is called | ||
265 | from user space. | ||
266 | |||
267 | printk_formats: | ||
268 | |||
269 | This is for tools that read the raw format files. If an event in | ||
270 | the ring buffer references a string (currently only trace_printk() | ||
271 | does this), only a pointer to the string is recorded into the buffer | ||
272 | and not the string itself. This prevents tools from knowing what | ||
273 | that string was. This file displays the string and address for | ||
274 | the string allowing tools to map the pointers to what the | ||
275 | strings were. | ||
276 | |||
277 | saved_cmdlines: | ||
278 | |||
279 | Only the pid of the task is recorded in a trace event unless | ||
280 | the event specifically saves the task comm as well. Ftrace | ||
281 | makes a cache of pid mappings to comms to try to display | ||
282 | comms for events. If a pid for a comm is not listed, then | ||
283 | "<...>" is displayed in the output. | ||
284 | |||
285 | snapshot: | ||
286 | |||
287 | This displays the "snapshot" buffer and also lets the user | ||
288 | take a snapshot of the current running trace. | ||
289 | See the "Snapshot" section below for more details. | ||
290 | |||
291 | stack_max_size: | ||
292 | |||
293 | When the stack tracer is activated, this will display the | ||
294 | maximum stack size it has encountered. | ||
295 | See the "Stack Trace" section below. | ||
296 | |||
297 | stack_trace: | ||
298 | |||
299 | This displays the stack back trace of the largest stack | ||
300 | that was encountered when the stack tracer is activated. | ||
301 | See the "Stack Trace" section below. | ||
302 | |||
303 | stack_trace_filter: | ||
304 | |||
305 | This is similar to "set_ftrace_filter" but it limits what | ||
306 | functions the stack tracer will check. | ||
307 | |||
308 | trace_clock: | ||
309 | |||
310 | Whenever an event is recorded into the ring buffer, a | ||
311 | "timestamp" is added. This stamp comes from a specified | ||
312 | clock. By default, ftrace uses the "local" clock. This | ||
313 | clock is very fast and strictly per cpu, but on some | ||
314 | systems it may not be monotonic with respect to other | ||
315 | CPUs. In other words, the local clocks may not be in sync | ||
316 | with local clocks on other CPUs. | ||
317 | |||
318 | Usual clocks for tracing: | ||
319 | |||
320 | # cat trace_clock | ||
321 | [local] global counter x86-tsc | ||
322 | |||
323 | local: Default clock, but may not be in sync across CPUs | ||
324 | |||
325 | global: This clock is in sync with all CPUs but may | ||
326 | be a bit slower than the local clock. | ||
327 | |||
328 | counter: This is not a clock at all, but literally an atomic | ||
329 | counter. It counts up one by one, but is in sync | ||
330 | with all CPUs. This is useful when you need to | ||
331 | know exactly the order events occurred with respect to | ||
332 | each other on different CPUs. | ||
333 | |||
334 | uptime: This uses the jiffies counter and the time stamp | ||
335 | is relative to the time since boot up. | ||
336 | |||
337 | perf: This makes ftrace use the same clock that perf uses. | ||
338 | Eventually perf will be able to read ftrace buffers | ||
339 | and this will help out in interleaving the data. | ||
340 | |||
341 | x86-tsc: Architectures may define their own clocks. For | ||
342 | example, x86 uses its own TSC cycle clock here. | ||
343 | |||
344 | To set a clock, simply echo the clock name into this file. | ||
345 | |||
346 | echo global > trace_clock | ||
347 | |||
348 | trace_marker: | ||
349 | |||
350 | This is a very useful file for synchronizing user space | ||
351 | with events happening in the kernel. Writing strings into | ||
352 | this file will be written into the ftrace buffer. | ||
353 | |||
354 | It is useful in applications to open this file at the start | ||
355 | of the application and just reference the file descriptor | ||
356 | for the file. | ||
357 | |||
358 | void trace_write(const char *fmt, ...) | ||
359 | { | ||
360 | va_list ap; | ||
361 | char buf[256]; | ||
362 | int n; | ||
363 | |||
364 | if (trace_fd < 0) | ||
365 | return; | ||
366 | |||
367 | va_start(ap, fmt); | ||
368 | n = vsnprintf(buf, 256, fmt, ap); | ||
369 | va_end(ap); | ||
370 | |||
371 | write(trace_fd, buf, n); | ||
372 | } | ||
373 | |||
374 | start: | ||
375 | |||
376 | trace_fd = open("trace_marker", O_WRONLY); | ||
377 | |||
378 | uprobe_events: | ||
379 | |||
380 | Add dynamic tracepoints in programs. | ||
381 | See uprobetracer.txt | ||
382 | |||
383 | uprobe_profile: | ||
384 | |||
385 | Uprobe statistics. See uprobetracer.txt | ||
386 | |||
387 | instances: | ||
388 | |||
389 | This is a way to make multiple trace buffers where different | ||
390 | events can be recorded in different buffers. | ||
391 | See "Instances" section below. | ||
392 | |||
393 | events: | ||
394 | |||
395 | This is the trace event directory. It holds event tracepoints | ||
396 | (also known as static tracepoints) that have been compiled | ||
397 | into the kernel. It shows what event tracepoints exist | ||
398 | and how they are grouped by system. There are "enable" | ||
399 | files at various levels that can enable the tracepoints | ||
400 | when a "1" is written to them. | ||
401 | |||
402 | See events.txt for more information. | ||
403 | |||
404 | per_cpu: | ||
405 | |||
406 | This is a directory that contains the trace per_cpu information. | ||
407 | |||
408 | per_cpu/cpu0/buffer_size_kb: | ||
409 | |||
410 | The ftrace buffer is defined per_cpu. That is, there's a separate | ||
411 | buffer for each CPU to allow writes to be done atomically, | ||
412 | and free from cache bouncing. These buffers may have different | ||
413 | sizes. This file is similar to the buffer_size_kb | ||
414 | file, but it only displays or sets the buffer size for the | ||
415 | specific CPU. (here cpu0). | ||
416 | |||
417 | per_cpu/cpu0/trace: | ||
418 | |||
419 | This is similar to the "trace" file, but it will only display | ||
420 | the data specific for the CPU. If written to, it only clears | ||
421 | the specific CPU buffer. | ||
422 | |||
423 | per_cpu/cpu0/trace_pipe: | ||
424 | |||
425 | This is similar to the "trace_pipe" file, and is a consuming | ||
426 | read, but it will only display (and consume) the data specific | ||
427 | for the CPU. | ||
428 | |||
429 | per_cpu/cpu0/trace_pipe_raw: | ||
430 | |||
431 | For tools that can parse the ftrace ring buffer binary format, | ||
432 | the trace_pipe_raw file can be used to extract the data | ||
433 | from the ring buffer directly. With the use of the splice() | ||
434 | system call, the buffer data can be quickly transferred to | ||
435 | a file or to the network where a server is collecting the | ||
436 | data. | ||
437 | |||
438 | Like trace_pipe, this is a consuming reader, where multiple | ||
439 | reads will always produce different data. | ||
440 | |||
441 | per_cpu/cpu0/snapshot: | ||
442 | |||
443 | This is similar to the main "snapshot" file, but will only | ||
444 | snapshot the current CPU (if supported). It only displays | ||
445 | the content of the snapshot for a given CPU, and if | ||
446 | written to, only clears this CPU buffer. | ||
447 | |||
448 | per_cpu/cpu0/snapshot_raw: | ||
449 | |||
450 | Similar to the trace_pipe_raw, but will read the binary format | ||
451 | from the snapshot buffer for the given CPU. | ||
452 | |||
453 | per_cpu/cpu0/stats: | ||
454 | |||
455 | This displays certain stats about the ring buffer: | ||
456 | |||
457 | entries: The number of events that are still in the buffer. | ||
458 | |||
459 | overrun: The number of lost events due to overwriting when | ||
460 | the buffer was full. | ||
461 | |||
462 | commit overrun: Should always be zero. | ||
463 | This gets set if so many events happened within a nested | ||
464 | event (ring buffer is re-entrant), that it fills the | ||
465 | buffer and starts dropping events. | ||
466 | |||
467 | bytes: Bytes actually read (not overwritten). | ||
468 | |||
469 | oldest event ts: The oldest timestamp in the buffer | ||
470 | |||
471 | now ts: The current timestamp | ||
472 | |||
473 | dropped events: Events lost due to overwrite option being off. | ||
474 | |||
475 | read events: The number of events read. | ||
186 | 476 | ||
187 | The Tracers | 477 | The Tracers |
188 | ----------- | 478 | ----------- |
@@ -234,11 +524,6 @@ Here is the list of current tracers that may be configured. | |||
234 | RT tasks (as the current "wakeup" does). This is useful | 524 | RT tasks (as the current "wakeup" does). This is useful |
235 | for those interested in wake up timings of RT tasks. | 525 | for those interested in wake up timings of RT tasks. |
236 | 526 | ||
237 | "hw-branch-tracer" | ||
238 | |||
239 | Uses the BTS CPU feature on x86 CPUs to traces all | ||
240 | branches executed. | ||
241 | |||
242 | "nop" | 527 | "nop" |
243 | 528 | ||
244 | This is the "trace nothing" tracer. To remove all | 529 | This is the "trace nothing" tracer. To remove all |
@@ -261,70 +546,100 @@ Here is an example of the output format of the file "trace" | |||
261 | -------- | 546 | -------- |
262 | # tracer: function | 547 | # tracer: function |
263 | # | 548 | # |
264 | # TASK-PID CPU# TIMESTAMP FUNCTION | 549 | # entries-in-buffer/entries-written: 140080/250280 #P:4 |
265 | # | | | | | | 550 | # |
266 | bash-4251 [01] 10152.583854: path_put <-path_walk | 551 | # _-----=> irqs-off |
267 | bash-4251 [01] 10152.583855: dput <-path_put | 552 | # / _----=> need-resched |
268 | bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | 553 | # | / _---=> hardirq/softirq |
554 | # || / _--=> preempt-depth | ||
555 | # ||| / delay | ||
556 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
557 | # | | | |||| | | | ||
558 | bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath | ||
559 | bash-1977 [000] .... 17284.993653: __close_fd <-sys_close | ||
560 | bash-1977 [000] .... 17284.993653: _raw_spin_lock <-__close_fd | ||
561 | sshd-1974 [003] .... 17284.993653: __srcu_read_unlock <-fsnotify | ||
562 | bash-1977 [000] .... 17284.993654: add_preempt_count <-_raw_spin_lock | ||
563 | bash-1977 [000] ...1 17284.993655: _raw_spin_unlock <-__close_fd | ||
564 | bash-1977 [000] ...1 17284.993656: sub_preempt_count <-_raw_spin_unlock | ||
565 | bash-1977 [000] .... 17284.993657: filp_close <-__close_fd | ||
566 | bash-1977 [000] .... 17284.993657: dnotify_flush <-filp_close | ||
567 | sshd-1974 [003] .... 17284.993658: sys_select <-system_call_fastpath | ||
269 | -------- | 568 | -------- |
270 | 569 | ||
271 | A header is printed with the tracer name that is represented by | 570 | A header is printed with the tracer name that is represented by |
272 | the trace. In this case the tracer is "function". Then a header | 571 | the trace. In this case the tracer is "function". Then it shows the |
273 | showing the format. Task name "bash", the task PID "4251", the | 572 | number of events in the buffer as well as the total number of entries |
274 | CPU that it was running on "01", the timestamp in <secs>.<usecs> | 573 | that were written. The difference is the number of entries that were |
275 | format, the function name that was traced "path_put" and the | 574 | lost due to the buffer filling up (250280 - 140080 = 110200 events |
276 | parent function that called this function "path_walk". The | 575 | lost). |
277 | timestamp is the time at which the function was entered. | 576 | |
577 | The header explains the content of the events. Task name "bash", the task | ||
578 | PID "1977", the CPU that it was running on "000", the latency format | ||
579 | (explained below), the timestamp in <secs>.<usecs> format, the | ||
580 | function name that was traced "sys_close" and the parent function that | ||
581 | called this function "system_call_fastpath". The timestamp is the time | ||
582 | at which the function was entered. | ||
278 | 583 | ||
279 | Latency trace format | 584 | Latency trace format |
280 | -------------------- | 585 | -------------------- |
281 | 586 | ||
282 | When the latency-format option is enabled, the trace file gives | 587 | When the latency-format option is enabled or when one of the latency |
283 | somewhat more information to see why a latency happened. | 588 | tracers is set, the trace file gives somewhat more information to see |
284 | Here is a typical trace. | 589 | why a latency happened. Here is a typical trace. |
285 | 590 | ||
286 | # tracer: irqsoff | 591 | # tracer: irqsoff |
287 | # | 592 | # |
288 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | 593 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
289 | -------------------------------------------------------------------- | 594 | # -------------------------------------------------------------------- |
290 | latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 595 | # latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
291 | ----------------- | 596 | # ----------------- |
292 | | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | 597 | # | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0) |
293 | ----------------- | 598 | # ----------------- |
294 | => started at: apic_timer_interrupt | 599 | # => started at: __lock_task_sighand |
295 | => ended at: do_softirq | 600 | # => ended at: _raw_spin_unlock_irqrestore |
296 | 601 | # | |
297 | # _------=> CPU# | 602 | # |
298 | # / _-----=> irqs-off | 603 | # _------=> CPU# |
299 | # | / _----=> need-resched | 604 | # / _-----=> irqs-off |
300 | # || / _---=> hardirq/softirq | 605 | # | / _----=> need-resched |
301 | # ||| / _--=> preempt-depth | 606 | # || / _---=> hardirq/softirq |
302 | # |||| / | 607 | # ||| / _--=> preempt-depth |
303 | # ||||| delay | 608 | # |||| / delay |
304 | # cmd pid ||||| time | caller | 609 | # cmd pid ||||| time | caller |
305 | # \ / ||||| \ | / | 610 | # \ / ||||| \ | / |
306 | <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | 611 | ps-6143 2d... 0us!: trace_hardirqs_off <-__lock_task_sighand |
307 | <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | 612 | ps-6143 2d..1 259us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore |
308 | <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | 613 | ps-6143 2d..1 263us+: time_hardirqs_on <-_raw_spin_unlock_irqrestore |
614 | ps-6143 2d..1 306us : <stack trace> | ||
615 | => trace_hardirqs_on_caller | ||
616 | => trace_hardirqs_on | ||
617 | => _raw_spin_unlock_irqrestore | ||
618 | => do_task_stat | ||
619 | => proc_tgid_stat | ||
620 | => proc_single_show | ||
621 | => seq_read | ||
622 | => vfs_read | ||
623 | => sys_read | ||
624 | => system_call_fastpath | ||
309 | 625 | ||
310 | 626 | ||
311 | This shows that the current tracer is "irqsoff" tracing the time | 627 | This shows that the current tracer is "irqsoff" tracing the time |
312 | for which interrupts were disabled. It gives the trace version | 628 | for which interrupts were disabled. It gives the trace version (which |
313 | and the version of the kernel upon which this was executed on | 629 | never changes) and the version of the kernel upon which this was executed on |
314 | (2.6.26-rc8). Then it displays the max latency in microsecs (97 | 630 | (3.8.0-test+). Then it displays the max latency in microseconds (259 us). The number |
315 | us). The number of trace entries displayed and the total number | 631 | of trace entries displayed and the total number (both are four: #4/4). |
316 | recorded (both are three: #3/3). The type of preemption that was | 632 | VP, KP, SP, and HP are always zero and are reserved for later use. |
317 | used (PREEMPT). VP, KP, SP, and HP are always zero and are | 633 | #P is the number of online CPUs (#P:4). |
318 | reserved for later use. #P is the number of online CPUS (#P:2). | ||
319 | 634 | ||
320 | The task is the process that was running when the latency | 635 | The task is the process that was running when the latency |
321 | occurred. (swapper pid: 0). | 636 | occurred. (ps pid: 6143). |
322 | 637 | ||
323 | The start and stop (the functions in which the interrupts were | 638 | The start and stop (the functions in which the interrupts were |
324 | disabled and enabled respectively) that caused the latencies: | 639 | disabled and enabled respectively) that caused the latencies: |
325 | 640 | ||
326 | apic_timer_interrupt is where the interrupts were disabled. | 641 | __lock_task_sighand is where the interrupts were disabled. |
327 | do_softirq is where they were enabled again. | 642 | _raw_spin_unlock_irqrestore is where they were enabled again. |
328 | 643 | ||
329 | The next lines after the header are the trace itself. The header | 644 | The next lines after the header are the trace itself. The header |
330 | explains which is which. | 645 | explains which is which. |
@@ -367,16 +682,43 @@ The above is mostly meaningful for kernel developers. | |||
367 | 682 | ||
368 | The rest is the same as the 'trace' file. | 683 | The rest is the same as the 'trace' file. |
369 | 684 | ||
685 | Note, the latency tracers will usually end with a back trace | ||
686 | to easily find where the latency occurred. | ||
370 | 687 | ||
371 | trace_options | 688 | trace_options |
372 | ------------- | 689 | ------------- |
373 | 690 | ||
374 | The trace_options file is used to control what gets printed in | 691 | The trace_options file (or the options directory) is used to control |
375 | the trace output. To see what is available, simply cat the file: | 692 | what gets printed in the trace output, or manipulate the tracers. |
693 | To see what is available, simply cat the file: | ||
376 | 694 | ||
377 | cat trace_options | 695 | cat trace_options |
378 | print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | 696 | print-parent |
379 | noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj | 697 | nosym-offset |
698 | nosym-addr | ||
699 | noverbose | ||
700 | noraw | ||
701 | nohex | ||
702 | nobin | ||
703 | noblock | ||
704 | nostacktrace | ||
705 | trace_printk | ||
706 | noftrace_preempt | ||
707 | nobranch | ||
708 | annotate | ||
709 | nouserstacktrace | ||
710 | nosym-userobj | ||
711 | noprintk-msg-only | ||
712 | context-info | ||
713 | latency-format | ||
714 | sleep-time | ||
715 | graph-time | ||
716 | record-cmd | ||
717 | overwrite | ||
718 | nodisable_on_free | ||
719 | irq-info | ||
720 | markers | ||
721 | function-trace | ||
380 | 722 | ||
381 | To disable one of the options, echo in the option prepended with | 723 | To disable one of the options, echo in the option prepended with |
382 | "no". | 724 | "no". |
@@ -428,13 +770,34 @@ Here are the available options: | |||
428 | 770 | ||
429 | bin - This will print out the formats in raw binary. | 771 | bin - This will print out the formats in raw binary. |
430 | 772 | ||
431 | block - TBD (needs update) | 773 | block - When set, reading trace_pipe will not block when polled. |
432 | 774 | ||
433 | stacktrace - This is one of the options that changes the trace | 775 | stacktrace - This is one of the options that changes the trace |
434 | itself. When a trace is recorded, so is the stack | 776 | itself. When a trace is recorded, so is the stack |
435 | of functions. This allows for back traces of | 777 | of functions. This allows for back traces of |
436 | trace sites. | 778 | trace sites. |
437 | 779 | ||
780 | trace_printk - Can disable trace_printk() from writing into the buffer. | ||
781 | |||
782 | branch - Enable branch tracing with the tracer. | ||
783 | |||
784 | annotate - It is sometimes confusing when the CPU buffers are full | ||
785 | and one CPU buffer had a lot of events recently, thus | ||
786 | a shorter time frame, where another CPU may have only had | ||
787 | a few events, which lets it have older events. When | ||
788 | the trace is reported, it shows the oldest events first, | ||
789 | and it may look like only one CPU ran (the one with the | ||
790 | oldest events). When the annotate option is set, it will | ||
791 | display when a new CPU buffer started: | ||
792 | |||
793 | <idle>-0 [001] dNs4 21169.031481: wake_up_idle_cpu <-add_timer_on | ||
794 | <idle>-0 [001] dNs4 21169.031482: _raw_spin_unlock_irqrestore <-add_timer_on | ||
795 | <idle>-0 [001] .Ns4 21169.031484: sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
796 | ##### CPU 2 buffer started #### | ||
797 | <idle>-0 [002] .N.1 21169.031484: rcu_idle_exit <-cpu_idle | ||
798 | <idle>-0 [001] .Ns3 21169.031484: _raw_spin_unlock <-clocksource_watchdog | ||
799 | <idle>-0 [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock | ||
800 | |||
438 | userstacktrace - This option changes the trace. It records a | 801 | userstacktrace - This option changes the trace. It records a |
439 | stacktrace of the current userspace thread. | 802 | stacktrace of the current userspace thread. |
440 | 803 | ||
@@ -451,9 +814,13 @@ Here are the available options: | |||
451 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 | 814 | a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 |
452 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | 815 | x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] |
453 | 816 | ||
454 | sched-tree - trace all tasks that are on the runqueue, at | 817 | |
455 | every scheduling event. Will add overhead if | 818 | printk-msg-only - When set, trace_printk()s will only show the format |
456 | there's a lot of tasks running at once. | 819 | and not their parameters (if trace_bprintk() or |
820 | trace_bputs() was used to save the trace_printk()). | ||
821 | |||
822 | context-info - When disabled, shows only the event data, hiding the | ||
823 | comm, PID, timestamp, CPU, and other useful data. | ||
457 | 824 | ||
458 | latency-format - This option changes the trace. When | 825 | latency-format - This option changes the trace. When |
459 | it is enabled, the trace displays | 826 | it is enabled, the trace displays |
@@ -461,31 +828,61 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] | |||
461 | latencies, as described in "Latency | 828 | latencies, as described in "Latency |
462 | trace format". | 829 | trace format". |
463 | 830 | ||
831 | sleep-time - When running function graph tracer, to include | ||
832 | the time a task schedules out in its function. | ||
833 | When enabled, it will account time the task has been | ||
834 | scheduled out as part of the function call. | ||
835 | |||
836 | graph-time - When running function graph tracer, to include the | ||
837 | time to call nested functions. When this is not set, | ||
838 | the time reported for the function will only include | ||
839 | the time the function itself executed for, not the time | ||
840 | for functions that it called. | ||
841 | |||
842 | record-cmd - When any event or tracer is enabled, a hook is enabled | ||
843 | in the sched_switch trace point to fill comm cache | ||
844 | with mapped pids and comms. But this may cause some | ||
845 | overhead, and if you only care about pids, and not the | ||
846 | name of the task, disabling this option can lower the | ||
847 | impact of tracing. | ||
848 | |||
464 | overwrite - This controls what happens when the trace buffer is | 849 | overwrite - This controls what happens when the trace buffer is |
465 | full. If "1" (default), the oldest events are | 850 | full. If "1" (default), the oldest events are |
466 | discarded and overwritten. If "0", then the newest | 851 | discarded and overwritten. If "0", then the newest |
467 | events are discarded. | 852 | events are discarded. |
853 | (see per_cpu/cpu0/stats for overrun and dropped) | ||
468 | 854 | ||
469 | ftrace_enabled | 855 | disable_on_free - When the free_buffer is closed, tracing will |
470 | -------------- | 856 | stop (tracing_on set to 0). |
471 | 857 | ||
472 | The following tracers (listed below) give different output | 858 | irq-info - Shows the interrupt, preempt count, need resched data. |
473 | depending on whether or not the sysctl ftrace_enabled is set. To | 859 | When disabled, the trace looks like: |
474 | set ftrace_enabled, one can either use the sysctl function or | ||
475 | set it via the proc file system interface. | ||
476 | 860 | ||
477 | sysctl kernel.ftrace_enabled=1 | 861 | # tracer: function |
862 | # | ||
863 | # entries-in-buffer/entries-written: 144405/9452052 #P:4 | ||
864 | # | ||
865 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
866 | # | | | | | | ||
867 | <idle>-0 [002] 23636.756054: ttwu_do_activate.constprop.89 <-try_to_wake_up | ||
868 | <idle>-0 [002] 23636.756054: activate_task <-ttwu_do_activate.constprop.89 | ||
869 | <idle>-0 [002] 23636.756055: enqueue_task <-activate_task | ||
478 | 870 | ||
479 | or | ||
480 | 871 | ||
481 | echo 1 > /proc/sys/kernel/ftrace_enabled | 872 | markers - When set, the trace_marker is writable (only by root). |
873 | When disabled, the trace_marker will error with EINVAL | ||
874 | on write. | ||
875 | |||
876 | |||
877 | function-trace - The latency tracers will enable function tracing | ||
878 | if this option is enabled (which it is by default). When | ||
879 | it is disabled, the latency tracers do not trace | ||
880 | functions. This keeps the overhead of the tracer down | ||
881 | when performing latency tests. | ||
482 | 882 | ||
483 | To disable ftrace_enabled simply replace the '1' with '0' in the | 883 | Note: Some tracers have their own options. They only appear |
484 | above commands. | 884 | when the tracer is active. |
485 | 885 | ||
486 | When ftrace_enabled is set the tracers will also record the | ||
487 | functions that are within the trace. The descriptions of the | ||
488 | tracers will also show an example with ftrace enabled. | ||
489 | 886 | ||
490 | 887 | ||
491 | irqsoff | 888 | irqsoff |
@@ -506,95 +903,133 @@ new trace is saved. | |||
506 | To reset the maximum, echo 0 into tracing_max_latency. Here is | 903 | To reset the maximum, echo 0 into tracing_max_latency. Here is |
507 | an example: | 904 | an example: |
508 | 905 | ||
906 | # echo 0 > options/function-trace | ||
509 | # echo irqsoff > current_tracer | 907 | # echo irqsoff > current_tracer |
510 | # echo latency-format > trace_options | ||
511 | # echo 0 > tracing_max_latency | ||
512 | # echo 1 > tracing_on | 908 | # echo 1 > tracing_on |
909 | # echo 0 > tracing_max_latency | ||
513 | # ls -ltr | 910 | # ls -ltr |
514 | [...] | 911 | [...] |
515 | # echo 0 > tracing_on | 912 | # echo 0 > tracing_on |
516 | # cat trace | 913 | # cat trace |
517 | # tracer: irqsoff | 914 | # tracer: irqsoff |
518 | # | 915 | # |
519 | irqsoff latency trace v1.1.5 on 2.6.26 | 916 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
520 | -------------------------------------------------------------------- | 917 | # -------------------------------------------------------------------- |
521 | latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 918 | # latency: 16 us, #4/4, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
522 | ----------------- | 919 | # ----------------- |
523 | | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0) | 920 | # | task: swapper/0-0 (uid:0 nice:0 policy:0 rt_prio:0) |
524 | ----------------- | 921 | # ----------------- |
525 | => started at: sys_setpgid | 922 | # => started at: run_timer_softirq |
526 | => ended at: sys_setpgid | 923 | # => ended at: run_timer_softirq |
527 | 924 | # | |
528 | # _------=> CPU# | 925 | # |
529 | # / _-----=> irqs-off | 926 | # _------=> CPU# |
530 | # | / _----=> need-resched | 927 | # / _-----=> irqs-off |
531 | # || / _---=> hardirq/softirq | 928 | # | / _----=> need-resched |
532 | # ||| / _--=> preempt-depth | 929 | # || / _---=> hardirq/softirq |
533 | # |||| / | 930 | # ||| / _--=> preempt-depth |
534 | # ||||| delay | 931 | # |||| / delay |
535 | # cmd pid ||||| time | caller | 932 | # cmd pid ||||| time | caller |
536 | # \ / ||||| \ | / | 933 | # \ / ||||| \ | / |
537 | bash-3730 1d... 0us : _write_lock_irq (sys_setpgid) | 934 | <idle>-0 0d.s2 0us+: _raw_spin_lock_irq <-run_timer_softirq |
538 | bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid) | 935 | <idle>-0 0dNs3 17us : _raw_spin_unlock_irq <-run_timer_softirq |
539 | bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid) | 936 | <idle>-0 0dNs3 17us+: trace_hardirqs_on <-run_timer_softirq |
540 | 937 | <idle>-0 0dNs3 25us : <stack trace> | |
541 | 938 | => _raw_spin_unlock_irq | |
542 | Here we see that that we had a latency of 12 microsecs (which is | 939 | => run_timer_softirq |
543 | very good). The _write_lock_irq in sys_setpgid disabled | 940 | => __do_softirq |
544 | interrupts. The difference between the 12 and the displayed | 941 | => call_softirq |
545 | timestamp 14us occurred because the clock was incremented | 942 | => do_softirq |
943 | => irq_exit | ||
944 | => smp_apic_timer_interrupt | ||
945 | => apic_timer_interrupt | ||
946 | => rcu_idle_exit | ||
947 | => cpu_idle | ||
948 | => rest_init | ||
949 | => start_kernel | ||
950 | => x86_64_start_reservations | ||
951 | => x86_64_start_kernel | ||
952 | |||
953 | Here we see that we had a latency of 16 microseconds (which is | ||
954 | very good). The _raw_spin_lock_irq in run_timer_softirq disabled | ||
955 | interrupts. The difference between the 16 and the displayed | ||
956 | timestamp 25us occurred because the clock was incremented | ||
546 | between the time of recording the max latency and the time of | 957 | between the time of recording the max latency and the time of |
547 | recording the function that had that latency. | 958 | recording the function that had that latency. |
548 | 959 | ||
549 | Note the above example had ftrace_enabled not set. If we set the | 960 | Note the above example had function-trace not set. If we set |
550 | ftrace_enabled, we get a much larger output: | 961 | function-trace, we get a much larger output: |
962 | |||
963 | with echo 1 > options/function-trace | ||
551 | 964 | ||
552 | # tracer: irqsoff | 965 | # tracer: irqsoff |
553 | # | 966 | # |
554 | irqsoff latency trace v1.1.5 on 2.6.26-rc8 | 967 | # irqsoff latency trace v1.1.5 on 3.8.0-test+ |
555 | -------------------------------------------------------------------- | 968 | # -------------------------------------------------------------------- |
556 | latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 969 | # latency: 71 us, #168/168, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
557 | ----------------- | 970 | # ----------------- |
558 | | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | 971 | # | task: bash-2042 (uid:0 nice:0 policy:0 rt_prio:0) |
559 | ----------------- | 972 | # ----------------- |
560 | => started at: __alloc_pages_internal | 973 | # => started at: ata_scsi_queuecmd |
561 | => ended at: __alloc_pages_internal | 974 | # => ended at: ata_scsi_queuecmd |
562 | 975 | # | |
563 | # _------=> CPU# | 976 | # |
564 | # / _-----=> irqs-off | 977 | # _------=> CPU# |
565 | # | / _----=> need-resched | 978 | # / _-----=> irqs-off |
566 | # || / _---=> hardirq/softirq | 979 | # | / _----=> need-resched |
567 | # ||| / _--=> preempt-depth | 980 | # || / _---=> hardirq/softirq |
568 | # |||| / | 981 | # ||| / _--=> preempt-depth |
569 | # ||||| delay | 982 | # |||| / delay |
570 | # cmd pid ||||| time | caller | 983 | # cmd pid ||||| time | caller |
571 | # \ / ||||| \ | / | 984 | # \ / ||||| \ | / |
572 | ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | 985 | bash-2042 3d... 0us : _raw_spin_lock_irqsave <-ata_scsi_queuecmd |
573 | ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | 986 | bash-2042 3d... 0us : add_preempt_count <-_raw_spin_lock_irqsave |
574 | ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | 987 | bash-2042 3d..1 1us : ata_scsi_find_dev <-ata_scsi_queuecmd |
575 | ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | 988 | bash-2042 3d..1 1us : __ata_scsi_find_dev <-ata_scsi_find_dev |
576 | ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | 989 | bash-2042 3d..1 2us : ata_find_dev.part.14 <-__ata_scsi_find_dev |
577 | ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | 990 | bash-2042 3d..1 2us : ata_qc_new_init <-__ata_scsi_queuecmd |
578 | ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | 991 | bash-2042 3d..1 3us : ata_sg_init <-__ata_scsi_queuecmd |
579 | ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | 992 | bash-2042 3d..1 4us : ata_scsi_rw_xlat <-__ata_scsi_queuecmd |
580 | ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | 993 | bash-2042 3d..1 4us : ata_build_rw_tf <-ata_scsi_rw_xlat |
581 | ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
582 | ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
583 | ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
584 | [...] | 994 | [...] |
585 | ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | 995 | bash-2042 3d..1 67us : delay_tsc <-__delay |
586 | ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | 996 | bash-2042 3d..1 67us : add_preempt_count <-delay_tsc |
587 | ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | 997 | bash-2042 3d..2 67us : sub_preempt_count <-delay_tsc |
588 | ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | 998 | bash-2042 3d..1 67us : add_preempt_count <-delay_tsc |
589 | ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | 999 | bash-2042 3d..2 68us : sub_preempt_count <-delay_tsc |
590 | ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | 1000 | bash-2042 3d..1 68us+: ata_bmdma_start <-ata_bmdma_qc_issue |
591 | ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | 1001 | bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
592 | ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | 1002 | bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
593 | ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | 1003 | bash-2042 3d..1 72us+: trace_hardirqs_on <-ata_scsi_queuecmd |
594 | 1004 | bash-2042 3d..1 120us : <stack trace> | |
595 | 1005 | => _raw_spin_unlock_irqrestore | |
596 | 1006 | => ata_scsi_queuecmd | |
597 | Here we traced a 50 microsecond latency. But we also see all the | 1007 | => scsi_dispatch_cmd |
1008 | => scsi_request_fn | ||
1009 | => __blk_run_queue_uncond | ||
1010 | => __blk_run_queue | ||
1011 | => blk_queue_bio | ||
1012 | => generic_make_request | ||
1013 | => submit_bio | ||
1014 | => submit_bh | ||
1015 | => __ext3_get_inode_loc | ||
1016 | => ext3_iget | ||
1017 | => ext3_lookup | ||
1018 | => lookup_real | ||
1019 | => __lookup_hash | ||
1020 | => walk_component | ||
1021 | => lookup_last | ||
1022 | => path_lookupat | ||
1023 | => filename_lookup | ||
1024 | => user_path_at_empty | ||
1025 | => user_path_at | ||
1026 | => vfs_fstatat | ||
1027 | => vfs_stat | ||
1028 | => sys_newstat | ||
1029 | => system_call_fastpath | ||
1030 | |||
1031 | |||
1032 | Here we traced a 71 microsecond latency. But we also see all the | ||
598 | functions that were called during that time. Note that by | 1033 | functions that were called during that time. Note that by |
599 | enabling function tracing, we incur an added overhead. This | 1034 | enabling function tracing, we incur an added overhead. This |
600 | overhead may extend the latency times. But nevertheless, this | 1035 | overhead may extend the latency times. But nevertheless, this |
@@ -614,120 +1049,122 @@ Like the irqsoff tracer, it records the maximum latency for | |||
614 | which preemption was disabled. The control of preemptoff tracer | 1049 | which preemption was disabled. The control of preemptoff tracer |
615 | is much like the irqsoff tracer. | 1050 | is much like the irqsoff tracer. |
616 | 1051 | ||
1052 | # echo 0 > options/function-trace | ||
617 | # echo preemptoff > current_tracer | 1053 | # echo preemptoff > current_tracer |
618 | # echo latency-format > trace_options | ||
619 | # echo 0 > tracing_max_latency | ||
620 | # echo 1 > tracing_on | 1054 | # echo 1 > tracing_on |
1055 | # echo 0 > tracing_max_latency | ||
621 | # ls -ltr | 1056 | # ls -ltr |
622 | [...] | 1057 | [...] |
623 | # echo 0 > tracing_on | 1058 | # echo 0 > tracing_on |
624 | # cat trace | 1059 | # cat trace |
625 | # tracer: preemptoff | 1060 | # tracer: preemptoff |
626 | # | 1061 | # |
627 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | 1062 | # preemptoff latency trace v1.1.5 on 3.8.0-test+ |
628 | -------------------------------------------------------------------- | 1063 | # -------------------------------------------------------------------- |
629 | latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1064 | # latency: 46 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
630 | ----------------- | 1065 | # ----------------- |
631 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1066 | # | task: sshd-1991 (uid:0 nice:0 policy:0 rt_prio:0) |
632 | ----------------- | 1067 | # ----------------- |
633 | => started at: do_IRQ | 1068 | # => started at: do_IRQ |
634 | => ended at: __do_softirq | 1069 | # => ended at: do_IRQ |
635 | 1070 | # | |
636 | # _------=> CPU# | 1071 | # |
637 | # / _-----=> irqs-off | 1072 | # _------=> CPU# |
638 | # | / _----=> need-resched | 1073 | # / _-----=> irqs-off |
639 | # || / _---=> hardirq/softirq | 1074 | # | / _----=> need-resched |
640 | # ||| / _--=> preempt-depth | 1075 | # || / _---=> hardirq/softirq |
641 | # |||| / | 1076 | # ||| / _--=> preempt-depth |
642 | # ||||| delay | 1077 | # |||| / delay |
643 | # cmd pid ||||| time | caller | 1078 | # cmd pid ||||| time | caller |
644 | # \ / ||||| \ | / | 1079 | # \ / ||||| \ | / |
645 | sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | 1080 | sshd-1991 1d.h. 0us+: irq_enter <-do_IRQ |
646 | sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | 1081 | sshd-1991 1d..1 46us : irq_exit <-do_IRQ |
647 | sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | 1082 | sshd-1991 1d..1 47us+: trace_preempt_on <-do_IRQ |
1083 | sshd-1991 1d..1 52us : <stack trace> | ||
1084 | => sub_preempt_count | ||
1085 | => irq_exit | ||
1086 | => do_IRQ | ||
1087 | => ret_from_intr | ||
648 | 1088 | ||
649 | 1089 | ||
650 | This has some more changes. Preemption was disabled when an | 1090 | This has some more changes. Preemption was disabled when an |
651 | interrupt came in (notice the 'h'), and was enabled while doing | 1091 | interrupt came in (notice the 'h'), and was enabled on exit. |
652 | a softirq. (notice the 's'). But we also see that interrupts | 1092 | But we also see that interrupts have been disabled when entering |
653 | have been disabled when entering the preempt off section and | 1093 | the preempt off section and leaving it (the 'd'). We do not know if |
654 | leaving it (the 'd'). We do not know if interrupts were enabled | 1094 | interrupts were enabled in the mean time or shortly after this |
655 | in the mean time. | 1095 | was over. |
656 | 1096 | ||
657 | # tracer: preemptoff | 1097 | # tracer: preemptoff |
658 | # | 1098 | # |
659 | preemptoff latency trace v1.1.5 on 2.6.26-rc8 | 1099 | # preemptoff latency trace v1.1.5 on 3.8.0-test+ |
660 | -------------------------------------------------------------------- | 1100 | # -------------------------------------------------------------------- |
661 | latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1101 | # latency: 83 us, #241/241, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
662 | ----------------- | 1102 | # ----------------- |
663 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1103 | # | task: bash-1994 (uid:0 nice:0 policy:0 rt_prio:0) |
664 | ----------------- | 1104 | # ----------------- |
665 | => started at: remove_wait_queue | 1105 | # => started at: wake_up_new_task |
666 | => ended at: __do_softirq | 1106 | # => ended at: task_rq_unlock |
667 | 1107 | # | |
668 | # _------=> CPU# | 1108 | # |
669 | # / _-----=> irqs-off | 1109 | # _------=> CPU# |
670 | # | / _----=> need-resched | 1110 | # / _-----=> irqs-off |
671 | # || / _---=> hardirq/softirq | 1111 | # | / _----=> need-resched |
672 | # ||| / _--=> preempt-depth | 1112 | # || / _---=> hardirq/softirq |
673 | # |||| / | 1113 | # ||| / _--=> preempt-depth |
674 | # ||||| delay | 1114 | # |||| / delay |
675 | # cmd pid ||||| time | caller | 1115 | # cmd pid ||||| time | caller |
676 | # \ / ||||| \ | / | 1116 | # \ / ||||| \ | / |
677 | sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | 1117 | bash-1994 1d..1 0us : _raw_spin_lock_irqsave <-wake_up_new_task |
678 | sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | 1118 | bash-1994 1d..1 0us : select_task_rq_fair <-select_task_rq |
679 | sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | 1119 | bash-1994 1d..1 1us : __rcu_read_lock <-select_task_rq_fair |
680 | sshd-4261 0d..1 2us : irq_enter (do_IRQ) | 1120 | bash-1994 1d..1 1us : source_load <-select_task_rq_fair |
681 | sshd-4261 0d..1 2us : idle_cpu (irq_enter) | 1121 | bash-1994 1d..1 1us : source_load <-select_task_rq_fair |
682 | sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
683 | sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
684 | sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
685 | [...] | 1122 | [...] |
686 | sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | 1123 | bash-1994 1d..1 12us : irq_enter <-smp_apic_timer_interrupt |
687 | sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | 1124 | bash-1994 1d..1 12us : rcu_irq_enter <-irq_enter |
688 | sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | 1125 | bash-1994 1d..1 13us : add_preempt_count <-irq_enter |
689 | sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | 1126 | bash-1994 1d.h1 13us : exit_idle <-smp_apic_timer_interrupt |
690 | sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | 1127 | bash-1994 1d.h1 13us : hrtimer_interrupt <-smp_apic_timer_interrupt |
691 | sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | 1128 | bash-1994 1d.h1 13us : _raw_spin_lock <-hrtimer_interrupt |
692 | sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | 1129 | bash-1994 1d.h1 14us : add_preempt_count <-_raw_spin_lock |
693 | sshd-4261 0d..2 15us : do_softirq (irq_exit) | 1130 | bash-1994 1d.h2 14us : ktime_get_update_offsets <-hrtimer_interrupt |
694 | sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
695 | sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
696 | sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
697 | sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
698 | sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
699 | sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
700 | [...] | 1131 | [...] |
701 | sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | 1132 | bash-1994 1d.h1 35us : lapic_next_event <-clockevents_program_event |
702 | sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | 1133 | bash-1994 1d.h1 35us : irq_exit <-smp_apic_timer_interrupt |
703 | sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | 1134 | bash-1994 1d.h1 36us : sub_preempt_count <-irq_exit |
704 | sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | 1135 | bash-1994 1d..2 36us : do_softirq <-irq_exit |
705 | sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | 1136 | bash-1994 1d..2 36us : __do_softirq <-call_softirq |
706 | sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | 1137 | bash-1994 1d..2 36us : __local_bh_disable <-__do_softirq |
707 | sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | 1138 | bash-1994 1d.s2 37us : add_preempt_count <-_raw_spin_lock_irq |
708 | sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | 1139 | bash-1994 1d.s3 38us : _raw_spin_unlock <-run_timer_softirq |
1140 | bash-1994 1d.s3 39us : sub_preempt_count <-_raw_spin_unlock | ||
1141 | bash-1994 1d.s2 39us : call_timer_fn <-run_timer_softirq | ||
709 | [...] | 1142 | [...] |
710 | sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | 1143 | bash-1994 1dNs2 81us : cpu_needs_another_gp <-rcu_process_callbacks |
711 | sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | 1144 | bash-1994 1dNs2 82us : __local_bh_enable <-__do_softirq |
1145 | bash-1994 1dNs2 82us : sub_preempt_count <-__local_bh_enable | ||
1146 | bash-1994 1dN.2 82us : idle_cpu <-irq_exit | ||
1147 | bash-1994 1dN.2 83us : rcu_irq_exit <-irq_exit | ||
1148 | bash-1994 1dN.2 83us : sub_preempt_count <-irq_exit | ||
1149 | bash-1994 1.N.1 84us : _raw_spin_unlock_irqrestore <-task_rq_unlock | ||
1150 | bash-1994 1.N.1 84us+: trace_preempt_on <-task_rq_unlock | ||
1151 | bash-1994 1.N.1 104us : <stack trace> | ||
1152 | => sub_preempt_count | ||
1153 | => _raw_spin_unlock_irqrestore | ||
1154 | => task_rq_unlock | ||
1155 | => wake_up_new_task | ||
1156 | => do_fork | ||
1157 | => sys_clone | ||
1158 | => stub_clone | ||
712 | 1159 | ||
713 | 1160 | ||
714 | The above is an example of the preemptoff trace with | 1161 | The above is an example of the preemptoff trace with |
715 | ftrace_enabled set. Here we see that interrupts were disabled | 1162 | function-trace set. Here we see that interrupts were not disabled |
716 | the entire time. The irq_enter code lets us know that we entered | 1163 | the entire time. The irq_enter code lets us know that we entered |
717 | an interrupt 'h'. Before that, the functions being traced still | 1164 | an interrupt 'h'. Before that, the functions being traced still |
718 | show that it is not in an interrupt, but we can see from the | 1165 | show that it is not in an interrupt, but we can see from the |
719 | functions themselves that this is not the case. | 1166 | functions themselves that this is not the case. |
720 | 1167 | ||
721 | Notice that __do_softirq when called does not have a | ||
722 | preempt_count. It may seem that we missed a preempt enabling. | ||
723 | What really happened is that the preempt count is held on the | ||
724 | thread's stack and we switched to the softirq stack (4K stacks | ||
725 | in effect). The code does not copy the preempt count, but | ||
726 | because interrupts are disabled, we do not need to worry about | ||
727 | it. Having a tracer like this is good for letting people know | ||
728 | what really happens inside the kernel. | ||
729 | |||
730 | |||
731 | preemptirqsoff | 1168 | preemptirqsoff |
732 | -------------- | 1169 | -------------- |
733 | 1170 | ||
@@ -762,38 +1199,57 @@ tracer. | |||
762 | Again, using this trace is much like the irqsoff and preemptoff | 1199 | Again, using this trace is much like the irqsoff and preemptoff |
763 | tracers. | 1200 | tracers. |
764 | 1201 | ||
1202 | # echo 0 > options/function-trace | ||
765 | # echo preemptirqsoff > current_tracer | 1203 | # echo preemptirqsoff > current_tracer |
766 | # echo latency-format > trace_options | ||
767 | # echo 0 > tracing_max_latency | ||
768 | # echo 1 > tracing_on | 1204 | # echo 1 > tracing_on |
1205 | # echo 0 > tracing_max_latency | ||
769 | # ls -ltr | 1206 | # ls -ltr |
770 | [...] | 1207 | [...] |
771 | # echo 0 > tracing_on | 1208 | # echo 0 > tracing_on |
772 | # cat trace | 1209 | # cat trace |
773 | # tracer: preemptirqsoff | 1210 | # tracer: preemptirqsoff |
774 | # | 1211 | # |
775 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | 1212 | # preemptirqsoff latency trace v1.1.5 on 3.8.0-test+ |
776 | -------------------------------------------------------------------- | 1213 | # -------------------------------------------------------------------- |
777 | latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1214 | # latency: 100 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
778 | ----------------- | 1215 | # ----------------- |
779 | | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | 1216 | # | task: ls-2230 (uid:0 nice:0 policy:0 rt_prio:0) |
780 | ----------------- | 1217 | # ----------------- |
781 | => started at: apic_timer_interrupt | 1218 | # => started at: ata_scsi_queuecmd |
782 | => ended at: __do_softirq | 1219 | # => ended at: ata_scsi_queuecmd |
783 | 1220 | # | |
784 | # _------=> CPU# | 1221 | # |
785 | # / _-----=> irqs-off | 1222 | # _------=> CPU# |
786 | # | / _----=> need-resched | 1223 | # / _-----=> irqs-off |
787 | # || / _---=> hardirq/softirq | 1224 | # | / _----=> need-resched |
788 | # ||| / _--=> preempt-depth | 1225 | # || / _---=> hardirq/softirq |
789 | # |||| / | 1226 | # ||| / _--=> preempt-depth |
790 | # ||||| delay | 1227 | # |||| / delay |
791 | # cmd pid ||||| time | caller | 1228 | # cmd pid ||||| time | caller |
792 | # \ / ||||| \ | / | 1229 | # \ / ||||| \ | / |
793 | ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | 1230 | ls-2230 3d... 0us+: _raw_spin_lock_irqsave <-ata_scsi_queuecmd |
794 | ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | 1231 | ls-2230 3...1 100us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd |
795 | ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | 1232 | ls-2230 3...1 101us+: trace_preempt_on <-ata_scsi_queuecmd |
796 | 1233 | ls-2230 3...1 111us : <stack trace> | |
1234 | => sub_preempt_count | ||
1235 | => _raw_spin_unlock_irqrestore | ||
1236 | => ata_scsi_queuecmd | ||
1237 | => scsi_dispatch_cmd | ||
1238 | => scsi_request_fn | ||
1239 | => __blk_run_queue_uncond | ||
1240 | => __blk_run_queue | ||
1241 | => blk_queue_bio | ||
1242 | => generic_make_request | ||
1243 | => submit_bio | ||
1244 | => submit_bh | ||
1245 | => ext3_bread | ||
1246 | => ext3_dir_bread | ||
1247 | => htree_dirblock_to_tree | ||
1248 | => ext3_htree_fill_tree | ||
1249 | => ext3_readdir | ||
1250 | => vfs_readdir | ||
1251 | => sys_getdents | ||
1252 | => system_call_fastpath | ||
797 | 1253 | ||
798 | 1254 | ||
799 | The trace_hardirqs_off_thunk is called from assembly on x86 when | 1255 | The trace_hardirqs_off_thunk is called from assembly on x86 when |
@@ -802,105 +1258,158 @@ function tracing, we do not know if interrupts were enabled | |||
802 | within the preemption points. We do see that it started with | 1258 | within the preemption points. We do see that it started with |
803 | preemption enabled. | 1259 | preemption enabled. |
804 | 1260 | ||
805 | Here is a trace with ftrace_enabled set: | 1261 | Here is a trace with function-trace set: |
806 | |||
807 | 1262 | ||
808 | # tracer: preemptirqsoff | 1263 | # tracer: preemptirqsoff |
809 | # | 1264 | # |
810 | preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | 1265 | # preemptirqsoff latency trace v1.1.5 on 3.8.0-test+ |
811 | -------------------------------------------------------------------- | 1266 | # -------------------------------------------------------------------- |
812 | latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1267 | # latency: 161 us, #339/339, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
813 | ----------------- | 1268 | # ----------------- |
814 | | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | 1269 | # | task: ls-2269 (uid:0 nice:0 policy:0 rt_prio:0) |
815 | ----------------- | 1270 | # ----------------- |
816 | => started at: write_chan | 1271 | # => started at: schedule |
817 | => ended at: __do_softirq | 1272 | # => ended at: mutex_unlock |
818 | 1273 | # | |
819 | # _------=> CPU# | 1274 | # |
820 | # / _-----=> irqs-off | 1275 | # _------=> CPU# |
821 | # | / _----=> need-resched | 1276 | # / _-----=> irqs-off |
822 | # || / _---=> hardirq/softirq | 1277 | # | / _----=> need-resched |
823 | # ||| / _--=> preempt-depth | 1278 | # || / _---=> hardirq/softirq |
824 | # |||| / | 1279 | # ||| / _--=> preempt-depth |
825 | # ||||| delay | 1280 | # |||| / delay |
826 | # cmd pid ||||| time | caller | 1281 | # cmd pid ||||| time | caller |
827 | # \ / ||||| \ | / | 1282 | # \ / ||||| \ | / |
828 | ls-4473 0.N.. 0us : preempt_schedule (write_chan) | 1283 | kworker/-59 3...1 0us : __schedule <-schedule |
829 | ls-4473 0dN.1 1us : _spin_lock (schedule) | 1284 | kworker/-59 3d..1 0us : rcu_preempt_qs <-rcu_note_context_switch |
830 | ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | 1285 | kworker/-59 3d..1 1us : add_preempt_count <-_raw_spin_lock_irq |
831 | ls-4473 0d..2 2us : put_prev_task_fair (schedule) | 1286 | kworker/-59 3d..2 1us : deactivate_task <-__schedule |
832 | [...] | 1287 | kworker/-59 3d..2 1us : dequeue_task <-deactivate_task |
833 | ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | 1288 | kworker/-59 3d..2 2us : update_rq_clock <-dequeue_task |
834 | ls-4473 0d..2 13us : __switch_to (schedule) | 1289 | kworker/-59 3d..2 2us : dequeue_task_fair <-dequeue_task |
835 | sshd-4261 0d..2 14us : finish_task_switch (schedule) | 1290 | kworker/-59 3d..2 2us : update_curr <-dequeue_task_fair |
836 | sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | 1291 | kworker/-59 3d..2 2us : update_min_vruntime <-update_curr |
837 | sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | 1292 | kworker/-59 3d..2 3us : cpuacct_charge <-update_curr |
838 | sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | 1293 | kworker/-59 3d..2 3us : __rcu_read_lock <-cpuacct_charge |
839 | sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | 1294 | kworker/-59 3d..2 3us : __rcu_read_unlock <-cpuacct_charge |
840 | sshd-4261 0d..2 17us : irq_enter (do_IRQ) | 1295 | kworker/-59 3d..2 3us : update_cfs_rq_blocked_load <-dequeue_task_fair |
841 | sshd-4261 0d..2 17us : idle_cpu (irq_enter) | 1296 | kworker/-59 3d..2 4us : clear_buddies <-dequeue_task_fair |
842 | sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | 1297 | kworker/-59 3d..2 4us : account_entity_dequeue <-dequeue_task_fair |
843 | sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | 1298 | kworker/-59 3d..2 4us : update_min_vruntime <-dequeue_task_fair |
844 | sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | 1299 | kworker/-59 3d..2 4us : update_cfs_shares <-dequeue_task_fair |
845 | sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | 1300 | kworker/-59 3d..2 5us : hrtick_update <-dequeue_task_fair |
846 | sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | 1301 | kworker/-59 3d..2 5us : wq_worker_sleeping <-__schedule |
847 | sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | 1302 | kworker/-59 3d..2 5us : kthread_data <-wq_worker_sleeping |
848 | sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | 1303 | kworker/-59 3d..2 5us : put_prev_task_fair <-__schedule |
849 | [...] | 1304 | kworker/-59 3d..2 6us : pick_next_task_fair <-pick_next_task |
850 | sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | 1305 | kworker/-59 3d..2 6us : clear_buddies <-pick_next_task_fair |
851 | sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | 1306 | kworker/-59 3d..2 6us : set_next_entity <-pick_next_task_fair |
852 | sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | 1307 | kworker/-59 3d..2 6us : update_stats_wait_end <-set_next_entity |
853 | sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | 1308 | ls-2269 3d..2 7us : finish_task_switch <-__schedule |
854 | sshd-4261 0d..3 30us : do_softirq (irq_exit) | 1309 | ls-2269 3d..2 7us : _raw_spin_unlock_irq <-finish_task_switch |
855 | sshd-4261 0d... 30us : __do_softirq (do_softirq) | 1310 | ls-2269 3d..2 8us : do_IRQ <-ret_from_intr |
856 | sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | 1311 | ls-2269 3d..2 8us : irq_enter <-do_IRQ |
857 | sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | 1312 | ls-2269 3d..2 8us : rcu_irq_enter <-irq_enter |
858 | sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | 1313 | ls-2269 3d..2 9us : add_preempt_count <-irq_enter |
1314 | ls-2269 3d.h2 9us : exit_idle <-do_IRQ | ||
859 | [...] | 1315 | [...] |
860 | sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | 1316 | ls-2269 3d.h3 20us : sub_preempt_count <-_raw_spin_unlock |
861 | sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | 1317 | ls-2269 3d.h2 20us : irq_exit <-do_IRQ |
862 | sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | 1318 | ls-2269 3d.h2 21us : sub_preempt_count <-irq_exit |
863 | sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | 1319 | ls-2269 3d..3 21us : do_softirq <-irq_exit |
864 | sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | 1320 | ls-2269 3d..3 21us : __do_softirq <-call_softirq |
865 | sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | 1321 | ls-2269 3d..3 21us+: __local_bh_disable <-__do_softirq |
866 | sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | 1322 | ls-2269 3d.s4 29us : sub_preempt_count <-_local_bh_enable_ip |
867 | sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | 1323 | ls-2269 3d.s5 29us : sub_preempt_count <-_local_bh_enable_ip |
868 | sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | 1324 | ls-2269 3d.s5 31us : do_IRQ <-ret_from_intr |
1325 | ls-2269 3d.s5 31us : irq_enter <-do_IRQ | ||
1326 | ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter | ||
869 | [...] | 1327 | [...] |
870 | sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | 1328 | ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter |
871 | sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | 1329 | ls-2269 3d.s5 32us : add_preempt_count <-irq_enter |
872 | sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | 1330 | ls-2269 3d.H5 32us : exit_idle <-do_IRQ |
873 | sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | 1331 | ls-2269 3d.H5 32us : handle_irq <-do_IRQ |
874 | sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | 1332 | ls-2269 3d.H5 32us : irq_to_desc <-handle_irq |
875 | sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | 1333 | ls-2269 3d.H5 33us : handle_fasteoi_irq <-handle_irq |
876 | sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
877 | sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
878 | sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
879 | sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
880 | sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
881 | [...] | 1334 | [...] |
882 | sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | 1335 | ls-2269 3d.s5 158us : _raw_spin_unlock_irqrestore <-rtl8139_poll |
883 | sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | 1336 | ls-2269 3d.s3 158us : net_rps_action_and_irq_enable.isra.65 <-net_rx_action |
884 | sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | 1337 | ls-2269 3d.s3 159us : __local_bh_enable <-__do_softirq |
885 | sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | 1338 | ls-2269 3d.s3 159us : sub_preempt_count <-__local_bh_enable |
886 | sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | 1339 | ls-2269 3d..3 159us : idle_cpu <-irq_exit |
887 | sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | 1340 | ls-2269 3d..3 159us : rcu_irq_exit <-irq_exit |
888 | sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | 1341 | ls-2269 3d..3 160us : sub_preempt_count <-irq_exit |
889 | 1342 | ls-2269 3d... 161us : __mutex_unlock_slowpath <-mutex_unlock | |
890 | 1343 | ls-2269 3d... 162us+: trace_hardirqs_on <-mutex_unlock | |
891 | This is a very interesting trace. It started with the preemption | 1344 | ls-2269 3d... 186us : <stack trace> |
892 | of the ls task. We see that the task had the "need_resched" bit | 1345 | => __mutex_unlock_slowpath |
893 | set via the 'N' in the trace. Interrupts were disabled before | 1346 | => mutex_unlock |
894 | the spin_lock at the beginning of the trace. We see that a | 1347 | => process_output |
895 | schedule took place to run sshd. When the interrupts were | 1348 | => n_tty_write |
896 | enabled, we took an interrupt. On return from the interrupt | 1349 | => tty_write |
897 | handler, the softirq ran. We took another interrupt while | 1350 | => vfs_write |
898 | running the softirq as we see from the capital 'H'. | 1351 | => sys_write |
1352 | => system_call_fastpath | ||
1353 | |||
1354 | This is an interesting trace. It started with kworker running and | ||
1355 | scheduling out and ls taking over. But as soon as ls released the | ||
1356 | rq lock and enabled interrupts (but not preemption) an interrupt | ||
1357 | triggered. When the interrupt finished, it started running softirqs. | ||
1358 | But while the softirq was running, another interrupt triggered. | ||
1359 | When an interrupt is running inside a softirq, the annotation is 'H'. | ||
899 | 1360 | ||
900 | 1361 | ||
901 | wakeup | 1362 | wakeup |
902 | ------ | 1363 | ------ |
903 | 1364 | ||
1365 | One common case that people are interested in tracing is the | ||
1366 | time it takes for a task that is woken to actually wake up. | ||
1367 | Now for non Real-Time tasks, this can be arbitrary. But tracing | ||
1368 | it nonetheless can be interesting. | ||
1369 | |||
1370 | Without function tracing: | ||
1371 | |||
1372 | # echo 0 > options/function-trace | ||
1373 | # echo wakeup > current_tracer | ||
1374 | # echo 1 > tracing_on | ||
1375 | # echo 0 > tracing_max_latency | ||
1376 | # chrt -f 5 sleep 1 | ||
1377 | # echo 0 > tracing_on | ||
1378 | # cat trace | ||
1379 | # tracer: wakeup | ||
1380 | # | ||
1381 | # wakeup latency trace v1.1.5 on 3.8.0-test+ | ||
1382 | # -------------------------------------------------------------------- | ||
1383 | # latency: 15 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) | ||
1384 | # ----------------- | ||
1385 | # | task: kworker/3:1H-312 (uid:0 nice:-20 policy:0 rt_prio:0) | ||
1386 | # ----------------- | ||
1387 | # | ||
1388 | # _------=> CPU# | ||
1389 | # / _-----=> irqs-off | ||
1390 | # | / _----=> need-resched | ||
1391 | # || / _---=> hardirq/softirq | ||
1392 | # ||| / _--=> preempt-depth | ||
1393 | # |||| / delay | ||
1394 | # cmd pid ||||| time | caller | ||
1395 | # \ / ||||| \ | / | ||
1396 | <idle>-0 3dNs7 0us : 0:120:R + [003] 312:100:R kworker/3:1H | ||
1397 | <idle>-0 3dNs7 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up | ||
1398 | <idle>-0 3d..3 15us : __schedule <-schedule | ||
1399 | <idle>-0 3d..3 15us : 0:120:R ==> [003] 312:100:R kworker/3:1H | ||
1400 | |||
1401 | The tracer only traces the highest priority task in the system | ||
1402 | to avoid tracing the normal circumstances. Here we see that | ||
1403 | the kworker with a nice priority of -20 (not very nice), took | ||
1404 | just 15 microseconds from the time it woke up, to the time it | ||
1405 | ran. | ||
1406 | |||
1407 | Non Real-Time tasks are not that interesting. A more interesting | ||
1408 | trace is to concentrate only on Real-Time tasks. | ||
1409 | |||
1410 | wakeup_rt | ||
1411 | --------- | ||
1412 | |||
904 | In a Real-Time environment it is very important to know the | 1413 | In a Real-Time environment it is very important to know the |
905 | wakeup time it takes for the highest priority task that is woken | 1414 | wakeup time it takes for the highest priority task that is woken |
906 | up to the time that it executes. This is also known as "schedule | 1415 | up to the time that it executes. This is also known as "schedule |
@@ -914,124 +1423,229 @@ Real-Time environments are interested in the worst case latency. | |||
914 | That is the longest latency it takes for something to happen, | 1423 | That is the longest latency it takes for something to happen, |
915 | and not the average. We can have a very fast scheduler that may | 1424 | and not the average. We can have a very fast scheduler that may |
916 | only have a large latency once in a while, but that would not | 1425 | only have a large latency once in a while, but that would not |
917 | work well with Real-Time tasks. The wakeup tracer was designed | 1426 | work well with Real-Time tasks. The wakeup_rt tracer was designed |
918 | to record the worst case wakeups of RT tasks. Non-RT tasks are | 1427 | to record the worst case wakeups of RT tasks. Non-RT tasks are |
919 | not recorded because the tracer only records one worst case and | 1428 | not recorded because the tracer only records one worst case and |
920 | tracing non-RT tasks that are unpredictable will overwrite the | 1429 | tracing non-RT tasks that are unpredictable will overwrite the |
921 | worst case latency of RT tasks. | 1430 | worst case latency of RT tasks (just run the normal wakeup |
1431 | tracer for a while to see that effect). | ||
922 | 1432 | ||
923 | Since this tracer only deals with RT tasks, we will run this | 1433 | Since this tracer only deals with RT tasks, we will run this |
924 | slightly differently than we did with the previous tracers. | 1434 | slightly differently than we did with the previous tracers. |
925 | Instead of performing an 'ls', we will run 'sleep 1' under | 1435 | Instead of performing an 'ls', we will run 'sleep 1' under |
926 | 'chrt' which changes the priority of the task. | 1436 | 'chrt' which changes the priority of the task. |
927 | 1437 | ||
928 | # echo wakeup > current_tracer | 1438 | # echo 0 > options/function-trace |
929 | # echo latency-format > trace_options | 1439 | # echo wakeup_rt > current_tracer |
930 | # echo 0 > tracing_max_latency | ||
931 | # echo 1 > tracing_on | 1440 | # echo 1 > tracing_on |
1441 | # echo 0 > tracing_max_latency | ||
932 | # chrt -f 5 sleep 1 | 1442 | # chrt -f 5 sleep 1 |
933 | # echo 0 > tracing_on | 1443 | # echo 0 > tracing_on |
934 | # cat trace | 1444 | # cat trace |
935 | # tracer: wakeup | 1445 | # tracer: wakeup |
936 | # | 1446 | # |
937 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | 1447 | # tracer: wakeup_rt |
938 | -------------------------------------------------------------------- | 1448 | # |
939 | latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1449 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ |
940 | ----------------- | 1450 | # -------------------------------------------------------------------- |
941 | | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | 1451 | # latency: 5 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
942 | ----------------- | 1452 | # ----------------- |
943 | 1453 | # | task: sleep-2389 (uid:0 nice:0 policy:1 rt_prio:5) | |
944 | # _------=> CPU# | 1454 | # ----------------- |
945 | # / _-----=> irqs-off | 1455 | # |
946 | # | / _----=> need-resched | 1456 | # _------=> CPU# |
947 | # || / _---=> hardirq/softirq | 1457 | # / _-----=> irqs-off |
948 | # ||| / _--=> preempt-depth | 1458 | # | / _----=> need-resched |
949 | # |||| / | 1459 | # || / _---=> hardirq/softirq |
950 | # ||||| delay | 1460 | # ||| / _--=> preempt-depth |
951 | # cmd pid ||||| time | caller | 1461 | # |||| / delay |
952 | # \ / ||||| \ | / | 1462 | # cmd pid ||||| time | caller |
953 | <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | 1463 | # \ / ||||| \ | / |
954 | <idle>-0 1d..4 4us : schedule (cpu_idle) | 1464 | <idle>-0 3d.h4 0us : 0:120:R + [003] 2389: 94:R sleep |
955 | 1465 | <idle>-0 3d.h4 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up | |
956 | 1466 | <idle>-0 3d..3 5us : __schedule <-schedule | |
957 | Running this on an idle system, we see that it only took 4 | 1467 | <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep |
958 | microseconds to perform the task switch. Note, since the trace | 1468 | |
959 | marker in the schedule is before the actual "switch", we stop | 1469 | |
960 | the tracing when the recorded task is about to schedule in. This | 1470 | Running this on an idle system, we see that it only took 5 microseconds |
961 | may change if we add a new marker at the end of the scheduler. | 1471 | to perform the task switch. Note, since the trace point in the schedule |
962 | 1472 | is before the actual "switch", we stop the tracing when the recorded task | |
963 | Notice that the recorded task is 'sleep' with the PID of 4901 | 1473 | is about to schedule in. This may change if we add a new marker at the |
1474 | end of the scheduler. | ||
1475 | |||
1476 | Notice that the recorded task is 'sleep' with the PID of 2389 | ||
964 | and it has an rt_prio of 5. This priority is user-space priority | 1477 | and it has an rt_prio of 5. This priority is user-space priority |
965 | and not the internal kernel priority. The policy is 1 for | 1478 | and not the internal kernel priority. The policy is 1 for |
966 | SCHED_FIFO and 2 for SCHED_RR. | 1479 | SCHED_FIFO and 2 for SCHED_RR. |
967 | 1480 | ||
968 | Doing the same with chrt -r 5 and ftrace_enabled set. | 1481 | Note that the trace data shows the internal priority (99 - rtprio). |
969 | 1482 | ||
970 | # tracer: wakeup | 1483 | <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep |
1484 | |||
1485 | The 0:120:R means idle was running with a nice priority of 0 (120 - 120) | ||
1486 | and in the running state 'R'. The sleep task was scheduled in with | ||
1487 | 2389: 94:R. That is, the priority is the kernel rtprio (99 - 5 = 94) | ||
1488 | and it too is in the running state. | ||
1489 | |||
1490 | Doing the same with chrt -r 5 and function-trace set. | ||
1491 | |||
1492 | # echo 1 > options/function-trace | ||
1493 | |||
1494 | # tracer: wakeup_rt | ||
971 | # | 1495 | # |
972 | wakeup latency trace v1.1.5 on 2.6.26-rc8 | 1496 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ |
973 | -------------------------------------------------------------------- | 1497 | # -------------------------------------------------------------------- |
974 | latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | 1498 | # latency: 29 us, #85/85, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) |
975 | ----------------- | 1499 | # ----------------- |
976 | | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | 1500 | # | task: sleep-2448 (uid:0 nice:0 policy:1 rt_prio:5) |
977 | ----------------- | 1501 | # ----------------- |
978 | 1502 | # | |
979 | # _------=> CPU# | 1503 | # _------=> CPU# |
980 | # / _-----=> irqs-off | 1504 | # / _-----=> irqs-off |
981 | # | / _----=> need-resched | 1505 | # | / _----=> need-resched |
982 | # || / _---=> hardirq/softirq | 1506 | # || / _---=> hardirq/softirq |
983 | # ||| / _--=> preempt-depth | 1507 | # ||| / _--=> preempt-depth |
984 | # |||| / | 1508 | # |||| / delay |
985 | # ||||| delay | 1509 | # cmd pid ||||| time | caller |
986 | # cmd pid ||||| time | caller | 1510 | # \ / ||||| \ | / |
987 | # \ / ||||| \ | / | 1511 | <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep |
988 | ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | 1512 | <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up |
989 | ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | 1513 | <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup |
990 | ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | 1514 | <idle>-0 3d.h3 3us : resched_task <-check_preempt_curr |
991 | ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | 1515 | <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup |
992 | ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | 1516 | <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up |
993 | ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | 1517 | <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock |
994 | ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | 1518 | <idle>-0 3dNh2 5us : ttwu_stat <-try_to_wake_up |
995 | ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | 1519 | <idle>-0 3dNh2 5us : _raw_spin_unlock_irqrestore <-try_to_wake_up |
996 | [...] | 1520 | <idle>-0 3dNh2 6us : sub_preempt_count <-_raw_spin_unlock_irqrestore |
997 | ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | 1521 | <idle>-0 3dNh1 6us : _raw_spin_lock <-__run_hrtimer |
998 | ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | 1522 | <idle>-0 3dNh1 6us : add_preempt_count <-_raw_spin_lock |
999 | ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | 1523 | <idle>-0 3dNh2 7us : _raw_spin_unlock <-hrtimer_interrupt |
1000 | ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | 1524 | <idle>-0 3dNh2 7us : sub_preempt_count <-_raw_spin_unlock |
1001 | [...] | 1525 | <idle>-0 3dNh1 7us : tick_program_event <-hrtimer_interrupt |
1002 | ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | 1526 | <idle>-0 3dNh1 7us : clockevents_program_event <-tick_program_event |
1003 | ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | 1527 | <idle>-0 3dNh1 8us : ktime_get <-clockevents_program_event |
1004 | ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | 1528 | <idle>-0 3dNh1 8us : lapic_next_event <-clockevents_program_event |
1005 | ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | 1529 | <idle>-0 3dNh1 8us : irq_exit <-smp_apic_timer_interrupt |
1006 | ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | 1530 | <idle>-0 3dNh1 9us : sub_preempt_count <-irq_exit |
1007 | ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | 1531 | <idle>-0 3dN.2 9us : idle_cpu <-irq_exit |
1008 | ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | 1532 | <idle>-0 3dN.2 9us : rcu_irq_exit <-irq_exit |
1009 | ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | 1533 | <idle>-0 3dN.2 10us : rcu_eqs_enter_common.isra.45 <-rcu_irq_exit |
1010 | ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | 1534 | <idle>-0 3dN.2 10us : sub_preempt_count <-irq_exit |
1011 | ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | 1535 | <idle>-0 3.N.1 11us : rcu_idle_exit <-cpu_idle |
1012 | ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | 1536 | <idle>-0 3dN.1 11us : rcu_eqs_exit_common.isra.43 <-rcu_idle_exit |
1013 | ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | 1537 | <idle>-0 3.N.1 11us : tick_nohz_idle_exit <-cpu_idle |
1014 | ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | 1538 | <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit |
1015 | ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | 1539 | <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit |
1016 | [...] | 1540 | <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit |
1017 | ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | 1541 | <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit |
1018 | ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | 1542 | <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz |
1019 | ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | 1543 | <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock |
1020 | ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | 1544 | <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz |
1021 | ksoftirq-7 1d..4 50us : schedule (__cond_resched) | 1545 | <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load |
1022 | 1546 | <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz | |
1023 | The interrupt went off while running ksoftirqd. This task runs | 1547 | <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock |
1024 | at SCHED_OTHER. Why did not we see the 'N' set early? This may | 1548 | <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit |
1025 | be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K | 1549 | <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit |
1026 | stacks configured, the interrupt and softirq run with their own | 1550 | <idle>-0 3dN.1 15us : hrtimer_cancel <-tick_nohz_idle_exit |
1027 | stack. Some information is held on the top of the task's stack | 1551 | <idle>-0 3dN.1 15us : hrtimer_try_to_cancel <-hrtimer_cancel |
1028 | (need_resched and preempt_count are both stored there). The | 1552 | <idle>-0 3dN.1 16us : lock_hrtimer_base.isra.18 <-hrtimer_try_to_cancel |
1029 | setting of the NEED_RESCHED bit is done directly to the task's | 1553 | <idle>-0 3dN.1 16us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18 |
1030 | stack, but the reading of the NEED_RESCHED is done by looking at | 1554 | <idle>-0 3dN.1 16us : add_preempt_count <-_raw_spin_lock_irqsave |
1031 | the current stack, which in this case is the stack for the hard | 1555 | <idle>-0 3dN.2 17us : __remove_hrtimer <-remove_hrtimer.part.16 |
1032 | interrupt. This hides the fact that NEED_RESCHED has been set. | 1556 | <idle>-0 3dN.2 17us : hrtimer_force_reprogram <-__remove_hrtimer |
1033 | We do not see the 'N' until we switch back to the task's | 1557 | <idle>-0 3dN.2 17us : tick_program_event <-hrtimer_force_reprogram |
1034 | assigned stack. | 1558 | <idle>-0 3dN.2 18us : clockevents_program_event <-tick_program_event |
1559 | <idle>-0 3dN.2 18us : ktime_get <-clockevents_program_event | ||
1560 | <idle>-0 3dN.2 18us : lapic_next_event <-clockevents_program_event | ||
1561 | <idle>-0 3dN.2 19us : _raw_spin_unlock_irqrestore <-hrtimer_try_to_cancel | ||
1562 | <idle>-0 3dN.2 19us : sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
1563 | <idle>-0 3dN.1 19us : hrtimer_forward <-tick_nohz_idle_exit | ||
1564 | <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward | ||
1565 | <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward | ||
1566 | <idle>-0 3dN.1 20us : hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11 | ||
1567 | <idle>-0 3dN.1 20us : __hrtimer_start_range_ns <-hrtimer_start_range_ns | ||
1568 | <idle>-0 3dN.1 21us : lock_hrtimer_base.isra.18 <-__hrtimer_start_range_ns | ||
1569 | <idle>-0 3dN.1 21us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18 | ||
1570 | <idle>-0 3dN.1 21us : add_preempt_count <-_raw_spin_lock_irqsave | ||
1571 | <idle>-0 3dN.2 22us : ktime_add_safe <-__hrtimer_start_range_ns | ||
1572 | <idle>-0 3dN.2 22us : enqueue_hrtimer <-__hrtimer_start_range_ns | ||
1573 | <idle>-0 3dN.2 22us : tick_program_event <-__hrtimer_start_range_ns | ||
1574 | <idle>-0 3dN.2 23us : clockevents_program_event <-tick_program_event | ||
1575 | <idle>-0 3dN.2 23us : ktime_get <-clockevents_program_event | ||
1576 | <idle>-0 3dN.2 23us : lapic_next_event <-clockevents_program_event | ||
1577 | <idle>-0 3dN.2 24us : _raw_spin_unlock_irqrestore <-__hrtimer_start_range_ns | ||
1578 | <idle>-0 3dN.2 24us : sub_preempt_count <-_raw_spin_unlock_irqrestore | ||
1579 | <idle>-0 3dN.1 24us : account_idle_ticks <-tick_nohz_idle_exit | ||
1580 | <idle>-0 3dN.1 24us : account_idle_time <-account_idle_ticks | ||
1581 | <idle>-0 3.N.1 25us : sub_preempt_count <-cpu_idle | ||
1582 | <idle>-0 3.N.. 25us : schedule <-cpu_idle | ||
1583 | <idle>-0 3.N.. 25us : __schedule <-preempt_schedule | ||
1584 | <idle>-0 3.N.. 26us : add_preempt_count <-__schedule | ||
1585 | <idle>-0 3.N.1 26us : rcu_note_context_switch <-__schedule | ||
1586 | <idle>-0 3.N.1 26us : rcu_sched_qs <-rcu_note_context_switch | ||
1587 | <idle>-0 3dN.1 27us : rcu_preempt_qs <-rcu_note_context_switch | ||
1588 | <idle>-0 3.N.1 27us : _raw_spin_lock_irq <-__schedule | ||
1589 | <idle>-0 3dN.1 27us : add_preempt_count <-_raw_spin_lock_irq | ||
1590 | <idle>-0 3dN.2 28us : put_prev_task_idle <-__schedule | ||
1591 | <idle>-0 3dN.2 28us : pick_next_task_stop <-pick_next_task | ||
1592 | <idle>-0 3dN.2 28us : pick_next_task_rt <-pick_next_task | ||
1593 | <idle>-0 3dN.2 29us : dequeue_pushable_task <-pick_next_task_rt | ||
1594 | <idle>-0 3d..3 29us : __schedule <-preempt_schedule | ||
1595 | <idle>-0 3d..3 30us : 0:120:R ==> [003] 2448: 94:R sleep | ||
1596 | |||
1597 | This isn't that big of a trace, even with function tracing enabled, | ||
1598 | so I included the entire trace. | ||
1599 | |||
1600 | The interrupt went off while the system was idle. Somewhere | ||
1601 | before task_woken_rt() was called, the NEED_RESCHED flag was set, | ||
1602 | this is indicated by the first occurrence of the 'N' flag. | ||
1603 | |||
1604 | Latency tracing and events | ||
1605 | -------------------------- | ||
1606 | Function tracing can induce a much larger latency, but without | ||
1607 | seeing what happens within the latency it is hard to know what | ||
1608 | caused it. There is a middle ground, and that is with enabling | ||
1609 | events. | ||
1610 | |||
1611 | # echo 0 > options/function-trace | ||
1612 | # echo wakeup_rt > current_tracer | ||
1613 | # echo 1 > events/enable | ||
1614 | # echo 1 > tracing_on | ||
1615 | # echo 0 > tracing_max_latency | ||
1616 | # chrt -f 5 sleep 1 | ||
1617 | # echo 0 > tracing_on | ||
1618 | # cat trace | ||
1619 | # tracer: wakeup_rt | ||
1620 | # | ||
1621 | # wakeup_rt latency trace v1.1.5 on 3.8.0-test+ | ||
1622 | # -------------------------------------------------------------------- | ||
1623 | # latency: 6 us, #12/12, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4) | ||
1624 | # ----------------- | ||
1625 | # | task: sleep-5882 (uid:0 nice:0 policy:1 rt_prio:5) | ||
1626 | # ----------------- | ||
1627 | # | ||
1628 | # _------=> CPU# | ||
1629 | # / _-----=> irqs-off | ||
1630 | # | / _----=> need-resched | ||
1631 | # || / _---=> hardirq/softirq | ||
1632 | # ||| / _--=> preempt-depth | ||
1633 | # |||| / delay | ||
1634 | # cmd pid ||||| time | caller | ||
1635 | # \ / ||||| \ | / | ||
1636 | <idle>-0 2d.h4 0us : 0:120:R + [002] 5882: 94:R sleep | ||
1637 | <idle>-0 2d.h4 0us : ttwu_do_activate.constprop.87 <-try_to_wake_up | ||
1638 | <idle>-0 2d.h4 1us : sched_wakeup: comm=sleep pid=5882 prio=94 success=1 target_cpu=002 | ||
1639 | <idle>-0 2dNh2 1us : hrtimer_expire_exit: hrtimer=ffff88007796feb8 | ||
1640 | <idle>-0 2.N.2 2us : power_end: cpu_id=2 | ||
1641 | <idle>-0 2.N.2 3us : cpu_idle: state=4294967295 cpu_id=2 | ||
1642 | <idle>-0 2dN.3 4us : hrtimer_cancel: hrtimer=ffff88007d50d5e0 | ||
1643 | <idle>-0 2dN.3 4us : hrtimer_start: hrtimer=ffff88007d50d5e0 function=tick_sched_timer expires=34311211000000 softexpires=34311211000000 | ||
1644 | <idle>-0 2.N.2 5us : rcu_utilization: Start context switch | ||
1645 | <idle>-0 2.N.2 5us : rcu_utilization: End context switch | ||
1646 | <idle>-0 2d..3 6us : __schedule <-schedule | ||
1647 | <idle>-0 2d..3 6us : 0:120:R ==> [002] 5882: 94:R sleep | ||
1648 | |||
1035 | 1649 | ||
1036 | function | 1650 | function |
1037 | -------- | 1651 | -------- |
@@ -1039,6 +1653,7 @@ function | |||
1039 | This tracer is the function tracer. Enabling the function tracer | 1653 | This tracer is the function tracer. Enabling the function tracer |
1040 | can be done from the debug file system. Make sure the | 1654 | can be done from the debug file system. Make sure the |
1041 | ftrace_enabled is set; otherwise this tracer is a nop. | 1655 | ftrace_enabled is set; otherwise this tracer is a nop. |
1656 | See the "ftrace_enabled" section below. | ||
1042 | 1657 | ||
1043 | # sysctl kernel.ftrace_enabled=1 | 1658 | # sysctl kernel.ftrace_enabled=1 |
1044 | # echo function > current_tracer | 1659 | # echo function > current_tracer |
@@ -1048,23 +1663,23 @@ ftrace_enabled is set; otherwise this tracer is a nop. | |||
1048 | # cat trace | 1663 | # cat trace |
1049 | # tracer: function | 1664 | # tracer: function |
1050 | # | 1665 | # |
1051 | # TASK-PID CPU# TIMESTAMP FUNCTION | 1666 | # entries-in-buffer/entries-written: 24799/24799 #P:4 |
1052 | # | | | | | | 1667 | # |
1053 | bash-4003 [00] 123.638713: finish_task_switch <-schedule | 1668 | # _-----=> irqs-off |
1054 | bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | 1669 | # / _----=> need-resched |
1055 | bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | 1670 | # | / _---=> hardirq/softirq |
1056 | bash-4003 [00] 123.638715: hrtick_set <-schedule | 1671 | # || / _--=> preempt-depth |
1057 | bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | 1672 | # ||| / delay |
1058 | bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | 1673 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1059 | bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | 1674 | # | | | |||| | | |
1060 | bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | 1675 | bash-1994 [002] .... 3082.063030: mutex_unlock <-rb_simple_write |
1061 | bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | 1676 | bash-1994 [002] .... 3082.063031: __mutex_unlock_slowpath <-mutex_unlock |
1062 | bash-4003 [00] 123.638718: sub_preempt_count <-schedule | 1677 | bash-1994 [002] .... 3082.063031: __fsnotify_parent <-fsnotify_modify |
1063 | bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | 1678 | bash-1994 [002] .... 3082.063032: fsnotify <-fsnotify_modify |
1064 | bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | 1679 | bash-1994 [002] .... 3082.063032: __srcu_read_lock <-fsnotify |
1065 | bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | 1680 | bash-1994 [002] .... 3082.063032: add_preempt_count <-__srcu_read_lock |
1066 | bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | 1681 | bash-1994 [002] ...1 3082.063032: sub_preempt_count <-__srcu_read_lock |
1067 | bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | 1682 | bash-1994 [002] .... 3082.063033: __srcu_read_unlock <-fsnotify |
1068 | [...] | 1683 | [...] |
1069 | 1684 | ||
1070 | 1685 | ||
@@ -1214,79 +1829,19 @@ int main (int argc, char **argv) | |||
1214 | return 0; | 1829 | return 0; |
1215 | } | 1830 | } |
1216 | 1831 | ||
1832 | Or this simple script! | ||
1217 | 1833 | ||
1218 | hw-branch-tracer (x86 only) | 1834 | ------ |
1219 | --------------------------- | 1835 | #!/bin/bash |
1220 | 1836 | ||
1221 | This tracer uses the x86 last branch tracing hardware feature to | 1837 | debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts` |
1222 | collect a branch trace on all cpus with relatively low overhead. | 1838 | echo nop > $debugfs/tracing/current_tracer |
1223 | 1839 | echo 0 > $debugfs/tracing/tracing_on | |
1224 | The tracer uses a fixed-size circular buffer per cpu and only | 1840 | echo $$ > $debugfs/tracing/set_ftrace_pid |
1225 | traces ring 0 branches. The trace file dumps that buffer in the | 1841 | echo function > $debugfs/tracing/current_tracer |
1226 | following format: | 1842 | echo 1 > $debugfs/tracing/tracing_on |
1227 | 1843 | exec "$@" | |
1228 | # tracer: hw-branch-tracer | 1844 | ------ |
1229 | # | ||
1230 | # CPU# TO <- FROM | ||
1231 | 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6 | ||
1232 | 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a | ||
1233 | 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf | ||
1234 | 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf | ||
1235 | 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a | ||
1236 | 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf | ||
1237 | |||
1238 | |||
1239 | The tracer may be used to dump the trace for the oops'ing cpu on | ||
1240 | a kernel oops into the system log. To enable this, | ||
1241 | ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one | ||
1242 | can either use the sysctl function or set it via the proc system | ||
1243 | interface. | ||
1244 | |||
1245 | sysctl kernel.ftrace_dump_on_oops=n | ||
1246 | |||
1247 | or | ||
1248 | |||
1249 | echo n > /proc/sys/kernel/ftrace_dump_on_oops | ||
1250 | |||
1251 | If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will | ||
1252 | only dump the buffer of the CPU that triggered the oops. | ||
1253 | |||
1254 | Here's an example of such a dump after a null pointer | ||
1255 | dereference in a kernel module: | ||
1256 | |||
1257 | [57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 | ||
1258 | [57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1259 | [57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0 | ||
1260 | [57848.106019] Oops: 0002 [#1] SMP | ||
1261 | [57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus | ||
1262 | [57848.106019] Dumping ftrace buffer: | ||
1263 | [57848.106019] --------------------------------- | ||
1264 | [...] | ||
1265 | [57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24 | ||
1266 | [57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165 | ||
1267 | [57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165 | ||
1268 | [57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165 | ||
1269 | [57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165 | ||
1270 | [57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops] | ||
1271 | [57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30 | ||
1272 | [57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b | ||
1273 | [57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31 | ||
1274 | [57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1 | ||
1275 | [57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30 | ||
1276 | [...] | ||
1277 | [57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2 | ||
1278 | [57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881 | ||
1279 | [57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881 | ||
1280 | [57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96 | ||
1281 | [...] | ||
1282 | [57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3 | ||
1283 | [57848.106019] --------------------------------- | ||
1284 | [57848.106019] CPU 0 | ||
1285 | [57848.106019] Modules linked in: oops | ||
1286 | [57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23 | ||
1287 | [57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops] | ||
1288 | [57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246 | ||
1289 | [...] | ||
1290 | 1845 | ||
1291 | 1846 | ||
1292 | function graph tracer | 1847 | function graph tracer |
@@ -1473,16 +2028,18 @@ starts of pointing to a simple return. (Enabling FTRACE will | |||
1473 | include the -pg switch in the compiling of the kernel.) | 2028 | include the -pg switch in the compiling of the kernel.) |
1474 | 2029 | ||
1475 | At compile time every C file object is run through the | 2030 | At compile time every C file object is run through the |
1476 | recordmcount.pl script (located in the scripts directory). This | 2031 | recordmcount program (located in the scripts directory). This |
1477 | script will process the C object using objdump to find all the | 2032 | program will parse the ELF headers in the C object to find all |
1478 | locations in the .text section that call mcount. (Note, only the | 2033 | the locations in the .text section that call mcount. (Note, only |
1479 | .text section is processed, since processing other sections like | 2034 | white listed .text sections are processed, since processing other |
1480 | .init.text may cause races due to those sections being freed). | 2035 | sections like .init.text may cause races due to those sections |
2036 | being freed unexpectedly). | ||
1481 | 2037 | ||
1482 | A new section called "__mcount_loc" is created that holds | 2038 | A new section called "__mcount_loc" is created that holds |
1483 | references to all the mcount call sites in the .text section. | 2039 | references to all the mcount call sites in the .text section. |
1484 | This section is compiled back into the original object. The | 2040 | The recordmcount program re-links this section back into the |
1485 | final linker will add all these references into a single table. | 2041 | original object. The final linking stage of the kernel will add all these |
2042 | references into a single table. | ||
1486 | 2043 | ||
1487 | On boot up, before SMP is initialized, the dynamic ftrace code | 2044 | On boot up, before SMP is initialized, the dynamic ftrace code |
1488 | scans this table and updates all the locations into nops. It | 2045 | scans this table and updates all the locations into nops. It |
@@ -1493,13 +2050,25 @@ unloaded, it also removes its functions from the ftrace function | |||
1493 | list. This is automatic in the module unload code, and the | 2050 | list. This is automatic in the module unload code, and the |
1494 | module author does not need to worry about it. | 2051 | module author does not need to worry about it. |
1495 | 2052 | ||
1496 | When tracing is enabled, kstop_machine is called to prevent | 2053 | When tracing is enabled, the process of modifying the function |
1497 | races with the CPUS executing code being modified (which can | 2054 | tracepoints is dependent on architecture. The old method is to use |
1498 | cause the CPU to do undesirable things), and the nops are | 2055 | kstop_machine to prevent races with the CPUs executing code being |
2056 | modified (which can cause the CPU to do undesirable things, especially | ||
2057 | if the modified code crosses cache (or page) boundaries), and the nops are | ||
1499 | patched back to calls. But this time, they do not call mcount | 2058 | patched back to calls. But this time, they do not call mcount |
1500 | (which is just a function stub). They now call into the ftrace | 2059 | (which is just a function stub). They now call into the ftrace |
1501 | infrastructure. | 2060 | infrastructure. |
1502 | 2061 | ||
2062 | The new method of modifying the function tracepoints is to place | ||
2063 | a breakpoint at the location to be modified, sync all CPUs, modify | ||
2064 | the rest of the instruction not covered by the breakpoint. Sync | ||
2065 | all CPUs again, and then replace the breakpoint with the finished | ||
2066 | version of the ftrace call site. | ||
2067 | |||
2068 | Some archs do not even need to monkey around with the synchronization, | ||
2069 | and can just slap the new code on top of the old without any | ||
2070 | problems with other CPUs executing it at the same time. | ||
2071 | |||
1503 | One special side-effect to the recording of the functions being | 2072 | One special side-effect to the recording of the functions being |
1504 | traced is that we can now selectively choose which functions we | 2073 | traced is that we can now selectively choose which functions we |
1505 | wish to trace and which ones we want the mcount calls to remain | 2074 | wish to trace and which ones we want the mcount calls to remain |
@@ -1530,20 +2099,28 @@ mutex_lock | |||
1530 | 2099 | ||
1531 | If I am only interested in sys_nanosleep and hrtimer_interrupt: | 2100 | If I am only interested in sys_nanosleep and hrtimer_interrupt: |
1532 | 2101 | ||
1533 | # echo sys_nanosleep hrtimer_interrupt \ | 2102 | # echo sys_nanosleep hrtimer_interrupt > set_ftrace_filter |
1534 | > set_ftrace_filter | ||
1535 | # echo function > current_tracer | 2103 | # echo function > current_tracer |
1536 | # echo 1 > tracing_on | 2104 | # echo 1 > tracing_on |
1537 | # usleep 1 | 2105 | # usleep 1 |
1538 | # echo 0 > tracing_on | 2106 | # echo 0 > tracing_on |
1539 | # cat trace | 2107 | # cat trace |
1540 | # tracer: ftrace | 2108 | # tracer: function |
2109 | # | ||
2110 | # entries-in-buffer/entries-written: 5/5 #P:4 | ||
1541 | # | 2111 | # |
1542 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2112 | # _-----=> irqs-off |
1543 | # | | | | | | 2113 | # / _----=> need-resched |
1544 | usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | 2114 | # | / _---=> hardirq/softirq |
1545 | usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | 2115 | # || / _--=> preempt-depth |
1546 | <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | 2116 | # ||| / delay |
2117 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2118 | # | | | |||| | | | ||
2119 | usleep-2665 [001] .... 4186.475355: sys_nanosleep <-system_call_fastpath | ||
2120 | <idle>-0 [001] d.h1 4186.475409: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2121 | usleep-2665 [001] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2122 | <idle>-0 [003] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
2123 | <idle>-0 [002] d.h1 4186.475427: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1547 | 2124 | ||
1548 | To see which functions are being traced, you can cat the file: | 2125 | To see which functions are being traced, you can cat the file: |
1549 | 2126 | ||
@@ -1571,20 +2148,25 @@ Note: It is better to use quotes to enclose the wild cards, | |||
1571 | 2148 | ||
1572 | Produces: | 2149 | Produces: |
1573 | 2150 | ||
1574 | # tracer: ftrace | 2151 | # tracer: function |
1575 | # | 2152 | # |
1576 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2153 | # entries-in-buffer/entries-written: 897/897 #P:4 |
1577 | # | | | | | | 2154 | # |
1578 | bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | 2155 | # _-----=> irqs-off |
1579 | bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | 2156 | # / _----=> need-resched |
1580 | bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | 2157 | # | / _---=> hardirq/softirq |
1581 | bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | 2158 | # || / _--=> preempt-depth |
1582 | <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | 2159 | # ||| / delay |
1583 | <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | 2160 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1584 | <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | 2161 | # | | | |||| | | |
1585 | <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | 2162 | <idle>-0 [003] dN.1 4228.547803: hrtimer_cancel <-tick_nohz_idle_exit |
1586 | <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | 2163 | <idle>-0 [003] dN.1 4228.547804: hrtimer_try_to_cancel <-hrtimer_cancel |
1587 | 2164 | <idle>-0 [003] dN.2 4228.547805: hrtimer_force_reprogram <-__remove_hrtimer | |
2165 | <idle>-0 [003] dN.1 4228.547805: hrtimer_forward <-tick_nohz_idle_exit | ||
2166 | <idle>-0 [003] dN.1 4228.547805: hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11 | ||
2167 | <idle>-0 [003] d..1 4228.547858: hrtimer_get_next_event <-get_next_timer_interrupt | ||
2168 | <idle>-0 [003] d..1 4228.547859: hrtimer_start <-__tick_nohz_idle_enter | ||
2169 | <idle>-0 [003] d..2 4228.547860: hrtimer_force_reprogram <-__remove_hrtimer | ||
1588 | 2170 | ||
1589 | Notice that we lost the sys_nanosleep. | 2171 | Notice that we lost the sys_nanosleep. |
1590 | 2172 | ||
@@ -1651,19 +2233,29 @@ traced. | |||
1651 | 2233 | ||
1652 | Produces: | 2234 | Produces: |
1653 | 2235 | ||
1654 | # tracer: ftrace | 2236 | # tracer: function |
2237 | # | ||
2238 | # entries-in-buffer/entries-written: 39608/39608 #P:4 | ||
1655 | # | 2239 | # |
1656 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2240 | # _-----=> irqs-off |
1657 | # | | | | | | 2241 | # / _----=> need-resched |
1658 | bash-4043 [01] 115.281644: finish_task_switch <-schedule | 2242 | # | / _---=> hardirq/softirq |
1659 | bash-4043 [01] 115.281645: hrtick_set <-schedule | 2243 | # || / _--=> preempt-depth |
1660 | bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | 2244 | # ||| / delay |
1661 | bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | 2245 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION |
1662 | bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | 2246 | # | | | |||| | | |
1663 | bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | 2247 | bash-1994 [000] .... 4342.324896: file_ra_state_init <-do_dentry_open |
1664 | bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | 2248 | bash-1994 [000] .... 4342.324897: open_check_o_direct <-do_last |
1665 | bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | 2249 | bash-1994 [000] .... 4342.324897: ima_file_check <-do_last |
1666 | bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | 2250 | bash-1994 [000] .... 4342.324898: process_measurement <-ima_file_check |
2251 | bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement | ||
2252 | bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action | ||
2253 | bash-1994 [000] .... 4342.324899: do_truncate <-do_last | ||
2254 | bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate | ||
2255 | bash-1994 [000] .... 4342.324899: notify_change <-do_truncate | ||
2256 | bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change | ||
2257 | bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time | ||
2258 | bash-1994 [000] .... 4342.324900: timespec_trunc <-current_fs_time | ||
1667 | 2259 | ||
1668 | We can see that there's no more lock or preempt tracing. | 2260 | We can see that there's no more lock or preempt tracing. |
1669 | 2261 | ||
@@ -1729,6 +2321,28 @@ this special filter via: | |||
1729 | echo > set_graph_function | 2321 | echo > set_graph_function |
1730 | 2322 | ||
1731 | 2323 | ||
2324 | ftrace_enabled | ||
2325 | -------------- | ||
2326 | |||
2327 | Note, the proc sysctl ftrace_enabled is a big on/off switch for the | ||
2328 | function tracer. By default it is enabled (when function tracing is | ||
2329 | enabled in the kernel). If it is disabled, all function tracing is | ||
2330 | disabled. This includes not only the function tracers for ftrace, but | ||
2331 | also for any other uses (perf, kprobes, stack tracing, profiling, etc). | ||
2332 | |||
2333 | Please disable this with care. | ||
2334 | |||
2335 | This can be disabled (and enabled) with: | ||
2336 | |||
2337 | sysctl kernel.ftrace_enabled=0 | ||
2338 | sysctl kernel.ftrace_enabled=1 | ||
2339 | |||
2340 | or | ||
2341 | |||
2342 | echo 0 > /proc/sys/kernel/ftrace_enabled | ||
2343 | echo 1 > /proc/sys/kernel/ftrace_enabled | ||
2344 | |||
2345 | |||
1732 | Filter commands | 2346 | Filter commands |
1733 | --------------- | 2347 | --------------- |
1734 | 2348 | ||
@@ -1763,12 +2377,58 @@ The following commands are supported: | |||
1763 | 2377 | ||
1764 | echo '__schedule_bug:traceoff:5' > set_ftrace_filter | 2378 | echo '__schedule_bug:traceoff:5' > set_ftrace_filter |
1765 | 2379 | ||
2380 | To always disable tracing when __schedule_bug is hit: | ||
2381 | |||
2382 | echo '__schedule_bug:traceoff' > set_ftrace_filter | ||
2383 | |||
1766 | These commands are cumulative whether or not they are appended | 2384 | These commands are cumulative whether or not they are appended |
1767 | to set_ftrace_filter. To remove a command, prepend it by '!' | 2385 | to set_ftrace_filter. To remove a command, prepend it by '!' |
1768 | and drop the parameter: | 2386 | and drop the parameter: |
1769 | 2387 | ||
2388 | echo '!__schedule_bug:traceoff:0' > set_ftrace_filter | ||
2389 | |||
2390 | The above removes the traceoff command for __schedule_bug | ||
2391 | that has a counter. To remove commands without counters: | ||
2392 | |||
1770 | echo '!__schedule_bug:traceoff' > set_ftrace_filter | 2393 | echo '!__schedule_bug:traceoff' > set_ftrace_filter |
1771 | 2394 | ||
2395 | - snapshot | ||
2396 | Will cause a snapshot to be triggered when the function is hit. | ||
2397 | |||
2398 | echo 'native_flush_tlb_others:snapshot' > set_ftrace_filter | ||
2399 | |||
2400 | To only snapshot once: | ||
2401 | |||
2402 | echo 'native_flush_tlb_others:snapshot:1' > set_ftrace_filter | ||
2403 | |||
2404 | To remove the above commands: | ||
2405 | |||
2406 | echo '!native_flush_tlb_others:snapshot' > set_ftrace_filter | ||
2407 | echo '!native_flush_tlb_others:snapshot:0' > set_ftrace_filter | ||
2408 | |||
2409 | - enable_event/disable_event | ||
2410 | These commands can enable or disable a trace event. Note, because | ||
2411 | function tracing callbacks are very sensitive, when these commands | ||
2412 | are registered, the trace point is activated, but disabled in | ||
2413 | a "soft" mode. That is, the tracepoint will be called, but | ||
2414 | just will not be traced. The event tracepoint stays in this mode | ||
2415 | as long as there's a command that triggers it. | ||
2416 | |||
2417 | echo 'try_to_wake_up:enable_event:sched:sched_switch:2' > \ | ||
2418 | set_ftrace_filter | ||
2419 | |||
2420 | The format is: | ||
2421 | |||
2422 | <function>:enable_event:<system>:<event>[:count] | ||
2423 | <function>:disable_event:<system>:<event>[:count] | ||
2424 | |||
2425 | To remove the events commands: | ||
2426 | |||
2427 | |||
2428 | echo '!try_to_wake_up:enable_event:sched:sched_switch:0' > \ | ||
2429 | set_ftrace_filter | ||
2430 | echo '!schedule:disable_event:sched:sched_switch' > \ | ||
2431 | set_ftrace_filter | ||
1772 | 2432 | ||
1773 | trace_pipe | 2433 | trace_pipe |
1774 | ---------- | 2434 | ---------- |
@@ -1787,28 +2447,31 @@ different. The trace is live. | |||
1787 | # cat trace | 2447 | # cat trace |
1788 | # tracer: function | 2448 | # tracer: function |
1789 | # | 2449 | # |
1790 | # TASK-PID CPU# TIMESTAMP FUNCTION | 2450 | # entries-in-buffer/entries-written: 0/0 #P:4 |
1791 | # | | | | | | 2451 | # |
2452 | # _-----=> irqs-off | ||
2453 | # / _----=> need-resched | ||
2454 | # | / _---=> hardirq/softirq | ||
2455 | # || / _--=> preempt-depth | ||
2456 | # ||| / delay | ||
2457 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2458 | # | | | |||| | | | ||
1792 | 2459 | ||
1793 | # | 2460 | # |
1794 | # cat /tmp/trace.out | 2461 | # cat /tmp/trace.out |
1795 | bash-4043 [00] 41.267106: finish_task_switch <-schedule | 2462 | bash-1994 [000] .... 5281.568961: mutex_unlock <-rb_simple_write |
1796 | bash-4043 [00] 41.267106: hrtick_set <-schedule | 2463 | bash-1994 [000] .... 5281.568963: __mutex_unlock_slowpath <-mutex_unlock |
1797 | bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | 2464 | bash-1994 [000] .... 5281.568963: __fsnotify_parent <-fsnotify_modify |
1798 | bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | 2465 | bash-1994 [000] .... 5281.568964: fsnotify <-fsnotify_modify |
1799 | bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | 2466 | bash-1994 [000] .... 5281.568964: __srcu_read_lock <-fsnotify |
1800 | bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | 2467 | bash-1994 [000] .... 5281.568964: add_preempt_count <-__srcu_read_lock |
1801 | bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | 2468 | bash-1994 [000] ...1 5281.568965: sub_preempt_count <-__srcu_read_lock |
1802 | bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | 2469 | bash-1994 [000] .... 5281.568965: __srcu_read_unlock <-fsnotify |
1803 | bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | 2470 | bash-1994 [000] .... 5281.568967: sys_dup2 <-system_call_fastpath |
1804 | bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1805 | 2471 | ||
1806 | 2472 | ||
1807 | Note, reading the trace_pipe file will block until more input is | 2473 | Note, reading the trace_pipe file will block until more input is |
1808 | added. By changing the tracer, trace_pipe will issue an EOF. We | 2474 | added. |
1809 | needed to set the function tracer _before_ we "cat" the | ||
1810 | trace_pipe file. | ||
1811 | |||
1812 | 2475 | ||
1813 | trace entries | 2476 | trace entries |
1814 | ------------- | 2477 | ------------- |
@@ -1817,31 +2480,50 @@ Having too much or not enough data can be troublesome in | |||
1817 | diagnosing an issue in the kernel. The file buffer_size_kb is | 2480 | diagnosing an issue in the kernel. The file buffer_size_kb is |
1818 | used to modify the size of the internal trace buffers. The | 2481 | used to modify the size of the internal trace buffers. The |
1819 | number listed is the number of entries that can be recorded per | 2482 | number listed is the number of entries that can be recorded per |
1820 | CPU. To know the full size, multiply the number of possible CPUS | 2483 | CPU. To know the full size, multiply the number of possible CPUs |
1821 | with the number of entries. | 2484 | with the number of entries. |
1822 | 2485 | ||
1823 | # cat buffer_size_kb | 2486 | # cat buffer_size_kb |
1824 | 1408 (units kilobytes) | 2487 | 1408 (units kilobytes) |
1825 | 2488 | ||
1826 | Note, to modify this, you must have tracing completely disabled. | 2489 | Or simply read buffer_total_size_kb |
1827 | To do that, echo "nop" into the current_tracer. If the | 2490 | |
1828 | current_tracer is not set to "nop", an EINVAL error will be | 2491 | # cat buffer_total_size_kb |
1829 | returned. | 2492 | 5632 |
2493 | |||
2494 | To modify the buffer, simply echo in a number (in 1024 byte segments). | ||
1830 | 2495 | ||
1831 | # echo nop > current_tracer | ||
1832 | # echo 10000 > buffer_size_kb | 2496 | # echo 10000 > buffer_size_kb |
1833 | # cat buffer_size_kb | 2497 | # cat buffer_size_kb |
1834 | 10000 (units kilobytes) | 2498 | 10000 (units kilobytes) |
1835 | 2499 | ||
1836 | The number of pages which will be allocated is limited to a | 2500 | It will try to allocate as much as possible. If you allocate too |
1837 | percentage of available memory. Allocating too much will produce | 2501 | much, it can cause Out-Of-Memory to trigger. |
1838 | an error. | ||
1839 | 2502 | ||
1840 | # echo 1000000000000 > buffer_size_kb | 2503 | # echo 1000000000000 > buffer_size_kb |
1841 | -bash: echo: write error: Cannot allocate memory | 2504 | -bash: echo: write error: Cannot allocate memory |
1842 | # cat buffer_size_kb | 2505 | # cat buffer_size_kb |
1843 | 85 | 2506 | 85 |
1844 | 2507 | ||
2508 | The per_cpu buffers can be changed individually as well: | ||
2509 | |||
2510 | # echo 10000 > per_cpu/cpu0/buffer_size_kb | ||
2511 | # echo 100 > per_cpu/cpu1/buffer_size_kb | ||
2512 | |||
2513 | When the per_cpu buffers are not the same, the buffer_size_kb | ||
2514 | at the top level will just show an X | ||
2515 | |||
2516 | # cat buffer_size_kb | ||
2517 | X | ||
2518 | |||
2519 | This is where the buffer_total_size_kb is useful: | ||
2520 | |||
2521 | # cat buffer_total_size_kb | ||
2522 | 12916 | ||
2523 | |||
2524 | Writing to the top level buffer_size_kb will reset all the buffers | ||
2525 | to be the same again. | ||
2526 | |||
1845 | Snapshot | 2527 | Snapshot |
1846 | -------- | 2528 | -------- |
1847 | CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature | 2529 | CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature |
@@ -1873,7 +2555,7 @@ feature: | |||
1873 | 2555 | ||
1874 | status\input | 0 | 1 | else | | 2556 | status\input | 0 | 1 | else | |
1875 | --------------+------------+------------+------------+ | 2557 | --------------+------------+------------+------------+ |
1876 | not allocated |(do nothing)| alloc+swap | EINVAL | | 2558 | not allocated |(do nothing)| alloc+swap |(do nothing)| |
1877 | --------------+------------+------------+------------+ | 2559 | --------------+------------+------------+------------+ |
1878 | allocated | free | swap | clear | | 2560 | allocated | free | swap | clear | |
1879 | --------------+------------+------------+------------+ | 2561 | --------------+------------+------------+------------+ |
@@ -1925,7 +2607,188 @@ bash: echo: write error: Device or resource busy | |||
1925 | # cat snapshot | 2607 | # cat snapshot |
1926 | cat: snapshot: Device or resource busy | 2608 | cat: snapshot: Device or resource busy |
1927 | 2609 | ||
2610 | |||
2611 | Instances | ||
2612 | --------- | ||
2613 | In the debugfs tracing directory is a directory called "instances". | ||
2614 | This directory can have new directories created inside of it using | ||
2615 | mkdir, and removed with rmdir. The directory created | ||
2616 | with mkdir in this directory will already contain files and other | ||
2617 | directories after it is created. | ||
2618 | |||
2619 | # mkdir instances/foo | ||
2620 | # ls instances/foo | ||
2621 | buffer_size_kb buffer_total_size_kb events free_buffer per_cpu | ||
2622 | set_event snapshot trace trace_clock trace_marker trace_options | ||
2623 | trace_pipe tracing_on | ||
2624 | |||
2625 | As you can see, the new directory looks similar to the tracing directory | ||
2626 | itself. In fact, it is very similar, except that the buffer and | ||
2627 | events are agnostic from the main directory, or from any other | ||
2628 | instances that are created. | ||
2629 | |||
2630 | The files in the new directory work just like the files with the | ||
2631 | same name in the tracing directory except the buffer that is used | ||
2632 | is a separate and new buffer. The files affect that buffer but do not | ||
2633 | affect the main buffer with the exception of trace_options. Currently, | ||
2634 | the trace_options affect all instances and the top level buffer | ||
2635 | the same, but this may change in future releases. That is, options | ||
2636 | may become specific to the instance they reside in. | ||
2637 | |||
2638 | Notice that none of the function tracer files are there, nor is | ||
2639 | current_tracer and available_tracers. This is because the buffers | ||
2640 | can currently only have events enabled for them. | ||
2641 | |||
2642 | # mkdir instances/foo | ||
2643 | # mkdir instances/bar | ||
2644 | # mkdir instances/zoot | ||
2645 | # echo 100000 > buffer_size_kb | ||
2646 | # echo 1000 > instances/foo/buffer_size_kb | ||
2647 | # echo 5000 > instances/bar/per_cpu/cpu1/buffer_size_kb | ||
2648 | # echo function > current_tracer | ||
2649 | # echo 1 > instances/foo/events/sched/sched_wakeup/enable | ||
2650 | # echo 1 > instances/foo/events/sched/sched_wakeup_new/enable | ||
2651 | # echo 1 > instances/foo/events/sched/sched_switch/enable | ||
2652 | # echo 1 > instances/bar/events/irq/enable | ||
2653 | # echo 1 > instances/zoot/events/syscalls/enable | ||
2654 | # cat trace_pipe | ||
2655 | CPU:2 [LOST 11745 EVENTS] | ||
2656 | bash-2044 [002] .... 10594.481032: _raw_spin_lock_irqsave <-get_page_from_freelist | ||
2657 | bash-2044 [002] d... 10594.481032: add_preempt_count <-_raw_spin_lock_irqsave | ||
2658 | bash-2044 [002] d..1 10594.481032: __rmqueue <-get_page_from_freelist | ||
2659 | bash-2044 [002] d..1 10594.481033: _raw_spin_unlock <-get_page_from_freelist | ||
2660 | bash-2044 [002] d..1 10594.481033: sub_preempt_count <-_raw_spin_unlock | ||
2661 | bash-2044 [002] d... 10594.481033: get_pageblock_flags_group <-get_pageblock_migratetype | ||
2662 | bash-2044 [002] d... 10594.481034: __mod_zone_page_state <-get_page_from_freelist | ||
2663 | bash-2044 [002] d... 10594.481034: zone_statistics <-get_page_from_freelist | ||
2664 | bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics | ||
2665 | bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics | ||
2666 | bash-2044 [002] .... 10594.481035: arch_dup_task_struct <-copy_process | ||
2667 | [...] | ||
2668 | |||
2669 | # cat instances/foo/trace_pipe | ||
2670 | bash-1998 [000] d..4 136.676759: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000 | ||
2671 | bash-1998 [000] dN.4 136.676760: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000 | ||
2672 | <idle>-0 [003] d.h3 136.676906: sched_wakeup: comm=rcu_preempt pid=9 prio=120 success=1 target_cpu=003 | ||
2673 | <idle>-0 [003] d..3 136.676909: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=rcu_preempt next_pid=9 next_prio=120 | ||
2674 | rcu_preempt-9 [003] d..3 136.676916: sched_switch: prev_comm=rcu_preempt prev_pid=9 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120 | ||
2675 | bash-1998 [000] d..4 136.677014: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000 | ||
2676 | bash-1998 [000] dN.4 136.677016: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000 | ||
2677 | bash-1998 [000] d..3 136.677018: sched_switch: prev_comm=bash prev_pid=1998 prev_prio=120 prev_state=R+ ==> next_comm=kworker/0:1 next_pid=59 next_prio=120 | ||
2678 | kworker/0:1-59 [000] d..4 136.677022: sched_wakeup: comm=sshd pid=1995 prio=120 success=1 target_cpu=001 | ||
2679 | kworker/0:1-59 [000] d..3 136.677025: sched_switch: prev_comm=kworker/0:1 prev_pid=59 prev_prio=120 prev_state=S ==> next_comm=bash next_pid=1998 next_prio=120 | ||
2680 | [...] | ||
2681 | |||
2682 | # cat instances/bar/trace_pipe | ||
2683 | migration/1-14 [001] d.h3 138.732674: softirq_raise: vec=3 [action=NET_RX] | ||
2684 | <idle>-0 [001] dNh3 138.732725: softirq_raise: vec=3 [action=NET_RX] | ||
2685 | bash-1998 [000] d.h1 138.733101: softirq_raise: vec=1 [action=TIMER] | ||
2686 | bash-1998 [000] d.h1 138.733102: softirq_raise: vec=9 [action=RCU] | ||
2687 | bash-1998 [000] ..s2 138.733105: softirq_entry: vec=1 [action=TIMER] | ||
2688 | bash-1998 [000] ..s2 138.733106: softirq_exit: vec=1 [action=TIMER] | ||
2689 | bash-1998 [000] ..s2 138.733106: softirq_entry: vec=9 [action=RCU] | ||
2690 | bash-1998 [000] ..s2 138.733109: softirq_exit: vec=9 [action=RCU] | ||
2691 | sshd-1995 [001] d.h1 138.733278: irq_handler_entry: irq=21 name=uhci_hcd:usb4 | ||
2692 | sshd-1995 [001] d.h1 138.733280: irq_handler_exit: irq=21 ret=unhandled | ||
2693 | sshd-1995 [001] d.h1 138.733281: irq_handler_entry: irq=21 name=eth0 | ||
2694 | sshd-1995 [001] d.h1 138.733283: irq_handler_exit: irq=21 ret=handled | ||
2695 | [...] | ||
2696 | |||
2697 | # cat instances/zoot/trace | ||
2698 | # tracer: nop | ||
2699 | # | ||
2700 | # entries-in-buffer/entries-written: 18996/18996 #P:4 | ||
2701 | # | ||
2702 | # _-----=> irqs-off | ||
2703 | # / _----=> need-resched | ||
2704 | # | / _---=> hardirq/softirq | ||
2705 | # || / _--=> preempt-depth | ||
2706 | # ||| / delay | ||
2707 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
2708 | # | | | |||| | | | ||
2709 | bash-1998 [000] d... 140.733501: sys_write -> 0x2 | ||
2710 | bash-1998 [000] d... 140.733504: sys_dup2(oldfd: a, newfd: 1) | ||
2711 | bash-1998 [000] d... 140.733506: sys_dup2 -> 0x1 | ||
2712 | bash-1998 [000] d... 140.733508: sys_fcntl(fd: a, cmd: 1, arg: 0) | ||
2713 | bash-1998 [000] d... 140.733509: sys_fcntl -> 0x1 | ||
2714 | bash-1998 [000] d... 140.733510: sys_close(fd: a) | ||
2715 | bash-1998 [000] d... 140.733510: sys_close -> 0x0 | ||
2716 | bash-1998 [000] d... 140.733514: sys_rt_sigprocmask(how: 0, nset: 0, oset: 6e2768, sigsetsize: 8) | ||
2717 | bash-1998 [000] d... 140.733515: sys_rt_sigprocmask -> 0x0 | ||
2718 | bash-1998 [000] d... 140.733516: sys_rt_sigaction(sig: 2, act: 7fff718846f0, oact: 7fff71884650, sigsetsize: 8) | ||
2719 | bash-1998 [000] d... 140.733516: sys_rt_sigaction -> 0x0 | ||
2720 | |||
2721 | You can see that the trace of the top most trace buffer shows only | ||
2722 | the function tracing. The foo instance displays wakeups and task | ||
2723 | switches. | ||
2724 | |||
2725 | To remove the instances, simply delete their directories: | ||
2726 | |||
2727 | # rmdir instances/foo | ||
2728 | # rmdir instances/bar | ||
2729 | # rmdir instances/zoot | ||
2730 | |||
2731 | Note, if a process has a trace file open in one of the instance | ||
2732 | directories, the rmdir will fail with EBUSY. | ||
2733 | |||
2734 | |||
2735 | Stack trace | ||
1928 | ----------- | 2736 | ----------- |
2737 | Since the kernel has a fixed sized stack, it is important not to | ||
2738 | waste it in functions. A kernel developer must be conscious of | ||
2739 | what they allocate on the stack. If they add too much, the system | ||
2740 | can be in danger of a stack overflow, and corruption will occur, | ||
2741 | usually leading to a system panic. | ||
2742 | |||
2743 | There are some tools that check this, usually with interrupts | ||
2744 | periodically checking usage. But if you can perform a check | ||
2745 | at every function call that will become very useful. As ftrace provides | ||
2746 | a function tracer, it makes it convenient to check the stack size | ||
2747 | at every function call. This is enabled via the stack tracer. | ||
2748 | |||
2749 | CONFIG_STACK_TRACER enables the ftrace stack tracing functionality. | ||
2750 | To enable it, write a '1' into /proc/sys/kernel/stack_tracer_enabled. | ||
2751 | |||
2752 | # echo 1 > /proc/sys/kernel/stack_tracer_enabled | ||
2753 | |||
2754 | You can also enable it from the kernel command line to trace | ||
2755 | the stack size of the kernel during boot up, by adding "stacktrace" | ||
2756 | to the kernel command line parameter. | ||
2757 | |||
2758 | After running it for a few minutes, the output looks like: | ||
2759 | |||
2760 | # cat stack_max_size | ||
2761 | 2928 | ||
2762 | |||
2763 | # cat stack_trace | ||
2764 | Depth Size Location (18 entries) | ||
2765 | ----- ---- -------- | ||
2766 | 0) 2928 224 update_sd_lb_stats+0xbc/0x4ac | ||
2767 | 1) 2704 160 find_busiest_group+0x31/0x1f1 | ||
2768 | 2) 2544 256 load_balance+0xd9/0x662 | ||
2769 | 3) 2288 80 idle_balance+0xbb/0x130 | ||
2770 | 4) 2208 128 __schedule+0x26e/0x5b9 | ||
2771 | 5) 2080 16 schedule+0x64/0x66 | ||
2772 | 6) 2064 128 schedule_timeout+0x34/0xe0 | ||
2773 | 7) 1936 112 wait_for_common+0x97/0xf1 | ||
2774 | 8) 1824 16 wait_for_completion+0x1d/0x1f | ||
2775 | 9) 1808 128 flush_work+0xfe/0x119 | ||
2776 | 10) 1680 16 tty_flush_to_ldisc+0x1e/0x20 | ||
2777 | 11) 1664 48 input_available_p+0x1d/0x5c | ||
2778 | 12) 1616 48 n_tty_poll+0x6d/0x134 | ||
2779 | 13) 1568 64 tty_poll+0x64/0x7f | ||
2780 | 14) 1504 880 do_select+0x31e/0x511 | ||
2781 | 15) 624 400 core_sys_select+0x177/0x216 | ||
2782 | 16) 224 96 sys_select+0x91/0xb9 | ||
2783 | 17) 128 128 system_call_fastpath+0x16/0x1b | ||
2784 | |||
2785 | Note, if -mfentry is being used by gcc, functions get traced before | ||
2786 | they set up the stack frame. This means that leaf level functions | ||
2787 | are not tested by the stack tracer when -mfentry is used. | ||
2788 | |||
2789 | Currently, -mfentry is used by gcc 4.6.0 and above on x86 only. | ||
2790 | |||
2791 | --------- | ||
1929 | 2792 | ||
1930 | More details can be found in the source code, in the | 2793 | More details can be found in the source code, in the |
1931 | kernel/trace/*.c files. | 2794 | kernel/trace/*.c files. |
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 24ce6823a09e..d9c3e682312c 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt | |||
@@ -1,6 +1,8 @@ | |||
1 | Uprobe-tracer: Uprobe-based Event Tracing | 1 | Uprobe-tracer: Uprobe-based Event Tracing |
2 | ========================================= | 2 | ========================================= |
3 | Documentation written by Srikar Dronamraju | 3 | |
4 | Documentation written by Srikar Dronamraju | ||
5 | |||
4 | 6 | ||
5 | Overview | 7 | Overview |
6 | -------- | 8 | -------- |
@@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via | |||
13 | /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled. | 15 | /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled. |
14 | 16 | ||
15 | However unlike kprobe-event tracer, the uprobe event interface expects the | 17 | However unlike kprobe-event tracer, the uprobe event interface expects the |
16 | user to calculate the offset of the probepoint in the object | 18 | user to calculate the offset of the probepoint in the object. |
17 | 19 | ||
18 | Synopsis of uprobe_tracer | 20 | Synopsis of uprobe_tracer |
19 | ------------------------- | 21 | ------------------------- |
20 | p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe | 22 | p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe |
23 | r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe) | ||
24 | -:[GRP/]EVENT : Clear uprobe or uretprobe event | ||
21 | 25 | ||
22 | GRP : Group name. If omitted, use "uprobes" for it. | 26 | GRP : Group name. If omitted, "uprobes" is the default value. |
23 | EVENT : Event name. If omitted, the event name is generated | 27 | EVENT : Event name. If omitted, the event name is generated based |
24 | based on SYMBOL+offs. | 28 | on SYMBOL+offs. |
25 | PATH : path to an executable or a library. | 29 | PATH : Path to an executable or a library. |
26 | SYMBOL[+offs] : Symbol+offset where the probe is inserted. | 30 | SYMBOL[+offs] : Symbol+offset where the probe is inserted. |
27 | 31 | ||
28 | FETCHARGS : Arguments. Each probe can have up to 128 args. | 32 | FETCHARGS : Arguments. Each probe can have up to 128 args. |
29 | %REG : Fetch register REG | 33 | %REG : Fetch register REG |
30 | 34 | ||
31 | Event Profiling | 35 | Event Profiling |
32 | --------------- | 36 | --------------- |
33 | You can check the total number of probe hits and probe miss-hits via | 37 | You can check the total number of probe hits and probe miss-hits via |
34 | /sys/kernel/debug/tracing/uprobe_profile. | 38 | /sys/kernel/debug/tracing/uprobe_profile. |
35 | The first column is event name, the second is the number of probe hits, | 39 | The first column is event name, the second is the number of probe hits, |
36 | the third is the number of probe miss-hits. | 40 | the third is the number of probe miss-hits. |
37 | 41 | ||
38 | Usage examples | 42 | Usage examples |
39 | -------------- | 43 | -------------- |
40 | To add a probe as a new event, write a new definition to uprobe_events | 44 | * Add a probe as a new uprobe event, write a new definition to uprobe_events |
41 | as below. | 45 | as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash) |
46 | |||
47 | echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events | ||
48 | |||
49 | * Add a probe as a new uretprobe event: | ||
50 | |||
51 | echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events | ||
52 | |||
53 | * Unset registered event: | ||
42 | 54 | ||
43 | echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events | 55 | echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events |
44 | 56 | ||
45 | This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash | 57 | * Print out the events that are registered: |
46 | 58 | ||
47 | echo > /sys/kernel/debug/tracing/uprobe_events | 59 | cat /sys/kernel/debug/tracing/uprobe_events |
48 | 60 | ||
49 | This clears all probe points. | 61 | * Clear all events: |
50 | 62 | ||
51 | The following example shows how to dump the instruction pointer and %ax | 63 | echo > /sys/kernel/debug/tracing/uprobe_events |
52 | a register at the probed text address. Here we are trying to probe | 64 | |
53 | function zfree in /bin/zsh | 65 | Following example shows how to dump the instruction pointer and %ax register |
66 | at the probed text address. Probe zfree function in /bin/zsh: | ||
54 | 67 | ||
55 | # cd /sys/kernel/debug/tracing/ | 68 | # cd /sys/kernel/debug/tracing/ |
56 | # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp | 69 | # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp |
57 | 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh | 70 | 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh |
58 | # objdump -T /bin/zsh | grep -w zfree | 71 | # objdump -T /bin/zsh | grep -w zfree |
59 | 0000000000446420 g DF .text 0000000000000012 Base zfree | 72 | 0000000000446420 g DF .text 0000000000000012 Base zfree |
60 | 73 | ||
61 | 0x46420 is the offset of zfree in object /bin/zsh that is loaded at | 74 | 0x46420 is the offset of zfree in object /bin/zsh that is loaded at |
62 | 0x00400000. Hence the command to probe would be : | 75 | 0x00400000. Hence the command to uprobe would be: |
76 | |||
77 | # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events | ||
78 | |||
79 | And the same for the uretprobe would be: | ||
63 | 80 | ||
64 | # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events | 81 | # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events |
65 | 82 | ||
66 | Please note: User has to explicitly calculate the offset of the probepoint | 83 | Please note: User has to explicitly calculate the offset of the probe-point |
67 | in the object. We can see the events that are registered by looking at the | 84 | in the object. We can see the events that are registered by looking at the |
68 | uprobe_events file. | 85 | uprobe_events file. |
69 | 86 | ||
70 | # cat uprobe_events | 87 | # cat uprobe_events |
71 | p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax | 88 | p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax |
89 | r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax | ||
72 | 90 | ||
73 | The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format | 91 | Format of events can be seen by viewing the file events/uprobes/zfree_entry/format |
74 | 92 | ||
75 | # cat events/uprobes/p_zsh_0x46420/format | 93 | # cat events/uprobes/zfree_entry/format |
76 | name: p_zsh_0x46420 | 94 | name: zfree_entry |
77 | ID: 922 | 95 | ID: 922 |
78 | format: | 96 | format: |
79 | field:unsigned short common_type; offset:0; size:2; signed:0; | 97 | field:unsigned short common_type; offset:0; size:2; signed:0; |
80 | field:unsigned char common_flags; offset:2; size:1; signed:0; | 98 | field:unsigned char common_flags; offset:2; size:1; signed:0; |
81 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | 99 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; |
82 | field:int common_pid; offset:4; size:4; signed:1; | 100 | field:int common_pid; offset:4; size:4; signed:1; |
83 | field:int common_padding; offset:8; size:4; signed:1; | 101 | field:int common_padding; offset:8; size:4; signed:1; |
84 | 102 | ||
85 | field:unsigned long __probe_ip; offset:12; size:4; signed:0; | 103 | field:unsigned long __probe_ip; offset:12; size:4; signed:0; |
86 | field:u32 arg1; offset:16; size:4; signed:0; | 104 | field:u32 arg1; offset:16; size:4; signed:0; |
87 | field:u32 arg2; offset:20; size:4; signed:0; | 105 | field:u32 arg2; offset:20; size:4; signed:0; |
88 | 106 | ||
89 | print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2 | 107 | print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2 |
90 | 108 | ||
@@ -94,6 +112,7 @@ events, you need to enable it by: | |||
94 | # echo 1 > events/uprobes/enable | 112 | # echo 1 > events/uprobes/enable |
95 | 113 | ||
96 | Lets disable the event after sleeping for some time. | 114 | Lets disable the event after sleeping for some time. |
115 | |||
97 | # sleep 20 | 116 | # sleep 20 |
98 | # echo 0 > events/uprobes/enable | 117 | # echo 0 > events/uprobes/enable |
99 | 118 | ||
@@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace. | |||
104 | # | 123 | # |
105 | # TASK-PID CPU# TIMESTAMP FUNCTION | 124 | # TASK-PID CPU# TIMESTAMP FUNCTION |
106 | # | | | | | | 125 | # | | | | | |
107 | zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 | 126 | zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79 |
108 | zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 | 127 | zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0 |
109 | zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 | 128 | zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79 |
110 | zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 | 129 | zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0 |
111 | 130 | ||
112 | Each line shows us probes were triggered for a pid 24842 with ip being | 131 | Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420 |
113 | 0x446421 and contents of ax register being 79. | 132 | and contents of ax register being 79. And uretprobe was triggered with ip at |
133 | 0x446540 with counterpart function entry at 0x446420. | ||
diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index 4204eb01fd38..1392b61d6ebe 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt | |||
@@ -33,6 +33,10 @@ built with CONFIG_USB_SUSPEND enabled (which depends on | |||
33 | CONFIG_PM_RUNTIME). System PM support is present only if the kernel | 33 | CONFIG_PM_RUNTIME). System PM support is present only if the kernel |
34 | was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled. | 34 | was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled. |
35 | 35 | ||
36 | (Starting with the 3.10 kernel release, dynamic PM support for USB is | ||
37 | present whenever the kernel was built with CONFIG_PM_RUNTIME enabled. | ||
38 | The CONFIG_USB_SUSPEND option has been eliminated.) | ||
39 | |||
36 | 40 | ||
37 | What is Remote Wakeup? | 41 | What is Remote Wakeup? |
38 | ---------------------- | 42 | ---------------------- |
@@ -206,10 +210,8 @@ initialized to 5. (The idle-delay values for already existing devices | |||
206 | will not be affected.) | 210 | will not be affected.) |
207 | 211 | ||
208 | Setting the initial default idle-delay to -1 will prevent any | 212 | Setting the initial default idle-delay to -1 will prevent any |
209 | autosuspend of any USB device. This is a simple alternative to | 213 | autosuspend of any USB device. This has the benefit of allowing you |
210 | disabling CONFIG_USB_SUSPEND and rebuilding the kernel, and it has the | 214 | then to enable autosuspend for selected devices. |
211 | added benefit of allowing you to enable autosuspend for selected | ||
212 | devices. | ||
213 | 215 | ||
214 | 216 | ||
215 | Warnings | 217 | Warnings |
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting index 706d7ed9d8d2..8eaa2fc4b8fa 100644 --- a/Documentation/vm/overcommit-accounting +++ b/Documentation/vm/overcommit-accounting | |||
@@ -8,7 +8,9 @@ The Linux kernel supports the following overcommit handling modes | |||
8 | default. | 8 | default. |
9 | 9 | ||
10 | 1 - Always overcommit. Appropriate for some scientific | 10 | 1 - Always overcommit. Appropriate for some scientific |
11 | applications. | 11 | applications. Classic example is code using sparse arrays |
12 | and just relying on the virtual memory consisting almost | ||
13 | entirely of zero pages. | ||
12 | 14 | ||
13 | 2 - Don't overcommit. The total address space commit | 15 | 2 - Don't overcommit. The total address space commit |
14 | for the system is not permitted to exceed swap + a | 16 | for the system is not permitted to exceed swap + a |
@@ -18,6 +20,10 @@ The Linux kernel supports the following overcommit handling modes | |||
18 | pages but will receive errors on memory allocation as | 20 | pages but will receive errors on memory allocation as |
19 | appropriate. | 21 | appropriate. |
20 | 22 | ||
23 | Useful for applications that want to guarantee their | ||
24 | memory allocations will be available in the future | ||
25 | without having to initialize every page. | ||
26 | |||
21 | The overcommit policy is set via the sysctl `vm.overcommit_memory'. | 27 | The overcommit policy is set via the sysctl `vm.overcommit_memory'. |
22 | 28 | ||
23 | The overcommit percentage is set via `vm.overcommit_ratio'. | 29 | The overcommit percentage is set via `vm.overcommit_ratio'. |
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index d6498e3cd713..881582f75c9c 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
@@ -13,7 +13,9 @@ ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole | |||
13 | ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) | 13 | ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) |
14 | ... unused hole ... | 14 | ... unused hole ... |
15 | ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 | 15 | ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 |
16 | ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space | 16 | ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space |
17 | ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls | ||
18 | ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole | ||
17 | 19 | ||
18 | The direct mapping covers all memory in the system up to the highest | 20 | The direct mapping covers all memory in the system up to the highest |
19 | memory address (this means in some cases it can also include PCI memory | 21 | memory address (this means in some cases it can also include PCI memory |