aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/obsolete/proc-pid-oom_adj2
-rw-r--r--Documentation/ABI/stable/firewire-cdev103
-rw-r--r--Documentation/ABI/stable/sysfs-bus-firewire122
-rw-r--r--Documentation/ABI/stable/vdso27
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa948021
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus8
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-wiimote10
-rw-r--r--Documentation/DocBook/80211.tmpl5
-rw-r--r--Documentation/DocBook/kernel-hacking.tmpl2
-rw-r--r--Documentation/DocBook/v4l/io.xml2
-rw-r--r--Documentation/DocBook/writing-an-alsa-driver.tmpl10
-rw-r--r--Documentation/RCU/NMI-RCU.txt4
-rw-r--r--Documentation/SubmitChecklist4
-rw-r--r--Documentation/arm/Booting5
-rw-r--r--Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt42
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt7
-rw-r--r--Documentation/arm/kernel_user_helpers.txt267
-rw-r--r--Documentation/blackfin/bfin-spi-notes.txt2
-rw-r--r--Documentation/block/queue-sysfs.txt10
-rw-r--r--Documentation/blockdev/README.DAC9602
-rw-r--r--Documentation/blockdev/ramdisk.txt8
-rw-r--r--Documentation/cgroups/cpuacct.txt2
-rw-r--r--Documentation/cgroups/cpusets.txt2
-rw-r--r--Documentation/cpu-freq/cpu-drivers.txt2
-rw-r--r--Documentation/development-process/4.Coding2
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt21
-rw-r--r--Documentation/devicetree/bindings/arm/primecell.txt21
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec2.txt (renamed from Documentation/devicetree/bindings/powerpc/fsl/sec.txt)2
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_nvidia.txt8
-rw-r--r--[-rwxr-xr-x]Documentation/devicetree/bindings/net/can/fsl-flexcan.txt0
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/spi_nvidia.txt5
-rw-r--r--Documentation/devicetree/bindings/tty/serial/of-serial.txt36
-rw-r--r--Documentation/feature-removal-schedule.txt44
-rw-r--r--Documentation/filesystems/Locking10
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt2
-rw-r--r--Documentation/filesystems/nilfs2.txt1
-rw-r--r--Documentation/filesystems/porting28
-rw-r--r--Documentation/filesystems/ubifs.txt28
-rw-r--r--Documentation/filesystems/vfs.txt30
-rw-r--r--Documentation/hwmon/it873
-rw-r--r--Documentation/hwmon/lm783
-rw-r--r--Documentation/hwmon/sch563631
-rw-r--r--Documentation/i2o/ioctl2
-rw-r--r--Documentation/isdn/README.HiSax2
-rw-r--r--Documentation/ja_JP/SubmitChecklist2
-rw-r--r--Documentation/kbuild/makefiles.txt38
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/magic-number.txt2
-rw-r--r--Documentation/mca.txt4
-rw-r--r--Documentation/mmc/00-INDEX2
-rw-r--r--Documentation/mmc/mmc-async-req.txt87
-rw-r--r--Documentation/networking/ifenslave.c18
-rw-r--r--Documentation/networking/ip-sysctl.txt29
-rw-r--r--Documentation/networking/netdev-features.txt154
-rw-r--r--Documentation/networking/nfc.txt128
-rw-r--r--Documentation/networking/stmmac.txt200
-rw-r--r--Documentation/power/devices.txt14
-rw-r--r--Documentation/power/opp.txt2
-rw-r--r--Documentation/power/runtime_pm.txt229
-rw-r--r--Documentation/rbtree.txt23
-rw-r--r--Documentation/s390/TAPE122
-rw-r--r--Documentation/scheduler/sched-arch.txt2
-rw-r--r--Documentation/scsi/BusLogic.txt2
-rw-r--r--Documentation/serial/computone.txt2
-rw-r--r--Documentation/sound/alsa/HD-Audio-Controls.txt100
-rw-r--r--Documentation/spi/ep93xx_spi10
-rw-r--r--Documentation/spi/pxa2xx5
-rw-r--r--Documentation/sysctl/kernel.txt215
-rw-r--r--Documentation/trace/kprobetrace.txt9
-rw-r--r--Documentation/vDSO/parse_vdso.c256
-rw-r--r--Documentation/vDSO/vdso_test.c111
-rw-r--r--Documentation/virtual/kvm/api.txt172
-rw-r--r--Documentation/virtual/kvm/mmu.txt18
-rw-r--r--Documentation/virtual/kvm/msr.txt34
-rw-r--r--Documentation/virtual/kvm/nested-vmx.txt251
-rw-r--r--Documentation/virtual/kvm/ppc-pv.txt8
-rw-r--r--Documentation/virtual/lguest/lguest.c47
-rw-r--r--Documentation/x86/boot.txt2
-rw-r--r--Documentation/x86/entry_64.txt98
-rw-r--r--Documentation/zh_CN/SubmitChecklist2
-rw-r--r--Documentation/zh_CN/magic-number.txt2
84 files changed, 2833 insertions, 613 deletions
diff --git a/Documentation/ABI/obsolete/proc-pid-oom_adj b/Documentation/ABI/obsolete/proc-pid-oom_adj
index cf63f264ce0f..9a3cb88ade47 100644
--- a/Documentation/ABI/obsolete/proc-pid-oom_adj
+++ b/Documentation/ABI/obsolete/proc-pid-oom_adj
@@ -14,7 +14,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
14 14
15 A much more powerful interface, /proc/<pid>/oom_score_adj, was 15 A much more powerful interface, /proc/<pid>/oom_score_adj, was
16 introduced with the oom killer rewrite that allows users to increase or 16 introduced with the oom killer rewrite that allows users to increase or
17 decrease the badness() score linearly. This interface will replace 17 decrease the badness score linearly. This interface will replace
18 /proc/<pid>/oom_adj. 18 /proc/<pid>/oom_adj.
19 19
20 A warning will be emitted to the kernel log if an application uses this 20 A warning will be emitted to the kernel log if an application uses this
diff --git a/Documentation/ABI/stable/firewire-cdev b/Documentation/ABI/stable/firewire-cdev
new file mode 100644
index 000000000000..16d030827368
--- /dev/null
+++ b/Documentation/ABI/stable/firewire-cdev
@@ -0,0 +1,103 @@
1What: /dev/fw[0-9]+
2Date: May 2007
3KernelVersion: 2.6.22
4Contact: linux1394-devel@lists.sourceforge.net
5Description:
6 The character device files /dev/fw* are the interface between
7 firewire-core and IEEE 1394 device drivers implemented in
8 userspace. The ioctl(2)- and read(2)-based ABI is defined and
9 documented in <linux/firewire-cdev.h>.
10
11 This ABI offers most of the features which firewire-core also
12 exposes to kernelspace IEEE 1394 drivers.
13
14 Each /dev/fw* is associated with one IEEE 1394 node, which can
15 be a remote or local node. Operations on a /dev/fw* file have
16 different scope:
17 - The 1394 node which is associated with the file:
18 - Asynchronous request transmission
19 - Get the Configuration ROM
20 - Query node ID
21 - Query maximum speed of the path between this node
22 and local node
23 - The 1394 bus (i.e. "card") to which the node is attached:
24 - Isochronous stream transmission and reception
25 - Asynchronous stream transmission and reception
26 - Asynchronous broadcast request transmission
27 - PHY packet transmission and reception
28 - Allocate, reallocate, deallocate isochronous
29 resources (channels, bandwidth) at the bus's IRM
30 - Query node IDs of local node, root node, IRM, bus
31 manager
32 - Query cycle time
33 - Bus reset initiation, bus reset event reception
34 - All 1394 buses:
35 - Allocation of IEEE 1212 address ranges on the local
36 link layers, reception of inbound requests to such
37 an address range, asynchronous response transmission
38 to inbound requests
39 - Addition of descriptors or directories to the local
40 nodes' Configuration ROM
41
42 Due to the different scope of operations and in order to let
43 userland implement different access permission models, some
44 operations are restricted to /dev/fw* files that are associated
45 with a local node:
46 - Addition of descriptors or directories to the local
47 nodes' Configuration ROM
48 - PHY packet transmission and reception
49
50 A /dev/fw* file remains associated with one particular node
51 during its entire life time. Bus topology changes, and hence
52 node ID changes, are tracked by firewire-core. ABI users do not
53 need to be aware of topology.
54
55 The following file operations are supported:
56
57 open(2)
58 Currently the only useful flags are O_RDWR.
59
60 ioctl(2)
61 Initiate various actions. Some take immediate effect, others
62 are performed asynchronously while or after the ioctl returns.
63 See the inline documentation in <linux/firewire-cdev.h> for
64 descriptions of all ioctls.
65
66 poll(2), select(2), epoll_wait(2) etc.
67 Watch for events to become available to be read.
68
69 read(2)
70 Receive various events. There are solicited events like
71 outbound asynchronous transaction completion or isochronous
72 buffer completion, and unsolicited events such as bus resets,
73 request reception, or PHY packet reception. Always use a read
74 buffer which is large enough to receive the largest event that
75 could ever arrive. See <linux/firewire-cdev.h> for descriptions
76 of all event types and for which ioctls affect reception of
77 events.
78
79 mmap(2)
80 Allocate a DMA buffer for isochronous reception or transmission
81 and map it into the process address space. The arguments should
82 be used as follows: addr = NULL, length = the desired buffer
83 size, i.e. number of packets times size of largest packet,
84 prot = at least PROT_READ for reception and at least PROT_WRITE
85 for transmission, flags = MAP_SHARED, fd = the handle to the
86 /dev/fw*, offset = 0.
87
88 Isochronous reception works in packet-per-buffer fashion except
89 for multichannel reception which works in buffer-fill mode.
90
91 munmap(2)
92 Unmap the isochronous I/O buffer from the process address space.
93
94 close(2)
95 Besides stopping and freeing I/O contexts that were associated
96 with the file descriptor, back out any changes to the local
97 nodes' Configuration ROM. Deallocate isochronous channels and
98 bandwidth at the IRM that were marked for kernel-assisted
99 re- and deallocation.
100
101Users: libraw1394
102 libdc1394
103 tools like jujuutils, fwhack, ...
diff --git a/Documentation/ABI/stable/sysfs-bus-firewire b/Documentation/ABI/stable/sysfs-bus-firewire
new file mode 100644
index 000000000000..3d484e5dc846
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-firewire
@@ -0,0 +1,122 @@
1What: /sys/bus/firewire/devices/fw[0-9]+/
2Date: May 2007
3KernelVersion: 2.6.22
4Contact: linux1394-devel@lists.sourceforge.net
5Description:
6 IEEE 1394 node device attributes.
7 Read-only. Mutable during the node device's lifetime.
8 See IEEE 1212 for semantic definitions.
9
10 config_rom
11 Contents of the Configuration ROM register.
12 Binary attribute; an array of host-endian u32.
13
14 guid
15 The node's EUI-64 in the bus information block of
16 Configuration ROM.
17 Hexadecimal string representation of an u64.
18
19
20What: /sys/bus/firewire/devices/fw[0-9]+/units
21Date: June 2009
22KernelVersion: 2.6.31
23Contact: linux1394-devel@lists.sourceforge.net
24Description:
25 IEEE 1394 node device attribute.
26 Read-only. Mutable during the node device's lifetime.
27 See IEEE 1212 for semantic definitions.
28
29 units
30 Summary of all units present in an IEEE 1394 node.
31 Contains space-separated tuples of specifier_id and
32 version of each unit present in the node. Specifier_id
33 and version are hexadecimal string representations of
34 u24 of the respective unit directory entries.
35 Specifier_id and version within each tuple are separated
36 by a colon.
37
38Users: udev rules to set ownership and access permissions or ACLs of
39 /dev/fw[0-9]+ character device files
40
41
42What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/
43Date: May 2007
44KernelVersion: 2.6.22
45Contact: linux1394-devel@lists.sourceforge.net
46Description:
47 IEEE 1394 unit device attributes.
48 Read-only. Immutable during the unit device's lifetime.
49 See IEEE 1212 for semantic definitions.
50
51 modalias
52 Same as MODALIAS in the uevent at device creation.
53
54 rom_index
55 Offset of the unit directory within the parent device's
56 (node device's) Configuration ROM, in quadlets.
57 Decimal string representation.
58
59
60What: /sys/bus/firewire/devices/*/
61Date: May 2007
62KernelVersion: 2.6.22
63Contact: linux1394-devel@lists.sourceforge.net
64Description:
65 Attributes common to IEEE 1394 node devices and unit devices.
66 Read-only. Mutable during the node device's lifetime.
67 Immutable during the unit device's lifetime.
68 See IEEE 1212 for semantic definitions.
69
70 These attributes are only created if the root directory of an
71 IEEE 1394 node or the unit directory of an IEEE 1394 unit
72 actually contains according entries.
73
74 hardware_version
75 Hexadecimal string representation of an u24.
76
77 hardware_version_name
78 Contents of a respective textual descriptor leaf.
79
80 model
81 Hexadecimal string representation of an u24.
82
83 model_name
84 Contents of a respective textual descriptor leaf.
85
86 specifier_id
87 Hexadecimal string representation of an u24.
88 Mandatory in unit directories according to IEEE 1212.
89
90 vendor
91 Hexadecimal string representation of an u24.
92 Mandatory in the root directory according to IEEE 1212.
93
94 vendor_name
95 Contents of a respective textual descriptor leaf.
96
97 version
98 Hexadecimal string representation of an u24.
99 Mandatory in unit directories according to IEEE 1212.
100
101
102What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
103 formerly
104 /sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
105Date: Feb 2004
106KernelVersion: 2.6.4
107Contact: linux1394-devel@lists.sourceforge.net
108Description:
109 SCSI target port identifier and logical unit identifier of a
110 logical unit of an SBP-2 target. The identifiers are specified
111 in SAM-2...SAM-4 annex A. They are persistent and world-wide
112 unique properties of the SBP-2 attached target.
113
114 Read-only attribute, immutable during the target's lifetime.
115 Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
116 Colon-separated hexadecimal string representations of
117 u64 EUI-64 : u24 directory_ID : u16 LUN
118 without 0x prefixes, without whitespace. The former sbp2 driver
119 (removed in 2.6.37 after being superseded by firewire-sbp2) used
120 a somewhat shorter format which was not as close to SAM.
121
122Users: udev rules to create /dev/disk/by-id/ symlinks
diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso
new file mode 100644
index 000000000000..8a1cbb594497
--- /dev/null
+++ b/Documentation/ABI/stable/vdso
@@ -0,0 +1,27 @@
1On some architectures, when the kernel loads any userspace program it
2maps an ELF DSO into that program's address space. This DSO is called
3the vDSO and it often contains useful and highly-optimized alternatives
4to real syscalls.
5
6These functions are called just like ordinary C functions according to
7your platform's ABI. Call them from a sensible context. (For example,
8if you set CS on x86 to something strange, the vDSO functions are
9within their rights to crash.) In addition, if you pass a bad
10pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
11
12To find the DSO, parse the auxiliary vector passed to the program's
13entry point. The AT_SYSINFO_EHDR entry will point to the vDSO.
14
15The vDSO uses symbol versioning; whenever you request a symbol from the
16vDSO, specify the version you are expecting.
17
18Programs that dynamically link to glibc will use the vDSO automatically.
19Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c.
20
21Unless otherwise noted, the set of symbols with any given version and the
22ABI of those symbols is considered stable. It may vary across architectures,
23though.
24
25(As of this writing, this ABI documentation has been confirmed for x86_64.
26 The maintainers of the other vDSO-using architectures should confirm
27 that it is correct for their architecture.) \ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
new file mode 100644
index 000000000000..9de269bb0ae5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
@@ -0,0 +1,21 @@
1What: /sys/bus/i2c/devices/.../device
2Date: February 2011
3Contact: Minkyu Kang <mk7.kang@samsung.com>
4Description:
5 show what device is attached
6 NONE - no device
7 USB - USB device is attached
8 UART - UART is attached
9 CHARGER - Charger is attached
10 JIG - JIG is attached
11
12What: /sys/bus/i2c/devices/.../switch
13Date: February 2011
14Contact: Minkyu Kang <mk7.kang@samsung.com>
15Description:
16 show or set the state of manual switch
17 VAUDIO - switch to VAUDIO path
18 UART - switch to UART path
19 AUDIO - switch to AUDIO path
20 DHOST - switch to DHOST path
21 AUTO - switch automatically by device
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus
index c1b53b8bc2ae..65e6e5dd67e8 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus
@@ -92,6 +92,14 @@ Description: The mouse has a tracking- and a distance-control-unit. These
92 This file is writeonly. 92 This file is writeonly.
93Users: http://roccat.sourceforge.net 93Users: http://roccat.sourceforge.net
94 94
95What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/talk
96Date: May 2011
97Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
98Description: Used to activate some easy* functions of the mouse from outside.
99 The data has to be 16 bytes long.
100 This file is writeonly.
101Users: http://roccat.sourceforge.net
102
95What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu 103What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu
96Date: October 2010 104Date: October 2010
97Contact: Stefan Achatz <erazor_de@users.sourceforge.net> 105Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
new file mode 100644
index 000000000000..5d5a16ea57c6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
@@ -0,0 +1,10 @@
1What: /sys/bus/hid/drivers/wiimote/<dev>/led1
2What: /sys/bus/hid/drivers/wiimote/<dev>/led2
3What: /sys/bus/hid/drivers/wiimote/<dev>/led3
4What: /sys/bus/hid/drivers/wiimote/<dev>/led4
5Date: July 2011
6KernelVersion: 3.1
7Contact: David Herrmann <dh.herrmann@googlemail.com>
8Description: Make it possible to set/get current led state. Reading from it
9 returns 0 if led is off and 1 if it is on. Writing 0 to it
10 disables the led, writing 1 enables it.
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
index 8906648f962b..445289cd0e65 100644
--- a/Documentation/DocBook/80211.tmpl
+++ b/Documentation/DocBook/80211.tmpl
@@ -402,8 +402,9 @@
402!Finclude/net/mac80211.h set_key_cmd 402!Finclude/net/mac80211.h set_key_cmd
403!Finclude/net/mac80211.h ieee80211_key_conf 403!Finclude/net/mac80211.h ieee80211_key_conf
404!Finclude/net/mac80211.h ieee80211_key_flags 404!Finclude/net/mac80211.h ieee80211_key_flags
405!Finclude/net/mac80211.h ieee80211_tkip_key_type 405!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
406!Finclude/net/mac80211.h ieee80211_get_tkip_key 406!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
407!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
407!Finclude/net/mac80211.h ieee80211_key_removed 408!Finclude/net/mac80211.h ieee80211_key_removed
408 </chapter> 409 </chapter>
409 410
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 7b3f49363413..07a9c48de5a2 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -409,7 +409,7 @@ cond_resched(); /* Will sleep */
409 409
410 <para> 410 <para>
411 You should always compile your kernel 411 You should always compile your kernel
412 <symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn 412 <symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn
413 you if you break these rules. If you <emphasis>do</emphasis> break 413 you if you break these rules. If you <emphasis>do</emphasis> break
414 the rules, you will eventually lock up your box. 414 the rules, you will eventually lock up your box.
415 </para> 415 </para>
diff --git a/Documentation/DocBook/v4l/io.xml b/Documentation/DocBook/v4l/io.xml
index 227e7ac45a06..c57d1ec6291c 100644
--- a/Documentation/DocBook/v4l/io.xml
+++ b/Documentation/DocBook/v4l/io.xml
@@ -210,7 +210,7 @@ for (i = 0; i &lt; reqbuf.count; i++)
210 <programlisting> 210 <programlisting>
211&v4l2-requestbuffers; reqbuf; 211&v4l2-requestbuffers; reqbuf;
212/* Our current format uses 3 planes per buffer */ 212/* Our current format uses 3 planes per buffer */
213#define FMT_NUM_PLANES = 3; 213#define FMT_NUM_PLANES 3
214 214
215struct { 215struct {
216 void *start[FMT_NUM_PLANES]; 216 void *start[FMT_NUM_PLANES];
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
index 58ced2346e67..598c22f3b3ac 100644
--- a/Documentation/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -1164,7 +1164,7 @@
1164 } 1164 }
1165 chip->port = pci_resource_start(pci, 0); 1165 chip->port = pci_resource_start(pci, 0);
1166 if (request_irq(pci->irq, snd_mychip_interrupt, 1166 if (request_irq(pci->irq, snd_mychip_interrupt,
1167 IRQF_SHARED, "My Chip", chip)) { 1167 IRQF_SHARED, KBUILD_MODNAME, chip)) {
1168 printk(KERN_ERR "cannot grab irq %d\n", pci->irq); 1168 printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
1169 snd_mychip_free(chip); 1169 snd_mychip_free(chip);
1170 return -EBUSY; 1170 return -EBUSY;
@@ -1197,7 +1197,7 @@
1197 1197
1198 /* pci_driver definition */ 1198 /* pci_driver definition */
1199 static struct pci_driver driver = { 1199 static struct pci_driver driver = {
1200 .name = "My Own Chip", 1200 .name = KBUILD_MODNAME,
1201 .id_table = snd_mychip_ids, 1201 .id_table = snd_mychip_ids,
1202 .probe = snd_mychip_probe, 1202 .probe = snd_mychip_probe,
1203 .remove = __devexit_p(snd_mychip_remove), 1203 .remove = __devexit_p(snd_mychip_remove),
@@ -1340,7 +1340,7 @@
1340 <programlisting> 1340 <programlisting>
1341<![CDATA[ 1341<![CDATA[
1342 if (request_irq(pci->irq, snd_mychip_interrupt, 1342 if (request_irq(pci->irq, snd_mychip_interrupt,
1343 IRQF_SHARED, "My Chip", chip)) { 1343 IRQF_SHARED, KBUILD_MODNAME, chip)) {
1344 printk(KERN_ERR "cannot grab irq %d\n", pci->irq); 1344 printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
1345 snd_mychip_free(chip); 1345 snd_mychip_free(chip);
1346 return -EBUSY; 1346 return -EBUSY;
@@ -1616,7 +1616,7 @@
1616 <programlisting> 1616 <programlisting>
1617<![CDATA[ 1617<![CDATA[
1618 static struct pci_driver driver = { 1618 static struct pci_driver driver = {
1619 .name = "My Own Chip", 1619 .name = KBUILD_MODNAME,
1620 .id_table = snd_mychip_ids, 1620 .id_table = snd_mychip_ids,
1621 .probe = snd_mychip_probe, 1621 .probe = snd_mychip_probe,
1622 .remove = __devexit_p(snd_mychip_remove), 1622 .remove = __devexit_p(snd_mychip_remove),
@@ -5816,7 +5816,7 @@ struct _snd_pcm_runtime {
5816 <programlisting> 5816 <programlisting>
5817<![CDATA[ 5817<![CDATA[
5818 static struct pci_driver driver = { 5818 static struct pci_driver driver = {
5819 .name = "My Chip", 5819 .name = KBUILD_MODNAME,
5820 .id_table = snd_my_ids, 5820 .id_table = snd_my_ids,
5821 .probe = snd_my_probe, 5821 .probe = snd_my_probe,
5822 .remove = __devexit_p(snd_my_remove), 5822 .remove = __devexit_p(snd_my_remove),
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index a8536cb88091..bf82851a0e57 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -5,8 +5,8 @@ Although RCU is usually used to protect read-mostly data structures,
5it is possible to use RCU to provide dynamic non-maskable interrupt 5it is possible to use RCU to provide dynamic non-maskable interrupt
6handlers, as well as dynamic irq handlers. This document describes 6handlers, as well as dynamic irq handlers. This document describes
7how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer 7how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
8work in "arch/i386/oprofile/nmi_timer_int.c" and in 8work in "arch/x86/oprofile/nmi_timer_int.c" and in
9"arch/i386/kernel/traps.c". 9"arch/x86/kernel/traps.c".
10 10
11The relevant pieces of code are listed below, each followed by a 11The relevant pieces of code are listed below, each followed by a
12brief explanation. 12brief explanation.
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index da0382daa395..dc0e33210d7e 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -53,8 +53,8 @@ kernel patches.
53 53
5412: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, 5412: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
55 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, 55 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
56 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously 56 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU
57 enabled. 57 and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled.
58 58
5913: Has been build- and runtime tested with and without CONFIG_SMP and 5913: Has been build- and runtime tested with and without CONFIG_SMP and
60 CONFIG_PREEMPT. 60 CONFIG_PREEMPT.
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
index 4e686a2ed91e..a341d87d276e 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/Booting
@@ -164,3 +164,8 @@ In either case, the following conditions must be met:
164- The boot loader is expected to call the kernel image by jumping 164- The boot loader is expected to call the kernel image by jumping
165 directly to the first instruction of the kernel image. 165 directly to the first instruction of the kernel image.
166 166
167 On CPUs supporting the ARM instruction set, the entry must be
168 made in ARM state, even for a Thumb-2 kernel.
169
170 On CPUs supporting only the Thumb instruction set such as
171 Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt
new file mode 100644
index 000000000000..441959846e1a
--- /dev/null
+++ b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt
@@ -0,0 +1,42 @@
1ROM-able zImage boot from eSD
2-----------------------------
3
4A ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and
5SuperH Mobile ARM will boot directly from the SDHI hardware block.
6
7This is achieved by the mask ROM loading the first portion of the image into
8MERAM and then jumping to it. This portion contains loader code which
9copies the entire image to SDRAM and jumps to it. From there the zImage
10boot code proceeds as normal, uncompressing the image into its final
11location and then jumping to it.
12
13This code has been tested on a mackerel board using the developer 1A eSD
14boot mode which is configured using the following jumper settings.
15
16 8 7 6 5 4 3 2 1
17 x|x|x|x| |x|x|
18S4 -+-+-+-+-+-+-+-
19 | | | |x| | |x on
20
21The eSD card needs to be present in SDHI slot 1 (CN7).
22As such S1 and S33 also need to be configured as per
23the notes in arch/arm/mach-shmobile/board-mackerel.c.
24
25A partial zImage must be written to physical partition #1 (boot)
26of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to
27accomplish this.
28
29e.g.
30 vrl4 < zImage | dd of=/dev/sdX bs=512 count=17
31
32A full copy of _the same_ zImage should be written to physical partition #1
33(boot) of the eSD at sector 0. This should _not_ be in vrl4 format.
34
35 vrl4 < zImage | dd of=/dev/sdX bs=512
36
37Note: The commands above assume that the physical partition has been
38switched. No such facility currently exists in the Linux Kernel.
39
40Physical partitions are described in the eSD specification. At the time of
41writing they are not the same as partitions that are typically configured
42using fdisk and visible through /proc/partitions
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
index c12bfc1a00c9..359587b2367b 100644
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt
@@ -8,10 +8,13 @@ Introduction
8 8
9 The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported 9 The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
10 by the 's3c2410' architecture of ARM Linux. Currently the S3C2410, 10 by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
11 S3C2412, S3C2413, S3C2416 S3C2440, S3C2442, S3C2443 and S3C2450 devices 11 S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
12 are supported. 12 are supported.
13 13
14 Support for the S3C2400 and S3C24A0 series are in progress. 14 Support for the S3C2400 and S3C24A0 series was never completed and the
15 corresponding code has been removed after a while. If someone wishes to
16 revive this effort, partial support can be retrieved from earlier Linux
17 versions.
15 18
16 The S3C2416 and S3C2450 devices are very similar and S3C2450 support is 19 The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
17 included under the arch/arm/mach-s3c2416 directory. Note, whilst core 20 included under the arch/arm/mach-s3c2416 directory. Note, whilst core
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt
new file mode 100644
index 000000000000..a17df9f91d16
--- /dev/null
+++ b/Documentation/arm/kernel_user_helpers.txt
@@ -0,0 +1,267 @@
1Kernel-provided User Helpers
2============================
3
4These are segments of kernel-provided user code reachable from user space
5at a fixed address in kernel memory. This is used to provide user space
6with some operations which require kernel help because of unimplemented
7native features and/or instructions in many ARM CPUs. The idea is for this
8code to be executed directly in user mode for best efficiency, but it is
9too intimate with the kernel counterpart to be left to user libraries.
10In fact this code might even differ from one CPU to another depending on
11the available instruction set, or whether it is an SMP system. In other
12words, the kernel reserves the right to change this code as needed without
13warning. Only the entry points and their results as documented here are
14guaranteed to be stable.
15
16This is different from (but doesn't preclude) a full blown VDSO
17implementation, however a VDSO would prevent some assembly tricks with
18constants that allow for efficient branching to those code segments. And
19since those code segments only use a few cycles before returning to user
20code, a VDSO indirect far call would add a measurable
21overhead to such minimalistic operations.
22
23User space is expected to bypass those helpers and implement those things
24inline (either in the code emitted directly by the compiler, or part of
25the implementation of a library call) when optimizing for a recent enough
26processor that has the necessary native support, but only if resulting
27binaries are already to be incompatible with earlier ARM processors due to
28usage of similar native instructions for other things. In other words,
29don't make binaries unable to run on earlier processors just for the sake
30of not using these kernel helpers if your compiled code is not going to
31use new instructions for other purposes.
32
33New helpers may be added over time, so an older kernel may be missing some
34helpers present in a newer kernel. For this reason, programs must check
35the value of __kuser_helper_version (see below) before assuming that it is
36safe to call any particular helper. This check should ideally be
37performed only once at process startup time, and execution aborted early
38if the required helpers are not provided by the kernel version that
39process is running on.
40
41kuser_helper_version
42--------------------
43
44Location: 0xffff0ffc
45
46Reference declaration:
47
48 extern int32_t __kuser_helper_version;
49
50Definition:
51
52 This field contains the number of helpers being implemented by the
53 running kernel. User space may read this to determine the availability
54 of a particular helper.
55
56Usage example:
57
58#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
59
60void check_kuser_version(void)
61{
62 if (__kuser_helper_version < 2) {
63 fprintf(stderr, "can't do atomic operations, kernel too old\n");
64 abort();
65 }
66}
67
68Notes:
69
70 User space may assume that the value of this field never changes
71 during the lifetime of any single process. This means that this
72 field can be read once during the initialisation of a library or
73 startup phase of a program.
74
75kuser_get_tls
76-------------
77
78Location: 0xffff0fe0
79
80Reference prototype:
81
82 void * __kuser_get_tls(void);
83
84Input:
85
86 lr = return address
87
88Output:
89
90 r0 = TLS value
91
92Clobbered registers:
93
94 none
95
96Definition:
97
98 Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
99
100Usage example:
101
102typedef void * (__kuser_get_tls_t)(void);
103#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
104
105void foo()
106{
107 void *tls = __kuser_get_tls();
108 printf("TLS = %p\n", tls);
109}
110
111Notes:
112
113 - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
114
115kuser_cmpxchg
116-------------
117
118Location: 0xffff0fc0
119
120Reference prototype:
121
122 int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
123
124Input:
125
126 r0 = oldval
127 r1 = newval
128 r2 = ptr
129 lr = return address
130
131Output:
132
133 r0 = success code (zero or non-zero)
134 C flag = set if r0 == 0, clear if r0 != 0
135
136Clobbered registers:
137
138 r3, ip, flags
139
140Definition:
141
142 Atomically store newval in *ptr only if *ptr is equal to oldval.
143 Return zero if *ptr was changed or non-zero if no exchange happened.
144 The C flag is also set if *ptr was changed to allow for assembly
145 optimization in the calling code.
146
147Usage example:
148
149typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
150#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
151
152int atomic_add(volatile int *ptr, int val)
153{
154 int old, new;
155
156 do {
157 old = *ptr;
158 new = old + val;
159 } while(__kuser_cmpxchg(old, new, ptr));
160
161 return new;
162}
163
164Notes:
165
166 - This routine already includes memory barriers as needed.
167
168 - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
169
170kuser_memory_barrier
171--------------------
172
173Location: 0xffff0fa0
174
175Reference prototype:
176
177 void __kuser_memory_barrier(void);
178
179Input:
180
181 lr = return address
182
183Output:
184
185 none
186
187Clobbered registers:
188
189 none
190
191Definition:
192
193 Apply any needed memory barrier to preserve consistency with data modified
194 manually and __kuser_cmpxchg usage.
195
196Usage example:
197
198typedef void (__kuser_dmb_t)(void);
199#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
200
201Notes:
202
203 - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
204
205kuser_cmpxchg64
206---------------
207
208Location: 0xffff0f60
209
210Reference prototype:
211
212 int __kuser_cmpxchg64(const int64_t *oldval,
213 const int64_t *newval,
214 volatile int64_t *ptr);
215
216Input:
217
218 r0 = pointer to oldval
219 r1 = pointer to newval
220 r2 = pointer to target value
221 lr = return address
222
223Output:
224
225 r0 = success code (zero or non-zero)
226 C flag = set if r0 == 0, clear if r0 != 0
227
228Clobbered registers:
229
230 r3, lr, flags
231
232Definition:
233
234 Atomically store the 64-bit value pointed to by newval in *ptr only if *ptr
235 is equal to the 64-bit value pointed to by oldval. Return zero if *ptr was
236 changed or non-zero if no exchange happened.
237
238 The C flag is also set if *ptr was changed to allow for assembly
239 optimization in the calling code.
240
241Usage example:
242
243typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
244 const int64_t *newval,
245 volatile int64_t *ptr);
246#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
247
248int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
249{
250 int64_t old, new;
251
252 do {
253 old = *ptr;
254 new = old + val;
255 } while(__kuser_cmpxchg64(&old, &new, ptr));
256
257 return new;
258}
259
260Notes:
261
262 - This routine already includes memory barriers as needed.
263
264 - Due to the length of this sequence, this spans 2 conventional kuser
265 "slots", therefore 0xffff0f80 is not used as a valid entry point.
266
267 - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/blackfin/bfin-spi-notes.txt b/Documentation/blackfin/bfin-spi-notes.txt
index 556fa877f2e8..eae6eaf2a09d 100644
--- a/Documentation/blackfin/bfin-spi-notes.txt
+++ b/Documentation/blackfin/bfin-spi-notes.txt
@@ -9,6 +9,8 @@ the entire SPI transfer. - And not just bits_per_word duration.
9In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this 9In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this
10behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2 10behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2
11timing, you can utilize the GPIO controlled SPI Slave Select option instead. 11timing, you can utilize the GPIO controlled SPI Slave Select option instead.
12In this case, you should use GPIO based CS for all of your slaves and not just
13the ones using mode 0 or 2 in order to guarantee correct CS toggling behavior.
12 14
13You can even use the same pin whose peripheral role is a SSEL, 15You can even use the same pin whose peripheral role is a SSEL,
14but use it as a GPIO instead. 16but use it as a GPIO instead.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f65274081c8d..d8147b336c35 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
45 45
46rq_affinity (RW) 46rq_affinity (RW)
47---------------- 47----------------
48If this option is enabled, the block layer will migrate request completions 48If this option is '1', the block layer will migrate request completions to the
49to the CPU that originally submitted the request. For some workloads 49cpu "group" that originally submitted the request. For some workloads this
50this provides a significant reduction in CPU cycles due to caching effects. 50provides a significant reduction in CPU cycles due to caching effects.
51
52For storage configurations that need to maximize distribution of completion
53processing, setting this option to '2' forces the completion to run on the
54requesting cpu (bypassing the "group" aggregation logic).
51 55
52scheduler (RW) 56scheduler (RW)
53-------------- 57--------------
diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960
index 0e8f618ab534..bd85fb9dc6e5 100644
--- a/Documentation/blockdev/README.DAC960
+++ b/Documentation/blockdev/README.DAC960
@@ -214,7 +214,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree:
214 make config 214 make config
215 make bzImage (or zImage) 215 make bzImage (or zImage)
216 216
217Then install "arch/i386/boot/bzImage" or "arch/i386/boot/zImage" as your 217Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your
218standard kernel, run lilo if appropriate, and reboot. 218standard kernel, run lilo if appropriate, and reboot.
219 219
220To create the necessary devices in /dev, the "make_rd" script included in 220To create the necessary devices in /dev, the "make_rd" script included in
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt
index 6c820baa19a6..fa72e97dd669 100644
--- a/Documentation/blockdev/ramdisk.txt
+++ b/Documentation/blockdev/ramdisk.txt
@@ -64,9 +64,9 @@ the RAM disk dynamically grows as data is being written into it, a size field
64is not required. Bits 11 to 13 are not currently used and may as well be zero. 64is not required. Bits 11 to 13 are not currently used and may as well be zero.
65These numbers are no magical secrets, as seen below: 65These numbers are no magical secrets, as seen below:
66 66
67./arch/i386/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF 67./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
68./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 68./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
69./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 69./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
70 70
71Consider a typical two floppy disk setup, where you will have the 71Consider a typical two floppy disk setup, where you will have the
72kernel on disk one, and have already put a RAM disk image onto disk #2. 72kernel on disk one, and have already put a RAM disk image onto disk #2.
@@ -85,7 +85,7 @@ The command line equivalent is: "prompt_ramdisk=1"
85Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. 85Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
86So to create disk one of the set, you would do: 86So to create disk one of the set, you would do:
87 87
88 /usr/src/linux# cat arch/i386/boot/zImage > /dev/fd0 88 /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
89 /usr/src/linux# rdev /dev/fd0 /dev/fd0 89 /usr/src/linux# rdev /dev/fd0 /dev/fd0
90 /usr/src/linux# rdev -r /dev/fd0 49152 90 /usr/src/linux# rdev -r /dev/fd0 49152
91 91
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index 9ad85df4b983..9d73cc0cadb9 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -23,7 +23,7 @@ New accounting groups can be created under the parent group /sys/fs/cgroup.
23 23
24# cd /sys/fs/cgroup 24# cd /sys/fs/cgroup
25# mkdir g1 25# mkdir g1
26# echo $$ > g1 26# echo $$ > g1/tasks
27 27
28The above steps create a new group g1 and move the current shell 28The above steps create a new group g1 and move the current shell
29process (bash) into it. CPU time consumed by this bash and its children 29process (bash) into it. CPU time consumed by this bash and its children
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 5b0d78e55ccc..5c51ed406d1d 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -180,7 +180,7 @@ files describing that cpuset:
180 - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset 180 - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
181 - cpuset.sched_relax_domain_level: the searching range when migrating tasks 181 - cpuset.sched_relax_domain_level: the searching range when migrating tasks
182 182
183In addition, the root cpuset only has the following file: 183In addition, only the root cpuset has the following file:
184 - cpuset.memory_pressure_enabled flag: compute memory_pressure? 184 - cpuset.memory_pressure_enabled flag: compute memory_pressure?
185 185
186New cpusets are created using the mkdir system call or shell 186New cpusets are created using the mkdir system call or shell
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 6c30e930c122..c436096351f8 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -168,7 +168,7 @@ in-chipset dynamic frequency switching to policy->min, the upper limit
168to policy->max, and -if supported- select a performance-oriented 168to policy->max, and -if supported- select a performance-oriented
169setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a 169setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
170powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check 170powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
171the reference implementation in arch/i386/kernel/cpu/cpufreq/longrun.c 171the reference implementation in drivers/cpufreq/longrun.c
172 172
173 173
174 174
diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding
index f3f1a469443c..83f5f5b365a3 100644
--- a/Documentation/development-process/4.Coding
+++ b/Documentation/development-process/4.Coding
@@ -244,7 +244,7 @@ testing purposes. In particular, you should turn on:
244 - DEBUG_SLAB can find a variety of memory allocation and use errors; it 244 - DEBUG_SLAB can find a variety of memory allocation and use errors; it
245 should be used on most development kernels. 245 should be used on most development kernels.
246 246
247 - DEBUG_SPINLOCK, DEBUG_SPINLOCK_SLEEP, and DEBUG_MUTEXES will find a 247 - DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP, and DEBUG_MUTEXES will find a
248 number of common locking errors. 248 number of common locking errors.
249 249
250There are quite a few other debugging options, some of which will be 250There are quite a few other debugging options, some of which will be
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
new file mode 100644
index 000000000000..1c044eb320cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -0,0 +1,21 @@
1* ARM Performance Monitor Units
2
3ARM cores often have a PMU for counting cpu and cache events like cache misses
4and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
5representation in the device tree should be done as follows:
6
7Required properties:
8
9- compatible : should be one of
10 "arm,cortex-a9-pmu"
11 "arm,cortex-a8-pmu"
12 "arm,arm1176-pmu"
13 "arm,arm1136-pmu"
14- interrupts : 1 combined interrupt or 1 per core.
15
16Example:
17
18pmu {
19 compatible = "arm,cortex-a9-pmu";
20 interrupts = <100 101>;
21};
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt
new file mode 100644
index 000000000000..1d5d7a870ec7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/primecell.txt
@@ -0,0 +1,21 @@
1* ARM Primecell Peripherals
2
3ARM, Ltd. Primecell peripherals have a standard id register that can be used to
4identify the peripheral type, vendor, and revision. This value can be used for
5driver matching.
6
7Required properties:
8
9- compatible : should be a specific value for peripheral and "arm,primecell"
10
11Optional properties:
12
13- arm,primecell-periphid : Value to override the h/w value with
14
15Example:
16
17serial@fff36000 {
18 compatible = "arm,pl011", "arm,primecell";
19 arm,primecell-periphid = <0x00341011>;
20};
21
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
index 2b6f2d45c45a..38988ef1336b 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -1,4 +1,4 @@
1Freescale SoC SEC Security Engines 1Freescale SoC SEC Security Engines versions 2.x-3.x
2 2
3Required properties: 3Required properties:
4 4
diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
new file mode 100644
index 000000000000..4363ae4b3c14
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
@@ -0,0 +1,22 @@
1* Freescale i.MX/MXC GPIO controller
2
3Required properties:
4- compatible : Should be "fsl,<soc>-gpio"
5- reg : Address and length of the register set for the device
6- interrupts : Should be the port interrupt shared by all 32 pins, if
7 one number. If two numbers, the first one is the interrupt shared
8 by low 16 pins and the second one is for high 16 pins.
9- gpio-controller : Marks the device node as a gpio controller.
10- #gpio-cells : Should be two. The first cell is the pin number and
11 the second cell is used to specify optional parameters (currently
12 unused).
13
14Example:
15
16gpio0: gpio@73f84000 {
17 compatible = "fsl,imx51-gpio", "fsl,imx31-gpio";
18 reg = <0x73f84000 0x4000>;
19 interrupts = <50 51>;
20 gpio-controller;
21 #gpio-cells = <2>;
22};
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index edaa84d288a1..4e16ba4feab0 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -4,17 +4,45 @@ Specifying GPIO information for devices
41) gpios property 41) gpios property
5----------------- 5-----------------
6 6
7Nodes that makes use of GPIOs should define them using `gpios' property, 7Nodes that make use of GPIOs should specify them using one or more
8format of which is: <&gpio-controller1-phandle gpio1-specifier 8properties, each containing a 'gpio-list':
9 &gpio-controller2-phandle gpio2-specifier
10 0 /* holes are permitted, means no GPIO 3 */
11 &gpio-controller4-phandle gpio4-specifier
12 ...>;
13 9
14Note that gpio-specifier length is controller dependent. 10 gpio-list ::= <single-gpio> [gpio-list]
11 single-gpio ::= <gpio-phandle> <gpio-specifier>
12 gpio-phandle : phandle to gpio controller node
13 gpio-specifier : Array of #gpio-cells specifying specific gpio
14 (controller specific)
15
16GPIO properties should be named "[<name>-]gpios". Exact
17meaning of each gpios property must be documented in the device tree
18binding for each device.
19
20For example, the following could be used to describe gpios pins to use
21as chip select lines; with chip selects 0, 1 and 3 populated, and chip
22select 2 left empty:
23
24 gpio1: gpio1 {
25 gpio-controller;
26 #gpio-cells = <2>;
27 };
28 gpio2: gpio2 {
29 gpio-controller;
30 #gpio-cells = <1>;
31 };
32 [...]
33 chipsel-gpios = <&gpio1 12 0>,
34 <&gpio1 13 0>,
35 <0>, /* holes are permitted, means no GPIO 2 */
36 <&gpio2 2>;
37
38Note that gpio-specifier length is controller dependent. In the
39above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
40only uses one.
15 41
16gpio-specifier may encode: bank, pin position inside the bank, 42gpio-specifier may encode: bank, pin position inside the bank,
17whether pin is open-drain and whether pin is logically inverted. 43whether pin is open-drain and whether pin is logically inverted.
44Exact meaning of each specifier cell is controller specific, and must
45be documented in the device tree binding for the device.
18 46
19Example of the node using GPIOs: 47Example of the node using GPIOs:
20 48
@@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
282) gpio-controller nodes 562) gpio-controller nodes
29------------------------ 57------------------------
30 58
31Every GPIO controller node must have #gpio-cells property defined, 59Every GPIO controller node must contain both an empty "gpio-controller"
32this information will be used to translate gpio-specifiers. 60property, and a #gpio-cells property giving the size of the gpio-specifier.
33 61
34Example of two SOC GPIO banks defined as gpio-controller nodes: 62Example of two SOC GPIO banks defined as gpio-controller nodes:
35 63
diff --git a/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
new file mode 100644
index 000000000000..eb4b530d64e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
@@ -0,0 +1,8 @@
1NVIDIA Tegra 2 GPIO controller
2
3Required properties:
4- compatible : "nvidia,tegra20-gpio"
5- #gpio-cells : Should be two. The first cell is the pin number and the
6 second cell is used to specify optional parameters:
7 - bit 0 specifies polarity (0 for normal, 1 for inverted)
8- gpio-controller : Marks the device node as a GPIO controller.
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
index 1a729f089866..1a729f089866 100755..100644
--- a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
+++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
new file mode 100644
index 000000000000..9841057d112b
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -0,0 +1,22 @@
1* Freescale (Enhanced) Configurable Serial Peripheral Interface
2 (CSPI/eCSPI) for i.MX
3
4Required properties:
5- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
6- reg : Offset and length of the register set for the device
7- interrupts : Should contain CSPI/eCSPI interrupt
8- fsl,spi-num-chipselects : Contains the number of chipselects
9- cs-gpios : Specifies the gpio pins to be used for chipselects.
10
11Example:
12
13ecspi@70010000 {
14 #address-cells = <1>;
15 #size-cells = <0>;
16 compatible = "fsl,imx51-ecspi";
17 reg = <0x70010000 0x4000>;
18 interrupts = <36>;
19 fsl,spi-num-chipselects = <2>;
20 cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */
21 <&gpio3 25 0>; /* GPIO4_25 */
22};
diff --git a/Documentation/devicetree/bindings/spi/spi_nvidia.txt b/Documentation/devicetree/bindings/spi/spi_nvidia.txt
new file mode 100644
index 000000000000..6b9e51896693
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_nvidia.txt
@@ -0,0 +1,5 @@
1NVIDIA Tegra 2 SPI device
2
3Required properties:
4- compatible : should be "nvidia,tegra20-spi".
5- gpios : should specify GPIOs used for chipselect.
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
new file mode 100644
index 000000000000..b8b27b0aca10
--- /dev/null
+++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
@@ -0,0 +1,36 @@
1* UART (Universal Asynchronous Receiver/Transmitter)
2
3Required properties:
4- compatible : one of:
5 - "ns8250"
6 - "ns16450"
7 - "ns16550a"
8 - "ns16550"
9 - "ns16750"
10 - "ns16850"
11 - "nvidia,tegra20-uart"
12 - "ibm,qpace-nwp-serial"
13 - "serial" if the port type is unknown.
14- reg : offset and length of the register set for the device.
15- interrupts : should contain uart interrupt.
16- clock-frequency : the input clock frequency for the UART.
17
18Optional properties:
19- current-speed : the current active speed of the UART.
20- reg-offset : offset to apply to the mapbase from the start of the registers.
21- reg-shift : quantity to shift the register offsets by.
22- reg-io-width : the size (in bytes) of the IO accesses that should be
23 performed on the device. There are some systems that require 32-bit
24 accesses to the UART (e.g. TI davinci).
25- used-by-rtas : set to indicate that the port is in use by the OpenFirmware
26 RTAS and should not be registered.
27
28Example:
29
30 uart@80230000 {
31 compatible = "ns8250";
32 reg = <0x80230000 0x100>;
33 clock-frequency = <3686400>;
34 interrupts = <10>;
35 reg-shift = <2>;
36 };
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d43900c9ede2..aca4f8235969 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -184,7 +184,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
184 184
185 A much more powerful interface, /proc/<pid>/oom_score_adj, was 185 A much more powerful interface, /proc/<pid>/oom_score_adj, was
186 introduced with the oom killer rewrite that allows users to increase or 186 introduced with the oom killer rewrite that allows users to increase or
187 decrease the badness() score linearly. This interface will replace 187 decrease the badness score linearly. This interface will replace
188 /proc/<pid>/oom_adj. 188 /proc/<pid>/oom_adj.
189 189
190 A warning will be emitted to the kernel log if an application uses this 190 A warning will be emitted to the kernel log if an application uses this
@@ -199,7 +199,7 @@ Files: drivers/staging/cs5535_gpio/*
199Check: drivers/staging/cs5535_gpio/cs5535_gpio.c 199Check: drivers/staging/cs5535_gpio/cs5535_gpio.c
200Why: A newer driver replaces this; it is drivers/gpio/cs5535-gpio.c, and 200Why: A newer driver replaces this; it is drivers/gpio/cs5535-gpio.c, and
201 integrates with the Linux GPIO subsystem. The old driver has been 201 integrates with the Linux GPIO subsystem. The old driver has been
202 moved to staging, and will be removed altogether around 2.6.40. 202 moved to staging, and will be removed altogether around 3.0.
203 Please test the new driver, and ensure that the functionality you 203 Please test the new driver, and ensure that the functionality you
204 need and any bugfixes from the old driver are available in the new 204 need and any bugfixes from the old driver are available in the new
205 one. 205 one.
@@ -294,7 +294,7 @@ When: The schedule was July 2008, but it was decided that we are going to keep t
294Why: The support code for the old firmware hurts code readability/maintainability 294Why: The support code for the old firmware hurts code readability/maintainability
295 and slightly hurts runtime performance. Bugfixes for the old firmware 295 and slightly hurts runtime performance. Bugfixes for the old firmware
296 are not provided by Broadcom anymore. 296 are not provided by Broadcom anymore.
297Who: Michael Buesch <mb@bu3sch.de> 297Who: Michael Buesch <m@bues.ch>
298 298
299--------------------------- 299---------------------------
300 300
@@ -430,7 +430,7 @@ Who: Avi Kivity <avi@redhat.com>
430---------------------------- 430----------------------------
431 431
432What: iwlwifi 50XX module parameters 432What: iwlwifi 50XX module parameters
433When: 2.6.40 433When: 3.0
434Why: The "..50" modules parameters were used to configure 5000 series and 434Why: The "..50" modules parameters were used to configure 5000 series and
435 up devices; different set of module parameters also available for 4965 435 up devices; different set of module parameters also available for 4965
436 with same functionalities. Consolidate both set into single place 436 with same functionalities. Consolidate both set into single place
@@ -441,7 +441,7 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com>
441---------------------------- 441----------------------------
442 442
443What: iwl4965 alias support 443What: iwl4965 alias support
444When: 2.6.40 444When: 3.0
445Why: Internal alias support has been present in module-init-tools for some 445Why: Internal alias support has been present in module-init-tools for some
446 time, the MODULE_ALIAS("iwl4965") boilerplate aliases can be removed 446 time, the MODULE_ALIAS("iwl4965") boilerplate aliases can be removed
447 with no impact. 447 with no impact.
@@ -482,7 +482,7 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
482---------------------------- 482----------------------------
483 483
484What: iwlwifi disable_hw_scan module parameters 484What: iwlwifi disable_hw_scan module parameters
485When: 2.6.40 485When: 3.0
486Why: Hardware scan is the preferred method for iwlwifi devices for 486Why: Hardware scan is the preferred method for iwlwifi devices for
487 scanning operation. Remove software scan support for all the 487 scanning operation. Remove software scan support for all the
488 iwlwifi devices. 488 iwlwifi devices.
@@ -491,16 +491,6 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com>
491 491
492---------------------------- 492----------------------------
493 493
494What: cancel_rearming_delayed_work[queue]()
495When: 2.6.39
496
497Why: The functions have been superceded by cancel_delayed_work_sync()
498 quite some time ago. The conversion is trivial and there is no
499 in-kernel user left.
500Who: Tejun Heo <tj@kernel.org>
501
502----------------------------
503
504What: Legacy, non-standard chassis intrusion detection interface. 494What: Legacy, non-standard chassis intrusion detection interface.
505When: June 2011 495When: June 2011
506Why: The adm9240, w83792d and w83793 hardware monitoring drivers have 496Why: The adm9240, w83792d and w83793 hardware monitoring drivers have
@@ -518,22 +508,6 @@ Files: net/netfilter/xt_connlimit.c
518 508
519---------------------------- 509----------------------------
520 510
521What: noswapaccount kernel command line parameter
522When: 2.6.40
523Why: The original implementation of memsw feature enabled by
524 CONFIG_CGROUP_MEM_RES_CTLR_SWAP could be disabled by the noswapaccount
525 kernel parameter (introduced in 2.6.29-rc1). Later on, this decision
526 turned out to be not ideal because we cannot have the feature compiled
527 in and disabled by default and let only interested to enable it
528 (e.g. general distribution kernels might need it). Therefore we have
529 added swapaccount[=0|1] parameter (introduced in 2.6.37) which provides
530 the both possibilities. If we remove noswapaccount we will have
531 less command line parameters with the same functionality and we
532 can also cleanup the parameter handling a bit ().
533Who: Michal Hocko <mhocko@suse.cz>
534
535----------------------------
536
537What: ipt_addrtype match include file 511What: ipt_addrtype match include file
538When: 2012 512When: 2012
539Why: superseded by xt_addrtype 513Why: superseded by xt_addrtype
@@ -552,7 +526,7 @@ Who: Jean Delvare <khali@linux-fr.org>
552---------------------------- 526----------------------------
553 527
554What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver 528What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver
555When: 2.6.42 529When: 3.2
556Why: The information passed to the driver by this ioctl is now queried 530Why: The information passed to the driver by this ioctl is now queried
557 dynamically from the device. 531 dynamically from the device.
558Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> 532Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
@@ -560,7 +534,7 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
560---------------------------- 534----------------------------
561 535
562What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver 536What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver
563When: 2.6.42 537When: 3.2
564Why: Used only by applications compiled against older driver versions. 538Why: Used only by applications compiled against older driver versions.
565 Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls. 539 Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls.
566Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> 540Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
@@ -568,7 +542,7 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
568---------------------------- 542----------------------------
569 543
570What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver 544What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver
571When: 2.6.42 545When: 3.2
572Why: Superseded by the UVCIOC_CTRL_QUERY ioctl. 546Why: Superseded by the UVCIOC_CTRL_QUERY ioctl.
573Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> 547Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
574 548
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 82e8e52e8790..653380793a6c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -52,7 +52,7 @@ ata *);
52 void (*put_link) (struct dentry *, struct nameidata *, void *); 52 void (*put_link) (struct dentry *, struct nameidata *, void *);
53 void (*truncate) (struct inode *); 53 void (*truncate) (struct inode *);
54 int (*permission) (struct inode *, int, unsigned int); 54 int (*permission) (struct inode *, int, unsigned int);
55 int (*check_acl)(struct inode *, int, unsigned int); 55 int (*get_acl)(struct inode *, int);
56 int (*setattr) (struct dentry *, struct iattr *); 56 int (*setattr) (struct dentry *, struct iattr *);
57 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); 57 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
58 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 58 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -80,7 +80,7 @@ put_link: no
80truncate: yes (see below) 80truncate: yes (see below)
81setattr: yes 81setattr: yes
82permission: no (may not block if called in rcu-walk mode) 82permission: no (may not block if called in rcu-walk mode)
83check_acl: no 83get_acl: no
84getattr: no 84getattr: no
85setxattr: yes 85setxattr: yes
86getxattr: no 86getxattr: no
@@ -412,7 +412,7 @@ prototypes:
412 int (*open) (struct inode *, struct file *); 412 int (*open) (struct inode *, struct file *);
413 int (*flush) (struct file *); 413 int (*flush) (struct file *);
414 int (*release) (struct inode *, struct file *); 414 int (*release) (struct inode *, struct file *);
415 int (*fsync) (struct file *, int datasync); 415 int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
416 int (*aio_fsync) (struct kiocb *, int datasync); 416 int (*aio_fsync) (struct kiocb *, int datasync);
417 int (*fasync) (int, struct file *, int); 417 int (*fasync) (int, struct file *, int);
418 int (*lock) (struct file *, int, struct file_lock *); 418 int (*lock) (struct file *, int, struct file_lock *);
@@ -438,9 +438,7 @@ prototypes:
438 438
439locking rules: 439locking rules:
440 All may block except for ->setlease. 440 All may block except for ->setlease.
441 No VFS locks held on entry except for ->fsync and ->setlease. 441 No VFS locks held on entry except for ->setlease.
442
443->fsync() has i_mutex on inode.
444 442
445->setlease has the file_list_lock held and must not sleep. 443->setlease has the file_list_lock held and must not sleep.
446 444
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index 90c71c6f0d00..ffdd9d866ad7 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -226,7 +226,7 @@ They depend on various facilities being available:
226 cdrecord. 226 cdrecord.
227 227
228 e.g. 228 e.g.
229 cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso 229 cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
230 230
231 For more information on isolinux, including how to create bootdisks 231 For more information on isolinux, including how to create bootdisks
232 for prebuilt kernels, see http://syslinux.zytor.com/ 232 for prebuilt kernels, see http://syslinux.zytor.com/
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index d5c0cef38a71..873a2ab2e9f8 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -40,7 +40,6 @@ Features which NILFS2 does not support yet:
40 - POSIX ACLs 40 - POSIX ACLs
41 - quotas 41 - quotas
42 - fsck 42 - fsck
43 - resize
44 - defragmentation 43 - defragmentation
45 44
46Mount options 45Mount options
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 6e29954851a2..b4a3d765ff9a 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -400,10 +400,32 @@ a file off.
400 400
401-- 401--
402[mandatory] 402[mandatory]
403
404--
405[mandatory]
406 ->get_sb() is gone. Switch to use of ->mount(). Typically it's just 403 ->get_sb() is gone. Switch to use of ->mount(). Typically it's just
407a matter of switching from calling get_sb_... to mount_... and changing the 404a matter of switching from calling get_sb_... to mount_... and changing the
408function type. If you were doing it manually, just switch from setting ->mnt_root 405function type. If you were doing it manually, just switch from setting ->mnt_root
409to some pointer to returning that pointer. On errors return ERR_PTR(...). 406to some pointer to returning that pointer. On errors return ERR_PTR(...).
407
408--
409[mandatory]
410 ->permission() and generic_permission() have lost flags
411argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
412 generic_permission() has also lost the check_acl argument; ACL checking
413has been taken to VFS and filesystems need to provide a non-NULL ->i_op->get_acl
414to read an ACL from disk.
415
416--
417[mandatory]
418 If you implement your own ->llseek() you must handle SEEK_HOLE and
419SEEK_DATA. You can handle this by returning -EINVAL, but it would be nicer to
420support it in some way. The generic handler assumes that the entire file is
421data and there is a virtual hole at the end of the file. So if the provided
422offset is less than i_size and SEEK_DATA is specified, return the same offset.
423If the above is true for the offset and you are given SEEK_HOLE, return the end
424of the file. If the offset is i_size or greater return -ENXIO in either case.
425
426[mandatory]
427 If you have your own ->fsync() you must make sure to call
428filemap_write_and_wait_range() so that all dirty pages are synced out properly.
429You must also keep in mind that ->fsync() is not called with i_mutex held
430anymore, so if you require i_mutex locking you must make sure to take it and
431release it yourself.
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 8e4fab639d9c..a0a61d2f389f 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0
111to UBI and mount volume "rootfs": 111to UBI and mount volume "rootfs":
112ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs 112ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
113 113
114
115Module Parameters for Debugging
116===============================
117
118When UBIFS has been compiled with debugging enabled, there are 2 module
119parameters that are available to control aspects of testing and debugging.
120
121debug_chks Selects extra checks that UBIFS can do while running:
122
123 Check Flag value
124
125 General checks 1
126 Check Tree Node Cache (TNC) 2
127 Check indexing tree size 4
128 Check orphan area 8
129 Check old indexing tree 16
130 Check LEB properties (lprops) 32
131 Check leaf nodes and inodes 64
132
133debug_tsts Selects a mode of testing, as follows:
134
135 Test mode Flag value
136
137 Failure mode for recovery testing 4
138
139For example, set debug_chks to 3 to enable general and TNC checks.
140
141
142References 114References
143========== 115==========
144 116
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 88b9f5519af9..52d8fb81cfff 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -229,6 +229,8 @@ struct super_operations {
229 229
230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
232 int (*nr_cached_objects)(struct super_block *);
233 void (*free_cached_objects)(struct super_block *, int);
232}; 234};
233 235
234All methods are called without any locks being held, unless otherwise 236All methods are called without any locks being held, unless otherwise
@@ -301,6 +303,26 @@ or bottom half).
301 303
302 quota_write: called by the VFS to write to filesystem quota file. 304 quota_write: called by the VFS to write to filesystem quota file.
303 305
306 nr_cached_objects: called by the sb cache shrinking function for the
307 filesystem to return the number of freeable cached objects it contains.
308 Optional.
309
310 free_cached_objects: called by the sb cache shrinking function for the
311 filesystem to scan the number of objects indicated to try to free them.
312 Optional, but any filesystem implementing this method needs to also
313 implement ->nr_cached_objects for it to be called correctly.
314
315 We can't do anything with any errors that the filesystem might have
316 encountered, hence the void return type. This will never be called if
317 the VM is trying to reclaim under GFP_NOFS conditions, hence this
318 method does not need to handle that situation itself.
319
320 Implementations must include conditional reschedule calls inside any
321 scanning loop that is done. This allows the VFS to determine
322 appropriate scan batch sizes without having to worry about whether
323 implementations will cause holdoff problems due to large scan batch
324 sizes.
325
304Whoever sets up the inode is responsible for filling in the "i_op" field. This 326Whoever sets up the inode is responsible for filling in the "i_op" field. This
305is a pointer to a "struct inode_operations" which describes the methods that 327is a pointer to a "struct inode_operations" which describes the methods that
306can be performed on individual inodes. 328can be performed on individual inodes.
@@ -333,8 +355,8 @@ struct inode_operations {
333 void * (*follow_link) (struct dentry *, struct nameidata *); 355 void * (*follow_link) (struct dentry *, struct nameidata *);
334 void (*put_link) (struct dentry *, struct nameidata *, void *); 356 void (*put_link) (struct dentry *, struct nameidata *, void *);
335 void (*truncate) (struct inode *); 357 void (*truncate) (struct inode *);
336 int (*permission) (struct inode *, int, unsigned int); 358 int (*permission) (struct inode *, int);
337 int (*check_acl)(struct inode *, int, unsigned int); 359 int (*get_acl)(struct inode *, int);
338 int (*setattr) (struct dentry *, struct iattr *); 360 int (*setattr) (struct dentry *, struct iattr *);
339 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 361 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
340 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 362 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -423,7 +445,7 @@ otherwise noted.
423 permission: called by the VFS to check for access rights on a POSIX-like 445 permission: called by the VFS to check for access rights on a POSIX-like
424 filesystem. 446 filesystem.
425 447
426 May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk 448 May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
427 mode, the filesystem must check the permission without blocking or 449 mode, the filesystem must check the permission without blocking or
428 storing to the inode. 450 storing to the inode.
429 451
@@ -755,7 +777,7 @@ struct file_operations {
755 int (*open) (struct inode *, struct file *); 777 int (*open) (struct inode *, struct file *);
756 int (*flush) (struct file *); 778 int (*flush) (struct file *);
757 int (*release) (struct inode *, struct file *); 779 int (*release) (struct inode *, struct file *);
758 int (*fsync) (struct file *, int datasync); 780 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
759 int (*aio_fsync) (struct kiocb *, int datasync); 781 int (*aio_fsync) (struct kiocb *, int datasync);
760 int (*fasync) (int, struct file *, int); 782 int (*fasync) (int, struct file *, int);
761 int (*lock) (struct file *, int, struct file_lock *); 783 int (*lock) (struct file *, int, struct file_lock *);
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index 38425f0f2645..6f496a586732 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -76,7 +76,8 @@ IT8718F, IT8720F, IT8721F, IT8726F, IT8758E and SiS950 chips.
76These chips are 'Super I/O chips', supporting floppy disks, infrared ports, 76These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
77joysticks and other miscellaneous stuff. For hardware monitoring, they 77joysticks and other miscellaneous stuff. For hardware monitoring, they
78include an 'environment controller' with 3 temperature sensors, 3 fan 78include an 'environment controller' with 3 temperature sensors, 3 fan
79rotation speed sensors, 8 voltage sensors, and associated alarms. 79rotation speed sensors, 8 voltage sensors, associated alarms, and chassis
80intrusion detection.
80 81
81The IT8712F and IT8716F additionally feature VID inputs, used to report 82The IT8712F and IT8716F additionally feature VID inputs, used to report
82the Vcore voltage of the processor. The early IT8712F have 5 VID pins, 83the Vcore voltage of the processor. The early IT8712F have 5 VID pins,
diff --git a/Documentation/hwmon/lm78 b/Documentation/hwmon/lm78
index 60932e26abaa..2bdc881a0c12 100644
--- a/Documentation/hwmon/lm78
+++ b/Documentation/hwmon/lm78
@@ -13,7 +13,8 @@ Supported chips:
13 Datasheet: Publicly available at the National Semiconductor website 13 Datasheet: Publicly available at the National Semiconductor website
14 http://www.national.com/ 14 http://www.national.com/
15 15
16Author: Frodo Looijaard <frodol@dds.nl> 16Authors: Frodo Looijaard <frodol@dds.nl>
17 Jean Delvare <khali@linux-fr.org>
17 18
18Description 19Description
19----------- 20-----------
diff --git a/Documentation/hwmon/sch5636 b/Documentation/hwmon/sch5636
new file mode 100644
index 000000000000..f83bd1c260f0
--- /dev/null
+++ b/Documentation/hwmon/sch5636
@@ -0,0 +1,31 @@
1Kernel driver sch5636
2=====================
3
4Supported chips:
5 * SMSC SCH5636
6 Prefix: 'sch5636'
7 Addresses scanned: none, address read from Super I/O config space
8
9Author: Hans de Goede <hdegoede@redhat.com>
10
11
12Description
13-----------
14
15SMSC SCH5636 Super I/O chips include an embedded microcontroller for
16hardware monitoring solutions, allowing motherboard manufacturers to create
17their own custom hwmon solution based upon the SCH5636.
18
19Currently the sch5636 driver only supports the Fujitsu Theseus SCH5636 based
20hwmon solution. The sch5636 driver runs a sanity check on loading to ensure
21it is dealing with a Fujitsu Theseus and not with another custom SCH5636 based
22hwmon solution.
23
24The Fujitsu Theseus can monitor up to 5 voltages, 8 fans and 16
25temperatures. Note that the driver detects how many fan headers /
26temperature sensors are actually implemented on the motherboard, so you will
27likely see fewer temperature and fan inputs.
28
29An application note describing the Theseus' registers, as well as an
30application note describing the protocol for communicating with the
31microcontroller is available upon request. Please mail me if you want a copy.
diff --git a/Documentation/i2o/ioctl b/Documentation/i2o/ioctl
index 1e77fac4e120..22ca53a67e23 100644
--- a/Documentation/i2o/ioctl
+++ b/Documentation/i2o/ioctl
@@ -110,7 +110,7 @@ V. Getting Logical Configuration Table
110 ENOBUFS Buffer not large enough. If this occurs, the required 110 ENOBUFS Buffer not large enough. If this occurs, the required
111 buffer length is written into *(lct->reslen) 111 buffer length is written into *(lct->reslen)
112 112
113VI. Settting Parameters 113VI. Setting Parameters
114 114
115 SYNOPSIS 115 SYNOPSIS
116 116
diff --git a/Documentation/isdn/README.HiSax b/Documentation/isdn/README.HiSax
index 99e87a61897d..b1a573cf4472 100644
--- a/Documentation/isdn/README.HiSax
+++ b/Documentation/isdn/README.HiSax
@@ -506,7 +506,7 @@ to e.g. the Internet:
506 <ISDN subsystem - ISDN support -- HiSax> 506 <ISDN subsystem - ISDN support -- HiSax>
507 make clean; make zImage; make modules; make modules_install 507 make clean; make zImage; make modules; make modules_install
5082. Install the new kernel 5082. Install the new kernel
509 cp /usr/src/linux/arch/i386/boot/zImage /etc/kernel/linux.isdn 509 cp /usr/src/linux/arch/x86/boot/zImage /etc/kernel/linux.isdn
510 vi /etc/lilo.conf 510 vi /etc/lilo.conf
511 <add new kernel in the bootable image section> 511 <add new kernel in the bootable image section>
512 lilo 512 lilo
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist
index 2df4576f1173..cb5507b1ac81 100644
--- a/Documentation/ja_JP/SubmitChecklist
+++ b/Documentation/ja_JP/SubmitChecklist
@@ -68,7 +68,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
68 68
6912: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB, 6912: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB,
70 CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK, 70 CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
71 CONFIG_DEBUG_SPINLOCK_SLEEP これら全てを同時に有効にして動作確認を 71 CONFIG_DEBUG_ATOMIC_SLEEP これら全てを同時に有効にして動作確認を
72 行ってください。 72 行ってください。
73 73
7413: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で 7413: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 47435e56c5da..f47cdefb4d1e 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -441,7 +441,7 @@ more details, with real examples.
441 specified if the first option is not supported. 441 specified if the first option is not supported.
442 442
443 Example: 443 Example:
444 #arch/i386/kernel/Makefile 444 #arch/x86/kernel/Makefile
445 vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) 445 vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
446 446
447 In the above example, vsyscall-flags will be assigned the option 447 In the above example, vsyscall-flags will be assigned the option
@@ -460,7 +460,7 @@ more details, with real examples.
460 supported to use an optional second option. 460 supported to use an optional second option.
461 461
462 Example: 462 Example:
463 #arch/i386/Makefile 463 #arch/x86/Makefile
464 cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586) 464 cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
465 465
466 In the above example, cflags-y will be assigned the option 466 In the above example, cflags-y will be assigned the option
@@ -522,7 +522,7 @@ more details, with real examples.
522 even though the option was accepted by gcc. 522 even though the option was accepted by gcc.
523 523
524 Example: 524 Example:
525 #arch/i386/Makefile 525 #arch/x86/Makefile
526 cflags-y += $(shell \ 526 cflags-y += $(shell \
527 if [ $(call cc-version) -ge 0300 ] ; then \ 527 if [ $(call cc-version) -ge 0300 ] ; then \
528 echo "-mregparm=3"; fi ;) 528 echo "-mregparm=3"; fi ;)
@@ -802,7 +802,7 @@ but in the architecture makefiles where the kbuild infrastructure
802is not sufficient this sometimes needs to be explicit. 802is not sufficient this sometimes needs to be explicit.
803 803
804 Example: 804 Example:
805 #arch/i386/boot/Makefile 805 #arch/x86/boot/Makefile
806 subdir- := compressed/ 806 subdir- := compressed/
807 807
808The above assignment instructs kbuild to descend down in the 808The above assignment instructs kbuild to descend down in the
@@ -812,12 +812,12 @@ To support the clean infrastructure in the Makefiles that builds the
812final bootimage there is an optional target named archclean: 812final bootimage there is an optional target named archclean:
813 813
814 Example: 814 Example:
815 #arch/i386/Makefile 815 #arch/x86/Makefile
816 archclean: 816 archclean:
817 $(Q)$(MAKE) $(clean)=arch/i386/boot 817 $(Q)$(MAKE) $(clean)=arch/x86/boot
818 818
819When "make clean" is executed, make will descend down in arch/i386/boot, 819When "make clean" is executed, make will descend down in arch/x86/boot,
820and clean as usual. The Makefile located in arch/i386/boot/ may use 820and clean as usual. The Makefile located in arch/x86/boot/ may use
821the subdir- trick to descend further down. 821the subdir- trick to descend further down.
822 822
823Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is 823Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
@@ -882,7 +882,7 @@ When kbuild executes, the following steps are followed (roughly):
882 LDFLAGS_vmlinux uses the LDFLAGS_$@ support. 882 LDFLAGS_vmlinux uses the LDFLAGS_$@ support.
883 883
884 Example: 884 Example:
885 #arch/i386/Makefile 885 #arch/x86/Makefile
886 LDFLAGS_vmlinux := -e stext 886 LDFLAGS_vmlinux := -e stext
887 887
888 OBJCOPYFLAGS objcopy flags 888 OBJCOPYFLAGS objcopy flags
@@ -920,14 +920,14 @@ When kbuild executes, the following steps are followed (roughly):
920 Often, the KBUILD_CFLAGS variable depends on the configuration. 920 Often, the KBUILD_CFLAGS variable depends on the configuration.
921 921
922 Example: 922 Example:
923 #arch/i386/Makefile 923 #arch/x86/Makefile
924 cflags-$(CONFIG_M386) += -march=i386 924 cflags-$(CONFIG_M386) += -march=i386
925 KBUILD_CFLAGS += $(cflags-y) 925 KBUILD_CFLAGS += $(cflags-y)
926 926
927 Many arch Makefiles dynamically run the target C compiler to 927 Many arch Makefiles dynamically run the target C compiler to
928 probe supported options: 928 probe supported options:
929 929
930 #arch/i386/Makefile 930 #arch/x86/Makefile
931 931
932 ... 932 ...
933 cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\ 933 cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\
@@ -1038,8 +1038,8 @@ When kbuild executes, the following steps are followed (roughly):
1038 into the arch/$(ARCH)/boot/Makefile. 1038 into the arch/$(ARCH)/boot/Makefile.
1039 1039
1040 Example: 1040 Example:
1041 #arch/i386/Makefile 1041 #arch/x86/Makefile
1042 boot := arch/i386/boot 1042 boot := arch/x86/boot
1043 bzImage: vmlinux 1043 bzImage: vmlinux
1044 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ 1044 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
1045 1045
@@ -1051,7 +1051,7 @@ When kbuild executes, the following steps are followed (roughly):
1051 To support this, $(archhelp) must be defined. 1051 To support this, $(archhelp) must be defined.
1052 1052
1053 Example: 1053 Example:
1054 #arch/i386/Makefile 1054 #arch/x86/Makefile
1055 define archhelp 1055 define archhelp
1056 echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)' 1056 echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)'
1057 endef 1057 endef
@@ -1065,7 +1065,7 @@ When kbuild executes, the following steps are followed (roughly):
1065 from vmlinux. 1065 from vmlinux.
1066 1066
1067 Example: 1067 Example:
1068 #arch/i386/Makefile 1068 #arch/x86/Makefile
1069 all: bzImage 1069 all: bzImage
1070 1070
1071 When "make" is executed without arguments, bzImage will be built. 1071 When "make" is executed without arguments, bzImage will be built.
@@ -1083,7 +1083,7 @@ When kbuild executes, the following steps are followed (roughly):
1083 2) kbuild knows what files to delete during "make clean" 1083 2) kbuild knows what files to delete during "make clean"
1084 1084
1085 Example: 1085 Example:
1086 #arch/i386/kernel/Makefile 1086 #arch/x86/kernel/Makefile
1087 extra-y := head.o init_task.o 1087 extra-y := head.o init_task.o
1088 1088
1089 In this example, extra-y is used to list object files that 1089 In this example, extra-y is used to list object files that
@@ -1133,7 +1133,7 @@ When kbuild executes, the following steps are followed (roughly):
1133 Compress target. Use maximum compression to compress target. 1133 Compress target. Use maximum compression to compress target.
1134 1134
1135 Example: 1135 Example:
1136 #arch/i386/boot/Makefile 1136 #arch/x86/boot/Makefile
1137 LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary 1137 LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
1138 LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext 1138 LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext
1139 1139
@@ -1193,7 +1193,7 @@ When kbuild executes, the following steps are followed (roughly):
1193 1193
1194 When updating the $(obj)/bzImage target, the line 1194 When updating the $(obj)/bzImage target, the line
1195 1195
1196 BUILD arch/i386/boot/bzImage 1196 BUILD arch/x86/boot/bzImage
1197 1197
1198 will be displayed with "make KBUILD_VERBOSE=0". 1198 will be displayed with "make KBUILD_VERBOSE=0".
1199 1199
@@ -1207,7 +1207,7 @@ When kbuild executes, the following steps are followed (roughly):
1207 kbuild knows .lds files and includes a rule *lds.S -> *lds. 1207 kbuild knows .lds files and includes a rule *lds.S -> *lds.
1208 1208
1209 Example: 1209 Example:
1210 #arch/i386/kernel/Makefile 1210 #arch/x86/kernel/Makefile
1211 always := vmlinux.lds 1211 always := vmlinux.lds
1212 1212
1213 #Makefile 1213 #Makefile
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index aa47be71df4c..40cc653984ee 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1159,10 +1159,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1159 for all guests. 1159 for all guests.
1160 Default is 1 (enabled) if in 64bit or 32bit-PAE mode 1160 Default is 1 (enabled) if in 64bit or 32bit-PAE mode
1161 1161
1162 kvm-intel.bypass_guest_pf=
1163 [KVM,Intel] Disables bypassing of guest page faults
1164 on Intel chips. Default is 1 (enabled)
1165
1166 kvm-intel.ept= [KVM,Intel] Disable extended page tables 1162 kvm-intel.ept= [KVM,Intel] Disable extended page tables
1167 (virtualized MMU) support on capable Intel chips. 1163 (virtualized MMU) support on capable Intel chips.
1168 Default is 1 (enabled) 1164 Default is 1 (enabled)
@@ -1737,6 +1733,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1737 no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page 1733 no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
1738 fault handling. 1734 fault handling.
1739 1735
1736 no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
1737 steal time is computed, but won't influence scheduler
1738 behaviour
1739
1740 nolapic [X86-32,APIC] Do not enable or use the local APIC. 1740 nolapic [X86-32,APIC] Do not enable or use the local APIC.
1741 1741
1742 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1742 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
index 4b12abcb2ad3..abf481f780ec 100644
--- a/Documentation/magic-number.txt
+++ b/Documentation/magic-number.txt
@@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
66RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h 66RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h
67SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h 67SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h
68HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c 68HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
69APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c 69APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
70CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h 70CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
71DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c 71DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
72DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c 72DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c
diff --git a/Documentation/mca.txt b/Documentation/mca.txt
index 510375d4209a..dfd130c2207d 100644
--- a/Documentation/mca.txt
+++ b/Documentation/mca.txt
@@ -11,7 +11,7 @@ Adapter Detection
11 11
12The ideal MCA adapter detection is done through the use of the 12The ideal MCA adapter detection is done through the use of the
13Programmable Option Select registers. Generic functions for doing 13Programmable Option Select registers. Generic functions for doing
14this have been added in include/linux/mca.h and arch/i386/kernel/mca.c. 14this have been added in include/linux/mca.h and arch/x86/kernel/mca_32.c.
15Everything needed to detect adapters and read (and write) configuration 15Everything needed to detect adapters and read (and write) configuration
16information is there. A number of MCA-specific drivers already use 16information is there. A number of MCA-specific drivers already use
17this. The typical probe code looks like the following: 17this. The typical probe code looks like the following:
@@ -81,7 +81,7 @@ more people use shared IRQs on PCI machines.
81In general, an interrupt must be acknowledged not only at the ICU (which 81In general, an interrupt must be acknowledged not only at the ICU (which
82is done automagically by the kernel), but at the device level. In 82is done automagically by the kernel), but at the device level. In
83particular, IRQ 0 must be reset after a timer interrupt (now done in 83particular, IRQ 0 must be reset after a timer interrupt (now done in
84arch/i386/kernel/time.c) or the first timer interrupt hangs the system. 84arch/x86/kernel/time.c) or the first timer interrupt hangs the system.
85There were also problems with the 1.3.x floppy drivers, but that seems 85There were also problems with the 1.3.x floppy drivers, but that seems
86to have been fixed. 86to have been fixed.
87 87
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
index 93dd7a714075..a9ba6720ffdf 100644
--- a/Documentation/mmc/00-INDEX
+++ b/Documentation/mmc/00-INDEX
@@ -4,3 +4,5 @@ mmc-dev-attrs.txt
4 - info on SD and MMC device attributes 4 - info on SD and MMC device attributes
5mmc-dev-parts.txt 5mmc-dev-parts.txt
6 - info on SD and MMC device partitions 6 - info on SD and MMC device partitions
7mmc-async-req.txt
8 - info on mmc asynchronous requests
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
new file mode 100644
index 000000000000..ae1907b10e4a
--- /dev/null
+++ b/Documentation/mmc/mmc-async-req.txt
@@ -0,0 +1,87 @@
1Rationale
2=========
3
4How significant is the cache maintenance overhead?
5It depends. Fast eMMC and multiple cache levels with speculative cache
6pre-fetch make the cache overhead relatively significant. If the DMA
7preparations for the next request are done in parallel with the current
8transfer, the DMA preparation overhead would not affect the MMC performance.
9The intention of non-blocking (asynchronous) MMC requests is to minimize the
10time between when an MMC request ends and another MMC request begins.
11Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
12dma_unmap_sg are processing. Using non-blocking MMC requests makes it
13possible to prepare the caches for next job in parallel with an active
14MMC request.
15
16MMC block driver
17================
18
19The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
20The increase in throughput is proportional to the time it takes to
21prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
22a request and how fast the memory is. The faster the MMC/SD is the
23more significant the prepare request time becomes. Roughly the expected
24performance gain is 5% for large writes and 10% on large reads on a L2 cache
25platform. In power save mode, when clocks run on a lower frequency, the DMA
26preparation may cost even more. As long as these slower preparations are run
27in parallel with the transfer, performance won't be affected.
28
29Details on measurements from IOZone and mmc_test
30================================================
31
32https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
33
34MMC core API extension
35======================
36
37There is one new public function mmc_start_req().
38It starts a new MMC command request for a host. The function isn't
39truly non-blocking. If there is an ongoing async request it waits
40for completion of that request and starts the new one and returns. It
41doesn't wait for the new request to complete. If there is no ongoing
42request it starts the new request and returns immediately.
43
44MMC host extensions
45===================
46
47There are two optional members in the mmc_host_ops -- pre_req() and
48post_req() -- that the host driver may implement in order to move work
49to before and after the actual mmc_host_ops.request() function is called.
50In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
51descriptor, and post_req() runs the dma_unmap_sg().
52
53Optimize for the first request
54==============================
55
56The first request in a series of requests can't be prepared in parallel
57with the previous transfer, since there is no previous request.
58The argument is_first_req in pre_req() indicates that there is no previous
59request. The host driver may optimize for this scenario to minimize
60the performance loss. A way to optimize for this is to split the current
61request in two chunks, prepare the first chunk and start the request,
62and finally prepare the second chunk and start the transfer.
63
64Pseudocode to handle is_first_req scenario with minimal prepare overhead:
65
66if (is_first_req && req->size > threshold)
67 /* start MMC transfer for the complete transfer size */
68 mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
69
70 /*
71 * Begin to prepare DMA while cmd is being processed by MMC.
72 * The first chunk of the request should take the same time
73 * to prepare as the "MMC process command time".
74 * If prepare time exceeds MMC cmd time
75 * the transfer is delayed, guesstimate max 4k as first chunk size.
76 */
77 prepare_1st_chunk_for_dma(req);
78 /* flush pending desc to the DMAC (dmaengine.h) */
79 dma_issue_pending(req->dma_desc);
80
81 prepare_2nd_chunk_for_dma(req);
82 /*
83 * The second issue_pending should be called before MMC runs out
84 * of the first chunk. If the MMC runs out of the first data chunk
85 * before this call, the transfer is delayed.
86 */
87 dma_issue_pending(req->dma_desc);
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 2bac9618c345..65968fbf1e49 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -260,7 +260,7 @@ int main(int argc, char *argv[])
260 case 'V': opt_V++; exclusive++; break; 260 case 'V': opt_V++; exclusive++; break;
261 261
262 case '?': 262 case '?':
263 fprintf(stderr, usage_msg); 263 fprintf(stderr, "%s", usage_msg);
264 res = 2; 264 res = 2;
265 goto out; 265 goto out;
266 } 266 }
@@ -268,13 +268,13 @@ int main(int argc, char *argv[])
268 268
269 /* options check */ 269 /* options check */
270 if (exclusive > 1) { 270 if (exclusive > 1) {
271 fprintf(stderr, usage_msg); 271 fprintf(stderr, "%s", usage_msg);
272 res = 2; 272 res = 2;
273 goto out; 273 goto out;
274 } 274 }
275 275
276 if (opt_v || opt_V) { 276 if (opt_v || opt_V) {
277 printf(version); 277 printf("%s", version);
278 if (opt_V) { 278 if (opt_V) {
279 res = 0; 279 res = 0;
280 goto out; 280 goto out;
@@ -282,14 +282,14 @@ int main(int argc, char *argv[])
282 } 282 }
283 283
284 if (opt_u) { 284 if (opt_u) {
285 printf(usage_msg); 285 printf("%s", usage_msg);
286 res = 0; 286 res = 0;
287 goto out; 287 goto out;
288 } 288 }
289 289
290 if (opt_h) { 290 if (opt_h) {
291 printf(usage_msg); 291 printf("%s", usage_msg);
292 printf(help_msg); 292 printf("%s", help_msg);
293 res = 0; 293 res = 0;
294 goto out; 294 goto out;
295 } 295 }
@@ -309,7 +309,7 @@ int main(int argc, char *argv[])
309 goto out; 309 goto out;
310 } else { 310 } else {
311 /* Just show usage */ 311 /* Just show usage */
312 fprintf(stderr, usage_msg); 312 fprintf(stderr, "%s", usage_msg);
313 res = 2; 313 res = 2;
314 goto out; 314 goto out;
315 } 315 }
@@ -320,7 +320,7 @@ int main(int argc, char *argv[])
320 master_ifname = *spp++; 320 master_ifname = *spp++;
321 321
322 if (master_ifname == NULL) { 322 if (master_ifname == NULL) {
323 fprintf(stderr, usage_msg); 323 fprintf(stderr, "%s", usage_msg);
324 res = 2; 324 res = 2;
325 goto out; 325 goto out;
326 } 326 }
@@ -339,7 +339,7 @@ int main(int argc, char *argv[])
339 339
340 if (slave_ifname == NULL) { 340 if (slave_ifname == NULL) {
341 if (opt_d || opt_c) { 341 if (opt_d || opt_c) {
342 fprintf(stderr, usage_msg); 342 fprintf(stderr, "%s", usage_msg);
343 res = 2; 343 res = 2;
344 goto out; 344 goto out;
345 } 345 }
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index bfe924217f24..db2a4067013c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER
106 when the number of entries in the pool is very small). 106 when the number of entries in the pool is very small).
107 Measured in seconds. 107 Measured in seconds.
108 108
109inet_peer_gc_mintime - INTEGER
110 Minimum interval between garbage collection passes. This interval is
111 in effect under high memory pressure on the pool.
112 Measured in seconds.
113
114inet_peer_gc_maxtime - INTEGER
115 Minimum interval between garbage collection passes. This interval is
116 in effect under low (or absent) memory pressure on the pool.
117 Measured in seconds.
118
119TCP variables: 109TCP variables:
120 110
121somaxconn - INTEGER 111somaxconn - INTEGER
@@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
394 min: Minimal size of receive buffer used by TCP sockets. 384 min: Minimal size of receive buffer used by TCP sockets.
395 It is guaranteed to each TCP socket, even under moderate memory 385 It is guaranteed to each TCP socket, even under moderate memory
396 pressure. 386 pressure.
397 Default: 8K 387 Default: 1 page
398 388
399 default: initial size of receive buffer used by TCP sockets. 389 default: initial size of receive buffer used by TCP sockets.
400 This value overrides net.core.rmem_default used by other protocols. 390 This value overrides net.core.rmem_default used by other protocols.
@@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN
483tcp_wmem - vector of 3 INTEGERs: min, default, max 473tcp_wmem - vector of 3 INTEGERs: min, default, max
484 min: Amount of memory reserved for send buffers for TCP sockets. 474 min: Amount of memory reserved for send buffers for TCP sockets.
485 Each TCP socket has rights to use it due to fact of its birth. 475 Each TCP socket has rights to use it due to fact of its birth.
486 Default: 4K 476 Default: 1 page
487 477
488 default: initial size of send buffer used by TCP sockets. This 478 default: initial size of send buffer used by TCP sockets. This
489 value overrides net.core.wmem_default used by other protocols. 479 value overrides net.core.wmem_default used by other protocols.
@@ -553,13 +543,13 @@ udp_rmem_min - INTEGER
553 Minimal size of receive buffer used by UDP sockets in moderation. 543 Minimal size of receive buffer used by UDP sockets in moderation.
554 Each UDP socket is able to use the size for receiving data, even if 544 Each UDP socket is able to use the size for receiving data, even if
555 total pages of UDP sockets exceed udp_mem pressure. The unit is byte. 545 total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
556 Default: 4096 546 Default: 1 page
557 547
558udp_wmem_min - INTEGER 548udp_wmem_min - INTEGER
559 Minimal size of send buffer used by UDP sockets in moderation. 549 Minimal size of send buffer used by UDP sockets in moderation.
560 Each UDP socket is able to use the size for sending data, even if 550 Each UDP socket is able to use the size for sending data, even if
561 total pages of UDP sockets exceed udp_mem pressure. The unit is byte. 551 total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
562 Default: 4096 552 Default: 1 page
563 553
564CIPSOv4 Variables: 554CIPSOv4 Variables:
565 555
@@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max
1465 Default is calculated at boot time from amount of available memory. 1455 Default is calculated at boot time from amount of available memory.
1466 1456
1467sctp_rmem - vector of 3 INTEGERs: min, default, max 1457sctp_rmem - vector of 3 INTEGERs: min, default, max
1468 See tcp_rmem for a description. 1458 Only the first value ("min") is used, "default" and "max" are
1459 ignored.
1460
1461 min: Minimal size of receive buffer used by SCTP socket.
1462 It is guaranteed to each SCTP socket (but not association) even
1463 under moderate memory pressure.
1464
1465 Default: 1 page
1469 1466
1470sctp_wmem - vector of 3 INTEGERs: min, default, max 1467sctp_wmem - vector of 3 INTEGERs: min, default, max
1471 See tcp_wmem for a description. 1468 Currently this tunable has no effect.
1472 1469
1473addr_scope_policy - INTEGER 1470addr_scope_policy - INTEGER
1474 Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 1471 Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
new file mode 100644
index 000000000000..4b1c0dcef84c
--- /dev/null
+++ b/Documentation/networking/netdev-features.txt
@@ -0,0 +1,154 @@
1Netdev features mess and how to get out from it alive
2=====================================================
3
4Author:
5 Michał Mirosław <mirq-linux@rere.qmqm.pl>
6
7
8
9 Part I: Feature sets
10======================
11
12Long gone are the days when a network card would just take and give packets
13verbatim. Today's devices add multiple features and bugs (read: offloads)
14that relieve an OS of various tasks like generating and checking checksums,
15splitting packets, classifying them. Those capabilities and their state
16are commonly referred to as netdev features in Linux kernel world.
17
18There are currently three sets of features relevant to the driver, and
19one used internally by network core:
20
21 1. netdev->hw_features set contains features whose state may possibly
22 be changed (enabled or disabled) for a particular device by user's
23 request. This set should be initialized in ndo_init callback and not
24 changed later.
25
26 2. netdev->features set contains features which are currently enabled
27 for a device. This should be changed only by network core or in
28 error paths of ndo_set_features callback.
29
30 3. netdev->vlan_features set contains features whose state is inherited
31 by child VLAN devices (limits netdev->features set). This is currently
32 used for all VLAN devices whether tags are stripped or inserted in
33 hardware or software.
34
35 4. netdev->wanted_features set contains feature set requested by user.
36 This set is filtered by ndo_fix_features callback whenever it or
37 some device-specific conditions change. This set is internal to
38 networking core and should not be referenced in drivers.
39
40
41
42 Part II: Controlling enabled features
43=======================================
44
45When current feature set (netdev->features) is to be changed, new set
46is calculated and filtered by calling ndo_fix_features callback
47and netdev_fix_features(). If the resulting set differs from current
48set, it is passed to ndo_set_features callback and (if the callback
49returns success) replaces value stored in netdev->features.
50NETDEV_FEAT_CHANGE notification is issued after that whenever current
51set might have changed.
52
53The following events trigger recalculation:
54 1. device's registration, after ndo_init returned success
55 2. user requested changes in features state
56 3. netdev_update_features() is called
57
58ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
59are treated as always returning success.
60
61A driver that wants to trigger recalculation must do so by calling
62netdev_update_features() while holding rtnl_lock. This should not be done
63from ndo_*_features callbacks. netdev->features should not be modified by
64driver except by means of ndo_fix_features callback.
65
66
67
68 Part III: Implementation hints
69================================
70
71 * ndo_fix_features:
72
73All dependencies between features should be resolved here. The resulting
74set can be reduced further by networking core imposed limitations (as coded
75in netdev_fix_features()). For this reason it is safer to disable a feature
76when its dependencies are not met instead of forcing the dependency on.
77
78This callback should not modify hardware nor driver state (should be
79stateless). It can be called multiple times between successive
80ndo_set_features calls.
81
82Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
83NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
84care must be taken as the change won't affect already configured VLANs.
85
86 * ndo_set_features:
87
88Hardware should be reconfigured to match passed feature set. The set
89should not be altered unless some error condition happens that can't
90be reliably detected in ndo_fix_features. In this case, the callback
91should update netdev->features to match resulting hardware state.
92Errors returned are not (and cannot be) propagated anywhere except dmesg.
93(Note: successful return is zero, >0 means silent error.)
94
95
96
97 Part IV: Features
98===================
99
100For current list of features, see include/linux/netdev_features.h.
101This section describes semantics of some of them.
102
103 * Transmit checksumming
104
105For complete description, see comments near the top of include/linux/skbuff.h.
106
107Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
108It means that device can fill TCP/UDP-like checksum anywhere in the packets
109whatever headers there might be.
110
111 * Transmit TCP segmentation offload
112
113NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
114set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
115
116 * Transmit DMA from high memory
117
118On platforms where this is relevant, NETIF_F_HIGHDMA signals that
119ndo_start_xmit can handle skbs with frags in high memory.
120
121 * Transmit scatter-gather
122
123Those features say that ndo_start_xmit can handle fragmented skbs:
124NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
125chained skbs (skb->next/prev list).
126
127 * Software features
128
129Features contained in NETIF_F_SOFT_FEATURES are features of networking
130stack. Driver should not change behaviour based on them.
131
132 * LLTX driver (deprecated for hardware drivers)
133
134NETIF_F_LLTX should be set in drivers that implement their own locking in
135transmit path or don't need locking at all (e.g. software tunnels).
136In ndo_start_xmit, it is recommended to use a try_lock and return
137NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
138protect against other callbacks (the rules you need to find out).
139
140Don't use it for new drivers.
141
142 * netns-local device
143
144NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
145network namespaces (e.g. loopback).
146
147Don't use it in drivers.
148
149 * VLAN challenged
150
151NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
152headers. Some drivers set this because the cards can't handle the bigger MTU.
153[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
154VLANs. This may be not useful, though.]
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt
new file mode 100644
index 000000000000..b24c29bdae27
--- /dev/null
+++ b/Documentation/networking/nfc.txt
@@ -0,0 +1,128 @@
1Linux NFC subsystem
2===================
3
4The Near Field Communication (NFC) subsystem is required to standardize the
5NFC device drivers development and to create a unified userspace interface.
6
7This document covers the architecture overview, the device driver interface
8description and the userspace interface description.
9
10Architecture overview
11---------------------
12
13The NFC subsystem is responsible for:
14 - NFC adapters management;
15 - Polling for targets;
16 - Low-level data exchange;
17
18The subsystem is divided in some parts. The 'core' is responsible for
19providing the device driver interface. On the other side, it is also
20responsible for providing an interface to control operations and low-level
21data exchange.
22
23The control operations are available to userspace via generic netlink.
24
25The low-level data exchange interface is provided by the new socket family
26PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
27
28
29 +--------------------------------------+
30 | USER SPACE |
31 +--------------------------------------+
32 ^ ^
33 | low-level | control
34 | data exchange | operations
35 | |
36 | v
37 | +-----------+
38 | AF_NFC | netlink |
39 | socket +-----------+
40 | raw ^
41 | |
42 v v
43 +---------+ +-----------+
44 | rawsock | <--------> | core |
45 +---------+ +-----------+
46 ^
47 |
48 v
49 +-----------+
50 | driver |
51 +-----------+
52
53Device Driver Interface
54-----------------------
55
56When registering on the NFC subsystem, the device driver must inform the core
57of the set of supported NFC protocols and the set of ops callbacks. The ops
58callbacks that must be implemented are the following:
59
60* start_poll - setup the device to poll for targets
61* stop_poll - stop an in-progress polling operation
62* activate_target - select and initialize one of the targets found
63* deactivate_target - deselect and deinitialize the selected target
64* data_exchange - send data and receive the response (transceive operation)
65
66Userspace interface
67--------------------
68
69The userspace interface is divided in control operations and low-level data
70exchange operation.
71
72CONTROL OPERATIONS:
73
74Generic netlink is used to implement the interface to the control operations.
75The operations are composed of commands and events, all listed below:
76
77* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
78* NFC_CMD_START_POLL - setup a specific device to polling for targets
79* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
80* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
81
82* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
83* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
84* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
85are found
86
87The user must call START_POLL to poll for NFC targets, passing the desired NFC
88protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
89state until it finds any target. However, the user can stop the polling
90operation by calling STOP_POLL command. In this case, it will be checked if
91the requester of STOP_POLL is the same as the requester of START_POLL.
92
93If the polling operation finds one or more targets, the event TARGETS_FOUND is
94sent (including the device id). The user must call GET_TARGET to get the list of
95all targets found by such device. Each reply message has target attributes with
96relevant information such as the supported NFC protocols.
97
98All polling operations requested through one netlink socket are stopped when
99it's closed.
100
101LOW-LEVEL DATA EXCHANGE:
102
103The userspace must use PF_NFC sockets to perform any data communication with
104targets. All NFC sockets use AF_NFC:
105
106struct sockaddr_nfc {
107 sa_family_t sa_family;
108 __u32 dev_idx;
109 __u32 target_idx;
110 __u32 nfc_protocol;
111};
112
113To establish a connection with one target, the user must create an
114NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
115struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
116netlink event. As a target can support more than one NFC protocol, the user
117must inform which protocol it wants to use.
118
119Internally, 'connect' will result in an activate_target call to the driver.
120When the socket is closed, the target is deactivated.
121
122The data format exchanged through the sockets is NFC protocol dependent. For
123instance, when communicating with MIFARE tags, the data exchanged are MIFARE
124commands and their responses.
125
126The first received package is the response to the first sent package and so
127on. In order to allow valid "empty" responses, every data received has a NULL
128header of 1 byte.
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 80a7a3454902..57a24108b845 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
7(Synopsys IP blocks); it has been fully tested on STLinux platforms. 7(Synopsys IP blocks); it has been fully tested on STLinux platforms.
8 8
9Currently this network device driver is for all STM embedded MAC/GMAC 9Currently this network device driver is for all STM embedded MAC/GMAC
10(7xxx SoCs). Other platforms start using it i.e. ARM SPEAr. 10(i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr.
11 11
12DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 12DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100
13Universal version 4.0 have been used for developing the first code 13Universal version 4.0 have been used for developing the first code
@@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp
71the CPU while having the maximum throughput. 71the CPU while having the maximum throughput.
72 72
734.4) WOL 734.4) WOL
74Wake up on Lan feature through Magic Frame is only supported for the GMAC 74Wake up on Lan feature through Magic and Unicast frames are supported for the GMAC
75core. 75core.
76 76
774.5) DMA descriptors 774.5) DMA descriptors
@@ -91,11 +91,15 @@ LRO is not supported.
91The driver is compatible with PAL to work with PHY and GPHY devices. 91The driver is compatible with PAL to work with PHY and GPHY devices.
92 92
934.9) Platform information 934.9) Platform information
94Several information came from the platform; please refer to the 94Several driver's information can be passed through the platform
95driver's Header file in include/linux directory. 95These are included in the include/linux/stmmac.h header file
96and detailed below as well:
96 97
97struct plat_stmmacenet_data { 98 struct plat_stmmacenet_data {
98 int bus_id; 99 int bus_id;
100 int phy_addr;
101 int interface;
102 struct stmmac_mdio_bus_data *mdio_bus_data;
99 int pbl; 103 int pbl;
100 int clk_csr; 104 int clk_csr;
101 int has_gmac; 105 int has_gmac;
@@ -103,67 +107,135 @@ struct plat_stmmacenet_data {
103 int tx_coe; 107 int tx_coe;
104 int bugged_jumbo; 108 int bugged_jumbo;
105 int pmt; 109 int pmt;
106 void (*fix_mac_speed)(void *priv, unsigned int speed); 110 int force_sf_dma_mode;
107 void (*bus_setup)(unsigned long ioaddr); 111 void (*fix_mac_speed)(void *priv, unsigned int speed);
108#ifdef CONFIG_STM_DRIVERS 112 void (*bus_setup)(void __iomem *ioaddr);
109 struct stm_pad_config *pad_config; 113 int (*init)(struct platform_device *pdev);
110#endif 114 void (*exit)(struct platform_device *pdev);
111 void *bsp_priv; 115 void *bsp_priv;
112}; 116 };
113 117
114Where: 118Where:
115- pbl (Programmable Burst Length) is maximum number of 119 o bus_id: bus identifier.
116 beats to be transferred in one DMA transaction. 120 o phy_addr: the physical address can be passed from the platform.
117 GMAC also enables the 4xPBL by default. 121 If it is set to -1 the driver will automatically
118- fix_mac_speed and bus_setup are used to configure internal target 122 detect it at run-time by probing all the 32 addresses.
119 registers (on STM platforms); 123 o interface: PHY device's interface.
120- has_gmac: GMAC core is on board (get it at run-time in the next step); 124 o mdio_bus_data: specific platform fields for the MDIO bus.
121- bus_id: bus identifier. 125 o pbl: the Programmable Burst Length is maximum number of beats to
122- tx_coe: core is able to perform the tx csum in HW. 126 be transferred in one DMA transaction.
123- enh_desc: if sets the MAC will use the enhanced descriptor structure. 127 GMAC also enables the 4xPBL by default.
124- clk_csr: CSR Clock range selection. 128 o clk_csr: CSR Clock range selection.
125- bugged_jumbo: some HWs are not able to perform the csum in HW for 129 o has_gmac: uses the GMAC core.
126 over-sized frames due to limited buffer sizes. Setting this 130 o enh_desc: if sets the MAC will use the enhanced descriptor structure.
127 flag the csum will be done in SW on JUMBO frames. 131 o tx_coe: core is able to perform the tx csum in HW.
128 132 o bugged_jumbo: some HWs are not able to perform the csum in HW for
129struct plat_stmmacphy_data { 133 over-sized frames due to limited buffer sizes.
130 int bus_id; 134 Setting this flag the csum will be done in SW on
131 int phy_addr; 135 JUMBO frames.
132 unsigned int phy_mask; 136 o pmt: core has the embedded power module (optional).
133 int interface; 137 o force_sf_dma_mode: force DMA to use the Store and Forward mode
134 int (*phy_reset)(void *priv); 138 instead of the Threshold.
135 void *priv; 139 o fix_mac_speed: this callback is used for modifying some syscfg registers
136}; 140 (on ST SoCs) according to the link speed negotiated by the
141 physical layer .
142 o bus_setup: perform HW setup of the bus. For example, on some ST platforms
143 this field is used to configure the AMBA bridge to generate more
144 efficient STBus traffic.
145 o init/exit: callbacks used for calling a custom initialisation;
146 this is sometime necessary on some platforms (e.g. ST boxes)
147 where the HW needs to have set some PIO lines or system cfg
148 registers.
149 o custom_cfg: this is a custom configuration that can be passed while
150 initialising the resources.
151
152Then we have:
153
154 struct stmmac_mdio_bus_data {
155 int bus_id;
156 int (*phy_reset)(void *priv);
157 unsigned int phy_mask;
158 int *irqs;
159 int probed_phy_irq;
160 };
137 161
138Where: 162Where:
139- bus_id: bus identifier; 163 o bus_id: bus identifier;
140- phy_addr: physical address used for the attached phy device; 164 o phy_reset: hook to reset the phy device attached to the bus.
141 set it to -1 to get it at run-time; 165 o phy_mask: phy mask passed when register the MDIO bus within the driver.
142- interface: physical MII interface mode; 166 o irqs: list of IRQs, one per PHY.
143- phy_reset: hook to reset HW function. 167 o probed_phy_irq: if irqs is NULL, use this for probed PHY.
144 168
145SOURCES: 169Below is an example of how the structures above are used on ST platforms.
146- Kconfig 170
147- Makefile 171 static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
148- stmmac_main.c: main network device driver; 172 .pbl = 32,
149- stmmac_mdio.c: mdio functions; 173 .has_gmac = 0,
150- stmmac_ethtool.c: ethtool support; 174 .enh_desc = 0,
151- stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts 175 .fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
152 Only tested on ST40 platforms based. 176 |
153- stmmac.h: private driver structure; 177 |-> to write an internal syscfg
154- common.h: common definitions and VFTs; 178 | on this platform when the
155- descs.h: descriptor structure definitions; 179 | link speed changes from 10 to
156- dwmac1000_core.c: GMAC core functions; 180 | 100 and viceversa
157- dwmac1000_dma.c: dma functions for the GMAC chip; 181 .init = &stmmac_claim_resource,
158- dwmac1000.h: specific header file for the GMAC; 182 |
159- dwmac100_core: MAC 100 core and dma code; 183 |-> On ST SoC this calls own "PAD"
160- dwmac100_dma.c: dma funtions for the MAC chip; 184 | manager framework to claim
161- dwmac1000.h: specific header file for the MAC; 185 | all the resources necessary
162- dwmac_lib.c: generic DMA functions shared among chips 186 | (GPIO ...). The .custom_cfg field
163- enh_desc.c: functions for handling enhanced descriptors 187 | is used to pass a custom config.
164- norm_desc.c: functions for handling normal descriptors 188};
165 189
166TODO: 190Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
167- XGMAC controller is not supported. 191there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
168- Review the timer optimisation code to use an embedded device that seems to be 192with fixed_link support.
193
194static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
195 .bus_id = 1,
196 |
197 |-> phy device on the bus_id 1
198 .phy_reset = phy_reset,
199 |
200 |-> function to provide the phy_reset on this board
201 .phy_mask = 0,
202};
203
204static struct fixed_phy_status stmmac0_fixed_phy_status = {
205 .link = 1,
206 .speed = 100,
207 .duplex = 1,
208};
209
210During the board's device_init we can configure the first
211MAC for fixed_link by calling:
212 fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
213and the second one, with a real PHY device attached to the bus,
214by using the stmmac_mdio_bus_data structure (to provide the id, the
215reset procedure etc).
216
2174.10) List of source files:
218 o Kconfig
219 o Makefile
220 o stmmac_main.c: main network device driver;
221 o stmmac_mdio.c: mdio functions;
222 o stmmac_ethtool.c: ethtool support;
223 o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
224 Only tested on ST40 platforms based.
225 o stmmac.h: private driver structure;
226 o common.h: common definitions and VFTs;
227 o descs.h: descriptor structure definitions;
228 o dwmac1000_core.c: GMAC core functions;
229 o dwmac1000_dma.c: dma functions for the GMAC chip;
230 o dwmac1000.h: specific header file for the GMAC;
231 o dwmac100_core: MAC 100 core and dma code;
232 o dwmac100_dma.c: dma functions for the MAC chip;
233 o dwmac1000.h: specific header file for the MAC;
234 o dwmac_lib.c: generic DMA functions shared among chips
235 o enh_desc.c: functions for handling enhanced descriptors
236 o norm_desc.c: functions for handling normal descriptors
237
2385) TODO:
239 o XGMAC is not supported.
240 o Review the timer optimisation code to use an embedded device that will be
169 available in new chip generations. 241 available in new chip generations.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 64565aac6e40..3384d5996be2 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -506,8 +506,8 @@ routines. Nevertheless, different callback pointers are used in case there is a
506situation where it actually matters. 506situation where it actually matters.
507 507
508 508
509Device Power Domains 509Device Power Management Domains
510-------------------- 510-------------------------------
511Sometimes devices share reference clocks or other power resources. In those 511Sometimes devices share reference clocks or other power resources. In those
512cases it generally is not possible to put devices into low-power states 512cases it generally is not possible to put devices into low-power states
513individually. Instead, a set of devices sharing a power resource can be put 513individually. Instead, a set of devices sharing a power resource can be put
@@ -516,8 +516,8 @@ power resource. Of course, they also need to be put into the full-power state
516together, by turning the shared power resource on. A set of devices with this 516together, by turning the shared power resource on. A set of devices with this
517property is often referred to as a power domain. 517property is often referred to as a power domain.
518 518
519Support for power domains is provided through the pwr_domain field of struct 519Support for power domains is provided through the pm_domain field of struct
520device. This field is a pointer to an object of type struct dev_power_domain, 520device. This field is a pointer to an object of type struct dev_pm_domain,
521defined in include/linux/pm.h, providing a set of power management callbacks 521defined in include/linux/pm.h, providing a set of power management callbacks
522analogous to the subsystem-level and device driver callbacks that are executed 522analogous to the subsystem-level and device driver callbacks that are executed
523for the given device during all power transitions, instead of the respective 523for the given device during all power transitions, instead of the respective
@@ -604,7 +604,7 @@ state temporarily, for example so that its system wakeup capability can be
604disabled. This all depends on the hardware and the design of the subsystem and 604disabled. This all depends on the hardware and the design of the subsystem and
605device driver in question. 605device driver in question.
606 606
607During system-wide resume from a sleep state it's best to put devices into the 607During system-wide resume from a sleep state it's easiest to put devices into
608full-power state, as explained in Documentation/power/runtime_pm.txt. Refer to 608the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer
609that document for more information regarding this particular issue as well as 609to that document for more information regarding this particular issue as well as
610for information on the device runtime power management framework in general. 610for information on the device runtime power management framework in general.
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index 5ae70a12c1e2..3035d00757ad 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -321,6 +321,8 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with
321 addition to CONFIG_PM as power management feature is required to 321 addition to CONFIG_PM as power management feature is required to
322 dynamically scale voltage and frequency in a system. 322 dynamically scale voltage and frequency in a system.
323 323
324opp_free_cpufreq_table - Free up the table allocated by opp_init_cpufreq_table
325
3247. Data Structures 3267. Data Structures
325================== 327==================
326Typically an SoC contains multiple voltage domains which are variable. Each 328Typically an SoC contains multiple voltage domains which are variable. Each
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index b24875b1ced5..14dd3c6ad97e 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -1,39 +1,39 @@
1Run-time Power Management Framework for I/O Devices 1Runtime Power Management Framework for I/O Devices
2 2
3(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. 3(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4(C) 2010 Alan Stern <stern@rowland.harvard.edu> 4(C) 2010 Alan Stern <stern@rowland.harvard.edu>
5 5
61. Introduction 61. Introduction
7 7
8Support for run-time power management (run-time PM) of I/O devices is provided 8Support for runtime power management (runtime PM) of I/O devices is provided
9at the power management core (PM core) level by means of: 9at the power management core (PM core) level by means of:
10 10
11* The power management workqueue pm_wq in which bus types and device drivers can 11* The power management workqueue pm_wq in which bus types and device drivers can
12 put their PM-related work items. It is strongly recommended that pm_wq be 12 put their PM-related work items. It is strongly recommended that pm_wq be
13 used for queuing all work items related to run-time PM, because this allows 13 used for queuing all work items related to runtime PM, because this allows
14 them to be synchronized with system-wide power transitions (suspend to RAM, 14 them to be synchronized with system-wide power transitions (suspend to RAM,
15 hibernation and resume from system sleep states). pm_wq is declared in 15 hibernation and resume from system sleep states). pm_wq is declared in
16 include/linux/pm_runtime.h and defined in kernel/power/main.c. 16 include/linux/pm_runtime.h and defined in kernel/power/main.c.
17 17
18* A number of run-time PM fields in the 'power' member of 'struct device' (which 18* A number of runtime PM fields in the 'power' member of 'struct device' (which
19 is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can 19 is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
20 be used for synchronizing run-time PM operations with one another. 20 be used for synchronizing runtime PM operations with one another.
21 21
22* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in 22* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
23 include/linux/pm.h). 23 include/linux/pm.h).
24 24
25* A set of helper functions defined in drivers/base/power/runtime.c that can be 25* A set of helper functions defined in drivers/base/power/runtime.c that can be
26 used for carrying out run-time PM operations in such a way that the 26 used for carrying out runtime PM operations in such a way that the
27 synchronization between them is taken care of by the PM core. Bus types and 27 synchronization between them is taken care of by the PM core. Bus types and
28 device drivers are encouraged to use these functions. 28 device drivers are encouraged to use these functions.
29 29
30The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM 30The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
31fields of 'struct dev_pm_info' and the core helper functions provided for 31fields of 'struct dev_pm_info' and the core helper functions provided for
32run-time PM are described below. 32runtime PM are described below.
33 33
342. Device Run-time PM Callbacks 342. Device Runtime PM Callbacks
35 35
36There are three device run-time PM callbacks defined in 'struct dev_pm_ops': 36There are three device runtime PM callbacks defined in 'struct dev_pm_ops':
37 37
38struct dev_pm_ops { 38struct dev_pm_ops {
39 ... 39 ...
@@ -72,11 +72,11 @@ knows what to do to handle the device).
72 not mean that the device has been put into a low power state. It is 72 not mean that the device has been put into a low power state. It is
73 supposed to mean, however, that the device will not process data and will 73 supposed to mean, however, that the device will not process data and will
74 not communicate with the CPU(s) and RAM until the subsystem-level resume 74 not communicate with the CPU(s) and RAM until the subsystem-level resume
75 callback is executed for it. The run-time PM status of a device after 75 callback is executed for it. The runtime PM status of a device after
76 successful execution of the subsystem-level suspend callback is 'suspended'. 76 successful execution of the subsystem-level suspend callback is 'suspended'.
77 77
78 * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, 78 * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN,
79 the device's run-time PM status is 'active', which means that the device 79 the device's runtime PM status is 'active', which means that the device
80 _must_ be fully operational afterwards. 80 _must_ be fully operational afterwards.
81 81
82 * If the subsystem-level suspend callback returns an error code different 82 * If the subsystem-level suspend callback returns an error code different
@@ -104,7 +104,7 @@ the device).
104 104
105 * Once the subsystem-level resume callback has completed successfully, the PM 105 * Once the subsystem-level resume callback has completed successfully, the PM
106 core regards the device as fully operational, which means that the device 106 core regards the device as fully operational, which means that the device
107 _must_ be able to complete I/O operations as needed. The run-time PM status 107 _must_ be able to complete I/O operations as needed. The runtime PM status
108 of the device is then 'active'. 108 of the device is then 'active'.
109 109
110 * If the subsystem-level resume callback returns an error code, the PM core 110 * If the subsystem-level resume callback returns an error code, the PM core
@@ -130,7 +130,7 @@ device in that case. The value returned by this callback is ignored by the PM
130core. 130core.
131 131
132The helper functions provided by the PM core, described in Section 4, guarantee 132The helper functions provided by the PM core, described in Section 4, guarantee
133that the following constraints are met with respect to the bus type's run-time 133that the following constraints are met with respect to the bus type's runtime
134PM callbacks: 134PM callbacks:
135 135
136(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute 136(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
@@ -142,7 +142,7 @@ PM callbacks:
142 142
143(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' 143(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
144 devices (i.e. the PM core will only execute ->runtime_idle() or 144 devices (i.e. the PM core will only execute ->runtime_idle() or
145 ->runtime_suspend() for the devices the run-time PM status of which is 145 ->runtime_suspend() for the devices the runtime PM status of which is
146 'active'). 146 'active').
147 147
148(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device 148(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
@@ -151,7 +151,7 @@ PM callbacks:
151 flag of which is set. 151 flag of which is set.
152 152
153(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the 153(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
154 PM core will only execute ->runtime_resume() for the devices the run-time 154 PM core will only execute ->runtime_resume() for the devices the runtime
155 PM status of which is 'suspended'). 155 PM status of which is 'suspended').
156 156
157Additionally, the helper functions provided by the PM core obey the following 157Additionally, the helper functions provided by the PM core obey the following
@@ -171,9 +171,9 @@ rules:
171 scheduled requests to execute the other callbacks for the same device, 171 scheduled requests to execute the other callbacks for the same device,
172 except for scheduled autosuspends. 172 except for scheduled autosuspends.
173 173
1743. Run-time PM Device Fields 1743. Runtime PM Device Fields
175 175
176The following device run-time PM fields are present in 'struct dev_pm_info', as 176The following device runtime PM fields are present in 'struct dev_pm_info', as
177defined in include/linux/pm.h: 177defined in include/linux/pm.h:
178 178
179 struct timer_list suspend_timer; 179 struct timer_list suspend_timer;
@@ -205,7 +205,7 @@ defined in include/linux/pm.h:
205 205
206 unsigned int disable_depth; 206 unsigned int disable_depth;
207 - used for disabling the helper functions (they work normally if this is 207 - used for disabling the helper functions (they work normally if this is
208 equal to zero); the initial value of it is 1 (i.e. run-time PM is 208 equal to zero); the initial value of it is 1 (i.e. runtime PM is
209 initially disabled for all devices) 209 initially disabled for all devices)
210 210
211 unsigned int runtime_error; 211 unsigned int runtime_error;
@@ -229,10 +229,10 @@ defined in include/linux/pm.h:
229 suspend to complete; means "start a resume as soon as you've suspended" 229 suspend to complete; means "start a resume as soon as you've suspended"
230 230
231 unsigned int run_wake; 231 unsigned int run_wake;
232 - set if the device is capable of generating run-time wake-up events 232 - set if the device is capable of generating runtime wake-up events
233 233
234 enum rpm_status runtime_status; 234 enum rpm_status runtime_status;
235 - the run-time PM status of the device; this field's initial value is 235 - the runtime PM status of the device; this field's initial value is
236 RPM_SUSPENDED, which means that each device is initially regarded by the 236 RPM_SUSPENDED, which means that each device is initially regarded by the
237 PM core as 'suspended', regardless of its real hardware status 237 PM core as 'suspended', regardless of its real hardware status
238 238
@@ -243,7 +243,7 @@ defined in include/linux/pm.h:
243 and pm_runtime_forbid() helper functions 243 and pm_runtime_forbid() helper functions
244 244
245 unsigned int no_callbacks; 245 unsigned int no_callbacks;
246 - indicates that the device does not use the run-time PM callbacks (see 246 - indicates that the device does not use the runtime PM callbacks (see
247 Section 8); it may be modified only by the pm_runtime_no_callbacks() 247 Section 8); it may be modified only by the pm_runtime_no_callbacks()
248 helper function 248 helper function
249 249
@@ -270,16 +270,16 @@ defined in include/linux/pm.h:
270 270
271All of the above fields are members of the 'power' member of 'struct device'. 271All of the above fields are members of the 'power' member of 'struct device'.
272 272
2734. Run-time PM Device Helper Functions 2734. Runtime PM Device Helper Functions
274 274
275The following run-time PM helper functions are defined in 275The following runtime PM helper functions are defined in
276drivers/base/power/runtime.c and include/linux/pm_runtime.h: 276drivers/base/power/runtime.c and include/linux/pm_runtime.h:
277 277
278 void pm_runtime_init(struct device *dev); 278 void pm_runtime_init(struct device *dev);
279 - initialize the device run-time PM fields in 'struct dev_pm_info' 279 - initialize the device runtime PM fields in 'struct dev_pm_info'
280 280
281 void pm_runtime_remove(struct device *dev); 281 void pm_runtime_remove(struct device *dev);
282 - make sure that the run-time PM of the device will be disabled after 282 - make sure that the runtime PM of the device will be disabled after
283 removing the device from device hierarchy 283 removing the device from device hierarchy
284 284
285 int pm_runtime_idle(struct device *dev); 285 int pm_runtime_idle(struct device *dev);
@@ -289,9 +289,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
289 289
290 int pm_runtime_suspend(struct device *dev); 290 int pm_runtime_suspend(struct device *dev);
291 - execute the subsystem-level suspend callback for the device; returns 0 on 291 - execute the subsystem-level suspend callback for the device; returns 0 on
292 success, 1 if the device's run-time PM status was already 'suspended', or 292 success, 1 if the device's runtime PM status was already 'suspended', or
293 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt 293 error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
294 to suspend the device again in future 294 to suspend the device again in future and -EACCES means that
295 'power.disable_depth' is different from 0
295 296
296 int pm_runtime_autosuspend(struct device *dev); 297 int pm_runtime_autosuspend(struct device *dev);
297 - same as pm_runtime_suspend() except that the autosuspend delay is taken 298 - same as pm_runtime_suspend() except that the autosuspend delay is taken
@@ -301,10 +302,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
301 302
302 int pm_runtime_resume(struct device *dev); 303 int pm_runtime_resume(struct device *dev);
303 - execute the subsystem-level resume callback for the device; returns 0 on 304 - execute the subsystem-level resume callback for the device; returns 0 on
304 success, 1 if the device's run-time PM status was already 'active' or 305 success, 1 if the device's runtime PM status was already 'active' or
305 error code on failure, where -EAGAIN means it may be safe to attempt to 306 error code on failure, where -EAGAIN means it may be safe to attempt to
306 resume the device again in future, but 'power.runtime_error' should be 307 resume the device again in future, but 'power.runtime_error' should be
307 checked additionally 308 checked additionally, and -EACCES means that 'power.disable_depth' is
309 different from 0
308 310
309 int pm_request_idle(struct device *dev); 311 int pm_request_idle(struct device *dev);
310 - submit a request to execute the subsystem-level idle callback for the 312 - submit a request to execute the subsystem-level idle callback for the
@@ -321,7 +323,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
321 device in future, where 'delay' is the time to wait before queuing up a 323 device in future, where 'delay' is the time to wait before queuing up a
322 suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work 324 suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
323 item is queued up immediately); returns 0 on success, 1 if the device's PM 325 item is queued up immediately); returns 0 on success, 1 if the device's PM
324 run-time status was already 'suspended', or error code if the request 326 runtime status was already 'suspended', or error code if the request
325 hasn't been scheduled (or queued up if 'delay' is 0); if the execution of 327 hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
326 ->runtime_suspend() is already scheduled and not yet expired, the new 328 ->runtime_suspend() is already scheduled and not yet expired, the new
327 value of 'delay' will be used as the time to wait 329 value of 'delay' will be used as the time to wait
@@ -329,7 +331,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
329 int pm_request_resume(struct device *dev); 331 int pm_request_resume(struct device *dev);
330 - submit a request to execute the subsystem-level resume callback for the 332 - submit a request to execute the subsystem-level resume callback for the
331 device (the request is represented by a work item in pm_wq); returns 0 on 333 device (the request is represented by a work item in pm_wq); returns 0 on
332 success, 1 if the device's run-time PM status was already 'active', or 334 success, 1 if the device's runtime PM status was already 'active', or
333 error code if the request hasn't been queued up 335 error code if the request hasn't been queued up
334 336
335 void pm_runtime_get_noresume(struct device *dev); 337 void pm_runtime_get_noresume(struct device *dev);
@@ -367,22 +369,32 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
367 pm_runtime_autosuspend(dev) and return its result 369 pm_runtime_autosuspend(dev) and return its result
368 370
369 void pm_runtime_enable(struct device *dev); 371 void pm_runtime_enable(struct device *dev);
370 - enable the run-time PM helper functions to run the device bus type's 372 - decrement the device's 'power.disable_depth' field; if that field is equal
371 run-time PM callbacks described in Section 2 373 to zero, the runtime PM helper functions can execute subsystem-level
374 callbacks described in Section 2 for the device
372 375
373 int pm_runtime_disable(struct device *dev); 376 int pm_runtime_disable(struct device *dev);
374 - prevent the run-time PM helper functions from running subsystem-level 377 - increment the device's 'power.disable_depth' field (if the value of that
375 run-time PM callbacks for the device, make sure that all of the pending 378 field was previously zero, this prevents subsystem-level runtime PM
376 run-time PM operations on the device are either completed or canceled; 379 callbacks from being run for the device), make sure that all of the pending
380 runtime PM operations on the device are either completed or canceled;
377 returns 1 if there was a resume request pending and it was necessary to 381 returns 1 if there was a resume request pending and it was necessary to
378 execute the subsystem-level resume callback for the device to satisfy that 382 execute the subsystem-level resume callback for the device to satisfy that
379 request, otherwise 0 is returned 383 request, otherwise 0 is returned
380 384
385 int pm_runtime_barrier(struct device *dev);
386 - check if there's a resume request pending for the device and resume it
387 (synchronously) in that case, cancel any other pending runtime PM requests
388 regarding it and wait for all runtime PM operations on it in progress to
389 complete; returns 1 if there was a resume request pending and it was
390 necessary to execute the subsystem-level resume callback for the device to
391 satisfy that request, otherwise 0 is returned
392
381 void pm_suspend_ignore_children(struct device *dev, bool enable); 393 void pm_suspend_ignore_children(struct device *dev, bool enable);
382 - set/unset the power.ignore_children flag of the device 394 - set/unset the power.ignore_children flag of the device
383 395
384 int pm_runtime_set_active(struct device *dev); 396 int pm_runtime_set_active(struct device *dev);
385 - clear the device's 'power.runtime_error' flag, set the device's run-time 397 - clear the device's 'power.runtime_error' flag, set the device's runtime
386 PM status to 'active' and update its parent's counter of 'active' 398 PM status to 'active' and update its parent's counter of 'active'
387 children as appropriate (it is only valid to use this function if 399 children as appropriate (it is only valid to use this function if
388 'power.runtime_error' is set or 'power.disable_depth' is greater than 400 'power.runtime_error' is set or 'power.disable_depth' is greater than
@@ -390,7 +402,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
390 which is not active and the 'power.ignore_children' flag of which is unset 402 which is not active and the 'power.ignore_children' flag of which is unset
391 403
392 void pm_runtime_set_suspended(struct device *dev); 404 void pm_runtime_set_suspended(struct device *dev);
393 - clear the device's 'power.runtime_error' flag, set the device's run-time 405 - clear the device's 'power.runtime_error' flag, set the device's runtime
394 PM status to 'suspended' and update its parent's counter of 'active' 406 PM status to 'suspended' and update its parent's counter of 'active'
395 children as appropriate (it is only valid to use this function if 407 children as appropriate (it is only valid to use this function if
396 'power.runtime_error' is set or 'power.disable_depth' is greater than 408 'power.runtime_error' is set or 'power.disable_depth' is greater than
@@ -400,6 +412,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
400 - return true if the device's runtime PM status is 'suspended' and its 412 - return true if the device's runtime PM status is 'suspended' and its
401 'power.disable_depth' field is equal to zero, or false otherwise 413 'power.disable_depth' field is equal to zero, or false otherwise
402 414
415 bool pm_runtime_status_suspended(struct device *dev);
416 - return true if the device's runtime PM status is 'suspended'
417
403 void pm_runtime_allow(struct device *dev); 418 void pm_runtime_allow(struct device *dev);
404 - set the power.runtime_auto flag for the device and decrease its usage 419 - set the power.runtime_auto flag for the device and decrease its usage
405 counter (used by the /sys/devices/.../power/control interface to 420 counter (used by the /sys/devices/.../power/control interface to
@@ -411,7 +426,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
411 effectively prevent the device from being power managed at run time) 426 effectively prevent the device from being power managed at run time)
412 427
413 void pm_runtime_no_callbacks(struct device *dev); 428 void pm_runtime_no_callbacks(struct device *dev);
414 - set the power.no_callbacks flag for the device and remove the run-time 429 - set the power.no_callbacks flag for the device and remove the runtime
415 PM attributes from /sys/devices/.../power (or prevent them from being 430 PM attributes from /sys/devices/.../power (or prevent them from being
416 added when the device is registered) 431 added when the device is registered)
417 432
@@ -431,7 +446,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
431 446
432 void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); 447 void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
433 - set the power.autosuspend_delay value to 'delay' (expressed in 448 - set the power.autosuspend_delay value to 'delay' (expressed in
434 milliseconds); if 'delay' is negative then run-time suspends are 449 milliseconds); if 'delay' is negative then runtime suspends are
435 prevented 450 prevented
436 451
437 unsigned long pm_runtime_autosuspend_expiration(struct device *dev); 452 unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
@@ -470,35 +485,35 @@ pm_runtime_resume()
470pm_runtime_get_sync() 485pm_runtime_get_sync()
471pm_runtime_put_sync_suspend() 486pm_runtime_put_sync_suspend()
472 487
4735. Run-time PM Initialization, Device Probing and Removal 4885. Runtime PM Initialization, Device Probing and Removal
474 489
475Initially, the run-time PM is disabled for all devices, which means that the 490Initially, the runtime PM is disabled for all devices, which means that the
476majority of the run-time PM helper functions described in Section 4 will return 491majority of the runtime PM helper functions described in Section 4 will return
477-EAGAIN until pm_runtime_enable() is called for the device. 492-EAGAIN until pm_runtime_enable() is called for the device.
478 493
479In addition to that, the initial run-time PM status of all devices is 494In addition to that, the initial runtime PM status of all devices is
480'suspended', but it need not reflect the actual physical state of the device. 495'suspended', but it need not reflect the actual physical state of the device.
481Thus, if the device is initially active (i.e. it is able to process I/O), its 496Thus, if the device is initially active (i.e. it is able to process I/O), its
482run-time PM status must be changed to 'active', with the help of 497runtime PM status must be changed to 'active', with the help of
483pm_runtime_set_active(), before pm_runtime_enable() is called for the device. 498pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
484 499
485However, if the device has a parent and the parent's run-time PM is enabled, 500However, if the device has a parent and the parent's runtime PM is enabled,
486calling pm_runtime_set_active() for the device will affect the parent, unless 501calling pm_runtime_set_active() for the device will affect the parent, unless
487the parent's 'power.ignore_children' flag is set. Namely, in that case the 502the parent's 'power.ignore_children' flag is set. Namely, in that case the
488parent won't be able to suspend at run time, using the PM core's helper 503parent won't be able to suspend at run time, using the PM core's helper
489functions, as long as the child's status is 'active', even if the child's 504functions, as long as the child's status is 'active', even if the child's
490run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for 505runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
491the child yet or pm_runtime_disable() has been called for it). For this reason, 506the child yet or pm_runtime_disable() has been called for it). For this reason,
492once pm_runtime_set_active() has been called for the device, pm_runtime_enable() 507once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
493should be called for it too as soon as reasonably possible or its run-time PM 508should be called for it too as soon as reasonably possible or its runtime PM
494status should be changed back to 'suspended' with the help of 509status should be changed back to 'suspended' with the help of
495pm_runtime_set_suspended(). 510pm_runtime_set_suspended().
496 511
497If the default initial run-time PM status of the device (i.e. 'suspended') 512If the default initial runtime PM status of the device (i.e. 'suspended')
498reflects the actual state of the device, its bus type's or its driver's 513reflects the actual state of the device, its bus type's or its driver's
499->probe() callback will likely need to wake it up using one of the PM core's 514->probe() callback will likely need to wake it up using one of the PM core's
500helper functions described in Section 4. In that case, pm_runtime_resume() 515helper functions described in Section 4. In that case, pm_runtime_resume()
501should be used. Of course, for this purpose the device's run-time PM has to be 516should be used. Of course, for this purpose the device's runtime PM has to be
502enabled earlier by calling pm_runtime_enable(). 517enabled earlier by calling pm_runtime_enable().
503 518
504If the device bus type's or driver's ->probe() callback runs 519If the device bus type's or driver's ->probe() callback runs
@@ -529,33 +544,33 @@ The user space can effectively disallow the driver of the device to power manage
529it at run time by changing the value of its /sys/devices/.../power/control 544it at run time by changing the value of its /sys/devices/.../power/control
530attribute to "on", which causes pm_runtime_forbid() to be called. In principle, 545attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
531this mechanism may also be used by the driver to effectively turn off the 546this mechanism may also be used by the driver to effectively turn off the
532run-time power management of the device until the user space turns it on. 547runtime power management of the device until the user space turns it on.
533Namely, during the initialization the driver can make sure that the run-time PM 548Namely, during the initialization the driver can make sure that the runtime PM
534status of the device is 'active' and call pm_runtime_forbid(). It should be 549status of the device is 'active' and call pm_runtime_forbid(). It should be
535noted, however, that if the user space has already intentionally changed the 550noted, however, that if the user space has already intentionally changed the
536value of /sys/devices/.../power/control to "auto" to allow the driver to power 551value of /sys/devices/.../power/control to "auto" to allow the driver to power
537manage the device at run time, the driver may confuse it by using 552manage the device at run time, the driver may confuse it by using
538pm_runtime_forbid() this way. 553pm_runtime_forbid() this way.
539 554
5406. Run-time PM and System Sleep 5556. Runtime PM and System Sleep
541 556
542Run-time PM and system sleep (i.e., system suspend and hibernation, also known 557Runtime PM and system sleep (i.e., system suspend and hibernation, also known
543as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of 558as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
544ways. If a device is active when a system sleep starts, everything is 559ways. If a device is active when a system sleep starts, everything is
545straightforward. But what should happen if the device is already suspended? 560straightforward. But what should happen if the device is already suspended?
546 561
547The device may have different wake-up settings for run-time PM and system sleep. 562The device may have different wake-up settings for runtime PM and system sleep.
548For example, remote wake-up may be enabled for run-time suspend but disallowed 563For example, remote wake-up may be enabled for runtime suspend but disallowed
549for system sleep (device_may_wakeup(dev) returns 'false'). When this happens, 564for system sleep (device_may_wakeup(dev) returns 'false'). When this happens,
550the subsystem-level system suspend callback is responsible for changing the 565the subsystem-level system suspend callback is responsible for changing the
551device's wake-up setting (it may leave that to the device driver's system 566device's wake-up setting (it may leave that to the device driver's system
552suspend routine). It may be necessary to resume the device and suspend it again 567suspend routine). It may be necessary to resume the device and suspend it again
553in order to do so. The same is true if the driver uses different power levels 568in order to do so. The same is true if the driver uses different power levels
554or other settings for run-time suspend and system sleep. 569or other settings for runtime suspend and system sleep.
555 570
556During system resume, devices generally should be brought back to full power, 571During system resume, the simplest approach is to bring all devices back to full
557even if they were suspended before the system sleep began. There are several 572power, even if they had been suspended before the system suspend began. There
558reasons for this, including: 573are several reasons for this, including:
559 574
560 * The device might need to switch power levels, wake-up settings, etc. 575 * The device might need to switch power levels, wake-up settings, etc.
561 576
@@ -570,18 +585,50 @@ reasons for this, including:
570 * The device might need to be reset. 585 * The device might need to be reset.
571 586
572 * Even though the device was suspended, if its usage counter was > 0 then most 587 * Even though the device was suspended, if its usage counter was > 0 then most
573 likely it would need a run-time resume in the near future anyway. 588 likely it would need a runtime resume in the near future anyway.
574
575 * Always going back to full power is simplest.
576 589
577If the device was suspended before the sleep began, then its run-time PM status 590If the device had been suspended before the system suspend began and it's
578will have to be updated to reflect the actual post-system sleep status. The way 591brought back to full power during resume, then its runtime PM status will have
579to do this is: 592to be updated to reflect the actual post-system sleep status. The way to do
593this is:
580 594
581 pm_runtime_disable(dev); 595 pm_runtime_disable(dev);
582 pm_runtime_set_active(dev); 596 pm_runtime_set_active(dev);
583 pm_runtime_enable(dev); 597 pm_runtime_enable(dev);
584 598
599The PM core always increments the runtime usage counter before calling the
600->suspend() callback and decrements it after calling the ->resume() callback.
601Hence disabling runtime PM temporarily like this will not cause any runtime
602suspend attempts to be permanently lost. If the usage count goes to zero
603following the return of the ->resume() callback, the ->runtime_idle() callback
604will be invoked as usual.
605
606On some systems, however, system sleep is not entered through a global firmware
607or hardware operation. Instead, all hardware components are put into low-power
608states directly by the kernel in a coordinated way. Then, the system sleep
609state effectively follows from the states the hardware components end up in
610and the system is woken up from that state by a hardware interrupt or a similar
611mechanism entirely under the kernel's control. As a result, the kernel never
612gives control away and the states of all devices during resume are precisely
613known to it. If that is the case and none of the situations listed above takes
614place (in particular, if the system is not waking up from hibernation), it may
615be more efficient to leave the devices that had been suspended before the system
616suspend began in the suspended state.
617
618The PM core does its best to reduce the probability of race conditions between
619the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
620out the following operations:
621
622 * During system suspend it calls pm_runtime_get_noresume() and
623 pm_runtime_barrier() for every device right before executing the
624 subsystem-level .suspend() callback for it. In addition to that it calls
625 pm_runtime_disable() for every device right after executing the
626 subsystem-level .suspend() callback for it.
627
628 * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
629 for every device right before and right after executing the subsystem-level
630 .resume() callback for it, respectively.
631
5857. Generic subsystem callbacks 6327. Generic subsystem callbacks
586 633
587Subsystems may wish to conserve code space by using the set of generic power 634Subsystems may wish to conserve code space by using the set of generic power
@@ -606,40 +653,68 @@ driver/base/power/generic_ops.c:
606 callback provided by its driver and return its result, or return 0 if not 653 callback provided by its driver and return its result, or return 0 if not
607 defined 654 defined
608 655
656 int pm_generic_suspend_noirq(struct device *dev);
657 - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
658 callback provided by the device's driver and return its result, or return
659 0 if not defined
660
609 int pm_generic_resume(struct device *dev); 661 int pm_generic_resume(struct device *dev);
610 - invoke the ->resume() callback provided by the driver of this device and, 662 - invoke the ->resume() callback provided by the driver of this device and,
611 if successful, change the device's runtime PM status to 'active' 663 if successful, change the device's runtime PM status to 'active'
612 664
665 int pm_generic_resume_noirq(struct device *dev);
666 - invoke the ->resume_noirq() callback provided by the driver of this device
667
613 int pm_generic_freeze(struct device *dev); 668 int pm_generic_freeze(struct device *dev);
614 - if the device has not been suspended at run time, invoke the ->freeze() 669 - if the device has not been suspended at run time, invoke the ->freeze()
615 callback provided by its driver and return its result, or return 0 if not 670 callback provided by its driver and return its result, or return 0 if not
616 defined 671 defined
617 672
673 int pm_generic_freeze_noirq(struct device *dev);
674 - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
675 callback provided by the device's driver and return its result, or return
676 0 if not defined
677
618 int pm_generic_thaw(struct device *dev); 678 int pm_generic_thaw(struct device *dev);
619 - if the device has not been suspended at run time, invoke the ->thaw() 679 - if the device has not been suspended at run time, invoke the ->thaw()
620 callback provided by its driver and return its result, or return 0 if not 680 callback provided by its driver and return its result, or return 0 if not
621 defined 681 defined
622 682
683 int pm_generic_thaw_noirq(struct device *dev);
684 - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
685 callback provided by the device's driver and return its result, or return
686 0 if not defined
687
623 int pm_generic_poweroff(struct device *dev); 688 int pm_generic_poweroff(struct device *dev);
624 - if the device has not been suspended at run time, invoke the ->poweroff() 689 - if the device has not been suspended at run time, invoke the ->poweroff()
625 callback provided by its driver and return its result, or return 0 if not 690 callback provided by its driver and return its result, or return 0 if not
626 defined 691 defined
627 692
693 int pm_generic_poweroff_noirq(struct device *dev);
694 - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
695 callback provided by the device's driver and return its result, or return
696 0 if not defined
697
628 int pm_generic_restore(struct device *dev); 698 int pm_generic_restore(struct device *dev);
629 - invoke the ->restore() callback provided by the driver of this device and, 699 - invoke the ->restore() callback provided by the driver of this device and,
630 if successful, change the device's runtime PM status to 'active' 700 if successful, change the device's runtime PM status to 'active'
631 701
702 int pm_generic_restore_noirq(struct device *dev);
703 - invoke the ->restore_noirq() callback provided by the device's driver
704
632These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(), 705These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(),
633->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(), 706->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
634or ->restore() callback pointers in the subsystem-level dev_pm_ops structures. 707->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
708->poweroff(), ->poweroff_noirq(), ->restore(), or ->restore_noirq() callback
709pointers in the subsystem-level dev_pm_ops structures.
635 710
636If a subsystem wishes to use all of them at the same time, it can simply assign 711If a subsystem wishes to use all of them at the same time, it can simply assign
637the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its 712the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its
638dev_pm_ops structure pointer. 713dev_pm_ops structure pointer.
639 714
640Device drivers that wish to use the same function as a system suspend, freeze, 715Device drivers that wish to use the same function as a system suspend, freeze,
641poweroff and run-time suspend callback, and similarly for system resume, thaw, 716poweroff and runtime suspend callback, and similarly for system resume, thaw,
642restore, and run-time resume, can achieve this with the help of the 717restore, and runtime resume, can achieve this with the help of the
643UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its 718UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
644last argument to NULL). 719last argument to NULL).
645 720
@@ -649,7 +724,7 @@ Some "devices" are only logical sub-devices of their parent and cannot be
649power-managed on their own. (The prototype example is a USB interface. Entire 724power-managed on their own. (The prototype example is a USB interface. Entire
650USB devices can go into low-power mode or send wake-up requests, but neither is 725USB devices can go into low-power mode or send wake-up requests, but neither is
651possible for individual interfaces.) The drivers for these devices have no 726possible for individual interfaces.) The drivers for these devices have no
652need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend() 727need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
653and ->runtime_resume() would always return 0 without doing anything else and 728and ->runtime_resume() would always return 0 without doing anything else and
654->runtime_idle() would always call pm_runtime_suspend(). 729->runtime_idle() would always call pm_runtime_suspend().
655 730
@@ -657,7 +732,7 @@ Subsystems can tell the PM core about these devices by calling
657pm_runtime_no_callbacks(). This should be done after the device structure is 732pm_runtime_no_callbacks(). This should be done after the device structure is
658initialized and before it is registered (although after device registration is 733initialized and before it is registered (although after device registration is
659also okay). The routine will set the device's power.no_callbacks flag and 734also okay). The routine will set the device's power.no_callbacks flag and
660prevent the non-debugging run-time PM sysfs attributes from being created. 735prevent the non-debugging runtime PM sysfs attributes from being created.
661 736
662When power.no_callbacks is set, the PM core will not invoke the 737When power.no_callbacks is set, the PM core will not invoke the
663->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. 738->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
@@ -665,7 +740,7 @@ Instead it will assume that suspends and resumes always succeed and that idle
665devices should be suspended. 740devices should be suspended.
666 741
667As a consequence, the PM core will never directly inform the device's subsystem 742As a consequence, the PM core will never directly inform the device's subsystem
668or driver about run-time power changes. Instead, the driver for the device's 743or driver about runtime power changes. Instead, the driver for the device's
669parent must take responsibility for telling the device's driver when the 744parent must take responsibility for telling the device's driver when the
670parent's power state changes. 745parent's power state changes.
671 746
@@ -676,13 +751,13 @@ A device should be put in a low-power state only when there's some reason to
676think it will remain in that state for a substantial time. A common heuristic 751think it will remain in that state for a substantial time. A common heuristic
677says that a device which hasn't been used for a while is liable to remain 752says that a device which hasn't been used for a while is liable to remain
678unused; following this advice, drivers should not allow devices to be suspended 753unused; following this advice, drivers should not allow devices to be suspended
679at run-time until they have been inactive for some minimum period. Even when 754at runtime until they have been inactive for some minimum period. Even when
680the heuristic ends up being non-optimal, it will still prevent devices from 755the heuristic ends up being non-optimal, it will still prevent devices from
681"bouncing" too rapidly between low-power and full-power states. 756"bouncing" too rapidly between low-power and full-power states.
682 757
683The term "autosuspend" is an historical remnant. It doesn't mean that the 758The term "autosuspend" is an historical remnant. It doesn't mean that the
684device is automatically suspended (the subsystem or driver still has to call 759device is automatically suspended (the subsystem or driver still has to call
685the appropriate PM routines); rather it means that run-time suspends will 760the appropriate PM routines); rather it means that runtime suspends will
686automatically be delayed until the desired period of inactivity has elapsed. 761automatically be delayed until the desired period of inactivity has elapsed.
687 762
688Inactivity is determined based on the power.last_busy field. Drivers should 763Inactivity is determined based on the power.last_busy field. Drivers should
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 19f8278c3854..8d32d85a5234 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -196,15 +196,20 @@ Support for Augmented rbtrees
196Augmented rbtree is an rbtree with "some" additional data stored in each node. 196Augmented rbtree is an rbtree with "some" additional data stored in each node.
197This data can be used to augment some new functionality to rbtree. 197This data can be used to augment some new functionality to rbtree.
198Augmented rbtree is an optional feature built on top of basic rbtree 198Augmented rbtree is an optional feature built on top of basic rbtree
199infrastructure. rbtree user who wants this feature will have an augment 199infrastructure. An rbtree user who wants this feature will have to call the
200callback function in rb_root initialized. 200augmentation functions with the user provided augmentation callback
201 201when inserting and erasing nodes.
202This callback function will be called from rbtree core routines whenever 202
203a node has a change in one or both of its children. It is the responsibility 203On insertion, the user must call rb_augment_insert() once the new node is in
204of the callback function to recalculate the additional data that is in the 204place. This will cause the augmentation function callback to be called for
205rb node using new children information. Note that if this new additional 205each node between the new node and the root which has been affected by the
206data affects the parent node's additional data, then callback function has 206insertion.
207to handle it and do the recursive updates. 207
208When erasing a node, the user must call rb_augment_erase_begin() first to
209retrieve the deepest node on the rebalance path. Then, after erasing the
210original node, the user must call rb_augment_erase_end() with the deepest
211node found earlier. This will cause the augmentation function to be called
212for each affected node between the deepest node and the root.
208 213
209 214
210Interval tree is an example of augmented rb tree. Reference - 215Interval tree is an example of augmented rb tree. Reference -
diff --git a/Documentation/s390/TAPE b/Documentation/s390/TAPE
deleted file mode 100644
index c639aa5603ff..000000000000
--- a/Documentation/s390/TAPE
+++ /dev/null
@@ -1,122 +0,0 @@
1Channel attached Tape device driver
2
3-----------------------------WARNING-----------------------------------------
4This driver is considered to be EXPERIMENTAL. Do NOT use it in
5production environments. Feel free to test it and report problems back to us.
6-----------------------------------------------------------------------------
7
8The LINUX for zSeries tape device driver manages channel attached tape drives
9which are compatible to IBM 3480 or IBM 3490 magnetic tape subsystems. This
10includes various models of these devices (for example the 3490E).
11
12
13Tape driver features
14
15The device driver supports a maximum of 128 tape devices.
16No official LINUX device major number is assigned to the zSeries tape device
17driver. It allocates major numbers dynamically and reports them on system
18startup.
19Typically it will get major number 254 for both the character device front-end
20and the block device front-end.
21
22The tape device driver needs no kernel parameters. All supported devices
23present are detected on driver initialization at system startup or module load.
24The devices detected are ordered by their subchannel numbers. The device with
25the lowest subchannel number becomes device 0, the next one will be device 1
26and so on.
27
28
29Tape character device front-end
30
31The usual way to read or write to the tape device is through the character
32device front-end. The zSeries tape device driver provides two character devices
33for each physical device -- the first of these will rewind automatically when
34it is closed, the second will not rewind automatically.
35
36The character device nodes are named /dev/rtibm0 (rewinding) and /dev/ntibm0
37(non-rewinding) for the first device, /dev/rtibm1 and /dev/ntibm1 for the
38second, and so on.
39
40The character device front-end can be used as any other LINUX tape device. You
41can write to it and read from it using LINUX facilities such as GNU tar. The
42tool mt can be used to perform control operations, such as rewinding the tape
43or skipping a file.
44
45Most LINUX tape software should work with either tape character device.
46
47
48Tape block device front-end
49
50The tape device may also be accessed as a block device in read-only mode.
51This could be used for software installation in the same way as it is used with
52other operation systems on the zSeries platform (and most LINUX
53distributions are shipped on compact disk using ISO9660 filesystems).
54
55One block device node is provided for each physical device. These are named
56/dev/btibm0 for the first device, /dev/btibm1 for the second and so on.
57You should only use the ISO9660 filesystem on LINUX for zSeries tapes because
58the physical tape devices cannot perform fast seeks and the ISO9660 system is
59optimized for this situation.
60
61
62Tape block device example
63
64In this example a tape with an ISO9660 filesystem is created using the first
65tape device. ISO9660 filesystem support must be built into your system kernel
66for this.
67The mt command is used to issue tape commands and the mkisofs command to
68create an ISO9660 filesystem:
69
70- create a LINUX directory (somedir) with the contents of the filesystem
71 mkdir somedir
72 cp contents somedir
73
74- insert a tape
75
76- ensure the tape is at the beginning
77 mt -f /dev/ntibm0 rewind
78
79- set the blocksize of the character driver. The blocksize 2048 bytes
80 is commonly used on ISO9660 CD-Roms
81 mt -f /dev/ntibm0 setblk 2048
82
83- write the filesystem to the character device driver
84 mkisofs -o /dev/ntibm0 somedir
85
86- rewind the tape again
87 mt -f /dev/ntibm0 rewind
88
89- Now you can mount your new filesystem as a block device:
90 mount -t iso9660 -o ro,block=2048 /dev/btibm0 /mnt
91
92TODO List
93
94 - Driver has to be stabilized still
95
96BUGS
97
98This driver is considered BETA, which means some weaknesses may still
99be in it.
100If an error occurs which cannot be handled by the code you will get a
101sense-data dump.In that case please do the following:
102
1031. set the tape driver debug level to maximum:
104 echo 6 >/proc/s390dbf/tape/level
105
1062. re-perform the actions which produced the bug. (Hopefully the bug will
107 reappear.)
108
1093. get a snapshot from the debug-feature:
110 cat /proc/s390dbf/tape/hex_ascii >somefile
111
1124. Now put the snapshot together with a detailed description of the situation
113 that led to the bug:
114 - Which tool did you use?
115 - Which hardware do you have?
116 - Was your tape unit online?
117 - Is it a shared tape unit?
118
1195. Send an email with your bug report to:
120 mailto:Linux390@de.ibm.com
121
122
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index d43dbcbd163b..28aa1075e291 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -66,7 +66,7 @@ Your cpu_idle routines need to obey the following rules:
66 barrier issued (followed by a test of need_resched with 66 barrier issued (followed by a test of need_resched with
67 interrupts disabled, as explained in 3). 67 interrupts disabled, as explained in 3).
68 68
69arch/i386/kernel/process.c has examples of both polling and 69arch/x86/kernel/process.c has examples of both polling and
70sleeping idle functions. 70sleeping idle functions.
71 71
72 72
diff --git a/Documentation/scsi/BusLogic.txt b/Documentation/scsi/BusLogic.txt
index d7fbc9488b98..48e982cd6fe7 100644
--- a/Documentation/scsi/BusLogic.txt
+++ b/Documentation/scsi/BusLogic.txt
@@ -553,7 +553,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree:
553 make config 553 make config
554 make zImage 554 make zImage
555 555
556Then install "arch/i386/boot/zImage" as your standard kernel, run lilo if 556Then install "arch/x86/boot/zImage" as your standard kernel, run lilo if
557appropriate, and reboot. 557appropriate, and reboot.
558 558
559 559
diff --git a/Documentation/serial/computone.txt b/Documentation/serial/computone.txt
index c57ea4781e5d..60a6f657c37d 100644
--- a/Documentation/serial/computone.txt
+++ b/Documentation/serial/computone.txt
@@ -87,7 +87,7 @@ c) Set address on ISA cards then:
87 edit /usr/src/linux/drivers/char/ip2.c 87 edit /usr/src/linux/drivers/char/ip2.c
88 (Optional - may be specified on kernel command line now) 88 (Optional - may be specified on kernel command line now)
89d) Run "make zImage" or whatever target you prefer. 89d) Run "make zImage" or whatever target you prefer.
90e) mv /usr/src/linux/arch/i386/boot/zImage to /boot. 90e) mv /usr/src/linux/arch/x86/boot/zImage to /boot.
91f) Add new config for this kernel into /etc/lilo.conf, run "lilo" 91f) Add new config for this kernel into /etc/lilo.conf, run "lilo"
92 or copy to a floppy disk and boot from that floppy disk. 92 or copy to a floppy disk and boot from that floppy disk.
93g) Reboot using this kernel 93g) Reboot using this kernel
diff --git a/Documentation/sound/alsa/HD-Audio-Controls.txt b/Documentation/sound/alsa/HD-Audio-Controls.txt
new file mode 100644
index 000000000000..1482035243e6
--- /dev/null
+++ b/Documentation/sound/alsa/HD-Audio-Controls.txt
@@ -0,0 +1,100 @@
1This file explains the codec-specific mixer controls.
2
3Realtek codecs
4--------------
5
6* Channel Mode
7 This is an enum control to change the surround-channel setup,
8 appears only when the surround channels are available.
9 It gives the number of channels to be used, "2ch", "4ch", "6ch",
10 and "8ch". According to the configuration, this also controls the
11 jack-retasking of multi-I/O jacks.
12
13* Auto-Mute Mode
14 This is an enum control to change the auto-mute behavior of the
15 headphone and line-out jacks. If built-in speakers and headphone
16 and/or line-out jacks are available on a machine, this control
17 appears.
18 When there are only either headphones or line-out jacks, it gives
19 "Disabled" and "Enabled" state. When enabled, the speaker is muted
20 automatically when a jack is plugged.
21
22 When both headphone and line-out jacks are present, it gives
23 "Disabled", "Speaker Only" and "Line-Out+Speaker". When
24 speaker-only is chosen, plugging into a headphone or a line-out jack
25 mutes the speakers, but not line-outs. When line-out+speaker is
26 selected, plugging to a headphone jack mutes both speakers and
27 line-outs.
28
29
30IDT/Sigmatel codecs
31-------------------
32
33* Analog Loopback
34 This control enables/disables the analog-loopback circuit. This
35 appears only when "loopback" is set to true in a codec hint
36 (see HD-Audio.txt). Note that on some codecs the analog-loopback
37 and the normal PCM playback are exclusive, i.e. when this is on, you
38 won't hear any PCM stream.
39
40* Swap Center/LFE
41 Swaps the center and LFE channel order. Normally, the left
42 corresponds to the center and the right to the LFE. When this is
43 ON, the left to the LFE and the right to the center.
44
45* Headphone as Line Out
46 When this control is ON, treat the headphone jacks as line-out
47 jacks. That is, the headphone won't auto-mute the other line-outs,
48 and no HP-amp is set to the pins.
49
50* Mic Jack Mode, Line Jack Mode, etc
51 These enum controls set the direction and the bias of the input jack
52 pins. Depending on the jack type, it can be set as "Mic In" and "Line
53 In", for determining the input bias, or it can be set to "Line Out"
54 when the pin is a multi-I/O jack for surround channels.
55
56
57VIA codecs
58----------
59
60* Smart 5.1
61 An enum control to re-task the multi-I/O jacks for surround outputs.
62 When it's ON, the corresponding input jacks (usually a line-in and a
63 mic-in) are switched as the surround and the CLFE output jacks.
64
65* Independent HP
66 When this enum control is enabled, the headphone output is routed
67 from an individual stream (the third PCM such as hw:0,2) instead of
68 the primary stream. In the case the headphone DAC is shared with a
69 side or a CLFE-channel DAC, the DAC is switched to the headphone
70 automatically.
71
72* Loopback Mixing
73 An enum control to determine whether the analog-loopback route is
74 enabled or not. When it's enabled, the analog-loopback is mixed to
75 the front-channel. Also, the same route is used for the headphone
76 and speaker outputs. As a side-effect, when this mode is set, the
77 individual volume controls will be no longer available for
78 headphones and speakers because there is only one DAC connected to a
79 mixer widget.
80
81* Dynamic Power-Control
82 This control determines whether the dynamic power-control per jack
83 detection is enabled or not. When enabled, the widgets' power states
84 (D0/D3) are changed dynamically depending on the jack plugging
85 state to reduce power consumption. However, if your system
86 doesn't provide a proper jack-detection, this won't work; in such a
87 case, turn this control OFF.
88
89* Jack Detect
90 This control is provided only for VT1708 codec which gives no proper
91 unsolicited event per jack plug. When this is on, the driver polls
92 the jack detection so that the headphone auto-mute can work, while
93 turning this off would reduce the power consumption.
94
95
96Conexant codecs
97---------------
98
99* Auto-Mute Mode
100 See Realtek codecs.
diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi
index 6325f5b48635..d8eb01c15db1 100644
--- a/Documentation/spi/ep93xx_spi
+++ b/Documentation/spi/ep93xx_spi
@@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void)
88 ARRAY_SIZE(ts72xx_spi_devices)); 88 ARRAY_SIZE(ts72xx_spi_devices));
89} 89}
90 90
91The driver can use DMA for the transfers also. In this case ts72xx_spi_info
92becomes:
93
94static struct ep93xx_spi_info ts72xx_spi_info = {
95 .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices),
96 .use_dma = true,
97};
98
99Note that CONFIG_EP93XX_DMA should be enabled as well.
100
91Thanks to 101Thanks to
92========= 102=========
93Martin Guy, H. Hartley Sweeten and others who helped me during development of 103Martin Guy, H. Hartley Sweeten and others who helped me during development of
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index 493dada57372..00511e08db78 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -22,15 +22,11 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
22found in include/linux/spi/pxa2xx_spi.h: 22found in include/linux/spi/pxa2xx_spi.h:
23 23
24struct pxa2xx_spi_master { 24struct pxa2xx_spi_master {
25 enum pxa_ssp_type ssp_type;
26 u32 clock_enable; 25 u32 clock_enable;
27 u16 num_chipselect; 26 u16 num_chipselect;
28 u8 enable_dma; 27 u8 enable_dma;
29}; 28};
30 29
31The "pxa2xx_spi_master.ssp_type" field must have a value between 1 and 3 and
32informs the driver which features a particular SSP supports.
33
34The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the 30The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
35corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See 31corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See
36the "PXA2xx Developer Manual" section "Clocks and Power Management". 32the "PXA2xx Developer Manual" section "Clocks and Power Management".
@@ -61,7 +57,6 @@ static struct resource pxa_spi_nssp_resources[] = {
61}; 57};
62 58
63static struct pxa2xx_spi_master pxa_nssp_master_info = { 59static struct pxa2xx_spi_master pxa_nssp_master_info = {
64 .ssp_type = PXA25x_NSSP, /* Type of SSP */
65 .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ 60 .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
66 .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ 61 .num_chipselect = 1, /* Matches the number of chips attached to NSSP */
67 .enable_dma = 1, /* Enables NSSP DMA */ 62 .enable_dma = 1, /* Enables NSSP DMA */
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 5e7cb39ad195..1c7fb0a94e28 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -17,23 +17,21 @@ before actually making adjustments.
17 17
18Currently, these files might (depending on your configuration) 18Currently, these files might (depending on your configuration)
19show up in /proc/sys/kernel: 19show up in /proc/sys/kernel:
20- acpi_video_flags 20
21- acct 21- acct
22- acpi_video_flags
23- auto_msgmni
22- bootloader_type [ X86 only ] 24- bootloader_type [ X86 only ]
23- bootloader_version [ X86 only ] 25- bootloader_version [ X86 only ]
24- callhome [ S390 only ] 26- callhome [ S390 only ]
25- auto_msgmni
26- core_pattern 27- core_pattern
27- core_pipe_limit 28- core_pipe_limit
28- core_uses_pid 29- core_uses_pid
29- ctrl-alt-del 30- ctrl-alt-del
30- dentry-state
31- dmesg_restrict 31- dmesg_restrict
32- domainname 32- domainname
33- hostname 33- hostname
34- hotplug 34- hotplug
35- java-appletviewer [ binfmt_java, obsolete ]
36- java-interpreter [ binfmt_java, obsolete ]
37- kptr_restrict 35- kptr_restrict
38- kstack_depth_to_print [ X86 only ] 36- kstack_depth_to_print [ X86 only ]
39- l2cr [ PPC only ] 37- l2cr [ PPC only ]
@@ -48,10 +46,14 @@ show up in /proc/sys/kernel:
48- overflowgid 46- overflowgid
49- overflowuid 47- overflowuid
50- panic 48- panic
49- panic_on_oops
50- panic_on_unrecovered_nmi
51- pid_max 51- pid_max
52- powersave-nap [ PPC only ] 52- powersave-nap [ PPC only ]
53- panic_on_unrecovered_nmi
54- printk 53- printk
54- printk_delay
55- printk_ratelimit
56- printk_ratelimit_burst
55- randomize_va_space 57- randomize_va_space
56- real-root-dev ==> Documentation/initrd.txt 58- real-root-dev ==> Documentation/initrd.txt
57- reboot-cmd [ SPARC only ] 59- reboot-cmd [ SPARC only ]
@@ -62,6 +64,7 @@ show up in /proc/sys/kernel:
62- shmall 64- shmall
63- shmmax [ sysv ipc ] 65- shmmax [ sysv ipc ]
64- shmmni 66- shmmni
67- softlockup_thresh
65- stop-a [ SPARC only ] 68- stop-a [ SPARC only ]
66- sysrq ==> Documentation/sysrq.txt 69- sysrq ==> Documentation/sysrq.txt
67- tainted 70- tainted
@@ -71,15 +74,6 @@ show up in /proc/sys/kernel:
71 74
72============================================================== 75==============================================================
73 76
74acpi_video_flags:
75
76flags
77
78See Doc*/kernel/power/video.txt, it allows mode of video boot to be
79set during run time.
80
81==============================================================
82
83acct: 77acct:
84 78
85highwater lowwater frequency 79highwater lowwater frequency
@@ -97,6 +91,25 @@ valid for 30 seconds.
97 91
98============================================================== 92==============================================================
99 93
94acpi_video_flags:
95
96flags
97
98See Doc*/kernel/power/video.txt, it allows mode of video boot to be
99set during run time.
100
101==============================================================
102
103auto_msgmni:
104
105Enables/Disables automatic recomputing of msgmni upon memory add/remove
106or upon ipc namespace creation/removal (see the msgmni description
107above). Echoing "1" into this file enables msgmni automatic recomputing.
108Echoing "0" turns it off. auto_msgmni default value is 1.
109
110
111==============================================================
112
100bootloader_type: 113bootloader_type:
101 114
102x86 bootloader identification 115x86 bootloader identification
@@ -172,22 +185,24 @@ core_pattern is used to specify a core dumpfile pattern name.
172 185
173core_pipe_limit: 186core_pipe_limit:
174 187
175This sysctl is only applicable when core_pattern is configured to pipe core 188This sysctl is only applicable when core_pattern is configured to pipe
176files to a user space helper (when the first character of core_pattern is a '|', 189core files to a user space helper (when the first character of
177see above). When collecting cores via a pipe to an application, it is 190core_pattern is a '|', see above). When collecting cores via a pipe
178occasionally useful for the collecting application to gather data about the 191to an application, it is occasionally useful for the collecting
179crashing process from its /proc/pid directory. In order to do this safely, the 192application to gather data about the crashing process from its
180kernel must wait for the collecting process to exit, so as not to remove the 193/proc/pid directory. In order to do this safely, the kernel must wait
181crashing processes proc files prematurely. This in turn creates the possibility 194for the collecting process to exit, so as not to remove the crashing
182that a misbehaving userspace collecting process can block the reaping of a 195processes proc files prematurely. This in turn creates the
183crashed process simply by never exiting. This sysctl defends against that. It 196possibility that a misbehaving userspace collecting process can block
184defines how many concurrent crashing processes may be piped to user space 197the reaping of a crashed process simply by never exiting. This sysctl
185applications in parallel. If this value is exceeded, then those crashing 198defends against that. It defines how many concurrent crashing
186processes above that value are noted via the kernel log and their cores are 199processes may be piped to user space applications in parallel. If
187skipped. 0 is a special value, indicating that unlimited processes may be 200this value is exceeded, then those crashing processes above that value
188captured in parallel, but that no waiting will take place (i.e. the collecting 201are noted via the kernel log and their cores are skipped. 0 is a
189process is not guaranteed access to /proc/<crashing pid>/). This value defaults 202special value, indicating that unlimited processes may be captured in
190to 0. 203parallel, but that no waiting will take place (i.e. the collecting
204process is not guaranteed access to /proc/<crashing pid>/). This
205value defaults to 0.
191 206
192============================================================== 207==============================================================
193 208
@@ -218,14 +233,14 @@ to decide what to do with it.
218 233
219dmesg_restrict: 234dmesg_restrict:
220 235
221This toggle indicates whether unprivileged users are prevented from using 236This toggle indicates whether unprivileged users are prevented
222dmesg(8) to view messages from the kernel's log buffer. When 237from using dmesg(8) to view messages from the kernel's log buffer.
223dmesg_restrict is set to (0) there are no restrictions. When 238When dmesg_restrict is set to (0) there are no restrictions. When
224dmesg_restrict is set to (1), users must have CAP_SYSLOG to use 239dmesg_restrict is set to (1), users must have CAP_SYSLOG to use
225dmesg(8). 240dmesg(8).
226 241
227The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default 242The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
228value of dmesg_restrict. 243default value of dmesg_restrict.
229 244
230============================================================== 245==============================================================
231 246
@@ -256,13 +271,6 @@ Default value is "/sbin/hotplug".
256 271
257============================================================== 272==============================================================
258 273
259l2cr: (PPC only)
260
261This flag controls the L2 cache of G3 processor boards. If
2620, the cache is disabled. Enabled if nonzero.
263
264==============================================================
265
266kptr_restrict: 274kptr_restrict:
267 275
268This toggle indicates whether restrictions are placed on 276This toggle indicates whether restrictions are placed on
@@ -283,6 +291,13 @@ kernel stack.
283 291
284============================================================== 292==============================================================
285 293
294l2cr: (PPC only)
295
296This flag controls the L2 cache of G3 processor boards. If
2970, the cache is disabled. Enabled if nonzero.
298
299==============================================================
300
286modules_disabled: 301modules_disabled:
287 302
288A toggle value indicating if modules are allowed to be loaded 303A toggle value indicating if modules are allowed to be loaded
@@ -293,6 +308,21 @@ to false.
293 308
294============================================================== 309==============================================================
295 310
311nmi_watchdog:
312
313Enables/Disables the NMI watchdog on x86 systems. When the value is
314non-zero the NMI watchdog is enabled and will continuously test all
315online cpus to determine whether or not they are still functioning
316properly. Currently, passing "nmi_watchdog=" parameter at boot time is
317required for this function to work.
318
319If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel
320parameter), the NMI watchdog shares registers with oprofile. By
321disabling the NMI watchdog, oprofile may have more registers to
322utilize.
323
324==============================================================
325
296osrelease, ostype & version: 326osrelease, ostype & version:
297 327
298# cat osrelease 328# cat osrelease
@@ -312,10 +342,10 @@ The only way to tune these values is to rebuild the kernel :-)
312 342
313overflowgid & overflowuid: 343overflowgid & overflowuid:
314 344
315if your architecture did not always support 32-bit UIDs (i.e. arm, i386, 345if your architecture did not always support 32-bit UIDs (i.e. arm,
316m68k, sh, and sparc32), a fixed UID and GID will be returned to 346i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
317applications that use the old 16-bit UID/GID system calls, if the actual 347applications that use the old 16-bit UID/GID system calls, if the
318UID or GID would exceed 65535. 348actual UID or GID would exceed 65535.
319 349
320These sysctls allow you to change the value of the fixed UID and GID. 350These sysctls allow you to change the value of the fixed UID and GID.
321The default is 65534. 351The default is 65534.
@@ -324,9 +354,22 @@ The default is 65534.
324 354
325panic: 355panic:
326 356
327The value in this file represents the number of seconds the 357The value in this file represents the number of seconds the kernel
328kernel waits before rebooting on a panic. When you use the 358waits before rebooting on a panic. When you use the software watchdog,
329software watchdog, the recommended setting is 60. 359the recommended setting is 60.
360
361==============================================================
362
363panic_on_unrecovered_nmi:
364
365The default Linux behaviour on an NMI of either memory or unknown is
366to continue operation. For many environments such as scientific
367computing it is preferable that the box is taken out and the error
368dealt with than an uncorrected parity/ECC error get propagated.
369
370A small number of systems do generate NMI's for bizarre random reasons
371such as power management so the default is off. That sysctl works like
372the existing panic controls already in that directory.
330 373
331============================================================== 374==============================================================
332 375
@@ -376,6 +419,14 @@ the different loglevels.
376 419
377============================================================== 420==============================================================
378 421
422printk_delay:
423
424Delay each printk message by printk_delay milliseconds.
425
426Value from 0 - 10000 is allowed.
427
428==============================================================
429
379printk_ratelimit: 430printk_ratelimit:
380 431
381Some warning messages are rate limited. printk_ratelimit specifies 432Some warning messages are rate limited. printk_ratelimit specifies
@@ -395,15 +446,7 @@ send before ratelimiting kicks in.
395 446
396============================================================== 447==============================================================
397 448
398printk_delay: 449randomize_va_space:
399
400Delay each printk message in printk_delay milliseconds
401
402Value from 0 - 10000 is allowed.
403
404==============================================================
405
406randomize-va-space:
407 450
408This option can be used to select the type of process address 451This option can be used to select the type of process address
409space randomization that is used in the system, for architectures 452space randomization that is used in the system, for architectures
@@ -466,11 +509,11 @@ are doing anyway :)
466 509
467============================================================== 510==============================================================
468 511
469shmmax: 512shmmax:
470 513
471This value can be used to query and set the run time limit 514This value can be used to query and set the run time limit
472on the maximum shared memory segment size that can be created. 515on the maximum shared memory segment size that can be created.
473Shared memory segments up to 1Gb are now supported in the 516Shared memory segments up to 1Gb are now supported in the
474kernel. This value defaults to SHMMAX. 517kernel. This value defaults to SHMMAX.
475 518
476============================================================== 519==============================================================
@@ -484,7 +527,7 @@ tunable to zero will disable the softlockup detection altogether.
484 527
485============================================================== 528==============================================================
486 529
487tainted: 530tainted:
488 531
489Non-zero if the kernel has been tainted. Numeric values, which 532Non-zero if the kernel has been tainted. Numeric values, which
490can be ORed together: 533can be ORed together:
@@ -509,49 +552,11 @@ can be ORed together:
509 552
510============================================================== 553==============================================================
511 554
512auto_msgmni:
513
514Enables/Disables automatic recomputing of msgmni upon memory add/remove or
515upon ipc namespace creation/removal (see the msgmni description above).
516Echoing "1" into this file enables msgmni automatic recomputing.
517Echoing "0" turns it off.
518auto_msgmni default value is 1.
519
520==============================================================
521
522nmi_watchdog:
523
524Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
525the NMI watchdog is enabled and will continuously test all online cpus to
526determine whether or not they are still functioning properly. Currently,
527passing "nmi_watchdog=" parameter at boot time is required for this function
528to work.
529
530If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
531NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
532oprofile may have more registers to utilize.
533
534==============================================================
535
536unknown_nmi_panic: 555unknown_nmi_panic:
537 556
538The value in this file affects behavior of handling NMI. When the value is 557The value in this file affects behavior of handling NMI. When the
539non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel 558value is non-zero, unknown NMI is trapped and then panic occurs. At
540debugging information is displayed on console. 559that time, kernel debugging information is displayed on console.
541
542NMI switch that most IA32 servers have fires unknown NMI up, for example.
543If a system hangs up, try pressing the NMI switch.
544
545==============================================================
546
547panic_on_unrecovered_nmi:
548
549The default Linux behaviour on an NMI of either memory or unknown is to continue
550operation. For many environments such as scientific computing it is preferable
551that the box is taken out and the error dealt with than an uncorrected
552parity/ECC error get propogated.
553
554A small number of systems do generate NMI's for bizarre random reasons such as
555power management so the default is off. That sysctl works like the existing
556panic controls already in that directory.
557 560
561NMI switch that most IA32 servers have fires unknown NMI up, for
562example. If a system hangs up, try pressing the NMI switch.
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index c83bd6b4e6e8..d0d0bb9e3e25 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
22 22
23Synopsis of kprobe_events 23Synopsis of kprobe_events
24------------------------- 24-------------------------
25 p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe 25 p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
26 r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe 26 r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
27 -:[GRP/]EVENT : Clear a probe 27 -:[GRP/]EVENT : Clear a probe
28 28
29 GRP : Group name. If omitted, use "kprobes" for it. 29 GRP : Group name. If omitted, use "kprobes" for it.
30 EVENT : Event name. If omitted, the event name is generated 30 EVENT : Event name. If omitted, the event name is generated
31 based on SYMBOL+offs or MEMADDR. 31 based on SYM+offs or MEMADDR.
32 SYMBOL[+offs] : Symbol+offset where the probe is inserted. 32 MOD : Module name which has given SYM.
33 SYM[+offs] : Symbol+offset where the probe is inserted.
33 MEMADDR : Address where the probe is inserted. 34 MEMADDR : Address where the probe is inserted.
34 35
35 FETCHARGS : Arguments. Each probe can have up to 128 args. 36 FETCHARGS : Arguments. Each probe can have up to 128 args.
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c
new file mode 100644
index 000000000000..85870208edcf
--- /dev/null
+++ b/Documentation/vDSO/parse_vdso.c
@@ -0,0 +1,256 @@
1/*
2 * parse_vdso.c: Linux reference vDSO parser
3 * Written by Andrew Lutomirski, 2011.
4 *
5 * This code is meant to be linked in to various programs that run on Linux.
6 * As such, it is available with as few restrictions as possible. This file
7 * is licensed under the Creative Commons Zero License, version 1.0,
8 * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
9 *
10 * The vDSO is a regular ELF DSO that the kernel maps into user space when
11 * it starts a program. It works equally well in statically and dynamically
12 * linked binaries.
13 *
14 * This code is tested on x86_64. In principle it should work on any 64-bit
15 * architecture that has a vDSO.
16 */
17
18#include <stdbool.h>
19#include <stdint.h>
20#include <string.h>
21#include <elf.h>
22
23/*
24 * To use this vDSO parser, first call one of the vdso_init_* functions.
25 * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
26 * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
27 * Then call vdso_sym for each symbol you want. For example, to look up
28 * gettimeofday on x86_64, use:
29 *
30 * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
31 * or
32 * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
33 *
34 * vdso_sym will return 0 if the symbol doesn't exist or if the init function
35 * failed or was not called. vdso_sym is a little slow, so its return value
36 * should be cached.
37 *
38 * vdso_sym is threadsafe; the init functions are not.
39 *
40 * These are the prototypes:
41 */
42extern void vdso_init_from_auxv(void *auxv);
43extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
44extern void *vdso_sym(const char *version, const char *name);
45
46
47/* And here's the code. */
48
49#ifndef __x86_64__
50# error Not yet ported to non-x86_64 architectures
51#endif
52
/*
 * Parser state.  It is filled in by vdso_init_from_sysinfo_ehdr() and
 * consulted by vdso_sym(), which refuses to look anything up unless
 * 'valid' is true.
 */
static struct vdso_info {
	bool valid;

	/* Load information */
	uintptr_t load_addr;	/* address the vDSO's ELF header was found at */
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Symbol table, its string table and the SysV hash table over it */
	Elf64_Sym *symtab;
	const char *symstrings;
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Version table */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;
71
/*
 * SysV ELF hash of a NUL-terminated name, following the algorithm given
 * in the ELF specification.
 *
 * The specification's sample code accumulates in an unsigned long.  When
 * unsigned long is 64 bits wide, the addition below can carry into bit 32
 * and nothing ever clears that bit again, so the result can leave the
 * 32-bit range.  Accumulating in a uint32_t keeps the value inside 32 bits
 * regardless of the width of unsigned long.
 *
 * The top four bits are cleared on every iteration, so the returned hash
 * is always below 2^28.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0;

	while (*name) {
		uint32_t g;

		h = (h << 4) + *name++;
		g = h & 0xf0000000;
		if (g)
			h ^= g >> 24;
		h &= ~g;
	}
	return h;
}
85
86void vdso_init_from_sysinfo_ehdr(uintptr_t base)
87{
88 size_t i;
89 bool found_vaddr = false;
90
91 vdso_info.valid = false;
92
93 vdso_info.load_addr = base;
94
95 Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
96 Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
97 Elf64_Dyn *dyn = 0;
98
99 /*
100 * We need two things from the segment table: the load offset
101 * and the dynamic table.
102 */
103 for (i = 0; i < hdr->e_phnum; i++)
104 {
105 if (pt[i].p_type == PT_LOAD && !found_vaddr) {
106 found_vaddr = true;
107 vdso_info.load_offset = base
108 + (uintptr_t)pt[i].p_offset
109 - (uintptr_t)pt[i].p_vaddr;
110 } else if (pt[i].p_type == PT_DYNAMIC) {
111 dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
112 }
113 }
114
115 if (!found_vaddr || !dyn)
116 return; /* Failed */
117
118 /*
119 * Fish out the useful bits of the dynamic table.
120 */
121 Elf64_Word *hash = 0;
122 vdso_info.symstrings = 0;
123 vdso_info.symtab = 0;
124 vdso_info.versym = 0;
125 vdso_info.verdef = 0;
126 for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
127 switch (dyn[i].d_tag) {
128 case DT_STRTAB:
129 vdso_info.symstrings = (const char *)
130 ((uintptr_t)dyn[i].d_un.d_ptr
131 + vdso_info.load_offset);
132 break;
133 case DT_SYMTAB:
134 vdso_info.symtab = (Elf64_Sym *)
135 ((uintptr_t)dyn[i].d_un.d_ptr
136 + vdso_info.load_offset);
137 break;
138 case DT_HASH:
139 hash = (Elf64_Word *)
140 ((uintptr_t)dyn[i].d_un.d_ptr
141 + vdso_info.load_offset);
142 break;
143 case DT_VERSYM:
144 vdso_info.versym = (Elf64_Versym *)
145 ((uintptr_t)dyn[i].d_un.d_ptr
146 + vdso_info.load_offset);
147 break;
148 case DT_VERDEF:
149 vdso_info.verdef = (Elf64_Verdef *)
150 ((uintptr_t)dyn[i].d_un.d_ptr
151 + vdso_info.load_offset);
152 break;
153 }
154 }
155 if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
156 return; /* Failed */
157
158 if (!vdso_info.verdef)
159 vdso_info.versym = 0;
160
161 /* Parse the hash table header. */
162 vdso_info.nbucket = hash[0];
163 vdso_info.nchain = hash[1];
164 vdso_info.bucket = &hash[2];
165 vdso_info.chain = &hash[vdso_info.nbucket + 2];
166
167 /* That's all we need. */
168 vdso_info.valid = true;
169}
170
171static bool vdso_match_version(Elf64_Versym ver,
172 const char *name, Elf64_Word hash)
173{
174 /*
175 * This is a helper function to check if the version indexed by
176 * ver matches name (which hashes to hash).
177 *
178 * The version definition table is a mess, and I don't know how
179 * to do this in better than linear time without allocating memory
180 * to build an index. I also don't know why the table has
181 * variable size entries in the first place.
182 *
183 * For added fun, I can't find a comprehensible specification of how
184 * to parse all the weird flags in the table.
185 *
186 * So I just parse the whole table every time.
187 */
188
189 /* First step: find the version definition */
190 ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
191 Elf64_Verdef *def = vdso_info.verdef;
192 while(true) {
193 if ((def->vd_flags & VER_FLG_BASE) == 0
194 && (def->vd_ndx & 0x7fff) == ver)
195 break;
196
197 if (def->vd_next == 0)
198 return false; /* No definition. */
199
200 def = (Elf64_Verdef *)((char *)def + def->vd_next);
201 }
202
203 /* Now figure out whether it matches. */
204 Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
205 return def->vd_hash == hash
206 && !strcmp(name, vdso_info.symstrings + aux->vda_name);
207}
208
209void *vdso_sym(const char *version, const char *name)
210{
211 unsigned long ver_hash;
212 if (!vdso_info.valid)
213 return 0;
214
215 ver_hash = elf_hash(version);
216 Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
217
218 for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
219 Elf64_Sym *sym = &vdso_info.symtab[chain];
220
221 /* Check for a defined global or weak function w/ right name. */
222 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
223 continue;
224 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
225 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
226 continue;
227 if (sym->st_shndx == SHN_UNDEF)
228 continue;
229 if (strcmp(name, vdso_info.symstrings + sym->st_name))
230 continue;
231
232 /* Check symbol version. */
233 if (vdso_info.versym
234 && !vdso_match_version(vdso_info.versym[chain],
235 version, ver_hash))
236 continue;
237
238 return (void *)(vdso_info.load_offset + sym->st_value);
239 }
240
241 return 0;
242}
243
244void vdso_init_from_auxv(void *auxv)
245{
246 Elf64_auxv_t *elf_auxv = auxv;
247 for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
248 {
249 if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
250 vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
251 return;
252 }
253 }
254
255 vdso_info.valid = false;
256}
diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c
new file mode 100644
index 000000000000..fff633432dff
--- /dev/null
+++ b/Documentation/vDSO/vdso_test.c
@@ -0,0 +1,111 @@
1/*
2 * vdso_test.c: Sample code to test parse_vdso.c on x86_64
3 * Copyright (c) 2011 Andy Lutomirski
4 * Subject to the GNU General Public License, version 2
5 *
6 * You can amuse yourself by compiling with:
7 * gcc -std=gnu99 -nostdlib
8 * -Os -fno-asynchronous-unwind-tables -flto
9 * vdso_test.c parse_vdso.c -o vdso_test
10 * to generate a small binary with no dependencies at all.
11 */
12
13#include <sys/syscall.h>
14#include <sys/time.h>
15#include <unistd.h>
16#include <stdint.h>
17
18extern void *vdso_sym(const char *version, const char *name);
19extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
20extern void vdso_init_from_auxv(void *auxv);
21
/* We need one libc function... */

/*
 * Minimal strcmp for the -nostdlib build.
 *
 * Bytes are compared as unsigned char.  Returns a negative value, zero or
 * a positive value when a sorts before, equal to or after b.
 */
int strcmp(const char *a, const char *b)
{
	const unsigned char *p = (const unsigned char *)a;
	const unsigned char *q = (const unsigned char *)b;

	/* Advance while the strings agree and neither has ended. */
	while (*p && *p == *q) {
		p++;
		q++;
	}

	return (*p > *q) - (*p < *q);
}
37
/* ...and two syscalls.  This is x86_64-specific. */

/*
 * Issue the write system call directly with the "syscall" instruction.
 *
 * fd, data and len are passed in rdi, rsi and rdx; the value left in rax
 * is returned unchanged.
 */
static inline long linux_write(int fd, const void *data, size_t len)
{
	long result;

	__asm__ __volatile__ (
		"syscall"
		: "=a" (result)
		: "a" (__NR_write), "D" (fd), "S" (data), "d" (len)
		: "cc", "memory", "rcx", "r8", "r9", "r10", "r11");

	return result;
}
49
/*
 * Issue the exit system call directly with the "syscall" instruction,
 * passing 'code' in rdi.
 *
 * rax is declared as an output and the clobber list mirrors linux_write(),
 * so the compiler is told about every register the instruction may
 * overwrite instead of relying on the call never returning.
 */
static inline void linux_exit(int code)
{
	long unused;

	__asm__ __volatile__ (
		"syscall"
		: "=a" (unused)
		: "a" (__NR_exit), "D" (code)
		: "cc", "memory", "rcx", "r8", "r9", "r10", "r11");

	(void)unused;
}
54
/*
 * Write the decimal digits of n right-aligned, with the least significant
 * digit stored at *lastdig and the rest at decreasing addresses.
 *
 * Nothing is written when n is 0, so the caller's buffer keeps whatever it
 * already held.  No terminator is added.
 */
void to_base10(char *lastdig, uint64_t n)
{
	char *pos;

	for (pos = lastdig; n != 0; pos--, n /= 10)
		*pos = (n % 10) + '0';
}
63
/*
 * C entry point, entered from _start with the initial stack pointer.
 *
 * Steps over argc, argv and the environment to reach auxv, initializes
 * the vDSO parser, looks up __vdso_gettimeofday and prints the time.
 * Exits with status 1 if the symbol is not found, with gettimeofday's
 * return value if the call fails, and with 0 otherwise.
 */
__attribute__((externally_visible)) void c_main(void **stack)
{
	/* Parse the stack */
	long argc = (long)*stack;
	stack += argc + 2;

	/* Now we're pointing at the environment.  Skip it. */
	while (*stack)
		stack++;
	stack++;

	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
	vdso_init_from_auxv((void *)stack);

	/* Find gettimeofday. */
	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");

	if (!gtod)
		linux_exit(1);

	struct timeval tv;
	long ret = gtod(&tv, 0);

	if (ret == 0) {
		/*
		 * Output template.  The blank field is where the seconds are
		 * written right-aligned; the zeros are the microseconds field
		 * and double as leading-zero padding.
		 */
		char buf[] = "The time is "
			     "                    "
			     ".000000\n";

		/*
		 * Both positions are derived from the end of the template so
		 * they cannot run past the buffer: the last microsecond digit
		 * sits just before "\n" and the terminating NUL, and the last
		 * seconds digit sits just before the '.' and the six
		 * microsecond digits.
		 */
		char *usec_last = buf + sizeof(buf) - 3;
		char *sec_last = usec_last - 7;

		to_base10(sec_last, tv.tv_sec);
		to_base10(usec_last, tv.tv_usec);
		linux_write(1, buf, sizeof(buf) - 1);
	} else {
		linux_exit(ret);
	}

	linux_exit(0);
}
99
/*
 * The real entry point.  It copies the initial stack pointer into the
 * first argument register and jumps to the C entry point, c_main.
 */
__asm__ (
	".text\n"
	".global _start\n"
	".type _start,@function\n"
	"_start:\n\t"
	"mov %rsp,%rdi\n\t"
	"jmp c_main"
);
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 42542eb802ca..b0e4b9cd6a66 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
180If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 180If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
181cpus max. 181cpus max.
182 182
183On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
184threads in one or more virtual CPU cores. (This is because the
185hardware requires all the hardware threads in a CPU core to be in the
186same partition.) The KVM_CAP_PPC_SMT capability indicates the number
187of vcpus per virtual core (vcore). The vcore id is obtained by
188dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
189given vcore will always be in the same physical core as each other
190(though that might be a different physical core from time to time).
191Userspace can control the threading (SMT) mode of the guest by its
192allocation of vcpu ids. For example, if userspace wants
193single-threaded guest vcpus, it should make all vcpu ids be a multiple
194of the number of vcpus per vcore.
195
1834.8 KVM_GET_DIRTY_LOG (vm ioctl) 1964.8 KVM_GET_DIRTY_LOG (vm ioctl)
184 197
185Capability: basic 198Capability: basic
@@ -1143,15 +1156,10 @@ Assigns an IRQ to a passed-through device.
1143 1156
1144struct kvm_assigned_irq { 1157struct kvm_assigned_irq {
1145 __u32 assigned_dev_id; 1158 __u32 assigned_dev_id;
1146 __u32 host_irq; 1159 __u32 host_irq; /* ignored (legacy field) */
1147 __u32 guest_irq; 1160 __u32 guest_irq;
1148 __u32 flags; 1161 __u32 flags;
1149 union { 1162 union {
1150 struct {
1151 __u32 addr_lo;
1152 __u32 addr_hi;
1153 __u32 data;
1154 } guest_msi;
1155 __u32 reserved[12]; 1163 __u32 reserved[12];
1156 }; 1164 };
1157}; 1165};
@@ -1239,8 +1247,10 @@ Type: vm ioctl
1239Parameters: struct kvm_assigned_msix_nr (in) 1247Parameters: struct kvm_assigned_msix_nr (in)
1240Returns: 0 on success, -1 on error 1248Returns: 0 on success, -1 on error
1241 1249
1242Set the number of MSI-X interrupts for an assigned device. This service can 1250Set the number of MSI-X interrupts for an assigned device. The number is
1243only be called once in the lifetime of an assigned device. 1251reset again by terminating the MSI-X assignment of the device via
1252KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
1253point will fail.
1244 1254
1245struct kvm_assigned_msix_nr { 1255struct kvm_assigned_msix_nr {
1246 __u32 assigned_dev_id; 1256 __u32 assigned_dev_id;
@@ -1291,6 +1301,135 @@ Returns the tsc frequency of the guest. The unit of the return value is
1291KHz. If the host has unstable tsc this ioctl returns -EIO instead as an 1301KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
1292error. 1302error.
1293 1303
13044.56 KVM_GET_LAPIC
1305
1306Capability: KVM_CAP_IRQCHIP
1307Architectures: x86
1308Type: vcpu ioctl
1309Parameters: struct kvm_lapic_state (out)
1310Returns: 0 on success, -1 on error
1311
1312#define KVM_APIC_REG_SIZE 0x400
1313struct kvm_lapic_state {
1314 char regs[KVM_APIC_REG_SIZE];
1315};
1316
1317Reads the Local APIC registers and copies them into the input argument. The
1318data format and layout are the same as documented in the architecture manual.
1319
13204.57 KVM_SET_LAPIC
1321
1322Capability: KVM_CAP_IRQCHIP
1323Architectures: x86
1324Type: vcpu ioctl
1325Parameters: struct kvm_lapic_state (in)
1326Returns: 0 on success, -1 on error
1327
1328#define KVM_APIC_REG_SIZE 0x400
1329struct kvm_lapic_state {
1330 char regs[KVM_APIC_REG_SIZE];
1331};
1332
1333Copies the input argument into the Local APIC registers. The data format
1334and layout are the same as documented in the architecture manual.
1335
13364.58 KVM_IOEVENTFD
1337
1338Capability: KVM_CAP_IOEVENTFD
1339Architectures: all
1340Type: vm ioctl
1341Parameters: struct kvm_ioeventfd (in)
1342Returns: 0 on success, !0 on error
1343
1344This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
1345within the guest. A guest write to the registered address will signal the
1346provided event instead of triggering an exit.
1347
1348struct kvm_ioeventfd {
1349 __u64 datamatch;
1350 __u64 addr; /* legal pio/mmio address */
1351 __u32 len; /* 1, 2, 4, or 8 bytes */
1352 __s32 fd;
1353 __u32 flags;
1354 __u8 pad[36];
1355};
1356
1357The following flags are defined:
1358
1359#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
1360#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
1361#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
1362
1363If datamatch flag is set, the event will be signaled only if the written value
1364to the registered address is equal to datamatch in struct kvm_ioeventfd.
1365
13664.62 KVM_CREATE_SPAPR_TCE
1367
1368Capability: KVM_CAP_SPAPR_TCE
1369Architectures: powerpc
1370Type: vm ioctl
1371Parameters: struct kvm_create_spapr_tce (in)
1372Returns: file descriptor for manipulating the created TCE table
1373
1374This creates a virtual TCE (translation control entry) table, which
1375is an IOMMU for PAPR-style virtual I/O. It is used to translate
1376logical addresses used in virtual I/O into guest physical addresses,
1377and provides a scatter/gather capability for PAPR virtual I/O.
1378
1379/* for KVM_CAP_SPAPR_TCE */
1380struct kvm_create_spapr_tce {
1381 __u64 liobn;
1382 __u32 window_size;
1383};
1384
1385The liobn field gives the logical IO bus number for which to create a
1386TCE table. The window_size field specifies the size of the DMA window
1387which this TCE table will translate - the table will contain one 64
1388bit TCE entry for every 4kiB of the DMA window.
1389
1390When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
1391table has been created using this ioctl(), the kernel will handle it
1392in real mode, updating the TCE table. H_PUT_TCE calls for other
1393liobns will cause a vm exit and must be handled by userspace.
1394
1395The return value is a file descriptor which can be passed to mmap(2)
1396to map the created TCE table into userspace. This lets userspace read
1397the entries written by kernel-handled H_PUT_TCE calls, and also lets
1398userspace update the TCE table directly which is useful in some
1399circumstances.
1400
14014.63 KVM_ALLOCATE_RMA
1402
1403Capability: KVM_CAP_PPC_RMA
1404Architectures: powerpc
1405Type: vm ioctl
1406Parameters: struct kvm_allocate_rma (out)
1407Returns: file descriptor for mapping the allocated RMA
1408
1409This allocates a Real Mode Area (RMA) from the pool allocated at boot
1410time by the kernel. An RMA is a physically-contiguous, aligned region
1411of memory used on older POWER processors to provide the memory which
1412will be accessed by real-mode (MMU off) accesses in a KVM guest.
1413POWER processors support a set of sizes for the RMA that usually
1414includes 64MB, 128MB, 256MB and some larger powers of two.
1415
1416/* for KVM_ALLOCATE_RMA */
1417struct kvm_allocate_rma {
1418 __u64 rma_size;
1419};
1420
1421The return value is a file descriptor which can be passed to mmap(2)
1422to map the allocated RMA into userspace. The mapped area can then be
1423passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
1424RMA for a virtual machine. The size of the RMA in bytes (which is
1425fixed at host kernel boot time) is returned in the rma_size field of
1426the argument structure.
1427
1428The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
1429is supported; 2 if the processor requires all virtual machines to have
1430an RMA, or 1 if the processor can use an RMA but doesn't require it,
1431because it supports the Virtual RMA (VRMA) facility.
1432
12945. The kvm_run structure 14335. The kvm_run structure
1295 1434
1296Application code obtains a pointer to the kvm_run structure by 1435Application code obtains a pointer to the kvm_run structure by
@@ -1473,6 +1612,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
1473necessary. Upon guest entry all guest GPRs will then be replaced by the values 1612necessary. Upon guest entry all guest GPRs will then be replaced by the values
1474in this struct. 1613in this struct.
1475 1614
1615 /* KVM_EXIT_PAPR_HCALL */
1616 struct {
1617 __u64 nr;
1618 __u64 ret;
1619 __u64 args[9];
1620 } papr_hcall;
1621
1622This is used on 64-bit PowerPC when emulating a pSeries partition,
1623e.g. with the 'pseries' machine type in qemu. It occurs when the
1624guest does a hypercall using the 'sc 1' instruction. The 'nr' field
1625contains the hypercall number (from the guest R3), and 'args' contains
1626the arguments (from the guest R4 - R12). Userspace should put the
1627return code in 'ret' and any extra returned values in args[].
1628The possible hypercalls are defined in the Power Architecture Platform
1629Requirements (PAPR) document available from www.power.org (free
1630developer registration required to access it).
1631
1476 /* Fix the size of the union. */ 1632 /* Fix the size of the union. */
1477 char padding[256]; 1633 char padding[256];
1478 }; 1634 };
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index f46aa58389ca..5dc972c09b55 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -165,6 +165,10 @@ Shadow pages contain the following information:
165 Contains the value of efer.nxe for which the page is valid. 165 Contains the value of efer.nxe for which the page is valid.
166 role.cr0_wp: 166 role.cr0_wp:
167 Contains the value of cr0.wp for which the page is valid. 167 Contains the value of cr0.wp for which the page is valid.
168 role.smep_andnot_wp:
169 Contains the value of cr4.smep && !cr0.wp for which the page is valid
170 (pages for which this is true are different from other pages; see the
171 treatment of cr0.wp=0 below).
168 gfn: 172 gfn:
169 Either the guest page table containing the translations shadowed by this 173 Either the guest page table containing the translations shadowed by this
170 page, or the base page frame for linear translations. See role.direct. 174 page, or the base page frame for linear translations. See role.direct.
@@ -317,6 +321,20 @@ on fault type:
317 321
318(user write faults generate a #PF) 322(user write faults generate a #PF)
319 323
324In the first case there is an additional complication if CR4.SMEP is
325enabled: since we've turned the page into a kernel page, the kernel may now
326execute it. We handle this by also setting spte.nx. If we get a user
327fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
328
329To prevent an spte that was converted into a kernel page with cr0.wp=0
330from being written by the kernel after cr0.wp has changed to 1, we make
331the value of cr0.wp part of the page role. This means that an spte created
332with one value of cr0.wp cannot be used when cr0.wp has a different value -
333it will simply be missed by the shadow page lookup code. A similar issue
334exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
335changing cr4.smep to 1. To avoid this, the value of !cr0.wp && cr4.smep
336is also made a part of the page role.
337
320Large pages 338Large pages
321=========== 339===========
322 340
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index d079aed27e03..50317809113d 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -185,3 +185,37 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
185 185
186 Currently type 2 APF will be always delivered on the same vcpu as 186 Currently type 2 APF will be always delivered on the same vcpu as
187 type 1 was, but guest should not rely on that. 187 type 1 was, but guest should not rely on that.
188
189MSR_KVM_STEAL_TIME: 0x4b564d03
190
191 data: 64-byte aligned physical address of a memory area which must be
192 in guest RAM, plus an enable bit in bit 0. This memory is expected to
193 hold a copy of the following structure:
194
195 struct kvm_steal_time {
196 __u64 steal;
197 __u32 version;
198 __u32 flags;
199 __u32 pad[12];
200 }
201
202 whose data will be filled in by the hypervisor periodically. Only one
203 write, or registration, is needed for each VCPU. The interval between
204 updates of this structure is arbitrary and implementation-dependent.
205 The hypervisor may update this structure at any time it sees fit until
206 anything with bit0 == 0 is written to it. Guest is required to make sure
207 this structure is initialized to zero.
208
209 Fields have the following meanings:
210
211 version: a sequence counter. In other words, guest has to check
212 this field before and after grabbing time information and make
213 sure they are both equal and even. An odd version indicates an
214 in-progress update.
215
216 flags: At this point, always zero. May be used to indicate
217 changes in this structure in the future.
218
219 steal: the amount of time in which this vCPU did not run, in
220 nanoseconds. Time during which the vcpu is idle, will not be
221 reported as steal time.
diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virtual/kvm/nested-vmx.txt
new file mode 100644
index 000000000000..8ed937de1163
--- /dev/null
+++ b/Documentation/virtual/kvm/nested-vmx.txt
@@ -0,0 +1,251 @@
1Nested VMX
2==========
3
4Overview
5---------
6
7On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
8to easily and efficiently run guest operating systems. Normally, these guests
9*cannot* themselves be hypervisors running their own guests, because in VMX,
10guests cannot use VMX instructions.
11
12The "Nested VMX" feature adds this missing capability - of running guest
13hypervisors (which use VMX) with their own nested guests. It does so by
14allowing a guest to use VMX instructions, and correctly and efficiently
15emulating them using the single level of VMX available in the hardware.
16
17We describe in much greater detail the theory behind the nested VMX feature,
18its implementation and its performance characteristics, in the OSDI 2010 paper
19"The Turtles Project: Design and Implementation of Nested Virtualization",
20available at:
21
22 http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
23
24
25Terminology
26-----------
27
28Single-level virtualization has two levels - the host (KVM) and the guests.
29In nested virtualization, we have three levels: The host (KVM), which we call
30L0, the guest hypervisor, which we call L1, and its nested guest, which we
31call L2.
32
33
34Known limitations
35-----------------
36
37The current code supports running Linux guests under KVM guests.
38Only 64-bit guest hypervisors are supported.
39
40Additional patches for running Windows under guest KVM, and Linux under
41guest VMware server, and support for nested EPT, are currently running in
42the lab, and will be sent as follow-on patchsets.
43
44
45Running nested VMX
46------------------
47
48The nested VMX feature is disabled by default. It can be enabled by giving
49the "nested=1" option to the kvm-intel module.
50
51No modifications are required to user space (qemu). However, qemu's default
52emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
53explicitly enabled, by giving qemu one of the following options:
54
55 -cpu host (emulated CPU has all features of the real CPU)
56
57 -cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
58
59
60ABIs
61----
62
63Nested VMX aims to present a standard and (eventually) fully-functional VMX
64implementation for a guest hypervisor to use. As such, the official
65specification of the ABI that it provides is Intel's VMX specification,
66namely volume 3B of their "Intel 64 and IA-32 Architectures Software
67Developer's Manual". Not all of VMX's features are currently fully supported,
68but the goal is to eventually support them all, starting with the VMX features
69which are used in practice by popular hypervisors (KVM and others).
70
71As a VMX implementation, nested VMX presents a VMCS structure to L1.
72As mandated by the spec, other than the two fields revision_id and abort,
73this structure is *opaque* to its user, who is not supposed to know or care
74about its internal structure. Rather, the structure is accessed through the
75VMREAD and VMWRITE instructions.
76Still, for debugging purposes, KVM developers might be interested to know the
77internals of this structure; this is struct vmcs12 from arch/x86/kvm/vmx.c.
78
79The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
80also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
81which L0 builds to actually run L2 - how this is done is explained in the
82aforementioned paper.
83
84For convenience, we repeat the content of struct vmcs12 here. If the internals
85of this structure change, this can break live migration across KVM versions.
86VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
87struct shadow_vmcs is ever changed.
88
89 typedef u64 natural_width;
90 struct __packed vmcs12 {
91 /* According to the Intel spec, a VMCS region must start with
92 * these two user-visible fields */
93 u32 revision_id;
94 u32 abort;
95
96 u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
97 u32 padding[7]; /* room for future expansion */
98
99 u64 io_bitmap_a;
100 u64 io_bitmap_b;
101 u64 msr_bitmap;
102 u64 vm_exit_msr_store_addr;
103 u64 vm_exit_msr_load_addr;
104 u64 vm_entry_msr_load_addr;
105 u64 tsc_offset;
106 u64 virtual_apic_page_addr;
107 u64 apic_access_addr;
108 u64 ept_pointer;
109 u64 guest_physical_address;
110 u64 vmcs_link_pointer;
111 u64 guest_ia32_debugctl;
112 u64 guest_ia32_pat;
113 u64 guest_ia32_efer;
114 u64 guest_pdptr0;
115 u64 guest_pdptr1;
116 u64 guest_pdptr2;
117 u64 guest_pdptr3;
118 u64 host_ia32_pat;
119 u64 host_ia32_efer;
120 u64 padding64[8]; /* room for future expansion */
121 natural_width cr0_guest_host_mask;
122 natural_width cr4_guest_host_mask;
123 natural_width cr0_read_shadow;
124 natural_width cr4_read_shadow;
125 natural_width cr3_target_value0;
126 natural_width cr3_target_value1;
127 natural_width cr3_target_value2;
128 natural_width cr3_target_value3;
129 natural_width exit_qualification;
130 natural_width guest_linear_address;
131 natural_width guest_cr0;
132 natural_width guest_cr3;
133 natural_width guest_cr4;
134 natural_width guest_es_base;
135 natural_width guest_cs_base;
136 natural_width guest_ss_base;
137 natural_width guest_ds_base;
138 natural_width guest_fs_base;
139 natural_width guest_gs_base;
140 natural_width guest_ldtr_base;
141 natural_width guest_tr_base;
142 natural_width guest_gdtr_base;
143 natural_width guest_idtr_base;
144 natural_width guest_dr7;
145 natural_width guest_rsp;
146 natural_width guest_rip;
147 natural_width guest_rflags;
148 natural_width guest_pending_dbg_exceptions;
149 natural_width guest_sysenter_esp;
150 natural_width guest_sysenter_eip;
151 natural_width host_cr0;
152 natural_width host_cr3;
153 natural_width host_cr4;
154 natural_width host_fs_base;
155 natural_width host_gs_base;
156 natural_width host_tr_base;
157 natural_width host_gdtr_base;
158 natural_width host_idtr_base;
159 natural_width host_ia32_sysenter_esp;
160 natural_width host_ia32_sysenter_eip;
161 natural_width host_rsp;
162 natural_width host_rip;
163 natural_width paddingl[8]; /* room for future expansion */
164 u32 pin_based_vm_exec_control;
165 u32 cpu_based_vm_exec_control;
166 u32 exception_bitmap;
167 u32 page_fault_error_code_mask;
168 u32 page_fault_error_code_match;
169 u32 cr3_target_count;
170 u32 vm_exit_controls;
171 u32 vm_exit_msr_store_count;
172 u32 vm_exit_msr_load_count;
173 u32 vm_entry_controls;
174 u32 vm_entry_msr_load_count;
175 u32 vm_entry_intr_info_field;
176 u32 vm_entry_exception_error_code;
177 u32 vm_entry_instruction_len;
178 u32 tpr_threshold;
179 u32 secondary_vm_exec_control;
180 u32 vm_instruction_error;
181 u32 vm_exit_reason;
182 u32 vm_exit_intr_info;
183 u32 vm_exit_intr_error_code;
184 u32 idt_vectoring_info_field;
185 u32 idt_vectoring_error_code;
186 u32 vm_exit_instruction_len;
187 u32 vmx_instruction_info;
188 u32 guest_es_limit;
189 u32 guest_cs_limit;
190 u32 guest_ss_limit;
191 u32 guest_ds_limit;
192 u32 guest_fs_limit;
193 u32 guest_gs_limit;
194 u32 guest_ldtr_limit;
195 u32 guest_tr_limit;
196 u32 guest_gdtr_limit;
197 u32 guest_idtr_limit;
198 u32 guest_es_ar_bytes;
199 u32 guest_cs_ar_bytes;
200 u32 guest_ss_ar_bytes;
201 u32 guest_ds_ar_bytes;
202 u32 guest_fs_ar_bytes;
203 u32 guest_gs_ar_bytes;
204 u32 guest_ldtr_ar_bytes;
205 u32 guest_tr_ar_bytes;
206 u32 guest_interruptibility_info;
207 u32 guest_activity_state;
208 u32 guest_sysenter_cs;
209 u32 host_ia32_sysenter_cs;
210 u32 padding32[8]; /* room for future expansion */
211 u16 virtual_processor_id;
212 u16 guest_es_selector;
213 u16 guest_cs_selector;
214 u16 guest_ss_selector;
215 u16 guest_ds_selector;
216 u16 guest_fs_selector;
217 u16 guest_gs_selector;
218 u16 guest_ldtr_selector;
219 u16 guest_tr_selector;
220 u16 host_es_selector;
221 u16 host_cs_selector;
222 u16 host_ss_selector;
223 u16 host_ds_selector;
224 u16 host_fs_selector;
225 u16 host_gs_selector;
226 u16 host_tr_selector;
227 };
228
229
230Authors
231-------
232
233These patches were written by:
234 Abel Gordon, abelg <at> il.ibm.com
235 Nadav Har'El, nyh <at> il.ibm.com
236 Orit Wasserman, oritw <at> il.ibm.com
237 Ben-Ami Yassor, benami <at> il.ibm.com
238 Muli Ben-Yehuda, muli <at> il.ibm.com
239
240With contributions by:
241 Anthony Liguori, aliguori <at> us.ibm.com
242 Mike Day, mdday <at> us.ibm.com
243 Michael Factor, factor <at> il.ibm.com
244 Zvi Dubitzky, dubi <at> il.ibm.com
245
246And valuable reviews by:
247 Avi Kivity, avi <at> redhat.com
248 Gleb Natapov, gleb <at> redhat.com
249 Marcelo Tosatti, mtosatti <at> redhat.com
250 Kevin Tian, kevin.tian <at> intel.com
251 and others.
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 3ab969c59046..2b7ce190cde4 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -68,9 +68,11 @@ page that contains parts of supervisor visible register state. The guest can
68map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE. 68map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
69 69
70With this hypercall issued the guest always gets the magic page mapped at the 70With this hypercall issued the guest always gets the magic page mapped at the
71desired location in effective and physical address space. For now, we always 71desired location. The first parameter indicates the effective address when the
72map the page to -4096. This way we can access it using absolute load and store 72MMU is enabled. The second parameter indicates the address in real mode, if
73functions. The following instruction reads the first field of the magic page: 73applicable to the target. For now, we always map the page to -4096. This way we
74can access it using absolute load and store functions. The following
75instruction reads the first field of the magic page:
74 76
75 ld rX, -4096(0) 77 ld rX, -4096(0)
76 78
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index cd9d6af61d07..043bd7df3139 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -51,7 +51,7 @@
51#include <asm/bootparam.h> 51#include <asm/bootparam.h>
52#include "../../../include/linux/lguest_launcher.h" 52#include "../../../include/linux/lguest_launcher.h"
53/*L:110 53/*L:110
54 * We can ignore the 42 include files we need for this program, but I do want 54 * We can ignore the 43 include files we need for this program, but I do want
55 * to draw attention to the use of kernel-style types. 55 * to draw attention to the use of kernel-style types.
56 * 56 *
57 * As Linus said, "C is a Spartan language, and so should your naming be." I 57 * As Linus said, "C is a Spartan language, and so should your naming be." I
@@ -65,7 +65,6 @@ typedef uint16_t u16;
65typedef uint8_t u8; 65typedef uint8_t u8;
66/*:*/ 66/*:*/
67 67
68#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
69#define BRIDGE_PFX "bridge:" 68#define BRIDGE_PFX "bridge:"
70#ifndef SIOCBRADDIF 69#ifndef SIOCBRADDIF
71#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ 70#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
@@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
861 /* writev can return a partial write, so we loop here. */ 860 /* writev can return a partial write, so we loop here. */
862 while (!iov_empty(iov, out)) { 861 while (!iov_empty(iov, out)) {
863 int len = writev(STDOUT_FILENO, iov, out); 862 int len = writev(STDOUT_FILENO, iov, out);
864 if (len <= 0) 863 if (len <= 0) {
865 err(1, "Write to stdout gave %i", len); 864 warn("Write to stdout gave %i (%d)", len, errno);
865 break;
866 }
866 iov_consume(iov, out, len); 867 iov_consume(iov, out, len);
867 } 868 }
868 869
@@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
898 * same format: what a coincidence! 899 * same format: what a coincidence!
899 */ 900 */
900 if (writev(net_info->tunfd, iov, out) < 0) 901 if (writev(net_info->tunfd, iov, out) < 0)
901 errx(1, "Write to tun failed?"); 902 warnx("Write to tun failed (%d)?", errno);
902 903
903 /* 904 /*
904 * Done with that one; wait_for_vq_desc() will send the interrupt if 905 * Done with that one; wait_for_vq_desc() will send the interrupt if
@@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
955 */ 956 */
956 len = readv(net_info->tunfd, iov, in); 957 len = readv(net_info->tunfd, iov, in);
957 if (len <= 0) 958 if (len <= 0)
958 err(1, "Failed to read from tun."); 959 warn("Failed to read from tun (%d).", errno);
959 960
960 /* 961 /*
961 * Mark that packet buffer as used, but don't interrupt here. We want 962 * Mark that packet buffer as used, but don't interrupt here. We want
@@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
1093 warnx("Device %s configuration FAILED", dev->name); 1094 warnx("Device %s configuration FAILED", dev->name);
1094 if (dev->running) 1095 if (dev->running)
1095 reset_device(dev); 1096 reset_device(dev);
1096 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 1097 } else {
1097 if (!dev->running) 1098 if (dev->running)
1098 start_device(dev); 1099 err(1, "Device %s features finalized twice", dev->name);
1100 start_device(dev);
1099 } 1101 }
1100} 1102}
1101 1103
@@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
1120 return; 1122 return;
1121 } 1123 }
1122 1124
1123 /* 1125 /* Devices should not be used before features are finalized. */
1124 * Devices *can* be used before status is set to DRIVER_OK.
1125 * The original plan was that they would never do this: they
1126 * would always finish setting up their status bits before
1127 * actually touching the virtqueues. In practice, we allowed
1128 * them to, and they do (eg. the disk probes for partition
1129 * tables as part of initialization).
1130 *
1131 * If we see this, we start the device: once it's running, we
1132 * expect the device to catch all the notifications.
1133 */
1134 for (vq = i->vq; vq; vq = vq->next) { 1126 for (vq = i->vq; vq; vq = vq->next) {
1135 if (addr != vq->config.pfn*getpagesize()) 1127 if (addr != vq->config.pfn*getpagesize())
1136 continue; 1128 continue;
1137 if (i->running) 1129 errx(1, "Notification on %s before setup!", i->name);
1138 errx(1, "Notification on running %s", i->name);
1139 /* This just calls create_thread() for each virtqueue */
1140 start_device(i);
1141 return;
1142 } 1130 }
1143 } 1131 }
1144 1132
@@ -1370,7 +1358,7 @@ static void setup_console(void)
1370 * --sharenet=<name> option which opens or creates a named pipe. This can be 1358 * --sharenet=<name> option which opens or creates a named pipe. This can be
1371 * used to send packets to another guest in a 1:1 manner. 1359 * used to send packets to another guest in a 1:1 manner.
1372 * 1360 *
1373 * More sopisticated is to use one of the tools developed for project like UML 1361 * More sophisticated is to use one of the tools developed for project like UML
1374 * to do networking. 1362 * to do networking.
1375 * 1363 *
1376 * Faster is to do virtio bonding in kernel. Doing this 1:1 would be 1364 * Faster is to do virtio bonding in kernel. Doing this 1:1 would be
@@ -1380,7 +1368,7 @@ static void setup_console(void)
1380 * multiple inter-guest channels behind one interface, although it would 1368 * multiple inter-guest channels behind one interface, although it would
1381 * require some manner of hotplugging new virtio channels. 1369 * require some manner of hotplugging new virtio channels.
1382 * 1370 *
1383 * Finally, we could implement a virtio network switch in the kernel. 1371 * Finally, we could use a virtio network switch in the kernel, ie. vhost.
1384:*/ 1372:*/
1385 1373
1386static u32 str2ip(const char *ipaddr) 1374static u32 str2ip(const char *ipaddr)
@@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
2017 /* Tell the entry path not to try to reload segment registers. */ 2005 /* Tell the entry path not to try to reload segment registers. */
2018 boot->hdr.loadflags |= KEEP_SEGMENTS; 2006 boot->hdr.loadflags |= KEEP_SEGMENTS;
2019 2007
2020 /* 2008 /* We tell the kernel to initialize the Guest. */
2021 * We tell the kernel to initialize the Guest: this returns the open
2022 * /dev/lguest file descriptor.
2023 */
2024 tell_kernel(start); 2009 tell_kernel(start);
2025 2010
2026 /* Ensure that we terminate if a device-servicing child dies. */ 2011 /* Ensure that we terminate if a device-servicing child dies. */
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9b7221a86df2..7c3a8801b7ce 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -674,7 +674,7 @@ Protocol: 2.10+
674 674
675Field name: init_size 675Field name: init_size
676Type: read 676Type: read
677Offset/size: 0x25c/4 677Offset/size: 0x260/4
678 678
679 This field indicates the amount of linear contiguous memory starting 679 This field indicates the amount of linear contiguous memory starting
680 at the kernel runtime start address that the kernel needs before it 680 at the kernel runtime start address that the kernel needs before it
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt
new file mode 100644
index 000000000000..7869f14d055c
--- /dev/null
+++ b/Documentation/x86/entry_64.txt
@@ -0,0 +1,98 @@
1This file documents some of the kernel entries in
2arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from
3an email from Ingo Molnar:
4
5http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
6
7The x86 architecture has quite a few different ways to jump into
8kernel code. Most of these entry points are registered in
9arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
10and arch/x86/ia32/ia32entry.S.
11
12The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
13
14Some of these entries are:
15
16 - system_call: syscall instruction from 64-bit code.
17
18 - ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall
19 either way.
20
21 - ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit
22 code
23
24 - interrupt: An array of entries. Every IDT vector that doesn't
25 explicitly point somewhere else gets set to the corresponding
26 value in interrupts. These point to a whole array of
27 magically-generated functions that make their way to do_IRQ with
28 the interrupt number as a parameter.
29
30 - emulate_vsyscall: int 0xcc, a special non-ABI entry used by
31 vsyscall emulation.
32
33 - APIC interrupts: Various special-purpose interrupts for things
34 like TLB shootdown.
35
36 - Architecturally-defined exceptions like divide_error.
37
38There are a few complexities here. The different x86-64 entries
39have different calling conventions. The syscall and sysenter
40instructions have their own peculiar calling conventions. Some of
41the IDT entries push an error code onto the stack; others don't.
42IDT entries using the IST alternative stack mechanism need their own
43magic to get the stack frames right. (You can find some
44documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM,
45Volume 3, Chapter 6.)
46
47Dealing with the swapgs instruction is especially tricky. Swapgs
48toggles whether gs is the kernel gs or the user gs. The swapgs
49instruction is rather fragile: it must nest perfectly and only in
50single depth, it should only be used if entering from user mode to
51kernel mode and then when returning to user-space, and precisely
52so. If we mess that up even slightly, we crash.
53
54So when we have a secondary entry, already in kernel mode, we *must
55not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's
56not switched/swapped yet.
57
58Now, there's a secondary complication: there's a cheap way to test
59which mode the CPU is in and an expensive way.
60
61The cheap way is to pick this info off the entry frame on the kernel
62stack, from the CS of the ptregs area of the kernel stack:
63
64 xorl %ebx,%ebx
65 testl $3,CS+8(%rsp)
66 je error_kernelspace
67 SWAPGS
68
69The expensive (paranoid) way is to read back the MSR_GS_BASE value
70(which is what SWAPGS modifies):
71
72 movl $1,%ebx
73 movl $MSR_GS_BASE,%ecx
74 rdmsr
75 testl %edx,%edx
76 js 1f /* negative -> in kernel */
77 SWAPGS
78 xorl %ebx,%ebx
791: ret
80
81and the whole paranoid non-paranoid macro complexity is about whether
82to suffer that RDMSR cost.
83
84If we are at an interrupt or user-trap/gate-alike boundary then we can
85use the faster check: the stack will be a reliable indicator of
86whether SWAPGS was already done: if we see that we are a secondary
87entry interrupting kernel mode execution, then we know that the GS
88base has already been switched. If it says that we interrupted
89user-space execution then we must do the SWAPGS.
90
91But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context,
92which might have triggered right after a normal entry wrote CS to the
93stack but before we executed SWAPGS, then the only safe way to check
94for GS is the slower method: the RDMSR.
95
96So we try only to mark those entry methods 'paranoid' that absolutely
97need the more expensive check for the GS base - and we generate all
98'normal' entry points with the regular (faster) entry macros.
diff --git a/Documentation/zh_CN/SubmitChecklist b/Documentation/zh_CN/SubmitChecklist
index 951415bbab0c..4c741d6bc048 100644
--- a/Documentation/zh_CN/SubmitChecklist
+++ b/Documentation/zh_CN/SubmitChecklist
@@ -67,7 +67,7 @@ Linux内核提交清单
67 67
6812:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, 6812:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
69 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, 69 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
70 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP测试,并且同时都 70 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP测试,并且同时都
71 使能。 71 使能。
72 72
7313:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。 7313:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。
diff --git a/Documentation/zh_CN/magic-number.txt b/Documentation/zh_CN/magic-number.txt
index 4c4ce853577b..c278f412dc65 100644
--- a/Documentation/zh_CN/magic-number.txt
+++ b/Documentation/zh_CN/magic-number.txt
@@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
66RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h 66RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h
67SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h 67SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h
68HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c 68HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
69APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c 69APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
70CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h 70CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
71DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c 71DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
72DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c 72DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c