diff options
Diffstat (limited to 'Documentation')
84 files changed, 2833 insertions, 613 deletions
diff --git a/Documentation/ABI/obsolete/proc-pid-oom_adj b/Documentation/ABI/obsolete/proc-pid-oom_adj index cf63f264ce0f..9a3cb88ade47 100644 --- a/Documentation/ABI/obsolete/proc-pid-oom_adj +++ b/Documentation/ABI/obsolete/proc-pid-oom_adj | |||
@@ -14,7 +14,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's | |||
14 | 14 | ||
15 | A much more powerful interface, /proc/<pid>/oom_score_adj, was | 15 | A much more powerful interface, /proc/<pid>/oom_score_adj, was |
16 | introduced with the oom killer rewrite that allows users to increase or | 16 | introduced with the oom killer rewrite that allows users to increase or |
17 | decrease the badness() score linearly. This interface will replace | 17 | decrease the badness score linearly. This interface will replace |
18 | /proc/<pid>/oom_adj. | 18 | /proc/<pid>/oom_adj. |
19 | 19 | ||
20 | A warning will be emitted to the kernel log if an application uses this | 20 | A warning will be emitted to the kernel log if an application uses this |
diff --git a/Documentation/ABI/stable/firewire-cdev b/Documentation/ABI/stable/firewire-cdev new file mode 100644 index 000000000000..16d030827368 --- /dev/null +++ b/Documentation/ABI/stable/firewire-cdev | |||
@@ -0,0 +1,103 @@ | |||
1 | What: /dev/fw[0-9]+ | ||
2 | Date: May 2007 | ||
3 | KernelVersion: 2.6.22 | ||
4 | Contact: linux1394-devel@lists.sourceforge.net | ||
5 | Description: | ||
6 | The character device files /dev/fw* are the interface between | ||
7 | firewire-core and IEEE 1394 device drivers implemented in | ||
8 | userspace. The ioctl(2)- and read(2)-based ABI is defined and | ||
9 | documented in <linux/firewire-cdev.h>. | ||
10 | |||
11 | This ABI offers most of the features which firewire-core also | ||
12 | exposes to kernelspace IEEE 1394 drivers. | ||
13 | |||
14 | Each /dev/fw* is associated with one IEEE 1394 node, which can | ||
15 | be a remote or a local node. Operations on a /dev/fw* file have | ||
16 | different scope: | ||
17 | - The 1394 node which is associated with the file: | ||
18 | - Asynchronous request transmission | ||
19 | - Get the Configuration ROM | ||
20 | - Query node ID | ||
21 | - Query maximum speed of the path between this node | ||
22 | and local node | ||
23 | - The 1394 bus (i.e. "card") to which the node is attached: | ||
24 | - Isochronous stream transmission and reception | ||
25 | - Asynchronous stream transmission and reception | ||
26 | - Asynchronous broadcast request transmission | ||
27 | - PHY packet transmission and reception | ||
28 | - Allocate, reallocate, deallocate isochronous | ||
29 | resources (channels, bandwidth) at the bus's IRM | ||
30 | - Query node IDs of local node, root node, IRM, bus | ||
31 | manager | ||
32 | - Query cycle time | ||
33 | - Bus reset initiation, bus reset event reception | ||
34 | - All 1394 buses: | ||
35 | - Allocation of IEEE 1212 address ranges on the local | ||
36 | link layers, reception of inbound requests to such | ||
37 | an address range, asynchronous response transmission | ||
38 | to inbound requests | ||
39 | - Addition of descriptors or directories to the local | ||
40 | nodes' Configuration ROM | ||
41 | |||
42 | Due to the different scope of operations and in order to let | ||
43 | userland implement different access permission models, some | ||
44 | operations are restricted to /dev/fw* files that are associated | ||
45 | with a local node: | ||
46 | - Addition of descriptors or directories to the local | ||
47 | nodes' Configuration ROM | ||
48 | - PHY packet transmission and reception | ||
49 | |||
50 | A /dev/fw* file remains associated with one particular node | ||
51 | during its entire life time. Bus topology changes, and hence | ||
52 | node ID changes, are tracked by firewire-core. ABI users do not | ||
53 | need to be aware of topology. | ||
54 | |||
55 | The following file operations are supported: | ||
56 | |||
57 | open(2) | ||
58 | Currently the only useful flags are O_RDWR. | ||
59 | |||
60 | ioctl(2) | ||
61 | Initiate various actions. Some take immediate effect, others | ||
62 | are performed asynchronously while or after the ioctl returns. | ||
63 | See the inline documentation in <linux/firewire-cdev.h> for | ||
64 | descriptions of all ioctls. | ||
65 | |||
66 | poll(2), select(2), epoll_wait(2) etc. | ||
67 | Watch for events to become available to be read. | ||
68 | |||
69 | read(2) | ||
70 | Receive various events. There are solicited events like | ||
71 | outbound asynchronous transaction completion or isochronous | ||
72 | buffer completion, and unsolicited events such as bus resets, | ||
73 | request reception, or PHY packet reception. Always use a read | ||
74 | buffer which is large enough to receive the largest event that | ||
75 | could ever arrive. See <linux/firewire-cdev.h> for descriptions | ||
76 | of all event types and for which ioctls affect reception of | ||
77 | events. | ||
78 | |||
79 | mmap(2) | ||
80 | Allocate a DMA buffer for isochronous reception or transmission | ||
81 | and map it into the process address space. The arguments should | ||
82 | be used as follows: addr = NULL, length = the desired buffer | ||
83 | size, i.e. number of packets times size of largest packet, | ||
84 | prot = at least PROT_READ for reception and at least PROT_WRITE | ||
85 | for transmission, flags = MAP_SHARED, fd = the handle to the | ||
86 | /dev/fw*, offset = 0. | ||
87 | |||
88 | Isochronous reception works in packet-per-buffer fashion except | ||
89 | for multichannel reception which works in buffer-fill mode. | ||
90 | |||
91 | munmap(2) | ||
92 | Unmap the isochronous I/O buffer from the process address space. | ||
93 | |||
94 | close(2) | ||
95 | Besides stopping and freeing I/O contexts that were associated | ||
96 | with the file descriptor, back out any changes to the local | ||
97 | nodes' Configuration ROM. Deallocate isochronous channels and | ||
98 | bandwidth at the IRM that were marked for kernel-assisted | ||
99 | re- and deallocation. | ||
100 | |||
101 | Users: libraw1394 | ||
102 | libdc1394 | ||
103 | tools like jujuutils, fwhack, ... | ||
diff --git a/Documentation/ABI/stable/sysfs-bus-firewire b/Documentation/ABI/stable/sysfs-bus-firewire new file mode 100644 index 000000000000..3d484e5dc846 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-bus-firewire | |||
@@ -0,0 +1,122 @@ | |||
1 | What: /sys/bus/firewire/devices/fw[0-9]+/ | ||
2 | Date: May 2007 | ||
3 | KernelVersion: 2.6.22 | ||
4 | Contact: linux1394-devel@lists.sourceforge.net | ||
5 | Description: | ||
6 | IEEE 1394 node device attributes. | ||
7 | Read-only. Mutable during the node device's lifetime. | ||
8 | See IEEE 1212 for semantic definitions. | ||
9 | |||
10 | config_rom | ||
11 | Contents of the Configuration ROM register. | ||
12 | Binary attribute; an array of host-endian u32. | ||
13 | |||
14 | guid | ||
15 | The node's EUI-64 in the bus information block of | ||
16 | Configuration ROM. | ||
17 | Hexadecimal string representation of an u64. | ||
18 | |||
19 | |||
20 | What: /sys/bus/firewire/devices/fw[0-9]+/units | ||
21 | Date: June 2009 | ||
22 | KernelVersion: 2.6.31 | ||
23 | Contact: linux1394-devel@lists.sourceforge.net | ||
24 | Description: | ||
25 | IEEE 1394 node device attribute. | ||
26 | Read-only. Mutable during the node device's lifetime. | ||
27 | See IEEE 1212 for semantic definitions. | ||
28 | |||
29 | units | ||
30 | Summary of all units present in an IEEE 1394 node. | ||
31 | Contains space-separated tuples of specifier_id and | ||
32 | version of each unit present in the node. Specifier_id | ||
33 | and version are hexadecimal string representations of | ||
34 | u24 of the respective unit directory entries. | ||
35 | Specifier_id and version within each tuple are separated | ||
36 | by a colon. | ||
37 | |||
38 | Users: udev rules to set ownership and access permissions or ACLs of | ||
39 | /dev/fw[0-9]+ character device files | ||
40 | |||
41 | |||
42 | What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/ | ||
43 | Date: May 2007 | ||
44 | KernelVersion: 2.6.22 | ||
45 | Contact: linux1394-devel@lists.sourceforge.net | ||
46 | Description: | ||
47 | IEEE 1394 unit device attributes. | ||
48 | Read-only. Immutable during the unit device's lifetime. | ||
49 | See IEEE 1212 for semantic definitions. | ||
50 | |||
51 | modalias | ||
52 | Same as MODALIAS in the uevent at device creation. | ||
53 | |||
54 | rom_index | ||
55 | Offset of the unit directory within the parent device's | ||
56 | (node device's) Configuration ROM, in quadlets. | ||
57 | Decimal string representation. | ||
58 | |||
59 | |||
60 | What: /sys/bus/firewire/devices/*/ | ||
61 | Date: May 2007 | ||
62 | KernelVersion: 2.6.22 | ||
63 | Contact: linux1394-devel@lists.sourceforge.net | ||
64 | Description: | ||
65 | Attributes common to IEEE 1394 node devices and unit devices. | ||
66 | Read-only. Mutable during the node device's lifetime. | ||
67 | Immutable during the unit device's lifetime. | ||
68 | See IEEE 1212 for semantic definitions. | ||
69 | |||
70 | These attributes are only created if the root directory of an | ||
71 | IEEE 1394 node or the unit directory of an IEEE 1394 unit | ||
72 | actually contains according entries. | ||
73 | |||
74 | hardware_version | ||
75 | Hexadecimal string representation of an u24. | ||
76 | |||
77 | hardware_version_name | ||
78 | Contents of a respective textual descriptor leaf. | ||
79 | |||
80 | model | ||
81 | Hexadecimal string representation of an u24. | ||
82 | |||
83 | model_name | ||
84 | Contents of a respective textual descriptor leaf. | ||
85 | |||
86 | specifier_id | ||
87 | Hexadecimal string representation of an u24. | ||
88 | Mandatory in unit directories according to IEEE 1212. | ||
89 | |||
90 | vendor | ||
91 | Hexadecimal string representation of an u24. | ||
92 | Mandatory in the root directory according to IEEE 1212. | ||
93 | |||
94 | vendor_name | ||
95 | Contents of a respective textual descriptor leaf. | ||
96 | |||
97 | version | ||
98 | Hexadecimal string representation of an u24. | ||
99 | Mandatory in unit directories according to IEEE 1212. | ||
100 | |||
101 | |||
102 | What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id | ||
103 | formerly | ||
104 | /sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id | ||
105 | Date: Feb 2004 | ||
106 | KernelVersion: 2.6.4 | ||
107 | Contact: linux1394-devel@lists.sourceforge.net | ||
108 | Description: | ||
109 | SCSI target port identifier and logical unit identifier of a | ||
110 | logical unit of an SBP-2 target. The identifiers are specified | ||
111 | in SAM-2...SAM-4 annex A. They are persistent and world-wide | ||
112 | unique properties of the SBP-2 attached target. | ||
113 | |||
114 | Read-only attribute, immutable during the target's lifetime. | ||
115 | Format, as exposed by firewire-sbp2 since 2.6.22, May 2007: | ||
116 | Colon-separated hexadecimal string representations of | ||
117 | u64 EUI-64 : u24 directory_ID : u16 LUN | ||
118 | without 0x prefixes, without whitespace. The former sbp2 driver | ||
119 | (removed in 2.6.37 after being superseded by firewire-sbp2) used | ||
120 | a somewhat shorter format which was not as close to SAM. | ||
121 | |||
122 | Users: udev rules to create /dev/disk/by-id/ symlinks | ||
diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso new file mode 100644 index 000000000000..8a1cbb594497 --- /dev/null +++ b/Documentation/ABI/stable/vdso | |||
@@ -0,0 +1,27 @@ | |||
1 | On some architectures, when the kernel loads any userspace program it | ||
2 | maps an ELF DSO into that program's address space. This DSO is called | ||
3 | the vDSO and it often contains useful and highly-optimized alternatives | ||
4 | to real syscalls. | ||
5 | |||
6 | These functions are called just like ordinary C functions according to | ||
7 | your platform's ABI. Call them from a sensible context. (For example, | ||
8 | if you set CS on x86 to something strange, the vDSO functions are | ||
9 | within their rights to crash.) In addition, if you pass a bad | ||
10 | pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT. | ||
11 | |||
12 | To find the DSO, parse the auxiliary vector passed to the program's | ||
13 | entry point. The AT_SYSINFO_EHDR entry will point to the vDSO. | ||
14 | |||
15 | The vDSO uses symbol versioning; whenever you request a symbol from the | ||
16 | vDSO, specify the version you are expecting. | ||
17 | |||
18 | Programs that dynamically link to glibc will use the vDSO automatically. | ||
19 | Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c. | ||
20 | |||
21 | Unless otherwise noted, the set of symbols with any given version and the | ||
22 | ABI of those symbols is considered stable. It may vary across architectures, | ||
23 | though. | ||
24 | |||
25 | (As of this writing, this ABI documentation has been confirmed for x86_64. | ||
26 | The maintainers of the other vDSO-using architectures should confirm | ||
27 | that it is correct for their architecture.) \ No newline at end of file | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 new file mode 100644 index 000000000000..9de269bb0ae5 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 | |||
@@ -0,0 +1,21 @@ | |||
1 | What: /sys/bus/i2c/devices/.../device | ||
2 | Date: February 2011 | ||
3 | Contact: Minkyu Kang <mk7.kang@samsung.com> | ||
4 | Description: | ||
5 | show what device is attached | ||
6 | NONE - no device | ||
7 | USB - USB device is attached | ||
8 | UART - UART is attached | ||
9 | CHARGER - Charger is attached | ||
10 | JIG - JIG is attached | ||
11 | |||
12 | What: /sys/bus/i2c/devices/.../switch | ||
13 | Date: February 2011 | ||
14 | Contact: Minkyu Kang <mk7.kang@samsung.com> | ||
15 | Description: | ||
16 | show or set the state of manual switch | ||
17 | VAUDIO - switch to VAUDIO path | ||
18 | UART - switch to UART path | ||
19 | AUDIO - switch to AUDIO path | ||
20 | DHOST - switch to DHOST path | ||
21 | AUTO - switch automatically by device | ||
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus index c1b53b8bc2ae..65e6e5dd67e8 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus +++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus | |||
@@ -92,6 +92,14 @@ Description: The mouse has a tracking- and a distance-control-unit. These | |||
92 | This file is writeonly. | 92 | This file is writeonly. |
93 | Users: http://roccat.sourceforge.net | 93 | Users: http://roccat.sourceforge.net |
94 | 94 | ||
95 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/talk | ||
96 | Date: May 2011 | ||
97 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
98 | Description: Used to activate some easy* functions of the mouse from outside. | ||
99 | The data has to be 16 bytes long. | ||
100 | This file is writeonly. | ||
101 | Users: http://roccat.sourceforge.net | ||
102 | |||
95 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu | 103 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu |
96 | Date: October 2010 | 104 | Date: October 2010 |
97 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | 105 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> |
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote new file mode 100644 index 000000000000..5d5a16ea57c6 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote | |||
@@ -0,0 +1,10 @@ | |||
1 | What: /sys/bus/hid/drivers/wiimote/<dev>/led1 | ||
2 | What: /sys/bus/hid/drivers/wiimote/<dev>/led2 | ||
3 | What: /sys/bus/hid/drivers/wiimote/<dev>/led3 | ||
4 | What: /sys/bus/hid/drivers/wiimote/<dev>/led4 | ||
5 | Date: July 2011 | ||
6 | KernelVersion: 3.1 | ||
7 | Contact: David Herrmann <dh.herrmann@googlemail.com> | ||
8 | Description: Make it possible to set/get current led state. Reading from it | ||
9 | returns 0 if led is off and 1 if it is on. Writing 0 to it | ||
10 | disables the led, writing 1 enables it. | ||
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 8906648f962b..445289cd0e65 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl | |||
@@ -402,8 +402,9 @@ | |||
402 | !Finclude/net/mac80211.h set_key_cmd | 402 | !Finclude/net/mac80211.h set_key_cmd |
403 | !Finclude/net/mac80211.h ieee80211_key_conf | 403 | !Finclude/net/mac80211.h ieee80211_key_conf |
404 | !Finclude/net/mac80211.h ieee80211_key_flags | 404 | !Finclude/net/mac80211.h ieee80211_key_flags |
405 | !Finclude/net/mac80211.h ieee80211_tkip_key_type | 405 | !Finclude/net/mac80211.h ieee80211_get_tkip_p1k |
406 | !Finclude/net/mac80211.h ieee80211_get_tkip_key | 406 | !Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv |
407 | !Finclude/net/mac80211.h ieee80211_get_tkip_p2k | ||
407 | !Finclude/net/mac80211.h ieee80211_key_removed | 408 | !Finclude/net/mac80211.h ieee80211_key_removed |
408 | </chapter> | 409 | </chapter> |
409 | 410 | ||
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index 7b3f49363413..07a9c48de5a2 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl | |||
@@ -409,7 +409,7 @@ cond_resched(); /* Will sleep */ | |||
409 | 409 | ||
410 | <para> | 410 | <para> |
411 | You should always compile your kernel | 411 | You should always compile your kernel |
412 | <symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn | 412 | <symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn |
413 | you if you break these rules. If you <emphasis>do</emphasis> break | 413 | you if you break these rules. If you <emphasis>do</emphasis> break |
414 | the rules, you will eventually lock up your box. | 414 | the rules, you will eventually lock up your box. |
415 | </para> | 415 | </para> |
diff --git a/Documentation/DocBook/v4l/io.xml b/Documentation/DocBook/v4l/io.xml index 227e7ac45a06..c57d1ec6291c 100644 --- a/Documentation/DocBook/v4l/io.xml +++ b/Documentation/DocBook/v4l/io.xml | |||
@@ -210,7 +210,7 @@ for (i = 0; i < reqbuf.count; i++) | |||
210 | <programlisting> | 210 | <programlisting> |
211 | &v4l2-requestbuffers; reqbuf; | 211 | &v4l2-requestbuffers; reqbuf; |
212 | /* Our current format uses 3 planes per buffer */ | 212 | /* Our current format uses 3 planes per buffer */ |
213 | #define FMT_NUM_PLANES = 3; | 213 | #define FMT_NUM_PLANES 3 |
214 | 214 | ||
215 | struct { | 215 | struct { |
216 | void *start[FMT_NUM_PLANES]; | 216 | void *start[FMT_NUM_PLANES]; |
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl index 58ced2346e67..598c22f3b3ac 100644 --- a/Documentation/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl | |||
@@ -1164,7 +1164,7 @@ | |||
1164 | } | 1164 | } |
1165 | chip->port = pci_resource_start(pci, 0); | 1165 | chip->port = pci_resource_start(pci, 0); |
1166 | if (request_irq(pci->irq, snd_mychip_interrupt, | 1166 | if (request_irq(pci->irq, snd_mychip_interrupt, |
1167 | IRQF_SHARED, "My Chip", chip)) { | 1167 | IRQF_SHARED, KBUILD_MODNAME, chip)) { |
1168 | printk(KERN_ERR "cannot grab irq %d\n", pci->irq); | 1168 | printk(KERN_ERR "cannot grab irq %d\n", pci->irq); |
1169 | snd_mychip_free(chip); | 1169 | snd_mychip_free(chip); |
1170 | return -EBUSY; | 1170 | return -EBUSY; |
@@ -1197,7 +1197,7 @@ | |||
1197 | 1197 | ||
1198 | /* pci_driver definition */ | 1198 | /* pci_driver definition */ |
1199 | static struct pci_driver driver = { | 1199 | static struct pci_driver driver = { |
1200 | .name = "My Own Chip", | 1200 | .name = KBUILD_MODNAME, |
1201 | .id_table = snd_mychip_ids, | 1201 | .id_table = snd_mychip_ids, |
1202 | .probe = snd_mychip_probe, | 1202 | .probe = snd_mychip_probe, |
1203 | .remove = __devexit_p(snd_mychip_remove), | 1203 | .remove = __devexit_p(snd_mychip_remove), |
@@ -1340,7 +1340,7 @@ | |||
1340 | <programlisting> | 1340 | <programlisting> |
1341 | <![CDATA[ | 1341 | <![CDATA[ |
1342 | if (request_irq(pci->irq, snd_mychip_interrupt, | 1342 | if (request_irq(pci->irq, snd_mychip_interrupt, |
1343 | IRQF_SHARED, "My Chip", chip)) { | 1343 | IRQF_SHARED, KBUILD_MODNAME, chip)) { |
1344 | printk(KERN_ERR "cannot grab irq %d\n", pci->irq); | 1344 | printk(KERN_ERR "cannot grab irq %d\n", pci->irq); |
1345 | snd_mychip_free(chip); | 1345 | snd_mychip_free(chip); |
1346 | return -EBUSY; | 1346 | return -EBUSY; |
@@ -1616,7 +1616,7 @@ | |||
1616 | <programlisting> | 1616 | <programlisting> |
1617 | <![CDATA[ | 1617 | <![CDATA[ |
1618 | static struct pci_driver driver = { | 1618 | static struct pci_driver driver = { |
1619 | .name = "My Own Chip", | 1619 | .name = KBUILD_MODNAME, |
1620 | .id_table = snd_mychip_ids, | 1620 | .id_table = snd_mychip_ids, |
1621 | .probe = snd_mychip_probe, | 1621 | .probe = snd_mychip_probe, |
1622 | .remove = __devexit_p(snd_mychip_remove), | 1622 | .remove = __devexit_p(snd_mychip_remove), |
@@ -5816,7 +5816,7 @@ struct _snd_pcm_runtime { | |||
5816 | <programlisting> | 5816 | <programlisting> |
5817 | <![CDATA[ | 5817 | <![CDATA[ |
5818 | static struct pci_driver driver = { | 5818 | static struct pci_driver driver = { |
5819 | .name = "My Chip", | 5819 | .name = KBUILD_MODNAME, |
5820 | .id_table = snd_my_ids, | 5820 | .id_table = snd_my_ids, |
5821 | .probe = snd_my_probe, | 5821 | .probe = snd_my_probe, |
5822 | .remove = __devexit_p(snd_my_remove), | 5822 | .remove = __devexit_p(snd_my_remove), |
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index a8536cb88091..bf82851a0e57 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt | |||
@@ -5,8 +5,8 @@ Although RCU is usually used to protect read-mostly data structures, | |||
5 | it is possible to use RCU to provide dynamic non-maskable interrupt | 5 | it is possible to use RCU to provide dynamic non-maskable interrupt |
6 | handlers, as well as dynamic irq handlers. This document describes | 6 | handlers, as well as dynamic irq handlers. This document describes |
7 | how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer | 7 | how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer |
8 | work in "arch/i386/oprofile/nmi_timer_int.c" and in | 8 | work in "arch/x86/oprofile/nmi_timer_int.c" and in |
9 | "arch/i386/kernel/traps.c". | 9 | "arch/x86/kernel/traps.c". |
10 | 10 | ||
11 | The relevant pieces of code are listed below, each followed by a | 11 | The relevant pieces of code are listed below, each followed by a |
12 | brief explanation. | 12 | brief explanation. |
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index da0382daa395..dc0e33210d7e 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist | |||
@@ -53,8 +53,8 @@ kernel patches. | |||
53 | 53 | ||
54 | 12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, | 54 | 12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, |
55 | CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, | 55 | CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, |
56 | CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously | 56 | CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU |
57 | enabled. | 57 | and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled. |
58 | 58 | ||
59 | 13: Has been build- and runtime tested with and without CONFIG_SMP and | 59 | 13: Has been build- and runtime tested with and without CONFIG_SMP and |
60 | CONFIG_PREEMPT. | 60 | CONFIG_PREEMPT. |
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting index 4e686a2ed91e..a341d87d276e 100644 --- a/Documentation/arm/Booting +++ b/Documentation/arm/Booting | |||
@@ -164,3 +164,8 @@ In either case, the following conditions must be met: | |||
164 | - The boot loader is expected to call the kernel image by jumping | 164 | - The boot loader is expected to call the kernel image by jumping |
165 | directly to the first instruction of the kernel image. | 165 | directly to the first instruction of the kernel image. |
166 | 166 | ||
167 | On CPUs supporting the ARM instruction set, the entry must be | ||
168 | made in ARM state, even for a Thumb-2 kernel. | ||
169 | |||
170 | On CPUs supporting only the Thumb instruction set such as | ||
171 | Cortex-M class CPUs, the entry must be made in Thumb state. | ||
diff --git a/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt new file mode 100644 index 000000000000..441959846e1a --- /dev/null +++ b/Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt | |||
@@ -0,0 +1,42 @@ | |||
1 | ROM-able zImage boot from eSD | ||
2 | ----------------------------- | ||
3 | |||
4 | A ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and | ||
5 | SuperH Mobile ARM will boot directly from the SDHI hardware block. | ||
6 | |||
7 | This is achieved by the mask ROM loading the first portion of the image into | ||
8 | MERAM and then jumping to it. This portion contains loader code which | ||
9 | copies the entire image to SDRAM and jumps to it. From there the zImage | ||
10 | boot code proceeds as normal, uncompressing the image into its final | ||
11 | location and then jumping to it. | ||
12 | |||
13 | This code has been tested on a mackerel board using the developer 1A eSD | ||
14 | boot mode which is configured using the following jumper settings. | ||
15 | |||
16 | 8 7 6 5 4 3 2 1 | ||
17 | x|x|x|x| |x|x| | ||
18 | S4 -+-+-+-+-+-+-+- | ||
19 | | | | |x| | |x on | ||
20 | |||
21 | The eSD card needs to be present in SDHI slot 1 (CN7). | ||
22 | As such S1 and S33 also need to be configured as per | ||
23 | the notes in arch/arm/mach-shmobile/board-mackerel.c. | ||
24 | |||
25 | A partial zImage must be written to physical partition #1 (boot) | ||
26 | of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to | ||
27 | accomplish this. | ||
28 | |||
29 | e.g. | ||
30 | vrl4 < zImage | dd of=/dev/sdX bs=512 count=17 | ||
31 | |||
32 | A full copy of _the same_ zImage should be written to physical partition #1 | ||
33 | (boot) of the eSD at sector 0. This should _not_ be in vrl4 format. | ||
34 | |||
35 | vrl4 < zImage | dd of=/dev/sdX bs=512 | ||
36 | |||
37 | Note: The commands above assume that the physical partition has been | ||
38 | switched. No such facility currently exists in the Linux Kernel. | ||
39 | |||
40 | Physical partitions are described in the eSD specification. At the time of | ||
41 | writing they are not the same as partitions that are typically configured | ||
42 | using fdisk and visible through /proc/partitions | ||
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt index c12bfc1a00c9..359587b2367b 100644 --- a/Documentation/arm/Samsung-S3C24XX/Overview.txt +++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt | |||
@@ -8,10 +8,13 @@ Introduction | |||
8 | 8 | ||
9 | The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported | 9 | The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported |
10 | by the 's3c2410' architecture of ARM Linux. Currently the S3C2410, | 10 | by the 's3c2410' architecture of ARM Linux. Currently the S3C2410, |
11 | S3C2412, S3C2413, S3C2416 S3C2440, S3C2442, S3C2443 and S3C2450 devices | 11 | S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices |
12 | are supported. | 12 | are supported. |
13 | 13 | ||
14 | Support for the S3C2400 and S3C24A0 series are in progress. | 14 | Support for the S3C2400 and S3C24A0 series was never completed and the |
15 | corresponding code has been removed after a while. If someone wishes to | ||
16 | revive this effort, partial support can be retrieved from earlier Linux | ||
17 | versions. | ||
15 | 18 | ||
16 | The S3C2416 and S3C2450 devices are very similar and S3C2450 support is | 19 | The S3C2416 and S3C2450 devices are very similar and S3C2450 support is |
17 | included under the arch/arm/mach-s3c2416 directory. Note, whilst core | 20 | included under the arch/arm/mach-s3c2416 directory. Note, whilst core |
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt new file mode 100644 index 000000000000..a17df9f91d16 --- /dev/null +++ b/Documentation/arm/kernel_user_helpers.txt | |||
@@ -0,0 +1,267 @@ | |||
1 | Kernel-provided User Helpers | ||
2 | ============================ | ||
3 | |||
4 | These are segments of kernel-provided user code reachable from user space | ||
5 | at a fixed address in kernel memory. This is used to provide user space | ||
6 | with some operations which require kernel help because of unimplemented | ||
7 | native features and/or instructions in many ARM CPUs. The idea is for this | ||
8 | code to be executed directly in user mode for best efficiency but which is | ||
9 | too intimate with the kernel counterpart to be left to user libraries. | ||
10 | In fact this code might even differ from one CPU to another depending on | ||
11 | the available instruction set, or whether it is an SMP system. In other | ||
12 | words, the kernel reserves the right to change this code as needed without | ||
13 | warning. Only the entry points and their results as documented here are | ||
14 | guaranteed to be stable. | ||
15 | |||
16 | This is different from (but doesn't preclude) a full blown VDSO | ||
17 | implementation, however a VDSO would prevent some assembly tricks with | ||
18 | constants that allow for efficient branching to those code segments. And | ||
19 | since those code segments only use a few cycles before returning to user | ||
20 | code, the overhead of a VDSO indirect far call would add a measurable | ||
21 | overhead to such minimalistic operations. | ||
22 | |||
23 | User space is expected to bypass those helpers and implement those things | ||
24 | inline (either in the code emitted directly by the compiler, or part of | ||
25 | the implementation of a library call) when optimizing for a recent enough | ||
26 | processor that has the necessary native support, but only if resulting | ||
27 | binaries are already to be incompatible with earlier ARM processors due to | ||
28 | usage of similar native instructions for other things. In other words | ||
29 | don't make binaries unable to run on earlier processors just for the sake | ||
30 | of not using these kernel helpers if your compiled code is not going to | ||
31 | use new instructions for other purposes. | ||
32 | |||
33 | New helpers may be added over time, so an older kernel may be missing some | ||
34 | helpers present in a newer kernel. For this reason, programs must check | ||
35 | the value of __kuser_helper_version (see below) before assuming that it is | ||
36 | safe to call any particular helper. This check should ideally be | ||
37 | performed only once at process startup time, and execution aborted early | ||
38 | if the required helpers are not provided by the kernel version that | ||
39 | process is running on. | ||
40 | |||
41 | kuser_helper_version | ||
42 | -------------------- | ||
43 | |||
44 | Location: 0xffff0ffc | ||
45 | |||
46 | Reference declaration: | ||
47 | |||
48 | extern int32_t __kuser_helper_version; | ||
49 | |||
50 | Definition: | ||
51 | |||
52 | This field contains the number of helpers being implemented by the | ||
53 | running kernel. User space may read this to determine the availability | ||
54 | of a particular helper. | ||
55 | |||
56 | Usage example: | ||
57 | |||
58 | #define __kuser_helper_version (*(int32_t *)0xffff0ffc) | ||
59 | |||
60 | void check_kuser_version(void) | ||
61 | { | ||
62 | if (__kuser_helper_version < 2) { | ||
63 | fprintf(stderr, "can't do atomic operations, kernel too old\n"); | ||
64 | abort(); | ||
65 | } | ||
66 | } | ||
67 | |||
68 | Notes: | ||
69 | |||
70 | User space may assume that the value of this field never changes | ||
71 | during the lifetime of any single process. This means that this | ||
72 | field can be read once during the initialisation of a library or | ||
73 | startup phase of a program. | ||
74 | |||
75 | kuser_get_tls | ||
76 | ------------- | ||
77 | |||
78 | Location: 0xffff0fe0 | ||
79 | |||
80 | Reference prototype: | ||
81 | |||
82 | void * __kuser_get_tls(void); | ||
83 | |||
84 | Input: | ||
85 | |||
86 | lr = return address | ||
87 | |||
88 | Output: | ||
89 | |||
90 | r0 = TLS value | ||
91 | |||
92 | Clobbered registers: | ||
93 | |||
94 | none | ||
95 | |||
96 | Definition: | ||
97 | |||
98 | Get the TLS value as previously set via the __ARM_NR_set_tls syscall. | ||
99 | |||
100 | Usage example: | ||
101 | |||
102 | typedef void * (__kuser_get_tls_t)(void); | ||
103 | #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0) | ||
104 | |||
105 | void foo() | ||
106 | { | ||
107 | void *tls = __kuser_get_tls(); | ||
108 | printf("TLS = %p\n", tls); | ||
109 | } | ||
110 | |||
111 | Notes: | ||
112 | |||
113 | - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12). | ||
114 | |||
115 | kuser_cmpxchg | ||
116 | ------------- | ||
117 | |||
118 | Location: 0xffff0fc0 | ||
119 | |||
120 | Reference prototype: | ||
121 | |||
122 | int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr); | ||
123 | |||
124 | Input: | ||
125 | |||
126 | r0 = oldval | ||
127 | r1 = newval | ||
128 | r2 = ptr | ||
129 | lr = return address | ||
130 | |||
131 | Output: | ||
132 | |||
133 | r0 = success code (zero or non-zero) | ||
134 | C flag = set if r0 == 0, clear if r0 != 0 | ||
135 | |||
136 | Clobbered registers: | ||
137 | |||
138 | r3, ip, flags | ||
139 | |||
140 | Definition: | ||
141 | |||
142 | Atomically store newval in *ptr only if *ptr is equal to oldval. | ||
143 | Return zero if *ptr was changed or non-zero if no exchange happened. | ||
144 | The C flag is also set if *ptr was changed to allow for assembly | ||
145 | optimization in the calling code. | ||
146 | |||
147 | Usage example: | ||
148 | |||
149 | typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr); | ||
150 | #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0) | ||
151 | |||
152 | int atomic_add(volatile int *ptr, int val) | ||
153 | { | ||
154 | int old, new; | ||
155 | |||
156 | do { | ||
157 | old = *ptr; | ||
158 | new = old + val; | ||
159 | } while(__kuser_cmpxchg(old, new, ptr)); | ||
160 | |||
161 | return new; | ||
162 | } | ||
163 | |||
164 | Notes: | ||
165 | |||
166 | - This routine already includes memory barriers as needed. | ||
167 | |||
168 | - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12). | ||
169 | |||
170 | kuser_memory_barrier | ||
171 | -------------------- | ||
172 | |||
173 | Location: 0xffff0fa0 | ||
174 | |||
175 | Reference prototype: | ||
176 | |||
177 | void __kuser_memory_barrier(void); | ||
178 | |||
179 | Input: | ||
180 | |||
181 | lr = return address | ||
182 | |||
183 | Output: | ||
184 | |||
185 | none | ||
186 | |||
187 | Clobbered registers: | ||
188 | |||
189 | none | ||
190 | |||
191 | Definition: | ||
192 | |||
193 | Apply any needed memory barrier to preserve consistency with data modified | ||
194 | manually and __kuser_cmpxchg usage. | ||
195 | |||
196 | Usage example: | ||
197 | |||
198 | typedef void (__kuser_dmb_t)(void); | ||
199 | #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0) | ||
200 | |||
201 | Notes: | ||
202 | |||
203 | - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15). | ||
204 | |||
205 | kuser_cmpxchg64 | ||
206 | --------------- | ||
207 | |||
208 | Location: 0xffff0f60 | ||
209 | |||
210 | Reference prototype: | ||
211 | |||
212 | int __kuser_cmpxchg64(const int64_t *oldval, | ||
213 | const int64_t *newval, | ||
214 | volatile int64_t *ptr); | ||
215 | |||
216 | Input: | ||
217 | |||
218 | r0 = pointer to oldval | ||
219 | r1 = pointer to newval | ||
220 | r2 = pointer to target value | ||
221 | lr = return address | ||
222 | |||
223 | Output: | ||
224 | |||
225 | r0 = success code (zero or non-zero) | ||
226 | C flag = set if r0 == 0, clear if r0 != 0 | ||
227 | |||
228 | Clobbered registers: | ||
229 | |||
230 | r3, lr, flags | ||
231 | |||
232 | Definition: | ||
233 | |||
234 | Atomically store the 64-bit value pointed to by *newval in *ptr only if *ptr | ||
235 | is equal to the 64-bit value pointed to by *oldval. Return zero if *ptr was | ||
236 | changed or non-zero if no exchange happened. | ||
237 | |||
238 | The C flag is also set if *ptr was changed to allow for assembly | ||
239 | optimization in the calling code. | ||
240 | |||
241 | Usage example: | ||
242 | |||
243 | typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval, | ||
244 | const int64_t *newval, | ||
245 | volatile int64_t *ptr); | ||
246 | #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60) | ||
247 | |||
248 | int64_t atomic_add64(volatile int64_t *ptr, int64_t val) | ||
249 | { | ||
250 | int64_t old, new; | ||
251 | |||
252 | do { | ||
253 | old = *ptr; | ||
254 | new = old + val; | ||
255 | } while(__kuser_cmpxchg64(&old, &new, ptr)); | ||
256 | |||
257 | return new; | ||
258 | } | ||
259 | |||
260 | Notes: | ||
261 | |||
262 | - This routine already includes memory barriers as needed. | ||
263 | |||
264 | - Due to the length of this sequence, this spans 2 conventional kuser | ||
265 | "slots", therefore 0xffff0f80 is not used as a valid entry point. | ||
266 | |||
267 | - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1). | ||
diff --git a/Documentation/blackfin/bfin-spi-notes.txt b/Documentation/blackfin/bfin-spi-notes.txt index 556fa877f2e8..eae6eaf2a09d 100644 --- a/Documentation/blackfin/bfin-spi-notes.txt +++ b/Documentation/blackfin/bfin-spi-notes.txt | |||
@@ -9,6 +9,8 @@ the entire SPI transfer. - And not just bits_per_word duration. | |||
9 | In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this | 9 | In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this |
10 | behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2 | 10 | behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2 |
11 | timing, you can utilize the GPIO controlled SPI Slave Select option instead. | 11 | timing, you can utilize the GPIO controlled SPI Slave Select option instead. |
12 | In this case, you should use GPIO based CS for all of your slaves and not just | ||
13 | the ones using mode 0 or 2 in order to guarantee correct CS toggling behavior. | ||
12 | 14 | ||
13 | You can even use the same pin whose peripheral role is a SSEL, | 15 | You can even use the same pin whose peripheral role is a SSEL, |
14 | but use it as a GPIO instead. | 16 | but use it as a GPIO instead. |
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index f65274081c8d..d8147b336c35 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt | |||
@@ -45,9 +45,13 @@ device. | |||
45 | 45 | ||
46 | rq_affinity (RW) | 46 | rq_affinity (RW) |
47 | ---------------- | 47 | ---------------- |
48 | If this option is enabled, the block layer will migrate request completions | 48 | If this option is '1', the block layer will migrate request completions to the |
49 | to the CPU that originally submitted the request. For some workloads | 49 | cpu "group" that originally submitted the request. For some workloads this |
50 | this provides a significant reduction in CPU cycles due to caching effects. | 50 | provides a significant reduction in CPU cycles due to caching effects. |
51 | |||
52 | For storage configurations that need to maximize distribution of completion | ||
53 | processing, setting this option to '2' forces the completion to run on the | ||
54 | requesting cpu (bypassing the "group" aggregation logic). | ||
51 | 55 | ||
52 | scheduler (RW) | 56 | scheduler (RW) |
53 | -------------- | 57 | -------------- |
diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960 index 0e8f618ab534..bd85fb9dc6e5 100644 --- a/Documentation/blockdev/README.DAC960 +++ b/Documentation/blockdev/README.DAC960 | |||
@@ -214,7 +214,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree: | |||
214 | make config | 214 | make config |
215 | make bzImage (or zImage) | 215 | make bzImage (or zImage) |
216 | 216 | ||
217 | Then install "arch/i386/boot/bzImage" or "arch/i386/boot/zImage" as your | 217 | Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your |
218 | standard kernel, run lilo if appropriate, and reboot. | 218 | standard kernel, run lilo if appropriate, and reboot. |
219 | 219 | ||
220 | To create the necessary devices in /dev, the "make_rd" script included in | 220 | To create the necessary devices in /dev, the "make_rd" script included in |
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt index 6c820baa19a6..fa72e97dd669 100644 --- a/Documentation/blockdev/ramdisk.txt +++ b/Documentation/blockdev/ramdisk.txt | |||
@@ -64,9 +64,9 @@ the RAM disk dynamically grows as data is being written into it, a size field | |||
64 | is not required. Bits 11 to 13 are not currently used and may as well be zero. | 64 | is not required. Bits 11 to 13 are not currently used and may as well be zero. |
65 | These numbers are no magical secrets, as seen below: | 65 | These numbers are no magical secrets, as seen below: |
66 | 66 | ||
67 | ./arch/i386/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF | 67 | ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF |
68 | ./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 | 68 | ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 |
69 | ./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 | 69 | ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 |
70 | 70 | ||
71 | Consider a typical two floppy disk setup, where you will have the | 71 | Consider a typical two floppy disk setup, where you will have the |
72 | kernel on disk one, and have already put a RAM disk image onto disk #2. | 72 | kernel on disk one, and have already put a RAM disk image onto disk #2. |
@@ -85,7 +85,7 @@ The command line equivalent is: "prompt_ramdisk=1" | |||
85 | Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. | 85 | Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. |
86 | So to create disk one of the set, you would do: | 86 | So to create disk one of the set, you would do: |
87 | 87 | ||
88 | /usr/src/linux# cat arch/i386/boot/zImage > /dev/fd0 | 88 | /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0 |
89 | /usr/src/linux# rdev /dev/fd0 /dev/fd0 | 89 | /usr/src/linux# rdev /dev/fd0 /dev/fd0 |
90 | /usr/src/linux# rdev -r /dev/fd0 49152 | 90 | /usr/src/linux# rdev -r /dev/fd0 49152 |
91 | 91 | ||
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index 9ad85df4b983..9d73cc0cadb9 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt | |||
@@ -23,7 +23,7 @@ New accounting groups can be created under the parent group /sys/fs/cgroup. | |||
23 | 23 | ||
24 | # cd /sys/fs/cgroup | 24 | # cd /sys/fs/cgroup |
25 | # mkdir g1 | 25 | # mkdir g1 |
26 | # echo $$ > g1 | 26 | # echo $$ > g1/tasks |
27 | 27 | ||
28 | The above steps create a new group g1 and move the current shell | 28 | The above steps create a new group g1 and move the current shell |
29 | process (bash) into it. CPU time consumed by this bash and its children | 29 | process (bash) into it. CPU time consumed by this bash and its children |
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 5b0d78e55ccc..5c51ed406d1d 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt | |||
@@ -180,7 +180,7 @@ files describing that cpuset: | |||
180 | - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset | 180 | - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset |
181 | - cpuset.sched_relax_domain_level: the searching range when migrating tasks | 181 | - cpuset.sched_relax_domain_level: the searching range when migrating tasks |
182 | 182 | ||
183 | In addition, the root cpuset only has the following file: | 183 | In addition, only the root cpuset has the following file: |
184 | - cpuset.memory_pressure_enabled flag: compute memory_pressure? | 184 | - cpuset.memory_pressure_enabled flag: compute memory_pressure? |
185 | 185 | ||
186 | New cpusets are created using the mkdir system call or shell | 186 | New cpusets are created using the mkdir system call or shell |
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 6c30e930c122..c436096351f8 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt | |||
@@ -168,7 +168,7 @@ in-chipset dynamic frequency switching to policy->min, the upper limit | |||
168 | to policy->max, and -if supported- select a performance-oriented | 168 | to policy->max, and -if supported- select a performance-oriented |
169 | setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a | 169 | setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a |
170 | powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check | 170 | powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check |
171 | the reference implementation in arch/i386/kernel/cpu/cpufreq/longrun.c | 171 | the reference implementation in drivers/cpufreq/longrun.c |
172 | 172 | ||
173 | 173 | ||
174 | 174 | ||
diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding index f3f1a469443c..83f5f5b365a3 100644 --- a/Documentation/development-process/4.Coding +++ b/Documentation/development-process/4.Coding | |||
@@ -244,7 +244,7 @@ testing purposes. In particular, you should turn on: | |||
244 | - DEBUG_SLAB can find a variety of memory allocation and use errors; it | 244 | - DEBUG_SLAB can find a variety of memory allocation and use errors; it |
245 | should be used on most development kernels. | 245 | should be used on most development kernels. |
246 | 246 | ||
247 | - DEBUG_SPINLOCK, DEBUG_SPINLOCK_SLEEP, and DEBUG_MUTEXES will find a | 247 | - DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP, and DEBUG_MUTEXES will find a |
248 | number of common locking errors. | 248 | number of common locking errors. |
249 | 249 | ||
250 | There are quite a few other debugging options, some of which will be | 250 | There are quite a few other debugging options, some of which will be |
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt new file mode 100644 index 000000000000..1c044eb320cc --- /dev/null +++ b/Documentation/devicetree/bindings/arm/pmu.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | * ARM Performance Monitor Units | ||
2 | |||
3 | ARM cores often have a PMU for counting cpu and cache events like cache misses | ||
4 | and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU | ||
5 | representation in the device tree should be done as under:- | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible : should be one of | ||
10 | "arm,cortex-a9-pmu" | ||
11 | "arm,cortex-a8-pmu" | ||
12 | "arm,arm1176-pmu" | ||
13 | "arm,arm1136-pmu" | ||
14 | - interrupts : 1 combined interrupt or 1 per core. | ||
15 | |||
16 | Example: | ||
17 | |||
18 | pmu { | ||
19 | compatible = "arm,cortex-a9-pmu"; | ||
20 | interrupts = <100 101>; | ||
21 | }; | ||
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt new file mode 100644 index 000000000000..1d5d7a870ec7 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/primecell.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | * ARM Primecell Peripherals | ||
2 | |||
3 | ARM, Ltd. Primecell peripherals have a standard id register that can be used to | ||
4 | identify the peripheral type, vendor, and revision. This value can be used for | ||
5 | driver matching. | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible : should be a specific value for peripheral and "arm,primecell" | ||
10 | |||
11 | Optional properties: | ||
12 | |||
13 | - arm,primecell-periphid : Value to override the h/w value with | ||
14 | |||
15 | Example: | ||
16 | |||
17 | serial@fff36000 { | ||
18 | compatible = "arm,pl011", "arm,primecell"; | ||
19 | arm,primecell-periphid = <0x00341011>; | ||
20 | }; | ||
21 | |||
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt index 2b6f2d45c45a..38988ef1336b 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt | |||
@@ -1,4 +1,4 @@ | |||
1 | Freescale SoC SEC Security Engines | 1 | Freescale SoC SEC Security Engines versions 2.x-3.x |
2 | 2 | ||
3 | Required properties: | 3 | Required properties: |
4 | 4 | ||
diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt new file mode 100644 index 000000000000..4363ae4b3c14 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | * Freescale i.MX/MXC GPIO controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : Should be "fsl,<soc>-gpio" | ||
5 | - reg : Address and length of the register set for the device | ||
6 | - interrupts : Should be the port interrupt shared by all 32 pins, if | ||
7 | one number. If two numbers, the first one is the interrupt shared | ||
8 | by low 16 pins and the second one is for high 16 pins. | ||
9 | - gpio-controller : Marks the device node as a gpio controller. | ||
10 | - #gpio-cells : Should be two. The first cell is the pin number and | ||
11 | the second cell is used to specify optional parameters (currently | ||
12 | unused). | ||
13 | |||
14 | Example: | ||
15 | |||
16 | gpio0: gpio@73f84000 { | ||
17 | compatible = "fsl,imx51-gpio", "fsl,imx31-gpio"; | ||
18 | reg = <0x73f84000 0x4000>; | ||
19 | interrupts = <50 51>; | ||
20 | gpio-controller; | ||
21 | #gpio-cells = <2>; | ||
22 | }; | ||
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt index edaa84d288a1..4e16ba4feab0 100644 --- a/Documentation/devicetree/bindings/gpio/gpio.txt +++ b/Documentation/devicetree/bindings/gpio/gpio.txt | |||
@@ -4,17 +4,45 @@ Specifying GPIO information for devices | |||
4 | 1) gpios property | 4 | 1) gpios property |
5 | ----------------- | 5 | ----------------- |
6 | 6 | ||
7 | Nodes that makes use of GPIOs should define them using `gpios' property, | 7 | Nodes that make use of GPIOs should specify them using one or more |
8 | format of which is: <&gpio-controller1-phandle gpio1-specifier | 8 | properties, each containing a 'gpio-list': |
9 | &gpio-controller2-phandle gpio2-specifier | ||
10 | 0 /* holes are permitted, means no GPIO 3 */ | ||
11 | &gpio-controller4-phandle gpio4-specifier | ||
12 | ...>; | ||
13 | 9 | ||
14 | Note that gpio-specifier length is controller dependent. | 10 | gpio-list ::= <single-gpio> [gpio-list] |
11 | single-gpio ::= <gpio-phandle> <gpio-specifier> | ||
12 | gpio-phandle : phandle to gpio controller node | ||
13 | gpio-specifier : Array of #gpio-cells specifying specific gpio | ||
14 | (controller specific) | ||
15 | |||
16 | GPIO properties should be named "[<name>-]gpios". Exact | ||
17 | meaning of each gpios property must be documented in the device tree | ||
18 | binding for each device. | ||
19 | |||
20 | For example, the following could be used to describe GPIO pins to use | ||
21 | as chip select lines; with chip selects 0, 1 and 3 populated, and chip | ||
22 | select 2 left empty: | ||
23 | |||
24 | gpio1: gpio1 { | ||
25 | gpio-controller; | ||
26 | #gpio-cells = <2>; | ||
27 | }; | ||
28 | gpio2: gpio2 { | ||
29 | gpio-controller; | ||
30 | #gpio-cells = <1>; | ||
31 | }; | ||
32 | [...] | ||
33 | chipsel-gpios = <&gpio1 12 0>, | ||
34 | <&gpio1 13 0>, | ||
35 | <0>, /* holes are permitted, means no GPIO 2 */ | ||
36 | <&gpio2 2>; | ||
37 | |||
38 | Note that gpio-specifier length is controller dependent. In the | ||
39 | above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2 | ||
40 | only uses one. | ||
15 | 41 | ||
16 | gpio-specifier may encode: bank, pin position inside the bank, | 42 | gpio-specifier may encode: bank, pin position inside the bank, |
17 | whether pin is open-drain and whether pin is logically inverted. | 43 | whether pin is open-drain and whether pin is logically inverted. |
44 | Exact meaning of each specifier cell is controller specific, and must | ||
45 | be documented in the device tree binding for the device. | ||
18 | 46 | ||
19 | Example of the node using GPIOs: | 47 | Example of the node using GPIOs: |
20 | 48 | ||
@@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller. | |||
28 | 2) gpio-controller nodes | 56 | 2) gpio-controller nodes |
29 | ------------------------ | 57 | ------------------------ |
30 | 58 | ||
31 | Every GPIO controller node must have #gpio-cells property defined, | 59 | Every GPIO controller node must have both an empty "gpio-controller" |
32 | this information will be used to translate gpio-specifiers. | 60 | property, and have #gpio-cells contain the size of the gpio-specifier. |
33 | 61 | ||
34 | Example of two SOC GPIO banks defined as gpio-controller nodes: | 62 | Example of two SOC GPIO banks defined as gpio-controller nodes: |
35 | 63 | ||
diff --git a/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt new file mode 100644 index 000000000000..eb4b530d64e1 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | NVIDIA Tegra 2 GPIO controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : "nvidia,tegra20-gpio" | ||
5 | - #gpio-cells : Should be two. The first cell is the pin number and the | ||
6 | second cell is used to specify optional parameters: | ||
7 | - bit 0 specifies polarity (0 for normal, 1 for inverted) | ||
8 | - gpio-controller : Marks the device node as a GPIO controller. | ||
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt index 1a729f089866..1a729f089866 100755..100644 --- a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt +++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt | |||
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt new file mode 100644 index 000000000000..9841057d112b --- /dev/null +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | * Freescale (Enhanced) Configurable Serial Peripheral Interface | ||
2 | (CSPI/eCSPI) for i.MX | ||
3 | |||
4 | Required properties: | ||
5 | - compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi" | ||
6 | - reg : Offset and length of the register set for the device | ||
7 | - interrupts : Should contain CSPI/eCSPI interrupt | ||
8 | - fsl,spi-num-chipselects : Contains the number of chipselects | ||
9 | - cs-gpios : Specifies the gpio pins to be used for chipselects. | ||
10 | |||
11 | Example: | ||
12 | |||
13 | ecspi@70010000 { | ||
14 | #address-cells = <1>; | ||
15 | #size-cells = <0>; | ||
16 | compatible = "fsl,imx51-ecspi"; | ||
17 | reg = <0x70010000 0x4000>; | ||
18 | interrupts = <36>; | ||
19 | fsl,spi-num-chipselects = <2>; | ||
20 | cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */ | ||
21 | <&gpio3 25 0>; /* GPIO4_25 */ | ||
22 | }; | ||
diff --git a/Documentation/devicetree/bindings/spi/spi_nvidia.txt b/Documentation/devicetree/bindings/spi/spi_nvidia.txt new file mode 100644 index 000000000000..6b9e51896693 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi_nvidia.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | NVIDIA Tegra 2 SPI device | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be "nvidia,tegra20-spi". | ||
5 | - gpios : should specify GPIOs used for chipselect. | ||
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt new file mode 100644 index 000000000000..b8b27b0aca10 --- /dev/null +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt | |||
@@ -0,0 +1,36 @@ | |||
1 | * UART (Universal Asynchronous Receiver/Transmitter) | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : one of: | ||
5 | - "ns8250" | ||
6 | - "ns16450" | ||
7 | - "ns16550a" | ||
8 | - "ns16550" | ||
9 | - "ns16750" | ||
10 | - "ns16850" | ||
11 | - "nvidia,tegra20-uart" | ||
12 | - "ibm,qpace-nwp-serial" | ||
13 | - "serial" if the port type is unknown. | ||
14 | - reg : offset and length of the register set for the device. | ||
15 | - interrupts : should contain uart interrupt. | ||
16 | - clock-frequency : the input clock frequency for the UART. | ||
17 | |||
18 | Optional properties: | ||
19 | - current-speed : the current active speed of the UART. | ||
20 | - reg-offset : offset to apply to the mapbase from the start of the registers. | ||
21 | - reg-shift : quantity to shift the register offsets by. | ||
22 | - reg-io-width : the size (in bytes) of the IO accesses that should be | ||
23 | performed on the device. There are some systems that require 32-bit | ||
24 | accesses to the UART (e.g. TI davinci). | ||
25 | - used-by-rtas : set to indicate that the port is in use by the OpenFirmware | ||
26 | RTAS and should not be registered. | ||
27 | |||
28 | Example: | ||
29 | |||
30 | uart@80230000 { | ||
31 | compatible = "ns8250"; | ||
32 | reg = <0x80230000 0x100>; | ||
33 | clock-frequency = <3686400>; | ||
34 | interrupts = <10>; | ||
35 | reg-shift = <2>; | ||
36 | }; | ||
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d43900c9ede2..aca4f8235969 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -184,7 +184,7 @@ Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's | |||
184 | 184 | ||
185 | A much more powerful interface, /proc/<pid>/oom_score_adj, was | 185 | A much more powerful interface, /proc/<pid>/oom_score_adj, was |
186 | introduced with the oom killer rewrite that allows users to increase or | 186 | introduced with the oom killer rewrite that allows users to increase or |
187 | decrease the badness() score linearly. This interface will replace | 187 | decrease the badness score linearly. This interface will replace |
188 | /proc/<pid>/oom_adj. | 188 | /proc/<pid>/oom_adj. |
189 | 189 | ||
190 | A warning will be emitted to the kernel log if an application uses this | 190 | A warning will be emitted to the kernel log if an application uses this |
@@ -199,7 +199,7 @@ Files: drivers/staging/cs5535_gpio/* | |||
199 | Check: drivers/staging/cs5535_gpio/cs5535_gpio.c | 199 | Check: drivers/staging/cs5535_gpio/cs5535_gpio.c |
200 | Why: A newer driver replaces this; it is drivers/gpio/cs5535-gpio.c, and | 200 | Why: A newer driver replaces this; it is drivers/gpio/cs5535-gpio.c, and |
201 | integrates with the Linux GPIO subsystem. The old driver has been | 201 | integrates with the Linux GPIO subsystem. The old driver has been |
202 | moved to staging, and will be removed altogether around 2.6.40. | 202 | moved to staging, and will be removed altogether around 3.0. |
203 | Please test the new driver, and ensure that the functionality you | 203 | Please test the new driver, and ensure that the functionality you |
204 | need and any bugfixes from the old driver are available in the new | 204 | need and any bugfixes from the old driver are available in the new |
205 | one. | 205 | one. |
@@ -294,7 +294,7 @@ When: The schedule was July 2008, but it was decided that we are going to keep t | |||
294 | Why: The support code for the old firmware hurts code readability/maintainability | 294 | Why: The support code for the old firmware hurts code readability/maintainability |
295 | and slightly hurts runtime performance. Bugfixes for the old firmware | 295 | and slightly hurts runtime performance. Bugfixes for the old firmware |
296 | are not provided by Broadcom anymore. | 296 | are not provided by Broadcom anymore. |
297 | Who: Michael Buesch <mb@bu3sch.de> | 297 | Who: Michael Buesch <m@bues.ch> |
298 | 298 | ||
299 | --------------------------- | 299 | --------------------------- |
300 | 300 | ||
@@ -430,7 +430,7 @@ Who: Avi Kivity <avi@redhat.com> | |||
430 | ---------------------------- | 430 | ---------------------------- |
431 | 431 | ||
432 | What: iwlwifi 50XX module parameters | 432 | What: iwlwifi 50XX module parameters |
433 | When: 2.6.40 | 433 | When: 3.0 |
434 | Why: The "..50" modules parameters were used to configure 5000 series and | 434 | Why: The "..50" modules parameters were used to configure 5000 series and |
435 | up devices; different set of module parameters also available for 4965 | 435 | up devices; different set of module parameters also available for 4965 |
436 | with same functionalities. Consolidate both set into single place | 436 | with same functionalities. Consolidate both set into single place |
@@ -441,7 +441,7 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com> | |||
441 | ---------------------------- | 441 | ---------------------------- |
442 | 442 | ||
443 | What: iwl4965 alias support | 443 | What: iwl4965 alias support |
444 | When: 2.6.40 | 444 | When: 3.0 |
445 | Why: Internal alias support has been present in module-init-tools for some | 445 | Why: Internal alias support has been present in module-init-tools for some |
446 | time, the MODULE_ALIAS("iwl4965") boilerplate aliases can be removed | 446 | time, the MODULE_ALIAS("iwl4965") boilerplate aliases can be removed |
447 | with no impact. | 447 | with no impact. |
@@ -482,7 +482,7 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> | |||
482 | ---------------------------- | 482 | ---------------------------- |
483 | 483 | ||
484 | What: iwlwifi disable_hw_scan module parameters | 484 | What: iwlwifi disable_hw_scan module parameters |
485 | When: 2.6.40 | 485 | When: 3.0 |
486 | Why: Hardware scan is the preferred method for iwlwifi devices for | 486 | Why: Hardware scan is the preferred method for iwlwifi devices for |
487 | scanning operation. Remove software scan support for all the | 487 | scanning operation. Remove software scan support for all the |
488 | iwlwifi devices. | 488 | iwlwifi devices. |
@@ -491,16 +491,6 @@ Who: Wey-Yi Guy <wey-yi.w.guy@intel.com> | |||
491 | 491 | ||
492 | ---------------------------- | 492 | ---------------------------- |
493 | 493 | ||
494 | What: cancel_rearming_delayed_work[queue]() | ||
495 | When: 2.6.39 | ||
496 | |||
497 | Why: The functions have been superceded by cancel_delayed_work_sync() | ||
498 | quite some time ago. The conversion is trivial and there is no | ||
499 | in-kernel user left. | ||
500 | Who: Tejun Heo <tj@kernel.org> | ||
501 | |||
502 | ---------------------------- | ||
503 | |||
504 | What: Legacy, non-standard chassis intrusion detection interface. | 494 | What: Legacy, non-standard chassis intrusion detection interface. |
505 | When: June 2011 | 495 | When: June 2011 |
506 | Why: The adm9240, w83792d and w83793 hardware monitoring drivers have | 496 | Why: The adm9240, w83792d and w83793 hardware monitoring drivers have |
@@ -518,22 +508,6 @@ Files: net/netfilter/xt_connlimit.c | |||
518 | 508 | ||
519 | ---------------------------- | 509 | ---------------------------- |
520 | 510 | ||
521 | What: noswapaccount kernel command line parameter | ||
522 | When: 2.6.40 | ||
523 | Why: The original implementation of memsw feature enabled by | ||
524 | CONFIG_CGROUP_MEM_RES_CTLR_SWAP could be disabled by the noswapaccount | ||
525 | kernel parameter (introduced in 2.6.29-rc1). Later on, this decision | ||
526 | turned out to be not ideal because we cannot have the feature compiled | ||
527 | in and disabled by default and let only interested to enable it | ||
528 | (e.g. general distribution kernels might need it). Therefore we have | ||
529 | added swapaccount[=0|1] parameter (introduced in 2.6.37) which provides | ||
530 | the both possibilities. If we remove noswapaccount we will have | ||
531 | less command line parameters with the same functionality and we | ||
532 | can also cleanup the parameter handling a bit (). | ||
533 | Who: Michal Hocko <mhocko@suse.cz> | ||
534 | |||
535 | ---------------------------- | ||
536 | |||
537 | What: ipt_addrtype match include file | 511 | What: ipt_addrtype match include file |
538 | When: 2012 | 512 | When: 2012 |
539 | Why: superseded by xt_addrtype | 513 | Why: superseded by xt_addrtype |
@@ -552,7 +526,7 @@ Who: Jean Delvare <khali@linux-fr.org> | |||
552 | ---------------------------- | 526 | ---------------------------- |
553 | 527 | ||
554 | What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver | 528 | What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver |
555 | When: 2.6.42 | 529 | When: 3.2 |
556 | Why: The information passed to the driver by this ioctl is now queried | 530 | Why: The information passed to the driver by this ioctl is now queried |
557 | dynamically from the device. | 531 | dynamically from the device. |
558 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | 532 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> |
@@ -560,7 +534,7 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | |||
560 | ---------------------------- | 534 | ---------------------------- |
561 | 535 | ||
562 | What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver | 536 | What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver |
563 | When: 2.6.42 | 537 | When: 3.2 |
564 | Why: Used only by applications compiled against older driver versions. | 538 | Why: Used only by applications compiled against older driver versions. |
565 | Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls. | 539 | Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls. |
566 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | 540 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> |
@@ -568,7 +542,7 @@ Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | |||
568 | ---------------------------- | 542 | ---------------------------- |
569 | 543 | ||
570 | What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver | 544 | What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver |
571 | When: 2.6.42 | 545 | When: 3.2 |
572 | Why: Superseded by the UVCIOC_CTRL_QUERY ioctl. | 546 | Why: Superseded by the UVCIOC_CTRL_QUERY ioctl. |
573 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | 547 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> |
574 | 548 | ||
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 82e8e52e8790..653380793a6c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -52,7 +52,7 @@ ata *); | |||
52 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 52 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
53 | void (*truncate) (struct inode *); | 53 | void (*truncate) (struct inode *); |
54 | int (*permission) (struct inode *, int, unsigned int); | 54 | int (*permission) (struct inode *, int, unsigned int); |
55 | int (*check_acl)(struct inode *, int, unsigned int); | 55 | int (*get_acl)(struct inode *, int); |
56 | int (*setattr) (struct dentry *, struct iattr *); | 56 | int (*setattr) (struct dentry *, struct iattr *); |
57 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); | 57 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); |
58 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 58 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
@@ -80,7 +80,7 @@ put_link: no | |||
80 | truncate: yes (see below) | 80 | truncate: yes (see below) |
81 | setattr: yes | 81 | setattr: yes |
82 | permission: no (may not block if called in rcu-walk mode) | 82 | permission: no (may not block if called in rcu-walk mode) |
83 | check_acl: no | 83 | get_acl: no |
84 | getattr: no | 84 | getattr: no |
85 | setxattr: yes | 85 | setxattr: yes |
86 | getxattr: no | 86 | getxattr: no |
@@ -412,7 +412,7 @@ prototypes: | |||
412 | int (*open) (struct inode *, struct file *); | 412 | int (*open) (struct inode *, struct file *); |
413 | int (*flush) (struct file *); | 413 | int (*flush) (struct file *); |
414 | int (*release) (struct inode *, struct file *); | 414 | int (*release) (struct inode *, struct file *); |
415 | int (*fsync) (struct file *, int datasync); | 415 | int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); |
416 | int (*aio_fsync) (struct kiocb *, int datasync); | 416 | int (*aio_fsync) (struct kiocb *, int datasync); |
417 | int (*fasync) (int, struct file *, int); | 417 | int (*fasync) (int, struct file *, int); |
418 | int (*lock) (struct file *, int, struct file_lock *); | 418 | int (*lock) (struct file *, int, struct file_lock *); |
@@ -438,9 +438,7 @@ prototypes: | |||
438 | 438 | ||
439 | locking rules: | 439 | locking rules: |
440 | All may block except for ->setlease. | 440 | All may block except for ->setlease. |
441 | No VFS locks held on entry except for ->fsync and ->setlease. | 441 | No VFS locks held on entry except for ->setlease. |
442 | |||
443 | ->fsync() has i_mutex on inode. | ||
444 | 442 | ||
445 | ->setlease has the file_list_lock held and must not sleep. | 443 | ->setlease has the file_list_lock held and must not sleep. |
446 | 444 | ||
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 90c71c6f0d00..ffdd9d866ad7 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
@@ -226,7 +226,7 @@ They depend on various facilities being available: | |||
226 | cdrecord. | 226 | cdrecord. |
227 | 227 | ||
228 | e.g. | 228 | e.g. |
229 | cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso | 229 | cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso |
230 | 230 | ||
231 | For more information on isolinux, including how to create bootdisks | 231 | For more information on isolinux, including how to create bootdisks |
232 | for prebuilt kernels, see http://syslinux.zytor.com/ | 232 | for prebuilt kernels, see http://syslinux.zytor.com/ |
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index d5c0cef38a71..873a2ab2e9f8 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -40,7 +40,6 @@ Features which NILFS2 does not support yet: | |||
40 | - POSIX ACLs | 40 | - POSIX ACLs |
41 | - quotas | 41 | - quotas |
42 | - fsck | 42 | - fsck |
43 | - resize | ||
44 | - defragmentation | 43 | - defragmentation |
45 | 44 | ||
46 | Mount options | 45 | Mount options |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6e29954851a2..b4a3d765ff9a 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -400,10 +400,32 @@ a file off. | |||
400 | 400 | ||
401 | -- | 401 | -- |
402 | [mandatory] | 402 | [mandatory] |
403 | |||
404 | -- | ||
405 | [mandatory] | ||
406 | ->get_sb() is gone. Switch to use of ->mount(). Typically it's just | 403 | ->get_sb() is gone. Switch to use of ->mount(). Typically it's just |
407 | a matter of switching from calling get_sb_... to mount_... and changing the | 404 | a matter of switching from calling get_sb_... to mount_... and changing the |
408 | function type. If you were doing it manually, just switch from setting ->mnt_root | 405 | function type. If you were doing it manually, just switch from setting ->mnt_root |
409 | to some pointer to returning that pointer. On errors return ERR_PTR(...). | 406 | to some pointer to returning that pointer. On errors return ERR_PTR(...). |
407 | |||
408 | -- | ||
409 | [mandatory] | ||
410 | ->permission() and generic_permission() have lost flags | ||
411 | argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask. | ||
412 | generic_permission() has also lost the check_acl argument; ACL checking | ||
413 | has been taken to VFS and filesystems need to provide a non-NULL ->i_op->get_acl | ||
414 | to read an ACL from disk. | ||
415 | |||
416 | -- | ||
417 | [mandatory] | ||
418 | If you implement your own ->llseek() you must handle SEEK_HOLE and | ||
419 | SEEK_DATA. You can handle this by returning -EINVAL, but it would be nicer to | ||
420 | support it in some way. The generic handler assumes that the entire file is | ||
421 | data and there is a virtual hole at the end of the file. So if the provided | ||
422 | offset is less than i_size and SEEK_DATA is specified, return the same offset. | ||
423 | If the above is true for the offset and you are given SEEK_HOLE, return the end | ||
424 | of the file. If the offset is i_size or greater return -ENXIO in either case. | ||
425 | |||
426 | [mandatory] | ||
427 | If you have your own ->fsync() you must make sure to call | ||
428 | filemap_write_and_wait_range() so that all dirty pages are synced out properly. | ||
429 | You must also keep in mind that ->fsync() is not called with i_mutex held | ||
430 | anymore, so if you require i_mutex locking you must make sure to take it and | ||
431 | release it yourself. | ||
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 8e4fab639d9c..a0a61d2f389f 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt | |||
@@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0 | |||
111 | to UBI and mount volume "rootfs": | 111 | to UBI and mount volume "rootfs": |
112 | ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs | 112 | ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs |
113 | 113 | ||
114 | |||
115 | Module Parameters for Debugging | ||
116 | =============================== | ||
117 | |||
118 | When UBIFS has been compiled with debugging enabled, there are 2 module | ||
119 | parameters that are available to control aspects of testing and debugging. | ||
120 | |||
121 | debug_chks Selects extra checks that UBIFS can do while running: | ||
122 | |||
123 | Check Flag value | ||
124 | |||
125 | General checks 1 | ||
126 | Check Tree Node Cache (TNC) 2 | ||
127 | Check indexing tree size 4 | ||
128 | Check orphan area 8 | ||
129 | Check old indexing tree 16 | ||
130 | Check LEB properties (lprops) 32 | ||
131 | Check leaf nodes and inodes 64 | ||
132 | |||
133 | debug_tsts Selects a mode of testing, as follows: | ||
134 | |||
135 | Test mode Flag value | ||
136 | |||
137 | Failure mode for recovery testing 4 | ||
138 | |||
139 | For example, set debug_chks to 3 to enable general and TNC checks. | ||
140 | |||
141 | |||
142 | References | 114 | References |
143 | ========== | 115 | ========== |
144 | 116 | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 88b9f5519af9..52d8fb81cfff 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -229,6 +229,8 @@ struct super_operations { | |||
229 | 229 | ||
230 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 230 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
231 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 231 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
232 | int (*nr_cached_objects)(struct super_block *); | ||
233 | void (*free_cached_objects)(struct super_block *, int); | ||
232 | }; | 234 | }; |
233 | 235 | ||
234 | All methods are called without any locks being held, unless otherwise | 236 | All methods are called without any locks being held, unless otherwise |
@@ -301,6 +303,26 @@ or bottom half). | |||
301 | 303 | ||
302 | quota_write: called by the VFS to write to filesystem quota file. | 304 | quota_write: called by the VFS to write to filesystem quota file. |
303 | 305 | ||
306 | nr_cached_objects: called by the sb cache shrinking function for the | ||
307 | filesystem to return the number of freeable cached objects it contains. | ||
308 | Optional. | ||
309 | |||
310 | free_cached_objects: called by the sb cache shrinking function for the | ||
311 | filesystem to scan the number of objects indicated to try to free them. | ||
312 | Optional, but any filesystem implementing this method needs to also | ||
313 | implement ->nr_cached_objects for it to be called correctly. | ||
314 | |||
315 | We can't do anything with any errors that the filesystem might | ||
316 | encounter, hence the void return type. This will never be called if | ||
317 | the VM is trying to reclaim under GFP_NOFS conditions, hence this | ||
318 | method does not need to handle that situation itself. | ||
319 | |||
320 | Implementations must include conditional reschedule calls inside any | ||
321 | scanning loop that is done. This allows the VFS to determine | ||
322 | appropriate scan batch sizes without having to worry about whether | ||
323 | implementations will cause holdoff problems due to large scan batch | ||
324 | sizes. | ||
325 | |||
304 | Whoever sets up the inode is responsible for filling in the "i_op" field. This | 326 | Whoever sets up the inode is responsible for filling in the "i_op" field. This |
305 | is a pointer to a "struct inode_operations" which describes the methods that | 327 | is a pointer to a "struct inode_operations" which describes the methods that |
306 | can be performed on individual inodes. | 328 | can be performed on individual inodes. |
@@ -333,8 +355,8 @@ struct inode_operations { | |||
333 | void * (*follow_link) (struct dentry *, struct nameidata *); | 355 | void * (*follow_link) (struct dentry *, struct nameidata *); |
334 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 356 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
335 | void (*truncate) (struct inode *); | 357 | void (*truncate) (struct inode *); |
336 | int (*permission) (struct inode *, int, unsigned int); | 358 | int (*permission) (struct inode *, int); |
337 | int (*check_acl)(struct inode *, int, unsigned int); | 359 | int (*get_acl)(struct inode *, int); |
338 | int (*setattr) (struct dentry *, struct iattr *); | 360 | int (*setattr) (struct dentry *, struct iattr *); |
339 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); | 361 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); |
340 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 362 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
@@ -423,7 +445,7 @@ otherwise noted. | |||
423 | permission: called by the VFS to check for access rights on a POSIX-like | 445 | permission: called by the VFS to check for access rights on a POSIX-like |
424 | filesystem. | 446 | filesystem. |
425 | 447 | ||
426 | May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk | 448 | May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk |
427 | mode, the filesystem must check the permission without blocking or | 449 | mode, the filesystem must check the permission without blocking or |
428 | storing to the inode. | 450 | storing to the inode. |
429 | 451 | ||
@@ -755,7 +777,7 @@ struct file_operations { | |||
755 | int (*open) (struct inode *, struct file *); | 777 | int (*open) (struct inode *, struct file *); |
756 | int (*flush) (struct file *); | 778 | int (*flush) (struct file *); |
757 | int (*release) (struct inode *, struct file *); | 779 | int (*release) (struct inode *, struct file *); |
758 | int (*fsync) (struct file *, int datasync); | 780 | int (*fsync) (struct file *, loff_t, loff_t, int datasync); |
759 | int (*aio_fsync) (struct kiocb *, int datasync); | 781 | int (*aio_fsync) (struct kiocb *, int datasync); |
760 | int (*fasync) (int, struct file *, int); | 782 | int (*fasync) (int, struct file *, int); |
761 | int (*lock) (struct file *, int, struct file_lock *); | 783 | int (*lock) (struct file *, int, struct file_lock *); |
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87 index 38425f0f2645..6f496a586732 100644 --- a/Documentation/hwmon/it87 +++ b/Documentation/hwmon/it87 | |||
@@ -76,7 +76,8 @@ IT8718F, IT8720F, IT8721F, IT8726F, IT8758E and SiS950 chips. | |||
76 | These chips are 'Super I/O chips', supporting floppy disks, infrared ports, | 76 | These chips are 'Super I/O chips', supporting floppy disks, infrared ports, |
77 | joysticks and other miscellaneous stuff. For hardware monitoring, they | 77 | joysticks and other miscellaneous stuff. For hardware monitoring, they |
78 | include an 'environment controller' with 3 temperature sensors, 3 fan | 78 | include an 'environment controller' with 3 temperature sensors, 3 fan |
79 | rotation speed sensors, 8 voltage sensors, and associated alarms. | 79 | rotation speed sensors, 8 voltage sensors, associated alarms, and chassis |
80 | intrusion detection. | ||
80 | 81 | ||
81 | The IT8712F and IT8716F additionally feature VID inputs, used to report | 82 | The IT8712F and IT8716F additionally feature VID inputs, used to report |
82 | the Vcore voltage of the processor. The early IT8712F have 5 VID pins, | 83 | the Vcore voltage of the processor. The early IT8712F have 5 VID pins, |
diff --git a/Documentation/hwmon/lm78 b/Documentation/hwmon/lm78 index 60932e26abaa..2bdc881a0c12 100644 --- a/Documentation/hwmon/lm78 +++ b/Documentation/hwmon/lm78 | |||
@@ -13,7 +13,8 @@ Supported chips: | |||
13 | Datasheet: Publicly available at the National Semiconductor website | 13 | Datasheet: Publicly available at the National Semiconductor website |
14 | http://www.national.com/ | 14 | http://www.national.com/ |
15 | 15 | ||
16 | Author: Frodo Looijaard <frodol@dds.nl> | 16 | Authors: Frodo Looijaard <frodol@dds.nl> |
17 | Jean Delvare <khali@linux-fr.org> | ||
17 | 18 | ||
18 | Description | 19 | Description |
19 | ----------- | 20 | ----------- |
diff --git a/Documentation/hwmon/sch5636 b/Documentation/hwmon/sch5636 new file mode 100644 index 000000000000..f83bd1c260f0 --- /dev/null +++ b/Documentation/hwmon/sch5636 | |||
@@ -0,0 +1,31 @@ | |||
1 | Kernel driver sch5636 | ||
2 | ===================== | ||
3 | |||
4 | Supported chips: | ||
5 | * SMSC SCH5636 | ||
6 | Prefix: 'sch5636' | ||
7 | Addresses scanned: none, address read from Super I/O config space | ||
8 | |||
9 | Author: Hans de Goede <hdegoede@redhat.com> | ||
10 | |||
11 | |||
12 | Description | ||
13 | ----------- | ||
14 | |||
15 | SMSC SCH5636 Super I/O chips include an embedded microcontroller for | ||
16 | hardware monitoring solutions, allowing motherboard manufacturers to create | ||
17 | their own custom hwmon solution based upon the SCH5636. | ||
18 | |||
19 | Currently the sch5636 driver only supports the Fujitsu Theseus SCH5636 based | ||
20 | hwmon solution. The sch5636 driver runs a sanity check on loading to ensure | ||
21 | it is dealing with a Fujitsu Theseus and not with another custom SCH5636 based | ||
22 | hwmon solution. | ||
23 | |||
24 | The Fujitsu Theseus can monitor up to 5 voltages, 8 fans and 16 | ||
25 | temperatures. Note that the driver detects how many fan headers / | ||
26 | temperature sensors are actually implemented on the motherboard, so you will | ||
27 | likely see fewer temperature and fan inputs. | ||
28 | |||
29 | An application note describing the Theseus' registers, as well as an | ||
30 | application note describing the protocol for communicating with the | ||
31 | microcontroller are available upon request. Please mail me if you want a copy. | ||
diff --git a/Documentation/i2o/ioctl b/Documentation/i2o/ioctl index 1e77fac4e120..22ca53a67e23 100644 --- a/Documentation/i2o/ioctl +++ b/Documentation/i2o/ioctl | |||
@@ -110,7 +110,7 @@ V. Getting Logical Configuration Table | |||
110 | ENOBUFS Buffer not large enough. If this occurs, the required | 110 | ENOBUFS Buffer not large enough. If this occurs, the required |
111 | buffer length is written into *(lct->reslen) | 111 | buffer length is written into *(lct->reslen) |
112 | 112 | ||
113 | VI. Settting Parameters | 113 | VI. Setting Parameters |
114 | 114 | ||
115 | SYNOPSIS | 115 | SYNOPSIS |
116 | 116 | ||
diff --git a/Documentation/isdn/README.HiSax b/Documentation/isdn/README.HiSax index 99e87a61897d..b1a573cf4472 100644 --- a/Documentation/isdn/README.HiSax +++ b/Documentation/isdn/README.HiSax | |||
@@ -506,7 +506,7 @@ to e.g. the Internet: | |||
506 | <ISDN subsystem - ISDN support -- HiSax> | 506 | <ISDN subsystem - ISDN support -- HiSax> |
507 | make clean; make zImage; make modules; make modules_install | 507 | make clean; make zImage; make modules; make modules_install |
508 | 2. Install the new kernel | 508 | 2. Install the new kernel |
509 | cp /usr/src/linux/arch/i386/boot/zImage /etc/kernel/linux.isdn | 509 | cp /usr/src/linux/arch/x86/boot/zImage /etc/kernel/linux.isdn |
510 | vi /etc/lilo.conf | 510 | vi /etc/lilo.conf |
511 | <add new kernel in the bootable image section> | 511 | <add new kernel in the bootable image section> |
512 | lilo | 512 | lilo |
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist index 2df4576f1173..cb5507b1ac81 100644 --- a/Documentation/ja_JP/SubmitChecklist +++ b/Documentation/ja_JP/SubmitChecklist | |||
@@ -68,7 +68,7 @@ Linux カーネルパッチ投稿者向けチェックリスト | |||
68 | 68 | ||
69 | 12: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB, | 69 | 12: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB, |
70 | CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK, | 70 | CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK, |
71 | CONFIG_DEBUG_SPINLOCK_SLEEP これら全てを同時に有効にして動作確認を | 71 | CONFIG_DEBUG_ATOMIC_SLEEP これら全てを同時に有効にして動作確認を |
72 | 行ってください。 | 72 | 行ってください。 |
73 | 73 | ||
74 | 13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で | 74 | 13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で |
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 47435e56c5da..f47cdefb4d1e 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt | |||
@@ -441,7 +441,7 @@ more details, with real examples. | |||
441 | specified if the first option is not supported. | 441 | specified if the first option is not supported. |
442 | 442 | ||
443 | Example: | 443 | Example: |
444 | #arch/i386/kernel/Makefile | 444 | #arch/x86/kernel/Makefile |
445 | vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) | 445 | vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) |
446 | 446 | ||
447 | In the above example, vsyscall-flags will be assigned the option | 447 | In the above example, vsyscall-flags will be assigned the option |
@@ -460,7 +460,7 @@ more details, with real examples. | |||
460 | supported to use an optional second option. | 460 | supported to use an optional second option. |
461 | 461 | ||
462 | Example: | 462 | Example: |
463 | #arch/i386/Makefile | 463 | #arch/x86/Makefile |
464 | cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586) | 464 | cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586) |
465 | 465 | ||
466 | In the above example, cflags-y will be assigned the option | 466 | In the above example, cflags-y will be assigned the option |
@@ -522,7 +522,7 @@ more details, with real examples. | |||
522 | even though the option was accepted by gcc. | 522 | even though the option was accepted by gcc. |
523 | 523 | ||
524 | Example: | 524 | Example: |
525 | #arch/i386/Makefile | 525 | #arch/x86/Makefile |
526 | cflags-y += $(shell \ | 526 | cflags-y += $(shell \ |
527 | if [ $(call cc-version) -ge 0300 ] ; then \ | 527 | if [ $(call cc-version) -ge 0300 ] ; then \ |
528 | echo "-mregparm=3"; fi ;) | 528 | echo "-mregparm=3"; fi ;) |
@@ -802,7 +802,7 @@ but in the architecture makefiles where the kbuild infrastructure | |||
802 | is not sufficient this sometimes needs to be explicit. | 802 | is not sufficient this sometimes needs to be explicit. |
803 | 803 | ||
804 | Example: | 804 | Example: |
805 | #arch/i386/boot/Makefile | 805 | #arch/x86/boot/Makefile |
806 | subdir- := compressed/ | 806 | subdir- := compressed/ |
807 | 807 | ||
808 | The above assignment instructs kbuild to descend down in the | 808 | The above assignment instructs kbuild to descend down in the |
@@ -812,12 +812,12 @@ To support the clean infrastructure in the Makefiles that builds the | |||
812 | final bootimage there is an optional target named archclean: | 812 | final bootimage there is an optional target named archclean: |
813 | 813 | ||
814 | Example: | 814 | Example: |
815 | #arch/i386/Makefile | 815 | #arch/x86/Makefile |
816 | archclean: | 816 | archclean: |
817 | $(Q)$(MAKE) $(clean)=arch/i386/boot | 817 | $(Q)$(MAKE) $(clean)=arch/x86/boot |
818 | 818 | ||
819 | When "make clean" is executed, make will descend down in arch/i386/boot, | 819 | When "make clean" is executed, make will descend down in arch/x86/boot, |
820 | and clean as usual. The Makefile located in arch/i386/boot/ may use | 820 | and clean as usual. The Makefile located in arch/x86/boot/ may use |
821 | the subdir- trick to descend further down. | 821 | the subdir- trick to descend further down. |
822 | 822 | ||
823 | Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is | 823 | Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is |
@@ -882,7 +882,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
882 | LDFLAGS_vmlinux uses the LDFLAGS_$@ support. | 882 | LDFLAGS_vmlinux uses the LDFLAGS_$@ support. |
883 | 883 | ||
884 | Example: | 884 | Example: |
885 | #arch/i386/Makefile | 885 | #arch/x86/Makefile |
886 | LDFLAGS_vmlinux := -e stext | 886 | LDFLAGS_vmlinux := -e stext |
887 | 887 | ||
888 | OBJCOPYFLAGS objcopy flags | 888 | OBJCOPYFLAGS objcopy flags |
@@ -920,14 +920,14 @@ When kbuild executes, the following steps are followed (roughly): | |||
920 | Often, the KBUILD_CFLAGS variable depends on the configuration. | 920 | Often, the KBUILD_CFLAGS variable depends on the configuration. |
921 | 921 | ||
922 | Example: | 922 | Example: |
923 | #arch/i386/Makefile | 923 | #arch/x86/Makefile |
924 | cflags-$(CONFIG_M386) += -march=i386 | 924 | cflags-$(CONFIG_M386) += -march=i386 |
925 | KBUILD_CFLAGS += $(cflags-y) | 925 | KBUILD_CFLAGS += $(cflags-y) |
926 | 926 | ||
927 | Many arch Makefiles dynamically run the target C compiler to | 927 | Many arch Makefiles dynamically run the target C compiler to |
928 | probe supported options: | 928 | probe supported options: |
929 | 929 | ||
930 | #arch/i386/Makefile | 930 | #arch/x86/Makefile |
931 | 931 | ||
932 | ... | 932 | ... |
933 | cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\ | 933 | cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\ |
@@ -1038,8 +1038,8 @@ When kbuild executes, the following steps are followed (roughly): | |||
1038 | into the arch/$(ARCH)/boot/Makefile. | 1038 | into the arch/$(ARCH)/boot/Makefile. |
1039 | 1039 | ||
1040 | Example: | 1040 | Example: |
1041 | #arch/i386/Makefile | 1041 | #arch/x86/Makefile |
1042 | boot := arch/i386/boot | 1042 | boot := arch/x86/boot |
1043 | bzImage: vmlinux | 1043 | bzImage: vmlinux |
1044 | $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ | 1044 | $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ |
1045 | 1045 | ||
@@ -1051,7 +1051,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1051 | To support this, $(archhelp) must be defined. | 1051 | To support this, $(archhelp) must be defined. |
1052 | 1052 | ||
1053 | Example: | 1053 | Example: |
1054 | #arch/i386/Makefile | 1054 | #arch/x86/Makefile |
1055 | define archhelp | 1055 | define archhelp |
1056 | echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)' | 1056 | echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)' |
1057 | endef | 1057 | endef |
@@ -1065,7 +1065,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1065 | from vmlinux. | 1065 | from vmlinux. |
1066 | 1066 | ||
1067 | Example: | 1067 | Example: |
1068 | #arch/i386/Makefile | 1068 | #arch/x86/Makefile |
1069 | all: bzImage | 1069 | all: bzImage |
1070 | 1070 | ||
1071 | When "make" is executed without arguments, bzImage will be built. | 1071 | When "make" is executed without arguments, bzImage will be built. |
@@ -1083,7 +1083,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1083 | 2) kbuild knows what files to delete during "make clean" | 1083 | 2) kbuild knows what files to delete during "make clean" |
1084 | 1084 | ||
1085 | Example: | 1085 | Example: |
1086 | #arch/i386/kernel/Makefile | 1086 | #arch/x86/kernel/Makefile |
1087 | extra-y := head.o init_task.o | 1087 | extra-y := head.o init_task.o |
1088 | 1088 | ||
1089 | In this example, extra-y is used to list object files that | 1089 | In this example, extra-y is used to list object files that |
@@ -1133,7 +1133,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1133 | Compress target. Use maximum compression to compress target. | 1133 | Compress target. Use maximum compression to compress target. |
1134 | 1134 | ||
1135 | Example: | 1135 | Example: |
1136 | #arch/i386/boot/Makefile | 1136 | #arch/x86/boot/Makefile |
1137 | LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary | 1137 | LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary |
1138 | LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext | 1138 | LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext |
1139 | 1139 | ||
@@ -1193,7 +1193,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1193 | 1193 | ||
1194 | When updating the $(obj)/bzImage target, the line | 1194 | When updating the $(obj)/bzImage target, the line |
1195 | 1195 | ||
1196 | BUILD arch/i386/boot/bzImage | 1196 | BUILD arch/x86/boot/bzImage |
1197 | 1197 | ||
1198 | will be displayed with "make KBUILD_VERBOSE=0". | 1198 | will be displayed with "make KBUILD_VERBOSE=0". |
1199 | 1199 | ||
@@ -1207,7 +1207,7 @@ When kbuild executes, the following steps are followed (roughly): | |||
1207 | kbuild knows .lds files and includes a rule *lds.S -> *lds. | 1207 | kbuild knows .lds files and includes a rule *lds.S -> *lds. |
1208 | 1208 | ||
1209 | Example: | 1209 | Example: |
1210 | #arch/i386/kernel/Makefile | 1210 | #arch/x86/kernel/Makefile |
1211 | always := vmlinux.lds | 1211 | always := vmlinux.lds |
1212 | 1212 | ||
1213 | #Makefile | 1213 | #Makefile |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be71df4c..40cc653984ee 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1159,10 +1159,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1159 | for all guests. | 1159 | for all guests. |
1160 | Default is 1 (enabled) if in 64bit or 32bit-PAE mode | 1160 | Default is 1 (enabled) if in 64bit or 32bit-PAE mode |
1161 | 1161 | ||
1162 | kvm-intel.bypass_guest_pf= | ||
1163 | [KVM,Intel] Disables bypassing of guest page faults | ||
1164 | on Intel chips. Default is 1 (enabled) | ||
1165 | |||
1166 | kvm-intel.ept= [KVM,Intel] Disable extended page tables | 1162 | kvm-intel.ept= [KVM,Intel] Disable extended page tables |
1167 | (virtualized MMU) support on capable Intel chips. | 1163 | (virtualized MMU) support on capable Intel chips. |
1168 | Default is 1 (enabled) | 1164 | Default is 1 (enabled) |
@@ -1737,6 +1733,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1737 | no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page | 1733 | no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page |
1738 | fault handling. | 1734 | fault handling. |
1739 | 1735 | ||
1736 | no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting. | ||
1737 | steal time is computed, but won't influence scheduler | ||
1738 | behaviour | ||
1739 | |||
1740 | nolapic [X86-32,APIC] Do not enable or use the local APIC. | 1740 | nolapic [X86-32,APIC] Do not enable or use the local APIC. |
1741 | 1741 | ||
1742 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. | 1742 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. |
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt index 4b12abcb2ad3..abf481f780ec 100644 --- a/Documentation/magic-number.txt +++ b/Documentation/magic-number.txt | |||
@@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h | |||
66 | RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h | 66 | RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h |
67 | SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h | 67 | SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h |
68 | HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c | 68 | HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c |
69 | APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c | 69 | APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c |
70 | CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h | 70 | CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h |
71 | DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c | 71 | DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c |
72 | DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c | 72 | DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c |
diff --git a/Documentation/mca.txt b/Documentation/mca.txt index 510375d4209a..dfd130c2207d 100644 --- a/Documentation/mca.txt +++ b/Documentation/mca.txt | |||
@@ -11,7 +11,7 @@ Adapter Detection | |||
11 | 11 | ||
12 | The ideal MCA adapter detection is done through the use of the | 12 | The ideal MCA adapter detection is done through the use of the |
13 | Programmable Option Select registers. Generic functions for doing | 13 | Programmable Option Select registers. Generic functions for doing |
14 | this have been added in include/linux/mca.h and arch/i386/kernel/mca.c. | 14 | this have been added in include/linux/mca.h and arch/x86/kernel/mca_32.c. |
15 | Everything needed to detect adapters and read (and write) configuration | 15 | Everything needed to detect adapters and read (and write) configuration |
16 | information is there. A number of MCA-specific drivers already use | 16 | information is there. A number of MCA-specific drivers already use |
17 | this. The typical probe code looks like the following: | 17 | this. The typical probe code looks like the following: |
@@ -81,7 +81,7 @@ more people use shared IRQs on PCI machines. | |||
81 | In general, an interrupt must be acknowledged not only at the ICU (which | 81 | In general, an interrupt must be acknowledged not only at the ICU (which |
82 | is done automagically by the kernel), but at the device level. In | 82 | is done automagically by the kernel), but at the device level. In |
83 | particular, IRQ 0 must be reset after a timer interrupt (now done in | 83 | particular, IRQ 0 must be reset after a timer interrupt (now done in |
84 | arch/i386/kernel/time.c) or the first timer interrupt hangs the system. | 84 | arch/x86/kernel/time.c) or the first timer interrupt hangs the system. |
85 | There were also problems with the 1.3.x floppy drivers, but that seems | 85 | There were also problems with the 1.3.x floppy drivers, but that seems |
86 | to have been fixed. | 86 | to have been fixed. |
87 | 87 | ||
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX index 93dd7a714075..a9ba6720ffdf 100644 --- a/Documentation/mmc/00-INDEX +++ b/Documentation/mmc/00-INDEX | |||
@@ -4,3 +4,5 @@ mmc-dev-attrs.txt | |||
4 | - info on SD and MMC device attributes | 4 | - info on SD and MMC device attributes |
5 | mmc-dev-parts.txt | 5 | mmc-dev-parts.txt |
6 | - info on SD and MMC device partitions | 6 | - info on SD and MMC device partitions |
7 | mmc-async-req.txt | ||
8 | - info on mmc asynchronous requests | ||
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt new file mode 100644 index 000000000000..ae1907b10e4a --- /dev/null +++ b/Documentation/mmc/mmc-async-req.txt | |||
@@ -0,0 +1,87 @@ | |||
1 | Rationale | ||
2 | ========= | ||
3 | |||
4 | How significant is the cache maintenance overhead? | ||
5 | It depends. Fast eMMC and multiple cache levels with speculative cache | ||
6 | pre-fetch makes the cache overhead relatively significant. If the DMA | ||
7 | preparations for the next request are done in parallel with the current | ||
8 | transfer, the DMA preparation overhead would not affect the MMC performance. | ||
9 | The intention of non-blocking (asynchronous) MMC requests is to minimize the | ||
10 | time between when an MMC request ends and another MMC request begins. | ||
11 | Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and | ||
12 | dma_unmap_sg are processing. Using non-blocking MMC requests makes it | ||
13 | possible to prepare the caches for next job in parallel with an active | ||
14 | MMC request. | ||
15 | |||
16 | MMC block driver | ||
17 | ================ | ||
18 | |||
19 | The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking. | ||
20 | The increase in throughput is proportional to the time it takes to | ||
21 | prepare (major part of preparations are dma_map_sg() and dma_unmap_sg()) | ||
22 | a request and how fast the memory is. The faster the MMC/SD is the | ||
23 | more significant the prepare request time becomes. Roughly the expected | ||
24 | performance gain is 5% for large writes and 10% on large reads on a L2 cache | ||
25 | platform. In power save mode, when clocks run on a lower frequency, the DMA | ||
26 | preparation may cost even more. As long as these slower preparations are run | ||
27 | in parallel with the transfer, performance won't be affected. | ||
28 | |||
29 | Details on measurements from IOZone and mmc_test | ||
30 | ================================================ | ||
31 | |||
32 | https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req | ||
33 | |||
34 | MMC core API extension | ||
35 | ====================== | ||
36 | |||
37 | There is one new public function mmc_start_req(). | ||
38 | It starts a new MMC command request for a host. The function isn't | ||
39 | truly non-blocking. If there is an ongoing async request it waits | ||
40 | for completion of that request and starts the new one and returns. It | ||
41 | doesn't wait for the new request to complete. If there is no ongoing | ||
42 | request it starts the new request and returns immediately. | ||
43 | |||
44 | MMC host extensions | ||
45 | =================== | ||
46 | |||
47 | There are two optional members in the mmc_host_ops -- pre_req() and | ||
48 | post_req() -- that the host driver may implement in order to move work | ||
49 | to before and after the actual mmc_host_ops.request() function is called. | ||
50 | In the DMA case pre_req() may do dma_map_sg() and prepare the DMA | ||
51 | descriptor, and post_req() runs the dma_unmap_sg(). | ||
52 | |||
53 | Optimize for the first request | ||
54 | ============================== | ||
55 | |||
56 | The first request in a series of requests can't be prepared in parallel | ||
57 | with the previous transfer, since there is no previous request. | ||
58 | The argument is_first_req in pre_req() indicates that there is no previous | ||
59 | request. The host driver may optimize for this scenario to minimize | ||
60 | the performance loss. A way to optimize for this is to split the current | ||
61 | request in two chunks, prepare the first chunk and start the request, | ||
62 | and finally prepare the second chunk and start the transfer. | ||
63 | |||
64 | Pseudocode to handle is_first_req scenario with minimal prepare overhead: | ||
65 | |||
66 | if (is_first_req && req->size > threshold) | ||
67 | /* start MMC transfer for the complete transfer size */ | ||
68 | mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE); | ||
69 | |||
70 | /* | ||
71 | * Begin to prepare DMA while cmd is being processed by MMC. | ||
72 | * The first chunk of the request should take the same time | ||
73 | * to prepare as the "MMC process command time". | ||
74 | * If prepare time exceeds MMC cmd time | ||
75 | * the transfer is delayed, guesstimate max 4k as first chunk size. | ||
76 | */ | ||
77 | prepare_1st_chunk_for_dma(req); | ||
78 | /* flush pending desc to the DMAC (dmaengine.h) */ | ||
79 | dma_issue_pending(req->dma_desc); | ||
80 | |||
81 | prepare_2nd_chunk_for_dma(req); | ||
82 | /* | ||
83 | * The second issue_pending should be called before MMC runs out | ||
84 | * of the first chunk. If the MMC runs out of the first data chunk | ||
85 | * before this call, the transfer is delayed. | ||
86 | */ | ||
87 | dma_issue_pending(req->dma_desc); | ||
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c index 2bac9618c345..65968fbf1e49 100644 --- a/Documentation/networking/ifenslave.c +++ b/Documentation/networking/ifenslave.c | |||
@@ -260,7 +260,7 @@ int main(int argc, char *argv[]) | |||
260 | case 'V': opt_V++; exclusive++; break; | 260 | case 'V': opt_V++; exclusive++; break; |
261 | 261 | ||
262 | case '?': | 262 | case '?': |
263 | fprintf(stderr, usage_msg); | 263 | fprintf(stderr, "%s", usage_msg); |
264 | res = 2; | 264 | res = 2; |
265 | goto out; | 265 | goto out; |
266 | } | 266 | } |
@@ -268,13 +268,13 @@ int main(int argc, char *argv[]) | |||
268 | 268 | ||
269 | /* options check */ | 269 | /* options check */ |
270 | if (exclusive > 1) { | 270 | if (exclusive > 1) { |
271 | fprintf(stderr, usage_msg); | 271 | fprintf(stderr, "%s", usage_msg); |
272 | res = 2; | 272 | res = 2; |
273 | goto out; | 273 | goto out; |
274 | } | 274 | } |
275 | 275 | ||
276 | if (opt_v || opt_V) { | 276 | if (opt_v || opt_V) { |
277 | printf(version); | 277 | printf("%s", version); |
278 | if (opt_V) { | 278 | if (opt_V) { |
279 | res = 0; | 279 | res = 0; |
280 | goto out; | 280 | goto out; |
@@ -282,14 +282,14 @@ int main(int argc, char *argv[]) | |||
282 | } | 282 | } |
283 | 283 | ||
284 | if (opt_u) { | 284 | if (opt_u) { |
285 | printf(usage_msg); | 285 | printf("%s", usage_msg); |
286 | res = 0; | 286 | res = 0; |
287 | goto out; | 287 | goto out; |
288 | } | 288 | } |
289 | 289 | ||
290 | if (opt_h) { | 290 | if (opt_h) { |
291 | printf(usage_msg); | 291 | printf("%s", usage_msg); |
292 | printf(help_msg); | 292 | printf("%s", help_msg); |
293 | res = 0; | 293 | res = 0; |
294 | goto out; | 294 | goto out; |
295 | } | 295 | } |
@@ -309,7 +309,7 @@ int main(int argc, char *argv[]) | |||
309 | goto out; | 309 | goto out; |
310 | } else { | 310 | } else { |
311 | /* Just show usage */ | 311 | /* Just show usage */ |
312 | fprintf(stderr, usage_msg); | 312 | fprintf(stderr, "%s", usage_msg); |
313 | res = 2; | 313 | res = 2; |
314 | goto out; | 314 | goto out; |
315 | } | 315 | } |
@@ -320,7 +320,7 @@ int main(int argc, char *argv[]) | |||
320 | master_ifname = *spp++; | 320 | master_ifname = *spp++; |
321 | 321 | ||
322 | if (master_ifname == NULL) { | 322 | if (master_ifname == NULL) { |
323 | fprintf(stderr, usage_msg); | 323 | fprintf(stderr, "%s", usage_msg); |
324 | res = 2; | 324 | res = 2; |
325 | goto out; | 325 | goto out; |
326 | } | 326 | } |
@@ -339,7 +339,7 @@ int main(int argc, char *argv[]) | |||
339 | 339 | ||
340 | if (slave_ifname == NULL) { | 340 | if (slave_ifname == NULL) { |
341 | if (opt_d || opt_c) { | 341 | if (opt_d || opt_c) { |
342 | fprintf(stderr, usage_msg); | 342 | fprintf(stderr, "%s", usage_msg); |
343 | res = 2; | 343 | res = 2; |
344 | goto out; | 344 | goto out; |
345 | } | 345 | } |
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index bfe924217f24..db2a4067013c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER | |||
106 | when the number of entries in the pool is very small). | 106 | when the number of entries in the pool is very small). |
107 | Measured in seconds. | 107 | Measured in seconds. |
108 | 108 | ||
109 | inet_peer_gc_mintime - INTEGER | ||
110 | Minimum interval between garbage collection passes. This interval is | ||
111 | in effect under high memory pressure on the pool. | ||
112 | Measured in seconds. | ||
113 | |||
114 | inet_peer_gc_maxtime - INTEGER | ||
115 | Minimum interval between garbage collection passes. This interval is | ||
116 | in effect under low (or absent) memory pressure on the pool. | ||
117 | Measured in seconds. | ||
118 | |||
119 | TCP variables: | 109 | TCP variables: |
120 | 110 | ||
121 | somaxconn - INTEGER | 111 | somaxconn - INTEGER |
@@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
394 | min: Minimal size of receive buffer used by TCP sockets. | 384 | min: Minimal size of receive buffer used by TCP sockets. |
395 | It is guaranteed to each TCP socket, even under moderate memory | 385 | It is guaranteed to each TCP socket, even under moderate memory |
396 | pressure. | 386 | pressure. |
397 | Default: 8K | 387 | Default: 1 page |
398 | 388 | ||
399 | default: initial size of receive buffer used by TCP sockets. | 389 | default: initial size of receive buffer used by TCP sockets. |
400 | This value overrides net.core.rmem_default used by other protocols. | 390 | This value overrides net.core.rmem_default used by other protocols. |
@@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN | |||
483 | tcp_wmem - vector of 3 INTEGERs: min, default, max | 473 | tcp_wmem - vector of 3 INTEGERs: min, default, max |
484 | min: Amount of memory reserved for send buffers for TCP sockets. | 474 | min: Amount of memory reserved for send buffers for TCP sockets. |
485 | Each TCP socket has rights to use it due to fact of its birth. | 475 | Each TCP socket has rights to use it due to fact of its birth. |
486 | Default: 4K | 476 | Default: 1 page |
487 | 477 | ||
488 | default: initial size of send buffer used by TCP sockets. This | 478 | default: initial size of send buffer used by TCP sockets. This |
489 | value overrides net.core.wmem_default used by other protocols. | 479 | value overrides net.core.wmem_default used by other protocols. |
@@ -553,13 +543,13 @@ udp_rmem_min - INTEGER | |||
553 | Minimal size of receive buffer used by UDP sockets in moderation. | 543 | Minimal size of receive buffer used by UDP sockets in moderation. |
554 | Each UDP socket is able to use the size for receiving data, even if | 544 | Each UDP socket is able to use the size for receiving data, even if |
555 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. | 545 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. |
556 | Default: 4096 | 546 | Default: 1 page |
557 | 547 | ||
558 | udp_wmem_min - INTEGER | 548 | udp_wmem_min - INTEGER |
559 | Minimal size of send buffer used by UDP sockets in moderation. | 549 | Minimal size of send buffer used by UDP sockets in moderation. |
560 | Each UDP socket is able to use the size for sending data, even if | 550 | Each UDP socket is able to use the size for sending data, even if |
561 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. | 551 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. |
562 | Default: 4096 | 552 | Default: 1 page |
563 | 553 | ||
564 | CIPSOv4 Variables: | 554 | CIPSOv4 Variables: |
565 | 555 | ||
@@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max | |||
1465 | Default is calculated at boot time from amount of available memory. | 1455 | Default is calculated at boot time from amount of available memory. |
1466 | 1456 | ||
1467 | sctp_rmem - vector of 3 INTEGERs: min, default, max | 1457 | sctp_rmem - vector of 3 INTEGERs: min, default, max |
1468 | See tcp_rmem for a description. | 1458 | Only the first value ("min") is used, "default" and "max" are |
1459 | ignored. | ||
1460 | |||
1461 | min: Minimal size of receive buffer used by SCTP socket. | ||
1462 | It is guaranteed to each SCTP socket (but not association) even | ||
1463 | under moderate memory pressure. | ||
1464 | |||
1465 | Default: 1 page | ||
1469 | 1466 | ||
1470 | sctp_wmem - vector of 3 INTEGERs: min, default, max | 1467 | sctp_wmem - vector of 3 INTEGERs: min, default, max |
1471 | See tcp_wmem for a description. | 1468 | Currently this tunable has no effect. |
1472 | 1469 | ||
1473 | addr_scope_policy - INTEGER | 1470 | addr_scope_policy - INTEGER |
1474 | Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 | 1471 | Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 |
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt new file mode 100644 index 000000000000..4b1c0dcef84c --- /dev/null +++ b/Documentation/networking/netdev-features.txt | |||
@@ -0,0 +1,154 @@ | |||
1 | Netdev features mess and how to get out from it alive | ||
2 | ===================================================== | ||
3 | |||
4 | Author: | ||
5 | Michał Mirosław <mirq-linux@rere.qmqm.pl> | ||
6 | |||
7 | |||
8 | |||
9 | Part I: Feature sets | ||
10 | ====================== | ||
11 | |||
12 | Long gone are the days when a network card would just take and give packets | ||
13 | verbatim. Today's devices add multiple features and bugs (read: offloads) | ||
14 | that relieve an OS of various tasks like generating and checking checksums, | ||
15 | splitting packets, classifying them. Those capabilities and their state | ||
16 | are commonly referred to as netdev features in Linux kernel world. | ||
17 | |||
18 | There are currently three sets of features relevant to the driver, and | ||
19 | one used internally by network core: | ||
20 | |||
21 | 1. netdev->hw_features set contains features whose state may possibly | ||
22 | be changed (enabled or disabled) for a particular device by user's | ||
23 | request. This set should be initialized in ndo_init callback and not | ||
24 | changed later. | ||
25 | |||
26 | 2. netdev->features set contains features which are currently enabled | ||
27 | for a device. This should be changed only by network core or in | ||
28 | error paths of ndo_set_features callback. | ||
29 | |||
30 | 3. netdev->vlan_features set contains features whose state is inherited | ||
31 | by child VLAN devices (limits netdev->features set). This is currently | ||
32 | used for all VLAN devices whether tags are stripped or inserted in | ||
33 | hardware or software. | ||
34 | |||
35 | 4. netdev->wanted_features set contains feature set requested by user. | ||
36 | This set is filtered by ndo_fix_features callback whenever it or | ||
37 | some device-specific conditions change. This set is internal to | ||
38 | networking core and should not be referenced in drivers. | ||
39 | |||
40 | |||
41 | |||
42 | Part II: Controlling enabled features | ||
43 | ======================================= | ||
44 | |||
45 | When current feature set (netdev->features) is to be changed, new set | ||
46 | is calculated and filtered by calling ndo_fix_features callback | ||
47 | and netdev_fix_features(). If the resulting set differs from current | ||
48 | set, it is passed to ndo_set_features callback and (if the callback | ||
49 | returns success) replaces value stored in netdev->features. | ||
50 | NETDEV_FEAT_CHANGE notification is issued after that whenever current | ||
51 | set might have changed. | ||
52 | |||
53 | The following events trigger recalculation: | ||
54 | 1. device's registration, after ndo_init returned success | ||
55 | 2. user requested changes in features state | ||
56 | 3. netdev_update_features() is called | ||
57 | |||
58 | ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks | ||
59 | are treated as always returning success. | ||
60 | |||
61 | A driver that wants to trigger recalculation must do so by calling | ||
62 | netdev_update_features() while holding rtnl_lock. This should not be done | ||
63 | from ndo_*_features callbacks. netdev->features should not be modified by | ||
64 | driver except by means of ndo_fix_features callback. | ||
65 | |||
66 | |||
67 | |||
68 | Part III: Implementation hints | ||
69 | ================================ | ||
70 | |||
71 | * ndo_fix_features: | ||
72 | |||
73 | All dependencies between features should be resolved here. The resulting | ||
74 | set can be reduced further by networking core imposed limitations (as coded | ||
75 | in netdev_fix_features()). For this reason it is safer to disable a feature | ||
76 | when its dependencies are not met instead of forcing the dependency on. | ||
77 | |||
78 | This callback should not modify hardware nor driver state (should be | ||
79 | stateless). It can be called multiple times between successive | ||
80 | ndo_set_features calls. | ||
81 | |||
82 | Callback must not alter features contained in NETIF_F_SOFT_FEATURES or | ||
83 | NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but | ||
84 | care must be taken as the change won't affect already configured VLANs. | ||
85 | |||
86 | * ndo_set_features: | ||
87 | |||
88 | Hardware should be reconfigured to match passed feature set. The set | ||
89 | should not be altered unless some error condition happens that can't | ||
90 | be reliably detected in ndo_fix_features. In this case, the callback | ||
91 | should update netdev->features to match resulting hardware state. | ||
92 | Errors returned are not (and cannot be) propagated anywhere except dmesg. | ||
93 | (Note: successful return is zero, >0 means silent error.) | ||
94 | |||
95 | |||
96 | |||
97 | Part IV: Features | ||
98 | =================== | ||
99 | |||
100 | For current list of features, see include/linux/netdev_features.h. | ||
101 | This section describes semantics of some of them. | ||
102 | |||
103 | * Transmit checksumming | ||
104 | |||
105 | For complete description, see comments near the top of include/linux/skbuff.h. | ||
106 | |||
107 | Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM. | ||
108 | It means that device can fill TCP/UDP-like checksum anywhere in the packets | ||
109 | whatever headers there might be. | ||
110 | |||
111 | * Transmit TCP segmentation offload | ||
112 | |||
113 | NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit | ||
114 | set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6). | ||
115 | |||
116 | * Transmit DMA from high memory | ||
117 | |||
118 | On platforms where this is relevant, NETIF_F_HIGHDMA signals that | ||
119 | ndo_start_xmit can handle skbs with frags in high memory. | ||
120 | |||
121 | * Transmit scatter-gather | ||
122 | |||
123 | Those features say that ndo_start_xmit can handle fragmented skbs: | ||
124 | NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST --- | ||
125 | chained skbs (skb->next/prev list). | ||
126 | |||
127 | * Software features | ||
128 | |||
129 | Features contained in NETIF_F_SOFT_FEATURES are features of networking | ||
130 | stack. Driver should not change behaviour based on them. | ||
131 | |||
132 | * LLTX driver (deprecated for hardware drivers) | ||
133 | |||
134 | NETIF_F_LLTX should be set in drivers that implement their own locking in | ||
135 | transmit path or don't need locking at all (e.g. software tunnels). | ||
136 | In ndo_start_xmit, it is recommended to use a try_lock and return | ||
137 | NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly | ||
138 | protect against other callbacks (the rules you need to find out). | ||
139 | |||
140 | Don't use it for new drivers. | ||
141 | |||
142 | * netns-local device | ||
143 | |||
144 | NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between | ||
145 | network namespaces (e.g. loopback). | ||
146 | |||
147 | Don't use it in drivers. | ||
148 | |||
149 | * VLAN challenged | ||
150 | |||
151 | NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN | ||
152 | headers. Some drivers set this because the cards can't handle the bigger MTU. | ||
153 | [FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU | ||
154 | VLANs. This may be not useful, though.] | ||
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt new file mode 100644 index 000000000000..b24c29bdae27 --- /dev/null +++ b/Documentation/networking/nfc.txt | |||
@@ -0,0 +1,128 @@ | |||
1 | Linux NFC subsystem | ||
2 | =================== | ||
3 | |||
4 | The Near Field Communication (NFC) subsystem is required to standardize the | ||
5 | NFC device drivers development and to create a unified userspace interface. | ||
6 | |||
7 | This document covers the architecture overview, the device driver interface | ||
8 | description and the userspace interface description. | ||
9 | |||
10 | Architecture overview | ||
11 | --------------------- | ||
12 | |||
13 | The NFC subsystem is responsible for: | ||
14 | - NFC adapters management; | ||
15 | - Polling for targets; | ||
16 | - Low-level data exchange; | ||
17 | |||
18 | The subsystem is divided in some parts. The 'core' is responsible for | ||
19 | providing the device driver interface. On the other side, it is also | ||
20 | responsible for providing an interface to control operations and low-level | ||
21 | data exchange. | ||
22 | |||
23 | The control operations are available to userspace via generic netlink. | ||
24 | |||
25 | The low-level data exchange interface is provided by the new socket family | ||
26 | PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets. | ||
27 | |||
28 | |||
29 | +--------------------------------------+ | ||
30 | | USER SPACE | | ||
31 | +--------------------------------------+ | ||
32 | ^ ^ | ||
33 | | low-level | control | ||
34 | | data exchange | operations | ||
35 | | | | ||
36 | | v | ||
37 | | +-----------+ | ||
38 | | AF_NFC | netlink | | ||
39 | | socket +-----------+ | ||
40 | | raw ^ | ||
41 | | | | ||
42 | v v | ||
43 | +---------+ +-----------+ | ||
44 | | rawsock | <--------> | core | | ||
45 | +---------+ +-----------+ | ||
46 | ^ | ||
47 | | | ||
48 | v | ||
49 | +-----------+ | ||
50 | | driver | | ||
51 | +-----------+ | ||
52 | |||
53 | Device Driver Interface | ||
54 | ----------------------- | ||
55 | |||
56 | When registering on the NFC subsystem, the device driver must inform the core | ||
57 | of the set of supported NFC protocols and the set of ops callbacks. The ops | ||
58 | callbacks that must be implemented are the following: | ||
59 | |||
60 | * start_poll - setup the device to poll for targets | ||
61 | * stop_poll - stop an in-progress polling operation | ||
62 | * activate_target - select and initialize one of the targets found | ||
63 | * deactivate_target - deselect and deinitialize the selected target | ||
64 | * data_exchange - send data and receive the response (transceive operation) | ||
65 | |||
66 | Userspace interface | ||
67 | -------------------- | ||
68 | |||
69 | The userspace interface is divided in control operations and low-level data | ||
70 | exchange operation. | ||
71 | |||
72 | CONTROL OPERATIONS: | ||
73 | |||
74 | Generic netlink is used to implement the interface to the control operations. | ||
75 | The operations are composed of commands and events, all listed below: | ||
76 | |||
77 | * NFC_CMD_GET_DEVICE - get specific device info or dump the device list | ||
78 | * NFC_CMD_START_POLL - set up a specific device to poll for targets | ||
79 | * NFC_CMD_STOP_POLL - stop the polling operation in a specific device | ||
80 | * NFC_CMD_GET_TARGET - dump the list of targets found by a specific device | ||
81 | |||
82 | * NFC_EVENT_DEVICE_ADDED - reports an NFC device addition | ||
83 | * NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal | ||
84 | * NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets | ||
85 | are found | ||
86 | |||
87 | The user must call START_POLL to poll for NFC targets, passing the desired NFC | ||
88 | protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling | ||
89 | state until it finds any target. However, the user can stop the polling | ||
90 | operation by calling STOP_POLL command. In this case, it will be checked if | ||
91 | the requester of STOP_POLL is the same as that of START_POLL. | ||
92 | |||
93 | If the polling operation finds one or more targets, the event TARGETS_FOUND is | ||
94 | sent (including the device id). The user must call GET_TARGET to get the list of | ||
95 | all targets found by such device. Each reply message has target attributes with | ||
96 | relevant information such as the supported NFC protocols. | ||
97 | |||
98 | All polling operations requested through one netlink socket are stopped when | ||
99 | it's closed. | ||
100 | |||
101 | LOW-LEVEL DATA EXCHANGE: | ||
102 | |||
103 | The userspace must use PF_NFC sockets to perform any data communication with | ||
104 | targets. All NFC sockets use AF_NFC: | ||
105 | |||
106 | struct sockaddr_nfc { | ||
107 | sa_family_t sa_family; | ||
108 | __u32 dev_idx; | ||
109 | __u32 target_idx; | ||
110 | __u32 nfc_protocol; | ||
111 | }; | ||
112 | |||
113 | To establish a connection with one target, the user must create an | ||
114 | NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc | ||
115 | struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND | ||
116 | netlink event. As a target can support more than one NFC protocol, the user | ||
117 | must inform which protocol it wants to use. | ||
118 | |||
119 | Internally, 'connect' will result in an activate_target call to the driver. | ||
120 | When the socket is closed, the target is deactivated. | ||
121 | |||
122 | The data format exchanged through the sockets is NFC protocol dependent. For | ||
123 | instance, when communicating with MIFARE tags, the data exchanged are MIFARE | ||
124 | commands and their responses. | ||
125 | |||
126 | The first received packet is the response to the first sent packet and so | ||
127 | on. In order to allow valid "empty" responses, every data received has a NULL | ||
128 | header of 1 byte. | ||
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 80a7a3454902..57a24108b845 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt | |||
@@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers | |||
7 | (Synopsys IP blocks); it has been fully tested on STLinux platforms. | 7 | (Synopsys IP blocks); it has been fully tested on STLinux platforms. |
8 | 8 | ||
9 | Currently this network device driver is for all STM embedded MAC/GMAC | 9 | Currently this network device driver is for all STM embedded MAC/GMAC |
10 | (7xxx SoCs). Other platforms start using it i.e. ARM SPEAr. | 10 | (i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr. |
11 | 11 | ||
12 | DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 | 12 | DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 |
13 | Universal version 4.0 have been used for developing the first code | 13 | Universal version 4.0 have been used for developing the first code |
@@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp | |||
71 | the CPU while having the maximum throughput. | 71 | the CPU while having the maximum throughput. |
72 | 72 | ||
73 | 4.4) WOL | 73 | 4.4) WOL |
74 | Wake up on Lan feature through Magic Frame is only supported for the GMAC | 74 | Wake up on Lan feature through Magic and Unicast frames is supported for the GMAC |
75 | core. | 75 | core. |
76 | 76 | ||
77 | 4.5) DMA descriptors | 77 | 4.5) DMA descriptors |
@@ -91,11 +91,15 @@ LRO is not supported. | |||
91 | The driver is compatible with PAL to work with PHY and GPHY devices. | 91 | The driver is compatible with PAL to work with PHY and GPHY devices. |
92 | 92 | ||
93 | 4.9) Platform information | 93 | 4.9) Platform information |
94 | Several information came from the platform; please refer to the | 94 | Several driver's information can be passed through the platform |
95 | driver's Header file in include/linux directory. | 95 | These are included in the include/linux/stmmac.h header file |
96 | and detailed below as well: | ||
96 | 97 | ||
97 | struct plat_stmmacenet_data { | 98 | struct plat_stmmacenet_data { |
98 | int bus_id; | 99 | int bus_id; |
100 | int phy_addr; | ||
101 | int interface; | ||
102 | struct stmmac_mdio_bus_data *mdio_bus_data; | ||
99 | int pbl; | 103 | int pbl; |
100 | int clk_csr; | 104 | int clk_csr; |
101 | int has_gmac; | 105 | int has_gmac; |
@@ -103,67 +107,135 @@ struct plat_stmmacenet_data { | |||
103 | int tx_coe; | 107 | int tx_coe; |
104 | int bugged_jumbo; | 108 | int bugged_jumbo; |
105 | int pmt; | 109 | int pmt; |
106 | void (*fix_mac_speed)(void *priv, unsigned int speed); | 110 | int force_sf_dma_mode; |
107 | void (*bus_setup)(unsigned long ioaddr); | 111 | void (*fix_mac_speed)(void *priv, unsigned int speed); |
108 | #ifdef CONFIG_STM_DRIVERS | 112 | void (*bus_setup)(void __iomem *ioaddr); |
109 | struct stm_pad_config *pad_config; | 113 | int (*init)(struct platform_device *pdev); |
110 | #endif | 114 | void (*exit)(struct platform_device *pdev); |
111 | void *bsp_priv; | 115 | void *bsp_priv; |
112 | }; | 116 | }; |
113 | 117 | ||
114 | Where: | 118 | Where: |
115 | - pbl (Programmable Burst Length) is maximum number of | 119 | o bus_id: bus identifier. |
116 | beats to be transferred in one DMA transaction. | 120 | o phy_addr: the physical address can be passed from the platform. |
117 | GMAC also enables the 4xPBL by default. | 121 | If it is set to -1 the driver will automatically |
118 | - fix_mac_speed and bus_setup are used to configure internal target | 122 | detect it at run-time by probing all the 32 addresses. |
119 | registers (on STM platforms); | 123 | o interface: PHY device's interface. |
120 | - has_gmac: GMAC core is on board (get it at run-time in the next step); | 124 | o mdio_bus_data: specific platform fields for the MDIO bus. |
121 | - bus_id: bus identifier. | 125 | o pbl: the Programmable Burst Length is maximum number of beats to |
122 | - tx_coe: core is able to perform the tx csum in HW. | 126 | be transferred in one DMA transaction. |
123 | - enh_desc: if sets the MAC will use the enhanced descriptor structure. | 127 | GMAC also enables the 4xPBL by default. |
124 | - clk_csr: CSR Clock range selection. | 128 | o clk_csr: CSR Clock range selection. |
125 | - bugged_jumbo: some HWs are not able to perform the csum in HW for | 129 | o has_gmac: uses the GMAC core. |
126 | over-sized frames due to limited buffer sizes. Setting this | 130 | o enh_desc: if sets the MAC will use the enhanced descriptor structure. |
127 | flag the csum will be done in SW on JUMBO frames. | 131 | o tx_coe: core is able to perform the tx csum in HW. |
128 | 132 | o bugged_jumbo: some HWs are not able to perform the csum in HW for | |
129 | struct plat_stmmacphy_data { | 133 | over-sized frames due to limited buffer sizes. |
130 | int bus_id; | 134 | Setting this flag the csum will be done in SW on |
131 | int phy_addr; | 135 | JUMBO frames. |
132 | unsigned int phy_mask; | 136 | o pmt: core has the embedded power module (optional). |
133 | int interface; | 137 | o force_sf_dma_mode: force DMA to use the Store and Forward mode |
134 | int (*phy_reset)(void *priv); | 138 | instead of the Threshold. |
135 | void *priv; | 139 | o fix_mac_speed: this callback is used for modifying some syscfg registers |
136 | }; | 140 | (on ST SoCs) according to the link speed negotiated by the |
141 | physical layer. | ||
142 | o bus_setup: perform HW setup of the bus. For example, on some ST platforms | ||
143 | this field is used to configure the AMBA bridge to generate more | ||
144 | efficient STBus traffic. | ||
145 | o init/exit: callbacks used for calling a custom initialisation; | ||
146 | this is sometimes necessary on some platforms (e.g. ST boxes) | ||
147 | where the HW needs to have set some PIO lines or system cfg | ||
148 | registers. | ||
149 | o custom_cfg: this is a custom configuration that can be passed while | ||
150 | initialising the resources. | ||
151 | |||
152 | Then we have: | ||
153 | |||
154 | struct stmmac_mdio_bus_data { | ||
155 | int bus_id; | ||
156 | int (*phy_reset)(void *priv); | ||
157 | unsigned int phy_mask; | ||
158 | int *irqs; | ||
159 | int probed_phy_irq; | ||
160 | }; | ||
137 | 161 | ||
138 | Where: | 162 | Where: |
139 | - bus_id: bus identifier; | 163 | o bus_id: bus identifier; |
140 | - phy_addr: physical address used for the attached phy device; | 164 | o phy_reset: hook to reset the phy device attached to the bus. |
141 | set it to -1 to get it at run-time; | 165 | o phy_mask: phy mask passed when registering the MDIO bus within the driver. |
142 | - interface: physical MII interface mode; | 166 | o irqs: list of IRQs, one per PHY. |
143 | - phy_reset: hook to reset HW function. | 167 | o probed_phy_irq: if irqs is NULL, use this for probed PHY. |
144 | 168 | ||
145 | SOURCES: | 169 | Below is an example of how the structures above are used on ST platforms. |
146 | - Kconfig | 170 | |
147 | - Makefile | 171 | static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = { |
148 | - stmmac_main.c: main network device driver; | 172 | .pbl = 32, |
149 | - stmmac_mdio.c: mdio functions; | 173 | .has_gmac = 0, |
150 | - stmmac_ethtool.c: ethtool support; | 174 | .enh_desc = 0, |
151 | - stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts | 175 | .fix_mac_speed = stxYYY_ethernet_fix_mac_speed, |
152 | Only tested on ST40 platforms based. | 176 | | |
153 | - stmmac.h: private driver structure; | 177 | |-> to write an internal syscfg |
154 | - common.h: common definitions and VFTs; | 178 | | on this platform when the |
155 | - descs.h: descriptor structure definitions; | 179 | | link speed changes from 10 to |
156 | - dwmac1000_core.c: GMAC core functions; | 180 | | 100 and vice versa |
157 | - dwmac1000_dma.c: dma functions for the GMAC chip; | 181 | .init = &stmmac_claim_resource, |
158 | - dwmac1000.h: specific header file for the GMAC; | 182 | | |
159 | - dwmac100_core: MAC 100 core and dma code; | 183 | |-> On ST SoC this calls own "PAD" |
160 | - dwmac100_dma.c: dma funtions for the MAC chip; | 184 | | manager framework to claim |
161 | - dwmac1000.h: specific header file for the MAC; | 185 | | all the resources necessary |
162 | - dwmac_lib.c: generic DMA functions shared among chips | 186 | | (GPIO ...). The .custom_cfg field |
163 | - enh_desc.c: functions for handling enhanced descriptors | 187 | | is used to pass a custom config. |
164 | - norm_desc.c: functions for handling normal descriptors | 188 | }; |
165 | 189 | ||
166 | TODO: | 190 | Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact, |
167 | - XGMAC controller is not supported. | 191 | there are two MAC cores: one MAC is for MDIO Bus/PHY emulation |
168 | - Review the timer optimisation code to use an embedded device that seems to be | 192 | with fixed_link support. |
193 | |||
194 | static struct stmmac_mdio_bus_data stmmac1_mdio_bus = { | ||
195 | .bus_id = 1, | ||
196 | | | ||
197 | |-> phy device on the bus_id 1 | ||
198 | .phy_reset = phy_reset, | ||
199 | | | ||
200 | |-> function to provide the phy_reset on this board | ||
201 | .phy_mask = 0, | ||
202 | }; | ||
203 | |||
204 | static struct fixed_phy_status stmmac0_fixed_phy_status = { | ||
205 | .link = 1, | ||
206 | .speed = 100, | ||
207 | .duplex = 1, | ||
208 | }; | ||
209 | |||
210 | During the board's device_init we can configure the first | ||
211 | MAC for fixed_link by calling: | ||
212 | fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status); | ||
213 | and the second one, with a real PHY device attached to the bus, | ||
214 | by using the stmmac_mdio_bus_data structure (to provide the id, the | ||
215 | reset procedure etc). | ||
216 | |||
217 | 4.10) List of source files: | ||
218 | o Kconfig | ||
219 | o Makefile | ||
220 | o stmmac_main.c: main network device driver; | ||
221 | o stmmac_mdio.c: mdio functions; | ||
222 | o stmmac_ethtool.c: ethtool support; | ||
223 | o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts | ||
224 | Only tested on ST40 platforms based. | ||
225 | o stmmac.h: private driver structure; | ||
226 | o common.h: common definitions and VFTs; | ||
227 | o descs.h: descriptor structure definitions; | ||
228 | o dwmac1000_core.c: GMAC core functions; | ||
229 | o dwmac1000_dma.c: dma functions for the GMAC chip; | ||
230 | o dwmac1000.h: specific header file for the GMAC; | ||
231 | o dwmac100_core.c: MAC 100 core and dma code; | ||
232 | o dwmac100_dma.c: dma functions for the MAC chip; | ||
233 | o dwmac100.h: specific header file for the MAC; | ||
234 | o dwmac_lib.c: generic DMA functions shared among chips | ||
235 | o enh_desc.c: functions for handling enhanced descriptors | ||
236 | o norm_desc.c: functions for handling normal descriptors | ||
237 | |||
238 | 5) TODO: | ||
239 | o XGMAC is not supported. | ||
240 | o Review the timer optimisation code to use an embedded device that will be | ||
169 | available in new chip generations. | 241 | available in new chip generations. |
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 64565aac6e40..3384d5996be2 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -506,8 +506,8 @@ routines. Nevertheless, different callback pointers are used in case there is a | |||
506 | situation where it actually matters. | 506 | situation where it actually matters. |
507 | 507 | ||
508 | 508 | ||
509 | Device Power Domains | 509 | Device Power Management Domains |
510 | -------------------- | 510 | ------------------------------- |
511 | Sometimes devices share reference clocks or other power resources. In those | 511 | Sometimes devices share reference clocks or other power resources. In those |
512 | cases it generally is not possible to put devices into low-power states | 512 | cases it generally is not possible to put devices into low-power states |
513 | individually. Instead, a set of devices sharing a power resource can be put | 513 | individually. Instead, a set of devices sharing a power resource can be put |
@@ -516,8 +516,8 @@ power resource. Of course, they also need to be put into the full-power state | |||
516 | together, by turning the shared power resource on. A set of devices with this | 516 | together, by turning the shared power resource on. A set of devices with this |
517 | property is often referred to as a power domain. | 517 | property is often referred to as a power domain. |
518 | 518 | ||
519 | Support for power domains is provided through the pwr_domain field of struct | 519 | Support for power domains is provided through the pm_domain field of struct |
520 | device. This field is a pointer to an object of type struct dev_power_domain, | 520 | device. This field is a pointer to an object of type struct dev_pm_domain, |
521 | defined in include/linux/pm.h, providing a set of power management callbacks | 521 | defined in include/linux/pm.h, providing a set of power management callbacks |
522 | analogous to the subsystem-level and device driver callbacks that are executed | 522 | analogous to the subsystem-level and device driver callbacks that are executed |
523 | for the given device during all power transitions, instead of the respective | 523 | for the given device during all power transitions, instead of the respective |
@@ -604,7 +604,7 @@ state temporarily, for example so that its system wakeup capability can be | |||
604 | disabled. This all depends on the hardware and the design of the subsystem and | 604 | disabled. This all depends on the hardware and the design of the subsystem and |
605 | device driver in question. | 605 | device driver in question. |
606 | 606 | ||
607 | During system-wide resume from a sleep state it's best to put devices into the | 607 | During system-wide resume from a sleep state it's easiest to put devices into |
608 | full-power state, as explained in Documentation/power/runtime_pm.txt. Refer to | 608 | the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer |
609 | that document for more information regarding this particular issue as well as | 609 | to that document for more information regarding this particular issue as well as |
610 | for information on the device runtime power management framework in general. | 610 | for information on the device runtime power management framework in general. |
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt index 5ae70a12c1e2..3035d00757ad 100644 --- a/Documentation/power/opp.txt +++ b/Documentation/power/opp.txt | |||
@@ -321,6 +321,8 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with | |||
321 | addition to CONFIG_PM as power management feature is required to | 321 | addition to CONFIG_PM as power management feature is required to |
322 | dynamically scale voltage and frequency in a system. | 322 | dynamically scale voltage and frequency in a system. |
323 | 323 | ||
324 | opp_free_cpufreq_table - Free up the table allocated by opp_init_cpufreq_table | ||
325 | |||
324 | 7. Data Structures | 326 | 7. Data Structures |
325 | ================== | 327 | ================== |
326 | Typically an SoC contains multiple voltage domains which are variable. Each | 328 | Typically an SoC contains multiple voltage domains which are variable. Each |
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index b24875b1ced5..14dd3c6ad97e 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt | |||
@@ -1,39 +1,39 @@ | |||
1 | Run-time Power Management Framework for I/O Devices | 1 | Runtime Power Management Framework for I/O Devices |
2 | 2 | ||
3 | (C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. | 3 | (C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. |
4 | (C) 2010 Alan Stern <stern@rowland.harvard.edu> | 4 | (C) 2010 Alan Stern <stern@rowland.harvard.edu> |
5 | 5 | ||
6 | 1. Introduction | 6 | 1. Introduction |
7 | 7 | ||
8 | Support for run-time power management (run-time PM) of I/O devices is provided | 8 | Support for runtime power management (runtime PM) of I/O devices is provided |
9 | at the power management core (PM core) level by means of: | 9 | at the power management core (PM core) level by means of: |
10 | 10 | ||
11 | * The power management workqueue pm_wq in which bus types and device drivers can | 11 | * The power management workqueue pm_wq in which bus types and device drivers can |
12 | put their PM-related work items. It is strongly recommended that pm_wq be | 12 | put their PM-related work items. It is strongly recommended that pm_wq be |
13 | used for queuing all work items related to run-time PM, because this allows | 13 | used for queuing all work items related to runtime PM, because this allows |
14 | them to be synchronized with system-wide power transitions (suspend to RAM, | 14 | them to be synchronized with system-wide power transitions (suspend to RAM, |
15 | hibernation and resume from system sleep states). pm_wq is declared in | 15 | hibernation and resume from system sleep states). pm_wq is declared in |
16 | include/linux/pm_runtime.h and defined in kernel/power/main.c. | 16 | include/linux/pm_runtime.h and defined in kernel/power/main.c. |
17 | 17 | ||
18 | * A number of run-time PM fields in the 'power' member of 'struct device' (which | 18 | * A number of runtime PM fields in the 'power' member of 'struct device' (which |
19 | is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can | 19 | is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can |
20 | be used for synchronizing run-time PM operations with one another. | 20 | be used for synchronizing runtime PM operations with one another. |
21 | 21 | ||
22 | * Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in | 22 | * Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in |
23 | include/linux/pm.h). | 23 | include/linux/pm.h). |
24 | 24 | ||
25 | * A set of helper functions defined in drivers/base/power/runtime.c that can be | 25 | * A set of helper functions defined in drivers/base/power/runtime.c that can be |
26 | used for carrying out run-time PM operations in such a way that the | 26 | used for carrying out runtime PM operations in such a way that the |
27 | synchronization between them is taken care of by the PM core. Bus types and | 27 | synchronization between them is taken care of by the PM core. Bus types and |
28 | device drivers are encouraged to use these functions. | 28 | device drivers are encouraged to use these functions. |
29 | 29 | ||
30 | The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM | 30 | The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM |
31 | fields of 'struct dev_pm_info' and the core helper functions provided for | 31 | fields of 'struct dev_pm_info' and the core helper functions provided for |
32 | run-time PM are described below. | 32 | runtime PM are described below. |
33 | 33 | ||
34 | 2. Device Run-time PM Callbacks | 34 | 2. Device Runtime PM Callbacks |
35 | 35 | ||
36 | There are three device run-time PM callbacks defined in 'struct dev_pm_ops': | 36 | There are three device runtime PM callbacks defined in 'struct dev_pm_ops': |
37 | 37 | ||
38 | struct dev_pm_ops { | 38 | struct dev_pm_ops { |
39 | ... | 39 | ... |
@@ -72,11 +72,11 @@ knows what to do to handle the device). | |||
72 | not mean that the device has been put into a low power state. It is | 72 | not mean that the device has been put into a low power state. It is |
73 | supposed to mean, however, that the device will not process data and will | 73 | supposed to mean, however, that the device will not process data and will |
74 | not communicate with the CPU(s) and RAM until the subsystem-level resume | 74 | not communicate with the CPU(s) and RAM until the subsystem-level resume |
75 | callback is executed for it. The run-time PM status of a device after | 75 | callback is executed for it. The runtime PM status of a device after |
76 | successful execution of the subsystem-level suspend callback is 'suspended'. | 76 | successful execution of the subsystem-level suspend callback is 'suspended'. |
77 | 77 | ||
78 | * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, | 78 | * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, |
79 | the device's run-time PM status is 'active', which means that the device | 79 | the device's runtime PM status is 'active', which means that the device |
80 | _must_ be fully operational afterwards. | 80 | _must_ be fully operational afterwards. |
81 | 81 | ||
82 | * If the subsystem-level suspend callback returns an error code different | 82 | * If the subsystem-level suspend callback returns an error code different |
@@ -104,7 +104,7 @@ the device). | |||
104 | 104 | ||
105 | * Once the subsystem-level resume callback has completed successfully, the PM | 105 | * Once the subsystem-level resume callback has completed successfully, the PM |
106 | core regards the device as fully operational, which means that the device | 106 | core regards the device as fully operational, which means that the device |
107 | _must_ be able to complete I/O operations as needed. The run-time PM status | 107 | _must_ be able to complete I/O operations as needed. The runtime PM status |
108 | of the device is then 'active'. | 108 | of the device is then 'active'. |
109 | 109 | ||
110 | * If the subsystem-level resume callback returns an error code, the PM core | 110 | * If the subsystem-level resume callback returns an error code, the PM core |
@@ -130,7 +130,7 @@ device in that case. The value returned by this callback is ignored by the PM | |||
130 | core. | 130 | core. |
131 | 131 | ||
132 | The helper functions provided by the PM core, described in Section 4, guarantee | 132 | The helper functions provided by the PM core, described in Section 4, guarantee |
133 | that the following constraints are met with respect to the bus type's run-time | 133 | that the following constraints are met with respect to the bus type's runtime |
134 | PM callbacks: | 134 | PM callbacks: |
135 | 135 | ||
136 | (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute | 136 | (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute |
@@ -142,7 +142,7 @@ PM callbacks: | |||
142 | 142 | ||
143 | (2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' | 143 | (2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' |
144 | devices (i.e. the PM core will only execute ->runtime_idle() or | 144 | devices (i.e. the PM core will only execute ->runtime_idle() or |
145 | ->runtime_suspend() for the devices the run-time PM status of which is | 145 | ->runtime_suspend() for the devices the runtime PM status of which is |
146 | 'active'). | 146 | 'active'). |
147 | 147 | ||
148 | (3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device | 148 | (3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device |
@@ -151,7 +151,7 @@ PM callbacks: | |||
151 | flag of which is set. | 151 | flag of which is set. |
152 | 152 | ||
153 | (4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the | 153 | (4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the |
154 | PM core will only execute ->runtime_resume() for the devices the run-time | 154 | PM core will only execute ->runtime_resume() for the devices the runtime |
155 | PM status of which is 'suspended'). | 155 | PM status of which is 'suspended'). |
156 | 156 | ||
157 | Additionally, the helper functions provided by the PM core obey the following | 157 | Additionally, the helper functions provided by the PM core obey the following |
@@ -171,9 +171,9 @@ rules: | |||
171 | scheduled requests to execute the other callbacks for the same device, | 171 | scheduled requests to execute the other callbacks for the same device, |
172 | except for scheduled autosuspends. | 172 | except for scheduled autosuspends. |
173 | 173 | ||
174 | 3. Run-time PM Device Fields | 174 | 3. Runtime PM Device Fields |
175 | 175 | ||
176 | The following device run-time PM fields are present in 'struct dev_pm_info', as | 176 | The following device runtime PM fields are present in 'struct dev_pm_info', as |
177 | defined in include/linux/pm.h: | 177 | defined in include/linux/pm.h: |
178 | 178 | ||
179 | struct timer_list suspend_timer; | 179 | struct timer_list suspend_timer; |
@@ -205,7 +205,7 @@ defined in include/linux/pm.h: | |||
205 | 205 | ||
206 | unsigned int disable_depth; | 206 | unsigned int disable_depth; |
207 | - used for disabling the helper functions (they work normally if this is | 207 | - used for disabling the helper functions (they work normally if this is |
208 | equal to zero); the initial value of it is 1 (i.e. run-time PM is | 208 | equal to zero); the initial value of it is 1 (i.e. runtime PM is |
209 | initially disabled for all devices) | 209 | initially disabled for all devices) |
210 | 210 | ||
211 | unsigned int runtime_error; | 211 | unsigned int runtime_error; |
@@ -229,10 +229,10 @@ defined in include/linux/pm.h: | |||
229 | suspend to complete; means "start a resume as soon as you've suspended" | 229 | suspend to complete; means "start a resume as soon as you've suspended" |
230 | 230 | ||
231 | unsigned int run_wake; | 231 | unsigned int run_wake; |
232 | - set if the device is capable of generating run-time wake-up events | 232 | - set if the device is capable of generating runtime wake-up events |
233 | 233 | ||
234 | enum rpm_status runtime_status; | 234 | enum rpm_status runtime_status; |
235 | - the run-time PM status of the device; this field's initial value is | 235 | - the runtime PM status of the device; this field's initial value is |
236 | RPM_SUSPENDED, which means that each device is initially regarded by the | 236 | RPM_SUSPENDED, which means that each device is initially regarded by the |
237 | PM core as 'suspended', regardless of its real hardware status | 237 | PM core as 'suspended', regardless of its real hardware status |
238 | 238 | ||
@@ -243,7 +243,7 @@ defined in include/linux/pm.h: | |||
243 | and pm_runtime_forbid() helper functions | 243 | and pm_runtime_forbid() helper functions |
244 | 244 | ||
245 | unsigned int no_callbacks; | 245 | unsigned int no_callbacks; |
246 | - indicates that the device does not use the run-time PM callbacks (see | 246 | - indicates that the device does not use the runtime PM callbacks (see |
247 | Section 8); it may be modified only by the pm_runtime_no_callbacks() | 247 | Section 8); it may be modified only by the pm_runtime_no_callbacks() |
248 | helper function | 248 | helper function |
249 | 249 | ||
@@ -270,16 +270,16 @@ defined in include/linux/pm.h: | |||
270 | 270 | ||
271 | All of the above fields are members of the 'power' member of 'struct device'. | 271 | All of the above fields are members of the 'power' member of 'struct device'. |
272 | 272 | ||
273 | 4. Run-time PM Device Helper Functions | 273 | 4. Runtime PM Device Helper Functions |
274 | 274 | ||
275 | The following run-time PM helper functions are defined in | 275 | The following runtime PM helper functions are defined in |
276 | drivers/base/power/runtime.c and include/linux/pm_runtime.h: | 276 | drivers/base/power/runtime.c and include/linux/pm_runtime.h: |
277 | 277 | ||
278 | void pm_runtime_init(struct device *dev); | 278 | void pm_runtime_init(struct device *dev); |
279 | - initialize the device run-time PM fields in 'struct dev_pm_info' | 279 | - initialize the device runtime PM fields in 'struct dev_pm_info' |
280 | 280 | ||
281 | void pm_runtime_remove(struct device *dev); | 281 | void pm_runtime_remove(struct device *dev); |
282 | - make sure that the run-time PM of the device will be disabled after | 282 | - make sure that the runtime PM of the device will be disabled after |
283 | removing the device from device hierarchy | 283 | removing the device from device hierarchy |
284 | 284 | ||
285 | int pm_runtime_idle(struct device *dev); | 285 | int pm_runtime_idle(struct device *dev); |
@@ -289,9 +289,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
289 | 289 | ||
290 | int pm_runtime_suspend(struct device *dev); | 290 | int pm_runtime_suspend(struct device *dev); |
291 | - execute the subsystem-level suspend callback for the device; returns 0 on | 291 | - execute the subsystem-level suspend callback for the device; returns 0 on |
292 | success, 1 if the device's run-time PM status was already 'suspended', or | 292 | success, 1 if the device's runtime PM status was already 'suspended', or |
293 | error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt | 293 | error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt |
294 | to suspend the device again in future | 294 | to suspend the device again in future and -EACCES means that |
295 | 'power.disable_depth' is different from 0 | ||
295 | 296 | ||
296 | int pm_runtime_autosuspend(struct device *dev); | 297 | int pm_runtime_autosuspend(struct device *dev); |
297 | - same as pm_runtime_suspend() except that the autosuspend delay is taken | 298 | - same as pm_runtime_suspend() except that the autosuspend delay is taken |
@@ -301,10 +302,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
301 | 302 | ||
302 | int pm_runtime_resume(struct device *dev); | 303 | int pm_runtime_resume(struct device *dev); |
303 | - execute the subsystem-level resume callback for the device; returns 0 on | 304 | - execute the subsystem-level resume callback for the device; returns 0 on |
304 | success, 1 if the device's run-time PM status was already 'active' or | 305 | success, 1 if the device's runtime PM status was already 'active' or |
305 | error code on failure, where -EAGAIN means it may be safe to attempt to | 306 | error code on failure, where -EAGAIN means it may be safe to attempt to |
306 | resume the device again in future, but 'power.runtime_error' should be | 307 | resume the device again in future, but 'power.runtime_error' should be |
307 | checked additionally | 308 | checked additionally, and -EACCES means that 'power.disable_depth' is |
309 | different from 0 | ||
308 | 310 | ||
309 | int pm_request_idle(struct device *dev); | 311 | int pm_request_idle(struct device *dev); |
310 | - submit a request to execute the subsystem-level idle callback for the | 312 | - submit a request to execute the subsystem-level idle callback for the |
@@ -321,7 +323,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
321 | device in future, where 'delay' is the time to wait before queuing up a | 323 | device in future, where 'delay' is the time to wait before queuing up a |
322 | suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work | 324 | suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work |
323 | item is queued up immediately); returns 0 on success, 1 if the device's PM | 325 | item is queued up immediately); returns 0 on success, 1 if the device's PM |
324 | run-time status was already 'suspended', or error code if the request | 326 | runtime status was already 'suspended', or error code if the request |
325 | hasn't been scheduled (or queued up if 'delay' is 0); if the execution of | 327 | hasn't been scheduled (or queued up if 'delay' is 0); if the execution of |
326 | ->runtime_suspend() is already scheduled and not yet expired, the new | 328 | ->runtime_suspend() is already scheduled and not yet expired, the new |
327 | value of 'delay' will be used as the time to wait | 329 | value of 'delay' will be used as the time to wait |
@@ -329,7 +331,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
329 | int pm_request_resume(struct device *dev); | 331 | int pm_request_resume(struct device *dev); |
330 | - submit a request to execute the subsystem-level resume callback for the | 332 | - submit a request to execute the subsystem-level resume callback for the |
331 | device (the request is represented by a work item in pm_wq); returns 0 on | 333 | device (the request is represented by a work item in pm_wq); returns 0 on |
332 | success, 1 if the device's run-time PM status was already 'active', or | 334 | success, 1 if the device's runtime PM status was already 'active', or |
333 | error code if the request hasn't been queued up | 335 | error code if the request hasn't been queued up |
334 | 336 | ||
335 | void pm_runtime_get_noresume(struct device *dev); | 337 | void pm_runtime_get_noresume(struct device *dev); |
@@ -367,22 +369,32 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
367 | pm_runtime_autosuspend(dev) and return its result | 369 | pm_runtime_autosuspend(dev) and return its result |
368 | 370 | ||
369 | void pm_runtime_enable(struct device *dev); | 371 | void pm_runtime_enable(struct device *dev); |
370 | - enable the run-time PM helper functions to run the device bus type's | 372 | - decrement the device's 'power.disable_depth' field; if that field is equal |
371 | run-time PM callbacks described in Section 2 | 373 | to zero, the runtime PM helper functions can execute subsystem-level |
374 | callbacks described in Section 2 for the device | ||
372 | 375 | ||
373 | int pm_runtime_disable(struct device *dev); | 376 | int pm_runtime_disable(struct device *dev); |
374 | - prevent the run-time PM helper functions from running subsystem-level | 377 | - increment the device's 'power.disable_depth' field (if the value of that |
375 | run-time PM callbacks for the device, make sure that all of the pending | 378 | field was previously zero, this prevents subsystem-level runtime PM |
376 | run-time PM operations on the device are either completed or canceled; | 379 | callbacks from being run for the device), make sure that all of the pending |
380 | runtime PM operations on the device are either completed or canceled; | ||
377 | returns 1 if there was a resume request pending and it was necessary to | 381 | returns 1 if there was a resume request pending and it was necessary to |
378 | execute the subsystem-level resume callback for the device to satisfy that | 382 | execute the subsystem-level resume callback for the device to satisfy that |
379 | request, otherwise 0 is returned | 383 | request, otherwise 0 is returned |
380 | 384 | ||
385 | int pm_runtime_barrier(struct device *dev); | ||
386 | - check if there's a resume request pending for the device and resume it | ||
387 | (synchronously) in that case, cancel any other pending runtime PM requests | ||
388 | regarding it and wait for all runtime PM operations on it in progress to | ||
389 | complete; returns 1 if there was a resume request pending and it was | ||
390 | necessary to execute the subsystem-level resume callback for the device to | ||
391 | satisfy that request, otherwise 0 is returned | ||
392 | |||
381 | void pm_suspend_ignore_children(struct device *dev, bool enable); | 393 | void pm_suspend_ignore_children(struct device *dev, bool enable); |
382 | - set/unset the power.ignore_children flag of the device | 394 | - set/unset the power.ignore_children flag of the device |
383 | 395 | ||
384 | int pm_runtime_set_active(struct device *dev); | 396 | int pm_runtime_set_active(struct device *dev); |
385 | - clear the device's 'power.runtime_error' flag, set the device's run-time | 397 | - clear the device's 'power.runtime_error' flag, set the device's runtime |
386 | PM status to 'active' and update its parent's counter of 'active' | 398 | PM status to 'active' and update its parent's counter of 'active' |
387 | children as appropriate (it is only valid to use this function if | 399 | children as appropriate (it is only valid to use this function if |
388 | 'power.runtime_error' is set or 'power.disable_depth' is greater than | 400 | 'power.runtime_error' is set or 'power.disable_depth' is greater than |
@@ -390,7 +402,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
390 | which is not active and the 'power.ignore_children' flag of which is unset | 402 | which is not active and the 'power.ignore_children' flag of which is unset |
391 | 403 | ||
392 | void pm_runtime_set_suspended(struct device *dev); | 404 | void pm_runtime_set_suspended(struct device *dev); |
393 | - clear the device's 'power.runtime_error' flag, set the device's run-time | 405 | - clear the device's 'power.runtime_error' flag, set the device's runtime |
394 | PM status to 'suspended' and update its parent's counter of 'active' | 406 | PM status to 'suspended' and update its parent's counter of 'active' |
395 | children as appropriate (it is only valid to use this function if | 407 | children as appropriate (it is only valid to use this function if |
396 | 'power.runtime_error' is set or 'power.disable_depth' is greater than | 408 | 'power.runtime_error' is set or 'power.disable_depth' is greater than |
@@ -400,6 +412,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
400 | - return true if the device's runtime PM status is 'suspended' and its | 412 | - return true if the device's runtime PM status is 'suspended' and its |
401 | 'power.disable_depth' field is equal to zero, or false otherwise | 413 | 'power.disable_depth' field is equal to zero, or false otherwise |
402 | 414 | ||
415 | bool pm_runtime_status_suspended(struct device *dev); | ||
416 | - return true if the device's runtime PM status is 'suspended' | ||
417 | |||
403 | void pm_runtime_allow(struct device *dev); | 418 | void pm_runtime_allow(struct device *dev); |
404 | - set the power.runtime_auto flag for the device and decrease its usage | 419 | - set the power.runtime_auto flag for the device and decrease its usage |
405 | counter (used by the /sys/devices/.../power/control interface to | 420 | counter (used by the /sys/devices/.../power/control interface to |
@@ -411,7 +426,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
411 | effectively prevent the device from being power managed at run time) | 426 | effectively prevent the device from being power managed at run time) |
412 | 427 | ||
413 | void pm_runtime_no_callbacks(struct device *dev); | 428 | void pm_runtime_no_callbacks(struct device *dev); |
414 | - set the power.no_callbacks flag for the device and remove the run-time | 429 | - set the power.no_callbacks flag for the device and remove the runtime |
415 | PM attributes from /sys/devices/.../power (or prevent them from being | 430 | PM attributes from /sys/devices/.../power (or prevent them from being |
416 | added when the device is registered) | 431 | added when the device is registered) |
417 | 432 | ||
@@ -431,7 +446,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
431 | 446 | ||
432 | void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); | 447 | void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); |
433 | - set the power.autosuspend_delay value to 'delay' (expressed in | 448 | - set the power.autosuspend_delay value to 'delay' (expressed in |
434 | milliseconds); if 'delay' is negative then run-time suspends are | 449 | milliseconds); if 'delay' is negative then runtime suspends are |
435 | prevented | 450 | prevented |
436 | 451 | ||
437 | unsigned long pm_runtime_autosuspend_expiration(struct device *dev); | 452 | unsigned long pm_runtime_autosuspend_expiration(struct device *dev); |
@@ -470,35 +485,35 @@ pm_runtime_resume() | |||
470 | pm_runtime_get_sync() | 485 | pm_runtime_get_sync() |
471 | pm_runtime_put_sync_suspend() | 486 | pm_runtime_put_sync_suspend() |
472 | 487 | ||
473 | 5. Run-time PM Initialization, Device Probing and Removal | 488 | 5. Runtime PM Initialization, Device Probing and Removal |
474 | 489 | ||
475 | Initially, the run-time PM is disabled for all devices, which means that the | 490 | Initially, the runtime PM is disabled for all devices, which means that the |
476 | majority of the run-time PM helper functions described in Section 4 will return | 491 | majority of the runtime PM helper functions described in Section 4 will return |
477 | -EAGAIN until pm_runtime_enable() is called for the device. | 492 | -EAGAIN until pm_runtime_enable() is called for the device. |
478 | 493 | ||
479 | In addition to that, the initial run-time PM status of all devices is | 494 | In addition to that, the initial runtime PM status of all devices is |
480 | 'suspended', but it need not reflect the actual physical state of the device. | 495 | 'suspended', but it need not reflect the actual physical state of the device. |
481 | Thus, if the device is initially active (i.e. it is able to process I/O), its | 496 | Thus, if the device is initially active (i.e. it is able to process I/O), its |
482 | run-time PM status must be changed to 'active', with the help of | 497 | runtime PM status must be changed to 'active', with the help of |
483 | pm_runtime_set_active(), before pm_runtime_enable() is called for the device. | 498 | pm_runtime_set_active(), before pm_runtime_enable() is called for the device. |
484 | 499 | ||
485 | However, if the device has a parent and the parent's run-time PM is enabled, | 500 | However, if the device has a parent and the parent's runtime PM is enabled, |
486 | calling pm_runtime_set_active() for the device will affect the parent, unless | 501 | calling pm_runtime_set_active() for the device will affect the parent, unless |
487 | the parent's 'power.ignore_children' flag is set. Namely, in that case the | 502 | the parent's 'power.ignore_children' flag is set. Namely, in that case the |
488 | parent won't be able to suspend at run time, using the PM core's helper | 503 | parent won't be able to suspend at run time, using the PM core's helper |
489 | functions, as long as the child's status is 'active', even if the child's | 504 | functions, as long as the child's status is 'active', even if the child's |
490 | run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for | 505 | runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for |
491 | the child yet or pm_runtime_disable() has been called for it). For this reason, | 506 | the child yet or pm_runtime_disable() has been called for it). For this reason, |
492 | once pm_runtime_set_active() has been called for the device, pm_runtime_enable() | 507 | once pm_runtime_set_active() has been called for the device, pm_runtime_enable() |
493 | should be called for it too as soon as reasonably possible or its run-time PM | 508 | should be called for it too as soon as reasonably possible or its runtime PM |
494 | status should be changed back to 'suspended' with the help of | 509 | status should be changed back to 'suspended' with the help of |
495 | pm_runtime_set_suspended(). | 510 | pm_runtime_set_suspended(). |
496 | 511 | ||
497 | If the default initial run-time PM status of the device (i.e. 'suspended') | 512 | If the default initial runtime PM status of the device (i.e. 'suspended') |
498 | reflects the actual state of the device, its bus type's or its driver's | 513 | reflects the actual state of the device, its bus type's or its driver's |
499 | ->probe() callback will likely need to wake it up using one of the PM core's | 514 | ->probe() callback will likely need to wake it up using one of the PM core's |
500 | helper functions described in Section 4. In that case, pm_runtime_resume() | 515 | helper functions described in Section 4. In that case, pm_runtime_resume() |
501 | should be used. Of course, for this purpose the device's run-time PM has to be | 516 | should be used. Of course, for this purpose the device's runtime PM has to be |
502 | enabled earlier by calling pm_runtime_enable(). | 517 | enabled earlier by calling pm_runtime_enable(). |
503 | 518 | ||
504 | If the device bus type's or driver's ->probe() callback runs | 519 | If the device bus type's or driver's ->probe() callback runs |
@@ -529,33 +544,33 @@ The user space can effectively disallow the driver of the device to power manage | |||
529 | it at run time by changing the value of its /sys/devices/.../power/control | 544 | it at run time by changing the value of its /sys/devices/.../power/control |
530 | attribute to "on", which causes pm_runtime_forbid() to be called. In principle, | 545 | attribute to "on", which causes pm_runtime_forbid() to be called. In principle, |
531 | this mechanism may also be used by the driver to effectively turn off the | 546 | this mechanism may also be used by the driver to effectively turn off the |
532 | run-time power management of the device until the user space turns it on. | 547 | runtime power management of the device until the user space turns it on. |
533 | Namely, during the initialization the driver can make sure that the run-time PM | 548 | Namely, during the initialization the driver can make sure that the runtime PM |
534 | status of the device is 'active' and call pm_runtime_forbid(). It should be | 549 | status of the device is 'active' and call pm_runtime_forbid(). It should be |
535 | noted, however, that if the user space has already intentionally changed the | 550 | noted, however, that if the user space has already intentionally changed the |
536 | value of /sys/devices/.../power/control to "auto" to allow the driver to power | 551 | value of /sys/devices/.../power/control to "auto" to allow the driver to power |
537 | manage the device at run time, the driver may confuse it by using | 552 | manage the device at run time, the driver may confuse it by using |
538 | pm_runtime_forbid() this way. | 553 | pm_runtime_forbid() this way. |
539 | 554 | ||
540 | 6. Run-time PM and System Sleep | 555 | 6. Runtime PM and System Sleep |
541 | 556 | ||
542 | Run-time PM and system sleep (i.e., system suspend and hibernation, also known | 557 | Runtime PM and system sleep (i.e., system suspend and hibernation, also known |
543 | as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of | 558 | as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of |
544 | ways. If a device is active when a system sleep starts, everything is | 559 | ways. If a device is active when a system sleep starts, everything is |
545 | straightforward. But what should happen if the device is already suspended? | 560 | straightforward. But what should happen if the device is already suspended? |
546 | 561 | ||
547 | The device may have different wake-up settings for run-time PM and system sleep. | 562 | The device may have different wake-up settings for runtime PM and system sleep. |
548 | For example, remote wake-up may be enabled for run-time suspend but disallowed | 563 | For example, remote wake-up may be enabled for runtime suspend but disallowed |
549 | for system sleep (device_may_wakeup(dev) returns 'false'). When this happens, | 564 | for system sleep (device_may_wakeup(dev) returns 'false'). When this happens, |
550 | the subsystem-level system suspend callback is responsible for changing the | 565 | the subsystem-level system suspend callback is responsible for changing the |
551 | device's wake-up setting (it may leave that to the device driver's system | 566 | device's wake-up setting (it may leave that to the device driver's system |
552 | suspend routine). It may be necessary to resume the device and suspend it again | 567 | suspend routine). It may be necessary to resume the device and suspend it again |
553 | in order to do so. The same is true if the driver uses different power levels | 568 | in order to do so. The same is true if the driver uses different power levels |
554 | or other settings for run-time suspend and system sleep. | 569 | or other settings for runtime suspend and system sleep. |
555 | 570 | ||
556 | During system resume, devices generally should be brought back to full power, | 571 | During system resume, the simplest approach is to bring all devices back to full |
557 | even if they were suspended before the system sleep began. There are several | 572 | power, even if they had been suspended before the system suspend began. There |
558 | reasons for this, including: | 573 | are several reasons for this, including: |
559 | 574 | ||
560 | * The device might need to switch power levels, wake-up settings, etc. | 575 | * The device might need to switch power levels, wake-up settings, etc. |
561 | 576 | ||
@@ -570,18 +585,50 @@ reasons for this, including: | |||
570 | * The device might need to be reset. | 585 | * The device might need to be reset. |
571 | 586 | ||
572 | * Even though the device was suspended, if its usage counter was > 0 then most | 587 | * Even though the device was suspended, if its usage counter was > 0 then most |
573 | likely it would need a run-time resume in the near future anyway. | 588 | likely it would need a runtime resume in the near future anyway. |
574 | |||
575 | * Always going back to full power is simplest. | ||
576 | 589 | ||
577 | If the device was suspended before the sleep began, then its run-time PM status | 590 | If the device had been suspended before the system suspend began and it's |
578 | will have to be updated to reflect the actual post-system sleep status. The way | 591 | brought back to full power during resume, then its runtime PM status will have |
579 | to do this is: | 592 | to be updated to reflect the actual post-system sleep status. The way to do |
593 | this is: | ||
580 | 594 | ||
581 | pm_runtime_disable(dev); | 595 | pm_runtime_disable(dev); |
582 | pm_runtime_set_active(dev); | 596 | pm_runtime_set_active(dev); |
583 | pm_runtime_enable(dev); | 597 | pm_runtime_enable(dev); |
584 | 598 | ||
599 | The PM core always increments the runtime usage counter before calling the | ||
600 | ->suspend() callback and decrements it after calling the ->resume() callback. | ||
601 | Hence disabling runtime PM temporarily like this will not cause any runtime | ||
602 | suspend attempts to be permanently lost. If the usage count goes to zero | ||
603 | following the return of the ->resume() callback, the ->runtime_idle() callback | ||
604 | will be invoked as usual. | ||
605 | |||
606 | On some systems, however, system sleep is not entered through a global firmware | ||
607 | or hardware operation. Instead, all hardware components are put into low-power | ||
608 | states directly by the kernel in a coordinated way. Then, the system sleep | ||
609 | state effectively follows from the states the hardware components end up in | ||
610 | and the system is woken up from that state by a hardware interrupt or a similar | ||
611 | mechanism entirely under the kernel's control. As a result, the kernel never | ||
612 | gives control away and the states of all devices during resume are precisely | ||
613 | known to it. If that is the case and none of the situations listed above takes | ||
614 | place (in particular, if the system is not waking up from hibernation), it may | ||
615 | be more efficient to leave the devices that had been suspended before the system | ||
616 | suspend began in the suspended state. | ||
617 | |||
618 | The PM core does its best to reduce the probability of race conditions between | ||
619 | the runtime PM and system suspend/resume (and hibernation) callbacks by carrying | ||
620 | out the following operations: | ||
621 | |||
622 | * During system suspend it calls pm_runtime_get_noresume() and | ||
623 | pm_runtime_barrier() for every device right before executing the | ||
624 | subsystem-level ->suspend() callback for it. In addition to that it calls | ||
625 | pm_runtime_disable() for every device right after executing the | ||
626 | subsystem-level ->suspend() callback for it. | ||
627 | |||
628 | * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync() | ||
629 | for every device right before and right after executing the subsystem-level | ||
630 | ->resume() callback for it, respectively. | ||
631 | |||
585 | 7. Generic subsystem callbacks | 632 | 7. Generic subsystem callbacks |
586 | 633 | ||
587 | Subsystems may wish to conserve code space by using the set of generic power | 634 | Subsystems may wish to conserve code space by using the set of generic power |
@@ -606,40 +653,68 @@ driver/base/power/generic_ops.c: | |||
606 | callback provided by its driver and return its result, or return 0 if not | 653 | callback provided by its driver and return its result, or return 0 if not |
607 | defined | 654 | defined |
608 | 655 | ||
656 | int pm_generic_suspend_noirq(struct device *dev); | ||
657 | - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq() | ||
658 | callback provided by the device's driver and return its result, or return | ||
659 | 0 if not defined | ||
660 | |||
609 | int pm_generic_resume(struct device *dev); | 661 | int pm_generic_resume(struct device *dev); |
610 | - invoke the ->resume() callback provided by the driver of this device and, | 662 | - invoke the ->resume() callback provided by the driver of this device and, |
611 | if successful, change the device's runtime PM status to 'active' | 663 | if successful, change the device's runtime PM status to 'active' |
612 | 664 | ||
665 | int pm_generic_resume_noirq(struct device *dev); | ||
666 | - invoke the ->resume_noirq() callback provided by the driver of this device | ||
667 | |||
613 | int pm_generic_freeze(struct device *dev); | 668 | int pm_generic_freeze(struct device *dev); |
614 | - if the device has not been suspended at run time, invoke the ->freeze() | 669 | - if the device has not been suspended at run time, invoke the ->freeze() |
615 | callback provided by its driver and return its result, or return 0 if not | 670 | callback provided by its driver and return its result, or return 0 if not |
616 | defined | 671 | defined |
617 | 672 | ||
673 | int pm_generic_freeze_noirq(struct device *dev); | ||
674 | - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq() | ||
675 | callback provided by the device's driver and return its result, or return | ||
676 | 0 if not defined | ||
677 | |||
618 | int pm_generic_thaw(struct device *dev); | 678 | int pm_generic_thaw(struct device *dev); |
619 | - if the device has not been suspended at run time, invoke the ->thaw() | 679 | - if the device has not been suspended at run time, invoke the ->thaw() |
620 | callback provided by its driver and return its result, or return 0 if not | 680 | callback provided by its driver and return its result, or return 0 if not |
621 | defined | 681 | defined |
622 | 682 | ||
683 | int pm_generic_thaw_noirq(struct device *dev); | ||
684 | - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq() | ||
685 | callback provided by the device's driver and return its result, or return | ||
686 | 0 if not defined | ||
687 | |||
623 | int pm_generic_poweroff(struct device *dev); | 688 | int pm_generic_poweroff(struct device *dev); |
624 | - if the device has not been suspended at run time, invoke the ->poweroff() | 689 | - if the device has not been suspended at run time, invoke the ->poweroff() |
625 | callback provided by its driver and return its result, or return 0 if not | 690 | callback provided by its driver and return its result, or return 0 if not |
626 | defined | 691 | defined |
627 | 692 | ||
693 | int pm_generic_poweroff_noirq(struct device *dev); | ||
694 | - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq() | ||
695 | callback provided by the device's driver and return its result, or return | ||
696 | 0 if not defined | ||
697 | |||
628 | int pm_generic_restore(struct device *dev); | 698 | int pm_generic_restore(struct device *dev); |
629 | - invoke the ->restore() callback provided by the driver of this device and, | 699 | - invoke the ->restore() callback provided by the driver of this device and, |
630 | if successful, change the device's runtime PM status to 'active' | 700 | if successful, change the device's runtime PM status to 'active' |
631 | 701 | ||
702 | int pm_generic_restore_noirq(struct device *dev); | ||
703 | - invoke the ->restore_noirq() callback provided by the device's driver | ||
704 | |||
632 | These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(), | 705 | These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(), |
633 | ->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(), | 706 | ->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(), |
634 | or ->restore() callback pointers in the subsystem-level dev_pm_ops structures. | 707 | ->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(), |
708 | ->poweroff(), ->poweroff_noirq(), ->restore(), or ->restore_noirq() callback | ||
709 | pointers in the subsystem-level dev_pm_ops structures. | ||
635 | 710 | ||
636 | If a subsystem wishes to use all of them at the same time, it can simply assign | 711 | If a subsystem wishes to use all of them at the same time, it can simply assign |
637 | the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its | 712 | the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its |
638 | dev_pm_ops structure pointer. | 713 | dev_pm_ops structure pointer. |
639 | 714 | ||
640 | Device drivers that wish to use the same function as a system suspend, freeze, | 715 | Device drivers that wish to use the same function as a system suspend, freeze, |
641 | poweroff and run-time suspend callback, and similarly for system resume, thaw, | 716 | poweroff and runtime suspend callback, and similarly for system resume, thaw, |
642 | restore, and run-time resume, can achieve this with the help of the | 717 | restore, and runtime resume, can achieve this with the help of the |
643 | UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its | 718 | UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its |
644 | last argument to NULL). | 719 | last argument to NULL). |
645 | 720 | ||
@@ -649,7 +724,7 @@ Some "devices" are only logical sub-devices of their parent and cannot be | |||
649 | power-managed on their own. (The prototype example is a USB interface. Entire | 724 | power-managed on their own. (The prototype example is a USB interface. Entire |
650 | USB devices can go into low-power mode or send wake-up requests, but neither is | 725 | USB devices can go into low-power mode or send wake-up requests, but neither is |
651 | possible for individual interfaces.) The drivers for these devices have no | 726 | possible for individual interfaces.) The drivers for these devices have no |
652 | need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend() | 727 | need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend() |
653 | and ->runtime_resume() would always return 0 without doing anything else and | 728 | and ->runtime_resume() would always return 0 without doing anything else and |
654 | ->runtime_idle() would always call pm_runtime_suspend(). | 729 | ->runtime_idle() would always call pm_runtime_suspend(). |
655 | 730 | ||
@@ -657,7 +732,7 @@ Subsystems can tell the PM core about these devices by calling | |||
657 | pm_runtime_no_callbacks(). This should be done after the device structure is | 732 | pm_runtime_no_callbacks(). This should be done after the device structure is |
658 | initialized and before it is registered (although after device registration is | 733 | initialized and before it is registered (although after device registration is |
659 | also okay). The routine will set the device's power.no_callbacks flag and | 734 | also okay). The routine will set the device's power.no_callbacks flag and |
660 | prevent the non-debugging run-time PM sysfs attributes from being created. | 735 | prevent the non-debugging runtime PM sysfs attributes from being created. |
661 | 736 | ||
662 | When power.no_callbacks is set, the PM core will not invoke the | 737 | When power.no_callbacks is set, the PM core will not invoke the |
663 | ->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. | 738 | ->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. |
@@ -665,7 +740,7 @@ Instead it will assume that suspends and resumes always succeed and that idle | |||
665 | devices should be suspended. | 740 | devices should be suspended. |
666 | 741 | ||
667 | As a consequence, the PM core will never directly inform the device's subsystem | 742 | As a consequence, the PM core will never directly inform the device's subsystem |
668 | or driver about run-time power changes. Instead, the driver for the device's | 743 | or driver about runtime power changes. Instead, the driver for the device's |
669 | parent must take responsibility for telling the device's driver when the | 744 | parent must take responsibility for telling the device's driver when the |
670 | parent's power state changes. | 745 | parent's power state changes. |
671 | 746 | ||
@@ -676,13 +751,13 @@ A device should be put in a low-power state only when there's some reason to | |||
676 | think it will remain in that state for a substantial time. A common heuristic | 751 | think it will remain in that state for a substantial time. A common heuristic |
677 | says that a device which hasn't been used for a while is liable to remain | 752 | says that a device which hasn't been used for a while is liable to remain |
678 | unused; following this advice, drivers should not allow devices to be suspended | 753 | unused; following this advice, drivers should not allow devices to be suspended |
679 | at run-time until they have been inactive for some minimum period. Even when | 754 | at runtime until they have been inactive for some minimum period. Even when |
680 | the heuristic ends up being non-optimal, it will still prevent devices from | 755 | the heuristic ends up being non-optimal, it will still prevent devices from |
681 | "bouncing" too rapidly between low-power and full-power states. | 756 | "bouncing" too rapidly between low-power and full-power states. |
682 | 757 | ||
683 | The term "autosuspend" is an historical remnant. It doesn't mean that the | 758 | The term "autosuspend" is an historical remnant. It doesn't mean that the |
684 | device is automatically suspended (the subsystem or driver still has to call | 759 | device is automatically suspended (the subsystem or driver still has to call |
685 | the appropriate PM routines); rather it means that run-time suspends will | 760 | the appropriate PM routines); rather it means that runtime suspends will |
686 | automatically be delayed until the desired period of inactivity has elapsed. | 761 | automatically be delayed until the desired period of inactivity has elapsed. |
687 | 762 | ||
688 | Inactivity is determined based on the power.last_busy field. Drivers should | 763 | Inactivity is determined based on the power.last_busy field. Drivers should |
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt index 19f8278c3854..8d32d85a5234 100644 --- a/Documentation/rbtree.txt +++ b/Documentation/rbtree.txt | |||
@@ -196,15 +196,20 @@ Support for Augmented rbtrees | |||
196 | Augmented rbtree is an rbtree with "some" additional data stored in each node. | 196 | Augmented rbtree is an rbtree with "some" additional data stored in each node. |
197 | This data can be used to augment some new functionality to rbtree. | 197 | This data can be used to augment some new functionality to rbtree. |
198 | Augmented rbtree is an optional feature built on top of basic rbtree | 198 | Augmented rbtree is an optional feature built on top of basic rbtree |
199 | infrastructure. rbtree user who wants this feature will have an augment | 199 | infrastructure. An rbtree user who wants this feature will have to call the |
200 | callback function in rb_root initialized. | 200 | augmentation functions with the user-provided augmentation callback |
201 | 201 | when inserting and erasing nodes. | |
202 | This callback function will be called from rbtree core routines whenever | 202 | |
203 | a node has a change in one or both of its children. It is the responsibility | 203 | On insertion, the user must call rb_augment_insert() once the new node is in |
204 | of the callback function to recalculate the additional data that is in the | 204 | place. This will cause the augmentation function callback to be called for |
205 | rb node using new children information. Note that if this new additional | 205 | each node between the new node and the root which has been affected by the |
206 | data affects the parent node's additional data, then callback function has | 206 | insertion. |
207 | to handle it and do the recursive updates. | 207 | |
208 | When erasing a node, the user must call rb_augment_erase_begin() first to | ||
209 | retrieve the deepest node on the rebalance path. Then, after erasing the | ||
210 | original node, the user must call rb_augment_erase_end() with the deepest | ||
211 | node found earlier. This will cause the augmentation function to be called | ||
212 | for each affected node between the deepest node and the root. | ||
208 | 213 | ||
209 | 214 | ||
210 | Interval tree is an example of augmented rb tree. Reference - | 215 | Interval tree is an example of augmented rb tree. Reference - |
diff --git a/Documentation/s390/TAPE b/Documentation/s390/TAPE deleted file mode 100644 index c639aa5603ff..000000000000 --- a/Documentation/s390/TAPE +++ /dev/null | |||
@@ -1,122 +0,0 @@ | |||
1 | Channel attached Tape device driver | ||
2 | |||
3 | -----------------------------WARNING----------------------------------------- | ||
4 | This driver is considered to be EXPERIMENTAL. Do NOT use it in | ||
5 | production environments. Feel free to test it and report problems back to us. | ||
6 | ----------------------------------------------------------------------------- | ||
7 | |||
8 | The LINUX for zSeries tape device driver manages channel attached tape drives | ||
9 | which are compatible to IBM 3480 or IBM 3490 magnetic tape subsystems. This | ||
10 | includes various models of these devices (for example the 3490E). | ||
11 | |||
12 | |||
13 | Tape driver features | ||
14 | |||
15 | The device driver supports a maximum of 128 tape devices. | ||
16 | No official LINUX device major number is assigned to the zSeries tape device | ||
17 | driver. It allocates major numbers dynamically and reports them on system | ||
18 | startup. | ||
19 | Typically it will get major number 254 for both the character device front-end | ||
20 | and the block device front-end. | ||
21 | |||
22 | The tape device driver needs no kernel parameters. All supported devices | ||
23 | present are detected on driver initialization at system startup or module load. | ||
24 | The devices detected are ordered by their subchannel numbers. The device with | ||
25 | the lowest subchannel number becomes device 0, the next one will be device 1 | ||
26 | and so on. | ||
27 | |||
28 | |||
29 | Tape character device front-end | ||
30 | |||
31 | The usual way to read or write to the tape device is through the character | ||
32 | device front-end. The zSeries tape device driver provides two character devices | ||
33 | for each physical device -- the first of these will rewind automatically when | ||
34 | it is closed, the second will not rewind automatically. | ||
35 | |||
36 | The character device nodes are named /dev/rtibm0 (rewinding) and /dev/ntibm0 | ||
37 | (non-rewinding) for the first device, /dev/rtibm1 and /dev/ntibm1 for the | ||
38 | second, and so on. | ||
39 | |||
40 | The character device front-end can be used as any other LINUX tape device. You | ||
41 | can write to it and read from it using LINUX facilities such as GNU tar. The | ||
42 | tool mt can be used to perform control operations, such as rewinding the tape | ||
43 | or skipping a file. | ||
44 | |||
45 | Most LINUX tape software should work with either tape character device. | ||
46 | |||
47 | |||
48 | Tape block device front-end | ||
49 | |||
50 | The tape device may also be accessed as a block device in read-only mode. | ||
51 | This could be used for software installation in the same way as it is used with | ||
52 | other operation systems on the zSeries platform (and most LINUX | ||
53 | distributions are shipped on compact disk using ISO9660 filesystems). | ||
54 | |||
55 | One block device node is provided for each physical device. These are named | ||
56 | /dev/btibm0 for the first device, /dev/btibm1 for the second and so on. | ||
57 | You should only use the ISO9660 filesystem on LINUX for zSeries tapes because | ||
58 | the physical tape devices cannot perform fast seeks and the ISO9660 system is | ||
59 | optimized for this situation. | ||
60 | |||
61 | |||
62 | Tape block device example | ||
63 | |||
64 | In this example a tape with an ISO9660 filesystem is created using the first | ||
65 | tape device. ISO9660 filesystem support must be built into your system kernel | ||
66 | for this. | ||
67 | The mt command is used to issue tape commands and the mkisofs command to | ||
68 | create an ISO9660 filesystem: | ||
69 | |||
70 | - create a LINUX directory (somedir) with the contents of the filesystem | ||
71 | mkdir somedir | ||
72 | cp contents somedir | ||
73 | |||
74 | - insert a tape | ||
75 | |||
76 | - ensure the tape is at the beginning | ||
77 | mt -f /dev/ntibm0 rewind | ||
78 | |||
79 | - set the blocksize of the character driver. The blocksize 2048 bytes | ||
80 | is commonly used on ISO9660 CD-Roms | ||
81 | mt -f /dev/ntibm0 setblk 2048 | ||
82 | |||
83 | - write the filesystem to the character device driver | ||
84 | mkisofs -o /dev/ntibm0 somedir | ||
85 | |||
86 | - rewind the tape again | ||
87 | mt -f /dev/ntibm0 rewind | ||
88 | |||
89 | - Now you can mount your new filesystem as a block device: | ||
90 | mount -t iso9660 -o ro,block=2048 /dev/btibm0 /mnt | ||
91 | |||
92 | TODO List | ||
93 | |||
94 | - Driver has to be stabilized still | ||
95 | |||
96 | BUGS | ||
97 | |||
98 | This driver is considered BETA, which means some weaknesses may still | ||
99 | be in it. | ||
100 | If an error occurs which cannot be handled by the code you will get a | ||
101 | sense-data dump.In that case please do the following: | ||
102 | |||
103 | 1. set the tape driver debug level to maximum: | ||
104 | echo 6 >/proc/s390dbf/tape/level | ||
105 | |||
106 | 2. re-perform the actions which produced the bug. (Hopefully the bug will | ||
107 | reappear.) | ||
108 | |||
109 | 3. get a snapshot from the debug-feature: | ||
110 | cat /proc/s390dbf/tape/hex_ascii >somefile | ||
111 | |||
112 | 4. Now put the snapshot together with a detailed description of the situation | ||
113 | that led to the bug: | ||
114 | - Which tool did you use? | ||
115 | - Which hardware do you have? | ||
116 | - Was your tape unit online? | ||
117 | - Is it a shared tape unit? | ||
118 | |||
119 | 5. Send an email with your bug report to: | ||
120 | mailto:Linux390@de.ibm.com | ||
121 | |||
122 | |||
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt index d43dbcbd163b..28aa1075e291 100644 --- a/Documentation/scheduler/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.txt | |||
@@ -66,7 +66,7 @@ Your cpu_idle routines need to obey the following rules: | |||
66 | barrier issued (followed by a test of need_resched with | 66 | barrier issued (followed by a test of need_resched with |
67 | interrupts disabled, as explained in 3). | 67 | interrupts disabled, as explained in 3). |
68 | 68 | ||
69 | arch/i386/kernel/process.c has examples of both polling and | 69 | arch/x86/kernel/process.c has examples of both polling and |
70 | sleeping idle functions. | 70 | sleeping idle functions. |
71 | 71 | ||
72 | 72 | ||
diff --git a/Documentation/scsi/BusLogic.txt b/Documentation/scsi/BusLogic.txt index d7fbc9488b98..48e982cd6fe7 100644 --- a/Documentation/scsi/BusLogic.txt +++ b/Documentation/scsi/BusLogic.txt | |||
@@ -553,7 +553,7 @@ replacing "/usr/src" with wherever you keep your Linux kernel source tree: | |||
553 | make config | 553 | make config |
554 | make zImage | 554 | make zImage |
555 | 555 | ||
556 | Then install "arch/i386/boot/zImage" as your standard kernel, run lilo if | 556 | Then install "arch/x86/boot/zImage" as your standard kernel, run lilo if |
557 | appropriate, and reboot. | 557 | appropriate, and reboot. |
558 | 558 | ||
559 | 559 | ||
diff --git a/Documentation/serial/computone.txt b/Documentation/serial/computone.txt index c57ea4781e5d..60a6f657c37d 100644 --- a/Documentation/serial/computone.txt +++ b/Documentation/serial/computone.txt | |||
@@ -87,7 +87,7 @@ c) Set address on ISA cards then: | |||
87 | edit /usr/src/linux/drivers/char/ip2.c | 87 | edit /usr/src/linux/drivers/char/ip2.c |
88 | (Optional - may be specified on kernel command line now) | 88 | (Optional - may be specified on kernel command line now) |
89 | d) Run "make zImage" or whatever target you prefer. | 89 | d) Run "make zImage" or whatever target you prefer. |
90 | e) mv /usr/src/linux/arch/i386/boot/zImage to /boot. | 90 | e) mv /usr/src/linux/arch/x86/boot/zImage to /boot. |
91 | f) Add new config for this kernel into /etc/lilo.conf, run "lilo" | 91 | f) Add new config for this kernel into /etc/lilo.conf, run "lilo" |
92 | or copy to a floppy disk and boot from that floppy disk. | 92 | or copy to a floppy disk and boot from that floppy disk. |
93 | g) Reboot using this kernel | 93 | g) Reboot using this kernel |
diff --git a/Documentation/sound/alsa/HD-Audio-Controls.txt b/Documentation/sound/alsa/HD-Audio-Controls.txt new file mode 100644 index 000000000000..1482035243e6 --- /dev/null +++ b/Documentation/sound/alsa/HD-Audio-Controls.txt | |||
@@ -0,0 +1,100 @@ | |||
1 | This file explains the codec-specific mixer controls. | ||
2 | |||
3 | Realtek codecs | ||
4 | -------------- | ||
5 | |||
6 | * Channel Mode | ||
7 | This is an enum control to change the surround-channel setup, | ||
8 | and appears only when the surround channels are available. | ||
9 | It gives the number of channels to be used, "2ch", "4ch", "6ch", | ||
10 | and "8ch". According to the configuration, this also controls the | ||
11 | jack-retasking of multi-I/O jacks. | ||
12 | |||
13 | * Auto-Mute Mode | ||
14 | This is an enum control to change the auto-mute behavior of the | ||
15 | headphone and line-out jacks. If built-in speakers and headphone | ||
16 | and/or line-out jacks are available on a machine, this control | ||
17 | appears. | ||
18 | When there are only either headphones or line-out jacks, it gives | ||
19 | "Disabled" and "Enabled" state. When enabled, the speaker is muted | ||
20 | automatically when a jack is plugged. | ||
21 | |||
22 | When both headphone and line-out jacks are present, it gives | ||
23 | "Disabled", "Speaker Only" and "Line-Out+Speaker". When | ||
24 | speaker-only is chosen, plugging into a headphone or a line-out jack | ||
25 | mutes the speakers, but not line-outs. When line-out+speaker is | ||
26 | selected, plugging into a headphone jack mutes both speakers and | ||
27 | line-outs. | ||
28 | |||
29 | |||
30 | IDT/Sigmatel codecs | ||
31 | ------------------- | ||
32 | |||
33 | * Analog Loopback | ||
34 | This control enables/disables the analog-loopback circuit. This | ||
35 | appears only when "loopback" is set to true in a codec hint | ||
36 | (see HD-Audio.txt). Note that on some codecs the analog-loopback | ||
37 | and the normal PCM playback are exclusive, i.e. when this is on, you | ||
38 | won't hear any PCM stream. | ||
39 | |||
40 | * Swap Center/LFE | ||
41 | Swaps the center and LFE channel order. Normally, the left | ||
42 | corresponds to the center and the right to the LFE. When this is | ||
43 | ON, the left corresponds to the LFE and the right to the center. | ||
44 | |||
45 | * Headphone as Line Out | ||
46 | When this control is ON, treat the headphone jacks as line-out | ||
47 | jacks. That is, the headphone won't auto-mute the other line-outs, | ||
48 | and no HP-amp is set to the pins. | ||
49 | |||
50 | * Mic Jack Mode, Line Jack Mode, etc | ||
51 | These enum controls set the direction and the bias of the input jack | ||
52 | pins. Depending on the jack type, it can be set to "Mic In" or "Line | ||
53 | In", for determining the input bias, or it can be set to "Line Out" | ||
54 | when the pin is a multi-I/O jack for surround channels. | ||
55 | |||
56 | |||
57 | VIA codecs | ||
58 | ---------- | ||
59 | |||
60 | * Smart 5.1 | ||
61 | An enum control to re-task the multi-I/O jacks for surround outputs. | ||
62 | When it's ON, the corresponding input jacks (usually a line-in and a | ||
63 | mic-in) are switched as the surround and the CLFE output jacks. | ||
64 | |||
65 | * Independent HP | ||
66 | When this enum control is enabled, the headphone output is routed | ||
67 | from an individual stream (the third PCM such as hw:0,2) instead of | ||
68 | the primary stream. In the case the headphone DAC is shared with a | ||
69 | side or a CLFE-channel DAC, the DAC is switched to the headphone | ||
70 | automatically. | ||
71 | |||
72 | * Loopback Mixing | ||
73 | An enum control to determine whether the analog-loopback route is | ||
74 | enabled or not. When it's enabled, the analog-loopback is mixed to | ||
75 | the front-channel. Also, the same route is used for the headphone | ||
76 | and speaker outputs. As a side-effect, when this mode is set, the | ||
77 | individual volume controls will be no longer available for | ||
78 | headphones and speakers because there is only one DAC connected to a | ||
79 | mixer widget. | ||
80 | |||
81 | * Dynamic Power-Control | ||
82 | This control determines whether the dynamic power-control per jack | ||
83 | detection is enabled or not. When enabled, the widget power states | ||
84 | (D0/D3) are changed dynamically depending on the jack plugging | ||
85 | state to reduce power consumption. However, if your system | ||
86 | doesn't provide a proper jack-detection, this won't work; in such a | ||
87 | case, turn this control OFF. | ||
88 | |||
89 | * Jack Detect | ||
90 | This control is provided only for VT1708 codec which gives no proper | ||
91 | unsolicited event per jack plug. When this is on, the driver polls | ||
92 | the jack detection so that the headphone auto-mute can work, while | ||
93 | turning this off would reduce the power consumption. | ||
94 | |||
95 | |||
96 | Conexant codecs | ||
97 | --------------- | ||
98 | |||
99 | * Auto-Mute Mode | ||
100 | See Realtek codecs. | ||
diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi index 6325f5b48635..d8eb01c15db1 100644 --- a/Documentation/spi/ep93xx_spi +++ b/Documentation/spi/ep93xx_spi | |||
@@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void) | |||
88 | ARRAY_SIZE(ts72xx_spi_devices)); | 88 | ARRAY_SIZE(ts72xx_spi_devices)); |
89 | } | 89 | } |
90 | 90 | ||
91 | The driver can use DMA for the transfers also. In this case ts72xx_spi_info | ||
92 | becomes: | ||
93 | |||
94 | static struct ep93xx_spi_info ts72xx_spi_info = { | ||
95 | .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices), | ||
96 | .use_dma = true, | ||
97 | }; | ||
98 | |||
99 | Note that CONFIG_EP93XX_DMA should be enabled as well. | ||
100 | |||
91 | Thanks to | 101 | Thanks to |
92 | ========= | 102 | ========= |
93 | Martin Guy, H. Hartley Sweeten and others who helped me during development of | 103 | Martin Guy, H. Hartley Sweeten and others who helped me during development of |
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index 493dada57372..00511e08db78 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx | |||
@@ -22,15 +22,11 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a | |||
22 | found in include/linux/spi/pxa2xx_spi.h: | 22 | found in include/linux/spi/pxa2xx_spi.h: |
23 | 23 | ||
24 | struct pxa2xx_spi_master { | 24 | struct pxa2xx_spi_master { |
25 | enum pxa_ssp_type ssp_type; | ||
26 | u32 clock_enable; | 25 | u32 clock_enable; |
27 | u16 num_chipselect; | 26 | u16 num_chipselect; |
28 | u8 enable_dma; | 27 | u8 enable_dma; |
29 | }; | 28 | }; |
30 | 29 | ||
31 | The "pxa2xx_spi_master.ssp_type" field must have a value between 1 and 3 and | ||
32 | informs the driver which features a particular SSP supports. | ||
33 | |||
34 | The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the | 30 | The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the |
35 | corresponding SSP peripheral block in the "Clock Enable Register" (CKEN). See | 31 | corresponding SSP peripheral block in the "Clock Enable Register" (CKEN). See |
36 | the "PXA2xx Developer Manual" section "Clocks and Power Management". | 32 | the "PXA2xx Developer Manual" section "Clocks and Power Management". |
@@ -61,7 +57,6 @@ static struct resource pxa_spi_nssp_resources[] = { | |||
61 | }; | 57 | }; |
62 | 58 | ||
63 | static struct pxa2xx_spi_master pxa_nssp_master_info = { | 59 | static struct pxa2xx_spi_master pxa_nssp_master_info = { |
64 | .ssp_type = PXA25x_NSSP, /* Type of SSP */ | ||
65 | .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ | 60 | .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ |
66 | .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ | 61 | .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ |
67 | .enable_dma = 1, /* Enables NSSP DMA */ | 62 | .enable_dma = 1, /* Enables NSSP DMA */ |
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 5e7cb39ad195..1c7fb0a94e28 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -17,23 +17,21 @@ before actually making adjustments. | |||
17 | 17 | ||
18 | Currently, these files might (depending on your configuration) | 18 | Currently, these files might (depending on your configuration) |
19 | show up in /proc/sys/kernel: | 19 | show up in /proc/sys/kernel: |
20 | - acpi_video_flags | 20 | |
21 | - acct | 21 | - acct |
22 | - acpi_video_flags | ||
23 | - auto_msgmni | ||
22 | - bootloader_type [ X86 only ] | 24 | - bootloader_type [ X86 only ] |
23 | - bootloader_version [ X86 only ] | 25 | - bootloader_version [ X86 only ] |
24 | - callhome [ S390 only ] | 26 | - callhome [ S390 only ] |
25 | - auto_msgmni | ||
26 | - core_pattern | 27 | - core_pattern |
27 | - core_pipe_limit | 28 | - core_pipe_limit |
28 | - core_uses_pid | 29 | - core_uses_pid |
29 | - ctrl-alt-del | 30 | - ctrl-alt-del |
30 | - dentry-state | ||
31 | - dmesg_restrict | 31 | - dmesg_restrict |
32 | - domainname | 32 | - domainname |
33 | - hostname | 33 | - hostname |
34 | - hotplug | 34 | - hotplug |
35 | - java-appletviewer [ binfmt_java, obsolete ] | ||
36 | - java-interpreter [ binfmt_java, obsolete ] | ||
37 | - kptr_restrict | 35 | - kptr_restrict |
38 | - kstack_depth_to_print [ X86 only ] | 36 | - kstack_depth_to_print [ X86 only ] |
39 | - l2cr [ PPC only ] | 37 | - l2cr [ PPC only ] |
@@ -48,10 +46,14 @@ show up in /proc/sys/kernel: | |||
48 | - overflowgid | 46 | - overflowgid |
49 | - overflowuid | 47 | - overflowuid |
50 | - panic | 48 | - panic |
49 | - panic_on_oops | ||
50 | - panic_on_unrecovered_nmi | ||
51 | - pid_max | 51 | - pid_max |
52 | - powersave-nap [ PPC only ] | 52 | - powersave-nap [ PPC only ] |
53 | - panic_on_unrecovered_nmi | ||
54 | - printk | 53 | - printk |
54 | - printk_delay | ||
55 | - printk_ratelimit | ||
56 | - printk_ratelimit_burst | ||
55 | - randomize_va_space | 57 | - randomize_va_space |
56 | - real-root-dev ==> Documentation/initrd.txt | 58 | - real-root-dev ==> Documentation/initrd.txt |
57 | - reboot-cmd [ SPARC only ] | 59 | - reboot-cmd [ SPARC only ] |
@@ -62,6 +64,7 @@ show up in /proc/sys/kernel: | |||
62 | - shmall | 64 | - shmall |
63 | - shmmax [ sysv ipc ] | 65 | - shmmax [ sysv ipc ] |
64 | - shmmni | 66 | - shmmni |
67 | - softlockup_thresh | ||
65 | - stop-a [ SPARC only ] | 68 | - stop-a [ SPARC only ] |
66 | - sysrq ==> Documentation/sysrq.txt | 69 | - sysrq ==> Documentation/sysrq.txt |
67 | - tainted | 70 | - tainted |
@@ -71,15 +74,6 @@ show up in /proc/sys/kernel: | |||
71 | 74 | ||
72 | ============================================================== | 75 | ============================================================== |
73 | 76 | ||
74 | acpi_video_flags: | ||
75 | |||
76 | flags | ||
77 | |||
78 | See Doc*/kernel/power/video.txt, it allows mode of video boot to be | ||
79 | set during run time. | ||
80 | |||
81 | ============================================================== | ||
82 | |||
83 | acct: | 77 | acct: |
84 | 78 | ||
85 | highwater lowwater frequency | 79 | highwater lowwater frequency |
@@ -97,6 +91,25 @@ valid for 30 seconds. | |||
97 | 91 | ||
98 | ============================================================== | 92 | ============================================================== |
99 | 93 | ||
94 | acpi_video_flags: | ||
95 | |||
96 | flags | ||
97 | |||
98 | See Doc*/kernel/power/video.txt, it allows mode of video boot to be | ||
99 | set during run time. | ||
100 | |||
101 | ============================================================== | ||
102 | |||
103 | auto_msgmni: | ||
104 | |||
105 | Enables/Disables automatic recomputing of msgmni upon memory add/remove | ||
106 | or upon ipc namespace creation/removal (see the msgmni description | ||
107 | above). Echoing "1" into this file enables msgmni automatic recomputing. | ||
108 | Echoing "0" turns it off. auto_msgmni default value is 1. | ||
109 | |||
110 | |||
111 | ============================================================== | ||
112 | |||
100 | bootloader_type: | 113 | bootloader_type: |
101 | 114 | ||
102 | x86 bootloader identification | 115 | x86 bootloader identification |
@@ -172,22 +185,24 @@ core_pattern is used to specify a core dumpfile pattern name. | |||
172 | 185 | ||
173 | core_pipe_limit: | 186 | core_pipe_limit: |
174 | 187 | ||
175 | This sysctl is only applicable when core_pattern is configured to pipe core | 188 | This sysctl is only applicable when core_pattern is configured to pipe |
176 | files to a user space helper (when the first character of core_pattern is a '|', | 189 | core files to a user space helper (when the first character of |
177 | see above). When collecting cores via a pipe to an application, it is | 190 | core_pattern is a '|', see above). When collecting cores via a pipe |
178 | occasionally useful for the collecting application to gather data about the | 191 | to an application, it is occasionally useful for the collecting |
179 | crashing process from its /proc/pid directory. In order to do this safely, the | 192 | application to gather data about the crashing process from its |
180 | kernel must wait for the collecting process to exit, so as not to remove the | 193 | /proc/pid directory. In order to do this safely, the kernel must wait |
181 | crashing processes proc files prematurely. This in turn creates the possibility | 194 | for the collecting process to exit, so as not to remove the crashing |
182 | that a misbehaving userspace collecting process can block the reaping of a | 195 | processes proc files prematurely. This in turn creates the |
183 | crashed process simply by never exiting. This sysctl defends against that. It | 196 | possibility that a misbehaving userspace collecting process can block |
184 | defines how many concurrent crashing processes may be piped to user space | 197 | the reaping of a crashed process simply by never exiting. This sysctl |
185 | applications in parallel. If this value is exceeded, then those crashing | 198 | defends against that. It defines how many concurrent crashing |
186 | processes above that value are noted via the kernel log and their cores are | 199 | processes may be piped to user space applications in parallel. If |
187 | skipped. 0 is a special value, indicating that unlimited processes may be | 200 | this value is exceeded, then those crashing processes above that value |
188 | captured in parallel, but that no waiting will take place (i.e. the collecting | 201 | are noted via the kernel log and their cores are skipped. 0 is a |
189 | process is not guaranteed access to /proc/<crashing pid>/). This value defaults | 202 | special value, indicating that unlimited processes may be captured in |
190 | to 0. | 203 | parallel, but that no waiting will take place (i.e. the collecting |
204 | process is not guaranteed access to /proc/<crashing pid>/). This | ||
205 | value defaults to 0. | ||
191 | 206 | ||
192 | ============================================================== | 207 | ============================================================== |
193 | 208 | ||
@@ -218,14 +233,14 @@ to decide what to do with it. | |||
218 | 233 | ||
219 | dmesg_restrict: | 234 | dmesg_restrict: |
220 | 235 | ||
221 | This toggle indicates whether unprivileged users are prevented from using | 236 | This toggle indicates whether unprivileged users are prevented |
222 | dmesg(8) to view messages from the kernel's log buffer. When | 237 | from using dmesg(8) to view messages from the kernel's log buffer. |
223 | dmesg_restrict is set to (0) there are no restrictions. When | 238 | When dmesg_restrict is set to (0) there are no restrictions. When |
224 | dmesg_restrict is set to (1), users must have CAP_SYSLOG to use | 239 | dmesg_restrict is set to (1), users must have CAP_SYSLOG to use |
225 | dmesg(8). | 240 | dmesg(8). |
226 | 241 | ||
227 | The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default | 242 | The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the |
228 | value of dmesg_restrict. | 243 | default value of dmesg_restrict. |
229 | 244 | ||
230 | ============================================================== | 245 | ============================================================== |
231 | 246 | ||
@@ -256,13 +271,6 @@ Default value is "/sbin/hotplug". | |||
256 | 271 | ||
257 | ============================================================== | 272 | ============================================================== |
258 | 273 | ||
259 | l2cr: (PPC only) | ||
260 | |||
261 | This flag controls the L2 cache of G3 processor boards. If | ||
262 | 0, the cache is disabled. Enabled if nonzero. | ||
263 | |||
264 | ============================================================== | ||
265 | |||
266 | kptr_restrict: | 274 | kptr_restrict: |
267 | 275 | ||
268 | This toggle indicates whether restrictions are placed on | 276 | This toggle indicates whether restrictions are placed on |
@@ -283,6 +291,13 @@ kernel stack. | |||
283 | 291 | ||
284 | ============================================================== | 292 | ============================================================== |
285 | 293 | ||
294 | l2cr: (PPC only) | ||
295 | |||
296 | This flag controls the L2 cache of G3 processor boards. If | ||
297 | 0, the cache is disabled. Enabled if nonzero. | ||
298 | |||
299 | ============================================================== | ||
300 | |||
286 | modules_disabled: | 301 | modules_disabled: |
287 | 302 | ||
288 | A toggle value indicating if modules are allowed to be loaded | 303 | A toggle value indicating if modules are allowed to be loaded |
@@ -293,6 +308,21 @@ to false. | |||
293 | 308 | ||
294 | ============================================================== | 309 | ============================================================== |
295 | 310 | ||
311 | nmi_watchdog: | ||
312 | |||
313 | Enables/Disables the NMI watchdog on x86 systems. When the value is | ||
314 | non-zero the NMI watchdog is enabled and will continuously test all | ||
315 | online cpus to determine whether or not they are still functioning | ||
316 | properly. Currently, passing "nmi_watchdog=" parameter at boot time is | ||
317 | required for this function to work. | ||
318 | |||
319 | If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel | ||
320 | parameter), the NMI watchdog shares registers with oprofile. By | ||
321 | disabling the NMI watchdog, oprofile may have more registers to | ||
322 | utilize. | ||
323 | |||
324 | ============================================================== | ||
325 | |||
296 | osrelease, ostype & version: | 326 | osrelease, ostype & version: |
297 | 327 | ||
298 | # cat osrelease | 328 | # cat osrelease |
@@ -312,10 +342,10 @@ The only way to tune these values is to rebuild the kernel :-) | |||
312 | 342 | ||
313 | overflowgid & overflowuid: | 343 | overflowgid & overflowuid: |
314 | 344 | ||
315 | if your architecture did not always support 32-bit UIDs (i.e. arm, i386, | 345 | if your architecture did not always support 32-bit UIDs (i.e. arm, |
316 | m68k, sh, and sparc32), a fixed UID and GID will be returned to | 346 | i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to |
317 | applications that use the old 16-bit UID/GID system calls, if the actual | 347 | applications that use the old 16-bit UID/GID system calls, if the |
318 | UID or GID would exceed 65535. | 348 | actual UID or GID would exceed 65535. |
319 | 349 | ||
320 | These sysctls allow you to change the value of the fixed UID and GID. | 350 | These sysctls allow you to change the value of the fixed UID and GID. |
321 | The default is 65534. | 351 | The default is 65534. |
@@ -324,9 +354,22 @@ The default is 65534. | |||
324 | 354 | ||
325 | panic: | 355 | panic: |
326 | 356 | ||
327 | The value in this file represents the number of seconds the | 357 | The value in this file represents the number of seconds the kernel |
328 | kernel waits before rebooting on a panic. When you use the | 358 | waits before rebooting on a panic. When you use the software watchdog, |
329 | software watchdog, the recommended setting is 60. | 359 | the recommended setting is 60. |
360 | |||
361 | ============================================================== | ||
362 | |||
363 | panic_on_unrecovered_nmi: | ||
364 | |||
365 | The default Linux behaviour on an NMI of either memory or unknown is | ||
366 | to continue operation. For many environments such as scientific | ||
367 | computing it is preferable that the box is taken out and the error | ||
368 | dealt with rather than an uncorrected parity/ECC error being propagated. | ||
369 | |||
370 | A small number of systems do generate NMIs for bizarre random reasons | ||
371 | such as power management so the default is off. That sysctl works like | ||
372 | the existing panic controls already in that directory. | ||
330 | 373 | ||
331 | ============================================================== | 374 | ============================================================== |
332 | 375 | ||
@@ -376,6 +419,14 @@ the different loglevels. | |||
376 | 419 | ||
377 | ============================================================== | 420 | ============================================================== |
378 | 421 | ||
422 | printk_delay: | ||
423 | |||
424 | Delay each printk message by printk_delay milliseconds | ||
425 | |||
426 | Value from 0 - 10000 is allowed. | ||
427 | |||
428 | ============================================================== | ||
429 | |||
379 | printk_ratelimit: | 430 | printk_ratelimit: |
380 | 431 | ||
381 | Some warning messages are rate limited. printk_ratelimit specifies | 432 | Some warning messages are rate limited. printk_ratelimit specifies |
@@ -395,15 +446,7 @@ send before ratelimiting kicks in. | |||
395 | 446 | ||
396 | ============================================================== | 447 | ============================================================== |
397 | 448 | ||
398 | printk_delay: | 449 | randomize_va_space: |
399 | |||
400 | Delay each printk message in printk_delay milliseconds | ||
401 | |||
402 | Value from 0 - 10000 is allowed. | ||
403 | |||
404 | ============================================================== | ||
405 | |||
406 | randomize-va-space: | ||
407 | 450 | ||
408 | This option can be used to select the type of process address | 451 | This option can be used to select the type of process address |
409 | space randomization that is used in the system, for architectures | 452 | space randomization that is used in the system, for architectures |
@@ -466,11 +509,11 @@ are doing anyway :) | |||
466 | 509 | ||
467 | ============================================================== | 510 | ============================================================== |
468 | 511 | ||
469 | shmmax: | 512 | shmmax: |
470 | 513 | ||
471 | This value can be used to query and set the run time limit | 514 | This value can be used to query and set the run time limit |
472 | on the maximum shared memory segment size that can be created. | 515 | on the maximum shared memory segment size that can be created. |
473 | Shared memory segments up to 1Gb are now supported in the | 516 | Shared memory segments up to 1Gb are now supported in the |
474 | kernel. This value defaults to SHMMAX. | 517 | kernel. This value defaults to SHMMAX. |
475 | 518 | ||
476 | ============================================================== | 519 | ============================================================== |
@@ -484,7 +527,7 @@ tunable to zero will disable the softlockup detection altogether. | |||
484 | 527 | ||
485 | ============================================================== | 528 | ============================================================== |
486 | 529 | ||
487 | tainted: | 530 | tainted: |
488 | 531 | ||
489 | Non-zero if the kernel has been tainted. Numeric values, which | 532 | Non-zero if the kernel has been tainted. Numeric values, which |
490 | can be ORed together: | 533 | can be ORed together: |
@@ -509,49 +552,11 @@ can be ORed together: | |||
509 | 552 | ||
510 | ============================================================== | 553 | ============================================================== |
511 | 554 | ||
512 | auto_msgmni: | ||
513 | |||
514 | Enables/Disables automatic recomputing of msgmni upon memory add/remove or | ||
515 | upon ipc namespace creation/removal (see the msgmni description above). | ||
516 | Echoing "1" into this file enables msgmni automatic recomputing. | ||
517 | Echoing "0" turns it off. | ||
518 | auto_msgmni default value is 1. | ||
519 | |||
520 | ============================================================== | ||
521 | |||
522 | nmi_watchdog: | ||
523 | |||
524 | Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero | ||
525 | the NMI watchdog is enabled and will continuously test all online cpus to | ||
526 | determine whether or not they are still functioning properly. Currently, | ||
527 | passing "nmi_watchdog=" parameter at boot time is required for this function | ||
528 | to work. | ||
529 | |||
530 | If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the | ||
531 | NMI watchdog shares registers with oprofile. By disabling the NMI watchdog, | ||
532 | oprofile may have more registers to utilize. | ||
533 | |||
534 | ============================================================== | ||
535 | |||
536 | unknown_nmi_panic: | 555 | unknown_nmi_panic: |
537 | 556 | ||
538 | The value in this file affects behavior of handling NMI. When the value is | 557 | The value in this file affects behavior of handling NMI. When the |
539 | non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel | 558 | value is non-zero, unknown NMI is trapped and then panic occurs. At |
540 | debugging information is displayed on console. | 559 | that time, kernel debugging information is displayed on console. |
541 | |||
542 | NMI switch that most IA32 servers have fires unknown NMI up, for example. | ||
543 | If a system hangs up, try pressing the NMI switch. | ||
544 | |||
545 | ============================================================== | ||
546 | |||
547 | panic_on_unrecovered_nmi: | ||
548 | |||
549 | The default Linux behaviour on an NMI of either memory or unknown is to continue | ||
550 | operation. For many environments such as scientific computing it is preferable | ||
551 | that the box is taken out and the error dealt with than an uncorrected | ||
552 | parity/ECC error get propogated. | ||
553 | |||
554 | A small number of systems do generate NMI's for bizarre random reasons such as | ||
555 | power management so the default is off. That sysctl works like the existing | ||
556 | panic controls already in that directory. | ||
557 | 560 | ||
561 | NMI switch that most IA32 servers have fires unknown NMI up, for | ||
562 | example. If a system hangs up, try pressing the NMI switch. | ||
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c83bd6b4e6e8..d0d0bb9e3e25 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt | |||
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via | |||
22 | 22 | ||
23 | Synopsis of kprobe_events | 23 | Synopsis of kprobe_events |
24 | ------------------------- | 24 | ------------------------- |
25 | p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe | 25 | p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe |
26 | r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe | 26 | r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe |
27 | -:[GRP/]EVENT : Clear a probe | 27 | -:[GRP/]EVENT : Clear a probe |
28 | 28 | ||
29 | GRP : Group name. If omitted, use "kprobes" for it. | 29 | GRP : Group name. If omitted, use "kprobes" for it. |
30 | EVENT : Event name. If omitted, the event name is generated | 30 | EVENT : Event name. If omitted, the event name is generated |
31 | based on SYMBOL+offs or MEMADDR. | 31 | based on SYM+offs or MEMADDR. |
32 | SYMBOL[+offs] : Symbol+offset where the probe is inserted. | 32 | MOD : Module name which has given SYM. |
33 | SYM[+offs] : Symbol+offset where the probe is inserted. | ||
33 | MEMADDR : Address where the probe is inserted. | 34 | MEMADDR : Address where the probe is inserted. |
34 | 35 | ||
35 | FETCHARGS : Arguments. Each probe can have up to 128 args. | 36 | FETCHARGS : Arguments. Each probe can have up to 128 args. |
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c new file mode 100644 index 000000000000..85870208edcf --- /dev/null +++ b/Documentation/vDSO/parse_vdso.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /* | ||
2 | * parse_vdso.c: Linux reference vDSO parser | ||
3 | * Written by Andrew Lutomirski, 2011. | ||
4 | * | ||
5 | * This code is meant to be linked in to various programs that run on Linux. | ||
6 | * As such, it is available with as few restrictions as possible. This file | ||
7 | * is licensed under the Creative Commons Zero License, version 1.0, | ||
8 | * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode | ||
9 | * | ||
10 | * The vDSO is a regular ELF DSO that the kernel maps into user space when | ||
11 | * it starts a program. It works equally well in statically and dynamically | ||
12 | * linked binaries. | ||
13 | * | ||
14 | * This code is tested on x86_64. In principle it should work on any 64-bit | ||
15 | * architecture that has a vDSO. | ||
16 | */ | ||
17 | |||
18 | #include <stdbool.h> | ||
19 | #include <stdint.h> | ||
20 | #include <string.h> | ||
21 | #include <elf.h> | ||
22 | |||
23 | /* | ||
24 | * To use this vDSO parser, first call one of the vdso_init_* functions. | ||
25 | * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR | ||
26 | * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv. | ||
27 | * Then call vdso_sym for each symbol you want. For example, to look up | ||
28 | * gettimeofday on x86_64, use: | ||
29 | * | ||
30 | * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday"); | ||
31 | * or | ||
32 | * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); | ||
33 | * | ||
34 | * vdso_sym will return 0 if the symbol doesn't exist or if the init function | ||
35 | * failed or was not called. vdso_sym is a little slow, so its return value | ||
36 | * should be cached. | ||
37 | * | ||
38 | * vdso_sym is threadsafe; the init functions are not. | ||
39 | * | ||
40 | * These are the prototypes: | ||
41 | */ | ||
42 | extern void vdso_init_from_auxv(void *auxv); | ||
43 | extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); | ||
44 | extern void *vdso_sym(const char *version, const char *name); | ||
45 | |||
46 | |||
47 | /* And here's the code. */ | ||
48 | |||
49 | #ifndef __x86_64__ | ||
50 | # error Not yet ported to non-x86_64 architectures | ||
51 | #endif | ||
52 | |||
53 | static struct vdso_info | ||
54 | { | ||
55 | bool valid; | ||
56 | |||
57 | /* Load information */ | ||
58 | uintptr_t load_addr; | ||
59 | uintptr_t load_offset; /* load_addr - recorded vaddr */ | ||
60 | |||
61 | /* Symbol table */ | ||
62 | Elf64_Sym *symtab; | ||
63 | const char *symstrings; | ||
64 | Elf64_Word *bucket, *chain; | ||
65 | Elf64_Word nbucket, nchain; | ||
66 | |||
67 | /* Version table */ | ||
68 | Elf64_Versym *versym; | ||
69 | Elf64_Verdef *verdef; | ||
70 | } vdso_info; | ||
71 | |||
72 | /* Straight from the ELF specification. */ | ||
73 | static unsigned long elf_hash(const unsigned char *name) | ||
74 | { | ||
75 | unsigned long h = 0, g; | ||
76 | while (*name) | ||
77 | { | ||
78 | h = (h << 4) + *name++; | ||
79 | if (g = h & 0xf0000000) | ||
80 | h ^= g >> 24; | ||
81 | h &= ~g; | ||
82 | } | ||
83 | return h; | ||
84 | } | ||
85 | |||
86 | void vdso_init_from_sysinfo_ehdr(uintptr_t base) | ||
87 | { | ||
88 | size_t i; | ||
89 | bool found_vaddr = false; | ||
90 | |||
91 | vdso_info.valid = false; | ||
92 | |||
93 | vdso_info.load_addr = base; | ||
94 | |||
95 | Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; | ||
96 | Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); | ||
97 | Elf64_Dyn *dyn = 0; | ||
98 | |||
99 | /* | ||
100 | * We need two things from the segment table: the load offset | ||
101 | * and the dynamic table. | ||
102 | */ | ||
103 | for (i = 0; i < hdr->e_phnum; i++) | ||
104 | { | ||
105 | if (pt[i].p_type == PT_LOAD && !found_vaddr) { | ||
106 | found_vaddr = true; | ||
107 | vdso_info.load_offset = base | ||
108 | + (uintptr_t)pt[i].p_offset | ||
109 | - (uintptr_t)pt[i].p_vaddr; | ||
110 | } else if (pt[i].p_type == PT_DYNAMIC) { | ||
111 | dyn = (Elf64_Dyn*)(base + pt[i].p_offset); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | if (!found_vaddr || !dyn) | ||
116 | return; /* Failed */ | ||
117 | |||
118 | /* | ||
119 | * Fish out the useful bits of the dynamic table. | ||
120 | */ | ||
121 | Elf64_Word *hash = 0; | ||
122 | vdso_info.symstrings = 0; | ||
123 | vdso_info.symtab = 0; | ||
124 | vdso_info.versym = 0; | ||
125 | vdso_info.verdef = 0; | ||
126 | for (i = 0; dyn[i].d_tag != DT_NULL; i++) { | ||
127 | switch (dyn[i].d_tag) { | ||
128 | case DT_STRTAB: | ||
129 | vdso_info.symstrings = (const char *) | ||
130 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
131 | + vdso_info.load_offset); | ||
132 | break; | ||
133 | case DT_SYMTAB: | ||
134 | vdso_info.symtab = (Elf64_Sym *) | ||
135 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
136 | + vdso_info.load_offset); | ||
137 | break; | ||
138 | case DT_HASH: | ||
139 | hash = (Elf64_Word *) | ||
140 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
141 | + vdso_info.load_offset); | ||
142 | break; | ||
143 | case DT_VERSYM: | ||
144 | vdso_info.versym = (Elf64_Versym *) | ||
145 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
146 | + vdso_info.load_offset); | ||
147 | break; | ||
148 | case DT_VERDEF: | ||
149 | vdso_info.verdef = (Elf64_Verdef *) | ||
150 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
151 | + vdso_info.load_offset); | ||
152 | break; | ||
153 | } | ||
154 | } | ||
155 | if (!vdso_info.symstrings || !vdso_info.symtab || !hash) | ||
156 | return; /* Failed */ | ||
157 | |||
158 | if (!vdso_info.verdef) | ||
159 | vdso_info.versym = 0; | ||
160 | |||
161 | /* Parse the hash table header. */ | ||
162 | vdso_info.nbucket = hash[0]; | ||
163 | vdso_info.nchain = hash[1]; | ||
164 | vdso_info.bucket = &hash[2]; | ||
165 | vdso_info.chain = &hash[vdso_info.nbucket + 2]; | ||
166 | |||
167 | /* That's all we need. */ | ||
168 | vdso_info.valid = true; | ||
169 | } | ||
170 | |||
171 | static bool vdso_match_version(Elf64_Versym ver, | ||
172 | const char *name, Elf64_Word hash) | ||
173 | { | ||
174 | /* | ||
175 | * This is a helper function to check if the version indexed by | ||
176 | * ver matches name (which hashes to hash). | ||
177 | * | ||
178 | * The version definition table is a mess, and I don't know how | ||
179 | * to do this in better than linear time without allocating memory | ||
180 | * to build an index. I also don't know why the table has | ||
181 | * variable size entries in the first place. | ||
182 | * | ||
183 | * For added fun, I can't find a comprehensible specification of how | ||
184 | * to parse all the weird flags in the table. | ||
185 | * | ||
186 | * So I just parse the whole table every time. | ||
187 | */ | ||
188 | |||
189 | /* First step: find the version definition */ | ||
190 | ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ | ||
191 | Elf64_Verdef *def = vdso_info.verdef; | ||
192 | while(true) { | ||
193 | if ((def->vd_flags & VER_FLG_BASE) == 0 | ||
194 | && (def->vd_ndx & 0x7fff) == ver) | ||
195 | break; | ||
196 | |||
197 | if (def->vd_next == 0) | ||
198 | return false; /* No definition. */ | ||
199 | |||
200 | def = (Elf64_Verdef *)((char *)def + def->vd_next); | ||
201 | } | ||
202 | |||
203 | /* Now figure out whether it matches. */ | ||
204 | Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); | ||
205 | return def->vd_hash == hash | ||
206 | && !strcmp(name, vdso_info.symstrings + aux->vda_name); | ||
207 | } | ||
208 | |||
209 | void *vdso_sym(const char *version, const char *name) | ||
210 | { | ||
211 | unsigned long ver_hash; | ||
212 | if (!vdso_info.valid) | ||
213 | return 0; | ||
214 | |||
215 | ver_hash = elf_hash(version); | ||
216 | Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; | ||
217 | |||
218 | for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { | ||
219 | Elf64_Sym *sym = &vdso_info.symtab[chain]; | ||
220 | |||
221 | /* Check for a defined global or weak function w/ right name. */ | ||
222 | if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) | ||
223 | continue; | ||
224 | if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && | ||
225 | ELF64_ST_BIND(sym->st_info) != STB_WEAK) | ||
226 | continue; | ||
227 | if (sym->st_shndx == SHN_UNDEF) | ||
228 | continue; | ||
229 | if (strcmp(name, vdso_info.symstrings + sym->st_name)) | ||
230 | continue; | ||
231 | |||
232 | /* Check symbol version. */ | ||
233 | if (vdso_info.versym | ||
234 | && !vdso_match_version(vdso_info.versym[chain], | ||
235 | version, ver_hash)) | ||
236 | continue; | ||
237 | |||
238 | return (void *)(vdso_info.load_offset + sym->st_value); | ||
239 | } | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | void vdso_init_from_auxv(void *auxv) | ||
245 | { | ||
246 | Elf64_auxv_t *elf_auxv = auxv; | ||
247 | for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) | ||
248 | { | ||
249 | if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { | ||
250 | vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); | ||
251 | return; | ||
252 | } | ||
253 | } | ||
254 | |||
255 | vdso_info.valid = false; | ||
256 | } | ||
diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c new file mode 100644 index 000000000000..fff633432dff --- /dev/null +++ b/Documentation/vDSO/vdso_test.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * vdso_test.c: Sample code to test parse_vdso.c on x86_64 | ||
3 | * Copyright (c) 2011 Andy Lutomirski | ||
4 | * Subject to the GNU General Public License, version 2 | ||
5 | * | ||
6 | * You can amuse yourself by compiling with: | ||
7 | * gcc -std=gnu99 -nostdlib | ||
8 | * -Os -fno-asynchronous-unwind-tables -flto | ||
9 | * vdso_test.c parse_vdso.c -o vdso_test | ||
10 | * to generate a small binary with no dependencies at all. | ||
11 | */ | ||
12 | |||
13 | #include <sys/syscall.h> | ||
14 | #include <sys/time.h> | ||
15 | #include <unistd.h> | ||
16 | #include <stdint.h> | ||
17 | |||
18 | extern void *vdso_sym(const char *version, const char *name); | ||
19 | extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); | ||
20 | extern void vdso_init_from_auxv(void *auxv); | ||
21 | |||
22 | /* We need a libc function... */ | ||
23 | int strcmp(const char *a, const char *b) | ||
24 | { | ||
25 | /* This implementation is buggy: it never returns -1. */ | ||
26 | while (*a || *b) { | ||
27 | if (*a != *b) | ||
28 | return 1; | ||
29 | if (*a == 0 || *b == 0) | ||
30 | return 1; | ||
31 | a++; | ||
32 | b++; | ||
33 | } | ||
34 | |||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | /* ...and two syscalls. This is x86_64-specific. */ | ||
39 | static inline long linux_write(int fd, const void *data, size_t len) | ||
40 | { | ||
41 | |||
42 | long ret; | ||
43 | asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write), | ||
44 | "D" (fd), "S" (data), "d" (len) : | ||
45 | "cc", "memory", "rcx", | ||
46 | "r8", "r9", "r10", "r11" ); | ||
47 | return ret; | ||
48 | } | ||
49 | |||
50 | static inline void linux_exit(int code) | ||
51 | { | ||
52 | asm volatile ("syscall" : : "a" (__NR_exit), "D" (code)); | ||
53 | } | ||
54 | |||
55 | void to_base10(char *lastdig, uint64_t n) | ||
56 | { | ||
57 | while (n) { | ||
58 | *lastdig = (n % 10) + '0'; | ||
59 | n /= 10; | ||
60 | lastdig--; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | __attribute__((externally_visible)) void c_main(void **stack) | ||
65 | { | ||
66 | /* Parse the stack */ | ||
67 | long argc = (long)*stack; | ||
68 | stack += argc + 2; | ||
69 | |||
70 | /* Now we're pointing at the environment. Skip it. */ | ||
71 | while(*stack) | ||
72 | stack++; | ||
73 | stack++; | ||
74 | |||
75 | /* Now we're pointing at auxv. Initialize the vDSO parser. */ | ||
76 | vdso_init_from_auxv((void *)stack); | ||
77 | |||
78 | /* Find gettimeofday. */ | ||
79 | typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); | ||
80 | gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); | ||
81 | |||
82 | if (!gtod) | ||
83 | linux_exit(1); | ||
84 | |||
85 | struct timeval tv; | ||
86 | long ret = gtod(&tv, 0); | ||
87 | |||
88 | if (ret == 0) { | ||
89 | char buf[] = "The time is                     .000000\n"; | ||
90 | to_base10(buf + 31, tv.tv_sec); | ||
91 | to_base10(buf + 38, tv.tv_usec); | ||
92 | linux_write(1, buf, sizeof(buf) - 1); | ||
93 | } else { | ||
94 | linux_exit(ret); | ||
95 | } | ||
96 | |||
97 | linux_exit(0); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * This is the real entry point. It passes the initial stack into | ||
102 | * the C entry point. | ||
103 | */ | ||
104 | asm ( | ||
105 | ".text\n" | ||
106 | ".global _start\n" | ||
107 | ".type _start,@function\n" | ||
108 | "_start:\n\t" | ||
109 | "mov %rsp,%rdi\n\t" | ||
110 | "jmp c_main" | ||
111 | ); | ||
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 42542eb802ca..b0e4b9cd6a66 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. | |||
180 | If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 | 180 | If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 |
181 | cpus max. | 181 | cpus max. |
182 | 182 | ||
183 | On powerpc using book3s_hv mode, the vcpus are mapped onto virtual | ||
184 | threads in one or more virtual CPU cores. (This is because the | ||
185 | hardware requires all the hardware threads in a CPU core to be in the | ||
186 | same partition.) The KVM_CAP_PPC_SMT capability indicates the number | ||
187 | of vcpus per virtual core (vcore). The vcore id is obtained by | ||
188 | dividing the vcpu id by the number of vcpus per vcore. The vcpus in a | ||
189 | given vcore will always be in the same physical core as each other | ||
190 | (though that might be a different physical core from time to time). | ||
191 | Userspace can control the threading (SMT) mode of the guest by its | ||
192 | allocation of vcpu ids. For example, if userspace wants | ||
193 | single-threaded guest vcpus, it should make all vcpu ids be a multiple | ||
194 | of the number of vcpus per vcore. | ||
195 | |||
183 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) | 196 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) |
184 | 197 | ||
185 | Capability: basic | 198 | Capability: basic |
@@ -1143,15 +1156,10 @@ Assigns an IRQ to a passed-through device. | |||
1143 | 1156 | ||
1144 | struct kvm_assigned_irq { | 1157 | struct kvm_assigned_irq { |
1145 | __u32 assigned_dev_id; | 1158 | __u32 assigned_dev_id; |
1146 | __u32 host_irq; | 1159 | __u32 host_irq; /* ignored (legacy field) */ |
1147 | __u32 guest_irq; | 1160 | __u32 guest_irq; |
1148 | __u32 flags; | 1161 | __u32 flags; |
1149 | union { | 1162 | union { |
1150 | struct { | ||
1151 | __u32 addr_lo; | ||
1152 | __u32 addr_hi; | ||
1153 | __u32 data; | ||
1154 | } guest_msi; | ||
1155 | __u32 reserved[12]; | 1163 | __u32 reserved[12]; |
1156 | }; | 1164 | }; |
1157 | }; | 1165 | }; |
@@ -1239,8 +1247,10 @@ Type: vm ioctl | |||
1239 | Parameters: struct kvm_assigned_msix_nr (in) | 1247 | Parameters: struct kvm_assigned_msix_nr (in) |
1240 | Returns: 0 on success, -1 on error | 1248 | Returns: 0 on success, -1 on error |
1241 | 1249 | ||
1242 | Set the number of MSI-X interrupts for an assigned device. This service can | 1250 | Set the number of MSI-X interrupts for an assigned device. The number is |
1243 | only be called once in the lifetime of an assigned device. | 1251 | reset again by terminating the MSI-X assignment of the device via |
1252 | KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier | ||
1253 | point will fail. | ||
1244 | 1254 | ||
1245 | struct kvm_assigned_msix_nr { | 1255 | struct kvm_assigned_msix_nr { |
1246 | __u32 assigned_dev_id; | 1256 | __u32 assigned_dev_id; |
@@ -1291,6 +1301,135 @@ Returns the tsc frequency of the guest. The unit of the return value is | |||
1291 | KHz. If the host has unstable tsc this ioctl returns -EIO instead as an | 1301 | KHz. If the host has unstable tsc this ioctl returns -EIO instead as an |
1292 | error. | 1302 | error. |
1293 | 1303 | ||
1304 | 4.56 KVM_GET_LAPIC | ||
1305 | |||
1306 | Capability: KVM_CAP_IRQCHIP | ||
1307 | Architectures: x86 | ||
1308 | Type: vcpu ioctl | ||
1309 | Parameters: struct kvm_lapic_state (out) | ||
1310 | Returns: 0 on success, -1 on error | ||
1311 | |||
1312 | #define KVM_APIC_REG_SIZE 0x400 | ||
1313 | struct kvm_lapic_state { | ||
1314 | char regs[KVM_APIC_REG_SIZE]; | ||
1315 | }; | ||
1316 | |||
1317 | Reads the Local APIC registers and copies them into the output argument. The | ||
1318 | data format and layout are the same as documented in the architecture manual. | ||
1319 | |||
1320 | 4.57 KVM_SET_LAPIC | ||
1321 | |||
1322 | Capability: KVM_CAP_IRQCHIP | ||
1323 | Architectures: x86 | ||
1324 | Type: vcpu ioctl | ||
1325 | Parameters: struct kvm_lapic_state (in) | ||
1326 | Returns: 0 on success, -1 on error | ||
1327 | |||
1328 | #define KVM_APIC_REG_SIZE 0x400 | ||
1329 | struct kvm_lapic_state { | ||
1330 | char regs[KVM_APIC_REG_SIZE]; | ||
1331 | }; | ||
1332 | |||
1333 | Copies the input argument into the Local APIC registers. The data format | ||
1334 | and layout are the same as documented in the architecture manual. | ||
1335 | |||
1336 | 4.58 KVM_IOEVENTFD | ||
1337 | |||
1338 | Capability: KVM_CAP_IOEVENTFD | ||
1339 | Architectures: all | ||
1340 | Type: vm ioctl | ||
1341 | Parameters: struct kvm_ioeventfd (in) | ||
1342 | Returns: 0 on success, !0 on error | ||
1343 | |||
1344 | This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address | ||
1345 | within the guest. A guest write in the registered address will signal the | ||
1346 | provided event instead of triggering an exit. | ||
1347 | |||
1348 | struct kvm_ioeventfd { | ||
1349 | __u64 datamatch; | ||
1350 | __u64 addr; /* legal pio/mmio address */ | ||
1351 | __u32 len; /* 1, 2, 4, or 8 bytes */ | ||
1352 | __s32 fd; | ||
1353 | __u32 flags; | ||
1354 | __u8 pad[36]; | ||
1355 | }; | ||
1356 | |||
1357 | The following flags are defined: | ||
1358 | |||
1359 | #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) | ||
1360 | #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) | ||
1361 | #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) | ||
1362 | |||
1363 | If datamatch flag is set, the event will be signaled only if the written value | ||
1364 | to the registered address is equal to datamatch in struct kvm_ioeventfd. | ||
1365 | |||
1366 | 4.62 KVM_CREATE_SPAPR_TCE | ||
1367 | |||
1368 | Capability: KVM_CAP_SPAPR_TCE | ||
1369 | Architectures: powerpc | ||
1370 | Type: vm ioctl | ||
1371 | Parameters: struct kvm_create_spapr_tce (in) | ||
1372 | Returns: file descriptor for manipulating the created TCE table | ||
1373 | |||
1374 | This creates a virtual TCE (translation control entry) table, which | ||
1375 | is an IOMMU for PAPR-style virtual I/O. It is used to translate | ||
1376 | logical addresses used in virtual I/O into guest physical addresses, | ||
1377 | and provides a scatter/gather capability for PAPR virtual I/O. | ||
1378 | |||
1379 | /* for KVM_CAP_SPAPR_TCE */ | ||
1380 | struct kvm_create_spapr_tce { | ||
1381 | __u64 liobn; | ||
1382 | __u32 window_size; | ||
1383 | }; | ||
1384 | |||
1385 | The liobn field gives the logical IO bus number for which to create a | ||
1386 | TCE table. The window_size field specifies the size of the DMA window | ||
1387 | which this TCE table will translate - the table will contain one 64 | ||
1388 | bit TCE entry for every 4kiB of the DMA window. | ||
1389 | |||
1390 | When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE | ||
1391 | table has been created using this ioctl(), the kernel will handle it | ||
1392 | in real mode, updating the TCE table. H_PUT_TCE calls for other | ||
1393 | liobns will cause a vm exit and must be handled by userspace. | ||
1394 | |||
1395 | The return value is a file descriptor which can be passed to mmap(2) | ||
1396 | to map the created TCE table into userspace. This lets userspace read | ||
1397 | the entries written by kernel-handled H_PUT_TCE calls, and also lets | ||
1398 | userspace update the TCE table directly which is useful in some | ||
1399 | circumstances. | ||
1400 | |||
1401 | 4.63 KVM_ALLOCATE_RMA | ||
1402 | |||
1403 | Capability: KVM_CAP_PPC_RMA | ||
1404 | Architectures: powerpc | ||
1405 | Type: vm ioctl | ||
1406 | Parameters: struct kvm_allocate_rma (out) | ||
1407 | Returns: file descriptor for mapping the allocated RMA | ||
1408 | |||
1409 | This allocates a Real Mode Area (RMA) from the pool allocated at boot | ||
1410 | time by the kernel. An RMA is a physically-contiguous, aligned region | ||
1411 | of memory used on older POWER processors to provide the memory which | ||
1412 | will be accessed by real-mode (MMU off) accesses in a KVM guest. | ||
1413 | POWER processors support a set of sizes for the RMA that usually | ||
1414 | includes 64MB, 128MB, 256MB and some larger powers of two. | ||
1415 | |||
1416 | /* for KVM_ALLOCATE_RMA */ | ||
1417 | struct kvm_allocate_rma { | ||
1418 | __u64 rma_size; | ||
1419 | }; | ||
1420 | |||
1421 | The return value is a file descriptor which can be passed to mmap(2) | ||
1422 | to map the allocated RMA into userspace. The mapped area can then be | ||
1423 | passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the | ||
1424 | RMA for a virtual machine. The size of the RMA in bytes (which is | ||
1425 | fixed at host kernel boot time) is returned in the rma_size field of | ||
1426 | the argument structure. | ||
1427 | |||
1428 | The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl | ||
1429 | is supported; 2 if the processor requires all virtual machines to have | ||
1430 | an RMA, or 1 if the processor can use an RMA but doesn't require it, | ||
1431 | because it supports the Virtual RMA (VRMA) facility. | ||
1432 | |||
1294 | 5. The kvm_run structure | 1433 | 5. The kvm_run structure |
1295 | 1434 | ||
1296 | Application code obtains a pointer to the kvm_run structure by | 1435 | Application code obtains a pointer to the kvm_run structure by |
@@ -1473,6 +1612,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as | |||
1473 | necessary. Upon guest entry all guest GPRs will then be replaced by the values | 1612 | necessary. Upon guest entry all guest GPRs will then be replaced by the values |
1474 | in this struct. | 1613 | in this struct. |
1475 | 1614 | ||
1615 | /* KVM_EXIT_PAPR_HCALL */ | ||
1616 | struct { | ||
1617 | __u64 nr; | ||
1618 | __u64 ret; | ||
1619 | __u64 args[9]; | ||
1620 | } papr_hcall; | ||
1621 | |||
1622 | This is used on 64-bit PowerPC when emulating a pSeries partition, | ||
1623 | e.g. with the 'pseries' machine type in qemu. It occurs when the | ||
1624 | guest does a hypercall using the 'sc 1' instruction. The 'nr' field | ||
1625 | contains the hypercall number (from the guest R3), and 'args' contains | ||
1626 | the arguments (from the guest R4 - R12). Userspace should put the | ||
1627 | return code in 'ret' and any extra returned values in args[]. | ||
1628 | The possible hypercalls are defined in the Power Architecture Platform | ||
1629 | Requirements (PAPR) document available from www.power.org (free | ||
1630 | developer registration required to access it). | ||
1631 | |||
1476 | /* Fix the size of the union. */ | 1632 | /* Fix the size of the union. */ |
1477 | char padding[256]; | 1633 | char padding[256]; |
1478 | }; | 1634 | }; |
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index f46aa58389ca..5dc972c09b55 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt | |||
@@ -165,6 +165,10 @@ Shadow pages contain the following information: | |||
165 | Contains the value of efer.nxe for which the page is valid. | 165 | Contains the value of efer.nxe for which the page is valid. |
166 | role.cr0_wp: | 166 | role.cr0_wp: |
167 | Contains the value of cr0.wp for which the page is valid. | 167 | Contains the value of cr0.wp for which the page is valid. |
168 | role.smep_andnot_wp: | ||
169 | Contains the value of cr4.smep && !cr0.wp for which the page is valid | ||
170 | (pages for which this is true are different from other pages; see the | ||
171 | treatment of cr0.wp=0 below). | ||
168 | gfn: | 172 | gfn: |
169 | Either the guest page table containing the translations shadowed by this | 173 | Either the guest page table containing the translations shadowed by this |
170 | page, or the base page frame for linear translations. See role.direct. | 174 | page, or the base page frame for linear translations. See role.direct. |
@@ -317,6 +321,20 @@ on fault type: | |||
317 | 321 | ||
318 | (user write faults generate a #PF) | 322 | (user write faults generate a #PF) |
319 | 323 | ||
324 | In the first case there is an additional complication if CR4.SMEP is | ||
325 | enabled: since we've turned the page into a kernel page, the kernel may now | ||
326 | execute it. We handle this by also setting spte.nx. If we get a user | ||
327 | fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. | ||
328 | |||
329 | To prevent an spte that was converted into a kernel page with cr0.wp=0 | ||
330 | from being written by the kernel after cr0.wp has changed to 1, we make | ||
331 | the value of cr0.wp part of the page role. This means that an spte created | ||
332 | with one value of cr0.wp cannot be used when cr0.wp has a different value - | ||
333 | it will simply be missed by the shadow page lookup code. A similar issue | ||
334 | exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after | ||
335 | changing cr4.smep to 1. To avoid this, the value of !cr0.wp && cr4.smep | ||
336 | is also made a part of the page role. | ||
337 | |||
320 | Large pages | 338 | Large pages |
321 | =========== | 339 | =========== |
322 | 340 | ||
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index d079aed27e03..50317809113d 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt | |||
@@ -185,3 +185,37 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | |||
185 | 185 | ||
186 | Currently type 2 APF will be always delivered on the same vcpu as | 186 | Currently type 2 APF will be always delivered on the same vcpu as |
187 | type 1 was, but guest should not rely on that. | 187 | type 1 was, but guest should not rely on that. |
188 | |||
189 | MSR_KVM_STEAL_TIME: 0x4b564d03 | ||
190 | |||
191 | data: 64-byte aligned physical address of a memory area which must be | ||
192 | in guest RAM, plus an enable bit in bit 0. This memory is expected to | ||
193 | hold a copy of the following structure: | ||
194 | |||
195 | struct kvm_steal_time { | ||
196 | __u64 steal; | ||
197 | __u32 version; | ||
198 | __u32 flags; | ||
199 | __u32 pad[12]; | ||
200 | } | ||
201 | |||
202 | whose data will be filled in by the hypervisor periodically. Only one | ||
203 | write, or registration, is needed for each VCPU. The interval between | ||
204 | updates of this structure is arbitrary and implementation-dependent. | ||
205 | The hypervisor may update this structure at any time it sees fit until | ||
206 | anything with bit0 == 0 is written to the MSR. Guest is required to make sure | ||
207 | this structure is initialized to zero. | ||
208 | |||
209 | Fields have the following meanings: | ||
210 | |||
211 | version: a sequence counter. In other words, guest has to check | ||
212 | this field before and after grabbing time information and make | ||
213 | sure they are both equal and even. An odd version indicates an | ||
214 | in-progress update. | ||
215 | |||
216 | flags: At this point, always zero. May be used to indicate | ||
217 | changes in this structure in the future. | ||
218 | |||
219 | steal: the amount of time in which this vCPU did not run, in | ||
220 | nanoseconds. Time during which the vcpu is idle will not be | ||
221 | reported as steal time. | ||
diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virtual/kvm/nested-vmx.txt new file mode 100644 index 000000000000..8ed937de1163 --- /dev/null +++ b/Documentation/virtual/kvm/nested-vmx.txt | |||
@@ -0,0 +1,251 @@ | |||
1 | Nested VMX | ||
2 | ========== | ||
3 | |||
4 | Overview | ||
5 | --------- | ||
6 | |||
7 | On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions) | ||
8 | to easily and efficiently run guest operating systems. Normally, these guests | ||
9 | *cannot* themselves be hypervisors running their own guests, because in VMX, | ||
10 | guests cannot use VMX instructions. | ||
11 | |||
12 | The "Nested VMX" feature adds this missing capability - of running guest | ||
13 | hypervisors (which use VMX) with their own nested guests. It does so by | ||
14 | allowing a guest to use VMX instructions, and correctly and efficiently | ||
15 | emulating them using the single level of VMX available in the hardware. | ||
16 | |||
17 | We describe in much greater detail the theory behind the nested VMX feature, | ||
18 | its implementation and its performance characteristics, in the OSDI 2010 paper | ||
19 | "The Turtles Project: Design and Implementation of Nested Virtualization", | ||
20 | available at: | ||
21 | |||
22 | http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf | ||
23 | |||
24 | |||
25 | Terminology | ||
26 | ----------- | ||
27 | |||
28 | Single-level virtualization has two levels - the host (KVM) and the guests. | ||
29 | In nested virtualization, we have three levels: The host (KVM), which we call | ||
30 | L0, the guest hypervisor, which we call L1, and its nested guest, which we | ||
31 | call L2. | ||
32 | |||
33 | |||
34 | Known limitations | ||
35 | ----------------- | ||
36 | |||
37 | The current code supports running Linux guests under KVM guests. | ||
38 | Only 64-bit guest hypervisors are supported. | ||
39 | |||
40 | Additional patches for running Windows under guest KVM, and Linux under | ||
41 | guest VMware server, and support for nested EPT, are currently running in | ||
42 | the lab, and will be sent as follow-on patchsets. | ||
43 | |||
44 | |||
45 | Running nested VMX | ||
46 | ------------------ | ||
47 | |||
48 | The nested VMX feature is disabled by default. It can be enabled by giving | ||
49 | the "nested=1" option to the kvm-intel module. | ||
50 | |||
51 | No modifications are required to user space (qemu). However, qemu's default | ||
52 | emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be | ||
53 | explicitly enabled, by giving qemu one of the following options: | ||
54 | |||
55 | -cpu host (emulated CPU has all features of the real CPU) | ||
56 | |||
57 | -cpu qemu64,+vmx (add just the vmx feature to a named CPU type) | ||
58 | |||
59 | |||
60 | ABIs | ||
61 | ---- | ||
62 | |||
63 | Nested VMX aims to present a standard and (eventually) fully-functional VMX | ||
64 | implementation for a guest hypervisor to use. As such, the official | ||
65 | specification of the ABI that it provides is Intel's VMX specification, | ||
66 | namely volume 3B of their "Intel 64 and IA-32 Architectures Software | ||
67 | Developer's Manual". Not all of VMX's features are currently fully supported, | ||
68 | but the goal is to eventually support them all, starting with the VMX features | ||
69 | which are used in practice by popular hypervisors (KVM and others). | ||
70 | |||
71 | As a VMX implementation, nested VMX presents a VMCS structure to L1. | ||
72 | As mandated by the spec, other than the two fields revision_id and abort, | ||
73 | this structure is *opaque* to its user, who is not supposed to know or care | ||
74 | about its internal structure. Rather, the structure is accessed through the | ||
75 | VMREAD and VMWRITE instructions. | ||
76 | Still, for debugging purposes, KVM developers might be interested to know the | ||
77 | internals of this structure; this is struct vmcs12 from arch/x86/kvm/vmx.c. | ||
78 | |||
79 | The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we | ||
80 | also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS | ||
81 | which L0 builds to actually run L2 - how this is done is explained in the | ||
82 | aforementioned paper. | ||
83 | |||
84 | For convenience, we repeat the content of struct vmcs12 here. If the internals | ||
85 | of this structure change, this can break live migration across KVM versions. | ||
86 | VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner | ||
87 | struct shadow_vmcs is ever changed. | ||
88 | |||
89 | typedef u64 natural_width; | ||
90 | struct __packed vmcs12 { | ||
91 | /* According to the Intel spec, a VMCS region must start with | ||
92 | * these two user-visible fields */ | ||
93 | u32 revision_id; | ||
94 | u32 abort; | ||
95 | |||
96 | u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */ | ||
97 | u32 padding[7]; /* room for future expansion */ | ||
98 | |||
99 | u64 io_bitmap_a; | ||
100 | u64 io_bitmap_b; | ||
101 | u64 msr_bitmap; | ||
102 | u64 vm_exit_msr_store_addr; | ||
103 | u64 vm_exit_msr_load_addr; | ||
104 | u64 vm_entry_msr_load_addr; | ||
105 | u64 tsc_offset; | ||
106 | u64 virtual_apic_page_addr; | ||
107 | u64 apic_access_addr; | ||
108 | u64 ept_pointer; | ||
109 | u64 guest_physical_address; | ||
110 | u64 vmcs_link_pointer; | ||
111 | u64 guest_ia32_debugctl; | ||
112 | u64 guest_ia32_pat; | ||
113 | u64 guest_ia32_efer; | ||
114 | u64 guest_pdptr0; | ||
115 | u64 guest_pdptr1; | ||
116 | u64 guest_pdptr2; | ||
117 | u64 guest_pdptr3; | ||
118 | u64 host_ia32_pat; | ||
119 | u64 host_ia32_efer; | ||
120 | u64 padding64[8]; /* room for future expansion */ | ||
121 | natural_width cr0_guest_host_mask; | ||
122 | natural_width cr4_guest_host_mask; | ||
123 | natural_width cr0_read_shadow; | ||
124 | natural_width cr4_read_shadow; | ||
125 | natural_width cr3_target_value0; | ||
126 | natural_width cr3_target_value1; | ||
127 | natural_width cr3_target_value2; | ||
128 | natural_width cr3_target_value3; | ||
129 | natural_width exit_qualification; | ||
130 | natural_width guest_linear_address; | ||
131 | natural_width guest_cr0; | ||
132 | natural_width guest_cr3; | ||
133 | natural_width guest_cr4; | ||
134 | natural_width guest_es_base; | ||
135 | natural_width guest_cs_base; | ||
136 | natural_width guest_ss_base; | ||
137 | natural_width guest_ds_base; | ||
138 | natural_width guest_fs_base; | ||
139 | natural_width guest_gs_base; | ||
140 | natural_width guest_ldtr_base; | ||
141 | natural_width guest_tr_base; | ||
142 | natural_width guest_gdtr_base; | ||
143 | natural_width guest_idtr_base; | ||
144 | natural_width guest_dr7; | ||
145 | natural_width guest_rsp; | ||
146 | natural_width guest_rip; | ||
147 | natural_width guest_rflags; | ||
148 | natural_width guest_pending_dbg_exceptions; | ||
149 | natural_width guest_sysenter_esp; | ||
150 | natural_width guest_sysenter_eip; | ||
151 | natural_width host_cr0; | ||
152 | natural_width host_cr3; | ||
153 | natural_width host_cr4; | ||
154 | natural_width host_fs_base; | ||
155 | natural_width host_gs_base; | ||
156 | natural_width host_tr_base; | ||
157 | natural_width host_gdtr_base; | ||
158 | natural_width host_idtr_base; | ||
159 | natural_width host_ia32_sysenter_esp; | ||
160 | natural_width host_ia32_sysenter_eip; | ||
161 | natural_width host_rsp; | ||
162 | natural_width host_rip; | ||
163 | natural_width paddingl[8]; /* room for future expansion */ | ||
164 | u32 pin_based_vm_exec_control; | ||
165 | u32 cpu_based_vm_exec_control; | ||
166 | u32 exception_bitmap; | ||
167 | u32 page_fault_error_code_mask; | ||
168 | u32 page_fault_error_code_match; | ||
169 | u32 cr3_target_count; | ||
170 | u32 vm_exit_controls; | ||
171 | u32 vm_exit_msr_store_count; | ||
172 | u32 vm_exit_msr_load_count; | ||
173 | u32 vm_entry_controls; | ||
174 | u32 vm_entry_msr_load_count; | ||
175 | u32 vm_entry_intr_info_field; | ||
176 | u32 vm_entry_exception_error_code; | ||
177 | u32 vm_entry_instruction_len; | ||
178 | u32 tpr_threshold; | ||
179 | u32 secondary_vm_exec_control; | ||
180 | u32 vm_instruction_error; | ||
181 | u32 vm_exit_reason; | ||
182 | u32 vm_exit_intr_info; | ||
183 | u32 vm_exit_intr_error_code; | ||
184 | u32 idt_vectoring_info_field; | ||
185 | u32 idt_vectoring_error_code; | ||
186 | u32 vm_exit_instruction_len; | ||
187 | u32 vmx_instruction_info; | ||
188 | u32 guest_es_limit; | ||
189 | u32 guest_cs_limit; | ||
190 | u32 guest_ss_limit; | ||
191 | u32 guest_ds_limit; | ||
192 | u32 guest_fs_limit; | ||
193 | u32 guest_gs_limit; | ||
194 | u32 guest_ldtr_limit; | ||
195 | u32 guest_tr_limit; | ||
196 | u32 guest_gdtr_limit; | ||
197 | u32 guest_idtr_limit; | ||
198 | u32 guest_es_ar_bytes; | ||
199 | u32 guest_cs_ar_bytes; | ||
200 | u32 guest_ss_ar_bytes; | ||
201 | u32 guest_ds_ar_bytes; | ||
202 | u32 guest_fs_ar_bytes; | ||
203 | u32 guest_gs_ar_bytes; | ||
204 | u32 guest_ldtr_ar_bytes; | ||
205 | u32 guest_tr_ar_bytes; | ||
206 | u32 guest_interruptibility_info; | ||
207 | u32 guest_activity_state; | ||
208 | u32 guest_sysenter_cs; | ||
209 | u32 host_ia32_sysenter_cs; | ||
210 | u32 padding32[8]; /* room for future expansion */ | ||
211 | u16 virtual_processor_id; | ||
212 | u16 guest_es_selector; | ||
213 | u16 guest_cs_selector; | ||
214 | u16 guest_ss_selector; | ||
215 | u16 guest_ds_selector; | ||
216 | u16 guest_fs_selector; | ||
217 | u16 guest_gs_selector; | ||
218 | u16 guest_ldtr_selector; | ||
219 | u16 guest_tr_selector; | ||
220 | u16 host_es_selector; | ||
221 | u16 host_cs_selector; | ||
222 | u16 host_ss_selector; | ||
223 | u16 host_ds_selector; | ||
224 | u16 host_fs_selector; | ||
225 | u16 host_gs_selector; | ||
226 | u16 host_tr_selector; | ||
227 | }; | ||
228 | |||
229 | |||
230 | Authors | ||
231 | ------- | ||
232 | |||
233 | These patches were written by: | ||
234 | Abel Gordon, abelg <at> il.ibm.com | ||
235 | Nadav Har'El, nyh <at> il.ibm.com | ||
236 | Orit Wasserman, oritw <at> il.ibm.com | ||
237 | Ben-Ami Yassor, benami <at> il.ibm.com | ||
238 | Muli Ben-Yehuda, muli <at> il.ibm.com | ||
239 | |||
240 | With contributions by: | ||
241 | Anthony Liguori, aliguori <at> us.ibm.com | ||
242 | Mike Day, mdday <at> us.ibm.com | ||
243 | Michael Factor, factor <at> il.ibm.com | ||
244 | Zvi Dubitzky, dubi <at> il.ibm.com | ||
245 | |||
246 | And valuable reviews by: | ||
247 | Avi Kivity, avi <at> redhat.com | ||
248 | Gleb Natapov, gleb <at> redhat.com | ||
249 | Marcelo Tosatti, mtosatti <at> redhat.com | ||
250 | Kevin Tian, kevin.tian <at> intel.com | ||
251 | and others. | ||
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt index 3ab969c59046..2b7ce190cde4 100644 --- a/Documentation/virtual/kvm/ppc-pv.txt +++ b/Documentation/virtual/kvm/ppc-pv.txt | |||
@@ -68,9 +68,11 @@ page that contains parts of supervisor visible register state. The guest can | |||
68 | map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE. | 68 | map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE. |
69 | 69 | ||
70 | With this hypercall issued the guest always gets the magic page mapped at the | 70 | With this hypercall issued the guest always gets the magic page mapped at the |
71 | desired location in effective and physical address space. For now, we always | 71 | desired location. The first parameter indicates the effective address when the |
72 | map the page to -4096. This way we can access it using absolute load and store | 72 | MMU is enabled. The second parameter indicates the address in real mode, if |
73 | functions. The following instruction reads the first field of the magic page: | 73 | applicable to the target. For now, we always map the page to -4096. This way we |
74 | can access it using absolute load and store functions. The following | ||
75 | instruction reads the first field of the magic page: | ||
74 | 76 | ||
75 | ld rX, -4096(0) | 77 | ld rX, -4096(0) |
76 | 78 | ||
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index cd9d6af61d07..043bd7df3139 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #include <asm/bootparam.h> | 51 | #include <asm/bootparam.h> |
52 | #include "../../../include/linux/lguest_launcher.h" | 52 | #include "../../../include/linux/lguest_launcher.h" |
53 | /*L:110 | 53 | /*L:110 |
54 | * We can ignore the 42 include files we need for this program, but I do want | 54 | * We can ignore the 43 include files we need for this program, but I do want |
55 | * to draw attention to the use of kernel-style types. | 55 | * to draw attention to the use of kernel-style types. |
56 | * | 56 | * |
57 | * As Linus said, "C is a Spartan language, and so should your naming be." I | 57 | * As Linus said, "C is a Spartan language, and so should your naming be." I |
@@ -65,7 +65,6 @@ typedef uint16_t u16; | |||
65 | typedef uint8_t u8; | 65 | typedef uint8_t u8; |
66 | /*:*/ | 66 | /*:*/ |
67 | 67 | ||
68 | #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ | ||
69 | #define BRIDGE_PFX "bridge:" | 68 | #define BRIDGE_PFX "bridge:" |
70 | #ifndef SIOCBRADDIF | 69 | #ifndef SIOCBRADDIF |
71 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ | 70 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ |
@@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq) | |||
861 | /* writev can return a partial write, so we loop here. */ | 860 | /* writev can return a partial write, so we loop here. */ |
862 | while (!iov_empty(iov, out)) { | 861 | while (!iov_empty(iov, out)) { |
863 | int len = writev(STDOUT_FILENO, iov, out); | 862 | int len = writev(STDOUT_FILENO, iov, out); |
864 | if (len <= 0) | 863 | if (len <= 0) { |
865 | err(1, "Write to stdout gave %i", len); | 864 | warn("Write to stdout gave %i (%d)", len, errno); |
865 | break; | ||
866 | } | ||
866 | iov_consume(iov, out, len); | 867 | iov_consume(iov, out, len); |
867 | } | 868 | } |
868 | 869 | ||
@@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq) | |||
898 | * same format: what a coincidence! | 899 | * same format: what a coincidence! |
899 | */ | 900 | */ |
900 | if (writev(net_info->tunfd, iov, out) < 0) | 901 | if (writev(net_info->tunfd, iov, out) < 0) |
901 | errx(1, "Write to tun failed?"); | 902 | warnx("Write to tun failed (%d)?", errno); |
902 | 903 | ||
903 | /* | 904 | /* |
904 | * Done with that one; wait_for_vq_desc() will send the interrupt if | 905 | * Done with that one; wait_for_vq_desc() will send the interrupt if |
@@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq) | |||
955 | */ | 956 | */ |
956 | len = readv(net_info->tunfd, iov, in); | 957 | len = readv(net_info->tunfd, iov, in); |
957 | if (len <= 0) | 958 | if (len <= 0) |
958 | err(1, "Failed to read from tun."); | 959 | warn("Failed to read from tun (%d).", errno); |
959 | 960 | ||
960 | /* | 961 | /* |
961 | * Mark that packet buffer as used, but don't interrupt here. We want | 962 | * Mark that packet buffer as used, but don't interrupt here. We want |
@@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev) | |||
1093 | warnx("Device %s configuration FAILED", dev->name); | 1094 | warnx("Device %s configuration FAILED", dev->name); |
1094 | if (dev->running) | 1095 | if (dev->running) |
1095 | reset_device(dev); | 1096 | reset_device(dev); |
1096 | } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { | 1097 | } else { |
1097 | if (!dev->running) | 1098 | if (dev->running) |
1098 | start_device(dev); | 1099 | err(1, "Device %s features finalized twice", dev->name); |
1100 | start_device(dev); | ||
1099 | } | 1101 | } |
1100 | } | 1102 | } |
1101 | 1103 | ||
@@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr) | |||
1120 | return; | 1122 | return; |
1121 | } | 1123 | } |
1122 | 1124 | ||
1123 | /* | 1125 | /* Devices should not be used before features are finalized. */ |
1124 | * Devices *can* be used before status is set to DRIVER_OK. | ||
1125 | * The original plan was that they would never do this: they | ||
1126 | * would always finish setting up their status bits before | ||
1127 | * actually touching the virtqueues. In practice, we allowed | ||
1128 | * them to, and they do (eg. the disk probes for partition | ||
1129 | * tables as part of initialization). | ||
1130 | * | ||
1131 | * If we see this, we start the device: once it's running, we | ||
1132 | * expect the device to catch all the notifications. | ||
1133 | */ | ||
1134 | for (vq = i->vq; vq; vq = vq->next) { | 1126 | for (vq = i->vq; vq; vq = vq->next) { |
1135 | if (addr != vq->config.pfn*getpagesize()) | 1127 | if (addr != vq->config.pfn*getpagesize()) |
1136 | continue; | 1128 | continue; |
1137 | if (i->running) | 1129 | errx(1, "Notification on %s before setup!", i->name); |
1138 | errx(1, "Notification on running %s", i->name); | ||
1139 | /* This just calls create_thread() for each virtqueue */ | ||
1140 | start_device(i); | ||
1141 | return; | ||
1142 | } | 1130 | } |
1143 | } | 1131 | } |
1144 | 1132 | ||
@@ -1370,7 +1358,7 @@ static void setup_console(void) | |||
1370 | * --sharenet=<name> option which opens or creates a named pipe. This can be | 1358 | * --sharenet=<name> option which opens or creates a named pipe. This can be |
1371 | * used to send packets to another guest in a 1:1 manner. | 1359 | * used to send packets to another guest in a 1:1 manner. |
1372 | * | 1360 | * |
1373 | * More sopisticated is to use one of the tools developed for project like UML | 1361 | * More sophisticated is to use one of the tools developed for project like UML |
1374 | * to do networking. | 1362 | * to do networking. |
1375 | * | 1363 | * |
1376 | * Faster is to do virtio bonding in kernel. Doing this 1:1 would be | 1364 | * Faster is to do virtio bonding in kernel. Doing this 1:1 would be |
@@ -1380,7 +1368,7 @@ static void setup_console(void) | |||
1380 | * multiple inter-guest channels behind one interface, although it would | 1368 | * multiple inter-guest channels behind one interface, although it would |
1381 | * require some manner of hotplugging new virtio channels. | 1369 | * require some manner of hotplugging new virtio channels. |
1382 | * | 1370 | * |
1383 | * Finally, we could implement a virtio network switch in the kernel. | 1371 | * Finally, we could use a virtio network switch in the kernel, ie. vhost. |
1384 | :*/ | 1372 | :*/ |
1385 | 1373 | ||
1386 | static u32 str2ip(const char *ipaddr) | 1374 | static u32 str2ip(const char *ipaddr) |
@@ -2017,10 +2005,7 @@ int main(int argc, char *argv[]) | |||
2017 | /* Tell the entry path not to try to reload segment registers. */ | 2005 | /* Tell the entry path not to try to reload segment registers. */ |
2018 | boot->hdr.loadflags |= KEEP_SEGMENTS; | 2006 | boot->hdr.loadflags |= KEEP_SEGMENTS; |
2019 | 2007 | ||
2020 | /* | 2008 | /* We tell the kernel to initialize the Guest. */ |
2021 | * We tell the kernel to initialize the Guest: this returns the open | ||
2022 | * /dev/lguest file descriptor. | ||
2023 | */ | ||
2024 | tell_kernel(start); | 2009 | tell_kernel(start); |
2025 | 2010 | ||
2026 | /* Ensure that we terminate if a device-servicing child dies. */ | 2011 | /* Ensure that we terminate if a device-servicing child dies. */ |
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 9b7221a86df2..7c3a8801b7ce 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt | |||
@@ -674,7 +674,7 @@ Protocol: 2.10+ | |||
674 | 674 | ||
675 | Field name: init_size | 675 | Field name: init_size |
676 | Type: read | 676 | Type: read |
677 | Offset/size: 0x25c/4 | 677 | Offset/size: 0x260/4 |
678 | 678 | ||
679 | This field indicates the amount of linear contiguous memory starting | 679 | This field indicates the amount of linear contiguous memory starting |
680 | at the kernel runtime start address that the kernel needs before it | 680 | at the kernel runtime start address that the kernel needs before it |
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt new file mode 100644 index 000000000000..7869f14d055c --- /dev/null +++ b/Documentation/x86/entry_64.txt | |||
@@ -0,0 +1,98 @@ | |||
1 | This file documents some of the kernel entries in | ||
2 | arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from | ||
3 | an email from Ingo Molnar: | ||
4 | |||
5 | http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu> | ||
6 | |||
7 | The x86 architecture has quite a few different ways to jump into | ||
8 | kernel code. Most of these entry points are registered in | ||
9 | arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S | ||
10 | and arch/x86/ia32/ia32entry.S. | ||
11 | |||
12 | The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h. | ||
13 | |||
14 | Some of these entries are: | ||
15 | |||
16 | - system_call: syscall instruction from 64-bit code. | ||
17 | |||
18 | - ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall | ||
19 | either way. | ||
20 | |||
21 | - ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit | ||
22 | code | ||
23 | |||
24 | - interrupt: An array of entries. Every IDT vector that doesn't | ||
25 | explicitly point somewhere else gets set to the corresponding | ||
26 | value in interrupts. These point to a whole array of | ||
27 | magically-generated functions that make their way to do_IRQ with | ||
28 | the interrupt number as a parameter. | ||
29 | |||
30 | - emulate_vsyscall: int 0xcc, a special non-ABI entry used by | ||
31 | vsyscall emulation. | ||
32 | |||
33 | - APIC interrupts: Various special-purpose interrupts for things | ||
34 | like TLB shootdown. | ||
35 | |||
36 | - Architecturally-defined exceptions like divide_error. | ||
37 | |||
38 | There are a few complexities here. The different x86-64 entries | ||
39 | have different calling conventions. The syscall and sysenter | ||
40 | instructions have their own peculiar calling conventions. Some of | ||
41 | the IDT entries push an error code onto the stack; others don't. | ||
42 | IDT entries using the IST alternative stack mechanism need their own | ||
43 | magic to get the stack frames right. (You can find some | ||
44 | documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM, | ||
45 | Volume 3, Chapter 6.) | ||
46 | |||
47 | Dealing with the swapgs instruction is especially tricky. Swapgs | ||
48 | toggles whether gs is the kernel gs or the user gs. The swapgs | ||
49 | instruction is rather fragile: it must nest perfectly and only in | ||
50 | single depth, it should only be used if entering from user mode to | ||
51 | kernel mode and then when returning to user-space, and precisely | ||
52 | so. If we mess that up even slightly, we crash. | ||
53 | |||
54 | So when we have a secondary entry, already in kernel mode, we *must | ||
55 | not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's | ||
56 | not switched/swapped yet. | ||
57 | |||
58 | Now, there's a secondary complication: there's a cheap way to test | ||
59 | which mode the CPU is in and an expensive way. | ||
60 | |||
61 | The cheap way is to pick this info off the entry frame on the kernel | ||
62 | stack, from the CS of the ptregs area of the kernel stack: | ||
63 | |||
64 | xorl %ebx,%ebx | ||
65 | testl $3,CS+8(%rsp) | ||
66 | je error_kernelspace | ||
67 | SWAPGS | ||
68 | |||
69 | The expensive (paranoid) way is to read back the MSR_GS_BASE value | ||
70 | (which is what SWAPGS modifies): | ||
71 | |||
72 | movl $1,%ebx | ||
73 | movl $MSR_GS_BASE,%ecx | ||
74 | rdmsr | ||
75 | testl %edx,%edx | ||
76 | js 1f /* negative -> in kernel */ | ||
77 | SWAPGS | ||
78 | xorl %ebx,%ebx | ||
79 | 1: ret | ||
80 | |||
81 | and the whole paranoid/non-paranoid macro complexity is about whether | ||
82 | to suffer that RDMSR cost. | ||
83 | |||
84 | If we are at an interrupt or user-trap/gate-alike boundary then we can | ||
85 | use the faster check: the stack will be a reliable indicator of | ||
86 | whether SWAPGS was already done: if we see that we are a secondary | ||
87 | entry interrupting kernel mode execution, then we know that the GS | ||
88 | base has already been switched. If it says that we interrupted | ||
89 | user-space execution then we must do the SWAPGS. | ||
90 | |||
91 | But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context, | ||
92 | which might have triggered right after a normal entry wrote CS to the | ||
93 | stack but before we executed SWAPGS, then the only safe way to check | ||
94 | for GS is the slower method: the RDMSR. | ||
95 | |||
96 | So we try only to mark those entry methods 'paranoid' that absolutely | ||
97 | need the more expensive check for the GS base - and we generate all | ||
98 | 'normal' entry points with the regular (faster) entry macros. | ||
diff --git a/Documentation/zh_CN/SubmitChecklist b/Documentation/zh_CN/SubmitChecklist index 951415bbab0c..4c741d6bc048 100644 --- a/Documentation/zh_CN/SubmitChecklist +++ b/Documentation/zh_CN/SubmitChecklist | |||
@@ -67,7 +67,7 @@ Linux内核提交清单 | |||
67 | 67 | ||
68 | 12:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, | 68 | 12:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, |
69 | CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, | 69 | CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, |
70 | CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP测试,并且同时都 | 70 | CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP测试,并且同时都 |
71 | 使能。 | 71 | 使能。 |
72 | 72 | ||
73 | 13:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。 | 73 | 13:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。 |
diff --git a/Documentation/zh_CN/magic-number.txt b/Documentation/zh_CN/magic-number.txt index 4c4ce853577b..c278f412dc65 100644 --- a/Documentation/zh_CN/magic-number.txt +++ b/Documentation/zh_CN/magic-number.txt | |||
@@ -66,7 +66,7 @@ MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h | |||
66 | RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h | 66 | RISCOM8_MAGIC 0x0907 riscom_port drivers/char/riscom8.h |
67 | SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h | 67 | SPECIALIX_MAGIC 0x0907 specialix_port drivers/char/specialix_io8.h |
68 | HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c | 68 | HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c |
69 | APM_BIOS_MAGIC 0x4101 apm_user arch/i386/kernel/apm.c | 69 | APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c |
70 | CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h | 70 | CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h |
71 | DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c | 71 | DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c |
72 | DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c | 72 | DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c |