diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/Changes | 10 | ||||
-rw-r--r-- | Documentation/CodingStyle | 21 | ||||
-rw-r--r-- | Documentation/DocBook/kernel-hacking.tmpl | 2 | ||||
-rw-r--r-- | Documentation/SubmittingPatches | 86 | ||||
-rw-r--r-- | Documentation/connector/connector.txt | 44 | ||||
-rw-r--r-- | Documentation/dell_rbu.txt | 50 | ||||
-rw-r--r-- | Documentation/device-mapper/snapshot.txt | 73 | ||||
-rw-r--r-- | Documentation/filesystems/relayfs.txt | 2 | ||||
-rw-r--r-- | Documentation/ia64/mca.txt | 194 | ||||
-rw-r--r-- | Documentation/keys-request-key.txt | 161 | ||||
-rw-r--r-- | Documentation/keys.txt | 92 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 10 | ||||
-rw-r--r-- | Documentation/sparse.txt | 4 | ||||
-rw-r--r-- | Documentation/usb/URB.txt | 74 |
14 files changed, 733 insertions, 90 deletions
diff --git a/Documentation/Changes b/Documentation/Changes index 5eaab0441d76..27232be26e1a 100644 --- a/Documentation/Changes +++ b/Documentation/Changes | |||
@@ -237,6 +237,12 @@ udev | |||
237 | udev is a userspace application for populating /dev dynamically with | 237 | udev is a userspace application for populating /dev dynamically with |
238 | only entries for devices actually present. udev replaces devfs. | 238 | only entries for devices actually present. udev replaces devfs. |
239 | 239 | ||
240 | FUSE | ||
241 | ---- | ||
242 | |||
243 | Needs libfuse 2.4.0 or later. Absolute minimum is 2.3.0 but mount | ||
244 | options 'direct_io' and 'kernel_cache' won't work. | ||
245 | |||
240 | Networking | 246 | Networking |
241 | ========== | 247 | ========== |
242 | 248 | ||
@@ -390,6 +396,10 @@ udev | |||
390 | ---- | 396 | ---- |
391 | o <http://www.kernel.org/pub/linux/utils/kernel/hotplug/udev.html> | 397 | o <http://www.kernel.org/pub/linux/utils/kernel/hotplug/udev.html> |
392 | 398 | ||
399 | FUSE | ||
400 | ---- | ||
401 | o <http://sourceforge.net/projects/fuse> | ||
402 | |||
393 | Networking | 403 | Networking |
394 | ********** | 404 | ********** |
395 | 405 | ||
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 22e5f9036f3c..eb7db3c19227 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle | |||
@@ -410,7 +410,26 @@ Kernel messages do not have to be terminated with a period. | |||
410 | Printing numbers in parentheses (%d) adds no value and should be avoided. | 410 | Printing numbers in parentheses (%d) adds no value and should be avoided. |
411 | 411 | ||
412 | 412 | ||
413 | Chapter 13: References | 413 | Chapter 13: Allocating memory |
414 | |||
415 | The kernel provides the following general purpose memory allocators: | ||
416 | kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API | ||
417 | documentation for further information about them. | ||
418 | |||
419 | The preferred form for passing a size of a struct is the following: | ||
420 | |||
421 | p = kmalloc(sizeof(*p), ...); | ||
422 | |||
423 | The alternative form where struct name is spelled out hurts readability and | ||
424 | introduces an opportunity for a bug when the pointer variable type is changed | ||
425 | but the corresponding sizeof that is passed to a memory allocator is not. | ||
426 | |||
427 | Casting the return value which is a void pointer is redundant. The conversion | ||
428 | from void pointer to any other pointer type is guaranteed by the C programming | ||
429 | language. | ||
430 | |||
431 | |||
432 | Chapter 14: References | ||
414 | 433 | ||
415 | The C Programming Language, Second Edition | 434 | The C Programming Language, Second Edition |
416 | by Brian W. Kernighan and Dennis M. Ritchie. | 435 | by Brian W. Kernighan and Dennis M. Ritchie. |
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index 6367bba32d22..582032eea872 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl | |||
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = { | |||
1105 | </listitem> | 1105 | </listitem> |
1106 | <listitem> | 1106 | <listitem> |
1107 | <para> | 1107 | <para> |
1108 | Function names as strings (__func__). | 1108 | Function names as strings (__FUNCTION__). |
1109 | </para> | 1109 | </para> |
1110 | </listitem> | 1110 | </listitem> |
1111 | <listitem> | 1111 | <listitem> |
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 7f43b040311e..237d54c44bc5 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches | |||
@@ -301,8 +301,84 @@ now, but you can do this to mark internal company procedures or just | |||
301 | point out some special detail about the sign-off. | 301 | point out some special detail about the sign-off. |
302 | 302 | ||
303 | 303 | ||
304 | 12) The canonical patch format | ||
304 | 305 | ||
305 | 12) More references for submitting patches | 306 | The canonical patch subject line is: |
307 | |||
308 | Subject: [PATCH 001/123] subsystem: summary phrase | ||
309 | |||
310 | The canonical patch message body contains the following: | ||
311 | |||
312 | - A "from" line specifying the patch author. | ||
313 | |||
314 | - An empty line. | ||
315 | |||
316 | - The body of the explanation, which will be copied to the | ||
317 | permanent changelog to describe this patch. | ||
318 | |||
319 | - The "Signed-off-by:" lines, described above, which will | ||
320 | also go in the changelog. | ||
321 | |||
322 | - A marker line containing simply "---". | ||
323 | |||
324 | - Any additional comments not suitable for the changelog. | ||
325 | |||
326 | - The actual patch (diff output). | ||
327 | |||
328 | The Subject line format makes it very easy to sort the emails | ||
329 | alphabetically by subject line - pretty much any email reader will | ||
330 | support that - since the sequence number is zero-padded, | ||
331 | the numerical and alphabetic sort is the same. | ||
332 | |||
333 | The "subsystem" in the email's Subject should identify which | ||
334 | area or subsystem of the kernel is being patched. | ||
335 | |||
336 | The "summary phrase" in the email's Subject should concisely | ||
337 | describe the patch which that email contains. The "summary | ||
338 | phrase" should not be a filename. Do not use the same "summary | ||
339 | phrase" for every patch in a whole patch series. | ||
340 | |||
341 | Bear in mind that the "summary phrase" of your email becomes | ||
342 | a globally-unique identifier for that patch. It propagates | ||
343 | all the way into the git changelog. The "summary phrase" may | ||
344 | later be used in developer discussions which refer to the patch. | ||
345 | People will want to google for the "summary phrase" to read | ||
346 | discussion regarding that patch. | ||
347 | |||
348 | A couple of example Subjects: | ||
349 | |||
350 | Subject: [patch 2/5] ext2: improve scalability of bitmap searching | ||
351 | Subject: [PATCHv2 001/207] x86: fix eflags tracking | ||
352 | |||
353 | The "from" line must be the very first line in the message body, | ||
354 | and has the form: | ||
355 | |||
356 | From: Original Author <author@example.com> | ||
357 | |||
358 | The "from" line specifies who will be credited as the author of the | ||
359 | patch in the permanent changelog. If the "from" line is missing, | ||
360 | then the "From:" line from the email header will be used to determine | ||
361 | the patch author in the changelog. | ||
362 | |||
363 | The explanation body will be committed to the permanent source | ||
364 | changelog, so should make sense to a competent reader who has long | ||
365 | since forgotten the immediate details of the discussion that might | ||
366 | have led to this patch. | ||
367 | |||
368 | The "---" marker line serves the essential purpose of marking for patch | ||
369 | handling tools where the changelog message ends. | ||
370 | |||
371 | One good use for the additional comments after the "---" marker is for | ||
372 | a diffstat, to show what files have changed, and the number of inserted | ||
373 | and deleted lines per file. A diffstat is especially useful on bigger | ||
374 | patches. Other comments relevant only to the moment or the maintainer, | ||
375 | not suitable for the permanent changelog, should also go here. | ||
376 | |||
377 | See more details on the proper patch format in the following | ||
378 | references. | ||
379 | |||
380 | |||
381 | 13) More references for submitting patches | ||
306 | 382 | ||
307 | Andrew Morton, "The perfect patch" (tpp). | 383 | Andrew Morton, "The perfect patch" (tpp). |
308 | <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt> | 384 | <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt> |
@@ -310,6 +386,14 @@ Andrew Morton, "The perfect patch" (tpp). | |||
310 | Jeff Garzik, "Linux kernel patch submission format." | 386 | Jeff Garzik, "Linux kernel patch submission format." |
311 | <http://linux.yyz.us/patch-format.html> | 387 | <http://linux.yyz.us/patch-format.html> |
312 | 388 | ||
389 | Greg KH, "How to piss off a kernel subsystem maintainer" | ||
390 | <http://www.kroah.com/log/2005/03/31/> | ||
391 | |||
392 | Kernel Documentation/CodingStyle | ||
393 | <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle> | ||
394 | |||
395 | Linus Torvalds' mail on the canonical patch format: | ||
396 | <http://lkml.org/lkml/2005/4/7/183> | ||
313 | 397 | ||
314 | 398 | ||
315 | ----------------------------------- | 399 | ----------------------------------- |
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt index 54a0a14bfbe3..57a314b14cf8 100644 --- a/Documentation/connector/connector.txt +++ b/Documentation/connector/connector.txt | |||
@@ -131,3 +131,47 @@ Netlink itself is not reliable protocol, that means that messages can | |||
131 | be lost due to memory pressure or process' receiving queue overflowed, | 131 | be lost due to memory pressure or process' receiving queue overflowed, |
132 | so caller is warned must be prepared. That is why struct cn_msg [main | 132 | so caller is warned must be prepared. That is why struct cn_msg [main |
133 | connector's message header] contains u32 seq and u32 ack fields. | 133 | connector's message header] contains u32 seq and u32 ack fields. |
134 | |||
135 | /*****************************************/ | ||
136 | Userspace usage. | ||
137 | /*****************************************/ | ||
138 | 2.6.14 has a new netlink socket implementation, which by default does not | ||
139 | allow sending data to netlink groups other than 1. | ||
140 | So, to use a netlink socket (for example using connector) | ||
141 | with a different group number, the userspace application must subscribe to | ||
142 | that group. This can be achieved by the following pseudocode: | ||
143 | |||
144 | s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); | ||
145 | |||
146 | l_local.nl_family = AF_NETLINK; | ||
147 | l_local.nl_groups = 12345; | ||
148 | l_local.nl_pid = 0; | ||
149 | |||
150 | if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { | ||
151 | perror("bind"); | ||
152 | close(s); | ||
153 | return -1; | ||
154 | } | ||
155 | |||
156 | { | ||
157 | int on = l_local.nl_groups; | ||
158 | setsockopt(s, 270, 1, &on, sizeof(on)); | ||
159 | } | ||
160 | |||
161 | Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket | ||
162 | option. To drop multicast subscription one should call above socket option | ||
163 | with NETLINK_DROP_MEMBERSHIP parameter which is defined as 0. | ||
164 | |||
165 | 2.6.14 netlink code only allows selecting a group which is less than or equal to | ||
166 | the maximum group number, which is used at netlink_kernel_create() time. | ||
167 | In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use | ||
168 | group number 12345, you must increment CN_NETLINK_USERS to that number. | ||
169 | Additional 0xf numbers are allocated to be used by non-in-kernel users. | ||
170 | |||
171 | Due to this limitation, group 0xffffffff does not work now, so one can | ||
172 | not use add/remove connector's group notifications, but as far as I know, | ||
173 | only cn_test.c test module used it. | ||
174 | |||
175 | Some work in the netlink area is still being done, so things may change in | ||
176 | the 2.6.15 timeframe; if that happens, documentation will be updated for that | ||
177 | kernel. | ||
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt index bcfa5c35036b..941343a7a265 100644 --- a/Documentation/dell_rbu.txt +++ b/Documentation/dell_rbu.txt | |||
@@ -13,6 +13,8 @@ the BIOS on Dell servers (starting from servers sold since 1999), desktops | |||
13 | and notebooks (starting from those sold in 2005). | 13 | and notebooks (starting from those sold in 2005). |
14 | Please go to http://support.dell.com register and you can find info on | 14 | Please go to http://support.dell.com register and you can find info on |
15 | OpenManage and Dell Update packages (DUP). | 15 | OpenManage and Dell Update packages (DUP). |
16 | Libsmbios can also be used to update the BIOS on Dell systems; go to | ||
17 | http://linux.dell.com/libsmbios/ for details. | ||
16 | 18 | ||
17 | Dell_RBU driver supports BIOS update using the monolithic image and packetized | 19 | Dell_RBU driver supports BIOS update using the monolithic image and packetized |
18 | image methods. In case of monolithic the driver allocates a contiguous chunk | 20 | image methods. In case of monolithic the driver allocates a contiguous chunk |
@@ -22,8 +24,8 @@ would place each packet in contiguous physical memory. The driver also | |||
22 | maintains a link list of packets for reading them back. | 24 | maintains a link list of packets for reading them back. |
23 | If the dell_rbu driver is unloaded all the allocated memory is freed. | 25 | If the dell_rbu driver is unloaded all the allocated memory is freed. |
24 | 26 | ||
25 | The rbu driver needs to have an application which will inform the BIOS to | 27 | The rbu driver needs to have an application (as mentioned above) which will |
26 | enable the update in the next system reboot. | 28 | inform the BIOS to enable the update in the next system reboot. |
27 | 29 | ||
28 | The user should not unload the rbu driver after downloading the BIOS image | 30 | The user should not unload the rbu driver after downloading the BIOS image |
29 | or updating. | 31 | or updating. |
@@ -33,6 +35,7 @@ The driver load creates the following directories under the /sys file system. | |||
33 | /sys/class/firmware/dell_rbu/data | 35 | /sys/class/firmware/dell_rbu/data |
34 | /sys/devices/platform/dell_rbu/image_type | 36 | /sys/devices/platform/dell_rbu/image_type |
35 | /sys/devices/platform/dell_rbu/data | 37 | /sys/devices/platform/dell_rbu/data |
38 | /sys/devices/platform/dell_rbu/packet_size | ||
36 | 39 | ||
37 | The driver supports two types of update mechanism; monolithic and packetized. | 40 | The driver supports two types of update mechanism; monolithic and packetized. |
38 | These update mechanism depends upon the BIOS currently running on the system. | 41 | These update mechanism depends upon the BIOS currently running on the system. |
@@ -42,10 +45,30 @@ In case of packet mechanism the single memory can be broken in smaller chuks | |||
42 | of contiguous memory and the BIOS image is scattered in these packets. | 45 | of contiguous memory and the BIOS image is scattered in these packets. |
43 | 46 | ||
44 | By default the driver uses monolithic memory for the update type. This can be | 47 | By default the driver uses monolithic memory for the update type. This can be |
45 | changed to contiguous during the driver load time by specifying the load | 48 | changed to packets during the driver load time by specifying the load |
46 | parameter image_type=packet. This can also be changed later as below | 49 | parameter image_type=packet. This can also be changed later as below |
47 | echo packet > /sys/devices/platform/dell_rbu/image_type | 50 | echo packet > /sys/devices/platform/dell_rbu/image_type |
48 | 51 | ||
52 | In packet update mode the packet size has to be given before any packets can | ||
53 | be downloaded. It is done as below | ||
54 | echo XXXX > /sys/devices/platform/dell_rbu/packet_size | ||
55 | In the packet update mechanism, the user needs to create a new file having | ||
56 | packets of data arranged back to back. It can be done as follows | ||
57 | The user creates a packet header, gets the chunk of the BIOS image and | ||
58 | places it next to the packet header; now, the packet header + BIOS image chunk | ||
59 | added together should match the specified packet_size. This makes one | ||
60 | packet, the user needs to create more such packets out of the entire BIOS | ||
61 | image file and then arrange all these packets back to back in to one single | ||
62 | file. | ||
63 | This file is then copied to /sys/class/firmware/dell_rbu/data. | ||
64 | Once this file gets to the driver, the driver extracts packet_size data from | ||
65 | the file and spreads it across the physical memory in contiguous packet_sized | ||
66 | space. | ||
67 | This method makes sure that all the packets get to the driver in a single operation. | ||
68 | |||
69 | In monolithic update the user simply gets the BIOS image (.hdr file) and copies it | ||
70 | to the data file as is without any change to the BIOS image itself. | ||
71 | |||
49 | Do the steps below to download the BIOS image. | 72 | Do the steps below to download the BIOS image. |
50 | 1) echo 1 > /sys/class/firmware/dell_rbu/loading | 73 | 1) echo 1 > /sys/class/firmware/dell_rbu/loading |
51 | 2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data | 74 | 2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data |
@@ -53,20 +76,23 @@ Do the steps below to download the BIOS image. | |||
53 | 76 | ||
54 | The /sys/class/firmware/dell_rbu/ entries will remain till the following is | 77 | The /sys/class/firmware/dell_rbu/ entries will remain till the following is |
55 | done. | 78 | done. |
56 | echo -1 > /sys/class/firmware/dell_rbu/loading | 79 | echo -1 > /sys/class/firmware/dell_rbu/loading. |
80 | Until this step is completed the driver cannot be unloaded. | ||
81 | Also echoing either mono, packet or init into image_type will free up the | ||
82 | memory allocated by the driver. | ||
57 | 83 | ||
58 | Until this step is completed the drivr cannot be unloaded. | 84 | If a user by accident executes steps 1 and 3 above without executing step 2; |
85 | it will make the /sys/class/firmware/dell_rbu/ entries disappear. | ||
86 | The entries can be recreated by doing the following | ||
87 | echo init > /sys/devices/platform/dell_rbu/image_type | ||
88 | NOTE: echoing init in image_type does not change its original value. | ||
59 | 89 | ||
60 | Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to | 90 | Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to |
61 | read back the image downloaded. This is useful in case of packet update | 91 | read back the image downloaded. |
62 | mechanism where the above steps 1,2,3 will repeated for every packet. | ||
63 | By reading the /sys/devices/platform/dell_rbu/data file all packet data | ||
64 | downloaded can be verified in a single file. | ||
65 | The packets are arranged in this file one after the other in a FIFO order. | ||
66 | 92 | ||
67 | NOTE: | 93 | NOTE: |
68 | This driver requires a patch for firmware_class.c which has the addition | 94 | This driver requires a patch for firmware_class.c which has the modified |
69 | of request_firmware_nowait_nohotplug function to wortk | 95 | request_firmware_nowait function. |
70 | Also after updating the BIOS image a user mode application needs to execute | 96 | Also after updating the BIOS image a user mode application needs to execute |
71 | code which message the BIOS update request to the BIOS. So on the next reboot | 97 | code which message the BIOS update request to the BIOS. So on the next reboot |
72 | the BIOS knows about the new image downloaded and it updates itself. | 98 | the BIOS knows about the new image downloaded and it updates itself. |
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt new file mode 100644 index 000000000000..dca274ff4005 --- /dev/null +++ b/Documentation/device-mapper/snapshot.txt | |||
@@ -0,0 +1,73 @@ | |||
1 | Device-mapper snapshot support | ||
2 | ============================== | ||
3 | |||
4 | Device-mapper allows you, without massive data copying: | ||
5 | |||
6 | *) To create snapshots of any block device i.e. mountable, saved states of | ||
7 | the block device which are also writable without interfering with the | ||
8 | original content; | ||
9 | *) To create device "forks", i.e. multiple different versions of the | ||
10 | same data stream. | ||
11 | |||
12 | |||
13 | In both cases, dm copies only the chunks of data that get changed and | ||
14 | uses a separate copy-on-write (COW) block device for storage. | ||
15 | |||
16 | |||
17 | There are two dm targets available: snapshot and snapshot-origin. | ||
18 | |||
19 | *) snapshot-origin <origin> | ||
20 | |||
21 | which will normally have one or more snapshots based on it. | ||
22 | You must create the snapshot-origin device before you can create snapshots. | ||
23 | Reads will be mapped directly to the backing device. For each write, the | ||
24 | original data will be saved in the <COW device> of each snapshot to keep | ||
25 | its visible content unchanged, at least until the <COW device> fills up. | ||
26 | |||
27 | |||
28 | *) snapshot <origin> <COW device> <persistent?> <chunksize> | ||
29 | |||
30 | A snapshot is created of the <origin> block device. Changed chunks of | ||
31 | <chunksize> sectors will be stored on the <COW device>. Writes will | ||
32 | only go to the <COW device>. Reads will come from the <COW device> or | ||
33 | from <origin> for unchanged data. <COW device> will often be | ||
34 | smaller than the origin and if it fills up the snapshot will become | ||
35 | useless and be disabled, returning errors. So it is important to monitor | ||
36 | the amount of free space and expand the <COW device> before it fills up. | ||
37 | |||
38 | <persistent?> is P (Persistent) or N (Not persistent - will not survive | ||
39 | after reboot). | ||
40 | |||
41 | |||
42 | How this is used by LVM2 | ||
43 | ======================== | ||
44 | When you create the first LVM2 snapshot of a volume, four dm devices are used: | ||
45 | |||
46 | 1) a device containing the original mapping table of the source volume; | ||
47 | 2) a device used as the <COW device>; | ||
48 | 3) a "snapshot" device, combining #1 and #2, which is the visible snapshot | ||
49 | volume; | ||
50 | 4) the "original" volume (which uses the device number used by the original | ||
51 | source volume), whose table is replaced by a "snapshot-origin" mapping | ||
52 | from device #1. | ||
53 | |||
54 | A fixed naming scheme is used, so with the following commands: | ||
55 | |||
56 | lvcreate -L 1G -n base volumeGroup | ||
57 | lvcreate -L 100M --snapshot -n snap volumeGroup/base | ||
58 | |||
59 | we'll have this situation (with volumes in above order): | ||
60 | |||
61 | # dmsetup table|grep volumeGroup | ||
62 | |||
63 | volumeGroup-base-real: 0 2097152 linear 8:19 384 | ||
64 | volumeGroup-snap-cow: 0 204800 linear 8:19 2097536 | ||
65 | volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16 | ||
66 | volumeGroup-base: 0 2097152 snapshot-origin 254:11 | ||
67 | |||
68 | # ls -lL /dev/mapper/volumeGroup-* | ||
69 | brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real | ||
70 | brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow | ||
71 | brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap | ||
72 | brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base | ||
73 | |||
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt index d24e1b0d4f39..d803abed29f0 100644 --- a/Documentation/filesystems/relayfs.txt +++ b/Documentation/filesystems/relayfs.txt | |||
@@ -15,7 +15,7 @@ retrieve the data as it becomes available. | |||
15 | 15 | ||
16 | The format of the data logged into the channel buffers is completely | 16 | The format of the data logged into the channel buffers is completely |
17 | up to the relayfs client; relayfs does however provide hooks which | 17 | up to the relayfs client; relayfs does however provide hooks which |
18 | allow clients to impose some stucture on the buffer data. Nor does | 18 | allow clients to impose some structure on the buffer data. Nor does |
19 | relayfs implement any form of data filtering - this also is left to | 19 | relayfs implement any form of data filtering - this also is left to |
20 | the client. The purpose is to keep relayfs as simple as possible. | 20 | the client. The purpose is to keep relayfs as simple as possible. |
21 | 21 | ||
diff --git a/Documentation/ia64/mca.txt b/Documentation/ia64/mca.txt new file mode 100644 index 000000000000..a71cc6a67ef7 --- /dev/null +++ b/Documentation/ia64/mca.txt | |||
@@ -0,0 +1,194 @@ | |||
1 | An ad-hoc collection of notes on IA64 MCA and INIT processing. Feel | ||
2 | free to update it with notes about any area that is not clear. | ||
3 | |||
4 | --- | ||
5 | |||
6 | MCA/INIT are completely asynchronous. They can occur at any time, when | ||
7 | the OS is in any state. Including when one of the cpus is already | ||
8 | holding a spinlock. Trying to get any lock from MCA/INIT state is | ||
9 | asking for deadlock. Also the state of structures that are protected | ||
10 | by locks is indeterminate, including linked lists. | ||
11 | |||
12 | --- | ||
13 | |||
14 | The complicated ia64 MCA process. All of this is mandated by Intel's | ||
15 | specification for ia64 SAL, error recovery and unwind, it is not as | ||
16 | if we have a choice here. | ||
17 | |||
18 | * MCA occurs on one cpu, usually due to a double bit memory error. | ||
19 | This is the monarch cpu. | ||
20 | |||
21 | * SAL sends an MCA rendezvous interrupt (which is a normal interrupt) | ||
22 | to all the other cpus, the slaves. | ||
23 | |||
24 | * Slave cpus that receive the MCA interrupt call down into SAL, they | ||
25 | end up spinning disabled while the MCA is being serviced. | ||
26 | |||
27 | * If any slave cpu was already spinning disabled when the MCA occurred | ||
28 | then it cannot service the MCA interrupt. SAL waits ~20 seconds then | ||
29 | sends an unmaskable INIT event to the slave cpus that have not | ||
30 | already rendezvoused. | ||
31 | |||
32 | * Because MCA/INIT can be delivered at any time, including when the cpu | ||
33 | is down in PAL in physical mode, the registers at the time of the | ||
34 | event are _completely_ undefined. In particular the MCA/INIT | ||
35 | handlers cannot rely on the thread pointer, PAL physical mode can | ||
36 | (and does) modify TP. It is allowed to do that as long as it resets | ||
37 | TP on return. However MCA/INIT events expose us to these PAL | ||
38 | internal TP changes. Hence curr_task(). | ||
39 | |||
40 | * If an MCA/INIT event occurs while the kernel was running (not user | ||
41 | space) and the kernel has called PAL then the MCA/INIT handler cannot | ||
42 | assume that the kernel stack is in a fit state to be used. Mainly | ||
43 | because PAL may or may not maintain the stack pointer internally. | ||
44 | Because the MCA/INIT handlers cannot trust the kernel stack, they | ||
45 | have to use their own, per-cpu stacks. The MCA/INIT stacks are | ||
46 | preformatted with just enough task state to let the relevant handlers | ||
47 | do their job. | ||
48 | |||
49 | * Unlike most other architectures, the ia64 struct task is embedded in | ||
50 | the kernel stack[1]. So switching to a new kernel stack means that | ||
51 | we switch to a new task as well. Because various bits of the kernel | ||
52 | assume that current points into the struct task, switching to a new | ||
53 | stack also means a new value for current. | ||
54 | |||
55 | * Once all slaves have rendezvoused and are spinning disabled, the | ||
56 | monarch is entered. The monarch now tries to diagnose the problem | ||
57 | and decide if it can recover or not. | ||
58 | |||
59 | * Part of the monarch's job is to look at the state of all the other | ||
60 | tasks. The only way to do that on ia64 is to call the unwinder, | ||
61 | as mandated by Intel. | ||
62 | |||
63 | * The starting point for the unwind depends on whether a task is | ||
64 | running or not. That is, whether it is on a cpu or is blocked. The | ||
65 | monarch has to determine whether or not a task is on a cpu before it | ||
66 | knows how to start unwinding it. The tasks that received an MCA or | ||
67 | INIT event are no longer running, they have been converted to blocked | ||
68 | tasks. But (and it's a big but), the cpus that received the MCA | ||
69 | rendezvous interrupt are still running on their normal kernel stacks! | ||
70 | |||
71 | * To distinguish between these two cases, the monarch must know which | ||
72 | tasks are on a cpu and which are not. Hence each slave cpu that | ||
73 | switches to an MCA/INIT stack, registers its new stack using | ||
74 | set_curr_task(), so the monarch can tell that the _original_ task is | ||
75 | no longer running on that cpu. That gives us a decent chance of | ||
76 | getting a valid backtrace of the _original_ task. | ||
77 | |||
78 | * MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a | ||
79 | nested error, we want diagnostics on the MCA/INIT handler that | ||
80 | failed, not on the task that was originally running. Again this | ||
81 | requires set_curr_task() so the MCA/INIT handlers can register their | ||
82 | own stack as running on that cpu. Then a recursive error gets a | ||
83 | trace of the failing handler's "task". | ||
84 | |||
85 | [1] My (Keith Owens) original design called for ia64 to separate its | ||
86 | struct task and the kernel stacks. Then the MCA/INIT data would be | ||
87 | chained stacks like i386 interrupt stacks. But that required | ||
88 | radical surgery on the rest of ia64, plus extra hard wired TLB | ||
89 | entries with its associated performance degradation. David | ||
90 | Mosberger vetoed that approach. Which meant that separate kernel | ||
91 | stacks meant separate "tasks" for the MCA/INIT handlers. | ||
92 | |||
93 | --- | ||
94 | |||
95 | INIT is less complicated than MCA. Pressing the nmi button or using | ||
96 | the equivalent command on the management console sends INIT to all | ||
97 | cpus. SAL picks one of the cpus as the monarch and the rest are | ||
98 | slaves. All the OS INIT handlers are entered at approximately the same | ||
99 | time. The OS monarch prints the state of all tasks and returns, after | ||
100 | which the slaves return and the system resumes. | ||
101 | |||
102 | At least that is what is supposed to happen. Alas there are broken | ||
103 | versions of SAL out there. Some drive all the cpus as monarchs. Some | ||
104 | drive them all as slaves. Some drive one cpu as monarch, wait for that | ||
105 | cpu to return from the OS then drive the rest as slaves. Some versions | ||
106 | of SAL cannot even cope with returning from the OS, they spin inside | ||
107 | SAL on resume. The OS INIT code has workarounds for some of these | ||
108 | broken SAL symptoms, but some simply cannot be fixed from the OS side. | ||
109 | |||
110 | --- | ||
111 | |||
112 | The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer | ||
113 | violations. Unfortunately MCA/INIT start off as massive layer | ||
114 | violations (can occur at _any_ time) and they build from there. | ||
115 | |||
116 | At least ia64 makes an attempt at recovering from hardware errors, but | ||
117 | it is a difficult problem because of the asynchronous nature of these | ||
118 | errors. When processing an unmaskable interrupt we sometimes need | ||
119 | special code to cope with our inability to take any locks. | ||
120 | |||
121 | --- | ||
122 | |||
123 | How is ia64 MCA/INIT different from x86 NMI? | ||
124 | |||
125 | * x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to | ||
126 | all cpus. | ||
127 | |||
128 | * x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2 | ||
129 | per cpu. | ||
130 | |||
131 | * x86 has a separate struct task which points to one of multiple kernel | ||
132 | stacks. ia64 has the struct task embedded in the single kernel | ||
133 | stack, so switching stack means switching task. | ||
134 | |||
135 | * x86 does not call the BIOS so the NMI handler does not have to worry | ||
136 | about any registers having changed. MCA/INIT can occur while the cpu | ||
137 | is in PAL in physical mode, with undefined registers and an undefined | ||
138 | kernel stack. | ||
139 | |||
140 | * i386 backtrace is not very sensitive to whether a process is running | ||
141 | or not. ia64 unwind is very, very sensitive to whether a process is | ||
142 | running or not. | ||
143 | |||
144 | --- | ||
145 | |||
146 | What happens when MCA/INIT is delivered while a cpu is running user | ||
147 | space code? | ||
148 | |||
149 | The user mode registers are stored in the RSE area of the MCA/INIT on | ||
150 | entry to the OS and are restored from there on return to SAL, so user | ||
151 | mode registers are preserved across a recoverable MCA/INIT. Since the | ||
152 | OS has no idea what unwind data is available for the user space stack, | ||
153 | MCA/INIT never tries to backtrace user space. Which means that the OS | ||
154 | does not bother making the user space process look like a blocked task, | ||
155 | i.e. the OS does not copy pt_regs and switch_stack to the user space | ||
156 | stack. Also the OS has no idea how big the user space RSE and memory | ||
157 | stacks are, which makes it too risky to copy the saved state to a user | ||
158 | mode stack. | ||
159 | |||
160 | --- | ||
161 | |||
162 | How do we get a backtrace on the tasks that were running when MCA/INIT | ||
163 | was delivered? | ||
164 | |||
165 | mca.c:::ia64_mca_modify_original_stack(). That identifies and | ||
166 | verifies the original kernel stack, copies the dirty registers from | ||
167 | the MCA/INIT stack's RSE to the original stack's RSE, copies the | ||
168 | skeleton struct pt_regs and switch_stack to the original stack, fills | ||
169 | in the skeleton structures from the PAL minstate area and updates the | ||
170 | original stack's thread.ksp. That makes the original stack look | ||
171 | exactly like any other blocked task, i.e. it now appears to be | ||
172 | sleeping. To get a backtrace, just start with thread.ksp for the | ||
173 | original task and unwind like any other sleeping task. | ||
174 | |||
175 | --- | ||
176 | |||
177 | How do we identify the tasks that were running when MCA/INIT was | ||
178 | delivered? | ||
179 | |||
180 | If the previous task has been verified and converted to a blocked | ||
181 | state, then sos->prev_task on the MCA/INIT stack is updated to point to | ||
182 | the previous task. You can look at that field in dumps or debuggers. | ||
183 | To help distinguish between the handler and the original tasks, | ||
184 | handlers have _TIF_MCA_INIT set in thread_info.flags. | ||
185 | |||
186 | The sos data is always in the MCA/INIT handler stack, at offset | ||
187 | MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it | ||
188 | as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct | ||
189 | ia64_sal_os_state), with 16 byte alignment for all structures. | ||
190 | |||
191 | Also the comm field of the MCA/INIT task is modified to include the pid | ||
192 | of the original task, for humans to use. For example, a comm field of | ||
193 | 'MCA 12159' means that pid 12159 was running when the MCA was | ||
194 | delivered. | ||
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt new file mode 100644 index 000000000000..5f2b9c5edbb5 --- /dev/null +++ b/Documentation/keys-request-key.txt | |||
@@ -0,0 +1,161 @@ | |||
1 | =================== | ||
2 | KEY REQUEST SERVICE | ||
3 | =================== | ||
4 | |||
5 | The key request service is part of the key retention service (refer to | ||
6 | Documentation/keys.txt). This document explains more fully how the | ||
7 | requesting algorithm works. | ||
8 | |||
9 | The process starts by either the kernel requesting a service by calling | ||
10 | request_key(): | ||
11 | |||
12 | struct key *request_key(const struct key_type *type, | ||
13 | const char *description, | ||
14 | const char *callout_string); | ||
15 | |||
16 | Or by userspace invoking the request_key system call: | ||
17 | |||
18 | key_serial_t request_key(const char *type, | ||
19 | const char *description, | ||
20 | const char *callout_info, | ||
21 | key_serial_t dest_keyring); | ||
22 | |||
23 | The main difference between the two access points is that the in-kernel | ||
24 | interface does not need to link the key to a keyring to prevent it from being | ||
25 | immediately destroyed. The kernel interface returns a pointer directly to the | ||
26 | key, and it's up to the caller to destroy the key. | ||
27 | |||
28 | The userspace interface links the key to a keyring associated with the process | ||
29 | to prevent the key from going away, and returns the serial number of the key to | ||
30 | the caller. | ||
31 | |||
32 | |||
33 | =========== | ||
34 | THE PROCESS | ||
35 | =========== | ||
36 | |||
37 | A request proceeds in the following manner: | ||
38 | |||
39 | (1) Process A calls request_key() [the userspace syscall calls the kernel | ||
40 | interface]. | ||
41 | |||
42 | (2) request_key() searches the process's subscribed keyrings to see if there's | ||
43 | a suitable key there. If there is, it returns the key. If there isn't, and | ||
44 | callout_info is not set, an error is returned. Otherwise the process | ||
45 | proceeds to the next step. | ||
46 | |||
47 | (3) request_key() sees that A doesn't have the desired key yet, so it creates | ||
48 | two things: | ||
49 | |||
50 | (a) An uninstantiated key U of requested type and description. | ||
51 | |||
52 | (b) An authorisation key V that refers to key U and notes that process A | ||
53 | is the context in which key U should be instantiated and secured, and | ||
54 | from which associated key requests may be satisfied. | ||
55 | |||
56 | (4) request_key() then forks and executes /sbin/request-key with a new session | ||
57 | keyring that contains a link to auth key V. | ||
58 | |||
59 | (5) /sbin/request-key execs an appropriate program to perform the actual | ||
60 | instantiation. | ||
61 | |||
62 | (6) The program may want to access another key from A's context (say a | ||
63 | Kerberos TGT key). It just requests the appropriate key, and the keyring | ||
64 | search notes that the session keyring has auth key V in its bottom level. | ||
65 | |||
66 | This will permit it to then search the keyrings of process A with the | ||
67 | UID, GID, groups and security info of process A as if it was process A, | ||
68 | and come up with key W. | ||
69 | |||
70 | (7) The program then does what it must to get the data with which to | ||
71 | instantiate key U, using key W as a reference (perhaps it contacts a | ||
72 | Kerberos server using the TGT) and then instantiates key U. | ||
73 | |||
74 | (8) Upon instantiating key U, auth key V is automatically revoked so that it | ||
75 | may not be used again. | ||
76 | |||
77 | (9) The program then exits 0 and request_key() deletes key V and returns key | ||
78 | U to the caller. | ||
79 | |||
80 | This also extends further. If key W (step 6 above) didn't exist, key W would be | ||
81 | created uninstantiated, another auth key (X) would be created [as per step 3] | ||
82 | and another copy of /sbin/request-key spawned [as per step 4]; but the context | ||
83 | specified by auth key X will still be process A, as it was in auth key V. | ||
84 | |||
85 | This is because process A's keyrings can't simply be attached to | ||
86 | /sbin/request-key at the appropriate places because (a) execve will discard two | ||
87 | of them, and (b) it requires the same UID/GID/Groups all the way through. | ||
88 | |||
89 | |||
90 | ====================== | ||
91 | NEGATIVE INSTANTIATION | ||
92 | ====================== | ||
93 | |||
94 | Rather than instantiating a key, it is possible for the possessor of an | ||
95 | authorisation key to negatively instantiate a key that's under construction. | ||
96 | This is a short duration placeholder that causes any attempt at re-requesting | ||
97 | the key whilst it exists to fail with error ENOKEY. | ||
98 | |||
99 | This is provided to prevent excessive repeated spawning of /sbin/request-key | ||
100 | processes for a key that will never be obtainable. | ||
101 | |||
102 | Should the /sbin/request-key process exit anything other than 0 or die on a | ||
103 | signal, the key under construction will be automatically negatively | ||
104 | instantiated for a short amount of time. | ||
105 | |||
106 | |||
107 | ==================== | ||
108 | THE SEARCH ALGORITHM | ||
109 | ==================== | ||
110 | |||
111 | A search of any particular keyring proceeds in the following fashion: | ||
112 | |||
113 | (1) When the key management code searches for a key (keyring_search_aux) it | ||
114 | firstly calls key_permission(SEARCH) on the keyring it's starting with, | ||
115 | if this denies permission, it doesn't search further. | ||
116 | |||
117 | (2) It considers all the non-keyring keys within that keyring and, if any key | ||
118 | matches the criteria specified, calls key_permission(SEARCH) on it to see | ||
119 | if the key is allowed to be found. If it is, that key is returned; if | ||
120 | not, the search continues, and the error code is retained if of higher | ||
121 | priority than the one currently set. | ||
122 | |||
123 | (3) It then considers all the keyring-type keys in the keyring it's currently | ||
124 | searching. It calls key_permission(SEARCH) on each keyring, and if this | ||
125 | grants permission, it recurses, executing steps (2) and (3) on that | ||
126 | keyring. | ||
127 | |||
128 | The process stops immediately a valid key is found with permission granted to | ||
129 | use it. Any error from a previous match attempt is discarded and the key is | ||
130 | returned. | ||
131 | |||
132 | When search_process_keyrings() is invoked, it performs the following searches | ||
133 | until one succeeds: | ||
134 | |||
135 | (1) If extant, the process's thread keyring is searched. | ||
136 | |||
137 | (2) If extant, the process's process keyring is searched. | ||
138 | |||
139 | (3) The process's session keyring is searched. | ||
140 | |||
141 | (4) If the process has a request_key() authorisation key in its session | ||
142 | keyring then: | ||
143 | |||
144 | (a) If extant, the calling process's thread keyring is searched. | ||
145 | |||
146 | (b) If extant, the calling process's process keyring is searched. | ||
147 | |||
148 | (c) The calling process's session keyring is searched. | ||
149 | |||
150 | The moment one succeeds, all pending errors are discarded and the found key is | ||
151 | returned. | ||
152 | |||
153 | Only if all these fail does the whole thing fail with the highest priority | ||
154 | error. Note that several errors may have come from LSM. | ||
155 | |||
156 | The error priority is: | ||
157 | |||
158 | EKEYREVOKED > EKEYEXPIRED > ENOKEY | ||
159 | |||
160 | EACCES/EPERM are only returned on a direct search of a specific keyring where | ||
161 | the basal keyring does not grant Search permission. | ||
diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 0321ded4b9ae..4afe03a58c5b 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt | |||
@@ -195,8 +195,8 @@ KEY ACCESS PERMISSIONS | |||
195 | ====================== | 195 | ====================== |
196 | 196 | ||
197 | Keys have an owner user ID, a group access ID, and a permissions mask. The mask | 197 | Keys have an owner user ID, a group access ID, and a permissions mask. The mask |
198 | has up to eight bits each for user, group and other access. Only five of each | 198 | has up to eight bits each for possessor, user, group and other access. Only |
199 | set of eight bits are defined. These permissions granted are: | 199 | five of each set of eight bits are defined. These permissions granted are: |
200 | 200 | ||
201 | (*) View | 201 | (*) View |
202 | 202 | ||
@@ -241,16 +241,16 @@ about the status of the key service: | |||
241 | type, description and permissions. The payload of the key is not available | 241 | type, description and permissions. The payload of the key is not available |
242 | this way: | 242 | this way: |
243 | 243 | ||
244 | SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY | 244 | SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY |
245 | 00000001 I----- 39 perm 1f0000 0 0 keyring _uid_ses.0: 1/4 | 245 | 00000001 I----- 39 perm 1f1f0000 0 0 keyring _uid_ses.0: 1/4 |
246 | 00000002 I----- 2 perm 1f0000 0 0 keyring _uid.0: empty | 246 | 00000002 I----- 2 perm 1f1f0000 0 0 keyring _uid.0: empty |
247 | 00000007 I----- 1 perm 1f0000 0 0 keyring _pid.1: empty | 247 | 00000007 I----- 1 perm 1f1f0000 0 0 keyring _pid.1: empty |
248 | 0000018d I----- 1 perm 1f0000 0 0 keyring _pid.412: empty | 248 | 0000018d I----- 1 perm 1f1f0000 0 0 keyring _pid.412: empty |
249 | 000004d2 I--Q-- 1 perm 1f0000 32 -1 keyring _uid.32: 1/4 | 249 | 000004d2 I--Q-- 1 perm 1f1f0000 32 -1 keyring _uid.32: 1/4 |
250 | 000004d3 I--Q-- 3 perm 1f0000 32 -1 keyring _uid_ses.32: empty | 250 | 000004d3 I--Q-- 3 perm 1f1f0000 32 -1 keyring _uid_ses.32: empty |
251 | 00000892 I--QU- 1 perm 1f0000 0 0 user metal:copper: 0 | 251 | 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 |
252 | 00000893 I--Q-N 1 35s 1f0000 0 0 user metal:silver: 0 | 252 | 00000893 I--Q-N 1 35s 1f1f0000 0 0 user metal:silver: 0 |
253 | 00000894 I--Q-- 1 10h 1f0000 0 0 user metal:gold: 0 | 253 | 00000894 I--Q-- 1 10h 001f0000 0 0 user metal:gold: 0 |
254 | 254 | ||
255 | The flags are: | 255 | The flags are: |
256 | 256 | ||
@@ -361,6 +361,8 @@ The main syscalls are: | |||
361 | /sbin/request-key will be invoked in an attempt to obtain a key. The | 361 | /sbin/request-key will be invoked in an attempt to obtain a key. The |
362 | callout_info string will be passed as an argument to the program. | 362 | callout_info string will be passed as an argument to the program. |
363 | 363 | ||
364 | See also Documentation/keys-request-key.txt. | ||
365 | |||
364 | 366 | ||
365 | The keyctl syscall functions are: | 367 | The keyctl syscall functions are: |
366 | 368 | ||
@@ -533,8 +535,8 @@ The keyctl syscall functions are: | |||
533 | 535 | ||
534 | (*) Read the payload data from a key: | 536 | (*) Read the payload data from a key: |
535 | 537 | ||
536 | key_serial_t keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, | 538 | long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, |
537 | size_t buflen); | 539 | size_t buflen); |
538 | 540 | ||
539 | This function attempts to read the payload data from the specified key | 541 | This function attempts to read the payload data from the specified key |
540 | into the buffer. The process must have read permission on the key to | 542 | into the buffer. The process must have read permission on the key to |
@@ -555,9 +557,9 @@ The keyctl syscall functions are: | |||
555 | 557 | ||
556 | (*) Instantiate a partially constructed key. | 558 | (*) Instantiate a partially constructed key. |
557 | 559 | ||
558 | key_serial_t keyctl(KEYCTL_INSTANTIATE, key_serial_t key, | 560 | long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, |
559 | const void *payload, size_t plen, | 561 | const void *payload, size_t plen, |
560 | key_serial_t keyring); | 562 | key_serial_t keyring); |
561 | 563 | ||
562 | If the kernel calls back to userspace to complete the instantiation of a | 564 | If the kernel calls back to userspace to complete the instantiation of a |
563 | key, userspace should use this call to supply data for the key before the | 565 | key, userspace should use this call to supply data for the key before the |
@@ -576,8 +578,8 @@ The keyctl syscall functions are: | |||
576 | 578 | ||
577 | (*) Negatively instantiate a partially constructed key. | 579 | (*) Negatively instantiate a partially constructed key. |
578 | 580 | ||
579 | key_serial_t keyctl(KEYCTL_NEGATE, key_serial_t key, | 581 | long keyctl(KEYCTL_NEGATE, key_serial_t key, |
580 | unsigned timeout, key_serial_t keyring); | 582 | unsigned timeout, key_serial_t keyring); |
581 | 583 | ||
582 | If the kernel calls back to userspace to complete the instantiation of a | 584 | If the kernel calls back to userspace to complete the instantiation of a |
582 | key, userspace should use this call to mark the key as negative before the | 584 | key, userspace should use this call to mark the key as negative before the |
@@ -637,6 +639,34 @@ call, and the key released upon close. How to deal with conflicting keys due to | |||
637 | two different users opening the same file is left to the filesystem author to | 639 | two different users opening the same file is left to the filesystem author to |
638 | solve. | 640 | solve. |
639 | 641 | ||
642 | Note that there are two different types of pointers to keys that may be | ||
643 | encountered: | ||
644 | |||
645 | (*) struct key * | ||
646 | |||
647 | This simply points to the key structure itself. Key structures will be at | ||
648 | least four-byte aligned. | ||
649 | |||
650 | (*) key_ref_t | ||
651 | |||
652 | This is equivalent to a struct key *, but the least significant bit is set | ||
653 | if the caller "possesses" the key. By "possession" it is meant that the | ||
654 | calling process has a searchable link to the key from one of its | ||
655 | keyrings. There are three functions for dealing with these: | ||
656 | |||
657 | key_ref_t make_key_ref(const struct key *key, | ||
658 | unsigned long possession); | ||
659 | |||
660 | struct key *key_ref_to_ptr(const key_ref_t key_ref); | ||
661 | |||
662 | unsigned long is_key_possessed(const key_ref_t key_ref); | ||
663 | |||
664 | The first function constructs a key reference from a key pointer and | ||
665 | possession information (which must be 0 or 1 and not any other value). | ||
666 | |||
667 | The second function retrieves the key pointer from a reference and the | ||
668 | third retrieves the possession flag. | ||
669 | |||
640 | When accessing a key's payload contents, certain precautions must be taken to | 670 | When accessing a key's payload contents, certain precautions must be taken to |
641 | prevent access vs modification races. See the section "Notes on accessing | 671 | prevent access vs modification races. See the section "Notes on accessing |
642 | payload contents" for more information. | 672 | payload contents" for more information. |
@@ -660,12 +690,18 @@ payload contents" for more information. | |||
660 | If successful, the key will have been attached to the default keyring for | 690 | If successful, the key will have been attached to the default keyring for |
661 | implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. | 691 | implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. |
662 | 692 | ||
693 | See also Documentation/keys-request-key.txt. | ||
694 | |||
663 | 695 | ||
664 | (*) When it is no longer required, the key should be released using: | 696 | (*) When it is no longer required, the key should be released using: |
665 | 697 | ||
666 | void key_put(struct key *key); | 698 | void key_put(struct key *key); |
667 | 699 | ||
668 | This can be called from interrupt context. If CONFIG_KEYS is not set then | 700 | Or: |
701 | |||
702 | void key_ref_put(key_ref_t key_ref); | ||
703 | |||
704 | These can be called from interrupt context. If CONFIG_KEYS is not set then | ||
669 | the argument will not be parsed. | 705 | the argument will not be parsed. |
670 | 706 | ||
671 | 707 | ||
@@ -689,13 +725,17 @@ payload contents" for more information. | |||
689 | 725 | ||
690 | (*) If a keyring was found in the search, this can be further searched by: | 726 | (*) If a keyring was found in the search, this can be further searched by: |
691 | 727 | ||
692 | struct key *keyring_search(struct key *keyring, | 728 | key_ref_t keyring_search(key_ref_t keyring_ref, |
693 | const struct key_type *type, | 729 | const struct key_type *type, |
694 | const char *description) | 730 | const char *description) |
695 | 731 | ||
696 | This searches the keyring tree specified for a matching key. Error ENOKEY | 732 | This searches the keyring tree specified for a matching key. Error ENOKEY |
697 | is returned upon failure. If successful, the returned key will need to be | 733 | is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, |
698 | released. | 734 | the returned key will need to be released. |
735 | |||
736 | The possession attribute from the keyring reference is used to control | ||
737 | access through the permissions mask and is propagated to the returned key | ||
738 | reference pointer if successful. | ||
699 | 739 | ||
700 | 740 | ||
701 | (*) To check the validity of a key, this function can be called: | 741 | (*) To check the validity of a key, this function can be called: |
@@ -732,7 +772,7 @@ More complex payload contents must be allocated and a pointer to them set in | |||
732 | key->payload.data. One of the following ways must be selected to access the | 772 | key->payload.data. One of the following ways must be selected to access the |
733 | data: | 773 | data: |
734 | 774 | ||
735 | (1) Unmodifyable key type. | 775 | (1) Unmodifiable key type. |
736 | 776 | ||
737 | If the key type does not have a modify method, then the key's payload can | 777 | If the key type does not have a modify method, then the key's payload can |
738 | be accessed without any form of locking, provided that it's known to be | 778 | be accessed without any form of locking, provided that it's known to be |
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab65714d95fc..b433c8a27e2d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -355,10 +355,14 @@ ip_dynaddr - BOOLEAN | |||
355 | Default: 0 | 355 | Default: 0 |
356 | 356 | ||
357 | icmp_echo_ignore_all - BOOLEAN | 357 | icmp_echo_ignore_all - BOOLEAN |
358 | If set non-zero, then the kernel will ignore all ICMP ECHO | ||
359 | requests sent to it. | ||
360 | Default: 0 | ||
361 | |||
358 | icmp_echo_ignore_broadcasts - BOOLEAN | 362 | icmp_echo_ignore_broadcasts - BOOLEAN |
359 | If either is set to true, then the kernel will ignore either all | 363 | If set non-zero, then the kernel will ignore all ICMP ECHO and |
360 | ICMP ECHO requests sent to it or just those to broadcast/multicast | 364 | TIMESTAMP requests sent to it via broadcast/multicast. |
361 | addresses, respectively. | 365 | Default: 1 |
362 | 366 | ||
363 | icmp_ratelimit - INTEGER | 367 | icmp_ratelimit - INTEGER |
364 | Limit the maximal rates for sending ICMP packets whose type matches | 368 | Limit the maximal rates for sending ICMP packets whose type matches |
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt index 5df44dc894e5..1829009db771 100644 --- a/Documentation/sparse.txt +++ b/Documentation/sparse.txt | |||
@@ -51,9 +51,9 @@ or you don't get any checking at all. | |||
51 | Where to get sparse | 51 | Where to get sparse |
52 | ~~~~~~~~~~~~~~~~~~~ | 52 | ~~~~~~~~~~~~~~~~~~~ |
53 | 53 | ||
54 | With BK, you can just get it from | 54 | With git, you can just get it from |
55 | 55 | ||
56 | bk://sparse.bkbits.net/sparse | 56 | rsync://rsync.kernel.org/pub/scm/devel/sparse/sparse.git |
57 | 57 | ||
58 | and DaveJ has tar-balls at | 58 | and DaveJ has tar-balls at |
59 | 59 | ||
diff --git a/Documentation/usb/URB.txt b/Documentation/usb/URB.txt index d59b95cc6f1b..a49e5f2c2b46 100644 --- a/Documentation/usb/URB.txt +++ b/Documentation/usb/URB.txt | |||
@@ -1,5 +1,6 @@ | |||
1 | Revised: 2000-Dec-05. | 1 | Revised: 2000-Dec-05. |
2 | Again: 2002-Jul-06 | 2 | Again: 2002-Jul-06 |
3 | Again: 2005-Sep-19 | ||
3 | 4 | ||
4 | NOTE: | 5 | NOTE: |
5 | 6 | ||
@@ -18,8 +19,8 @@ called USB Request Block, or URB for short. | |||
18 | and deliver the data and status back. | 19 | and deliver the data and status back. |
19 | 20 | ||
20 | - Execution of an URB is inherently an asynchronous operation, i.e. the | 21 | - Execution of an URB is inherently an asynchronous operation, i.e. the |
21 | usb_submit_urb(urb) call returns immediately after it has successfully queued | 22 | usb_submit_urb(urb) call returns immediately after it has successfully |
22 | the requested action. | 23 | queued the requested action. |
23 | 24 | ||
24 | - Transfers for one URB can be canceled with usb_unlink_urb(urb) at any time. | 25 | - Transfers for one URB can be canceled with usb_unlink_urb(urb) at any time. |
25 | 26 | ||
@@ -94,8 +95,9 @@ To free an URB, use | |||
94 | 95 | ||
95 | void usb_free_urb(struct urb *urb) | 96 | void usb_free_urb(struct urb *urb) |
96 | 97 | ||
97 | You may not free an urb that you've submitted, but which hasn't yet been | 98 | You may free an urb that you've submitted, but which hasn't yet been |
98 | returned to you in a completion callback. | 99 | returned to you in a completion callback. It will automatically be |
100 | deallocated when it is no longer in use. | ||
99 | 101 | ||
100 | 102 | ||
101 | 1.4. What has to be filled in? | 103 | 1.4. What has to be filled in? |
@@ -145,30 +147,36 @@ to get seamless ISO streaming. | |||
145 | 147 | ||
146 | 1.6. How to cancel an already running URB? | 148 | 1.6. How to cancel an already running URB? |
147 | 149 | ||
148 | For an URB which you've submitted, but which hasn't been returned to | 150 | There are two ways to cancel an URB you've submitted but which hasn't |
149 | your driver by the host controller, call | 151 | been returned to your driver yet. For an asynchronous cancel, call |
150 | 152 | ||
151 | int usb_unlink_urb(struct urb *urb) | 153 | int usb_unlink_urb(struct urb *urb) |
152 | 154 | ||
153 | It removes the urb from the internal list and frees all allocated | 155 | It removes the urb from the internal list and frees all allocated |
154 | HW descriptors. The status is changed to reflect unlinking. After | 156 | HW descriptors. The status is changed to reflect unlinking. Note |
155 | usb_unlink_urb() returns with that status code, you can free the URB | 157 | that the URB will not normally have finished when usb_unlink_urb() |
156 | with usb_free_urb(). | 158 | returns; you must still wait for the completion handler to be called. |
157 | 159 | ||
158 | There is also an asynchronous unlink mode. To use this, set the | 160 | To cancel an URB synchronously, call |
159 | the URB_ASYNC_UNLINK flag in urb->transfer flags before calling | 161 | |
160 | usb_unlink_urb(). When using async unlinking, the URB will not | 162 | void usb_kill_urb(struct urb *urb) |
161 | normally be unlinked when usb_unlink_urb() returns. Instead, wait | 163 | |
162 | for the completion handler to be called. | 164 | It does everything usb_unlink_urb does, and in addition it waits |
165 | until after the URB has been returned and the completion handler | ||
166 | has finished. It also marks the URB as temporarily unusable, so | ||
167 | that if the completion handler or anyone else tries to resubmit it | ||
168 | they will get a -EPERM error. Thus you can be sure that when | ||
169 | usb_kill_urb() returns, the URB is totally idle. | ||
163 | 170 | ||
164 | 171 | ||
165 | 1.7. What about the completion handler? | 172 | 1.7. What about the completion handler? |
166 | 173 | ||
167 | The handler is of the following type: | 174 | The handler is of the following type: |
168 | 175 | ||
169 | typedef void (*usb_complete_t)(struct urb *); | 176 | typedef void (*usb_complete_t)(struct urb *, struct pt_regs *); |
170 | 177 | ||
171 | i.e. it gets just the URB that caused the completion call. | 178 | I.e., it gets the URB that caused the completion call, plus the |
179 | register values at the time of the corresponding interrupt (if any). | ||
172 | In the completion handler, you should have a look at urb->status to | 180 | In the completion handler, you should have a look at urb->status to |
173 | detect any USB errors. Since the context parameter is included in the URB, | 181 | detect any USB errors. Since the context parameter is included in the URB, |
174 | you can pass information to the completion handler. | 182 | you can pass information to the completion handler. |
@@ -176,17 +184,11 @@ you can pass information to the completion handler. | |||
176 | Note that even when an error (or unlink) is reported, data may have been | 184 | Note that even when an error (or unlink) is reported, data may have been |
177 | transferred. That's because USB transfers are packetized; it might take | 185 | transferred. That's because USB transfers are packetized; it might take |
178 | sixteen packets to transfer your 1KByte buffer, and ten of them might | 186 | sixteen packets to transfer your 1KByte buffer, and ten of them might |
179 | have transferred succesfully before the completion is called. | 187 | have transferred successfully before the completion was called. |
180 | 188 | ||
181 | 189 | ||
182 | NOTE: ***** WARNING ***** | 190 | NOTE: ***** WARNING ***** |
183 | Don't use urb->dev field in your completion handler; it's cleared | 191 | NEVER SLEEP IN A COMPLETION HANDLER. These are normally called |
184 | as part of giving urbs back to drivers. (Addressing an issue with | ||
185 | ownership of periodic URBs, which was otherwise ambiguous.) Instead, | ||
186 | use urb->context to hold all the data your driver needs. | ||
187 | |||
188 | NOTE: ***** WARNING ***** | ||
189 | Also, NEVER SLEEP IN A COMPLETION HANDLER. These are normally called | ||
190 | during hardware interrupt processing. If you can, defer substantial | 192 | during hardware interrupt processing. If you can, defer substantial |
191 | work to a tasklet (bottom half) to keep system latencies low. You'll | 193 | work to a tasklet (bottom half) to keep system latencies low. You'll |
192 | probably need to use spinlocks to protect data structures you manipulate | 194 | probably need to use spinlocks to protect data structures you manipulate |
@@ -229,24 +231,10 @@ ISO data with some other event stream. | |||
229 | Interrupt transfers, like isochronous transfers, are periodic, and happen | 231 | Interrupt transfers, like isochronous transfers, are periodic, and happen |
230 | in intervals that are powers of two (1, 2, 4, etc.) units. Units are frames | 232 | in intervals that are powers of two (1, 2, 4, etc.) units. Units are frames |
231 | for full and low speed devices, and microframes for high speed ones. | 233 | for full and low speed devices, and microframes for high speed ones. |
232 | |||
233 | Currently, after you submit one interrupt URB, that urb is owned by the | ||
234 | host controller driver until you cancel it with usb_unlink_urb(). You | ||
235 | may unlink interrupt urbs in their completion handlers, if you need to. | ||
236 | |||
237 | After a transfer completion is called, the URB is automagically resubmitted. | ||
238 | THIS BEHAVIOR IS EXPECTED TO BE REMOVED!! | ||
239 | |||
240 | Interrupt transfers may only send (or receive) the "maxpacket" value for | ||
241 | the given interrupt endpoint; if you need more data, you will need to | ||
242 | copy that data out of (or into) another buffer. Similarly, you can't | ||
243 | queue interrupt transfers. | ||
244 | THESE RESTRICTIONS ARE EXPECTED TO BE REMOVED!! | ||
245 | |||
246 | Note that this automagic resubmission model does make it awkward to use | ||
247 | interrupt OUT transfers. The portable solution involves unlinking those | ||
248 | OUT urbs after the data is transferred, and perhaps submitting a final | ||
249 | URB for a short packet. | ||
250 | |||
251 | The usb_submit_urb() call modifies urb->interval to the implemented interval | 234 | The usb_submit_urb() call modifies urb->interval to the implemented interval |
252 | value that is less than or equal to the requested interval value. | 235 | value that is less than or equal to the requested interval value. |
236 | |||
237 | In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically | ||
238 | restarted when they complete. They end when the completion handler is | ||
239 | called, just like other URBs. If you want an interrupt URB to be restarted, | ||
240 | your completion handler must resubmit it. | ||