aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/DMA-API.txt49
-rw-r--r--Documentation/DMA-mapping.txt28
-rw-r--r--Documentation/DocBook/Makefile2
-rw-r--r--Documentation/DocBook/kernel-api.tmpl1
-rw-r--r--Documentation/DocBook/libata.tmpl49
-rw-r--r--Documentation/HOWTO3
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--Documentation/acpi-hotkey.txt2
-rw-r--r--Documentation/arm/SA1100/Assabet2
-rw-r--r--Documentation/arm/SA1100/LART2
-rw-r--r--Documentation/block/biodoc.txt14
-rw-r--r--Documentation/block/switching-sched.txt22
-rw-r--r--Documentation/cpu-freq/index.txt2
-rw-r--r--Documentation/cpu-hotplug.txt4
-rw-r--r--Documentation/cputopology.txt4
-rw-r--r--Documentation/devices.txt5
-rw-r--r--Documentation/dvb/get_dvb_firmware8
-rw-r--r--Documentation/feature-removal-schedule.txt41
-rw-r--r--Documentation/filesystems/00-INDEX54
-rw-r--r--Documentation/filesystems/sysfs.txt5
-rw-r--r--Documentation/filesystems/vfs.txt12
-rw-r--r--Documentation/firmware_class/README17
-rw-r--r--Documentation/firmware_class/firmware_sample_driver.c11
-rw-r--r--Documentation/fujitsu/frv/kernel-ABI.txt192
-rw-r--r--Documentation/i2c/busses/i2c-parport16
-rw-r--r--Documentation/input/joystick-parport.txt11
-rw-r--r--Documentation/ioctl-number.txt2
-rw-r--r--Documentation/isdn/README.gigaset286
-rw-r--r--Documentation/kbuild/modules.txt2
-rw-r--r--Documentation/kernel-parameters.txt34
-rw-r--r--Documentation/laptop-mode.txt10
-rw-r--r--Documentation/leds-class.txt71
-rw-r--r--Documentation/m68k/README.buddha2
-rw-r--r--Documentation/memory-barriers.txt2133
-rw-r--r--Documentation/mtrr.txt23
-rw-r--r--Documentation/networking/TODO18
-rw-r--r--Documentation/networking/bcm43xx.txt36
-rw-r--r--Documentation/networking/ifenslave.c2
-rw-r--r--Documentation/networking/operstates.txt161
-rw-r--r--Documentation/networking/packet_mmap.txt2
-rw-r--r--Documentation/networking/tuntap.txt2
-rw-r--r--Documentation/networking/xfrm_sync.txt166
-rw-r--r--Documentation/pci.txt12
-rw-r--r--Documentation/pcmcia/driver-changes.txt6
-rw-r--r--Documentation/pnp.txt3
-rw-r--r--Documentation/power/video.txt2
-rw-r--r--Documentation/powerpc/booting-without-of.txt5
-rw-r--r--Documentation/robust-futex-ABI.txt182
-rw-r--r--Documentation/robust-futexes.txt218
-rw-r--r--Documentation/rpc-cache.txt121
-rw-r--r--Documentation/scsi/ChangeLog.megaraid25
-rw-r--r--Documentation/scsi/scsi_eh.txt14
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt19
-rw-r--r--Documentation/serial/driver31
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt69
-rw-r--r--Documentation/sound/alsa/Audiophile-Usb.txt81
-rw-r--r--Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl20
-rw-r--r--Documentation/spi/pxa2xx234
-rw-r--r--Documentation/spi/spi-summary34
-rw-r--r--Documentation/video4linux/CARDLIST.saa71345
-rw-r--r--Documentation/video4linux/et61x251.txt (renamed from Documentation/usb/et61x251.txt)0
-rw-r--r--Documentation/video4linux/ibmcam.txt (renamed from Documentation/usb/ibmcam.txt)2
-rw-r--r--Documentation/video4linux/ov511.txt (renamed from Documentation/usb/ov511.txt)11
-rw-r--r--Documentation/video4linux/se401.txt (renamed from Documentation/usb/se401.txt)0
-rw-r--r--Documentation/video4linux/sn9c102.txt (renamed from Documentation/usb/sn9c102.txt)16
-rw-r--r--Documentation/video4linux/stv680.txt (renamed from Documentation/usb/stv680.txt)26
-rw-r--r--Documentation/video4linux/w9968cf.txt (renamed from Documentation/usb/w9968cf.txt)36
-rw-r--r--Documentation/video4linux/zc0301.txt (renamed from Documentation/usb/zc0301.txt)0
-rw-r--r--Documentation/vm/hugetlbpage.txt31
-rw-r--r--Documentation/watchdog/watchdog-api.txt3
-rw-r--r--Documentation/x86_64/boot-options.txt5
71 files changed, 4294 insertions, 425 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 1af0f2d50220..2ffb0d62f0fe 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -33,7 +33,9 @@ pci_alloc_consistent(struct pci_dev *dev, size_t size,
33 33
34Consistent memory is memory for which a write by either the device or 34Consistent memory is memory for which a write by either the device or
35the processor can immediately be read by the processor or device 35the processor can immediately be read by the processor or device
36without having to worry about caching effects. 36without having to worry about caching effects. (You may however need
37to make sure to flush the processor's write buffers before telling
38devices to read that memory.)
37 39
38This routine allocates a region of <size> bytes of consistent memory. 40This routine allocates a region of <size> bytes of consistent memory.
39it also returns a <dma_handle> which may be cast to an unsigned 41it also returns a <dma_handle> which may be cast to an unsigned
@@ -304,12 +306,12 @@ dma address with dma_mapping_error(). A non zero return value means the mapping
304could not be created and the driver should take appropriate action (eg 306could not be created and the driver should take appropriate action (eg
305reduce current DMA mapping usage or delay and try again later). 307reduce current DMA mapping usage or delay and try again later).
306 308
307int 309 int
308dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 310 dma_map_sg(struct device *dev, struct scatterlist *sg,
309 enum dma_data_direction direction) 311 int nents, enum dma_data_direction direction)
310int 312 int
311pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, 313 pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
312 int nents, int direction) 314 int nents, int direction)
313 315
314Maps a scatter gather list from the block layer. 316Maps a scatter gather list from the block layer.
315 317
@@ -327,12 +329,33 @@ critical that the driver do something, in the case of a block driver
327aborting the request or even oopsing is better than doing nothing and 329aborting the request or even oopsing is better than doing nothing and
328corrupting the filesystem. 330corrupting the filesystem.
329 331
330void 332With scatterlists, you use the resulting mapping like this:
331dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, 333
332 enum dma_data_direction direction) 334 int i, count = dma_map_sg(dev, sglist, nents, direction);
333void 335 struct scatterlist *sg;
334pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, 336
335 int nents, int direction) 337 for (i = 0, sg = sglist; i < count; i++, sg++) {
338 hw_address[i] = sg_dma_address(sg);
339 hw_len[i] = sg_dma_len(sg);
340 }
341
342where nents is the number of entries in the sglist.
343
344The implementation is free to merge several consecutive sglist entries
345into one (e.g. with an IOMMU, or if several pages just happen to be
346physically contiguous) and returns the actual number of sg entries it
347mapped them to. On failure, 0 is returned.
348
349Then you should loop count times (note: this can be less than nents times)
350and use sg_dma_address() and sg_dma_len() macros where you previously
351accessed sg->address and sg->length as shown above.
352
353 void
354 dma_unmap_sg(struct device *dev, struct scatterlist *sg,
355 int nhwentries, enum dma_data_direction direction)
356 void
357 pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
358 int nents, int direction)
336 359
337unmap the previously mapped scatter/gather list. All the parameters 360unmap the previously mapped scatter/gather list. All the parameters
338must be the same as those passed in to the scatter/gather mapping 361must be the same as those passed in to the scatter/gather mapping
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index 684557474c15..7c717699032c 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -58,11 +58,15 @@ translating each of those pages back to a kernel address using
58something like __va(). [ EDIT: Update this when we integrate 58something like __va(). [ EDIT: Update this when we integrate
59Gerd Knorr's generic code which does this. ] 59Gerd Knorr's generic code which does this. ]
60 60
61This rule also means that you may not use kernel image addresses 61This rule also means that you may use neither kernel image addresses
62(ie. items in the kernel's data/text/bss segment, or your driver's) 62(items in data/text/bss segments), nor module image addresses, nor
63nor may you use kernel stack addresses for DMA. Both of these items 63stack addresses for DMA. These could all be mapped somewhere entirely
64might be mapped somewhere entirely different than the rest of physical 64different than the rest of physical memory. Even if those classes of
65memory. 65memory could physically work with DMA, you'd need to ensure the I/O
66buffers were cacheline-aligned. Without that, you'd see cacheline
67sharing problems (data corruption) on CPUs with DMA-incoherent caches.
68(The CPU could write to one word, DMA would write to a different one
69in the same cache line, and one of them could be overwritten.)
66 70
67Also, this means that you cannot take the return of a kmap() 71Also, this means that you cannot take the return of a kmap()
68call and DMA to/from that. This is similar to vmalloc(). 72call and DMA to/from that. This is similar to vmalloc().
@@ -194,11 +198,13 @@ document for how to handle this case.
194Finally, if your device can only drive the low 24-bits of 198Finally, if your device can only drive the low 24-bits of
195address during PCI bus mastering you might do something like: 199address during PCI bus mastering you might do something like:
196 200
197 if (pci_set_dma_mask(pdev, 0x00ffffff)) { 201 if (pci_set_dma_mask(pdev, DMA_24BIT_MASK)) {
198 printk(KERN_WARNING 202 printk(KERN_WARNING
199 "mydev: 24-bit DMA addressing not available.\n"); 203 "mydev: 24-bit DMA addressing not available.\n");
200 goto ignore_this_device; 204 goto ignore_this_device;
201 } 205 }
206[Better use DMA_24BIT_MASK instead of 0x00ffffff.
207See include/linux/dma-mapping.h for reference.]
202 208
203When pci_set_dma_mask() is successful, and returns zero, the PCI layer 209When pci_set_dma_mask() is successful, and returns zero, the PCI layer
204saves away this mask you have provided. The PCI layer will use this 210saves away this mask you have provided. The PCI layer will use this
@@ -210,7 +216,7 @@ functions (for example a sound card provides playback and record
210functions) and the various different functions have _different_ 216functions) and the various different functions have _different_
211DMA addressing limitations, you may wish to probe each mask and 217DMA addressing limitations, you may wish to probe each mask and
212only provide the functionality which the machine can handle. It 218only provide the functionality which the machine can handle. It
213is important that the last call to pci_set_dma_mask() be for the 219is important that the last call to pci_set_dma_mask() be for the
214most specific mask. 220most specific mask.
215 221
216Here is pseudo-code showing how this might be done: 222Here is pseudo-code showing how this might be done:
@@ -282,6 +288,11 @@ There are two types of DMA mappings:
282 288
283 in order to get correct behavior on all platforms. 289 in order to get correct behavior on all platforms.
284 290
291 Also, on some platforms your driver may need to flush CPU write
292 buffers in much the same way as it needs to flush write buffers
293 found in PCI bridges (such as by reading a register's value
294 after writing it).
295
285- Streaming DMA mappings which are usually mapped for one DMA transfer, 296- Streaming DMA mappings which are usually mapped for one DMA transfer,
286 unmapped right after it (unless you use pci_dma_sync_* below) and for which 297 unmapped right after it (unless you use pci_dma_sync_* below) and for which
287 hardware can optimize for sequential accesses. 298 hardware can optimize for sequential accesses.
@@ -301,6 +312,9 @@ There are two types of DMA mappings:
301 312
302Neither type of DMA mapping has alignment restrictions that come 313Neither type of DMA mapping has alignment restrictions that come
303from PCI, although some devices may have such restrictions. 314from PCI, although some devices may have such restrictions.
315Also, systems with caches that aren't DMA-coherent will work better
316when the underlying buffers don't share cache lines with other data.
317
304 318
305 Using Consistent DMA mappings. 319 Using Consistent DMA mappings.
306 320
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 7d87dd73cbe4..5a2882d275ba 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -2,7 +2,7 @@
2# This makefile is used to generate the kernel documentation, 2# This makefile is used to generate the kernel documentation,
3# primarily based on in-line comments in various source files. 3# primarily based on in-line comments in various source files.
4# See Documentation/kernel-doc-nano-HOWTO.txt for instruction in how 4# See Documentation/kernel-doc-nano-HOWTO.txt for instruction in how
5# to ducument the SRC - and how to read it. 5# to document the SRC - and how to read it.
6# To add a new book the only step required is to add the book to the 6# To add a new book the only step required is to add the book to the
7# list of DOCBOOKS. 7# list of DOCBOOKS.
8 8
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 8c9c6704e85b..ca02e04a906c 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -322,7 +322,6 @@ X!Earch/i386/kernel/mca.c
322 <chapter id="sysfs"> 322 <chapter id="sysfs">
323 <title>The Filesystem for Exporting Kernel Objects</title> 323 <title>The Filesystem for Exporting Kernel Objects</title>
324!Efs/sysfs/file.c 324!Efs/sysfs/file.c
325!Efs/sysfs/dir.c
326!Efs/sysfs/symlink.c 325!Efs/sysfs/symlink.c
327!Efs/sysfs/bin.c 326!Efs/sysfs/bin.c
328 </chapter> 327 </chapter>
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index d260d92089ad..f869b03929db 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -120,14 +120,27 @@ void (*dev_config) (struct ata_port *, struct ata_device *);
120 <programlisting> 120 <programlisting>
121void (*set_piomode) (struct ata_port *, struct ata_device *); 121void (*set_piomode) (struct ata_port *, struct ata_device *);
122void (*set_dmamode) (struct ata_port *, struct ata_device *); 122void (*set_dmamode) (struct ata_port *, struct ata_device *);
123void (*post_set_mode) (struct ata_port *ap); 123void (*post_set_mode) (struct ata_port *);
124unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int);
124 </programlisting> 125 </programlisting>
125 126
126 <para> 127 <para>
127 Hooks called prior to the issue of SET FEATURES - XFER MODE 128 Hooks called prior to the issue of SET FEATURES - XFER MODE
128 command. dev->pio_mode is guaranteed to be valid when 129 command. The optional ->mode_filter() hook is called when libata
129 ->set_piomode() is called, and dev->dma_mode is guaranteed to be 130 has built a mask of the possible modes. This is passed to the
130 valid when ->set_dmamode() is called. ->post_set_mode() is 131 ->mode_filter() function which should return a mask of valid modes
132 after filtering those unsuitable due to hardware limits. It is not
133 valid to use this interface to add modes.
134 </para>
135 <para>
136 dev->pio_mode and dev->dma_mode are guaranteed to be valid when
137 ->set_piomode() and when ->set_dmamode() is called. The timings for
138 any other drive sharing the cable will also be valid at this point.
139 That is the library records the decisions for the modes of each
140 drive on a channel before it attempts to set any of them.
141 </para>
142 <para>
143 ->post_set_mode() is
131 called unconditionally, after the SET FEATURES - XFER MODE 144 called unconditionally, after the SET FEATURES - XFER MODE
132 command completes successfully. 145 command completes successfully.
133 </para> 146 </para>
@@ -230,6 +243,32 @@ void (*dev_select)(struct ata_port *ap, unsigned int device);
230 243
231 </sect2> 244 </sect2>
232 245
246 <sect2><title>Private tuning method</title>
247 <programlisting>
248void (*set_mode) (struct ata_port *ap);
249 </programlisting>
250
251 <para>
252 By default libata performs drive and controller tuning in
253 accordance with the ATA timing rules and also applies blacklists
254 and cable limits. Some controllers need special handling and have
255 custom tuning rules, typically raid controllers that use ATA
256 commands but do not actually do drive timing.
257 </para>
258
259 <warning>
260 <para>
261 This hook should not be used to replace the standard controller
262 tuning logic when a controller has quirks. Replacing the default
263 tuning logic in that case would bypass handling for drive and
264 bridge quirks that may be important to data reliability. If a
265 controller needs to filter the mode selection it should use the
266 mode_filter hook instead.
267 </para>
268 </warning>
269
270 </sect2>
271
233 <sect2><title>Reset ATA bus</title> 272 <sect2><title>Reset ATA bus</title>
234 <programlisting> 273 <programlisting>
235void (*phy_reset) (struct ata_port *ap); 274void (*phy_reset) (struct ata_port *ap);
@@ -666,7 +705,7 @@ and other resources, etc.
666 705
667 <sect1><title>ata_scsi_error()</title> 706 <sect1><title>ata_scsi_error()</title>
668 <para> 707 <para>
669 ata_scsi_error() is the current hostt->eh_strategy_handler() 708 ata_scsi_error() is the current transportt->eh_strategy_handler()
670 for libata. As discussed above, this will be entered in two 709 for libata. As discussed above, this will be entered in two
671 cases - timeout and ATAPI error completion. This function 710 cases - timeout and ATAPI error completion. This function
672 calls low level libata driver's eng_timeout() callback, the 711 calls low level libata driver's eng_timeout() callback, the
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 6c9e746267da..915ae8c986c6 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -603,7 +603,8 @@ start exactly where you are now.
603 603
604 604
605---------- 605----------
606Thanks to Paolo Ciarrocchi who allowed the "Development Process" section 606Thanks to Paolo Ciarrocchi who allowed the "Development Process"
607(http://linux.tar.bz/articles/2.6-development_process) section
607to be based on text he had written, and to Randy Dunlap and Gerrit 608to be based on text he had written, and to Randy Dunlap and Gerrit
608Huizenga for some of the list of things you should and should not say. 609Huizenga for some of the list of things you should and should not say.
609Also thanks to Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers, 610Also thanks to Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index b4ea51ad3610..07cb93b82ba9 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -605,7 +605,7 @@ are the same as those shown in the preceding section, so they are omitted.
605 { 605 {
606 int cpu; 606 int cpu;
607 607
608 for_each_cpu(cpu) 608 for_each_possible_cpu(cpu)
609 run_on(cpu); 609 run_on(cpu);
610 } 610 }
611 611
diff --git a/Documentation/acpi-hotkey.txt b/Documentation/acpi-hotkey.txt
index 744f1aec6553..38040fa37649 100644
--- a/Documentation/acpi-hotkey.txt
+++ b/Documentation/acpi-hotkey.txt
@@ -30,7 +30,7 @@ specific hotkey(event))
30echo "event_num:event_type:event_argument" > 30echo "event_num:event_type:event_argument" >
31 /proc/acpi/hotkey/action. 31 /proc/acpi/hotkey/action.
32The result of the execution of this aml method is 32The result of the execution of this aml method is
33attached to /proc/acpi/hotkey/poll_method, which is dnyamically 33attached to /proc/acpi/hotkey/poll_method, which is dynamically
34created. Please use command "cat /proc/acpi/hotkey/polling_method" 34created. Please use command "cat /proc/acpi/hotkey/polling_method"
35to retrieve it. 35to retrieve it.
36 36
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
index cbbe5587c78d..78bc1c1b04e5 100644
--- a/Documentation/arm/SA1100/Assabet
+++ b/Documentation/arm/SA1100/Assabet
@@ -26,7 +26,7 @@ Installing a bootloader
26 26
27A couple of bootloaders able to boot Linux on Assabet are available: 27A couple of bootloaders able to boot Linux on Assabet are available:
28 28
29BLOB (http://www.lart.tudelft.nl/lartware/blob/) 29BLOB (http://www.lartmaker.nl/lartware/blob/)
30 30
31 BLOB is a bootloader used within the LART project. Some contributed 31 BLOB is a bootloader used within the LART project. Some contributed
32 patches were merged into BLOB to add support for Assabet. 32 patches were merged into BLOB to add support for Assabet.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
index 2f73f513e16a..6d412b685598 100644
--- a/Documentation/arm/SA1100/LART
+++ b/Documentation/arm/SA1100/LART
@@ -11,4 +11,4 @@ is under development, with plenty of others in different stages of
11planning. 11planning.
12 12
13The hardware designs for this board have been released under an open license; 13The hardware designs for this board have been released under an open license;
14see the LART page at http://www.lart.tudelft.nl/ for more information. 14see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8e63831971d5..f989a9e839b4 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -132,8 +132,18 @@ Some new queue property settings:
132 limit. No highmem default. 132 limit. No highmem default.
133 133
134 blk_queue_max_sectors(q, max_sectors) 134 blk_queue_max_sectors(q, max_sectors)
135 Maximum size request you can handle in units of 512 byte 135 Sets two variables that limit the size of the request.
136 sectors. 255 default. 136
137 - The request queue's max_sectors, which is a soft size in
138 in units of 512 byte sectors, and could be dynamically varied
139 by the core kernel.
140
141 - The request queue's max_hw_sectors, which is a hard limit
142 and reflects the maximum size request a driver can handle
143 in units of 512 byte sectors.
144
145 The default for both max_sectors and max_hw_sectors is
146 255. The upper limit of max_sectors is 1024.
137 147
138 blk_queue_max_phys_segments(q, max_segments) 148 blk_queue_max_phys_segments(q, max_segments)
139 Maximum physical segments you can handle in a request. 128 149 Maximum physical segments you can handle in a request. 128
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
new file mode 100644
index 000000000000..5fa130a67531
--- /dev/null
+++ b/Documentation/block/switching-sched.txt
@@ -0,0 +1,22 @@
1As of the Linux 2.6.10 kernel, it is now possible to change the
2IO scheduler for a given block device on the fly (thus making it possible,
3for instance, to set the CFQ scheduler for the system default, but
4set a specific device to use the anticipatory or noop schedulers - which
5can improve that device's throughput).
6
7To set a specific scheduler, simply do this:
8
9echo SCHEDNAME > /sys/block/DEV/queue/scheduler
10
11where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
12device name (hda, hdb, sga, or whatever you happen to have).
13
14The list of defined schedulers can be found by simply doing
15a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
16will be displayed, with the currently selected scheduler in brackets:
17
18# cat /sys/block/hda/queue/scheduler
19noop anticipatory deadline [cfq]
20# echo anticipatory > /sys/block/hda/queue/scheduler
21# cat /sys/block/hda/queue/scheduler
22noop [anticipatory] deadline cfq
diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
index 5009805f9378..ffdb5323df37 100644
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt
@@ -53,4 +53,4 @@ the CPUFreq Mailing list:
53* http://lists.linux.org.uk/mailman/listinfo/cpufreq 53* http://lists.linux.org.uk/mailman/listinfo/cpufreq
54 54
55Clock and voltage scaling for the SA-1100: 55Clock and voltage scaling for the SA-1100:
56* http://www.lart.tudelft.nl/projects/scaling 56* http://www.lartmaker.nl/projects/scaling
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 57a09f99ecb0..1bcf69996c9d 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -97,13 +97,13 @@ at which time hotplug is disabled.
97 97
98You really don't need to manipulate any of the system cpu maps. They should 98You really don't need to manipulate any of the system cpu maps. They should
99be read-only for most use. When setting up per-cpu resources almost always use 99be read-only for most use. When setting up per-cpu resources almost always use
100cpu_possible_map/for_each_cpu() to iterate. 100cpu_possible_map/for_each_possible_cpu() to iterate.
101 101
102Never use anything other than cpumask_t to represent bitmap of CPUs. 102Never use anything other than cpumask_t to represent bitmap of CPUs.
103 103
104#include <linux/cpumask.h> 104#include <linux/cpumask.h>
105 105
106for_each_cpu - Iterate over cpu_possible_map 106for_each_possible_cpu - Iterate over cpu_possible_map
107for_each_online_cpu - Iterate over cpu_online_map 107for_each_online_cpu - Iterate over cpu_online_map
108for_each_present_cpu - Iterate over cpu_present_map 108for_each_present_cpu - Iterate over cpu_present_map
109for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask. 109for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index ff280e2e1613..2b28e9ec4e3a 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -1,5 +1,5 @@
1 1
2Export cpu topology info by sysfs. Items (attributes) are similar 2Export cpu topology info via sysfs. Items (attributes) are similar
3to /proc/cpuinfo. 3to /proc/cpuinfo.
4 4
51) /sys/devices/system/cpu/cpuX/topology/physical_package_id: 51) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
@@ -12,7 +12,7 @@ represent the thread siblings to cpu X in the same core;
12represent the thread siblings to cpu X in the same physical package; 12represent the thread siblings to cpu X in the same physical package;
13 13
14To implement it in an architecture-neutral way, a new source file, 14To implement it in an architecture-neutral way, a new source file,
15driver/base/topology.c, is to export the 5 attributes. 15drivers/base/topology.c, is to export the 4 attributes.
16 16
17If one architecture wants to support this feature, it just needs to 17If one architecture wants to support this feature, it just needs to
18implement 4 defines, typically in file include/asm-XXX/topology.h. 18implement 4 defines, typically in file include/asm-XXX/topology.h.
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 3c406acd4dfa..b369a8c46a73 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -1721,11 +1721,6 @@ Your cooperation is appreciated.
1721 These devices support the same API as the generic SCSI 1721 These devices support the same API as the generic SCSI
1722 devices. 1722 devices.
1723 1723
1724 97 block Packet writing for CD/DVD devices
1725 0 = /dev/pktcdvd0 First packet-writing module
1726 1 = /dev/pktcdvd1 Second packet-writing module
1727 ...
1728
1729 98 char Control and Measurement Device (comedi) 1724 98 char Control and Measurement Device (comedi)
1730 0 = /dev/comedi0 First comedi device 1725 0 = /dev/comedi0 First comedi device
1731 1 = /dev/comedi1 Second comedi device 1726 1 = /dev/comedi1 Second comedi device
diff --git a/Documentation/dvb/get_dvb_firmware b/Documentation/dvb/get_dvb_firmware
index 15fc8fbef67e..4820366b6ae8 100644
--- a/Documentation/dvb/get_dvb_firmware
+++ b/Documentation/dvb/get_dvb_firmware
@@ -259,9 +259,9 @@ sub dibusb {
259} 259}
260 260
261sub nxt2002 { 261sub nxt2002 {
262 my $sourcefile = "Broadband4PC_4_2_11.zip"; 262 my $sourcefile = "Technisat_DVB-PC_4_4_COMPACT.zip";
263 my $url = "http://www.bbti.us/download/windows/$sourcefile"; 263 my $url = "http://www.bbti.us/download/windows/$sourcefile";
264 my $hash = "c6d2ea47a8f456d887ada0cfb718ff2a"; 264 my $hash = "476befae8c7c1bb9648954060b1eec1f";
265 my $outfile = "dvb-fe-nxt2002.fw"; 265 my $outfile = "dvb-fe-nxt2002.fw";
266 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); 266 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
267 267
@@ -269,8 +269,8 @@ sub nxt2002 {
269 269
270 wgetfile($sourcefile, $url); 270 wgetfile($sourcefile, $url);
271 unzip($sourcefile, $tmpdir); 271 unzip($sourcefile, $tmpdir);
272 verify("$tmpdir/SkyNETU.sys", $hash); 272 verify("$tmpdir/SkyNET.sys", $hash);
273 extract("$tmpdir/SkyNETU.sys", 375832, 5908, $outfile); 273 extract("$tmpdir/SkyNET.sys", 331624, 5908, $outfile);
274 274
275 $outfile; 275 $outfile;
276} 276}
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 495858b236b6..43ab119963d5 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -25,8 +25,9 @@ Who: Adrian Bunk <bunk@stusta.de>
25 25
26--------------------------- 26---------------------------
27 27
28What: drivers depending on OBSOLETE_OSS_DRIVER 28What: drivers that were depending on OBSOLETE_OSS_DRIVER
29When: January 2006 29 (config options already removed)
30When: before 2.6.19
30Why: OSS drivers with ALSA replacements 31Why: OSS drivers with ALSA replacements
31Who: Adrian Bunk <bunk@stusta.de> 32Who: Adrian Bunk <bunk@stusta.de>
32 33
@@ -56,6 +57,15 @@ Who: Jody McIntyre <scjody@steamballoon.com>
56 57
57--------------------------- 58---------------------------
58 59
60What: sbp2: module parameter "force_inquiry_hack"
61When: July 2006
62Why: Superseded by parameter "workarounds". Both parameters are meant to be
63 used ad-hoc and for single devices only, i.e. not in modprobe.conf,
64 therefore the impact of this feature replacement should be low.
65Who: Stefan Richter <stefanr@s5r6.in-berlin.de>
66
67---------------------------
68
59What: Video4Linux API 1 ioctls and video_decoder.h from Video devices. 69What: Video4Linux API 1 ioctls and video_decoder.h from Video devices.
60When: July 2006 70When: July 2006
61Why: V4L1 API was replaced by V4L2 API. during migration from 2.4 to 2.6 71Why: V4L1 API was replaced by V4L2 API. during migration from 2.4 to 2.6
@@ -71,14 +81,6 @@ Who: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
71 81
72--------------------------- 82---------------------------
73 83
74What: remove EXPORT_SYMBOL(panic_timeout)
75When: April 2006
76Files: kernel/panic.c
77Why: No modular usage in the kernel.
78Who: Adrian Bunk <bunk@stusta.de>
79
80---------------------------
81
82What: remove EXPORT_SYMBOL(insert_resource) 84What: remove EXPORT_SYMBOL(insert_resource)
83When: April 2006 85When: April 2006
84Files: kernel/resource.c 86Files: kernel/resource.c
@@ -127,13 +129,6 @@ Who: Christoph Hellwig <hch@lst.de>
127 129
128--------------------------- 130---------------------------
129 131
130What: EXPORT_SYMBOL(lookup_hash)
131When: January 2006
132Why: Too low-level interface. Use lookup_one_len or lookup_create instead.
133Who: Christoph Hellwig <hch@lst.de>
134
135---------------------------
136
137What: CONFIG_FORCED_INLINING 132What: CONFIG_FORCED_INLINING
138When: June 2006 133When: June 2006
139Why: Config option is there to see if gcc is good enough. (in january 134Why: Config option is there to see if gcc is good enough. (in january
@@ -241,3 +236,15 @@ Why: The USB subsystem has changed a lot over time, and it has been
241Who: Greg Kroah-Hartman <gregkh@suse.de> 236Who: Greg Kroah-Hartman <gregkh@suse.de>
242 237
243--------------------------- 238---------------------------
239
240What: find_trylock_page
241When: January 2007
242Why: The interface no longer has any callers left in the kernel. It
243 is an odd interface (compared with other find_*_page functions), in
244 that it does not take a refcount to the page, only the page lock.
245 It should be replaced with find_get_page or find_lock_page if possible.
246 This feature removal can be reevaluated if users of the interface
247 cannot cleanly use something else.
248Who: Nick Piggin <npiggin@suse.de>
249
250---------------------------
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 74052d22d868..66fdc0744fe0 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -1,27 +1,47 @@
100-INDEX 100-INDEX
2 - this file (info on some of the filesystems supported by linux). 2 - this file (info on some of the filesystems supported by linux).
3Exporting
4 - explanation of how to make filesystems exportable.
3Locking 5Locking
4 - info on locking rules as they pertain to Linux VFS. 6 - info on locking rules as they pertain to Linux VFS.
5adfs.txt 7adfs.txt
6 - info and mount options for the Acorn Advanced Disc Filing System. 8 - info and mount options for the Acorn Advanced Disc Filing System.
9afs.txt
10 - info and examples for the distributed AFS (Andrew File System) fs.
7affs.txt 11affs.txt
8 - info and mount options for the Amiga Fast File System. 12 - info and mount options for the Amiga Fast File System.
13automount-support.txt
14 - information about filesystem automount support.
15befs.txt
16 - information about the BeOS filesystem for Linux.
9bfs.txt 17bfs.txt
10 - info for the SCO UnixWare Boot Filesystem (BFS). 18 - info for the SCO UnixWare Boot Filesystem (BFS).
11cifs.txt 19cifs.txt
12 - description of the CIFS filesystem 20 - description of the CIFS filesystem.
13coda.txt 21coda.txt
14 - description of the CODA filesystem. 22 - description of the CODA filesystem.
15configfs/ 23configfs/
16 - directory containing configfs documentation and example code. 24 - directory containing configfs documentation and example code.
17cramfs.txt 25cramfs.txt
18 - info on the cram filesystem for small storage (ROMs etc) 26 - info on the cram filesystem for small storage (ROMs etc).
27dentry-locking.txt
28 - info on the RCU-based dcache locking model.
19devfs/ 29devfs/
20 - directory containing devfs documentation. 30 - directory containing devfs documentation.
31directory-locking
32 - info about the locking scheme used for directory operations.
21dlmfs.txt 33dlmfs.txt
22 - info on the userspace interface to the OCFS2 DLM. 34 - info on the userspace interface to the OCFS2 DLM.
23ext2.txt 35ext2.txt
24 - info, mount options and specifications for the Ext2 filesystem. 36 - info, mount options and specifications for the Ext2 filesystem.
37ext3.txt
38 - info, mount options and specifications for the Ext3 filesystem.
39files.txt
40 - info on file management in the Linux kernel.
41fuse.txt
42 - info on the Filesystem in User SpacE including mount options.
43hfs.txt
44 - info on the Macintosh HFS Filesystem for Linux.
25hpfs.txt 45hpfs.txt
26 - info and mount options for the OS/2 HPFS. 46 - info and mount options for the OS/2 HPFS.
27isofs.txt 47isofs.txt
@@ -32,23 +52,43 @@ ncpfs.txt
32 - info on Novell Netware(tm) filesystem using NCP protocol. 52 - info on Novell Netware(tm) filesystem using NCP protocol.
33ntfs.txt 53ntfs.txt
34 - info and mount options for the NTFS filesystem (Windows NT). 54 - info and mount options for the NTFS filesystem (Windows NT).
35proc.txt
36 - info on Linux's /proc filesystem.
37ocfs2.txt 55ocfs2.txt
38 - info and mount options for the OCFS2 clustered filesystem. 56 - info and mount options for the OCFS2 clustered filesystem.
57porting
58 - various information on filesystem porting.
59proc.txt
60 - info on Linux's /proc filesystem.
61ramfs-rootfs-initramfs.txt
62 - info on the 'in memory' filesystems ramfs, rootfs and initramfs.
63reiser4.txt
64 - info on the Reiser4 filesystem based on dancing tree algorithms.
65relayfs.txt
66 - info on relayfs, for efficient streaming from kernel to user space.
39romfs.txt 67romfs.txt
40 - Description of the ROMFS filesystem. 68 - description of the ROMFS filesystem.
41smbfs.txt 69smbfs.txt
42 - info on using filesystems with the SMB protocol (Windows 3.11 and NT) 70 - info on using filesystems with the SMB protocol (Win 3.11 and NT).
71spufs.txt
72 - info and mount options for the SPU filesystem used on Cell.
73sysfs-pci.txt
74 - info on accessing PCI device resources through sysfs.
75sysfs.txt
76 - info on sysfs, a ram-based filesystem for exporting kernel objects.
43sysv-fs.txt 77sysv-fs.txt
44 - info on the SystemV/V7/Xenix/Coherent filesystem. 78 - info on the SystemV/V7/Xenix/Coherent filesystem.
79tmpfs.txt
80 - info on tmpfs, a filesystem that holds all files in virtual memory.
45udf.txt 81udf.txt
46 - info and mount options for the UDF filesystem. 82 - info and mount options for the UDF filesystem.
47ufs.txt 83ufs.txt
48 - info on the ufs filesystem. 84 - info on the ufs filesystem.
85v9fs.txt
86 - v9fs is a Unix implementation of the Plan 9 9p remote fs protocol.
49vfat.txt 87vfat.txt
50 - info on using the VFAT filesystem used in Windows NT and Windows 95 88 - info on using the VFAT filesystem used in Windows NT and Windows 95
51vfs.txt 89vfs.txt
52 - Overview of the Virtual File System 90 - overview of the Virtual File System
53xfs.txt 91xfs.txt
54 - info and mount options for the XFS filesystem. 92 - info and mount options for the XFS filesystem.
93xip.txt
94 - info on execute-in-place for file mappings.
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index c8bce82ddcac..89b1d196ca80 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -246,6 +246,7 @@ class/
246devices/ 246devices/
247firmware/ 247firmware/
248net/ 248net/
249fs/
249 250
250devices/ contains a filesystem representation of the device tree. It maps 251devices/ contains a filesystem representation of the device tree. It maps
251directly to the internal kernel device tree, which is a hierarchy of 252directly to the internal kernel device tree, which is a hierarchy of
@@ -264,6 +265,10 @@ drivers/ contains a directory for each device driver that is loaded
264for devices on that particular bus (this assumes that drivers do not 265for devices on that particular bus (this assumes that drivers do not
265span multiple bus types). 266span multiple bus types).
266 267
268fs/ contains a directory for some filesystems. Currently each
269filesystem wanting to export attributes must create its own hierarchy
270below fs/ (see ./fuse.txt for an example).
271
267 272
268More information on driver-model specific features can be found in 273More information on driver-model specific features can be found in
269Documentation/driver-model/. 274Documentation/driver-model/.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index adaa899e5c90..3a2e5520c1e3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -694,7 +694,7 @@ struct file_operations
694---------------------- 694----------------------
695 695
696This describes how the VFS can manipulate an open file. As of kernel 696This describes how the VFS can manipulate an open file. As of kernel
6972.6.13, the following members are defined: 6972.6.17, the following members are defined:
698 698
699struct file_operations { 699struct file_operations {
700 loff_t (*llseek) (struct file *, loff_t, int); 700 loff_t (*llseek) (struct file *, loff_t, int);
@@ -723,6 +723,10 @@ struct file_operations {
723 int (*check_flags)(int); 723 int (*check_flags)(int);
724 int (*dir_notify)(struct file *filp, unsigned long arg); 724 int (*dir_notify)(struct file *filp, unsigned long arg);
725 int (*flock) (struct file *, int, struct file_lock *); 725 int (*flock) (struct file *, int, struct file_lock *);
726 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned
727int);
728 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned
729int);
726}; 730};
727 731
728Again, all methods are called without any locks being held, unless 732Again, all methods are called without any locks being held, unless
@@ -790,6 +794,12 @@ otherwise noted.
790 794
791 flock: called by the flock(2) system call 795 flock: called by the flock(2) system call
792 796
797 splice_write: called by the VFS to splice data from a pipe to a file. This
798 method is used by the splice(2) system call
799
800 splice_read: called by the VFS to splice data from a file to a pipe. This
801 method is used by the splice(2) system call
802
793Note that the file operations are implemented by the specific 803Note that the file operations are implemented by the specific
794filesystem in which the inode resides. When opening a device node 804filesystem in which the inode resides. When opening a device node
795(character or block special) most filesystems will call special 805(character or block special) most filesystems will call special
diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README
index 43e836c07ae8..e9cc8bb26f7d 100644
--- a/Documentation/firmware_class/README
+++ b/Documentation/firmware_class/README
@@ -105,20 +105,3 @@
105 on the setup, so I think that the choice on what firmware to make 105 on the setup, so I think that the choice on what firmware to make
106 persistent should be left to userspace. 106 persistent should be left to userspace.
107 107
108 - Why register_firmware()+__init can be useful:
109 - For boot devices needing firmware.
110 - To make the transition easier:
111 The firmware can be declared __init and register_firmware()
112 called on module_init. Then the firmware is warranted to be
113 there even if "firmware hotplug userspace" is not there yet or
114 it doesn't yet provide the needed firmware.
115 Once the firmware is widely available in userspace, it can be
116 removed from the kernel. Or made optional (CONFIG_.*_FIRMWARE).
117
118 In either case, if firmware hotplug support is there, it can move the
119 firmware out of kernel memory into the real filesystem for later
120 usage.
121
122 Note: If persistence is implemented on top of initramfs,
123 register_firmware() may not be appropriate.
124
diff --git a/Documentation/firmware_class/firmware_sample_driver.c b/Documentation/firmware_class/firmware_sample_driver.c
index ad3edaba4533..87feccdb5c9f 100644
--- a/Documentation/firmware_class/firmware_sample_driver.c
+++ b/Documentation/firmware_class/firmware_sample_driver.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Sample code on how to use request_firmware() from drivers. 6 * Sample code on how to use request_firmware() from drivers.
7 * 7 *
8 * Note that register_firmware() is currently useless.
9 *
10 */ 8 */
11 9
12#include <linux/module.h> 10#include <linux/module.h>
@@ -17,11 +15,6 @@
17 15
18#include "linux/firmware.h" 16#include "linux/firmware.h"
19 17
20#define WE_CAN_NEED_FIRMWARE_BEFORE_USERSPACE_IS_AVAILABLE
21#ifdef WE_CAN_NEED_FIRMWARE_BEFORE_USERSPACE_IS_AVAILABLE
22char __init inkernel_firmware[] = "let's say that this is firmware\n";
23#endif
24
25static struct device ghost_device = { 18static struct device ghost_device = {
26 .bus_id = "ghost0", 19 .bus_id = "ghost0",
27}; 20};
@@ -104,10 +97,6 @@ static void sample_probe_async(void)
104 97
105static int sample_init(void) 98static int sample_init(void)
106{ 99{
107#ifdef WE_CAN_NEED_FIRMWARE_BEFORE_USERSPACE_IS_AVAILABLE
108 register_firmware("sample_driver_fw", inkernel_firmware,
109 sizeof(inkernel_firmware));
110#endif
111 device_initialize(&ghost_device); 100 device_initialize(&ghost_device);
112 /* since there is no real hardware insertion I just call the 101 /* since there is no real hardware insertion I just call the
113 * sample probe functions here */ 102 * sample probe functions here */
diff --git a/Documentation/fujitsu/frv/kernel-ABI.txt b/Documentation/fujitsu/frv/kernel-ABI.txt
index 0ed9b0a779bc..8b0a5fc8bfd9 100644
--- a/Documentation/fujitsu/frv/kernel-ABI.txt
+++ b/Documentation/fujitsu/frv/kernel-ABI.txt
@@ -1,17 +1,19 @@
1 ================================= 1 =================================
2 INTERNAL KERNEL ABI FOR FR-V ARCH 2 INTERNAL KERNEL ABI FOR FR-V ARCH
3 ================================= 3 =================================
4 4
5The internal FRV kernel ABI is not quite the same as the userspace ABI. A number of the registers 5The internal FRV kernel ABI is not quite the same as the userspace ABI. A
6are used for special purposes, and the ABI is not consistent between modules vs core, and MMU vs 6number of the registers are used for special purposes, and the ABI is not
7no-MMU. 7consistent between modules vs core, and MMU vs no-MMU.
8 8
9This partly stems from the fact that FRV CPUs do not have a separate supervisor stack pointer, and 9This partly stems from the fact that FRV CPUs do not have a separate
10most of them do not have any scratch registers, thus requiring at least one general purpose 10supervisor stack pointer, and most of them do not have any scratch
11register to be clobbered in such an event. Also, within the kernel core, it is possible to simply 11registers, thus requiring at least one general purpose register to be
12jump or call directly between functions using a relative offset. This cannot be extended to modules 12clobbered in such an event. Also, within the kernel core, it is possible to
13for the displacement is likely to be too far. Thus in modules the address of a function to call 13simply jump or call directly between functions using a relative offset.
14must be calculated in a register and then used, requiring two extra instructions. 14This cannot be extended to modules for the displacement is likely to be too
15far. Thus in modules the address of a function to call must be calculated
16in a register and then used, requiring two extra instructions.
15 17
16This document has the following sections: 18This document has the following sections:
17 19
@@ -39,7 +41,8 @@ When a system call is made, the following registers are effective:
39CPU OPERATING MODES 41CPU OPERATING MODES
40=================== 42===================
41 43
42The FR-V CPU has three basic operating modes. In order of increasing capability: 44The FR-V CPU has three basic operating modes. In order of increasing
45capability:
43 46
44 (1) User mode. 47 (1) User mode.
45 48
@@ -47,42 +50,46 @@ The FR-V CPU has three basic operating modes. In order of increasing capability:
47 50
48 (2) Kernel mode. 51 (2) Kernel mode.
49 52
50 Normal kernel mode. There are many additional control registers available that may be 53 Normal kernel mode. There are many additional control registers
51 accessed in this mode, in addition to all the stuff available to user mode. This has two 54 available that may be accessed in this mode, in addition to all the
52 submodes: 55 stuff available to user mode. This has two submodes:
53 56
54 (a) Exceptions enabled (PSR.T == 1). 57 (a) Exceptions enabled (PSR.T == 1).
55 58
56 Exceptions will invoke the appropriate normal kernel mode handler. On entry to the 59 Exceptions will invoke the appropriate normal kernel mode
57 handler, the PSR.T bit will be cleared. 60 handler. On entry to the handler, the PSR.T bit will be cleared.
58 61
59 (b) Exceptions disabled (PSR.T == 0). 62 (b) Exceptions disabled (PSR.T == 0).
60 63
61 No exceptions or interrupts may happen. Any mandatory exceptions will cause the CPU to 64 No exceptions or interrupts may happen. Any mandatory exceptions
62 halt unless the CPU is told to jump into debug mode instead. 65 will cause the CPU to halt unless the CPU is told to jump into
66 debug mode instead.
63 67
64 (3) Debug mode. 68 (3) Debug mode.
65 69
66 No exceptions may happen in this mode. Memory protection and management exceptions will be 70 No exceptions may happen in this mode. Memory protection and
67 flagged for later consideration, but the exception handler won't be invoked. Debugging traps 71 management exceptions will be flagged for later consideration, but
68 such as hardware breakpoints and watchpoints will be ignored. This mode is entered only by 72 the exception handler won't be invoked. Debugging traps such as
69 debugging events obtained from the other two modes. 73 hardware breakpoints and watchpoints will be ignored. This mode is
74 entered only by debugging events obtained from the other two modes.
70 75
71 All kernel mode registers may be accessed, plus a few extra debugging specific registers. 76 All kernel mode registers may be accessed, plus a few extra debugging
77 specific registers.
72 78
73 79
74================================= 80=================================
75INTERNAL KERNEL-MODE REGISTER ABI 81INTERNAL KERNEL-MODE REGISTER ABI
76================================= 82=================================
77 83
78There are a number of permanent register assignments that are set up by entry.S in the exception 84There are a number of permanent register assignments that are set up by
79prologue. Note that there is a complete set of exception prologues for each of user->kernel 85entry.S in the exception prologue. Note that there is a complete set of
80transition and kernel->kernel transition. There are also user->debug and kernel->debug mode 86exception prologues for each of user->kernel transition and kernel->kernel
81transition prologues. 87transition. There are also user->debug and kernel->debug mode transition
88prologues.
82 89
83 90
84 REGISTER FLAVOUR USE 91 REGISTER FLAVOUR USE
85 =============== ======= ==================================================== 92 =============== ======= ==============================================
86 GR1 Supervisor stack pointer 93 GR1 Supervisor stack pointer
87 GR15 Current thread info pointer 94 GR15 Current thread info pointer
88 GR16 GP-Rel base register for small data 95 GR16 GP-Rel base register for small data
@@ -92,10 +99,12 @@ transition prologues.
92 GR31 NOMMU Destroyed by debug mode entry 99 GR31 NOMMU Destroyed by debug mode entry
93 GR31 MMU Destroyed by TLB miss kernel mode entry 100 GR31 MMU Destroyed by TLB miss kernel mode entry
94 CCR.ICC2 Virtual interrupt disablement tracking 101 CCR.ICC2 Virtual interrupt disablement tracking
95 CCCR.CC3 Cleared by exception prologue (atomic op emulation) 102 CCCR.CC3 Cleared by exception prologue
103 (atomic op emulation)
96 SCR0 MMU See mmu-layout.txt. 104 SCR0 MMU See mmu-layout.txt.
97 SCR1 MMU See mmu-layout.txt. 105 SCR1 MMU See mmu-layout.txt.
98 SCR2 MMU Save for EAR0 (destroyed by icache insns in debug mode) 106 SCR2 MMU Save for EAR0 (destroyed by icache insns
107 in debug mode)
99 SCR3 MMU Save for GR31 during debug exceptions 108 SCR3 MMU Save for GR31 during debug exceptions
100 DAMR/IAMR NOMMU Fixed memory protection layout. 109 DAMR/IAMR NOMMU Fixed memory protection layout.
101 DAMR/IAMR MMU See mmu-layout.txt. 110 DAMR/IAMR MMU See mmu-layout.txt.
@@ -104,18 +113,21 @@ transition prologues.
104Certain registers are also used or modified across function calls: 113Certain registers are also used or modified across function calls:
105 114
106 REGISTER CALL RETURN 115 REGISTER CALL RETURN
107 =============== =============================== =============================== 116 =============== =============================== ======================
108 GR0 Fixed Zero - 117 GR0 Fixed Zero -
109 GR2 Function call frame pointer 118 GR2 Function call frame pointer
110 GR3 Special Preserved 119 GR3 Special Preserved
111 GR3-GR7 - Clobbered 120 GR3-GR7 - Clobbered
112 GR8 Function call arg #1 Return value (or clobbered) 121 GR8 Function call arg #1 Return value
113 GR9 Function call arg #2 Return value MSW (or clobbered) 122 (or clobbered)
123 GR9 Function call arg #2 Return value MSW
124 (or clobbered)
114 GR10-GR13 Function call arg #3-#6 Clobbered 125 GR10-GR13 Function call arg #3-#6 Clobbered
115 GR14 - Clobbered 126 GR14 - Clobbered
116 GR15-GR16 Special Preserved 127 GR15-GR16 Special Preserved
117 GR17-GR27 - Preserved 128 GR17-GR27 - Preserved
118 GR28-GR31 Special Only accessed explicitly 129 GR28-GR31 Special Only accessed
130 explicitly
119 LR Return address after CALL Clobbered 131 LR Return address after CALL Clobbered
120 CCR/CCCR - Mostly Clobbered 132 CCR/CCCR - Mostly Clobbered
121 133
@@ -124,46 +136,53 @@ Certain registers are also used or modified across function calls:
124INTERNAL DEBUG-MODE REGISTER ABI 136INTERNAL DEBUG-MODE REGISTER ABI
125================================ 137================================
126 138
127This is the same as the kernel-mode register ABI for function calls. The difference is that in 139This is the same as the kernel-mode register ABI for function calls. The
128debug-mode there's a different stack and a different exception frame. Almost all the global 140difference is that in debug-mode there's a different stack and a different
129registers from kernel-mode (including the stack pointer) may be changed. 141exception frame. Almost all the global registers from kernel-mode
142(including the stack pointer) may be changed.
130 143
131 REGISTER FLAVOUR USE 144 REGISTER FLAVOUR USE
132 =============== ======= ==================================================== 145 =============== ======= ==============================================
133 GR1 Debug stack pointer 146 GR1 Debug stack pointer
134 GR16 GP-Rel base register for small data 147 GR16 GP-Rel base register for small data
135 GR31 Current debug exception frame pointer (__debug_frame) 148 GR31 Current debug exception frame pointer
149 (__debug_frame)
136 SCR3 MMU Saved value of GR31 150 SCR3 MMU Saved value of GR31
137 151
138 152
139Note that debug mode is able to interfere with the kernel's emulated atomic ops, so it must be 153Note that debug mode is able to interfere with the kernel's emulated atomic
140exceedingly careful not to do any that would interact with the main kernel in this regard. Hence 154ops, so it must be exceedingly careful not to do any that would interact
141the debug mode code (gdbstub) is almost completely self-contained. The only external code used is 155with the main kernel in this regard. Hence the debug mode code (gdbstub) is
142the sprintf family of functions. 156almost completely self-contained. The only external code used is the
157sprintf family of functions.
143 158
144Furthermore, break.S is so complicated because single-step mode does not switch off on entry to an 159Furthermore, break.S is so complicated because single-step mode does not
145exception. That means unless manually disabled, single-stepping will blithely go on stepping into 160switch off on entry to an exception. That means unless manually disabled,
146things like interrupts. See gdbstub.txt for more information. 161single-stepping will blithely go on stepping into things like interrupts.
162See gdbstub.txt for more information.
147 163
148 164
149========================== 165==========================
150VIRTUAL INTERRUPT HANDLING 166VIRTUAL INTERRUPT HANDLING
151========================== 167==========================
152 168
153Because access to the PSR is so slow, and to disable interrupts we have to access it twice (once 169Because access to the PSR is so slow, and to disable interrupts we have
154to read and once to write), we don't actually disable interrupts at all if we don't have to. What 170to access it twice (once to read and once to write), we don't actually
155we do instead is use the ICC2 condition code flags to note virtual disablement, such that if we 171disable interrupts at all if we don't have to. What we do instead is use
156then do take an interrupt, we note the flag, really disable interrupts, set another flag and resume 172the ICC2 condition code flags to note virtual disablement, such that if we
157execution at the point the interrupt happened. Setting condition flags as a side effect of an 173then do take an interrupt, we note the flag, really disable interrupts, set
158arithmetic or logical instruction is really fast. This use of the ICC2 only occurs within the 174another flag and resume execution at the point the interrupt happened.
175Setting condition flags as a side effect of an arithmetic or logical
176instruction is really fast. This use of the ICC2 only occurs within the
159kernel - it does not affect userspace. 177kernel - it does not affect userspace.
160 178
161The flags we use are: 179The flags we use are:
162 180
163 (*) CCR.ICC2.Z [Zero flag] 181 (*) CCR.ICC2.Z [Zero flag]
164 182
165 Set to virtually disable interrupts, clear when interrupts are virtually enabled. Can be 183 Set to virtually disable interrupts, clear when interrupts are
166 modified by logical instructions without affecting the Carry flag. 184 virtually enabled. Can be modified by logical instructions without
185 affecting the Carry flag.
167 186
168 (*) CCR.ICC2.C [Carry flag] 187 (*) CCR.ICC2.C [Carry flag]
169 188
@@ -176,8 +195,9 @@ What happens is this:
176 195
177 ICC2.Z is 0, ICC2.C is 1. 196 ICC2.Z is 0, ICC2.C is 1.
178 197
179 (2) An interrupt occurs. The exception prologue examines ICC2.Z and determines that nothing needs 198 (2) An interrupt occurs. The exception prologue examines ICC2.Z and
180 doing. This is done simply with an unlikely BEQ instruction. 199 determines that nothing needs doing. This is done simply with an
200 unlikely BEQ instruction.
181 201
182 (3) The interrupts are disabled (local_irq_disable) 202 (3) The interrupts are disabled (local_irq_disable)
183 203
@@ -187,48 +207,56 @@ What happens is this:
187 207
188 ICC2.Z would be set to 0. 208 ICC2.Z would be set to 0.
189 209
190 A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would be used to trap if 210 A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would
191 interrupts were now virtually enabled, but physically disabled - which they're not, so the 211 be used to trap if interrupts were now virtually enabled, but
192 trap isn't taken. The kernel would then be back to state (1). 212 physically disabled - which they're not, so the trap isn't taken. The
213 kernel would then be back to state (1).
193 214
194 (5) An interrupt occurs. The exception prologue examines ICC2.Z and determines that the interrupt 215 (5) An interrupt occurs. The exception prologue examines ICC2.Z and
195 shouldn't actually have happened. It jumps aside, and there disables interrupts by setting 216 determines that the interrupt shouldn't actually have happened. It
196 PSR.PIL to 14 and then it clears ICC2.C. 217 jumps aside, and there disables interrupts by setting PSR.PIL to 14
218 and then it clears ICC2.C.
197 219
198 (6) If interrupts were then saved and disabled again (local_irq_save): 220 (6) If interrupts were then saved and disabled again (local_irq_save):
199 221
200 ICC2.Z would be shifted into the save variable and masked off (giving a 1). 222 ICC2.Z would be shifted into the save variable and masked off
223 (giving a 1).
201 224
202 ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be unaffected (ie: 0). 225 ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be
226 unaffected (ie: 0).
203 227
204 (7) If interrupts were then restored from state (6) (local_irq_restore): 228 (7) If interrupts were then restored from state (6) (local_irq_restore):
205 229
206 ICC2.Z would be set to indicate the result of XOR'ing the saved value (ie: 1) with 1, which 230 ICC2.Z would be set to indicate the result of XOR'ing the saved
207 gives a result of 0 - thus leaving ICC2.Z set. 231 value (ie: 1) with 1, which gives a result of 0 - thus leaving
232 ICC2.Z set.
208 233
209 ICC2.C would remain unaffected (ie: 0). 234 ICC2.C would remain unaffected (ie: 0).
210 235
211 A TIHI #2 instruction would be used to again assay the current state, but this would do 236 A TIHI #2 instruction would be used to again assay the current state,
212 nothing as Z==1. 237 but this would do nothing as Z==1.
213 238
214 (8) If interrupts were then enabled (local_irq_enable): 239 (8) If interrupts were then enabled (local_irq_enable):
215 240
216 ICC2.Z would be cleared. ICC2.C would be left unaffected. Both flags would now be 0. 241 ICC2.Z would be cleared. ICC2.C would be left unaffected. Both
242 flags would now be 0.
217 243
218 A TIHI #2 instruction again issued to assay the current state would then trap as both Z==0 244 A TIHI #2 instruction again issued to assay the current state would
219 [interrupts virtually enabled] and C==0 [interrupts really disabled] would then be true. 245 then trap as both Z==0 [interrupts virtually enabled] and C==0
246 [interrupts really disabled] would then be true.
220 247
221 (9) The trap #2 handler would simply enable hardware interrupts (set PSR.PIL to 0), set ICC2.C to 248 (9) The trap #2 handler would simply enable hardware interrupts
222 1 and return. 249 (set PSR.PIL to 0), set ICC2.C to 1 and return.
223 250
224(10) Immediately upon returning, the pending interrupt would be taken. 251(10) Immediately upon returning, the pending interrupt would be taken.
225 252
226(11) The interrupt handler would take the path of actually processing the interrupt (ICC2.Z is 253(11) The interrupt handler would take the path of actually processing the
227 clear, BEQ fails as per step (2)). 254 interrupt (ICC2.Z is clear, BEQ fails as per step (2)).
228 255
229(12) The interrupt handler would then set ICC2.C to 1 since hardware interrupts are definitely 256(12) The interrupt handler would then set ICC2.C to 1 since hardware
230 enabled - or else the kernel wouldn't be here. 257 interrupts are definitely enabled - or else the kernel wouldn't be here.
231 258
232(13) On return from the interrupt handler, things would be back to state (1). 259(13) On return from the interrupt handler, things would be back to state (1).
233 260
234This trap (#2) is only available in kernel mode. In user mode it will result in SIGILL. 261This trap (#2) is only available in kernel mode. In user mode it will
262result in SIGILL.
diff --git a/Documentation/i2c/busses/i2c-parport b/Documentation/i2c/busses/i2c-parport
index d9f23c0763f1..77b995dfca22 100644
--- a/Documentation/i2c/busses/i2c-parport
+++ b/Documentation/i2c/busses/i2c-parport
@@ -12,18 +12,22 @@ meant as a replacement for the older, individual drivers:
12 teletext adapters) 12 teletext adapters)
13 13
14It currently supports the following devices: 14It currently supports the following devices:
15 * Philips adapter 15 * (type=0) Philips adapter
16 * home brew teletext adapter 16 * (type=1) home brew teletext adapter
17 * Velleman K8000 adapter 17 * (type=2) Velleman K8000 adapter
18 * ELV adapter 18 * (type=3) ELV adapter
19 * Analog Devices evaluation boards (ADM1025, ADM1030, ADM1031, ADM1032) 19 * (type=4) Analog Devices ADM1032 evaluation board
20 * Barco LPT->DVI (K5800236) adapter 20 * (type=5) Analog Devices evaluation boards: ADM1025, ADM1030, ADM1031
21 * (type=6) Barco LPT->DVI (K5800236) adapter
21 22
22These devices use different pinout configurations, so you have to tell 23These devices use different pinout configurations, so you have to tell
23the driver what you have, using the type module parameter. There is no 24the driver what you have, using the type module parameter. There is no
24way to autodetect the devices. Support for different pinout configurations 25way to autodetect the devices. Support for different pinout configurations
25can be easily added when needed. 26can be easily added when needed.
26 27
28Earlier kernels defaulted to type=0 (Philips). But now, if the type
29parameter is missing, the driver will simply fail to initialize.
30
27 31
28Building your own adapter 32Building your own adapter
29------------------------- 33-------------------------
diff --git a/Documentation/input/joystick-parport.txt b/Documentation/input/joystick-parport.txt
index 88a011c9f985..d537c48cc6d0 100644
--- a/Documentation/input/joystick-parport.txt
+++ b/Documentation/input/joystick-parport.txt
@@ -36,12 +36,12 @@ with them.
36 36
37 All NES and SNES use the same synchronous serial protocol, clocked from 37 All NES and SNES use the same synchronous serial protocol, clocked from
38the computer's side (and thus timing insensitive). To allow up to 5 NES 38the computer's side (and thus timing insensitive). To allow up to 5 NES
39and/or SNES gamepads connected to the parallel port at once, the output 39and/or SNES gamepads and/or SNES mice connected to the parallel port at once,
40lines of the parallel port are shared, while one of 5 available input lines 40the output lines of the parallel port are shared, while one of 5 available
41is assigned to each gamepad. 41input lines is assigned to each gamepad.
42 42
43 This protocol is handled by the gamecon.c driver, so that's the one 43 This protocol is handled by the gamecon.c driver, so that's the one
44you'll use for NES and SNES gamepads. 44you'll use for NES, SNES gamepads and SNES mice.
45 45
46 The main problem with PC parallel ports is that they don't have +5V power 46 The main problem with PC parallel ports is that they don't have +5V power
47source on any of their pins. So, if you want a reliable source of power 47source on any of their pins. So, if you want a reliable source of power
@@ -106,7 +106,7 @@ A, Turbo B, Select and Start, and is connected through 5 wires, then it is
106either a NES or NES clone and will work with this connection. SNES gamepads 106either a NES or NES clone and will work with this connection. SNES gamepads
107also use 5 wires, but have more buttons. They will work as well, of course. 107also use 5 wires, but have more buttons. They will work as well, of course.
108 108
109Pinout for NES gamepads Pinout for SNES gamepads 109Pinout for NES gamepads Pinout for SNES gamepads and mice
110 110
111 +----> Power +-----------------------\ 111 +----> Power +-----------------------\
112 | 7 | o o o o | x x o | 1 112 | 7 | o o o o | x x o | 1
@@ -454,6 +454,7 @@ uses the following kernel/module command line:
454 6 | N64 pad 454 6 | N64 pad
455 7 | Sony PSX controller 455 7 | Sony PSX controller
456 8 | Sony PSX DDR controller 456 8 | Sony PSX DDR controller
457 9 | SNES mouse
457 458
458 The exact type of the PSX controller type is autoprobed when used so 459 The exact type of the PSX controller type is autoprobed when used so
459hot swapping should work (but is not recommended). 460hot swapping should work (but is not recommended).
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index aa7ba00ec082..171a44ebd939 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -78,8 +78,6 @@ Code Seq# Include File Comments
78'#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem 78'#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem
79'1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl 79'1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl
80 <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/> 80 <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
81'6' 00-10 <asm-i386/processor.h> Intel IA32 microcode update driver
82 <mailto:tigran@veritas.com>
83'8' all SNP8023 advanced NIC card 81'8' all SNP8023 advanced NIC card
84 <mailto:mcr@solidum.com> 82 <mailto:mcr@solidum.com>
85'A' 00-1F linux/apm_bios.h 83'A' 00-1F linux/apm_bios.h
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
new file mode 100644
index 000000000000..85a64defd385
--- /dev/null
+++ b/Documentation/isdn/README.gigaset
@@ -0,0 +1,286 @@
1GigaSet 307x Device Driver
2==========================
3
41. Requirements
5 ------------
61.1. Hardware
7 --------
8 This release supports the connection of the Gigaset 307x/417x family of
9 ISDN DECT bases via Gigaset M101 Data, Gigaset M105 Data or direct USB
10 connection. The following devices are reported to be compatible:
11 307x/417x:
12 Gigaset SX255isdn
13 Gigaset SX353isdn
14 Sinus 45 [AB] isdn (Deutsche Telekom)
15 Sinus 721X/XA
16 Vox Chicago 390 ISDN (KPN Telecom)
17 M101:
18 Sinus 45 Data 1 (Telekom)
19 M105:
20 Gigaset USB Adapter DECT
21 Sinus 45 Data 2 (Telekom)
22 Sinus 721 data
23 Chicago 390 USB (KPN)
24 See also http://www.erbze.info/sinus_gigaset.htm and
25 http://gigaset307x.sourceforge.net/
26
27 We have also had reports from users of Gigaset M105 who could use the drivers
28 with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.4.)
29 If you have another device that works with our driver, please let us know.
30 For example, Gigaset SX205isdn/Sinus 721 X SE and Gigaset SX303isdn bases
31 are just versions without answering machine of models known to work, so
32 they should work just as well; but so far we are lacking positive reports
33 on these.
34
35 Chances of getting a USB device to work are good if the output of
36 lsusb
37 at the command line contains one of the following:
38 ID 0681:0001
39 ID 0681:0002
40 ID 0681:0009
41 ID 0681:0021
42 ID 0681:0022
43
441.2. Software
45 --------
46 The driver works with ISDN4linux and so can be used with any software
47 which is able to use ISDN4linux for ISDN connections (voice or data).
48 CAPI4Linux support is planned but not yet available.
49
50 There are some user space tools available at
51 http://sourceforge.net/projects/gigaset307x/
52 which provide access to additional device specific functions like SMS,
53 phonebook or call journal.
54
55
562. How to use the driver
57 ---------------------
582.1. Modules
59 -------
60 To get the device working, you have to load the proper kernel module. You
61 can do this using
62 modprobe modulename
63 where modulename is usb_gigaset (M105) or bas_gigaset (direct USB
64 connection to the base).
65
662.2. Device nodes for user space programs
67 ------------------------------------
68 The device can be accessed from user space (e.g. by the user space tools
69 mentioned in 1.2.) through the device nodes:
70
71 - /dev/ttyGU0 for M105 (USB data boxes)
72 - /dev/ttyGB0 for the base driver (direct USB connection)
73
74 You can also select a "default device" which is used by the frontends when
75 no device node is given as parameter, by creating a symlink /dev/ttyG to
76 one of them, eg.:
77
78 ln -s /dev/ttyGB0 /dev/ttyG
79
802.3. ISDN4linux
81 ----------
82 This is the "normal" mode of operation. After loading the module you can
83 set up the ISDN system just as you'd do with any ISDN card.
84 Your distribution should provide some configuration utility.
85 If not, you can use some HOWTOs like
86 http://www.linuxhaven.de/dlhp/HOWTO/DE-ISDN-HOWTO-5.html
87 If this doesn't work, because you have some recent device like SX100 where
88 debug output (see section 3.2.) shows something like this when dialing
89 CMD Received: ERROR
90 Available Params: 0
91 Connection State: 0, Response: -1
92 gigaset_process_response: resp_code -1 in ConState 0 !
93 Timeout occurred
94 you might need to use unimodem mode:
95
962.4. Unimodem mode
97 -------------
98 This is needed for some devices [e.g. SX100] as they have problems with
99 the "normal" commands.
100
101 If you have installed the command line tool gigacontr, you can enter
102 unimodem mode using
103 gigacontr --mode unimodem
104 You can switch back using
105 gigacontr --mode isdn
106
107 You can also load the driver using e.g.
108 modprobe usb_gigaset startmode=0
109 to prevent the driver from starting in "isdn4linux mode".
110
111 In this mode the device works like a modem connected to a serial port
112 (the /dev/ttyGU0, ... mentioned above) which understands the commands
113 ATZ init, reset
114 => OK or ERROR
115 ATD
116 ATDT dial
117 => OK, CONNECT,
118 BUSY,
119 NO DIAL TONE,
120 NO CARRIER,
121 NO ANSWER
122 <pause>+++<pause> change to command mode when connected
123 ATH hangup
124
125 You can use some configuration tool of your distribution to configure this
126 "modem" or configure pppd/wvdial manually. There are some example ppp
127 configuration files and chat scripts in the gigaset-VERSION/ppp directory.
128 Please note that the USB drivers are not able to change the state of the
129 control lines (the M105 driver can be configured to use some undocumented
130 control requests, if you really need the control lines, though). This means
131 you must use "Stupid Mode" if you are using wvdial or you should use the
132 nocrtscts option of pppd.
133 You must also ensure that the ppp_async module is loaded with the parameter
134 flag_time=0. You can do this e.g. by adding a line like
135
136 options ppp_async flag_time=0
137
138 to /etc/modprobe.conf. If your distribution has some local module
139 configuration file like /etc/modprobe.conf.local,
140 using that should be preferred.
141
1422.5. Call-ID (CID) mode
143 ------------------
144 Call-IDs are numbers used to tag commands to, and responses from, the
145 Gigaset base in order to support the simultaneous handling of multiple
146 ISDN calls. Their use can be enabled ("CID mode") or disabled ("Unimodem
147 mode"). Without Call-IDs (in Unimodem mode), only a very limited set of
148 functions is available. It allows outgoing data connections only, but
149 does not signal incoming calls or other base events.
150
151 DECT cordless data devices (M10x) permanently occupy the cordless
152 connection to the base while Call-IDs are activated. As the Gigaset
153 bases only support one DECT data connection at a time, this prevents
154 other DECT cordless data devices from accessing the base.
155
156 During active operation, the driver switches to the necessary mode
157 automatically. However, for the reasons above, the mode chosen when
158 the device is not in use (idle) can be selected by the user.
159 - If you want to receive incoming calls, you can use the default
160 settings (CID mode).
161 - If you have several DECT data devices (M10x) which you want to use
162 in turn, select Unimodem mode by passing the parameter "cidmode=0" to
163 the driver ("modprobe usb_gigaset cidmode=0" or modprobe.conf).
164
165 If you want both of these at once, you are out of luck.
166
167 You can also use /sys/module/<name>/parameters/cidmode for changing
168 the CID mode setting (<name> is usb_gigaset or bas_gigaset).
169
170
1713. Troubleshooting
172 ---------------
1733.1. Solutions to frequently reported problems
174 -----------------------------------------
175 Problem:
176 You have a slow provider and isdn4linux gives up dialing too early.
177 Solution:
178 Load the isdn module using the dialtimeout option. You can do this e.g.
179 by adding a line like
180
181 options isdn dialtimeout=15
182
183 to /etc/modprobe.conf. If your distribution has some local module
184 configuration file like /etc/modprobe.conf.local,
185 using that should be preferred.
186
187 Problem:
188 Your isdn script aborts with a message about isdnlog.
189 Solution:
190 Try deactivating (or commenting out) isdnlog. This driver does not
191 support it.
192
193 Problem:
194 You have two or more DECT data adapters (M101/M105) and only the
195 first one you turn on works.
196 Solution:
197 Select Unimodem mode for all DECT data adapters. (see section 2.4.)
198
1993.2. Telling the driver to provide more information
200 ----------------------------------------------
201 Building the driver with the "Gigaset debugging" kernel configuration
202 option (CONFIG_GIGASET_DEBUG) gives it the ability to produce additional
203 information useful for debugging.
204
205 You can control the amount of debugging information the driver produces by
206 writing an appropriate value to /sys/module/gigaset/parameters/debug, e.g.
207 echo 0 > /sys/module/gigaset/parameters/debug
208 switches off debugging output completely,
209 echo 0x10a020 > /sys/module/gigaset/parameters/debug
210 enables the standard set of debugging output messages. These values are
211 bit patterns where every bit controls a certain type of debugging output.
212 See the constants DEBUG_* in the source file gigaset.h for details.
213
214 The initial value can be set using the debug parameter when loading the
215 module "gigaset", e.g. by adding a line
216 options gigaset debug=0
217 to /etc/modprobe.conf, ...
218
219 Generated debugging information can be found
220 - as output of the command
221 dmesg
222 - in system log files written by your syslog daemon, usually
223 in /var/log/, e.g. /var/log/messages.
224
2253.3. Reporting problems and bugs
226 ---------------------------
227 If you can't solve problems with the driver on your own, feel free to
228 use one of the forums, bug trackers, or mailing lists on
229 http://sourceforge.net/projects/gigaset307x
230 or write an electronic mail to the maintainers.
231
232 Try to provide as much information as possible, such as
233 - distribution
234 - kernel version (uname -r)
235 - gcc version (gcc --version)
236 - hardware architecture (uname -m, ...)
237 - type and firmware version of your device (base and wireless module,
238 if any)
239 - output of "lsusb -v" (if using a USB device)
240 - error messages
241 - relevant system log messages (it would help if you activate debug
242 output as described in 3.2.)
243
244 For help with general configuration problems not specific to our driver,
245 such as isdn4linux and network configuration issues, please refer to the
246 appropriate forums and newsgroups.
247
2483.4. Reporting problem solutions
249 ---------------------------
250 If you solved a problem with our drivers, wrote startup scripts for your
251 distribution, ... feel free to contact us (using one of the places
252 mentioned in 3.3.). We'd like to add scripts, hints, documentation
253 to the driver and/or the project web page.
254
255
2564. Links, other software
257 ---------------------
258 - Sourceforge project developing this driver and associated tools
259 http://sourceforge.net/projects/gigaset307x
260 - Yahoo! Group on the Siemens Gigaset family of devices
261 http://de.groups.yahoo.com/group/Siemens-Gigaset
262 - Siemens Gigaset/T-Sinus compatibility table
263 http://www.erbze.info/sinus_gigaset.htm
264
265
2665. Credits
267 -------
268 Thanks to
269
270 Karsten Keil
271 for his help with isdn4linux
272 Deti Fliegl
273 for his base driver code
274 Dennis Dietrich
275 for his kernel 2.6 patches
276 Andreas Rummel
277 for his work and logs to get unimodem mode working
278 Andreas Degert
279 for his logs and patches to get cx 100 working
280 Dietrich Feist
281 for his generous donation of one M105 and two M101 cordless adapters
282 Christoph Schweers
283 for his generous donation of a M34 device
284
285 and all the other people who sent logs and other information.
286
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index fcccf2432f98..61fc079eb966 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -44,7 +44,7 @@ What is covered within this file is mainly information to authors
44of modules. The author of an external module should supply 44of modules. The author of an external module should supply
45a makefile that hides most of the complexity so one only has to type 45a makefile that hides most of the complexity so one only has to type
46'make' to build the module. A complete example will be present in 46'make' to build the module. A complete example will be present in
47chapter ¤. Creating a kbuild file for an external module". 47chapter 4, "Creating a kbuild file for an external module".
48 48
49 49
50=== 2. How to build external modules 50=== 2. How to build external modules
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f8cb55c30b0f..b3a6187e5305 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1,4 +1,4 @@
1February 2003 Kernel Parameters v2.5.59 1 Kernel Parameters
2 ~~~~~~~~~~~~~~~~~ 2 ~~~~~~~~~~~~~~~~~
3 3
4The following is a consolidated list of the kernel parameters as implemented 4The following is a consolidated list of the kernel parameters as implemented
@@ -17,9 +17,17 @@ are specified on the kernel command line with the module name plus
17 17
18 usbcore.blinkenlights=1 18 usbcore.blinkenlights=1
19 19
20The text in square brackets at the beginning of the description states the 20This document may not be entirely up to date and comprehensive. The command
21restrictions on the kernel for the said kernel parameter to be valid. The 21"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
22restrictions referred to are that the relevant option is valid if: 22module. Loadable modules, after being loaded into the running kernel, also
23reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
24parameters may be changed at runtime by the command
25"echo -n ${value} > /sys/module/${modulename}/parameters/${parm}".
26
27The parameters listed below are only valid if certain kernel build options were
28enabled and if respective hardware is present. The text in square brackets at
29the beginning of each description states the restrictions within which a
30parameter is applicable:
23 31
24 ACPI ACPI support is enabled. 32 ACPI ACPI support is enabled.
25 ALSA ALSA sound support is enabled. 33 ALSA ALSA sound support is enabled.
@@ -1046,10 +1054,10 @@ running once the system is up.
1046 noltlbs [PPC] Do not use large page/tlb entries for kernel 1054 noltlbs [PPC] Do not use large page/tlb entries for kernel
1047 lowmem mapping on PPC40x. 1055 lowmem mapping on PPC40x.
1048 1056
1049 nomce [IA-32] Machine Check Exception
1050
1051 nomca [IA-64] Disable machine check abort handling 1057 nomca [IA-64] Disable machine check abort handling
1052 1058
1059 nomce [IA-32] Machine Check Exception
1060
1053 noresidual [PPC] Don't use residual data on PReP machines. 1061 noresidual [PPC] Don't use residual data on PReP machines.
1054 1062
1055 noresume [SWSUSP] Disables resume and restores original swap 1063 noresume [SWSUSP] Disables resume and restores original swap
@@ -1682,20 +1690,6 @@ running once the system is up.
1682 1690
1683 1691
1684______________________________________________________________________ 1692______________________________________________________________________
1685Changelog:
1686
16872000-06-?? Mr. Unknown
1688 The last known update (for 2.4.0) - the changelog was not kept before.
1689
16902002-11-24 Petr Baudis <pasky@ucw.cz>
1691 Randy Dunlap <randy.dunlap@verizon.net>
1692 Update for 2.5.49, description for most of the options introduced,
1693 references to other documentation (C files, READMEs, ..), added S390,
1694 PPC, SPARC, MTD, ALSA and OSS category. Minor corrections and
1695 reformatting.
1696
16972005-10-19 Randy Dunlap <rdunlap@xenotime.net>
1698 Lots of typos, whitespace, some reformatting.
1699 1693
1700TODO: 1694TODO:
1701 1695
diff --git a/Documentation/laptop-mode.txt b/Documentation/laptop-mode.txt
index b18e21675906..5696e879449b 100644
--- a/Documentation/laptop-mode.txt
+++ b/Documentation/laptop-mode.txt
@@ -919,11 +919,11 @@ int main(int argc, char **argv)
919 int settle_time = 60; 919 int settle_time = 60;
920 920
921 /* Parse the simple command-line */ 921 /* Parse the simple command-line */
922 if (ac == 2) 922 if (argc == 2)
923 disk = av[1]; 923 disk = argv[1];
924 else if (ac == 4) { 924 else if (argc == 4) {
925 settle_time = atoi(av[2]); 925 settle_time = atoi(argv[2]);
926 disk = av[3]; 926 disk = argv[3];
927 } else 927 } else
928 usage(); 928 usage();
929 929
diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt
new file mode 100644
index 000000000000..8c35c0426110
--- /dev/null
+++ b/Documentation/leds-class.txt
@@ -0,0 +1,71 @@
1LED handling under Linux
2========================
3
4If you're reading this and thinking about keyboard leds, these are
5handled by the input subsystem and the led class is *not* needed.
6
7In its simplest form, the LED class just allows control of LEDs from
8userspace. LEDs appear in /sys/class/leds/. The brightness file will
9set the brightness of the LED (taking a value 0-255). Most LEDs don't
10have hardware brightness support so will just be turned on for non-zero
11brightness settings.
12
13The class also introduces the optional concept of an LED trigger. A trigger
14is a kernel based source of led events. Triggers can either be simple or
15complex. A simple trigger isn't configurable and is designed to slot into
16existing subsystems with minimal additional code. Examples are the ide-disk,
17nand-disk and sharpsl-charge triggers. With led triggers disabled, the code
18optimises away.
19
20Complex triggers, whilst available to all LEDs, have LED specific
21parameters and work on a per LED basis. The timer trigger is an example.
22
23You can change triggers in a similar manner to the way an IO scheduler
24is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
25parameters can appear in /sys/class/leds/<device> once a given trigger is
26selected.
27
28
29Design Philosophy
30=================
31
32The underlying design philosophy is simplicity. LEDs are simple devices
33and the aim is to keep a small amount of code giving as much functionality
34as possible. Please keep this in mind when suggesting enhancements.
35
36
37LED Device Naming
38=================
39
40Is currently of the form:
41
42"devicename:colour"
43
44There have been calls for LED properties such as colour to be exported as
45individual led class attributes. As a solution which doesn't incur as much
46overhead, I suggest these become part of the device name. The naming scheme
47above leaves scope for further attributes should they be needed.
48
49
50Known Issues
51============
52
53The LED Trigger core cannot be a module as the simple trigger functions
54would cause nightmare dependency issues. I see this as a minor issue
55compared to the benefits the simple trigger functionality brings. The
56rest of the LED subsystem can be modular.
57
58Some leds can be programmed to flash in hardware. As this isn't a generic
59LED device property, this should be exported as a device specific sysfs
60attribute rather than part of the class if this functionality is required.
61
62
63Future Development
64==================
65
66At the moment, a trigger can't be created specifically for a single LED.
67There are a number of cases where a trigger might only be mappable to a
68particular LED (ACPI?). The addition of triggers provided by the LED driver
69should cover this option and be possible to add without breaking the
70current interface.
71
diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha
index bf802ffc98ad..ef484a719bb9 100644
--- a/Documentation/m68k/README.buddha
+++ b/Documentation/m68k/README.buddha
@@ -29,7 +29,7 @@ address is written to $4a, then the whole Byte is written to
29$48, while it doesn't matter how often you're writing to $4a 29$48, while it doesn't matter how often you're writing to $4a
30as long as $48 is not touched. After $48 has been written, 30as long as $48 is not touched. After $48 has been written,
31the whole card disappears from $e8 and is mapped to the new 31the whole card disappears from $e8 and is mapped to the new
32address just written. Make shure $4a is written before $48, 32address just written. Make sure $4a is written before $48,
33otherwise your chance is only 1:16 to find the board :-). 33otherwise your chance is only 1:16 to find the board :-).
34 34
35The local memory-map is even active when mapped to $e8: 35The local memory-map is even active when mapped to $e8:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
new file mode 100644
index 000000000000..4710845dbac4
--- /dev/null
+++ b/Documentation/memory-barriers.txt
@@ -0,0 +1,2133 @@
1 ============================
2 LINUX KERNEL MEMORY BARRIERS
3 ============================
4
5By: David Howells <dhowells@redhat.com>
6
7Contents:
8
9 (*) Abstract memory access model.
10
11 - Device operations.
12 - Guarantees.
13
14 (*) What are memory barriers?
15
16 - Varieties of memory barrier.
17 - What may not be assumed about memory barriers?
18 - Data dependency barriers.
19 - Control dependencies.
20 - SMP barrier pairing.
21 - Examples of memory barrier sequences.
22 - Read memory barriers vs load speculation.
23
24 (*) Explicit kernel barriers.
25
26 - Compiler barrier.
27 - The CPU memory barriers.
28 - MMIO write barrier.
29
30 (*) Implicit kernel memory barriers.
31
32 - Locking functions.
33 - Interrupt disabling functions.
34 - Miscellaneous functions.
35
36 (*) Inter-CPU locking barrier effects.
37
38 - Locks vs memory accesses.
39 - Locks vs I/O accesses.
40
41 (*) Where are memory barriers needed?
42
43 - Interprocessor interaction.
44 - Atomic operations.
45 - Accessing devices.
46 - Interrupts.
47
48 (*) Kernel I/O barrier effects.
49
50 (*) Assumed minimum execution ordering model.
51
52 (*) The effects of the cpu cache.
53
54 - Cache coherency.
55 - Cache coherency vs DMA.
56 - Cache coherency vs MMIO.
57
58 (*) The things CPUs get up to.
59
60 - And then there's the Alpha.
61
62 (*) References.
63
64
65============================
66ABSTRACT MEMORY ACCESS MODEL
67============================
68
69Consider the following abstract model of the system:
70
71 : :
72 : :
73 : :
74 +-------+ : +--------+ : +-------+
75 | | : | | : | |
76 | | : | | : | |
77 | CPU 1 |<----->| Memory |<----->| CPU 2 |
78 | | : | | : | |
79 | | : | | : | |
80 +-------+ : +--------+ : +-------+
81 ^ : ^ : ^
82 | : | : |
83 | : | : |
84 | : v : |
85 | : +--------+ : |
86 | : | | : |
87 | : | | : |
88 +---------->| Device |<----------+
89 : | | :
90 : | | :
91 : +--------+ :
92 : :
93
94Each CPU executes a program that generates memory access operations. In the
95abstract CPU, memory operation ordering is very relaxed, and a CPU may actually
96perform the memory operations in any order it likes, provided program causality
97appears to be maintained. Similarly, the compiler may also arrange the
98instructions it emits in any order it likes, provided it doesn't affect the
99apparent operation of the program.
100
101So in the above diagram, the effects of the memory operations performed by a
102CPU are perceived by the rest of the system as the operations cross the
103interface between the CPU and rest of the system (the dotted lines).
104
105
106For example, consider the following sequence of events:
107
108 CPU 1 CPU 2
109 =============== ===============
110 { A == 1; B == 2 }
111 A = 3; x = A;
112 B = 4; y = B;
113
114The set of accesses as seen by the memory system in the middle can be arranged
115in 24 different combinations:
116
117 STORE A=3, STORE B=4, x=LOAD A->3, y=LOAD B->4
118 STORE A=3, STORE B=4, y=LOAD B->4, x=LOAD A->3
119 STORE A=3, x=LOAD A->3, STORE B=4, y=LOAD B->4
120 STORE A=3, x=LOAD A->3, y=LOAD B->2, STORE B=4
121 STORE A=3, y=LOAD B->2, STORE B=4, x=LOAD A->3
122 STORE A=3, y=LOAD B->2, x=LOAD A->3, STORE B=4
123 STORE B=4, STORE A=3, x=LOAD A->3, y=LOAD B->4
124 STORE B=4, ...
125 ...
126
127and can thus result in four different combinations of values:
128
129 x == 1, y == 2
130 x == 1, y == 4
131 x == 3, y == 2
132 x == 3, y == 4
133
134
135Furthermore, the stores committed by a CPU to the memory system may not be
136perceived by the loads made by another CPU in the same order as the stores were
137committed.
138
139
140As a further example, consider this sequence of events:
141
142 CPU 1 CPU 2
143 =============== ===============
144 { A == 1, B == 2, C == 3, P == &A, Q == &C }
145 B = 4; Q = P;
146 P = &B; D = *Q;
147
148There is an obvious data dependency here, as the value loaded into D depends on
149the address retrieved from P by CPU 2. At the end of the sequence, any of the
150following results are possible:
151
152 (Q == &A) and (D == 1)
153 (Q == &B) and (D == 2)
154 (Q == &B) and (D == 4)
155
156Note that CPU 2 will never try and load C into D because the CPU will load P
157into Q before issuing the load of *Q.
158
159
160DEVICE OPERATIONS
161-----------------
162
163Some devices present their control interfaces as collections of memory
164locations, but the order in which the control registers are accessed is very
165important. For instance, imagine an ethernet card with a set of internal
166registers that are accessed through an address port register (A) and a data
167port register (D). To read internal register 5, the following code might then
168be used:
169
170 *A = 5;
171 x = *D;
172
173but this might show up as either of the following two sequences:
174
175 STORE *A = 5, x = LOAD *D
176 x = LOAD *D, STORE *A = 5
177
178the second of which will almost certainly result in a malfunction, since it sets
179the address _after_ attempting to read the register.
180
181
182GUARANTEES
183----------
184
185There are some minimal guarantees that may be expected of a CPU:
186
187 (*) On any given CPU, dependent memory accesses will be issued in order, with
188 respect to itself. This means that for:
189
190 Q = P; D = *Q;
191
192 the CPU will issue the following memory operations:
193
194 Q = LOAD P, D = LOAD *Q
195
196 and always in that order.
197
198 (*) Overlapping loads and stores within a particular CPU will appear to be
199 ordered within that CPU. This means that for:
200
201 a = *X; *X = b;
202
203 the CPU will only issue the following sequence of memory operations:
204
205 a = LOAD *X, STORE *X = b
206
207 And for:
208
209 *X = c; d = *X;
210
211 the CPU will only issue:
212
213 STORE *X = c, d = LOAD *X
214
215 (Loads and stores overlap if they are targeted at overlapping pieces of
216 memory).
217
218And there are a number of things that _must_ or _must_not_ be assumed:
219
220 (*) It _must_not_ be assumed that independent loads and stores will be issued
221 in the order given. This means that for:
222
223 X = *A; Y = *B; *D = Z;
224
225 we may get any of the following sequences:
226
227 X = LOAD *A, Y = LOAD *B, STORE *D = Z
228 X = LOAD *A, STORE *D = Z, Y = LOAD *B
229 Y = LOAD *B, X = LOAD *A, STORE *D = Z
230 Y = LOAD *B, STORE *D = Z, X = LOAD *A
231 STORE *D = Z, X = LOAD *A, Y = LOAD *B
232 STORE *D = Z, Y = LOAD *B, X = LOAD *A
233
234 (*) It _must_ be assumed that overlapping memory accesses may be merged or
235 discarded. This means that for:
236
237 X = *A; Y = *(A + 4);
238
239 we may get any one of the following sequences:
240
241 X = LOAD *A; Y = LOAD *(A + 4);
242 Y = LOAD *(A + 4); X = LOAD *A;
243 {X, Y} = LOAD {*A, *(A + 4) };
244
245 And for:
246
247 *A = X; Y = *A;
248
249 we may get either of:
250
251 STORE *A = X; Y = LOAD *A;
252 STORE *A = Y = X;
253
254
255=========================
256WHAT ARE MEMORY BARRIERS?
257=========================
258
259As can be seen above, independent memory operations are effectively performed
260in random order, but this can be a problem for CPU-CPU interaction and for I/O.
261What is required is some way of intervening to instruct the compiler and the
262CPU to restrict the order.
263
264Memory barriers are such interventions. They impose a perceived partial
265ordering between the memory operations specified on either side of the barrier.
266They request that the sequence of memory events generated appears to other
267parts of the system as if the barrier is effective on that CPU.
268
269
270VARIETIES OF MEMORY BARRIER
271---------------------------
272
273Memory barriers come in four basic varieties:
274
275 (1) Write (or store) memory barriers.
276
277 A write memory barrier gives a guarantee that all the STORE operations
278 specified before the barrier will appear to happen before all the STORE
279 operations specified after the barrier with respect to the other
280 components of the system.
281
282 A write barrier is a partial ordering on stores only; it is not required
283 to have any effect on loads.
284
285 A CPU can be viewed as committing a sequence of store operations to the
286 memory system as time progresses. All stores before a write barrier will
287 occur in the sequence _before_ all the stores after the write barrier.
288
289 [!] Note that write barriers should normally be paired with read or data
290 dependency barriers; see the "SMP barrier pairing" subsection.
291
292
293 (2) Data dependency barriers.
294
295 A data dependency barrier is a weaker form of read barrier. In the case
296 where two loads are performed such that the second depends on the result
297 of the first (eg: the first load retrieves the address to which the second
298 load will be directed), a data dependency barrier would be required to
299 make sure that the target of the second load is updated before the address
300 obtained by the first load is accessed.
301
302 A data dependency barrier is a partial ordering on interdependent loads
303 only; it is not required to have any effect on stores, independent loads
304 or overlapping loads.
305
306 As mentioned in (1), the other CPUs in the system can be viewed as
307 committing sequences of stores to the memory system that the CPU being
308 considered can then perceive. A data dependency barrier issued by the CPU
309 under consideration guarantees that for any load preceding it, if that
310 load touches one of a sequence of stores from another CPU, then by the
311 time the barrier completes, the effects of all the stores prior to that
312 touched by the load will be perceptible to any loads issued after the data
313 dependency barrier.
314
315 See the "Examples of memory barrier sequences" subsection for diagrams
316 showing the ordering constraints.
317
318 [!] Note that the first load really has to have a _data_ dependency and
319 not a control dependency. If the address for the second load is dependent
320 on the first load, but the dependency is through a conditional rather than
321 actually loading the address itself, then it's a _control_ dependency and
322 a full read barrier or better is required. See the "Control dependencies"
323 subsection for more information.
324
325 [!] Note that data dependency barriers should normally be paired with
326 write barriers; see the "SMP barrier pairing" subsection.
327
328
329 (3) Read (or load) memory barriers.
330
331 A read barrier is a data dependency barrier plus a guarantee that all the
332 LOAD operations specified before the barrier will appear to happen before
333 all the LOAD operations specified after the barrier with respect to the
334 other components of the system.
335
336 A read barrier is a partial ordering on loads only; it is not required to
337 have any effect on stores.
338
339 Read memory barriers imply data dependency barriers, and so can substitute
340 for them.
341
342 [!] Note that read barriers should normally be paired with write barriers;
343 see the "SMP barrier pairing" subsection.
344
345
346 (4) General memory barriers.
347
348 A general memory barrier gives a guarantee that all the LOAD and STORE
349 operations specified before the barrier will appear to happen before all
350 the LOAD and STORE operations specified after the barrier with respect to
351 the other components of the system.
352
353 A general memory barrier is a partial ordering over both loads and stores.
354
355 General memory barriers imply both read and write memory barriers, and so
356 can substitute for either.
357
358
359And a couple of implicit varieties:
360
361 (5) LOCK operations.
362
363 This acts as a one-way permeable barrier. It guarantees that all memory
364 operations after the LOCK operation will appear to happen after the LOCK
365 operation with respect to the other components of the system.
366
367 Memory operations that occur before a LOCK operation may appear to happen
368 after it completes.
369
370 A LOCK operation should almost always be paired with an UNLOCK operation.
371
372
373 (6) UNLOCK operations.
374
375 This also acts as a one-way permeable barrier. It guarantees that all
376 memory operations before the UNLOCK operation will appear to happen before
377 the UNLOCK operation with respect to the other components of the system.
378
379 Memory operations that occur after an UNLOCK operation may appear to
380 happen before it completes.
381
382 LOCK and UNLOCK operations are guaranteed to appear with respect to each
383 other strictly in the order specified.
384
385 The use of LOCK and UNLOCK operations generally precludes the need for
386 other sorts of memory barrier (but note the exceptions mentioned in the
387 subsection "MMIO write barrier").
388
389
390Memory barriers are only required where there's a possibility of interaction
391between two CPUs or between a CPU and a device. If it can be guaranteed that
392there won't be any such interaction in any particular piece of code, then
393memory barriers are unnecessary in that piece of code.
394
395
396Note that these are the _minimum_ guarantees. Different architectures may give
397more substantial guarantees, but they may _not_ be relied upon outside of arch
398specific code.
399
400
401WHAT MAY NOT BE ASSUMED ABOUT MEMORY BARRIERS?
402----------------------------------------------
403
404There are certain things that the Linux kernel memory barriers do not guarantee:
405
406 (*) There is no guarantee that any of the memory accesses specified before a
407 memory barrier will be _complete_ by the completion of a memory barrier
408 instruction; the barrier can be considered to draw a line in that CPU's
409 access queue that accesses of the appropriate type may not cross.
410
411 (*) There is no guarantee that issuing a memory barrier on one CPU will have
412 any direct effect on another CPU or any other hardware in the system. The
413 indirect effect will be the order in which the second CPU sees the effects
414 of the first CPU's accesses occur, but see the next point:
415
416 (*) There is no guarantee that a CPU will see the correct order of effects
417 from a second CPU's accesses, even _if_ the second CPU uses a memory
418 barrier, unless the first CPU _also_ uses a matching memory barrier (see
419 the subsection on "SMP Barrier Pairing").
420
421 (*) There is no guarantee that some intervening piece of off-the-CPU
422 hardware[*] will not reorder the memory accesses. CPU cache coherency
423 mechanisms should propagate the indirect effects of a memory barrier
424 between CPUs, but might not do so in order.
425
426 [*] For information on bus mastering DMA and coherency please read:
427
428 Documentation/pci.txt
429 Documentation/DMA-mapping.txt
430 Documentation/DMA-API.txt
431
432
433DATA DEPENDENCY BARRIERS
434------------------------
435
436The usage requirements of data dependency barriers are a little subtle, and
437it's not always obvious that they're needed. To illustrate, consider the
438following sequence of events:
439
440 CPU 1 CPU 2
441 =============== ===============
442 { A == 1, B == 2, C == 3, P == &A, Q == &C }
443 B = 4;
444 <write barrier>
445 P = &B
446 Q = P;
447 D = *Q;
448
449There's a clear data dependency here, and it would seem that by the end of the
450sequence, Q must be either &A or &B, and that:
451
452 (Q == &A) implies (D == 1)
453 (Q == &B) implies (D == 4)
454
455But! CPU 2's perception of P may be updated _before_ its perception of B, thus
456leading to the following situation:
457
458 (Q == &B) and (D == 2) ????
459
460Whilst this may seem like a failure of coherency or causality maintenance, it
461isn't, and this behaviour can be observed on certain real CPUs (such as the DEC
462Alpha).
463
464To deal with this, a data dependency barrier must be inserted between the
465address load and the data load:
466
467 CPU 1 CPU 2
468 =============== ===============
469 { A == 1, B == 2, C == 3, P == &A, Q == &C }
470 B = 4;
471 <write barrier>
472 P = &B
473 Q = P;
474 <data dependency barrier>
475 D = *Q;
476
477This enforces the occurrence of one of the two implications, and prevents the
478third possibility from arising.
479
480[!] Note that this extremely counterintuitive situation arises most easily on
481machines with split caches, so that, for example, one cache bank processes
482even-numbered cache lines and the other bank processes odd-numbered cache
483lines. The pointer P might be stored in an odd-numbered cache line, and the
484variable B might be stored in an even-numbered cache line. Then, if the
485even-numbered bank of the reading CPU's cache is extremely busy while the
486odd-numbered bank is idle, one can see the new value of the pointer P (&B),
487but the old value of the variable B (2).
488
489
490Another example of where data dependency barriers might be required is where a
491number is read from memory and then used to calculate the index for an array
492access:
493
494 CPU 1 CPU 2
495 =============== ===============
496 { M[0] == 1, M[1] == 2, M[3] == 3, P == 0, Q == 3 }
497 M[1] = 4;
498 <write barrier>
499 P = 1
500 Q = P;
501 <data dependency barrier>
502 D = M[Q];
503
504
505The data dependency barrier is very important to the RCU system, for example.
506See rcu_dereference() in include/linux/rcupdate.h. This permits the current
507target of an RCU'd pointer to be replaced with a new modified target, without
508the replacement target appearing to be incompletely initialised.
509
510See also the subsection on "Cache Coherency" for a more thorough example.
511
512
513CONTROL DEPENDENCIES
514--------------------
515
516A control dependency requires a full read memory barrier, not simply a data
517dependency barrier to make it work correctly. Consider the following bit of
518code:
519
520 q = &a;
521 if (p)
522 q = &b;
523 <data dependency barrier>
524 x = *q;
525
526This will not have the desired effect because there is no actual data
527dependency, but rather a control dependency that the CPU may short-circuit by
528attempting to predict the outcome in advance. In such a case what's actually
529required is:
530
531 q = &a;
532 if (p)
533 q = &b;
534 <read barrier>
535 x = *q;
536
537
538SMP BARRIER PAIRING
539-------------------
540
541When dealing with CPU-CPU interactions, certain types of memory barrier should
542always be paired. A lack of appropriate pairing is almost certainly an error.
543
544A write barrier should always be paired with a data dependency barrier or read
545barrier, though a general barrier would also be viable. Similarly a read
546barrier or a data dependency barrier should always be paired with at least a
547write barrier, though, again, a general barrier is viable:
548
549 CPU 1 CPU 2
550 =============== ===============
551 a = 1;
552 <write barrier>
553 b = 2; x = b;
554 <read barrier>
555 y = a;
556
557Or:
558
559 CPU 1 CPU 2
560 =============== ===============================
561 a = 1;
562 <write barrier>
563 b = &a; x = b;
564 <data dependency barrier>
565 y = *x;
566
567Basically, the read barrier always has to be there, even though it can be of
568the "weaker" type.
569
570[!] Note that the stores before the write barrier would normally be expected to
571match the loads after the read barrier or data dependency barrier, and vice
572versa:
573
574 CPU 1 CPU 2
575 =============== ===============
576 a = 1; }---- --->{ v = c
577 b = 2; } \ / { w = d
578 <write barrier> \ <read barrier>
579 c = 3; } / \ { x = a;
580 d = 4; }---- --->{ y = b;
581
582
583EXAMPLES OF MEMORY BARRIER SEQUENCES
584------------------------------------
585
586Firstly, write barriers act as partial orderings on store operations.
587Consider the following sequence of events:
588
589 CPU 1
590 =======================
591 STORE A = 1
592 STORE B = 2
593 STORE C = 3
594 <write barrier>
595 STORE D = 4
596 STORE E = 5
597
598This sequence of events is committed to the memory coherence system in an order
599that the rest of the system might perceive as the unordered set of { STORE A,
600STORE B, STORE C } all occurring before the unordered set of { STORE D, STORE E
601}:
602
603 +-------+ : :
604 | | +------+
605 | |------>| C=3 | } /\
606 | | : +------+ }----- \ -----> Events perceptible
607 | | : | A=1 | } \/ to rest of system
608 | | : +------+ }
609 | CPU 1 | : | B=2 | }
610 | | +------+ }
611 | | wwwwwwwwwwwwwwww } <--- At this point the write barrier
612 | | +------+ } requires all stores prior to the
613 | | : | E=5 | } barrier to be committed before
614 | | : +------+ } further stores may take place.
615 | |------>| D=4 | }
616 | | +------+
617 +-------+ : :
618 |
619 | Sequence in which stores are committed to the
620 | memory system by CPU 1
621 V
622
623
624Secondly, data dependency barriers act as partial orderings on data-dependent
625loads. Consider the following sequence of events:
626
627 CPU 1 CPU 2
628 ======================= =======================
629 { B = 7; X = 9; Y = 8; C = &Y }
630 STORE A = 1
631 STORE B = 2
632 <write barrier>
633 STORE C = &B LOAD X
634 STORE D = 4 LOAD C (gets &B)
635 LOAD *C (reads B)
636
637Without intervention, CPU 2 may perceive the events on CPU 1 in some
638effectively random order, despite the write barrier issued by CPU 1:
639
640 +-------+ : : : :
641 | | +------+ +-------+ | Sequence of update
642 | |------>| B=2 |----- --->| Y->8 | | of perception on
643 | | : +------+ \ +-------+ | CPU 2
644 | CPU 1 | : | A=1 | \ --->| C->&Y | V
645 | | +------+ | +-------+
646 | | wwwwwwwwwwwwwwww | : :
647 | | +------+ | : :
648 | | : | C=&B |--- | : : +-------+
649 | | : +------+ \ | +-------+ | |
650 | |------>| D=4 | ----------->| C->&B |------>| |
651 | | +------+ | +-------+ | |
652 +-------+ : : | : : | |
653 | : : | |
654 | : : | CPU 2 |
655 | +-------+ | |
656 Apparently incorrect ---> | | B->7 |------>| |
657 perception of B (!) | +-------+ | |
658 | : : | |
659 | +-------+ | |
660 The load of X holds ---> \ | X->9 |------>| |
661 up the maintenance \ +-------+ | |
662 of coherence of B ----->| B->2 | +-------+
663 +-------+
664 : :
665
666
667In the above example, CPU 2 perceives that B is 7, despite the load of *C
668(which would be B) coming after the LOAD of C.
669
670If, however, a data dependency barrier were to be placed between the load of C
671and the load of *C (ie: B) on CPU 2:
672
673 CPU 1 CPU 2
674 ======================= =======================
675 { B = 7; X = 9; Y = 8; C = &Y }
676 STORE A = 1
677 STORE B = 2
678 <write barrier>
679 STORE C = &B LOAD X
680 STORE D = 4 LOAD C (gets &B)
681 <data dependency barrier>
682 LOAD *C (reads B)
683
684then the following will occur:
685
686 +-------+ : : : :
687 | | +------+ +-------+
688 | |------>| B=2 |----- --->| Y->8 |
689 | | : +------+ \ +-------+
690 | CPU 1 | : | A=1 | \ --->| C->&Y |
691 | | +------+ | +-------+
692 | | wwwwwwwwwwwwwwww | : :
693 | | +------+ | : :
694 | | : | C=&B |--- | : : +-------+
695 | | : +------+ \ | +-------+ | |
696 | |------>| D=4 | ----------->| C->&B |------>| |
697 | | +------+ | +-------+ | |
698 +-------+ : : | : : | |
699 | : : | |
700 | : : | CPU 2 |
701 | +-------+ | |
702 | | X->9 |------>| |
703 | +-------+ | |
704 Makes sure all effects ---> \ ddddddddddddddddd | |
705 prior to the store of C \ +-------+ | |
706 are perceptible to ----->| B->2 |------>| |
707 subsequent loads +-------+ | |
708 : : +-------+
709
710
711And thirdly, a read barrier acts as a partial order on loads. Consider the
712following sequence of events:
713
714 CPU 1 CPU 2
715 ======================= =======================
716 { A = 0, B = 9 }
717 STORE A=1
718 <write barrier>
719 STORE B=2
720 LOAD B
721 LOAD A
722
723Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
724some effectively random order, despite the write barrier issued by CPU 1:
725
726 +-------+ : : : :
727 | | +------+ +-------+
728 | |------>| A=1 |------ --->| A->0 |
729 | | +------+ \ +-------+
730 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
731 | | +------+ | +-------+
732 | |------>| B=2 |--- | : :
733 | | +------+ \ | : : +-------+
734 +-------+ : : \ | +-------+ | |
735 ---------->| B->2 |------>| |
736 | +-------+ | CPU 2 |
737 | | A->0 |------>| |
738 | +-------+ | |
739 | : : +-------+
740 \ : :
741 \ +-------+
742 ---->| A->1 |
743 +-------+
744 : :
745
746
747If, however, a read barrier were to be placed between the load of B and the
748load of A on CPU 2:
749
750 CPU 1 CPU 2
751 ======================= =======================
752 { A = 0, B = 9 }
753 STORE A=1
754 <write barrier>
755 STORE B=2
756 LOAD B
757 <read barrier>
758 LOAD A
759
760then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
7612:
762
763 +-------+ : : : :
764 | | +------+ +-------+
765 | |------>| A=1 |------ --->| A->0 |
766 | | +------+ \ +-------+
767 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
768 | | +------+ | +-------+
769 | |------>| B=2 |--- | : :
770 | | +------+ \ | : : +-------+
771 +-------+ : : \ | +-------+ | |
772 ---------->| B->2 |------>| |
773 | +-------+ | CPU 2 |
774 | : : | |
775 | : : | |
776 At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
777 barrier causes all effects \ +-------+ | |
778 prior to the storage of B ---->| A->1 |------>| |
779 to be perceptible to CPU 2 +-------+ | |
780 : : +-------+
781
782
783To illustrate this more completely, consider what could happen if the code
784contained a load of A either side of the read barrier:
785
786 CPU 1 CPU 2
787 ======================= =======================
788 { A = 0, B = 9 }
789 STORE A=1
790 <write barrier>
791 STORE B=2
792 LOAD B
793 LOAD A [first load of A]
794 <read barrier>
795 LOAD A [second load of A]
796
797Even though the two loads of A both occur after the load of B, they may both
798come up with different values:
799
800 +-------+ : : : :
801 | | +------+ +-------+
802 | |------>| A=1 |------ --->| A->0 |
803 | | +------+ \ +-------+
804 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
805 | | +------+ | +-------+
806 | |------>| B=2 |--- | : :
807 | | +------+ \ | : : +-------+
808 +-------+ : : \ | +-------+ | |
809 ---------->| B->2 |------>| |
810 | +-------+ | CPU 2 |
811 | : : | |
812 | : : | |
813 | +-------+ | |
814 | | A->0 |------>| 1st |
815 | +-------+ | |
816 At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
817 barrier causes all effects \ +-------+ | |
818 prior to the storage of B ---->| A->1 |------>| 2nd |
819 to be perceptible to CPU 2 +-------+ | |
820 : : +-------+
821
822
823But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
824before the read barrier completes anyway:
825
826 +-------+ : : : :
827 | | +------+ +-------+
828 | |------>| A=1 |------ --->| A->0 |
829 | | +------+ \ +-------+
830 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
831 | | +------+ | +-------+
832 | |------>| B=2 |--- | : :
833 | | +------+ \ | : : +-------+
834 +-------+ : : \ | +-------+ | |
835 ---------->| B->2 |------>| |
836 | +-------+ | CPU 2 |
837 | : : | |
838 \ : : | |
839 \ +-------+ | |
840 ---->| A->1 |------>| 1st |
841 +-------+ | |
842 rrrrrrrrrrrrrrrrr | |
843 +-------+ | |
844 | A->1 |------>| 2nd |
845 +-------+ | |
846 : : +-------+
847
848
849The guarantee is that the second load will always come up with A == 1 if the
850load of B came up with B == 2. No such guarantee exists for the first load of
851A; that may come up with either A == 0 or A == 1.
852
853
854READ MEMORY BARRIERS VS LOAD SPECULATION
855----------------------------------------
856
857Many CPUs speculate with loads: that is they see that they will need to load an
858item from memory, and they find a time where they're not using the bus for any
859other loads, and so do the load in advance - even though they haven't actually
860got to that point in the instruction execution flow yet. This permits the
861actual load instruction to potentially complete immediately because the CPU
862already has the value to hand.
863
864It may turn out that the CPU didn't actually need the value - perhaps because a
865branch circumvented the load - in which case it can discard the value or just
866cache it for later use.
867
868Consider:
869
870 CPU 1 CPU 2
871 ======================= =======================
872 LOAD B
873 DIVIDE } Divide instructions generally
874 DIVIDE } take a long time to perform
875 LOAD A
876
877Which might appear as this:
878
879 : : +-------+
880 +-------+ | |
881 --->| B->2 |------>| |
882 +-------+ | CPU 2 |
883 : :DIVIDE | |
884 +-------+ | |
885 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
886 division speculates on the +-------+ ~ | |
887 LOAD of A : : ~ | |
888 : :DIVIDE | |
889 : : ~ | |
890 Once the divisions are complete --> : : ~-->| |
891 the CPU can then perform the : : | |
892 LOAD with immediate effect : : +-------+
893
894
895Placing a read barrier or a data dependency barrier just before the second
896load:
897
898 CPU 1 CPU 2
899 ======================= =======================
900 LOAD B
901 DIVIDE
902 DIVIDE
903 <read barrier>
904 LOAD A
905
906will force any value speculatively obtained to be reconsidered to an extent
907dependent on the type of barrier used. If there was no change made to the
908speculated memory location, then the speculated value will just be used:
909
910 : : +-------+
911 +-------+ | |
912 --->| B->2 |------>| |
913 +-------+ | CPU 2 |
914 : :DIVIDE | |
915 +-------+ | |
916 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
917 division speculates on the +-------+ ~ | |
918 LOAD of A : : ~ | |
919 : :DIVIDE | |
920 : : ~ | |
921 : : ~ | |
922 rrrrrrrrrrrrrrrr~ | |
923 : : ~ | |
924 : : ~-->| |
925 : : | |
926 : : +-------+
927
928
929but if there was an update or an invalidation from another CPU pending, then
930the speculation will be cancelled and the value reloaded:
931
932 : : +-------+
933 +-------+ | |
934 --->| B->2 |------>| |
935 +-------+ | CPU 2 |
936 : :DIVIDE | |
937 +-------+ | |
938 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
939 division speculates on the +-------+ ~ | |
940 LOAD of A : : ~ | |
941 : :DIVIDE | |
942 : : ~ | |
943 : : ~ | |
944 rrrrrrrrrrrrrrrrr | |
945 +-------+ | |
946 The speculation is discarded ---> --->| A->1 |------>| |
947 and an updated value is +-------+ | |
948 retrieved : : +-------+
949
950
951========================
952EXPLICIT KERNEL BARRIERS
953========================
954
955The Linux kernel has a variety of different barriers that act at different
956levels:
957
958 (*) Compiler barrier.
959
960 (*) CPU memory barriers.
961
962 (*) MMIO write barrier.
963
964
965COMPILER BARRIER
966----------------
967
968The Linux kernel has an explicit compiler barrier function that prevents the
969compiler from moving the memory accesses either side of it to the other side:
970
971 barrier();
972
973This is a general barrier - lesser varieties of compiler barrier do not exist.
974
975The compiler barrier has no direct effect on the CPU, which may then reorder
976things however it wishes.
977
978
979CPU MEMORY BARRIERS
980-------------------
981
982The Linux kernel has eight basic CPU memory barriers:
983
984 TYPE MANDATORY SMP CONDITIONAL
985 =============== ======================= ===========================
986 GENERAL mb() smp_mb()
987 WRITE wmb() smp_wmb()
988 READ rmb() smp_rmb()
989 DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends()
990
991
992All CPU memory barriers unconditionally imply compiler barriers.
993
994SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
995systems because it is assumed that a CPU will appear to be self-consistent,
996and will order overlapping accesses correctly with respect to itself.
997
998[!] Note that SMP memory barriers _must_ be used to control the ordering of
999references to shared memory on SMP systems, though the use of locking instead
1000is sufficient.
1001
1002Mandatory barriers should not be used to control SMP effects, since mandatory
1003barriers unnecessarily impose overhead on UP systems. They may, however, be
1004used to control MMIO effects on accesses through relaxed memory I/O windows.
1005These are required even on non-SMP systems as they affect the order in which
1006memory operations appear to a device by prohibiting both the compiler and the
1007CPU from reordering them.
1008
1009
1010There are some more advanced barrier functions:
1011
1012 (*) set_mb(var, value)
1013 (*) set_wmb(var, value)
1014
1015 These assign the value to the variable and then insert at least a write
1016 barrier after it, depending on the function. They aren't guaranteed to
1017 insert anything more than a compiler barrier in a UP compilation.
1018
1019
1020 (*) smp_mb__before_atomic_dec();
1021 (*) smp_mb__after_atomic_dec();
1022 (*) smp_mb__before_atomic_inc();
1023 (*) smp_mb__after_atomic_inc();
1024
1025 These are for use with atomic add, subtract, increment and decrement
1026 functions that don't return a value, especially when used for reference
1027 counting. These functions do not imply memory barriers.
1028
1029 As an example, consider a piece of code that marks an object as being dead
1030 and then decrements the object's reference count:
1031
1032 obj->dead = 1;
1033 smp_mb__before_atomic_dec();
1034 atomic_dec(&obj->ref_count);
1035
1036 This makes sure that the death mark on the object is perceived to be set
1037 *before* the reference counter is decremented.
1038
1039 See Documentation/atomic_ops.txt for more information. See the "Atomic
1040 operations" subsection for information on where to use these.
1041
1042
1043 (*) smp_mb__before_clear_bit(void);
1044 (*) smp_mb__after_clear_bit(void);
1045
1046 These are for use similar to the atomic inc/dec barriers. These are
1047 typically used for bitwise unlocking operations, so care must be taken as
1048 there are no implicit memory barriers here either.
1049
1050 Consider implementing an unlock operation of some nature by clearing a
1051 locking bit. The clear_bit() would then need to be barriered like this:
1052
1053 smp_mb__before_clear_bit();
1054 clear_bit( ... );
1055
1056 This prevents memory operations before the clear leaking to after it. See
1057 the subsection on "Locking Functions" with reference to UNLOCK operation
1058 implications.
1059
1060 See Documentation/atomic_ops.txt for more information. See the "Atomic
1061 operations" subsection for information on where to use these.
1062
1063
1064MMIO WRITE BARRIER
1065------------------
1066
1067The Linux kernel also has a special barrier for use with memory-mapped I/O
1068writes:
1069
1070 mmiowb();
1071
1072This is a variation on the mandatory write barrier that causes writes to weakly
1073ordered I/O regions to be partially ordered. Its effects may go beyond the
1074CPU->Hardware interface and actually affect the hardware at some level.
1075
1076See the subsection "Locks vs I/O accesses" for more information.
1077
1078
1079===============================
1080IMPLICIT KERNEL MEMORY BARRIERS
1081===============================
1082
1083Some of the other functions in the linux kernel imply memory barriers, amongst
1084which are locking and scheduling functions.
1085
1086This specification is a _minimum_ guarantee; any particular architecture may
1087provide more substantial guarantees, but these may not be relied upon outside
1088of arch specific code.
1089
1090
1091LOCKING FUNCTIONS
1092-----------------
1093
1094The Linux kernel has a number of locking constructs:
1095
1096 (*) spin locks
1097 (*) R/W spin locks
1098 (*) mutexes
1099 (*) semaphores
1100 (*) R/W semaphores
1101 (*) RCU
1102
1103In all cases there are variants on "LOCK" operations and "UNLOCK" operations
1104for each construct. These operations all imply certain barriers:
1105
1106 (1) LOCK operation implication:
1107
1108 Memory operations issued after the LOCK will be completed after the LOCK
1109 operation has completed.
1110
1111 Memory operations issued before the LOCK may be completed after the LOCK
1112 operation has completed.
1113
1114 (2) UNLOCK operation implication:
1115
1116 Memory operations issued before the UNLOCK will be completed before the
1117 UNLOCK operation has completed.
1118
1119 Memory operations issued after the UNLOCK may be completed before the
1120 UNLOCK operation has completed.
1121
1122 (3) LOCK vs LOCK implication:
1123
1124 All LOCK operations issued before another LOCK operation will be completed
1125 before that LOCK operation.
1126
1127 (4) LOCK vs UNLOCK implication:
1128
1129 All LOCK operations issued before an UNLOCK operation will be completed
1130 before the UNLOCK operation.
1131
1132 All UNLOCK operations issued before a LOCK operation will be completed
1133 before the LOCK operation.
1134
1135 (5) Failed conditional LOCK implication:
1136
1137 Certain variants of the LOCK operation may fail, either due to being
1138 unable to get the lock immediately, or due to receiving an unblocked
1139 signal whilst asleep waiting for the lock to become available. Failed
1140 locks do not imply any sort of barrier.
1141
1142Therefore, from (1), (2) and (4) an UNLOCK followed by an unconditional LOCK is
1143equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
1144
1145[!] Note: one of the consequences of LOCKs and UNLOCKs being only one-way
1146 barriers is that the effects of instructions outside of a critical section may
1147 seep into the inside of the critical section.
1148
1149A LOCK followed by an UNLOCK may not be assumed to be a full memory barrier
1150because it is possible for an access preceding the LOCK to happen after the
1151LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
1152two accesses can themselves then cross:
1153
1154 *A = a;
1155 LOCK
1156 UNLOCK
1157 *B = b;
1158
1159may occur as:
1160
1161 LOCK, STORE *B, STORE *A, UNLOCK
1162
1163Locks and semaphores may not provide any guarantee of ordering on UP compiled
1164systems, and so cannot be counted on in such a situation to actually achieve
1165anything at all - especially with respect to I/O accesses - unless combined
1166with interrupt disabling operations.
1167
1168See also the section on "Inter-CPU locking barrier effects".
1169
1170
1171As an example, consider the following:
1172
1173 *A = a;
1174 *B = b;
1175 LOCK
1176 *C = c;
1177 *D = d;
1178 UNLOCK
1179 *E = e;
1180 *F = f;
1181
1182The following sequence of events is acceptable:
1183
1184 LOCK, {*F,*A}, *E, {*C,*D}, *B, UNLOCK
1185
1186 [+] Note that {*F,*A} indicates a combined access.
1187
1188But none of the following are:
1189
1190 {*F,*A}, *B, LOCK, *C, *D, UNLOCK, *E
1191 *A, *B, *C, LOCK, *D, UNLOCK, *E, *F
1192 *A, *B, LOCK, *C, UNLOCK, *D, *E, *F
1193 *B, LOCK, *C, *D, UNLOCK, {*F,*A}, *E
1194
1195
1196
1197INTERRUPT DISABLING FUNCTIONS
1198-----------------------------
1199
1200Functions that disable interrupts (LOCK equivalent) and enable interrupts
1201(UNLOCK equivalent) will act as compiler barriers only. So if memory or I/O
1202barriers are required in such a situation, they must be provided from some
1203other means.
1204
1205
1206MISCELLANEOUS FUNCTIONS
1207-----------------------
1208
1209Other functions that imply barriers:
1210
1211 (*) schedule() and similar imply full memory barriers.
1212
1213
1214=================================
1215INTER-CPU LOCKING BARRIER EFFECTS
1216=================================
1217
1218On SMP systems locking primitives give a more substantial form of barrier: one
1219that does affect memory access ordering on other CPUs, within the context of
1220conflict on any particular lock.
1221
1222
1223LOCKS VS MEMORY ACCESSES
1224------------------------
1225
1226Consider the following: the system has a pair of spinlocks (M) and (Q), and
1227three CPUs; then should the following sequence of events occur:
1228
1229 CPU 1 CPU 2
1230 =============================== ===============================
1231 *A = a; *E = e;
1232 LOCK M LOCK Q
1233 *B = b; *F = f;
1234 *C = c; *G = g;
1235 UNLOCK M UNLOCK Q
1236 *D = d; *H = h;
1237
1238Then there is no guarantee as to what order CPU #3 will see the accesses to *A
1239through *H occur in, other than the constraints imposed by the separate locks
1240on the separate CPUs. It might, for example, see:
1241
1242 *E, LOCK M, LOCK Q, *G, *C, *F, *A, *B, UNLOCK Q, *D, *H, UNLOCK M
1243
1244But it won't see any of:
1245
1246 *B, *C or *D preceding LOCK M
1247 *A, *B or *C following UNLOCK M
1248 *F, *G or *H preceding LOCK Q
1249 *E, *F or *G following UNLOCK Q
1250
1251
1252However, if the following occurs:
1253
1254 CPU 1 CPU 2
1255 =============================== ===============================
1256 *A = a;
1257 LOCK M [1]
1258 *B = b;
1259 *C = c;
1260 UNLOCK M [1]
1261 *D = d; *E = e;
1262 LOCK M [2]
1263 *F = f;
1264 *G = g;
1265 UNLOCK M [2]
1266 *H = h;
1267
1268CPU #3 might see:
1269
1270 *E, LOCK M [1], *C, *B, *A, UNLOCK M [1],
1271 LOCK M [2], *H, *F, *G, UNLOCK M [2], *D
1272
1273But assuming CPU #1 gets the lock first, it won't see any of:
1274
1275 *B, *C, *D, *F, *G or *H preceding LOCK M [1]
1276 *A, *B or *C following UNLOCK M [1]
1277 *F, *G or *H preceding LOCK M [2]
1278 *A, *B, *C, *E, *F or *G following UNLOCK M [2]
1279
1280
1281LOCKS VS I/O ACCESSES
1282---------------------
1283
1284Under certain circumstances (especially involving NUMA), I/O accesses within
1285two spinlocked sections on two different CPUs may be seen as interleaved by the
1286PCI bridge, because the PCI bridge does not necessarily participate in the
1287cache-coherence protocol, and is therefore incapable of issuing the required
1288read memory barriers.
1289
1290For example:
1291
1292 CPU 1 CPU 2
1293 =============================== ===============================
1294 spin_lock(Q)
1295 writel(0, ADDR)
1296 writel(1, DATA);
1297 spin_unlock(Q);
1298 spin_lock(Q);
1299 writel(4, ADDR);
1300 writel(5, DATA);
1301 spin_unlock(Q);
1302
1303may be seen by the PCI bridge as follows:
1304
1305 STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
1306
1307which would probably cause the hardware to malfunction.
1308
1309
1310What is necessary here is to intervene with an mmiowb() before dropping the
1311spinlock, for example:
1312
1313 CPU 1 CPU 2
1314 =============================== ===============================
1315 spin_lock(Q)
1316 writel(0, ADDR)
1317 writel(1, DATA);
1318 mmiowb();
1319 spin_unlock(Q);
1320 spin_lock(Q);
1321 writel(4, ADDR);
1322 writel(5, DATA);
1323 mmiowb();
1324 spin_unlock(Q);
1325
1326this will ensure that the two stores issued on CPU #1 appear at the PCI bridge
1327before either of the stores issued on CPU #2.
1328
1329
1330Furthermore, following a store by a load to the same device obviates the need
1331for an mmiowb(), because the load forces the store to complete before the load
1332is performed:
1333
1334 CPU 1 CPU 2
1335 =============================== ===============================
1336 spin_lock(Q)
1337 writel(0, ADDR)
1338 a = readl(DATA);
1339 spin_unlock(Q);
1340 spin_lock(Q);
1341 writel(4, ADDR);
1342 b = readl(DATA);
1343 spin_unlock(Q);
1344
1345
1346See Documentation/DocBook/deviceiobook.tmpl for more information.
1347
1348
1349=================================
1350WHERE ARE MEMORY BARRIERS NEEDED?
1351=================================
1352
1353Under normal operation, memory operation reordering is generally not going to
1354be a problem as a single-threaded linear piece of code will still appear to
1355 work correctly, even if it's in an SMP kernel. There are, however, four
1356circumstances in which reordering definitely _could_ be a problem:
1357
1358 (*) Interprocessor interaction.
1359
1360 (*) Atomic operations.
1361
1362 (*) Accessing devices (I/O).
1363
1364 (*) Interrupts.
1365
1366
1367INTERPROCESSOR INTERACTION
1368--------------------------
1369
1370When there's a system with more than one processor, more than one CPU in the
1371system may be working on the same data set at the same time. This can cause
1372synchronisation problems, and the usual way of dealing with them is to use
1373locks. Locks, however, are quite expensive, and so it may be preferable to
1374operate without the use of a lock if at all possible. In such a case
1375operations that affect both CPUs may have to be carefully ordered to prevent
1376a malfunction.
1377
1378Consider, for example, the R/W semaphore slow path. Here a waiting process is
1379queued on the semaphore, by virtue of it having a piece of its stack linked to
1380the semaphore's list of waiting processes:
1381
1382 struct rw_semaphore {
1383 ...
1384 spinlock_t lock;
1385 struct list_head waiters;
1386 };
1387
1388 struct rwsem_waiter {
1389 struct list_head list;
1390 struct task_struct *task;
1391 };
1392
1393To wake up a particular waiter, the up_read() or up_write() functions have to:
1394
1395 (1) read the next pointer from this waiter's record to know as to where the
1396 next waiter record is;
1397
1398 (2) read the pointer to the waiter's task structure;
1399
1400 (3) clear the task pointer to tell the waiter it has been given the semaphore;
1401
1402 (4) call wake_up_process() on the task; and
1403
1404 (5) release the reference held on the waiter's task struct.
1405
1406In other words, it has to perform this sequence of events:
1407
1408 LOAD waiter->list.next;
1409 LOAD waiter->task;
1410 STORE waiter->task;
1411 CALL wakeup
1412 RELEASE task
1413
1414and if any of these steps occur out of order, then the whole thing may
1415malfunction.
1416
1417Once it has queued itself and dropped the semaphore lock, the waiter does not
1418get the lock again; it instead just waits for its task pointer to be cleared
1419before proceeding. Since the record is on the waiter's stack, this means that
1420if the task pointer is cleared _before_ the next pointer in the list is read,
1421another CPU might start processing the waiter and might clobber the waiter's
1422stack before the up*() function has a chance to read the next pointer.
1423
1424Consider then what might happen to the above sequence of events:
1425
1426 CPU 1 CPU 2
1427 =============================== ===============================
1428 down_xxx()
1429 Queue waiter
1430 Sleep
1431 up_yyy()
1432 LOAD waiter->task;
1433 STORE waiter->task;
1434 Woken up by other event
1435 <preempt>
1436 Resume processing
1437 down_xxx() returns
1438 call foo()
1439 foo() clobbers *waiter
1440 </preempt>
1441 LOAD waiter->list.next;
1442 --- OOPS ---
1443
1444This could be dealt with using the semaphore lock, but then the down_xxx()
1445function has to needlessly get the spinlock again after being woken up.
1446
1447The way to deal with this is to insert a general SMP memory barrier:
1448
1449 LOAD waiter->list.next;
1450 LOAD waiter->task;
1451 smp_mb();
1452 STORE waiter->task;
1453 CALL wakeup
1454 RELEASE task
1455
1456In this case, the barrier makes a guarantee that all memory accesses before the
1457barrier will appear to happen before all the memory accesses after the barrier
1458with respect to the other CPUs on the system. It does _not_ guarantee that all
1459the memory accesses before the barrier will be complete by the time the barrier
1460instruction itself is complete.
1461
1462On a UP system - where this wouldn't be a problem - the smp_mb() is just a
1463compiler barrier, thus making sure the compiler emits the instructions in the
1464 right order without actually intervening in the CPU. Since there's only
1465one CPU, that CPU's dependency ordering logic will take care of everything
1466else.
1467
1468
1469ATOMIC OPERATIONS
1470-----------------
1471
1472Whilst they are technically interprocessor interaction considerations, atomic
1473operations are noted specially as some of them imply full memory barriers and
1474some don't, but they're very heavily relied on as a group throughout the
1475kernel.
1476
1477Any atomic operation that modifies some state in memory and returns information
1478about the state (old or new) implies an SMP-conditional general memory barrier
1479(smp_mb()) on each side of the actual operation. These include:
1480
1481 xchg();
1482 cmpxchg();
1483 atomic_cmpxchg();
1484 atomic_inc_return();
1485 atomic_dec_return();
1486 atomic_add_return();
1487 atomic_sub_return();
1488 atomic_inc_and_test();
1489 atomic_dec_and_test();
1490 atomic_sub_and_test();
1491 atomic_add_negative();
1492 atomic_add_unless();
1493 test_and_set_bit();
1494 test_and_clear_bit();
1495 test_and_change_bit();
1496
1497These are used for such things as implementing LOCK-class and UNLOCK-class
1498operations and adjusting reference counters towards object destruction, and as
1499such the implicit memory barrier effects are necessary.
1500
1501
1502The following operations are potential problems as they do _not_ imply memory
1503barriers, but might be used for implementing such things as UNLOCK-class
1504operations:
1505
1506 atomic_set();
1507 set_bit();
1508 clear_bit();
1509 change_bit();
1510
1511With these the appropriate explicit memory barrier should be used if necessary
1512(smp_mb__before_clear_bit() for instance).
1513
1514
1515The following also do _not_ imply memory barriers, and so may require explicit
1516memory barriers under some circumstances (smp_mb__before_atomic_dec() for
1517 instance):
1518
1519 atomic_add();
1520 atomic_sub();
1521 atomic_inc();
1522 atomic_dec();
1523
1524If they're used for statistics generation, then they probably don't need memory
1525barriers, unless there's a coupling between statistical data.
1526
1527If they're used for reference counting on an object to control its lifetime,
1528they probably don't need memory barriers because either the reference count
1529will be adjusted inside a locked section, or the caller will already hold
1530sufficient references to make the lock, and thus a memory barrier unnecessary.
1531
1532If they're used for constructing a lock of some description, then they probably
1533do need memory barriers as a lock primitive generally has to do things in a
1534specific order.
1535
1536
1537Basically, each usage case has to be carefully considered as to whether memory
1538barriers are needed or not.
1539
1540[!] Note that special memory barrier primitives are available for these
1541situations because on some CPUs the atomic instructions used imply full memory
1542barriers, and so barrier instructions are superfluous in conjunction with them,
1543and in such cases the special barrier primitives will be no-ops.
1544
1545See Documentation/atomic_ops.txt for more information.
1546
1547
1548ACCESSING DEVICES
1549-----------------
1550
1551Many devices can be memory mapped, and so appear to the CPU as if they're just
1552a set of memory locations. To control such a device, the driver usually has to
1553make the right memory accesses in exactly the right order.
1554
1555However, having a clever CPU or a clever compiler creates a potential problem
1556in that the carefully sequenced accesses in the driver code won't reach the
1557device in the requisite order if the CPU or the compiler thinks it is more
1558efficient to reorder, combine or merge accesses - something that would cause
1559the device to malfunction.
1560
1561Inside of the Linux kernel, I/O should be done through the appropriate accessor
1562routines - such as inb() or writel() - which know how to make such accesses
1563appropriately sequential. Whilst this, for the most part, renders the explicit
1564use of memory barriers unnecessary, there are a couple of situations where they
1565might be needed:
1566
1567 (1) On some systems, I/O stores are not strongly ordered across all CPUs, and
1568 so for _all_ general drivers locks should be used and mmiowb() must be
1569 issued prior to unlocking the critical section.
1570
1571 (2) If the accessor functions are used to refer to an I/O memory window with
1572 relaxed memory access properties, then _mandatory_ memory barriers are
1573 required to enforce ordering.
1574
1575See Documentation/DocBook/deviceiobook.tmpl for more information.
1576
1577
1578INTERRUPTS
1579----------
1580
1581A driver may be interrupted by its own interrupt service routine, and thus the
1582two parts of the driver may interfere with each other's attempts to control or
1583access the device.
1584
1585This may be alleviated - at least in part - by disabling local interrupts (a
1586form of locking), such that the critical operations are all contained within
1587the interrupt-disabled section in the driver. Whilst the driver's interrupt
1588routine is executing, the driver's core may not run on the same CPU, and its
1589interrupt is not permitted to happen again until the current interrupt has been
1590handled, thus the interrupt handler does not need to lock against that.
1591
1592However, consider a driver that was talking to an ethernet card that sports an
1593address register and a data register. If that driver's core talks to the card
1594under interrupt-disablement and then the driver's interrupt handler is invoked:
1595
1596 LOCAL IRQ DISABLE
1597 writew(ADDR, 3);
1598 writew(DATA, y);
1599 LOCAL IRQ ENABLE
1600 <interrupt>
1601 writew(ADDR, 4);
1602 q = readw(DATA);
1603 </interrupt>
1604
1605The store to the data register might happen after the second store to the
1606address register if ordering rules are sufficiently relaxed:
1607
1608 STORE *ADDR = 3, STORE *ADDR = 4, STORE *DATA = y, q = LOAD *DATA
1609
1610
1611If ordering rules are relaxed, it must be assumed that accesses done inside an
1612interrupt disabled section may leak outside of it and may interleave with
1613accesses performed in an interrupt - and vice versa - unless implicit or
1614explicit barriers are used.
1615
1616Normally this won't be a problem because the I/O accesses done inside such
1617sections will include synchronous load operations on strictly ordered I/O
1618registers that form implicit I/O barriers. If this isn't sufficient then an
1619mmiowb() may need to be used explicitly.
1620
1621
1622A similar situation may occur between an interrupt routine and two routines
1623running on separate CPUs that communicate with each other. If such a case is
1624likely, then interrupt-disabling locks should be used to guarantee ordering.
1625
1626
1627==========================
1628KERNEL I/O BARRIER EFFECTS
1629==========================
1630
1631When accessing I/O memory, drivers should use the appropriate accessor
1632functions:
1633
1634 (*) inX(), outX():
1635
1636 These are intended to talk to I/O space rather than memory space, but
1637 that's primarily a CPU-specific concept. The i386 and x86_64 processors do
1638 indeed have special I/O space access cycles and instructions, but many
1639 CPUs don't have such a concept.
1640
1641 The PCI bus, amongst others, defines an I/O space concept - which on such
1642 CPUs as i386 and x86_64 readily maps to the CPU's concept of I/O
1643 space. However, it may also be mapped as a virtual I/O space in the CPU's
1644 memory map, particularly on those CPUs that don't support alternate
1645 I/O spaces.
1646
1647 Accesses to this space may be fully synchronous (as on i386), but
1648 intermediary bridges (such as the PCI host bridge) may not fully honour
1649 that.
1650
1651 They are guaranteed to be fully ordered with respect to each other.
1652
1653 They are not guaranteed to be fully ordered with respect to other types of
1654 memory and I/O operation.
1655
1656 (*) readX(), writeX():
1657
1658 Whether these are guaranteed to be fully ordered and uncombined with
1659 respect to each other on the issuing CPU depends on the characteristics
1660 defined for the memory window through which they're accessing. On later
1661 i386 architecture machines, for example, this is controlled by way of the
1662 MTRR registers.
1663
1664 Ordinarily, these will be guaranteed to be fully ordered and uncombined,
1665 provided they're not accessing a prefetchable device.
1666
1667 However, intermediary hardware (such as a PCI bridge) may indulge in
1668 deferral if it so wishes; to flush a store, a load from the same location
1669 is preferred[*], but a load from the same device or from configuration
1670 space should suffice for PCI.
1671
1672 [*] NOTE! attempting to load from the same location as was written to may
1673 cause a malfunction - consider the 16550 Rx/Tx serial registers for
1674 example.
1675
1676 Used with prefetchable I/O memory, an mmiowb() barrier may be required to
1677 force stores to be ordered.
1678
1679 Please refer to the PCI specification for more information on interactions
1680 between PCI transactions.
1681
1682 (*) readX_relaxed()
1683
1684 These are similar to readX(), but are not guaranteed to be ordered in any
1685 way. Be aware that there is no I/O read barrier available.
1686
1687 (*) ioreadX(), iowriteX()
1688
1689 These will perform as appropriate for the type of access they're actually
1690 doing, be it inX()/outX() or readX()/writeX().
1691
1692
1693========================================
1694ASSUMED MINIMUM EXECUTION ORDERING MODEL
1695========================================
1696
1697It has to be assumed that the conceptual CPU is weakly-ordered but that it will
1698maintain the appearance of program causality with respect to itself. Some CPUs
1699(such as i386 or x86_64) are more constrained than others (such as powerpc or
1700frv), and so the most relaxed case (namely DEC Alpha) must be assumed outside
1701of arch-specific code.
1702
1703This means that it must be considered that the CPU will execute its instruction
1704stream in any order it feels like - or even in parallel - provided that if an
1705 instruction in the stream depends on an earlier instruction, then that
1706earlier instruction must be sufficiently complete[*] before the later
1707instruction may proceed; in other words: provided that the appearance of
1708causality is maintained.
1709
1710 [*] Some instructions have more than one effect - such as changing the
1711 condition codes, changing registers or changing memory - and different
1712 instructions may depend on different effects.
1713
1714A CPU may also discard any instruction sequence that winds up having no
1715ultimate effect. For example, if two adjacent instructions both load an
1716immediate value into the same register, the first may be discarded.
1717
1718
1719Similarly, it has to be assumed that the compiler might reorder the instruction
1720stream in any way it sees fit, again provided the appearance of causality is
1721maintained.
1722
1723
1724============================
1725THE EFFECTS OF THE CPU CACHE
1726============================
1727
1728The way cached memory operations are perceived across the system is affected to
1729a certain extent by the caches that lie between CPUs and memory, and by the
1730memory coherence system that maintains the consistency of state in the system.
1731
1732As far as the way a CPU interacts with another part of the system through the
1733caches goes, the memory system has to include the CPU's caches, and memory
1734barriers for the most part act at the interface between the CPU and its cache
1735(memory barriers logically act on the dotted line in the following diagram):
1736
1737 <--- CPU ---> : <----------- Memory ----------->
1738 :
1739 +--------+ +--------+ : +--------+ +-----------+
1740 | | | | : | | | | +--------+
1741 | CPU | | Memory | : | CPU | | | | |
1742 | Core |--->| Access |----->| Cache |<-->| | | |
1743 | | | Queue | : | | | |--->| Memory |
1744 | | | | : | | | | | |
1745 +--------+ +--------+ : +--------+ | | | |
1746 : | Cache | +--------+
1747 : | Coherency |
1748 : | Mechanism | +--------+
1749 +--------+ +--------+ : +--------+ | | | |
1750 | | | | : | | | | | |
1751 | CPU | | Memory | : | CPU | | |--->| Device |
1752 | Core |--->| Access |----->| Cache |<-->| | | |
1753 | | | Queue | : | | | | | |
1754 | | | | : | | | | +--------+
1755 +--------+ +--------+ : +--------+ +-----------+
1756 :
1757 :
1758
1759Although any particular load or store may not actually appear outside of the
1760CPU that issued it since it may have been satisfied within the CPU's own cache,
1761it will still appear as if the full memory access had taken place as far as the
1762other CPUs are concerned since the cache coherency mechanisms will migrate the
1763cacheline over to the accessing CPU and propagate the effects upon conflict.
1764
1765The CPU core may execute instructions in any order it deems fit, provided the
1766expected program causality appears to be maintained. Some of the instructions
1767generate load and store operations which then go into the queue of memory
1768accesses to be performed. The core may place these in the queue in any order
1769it wishes, and continue execution until it is forced to wait for an instruction
1770to complete.
1771
1772What memory barriers are concerned with is controlling the order in which
1773accesses cross from the CPU side of things to the memory side of things, and
1774the order in which the effects are perceived to happen by the other observers
1775in the system.
1776
1777[!] Memory barriers are _not_ needed within a given CPU, as CPUs always see
1778their own loads and stores as if they had happened in program order.
1779
1780[!] MMIO or other device accesses may bypass the cache system. This depends on
1781the properties of the memory window through which devices are accessed and/or
1782the use of any special device communication instructions the CPU may have.
1783
1784
1785CACHE COHERENCY
1786---------------
1787
1788Life isn't quite as simple as it may appear above, however: for while the
1789caches are expected to be coherent, there's no guarantee that that coherency
1790will be ordered. This means that whilst changes made on one CPU will
1791eventually become visible on all CPUs, there's no guarantee that they will
1792become apparent in the same order on those other CPUs.
1793
1794
1795Consider dealing with a system that has a pair of CPUs (1 & 2), each of which has
1796a pair of parallel data caches (CPU 1 has A/B, and CPU 2 has C/D):
1797
1798 :
1799 : +--------+
1800 : +---------+ | |
1801 +--------+ : +--->| Cache A |<------->| |
1802 | | : | +---------+ | |
1803 | CPU 1 |<---+ | |
1804 | | : | +---------+ | |
1805 +--------+ : +--->| Cache B |<------->| |
1806 : +---------+ | |
1807 : | Memory |
1808 : +---------+ | System |
1809 +--------+ : +--->| Cache C |<------->| |
1810 | | : | +---------+ | |
1811 | CPU 2 |<---+ | |
1812 | | : | +---------+ | |
1813 +--------+ : +--->| Cache D |<------->| |
1814 : +---------+ | |
1815 : +--------+
1816 :
1817
1818Imagine the system has the following properties:
1819
1820 (*) an odd-numbered cache line may be in cache A, cache C or it may still be
1821 resident in memory;
1822
1823 (*) an even-numbered cache line may be in cache B, cache D or it may still be
1824 resident in memory;
1825
1826 (*) whilst the CPU core is interrogating one cache, the other cache may be
1827 making use of the bus to access the rest of the system - perhaps to
1828 displace a dirty cacheline or to do a speculative load;
1829
1830 (*) each cache has a queue of operations that need to be applied to that cache
1831 to maintain coherency with the rest of the system;
1832
1833 (*) the coherency queue is not flushed by normal loads to lines already
1834 present in the cache, even though the contents of the queue may
1835 potentially affect those loads.
1836
1837Imagine, then, that two writes are made on the first CPU, with a write barrier
1838between them to guarantee that they will appear to reach that CPU's caches in
1839the requisite order:
1840
1841 CPU 1 CPU 2 COMMENT
1842 =============== =============== =======================================
1843 u == 0, v == 1 and p == &u, q == &u
1844 v = 2;
1845 smp_wmb(); Make sure change to v visible before
1846 change to p
1847 <A:modify v=2> v is now in cache A exclusively
1848 p = &v;
1849 <B:modify p=&v> p is now in cache B exclusively
1850
1851The write memory barrier forces the other CPUs in the system to perceive that
1852the local CPU's caches have apparently been updated in the correct order. But
1853now imagine that the second CPU wants to read those values:
1854
1855 CPU 1 CPU 2 COMMENT
1856 =============== =============== =======================================
1857 ...
1858 q = p;
1859 x = *q;
1860
1861The above pair of reads may then fail to happen in expected order, as the
1862cacheline holding p may get updated in one of the second CPU's caches whilst
1863the update to the cacheline holding v is delayed in the other of the second
1864CPU's caches by some other cache event:
1865
1866 CPU 1 CPU 2 COMMENT
1867 =============== =============== =======================================
1868 u == 0, v == 1 and p == &u, q == &u
1869 v = 2;
1870 smp_wmb();
1871 <A:modify v=2> <C:busy>
1872 <C:queue v=2>
1873 p = &v; q = p;
1874 <D:request p>
1875 <B:modify p=&v> <D:commit p=&v>
1876 <D:read p>
1877 x = *q;
1878 <C:read *q> Reads from v before v updated in cache
1879 <C:unbusy>
1880 <C:commit v=2>
1881
1882Basically, whilst both cachelines will be updated on CPU 2 eventually, there's
1883no guarantee that, without intervention, the order of update will be the same
1884as that committed on CPU 1.
1885
1886
1887To intervene, we need to interpolate a data dependency barrier or a read
1888barrier between the loads. This will force the cache to commit its coherency
1889queue before processing any further requests:
1890
1891 CPU 1 CPU 2 COMMENT
1892 =============== =============== =======================================
1893 u == 0, v == 1 and p == &u, q == &u
1894 v = 2;
1895 smp_wmb();
1896 <A:modify v=2> <C:busy>
1897 <C:queue v=2>
1898 p = &v; q = p;
1899 <D:request p>
1900 <B:modify p=&v> <D:commit p=&v>
1901 <D:read p>
1902 smp_read_barrier_depends()
1903 <C:unbusy>
1904 <C:commit v=2>
1905 x = *q;
1906 <C:read *q> Reads from v after v updated in cache
1907
1908
1909This sort of problem can be encountered on DEC Alpha processors as they have a
1910split cache that improves performance by making better use of the data bus.
1911Whilst most CPUs do imply a data dependency barrier on the read when a memory
1912access depends on a read, not all do, so it may not be relied on.
1913
1914Other CPUs may also have split caches, but must coordinate between the various
1915 cachelets for normal memory accesses. The semantics of the Alpha removes the
1916need for coordination in absence of memory barriers.
1917
1918
1919CACHE COHERENCY VS DMA
1920----------------------
1921
1922Not all systems maintain cache coherency with respect to devices doing DMA. In
1923such cases, a device attempting DMA may obtain stale data from RAM because
1924dirty cache lines may be resident in the caches of various CPUs, and may not
1925have been written back to RAM yet. To deal with this, the appropriate part of
1926the kernel must flush the overlapping bits of cache on each CPU (and maybe
1927invalidate them as well).
1928
1929In addition, the data DMA'd to RAM by a device may be overwritten by dirty
1930cache lines being written back to RAM from a CPU's cache after the device has
1931 installed its own data, or cache lines simply present in a CPU's cache may
1932simply obscure the fact that RAM has been updated, until at such time as the
1933cacheline is discarded from the CPU's cache and reloaded. To deal with this,
1934the appropriate part of the kernel must invalidate the overlapping bits of the
1935cache on each CPU.
1936
1937See Documentation/cachetlb.txt for more information on cache management.
1938
1939
1940CACHE COHERENCY VS MMIO
1941-----------------------
1942
1943Memory mapped I/O usually takes place through memory locations that are part of
1944a window in the CPU's memory space that have different properties assigned than
1945the usual RAM directed window.
1946
1947Amongst these properties is usually the fact that such accesses bypass the
1948caching entirely and go directly to the device buses. This means MMIO accesses
1949may, in effect, overtake accesses to cached memory that were emitted earlier.
1950A memory barrier isn't sufficient in such a case, but rather the cache must be
1951flushed between the cached memory write and the MMIO access if the two are in
1952any way dependent.
1953
1954
1955=========================
1956THE THINGS CPUS GET UP TO
1957=========================
1958
1959A programmer might take it for granted that the CPU will perform memory
1960operations in exactly the order specified, so that if a CPU is, for example,
1961given the following piece of code to execute:
1962
1963 a = *A;
1964 *B = b;
1965 c = *C;
1966 d = *D;
1967 *E = e;
1968
1969They would then expect that the CPU will complete the memory operation for each
1970instruction before moving on to the next one, leading to a definite sequence of
1971operations as seen by external observers in the system:
1972
1973 LOAD *A, STORE *B, LOAD *C, LOAD *D, STORE *E.
1974
1975
1976Reality is, of course, much messier. With many CPUs and compilers, the above
1977assumption doesn't hold because:
1978
1979 (*) loads are more likely to need to be completed immediately to permit
1980 execution progress, whereas stores can often be deferred without a
1981 problem;
1982
1983 (*) loads may be done speculatively, and the result discarded should it prove
1984 to have been unnecessary;
1985
1986 (*) loads may be done speculatively, leading to the result having been
1987 fetched at the wrong time in the expected sequence of events;
1988
1989 (*) the order of the memory accesses may be rearranged to promote better use
1990 of the CPU buses and caches;
1991
1992 (*) loads and stores may be combined to improve performance when talking to
1993 memory or I/O hardware that can do batched accesses of adjacent locations,
1994 thus cutting down on transaction setup costs (memory and PCI devices may
1995 both be able to do this); and
1996
1997 (*) the CPU's data cache may affect the ordering, and whilst cache-coherency
1998 mechanisms may alleviate this - once the store has actually hit the cache
1999 - there's no guarantee that the coherency management will be propagated in
2000 order to other CPUs.
2001
2002So what another CPU, say, might actually observe from the above piece of code
2003is:
2004
2005 LOAD *A, ..., LOAD {*C,*D}, STORE *E, STORE *B
2006
2007 (Where "LOAD {*C,*D}" is a combined load)
2008
2009
2010However, it is guaranteed that a CPU will be self-consistent: it will see its
2011_own_ accesses appear to be correctly ordered, without the need for a memory
2012barrier. For instance with the following code:
2013
2014 U = *A;
2015 *A = V;
2016 *A = W;
2017 X = *A;
2018 *A = Y;
2019 Z = *A;
2020
2021and assuming no intervention by an external influence, it can be assumed that
2022the final result will appear to be:
2023
2024 U == the original value of *A
2025 X == W
2026 Z == Y
2027 *A == Y
2028
2029The code above may cause the CPU to generate the full sequence of memory
2030accesses:
2031
2032 U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
2033
2034in that order, but, without intervention, the sequence may have almost any
2035combination of elements combined or discarded, provided the program's view of
2036the world remains consistent.
2037
2038The compiler may also combine, discard or defer elements of the sequence before
2039the CPU even sees them.
2040
2041For instance:
2042
2043 *A = V;
2044 *A = W;
2045
2046may be reduced to:
2047
2048 *A = W;
2049
2050since, without a write barrier, it can be assumed that the effect of the
2051storage of V to *A is lost. Similarly:
2052
2053 *A = Y;
2054 Z = *A;
2055
2056may, without a memory barrier, be reduced to:
2057
2058 *A = Y;
2059 Z = Y;
2060
2061and the LOAD operation never appears outside of the CPU.
2062
2063
2064AND THEN THERE'S THE ALPHA
2065--------------------------
2066
2067The DEC Alpha CPU is one of the most relaxed CPUs there is. Not only that,
2068some versions of the Alpha CPU have a split data cache, permitting them to have
2069two semantically related cache lines updating at separate times. This is where
2070the data dependency barrier really becomes necessary as this synchronises both
2071caches with the memory coherence system, thus making it seem like pointer
2072changes vs new data occur in the right order.
2073
2074The Alpha defines the Linux kernel's memory barrier model.
2075
2076See the subsection on "Cache Coherency" above.
2077
2078
2079==========
2080REFERENCES
2081==========
2082
2083Alpha AXP Architecture Reference Manual, Second Edition (Sites & Witek,
2084Digital Press)
2085 Chapter 5.2: Physical Address Space Characteristics
2086 Chapter 5.4: Caches and Write Buffers
2087 Chapter 5.5: Data Sharing
2088 Chapter 5.6: Read/Write Ordering
2089
2090AMD64 Architecture Programmer's Manual Volume 2: System Programming
2091 Chapter 7.1: Memory-Access Ordering
2092 Chapter 7.4: Buffering and Combining Memory Writes
2093
2094IA-32 Intel Architecture Software Developer's Manual, Volume 3:
2095System Programming Guide
2096 Chapter 7.1: Locked Atomic Operations
2097 Chapter 7.2: Memory Ordering
2098 Chapter 7.4: Serializing Instructions
2099
2100The SPARC Architecture Manual, Version 9
2101 Chapter 8: Memory Models
2102 Appendix D: Formal Specification of the Memory Models
2103 Appendix J: Programming with the Memory Models
2104
2105UltraSPARC Programmer Reference Manual
2106 Chapter 5: Memory Accesses and Cacheability
2107 Chapter 15: Sparc-V9 Memory Models
2108
2109UltraSPARC III Cu User's Manual
2110 Chapter 9: Memory Models
2111
2112UltraSPARC IIIi Processor User's Manual
2113 Chapter 8: Memory Models
2114
2115UltraSPARC Architecture 2005
2116 Chapter 9: Memory
2117 Appendix D: Formal Specifications of the Memory Models
2118
2119UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
2120 Chapter 8: Memory Models
2121 Appendix F: Caches and Cache Coherency
2122
2123Solaris Internals, Core Kernel Architecture, p63-68:
2124 Chapter 3.3: Hardware Considerations for Locks and
2125 Synchronization
2126
2127Unix Systems for Modern Architectures, Symmetric Multiprocessing and Caching
2128for Kernel Programmers:
2129 Chapter 13: Other Memory Models
2130
2131Intel Itanium Architecture Software Developer's Manual: Volume 1:
2132 Section 2.6: Speculation
2133 Section 4.4: Memory Access
diff --git a/Documentation/mtrr.txt b/Documentation/mtrr.txt
index b78af1c32996..c39ac395970e 100644
--- a/Documentation/mtrr.txt
+++ b/Documentation/mtrr.txt
@@ -138,19 +138,29 @@ Reading MTRRs from a C program using ioctl()'s:
138 138
139*/ 139*/
140#include <stdio.h> 140#include <stdio.h>
141#include <stdlib.h>
141#include <string.h> 142#include <string.h>
142#include <sys/types.h> 143#include <sys/types.h>
143#include <sys/stat.h> 144#include <sys/stat.h>
144#include <fcntl.h> 145#include <fcntl.h>
145#include <sys/ioctl.h> 146#include <sys/ioctl.h>
146#include <errno.h> 147#include <errno.h>
147#define MTRR_NEED_STRINGS
148#include <asm/mtrr.h> 148#include <asm/mtrr.h>
149 149
150#define TRUE 1 150#define TRUE 1
151#define FALSE 0 151#define FALSE 0
152#define ERRSTRING strerror (errno) 152#define ERRSTRING strerror (errno)
153 153
154static char *mtrr_strings[MTRR_NUM_TYPES] =
155{
156 "uncachable", /* 0 */
157 "write-combining", /* 1 */
158 "?", /* 2 */
159 "?", /* 3 */
160 "write-through", /* 4 */
161 "write-protect", /* 5 */
162 "write-back", /* 6 */
163};
154 164
155int main () 165int main ()
156{ 166{
@@ -232,13 +242,22 @@ Creating MTRRs from a C programme using ioctl()'s:
232#include <fcntl.h> 242#include <fcntl.h>
233#include <sys/ioctl.h> 243#include <sys/ioctl.h>
234#include <errno.h> 244#include <errno.h>
235#define MTRR_NEED_STRINGS
236#include <asm/mtrr.h> 245#include <asm/mtrr.h>
237 246
238#define TRUE 1 247#define TRUE 1
239#define FALSE 0 248#define FALSE 0
240#define ERRSTRING strerror (errno) 249#define ERRSTRING strerror (errno)
241 250
251static char *mtrr_strings[MTRR_NUM_TYPES] =
252{
253 "uncachable", /* 0 */
254 "write-combining", /* 1 */
255 "?", /* 2 */
256 "?", /* 3 */
257 "write-through", /* 4 */
258 "write-protect", /* 5 */
259 "write-back", /* 6 */
260};
242 261
243int main (int argc, char **argv) 262int main (int argc, char **argv)
244{ 263{
diff --git a/Documentation/networking/TODO b/Documentation/networking/TODO
deleted file mode 100644
index 66d36ff14bae..000000000000
--- a/Documentation/networking/TODO
+++ /dev/null
@@ -1,18 +0,0 @@
1To-do items for network drivers
2-------------------------------
3
4* Move ethernet crc routine to generic code
5
6* (for 2.5) Integrate Jamal Hadi Salim's netdev Rx polling API change
7
8* Audit all net drivers to make sure magic packet / wake-on-lan /
9 similar features are disabled in the driver by default.
10
11* Audit all net drivers to make sure the module always prints out a
12 version string when loaded as a module, but only prints a version
13 string when built into the kernel if a device is detected.
14
15* Add ETHTOOL_GDRVINFO ioctl support to all ethernet drivers.
16
17* dmfe PCI DMA is totally wrong and only works on x86
18
diff --git a/Documentation/networking/bcm43xx.txt b/Documentation/networking/bcm43xx.txt
new file mode 100644
index 000000000000..28541d2bee1e
--- /dev/null
+++ b/Documentation/networking/bcm43xx.txt
@@ -0,0 +1,36 @@
1
2 BCM43xx Linux Driver Project
3 ============================
4
5About this software
6-------------------
7
8The goal of this project is to develop a linux driver for Broadcom
9BCM43xx chips, based on the specification at
10http://bcm-specs.sipsolutions.net/
11
12The project page is http://bcm43xx.berlios.de/
13
14
15Requirements
16------------
17
181) Linux Kernel 2.6.16 or later
19 http://www.kernel.org/
20
21 You may want to configure your kernel with:
22
23 CONFIG_DEBUG_FS (optional):
24 -> Kernel hacking
25 -> Debug Filesystem
26
272) SoftMAC IEEE 802.11 Networking Stack extension and patched ieee80211
28 modules:
29 http://softmac.sipsolutions.net/
30
313) Firmware Files
32
33 Please try fwcutter. Fwcutter can extract the firmware from various
34 binary driver files. It supports driver files from Windows, MacOS and
35 Linux. You can get fwcutter from http://bcm43xx.berlios.de/.
36 Also, fwcutter comes with a README file for further instructions.
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 545447ac503a..a12059886755 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -87,7 +87,7 @@
87 * would fail and generate an error message in the system log. 87 * would fail and generate an error message in the system log.
88 * - For opt_c: slave should not be set to the master's setting 88 * - For opt_c: slave should not be set to the master's setting
89 * while it is running. It was already set during enslave. To 89 * while it is running. It was already set during enslave. To
90 * simplify things, it is now handeled separately. 90 * simplify things, it is now handled separately.
91 * 91 *
92 * - 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> 92 * - 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
93 * - Code cleanup and style changes 93 * - Code cleanup and style changes
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
new file mode 100644
index 000000000000..4a21d9bb836b
--- /dev/null
+++ b/Documentation/networking/operstates.txt
@@ -0,0 +1,161 @@
1
21. Introduction
3
4Linux distinguishes between administrative and operational state of an
5interface. Administrative state is the result of "ip link set dev
6<dev> up or down" and reflects whether the administrator wants to use
7the device for traffic.
8
9However, an interface is not usable just because the admin enabled it
10- ethernet requires to be plugged into the switch and, depending on
11a site's networking policy and configuration, an 802.1X authentication
12to be performed before user data can be transferred. Operational state
13shows the ability of an interface to transmit this user data.
14
15Thanks to 802.1X, userspace must be granted the possibility to
16influence operational state. To accommodate this, operational state is
17split into two parts: Two flags that can be set by the driver only, and
18a RFC2863 compatible state that is derived from these flags, a policy,
19and changeable from userspace under certain rules.
20
21
222. Querying from userspace
23
24Both admin and operational state can be queried via the netlink
25operation RTM_GETLINK. It is also possible to subscribe to RTMGRP_LINK
26to be notified of updates. This is important for setting from userspace.
27
28These values contain interface state:
29
30ifinfomsg::if_flags & IFF_UP:
31 Interface is admin up
32ifinfomsg::if_flags & IFF_RUNNING:
33 Interface is in RFC2863 operational state UP or UNKNOWN. This is for
34 backward compatibility, routing daemons, dhcp clients can use this
35 flag to determine whether they should use the interface.
36ifinfomsg::if_flags & IFF_LOWER_UP:
37 Driver has signaled netif_carrier_on()
38ifinfomsg::if_flags & IFF_DORMANT:
39 Driver has signaled netif_dormant_on()
40
41These interface flags can also be queried without netlink using the
42SIOCGIFFLAGS ioctl.
43
44TLV IFLA_OPERSTATE
45
46contains RFC2863 state of the interface in numeric representation:
47
48IF_OPER_UNKNOWN (0):
49 Interface is in unknown state, neither driver nor userspace has set
50 operational state. Interface must be considered for user data as
51 setting operational state has not been implemented in every driver.
52IF_OPER_NOTPRESENT (1):
53 Unused in current kernel (notpresent interfaces normally disappear),
54 just a numerical placeholder.
55IF_OPER_DOWN (2):
56 Interface is unable to transfer data on L1, f.e. ethernet is not
57 plugged or interface is ADMIN down.
58IF_OPER_LOWERLAYERDOWN (3):
59 Interfaces stacked on an interface that is IF_OPER_DOWN show this
60 state (f.e. VLAN).
61IF_OPER_TESTING (4):
62 Unused in current kernel.
63IF_OPER_DORMANT (5):
64 Interface is L1 up, but waiting for an external event, f.e. for a
65 protocol to establish. (802.1X)
66IF_OPER_UP (6):
67 Interface is operational up and can be used.
68
69This TLV can also be queried via sysfs.
70
71TLV IFLA_LINKMODE
72
73contains link policy. This is needed for userspace interaction
74described below.
75
76This TLV can also be queried via sysfs.
77
78
793. Kernel driver API
80
81Kernel drivers have access to two flags that map to IFF_LOWER_UP and
82IFF_DORMANT. These flags can be set from everywhere, even from
83interrupts. It is guaranteed that only the driver has write access,
84however, if different layers of the driver manipulate the same flag,
85the driver has to provide the synchronisation needed.
86
87__LINK_STATE_NOCARRIER, maps to !IFF_LOWER_UP:
88
89The driver uses netif_carrier_on() to clear and netif_carrier_off() to
90set this flag. On netif_carrier_off(), the scheduler stops sending
91packets. The name 'carrier' and the inversion are historical, think of
92it as lower layer.
93
94netif_carrier_ok() can be used to query that bit.
95
96__LINK_STATE_DORMANT, maps to IFF_DORMANT:
97
98Set by the driver to express that the device cannot yet be used
99because some driver controlled protocol establishment has to
100complete. Corresponding functions are netif_dormant_on() to set the
101flag, netif_dormant_off() to clear it and netif_dormant() to query.
102
103On device allocation, networking core sets the flags equivalent to
104netif_carrier_ok() and !netif_dormant().
105
106
107Whenever the driver CHANGES one of these flags, a workqueue event is
108scheduled to translate the flag combination to IFLA_OPERSTATE as
109follows:
110
111!netif_carrier_ok():
112 IF_OPER_LOWERLAYERDOWN if the interface is stacked, IF_OPER_DOWN
113 otherwise. Kernel can recognise stacked interfaces because their
114 ifindex != iflink.
115
116netif_carrier_ok() && netif_dormant():
117 IF_OPER_DORMANT
118
119netif_carrier_ok() && !netif_dormant():
120 IF_OPER_UP if userspace interaction is disabled. Otherwise
121 IF_OPER_DORMANT with the possibility for userspace to initiate the
122 IF_OPER_UP transition afterwards.
123
124
1254. Setting from userspace
126
127Applications have to use the netlink interface to influence the
128RFC2863 operational state of an interface. Setting IFLA_LINKMODE to 1
129via RTM_SETLINK instructs the kernel that an interface should go to
130IF_OPER_DORMANT instead of IF_OPER_UP when the combination
131netif_carrier_ok() && !netif_dormant() is set by the
132driver. Afterwards, the userspace application can set IFLA_OPERSTATE
133to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
134netif_carrier_off() or netif_dormant_on(). Changes made by userspace
135are multicasted on the netlink group RTMGRP_LINK.
136
137So basically a 802.1X supplicant interacts with the kernel like this:
138
139-subscribe to RTMGRP_LINK
140-set IFLA_LINKMODE to 1 via RTM_SETLINK
141-query RTM_GETLINK once to get initial state
142-if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until
143 netlink multicast signals this state
144-do 802.1X, eventually abort if flags go down again
145-send RTM_SETLINK to set operstate to IF_OPER_UP if authentication
146 succeeds, IF_OPER_DORMANT otherwise
147-see how operstate and IFF_RUNNING is echoed via netlink multicast
148-set interface back to IF_OPER_DORMANT if 802.1X reauthentication
149 fails
150-restart if kernel changes IFF_LOWER_UP or IFF_DORMANT flag
151
152if supplicant goes down, bring back IFLA_LINKMODE to 0 and
153IFLA_OPERSTATE to a sane value.
154
155A routing daemon or dhcp client just needs to care for IFF_RUNNING or
156waiting for operstate to go IF_OPER_UP/IF_OPER_UNKNOWN before
157considering the interface / querying a DHCP address.
158
159
160For technical questions and/or comments please e-mail to Stefan Rompf
161(stefan at loplof.de).
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 4fc8e9874320..aaf99d5f0dad 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -254,7 +254,7 @@ and, the number of frames be
254 254
255 <block number> * <block size> / <frame size> 255 <block number> * <block size> / <frame size>
256 256
257Suposse the following parameters, which apply for 2.6 kernel and an 257Suppose the following parameters, which apply for 2.6 kernel and an
258i386 architecture: 258i386 architecture:
259 259
260 <size-max> = 131072 bytes 260 <size-max> = 131072 bytes
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index ec3d109d787a..76750fb9151a 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -138,7 +138,7 @@ This means that you have to read/write IP packets when you are using tun and
138ethernet frames when using tap. 138ethernet frames when using tap.
139 139
1405. What is the difference between BPF and TUN/TAP driver? 1405. What is the difference between BPF and TUN/TAP driver?
141BFP is an advanced packet filter. It can be attached to existing 141BPF is an advanced packet filter. It can be attached to existing
142network interface. It does not provide a virtual network interface. 142network interface. It does not provide a virtual network interface.
143A TUN/TAP driver does provide a virtual network interface and it is possible 143A TUN/TAP driver does provide a virtual network interface and it is possible
144to attach BPF to this interface. 144to attach BPF to this interface.
diff --git a/Documentation/networking/xfrm_sync.txt b/Documentation/networking/xfrm_sync.txt
new file mode 100644
index 000000000000..8be626f7c0b8
--- /dev/null
+++ b/Documentation/networking/xfrm_sync.txt
@@ -0,0 +1,166 @@
1
2The sync patches work is based on initial patches from
3Krisztian <hidden@balabit.hu> and others and additional patches
4from Jamal <hadi@cyberus.ca>.
5
6The end goal for syncing is to be able to insert attributes + generate
7events so that an SA can be safely moved from one machine to another
8for HA purposes.
9The idea is to synchronize the SA so that the takeover machine can do
10the processing of the SA as accurately as possible if it has access to it.
11
12We already have the ability to generate SA add/del/upd events.
13These patches add ability to sync and have accurate lifetime byte (to
14ensure proper decay of SAs) and replay counters to avoid replay attacks
15with minimal loss at failover time.
16This way a backup stays as closely uptodate as an active member.
17
18Because the above items change for every packet the SA receives,
19it is possible for a lot of the events to be generated.
20For this reason, we also add a nagle-like algorithm to restrict
21the events. i.e we are going to set thresholds to say "let me
22know if the replay sequence threshold is reached or 10 secs have passed"
23These thresholds are set system-wide via sysctls or can be updated
24per SA.
25
26The identified items that need to be synchronized are:
27- the lifetime byte counter
28note that: lifetime time limit is not important if you assume the failover
29machine is known ahead of time since the decay of the time countdown
30is not driven by packet arrival.
31- the replay sequence for both inbound and outbound
32
331) Message Structure
34----------------------
35
36nlmsghdr:aevent_id:optional-TLVs.
37
38The netlink message types are:
39
40XFRM_MSG_NEWAE and XFRM_MSG_GETAE.
41
42A XFRM_MSG_GETAE does not have TLVs.
43A XFRM_MSG_NEWAE will have at least two TLVs (as is
44discussed further below).
45
46aevent_id structure looks like:
47
48 struct xfrm_aevent_id {
49 struct xfrm_usersa_id sa_id;
50 __u32 flags;
51 };
52
53xfrm_usersa_id in this message layout identifies the SA.
54
55flags are used to indicate different things. The possible
56flags are:
57 XFRM_AE_RTHR=1, /* replay threshold*/
58 XFRM_AE_RVAL=2, /* replay value */
59 XFRM_AE_LVAL=4, /* lifetime value */
60 XFRM_AE_ETHR=8, /* expiry timer threshold */
61 XFRM_AE_CR=16, /* Event cause is replay update */
62 XFRM_AE_CE=32, /* Event cause is timer expiry */
63 XFRM_AE_CU=64, /* Event cause is policy update */
64
65How these flags are used is dependent on the direction of the
66message (kernel<->user) as well as the cause (config, query or event).
67This is described below in the different messages.
68
69The pid will be set appropriately in netlink to recognize direction
70(0 to the kernel and pid = processid that created the event
71when going from kernel to user space)
72
73A program needs to subscribe to multicast group XFRMNLGRP_AEVENTS
74to get notified of these events.
75
762) TLVS reflect the different parameters:
77-----------------------------------------
78
79a) byte value (XFRMA_LTIME_VAL)
80This TLV carries the running/current counter for byte lifetime since
81last event.
82
83b)replay value (XFRMA_REPLAY_VAL)
84This TLV carries the running/current counter for replay sequence since
85last event.
86
87c)replay threshold (XFRMA_REPLAY_THRESH)
88This TLV carries the threshold being used by the kernel to trigger events
89when the replay sequence is exceeded.
90
91d) expiry timer (XFRMA_ETIMER_THRESH)
92This is a timer value in milliseconds which is used as the nagle
93value to rate limit the events.
94
953) Default configurations for the parameters:
96----------------------------------------------
97
98By default these events should be turned off unless there is
99at least one listener registered to listen to the multicast
100group XFRMNLGRP_AEVENTS.
101
102Programs installing SAs will need to specify the two thresholds, however,
103in order to not change existing applications such as racoon
104we also provide default threshold values for these different parameters
105in case they are not specified.
106
107the two sysctls/proc entries are:
108a) /proc/sys/net/core/sysctl_xfrm_aevent_etime
109used to provide default values for the XFRMA_ETIMER_THRESH in incremental
110units of time of 100ms. The default is 10 (1 second)
111
112b) /proc/sys/net/core/sysctl_xfrm_aevent_rseqth
113used to provide default values for XFRMA_REPLAY_THRESH parameter
114in incremental packet count. The default is two packets.
115
1164) Message types
117----------------
118
119a) XFRM_MSG_GETAE issued by user-->kernel.
120XFRM_MSG_GETAE does not carry any TLVs.
121The response is a XFRM_MSG_NEWAE which is formatted based on what
122XFRM_MSG_GETAE queried for.
123The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
124*if XFRM_AE_RTHR flag is set, then XFRMA_REPLAY_THRESH is also retrieved
125*if XFRM_AE_ETHR flag is set, then XFRMA_ETIMER_THRESH is also retrieved
126
127b) XFRM_MSG_NEWAE is issued by either user space to configure
128or kernel to announce events or respond to a XFRM_MSG_GETAE.
129
130i) user --> kernel to configure a specific SA.
131any of the values or threshold parameters can be updated by passing the
132appropriate TLV.
133A response is issued back to the sender in user space to indicate success
134or failure.
135In the case of success, additionally an event with
136XFRM_MSG_NEWAE is also issued to any listeners as described in iii).
137
138ii) kernel->user direction as a response to XFRM_MSG_GETAE
139The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
140The threshold TLVs will be included if explicitly requested in
141the XFRM_MSG_GETAE message.
142
143iii) kernel->user to report as event if someone sets any values or
144thresholds for an SA using XFRM_MSG_NEWAE (as described in #i above).
145In such a case XFRM_AE_CU flag is set to inform the user that
146the change happened as a result of an update.
147The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
148
149iv) kernel->user to report event when replay threshold or a timeout
150is exceeded.
151In such a case either XFRM_AE_CR (replay exceeded) or XFRM_AE_CE (timeout
152happened) is set to inform the user what happened.
153Note the two flags are mutually exclusive.
154The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
155
156Exceptions to threshold settings
157--------------------------------
158
159If you have an SA that is getting hit by traffic in bursts such that
160there is a period where the timer threshold expires with no packets
161seen, then an odd behavior is seen as follows:
162The first packet arrival after a timer expiry will trigger a timeout
163aevent; i.e. we don't wait for a timeout period or a packet threshold
164to be reached. This is done for simplicity and efficiency reasons.
165
166-JHS
diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index 711210b38f5f..66bbbf1d1ef6 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -259,7 +259,17 @@ on the bus need to be capable of doing it, so this is something which needs
259to be handled by platform and generic code, not individual drivers. 259to be handled by platform and generic code, not individual drivers.
260 260
261 261
2628. Obsolete functions 2628. Vendor and device identifications
263~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
264For the future, let's avoid adding device ids to include/linux/pci_ids.h.
265
266PCI_VENDOR_ID_xxx for vendors, and a hex constant for device ids.
267
268Rationale: PCI_VENDOR_ID_xxx constants are re-used, but device ids are not.
269 Further, device ids are arbitrary hex numbers, normally used only in a
270 single location, the pci_device_id table.
271
2729. Obsolete functions
263~~~~~~~~~~~~~~~~~~~~~ 273~~~~~~~~~~~~~~~~~~~~~
264There are several functions which you might come across when trying to 274There are several functions which you might come across when trying to
265port an old driver to the new PCI interface. They are no longer present 275port an old driver to the new PCI interface. They are no longer present
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index 97420f08c786..4739c5c3face 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -1,5 +1,11 @@
1This file details changes in 2.6 which affect PCMCIA card driver authors: 1This file details changes in 2.6 which affect PCMCIA card driver authors:
2 2
3* New release helper (as of 2.6.17)
4 Instead of calling pcmcia_release_{configuration,io,irq,win}, all that's
5 necessary now is calling pcmcia_disable_device. As there is no valid
6 reason left to call pcmcia_release_io and pcmcia_release_irq, the
7 exports for them were removed.
8
3* Unify detach and REMOVAL event code, as well as attach and INSERTION 9* Unify detach and REMOVAL event code, as well as attach and INSERTION
4 code (as of 2.6.16) 10 code (as of 2.6.16)
5 void (*remove) (struct pcmcia_device *dev); 11 void (*remove) (struct pcmcia_device *dev);
diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
index af0f6eabfa1c..9529c9c9fd59 100644
--- a/Documentation/pnp.txt
+++ b/Documentation/pnp.txt
@@ -115,6 +115,9 @@ pnp_unregister_protocol
115pnp_register_driver 115pnp_register_driver
116- adds a PnP driver to the Plug and Play Layer 116- adds a PnP driver to the Plug and Play Layer
117- this includes driver model integration 117- this includes driver model integration
118- returns zero for success or a negative error number for failure; count
119 calls to the .add() method if you need to know how many devices bind to
120 the driver
118 121
119pnp_unregister_driver 122pnp_unregister_driver
120- removes a PnP driver from the Plug and Play Layer 123- removes a PnP driver from the Plug and Play Layer
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
index d18a57d1a531..43a889f8f08d 100644
--- a/Documentation/power/video.txt
+++ b/Documentation/power/video.txt
@@ -140,7 +140,7 @@ IBM TP T41p s3_bios (2), switch to X after resume
140IBM TP T42 s3_bios (2) 140IBM TP T42 s3_bios (2)
141IBM ThinkPad T42p (2373-GTG) s3_bios (2) 141IBM ThinkPad T42p (2373-GTG) s3_bios (2)
142IBM TP X20 ??? (*) 142IBM TP X20 ??? (*)
143IBM TP X30 s3_bios (2) 143IBM TP X30 s3_bios, s3_mode (4)
144IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight. 144IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
145IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results? 145IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
146IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4) 146IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4)
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index ee551c6ea235..217e51768b87 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -719,6 +719,11 @@ address which can extend beyond that limit.
719 - model : this is your board name/model 719 - model : this is your board name/model
720 - #address-cells : address representation for "root" devices 720 - #address-cells : address representation for "root" devices
721 - #size-cells: the size representation for "root" devices 721 - #size-cells: the size representation for "root" devices
722 - device_type : This property shouldn't be necessary. However, if
723 you decide to create a device_type for your root node, make sure it
724 is _not_ "chrp" unless your platform is a pSeries or PAPR compliant
725 one for 64-bit, or a CHRP-type machine for 32-bit as this will
726 be matched by the kernel this way.
722 727
723 Additionally, some recommended properties are: 728 Additionally, some recommended properties are:
724 729
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
new file mode 100644
index 000000000000..8529a17ffaa1
--- /dev/null
+++ b/Documentation/robust-futex-ABI.txt
@@ -0,0 +1,182 @@
1Started by Paul Jackson <pj@sgi.com>
2
3The robust futex ABI
4--------------------
5
6Robust_futexes provide a mechanism that is used in addition to normal
7futexes, for kernel assist of cleanup of held locks on task exit.
8
9The interesting data as to what futexes a thread is holding is kept on a
10linked list in user space, where it can be updated efficiently as locks
11are taken and dropped, without kernel intervention. The only additional
12kernel intervention required for robust_futexes above and beyond what is
13required for futexes is:
14
15 1) a one time call, per thread, to tell the kernel where its list of
16 held robust_futexes begins, and
17 2) internal kernel code at exit, to handle any listed locks held
18 by the exiting thread.
19
20The existing normal futexes already provide a "Fast Userspace Locking"
21mechanism, which handles uncontested locking without needing a system
22call, and handles contested locking by maintaining a list of waiting
23threads in the kernel. Options on the sys_futex(2) system call support
24waiting on a particular futex, and waking up the next waiter on a
25particular futex.
26
27For robust_futexes to work, the user code (typically in a library such
28as glibc linked with the application) has to manage and place the
29necessary list elements exactly as the kernel expects them. If it fails
30to do so, then improperly listed locks will not be cleaned up on exit,
31probably causing deadlock or other such failure of the other threads
32waiting on the same locks.
33
34A thread that anticipates possibly using robust_futexes should first
35issue the system call:
36
37 asmlinkage long
38 sys_set_robust_list(struct robust_list_head __user *head, size_t len);
39
40The pointer 'head' points to a structure in the threads address space
41consisting of three words. Each word is 32 bits on 32 bit arch's, or 64
42bits on 64 bit arch's, and local byte order. Each thread should have
43its own thread private 'head'.
44
45If a thread is running in 32 bit compatibility mode on a 64 bit native arch
46kernel, then it can actually have two such structures - one using 32 bit
47words for 32 bit compatibility mode, and one using 64 bit words for 64
48bit native mode. The kernel, if it is a 64 bit kernel supporting 32 bit
49compatibility mode, will attempt to process both lists on each task
50exit, if the corresponding sys_set_robust_list() call has been made to
51setup that list.
52
53 The first word in the memory structure at 'head' contains a
54 pointer to a single linked list of 'lock entries', one per lock,
55 as described below. If the list is empty, the pointer will point
56 to itself, 'head'. The last 'lock entry' points back to the 'head'.
57
58 The second word, called 'offset', specifies the offset from the
59 address of the associated 'lock entry', plus or minus, of what will
60 be called the 'lock word', from that 'lock entry'. The 'lock word'
61 is always a 32 bit word, unlike the other words above. The 'lock
62 word' holds 3 flag bits in the upper 3 bits, and the thread id (TID)
63 of the thread holding the lock in the bottom 29 bits. See further
64 below for a description of the flag bits.
65
66 The third word, called 'list_op_pending', contains a transient copy of
67 the address of the 'lock entry', during list insertion and removal,
68 and is needed to correctly resolve races should a thread exit while
69 in the middle of a locking or unlocking operation.
70
71Each 'lock entry' on the single linked list starting at 'head' consists
72of just a single word, pointing to the next 'lock entry', or back to
73'head' if there are no more entries. In addition, nearby to each 'lock
74entry', at an offset from the 'lock entry' specified by the 'offset'
75word, is one 'lock word'.
76
77The 'lock word' is always 32 bits, and is intended to be the same 32 bit
78lock variable used by the futex mechanism, in conjunction with
79robust_futexes. The kernel will only be able to wakeup the next thread
80waiting for a lock on a threads exit if that next thread used the futex
81mechanism to register the address of that 'lock word' with the kernel.
82
83For each futex lock currently held by a thread, if it wants this
84robust_futex support for exit cleanup of that lock, it should have one
85'lock entry' on this list, with its associated 'lock word' at the
86specified 'offset'. Should a thread die while holding any such locks,
87the kernel will walk this list, mark any such locks with a bit
88indicating their holder died, and wakeup the next thread waiting for
89that lock using the futex mechanism.
90
91When a thread has invoked the above system call to indicate it
92anticipates using robust_futexes, the kernel stores the passed in 'head'
93pointer for that task. The task may retrieve that value later on by
94using the system call:
95
96 asmlinkage long
97 sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
98 size_t __user *len_ptr);
99
100It is anticipated that threads will use robust_futexes embedded in
101larger, user level locking structures, one per lock. The kernel
102robust_futex mechanism doesn't care what else is in that structure, so
103long as the 'offset' to the 'lock word' is the same for all
104robust_futexes used by that thread. The thread should link those locks
105it currently holds using the 'lock entry' pointers. It may also have
106other links between the locks, such as the reverse side of a double
107linked list, but that doesn't matter to the kernel.
108
109By keeping its locks linked this way, on a list starting with a 'head'
110pointer known to the kernel, the kernel can provide to a thread the
111essential service available for robust_futexes, which is to help clean
112up locks held at the time of (a perhaps unexpected) exit.
113
114Actual locking and unlocking, during normal operations, is handled
115entirely by user level code in the contending threads, and by the
116existing futex mechanism to wait for, and wakeup, locks. The kernels
117only essential involvement in robust_futexes is to remember where the
118list 'head' is, and to walk the list on thread exit, handling locks
119still held by the departing thread, as described below.
120
121There may exist thousands of futex lock structures in a threads shared
122memory, on various data structures, at a given point in time. Only those
123lock structures for locks currently held by that thread should be on
124that thread's robust_futex linked lock list at a given time.
125
126A given futex lock structure in a user shared memory region may be held
127at different times by any of the threads with access to that region. The
128thread currently holding such a lock, if any, is marked with the threads
129TID in the lower 29 bits of the 'lock word'.
130
131When adding or removing a lock from its list of held locks, in order for
132the kernel to correctly handle lock cleanup regardless of when the task
133exits (perhaps it gets an unexpected signal 9 in the middle of
134manipulating this list), the user code must observe the following
135protocol on 'lock entry' insertion and removal:
136
137On insertion:
138 1) set the 'list_op_pending' word to the address of the 'lock entry'
139 to be inserted,
140 2) acquire the futex lock,
141 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
142 of the 'lock word', to the linked list starting at 'head', and
143 4) clear the 'list_op_pending' word.
144
145On removal:
146 1) set the 'list_op_pending' word to the address of the 'lock word'
147 to be removed,
148 2) remove the lock entry for this lock from the 'head' list,
149 3) release the futex lock, and
150 4) clear the 'list_op_pending' word.
151
152On exit, the kernel will consider the address stored in
153'list_op_pending' and the address of each 'lock word' found by walking
154the list starting at 'head'. For each such address, if the bottom 29
155bits of the 'lock word' at offset 'offset' from that address equals the
156exiting threads TID, then the kernel will do two things:
157
158 1) if bit 31 (0x80000000) is set in that word, then attempt a futex
159 wakeup on that address, which will waken the next thread that has
160 used the futex mechanism to wait on that address, and
161 2) atomically set bit 30 (0x40000000) in the 'lock word'.
162
163In the above, bit 31 was set by futex waiters on that lock to indicate
164they were waiting, and bit 30 is set by the kernel to indicate that the
165lock owner died holding the lock.
166
167The kernel exit code will silently stop scanning the list further if at
168any point:
169
170 1) the 'head' pointer or a subsequent linked list pointer
171 is not a valid address of a user space word
172 2) the calculated location of the 'lock word' (address plus
173 'offset') is not the valid address of a 32 bit user space
174 word
175 3) if the list contains more than 1 million (subject to
176 future kernel configuration changes) elements.
177
178When the kernel sees a list entry whose 'lock word' doesn't have the
179current threads TID in the lower 29 bits, it does nothing with that
180entry, and goes on to the next entry.
181
182Bit 29 (0x20000000) of the 'lock word' is reserved for future use.
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
new file mode 100644
index 000000000000..df82d75245a0
--- /dev/null
+++ b/Documentation/robust-futexes.txt
@@ -0,0 +1,218 @@
1Started by: Ingo Molnar <mingo@redhat.com>
2
3Background
4----------
5
6What are robust futexes? To answer that, we first need to understand
7what futexes are: normal futexes are special types of locks that in the
8noncontended case can be acquired/released from userspace without having
9to enter the kernel.
10
11A futex is in essence a user-space address, e.g. a 32-bit lock variable
12field. If userspace notices contention (the lock is already owned and
13someone else wants to grab it too) then the lock is marked with a value
14that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT)
15syscall is used to wait for the other guy to release it. The kernel
16creates a 'futex queue' internally, so that it can later on match up the
17waiter with the waker - without them having to know about each other.
18When the owner thread releases the futex, it notices (via the variable
19value) that there were waiter(s) pending, and does the
20sys_futex(FUTEX_WAKE) syscall to wake them up. Once all waiters have
21taken and released the lock, the futex is again back to 'uncontended'
22state, and there's no in-kernel state associated with it. The kernel
23completely forgets that there ever was a futex at that address. This
24method makes futexes very lightweight and scalable.
25
26"Robustness" is about dealing with crashes while holding a lock: if a
27process exits prematurely while holding a pthread_mutex_t lock that is
28also shared with some other process (e.g. yum segfaults while holding a
29pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need
30to be notified that the last owner of the lock exited in some irregular
31way.
32
33To solve such types of problems, "robust mutex" userspace APIs were
34created: pthread_mutex_lock() returns an error value if the owner exits
35prematurely - and the new owner can decide whether the data protected by
36the lock can be recovered safely.
37
38There is a big conceptual problem with futex based mutexes though: it is
39the kernel that destroys the owner task (e.g. due to a SEGFAULT), but
40the kernel cannot help with the cleanup: if there is no 'futex queue'
41(and in most cases there is none, futexes being fast lightweight locks)
42then the kernel has no information to clean up after the held lock!
43Userspace has no chance to clean up after the lock either - userspace is
44the one that crashes, so it has no opportunity to clean up. Catch-22.
45
46In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot
47is needed to release that futex based lock. This is one of the leading
48bugreports against yum.
49
50To solve this problem, the traditional approach was to extend the vma
51(virtual memory area descriptor) concept to have a notion of 'pending
52robust futexes attached to this area'. This approach requires 3 new
53syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and
54FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether
55they have a robust_head set. This approach has two fundamental problems
56left:
57
58 - it has quite complex locking and race scenarios. The vma-based
59 approach had been pending for years, but they are still not completely
60 reliable.
61
62 - they have to scan _every_ vma at sys_exit() time, per thread!
63
64The second disadvantage is a real killer: pthread_exit() takes around 1
65microsecond on Linux, but with thousands (or tens of thousands) of vmas
66every pthread_exit() takes a millisecond or more, also totally
67destroying the CPU's L1 and L2 caches!
68
69This is very much noticeable even for normal process sys_exit_group()
70calls: the kernel has to do the vma scanning unconditionally! (this is
71because the kernel has no knowledge about how many robust futexes there
72are to be cleaned up, because a robust futex might have been registered
73in another task, and the futex variable might have been simply mmap()-ed
74into this process's address space).
75
76This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that
77normal kernels can turn it off, but worse than that: the overhead makes
78robust futexes impractical for any type of generic Linux distribution.
79
80So something had to be done.
81
82New approach to robust futexes
83------------------------------
84
85At the heart of this new approach there is a per-thread private list of
86robust locks that userspace is holding (maintained by glibc) - which
87userspace list is registered with the kernel via a new syscall [this
88registration happens at most once per thread lifetime]. At do_exit()
89time, the kernel checks this user-space list: are there any robust futex
90locks to be cleaned up?
91
92In the common case, at do_exit() time, there is no list registered, so
93the cost of robust futexes is just a simple current->robust_list != NULL
94comparison. If the thread has registered a list, then normally the list
95is empty. If the thread/process crashed or terminated in some incorrect
96way then the list might be non-empty: in this case the kernel carefully
97walks the list [not trusting it], and marks all locks that are owned by
98this thread with the FUTEX_OWNER_DIED bit, and wakes up one waiter (if
99any).
100
101The list is guaranteed to be private and per-thread at do_exit() time,
102so it can be accessed by the kernel in a lockless way.
103
104There is one race possible though: since adding to and removing from the
105list is done after the futex is acquired by glibc, there is a few
106instructions window for the thread (or process) to die there, leaving
107the futex hung. To protect against this possibility, userspace (glibc)
108also maintains a simple per-thread 'list_op_pending' field, to allow the
109kernel to clean up if the thread dies after acquiring the lock, but just
110before it could have added itself to the list. Glibc sets this
111list_op_pending field before it tries to acquire the futex, and clears
112it after the list-add (or list-remove) has finished.
113
114That's all that is needed - all the rest of robust-futex cleanup is done
115in userspace [just like with the previous patches].
116
117Ulrich Drepper has implemented the necessary glibc support for this new
118mechanism, which fully enables robust mutexes.
119
120Key differences of this userspace-list based approach, compared to the
121vma based method:
122
123 - it's much, much faster: at thread exit time, there's no need to loop
124 over every vma (!), which the VM-based method has to do. Only a very
125 simple 'is the list empty' op is done.
126
127 - no VM changes are needed - 'struct address_space' is left alone.
128
129 - no registration of individual locks is needed: robust mutexes don't
130 need any extra per-lock syscalls. Robust mutexes thus become a very
131 lightweight primitive - so they don't force the application designer
132 to make a hard choice between performance and robustness - robust
133 mutexes are just as fast.
134
135 - no per-lock kernel allocation happens.
136
137 - no resource limits are needed.
138
139 - no kernel-space recovery call (FUTEX_RECOVER) is needed.
140
141 - the implementation and the locking is "obvious", and there are no
142 interactions with the VM.
143
144Performance
145-----------
146
147I have benchmarked the time needed for the kernel to process a list of 1
148million (!) held locks, using the new method [on a 2GHz CPU]:
149
150 - with FUTEX_WAITERS set [contended mutex]: 130 msecs
151 - without FUTEX_WAITERS set [uncontended mutex]: 30 msecs
152
153I have also measured an approach where glibc does the lock notification
154[which it currently does for !pshared robust mutexes], and that took 256
155msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls
156userspace had to do.
157
158(1 million held locks are unheard of - we expect at most a handful of
159locks to be held at a time. Nevertheless it's nice to know that this
160approach scales nicely.)
161
162Implementation details
163----------------------
164
165The patch adds two new syscalls: one to register the userspace list, and
166one to query the registered list pointer:
167
168 asmlinkage long
169 sys_set_robust_list(struct robust_list_head __user *head,
170 size_t len);
171
172 asmlinkage long
173 sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
174 size_t __user *len_ptr);
175
176List registration is very fast: the pointer is simply stored in
177current->robust_list. [Note that in the future, if robust futexes become
178widespread, we could extend sys_clone() to register a robust-list head
179for new threads, without the need of another syscall.]
180
181So there is virtually zero overhead for tasks not using robust futexes,
182and even for robust futex users, there is only one extra syscall per
183thread lifetime, and the cleanup operation, if it happens, is fast and
184straightforward. The kernel doesn't have any internal distinction between
185robust and normal futexes.
186
187If a futex is found to be held at exit time, the kernel sets the
188following bit of the futex word:
189
190 #define FUTEX_OWNER_DIED 0x40000000
191
192and wakes up the next futex waiter (if any). User-space does the rest of
193the cleanup.
194
195Otherwise, robust futexes are acquired by glibc by putting the TID into
196the futex field atomically. Waiters set the FUTEX_WAITERS bit:
197
198 #define FUTEX_WAITERS 0x80000000
199
200and the remaining bits are for the TID.
201
202Testing, architecture support
203-----------------------------
204
205I've tested the new syscalls on x86 and x86_64, and have made sure the
206parsing of the userspace list is robust [ ;-) ] even if the list is
207deliberately corrupted.
208
209i386 and x86_64 syscalls are wired up at the moment, and Ulrich has
210tested the new glibc code (on x86_64 and i386), and it works for his
211robust-mutex testcases.
212
213All other architectures should build just fine too - but they won't have
214the new syscalls yet.
215
216Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
217inline function before wiring up the syscalls (that function returns
218-ENOSYS right now).
diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt
index 2b5d4434fa5a..5f757c8cf979 100644
--- a/Documentation/rpc-cache.txt
+++ b/Documentation/rpc-cache.txt
@@ -1,4 +1,4 @@
1This document gives a brief introduction to the caching 1 This document gives a brief introduction to the caching
2mechanisms in the sunrpc layer that is used, in particular, 2mechanisms in the sunrpc layer that is used, in particular,
3for NFS authentication. 3for NFS authentication.
4 4
@@ -25,25 +25,17 @@ The common code handles such things as:
25 - supporting 'NEGATIVE' as well as positive entries 25 - supporting 'NEGATIVE' as well as positive entries
26 - allowing an EXPIRED time on cache items, and removing 26 - allowing an EXPIRED time on cache items, and removing
27 items after they expire, and are no longer in-use. 27 items after they expire, and are no longer in-use.
28
29 Future code extensions are expect to handle
30 - making requests to user-space to fill in cache entries 28 - making requests to user-space to fill in cache entries
31 - allowing user-space to directly set entries in the cache 29 - allowing user-space to directly set entries in the cache
32 - delaying RPC requests that depend on as-yet incomplete 30 - delaying RPC requests that depend on as-yet incomplete
33 cache entries, and replaying those requests when the cache entry 31 cache entries, and replaying those requests when the cache entry
34 is complete. 32 is complete.
35 - maintaining last-access times on cache entries 33 - clean out old entries as they expire.
36 - clean out old entries when the caches become full
37
38The code for performing a cache lookup is also common, but in the form
39of a template. i.e. a #define.
40Each cache defines a lookup function by using the DefineCacheLookup
41macro, or the simpler DefineSimpleCacheLookup macro
42 34
43Creating a Cache 35Creating a Cache
44---------------- 36----------------
45 37
461/ A cache needs a datum to cache. This is in the form of a 381/ A cache needs a datum to store. This is in the form of a
47 structure definition that must contain a 39 structure definition that must contain a
48 struct cache_head 40 struct cache_head
49 as an element, usually the first. 41 as an element, usually the first.
@@ -51,35 +43,69 @@ Creating a Cache
51 Each cache element is reference counted and contains 43 Each cache element is reference counted and contains
52 expiry and update times for use in cache management. 44 expiry and update times for use in cache management.
532/ A cache needs a "cache_detail" structure that 452/ A cache needs a "cache_detail" structure that
54 describes the cache. This stores the hash table, and some 46 describes the cache. This stores the hash table, some
55 parameters for cache management. 47 parameters for cache management, and some operations detailing how
563/ A cache needs a lookup function. This is created using 48 to work with particular cache items.
57 the DefineCacheLookup macro. This lookup function is used both 49 The operations required are:
58 to find entries and to update entries. The normal mode for 50 struct cache_head *alloc(void)
59 updating an entry is to replace the old entry with a new 51 This simply allocates appropriate memory and returns
60 entry. However it is possible to allow update-in-place 52 a pointer to the cache_detail embedded within the
61 for those caches where it makes sense (no atomicity issues 53 structure
62 or indirect reference counting issue) 54 void cache_put(struct kref *)
634/ A cache needs to be registered using cache_register(). This 55 This is called when the last reference to an item is
64 includes in on a list of caches that will be regularly 56 dropped. The pointer passed is to the 'ref' field
65 cleaned to discard old data. For this to work, some 57 in the cache_head. cache_put should release any
66 thread must periodically call cache_clean 58 references created by 'cache_init' and, if CACHE_VALID
67 59 is set, any references created by cache_update.
60 It should then release the memory allocated by
61 'alloc'.
62 int match(struct cache_head *orig, struct cache_head *new)
63 test if the keys in the two structures match. Return
64 1 if they do, 0 if they don't.
65 void init(struct cache_head *orig, struct cache_head *new)
66 Set the 'key' fields in 'new' from 'orig'. This may
67 include taking references to shared objects.
68 void update(struct cache_head *orig, struct cache_head *new)
69 Set the 'content' fields in 'new' from 'orig'.
70 int cache_show(struct seq_file *m, struct cache_detail *cd,
71 struct cache_head *h)
72 Optional. Used to provide a /proc file that lists the
73 contents of a cache. This should show one item,
74 usually on just one line.
75 int cache_request(struct cache_detail *cd, struct cache_head *h,
76 char **bpp, int *blen)
77 Format a request to be sent to user-space for an item
78 to be instantiated. *bpp is a buffer of size *blen.
79 bpp should be moved forward over the encoded message,
80 and *blen should be reduced to show how much free
81 space remains. Return 0 on success or <0 if not
82 enough room or other problem.
83 int cache_parse(struct cache_detail *cd, char *buf, int len)
84 A message from user space has arrived to fill out a
85 cache entry. It is in 'buf' of length 'len'.
86 cache_parse should parse this, find the item in the
87 cache with sunrpc_cache_lookup, and update the item
88 with sunrpc_cache_update.
89
90
913/ A cache needs to be registered using cache_register(). This
92 includes it on a list of caches that will be regularly
93 cleaned to discard old data.
94
68Using a cache 95Using a cache
69------------- 96-------------
70 97
71To find a value in a cache, call the lookup function passing it a the 98To find a value in a cache, call sunrpc_cache_lookup passing a pointer
72datum which contains key, and possibly content, and a flag saying 99to the cache_head in a sample item with the 'key' fields filled in.
73whether to update the cache with new data from the datum. Depending 100This will be passed to ->match to identify the target entry. If no
74on how the cache lookup function was defined, it may take an extra 101entry is found, a new entry will be created, added to the cache, and
75argument to identify the particular cache in question. 102marked as not containing valid data.
76 103
77Except in cases of kmalloc failure, the lookup function 104The item returned is typically passed to cache_check which will check
78will return a new datum which will store the key and 105if the data is valid, and may initiate an up-call to get fresh data.
79may contain valid content, or may not. 106cache_check will return -ENOENT if the entry is negative or if an up
80This datum is typically passed to cache_check which determines the 107call is needed but not possible, -EAGAIN if an upcall is pending,
81validity of the datum and may later initiate an upcall to fill 108or 0 if the data is valid.
82in the data.
83 109
84cache_check can be passed a "struct cache_req *". This structure is 110cache_check can be passed a "struct cache_req *". This structure is
85typically embedded in the actual request and can be used to create a 111typically embedded in the actual request and can be used to create a
@@ -90,6 +116,13 @@ item does become valid, the deferred copy of the request will be
90revisited (->revisit). It is expected that this method will 116revisited (->revisit). It is expected that this method will
91reschedule the request for processing. 117reschedule the request for processing.
92 118
119The value returned by sunrpc_cache_lookup can also be passed to
120sunrpc_cache_update to set the content for the item. A second item is
121passed which should hold the content. If the item found by _lookup
122has valid data, then it is discarded and a new item is created. This
123saves any user of an item from worrying about content changing while
124it is being inspected. If the item found by _lookup does not contain
125valid data, then the content is copied across and CACHE_VALID is set.
93 126
94Populating a cache 127Populating a cache
95------------------ 128------------------
@@ -114,8 +147,8 @@ should be create or updated to have the given content, and the
114expiry time should be set on that item. 147expiry time should be set on that item.
115 148
116Reading from a channel is a bit more interesting. When a cache 149Reading from a channel is a bit more interesting. When a cache
117lookup fail, or when it suceeds but finds an entry that may soon 150lookup fails, or when it succeeds but finds an entry that may soon
118expiry, a request is lodged for that cache item to be updated by 151expire, a request is lodged for that cache item to be updated by
119user-space. These requests appear in the channel file. 152user-space. These requests appear in the channel file.
120 153
121Successive reads will return successive requests. 154Successive reads will return successive requests.
@@ -130,7 +163,7 @@ Thus a user-space helper is likely to:
130 write a response 163 write a response
131 loop. 164 loop.
132 165
133If it dies and needs to be restarted, any requests that have not be 166If it dies and needs to be restarted, any requests that have not been
134answered will still appear in the file and will be read by the new 167answered will still appear in the file and will be read by the new
135instance of the helper. 168instance of the helper.
136 169
@@ -142,10 +175,9 @@ Each cache should also define a "cache_request" method which
142takes a cache item and encodes a request into the buffer 175takes a cache item and encodes a request into the buffer
143provided. 176provided.
144 177
145
146Note: If a cache has no active readers on the channel, and has had not 178Note: If a cache has no active readers on the channel, and has had not
147active readers for more than 60 seconds, further requests will not be 179active readers for more than 60 seconds, further requests will not be
148added to the channel but instead all looks that do not find a valid 180added to the channel but instead all lookups that do not find a valid
149entry will fail. This is partly for backward compatibility: The 181entry will fail. This is partly for backward compatibility: The
150previous nfs exports table was deemed to be authoritative and a 182previous nfs exports table was deemed to be authoritative and a
151failed lookup meant a definite 'no'. 183failed lookup meant a definite 'no'.
@@ -154,18 +186,17 @@ request/response format
154----------------------- 186-----------------------
155 187
156While each cache is free to use its own format for requests 188While each cache is free to use its own format for requests
157and responses over channel, the following is recommended are 189and responses over channel, the following is recommended as
158appropriate and support routines are available to help: 190appropriate and support routines are available to help:
159Each request or response record should be printable ASCII 191Each request or response record should be printable ASCII
160with precisely one newline character which should be at the end. 192with precisely one newline character which should be at the end.
161Fields within the record should be separated by spaces, normally one. 193Fields within the record should be separated by spaces, normally one.
162If spaces, newlines, or nul characters are needed in a field they 194If spaces, newlines, or nul characters are needed in a field they
163much be quotes. two mechanisms are available: 195must be quoted. Two mechanisms are available:
1641/ If a field begins '\x' then it must contain an even number of 1961/ If a field begins '\x' then it must contain an even number of
165 hex digits, and pairs of these digits provide the bytes in the 197 hex digits, and pairs of these digits provide the bytes in the
166 field. 198 field.
1672/ otherwise a \ in the field must be followed by 3 octal digits 1992/ otherwise a \ in the field must be followed by 3 octal digits
168 which give the code for a byte. Other characters are treated 200 which give the code for a byte. Other characters are treated
169 as them selves. At the very least, space, newlines nul, and 201 as themselves. At the very least, space, newline, nul, and
170 '\' must be quoted in this way. 202 '\' must be quoted in this way.
171
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index 09f6300eda4b..c173806c91fa 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -1,3 +1,28 @@
1Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
2Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
3Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
4
51. Fixed a bug in megaraid_reset_handler().
6 Customer reported "Unable to handle kernel NULL pointer dereference
7 at virtual address 00000000" when system goes to reset condition
8 for some reason. It happened randomly.
9 Root Cause: in the megaraid_reset_handler(), there is possibility not
10 returning pending packets in the pend_list if there are multiple
11 pending packets.
12 Fix: Made the change in the driver so that it will return all packets
13 in the pend_list.
14
152. Added change request.
16 As found in the following URL, rmb() only didn't help the
17 problem. I had to increase the loop counter to 0xFFFFFF. (6 F's)
18 http://marc.theaimsgroup.com/?l=linux-scsi&m=110971060502497&w=2
19
20 I attached a patch for your reference, too.
21 Could you check and get this fix in your driver?
22
23 Best Regards,
24 Jun'ichi Nomura
25
1Release Date : Fri Nov 11 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com> 26Release Date : Fri Nov 11 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com>
2Current Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) 27Current Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
3Older Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module) 28Older Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module)
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 331afd791cbb..ce767b90bb0d 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -19,9 +19,9 @@ TABLE OF CONTENTS
19 [2-1-1] Overview 19 [2-1-1] Overview
20 [2-1-2] Flow of scmds through EH 20 [2-1-2] Flow of scmds through EH
21 [2-1-3] Flow of control 21 [2-1-3] Flow of control
22 [2-2] EH through hostt->eh_strategy_handler() 22 [2-2] EH through transportt->eh_strategy_handler()
23 [2-2-1] Pre hostt->eh_strategy_handler() SCSI midlayer conditions 23 [2-2-1] Pre transportt->eh_strategy_handler() SCSI midlayer conditions
24 [2-2-2] Post hostt->eh_strategy_handler() SCSI midlayer conditions 24 [2-2-2] Post transportt->eh_strategy_handler() SCSI midlayer conditions
25 [2-2-3] Things to consider 25 [2-2-3] Things to consider
26 26
27 27
@@ -413,9 +413,9 @@ scmd->allowed.
413 layer of failure of the scmds. 413 layer of failure of the scmds.
414 414
415 415
416[2-2] EH through hostt->eh_strategy_handler() 416[2-2] EH through transportt->eh_strategy_handler()
417 417
418 hostt->eh_strategy_handler() is invoked in the place of 418 transportt->eh_strategy_handler() is invoked in the place of
419scsi_unjam_host() and it is responsible for whole recovery process. 419scsi_unjam_host() and it is responsible for whole recovery process.
420On completion, the handler should have made lower layers forget about 420On completion, the handler should have made lower layers forget about
421all failed scmds and either ready for new commands or offline. Also, 421all failed scmds and either ready for new commands or offline. Also,
@@ -424,7 +424,7 @@ SCSI midlayer. IOW, of the steps described in [2-1-2], all steps
424except for #1 must be implemented by eh_strategy_handler(). 424except for #1 must be implemented by eh_strategy_handler().
425 425
426 426
427[2-2-1] Pre hostt->eh_strategy_handler() SCSI midlayer conditions 427[2-2-1] Pre transportt->eh_strategy_handler() SCSI midlayer conditions
428 428
429 The following conditions are true on entry to the handler. 429 The following conditions are true on entry to the handler.
430 430
@@ -437,7 +437,7 @@ except for #1 must be implemented by eh_strategy_handler().
437 - shost->host_failed == shost->host_busy 437 - shost->host_failed == shost->host_busy
438 438
439 439
440[2-2-2] Post hostt->eh_strategy_handler() SCSI midlayer conditions 440[2-2-2] Post transportt->eh_strategy_handler() SCSI midlayer conditions
441 441
442 The following conditions must be true on exit from the handler. 442 The following conditions must be true on exit from the handler.
443 443
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 8bbae3e1abdf..75a535a975c3 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -804,7 +804,6 @@ Summary:
804 eh_bus_reset_handler - issue SCSI bus reset 804 eh_bus_reset_handler - issue SCSI bus reset
805 eh_device_reset_handler - issue SCSI device reset 805 eh_device_reset_handler - issue SCSI device reset
806 eh_host_reset_handler - reset host (host bus adapter) 806 eh_host_reset_handler - reset host (host bus adapter)
807 eh_strategy_handler - driver supplied alternate to scsi_unjam_host()
808 info - supply information about given host 807 info - supply information about given host
809 ioctl - driver can respond to ioctls 808 ioctl - driver can respond to ioctls
810 proc_info - supports /proc/scsi/{driver_name}/{host_no} 809 proc_info - supports /proc/scsi/{driver_name}/{host_no}
@@ -970,24 +969,6 @@ Details:
970 969
971 970
972/** 971/**
973 * eh_strategy_handler - driver supplied alternate to scsi_unjam_host()
974 * @shp: host on which error has occurred
975 *
976 * Returns TRUE if host unjammed, else FALSE.
977 *
978 * Locks: none
979 *
980 * Calling context: kernel thread
981 *
982 * Notes: Invoked from scsi_eh thread. LLD supplied alternate to
983 * scsi_unjam_host() found in scsi_error.c
984 *
985 * Optionally defined in: LLD
986 **/
987 int eh_strategy_handler(struct Scsi_Host * shp)
988
989
990/**
991 * info - supply information about given host: driver name plus data 972 * info - supply information about given host: driver name plus data
992 * to distinguish given host 973 * to distinguish given host
993 * @shp: host to supply information about 974 * @shp: host to supply information about
diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index 42ef9970bc86..88ad615dd338 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -3,14 +3,11 @@
3 -------------------- 3 --------------------
4 4
5 5
6 $Id: driver,v 1.10 2002/07/22 15:27:30 rmk Exp $
7
8
9This document is meant as a brief overview of some aspects of the new serial 6This document is meant as a brief overview of some aspects of the new serial
10driver. It is not complete, any questions you have should be directed to 7driver. It is not complete, any questions you have should be directed to
11<rmk@arm.linux.org.uk> 8<rmk@arm.linux.org.uk>
12 9
13The reference implementation is contained within serial_amba.c. 10The reference implementation is contained within amba_pl011.c.
14 11
15 12
16 13
@@ -31,6 +28,11 @@ The serial core provides a few helper functions. This includes identifing
31the correct port structure (via uart_get_console) and decoding command line 28the correct port structure (via uart_get_console) and decoding command line
32arguments (uart_parse_options). 29arguments (uart_parse_options).
33 30
31There is also a helper function (uart_write_console) which performs a
32character by character write, translating newlines to CRLF sequences.
33Driver writers are recommended to use this function rather than implementing
34their own version.
35
34 36
35Locking 37Locking
36------- 38-------
@@ -86,6 +88,7 @@ hardware.
86 - TIOCM_DTR DTR signal. 88 - TIOCM_DTR DTR signal.
87 - TIOCM_OUT1 OUT1 signal. 89 - TIOCM_OUT1 OUT1 signal.
88 - TIOCM_OUT2 OUT2 signal. 90 - TIOCM_OUT2 OUT2 signal.
91 - TIOCM_LOOP Set the port into loopback mode.
89 If the appropriate bit is set, the signal should be driven 92 If the appropriate bit is set, the signal should be driven
90 active. If the bit is clear, the signal should be driven 93 active. If the bit is clear, the signal should be driven
91 inactive. 94 inactive.
@@ -141,6 +144,10 @@ hardware.
141 enable_ms(port) 144 enable_ms(port)
142 Enable the modem status interrupts. 145 Enable the modem status interrupts.
143 146
147 This method may be called multiple times. Modem status
148 interrupts should be disabled when the shutdown method is
149 called.
150
144 Locking: port->lock taken. 151 Locking: port->lock taken.
145 Interrupts: locally disabled. 152 Interrupts: locally disabled.
146 This call must not sleep 153 This call must not sleep
@@ -160,6 +167,8 @@ hardware.
160 state. Enable the port for reception. It should not activate 167 state. Enable the port for reception. It should not activate
161 RTS nor DTR; this will be done via a separate call to set_mctrl. 168 RTS nor DTR; this will be done via a separate call to set_mctrl.
162 169
170 This method will only be called when the port is initially opened.
171
163 Locking: port_sem taken. 172 Locking: port_sem taken.
164 Interrupts: globally disabled. 173 Interrupts: globally disabled.
165 174
@@ -169,6 +178,11 @@ hardware.
169 RTS nor DTR; this will have already been done via a separate 178 RTS nor DTR; this will have already been done via a separate
170 call to set_mctrl. 179 call to set_mctrl.
171 180
181 Drivers must not access port->info once this call has completed.
182
183 This method will only be called when there are no more users of
184 this port.
185
172 Locking: port_sem taken. 186 Locking: port_sem taken.
173 Interrupts: caller dependent. 187 Interrupts: caller dependent.
174 188
@@ -200,12 +214,13 @@ hardware.
200 The interaction of the iflag bits is as follows (parity error 214 The interaction of the iflag bits is as follows (parity error
201 given as an example): 215 given as an example):
202 Parity error INPCK IGNPAR 216 Parity error INPCK IGNPAR
203 None n/a n/a character received 217 n/a 0 n/a character received, marked as
204 Yes n/a 0 character discarded 218 TTY_NORMAL
205 Yes 0 1 character received, marked as 219 None 1 n/a character received, marked as
206 TTY_NORMAL 220 TTY_NORMAL
207 Yes 1 1 character received, marked as 221 Yes 1 0 character received, marked as
208 TTY_PARITY 222 TTY_PARITY
223 Yes 1 1 character discarded
209 224
210 Other flags may be used (eg, xon/xoff characters) if your 225 Other flags may be used (eg, xon/xoff characters) if your
211 hardware supports hardware "soft" flow control. 226 hardware supports hardware "soft" flow control.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 1def6049784c..0ee2c7dfc482 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -120,6 +120,34 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
120 enable - enable card 120 enable - enable card
121 - Default: enabled, for PCI and ISA PnP cards 121 - Default: enabled, for PCI and ISA PnP cards
122 122
123 Module snd-adlib
124 ----------------
125
126 Module for AdLib FM cards.
127
128 port - port # for OPL chip
129
130 This module supports multiple cards. It does not support autoprobe, so
131 the port must be specified. For actual AdLib FM cards it will be 0x388.
132 Note that this card does not have PCM support and no mixer; only FM
133 synthesis.
134
135 Make sure you have "sbiload" from the alsa-tools package available and,
136 after loading the module, find out the assigned ALSA sequencer port
137 number through "sbiload -l". Example output:
138
139 Port Client name Port name
140 64:0 OPL2 FM synth OPL2 FM Port
141
142 Load the std.sb and drums.sb patches also supplied by sbiload:
143
144 sbiload -p 64:0 std.sb drums.sb
145
146 If you use this driver to drive an OPL3, you can use std.o3 and drums.o3
147 instead. To have the card produce sound, use aplaymidi from alsa-utils:
148
149 aplaymidi -p 64:0 foo.mid
150
123 Module snd-ad1816a 151 Module snd-ad1816a
124 ------------------ 152 ------------------
125 153
@@ -190,6 +218,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
190 218
191 The power-management is supported. 219 The power-management is supported.
192 220
221 Module snd-als300
222 -----------------
223
224 Module for Avance Logic ALS300 and ALS300+
225
226 This module supports multiple cards.
227
228 The power-management is supported.
229
193 Module snd-als4000 230 Module snd-als4000
194 ------------------ 231 ------------------
195 232
@@ -701,6 +738,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
701 uniwill 3-jack 738 uniwill 3-jack
702 F1734 2-jack 739 F1734 2-jack
703 lg LG laptop (m1 express dual) 740 lg LG laptop (m1 express dual)
741 lg-lw LG LW20 laptop
704 test for testing/debugging purpose, almost all controls can be 742 test for testing/debugging purpose, almost all controls can be
705 adjusted. Appearing only when compiled with 743 adjusted. Appearing only when compiled with
706 $CONFIG_SND_DEBUG=y 744 $CONFIG_SND_DEBUG=y
@@ -1013,6 +1051,23 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1013 1051
1014 The power-management is supported. 1052 The power-management is supported.
1015 1053
1054 Module snd-miro
1055 ---------------
1056
1057 Module for Miro soundcards: miroSOUND PCM 1 pro,
1058 miroSOUND PCM 12,
1059 miroSOUND PCM 20 Radio.
1060
1061 port - Port # (0x530,0x604,0xe80,0xf40)
1062 irq - IRQ # (5,7,9,10,11)
1063 dma1 - 1st dma # (0,1,3)
1064 dma2 - 2nd dma # (0,1)
1065 mpu_port - MPU-401 port # (0x300,0x310,0x320,0x330)
1066 mpu_irq - MPU-401 irq # (5,7,9,10)
1067 fm_port - FM Port # (0x388)
1068 wss - enable WSS mode
1069 ide - enable onboard ide support
1070
1016 Module snd-mixart 1071 Module snd-mixart
1017 ----------------- 1072 -----------------
1018 1073
@@ -1202,6 +1257,20 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1202 1257
1203 The power-management is supported. 1258 The power-management is supported.
1204 1259
1260 Module snd-riptide
1261 ------------------
1262
1263 Module for Conexant Riptide chip
1264
1265 joystick_port - Joystick port # (default: 0x200)
1266 mpu_port - MPU401 port # (default: 0x330)
1267 opl3_port - OPL3 port # (default: 0x388)
1268
1269 This module supports multiple cards.
1270 The driver requires firmware loader support in the kernel.
1271 You need to install the firmware file "riptide.hex" to the standard
1272 firmware path (e.g. /lib/firmware).
1273
1205 Module snd-rme32 1274 Module snd-rme32
1206 ---------------- 1275 ----------------
1207 1276
diff --git a/Documentation/sound/alsa/Audiophile-Usb.txt b/Documentation/sound/alsa/Audiophile-Usb.txt
index 4692c8e77dc1..b535c2a198f8 100644
--- a/Documentation/sound/alsa/Audiophile-Usb.txt
+++ b/Documentation/sound/alsa/Audiophile-Usb.txt
@@ -1,4 +1,4 @@
1 Guide to using M-Audio Audiophile USB with ALSA and Jack v1.2 1 Guide to using M-Audio Audiophile USB with ALSA and Jack v1.3
2 ======================================================== 2 ========================================================
3 3
4 Thibault Le Meur <Thibault.LeMeur@supelec.fr> 4 Thibault Le Meur <Thibault.LeMeur@supelec.fr>
@@ -22,16 +22,16 @@ The device has 4 audio interfaces, and 2 MIDI ports:
22 * Midi In (Mi) 22 * Midi In (Mi)
23 * Midi Out (Mo) 23 * Midi Out (Mo)
24 24
25The internal DAC/ADC has the following caracteristics: 25The internal DAC/ADC has the following characteristics:
26* sample depth of 16 or 24 bits 26* sample depth of 16 or 24 bits
27* sample rate from 8kHz to 96kHz 27* sample rate from 8kHz to 96kHz
28* Two ports can't use different sample depths at the same time.Moreover, the 28* Two ports can't use different sample depths at the same time. Moreover, the
29Audiophile USB documentation gives the following Warning: "Please exit any 29Audiophile USB documentation gives the following Warning: "Please exit any
30audio application running before switching between bit depths" 30audio application running before switching between bit depths"
31 31
32Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be 32Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be
33activated at the same time depending on the audio mode selected: 33activated at the same time depending on the audio mode selected:
34 * 16-bit/48kHz ==> 4 channels in/ 4 channels out 34 * 16-bit/48kHz ==> 4 channels in/4 channels out
35 - Ai+Ao+Di+Do 35 - Ai+Ao+Di+Do
36 * 24-bit/48kHz ==> 4 channels in/2 channels out, 36 * 24-bit/48kHz ==> 4 channels in/2 channels out,
37 or 2 channels in/4 channels out 37 or 2 channels in/4 channels out
@@ -41,8 +41,8 @@ activated at the same time depending on the audio mode selected:
41 41
42Important facts about the Digital interface: 42Important facts about the Digital interface:
43-------------------------------------------- 43--------------------------------------------
44 * The Do port additionnaly supports surround-encoded AC-3 and DTS passthrough, 44 * The Do port additionally supports surround-encoded AC-3 and DTS passthrough,
45though I haven't tested it under linux 45though I haven't tested it under Linux
46 - Note that in this setup only the Do interface can be enabled 46 - Note that in this setup only the Do interface can be enabled
47 * Apart from recording an audio digital stream, enabling the Di port is a way 47 * Apart from recording an audio digital stream, enabling the Di port is a way
48to synchronize the device to an external sample clock 48to synchronize the device to an external sample clock
@@ -60,24 +60,23 @@ synchronization error (for instance sound played at an odd sample rate)
60The Audiophile USB MIDI ports will be automatically supported once the 60The Audiophile USB MIDI ports will be automatically supported once the
61following modules have been loaded: 61following modules have been loaded:
62 * snd-usb-audio 62 * snd-usb-audio
63 * snd-seq
64 * snd-seq-midi 63 * snd-seq-midi
65 64
66No additionnal setting is required. 65No additional setting is required.
67 66
682.2 - Audio ports 672.2 - Audio ports
69----------------- 68-----------------
70 69
71Audio functions of the Audiophile USB device are handled by the snd-usb-audio 70Audio functions of the Audiophile USB device are handled by the snd-usb-audio
72module. This module can work in a default mode (without any device-specific 71module. This module can work in a default mode (without any device-specific
73parameter), or in an advanced mode with the device-specific parameter called 72parameter), or in an "advanced" mode with the device-specific parameter called
74"device_setup". 73"device_setup".
75 74
762.2.1 - Default Alsa driver mode 752.2.1 - Default Alsa driver mode
77 76
78The default behaviour of the snd-usb-audio driver is to parse the device 77The default behavior of the snd-usb-audio driver is to parse the device
79capabilities at startup and enable all functions inside the device (including 78capabilities at startup and enable all functions inside the device (including
80all ports at any sample rates and any sample depths supported). This approach 79all ports at any supported sample rates and sample depths). This approach
81has the advantage to let the driver easily switch from sample rates/depths 80has the advantage to let the driver easily switch from sample rates/depths
82automatically according to the need of the application claiming the device. 81automatically according to the need of the application claiming the device.
83 82
@@ -114,9 +113,9 @@ gain).
114For people having this problem, the snd-usb-audio module has a new module 113For people having this problem, the snd-usb-audio module has a new module
115parameter called "device_setup". 114parameter called "device_setup".
116 115
1172.2.2.1 - Initializing the working mode of the Audiohile USB 1162.2.2.1 - Initializing the working mode of the Audiophile USB
118 117
119As far as the Audiohile USB device is concerned, this value let the user 118As far as the Audiophile USB device is concerned, this value let the user
120specify: 119specify:
121 * the sample depth 120 * the sample depth
122 * the sample rate 121 * the sample rate
@@ -174,20 +173,20 @@ The parameter can be given:
174 173
175IMPORTANT NOTE WHEN SWITCHING CONFIGURATION: 174IMPORTANT NOTE WHEN SWITCHING CONFIGURATION:
176------------------------------------------- 175-------------------------------------------
177 * You may need to _first_ intialize the module with the correct device_setup 176 * You may need to _first_ initialize the module with the correct device_setup
178 parameter and _only_after_ turn on the Audiophile USB device 177 parameter and _only_after_ turn on the Audiophile USB device
179 * This is especially true when switching the sample depth: 178 * This is especially true when switching the sample depth:
180 - first trun off the device 179 - first turn off the device
181 - de-register the snd-usb-audio module 180 - de-register the snd-usb-audio module (modprobe -r)
182 - change the device_setup parameter (by either manually reprobing the module 181 - change the device_setup parameter by changing the device_setup
183 or changing modprobe.conf) 182 option in /etc/modprobe.conf
184 - turn on the device 183 - turn on the device
185 184
1862.2.2.3 - Audiophile USB's device_setup structure 1852.2.2.3 - Audiophile USB's device_setup structure
187 186
188If you want to understand the device_setup magic numbers for the Audiophile 187If you want to understand the device_setup magic numbers for the Audiophile
189USB, you need some very basic understanding of binary computation. However, 188USB, you need some very basic understanding of binary computation. However,
190this is not required to use the parameter and you may skip thi section. 189this is not required to use the parameter and you may skip this section.
191 190
192The device_setup is one byte long and its structure is the following: 191The device_setup is one byte long and its structure is the following:
193 192
@@ -231,11 +230,11 @@ Caution:
231 230
2322.2.3 - USB implementation details for this device 2312.2.3 - USB implementation details for this device
233 232
234You may safely skip this section if you're not interrested in driver 233You may safely skip this section if you're not interested in driver
235development. 234development.
236 235
237This section describes some internals aspect of the device and summarize the 236This section describes some internal aspects of the device and summarize the
238data I got by usb-snooping the windows and linux drivers. 237data I got by usb-snooping the windows and Linux drivers.
239 238
240The M-Audio Audiophile USB has 7 USB Interfaces: 239The M-Audio Audiophile USB has 7 USB Interfaces:
241a "USB interface": 240a "USB interface":
@@ -277,9 +276,9 @@ Here is a short description of the AltSettings capabilities:
277 - 16-bit depth, 8-48kHz sample mode 276 - 16-bit depth, 8-48kHz sample mode
278 - Synch playback (Do), audio format type III IEC1937_AC-3 277 - Synch playback (Do), audio format type III IEC1937_AC-3
279 278
280In order to ensure a correct intialization of the device, the driver 279In order to ensure a correct initialization of the device, the driver
281_must_know_ how the device will be used: 280_must_know_ how the device will be used:
282 * if DTS is choosen, only Interface 2 with AltSet nb.6 must be 281 * if DTS is chosen, only Interface 2 with AltSet nb.6 must be
283 registered 282 registered
284 * if 96KHz only AltSets nb.1 of each interface must be selected 283 * if 96KHz only AltSets nb.1 of each interface must be selected
285 * if samples are using 24bits/48KHz then AltSet 2 must me used if 284 * if samples are using 24bits/48KHz then AltSet 2 must me used if
@@ -290,7 +289,7 @@ _must_know_ how the device will be used:
290 is not connected 289 is not connected
291 290
292When device_setup is given as a parameter to the snd-usb-audio module, the 291When device_setup is given as a parameter to the snd-usb-audio module, the
293parse_audio_enpoint function uses a quirk called 292parse_audio_endpoints function uses a quirk called
294"audiophile_skip_setting_quirk" in order to prevent AltSettings not 293"audiophile_skip_setting_quirk" in order to prevent AltSettings not
295corresponding to device_setup from being registered in the driver. 294corresponding to device_setup from being registered in the driver.
296 295
@@ -317,9 +316,8 @@ However you may see the following warning message:
317using the "default" ALSA device. This is less efficient than it could be. 316using the "default" ALSA device. This is less efficient than it could be.
318Consider using a hardware device instead rather than using the plug layer." 317Consider using a hardware device instead rather than using the plug layer."
319 318
320
3213.2 - Patching alsa to use direct pcm device 3193.2 - Patching alsa to use direct pcm device
322------------------------------------------- 320--------------------------------------------
323A patch for Jack by Andreas Steinmetz adds support for Big Endian devices. 321A patch for Jack by Andreas Steinmetz adds support for Big Endian devices.
324However it has not been included in the CVS tree. 322However it has not been included in the CVS tree.
325 323
@@ -331,3 +329,32 @@ After having applied the patch you can run jackd with the following command
331line: 329line:
332 % jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1 330 % jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
333 331
3323.3 - Getting 2 input and/or output interfaces in Jack
333------------------------------------------------------
334
335As you can see, starting the Jack server this way will only enable 1 stereo
336input (Di or Ai) and 1 stereo output (Ao or Do).
337
338This is due to the following restrictions:
339* Jack can only open one capture device and one playback device at a time
340* The Audiophile USB is seen as 2 (or three) Alsa devices: hw:1,0, hw:1,1
341 (and optionally hw:1,2)
342If you want to get Ai+Di and/or Ao+Do support with Jack, you would need to
343combine the Alsa devices into one logical "complex" device.
344
345If you want to give it a try, I recommend reading the information from
346this page: http://www.sound-man.co.uk/linuxaudio/ice1712multi.html
347It is related to another device (ice1712) but can be adapted to suit
348the Audiophile USB.
349
350Enabling multiple Audiophile USB interfaces for Jackd will certainly require:
351* patching Jack with the previously mentioned "Big Endian" patch
352* patching Jackd with the MMAP_COMPLEX patch (see the ice1712 page)
353* patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
354* defining a multi device (combination of hw:1,0 and hw:1,1) in your .asoundrc
355 file
356* starting jackd with this device
357
358I had no success in testing this for now, but this may be due to my OS
359configuration. If you have any success with this kind of setup, please
360drop me an email.
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index 6feef9e82b63..1faf76383bab 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1123,8 +1123,8 @@
1123 if ((err = pci_enable_device(pci)) < 0) 1123 if ((err = pci_enable_device(pci)) < 0)
1124 return err; 1124 return err;
1125 /* check PCI availability (28bit DMA) */ 1125 /* check PCI availability (28bit DMA) */
1126 if (pci_set_dma_mask(pci, 0x0fffffff) < 0 || 1126 if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 ||
1127 pci_set_consistent_dma_mask(pci, 0x0fffffff) < 0) { 1127 pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) {
1128 printk(KERN_ERR "error to set 28bit mask DMA\n"); 1128 printk(KERN_ERR "error to set 28bit mask DMA\n");
1129 pci_disable_device(pci); 1129 pci_disable_device(pci);
1130 return -ENXIO; 1130 return -ENXIO;
@@ -1172,7 +1172,7 @@
1172 } 1172 }
1173 1173
1174 /* PCI IDs */ 1174 /* PCI IDs */
1175 static struct pci_device_id snd_mychip_ids[] = { 1175 static struct pci_device_id snd_mychip_ids[] __devinitdata = {
1176 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, 1176 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
1177 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, 1177 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
1178 .... 1178 ....
@@ -1216,7 +1216,7 @@
1216 The allocation of PCI resources is done in the 1216 The allocation of PCI resources is done in the
1217 <function>probe()</function> function, and usually an extra 1217 <function>probe()</function> function, and usually an extra
1218 <function>xxx_create()</function> function is written for this 1218 <function>xxx_create()</function> function is written for this
1219 purpose. 1219 purpose.
1220 </para> 1220 </para>
1221 1221
1222 <para> 1222 <para>
@@ -1225,7 +1225,7 @@
1225 allocating resources. Also, you need to set the proper PCI DMA 1225 allocating resources. Also, you need to set the proper PCI DMA
1226 mask to limit the accessed i/o range. In some cases, you might 1226 mask to limit the accessed i/o range. In some cases, you might
1227 need to call <function>pci_set_master()</function> function, 1227 need to call <function>pci_set_master()</function> function,
1228 too. 1228 too.
1229 </para> 1229 </para>
1230 1230
1231 <para> 1231 <para>
@@ -1236,8 +1236,8 @@
1236<![CDATA[ 1236<![CDATA[
1237 if ((err = pci_enable_device(pci)) < 0) 1237 if ((err = pci_enable_device(pci)) < 0)
1238 return err; 1238 return err;
1239 if (pci_set_dma_mask(pci, 0x0fffffff) < 0 || 1239 if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 ||
1240 pci_set_consistent_dma_mask(pci, 0x0fffffff) < 0) { 1240 pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) {
1241 printk(KERN_ERR "error to set 28bit mask DMA\n"); 1241 printk(KERN_ERR "error to set 28bit mask DMA\n");
1242 pci_disable_device(pci); 1242 pci_disable_device(pci);
1243 return -ENXIO; 1243 return -ENXIO;
@@ -1256,13 +1256,13 @@
1256 functions. Unlike ALSA ver.0.5.x., there are no helpers for 1256 functions. Unlike ALSA ver.0.5.x., there are no helpers for
1257 that. And these resources must be released in the destructor 1257 that. And these resources must be released in the destructor
1258 function (see below). Also, on ALSA 0.9.x, you don't need to 1258 function (see below). Also, on ALSA 0.9.x, you don't need to
1259 allocate (pseudo-)DMA for PCI like ALSA 0.5.x. 1259 allocate (pseudo-)DMA for PCI like ALSA 0.5.x.
1260 </para> 1260 </para>
1261 1261
1262 <para> 1262 <para>
1263 Now assume that this PCI device has an I/O port with 8 bytes 1263 Now assume that this PCI device has an I/O port with 8 bytes
1264 and an interrupt. Then struct <structname>mychip</structname> will have the 1264 and an interrupt. Then struct <structname>mychip</structname> will have the
1265 following fields: 1265 following fields:
1266 1266
1267 <informalexample> 1267 <informalexample>
1268 <programlisting> 1268 <programlisting>
@@ -1565,7 +1565,7 @@
1565 <informalexample> 1565 <informalexample>
1566 <programlisting> 1566 <programlisting>
1567<![CDATA[ 1567<![CDATA[
1568 static struct pci_device_id snd_mychip_ids[] = { 1568 static struct pci_device_id snd_mychip_ids[] __devinitdata = {
1569 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, 1569 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
1570 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, 1570 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
1571 .... 1571 ....
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
new file mode 100644
index 000000000000..9c45f3df2e18
--- /dev/null
+++ b/Documentation/spi/pxa2xx
@@ -0,0 +1,234 @@
1PXA2xx SPI on SSP driver HOWTO
2===================================================
3This is a mini howto on the pxa2xx_spi driver. The driver turns a PXA2xx
4synchronous serial port into a SPI master controller
5(see Documentation/spi/spi-summary). The driver has the following features:
6
7- Support for any PXA2xx SSP
8- SSP PIO and SSP DMA data transfers.
9- External and Internal (SSPFRM) chip selects.
10- Per slave device (chip) configuration.
11- Full suspend, freeze, resume support.
12
13The driver is built around a "spi_message" fifo serviced by workqueue and a
14tasklet. The workqueue, "pump_messages", drives message fifo and the tasklet
15(pump_transfer) is responsible for queuing SPI transactions and setting up and
16launching the dma/interrupt driven transfers.
17
18Declaring PXA2xx Master Controllers
19-----------------------------------
20Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
21"platform device". The master configuration is passed to the driver via a table
22found in include/asm-arm/arch-pxa/pxa2xx_spi.h:
23
24struct pxa2xx_spi_master {
25 enum pxa_ssp_type ssp_type;
26 u32 clock_enable;
27 u16 num_chipselect;
28 u8 enable_dma;
29};
30
31The "pxa2xx_spi_master.ssp_type" field must have a value between 1 and 3 and
32informs the driver which features a particular SSP supports.
33
34The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
35corresponding SSP peripheral block in the "Clock Enable Register (CKEN)". See
36the "PXA2xx Developer Manual" section "Clocks and Power Management".
37
38The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of
39slave devices (chips) attached to this SPI master.
40
41The "pxa2xx_spi_master.enable_dma" field informs the driver that SSP DMA should
42be used. This causes the driver to acquire two DMA channels: rx_channel and
43tx_channel. The rx_channel has a higher DMA service priority than the tx_channel.
44See the "PXA2xx Developer Manual" section "DMA Controller".
45
46NSSP MASTER SAMPLE
47------------------
48Below is a sample configuration using the PXA255 NSSP.
49
50static struct resource pxa_spi_nssp_resources[] = {
51 [0] = {
52 .start = __PREG(SSCR0_P(2)), /* Start address of NSSP */
53 .end = __PREG(SSCR0_P(2)) + 0x2c, /* Range of registers */
54 .flags = IORESOURCE_MEM,
55 },
56 [1] = {
57 .start = IRQ_NSSP, /* NSSP IRQ */
58 .end = IRQ_NSSP,
59 .flags = IORESOURCE_IRQ,
60 },
61};
62
63static struct pxa2xx_spi_master pxa_nssp_master_info = {
64 .ssp_type = PXA25x_NSSP, /* Type of SSP */
65 .clock_enable = CKEN9_NSSP, /* NSSP Peripheral clock */
66 .num_chipselect = 1, /* Matches the number of chips attached to NSSP */
67 .enable_dma = 1, /* Enables NSSP DMA */
68};
69
70static struct platform_device pxa_spi_nssp = {
71 .name = "pxa2xx-spi", /* MUST BE THIS VALUE, so device match driver */
72 .id = 2, /* Bus number, MUST MATCH SSP number 1..n */
73 .resource = pxa_spi_nssp_resources,
74 .num_resources = ARRAY_SIZE(pxa_spi_nssp_resources),
75 .dev = {
76 .platform_data = &pxa_nssp_master_info, /* Passed to driver */
77 },
78};
79
80static struct platform_device *devices[] __initdata = {
81 &pxa_spi_nssp,
82};
83
84static void __init board_init(void)
85{
86 (void)platform_add_devices(devices, ARRAY_SIZE(devices));
87}
88
89Declaring Slave Devices
90-----------------------
91Typically each SPI slave (chip) is defined in the arch/.../mach-*/board-*.c
92using the "spi_board_info" structure found in "linux/spi/spi.h". See
93"Documentation/spi/spi-summary" for additional information.
94
95Each slave device attached to the PXA must provide slave specific configuration
96information via the structure "pxa2xx_spi_chip" found in
97"include/asm-arm/arch-pxa/pxa2xx_spi.h". The pxa2xx_spi master controller driver
98will use the configuration whenever the driver communicates with the slave
99device.
100
101struct pxa2xx_spi_chip {
102 u8 tx_threshold;
103 u8 rx_threshold;
104 u8 dma_burst_size;
105 u32 timeout_microsecs;
106 u8 enable_loopback;
107 void (*cs_control)(u32 command);
108};
109
110The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
111used to configure the SSP hardware fifo. These fields are critical to the
112performance of pxa2xx_spi driver and misconfiguration will result in rx
113fifo overruns (especially in PIO mode transfers). Good default values are
114
115 .tx_threshold = 12,
116 .rx_threshold = 4,
117
118The "pxa2xx_spi_chip.dma_burst_size" field is used to configure PXA2xx DMA
119engine and is related to the "spi_device.bits_per_word" field. Read and understand
120the PXA2xx "Developer Manual" sections on the DMA controller and SSP Controllers
121to determine the correct value. An SSP configured for byte-wide transfers would
122use a value of 8.
123
124The "pxa2xx_spi_chip.timeout_microsecs" field is used to efficiently handle
125trailing bytes in the SSP receiver fifo. The correct value for this field is
126dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
127slave device. Please note that the PXA2xx SSP 1 does not support trailing byte
128timeouts and must busy-wait any trailing bytes.
129
130The "pxa2xx_spi_chip.enable_loopback" field is used to place the SSP port
131into internal loopback mode. In this mode the SSP controller internally
132connects the SSPTX pin to the SSPRX pin. This is useful for initial setup
133testing.
134
135The "pxa2xx_spi_chip.cs_control" field is used to point to a board specific
136function for asserting/deasserting a slave device chip select. If the field is
137NULL, the pxa2xx_spi master controller driver assumes that the SSP port is
138configured to use SSPFRM instead.
139
140NSSP SLAVE SAMPLE
141-----------------
142The pxa2xx_spi_chip structure is passed to the pxa2xx_spi driver in the
143"spi_board_info.controller_data" field. Below is a sample configuration using
144the PXA255 NSSP.
145
146/* Chip Select control for the CS8415A SPI slave device */
147static void cs8415a_cs_control(u32 command)
148{
149 if (command & PXA2XX_CS_ASSERT)
150 GPCR(2) = GPIO_bit(2);
151 else
152 GPSR(2) = GPIO_bit(2);
153}
154
155/* Chip Select control for the CS8405A SPI slave device */
156static void cs8405a_cs_control(u32 command)
157{
158 if (command & PXA2XX_CS_ASSERT)
159 GPCR(3) = GPIO_bit(3);
160 else
161 GPSR(3) = GPIO_bit(3);
162}
163
164static struct pxa2xx_spi_chip cs8415a_chip_info = {
165 .tx_threshold = 12, /* SSP hardware FIFO threshold */
166 .rx_threshold = 4, /* SSP hardware FIFO threshold */
167 .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
168 .timeout_microsecs = 64, /* Wait at least 64usec to handle trailing */
169 .cs_control = cs8415a_cs_control, /* Use external chip select */
170};
171
172static struct pxa2xx_spi_chip cs8405a_chip_info = {
173 .tx_threshold = 12, /* SSP hardware FIFO threshold */
174 .rx_threshold = 4, /* SSP hardware FIFO threshold */
175 .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
176 .timeout_microsecs = 64, /* Wait at least 64usec to handle trailing */
177 .cs_control = cs8405a_cs_control, /* Use external chip select */
178};
179
180static struct spi_board_info streetracer_spi_board_info[] __initdata = {
181 {
182 .modalias = "cs8415a", /* Name of spi_driver for this device */
183 .max_speed_hz = 3686400, /* Run SSP as fast as possible */
184 .bus_num = 2, /* Framework bus number */
185 .chip_select = 0, /* Framework chip select */
186 .platform_data = NULL, /* No spi_driver specific config */
187 .controller_data = &cs8415a_chip_info, /* Master chip config */
188 .irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
189 },
190 {
191 .modalias = "cs8405a", /* Name of spi_driver for this device */
192 .max_speed_hz = 3686400, /* Run SSP as fast as possible */
193 .bus_num = 2, /* Framework bus number */
194 .chip_select = 1, /* Framework chip select */
195 .controller_data = &cs8405a_chip_info, /* Master chip config */
196 .irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
197 },
198};
199
200static void __init streetracer_init(void)
201{
202 spi_register_board_info(streetracer_spi_board_info,
203 ARRAY_SIZE(streetracer_spi_board_info));
204}
205
206
207DMA and PIO I/O Support
208-----------------------
209The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
210transfers. The driver defaults to PIO mode and DMA transfers must be enabled by
211setting the "enable_dma" flag in the "pxa2xx_spi_master" structure and
212ensuring that the "pxa2xx_spi_chip.dma_burst_size" field is non-zero. The DMA
213mode supports both coherent and stream based DMA mappings.
214
215The following logic is used to determine the type of I/O to be used on
216a per "spi_transfer" basis:
217
218if !enable_dma or dma_burst_size == 0 then
219 always use PIO transfers
220
221if spi_message.is_dma_mapped and rx_dma_buf != 0 and tx_dma_buf != 0 then
222 use coherent DMA mode
223
224if rx_buf and tx_buf are aligned on 8 byte boundary then
225 use streaming DMA mode
226
227otherwise
228 use PIO transfer
229
230THANKS TO
231---------
232
233David Brownell and others for mentoring the development of this driver.
234
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index a5ffba33a351..068732d32276 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -414,7 +414,33 @@ to get the driver-private data allocated for that device.
414The driver will initialize the fields of that spi_master, including the 414The driver will initialize the fields of that spi_master, including the
415bus number (maybe the same as the platform device ID) and three methods 415bus number (maybe the same as the platform device ID) and three methods
416used to interact with the SPI core and SPI protocol drivers. It will 416used to interact with the SPI core and SPI protocol drivers. It will
417also initialize its own internal state. 417also initialize its own internal state. (See below about bus numbering
418and those methods.)
419
420After you initialize the spi_master, then use spi_register_master() to
421publish it to the rest of the system. At that time, device nodes for
422the controller and any predeclared spi devices will be made available,
423and the driver model core will take care of binding them to drivers.
424
425If you need to remove your SPI controller driver, spi_unregister_master()
426will reverse the effect of spi_register_master().
427
428
429BUS NUMBERING
430
431Bus numbering is important, since that's how Linux identifies a given
432SPI bus (shared SCK, MOSI, MISO). Valid bus numbers start at zero. On
433SOC systems, the bus numbers should match the numbers defined by the chip
434manufacturer. For example, hardware controller SPI2 would be bus number 2,
435and spi_board_info for devices connected to it would use that number.
436
437If you don't have such a hardware-assigned bus number, and for some reason
438you can't just assign them, then provide a negative bus number. That will
439then be replaced by a dynamically assigned number. You'd then need to treat
440this as a non-static configuration (see above).
441
442
443SPI MASTER METHODS
418 444
419 master->setup(struct spi_device *spi) 445 master->setup(struct spi_device *spi)
420 This sets up the device clock rate, SPI mode, and word sizes. 446 This sets up the device clock rate, SPI mode, and word sizes.
@@ -431,6 +457,9 @@ also initialize its own internal state.
431 state it dynamically associates with that device. If you do that, 457 state it dynamically associates with that device. If you do that,
432 be sure to provide the cleanup() method to free that state. 458 be sure to provide the cleanup() method to free that state.
433 459
460
461SPI MESSAGE QUEUE
462
434The bulk of the driver will be managing the I/O queue fed by transfer(). 463The bulk of the driver will be managing the I/O queue fed by transfer().
435 464
436That queue could be purely conceptual. For example, a driver used only 465That queue could be purely conceptual. For example, a driver used only
@@ -440,6 +469,9 @@ But the queue will probably be very real, using message->queue, PIO,
440often DMA (especially if the root filesystem is in SPI flash), and 469often DMA (especially if the root filesystem is in SPI flash), and
441execution contexts like IRQ handlers, tasklets, or workqueues (such 470execution contexts like IRQ handlers, tasklets, or workqueues (such
442as keventd). Your driver can be as fancy, or as simple, as you need. 471as keventd). Your driver can be as fancy, or as simple, as you need.
472Such a transfer() method would normally just add the message to a
473queue, and then start some asynchronous transfer engine (unless it's
474already running).
443 475
444 476
445THANKS TO 477THANKS TO
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index 8c7195455963..bca50903233f 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -52,7 +52,7 @@
52 51 -> ProVideo PV952 [1540:9524] 52 51 -> ProVideo PV952 [1540:9524]
53 52 -> AverMedia AverTV/305 [1461:2108] 53 52 -> AverMedia AverTV/305 [1461:2108]
54 53 -> ASUS TV-FM 7135 [1043:4845] 54 53 -> ASUS TV-FM 7135 [1043:4845]
55 54 -> LifeView FlyTV Platinum FM [5168:0214,1489:0214] 55 54 -> LifeView FlyTV Platinum FM / Gold [5168:0214,1489:0214,5168:0304]
56 55 -> LifeView FlyDVB-T DUO [5168:0306] 56 55 -> LifeView FlyDVB-T DUO [5168:0306]
57 56 -> Avermedia AVerTV 307 [1461:a70a] 57 56 -> Avermedia AVerTV 307 [1461:a70a]
58 57 -> Avermedia AVerTV GO 007 FM [1461:f31f] 58 57 -> Avermedia AVerTV GO 007 FM [1461:f31f]
@@ -84,7 +84,7 @@
84 83 -> Terratec Cinergy 250 PCI TV [153b:1160] 84 83 -> Terratec Cinergy 250 PCI TV [153b:1160]
85 84 -> LifeView FlyDVB Trio [5168:0319] 85 84 -> LifeView FlyDVB Trio [5168:0319]
86 85 -> AverTV DVB-T 777 [1461:2c05] 86 85 -> AverTV DVB-T 777 [1461:2c05]
87 86 -> LifeView FlyDVB-T [5168:0301] 87 86 -> LifeView FlyDVB-T / Genius VideoWonder DVB-T [5168:0301,1489:0301]
88 87 -> ADS Instant TV Duo Cardbus PTV331 [0331:1421] 88 87 -> ADS Instant TV Duo Cardbus PTV331 [0331:1421]
89 88 -> Tevion/KWorld DVB-T 220RF [17de:7201] 89 88 -> Tevion/KWorld DVB-T 220RF [17de:7201]
90 89 -> ELSA EX-VISION 700TV [1048:226c] 90 89 -> ELSA EX-VISION 700TV [1048:226c]
@@ -92,3 +92,4 @@
92 91 -> AVerMedia A169 B [1461:7360] 92 91 -> AVerMedia A169 B [1461:7360]
93 92 -> AVerMedia A169 B1 [1461:6360] 93 92 -> AVerMedia A169 B1 [1461:6360]
94 93 -> Medion 7134 Bridge #2 [16be:0005] 94 93 -> Medion 7134 Bridge #2 [16be:0005]
95 94 -> LifeView FlyDVB-T Hybrid Cardbus [5168:3306,5168:3502]
diff --git a/Documentation/usb/et61x251.txt b/Documentation/video4linux/et61x251.txt
index 29340282ab5f..29340282ab5f 100644
--- a/Documentation/usb/et61x251.txt
+++ b/Documentation/video4linux/et61x251.txt
diff --git a/Documentation/usb/ibmcam.txt b/Documentation/video4linux/ibmcam.txt
index c25003644131..4a40a2e99451 100644
--- a/Documentation/usb/ibmcam.txt
+++ b/Documentation/video4linux/ibmcam.txt
@@ -122,7 +122,7 @@ WHAT YOU NEED:
122- A Linux box with USB support (2.3/2.4; 2.2 w/backport may work) 122- A Linux box with USB support (2.3/2.4; 2.2 w/backport may work)
123 123
124- A Video4Linux compatible frame grabber program such as xawtv. 124- A Video4Linux compatible frame grabber program such as xawtv.
125 125
126HOW TO COMPILE THE DRIVER: 126HOW TO COMPILE THE DRIVER:
127 127
128You need to compile the driver only if you are a developer 128You need to compile the driver only if you are a developer
diff --git a/Documentation/usb/ov511.txt b/Documentation/video4linux/ov511.txt
index a7fc0432bff1..142741e3c578 100644
--- a/Documentation/usb/ov511.txt
+++ b/Documentation/video4linux/ov511.txt
@@ -9,7 +9,7 @@ INTRODUCTION:
9 9
10This is a driver for the OV511, a USB-only chip used in many "webcam" devices. 10This is a driver for the OV511, a USB-only chip used in many "webcam" devices.
11Any camera using the OV511/OV511+ and the OV6620/OV7610/20/20AE should work. 11Any camera using the OV511/OV511+ and the OV6620/OV7610/20/20AE should work.
12Video capture devices that use the Philips SAA7111A decoder also work. It 12Video capture devices that use the Philips SAA7111A decoder also work. It
13supports streaming and capture of color or monochrome video via the Video4Linux 13supports streaming and capture of color or monochrome video via the Video4Linux
14API. Most V4L apps are compatible with it. Most resolutions with a width and 14API. Most V4L apps are compatible with it. Most resolutions with a width and
15height that are a multiple of 8 are supported. 15height that are a multiple of 8 are supported.
@@ -52,15 +52,15 @@ from it:
52 52
53 chmod 666 /dev/video 53 chmod 666 /dev/video
54 chmod 666 /dev/video0 (if necessary) 54 chmod 666 /dev/video0 (if necessary)
55 55
56Now you are ready to run a video app! Both vidcat and xawtv work well for me 56Now you are ready to run a video app! Both vidcat and xawtv work well for me
57at 640x480. 57at 640x480.
58 58
59[Using vidcat:] 59[Using vidcat:]
60 60
61 vidcat -s 640x480 -p c > test.jpg 61 vidcat -s 640x480 -p c > test.jpg
62 xview test.jpg 62 xview test.jpg
63 63
64[Using xawtv:] 64[Using xawtv:]
65 65
66From the main xawtv directory: 66From the main xawtv directory:
@@ -70,7 +70,7 @@ From the main xawtv directory:
70 make 70 make
71 make install 71 make install
72 72
73Now you should be able to run xawtv. Right click for the options dialog. 73Now you should be able to run xawtv. Right click for the options dialog.
74 74
75MODULE PARAMETERS: 75MODULE PARAMETERS:
76 76
@@ -286,4 +286,3 @@ Randy Dunlap, and others. Big thanks to them for their pioneering work on that
286and the USB stack. Thanks to Bret Wallach for getting camera reg IO, ISOC, and 286and the USB stack. Thanks to Bret Wallach for getting camera reg IO, ISOC, and
287image capture working. Thanks to Orion Sky Lawlor, Kevin Moore, and Claudio 287image capture working. Thanks to Orion Sky Lawlor, Kevin Moore, and Claudio
288Matsuoka for their work as well. 288Matsuoka for their work as well.
289
diff --git a/Documentation/usb/se401.txt b/Documentation/video4linux/se401.txt
index 7b9d1c960a10..7b9d1c960a10 100644
--- a/Documentation/usb/se401.txt
+++ b/Documentation/video4linux/se401.txt
diff --git a/Documentation/usb/sn9c102.txt b/Documentation/video4linux/sn9c102.txt
index b957beae5607..142920bc011f 100644
--- a/Documentation/usb/sn9c102.txt
+++ b/Documentation/video4linux/sn9c102.txt
@@ -174,7 +174,7 @@ Module parameters are listed below:
174------------------------------------------------------------------------------- 174-------------------------------------------------------------------------------
175Name: video_nr 175Name: video_nr
176Type: short array (min = 0, max = 64) 176Type: short array (min = 0, max = 64)
177Syntax: <-1|n[,...]> 177Syntax: <-1|n[,...]>
178Description: Specify V4L2 minor mode number: 178Description: Specify V4L2 minor mode number:
179 -1 = use next available 179 -1 = use next available
180 n = use minor number n 180 n = use minor number n
@@ -187,7 +187,7 @@ Default: -1
187------------------------------------------------------------------------------- 187-------------------------------------------------------------------------------
188Name: force_munmap 188Name: force_munmap
189Type: bool array (min = 0, max = 64) 189Type: bool array (min = 0, max = 64)
190Syntax: <0|1[,...]> 190Syntax: <0|1[,...]>
191Description: Force the application to unmap previously mapped buffer memory 191Description: Force the application to unmap previously mapped buffer memory
192 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not 192 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
193 all the applications support this feature. This parameter is 193 all the applications support this feature. This parameter is
@@ -206,7 +206,7 @@ Default: 2
206------------------------------------------------------------------------------- 206-------------------------------------------------------------------------------
207Name: debug 207Name: debug
208Type: ushort 208Type: ushort
209Syntax: <n> 209Syntax: <n>
210Description: Debugging information level, from 0 to 3: 210Description: Debugging information level, from 0 to 3:
211 0 = none (use carefully) 211 0 = none (use carefully)
212 1 = critical errors 212 1 = critical errors
@@ -267,7 +267,7 @@ The sysfs interface also provides the "frame_header" entry, which exports the
267frame header of the most recent requested and captured video frame. The header 267frame header of the most recent requested and captured video frame. The header
268is always 18-bytes long and is appended to every video frame by the SN9C10x 268is always 18-bytes long and is appended to every video frame by the SN9C10x
269controllers. As an example, this additional information can be used by the user 269controllers. As an example, this additional information can be used by the user
270application for implementing auto-exposure features via software. 270application for implementing auto-exposure features via software.
271 271
272The following table describes the frame header: 272The following table describes the frame header:
273 273
@@ -441,7 +441,7 @@ blue pixels in one video frame. Each pixel is associated with a 8-bit long
441value and is disposed in memory according to the pattern shown below: 441value and is disposed in memory according to the pattern shown below:
442 442
443B[0] G[1] B[2] G[3] ... B[m-2] G[m-1] 443B[0] G[1] B[2] G[3] ... B[m-2] G[m-1]
444G[m] R[m+1] G[m+2] R[m+2] ... G[2m-2] R[2m-1] 444G[m] R[m+1] G[m+2] R[m+2] ... G[2m-2] R[2m-1]
445... 445...
446... B[(n-1)(m-2)] G[(n-1)(m-1)] 446... B[(n-1)(m-2)] G[(n-1)(m-1)]
447... G[n(m-2)] R[n(m-1)] 447... G[n(m-2)] R[n(m-1)]
@@ -472,12 +472,12 @@ The pixel reference value is calculated as follows:
472The algorithm purely describes the conversion from compressed Bayer code used 472The algorithm purely describes the conversion from compressed Bayer code used
473in the SN9C10x chips to uncompressed Bayer. Additional steps are required to 473in the SN9C10x chips to uncompressed Bayer. Additional steps are required to
474convert this to a color image (i.e. a color interpolation algorithm). 474convert this to a color image (i.e. a color interpolation algorithm).
475 475
476The following Huffman codes have been found: 476The following Huffman codes have been found:
4770: +0 (relative to reference pixel value) 4770: +0 (relative to reference pixel value)
478100: +4 478100: +4
479101: -4? 479101: -4?
4801110xxxx: set absolute value to xxxx.0000 4801110xxxx: set absolute value to xxxx.0000
4811101: +11 4811101: +11
4821111: -11 4821111: -11
48311001: +20 48311001: +20
diff --git a/Documentation/usb/stv680.txt b/Documentation/video4linux/stv680.txt
index 6448041e7a37..4f8946f32f51 100644
--- a/Documentation/usb/stv680.txt
+++ b/Documentation/video4linux/stv680.txt
@@ -5,15 +5,15 @@ Copyright, 2001, Kevin Sisson
5 5
6INTRODUCTION: 6INTRODUCTION:
7 7
8STMicroelectronics produces the STV0680B chip, which comes in two 8STMicroelectronics produces the STV0680B chip, which comes in two
9types, -001 and -003. The -003 version allows the recording and downloading 9types, -001 and -003. The -003 version allows the recording and downloading
10of sound clips from the camera, and allows a flash attachment. Otherwise, 10of sound clips from the camera, and allows a flash attachment. Otherwise,
11it uses the same commands as the -001 version. Both versions support a 11it uses the same commands as the -001 version. Both versions support a
12variety of SDRAM sizes and sensors, allowing for a maximum of 26 VGA or 20 12variety of SDRAM sizes and sensors, allowing for a maximum of 26 VGA or 20
13CIF pictures. The STV0680 supports either a serial or a usb interface, and 13CIF pictures. The STV0680 supports either a serial or a usb interface, and
14video is possible through the usb interface. 14video is possible through the usb interface.
15 15
16The following cameras are known to work with this driver, although any 16The following cameras are known to work with this driver, although any
17camera with Vendor/Product codes of 0553/0202 should work: 17camera with Vendor/Product codes of 0553/0202 should work:
18 18
19Aiptek Pencam (various models) 19Aiptek Pencam (various models)
@@ -34,15 +34,15 @@ http://www.linux-usb.org
34MODULE OPTIONS: 34MODULE OPTIONS:
35 35
36When the driver is compiled as a module, you can set a "swapRGB=1" 36When the driver is compiled as a module, you can set a "swapRGB=1"
37option, if necessary, for those applications that require it 37option, if necessary, for those applications that require it
38(such as xawtv). However, the driver should detect and set this 38(such as xawtv). However, the driver should detect and set this
39automatically, so this option should not normally be used. 39automatically, so this option should not normally be used.
40 40
41 41
42KNOWN PROBLEMS: 42KNOWN PROBLEMS:
43 43
44The driver seems to work better with the usb-ohci than the usb-uhci host 44The driver seems to work better with the usb-ohci than the usb-uhci host
45controller driver. 45controller driver.
46 46
47HELP: 47HELP:
48 48
@@ -50,6 +50,4 @@ The latest info on this driver can be found at:
50http://personal.clt.bellsouth.net/~kjsisson or at 50http://personal.clt.bellsouth.net/~kjsisson or at
51http://stv0680-usb.sourceforge.net 51http://stv0680-usb.sourceforge.net
52 52
53Any questions to me can be send to: kjsisson@bellsouth.net 53Any questions to me can be send to: kjsisson@bellsouth.net \ No newline at end of file
54
55
diff --git a/Documentation/usb/w9968cf.txt b/Documentation/video4linux/w9968cf.txt
index 9d46cd0b19e3..3b704f2aae6d 100644
--- a/Documentation/usb/w9968cf.txt
+++ b/Documentation/video4linux/w9968cf.txt
@@ -1,5 +1,5 @@
1 1
2 W996[87]CF JPEG USB Dual Mode Camera Chip 2 W996[87]CF JPEG USB Dual Mode Camera Chip
3 Driver for Linux 2.6 (basic version) 3 Driver for Linux 2.6 (basic version)
4 ========================================= 4 =========================================
5 5
@@ -115,7 +115,7 @@ additional testing and full support, would be much appreciated.
115====================== 115======================
116For it to work properly, the driver needs kernel support for Video4Linux, USB 116For it to work properly, the driver needs kernel support for Video4Linux, USB
117and I2C, and the "ovcamchip" module for the image sensor. Make sure you are not 117and I2C, and the "ovcamchip" module for the image sensor. Make sure you are not
118actually using any external "ovcamchip" module, given that the W996[87]CF 118actually using any external "ovcamchip" module, given that the W996[87]CF
119driver depends on the version of the module present in the official kernels. 119driver depends on the version of the module present in the official kernels.
120 120
121The following options of the kernel configuration file must be enabled and 121The following options of the kernel configuration file must be enabled and
@@ -197,16 +197,16 @@ Note: The kernel must be compiled with the CONFIG_KMOD option
197 enabled for the 'ovcamchip' module to be loaded and for 197 enabled for the 'ovcamchip' module to be loaded and for
198 this parameter to be present. 198 this parameter to be present.
199------------------------------------------------------------------------------- 199-------------------------------------------------------------------------------
200Name: simcams 200Name: simcams
201Type: int 201Type: int
202Syntax: <n> 202Syntax: <n>
203Description: Number of cameras allowed to stream simultaneously. 203Description: Number of cameras allowed to stream simultaneously.
204 n may vary from 0 to 32. 204 n may vary from 0 to 32.
205Default: 32 205Default: 32
206------------------------------------------------------------------------------- 206-------------------------------------------------------------------------------
207Name: video_nr 207Name: video_nr
208Type: int array (min = 0, max = 32) 208Type: int array (min = 0, max = 32)
209Syntax: <-1|n[,...]> 209Syntax: <-1|n[,...]>
210Description: Specify V4L minor mode number. 210Description: Specify V4L minor mode number.
211 -1 = use next available 211 -1 = use next available
212 n = use minor number n 212 n = use minor number n
@@ -219,7 +219,7 @@ Default: -1
219------------------------------------------------------------------------------- 219-------------------------------------------------------------------------------
220Name: packet_size 220Name: packet_size
221Type: int array (min = 0, max = 32) 221Type: int array (min = 0, max = 32)
222Syntax: <n[,...]> 222Syntax: <n[,...]>
223Description: Specify the maximum data payload size in bytes for alternate 223Description: Specify the maximum data payload size in bytes for alternate
224 settings, for each device. n is scaled between 63 and 1023. 224 settings, for each device. n is scaled between 63 and 1023.
225Default: 1023 225Default: 1023
@@ -234,7 +234,7 @@ Default: 2
234------------------------------------------------------------------------------- 234-------------------------------------------------------------------------------
235Name: double_buffer 235Name: double_buffer
236Type: bool array (min = 0, max = 32) 236Type: bool array (min = 0, max = 32)
237Syntax: <0|1[,...]> 237Syntax: <0|1[,...]>
238Description: Hardware double buffering: 0 disabled, 1 enabled. 238Description: Hardware double buffering: 0 disabled, 1 enabled.
239 It should be enabled if you want smooth video output: if you 239 It should be enabled if you want smooth video output: if you
240 obtain out of sync. video, disable it, or try to 240 obtain out of sync. video, disable it, or try to
@@ -243,13 +243,13 @@ Default: 1 for every device.
243------------------------------------------------------------------------------- 243-------------------------------------------------------------------------------
244Name: clamping 244Name: clamping
245Type: bool array (min = 0, max = 32) 245Type: bool array (min = 0, max = 32)
246Syntax: <0|1[,...]> 246Syntax: <0|1[,...]>
247Description: Video data clamping: 0 disabled, 1 enabled. 247Description: Video data clamping: 0 disabled, 1 enabled.
248Default: 0 for every device. 248Default: 0 for every device.
249------------------------------------------------------------------------------- 249-------------------------------------------------------------------------------
250Name: filter_type 250Name: filter_type
251Type: int array (min = 0, max = 32) 251Type: int array (min = 0, max = 32)
252Syntax: <0|1|2[,...]> 252Syntax: <0|1|2[,...]>
253Description: Video filter type. 253Description: Video filter type.
254 0 none, 1 (1-2-1) 3-tap filter, 2 (2-3-6-3-2) 5-tap filter. 254 0 none, 1 (1-2-1) 3-tap filter, 2 (2-3-6-3-2) 5-tap filter.
255 The filter is used to reduce noise and aliasing artifacts 255 The filter is used to reduce noise and aliasing artifacts
@@ -258,13 +258,13 @@ Default: 0 for every device.
258------------------------------------------------------------------------------- 258-------------------------------------------------------------------------------
259Name: largeview 259Name: largeview
260Type: bool array (min = 0, max = 32) 260Type: bool array (min = 0, max = 32)
261Syntax: <0|1[,...]> 261Syntax: <0|1[,...]>
262Description: Large view: 0 disabled, 1 enabled. 262Description: Large view: 0 disabled, 1 enabled.
263Default: 1 for every device. 263Default: 1 for every device.
264------------------------------------------------------------------------------- 264-------------------------------------------------------------------------------
265Name: upscaling 265Name: upscaling
266Type: bool array (min = 0, max = 32) 266Type: bool array (min = 0, max = 32)
267Syntax: <0|1[,...]> 267Syntax: <0|1[,...]>
268Description: Software scaling (for non-compressed video only): 268Description: Software scaling (for non-compressed video only):
269 0 disabled, 1 enabled. 269 0 disabled, 1 enabled.
270 Disable it if you have a slow CPU or you don't have enough 270 Disable it if you have a slow CPU or you don't have enough
@@ -341,8 +341,8 @@ Default: 50 for every device.
341------------------------------------------------------------------------------- 341-------------------------------------------------------------------------------
342Name: bandingfilter 342Name: bandingfilter
343Type: bool array (min = 0, max = 32) 343Type: bool array (min = 0, max = 32)
344Syntax: <0|1[,...]> 344Syntax: <0|1[,...]>
345Description: Banding filter to reduce effects of fluorescent 345Description: Banding filter to reduce effects of fluorescent
346 lighting: 346 lighting:
347 0 disabled, 1 enabled. 347 0 disabled, 1 enabled.
348 This filter tries to reduce the pattern of horizontal 348 This filter tries to reduce the pattern of horizontal
@@ -374,7 +374,7 @@ Default: 0 for every device.
374------------------------------------------------------------------------------- 374-------------------------------------------------------------------------------
375Name: monochrome 375Name: monochrome
376Type: bool array (min = 0, max = 32) 376Type: bool array (min = 0, max = 32)
377Syntax: <0|1[,...]> 377Syntax: <0|1[,...]>
378Description: The image sensor is monochrome: 378Description: The image sensor is monochrome:
379 0 = no, 1 = yes 379 0 = no, 1 = yes
380Default: 0 for every device. 380Default: 0 for every device.
@@ -400,19 +400,19 @@ Default: 32768 for every device.
400------------------------------------------------------------------------------- 400-------------------------------------------------------------------------------
401Name: contrast 401Name: contrast
402Type: long array (min = 0, max = 32) 402Type: long array (min = 0, max = 32)
403Syntax: <n[,...]> 403Syntax: <n[,...]>
404Description: Set picture contrast (0-65535). 404Description: Set picture contrast (0-65535).
405Default: 50000 for every device. 405Default: 50000 for every device.
406------------------------------------------------------------------------------- 406-------------------------------------------------------------------------------
407Name: whiteness 407Name: whiteness
408Type: long array (min = 0, max = 32) 408Type: long array (min = 0, max = 32)
409Syntax: <n[,...]> 409Syntax: <n[,...]>
410Description: Set picture whiteness (0-65535). 410Description: Set picture whiteness (0-65535).
411Default: 32768 for every device. 411Default: 32768 for every device.
412------------------------------------------------------------------------------- 412-------------------------------------------------------------------------------
413Name: debug 413Name: debug
414Type: int 414Type: int
415Syntax: <n> 415Syntax: <n>
416Description: Debugging information level, from 0 to 6: 416Description: Debugging information level, from 0 to 6:
417 0 = none (use carefully) 417 0 = none (use carefully)
418 1 = critical errors 418 1 = critical errors
diff --git a/Documentation/usb/zc0301.txt b/Documentation/video4linux/zc0301.txt
index f55262c6733b..f55262c6733b 100644
--- a/Documentation/usb/zc0301.txt
+++ b/Documentation/video4linux/zc0301.txt
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 1ad9af1ca4d0..687104bfd09a 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -27,12 +27,21 @@ number of free hugetlb pages at any time. It also displays information about
27the configured hugepage size - this is needed for generating the proper 27the configured hugepage size - this is needed for generating the proper
28alignment and size of the arguments to the above system calls. 28alignment and size of the arguments to the above system calls.
29 29
30The output of "cat /proc/meminfo" will have output like: 30The output of "cat /proc/meminfo" will have lines like:
31 31
32..... 32.....
33HugePages_Total: xxx 33HugePages_Total: xxx
34HugePages_Free: yyy 34HugePages_Free: yyy
35Hugepagesize: zzz KB 35HugePages_Rsvd: www
36Hugepagesize: zzz kB
37
38where:
39HugePages_Total is the size of the pool of hugepages.
40HugePages_Free is the number of hugepages in the pool that are not yet
41allocated.
42HugePages_Rsvd is short for "reserved," and is the number of hugepages
43for which a commitment to allocate from the pool has been made, but no
44allocation has yet been made. It's vaguely analogous to overcommit.
36 45
37/proc/filesystems should also show a filesystem of type "hugetlbfs" configured 46/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
38in the kernel. 47in the kernel.
@@ -42,11 +51,11 @@ pages in the kernel. Super user can dynamically request more (or free some
42pre-configured) hugepages. 51pre-configured) hugepages.
43The allocation (or deallocation) of hugetlb pages is possible only if there are 52The allocation (or deallocation) of hugetlb pages is possible only if there are
44enough physically contiguous free pages in system (freeing of hugepages is 53enough physically contiguous free pages in system (freeing of hugepages is
45possible only if there are enough hugetlb pages free that can be transfered 54possible only if there are enough hugetlb pages free that can be transferred
46back to regular memory pool). 55back to regular memory pool).
47 56
48Pages that are used as hugetlb pages are reserved inside the kernel and can 57Pages that are used as hugetlb pages are reserved inside the kernel and cannot
49not be used for other purposes. 58be used for other purposes.
50 59
51Once the kernel with Hugetlb page support is built and running, a user can 60Once the kernel with Hugetlb page support is built and running, a user can
52use either the mmap system call or shared memory system calls to start using 61use either the mmap system call or shared memory system calls to start using
@@ -60,7 +69,7 @@ Use the following command to dynamically allocate/deallocate hugepages:
60This command will try to configure 20 hugepages in the system. The success 69This command will try to configure 20 hugepages in the system. The success
61or failure of allocation depends on the amount of physically contiguous 70or failure of allocation depends on the amount of physically contiguous
62memory that is preset in system at this time. System administrators may want 71memory that is preset in system at this time. System administrators may want
63to put this command in one of the local rc init file. This will enable the 72to put this command in one of the local rc init files. This will enable the
64kernel to request huge pages early in the boot process (when the possibility 73kernel to request huge pages early in the boot process (when the possibility
65of getting physical contiguous pages is still very high). 74of getting physical contiguous pages is still very high).
66 75
@@ -78,8 +87,8 @@ the uid and gid of the current process are taken. The mode option sets the
78mode of root of file system to value & 0777. This value is given in octal. 87mode of root of file system to value & 0777. This value is given in octal.
79By default the value 0755 is picked. The size option sets the maximum value of 88By default the value 0755 is picked. The size option sets the maximum value of
80memory (huge pages) allowed for that filesystem (/mnt/huge). The size is 89memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
81rounded down to HPAGE_SIZE. The option nr_inode sets the maximum number of 90rounded down to HPAGE_SIZE. The option nr_inodes sets the maximum number of
82inodes that /mnt/huge can use. If the size or nr_inode options are not 91inodes that /mnt/huge can use. If the size or nr_inodes options are not
83provided on command line then no limits are set. For size and nr_inodes 92provided on command line then no limits are set. For size and nr_inodes
84options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For 93options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
85example, size=2K has the same meaning as size=2048. An example is given at 94example, size=2K has the same meaning as size=2048. An example is given at
@@ -88,7 +97,7 @@ the end of this document.
88read and write system calls are not supported on files that reside on hugetlb 97read and write system calls are not supported on files that reside on hugetlb
89file systems. 98file systems.
90 99
91A regular chown, chgrp and chmod commands (with right permissions) could be 100Regular chown, chgrp, and chmod commands (with right permissions) could be
92used to change the file attributes on hugetlbfs. 101used to change the file attributes on hugetlbfs.
93 102
94Also, it is important to note that no such mount command is required if the 103Also, it is important to note that no such mount command is required if the
@@ -96,8 +105,8 @@ applications are going to use only shmat/shmget system calls. Users who
96wish to use hugetlb page via shared memory segment should be a member of 105wish to use hugetlb page via shared memory segment should be a member of
97a supplementary group and system admin needs to configure that gid into 106a supplementary group and system admin needs to configure that gid into
98/proc/sys/vm/hugetlb_shm_group. It is possible for the same or different 107/proc/sys/vm/hugetlb_shm_group. It is possible for the same or different
99applications to use any combination of mmaps and shm* calls. Though the 108applications to use any combination of mmaps and shm* calls, though the
100mount of filesystem will be required for using mmaps. 109mount of filesystem will be required for using mmap calls.
101 110
102******************************************************************* 111*******************************************************************
103 112
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
index c5beb548cfc4..21ed51173662 100644
--- a/Documentation/watchdog/watchdog-api.txt
+++ b/Documentation/watchdog/watchdog-api.txt
@@ -36,6 +36,9 @@ timeout or margin. The simplest way to ping the watchdog is to write
36some data to the device. So a very simple watchdog daemon would look 36some data to the device. So a very simple watchdog daemon would look
37like this: 37like this:
38 38
39#include <stdlib.h>
40#include <fcntl.h>
41
39int main(int argc, const char *argv[]) { 42int main(int argc, const char *argv[]) {
40 int fd=open("/dev/watchdog",O_WRONLY); 43 int fd=open("/dev/watchdog",O_WRONLY);
41 if (fd==-1) { 44 if (fd==-1) {
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 1921353259ae..f2cd6ef53ff3 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -151,6 +151,11 @@ NUMA
151 151
152 numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine. 152 numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine.
153 153
154 numa=hotadd=percent
155 Only allow hotadd memory to preallocate page structures up to
156 percent of already available memory.
157 numa=hotadd=0 will disable hotadd memory.
158
154ACPI 159ACPI
155 160
156 acpi=off Don't enable ACPI 161 acpi=off Don't enable ACPI